diff -Naurp linux-2.4.20-wolk4.0s/Documentation/Configure.help linux-2.4.20-wolk4.1-fullkernel/Documentation/Configure.help --- linux-2.4.20-wolk4.0s/Documentation/Configure.help 2003-05-15 21:52:18.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/Documentation/Configure.help 2003-05-19 13:03:55.000000000 +0200 @@ -1485,7 +1485,8 @@ CONFIG_HZ your desktop behaviour, for example sound output *may* stutter. So, if you have very unusual behaviour of your desktop with HZ=1000, set - this back to 100 and try again. + this back to 100 and try again. You can also try using HZ=200. This should + work ok. If unsure, leave the default 100. @@ -2252,10 +2253,11 @@ CONFIG_BLK_DEV_ELEVATOR_LOWLAT For the interested ones: ------------------------ - nr_requests: 4 + nr_requests: 32 read_passovers: 0 write_passovers: 0 - bdflush: 50, 500, 0, 0, 5*HZ, 30*HZ, 60, 20, 0 + max_bomb_segments: 1 + bdflush: 30/50, 500, 0, 0, 5*HZ, 30*HZ, 60, 20, 0 You can, for sure, lower the latency much more but you'll experience less write throughput. @@ -25511,7 +25513,7 @@ CONFIG_FILE_RESERVED root after the hard limit is reached. Don't lower this value unless you know what you are doing! - If unsure, use the default of 128. + If unsure, use the default of 256. Maximum amount of unix sockets CONFIG_UNIX_MAX_SOCKETS diff -Naurp linux-2.4.20-wolk4.0s/Documentation/filesystems/proc.txt linux-2.4.20-wolk4.1-fullkernel/Documentation/filesystems/proc.txt --- linux-2.4.20-wolk4.0s/Documentation/filesystems/proc.txt 2003-05-15 21:52:18.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/Documentation/filesystems/proc.txt 2003-05-16 13:36:57.000000000 +0200 @@ -1892,25 +1892,6 @@ values here give more preferance to runn of expired tasks. Lower values provide more fair scheduling behavior, at the expense of interactivity. The units are in milliseconds. -thead_penalty -------------- - -Limit sum of timeslices used by a threadgroup to 100/n timeslices. This -is used to prevent heavily thread applications from slowing down the system -when many threads are active. For this item, threads are defined as processes -sharing their mm and files. This implies that if this is set to 33 and six -processes from a given threadgroup are in runqueues each process will have its -timeslice reduced by 50%. Set to zero to disable. - -user_penalty ------------- - -Limit the sum of timeslices used by a user to 100/n timeslices. This prevents -one user from stealing the cpu by creating many active threads. For example, -if this is set to 25 and six processes are in runqueues the timeslice of each -process will be reduced by 33%. Set to zero to disable - root is always -excluded from this logic. - ------------------------------------------------------------------------------ Summary ------------------------------------------------------------------------------ diff -Naurp linux-2.4.20-wolk4.0s/INDEX linux-2.4.20-wolk4.1-fullkernel/INDEX --- linux-2.4.20-wolk4.0s/INDEX 2003-05-15 21:52:19.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/INDEX 1970-01-01 01:00:00.000000000 +0100 @@ -1,29 +0,0 @@ -Kernel patches against 2.4.20 + 2.4.20-dm-10 for EVMS 2.0.1. Apply in the -following order, with: - -cd /usr/src/linux-2.4.20/ -patch -p1 < filename.patch - -1-dm-base.patch: - Extra patch for the base device-mapper code to operate correctly with - the new EVMS engine. - -2-syncio.patch: - Patch to provide a synchronous I/O service to device-mapper targets. - -3-dm-bbr.patch: - Patch to provide a Bad-Block-Relocation target for device-mapper. 
- -4-dm-sparse.patch: - Patch to provide a Sparse-device target for device-mapper. - -5-md.c.patch: - Extra patch for the base software-RAID code to operate correctly with - the new EVMS engine. - -6-vsprintf.c.patch: - Extra patch for a kernel library to properly scan hex digits. - -7-vfs-lock.patch - Patch to add VFS-locking code. Required for using snapshots with - journalled filesystems. diff -Naurp linux-2.4.20-wolk4.0s/Makefile linux-2.4.20-wolk4.1-fullkernel/Makefile --- linux-2.4.20-wolk4.0s/Makefile 2003-05-15 21:52:19.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/Makefile 2003-05-16 14:21:50.000000000 +0200 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 20 -EXTRAVERSION = -wolk4.0s +EXTRAVERSION = -wolk4.1s KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) diff -Naurp linux-2.4.20-wolk4.0s/REPORTING-BUGS linux-2.4.20-wolk4.1-fullkernel/REPORTING-BUGS --- linux-2.4.20-wolk4.0s/REPORTING-BUGS 2003-05-15 21:52:19.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/REPORTING-BUGS 2003-05-16 14:21:51.000000000 +0200 @@ -1,6 +1,8 @@ WOLK SPECIFIC OR IN OTHER WORDS: THE RIGHT WAY TO SUBMIT BUGS REPORTS: ---------------------------------------------------------------------- + NOTE: IF YOU DON'T REPORT BUGS DON'T EXPECT ANYTHING GET FIXED!!!!!!! + 1. What _exactly_ is not working for you? Tell me as much as you can! 2. Steps to reproduce? @@ -9,28 +11,32 @@ WOLK SPECIFIC OR IN OTHER WORDS: THE RIG 4. What filesystem? / What mount options? - 4. Changed the fs from ext3 to ext2 for example and see a difference? + 5. Changed the fs from ext3 to ext2 for example and see a difference? - 5. What tweaks? + 6. What tweaks? - 6. You've read the CHANGELOG _carefully_ and noticed the things you can + 7. You've read the CHANGELOG _carefully_ and noticed the things you can change at, for example, mount time, sched_yield_scale in /proc? - 7. Your ".config" + 8. Your ".config" - 8. Output of: "dmesg" just after reboot or: "/var/log/dmesg" after some uptime + 9. Output of: "dmesg" just after reboot or: "/var/log/dmesg" after some uptime - 9. Output of: "lspci -vvv" +10. Output of: "lspci -vvv" -10. If you have an OOPS, ksymoops it and do _not_ send the OOPS only! +11. If you have an OOPS, ksymoops it and do _not_ send the OOPS only! + Also enable these options so the oops output is more helpfull: + - enable CONFIG_FRAME_POINTER + - enable CONFIG_KALLSYMS -11. If you have a deadlock, en-/disable the following, test again and +12. If you have a deadlock, en-/disable the following, test again and come back: - enable CONFIG_FRAME_POINTER - enable CONFIG_KALLSYMS - enable CONFIG_DEBUG_SLAB - enable CONFIG_DEBUG_IOVIRT - enable CONFIG_KDB (and read the docs in Documentation/kdb) + - disable CONFIG_ACPI *or* boot with "acpi=off" - disable CONFIG_DEBUG_STACKOVERFLOW - disable CONFIG_MEMORYPOOL - disable CONFIG_PREEMPT @@ -38,12 +44,12 @@ WOLK SPECIFIC OR IN OTHER WORDS: THE RIG - disable _all_ GRSECURITY stuff - less Documentation/nmi_watchdog.txt. Read it, do it, come back. -12. Disable all useless stuff for your machine you are actually testing! +13. Disable all useless stuff for your machine you are actually testing! -13. What compiler version? / What Distribution? +14. What compiler version? / What Distribution? Redhat is known to always fuck up their compiler! -14. Write those stuff to the list, _not_ as private mail! +15. Write those stuff to the list, _not_ as private mail! 16. 
Output of: "/proc/slabinfo" diff -Naurp linux-2.4.20-wolk4.0s/VERSION linux-2.4.20-wolk4.1-fullkernel/VERSION --- linux-2.4.20-wolk4.0s/VERSION 2003-05-15 21:52:19.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/VERSION 2003-05-16 14:17:06.000000000 +0200 @@ -1 +1 @@ -WOLK v4.0s "Server Edition" FINAL, based on 2.4.20 +WOLK v4.1s "Server Edition" FINAL, based on 2.4.20 diff -Naurp linux-2.4.20-wolk4.0s/WOLK-CHANGELOG linux-2.4.20-wolk4.1-fullkernel/WOLK-CHANGELOG --- linux-2.4.20-wolk4.0s/WOLK-CHANGELOG 2003-05-15 21:52:19.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/WOLK-CHANGELOG 2003-05-19 12:52:07.000000000 +0200 @@ -1,3 +1,26 @@ +Changelog from v4.0s -> v4.1s +----------------------------- +o fixed: hashing exploits in ipv4 routing, IP conntrack, and TCP synq +o fixed: IOPERM system call I/O port access vulnerability ++ fixed: cloop unresolved symbols vs. zlib +o fixed: NFS client stuck in D state +o fixed: mm corrupting SMP race between remove_inode_page and prune_icache +o fixed: Must wakeup with end_buffer_io_kiobuf as last thing to be sure + the wakeup will happen on a still allocated kiobuf +o fixed: Avoid spurious duplicate acks for very minor window updates, + that generates the double outgoing traffic with streaming + services that tends to fill the whole receive window to buffer +o fixed: fd leak +o fixed: vma merging issue with device driver supplied mappings +o fixed: tcp_tw_death_row corruption +o fixed: OOM killer braindamage (RMAP) +o fixed: VesaFB and highmem where screen stays black +o update: AIO-18 +o update: Super FreeS/WAN v1.99.7 Final (including Dead Pear Detection) +o removed: Network checksumming speed runtime detection + does not give any real advantage, causes bugs on recent Athlons + + Changelog from v4.0s-rc8 -> v4.0s-final --------------------------------------- o update: rmap VM v15i diff -Naurp linux-2.4.20-wolk4.0s/WOLK-README linux-2.4.20-wolk4.1-fullkernel/WOLK-README --- linux-2.4.20-wolk4.0s/WOLK-README 2003-05-15 21:52:19.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/WOLK-README 2003-05-19 12:52:51.000000000 +0200 @@ -1,4 +1,4 @@ -Kernel - patched - WOLK v4.0s - Base: Linux kernel 2.4.20 +Kernel - patched - WOLK v4.1s - Base: Linux kernel 2.4.20 located at http://sf.net/projects/wolk by Marc-Christian Petersen -------------------------------------------------------------------------- @@ -42,7 +42,7 @@ Some of the features: --------------------- O(1) Scheduler, RMAP VM, GRsecurity, Crypto, XFS, KDB, Preempt, Systrace, Super FreeS/WAN, Trustees, IPVS, i2c/lmsensors, TUX, EVMS, BadMEM, ftpfs, -HostAP ... and many more. +HostAP, all known security fixes, all known filesystem fixes, and many more. 
Credits go to all the people who created the patches, working hard on diff -Naurp linux-2.4.20-wolk4.0s/arch/all/Config-TWEAKS.in linux-2.4.20-wolk4.1-fullkernel/arch/all/Config-TWEAKS.in --- linux-2.4.20-wolk4.0s/arch/all/Config-TWEAKS.in 2003-05-15 21:52:19.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/arch/all/Config-TWEAKS.in 2003-05-19 13:05:09.000000000 +0200 @@ -16,7 +16,7 @@ fi # Low Latency / Low Latency Elevator comment 'Low Latency fixes are enabled' -bool 'Low Latency Elevator - block atomic Edition' CONFIG_BLK_DEV_ELEVATOR_LOWLAT +bool 'Low Latency Elevator - Read Latency v2 Edition' CONFIG_BLK_DEV_ELEVATOR_LOWLAT # Preempt diff -Naurp linux-2.4.20-wolk4.0s/arch/i386/kernel/ioport.c linux-2.4.20-wolk4.1-fullkernel/arch/i386/kernel/ioport.c --- linux-2.4.20-wolk4.0s/arch/i386/kernel/ioport.c 2003-05-15 21:52:19.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/arch/i386/kernel/ioport.c 2003-05-17 12:55:41.000000000 +0200 @@ -113,17 +113,18 @@ asmlinkage int sys_ioperm(unsigned long */ memset(t->io_bitmap,0xff,(IO_BITMAP_SIZE+1)*4); t->ioperm = 1; - /* - * this activates it in the TSS - */ - tss->bitmap = IO_BITMAP_OFFSET; } /* * do it in the per-thread copy and in the TSS ... */ set_bitmap(t->io_bitmap, from, num, !turn_on); - set_bitmap(tss->io_bitmap, from, num, !turn_on); + if (tss->bitmap == IO_BITMAP_OFFSET) { /* already active? */ + set_bitmap(tss->io_bitmap, from, num, !turn_on); + } else { + memcpy(tss->io_bitmap, t->io_bitmap, IO_BITMAP_SIZE); + tss->bitmap = IO_BITMAP_OFFSET; /* Activate it in the TSS */ + } preempt_enable(); return 0; diff -Naurp linux-2.4.20-wolk4.0s/arch/i386/lib/Makefile linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/Makefile --- linux-2.4.20-wolk4.0s/arch/i386/lib/Makefile 2003-05-15 21:52:20.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/Makefile 2003-05-19 08:37:47.000000000 +0200 @@ -7,16 +7,9 @@ L_TARGET = lib.a -obj-y = old-checksum.o delay.o \ +obj-y = checksum.o old-checksum.o delay.o \ usercopy.o getuser.o \ - memcpy.o strstr.o \ - bench_csum.o \ - csum.o \ - csum_basic.o \ - csum_naive.o \ - csum_3dnow.o \ - csum_ssemmxplus.o \ - csumcpy.o + memcpy.o strstr.o obj-$(CONFIG_X86_USE_3DNOW) += mmx.o obj-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o diff -Naurp linux-2.4.20-wolk4.0s/arch/i386/lib/bench_csum.c linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/bench_csum.c --- linux-2.4.20-wolk4.0s/arch/i386/lib/bench_csum.c 2003-05-15 21:52:20.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/bench_csum.c 1970-01-01 01:00:00.000000000 +0100 @@ -1,217 +0,0 @@ -#include // for get_pages -#include // for access_ok in asm/checksum.h -#include // for in6_addr in asm/checksum.h -#include // for ntoh in asm/checksum.h -#include // for X86_FEATURE_xx -#include // for ntohX in asm/checksum.h -#include // for NULL in asm/checksum.h -#include // for asmlinkage in asm/checksum.h -#include - -//#include "bench_csum.h" -#include -#include - -//#define dprintk(a...) printk(a) -#define dprintk(a...) ((void)0) - -/* Features usable for mem optimization: - Intel -X86_FEATURE_FPU Onboard FPU -X86_FEATURE_MMX Multimedia Extensions -X86_FEATURE_XMM Streaming SIMD Extensions -X86_FEATURE_XMM2 Streaming SIMD Extensions-2 - AMD -X86_FEATURE_3DNOW 3DNow! -X86_FEATURE_MMXEXT AMD MMX extensions -X86_FEATURE_3DNOWEXT AMD 3DNow! 
extensions - Cyrix -X86_FEATURE_CXMMX Cyrix MMX extensions -*/ - -typedef typeof(jiffies) jiffies_t; - -typedef void asm_helper(void); - -extern asm_helper csum_basic; -extern asm_helper csum_naive; -extern asm_helper csum_3dnow; -extern asm_helper csum_ssemmxplus; - -static struct candidate csum_runner[] = { - { "basic" , csum_basic , 1, { -1 } }, - { "simple" , csum_naive , 1, { -1 } }, - { "3Dnow!" , csum_3dnow , 1, { X86_FEATURE_3DNOW, -1 } }, - { "AMD MMX", csum_ssemmxplus, 1, { X86_FEATURE_MMXEXT, -1 } }, - { "SSE1+", csum_ssemmxplus, 1, { X86_FEATURE_XMM, -1 } }, -}; - -extern asm_helper csumcpy_basic; -extern asm_helper csumcpy_naive; -extern asm_helper csumcpy_ssemmxplus; -extern asm_helper csumcpy_sse; - -static struct candidate csumcpy_runner[] = { - { "basic" , csumcpy_basic , 2, { -1 } }, - { "simple" , csumcpy_naive , 2, { -1 } }, - /* higher weight: we prefer these for less cache pollution: */ - { "AMD MMX", csumcpy_ssemmxplus, 3, { X86_FEATURE_MMXEXT, -1 } }, - { "SSE1+", csumcpy_ssemmxplus, 3, { X86_FEATURE_XMM, -1 } }, - { "SSE1" , csumcpy_sse , 3, { X86_FEATURE_XMM, -1 } }, -}; - -//====== TODO: split here: above: arch, below:generic - -/* set this to value bigger than cache(s) */ -/* TODO: heuristic for buffer size */ -#define bufshift 20 /* 10=1kb, 20=1MB etc */ -/* typical size of a packet */ -#define chunksz (4*1024) - -#define bufsz (1<f); - - max = 0; - // In practice these are pretty repeatable - // so 3 runs is an overkill - for(i=0; i<3; i++) { - int count = 0; - jiffies_t limit; - wait_for_jiffy(); - limit = jiffies+duration; - while(time_before(jiffies, limit)) { - int i; - mb(); - // interleaved to avoid bias due to prefetch - for(i=0; imax) - max = count; - } - - if(report) { - int kb_sec = max * (((chunksz*chunkcnt)/1024) * HZ) / duration; - printk(" %-10s:%6d.%03d MB/sec\n", cand->name, - kb_sec / 1000, kb_sec % 1000); - } - - return max; -} - -static int -bench_csumcpy(struct candidate *cand, char *buf) -{ - int err; - int i, max; - best_csumcpy = (asm_helper*)(cand->f); - - max = 0; - for(i=0; i<3; i++) { - int count = 0; - jiffies_t limit; - wait_for_jiffy(); - limit = jiffies+duration; - while(time_before(jiffies, limit)) { - int i; - mb(); - // interleaved to avoid bias due to prefetch - for(i=0; imax) - max = count; - } - - if(report) { - int kb_sec = max * (((chunksz*chunkcnt)/1024) * HZ) / duration; - printk(" %-10s:%6d.%03d MB/sec\n", cand->name, - kb_sec / 1000, kb_sec % 1000); - } - - return max; -} - -static int -find_best_csum(void) -{ - struct candidate *best; - char *buffer = (char *) __get_free_pages(GFP_KERNEL, - (bufshift-PAGE_SHIFT)); - - printk(KERN_INFO "Measuring network checksumming speed\n"); - if(!buffer) { - printk("csum: cannot allocate %i pages\n", - 1<<(bufshift-PAGE_SHIFT) - ); - return -ENOMEM; - } - dprintk("allocated %i pages\n",1<<(bufshift-PAGE_SHIFT)); - - // find # of jiffies suitable for reliable results - // (at least %5 accuracy) - while(bench_csumcpy(&csumcpy_runner[0], buffer)<20) { - duration<<=1; - } - dprintk("test run will last %i ticks\n", duration); - report = 1; - - best = find_best(bench_csum, buffer, csum_runner, - VECTOR_SZ(csum_runner)); - printk("csum: using csum function: %s\n", best->name); - best_csum = (asm_helper*)(best->f); - - best = find_best(bench_csumcpy, buffer, csumcpy_runner, - VECTOR_SZ(csumcpy_runner)); - printk("csum: using csum_copy function: %s\n", best->name); - best_csumcpy = (asm_helper*)(best->f); - - free_pages((unsigned long)buffer, (bufshift-PAGE_SHIFT)); - dprintk("freed %i 
pages\n",1<<(bufshift-PAGE_SHIFT)); - return 0; -} - -MODULE_LICENSE("GPL"); - -module_init(find_best_csum); diff -Naurp linux-2.4.20-wolk4.0s/arch/i386/lib/checksum.S linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/checksum.S --- linux-2.4.20-wolk4.0s/arch/i386/lib/checksum.S 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/checksum.S 2003-05-19 08:37:47.000000000 +0200 @@ -0,0 +1,496 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IP/TCP/UDP checksumming routines + * + * Authors: Jorge Cwik, + * Arnt Gulbrandsen, + * Tom May, + * Pentium Pro/II routines: + * Alexander Kjeldaas + * Finn Arne Gangstad + * Lots of code moved from tcp.c and ip.c; see those files + * for more names. + * + * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception + * handling. + * Andi Kleen, add zeroing on error + * converted to pure assembler + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include + +/* + * computes a partial checksum, e.g. for TCP/UDP fragments + */ + +/* +unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) + */ + +.text +.align 4 +.globl csum_partial + +#ifndef CONFIG_X86_USE_PPRO_CHECKSUM + + /* + * Experiments with Ethernet and SLIP connections show that buff + * is aligned on either a 2-byte or 4-byte boundary. We get at + * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. + * Fortunately, it is easy to convert 2-byte alignment to 4-byte + * alignment for the unrolled loop. + */ +csum_partial: + pushl %esi + pushl %ebx + movl 20(%esp),%eax # Function arg: unsigned int sum + movl 16(%esp),%ecx # Function arg: int len + movl 12(%esp),%esi # Function arg: unsigned char *buff + testl $3, %esi # Check alignment. + jz 2f # Jump if alignment is ok. + testl $1, %esi # Check alignment. + jz 10f # Jump if alignment is boundary of 2bytes. + + # buf is odd + dec %ecx + jl 8f + movzbl (%esi), %ebx + adcl %ebx, %eax + roll $8, %eax + inc %esi + testl $2, %esi + jz 2f +10: + subl $2, %ecx # Alignment uses up two bytes. + jae 1f # Jump if we had at least two bytes. + addl $2, %ecx # ecx was < 2. Deal with it. 
+ jmp 4f +1: movw (%esi), %bx + addl $2, %esi + addw %bx, %ax + adcl $0, %eax +2: + movl %ecx, %edx + shrl $5, %ecx + jz 2f + testl %esi, %esi +1: movl (%esi), %ebx + adcl %ebx, %eax + movl 4(%esi), %ebx + adcl %ebx, %eax + movl 8(%esi), %ebx + adcl %ebx, %eax + movl 12(%esi), %ebx + adcl %ebx, %eax + movl 16(%esi), %ebx + adcl %ebx, %eax + movl 20(%esi), %ebx + adcl %ebx, %eax + movl 24(%esi), %ebx + adcl %ebx, %eax + movl 28(%esi), %ebx + adcl %ebx, %eax + lea 32(%esi), %esi + dec %ecx + jne 1b + adcl $0, %eax +2: movl %edx, %ecx + andl $0x1c, %edx + je 4f + shrl $2, %edx # This clears CF +3: adcl (%esi), %eax + lea 4(%esi), %esi + dec %edx + jne 3b + adcl $0, %eax +4: andl $3, %ecx + jz 7f + cmpl $2, %ecx + jb 5f + movw (%esi),%cx + leal 2(%esi),%esi + je 6f + shll $16,%ecx +5: movb (%esi),%cl +6: addl %ecx,%eax + adcl $0, %eax +7: + testl $1, 12(%esp) + jz 8f + roll $8, %eax +8: + popl %ebx + popl %esi + ret + +#else + +/* Version for PentiumII/PPro */ + +csum_partial: + pushl %esi + pushl %ebx + movl 20(%esp),%eax # Function arg: unsigned int sum + movl 16(%esp),%ecx # Function arg: int len + movl 12(%esp),%esi # Function arg: const unsigned char *buf + + testl $3, %esi + jnz 25f +10: + movl %ecx, %edx + movl %ecx, %ebx + andl $0x7c, %ebx + shrl $7, %ecx + addl %ebx,%esi + shrl $2, %ebx + negl %ebx + lea 45f(%ebx,%ebx,2), %ebx + testl %esi, %esi + jmp *%ebx + + # Handle 2-byte-aligned regions +20: addw (%esi), %ax + lea 2(%esi), %esi + adcl $0, %eax + jmp 10b +25: + testl $1, %esi + jz 30f + # buf is odd + dec %ecx + jl 90f + movzbl (%esi), %ebx + addl %ebx, %eax + adcl $0, %eax + roll $8, %eax + inc %esi + testl $2, %esi + jz 10b + +30: subl $2, %ecx + ja 20b + je 32f + addl $2, %ecx + jz 80f + movzbl (%esi),%ebx # csumming 1 byte, 2-aligned + addl %ebx, %eax + adcl $0, %eax + jmp 80f +32: + addw (%esi), %ax # csumming 2 bytes, 2-aligned + adcl $0, %eax + jmp 80f + +40: + addl -128(%esi), %eax + adcl -124(%esi), %eax + adcl -120(%esi), %eax + adcl -116(%esi), %eax + adcl -112(%esi), %eax + adcl -108(%esi), %eax + adcl -104(%esi), %eax + adcl -100(%esi), %eax + adcl -96(%esi), %eax + adcl -92(%esi), %eax + adcl -88(%esi), %eax + adcl -84(%esi), %eax + adcl -80(%esi), %eax + adcl -76(%esi), %eax + adcl -72(%esi), %eax + adcl -68(%esi), %eax + adcl -64(%esi), %eax + adcl -60(%esi), %eax + adcl -56(%esi), %eax + adcl -52(%esi), %eax + adcl -48(%esi), %eax + adcl -44(%esi), %eax + adcl -40(%esi), %eax + adcl -36(%esi), %eax + adcl -32(%esi), %eax + adcl -28(%esi), %eax + adcl -24(%esi), %eax + adcl -20(%esi), %eax + adcl -16(%esi), %eax + adcl -12(%esi), %eax + adcl -8(%esi), %eax + adcl -4(%esi), %eax +45: + lea 128(%esi), %esi + adcl $0, %eax + dec %ecx + jge 40b + movl %edx, %ecx +50: andl $3, %ecx + jz 80f + + # Handle the last 1-3 bytes without jumping + notl %ecx # 1->2, 2->1, 3->0, higher bits are masked + movl $0xffffff,%ebx # by the shll and shrl instructions + shll $3,%ecx + shrl %cl,%ebx + andl -128(%esi),%ebx # esi is 4-aligned so should be ok + addl %ebx,%eax + adcl $0,%eax +80: + testl $1, 12(%esp) + jz 90f + roll $8, %eax +90: + popl %ebx + popl %esi + ret + +#endif + +/* +unsigned int csum_partial_copy_generic (const char *src, char *dst, + int len, int sum, int *src_err_ptr, int *dst_err_ptr) + */ + +/* + * Copy from ds while checksumming, otherwise like csum_partial + * + * The macros SRC and DST specify the type of access for the instruction. + * thus we can call a custom exception handler for all access types. 
+ * + * FIXME: could someone double-check whether I haven't mixed up some SRC and + * DST definitions? It's damn hard to trigger all cases. I hope I got + * them all but there's no guarantee. + */ + +#define SRC(y...) \ + 9999: y; \ + .section __ex_table, "a"; \ + .long 9999b, 6001f ; \ + .previous + +#define DST(y...) \ + 9999: y; \ + .section __ex_table, "a"; \ + .long 9999b, 6002f ; \ + .previous + +.align 4 +.globl csum_partial_copy_generic + +#ifndef CONFIG_X86_USE_PPRO_CHECKSUM + +#define ARGBASE 16 +#define FP 12 + +csum_partial_copy_generic: + subl $4,%esp + pushl %edi + pushl %esi + pushl %ebx + movl ARGBASE+16(%esp),%eax # sum + movl ARGBASE+12(%esp),%ecx # len + movl ARGBASE+4(%esp),%esi # src + movl ARGBASE+8(%esp),%edi # dst + + testl $2, %edi # Check alignment. + jz 2f # Jump if alignment is ok. + subl $2, %ecx # Alignment uses up two bytes. + jae 1f # Jump if we had at least two bytes. + addl $2, %ecx # ecx was < 2. Deal with it. + jmp 4f +SRC(1: movw (%esi), %bx ) + addl $2, %esi +DST( movw %bx, (%edi) ) + addl $2, %edi + addw %bx, %ax + adcl $0, %eax +2: + movl %ecx, FP(%esp) + shrl $5, %ecx + jz 2f + testl %esi, %esi +SRC(1: movl (%esi), %ebx ) +SRC( movl 4(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, (%edi) ) + adcl %edx, %eax +DST( movl %edx, 4(%edi) ) + +SRC( movl 8(%esi), %ebx ) +SRC( movl 12(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 8(%edi) ) + adcl %edx, %eax +DST( movl %edx, 12(%edi) ) + +SRC( movl 16(%esi), %ebx ) +SRC( movl 20(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 16(%edi) ) + adcl %edx, %eax +DST( movl %edx, 20(%edi) ) + +SRC( movl 24(%esi), %ebx ) +SRC( movl 28(%esi), %edx ) + adcl %ebx, %eax +DST( movl %ebx, 24(%edi) ) + adcl %edx, %eax +DST( movl %edx, 28(%edi) ) + + lea 32(%esi), %esi + lea 32(%edi), %edi + dec %ecx + jne 1b + adcl $0, %eax +2: movl FP(%esp), %edx + movl %edx, %ecx + andl $0x1c, %edx + je 4f + shrl $2, %edx # This clears CF +SRC(3: movl (%esi), %ebx ) + adcl %ebx, %eax +DST( movl %ebx, (%edi) ) + lea 4(%esi), %esi + lea 4(%edi), %edi + dec %edx + jne 3b + adcl $0, %eax +4: andl $3, %ecx + jz 7f + cmpl $2, %ecx + jb 5f +SRC( movw (%esi), %cx ) + leal 2(%esi), %esi +DST( movw %cx, (%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%ecx +SRC(5: movb (%esi), %cl ) +DST( movb %cl, (%edi) ) +6: addl %ecx, %eax + adcl $0, %eax +7: +5000: + +# Exception handler: +.section .fixup, "ax" + +6001: + movl ARGBASE+20(%esp), %ebx # src_err_ptr + movl $-EFAULT, (%ebx) + + # zero the complete destination - computing the rest + # is too much work + movl ARGBASE+8(%esp), %edi # dst + movl ARGBASE+12(%esp), %ecx # len + xorl %eax,%eax + rep ; stosb + + jmp 5000b + +6002: + movl ARGBASE+24(%esp), %ebx # dst_err_ptr + movl $-EFAULT,(%ebx) + jmp 5000b + +.previous + + popl %ebx + popl %esi + popl %edi + popl %ecx # equivalent to addl $4,%esp + ret + +#else + +/* Version for PentiumII/PPro */ + +#define ROUND1(x) \ + SRC(movl x(%esi), %ebx ) ; \ + addl %ebx, %eax ; \ + DST(movl %ebx, x(%edi) ) ; + +#define ROUND(x) \ + SRC(movl x(%esi), %ebx ) ; \ + adcl %ebx, %eax ; \ + DST(movl %ebx, x(%edi) ) ; + +#define ARGBASE 12 + +csum_partial_copy_generic: + pushl %ebx + pushl %edi + pushl %esi + movl ARGBASE+4(%esp),%esi #src + movl ARGBASE+8(%esp),%edi #dst + movl ARGBASE+12(%esp),%ecx #len + movl ARGBASE+16(%esp),%eax #sum +# movl %ecx, %edx + movl %ecx, %ebx + movl %esi, %edx + shrl $6, %ecx + andl $0x3c, %ebx + negl %ebx + subl %ebx, %esi + subl %ebx, %edi + lea -1(%esi),%edx + andl $-32,%edx + lea 3f(%ebx,%ebx), %ebx + testl %esi, %esi + jmp *%ebx 
+1: addl $64,%esi + addl $64,%edi + SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) + ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) + ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) + ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) + ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) +3: adcl $0,%eax + addl $64, %edx + dec %ecx + jge 1b +4: movl ARGBASE+12(%esp),%edx #len + andl $3, %edx + jz 7f + cmpl $2, %edx + jb 5f +SRC( movw (%esi), %dx ) + leal 2(%esi), %esi +DST( movw %dx, (%edi) ) + leal 2(%edi), %edi + je 6f + shll $16,%edx +5: +SRC( movb (%esi), %dl ) +DST( movb %dl, (%edi) ) +6: addl %edx, %eax + adcl $0, %eax +7: +.section .fixup, "ax" +6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr + movl $-EFAULT, (%ebx) + # zero the complete destination (computing the rest is too much work) + movl ARGBASE+8(%esp),%edi # dst + movl ARGBASE+12(%esp),%ecx # len + xorl %eax,%eax + rep; stosb + jmp 7b +6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr + movl $-EFAULT, (%ebx) + jmp 7b +.previous + + popl %esi + popl %edi + popl %ebx + ret + +#undef ROUND +#undef ROUND1 + +#endif diff -Naurp linux-2.4.20-wolk4.0s/arch/i386/lib/csum.S linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csum.S --- linux-2.4.20-wolk4.0s/arch/i386/lib/csum.S 2003-05-15 21:52:20.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csum.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,97 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * IP/TCP/UDP checksumming routines - * - * Authors: Jorge Cwik, - * Arnt Gulbrandsen, - * Tom May, - * Pentium Pro/II routines: - * Alexander Kjeldaas - * Finn Arne Gangstad - * Lots of code moved from tcp.c and ip.c; see those files - * for more names. - * - * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception - * handling. - * Andi Kleen, add zeroing on error converted to pure assembler - * 2002-10-30 Denis Vlasenko - * boot-time benchmarking, 3Dnow/MMX+/SSE versions - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -/* -** computes a partial checksum, e.g. for TCP/UDP fragments -** -** unsigned int csum_partial(const unsigned char * buff, -** int len, unsigned int sum) -*/ - -.text -.align 4 -.globl csum_partial - -csum_partial: - pushl %esi - pushl %ebx - movl 20(%esp), %eax # arg: sum - movl 16(%esp), %ecx # arg: len - movl 12(%esp), %esi # arg: buf - - testl $3, %esi - jz 40f -20: - # not 4-aligned: analyze and align... - testl $1, %esi - jz 30f - - # unaligned start addr - decl %ecx - js 90f # sz==0, exit - movzbl (%esi), %ebx # eat one byte... - addl %ebx, %eax - adcl $0, %eax - roll $8, %eax # NB: need to be undone at exit! 
- incl %esi - testl $2, %esi - jz 40f -30: - # This is 2-aligned, but not 4-aligned - cmpl $3, %ecx - jbe 60f - addw (%esi), %ax # eat 2 bytes - leal 2(%esi), %esi - adcl $0, %eax - subl $2, %ecx -40: - # esi is 4-aligned here, call block routine - movl $csum_basic, %ebx # known ok even for ecx==0 etc - cmpl $128, %ecx # use optimized routine - jb 50f # only for large blocks - movl best_csum, %ebx -50: call *%ebx -60: - # handle the last 0-3 bytes without much jumping - jecxz 80f - notl %ecx # 0->3, 1->2, 2->1, 3->0, higher bits are masked - movl $0xffffff, %ebx # by the shll and shrl instructions - shll $3, %ecx - shrl %cl, %ebx - andl (%esi), %ebx # esi is 4-aligned so should be ok - addl %ebx, %eax - adcl $0, %eax -80: - # undo csum rotation if start addr was odd - testl $1, 12(%esp) - jz 90f - roll $8, %eax -90: - popl %ebx - popl %esi - ret diff -Naurp linux-2.4.20-wolk4.0s/arch/i386/lib/csum_3dnow.S linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csum_3dnow.S --- linux-2.4.20-wolk4.0s/arch/i386/lib/csum_3dnow.S 2003-05-15 21:52:20.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csum_3dnow.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,4 +0,0 @@ -#define PREFETCH(a) prefetch a -#define NAME csum_3dnow - -#include "csum_pf.inc" diff -Naurp linux-2.4.20-wolk4.0s/arch/i386/lib/csum_basic.S linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csum_basic.S --- linux-2.4.20-wolk4.0s/arch/i386/lib/csum_basic.S 2003-05-15 21:52:20.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csum_basic.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,63 +0,0 @@ -.text -.align 4 -.globl csum_basic - -/* Experiments with Ethernet and SLIP connections show that buff -** is aligned on either a 2-byte or 4-byte boundary. We get at -** least a twofold speedup on 486 and Pentium if it is 4-byte aligned. -** Fortunately, it is easy to convert 2-byte alignment to 4-byte -** alignment for the unrolled loop. 
-*/ -csum_basic: - movl %ecx, %ebx - movl %ecx, %edx - shrl $7, %ecx - andl $0x7c, %ebx - addl %ebx, %esi - shrl $2, %ebx - negl %ebx - leal 50f(%ebx,%ebx,2), %ebx - clc - jmp *%ebx -40: - leal 128(%esi), %esi - adcl -128(%esi), %eax - adcl -124(%esi), %eax - adcl -120(%esi), %eax - adcl -116(%esi), %eax - adcl -112(%esi), %eax - adcl -108(%esi), %eax - adcl -104(%esi), %eax - adcl -100(%esi), %eax - adcl -96(%esi), %eax - adcl -92(%esi), %eax - adcl -88(%esi), %eax - adcl -84(%esi), %eax - adcl -80(%esi), %eax - adcl -76(%esi), %eax - adcl -72(%esi), %eax - adcl -68(%esi), %eax - adcl -64(%esi), %eax - adcl -60(%esi), %eax - adcl -56(%esi), %eax - adcl -52(%esi), %eax - adcl -48(%esi), %eax - adcl -44(%esi), %eax - adcl -40(%esi), %eax - adcl -36(%esi), %eax - adcl -32(%esi), %eax - adcl -28(%esi), %eax - adcl -24(%esi), %eax - adcl -20(%esi), %eax - adcl -16(%esi), %eax - adcl -12(%esi), %eax - adcl -8(%esi), %eax - adcl -4(%esi), %eax -50: - decl %ecx - jge 40b - - adcl $0, %eax - movl %edx, %ecx - andl $3, %ecx - ret diff -Naurp linux-2.4.20-wolk4.0s/arch/i386/lib/csum_naive.S linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csum_naive.S --- linux-2.4.20-wolk4.0s/arch/i386/lib/csum_naive.S 2003-05-15 21:52:20.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csum_naive.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,17 +0,0 @@ -.text -.align 4 -.globl csum_naive - -csum_naive: - mov %ecx, %edx - shrl $2, %ecx - clc -1: - adcl (%esi), %eax - leal 4(%esi), %esi - loop 1b - - adcl $0, %eax - mov %edx, %ecx - andl $3, %ecx - ret diff -Naurp linux-2.4.20-wolk4.0s/arch/i386/lib/csum_pf.inc linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csum_pf.inc --- linux-2.4.20-wolk4.0s/arch/i386/lib/csum_pf.inc 2003-05-15 21:52:20.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csum_pf.inc 1970-01-01 01:00:00.000000000 +0100 @@ -1,95 +0,0 @@ -//#define PREFETCH(a) prefetchnta a -//#define PREFETCH(a) prefetch a -//#define PREFETCH(a) - -// How much unrolling do you want? -//vda: 5 is best on Duron 650 -#define ITER_BITS 5 // ...5,6,7 - ...32,64,128 bytes - // NB: tweak unrolled loop too... -/* -** computes a partial checksum, e.g. for TCP/UDP fragments -** int csum_partial(const char *buff, int len, int sum) -*/ - -#define ITER_SZ (1<=16 -10: - PREFETCH((%esi)) # Prefetch _each_ cacheline - PREFETCH(32(%esi)) # Note! Athlons have 64 bytes long ones, but - PREFETCH(64(%esi)) # PIIIs only 32! This gives ~20% speedup - PREFETCH(64+32(%esi)) # for PIII - PREFETCH(128(%esi)) - PREFETCH(128+32(%esi)) - PREFETCH(192(%esi)) - PREFETCH(192+32(%esi)) - movl %ecx, %ebx - movl %ecx, %edx - andl $ITER_MSK, %ebx # = bytes to handle in first (partial) iteration - shrl $ITER_BITS, %ecx # = iterations to make - addl %ebx, %esi # => 1st byte to handle in 2nd complete iteration - shrl $2, %ebx # = dwords to handle - negl %ebx - lea 50f(%ebx,%ebx,2), %ebx # = 45f - 3*dwords_to_handle - clc - jmp *%ebx # here we go! - -40: - PREFETCH(256(%esi)) -41: - lea ITER_SZ(%esi), %esi # does NOT change CF! 
-/* - addl -128(%esi), %eax - adcl -124(%esi), %eax - adcl -120(%esi), %eax - adcl -116(%esi), %eax - adcl -112(%esi), %eax - adcl -108(%esi), %eax - adcl -104(%esi), %eax - adcl -100(%esi), %eax - adcl -96(%esi), %eax - adcl -92(%esi), %eax - adcl -88(%esi), %eax - adcl -84(%esi), %eax - adcl -80(%esi), %eax - adcl -76(%esi), %eax - adcl -72(%esi), %eax - adcl -68(%esi), %eax - adcl -64(%esi), %eax - adcl -60(%esi), %eax - adcl -56(%esi), %eax - adcl -52(%esi), %eax - adcl -48(%esi), %eax - adcl -44(%esi), %eax - adcl -40(%esi), %eax - adcl -36(%esi), %eax -*/ - addl -32(%esi), %eax - adcl -28(%esi), %eax - adcl -24(%esi), %eax - adcl -20(%esi), %eax - adcl -16(%esi), %eax - adcl -12(%esi), %eax - adcl -8(%esi), %eax - adcl -4(%esi), %eax -50: - adcl $0, %eax - dec %ecx # does NOT change CF! - # We can do just "jge 40b" here, but we can be a bit clever... - # This little twist gives surprisingly noticeable benefits! - # Seen 11% increase on random 1K blocks on Duron 650 - js 60f - cmp $256/ITER_SZ, %ecx - jae 40b # need prefetch - jmp 41b # do not need it -60: - movl %edx, %ecx - andl $3, %ecx - ret diff -Naurp linux-2.4.20-wolk4.0s/arch/i386/lib/csum_ssemmxplus.S linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csum_ssemmxplus.S --- linux-2.4.20-wolk4.0s/arch/i386/lib/csum_ssemmxplus.S 2003-05-15 21:52:20.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csum_ssemmxplus.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,4 +0,0 @@ -#define PREFETCH(a) prefetchnta a -#define NAME csum_ssemmxplus - -#include "csum_pf.inc" diff -Naurp linux-2.4.20-wolk4.0s/arch/i386/lib/csumcpy.S linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csumcpy.S --- linux-2.4.20-wolk4.0s/arch/i386/lib/csumcpy.S 2003-05-15 21:52:20.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csumcpy.S 1970-01-01 01:00:00.000000000 +0100 @@ -1,178 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * IP/TCP/UDP checksumming routines - * - * Authors: Jorge Cwik, - * Arnt Gulbrandsen, - * Tom May, - * Pentium Pro/II routines: - * Alexander Kjeldaas - * Finn Arne Gangstad - * Lots of code moved from tcp.c and ip.c; see those files - * for more names. - * - * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception - * handling. - * Andi Kleen, add zeroing on error converted to pure assembler - * 2002-10-30 Denis Vlasenko - * boot-time benchmarking, 3Dnow/MMX+/SSE versions - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include - -/* -** computes a partial checksum, e.g. for TCP/UDP fragments -** -** unsigned int csum_partial(const unsigned char * buff, -** int len, unsigned int sum) -*/ - -#ifdef __KERNEL__ -#define K(a...) a -#else -#define K(a...) -#endif - -#define SRC(y...) \ -9999: y ;\ - .section __ex_table, "a";\ - .long 9999b, 6001f ;\ - .previous - -#define DST(y...) 
\ -9999: y ;\ - .section __ex_table, "a";\ - .long 9999b, 6002f ;\ - .previous - -#define KERNEL_FPU_BEGIN \ - call kernel_fpu_begin - -#define KERNEL_FPU_END(r) \ -K( movl %cr0, r ;)\ -K( orl $8, r ;)\ -K( movl r, %cr0 ;) - -.text - -#include "csumcpy_naive.inc" -#include "csumcpy_basic.inc" -#include "csumcpy_ssemmxplus.inc" -#include "csumcpy_sse.inc" - -.align 4 -.globl csum_partial_copy_generic - -csum_partial_copy_generic: - pushl %ebx - pushl %edi - pushl %esi - pushl %ebp - movl %esp, %ebp - -#define STK_DERR 40(%ebp) -#define STK_SERR 36(%ebp) -#define STK_SUM 32(%ebp) -#define STK_LEN 28(%ebp) -#define STK_DST 24(%ebp) -#define STK_SRC 20(%ebp) -#define STK_EIP 16(%ebp) -#define STK_EBX 12(%ebp) -#define STK_EDI 8(%ebp) -#define STK_ESI 4(%ebp) -#define STK_EBP (%ebp) - - movl STK_SRC, %esi #src - movl STK_DST, %edi #dst - movl STK_LEN, %ecx #len - movl STK_SUM, %eax #sum - - testl $3, %edi # Check dst alignment - jz 40f - - # not 4-aligned: analyze and align... - testl $1, %edi - jz 30f - - # unaligned start addr - decl %ecx - js 90f # sz==0, exit - movzbl (%esi), %ebx # eat one byte... - movb %bl, (%edi) - addl %ebx, %eax - adcl $0, %eax - roll $8, %eax # NB: need to be undone at exit! - incl %esi - incl %edi - testl $2, %edi - jz 40f -30: - # This is 2-aligned, but not 4-aligned - cmpl $3, %ecx - jbe 60f - movw (%esi), %bx # eat 2 bytes - addw %bx, %ax - movw %bx, (%edi) - adcl $0, %eax - leal 2(%esi), %esi - leal 2(%edi), %edi - subl $2, %ecx -40: - # edi is 4-aligned now: call block routine - movl $csumcpy_basic, %ebx # 'default', known good for ecx==0 etc - cmpl $128, %ecx # use optimized routine - jb 50f # only for large blocks - movl best_csumcpy, %ebx -50: call *%ebx -60: - # handle last 0-3 bytes - jecxz 80f - cmpl $2, %ecx - jb 70f -SRC( movw (%esi), %cx ) - leal 2(%esi), %esi -DST( movw %cx, (%edi) ) - leal 2(%edi), %edi - je 75f - shll $16, %ecx -70: -SRC( movb (%esi), %cl ) -DST( movb %cl, (%edi) ) -75: addl %ecx, %eax - adcl $0, %eax -80: - # undo csum rotation if dst was unaligned - testl $1, STK_DST - jz 90f - roll $8, %eax -90: - movl %esp, %ebp - popl %ebp - popl %esi - popl %edi - popl %ebx - ret - - -.section .fixup, "ax" -6001: movl STK_SERR, %ebx # src_err_ptr - movl $-EFAULT, (%ebx) - # zero the complete destination (computing the rest is too much work) - movl STK_DST, %edi # dst - movl STK_LEN, %ecx # len - xorl %eax, %eax - cld - rep; stosb - jmp 90b -6002: movl STK_DERR, %ebx # dst_err_ptr - movl $-EFAULT, (%ebx) - jmp 90b -.previous diff -Naurp linux-2.4.20-wolk4.0s/arch/i386/lib/csumcpy_basic.inc linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csumcpy_basic.inc --- linux-2.4.20-wolk4.0s/arch/i386/lib/csumcpy_basic.inc 2003-05-15 21:52:20.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csumcpy_basic.inc 1970-01-01 01:00:00.000000000 +0100 @@ -1,40 +0,0 @@ -// Please somebody experiment with unroll length -// on a PII. Do _not_ optimize for PIII/Athlons/etc, -// they won't typically use this... 
- -.align 4 -.globl csumcpy_basic - -csumcpy_basic: - movl %ecx, %ebx - movl %ecx, %edx - shrl $6, %ecx - andl $0x3c, %ebx - negl %ebx - subl %ebx, %esi - subl %ebx, %edi - leal 50f(%ebx,%ebx), %ebx - clc - jmp *%ebx -40: - leal 64(%esi), %esi - leal 64(%edi), %edi - -#undef ROUND -#define ROUND(x) \ -SRC( movl x(%esi), %ebx ); \ - adcl %ebx, %eax ; \ -DST( movl %ebx, x(%edi) ); - - ROUND(-64) ROUND(-60) ROUND(-56) ROUND(-52) - ROUND(-48) ROUND(-44) ROUND(-40) ROUND(-36) - ROUND(-32) ROUND(-28) ROUND(-24) ROUND(-20) - ROUND(-16) ROUND(-12) ROUND(-8) ROUND(-4) -50: - decl %ecx - jge 40b - - adcl $0, %eax - movl %edx, %ecx - andl $3, %ecx - ret diff -Naurp linux-2.4.20-wolk4.0s/arch/i386/lib/csumcpy_naive.inc linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csumcpy_naive.inc --- linux-2.4.20-wolk4.0s/arch/i386/lib/csumcpy_naive.inc 2003-05-15 21:52:20.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csumcpy_naive.inc 1970-01-01 01:00:00.000000000 +0100 @@ -1,21 +0,0 @@ -// Heh... at least it's small ;) - -.align 4 -.globl csumcpy_naive - -csumcpy_naive: - mov %ecx, %edx - shrl $2, %ecx - clc -1: -SRC( movl (%esi), %ebx ) -DST( movl %ebx, (%edi) ) - adcl %ebx, %eax - leal 4(%esi), %esi - leal 4(%edi), %edi - loop 1b - - adcl $0, %eax - mov %edx, %ecx - and $3, %ecx - ret diff -Naurp linux-2.4.20-wolk4.0s/arch/i386/lib/csumcpy_sse.inc linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csumcpy_sse.inc --- linux-2.4.20-wolk4.0s/arch/i386/lib/csumcpy_sse.inc 2003-05-15 21:52:20.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csumcpy_sse.inc 1970-01-01 01:00:00.000000000 +0100 @@ -1,147 +0,0 @@ -// Huge routine, I don't like it's size and number -// of fixups... think of that when you want -// to unroll loop more -// TODO: benchmark and reduce size -// I won't stand 1K behemot just for 5% speedup - -#undef PREFETCH -#define PREFETCH(a) prefetchnta a - -// How much unrolling do you want? -// vda: celeron 1200: 5 with movaps, 4 with movups -#undef ITER_BITS -#define ITER_BITS 6 // ...4,5,6,7 - ...16,32,64,128 bytes - // NB: tweak unrolled loop too... 
- -#undef ITER_SZ -#undef ITER_MSK -#define ITER_SZ (1<6 don't mix - adcl $0, %eax - jmp 19f -15: # esi is 16-aligned - PREFETCH(256(%esi)) - ROUND0(a,%xmm0) - ROUND(a,16,%xmm1) - PREFETCH(256+32(%esi)) - ROUND(a,32,%xmm0) - ROUND(a,48,%xmm1) - lea ITER_SZ(%esi), %esi - lea ITER_SZ(%edi), %edi - //dec %ecx - //jnz 15b - loop 15b // Beware: loop and ITER_BITS>6 don't mix - adcl $0, %eax -19: - sfence # clean up XMM - //KERNEL_FPU_END(%ebx) - movups (%esp), %xmm0 - movups 16(%esp), %xmm1 - addl $32, %esp -K( movl %ebx, %cr0 ) - -20: - # loop for dwords - movl %edx, %ecx - andl $ITER_MSK, %edx - jz 40f - shrl $2, %edx # this also clears CF -30: -SRC( movl (%esi), %ebx ) - adcl %ebx, %eax -DST( movl %ebx, (%edi) ) - lea 4(%esi), %esi - lea 4(%edi), %edi - dec %edx - jnz 30b - adcl $0, %eax -40: - # last 1, 2 or 3 bytes: handled by caller - andl $3, %ecx - ret - - -# This is 16-align edi and get back -5500: cmp $ITER_SZ, %ecx # edi is 4-aligned here - mov %ecx, %edx # edx needed at 20: - jb 20b # not worthy: too short - -5520: test $0xe, %edi # loop until we are 16-aligned - jz 1b -SRC( movl (%esi), %ebx ) - addl $4, %esi -DST( movl %ebx, (%edi) ) - addl $4, %edi - addl %ebx, %eax - adcl $0, %eax - subl $4, %ecx - jmp 5520b diff -Naurp linux-2.4.20-wolk4.0s/arch/i386/lib/csumcpy_ssemmxplus.inc linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csumcpy_ssemmxplus.inc --- linux-2.4.20-wolk4.0s/arch/i386/lib/csumcpy_ssemmxplus.inc 2003-05-15 21:52:20.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/arch/i386/lib/csumcpy_ssemmxplus.inc 1970-01-01 01:00:00.000000000 +0100 @@ -1,103 +0,0 @@ -#undef PREFETCH -#define PREFETCH(a) prefetchnta a - -// How much unrolling do you want? -#undef ITER_BITS -#define ITER_BITS 5 // ...5,6,7 - ...32,64,128 bytes - // NB: tweak unrolled loop too... - -#undef ITER_SZ -#undef ITER_MSK -#define ITER_SZ (1<5 don't mix - adcl $0, %eax - - sfence - //KERNEL_FPU_END(%ebx) - frstor (%esp) - addl $108, %esp -K( movl %ebx, %cr0 ) - -20: - # loop for dwords - movl %edx, %ecx - andl $ITER_MSK, %edx - jz 40f - shrl $2, %edx # this also clears CF -30: -SRC( movl (%esi), %ebx ) - adcl %ebx, %eax -DST( movl %ebx, (%edi) ) - lea 4(%esi), %esi - lea 4(%edi), %edi - dec %edx - jnz 30b - adcl $0, %eax - -40: andl $3, %ecx - ret diff -Naurp linux-2.4.20-wolk4.0s/arch/i386/vmlinux.lds.S linux-2.4.20-wolk4.1-fullkernel/arch/i386/vmlinux.lds.S --- linux-2.4.20-wolk4.0s/arch/i386/vmlinux.lds.S 2003-05-15 21:52:20.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/arch/i386/vmlinux.lds.S 2003-05-16 13:45:27.000000000 +0200 @@ -18,15 +18,6 @@ SECTIONS SHORT(__KERNEL_CS) } - . = ALIGN(16); /* Exception table */ - __start___ex_table = .; - __ex_table : { *(__ex_table) } - __stop___ex_table = .; - - __start___ksymtab = .; /* Kernel symbol table */ - __ksymtab : { *(__ksymtab) } - __stop___ksymtab = .; - __start___kallsyms = .; /* All kernel symbols */ __kallsyms : { *(__kallsyms) } __stop___kallsyms = .; @@ -39,13 +30,13 @@ SECTIONS . = ALIGN(32); .data.cacheline_aligned : { *(.data.cacheline_aligned) } + . = ALIGN(8192); .data.init_task : { - . = ALIGN(8192); *(.data.init_task) } + . = ALIGN(4096); .data.page_aligned : { - . = ALIGN(4096); *(.data.swapper_pg_dir) *(.data.pg0) *(.data.pg1) @@ -84,23 +75,18 @@ SECTIONS . = ALIGN(4*1024*1024) - 1; BYTE(0) } - . += __KERNEL_TEXT_OFFSET; -#else - .text.init : { *(.text.init) } - . = ALIGN(4096); -#endif - - __init_end = .; - + __init_end = . 
+ __KERNEL_TEXT_OFFSET; -#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC /* * PaX: this must be kept in synch with the KERNEL_CS base * in the GDTs in arch/i386/kernel/head.S */ - _text = . - __KERNEL_TEXT_OFFSET; /* Text and read-only data */ - .text (. - __KERNEL_TEXT_OFFSET) : AT (_text + __KERNEL_TEXT_OFFSET) { + _text = .; /* Text and read-only data */ + .text : AT (. + __KERNEL_TEXT_OFFSET) { #else + .text.init : { *(.text.init) } + . = ALIGN(4096); + __init_end = .; _text = .; /* Text and read-only data */ .text : { #endif @@ -117,13 +103,23 @@ SECTIONS . += __KERNEL_TEXT_OFFSET; #endif + .rodata.page_aligned : { *(.data.idt) } .rodata : { *(.rodata) *(.rodata.*) } - .rodata.page_aligned : { - . = ALIGN(4096); - *(.data.idt) - } .kstrtab : { *(.kstrtab) } + . = ALIGN(16); /* Exception table */ + __start___ex_table = .; + __ex_table : { *(__ex_table) } + __stop___ex_table = .; + + __start___ksymtab = .; /* Kernel symbol table */ + __ksymtab : { *(__ksymtab) } + __stop___ksymtab = .; + + __start___kallsyms = .; /* All kernel symbols */ + __kallsyms : { *(__kallsyms) } + __stop___kallsyms = .; + #ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC _end = ALIGN(4*1024*1024); #else diff -Naurp linux-2.4.20-wolk4.0s/drivers/block/cloop.c linux-2.4.20-wolk4.1-fullkernel/drivers/block/cloop.c --- linux-2.4.20-wolk4.0s/drivers/block/cloop.c 2003-05-15 21:52:23.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/drivers/block/cloop.c 2003-05-16 13:54:58.000000000 +0200 @@ -73,6 +73,7 @@ #include #include +#include #include #include #include @@ -130,6 +131,7 @@ MODULE_LICENSE("GPL"); static char *file=NULL; MODULE_PARM(file, "s"); +#define HAS_BUILTIN_ZLIB CONFIG_ZLIB_INFLATE || CONFIG_ZLIB_INFLATE_MODULE struct cloop_device { /* Copied straight from the file */ @@ -144,7 +146,7 @@ struct cloop_device void *buffer; void *compressed_buffer; -#ifndef CONFIG_ZLIB_INFLATE +#ifndef HAS_BUILTIN_ZLIB /* Preallocated scratch area for zlib, saves a lot of kmalloc()s */ void *zlib_used; void *zlib_scratch; @@ -170,7 +172,7 @@ static const int max_cloop = 1; static devfs_handle_t devfs_handle; /* For the directory */ #endif -#ifndef CONFIG_ZLIB_INFLATE +#ifndef HAS_BUILTIN_ZLIB /* Use zlib uncompress */ extern int uncompress(char *dest, unsigned long *destLen, const char *source, unsigned long sourceLen); @@ -180,8 +182,8 @@ void free(void *p) /* Memory pointer is reset after uncompress(); */ } -void *calloc(size_t nmemb, size_t size) -{ +void *calloc(size_t nmemb, size_t size) { + /* Rusty was right, preallocating gives better performance. */ /* return(kmalloc(nmemb*size, GFP_KERNEL)); */ void *ret = cloop_dev.zlib_used; @@ -194,6 +196,7 @@ void *calloc(size_t nmemb, size_t size) } return ret; } + #else /* Use zlib_inflate from lib/zlib_inflate */ #include @@ -409,7 +412,7 @@ static int load_buffer(int blocknum) /* Do decompression into real buffer. 
*/ buflen = ntohl(cloop_dev.head.block_size); -#ifndef CONFIG_ZLIB_INFLATE +#ifndef HAS_BUILTIN_ZLIB /* Reset zlib usage pool */ cloop_dev.zlib_used = cloop_dev.zlib_scratch; #endif @@ -736,7 +739,7 @@ static int init_loopback(void) cloop_name, largest_block); goto error_release_free_buffer; } -#ifndef CONFIG_ZLIB_INFLATE +#ifndef HAS_BUILTIN_ZLIB /* largest_block / 10 + 48000 seems to be sufficient for zlib working area */ cloop_dev.zlib_size=largest_block/10+ZLIB_NEEDS; cloop_dev.zlib_scratch = kmalloc(cloop_dev.zlib_size, GFP_KERNEL); @@ -764,7 +767,7 @@ static int init_loopback(void) cloop_name, ntohl(cloop_dev.offsets[ntohl(cloop_dev.head.num_blocks)]), (unsigned long)inode->i_size); -#ifndef CONFIG_ZLIB_INFLATE +#ifndef HAS_BUILTIN_ZLIB kfree(cloop_dev.zlib_scratch); #else vfree(zstream.workspace); zstream.workspace=NULL; @@ -908,7 +911,7 @@ void cleanup_module(void) kfree(cloop_dev.offsets); kfree(cloop_dev.buffer); kfree(cloop_dev.compressed_buffer); -#ifndef CONFIG_ZLIB_INFLATE +#ifndef HAS_BUILTIN_ZLIB kfree(cloop_dev.zlib_scratch); #ifdef DEBUGMEM printk("%s: Maximum zlib_scratch usage was %lu bytes.\n", diff -Naurp linux-2.4.20-wolk4.0s/drivers/block/elevator.c linux-2.4.20-wolk4.1-fullkernel/drivers/block/elevator.c --- linux-2.4.20-wolk4.0s/drivers/block/elevator.c 2003-05-15 21:52:23.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/drivers/block/elevator.c 2003-05-16 14:38:42.000000000 +0200 @@ -87,8 +87,8 @@ static int rq_mergeable(struct request * return 0; if (req->nr_sectors + count > max_sectors) return 0; - if (bh_elv_seq(bh) != bh_elv_seq(req->bh)) - return 0; +// if (bh_elv_seq(bh) != bh_elv_seq(req->bh)) +// return 0; return 1; } @@ -98,16 +98,15 @@ int elevator_linus_merge(request_queue_t struct buffer_head *bh, int rw, int max_sectors) { - struct list_head *entry, *real_head; - unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE; + struct list_head *entry; + unsigned int count = bh->b_size >> 9; + unsigned int ret = ELEVATOR_NO_MERGE; + const int max_bomb_segments = q->elevator.max_bomb_segments; struct request *__rq; int backmerge_only = 0; + int passed_a_read = 0; - if (!bh_elv_seq(bh)) - entry = &q->queue_head; - else - entry = &q->atomic_head; - real_head = entry; + entry = &q->queue_head; /* * check last merge hint @@ -131,32 +130,27 @@ int elevator_linus_merge(request_queue_t while (!backmerge_only && (entry = entry->prev) != head) { __rq = blkdev_entry_to_request(entry); - /* - * we can't insert beyond a zero sequence point - */ - if (__rq->elevator_sequence <= 0 && !bh_elv_seq(bh)) + if (__rq->elevator_sequence-- <= 0) { /* * OK, we've exceeded someone's latency limit. * But we still continue to look for merges, * because they're so much better than seeks. */ backmerge_only = 1; + } if (__rq->waiting) continue; if (__rq->rq_dev != bh->b_rdev) continue; - if (!*req && bh_rq_in_between(bh, __rq, real_head) && !backmerge_only) + if (!*req && bh_rq_in_between(bh, __rq, &q->queue_head) && !backmerge_only) *req = __rq; + if (__rq->cmd != WRITE) + passed_a_read = 1; if (__rq->cmd != rw) continue; if (__rq->nr_sectors + count > max_sectors) continue; - /* - * possibly move this inside the merge path and make it a break - */ - if (bh_elv_seq(bh) != bh_elv_seq(__rq->bh)) - continue; if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { /* * Really here we could re-increase the elevator_latency of __rq, @@ -184,12 +178,63 @@ out: int scan_cost = ret ? 
1 : ELV_LINUS_SEEK_COST; struct list_head *entry = &(*req)->queue; - while ((entry = entry->next) != real_head) { + while ((entry = entry->next) != &q->queue_head) { __rq = blkdev_entry_to_request(entry); __rq->elevator_sequence -= scan_cost; } } + /* + * If we failed to merge a read anywhere in the request + * queue, we really don't want to place it at the end + * of the list, behind lots of writes. So place it near + * the front. + * + * We don't want to place it in front of _all_ writes: that + * would create lots of seeking, and isn't tunable. + * We try to avoid promoting this read in front of existing + * reads. + * + * max_bomb_segments becomes the maximum number of write + * requests which we allow to remain in place in front of + * a newly introduced read. We weight things a little bit, + * so large writes are more expensive than small ones, but it's + * requests which count, not sectors. + */ + if (max_bomb_segments && rw == READ && !passed_a_read && + ret == ELEVATOR_NO_MERGE) { + int cur_latency = 0; + struct request * const cur_request = *req; + + entry = head->next; + while (entry != &q->queue_head) { + struct request *__rq; + + if (entry == &q->queue_head) + BUG(); + if (entry == q->queue_head.next && + q->head_active && !q->plugged) + BUG(); + __rq = blkdev_entry_to_request(entry); + + if (__rq == cur_request) { + /* + * This is where the old algorithm placed it. + * There's no point pushing it further back, + * so leave it here, in sorted order. + */ + break; + } + if (__rq->cmd == WRITE) { + cur_latency += 1 + __rq->nr_sectors / 64; + if (cur_latency >= max_bomb_segments) { + *req = __rq; + break; + } + } + entry = entry->next; + } + } return ret; } @@ -209,18 +254,13 @@ int elevator_noop_merge(request_queue_t struct buffer_head *bh, int rw, int max_sectors) { - struct list_head *entry, *real_head; + struct list_head *entry; unsigned int count = bh->b_size >> 9; - if (!bh_elv_seq(bh)) - entry = &q->queue_head; - else - entry = &q->atomic_head; - real_head = entry; - - if (list_empty(real_head)) + if (list_empty(&q->queue_head)) return ELEVATOR_NO_MERGE; + entry = &q->queue_head; while ((entry = entry->prev) != head) { struct request *__rq = blkdev_entry_to_request(entry); @@ -232,11 +272,6 @@ int elevator_noop_merge(request_queue_t continue; if (__rq->waiting) continue; - /* - * possibly move this inside the merge path and make it a break - */ - if (bh_elv_seq(bh) != bh_elv_seq(__rq->bh)) - continue; if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { *req = __rq; return ELEVATOR_BACK_MERGE; @@ -246,7 +281,7 @@ int elevator_noop_merge(request_queue_t } } - *req = blkdev_entry_to_request(real_head->prev); + *req = blkdev_entry_to_request(q->queue_head.prev); return ELEVATOR_NO_MERGE; } @@ -259,7 +294,7 @@ int blkelvget_ioctl(elevator_t * elevato output.queue_ID = elevator->queue_ID; output.read_latency = elevator->read_latency; output.write_latency = elevator->write_latency; - output.max_bomb_segments = 0; + output.max_bomb_segments = elevator->max_bomb_segments; if (copy_to_user(arg, &output, sizeof(blkelv_ioctl_arg_t))) return -EFAULT; @@ -278,9 +313,12 @@ int blkelvset_ioctl(elevator_t * elevato return -EINVAL; if (input.write_latency < 0) return -EINVAL; + if (input.max_bomb_segments < 0) + return -EINVAL; elevator->read_latency = input.read_latency; elevator->write_latency = input.write_latency; + elevator->max_bomb_segments = input.max_bomb_segments; return 0; } diff -Naurp linux-2.4.20-wolk4.0s/drivers/block/ll_rw_blk.c 
linux-2.4.20-wolk4.1-fullkernel/drivers/block/ll_rw_blk.c --- linux-2.4.20-wolk4.0s/drivers/block/ll_rw_blk.c 2003-05-15 21:52:23.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/drivers/block/ll_rw_blk.c 2003-05-17 23:30:17.000000000 +0200 @@ -51,8 +51,6 @@ static kmem_cache_t *request_cachep; */ DECLARE_TASK_QUEUE(tq_disk); -LIST_HEAD(blk_atomic_head); - /* * Protect the request list against multiple users.. * @@ -127,63 +125,9 @@ int * max_sectors[MAX_BLKDEV]; */ char * blkdev_varyio[MAX_BLKDEV]; -/* - * only allow merging of buffer_heads with identical sequence, for transparent - * support for writing atomic blocks larger than what a single bh can hold - */ -static unsigned int blk_atomic_seq; -static spinlock_cacheline_t blk_atomic_lock_cacheline = {SPIN_LOCK_UNLOCKED}; -static spinlock_cacheline_t blk_atomic_queue_lock_cacheline = {SPIN_LOCK_UNLOCKED}; - -#ifdef CONFIG_SMP -struct blk_atomic_cpu { - unsigned int seq; - unsigned int left; -} ____cacheline_aligned_in_smp; - -struct blk_atomic_cpu __cacheline_aligned_in_smp blk_atomic_cpu[NR_CPUS]; - -#define BLK_ATOMIC_SEQ_GRAB 1024 -#endif - unsigned long blk_max_low_pfn, blk_max_pfn; int blk_nohighio = 0; -unsigned int blk_get_atomic_seq(void) -{ - unsigned int ret; - -#ifdef CONFIG_SMP - { - struct blk_atomic_cpu *bcpu = &blk_atomic_cpu[smp_processor_id()]; - -restart: - if (unlikely(!bcpu->left)) { - spin_lock_irq(&blk_atomic_lock); - bcpu->seq = blk_atomic_seq; - blk_atomic_seq += BLK_ATOMIC_SEQ_GRAB; - spin_unlock_irq(&blk_atomic_lock); - bcpu->left = BLK_ATOMIC_SEQ_GRAB; - } - bcpu->seq++; - bcpu->left--; - if (unlikely(!bcpu->seq)) - goto restart; - - ret = bcpu->seq; - } -#else - spin_lock_irq(&blk_atomic_lock); - ret = ++blk_atomic_seq; - if (unlikely(!ret)) { - ret = 1; - ++blk_atomic_seq; - } - spin_unlock_irq(&blk_atomic_lock); -#endif - return ret; -} - static inline int get_max_sectors(kdev_t dev) { if (!max_sectors[MAJOR(dev)]) @@ -446,91 +390,6 @@ void generic_unplug_device(void *data) spin_unlock_irqrestore(q->queue_lock, flags); } -static void blk_atomic_add(request_queue_t *q) -{ - spin_lock_irq(&blk_atomic_queue_lock); - /* it's empty only when it's out of the blk_atomic_head queue */ - if (list_empty(&q->atomic_entry)) - list_add_tail(&q->atomic_entry, &blk_atomic_head); - spin_unlock_irq(&blk_atomic_queue_lock); -} - -static struct list_head *blk_find_insert_point(request_queue_t *q, - struct request *rq) -{ - struct list_head *head = &q->queue_head, *insert = q->queue_head.prev; - struct buffer_head *bh; - int elv_seq; - struct request *dummy; - - if (list_empty(head)) - goto done; - else if (q->head_active && !q->plugged) - head = head->next; - - dummy = NULL; - bh = rq->bh; - - elv_seq = bh_elv_seq(bh); - bh_elv_seq(bh) = 0; - - q->elevator.elevator_merge_fn(q, &dummy, head, bh, - -1 /* non cmd -> no merge */, - 0 /* too small max_sectors -> no merge */); - - bh_elv_seq(bh) = elv_seq; - - if (dummy) - insert = &dummy->queue; - -done: - return insert; -} - -void blk_refile_atomic_queue(int sequence) -{ - request_queue_t *q; - struct request * rq; - unsigned long flags; - struct list_head * q_entry, * rq_entry; - int __sequence; - - spin_lock_irqsave(&blk_atomic_queue_lock, flags); - - q_entry = blk_atomic_head.next; - while (q_entry != &blk_atomic_head) { - q = list_entry(q_entry, request_queue_t, atomic_entry); - q_entry = q_entry->next; - - spin_lock(q->queue_lock); - rq_entry = q->atomic_head.next; - while (rq_entry != &q->atomic_head) { - rq = list_entry(rq_entry, struct request, queue); - rq_entry = 
rq_entry->next; - - BUG_ON(!rq->q); - BUG_ON(!rq->bh); - __sequence = bh_elv_seq(rq->bh); - BUG_ON(!__sequence); - if (__sequence == sequence) { - struct list_head *ipoint; - - list_del(&rq->queue); - if (list_empty(&q->queue_head)) - q->plug_device_fn(q, rq->bh->b_rdev); - - ipoint = blk_find_insert_point(q, rq); - list_add(&rq->queue, ipoint); - } - } - if (list_empty(&q->atomic_head)) - list_del_init(&q->atomic_entry); - spin_unlock(q->queue_lock); - } - - spin_unlock_irqrestore(&blk_atomic_queue_lock, flags); -} - /** blk_grow_request_list * @q: The &request_queue_t * @nr_requests: how many requests are desired @@ -589,11 +448,13 @@ static void blk_init_free_list(request_q si_meminfo(&si); megs = si.totalram >> (20 - PAGE_SHIFT); #ifndef CONFIG_BLK_DEV_ELEVATOR_LOWLAT - nr_requests = 128; - if (megs < 32) - nr_requests /= 2; + nr_requests = (megs * 2) & ~15; /* One per half-megabyte */ + if (nr_requests < 32) + nr_requests = 32; + if (nr_requests > 1024) + nr_requests = 1024; #else - nr_requests = 4; + nr_requests = 32; #endif blk_grow_request_list(q, nr_requests); @@ -639,8 +500,6 @@ static int __make_request(request_queue_ void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) { INIT_LIST_HEAD(&q->queue_head); - INIT_LIST_HEAD(&q->atomic_head); - INIT_LIST_HEAD(&q->atomic_entry); elevator_init(&q->elevator, ELEVATOR_LINUS); q->queue_lock = &io_request_lock; blk_init_free_list(q); @@ -982,6 +841,11 @@ static inline void add_request(request_q { drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); + if (!q->plugged && q->head_active && insert_here == &q->queue_head) { + spin_unlock_irq(q->queue_lock); + BUG(); + } + /* * elevator indicated where it wants this request to be * inserted at elevator_merge time @@ -1035,8 +899,6 @@ static void attempt_merge(request_queue_ || req->nr_sectors + next->nr_sectors > max_sectors || next->waiting) return; - if (bh_elv_seq(req->bh) != bh_elv_seq(next->bh)) - return; /* * If we are not allowed to merge these requests, then * return. If we are allowed to merge, then the count @@ -1060,12 +922,11 @@ static void attempt_merge(request_queue_ } static inline void attempt_back_merge(request_queue_t * q, - struct list_head * head, struct request *req, int max_sectors, int max_segments) { - if (&req->queue == head->prev) + if (&req->queue == q->queue_head.prev) return; attempt_merge(q, req, max_sectors, max_segments); } @@ -1091,10 +952,9 @@ static int __make_request(request_queue_ int max_segments = MAX_SEGMENTS; struct request * req, *freereq = NULL; int rw_ahead, max_sectors, el_ret; - struct list_head *head, *real_head, *insert_here; + struct list_head *head, *insert_here; int latency; elevator_t *elevator = &q->elevator; - int atomic = bh_elv_seq(bh), atomic_add = 0; count = bh->b_size >> 9; sector = bh->b_rsector; @@ -1136,7 +996,7 @@ static int __make_request(request_queue_ max_sectors = get_max_sectors(bh->b_rdev); req = NULL; - real_head = head = !atomic ? 
&q->queue_head : &q->atomic_head; + head = &q->queue_head; /* * Now we acquire the request spinlock, we have to be mega careful * not to schedule or do something nonatomic @@ -1145,14 +1005,11 @@ static int __make_request(request_queue_ again: insert_here = head->prev; - if (!atomic) { - if (list_empty(head)) { - q->plug_device_fn(q, bh->b_rdev); /* is atomic */ - goto get_rq; - } else if (q->head_active && !q->plugged) - head = head->next; - } else if (list_empty(head)) + if (list_empty(head)) { + q->plug_device_fn(q, bh->b_rdev); /* is atomic */ goto get_rq; + } else if (q->head_active && !q->plugged) + head = head->next; el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors); switch (el_ret) { @@ -1168,7 +1025,7 @@ again: blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); req_new_io(req, 1, count); - attempt_back_merge(q, real_head, req, max_sectors, max_segments); + attempt_back_merge(q, req, max_sectors, max_segments); goto out; case ELEVATOR_FRONT_MERGE: @@ -1231,10 +1088,9 @@ get_rq: req = get_request(q, rw); if (req == NULL) { spin_unlock_irq(q->queue_lock); - if (atomic) - blk_refile_atomic_queue(atomic); freereq = __get_request_wait(q, rw); - head = real_head; + req = NULL; + head = &q->queue_head; spin_lock_irq(q->queue_lock); get_request_wait_wakeup(q, rw); goto again; @@ -1260,13 +1116,10 @@ get_rq: req_new_io(req, 0, count); blk_started_io(count); add_request(q, req, insert_here); - atomic_add = atomic; out: if (freereq) blkdev_release_request(freereq); spin_unlock_irq(q->queue_lock); - if (atomic_add) - blk_atomic_add(q); return 0; end_io: bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); @@ -1315,8 +1168,6 @@ void generic_make_request (int rw, struc if (!bh->b_end_io) BUG(); - if (!buffer_atomic(bh)) - bh->b_elv_sequence = 0; /* Test device size, when known. 
*/ if (blk_size[major]) @@ -1608,10 +1459,6 @@ int __init blk_dev_init(void) memset(max_readahead, 0, sizeof(max_readahead)); memset(max_sectors, 0, sizeof(max_sectors)); -#ifdef CONFIG_SMP - memset(blk_atomic_cpu, 0, sizeof(blk_atomic_cpu)); -#endif - blk_max_low_pfn = max_low_pfn - 1; blk_max_pfn = max_pfn - 1; @@ -1737,5 +1584,3 @@ EXPORT_SYMBOL(blk_max_low_pfn); EXPORT_SYMBOL(blk_max_pfn); EXPORT_SYMBOL(blk_seg_merge_ok); EXPORT_SYMBOL(blk_nohighio); -EXPORT_SYMBOL(blk_get_atomic_seq); -EXPORT_SYMBOL(blk_refile_atomic_queue); diff -Naurp linux-2.4.20-wolk4.0s/drivers/char/wdt977.c linux-2.4.20-wolk4.1-fullkernel/drivers/char/wdt977.c --- linux-2.4.20-wolk4.0s/drivers/char/wdt977.c 2002-12-18 01:03:53.000000000 +0100 +++ linux-2.4.20-wolk4.1-fullkernel/drivers/char/wdt977.c 2003-05-16 14:00:41.000000000 +0200 @@ -27,6 +27,7 @@ #include #include #include +#include #define WATCHDOG_MINOR 130 diff -Naurp linux-2.4.20-wolk4.0s/drivers/md/raid1.c linux-2.4.20-wolk4.1-fullkernel/drivers/md/raid1.c --- linux-2.4.20-wolk4.0s/drivers/md/raid1.c 2003-05-15 21:52:27.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/drivers/md/raid1.c 2002-12-18 01:03:54.000000000 +0100 @@ -686,7 +686,6 @@ static int raid1_make_request (mddev_t * mbh->b_list = BUF_LOCKED; mbh->b_end_io = raid1_end_request; mbh->b_private = r1_bh; - mbh->b_elv_sequence = bh->b_elv_sequence; mbh->b_next = r1_bh->mirror_bh_list; r1_bh->mirror_bh_list = mbh; @@ -1457,7 +1456,6 @@ static int raid1_sync_request (mddev_t * bh->b_private = r1_bh; bh->b_blocknr = sector_nr; bh->b_rsector = sector_nr; - bh->b_elv_sequence = 0; init_waitqueue_head(&bh->b_wait); generic_make_request(READ, bh); diff -Naurp linux-2.4.20-wolk4.0s/drivers/md/raid5.c linux-2.4.20-wolk4.1-fullkernel/drivers/md/raid5.c --- linux-2.4.20-wolk4.0s/drivers/md/raid5.c 2003-05-15 21:52:27.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/drivers/md/raid5.c 2003-05-16 13:56:12.000000000 +0200 @@ -151,7 +151,7 @@ static void shrink_buffers(struct stripe return; sh->bh_cache[i] = NULL; free_page((unsigned long) bh->b_data); - kmem_cache_free(bh_cachep, bh); + kfree(bh); } } @@ -162,7 +162,7 @@ static int grow_buffers(struct stripe_he for (i=0; ib_data = page_address(page); else { - kmem_cache_free(bh_cachep, bh); + kfree(bh); return 1; } atomic_set(&bh->b_count, 0); @@ -474,7 +474,6 @@ static struct buffer_head *raid5_build_b bh->b_state = (1 << BH_Req) | (1 << BH_Mapped); bh->b_size = sh->size; bh->b_list = BUF_LOCKED; - bh->b_elv_sequence = 0; return bh; } @@ -942,6 +941,7 @@ static void handle_stripe(struct stripe_ /* if already written requests can't be returned as successful fail them */ if (failed > 1 && written) { + printk(KERN_CRIT "DEBUG: RAID5: already written requests can't be returned as successfull so fail them!\n"); for (i=disks; i--; ) { if (sh->bh_written[i]) written--; while ((bh = sh->bh_written[i])) { diff -Naurp linux-2.4.20-wolk4.0s/drivers/scsi/aic7xxx/aic79xx_osm.c linux-2.4.20-wolk4.1-fullkernel/drivers/scsi/aic7xxx/aic79xx_osm.c --- linux-2.4.20-wolk4.0s/drivers/scsi/aic7xxx/aic79xx_osm.c 2003-05-15 21:52:32.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/drivers/scsi/aic7xxx/aic79xx_osm.c 2003-05-17 12:27:20.000000000 +0200 @@ -761,8 +761,9 @@ ahd_linux_map_seg(struct ahd_softc *ahd, * Due to DAC restrictions, we can't * cross a 4GB boundary. 
*/ - if ((addr ^ (addr + len - 1)) & ~0xFFFFFFFF) { + if ((addr ^ (addr + len - 1)) & 0xFFFFFFFF00000000ULL) { struct ahd_dma_seg *next_sg; + uint32_t first_len; uint32_t next_len; printf("Crossed Seg\n"); @@ -773,12 +774,14 @@ ahd_linux_map_seg(struct ahd_softc *ahd, consumed++; next_sg = sg + 1; next_sg->addr = 0; - next_len = 0x100000000 - (addr & 0xFFFFFFFF); - len -= next_len; - next_len |= ((addr >> 8) + 0x1000000) & 0x7F000000; + first_len = 0x100000000ULL - (addr & 0xFFFFFFFF); + next_len = len - first_len; + len = next_len; + next_len |= + ((addr >> 8) + 0x1000000) & AHD_SG_HIGH_ADDR_MASK; next_sg->len = ahd_htole32(next_len); } - len |= (addr >> 8) & 0x7F000000; + len |= (addr >> 8) & AHD_SG_HIGH_ADDR_MASK; } sg->len = ahd_htole32(len); return (consumed); diff -Naurp linux-2.4.20-wolk4.0s/drivers/scsi/aic7xxx/aic7xxx_osm.c linux-2.4.20-wolk4.1-fullkernel/drivers/scsi/aic7xxx/aic7xxx_osm.c --- linux-2.4.20-wolk4.0s/drivers/scsi/aic7xxx/aic7xxx_osm.c 2003-05-15 21:52:32.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/drivers/scsi/aic7xxx/aic7xxx_osm.c 2003-05-17 12:27:20.000000000 +0200 @@ -751,12 +751,14 @@ ahc_linux_map_seg(struct ahc_softc *ahc, scb->platform_data->xfer_len += len; if (sizeof(bus_addr_t) > 4 && (ahc->flags & AHC_39BIT_ADDRESSING) != 0) { + /* * Due to DAC restrictions, we can't * cross a 4GB boundary. */ - if ((addr ^ (addr + len - 1)) & ~0xFFFFFFFF) { + if ((addr ^ (addr + len - 1)) & 0xFFFFFFFF00000000ULL) { struct ahc_dma_seg *next_sg; + uint32_t first_len; uint32_t next_len; printf("Crossed Seg\n"); @@ -767,12 +769,14 @@ ahc_linux_map_seg(struct ahc_softc *ahc, consumed++; next_sg = sg + 1; next_sg->addr = 0; - next_len = 0x100000000 - (addr & 0xFFFFFFFF); - len -= next_len; - next_len |= ((addr >> 8) + 0x1000000) & 0x7F000000; + first_len = 0x100000000ULL - (addr & 0xFFFFFFFF); + next_len = len - first_len; + len = first_len; + next_len |= + ((addr >> 8) + 0x1000000) & AHC_SG_HIGH_ADDR_MASK; next_sg->len = ahc_htole32(next_len); } - len |= (addr >> 8) & 0x7F000000; + len |= (addr >> 8) & AHC_SG_HIGH_ADDR_MASK; } sg->len = ahc_htole32(len); return (consumed); diff -Naurp linux-2.4.20-wolk4.0s/drivers/video/vesafb.c linux-2.4.20-wolk4.1-fullkernel/drivers/video/vesafb.c --- linux-2.4.20-wolk4.0s/drivers/video/vesafb.c 2003-05-15 21:52:38.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/drivers/video/vesafb.c 2003-05-16 14:00:42.000000000 +0200 @@ -637,7 +637,7 @@ int __init vesafb_init(void) video_width = screen_info.lfb_width; video_height = screen_info.lfb_height; video_linelength = screen_info.lfb_linelength; - video_size = screen_info.lfb_size * 65536; + video_size = screen_info.lfb_width * screen_info.lfb_height * video_bpp / 8; video_visual = (video_bpp == 8) ? 
FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR; diff -Naurp linux-2.4.20-wolk4.0s/fs/Config.in linux-2.4.20-wolk4.1-fullkernel/fs/Config.in --- linux-2.4.20-wolk4.0s/fs/Config.in 2003-05-15 21:52:38.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/fs/Config.in 2003-05-19 12:51:33.000000000 +0200 @@ -98,7 +98,7 @@ mainmenu_option next_comment comment 'VFS settings' int 'Soft limit of filedescriptors' CONFIG_FILE_SOFT 8192 int 'Hard limit of filedescriptors' CONFIG_FILE_HARD 65536 - int 'Reserved for root' CONFIG_FILE_RESERVED 128 + int 'Reserved for root' CONFIG_FILE_RESERVED 256 endmenu # Journalling File Systems diff -Naurp linux-2.4.20-wolk4.0s/fs/aio.c linux-2.4.20-wolk4.1-fullkernel/fs/aio.c --- linux-2.4.20-wolk4.0s/fs/aio.c 2003-05-15 21:52:38.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/fs/aio.c 2003-05-16 13:34:28.000000000 +0200 @@ -228,6 +228,7 @@ static struct kioctx *ioctx_alloc(unsign { struct kioctx *ctx; unsigned i; + int ret = 0; /* Prevent overflows */ if ((nr_reqs > (0x10000000U / sizeof(struct io_event))) || @@ -256,7 +257,8 @@ static struct kioctx *ioctx_alloc(unsign INIT_LIST_HEAD(&ctx->free_reqs); INIT_LIST_HEAD(&ctx->active_reqs); - if (aio_setup_ring(ctx) < 0) + ret = aio_setup_ring(ctx); + if (unlikely(ret < 0)) goto out_freectx; /* Allocate nr_reqs iocbs for io. Free iocbs are on the @@ -298,7 +300,7 @@ out_freering: ioctx_free_reqs(ctx); out_freectx: kmem_cache_free(kioctx_cachep, ctx); - ctx = ERR_PTR(-ENOMEM); + ctx = ERR_PTR(ret); dprintk("aio: error allocating ioctx %p\n", ctx); return ctx; @@ -761,6 +763,7 @@ static int read_events(struct kioctx *ct ret = -EFAULT; if (unlikely(copy_from_user(&ts, timeout, sizeof(ts)))) goto out; + ret = 0; init_timeout(&to); set_timeout(start_jiffies, &to, &ts); @@ -1209,61 +1212,9 @@ ssize_t generic_file_aio_read(struct fil return generic_aio_rw(READ, file, req, iocb, iocb->aio_nbytes); } -ssize_t generic_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb, size_t min_size) -{ - return generic_aio_rw(WRITE, file, req, iocb, 1); -#if 0 - unsigned long buf = iocb.aio_buf; - size_t size = iocb.aio_nbytes; - loff_t pos = iocb.aio_offset; - ssize_t nr_written = 0; - kvec_cb_t cb; - long res; -#if 0 - if (likely(NULL != file->f_op->new_write)) { - nr_written = file->f_op->new_write(file, (void *)buf, size, - &pos, F_ATOMIC); - pr_debug("generic_aio_write: new_write: %ld\n", (long)nr_written); - if (-EAGAIN == nr_written) - nr_written = 0; - if ((nr_written >= min_size) || (nr_written < 0)) - return nr_written; - } -#endif - - req->nr_transferred = nr_written; - size -= nr_written; - if (size > aio_max_size) - size = aio_max_size; - req->this_size = size; - buf += nr_written; - cb.vec = map_user_kvec(WRITE, buf, size); - cb.fn = generic_aio_complete_write; - cb.data = req; - - if (IS_ERR(cb.vec)) { - pr_debug("generic_aio_write: map_user_kvec: %ld\n", PTR_ERR(cb.vec)); - return nr_written ? 
nr_written : PTR_ERR(cb.vec); - } - - res = file->f_op->kvec_write(file, cb, size, iocb.aio_offset); - pr_debug("generic_aio_write: kvec_write: %ld\n", res); - if (unlikely(res != 0)) { - unmap_kvec(cb.vec, 0); - free_kvec(cb.vec); - if (nr_written) { - if (res < 0) - res = 0; - res += nr_written; - } - } - return res; -#endif -} - ssize_t generic_file_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb) { - return generic_aio_write(file, req, iocb, iocb->aio_nbytes); + return generic_aio_rw(WRITE, file, req, iocb, 1); } /* lookup_kiocb diff -Naurp linux-2.4.20-wolk4.0s/fs/buffer.c linux-2.4.20-wolk4.1-fullkernel/fs/buffer.c --- linux-2.4.20-wolk4.0s/fs/buffer.c 2003-05-15 21:52:38.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/fs/buffer.c 2003-05-16 14:17:00.000000000 +0200 @@ -142,7 +142,6 @@ void unlock_buffer(struct buffer_head *b { clear_bit(BH_Wait_IO, &bh->b_state); clear_bit(BH_Launder, &bh->b_state); - clear_bit(BH_Atomic, &bh->b_state); /* * When a locked buffer is visible to the I/O layer BH_Launder * is set. This means before unlocking we must clear BH_Launder, @@ -1680,10 +1679,6 @@ static int __block_write_full_page(struc /* Stage 3: submit the IO */ do { struct buffer_head *next = bh->b_this_page; - /* - * Stick it on BUF_LOCKED so shrink_buffer_cache() can nail it. - */ - refile_buffer(bh); submit_bh(WRITE, bh); bh = next; } while (bh != head); @@ -2373,8 +2368,8 @@ static void end_buffer_io_kiobuf(struct mark_buffer_uptodate(bh, uptodate); kiobuf = bh->b_private; - unlock_buffer(bh); end_kio_request(kiobuf, uptodate); + unlock_buffer(bh); } /* @@ -2437,7 +2432,6 @@ int brw_kiovec(int rw, int nr, struct ki struct page * map; struct buffer_head *tmp, **bhs = NULL; int iosize = size; - unsigned int atomic_seq = 0; if (!nr) return 0; @@ -2454,9 +2448,6 @@ int brw_kiovec(int rw, int nr, struct ki panic("brw_kiovec: iobuf not initialised"); } - if (rw == WRITE) - atomic_seq = blk_get_atomic_seq(); - /* * OK to walk down the iovec doing page IO on each page we find. */ @@ -2514,8 +2505,6 @@ int brw_kiovec(int rw, int nr, struct ki tmp->b_dev = dev; tmp->b_blocknr = blocknr; tmp->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req); - bh_elv_seq(tmp) = atomic_seq; - set_bit(BH_Atomic, &tmp->b_state); if (rw == WRITE) { set_bit(BH_Uptodate, &tmp->b_state); @@ -2533,15 +2522,12 @@ int brw_kiovec(int rw, int nr, struct ki * Wait for IO if we have got too much */ if (bhind >= KIO_MAX_SECTORS) { - blk_refile_atomic_queue(atomic_seq); kiobuf_wait_for_io(iobuf); /* wake-one */ err = wait_kio(rw, bhind, bhs, size); if (err >= 0) transferred += err; else goto finished; - if (rw == WRITE) - atomic_seq = blk_get_atomic_seq(); bhind = 0; } @@ -2560,11 +2546,12 @@ int brw_kiovec(int rw, int nr, struct ki /* Is there any IO still left to submit? */ if (bhind) { - blk_refile_atomic_queue(atomic_seq); kiobuf_wait_for_io(iobuf); /* wake-one */ err = wait_kio(rw, bhind, bhs, size); if (err >= 0) transferred += err; + else + goto finished; } finished: @@ -2830,25 +2817,6 @@ static void sync_page_buffers(struct buf int try_to_free_buffers(struct page * page, unsigned int gfp_mask) { struct buffer_head * tmp, * bh = page->buffers; - int was_uptodate = 1; - - if (!PageLocked(page)) - BUG(); - - if (!bh) - return 1; - /* - * Quick check for freeable buffers before we go take three - * global locks. 
- */ - if (!(gfp_mask & __GFP_IO)) { - tmp = bh; - do { - if (buffer_busy(tmp)) - return 0; - tmp = tmp->b_this_page; - } while (tmp != bh); - } spin_lock(&lru_list_lock); write_lock(&hash_table_lock); @@ -2870,8 +2838,7 @@ int try_to_free_buffers(struct page * pa tmp = tmp->b_this_page; if (p->b_dev == B_FREE) BUG(); - if (!buffer_uptodate(p)) - was_uptodate = 0; + remove_inode_queue(p); __remove_from_queues(p); __put_unused_buffer_head(p); @@ -2879,15 +2846,7 @@ int try_to_free_buffers(struct page * pa spin_unlock(&unused_list_lock); /* Wake up anyone waiting for buffer heads */ - smp_mb(); - if (waitqueue_active(&buffer_wait)) - wake_up(&buffer_wait); - - /* - * Make sure we don't read buffers again when they are reattached - */ - if (was_uptodate) - SetPageUptodate(page); + wake_up(&buffer_wait); /* And free the page */ page->buffers = NULL; @@ -2910,62 +2869,6 @@ busy_buffer_page: } EXPORT_SYMBOL(try_to_free_buffers); -/* - * Returns the number of pages which might have become freeable - */ -int shrink_buffer_cache(void) -{ - struct buffer_head *bh; - int nr_todo; - int nr_shrunk = 0; - - /* - * Move any clean unlocked buffers from BUF_LOCKED onto BUF_CLEAN - */ - spin_lock(&lru_list_lock); - for ( ; ; ) { - bh = lru_list[BUF_LOCKED]; - if (!bh || buffer_locked(bh)) - break; - __refile_buffer(bh); - } - - /* - * Now start liberating buffers - */ - nr_todo = nr_buffers_type[BUF_CLEAN]; - while (nr_todo--) { - struct page *page; - - bh = lru_list[BUF_CLEAN]; - if (!bh) - break; - - /* - * Park the buffer on BUF_LOCKED so we don't revisit it on - * this pass. - */ - __remove_from_lru_list(bh); - bh->b_list = BUF_LOCKED; - __insert_into_lru_list(bh, BUF_LOCKED); - page = bh->b_page; - if (TryLockPage(page)) - continue; - - page_cache_get(page); - spin_unlock(&lru_list_lock); - if (try_to_release_page(page, GFP_NOIO)) - nr_shrunk++; - unlock_page(page); - page_cache_release(page); - spin_lock(&lru_list_lock); - } - spin_unlock(&lru_list_lock); -// printk("%s: liberated %d page's worth of buffer_heads\n", -// __FUNCTION__, nr_shrunk); - return (nr_shrunk * sizeof(struct buffer_head)) / PAGE_CACHE_SIZE; -} - /* ================== Debugging =================== */ void show_buffers(void) @@ -3339,7 +3242,6 @@ int kupdate(void *startup) printk(KERN_DEBUG "kupdate() activated...\n"); #endif do_io_postprocessing(); -// shrink_buffer_cache(); sync_old_buffers(); run_task_queue(&tq_disk); } @@ -3539,6 +3441,8 @@ error: return err; } +EXPORT_SYMBOL(brw_kvec_async); + #if 0 int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], kdev_t dev, int nr_blocks, unsigned long b[], int sector_size) diff -Naurp linux-2.4.20-wolk4.0s/fs/jfs/jfs_logmgr.c linux-2.4.20-wolk4.1-fullkernel/fs/jfs/jfs_logmgr.c --- linux-2.4.20-wolk4.0s/fs/jfs/jfs_logmgr.c 2003-05-15 21:52:41.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/fs/jfs/jfs_logmgr.c 2003-05-16 14:35:35.000000000 +0200 @@ -1834,7 +1834,6 @@ static inline void lbmRedrive(struct lbu static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) { struct lbuf *bp; - unsigned long flags; /* * allocate a log buffer @@ -1842,8 +1841,6 @@ static int lbmRead(struct jfs_log * log, *bpp = bp = lbmAllocate(log, pn); jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn); - LCACHE_LOCK(flags); /* disable+lock */ - bp->l_flag |= lbmREAD; bp->l_bh.b_reqnext = NULL; clear_bit(BH_Uptodate, &bp->l_bh.b_state); @@ -1852,13 +1849,10 @@ static int lbmRead(struct jfs_log * log, set_bit(BH_Req, &bp->l_bh.b_state); bp->l_bh.b_rdev = bp->l_bh.b_dev; bp->l_bh.b_rsector = bp->l_blkno 
<< (log->l2bsize - 9); - bh_elv_seq(&bp->l_bh) = 0; generic_make_request(READ, &bp->l_bh); run_task_queue(&tq_disk); - LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag != lbmREAD), flags); - - LCACHE_UNLOCK(flags); /* unlock+enable */ + wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD)); return 0; } @@ -1991,7 +1985,6 @@ static void lbmStartIO(struct lbuf * bp) set_bit(BH_Req, &bp->l_bh.b_state); bp->l_bh.b_rdev = bp->l_bh.b_dev; bp->l_bh.b_rsector = bp->l_blkno << (bp->l_log->l2bsize - 9); - bh_elv_seq(&bp->l_bh) = 0; generic_make_request(WRITE, &bp->l_bh); INCREMENT(lmStat.submitted); diff -Naurp linux-2.4.20-wolk4.0s/fs/nfs/inode.c linux-2.4.20-wolk4.1-fullkernel/fs/nfs/inode.c --- linux-2.4.20-wolk4.0s/fs/nfs/inode.c 2003-05-15 21:52:41.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/fs/nfs/inode.c 2003-05-16 13:55:00.000000000 +0200 @@ -849,19 +849,18 @@ printk("nfs_notify_change: revalidate fa goto out; } while (flusher && NFS_I(inode)->npages); + /* Truncate now in order to avoid races on the client side */ + if (attr->ia_valid & ATTR_SIZE) + vmtruncate(inode, attr->ia_size); + error = NFS_PROTO(inode)->setattr(inode, &fattr, attr); if (error) goto out; - /* - * If we changed the size or mtime, update the inode - * now to avoid invalidating the page cache. - */ - if (attr->ia_valid & ATTR_SIZE) { + + if (attr->ia_valid & ATTR_SIZE) if (attr->ia_size != fattr.size) printk("nfs_notify_change: attr=%Ld, fattr=%Ld??\n", (long long) attr->ia_size, (long long)fattr.size); - vmtruncate(inode, attr->ia_size); - } /* * If we changed the size or mtime, update the inode diff -Naurp linux-2.4.20-wolk4.0s/include/asm-ia64/bitops.h linux-2.4.20-wolk4.1-fullkernel/include/asm-ia64/bitops.h --- linux-2.4.20-wolk4.0s/include/asm-ia64/bitops.h 2003-05-15 21:52:45.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/include/asm-ia64/bitops.h 2003-05-16 13:35:52.000000000 +0200 @@ -449,8 +449,6 @@ find_next_bit (void *addr, unsigned long #ifdef __KERNEL__ -#define __clear_bit(nr, addr) clear_bit(nr, addr) - #define ext2_set_bit test_and_set_bit #define ext2_clear_bit test_and_clear_bit #define ext2_test_bit test_bit diff -Naurp linux-2.4.20-wolk4.0s/include/linux/bench_func.h linux-2.4.20-wolk4.1-fullkernel/include/linux/bench_func.h --- linux-2.4.20-wolk4.0s/include/linux/bench_func.h 2003-05-15 21:52:46.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/include/linux/bench_func.h 1970-01-01 01:00:00.000000000 +0100 @@ -1,16 +0,0 @@ -#ifndef _LINUX_BENCH_FUNC_H -#define _LINUX_BENCH_FUNC_H - -struct candidate { - const char *name; - void *f; // pointer to func - int weight; - int cpu_caps_needed[4]; -}; - -typedef int bench_func(struct candidate *cand, char *opaque); - -struct candidate* find_best(bench_func *bench, char *opaque, - struct candidate runner[], int count); - -#endif diff -Naurp linux-2.4.20-wolk4.0s/include/linux/blkdev.h linux-2.4.20-wolk4.1-fullkernel/include/linux/blkdev.h --- linux-2.4.20-wolk4.0s/include/linux/blkdev.h 2003-05-15 21:52:46.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/include/linux/blkdev.h 2003-05-19 08:44:09.000000000 +0200 @@ -96,7 +96,6 @@ struct request_queue * Together with queue_head for cacheline sharing */ struct list_head queue_head; - struct list_head atomic_head; elevator_t elevator; struct request *last_merge; @@ -116,7 +115,6 @@ struct request_queue * This is used to remove the plug when tq_disk runs. */ struct tq_struct plug_tq; - struct list_head atomic_entry; /* * Boolean that indicates whether this queue is plugged or not. 
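The max_bomb_segments knob that the elevator.c hunks above restore is also reachable from userspace through the existing BLKELVGET/BLKELVSET ioctls served by blkelvget_ioctl()/blkelvset_ioctl(), so the value compiled into the elevator.h defaults further below can be changed per queue at runtime. What follows is only a minimal elvtune-style sketch, assuming the stock 2.4 ioctl numbers from <linux/fs.h> and the four-int layout those handlers copy in and out; the local struct name is illustrative, not the kernel's.

/* Sketch only: the struct mirrors the field order used by the ioctl
 * handlers above; BLKELVGET/BLKELVSET are assumed to be the stock 2.4
 * definitions from <linux/fs.h>. */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>           /* BLKELVGET, BLKELVSET */

typedef struct {
	int queue_ID;
	int read_latency;
	int write_latency;
	int max_bomb_segments;
} blkelv_arg_t;

int main(int argc, char **argv)
{
	blkelv_arg_t arg;
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <blockdev> [max_bomb_segments]\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || ioctl(fd, BLKELVGET, &arg) < 0) {
		perror(argv[1]);
		return 1;
	}
	printf("read_latency=%d write_latency=%d max_bomb_segments=%d\n",
	       arg.read_latency, arg.write_latency, arg.max_bomb_segments);

	if (argc > 2) {             /* optionally change the write-bomb limit */
		arg.max_bomb_segments = atoi(argv[2]);
		if (ioctl(fd, BLKELVSET, &arg) < 0)
			perror("BLKELVSET");
	}
	close(fd);
	return 0;
}

On a kernel built with the lowlat elevator this allows experimenting with values other than the built-in 1 (or the 2/6 desktop/server defaults in the elevator.h hunks below) without recompiling.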
@@ -148,14 +146,6 @@ extern unsigned long blk_max_low_pfn, bl #define BLK_BOUNCE_HIGH (blk_max_low_pfn << PAGE_SHIFT) #define BLK_BOUNCE_ANY (blk_max_pfn << PAGE_SHIFT) -/* - * max guaranteed atomic I/O size while dealing with bounce buffers. - * highmemio capable devices (pci64 in particular) can go well beyond - * this limit. Must be a multiple of 512bytes obviously. - */ -#define BLK_ATOMIC_BOUNCE_SIZE 32768 -#define BLK_ATOMIC_BOUNCE_ENTRIES (BLK_ATOMIC_BOUNCE_SIZE >> 9) - extern void blk_queue_bounce_limit(request_queue_t *, u64); #ifdef CONFIG_HIGHMEM @@ -213,13 +203,6 @@ extern void generic_make_request(int rw, extern inline request_queue_t *blk_get_queue(kdev_t dev); extern void blkdev_release_request(struct request *); -extern spinlock_cacheline_t blk_atomic_lock_cacheline; -#define blk_atomic_lock (blk_atomic_lock_cacheline.lock) -extern unsigned int blk_get_atomic_seq(void); -extern spinlock_cacheline_t blk_atomic_queue_lock_cacheline; -#define blk_atomic_queue_lock (blk_atomic_queue_lock_cacheline.lock) -extern void FASTCALL(blk_refile_atomic_queue(int sequence)); - /* * Access functions for manipulating queue properties */ diff -Naurp linux-2.4.20-wolk4.0s/include/linux/elevator.h linux-2.4.20-wolk4.1-fullkernel/include/linux/elevator.h --- linux-2.4.20-wolk4.0s/include/linux/elevator.h 2003-05-15 21:52:46.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/include/linux/elevator.h 2003-05-19 13:02:42.000000000 +0200 @@ -1,12 +1,9 @@ #ifndef _LINUX_ELEVATOR_H #define _LINUX_ELEVATOR_H -typedef void (elevator_fn) (struct request *, elevator_t *, - struct list_head *, - struct list_head *, int); - -typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *, - struct buffer_head *, int, int); +typedef int (elevator_merge_fn)(request_queue_t *, struct request **, + struct list_head *, struct buffer_head *bh, + int rw, int max_sectors); typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int); @@ -16,6 +13,7 @@ struct elevator_s { int read_latency; int write_latency; + int max_bomb_segments; elevator_merge_fn *elevator_merge_fn; elevator_merge_req_fn *elevator_merge_req_fn; @@ -23,13 +21,13 @@ struct elevator_s unsigned int queue_ID; }; -int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); -void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int); -void elevator_noop_merge_req(struct request *, struct request *); - -int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); -void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int); -void elevator_linus_merge_req(struct request *, struct request *); +elevator_merge_fn elevator_noop_merge; +elevator_merge_cleanup_fn elevator_noop_merge_cleanup; +elevator_merge_req_fn elevator_noop_merge_req; + +elevator_merge_fn elevator_linus_merge; +elevator_merge_cleanup_fn elevator_linus_merge_cleanup; +elevator_merge_req_fn elevator_linus_merge_req; typedef struct blkelv_ioctl_arg_s { int queue_ID; @@ -53,22 +51,6 @@ extern void elevator_init(elevator_t *, #define ELEVATOR_FRONT_MERGE 1 #define ELEVATOR_BACK_MERGE 2 -/* - * This is used in the elevator algorithm. We don't prioritise reads - * over writes any more --- although reads are more time-critical than - * writes, by treating them equally we increase filesystem throughput. - * This turns out to give better overall performance. 
-- sct - */ -#define IN_ORDER(s1,s2) \ - ((((s1)->rq_dev == (s2)->rq_dev && \ - (s1)->sector < (s2)->sector)) || \ - (s1)->rq_dev < (s2)->rq_dev) - -#define BHRQ_IN_ORDER(bh, rq) \ - ((((bh)->b_rdev == (rq)->rq_dev && \ - (bh)->b_rsector < (rq)->sector)) || \ - (bh)->b_rdev < (rq)->rq_dev) - static inline int elevator_request_latency(elevator_t * elevator, int rw) { int latency; @@ -86,7 +68,7 @@ static inline int elevator_request_laten ((elevator_t) { \ 0, /* read_latency */ \ 0, /* write_latency */ \ - \ + 0, /* max_bomb_segments */ \ elevator_noop_merge, /* elevator_merge_fn */ \ elevator_noop_merge_req, /* elevator_merge_req_fn */ \ }) @@ -94,12 +76,12 @@ static inline int elevator_request_laten #if (!defined (CONFIG_BLK_DEV_ELEVATOR_LOWLAT) && defined (CONFIG_SCHED_SERVER)) || (!defined (CONFIG_BLK_DEV_ELEVATOR_LOWLAT) && !defined (CONFIG_SCHED_SERVER) && !defined (CONFIG_SCHED_DESKTOP)) #define ELEVATOR_READ_LATENCY 2048 #define ELEVATOR_WRITE_LATENCY 8192 -#define ELEVATOR_MAX_BOMB_SEGMENTS 0 +#define ELEVATOR_MAX_BOMB_SEGMENTS 6 #define ELEVATOR_LINUS \ ((elevator_t) { \ 2048, /* read passovers */ \ 8192, /* write passovers */ \ - \ + 6, /* max_bomb_segments */ \ elevator_linus_merge, /* elevator_merge_fn */ \ elevator_linus_merge_req, /* elevator_merge_req_fn */ \ }) @@ -107,12 +89,12 @@ static inline int elevator_request_laten #elif (!defined (CONFIG_BLK_DEV_ELEVATOR_LOWLAT) && defined (CONFIG_SCHED_DESKTOP)) #define ELEVATOR_READ_LATENCY 512 #define ELEVATOR_WRITE_LATENCY 8192 -#define ELEVATOR_MAX_BOMB_SEGMENTS 0 +#define ELEVATOR_MAX_BOMB_SEGMENTS 2 #define ELEVATOR_LINUS \ ((elevator_t) { \ 512, /* read passovers */ \ 8192, /* write passovers */ \ - \ + 2, /* max_bomb_segments */ \ elevator_linus_merge, /* elevator_merge_fn */ \ elevator_linus_merge_req, /* elevator_merge_req_fn */ \ }) @@ -125,7 +107,7 @@ static inline int elevator_request_laten ((elevator_t) { \ 0, /* read passovers */ \ 0, /* write passovers */ \ - \ + 1, /* max_bomb_segments */ \ elevator_linus_merge, /* elevator_merge_fn */ \ elevator_linus_merge_req, /* elevator_merge_req_fn */ \ }) diff -Naurp linux-2.4.20-wolk4.0s/include/linux/fs.h linux-2.4.20-wolk4.1-fullkernel/include/linux/fs.h --- linux-2.4.20-wolk4.0s/include/linux/fs.h 2003-05-15 21:52:46.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/include/linux/fs.h 2003-05-19 12:51:33.000000000 +0200 @@ -242,7 +242,6 @@ enum bh_state_bits { BH_Attached, /* 1 if b_inode_buffers is linked into a list */ BH_JBD, /* 1 if it has an attached journal_head */ BH_Delay, /* 1 if the buffer is delayed allocate */ - BH_Atomic, /* 1 if b_elv_sequence is valid */ BH_PrivateStart,/* not a state bit, but the first bit available * for private allocation by other entities @@ -287,7 +286,6 @@ struct buffer_head { void *b_private; /* reserved for b_end_io */ void *b_journal_head; /* FS journal_heads */ unsigned long b_rsector; /* Real buffer location on disk */ - int b_elv_sequence; /* for atomic blocks */ wait_queue_head_t b_wait; struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */ @@ -308,7 +306,6 @@ void init_buffer(struct buffer_head *, b #define buffer_async(bh) __buffer_state(bh,Async) #define buffer_launder(bh) __buffer_state(bh,Launder) #define buffer_delay(bh) __buffer_state(bh,Delay) -#define buffer_atomic(bh) __buffer_state(bh,Atomic) #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) @@ -316,7 +313,6 @@ extern void set_bh_page(struct buffer_he #define touch_buffer(bh) mark_page_accessed(bh->b_page) -#define 
bh_elv_seq(bh) (bh)->b_elv_sequence #include #include @@ -433,6 +429,7 @@ struct address_space_operations { int (*releasepage) (struct page *, int); #define KERNEL_HAS_O_DIRECT /* this is for modules out of the kernel */ int (*direct_IO)(int, struct file *, struct kiobuf *, unsigned long, int); + void (*removepage)(struct page *); /* called when page gets removed from the inode */ }; struct address_space { @@ -1286,7 +1283,6 @@ extern void refile_buffer(struct buffer_ extern void create_empty_buffers(struct page *, kdev_t, unsigned long); extern void end_buffer_io_sync(struct buffer_head *bh, int uptodate); extern void end_buffer_io_async(struct buffer_head *bh, int uptodate); -extern int shrink_buffer_cache(void); /* reiserfs_writepage needs this */ extern void set_buffer_async_io(struct buffer_head *bh) ; diff -Naurp linux-2.4.20-wolk4.0s/include/linux/jhash.h linux-2.4.20-wolk4.1-fullkernel/include/linux/jhash.h --- linux-2.4.20-wolk4.0s/include/linux/jhash.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.20-wolk4.1-fullkernel/include/linux/jhash.h 2003-05-16 14:00:34.000000000 +0200 @@ -0,0 +1,143 @@ +#ifndef _LINUX_JHASH_H +#define _LINUX_JHASH_H + +/* jhash.h: Jenkins hash support. + * + * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net) + * + * http://burtleburtle.net/bob/hash/ + * + * These are the credits from Bob's sources: + * + * lookup2.c, by Bob Jenkins, December 1996, Public Domain. + * hash(), hash2(), hash3, and mix() are externally useful functions. + * Routines to test the hash are included if SELF_TEST is defined. + * You can use this free for any purpose. It has no warranty. + * + * Copyright (C) 2003 David S. Miller (davem@redhat.com) + * + * I've modified Bob's hash to be useful in the Linux kernel, and + * any bugs present are surely my fault. -DaveM + */ + +/* NOTE: Arguments are modified. */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= b; a -= c; a ^= (c>>13); \ + b -= c; b -= a; b ^= (a<<8); \ + c -= a; c -= b; c ^= (b>>13); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<16); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>3); \ + b -= c; b -= a; b ^= (a<<10); \ + c -= a; c -= b; c ^= (b>>15); \ +} + +/* The golden ration: an arbitrary value */ +#define JHASH_GOLDEN_RATIO 0x9e3779b9 + +/* The most generic version, hashes an arbitrary sequence + * of bytes. No alignment or length assumptions are made about + * the input key. + */ +static inline u32 jhash(void *key, u32 length, u32 initval) +{ + u32 a, b, c, len; + u8 *k = key; + + len = length; + a = b = JHASH_GOLDEN_RATIO; + c = initval; + + while (len >= 12) { + a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24)); + b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24)); + c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24)); + + __jhash_mix(a,b,c); + + k += 12; + len -= 12; + } + + c += length; + switch (len) { + case 11: c += ((u32)k[10]<<24); + case 10: c += ((u32)k[9]<<16); + case 9 : c += ((u32)k[8]<<8); + case 8 : b += ((u32)k[7]<<24); + case 7 : b += ((u32)k[6]<<16); + case 6 : b += ((u32)k[5]<<8); + case 5 : b += k[4]; + case 4 : a += ((u32)k[3]<<24); + case 3 : a += ((u32)k[2]<<16); + case 2 : a += ((u32)k[1]<<8); + case 1 : a += k[0]; + }; + + __jhash_mix(a,b,c); + + return c; +} + +/* A special optimized version that handles 1 or more of u32s. + * The length parameter here is the number of u32s in the key. 
+ */ +static inline u32 jhash2(u32 *k, u32 length, u32 initval) +{ + u32 a, b, c, len; + + a = b = JHASH_GOLDEN_RATIO; + c = initval; + len = length; + + while (len >= 3) { + a += k[0]; + b += k[1]; + c += k[2]; + __jhash_mix(a, b, c); + k += 3; len -= 3; + } + + c += length * 4; + + switch (len) { + case 2 : b += k[1]; + case 1 : a += k[0]; + }; + + __jhash_mix(a,b,c); + + return c; +} + + +/* A special ultra-optimized versions that knows they are hashing exactly + * 3, 2 or 1 word(s). + * + * NOTE: In partilar the "c += length; __jhash_mix(a,b,c);" normally + * done at the end is not done here. + */ +static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) +{ + a += JHASH_GOLDEN_RATIO; + b += JHASH_GOLDEN_RATIO; + c += initval; + + __jhash_mix(a, b, c); + + return c; +} + +static inline u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return jhash_3words(a, b, 0, initval); +} + +static inline u32 jhash_1word(u32 a, u32 initval) +{ + return jhash_3words(a, 0, 0, initval); +} + +#endif /* _LINUX_JHASH_H */ diff -Naurp linux-2.4.20-wolk4.0s/include/linux/kmap_types.h linux-2.4.20-wolk4.1-fullkernel/include/linux/kmap_types.h --- linux-2.4.20-wolk4.0s/include/linux/kmap_types.h 2003-05-15 21:52:46.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/include/linux/kmap_types.h 2003-05-16 13:55:58.000000000 +0200 @@ -14,8 +14,8 @@ enum km_type { KM_PTE0, KM_PTE1, KM_PTE2, - KM_NETDUMP, KM_KDB, + KM_NETDUMP, KM_TYPE_NR }; diff -Naurp linux-2.4.20-wolk4.0s/include/linux/mm.h linux-2.4.20-wolk4.1-fullkernel/include/linux/mm.h --- linux-2.4.20-wolk4.0s/include/linux/mm.h 2003-05-15 21:52:47.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/include/linux/mm.h 2003-05-18 18:33:16.000000000 +0200 @@ -761,6 +761,8 @@ static inline void __vma_unlink(struct m mm->mmap_cache = prev; } +#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED) + #define can_vma_merge(vma, vm_flags) __can_vma_merge(vma, vm_flags, NULL, 0, 0) /* * We don't check here for the merged mmap wrapping around the end of pagecache @@ -776,7 +778,9 @@ static inline int __can_vma_merge(struct else #endif - if (vma->vm_file == file && vma->vm_flags == vm_flags) { + if (vma->vm_file == file && vma->vm_flags == vm_flags && + likely((!vma->vm_ops || !vma->vm_ops->close) && !vma->vm_private_data && + !(vm_flags & VM_SPECIAL))) { if (file) { if (vma->vm_pgoff == vm_pgoff + offset) return 1; diff -Naurp linux-2.4.20-wolk4.0s/include/linux/pagemap.h linux-2.4.20-wolk4.1-fullkernel/include/linux/pagemap.h --- linux-2.4.20-wolk4.0s/include/linux/pagemap.h 2003-05-15 21:52:47.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/include/linux/pagemap.h 2003-05-18 18:33:28.000000000 +0200 @@ -86,9 +86,14 @@ extern void add_to_page_cache_locked(str extern int add_to_page_cache_unique(struct page * page, struct address_space *mapping, unsigned long index, struct page **hash); extern wait_queue_head_t *FASTCALL(page_waitqueue(struct page *page)); +extern void ___wait_on_page(struct page *); extern int wait_on_page_timeout(struct page *page, int timeout); -extern void wait_on_page(struct page *); +static inline void wait_on_page(struct page * page) +{ + if (PageLocked(page)) + ___wait_on_page(page); +} extern void FASTCALL(wakeup_page_waiters(struct page * page)); diff -Naurp linux-2.4.20-wolk4.0s/include/linux/sched.h linux-2.4.20-wolk4.1-fullkernel/include/linux/sched.h --- linux-2.4.20-wolk4.0s/include/linux/sched.h 2003-05-15 21:52:47.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/include/linux/sched.h 2003-05-18 
18:33:16.000000000 +0200 @@ -241,13 +241,6 @@ struct files_struct { extern int max_map_count; struct kioctx; - -struct ptg_struct { /* pseudo thread groups */ - atomic_t active; /* number of tasks in run queues */ - atomic_t count; /* number of refs */ -}; - - struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ rb_root_t mm_rb; @@ -327,7 +320,6 @@ struct signal_struct { struct user_struct { atomic_t __count; /* reference count */ atomic_t processes; /* How many processes does this user have? */ - atomic_t active; /* How many active processes does this user have? */ atomic_t files; /* How many open files does this user have? */ /* Hash table maintenance information */ @@ -439,7 +431,6 @@ struct task_struct { task_t *next_task, *prev_task; struct mm_struct *mm, *active_mm; - struct ptg_struct * ptgroup; /* pseudo thread group for this task */ #ifdef CONFIG_SYSTRACE /* back pointer to systrace */ diff -Naurp linux-2.4.20-wolk4.0s/include/linux/sunrpc/sched.h linux-2.4.20-wolk4.1-fullkernel/include/linux/sunrpc/sched.h --- linux-2.4.20-wolk4.0s/include/linux/sunrpc/sched.h 2002-12-29 18:46:57.000000000 +0100 +++ linux-2.4.20-wolk4.1-fullkernel/include/linux/sunrpc/sched.h 2003-05-18 18:33:28.000000000 +0200 @@ -128,7 +128,12 @@ typedef void (*rpc_action)(struct rpc_ #define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) #define rpc_set_running(t) (set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) -#define rpc_clear_running(t) (clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) +#define rpc_clear_running(t) \ + do { \ + smp_mb__before_clear_bit(); \ + clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \ + smp_mb__after_clear_bit(); \ + } while(0) #define rpc_set_sleeping(t) (set_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate)) diff -Naurp linux-2.4.20-wolk4.0s/include/linux/sunrpc/xprt.h linux-2.4.20-wolk4.1-fullkernel/include/linux/sunrpc/xprt.h --- linux-2.4.20-wolk4.0s/include/linux/sunrpc/xprt.h 2003-05-15 21:52:48.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/include/linux/sunrpc/xprt.h 2003-05-18 18:33:28.000000000 +0200 @@ -186,7 +186,12 @@ void xprt_sock_setbufsize(struct rpc_x #define xprt_connected(xp) (!(xp)->stream || test_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_set_connected(xp) (set_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_test_and_set_connected(xp) (test_and_set_bit(XPRT_CONNECT, &(xp)->sockstate)) -#define xprt_clear_connected(xp) (clear_bit(XPRT_CONNECT, &(xp)->sockstate)) +#define xprt_clear_connected(xp) \ + do { \ + smp_mb__before_clear_bit(); \ + clear_bit(XPRT_CONNECT, &(xp)->sockstate); \ + smp_mb__after_clear_bit(); \ + } while(0) #endif /* __KERNEL__*/ diff -Naurp linux-2.4.20-wolk4.0s/include/linux/sysctl.h linux-2.4.20-wolk4.1-fullkernel/include/linux/sysctl.h --- linux-2.4.20-wolk4.0s/include/linux/sysctl.h 2003-05-15 21:52:48.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/include/linux/sysctl.h 2003-05-18 18:33:20.000000000 +0200 @@ -338,7 +338,8 @@ enum { NET_IPV4_ROUTE_GC_ELASTICITY=14, NET_IPV4_ROUTE_MTU_EXPIRES=15, NET_IPV4_ROUTE_MIN_PMTU=16, - NET_IPV4_ROUTE_MIN_ADVMSS=17 + NET_IPV4_ROUTE_MIN_ADVMSS=17, + NET_IPV4_ROUTE_SECRET_INTERVAL=18, }; enum diff -Naurp linux-2.4.20-wolk4.0s/include/net/tcp.h linux-2.4.20-wolk4.1-fullkernel/include/net/tcp.h --- linux-2.4.20-wolk4.0s/include/net/tcp.h 2003-05-15 21:52:48.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/include/net/tcp.h 2003-05-18 18:37:14.000000000 +0200 @@ -1634,6 +1634,7 @@ struct tcp_listen_opt int qlen; int qlen_young; int clock_hand; + u32 
hash_rnd; struct open_request *syn_table[TCP_SYNQ_HSIZE]; }; diff -Naurp linux-2.4.20-wolk4.0s/kernel/Makefile linux-2.4.20-wolk4.1-fullkernel/kernel/Makefile --- linux-2.4.20-wolk4.0s/kernel/Makefile 2003-05-15 21:52:48.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/kernel/Makefile 2003-05-19 08:37:47.000000000 +0200 @@ -17,10 +17,6 @@ obj-y = sched.o dma.o fork.o exec_do sysctl.o acct.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o context.o rcupdate.o futex.o -ifeq ($(CONFIG_X86),y) - obj-y += bench_func.o -endif - obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += ksyms.o obj-$(CONFIG_PM) += pm.o diff -Naurp linux-2.4.20-wolk4.0s/kernel/bench_func.c linux-2.4.20-wolk4.1-fullkernel/kernel/bench_func.c --- linux-2.4.20-wolk4.0s/kernel/bench_func.c 2003-05-15 21:52:48.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/kernel/bench_func.c 1970-01-01 01:00:00.000000000 +0100 @@ -1,51 +0,0 @@ -#include // for KERN_DEBUG - -#include // for test_bit -#include // cpu caps -#include // cpu features constants -#include - -//#define dprintk(a...) printk(a) -#define dprintk(a...) ((void)0) - -// 2.4 only, already in 2.5 -extern inline int boot_cpu_has_cap(int cap) -{ - return test_bit(cap, boot_cpu_data.x86_capability); -} - -extern inline int cpu_supports(int *cap) -{ - while(*cap != -1) { - if(!boot_cpu_has_cap(*cap)) { - dprintk("unsupported caps: %i\n", *cap); - return 0; - } - cap++; - } - return 1; -} - -/* -** Call all the candidates which can be run on this CPU, -** find the best -*/ -struct candidate* -find_best(bench_func *bench, char *opaque, struct candidate runner[], int count) -{ - int score, max = 0; - struct candidate *best = 0; - while(count--) { - if(!cpu_supports(runner->cpu_caps_needed)) { - printk("func %s skipped: not supported by CPU\n", runner->name); - } else { - score = bench(runner,opaque) * runner->weight; - if(max < score) { - max = score; - best = runner; - } - } - runner++; - } - return best; -} diff -Naurp linux-2.4.20-wolk4.0s/kernel/exit.c linux-2.4.20-wolk4.1-fullkernel/kernel/exit.c --- linux-2.4.20-wolk4.0s/kernel/exit.c 2003-05-15 21:52:48.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/kernel/exit.c 2003-05-16 14:21:51.000000000 +0200 @@ -13,12 +13,11 @@ #include #include #include -#include -#include - #ifdef CONFIG_BSD_PROCESS_ACCT #include #endif +#include +#include #ifdef CONFIG_SYSTRACE #include @@ -46,12 +45,6 @@ static void release_task(struct task_str { if (p == current) BUG(); - - if (p->ptgroup && atomic_sub_and_test(1,&p->ptgroup->count)) { - kfree(p->ptgroup); - p->ptgroup = NULL; - } - #ifdef CONFIG_SMP wait_task_inactive(p); #endif diff -Naurp linux-2.4.20-wolk4.0s/kernel/fork.c linux-2.4.20-wolk4.1-fullkernel/kernel/fork.c --- linux-2.4.20-wolk4.0s/kernel/fork.c 2003-05-15 21:52:48.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/kernel/fork.c 2003-05-16 14:21:51.000000000 +0200 @@ -449,7 +449,6 @@ static int copy_mm(unsigned long clone_f tsk->mm = NULL; tsk->active_mm = NULL; - tsk->ptgroup = NULL; /* * Are we cloning a kernel thread? 
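The hash_rnd field added to struct tcp_listen_opt above, the new <linux/jhash.h> helpers, and the NET_IPV4_ROUTE_SECRET_INTERVAL sysctl all point the same way: hash-table bucket selection gets perturbed by a random seed so a remote peer cannot aim collisions at a single chain. The in-kernel hash functions themselves are not part of the hunks shown here; the following is only a userspace sketch of the idea, with TCP_SYNQ_HSIZE and the exact field mix assumed for illustration.

#include <stdio.h>
#include <stdint.h>

typedef uint32_t u32;

/* __jhash_mix() and jhash_3words() copied from the new <linux/jhash.h>
 * above so the sketch is self-contained. */
#define JHASH_GOLDEN_RATIO 0x9e3779b9
#define __jhash_mix(a, b, c) \
{ \
	a -= b; a -= c; a ^= (c>>13); \
	b -= c; b -= a; b ^= (a<<8);  \
	c -= a; c -= b; c ^= (b>>13); \
	a -= b; a -= c; a ^= (c>>12); \
	b -= c; b -= a; b ^= (a<<16); \
	c -= a; c -= b; c ^= (b>>5);  \
	a -= b; a -= c; a ^= (c>>3);  \
	b -= c; b -= a; b ^= (a<<10); \
	c -= a; c -= b; c ^= (b>>15); \
}

static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
{
	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += initval;
	__jhash_mix(a, b, c);
	return c;
}

#define TCP_SYNQ_HSIZE 512      /* assumed; the real value lives in net/tcp.h */

/* Hypothetical bucket selection: mix the peer address and port with the
 * per-listener hash_rnd so the chain a forged SYN lands in is unpredictable. */
static unsigned int synq_hash(u32 raddr, u32 rport, u32 hash_rnd)
{
	return jhash_3words(raddr, rport, 0, hash_rnd) & (TCP_SYNQ_HSIZE - 1);
}

int main(void)
{
	printf("bucket = %u\n", synq_hash(0xc0a80001, 80, 0x12345678));
	return 0;
}

Because the seed differs per listener, and the route hash secret is rotated on the interval the new sysctl controls, an attacker cannot precompute which bucket a given source address will hit.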
@@ -900,21 +899,6 @@ int do_fork(unsigned long clone_flags, u goto bad_fork_cleanup_namespace; gr_copy_label(p); p->semundo = NULL; - - /* detect a 'thread' and link to the ptg block for group */ - if ( ((clone_flags & CLONE_VM) && (clone_flags & CLONE_FILES)) || - (clone_flags & CLONE_THREAD)) { - if (current->ptgroup) - atomic_inc(¤t->ptgroup->count); - else { - current->ptgroup = kmalloc(sizeof(struct ptg_struct), GFP_ATOMIC); - if (current->ptgroup) { - atomic_set(¤t->ptgroup->count,2); - atomic_set(¤t->ptgroup->active,1); - } - } - p->ptgroup = current->ptgroup; - } /* Our parent execution domain becomes current domain These must match for thread signalling to apply */ diff -Naurp linux-2.4.20-wolk4.0s/kernel/ksyms.c linux-2.4.20-wolk4.1-fullkernel/kernel/ksyms.c --- linux-2.4.20-wolk4.0s/kernel/ksyms.c 2003-05-15 21:52:48.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/kernel/ksyms.c 2003-05-19 12:51:33.000000000 +0200 @@ -265,6 +265,7 @@ EXPORT_SYMBOL(ll_rw_block); EXPORT_SYMBOL(__submit_bh); EXPORT_SYMBOL(unlock_buffer); EXPORT_SYMBOL(__wait_on_buffer); +EXPORT_SYMBOL(___wait_on_page); EXPORT_SYMBOL(generic_direct_IO); EXPORT_SYMBOL(discard_bh_page); EXPORT_SYMBOL(block_write_full_page); diff -Naurp linux-2.4.20-wolk4.0s/kernel/sched.c linux-2.4.20-wolk4.1-fullkernel/kernel/sched.c --- linux-2.4.20-wolk4.0s/kernel/sched.c 2003-05-15 21:52:48.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/kernel/sched.c 2003-05-18 18:49:41.000000000 +0200 @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -64,8 +63,6 @@ #define MAX_TIMESLICE ( 10 * HZ / 1000) #define CHILD_PENALTY 95 #define PARENT_PENALTY 100 -#define THREAD_PENALTY 50 /* allow threads groups 2 full timeslices */ -#define USER_PENALTY 10 /* allow user 10 full timeslices */ #define PRIO_BONUS_RATIO 25 #define INTERACTIVE_DELTA 2 #define MAX_SLEEP_AVG (2*HZ) @@ -75,11 +72,9 @@ #warning INFO: Server Scheduler Tweaks will be used. #define MIN_TIMESLICE ( 10 * HZ / 1000) -#define MAX_TIMESLICE (300 * HZ / 1000) +#define MAX_TIMESLICE (200 * HZ / 1000) #define CHILD_PENALTY 50 #define PARENT_PENALTY 100 -#define THREAD_PENALTY 50 /* allow threads groups 2 full timeslices */ -#define USER_PENALTY 10 /* allow user 10 full timeslices */ #define PRIO_BONUS_RATIO 25 #define INTERACTIVE_DELTA 2 #define MAX_SLEEP_AVG (2*HZ) @@ -141,20 +136,7 @@ static inline unsigned int task_timeslice(task_t *p) { - int work, slice, weight = 100; - if (p->ptgroup) { - work = atomic_read(&p->ptgroup->active) * THREAD_PENALTY; - if (work > weight) - weight = work; - } - if (p->user->uid) { - work = atomic_read(&p->user->active) * USER_PENALTY; - if (work > weight) - weight = work; - } - slice = 100 * BASE_TIMESLICE(p) / weight; - return slice > MIN_TIMESLICE ? 
slice : MIN_TIMESLICE; -// return BASE_TIMESLICE(p); + return BASE_TIMESLICE(p); } runqueue_t runqueues[NR_CPUS] __cacheline_aligned; @@ -273,16 +255,10 @@ static inline void __activate_task(task_ } __enqueue_task(p, array, parent); rq->nr_running++; - if (p->ptgroup) - atomic_inc(&p->ptgroup->active); - atomic_inc(&p->user->active); } static inline void deactivate_task(struct task_struct *p, runqueue_t *rq) { - atomic_dec(&p->user->active); - if (p->ptgroup) - atomic_dec(&p->ptgroup->active); rq->nr_running--; if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible++; diff -Naurp linux-2.4.20-wolk4.0s/kernel/softirq.c linux-2.4.20-wolk4.1-fullkernel/kernel/softirq.c --- linux-2.4.20-wolk4.0s/kernel/softirq.c 2003-05-15 21:52:48.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/kernel/softirq.c 2003-05-16 14:21:51.000000000 +0200 @@ -377,7 +377,7 @@ static int ksoftirqd(void * __bind_cpu) if (cpu() != cpu) BUG(); - sprintf(current->comm, "ksoftirqd_CPU%d", bind_cpu); + sprintf(current->comm, "ksoftirqd/%d", bind_cpu); __set_current_state(TASK_INTERRUPTIBLE); mb(); diff -Naurp linux-2.4.20-wolk4.0s/kernel/user.c linux-2.4.20-wolk4.1-fullkernel/kernel/user.c --- linux-2.4.20-wolk4.0s/kernel/user.c 2003-05-15 21:52:48.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/kernel/user.c 2003-05-16 13:54:44.000000000 +0200 @@ -49,7 +49,6 @@ static spinlock_t uidhash_lock = SPIN_LO struct user_struct root_user = { __count: ATOMIC_INIT(1), processes: ATOMIC_INIT(1), - active: ATOMIC_INIT(1), files: ATOMIC_INIT(0) }; @@ -140,7 +139,6 @@ struct user_struct * alloc_uid(uid_t uid #endif /* CONFIG_SCONTEXTS */ atomic_set(&new->__count, 1); atomic_set(&new->processes, 0); - atomic_set(&new->active, 0); atomic_set(&new->files, 0); /* diff -Naurp linux-2.4.20-wolk4.0s/mm/filemap.c linux-2.4.20-wolk4.1-fullkernel/mm/filemap.c --- linux-2.4.20-wolk4.0s/mm/filemap.c 2003-05-15 21:52:48.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/mm/filemap.c 2003-05-16 13:54:46.000000000 +0200 @@ -97,11 +97,15 @@ static inline void remove_page_from_inod { struct address_space * mapping = page->mapping; - mapping->nrpages--; + if (mapping->a_ops->removepage) + mapping->a_ops->removepage(page); + list_del(&page->list); if (!mapping->nrpages) refile_inode(mapping->host); page->mapping = NULL; + wmb(); + mapping->nrpages--; } static inline void remove_page_from_hash_queue(struct page * page) @@ -614,7 +618,7 @@ int filemap_fdatawait(struct address_spa page_cache_get(page); spin_unlock(&pagecache_lock); - wait_on_page(page); + ___wait_on_page(page); if (PageError(page)) ret = -EIO; @@ -828,18 +832,12 @@ void wakeup_page_waiters(struct page * p wake_up(head); } -static void kill_buffers(struct page *page) -{ - if (!PageLocked(page)) - BUG(); - if (page->buffers) - try_to_release_page(page, GFP_NOIO); -} - /* - * Wait for a page to come unlocked. Then try to ditch its buffer_heads. + * Wait for a page to get unlocked. * - * FIXME: Make the ditching dependent on CONFIG_MONSTER_BOX or something. + * This must be called with the caller "holding" the page, + * ie with increased "page->count" so that the page won't + * go away during the wait.. * * The waiting strategy is to get on a waitqueue determined * by hashing. Waiters will then collide, and the newly woken @@ -856,17 +854,27 @@ static void kill_buffers(struct page *pa * be very rare due to the few pages that are actually being * waited on at any given time and the quality of the hash function. 
*/ -void wait_on_page(struct page *page) +void ___wait_on_page(struct page *page) { - lock_page(page); - kill_buffers(page); - unlock_page(page); + wait_queue_head_t *waitqueue = page_waitqueue(page); + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + add_wait_queue(waitqueue, &wait); + do { + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (!PageLocked(page)) + break; + sync_page(page); + schedule(); + } while (PageLocked(page)); + __set_task_state(tsk, TASK_RUNNING); + remove_wait_queue(waitqueue, &wait); } -EXPORT_SYMBOL(wait_on_page); /* * unlock_page() is the other half of the story just above - * lock_page().. Here a couple of quick checks are done + * __wait_on_page(). Here a couple of quick checks are done * and a couple of flags are set on the page, and then all * of the waiters for all of the pages in the appropriate * wait queue are woken. @@ -1569,11 +1577,6 @@ found_page: } goto page_not_up_to_date; } - if (page->buffers) { - lock_page(page); - kill_buffers(page); - unlock_page(page); - } if (!nonblock) generic_file_readahead(reada_ok, filp, inode, page); page_ok: @@ -1648,7 +1651,6 @@ page_not_up_to_date: /* Did somebody else fill it already? */ if (Page_Uptodate(page)) { - kill_buffers(page); UnlockPage(page); goto page_ok; } @@ -2172,11 +2174,6 @@ retry_find: */ if (!Page_Uptodate(page)) goto page_not_uptodate; - if (page->buffers) { - lock_page(page); - kill_buffers(page); - unlock_page(page); - } success: /* @@ -2235,7 +2232,6 @@ page_not_uptodate: /* Did somebody else get it up-to-date? */ if (Page_Uptodate(page)) { - kill_buffers(page); UnlockPage(page); goto success; } @@ -2263,7 +2259,6 @@ page_not_uptodate: /* Somebody else successfully read it in? */ if (Page_Uptodate(page)) { - kill_buffers(page); UnlockPage(page); goto success; } @@ -3088,7 +3083,6 @@ retry: goto retry; } if (Page_Uptodate(page)) { - kill_buffers(page); UnlockPage(page); goto out; } diff -Naurp linux-2.4.20-wolk4.0s/mm/highmem.c linux-2.4.20-wolk4.1-fullkernel/mm/highmem.c --- linux-2.4.20-wolk4.0s/mm/highmem.c 2003-05-15 21:52:48.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/mm/highmem.c 2003-05-16 13:54:47.000000000 +0200 @@ -21,7 +21,6 @@ #include #include #include -#include /* * Virtual_count is not a pure "count". @@ -212,14 +211,6 @@ static LIST_HEAD(emergency_pages); int nr_emergency_bhs; static LIST_HEAD(emergency_bhs); -int nr_atomic_emergency_pages; -static LIST_HEAD(atomic_emergency_pages); - -int nr_atomic_emergency_bhs; -static LIST_HEAD(atomic_emergency_bhs); - -int atomic_emergency_owner; - /* * Simple bounce buffer support for highmem pages. * This will be moved to the block layer in 2.5. 
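The mm/highmem.c hunks below fold the separate "atomic" bounce reserve back into the single emergency pool: allocation first tries the normal page allocator, then a small spinlock-protected free list, and bounce_end_io() refills that list up to POOL_SIZE before really freeing. The following is a userspace analogue of that pattern, purely as a sketch with illustrative names and sizes rather than the kernel's.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define POOL_SIZE 32            /* illustrative reserve size */

struct pool_item { struct pool_item *next; };

static struct pool_item *reserve;
static int nr_reserve;
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;

static void *pool_alloc(size_t size)
{
	void *p = malloc(size);     /* normal allocation first */

	if (p)
		return p;
	pthread_mutex_lock(&pool_lock);
	if (reserve) {              /* fall back to the emergency reserve */
		p = reserve;
		reserve = reserve->next;
		nr_reserve--;
	}
	pthread_mutex_unlock(&pool_lock);
	return p;                   /* caller must wait and retry if still NULL */
}

static void pool_free(void *p, size_t size)
{
	pthread_mutex_lock(&pool_lock);
	if (nr_reserve < POOL_SIZE && size >= sizeof(struct pool_item)) {
		struct pool_item *it = p;   /* refill the reserve instead of freeing */
		it->next = reserve;
		reserve = it;
		nr_reserve++;
		p = NULL;
	}
	pthread_mutex_unlock(&pool_lock);
	free(p);                    /* free(NULL) is a no-op */
}

int main(void)
{
	void *buf = pool_alloc(4096);

	pool_free(buf, 4096);
	printf("reserve now holds %d buffer(s)\n", nr_reserve);
	return 0;
}

The kernel version additionally kicks tq_disk and retries when both sources are empty, since a completing bounce I/O is what puts buffers back on the reserve list.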
@@ -259,66 +250,35 @@ static inline void bounce_end_io (struct struct page *page; struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private); unsigned long flags; - int atomic = bh_elv_seq(bh); bh_orig->b_end_io(bh_orig, uptodate); page = bh->b_page; spin_lock_irqsave(&emergency_lock, flags); - if (!atomic) { - if (nr_emergency_pages >= POOL_SIZE) - __free_page(page); - else { - /* - * We are abusing page->list to manage - * the highmem emergency pool: - */ - list_add(&page->list, &emergency_pages); - nr_emergency_pages++; - } - - if (nr_emergency_bhs >= POOL_SIZE) { + if (nr_emergency_pages >= POOL_SIZE) + __free_page(page); + else { + /* + * We are abusing page->list to manage + * the highmem emergency pool: + */ + list_add(&page->list, &emergency_pages); + nr_emergency_pages++; + } + + if (nr_emergency_bhs >= POOL_SIZE) { #ifdef HIGHMEM_DEBUG - /* Don't clobber the constructed slab cache */ - init_waitqueue_head(&bh->b_wait); + /* Don't clobber the constructed slab cache */ + init_waitqueue_head(&bh->b_wait); #endif - kmem_cache_free(bh_cachep, bh); - } else { - /* - * Ditto in the bh case, here we abuse b_inode_buffers: - */ - list_add(&bh->b_inode_buffers, &emergency_bhs); - nr_emergency_bhs++; - } + kmem_cache_free(bh_cachep, bh); } else { - if (nr_atomic_emergency_pages >= BLK_ATOMIC_BOUNCE_ENTRIES) - __free_page(page); - else { - /* - * We are abusing page->list to manage - * the highmem emergency pool: - */ - list_add(&page->list, &atomic_emergency_pages); - nr_atomic_emergency_pages++; - } - - if (nr_atomic_emergency_bhs >= BLK_ATOMIC_BOUNCE_ENTRIES) { -#ifdef HIGHMEM_DEBUG - /* Don't clobber the constructed slab cache */ - init_waitqueue_head(&bh->b_wait); -#endif - kmem_cache_free(bh_cachep, bh); - } else { - /* - * Ditto in the bh case, here we abuse b_inode_buffers: - */ - list_add(&bh->b_inode_buffers, &atomic_emergency_bhs); - nr_atomic_emergency_bhs++; - } - BUG_ON(nr_atomic_emergency_pages != nr_atomic_emergency_bhs); - if (nr_atomic_emergency_pages >= BLK_ATOMIC_BOUNCE_ENTRIES) - atomic_emergency_owner = 0; + /* + * Ditto in the bh case, here we abuse b_inode_buffers: + */ + list_add(&bh->b_inode_buffers, &emergency_bhs); + nr_emergency_bhs++; } spin_unlock_irqrestore(&emergency_lock, flags); } @@ -351,24 +311,6 @@ static __init int init_emergency_pool(vo list_add(&bh->b_inode_buffers, &emergency_bhs); nr_emergency_bhs++; } - while (nr_atomic_emergency_pages < BLK_ATOMIC_BOUNCE_ENTRIES) { - struct page * page = alloc_page(GFP_ATOMIC); - if (!page) { - printk("couldn't refill highmem emergency pages"); - break; - } - list_add(&page->list, &atomic_emergency_pages); - nr_atomic_emergency_pages++; - } - while (nr_atomic_emergency_bhs < BLK_ATOMIC_BOUNCE_ENTRIES) { - struct buffer_head * bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC); - if (!bh) { - printk("couldn't refill highmem emergency bhs"); - break; - } - list_add(&bh->b_inode_buffers, &atomic_emergency_bhs); - nr_atomic_emergency_bhs++; - } spin_unlock_irq(&emergency_lock); printk("allocated %d pages and %d bhs reserved for the highmem bounces\n", nr_emergency_pages, nr_emergency_bhs); @@ -392,7 +334,7 @@ static void bounce_end_io_read (struct b bounce_end_io(bh, uptodate); } -struct page *alloc_bounce_page (int atomic) +struct page *alloc_bounce_page (void) { struct list_head *tmp; struct page *page = NULL; @@ -424,30 +366,17 @@ repeat_alloc: /* * Try to allocate from the emergency pool. 
*/ + tmp = &emergency_pages; spin_lock_irq(&emergency_lock); - if (!atomic) { - tmp = &emergency_pages; - if (!list_empty(tmp)) { - page = list_entry(tmp->next, struct page, list); - list_del(tmp->next); - nr_emergency_pages--; - } - } else { - tmp = &atomic_emergency_pages; - if ((!atomic_emergency_owner || atomic_emergency_owner == atomic) && - !list_empty(tmp)) { - page = list_entry(tmp->next, struct page, list); - list_del(tmp->next); - nr_atomic_emergency_pages--; - atomic_emergency_owner = atomic; - } + if (!list_empty(tmp)) { + page = list_entry(tmp->next, struct page, list); + list_del(tmp->next); + nr_emergency_pages--; } spin_unlock_irq(&emergency_lock); if (page) return page; - if (atomic) - blk_refile_atomic_queue(atomic); /* we need to wait I/O completion */ run_task_queue(&tq_disk); @@ -456,7 +385,7 @@ repeat_alloc: goto repeat_alloc; } -struct buffer_head *alloc_bounce_bh (int atomic) +struct buffer_head *alloc_bounce_bh (void) { struct list_head *tmp; struct buffer_head *bh = NULL; @@ -488,31 +417,17 @@ repeat_alloc: /* * Try to allocate from the emergency pool. */ + tmp = &emergency_bhs; spin_lock_irq(&emergency_lock); - if (!atomic) { - tmp = &emergency_bhs; - if (!list_empty(tmp)) { - bh = list_entry(tmp->next, struct buffer_head, b_inode_buffers); - list_del(tmp->next); - nr_emergency_bhs--; - } - } else { - tmp = &atomic_emergency_bhs; - if ((!atomic_emergency_owner || atomic_emergency_owner == atomic) && - !list_empty(tmp)) { - bh = list_entry(tmp->next, struct buffer_head, b_inode_buffers); - list_del(tmp->next); - nr_atomic_emergency_bhs--; - atomic_emergency_owner = atomic; - } - + if (!list_empty(tmp)) { + bh = list_entry(tmp->next, struct buffer_head, b_inode_buffers); + list_del(tmp->next); + nr_emergency_bhs--; } spin_unlock_irq(&emergency_lock); if (bh) return bh; - if (atomic) - blk_refile_atomic_queue(atomic); /* we need to wait I/O completion */ run_task_queue(&tq_disk); @@ -529,14 +444,14 @@ struct buffer_head * create_bounce(int r if (!PageHighMem(bh_orig->b_page)) return bh_orig; - bh = alloc_bounce_bh(bh_elv_seq(bh_orig)); + bh = alloc_bounce_bh(); /* * This is wasteful for 1k buffers, but this is a stopgap measure * and we are being ineffective anyway. This approach simplifies * things immensly. On boxes with more than 4GB RAM this should * not be an issue anyway. */ - page = alloc_bounce_page(bh_elv_seq(bh_orig)); + page = alloc_bounce_page(); set_bh_page(bh, page, 0); @@ -564,7 +479,6 @@ struct buffer_head * create_bounce(int r bh->b_end_io = bounce_end_io_read; bh->b_private = (void *)bh_orig; bh->b_rsector = bh_orig->b_rsector; - bh_elv_seq(bh) = bh_elv_seq(bh_orig); #ifdef HIGHMEM_DEBUG memset(&bh->b_wait, -1, sizeof(bh->b_wait)); #endif diff -Naurp linux-2.4.20-wolk4.0s/mm/mmap.c linux-2.4.20-wolk4.1-fullkernel/mm/mmap.c --- linux-2.4.20-wolk4.0s/mm/mmap.c 2003-05-15 21:52:48.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/mm/mmap.c 2003-05-16 14:21:51.000000000 +0200 @@ -390,6 +390,8 @@ static int vma_merge(struct mm_struct * spin_unlock(lock); if (need_unlock) unlock_vma_mappings(next); + if (file) + fput(file); mm->map_count--; kmem_cache_free(vm_area_cachep, next); diff -Naurp linux-2.4.20-wolk4.0s/mm/page_alloc.c linux-2.4.20-wolk4.1-fullkernel/mm/page_alloc.c --- linux-2.4.20-wolk4.0s/mm/page_alloc.c 2003-05-15 21:52:48.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/mm/page_alloc.c 2003-05-16 14:21:51.000000000 +0200 @@ -531,7 +531,7 @@ try_again: /* * Oh well, we didn't succeed. 
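alloc_bounce_page() and alloc_bounce_bh() now share the matching allocation side: try a normal allocation first, fall back to the emergency pool, and if both fail start queued disk I/O and retry until something frees up. A compilable sketch of that strategy, with pool_get() and kick_io() as illustrative stand-ins for the pool lookup and for run_task_queue(&tq_disk):

#include <stdlib.h>

struct bounce_buf { char payload[4096]; };

/* Stubs standing in for the emergency pool and the disk task queue. */
static struct bounce_buf *pool_get(void) { return NULL; }
static void kick_io(void) { }

static struct bounce_buf *alloc_bounce_sketch(void)
{
        for (;;) {
                struct bounce_buf *b = malloc(sizeof(*b));  /* normal allocation first */
                if (b)
                        return b;
                b = pool_get();                             /* then the reserved pool */
                if (b)
                        return b;
                kick_io();                                  /* start pending I/O, then retry */
        }
}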
*/ - if (!(current->flags & PF_MEMALLOC)) { + if (!(current->flags & (PF_MEMALLOC|PF_MEMDIE))) { /* * Are we dealing with a higher order allocation? * diff -Naurp linux-2.4.20-wolk4.0s/mm/vmscan.c linux-2.4.20-wolk4.1-fullkernel/mm/vmscan.c --- linux-2.4.20-wolk4.0s/mm/vmscan.c 2003-05-15 21:52:48.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/mm/vmscan.c 2003-05-16 13:43:23.000000000 +0200 @@ -556,9 +556,6 @@ int refill_inactive_zone(struct zone_str continue; } - if (page->buffers) - try_to_release_page(page, 0); - /* * Do aging on the pages. */ @@ -903,16 +900,6 @@ static int do_try_to_free_pages(unsigned */ ret += kmem_cache_reap(gfp_mask); - if ((gfp_mask & __GFP_WAIT) && (shrink_buffer_cache() > 16)) - ret += kmem_cache_reap(gfp_mask); - - /* - * Hmm.. Cache shrink failed - time to kill something? - * Mhwahahhaha! This is the part I really like. Giggle. - */ - if (!ret && free_low(ANY_ZONE) && (gfp_mask&__GFP_WAIT)) - out_of_memory(); - return ret; } @@ -937,7 +924,7 @@ static int do_try_to_free_pages_kswapd(u for_each_zone(zone) { int worktodo = max(free_low(zone), BATCH_WORK_AMOUNT); if (need_rebalance_laundry(zone)) - rebalance_laundry_zone(zone, worktodo, 0); + ret += rebalance_laundry_zone(zone, worktodo, 0); if (need_rebalance_dirty(zone)) rebalance_dirty_zone(zone, 4 * worktodo, gfp_mask); @@ -963,9 +950,11 @@ static int do_try_to_free_pages_kswapd(u refill_freelist(); - /* Start IO when needed. */ - if (free_plenty(ALL_ZONES) > 0 || free_low(ANY_ZONE) > 0) - run_task_queue(&tq_disk); + /* + * Mhwahahhaha! This is the part I really like. Giggle. + */ + if (!ret && free_min(ANY_ZONE)) + out_of_memory(); return ret; } diff -Naurp linux-2.4.20-wolk4.0s/net/ipsec/Makefile.inc linux-2.4.20-wolk4.1-fullkernel/net/ipsec/Makefile.inc --- linux-2.4.20-wolk4.0s/net/ipsec/Makefile.inc 2003-05-15 21:52:49.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/net/ipsec/Makefile.inc 2003-05-16 13:36:25.000000000 +0200 @@ -11,7 +11,7 @@ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. # -# RCSID $Id: Makefile.inc,v 1.2 2002/11/05 03:00:10 ken Exp $ +# RCSID $Id: Makefile.inc,v 1.3 2003/04/26 02:27:17 ken Exp $ @@ -42,7 +42,7 @@ SHELL=/bin/sh # reside rather than where install puts them, are exempt from this.) # The prefixing is done in this file, so as to have central control over # it; DESTDIR itself should never appear in any other Makefile. -DESTDIR= +DESTDIR?= # "local" part of tree, used in building other pathnames INC_USRLOCAL=/usr/local @@ -92,7 +92,7 @@ RCDIR=$(DESTDIR)$(FINALRCDIR) ### kernel pathnames # Kernel location: where patches are inserted, where kernel builds are done. 
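Two related vmscan.c changes above: rebalance_laundry_zone()'s return value is now accumulated into ret, and the out_of_memory() call moves behind a "!ret && free_min(ANY_ZONE)" test, so the killer only fires when a full pass made no progress and free memory is below the minimum watermark. The sketch below (zone and work types are simplified stand-ins, not the kernel's) shows why accumulating the per-zone progress matters for that test:

struct zone_stub { int id; };

/* Pretend half the requested work got done in this zone. */
static int rebalance_laundry_zone_stub(struct zone_stub *z, int work)
{
        (void)z;
        return work / 2;
}

static int scan_zones(struct zone_stub *zones, int nzones, int work)
{
        int progress = 0;

        for (int i = 0; i < nzones; i++)
                progress += rebalance_laundry_zone_stub(&zones[i], work);

        return progress;        /* the caller only considers OOM when this stays 0 */
}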
-KERNELSRC=/usr/src/linux +KERNELSRC?=/usr/src/linux # things whose existence indicates what kernel version we have DIRIN22=$(KERNELSRC)/net/netlink diff -Naurp linux-2.4.20-wolk4.0s/net/ipsec/Makefile.ver linux-2.4.20-wolk4.1-fullkernel/net/ipsec/Makefile.ver --- linux-2.4.20-wolk4.0s/net/ipsec/Makefile.ver 2003-05-15 21:52:49.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/net/ipsec/Makefile.ver 2003-05-16 13:36:25.000000000 +0200 @@ -1 +1 @@ -IPSECVERSION=super-freeswan-1.99.6.2 +IPSECVERSION=super-freeswan-1.99.7 diff -Naurp linux-2.4.20-wolk4.0s/net/ipsec/alg/ipsec_alg_blowfish.c linux-2.4.20-wolk4.1-fullkernel/net/ipsec/alg/ipsec_alg_blowfish.c --- linux-2.4.20-wolk4.0s/net/ipsec/alg/ipsec_alg_blowfish.c 2003-05-15 21:52:49.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/net/ipsec/alg/ipsec_alg_blowfish.c 2003-05-16 13:36:25.000000000 +0200 @@ -3,7 +3,7 @@ * * Author: JuanJo Ciarlante * - * $Id: ipsec_alg_blowfish.c,v 1.4 2003/02/07 13:14:25 ken Exp $ + * $Id: ipsec_alg_blowfish.c,v 1.5 2003/05/08 13:48:39 jjo Exp $ * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -49,6 +49,7 @@ #define ESP_BLOWFISH_KEY_SZ_MIN 12 /* 96 bit secret key min */ #define ESP_BLOWFISH_KEY_SZ 16 /* 128 bit secret key */ +#define ESP_BLOWFISH_KEY_SZ_MAX 56 /* 448 bit secret key */ #define ESP_BLOWFISH_CBC_BLK_LEN 8 /* block size */ MODULE_AUTHOR("JuanJo Ciarlante "); @@ -94,7 +95,7 @@ static struct ipsec_alg_enc ipsec_alg_BL ixt_name: "blowfish", ixt_blocksize: ESP_BLOWFISH_CBC_BLK_LEN, ixt_keyminbits: ESP_BLOWFISH_KEY_SZ_MIN*8, - ixt_keymaxbits: ESP_BLOWFISH_KEY_SZ*8, + ixt_keymaxbits: ESP_BLOWFISH_KEY_SZ_MAX*8, ixt_e_keylen: ESP_BLOWFISH_KEY_SZ, ixt_e_ctx_size: sizeof(blowfish_context), ixt_e_set_key: _blowfish_set_key, diff -Naurp linux-2.4.20-wolk4.0s/net/ipv4/netfilter/ip_conntrack_core.c linux-2.4.20-wolk4.1-fullkernel/net/ipv4/netfilter/ip_conntrack_core.c --- linux-2.4.20-wolk4.0s/net/ipv4/netfilter/ip_conntrack_core.c 2003-05-15 21:52:51.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/net/ipv4/netfilter/ip_conntrack_core.c 2003-05-16 14:00:34.000000000 +0200 @@ -31,6 +31,8 @@ #include #include #include +#include +#include /* For ERR_PTR(). Yeah, I know... --RR */ #include @@ -47,9 +49,6 @@ #define IP_CONNTRACK_VERSION "2.1" -/* Multiplier used to avoid hash clashes - should be a prime, and better not 2. */ -#define HASH_CONNTRACK_SRC_MULTIPLIER 7 - #if 0 #define DEBUGP printk #else @@ -110,21 +109,19 @@ ip_conntrack_put(struct ip_conntrack *ct nf_conntrack_put(&ct->infos[0]); } -static inline u_int32_t +static int ip_conntrack_hash_rnd_initted; +static unsigned int ip_conntrack_hash_rnd; + +static u_int32_t hash_conntrack(const struct ip_conntrack_tuple *tuple) { #if 0 dump_tuple(tuple); #endif - /* ntohl because more differences in low bits. */ - /* ports must be outside ntohl or else they will add to high bits. */ - /* To ensure that halves of the same connection don't hash - clash, we use a multiplier for the src port. 
*/ - return (ntohl(tuple->src.ip + tuple->dst.ip) - + HASH_CONNTRACK_SRC_MULTIPLIER * ntohs (tuple->src.u.all) - + ntohs (tuple->dst.u.all) - + tuple->dst.protonum) - % ip_conntrack_htable_size; + return (jhash_3words(tuple->src.ip, + (tuple->dst.ip ^ tuple->dst.protonum), + (tuple->src.u.all | (tuple->dst.u.all << 16)), + ip_conntrack_hash_rnd) % ip_conntrack_htable_size); } inline int @@ -643,11 +640,16 @@ init_conntrack(const struct ip_conntrack { struct ip_conntrack *conntrack; struct ip_conntrack_tuple repl_tuple; - size_t hash, repl_hash; + size_t hash; struct ip_conntrack_expect *expected; int i; static unsigned int drop_next = 0; + if (!ip_conntrack_hash_rnd_initted) { + get_random_bytes(&ip_conntrack_hash_rnd, 4); + ip_conntrack_hash_rnd_initted = 1; + } + hash = hash_conntrack(tuple); if (ip_conntrack_max && @@ -671,7 +673,6 @@ init_conntrack(const struct ip_conntrack DEBUGP("Can't invert tuple.\n"); return NULL; } - repl_hash = hash_conntrack(&repl_tuple); conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); if (!conntrack) { @@ -1415,7 +1416,7 @@ int __init ip_conntrack_init(void) ip_conntrack_max = 8 * ip_conntrack_htable_size; printk("ip_conntrack version %s (%u buckets, %d max)" - " - %d bytes per conntrack\n", IP_CONNTRACK_VERSION, + " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION, ip_conntrack_htable_size, ip_conntrack_max, sizeof(struct ip_conntrack)); diff -Naurp linux-2.4.20-wolk4.0s/net/ipv4/route.c linux-2.4.20-wolk4.1-fullkernel/net/ipv4/route.c --- linux-2.4.20-wolk4.0s/net/ipv4/route.c 2003-05-15 21:52:52.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/net/ipv4/route.c 2003-05-16 14:00:34.000000000 +0200 @@ -85,6 +85,7 @@ #include #include #include +#include #include #include #include @@ -117,13 +118,14 @@ int ip_rt_gc_elasticity = 8; int ip_rt_mtu_expires = 10 * 60 * HZ; int ip_rt_min_pmtu = 512 + 20 + 20; int ip_rt_min_advmss = 256; - +int ip_rt_secret_interval = 10 * 60 * HZ; static unsigned long rt_deadline; #define RTprint(a...) printk(KERN_DEBUG a) static struct timer_list rt_flush_timer; static struct timer_list rt_periodic_timer; +static struct timer_list rt_secret_timer; /* * Interface to generic destination cache. @@ -194,19 +196,17 @@ struct rt_hash_bucket { static struct rt_hash_bucket *rt_hash_table; static unsigned rt_hash_mask; static int rt_hash_log; +static unsigned int rt_hash_rnd; struct rt_cache_stat rt_cache_stat[NR_CPUS]; static int rt_intern_hash(unsigned hash, struct rtable *rth, struct rtable **res); -static __inline__ unsigned rt_hash_code(u32 daddr, u32 saddr, u8 tos) +static unsigned int rt_hash_code(u32 daddr, u32 saddr, u8 tos) { - unsigned hash = ((daddr & 0xF0F0F0F0) >> 4) | - ((daddr & 0x0F0F0F0F) << 4); - hash ^= saddr ^ tos; - hash ^= (hash >> 16); - return (hash ^ (hash >> 8)) & rt_hash_mask; + return (jhash_3words(daddr, saddr, (u32) tos, rt_hash_rnd) + & rt_hash_mask); } static int rt_cache_get_info(char *buffer, char **start, off_t offset, @@ -479,6 +479,15 @@ void rt_cache_flush(int delay) spin_unlock_bh(&rt_flush_lock); } +static void rt_secret_rebuild(unsigned long dummy) +{ + unsigned long now = jiffies; + + get_random_bytes(&rt_hash_rnd, 4); + rt_cache_flush(0); + mod_timer(&rt_secret_timer, now + ip_rt_secret_interval); +} + /* Short description of GC goals. 
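The hunks above replace predictable arithmetic hash functions (conntrack tuples, the routing cache, and further down the SYN queues) with jhash keyed by a random value picked at boot or first use, and route.c additionally rekeys and flushes on a timer (rt_secret_rebuild) every ip_rt_secret_interval jiffies. The point is that a remote sender can no longer compute which bucket its packets land in, so it cannot force pathologically long hash chains. A small userspace illustration of the keyed-bucket idea; mix3() is only a simplified stand-in for jhash_3words(), and the addresses and ports in main() are arbitrary:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define HTABLE_SIZE 8192

static uint32_t hash_rnd;       /* seeded once at startup, like ip_conntrack_hash_rnd */

/* Simplified stand-in for the kernel's jhash_3words(); not the real function. */
static uint32_t mix3(uint32_t a, uint32_t b, uint32_t c, uint32_t seed)
{
        c += seed;
        a -= b; a -= c; a ^= c >> 13;
        b -= c; b -= a; b ^= a << 8;
        c -= a; c -= b; c ^= b >> 13;
        return c;
}

/* Same shape as the new hash_conntrack(): tuple words mixed with a secret. */
static unsigned int bucket(uint32_t saddr, uint32_t daddr,
                           uint16_t sport, uint16_t dport, uint8_t proto)
{
        return mix3(saddr, daddr ^ proto,
                    (uint32_t)sport | ((uint32_t)dport << 16),
                    hash_rnd) % HTABLE_SIZE;
}

int main(void)
{
        srand((unsigned int)time(NULL));        /* cheap stand-in for get_random_bytes() */
        hash_rnd = ((uint32_t)rand() << 16) ^ (uint32_t)rand();

        printf("bucket: %u\n", bucket(0x0a000001, 0x0a000002, 12345, 80, 6));
        return 0;
}

Without the secret seed, two connections differing only in the low address bits collide predictably; with it, the bucket distribution depends on a value an attacker cannot observe.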
@@ -2454,6 +2463,15 @@ ctl_table ipv4_route_table[] = { mode: 0644, proc_handler: &proc_dointvec, }, + { + ctl_name: NET_IPV4_ROUTE_SECRET_INTERVAL, + procname: "secret_interval", + data: &ip_rt_secret_interval, + maxlen: sizeof(int), + mode: 0644, + proc_handler: &proc_dointvec_jiffies, + strategy: &sysctl_jiffies, + }, { 0 } }; #endif @@ -2464,7 +2482,7 @@ struct ip_rt_acct *ip_rt_acct; /* This code sucks. But you should have seen it before! --RR */ /* IP route accounting ptr for this logical cpu number. */ -#define IP_RT_ACCT_CPU(i) ((u8*)ip_rt_acct + cpu_logical_map(i) * 256) +#define IP_RT_ACCT_CPU(i) (ip_rt_acct + cpu_logical_map(i) * 256) static int ip_rt_acct_read(char *buffer, char **start, off_t offset, int length, int *eof, void *data) @@ -2484,22 +2502,27 @@ static int ip_rt_acct_read(char *buffer, *eof = 1; } - *start = buffer; + offset /= sizeof(u32); + + if (length > 0) { + u32 *src = ((u32 *) IP_RT_ACCT_CPU(0)) + offset; + u32 *dst = (u32 *) buffer; - if (length > 0) - { /* Copy first cpu. */ - memcpy(buffer, IP_RT_ACCT_CPU(0) + offset, length); + *start = buffer; + memcpy(dst, src, length); /* Add the other cpus in, one int at a time */ for (i = 1; i < smp_num_cpus; i++) { unsigned int j; + + src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset; + for (j = 0; j < length/4; j++) - ((u32*)buffer)[j] += ((u32*)(IP_RT_ACCT_CPU(i) + offset))[j]; + dst[j] += src[j]; } - return length; } - return 0; + return length; } #endif @@ -2507,6 +2530,9 @@ void __init ip_rt_init(void) { int i, order, goal; + rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^ + (jiffies ^ (jiffies >> 7))); + #ifdef CONFIG_NET_CLS_ROUTE for (order = 0; (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++) @@ -2563,6 +2589,7 @@ void __init ip_rt_init(void) rt_flush_timer.function = rt_run_flush; rt_periodic_timer.function = rt_check_expire; + rt_secret_timer.function = rt_secret_rebuild; /* All the timers, started at system startup tend to synchronize. Perturb it a bit. @@ -2571,6 +2598,10 @@ void __init ip_rt_init(void) ip_rt_gc_interval; add_timer(&rt_periodic_timer); + rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval + + ip_rt_secret_interval; + add_timer(&rt_secret_timer); + proc_net_create ("rt_cache", 0, rt_cache_get_info); proc_net_create ("rt_cache_stat", 0, rt_cache_stat_get_info); #ifdef CONFIG_NET_CLS_ROUTE diff -Naurp linux-2.4.20-wolk4.0s/net/ipv4/tcp.c linux-2.4.20-wolk4.1-fullkernel/net/ipv4/tcp.c --- linux-2.4.20-wolk4.0s/net/ipv4/tcp.c 2003-05-15 21:52:52.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/net/ipv4/tcp.c 2003-05-16 14:00:24.000000000 +0200 @@ -252,6 +252,7 @@ #include #include #include +#include #include #include @@ -553,6 +554,7 @@ int tcp_listen_start(struct sock *sk) for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++) if ((1<max_qlen_log) >= sysctl_max_syn_backlog) break; + get_random_bytes(&lopt->hash_rnd, 4); write_lock_bh(&tp->syn_wait_lock); tp->listen_opt = lopt; @@ -1333,7 +1335,7 @@ void cleanup_rbuf(struct sock *sk, int c __u32 rcv_window_now = tcp_receive_window(tp); /* Optimize, __tcp_select_window() is not cheap. */ - if (2*rcv_window_now <= tp->window_clamp) { + if (2*rcv_window_now < tp->window_clamp) { __u32 new_window = __tcp_select_window(sk); /* Send ACK now, if this read freed lots of space @@ -1341,7 +1343,7 @@ void cleanup_rbuf(struct sock *sk, int c * We can advertise it now, if it is not less than current one. * "Lots" means "at least twice" here. 
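The ip_rt_acct_read() rewrite above switches from byte-offset arithmetic on a u8 pointer to word-indexed access: copy CPU 0's slice of the accounting array into the output buffer, then add every other CPU's slice in one u32 at a time, and return the requested length. A self-contained sketch of that per-CPU aggregation (NCPUS, WORDS and the array are illustrative, and the caller is assumed to keep off + nwords within bounds):

#include <stdint.h>
#include <string.h>

#define NCPUS 4
#define WORDS 256

static uint32_t per_cpu_acct[NCPUS][WORDS];

static void acct_sum(uint32_t *dst, size_t off, size_t nwords)
{
        /* Copy the first CPU's slice... */
        memcpy(dst, &per_cpu_acct[0][off], nwords * sizeof(uint32_t));

        /* ...then add the other CPUs in, one word at a time. */
        for (int cpu = 1; cpu < NCPUS; cpu++)
                for (size_t j = 0; j < nwords; j++)
                        dst[j] += per_cpu_acct[cpu][off + j];
}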
*/ - if(new_window && new_window >= 2*rcv_window_now) + if(new_window && new_window > 2*rcv_window_now) time_to_ack = 1; } } diff -Naurp linux-2.4.20-wolk4.0s/net/ipv4/tcp_ipv4.c linux-2.4.20-wolk4.1-fullkernel/net/ipv4/tcp_ipv4.c --- linux-2.4.20-wolk4.0s/net/ipv4/tcp_ipv4.c 2003-05-15 21:52:52.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/net/ipv4/tcp_ipv4.c 2003-05-16 14:00:34.000000000 +0200 @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -1006,12 +1007,9 @@ static __inline__ int tcp_v4_iif(struct return ((struct rtable*)skb->dst)->rt_iif; } -static __inline__ unsigned tcp_v4_synq_hash(u32 raddr, u16 rport) +static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd) { - unsigned h = raddr ^ rport; - h ^= h>>16; - h ^= h>>8; - return h&(TCP_SYNQ_HSIZE-1); + return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1)); } static struct open_request *tcp_v4_search_req(struct tcp_opt *tp, @@ -1022,7 +1020,7 @@ static struct open_request *tcp_v4_searc struct tcp_listen_opt *lopt = tp->listen_opt; struct open_request *req, **prev; - for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport)]; + for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)]; (req = *prev) != NULL; prev = &req->dl_next) { if (req->rmt_port == rport && @@ -1042,7 +1040,7 @@ static void tcp_v4_synq_add(struct sock { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; struct tcp_listen_opt *lopt = tp->listen_opt; - unsigned h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port); + u32 h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port, lopt->hash_rnd); req->expires = jiffies + TCP_TIMEOUT_INIT; req->retrans = 0; diff -Naurp linux-2.4.20-wolk4.0s/net/ipv4/tcp_minisocks.c linux-2.4.20-wolk4.1-fullkernel/net/ipv4/tcp_minisocks.c --- linux-2.4.20-wolk4.0s/net/ipv4/tcp_minisocks.c 2003-05-15 21:52:52.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/net/ipv4/tcp_minisocks.c 2003-05-16 13:54:44.000000000 +0200 @@ -452,6 +452,8 @@ static void SMP_TIMER_NAME(tcp_twkill)(u while((tw = tcp_tw_death_row[tcp_tw_death_row_slot]) != NULL) { tcp_tw_death_row[tcp_tw_death_row_slot] = tw->next_death; + if(tw->next_death) + tw->next_death->pprev_death = tw->pprev_death; tw->pprev_death = NULL; spin_unlock(&tw_death_lock); diff -Naurp linux-2.4.20-wolk4.0s/net/ipv6/tcp_ipv6.c linux-2.4.20-wolk4.1-fullkernel/net/ipv6/tcp_ipv6.c --- linux-2.4.20-wolk4.0s/net/ipv6/tcp_ipv6.c 2003-05-15 21:52:52.000000000 +0200 +++ linux-2.4.20-wolk4.1-fullkernel/net/ipv6/tcp_ipv6.c 2003-05-16 14:00:34.000000000 +0200 @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -357,12 +358,11 @@ __inline__ struct sock *tcp_v6_lookup(st * Open request hash tables. 
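The tcp_twkill() fix above addresses a classic "pprev"-style list bug: when the head of a death-row chain is removed, the element that becomes the new head must have its back-pointer updated to point at the list slot again, otherwise a later unlink through pprev_death writes through a stale pointer. A minimal sketch of the corrected unlink, modelling only the two link fields involved:

#include <stddef.h>

struct tw_stub {
        struct tw_stub *next_death;
        struct tw_stub **pprev_death;   /* points at whatever points at us */
};

static void unlink_head(struct tw_stub **slot)
{
        struct tw_stub *tw = *slot;

        if (!tw)
                return;
        *slot = tw->next_death;
        if (tw->next_death)             /* the line the patch adds */
                tw->next_death->pprev_death = tw->pprev_death;
        tw->pprev_death = NULL;
        tw->next_death = NULL;
}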
*/ -static __inline__ unsigned tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport) +static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd) { - unsigned h = raddr->s6_addr32[3] ^ rport; - h ^= h>>16; - h ^= h>>8; - return h&(TCP_SYNQ_HSIZE-1); + return (jhash_3words(raddr->s6_addr32[0] ^ raddr->s6_addr32[1], + raddr->s6_addr32[2] ^ raddr->s6_addr32[3], + (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1)); } static struct open_request *tcp_v6_search_req(struct tcp_opt *tp, @@ -375,7 +375,7 @@ static struct open_request *tcp_v6_searc struct tcp_listen_opt *lopt = tp->listen_opt; struct open_request *req, **prev; - for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport)]; + for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; (req = *prev) != NULL; prev = &req->dl_next) { if (req->rmt_port == rport && @@ -1121,7 +1121,7 @@ static void tcp_v6_synq_add(struct sock { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; struct tcp_listen_opt *lopt = tp->listen_opt; - unsigned h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port); + u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd); req->sk = NULL; req->expires = jiffies + TCP_TIMEOUT_INIT; diff -Naurp linux-2.4.20-wolk4.0s/userspace-programs/cap/cap.c linux-2.4.20-wolk4.1-fullkernel/userspace-programs/cap/cap.c --- linux-2.4.20-wolk4.0s/userspace-programs/cap/cap.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.4.20-wolk4.1-fullkernel/userspace-programs/cap/cap.c 2003-05-16 14:17:02.000000000 +0200 @@ -0,0 +1,84 @@ +/* $Id: cap.in,v 1.3 2002/07/08 11:14:33 karol Exp $ + * cap - interface for CPU patch; setting CPU limit + * + * Copyright (c) 2002 Karol 'Broege' Golab, TLS-Technologies + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include + +/* ugly hack - syscall number is not constant so it will be set by make */ +#ifndef __NR_setcpucap +#define __NR_setcpucap 260 +#endif + +_syscall3(int, setcpucap, int, which, int, who, int, cap); + +void show_help(void) +{ + printf("Usage: cap \n"); + + exit(0); +} + + +void show_version(void) +{ + printf("This is cap - CPU patch utility, $version: $\n\n"); + printf("Written by Karol 'Broege' Golab\n"); + printf("Copyright (c) 2002 TLS-Technologies\n\n"); + printf("This is free software; see the source for copying conditions.\n"); + printf("There is NO warranty; not even for MERCHANTABILITY\n"); + printf("or FITNESS FOR A PARTICULAR PURPOSE.\n"); + + exit(0); +} + + +void error(const char *msg) +{ + fprintf(stderr, "%s\n", msg); + + exit(1); +} + + +int main(int argc, const char *argv[]) +{ + int pid, perc; + int ret; + char *bad; + + if (argc<3) show_help(); + if (! strcmp(argv[1], "--version")) show_version(); + if (argc!=3) show_help(); + + pid=strtol(argv[1], &bad, 10); + if (! 
argv[1][0] || bad[0]) error("Cannot parse argument 1"); + perc=strtol(argv[2], &bad, 10); + if (! argv[2][0] || bad[0]) error("Cannot parse argument 2"); + + ret=setcpucap(PRIO_PROCESS, pid, perc); + if (ret!=0) error("Priority was not changed"); + return 0; +} + +
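cap is a tiny front end for the setcpucap() syscall added by the CPU-cap patch: it parses a pid and a percentage and issues the call with PRIO_PROCESS, e.g. "cap 1234 50" to cap pid 1234 at roughly 50% CPU. The _syscall3() stub uses 260 only as a fallback number; the build substitutes the real one. On toolchains where _syscall3() is unavailable, an equivalent wrapper can go through syscall(2) instead; this is an illustrative sketch, not part of the patch, and it assumes the patched kernel is running:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_setcpucap
#define __NR_setcpucap 260      /* placeholder default, as in cap.c itself */
#endif

/* Same call as the _syscall3() stub, issued via the generic syscall(2) entry. */
static int setcpucap_compat(int which, int who, int cap)
{
        return (int)syscall(__NR_setcpucap, which, who, cap);
}

int main(void)
{
        /* e.g. limit PID 1234 to 50% CPU; fails unless the CPU-cap patch is present */
        if (setcpucap_compat(PRIO_PROCESS, 1234, 50) != 0)
                perror("setcpucap");
        return 0;
}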