diff -Naurp linux-2.4.20-wolk4.2-fullkernel/Documentation/Configure.help linux-2.4.20-wolk4.3-fullkernel/Documentation/Configure.help --- linux-2.4.20-wolk4.2-fullkernel/Documentation/Configure.help 2003-06-28 09:23:17.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/Documentation/Configure.help 2003-07-03 21:15:07.000000000 +0200 @@ -1388,6 +1388,20 @@ CONFIG_X86_UP_APIC If you have a system with several CPUs, you do not need to say Y here: the local APIC will be used automatically. +Do not report APIC errors on CPU(s) +CONFIG_X86_UP_APIC_ERRORS + If you see annoying messages in your dmesg|syslog like: + + kernel: APIC error on CPU0: 08(00) + kernel: APIC error on CPU1: 02(00) + kernel: APIC error on CPU1: 04(00) + + but your system and the kernel are running absolutely fine, + select this option so you won't see messages like above + filling up your logs. + + If unsure, say N. + Preemptible Kernel CONFIG_PREEMPT This option reduces the latency of the kernel when reacting to @@ -2241,7 +2255,7 @@ CONFIG_BLK_DEV_ELEVATOR_LOWLAT This decreases throughput slightly (~20%), but this is irrelevant for most desktop usage. - --------------------------------------------------------------- + --------------------------------------------------------------- For all of you unbelievers out there: This setting is the SAME as -ck uses with its 'Desktop Tuning patches'. So if you want to compare -ck with -wolk, select this option. Otherwise you'll @@ -2251,7 +2265,7 @@ CONFIG_BLK_DEV_ELEVATOR_LOWLAT For the interested ones: ------------------------ - nr_requests: 32 + nr_requests: 16 read_passovers: 128 write_passovers: 256 max_bomb_segments: 2 @@ -6383,7 +6397,7 @@ CONFIG_SCHED_SERVER max_sleep_avg = 2 * HZ; starvation_limit = 2 * HZ; - vm.bdflush = 50 500 0 0 500 3000 60 20 0 (if HZ == 100) + vm.bdflush = 50 500 0 0 500 3000 80 50 0 (if HZ == 100) If unsure or you don't select this, the Server Scheduler will be used. 
If you select none of the Scheduler Tweaks, the Server @@ -6409,7 +6423,7 @@ CONFIG_SCHED_DESKTOP max_sleep_avg = 2 * HZ; starvation_limit = 2 * HZ; - vm.bdflush = 30 500 0 0 500 3000 60 20 0 (if HZ == 100) + vm.bdflush = 30 500 0 0 500 3000 80 30 0 (if HZ == 100) If unsure or you don't select this, the Server Scheduler will be used. If you select none of the Scheduler Tweaks, the Server @@ -7814,11 +7828,6 @@ CONFIG_IP_ROUTE_VERBOSE handled by the klogd daemon which is responsible for kernel messages ("man klogd"). -Large routing tables -CONFIG_IP_ROUTE_LARGE_TABLES - If you have routing zones that grow to more than about 64 entries, - you may want to say Y here to speed up the routing process. - Fast network address translation CONFIG_IP_ROUTE_NAT If you say Y here, your router will be able to modify source and diff -Naurp linux-2.4.20-wolk4.2-fullkernel/Makefile linux-2.4.20-wolk4.3-fullkernel/Makefile --- linux-2.4.20-wolk4.2-fullkernel/Makefile 2003-06-28 09:23:17.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/Makefile 2003-06-23 17:24:50.000000000 +0200 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 20 -EXTRAVERSION = -wolk4.2s +EXTRAVERSION = -wolk4.3s KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) diff -Naurp linux-2.4.20-wolk4.2-fullkernel/TODO linux-2.4.20-wolk4.3-fullkernel/TODO --- linux-2.4.20-wolk4.2-fullkernel/TODO 2003-06-28 09:23:17.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/TODO 1970-01-01 01:00:00.000000000 +0100 @@ -1,7 +0,0 @@ - in the O(1) page launder code pages being swapped out need to go in this order: -<-- matt_ has quit (Client Quit) - inactive dirty ----> inactive laundry ----> inactive clean - meaning that when IO is scheduled they go to the laundry list - and when IO is finished, they need to go to the clean list - this also means that the laundry balancing code needs to be able to deal properly with pages on which IO has finished - I have a feeling it's not quite correct yet ... 
diff -Naurp linux-2.4.20-wolk4.2-fullkernel/VERSION linux-2.4.20-wolk4.3-fullkernel/VERSION --- linux-2.4.20-wolk4.2-fullkernel/VERSION 2003-06-28 09:23:17.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/VERSION 2003-06-23 17:24:46.000000000 +0200 @@ -1 +1 @@ -WOLK v4.2s "Server Edition" FINAL, based on 2.4.20 +WOLK v4.3s "Server Edition" FINAL, based on 2.4.20 diff -Naurp linux-2.4.20-wolk4.2-fullkernel/WOLK-CHANGELOG linux-2.4.20-wolk4.3-fullkernel/WOLK-CHANGELOG --- linux-2.4.20-wolk4.2-fullkernel/WOLK-CHANGELOG 2003-06-28 09:23:17.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/WOLK-CHANGELOG 2003-07-03 20:42:00.000000000 +0200 @@ -1,3 +1,55 @@ +Changelog from v4.2s -> v4.3s +----------------------------- +o add: allows O_DIRECT writes I/O to run in parallel (not serialized + anymore by the i_sem). +o add: extended OOM killer functions via /proc + Max childs per parent oom-killed before we kill the parent + tunable via "/proc/sys/vm/oom_parent_expire" + Min numbers of seconds before we forget about parents + tunable via "/proc/sys/vm/oom_parent_max" +o add: workaround for annoying "APIC error on CPUx" error messages +o fixed: large routing table problems with new hash algo (jenkins) +o fixed: unexpected IO-APIC +o fixed: ext3fs htree sets the index too early +o fixed: dmesg cleanup: printf CPU inconsistency +o fixed: dnotify read/writev +o fixed: buffer_insert_list should use list_add_tail +o fixed: cleanup kmem_cache_reap() +o fixed: smp race condition in submit_bh (though nearly impossible to + trigger) and put some BUG_ON to verify nobody calls writepage + w/o a reference on the page (SetPageUptodate needs it too) +o fixed: grsec: pci bios problem on smp w/ kernexec +o fixed: grsec: oops on init if we're out of memory +o fixed: RMAP: sequential writeout performance tuning +o fixed: RMAP: rate limit slab cache pruning, to reduce IPI load + on SMP systems +o fixed: RMAP: don't cache-align buffer heads, in order to save space +o fixed: RMAP: shrink 
kiobuf slab when reclaiming buffer heads +o fixed: RMAP: if we reset the zone size due to highmem being + all IO space, we shouldn't BUG() when we see such + zones in the page allocator +o fixed: RMAP: corner case where all highmem pages are in an + IO window and not released into the free list + at bootup time +o fixed: RMAP: compile warnings +o fixed: RMAP: call oom killer only when allocations fail +o fixed: RMAP: only count the real freeing of pages for the + OOM killer +o fixed: RMAP: make bdflush writeout smoother +o fixed: RMAP: logic inversion in inode reclaim +o fixed: RMAP: only reclaim bufferheads on highmem machines, + and only when the bufferheads take more than + 10% of the spage used by pageable low memory. +o fixed: RMAP: make OOM killer less agressive +o fixed: RMAP: avoid expensive atomic pagetable operation +o fixed: RMAP: backport next_and_idx optimisation from 2.5 +o updated: AIC7xxx v6.2.36 / AIC79xx v1.3.10 (v2003-06-03) +o updated: Super FreeS/WAN v1.99.7.3 Final +o updated: CODA v6.0.1 +o updated: HTB v3.12 +o changed: converted /proc to seq operations + + Changelog from v4.1s -> v4.2s ----------------------------- o re-add: Scheduler Tunables (/proc/sys/sched): I need it! 
diff -Naurp linux-2.4.20-wolk4.2-fullkernel/WOLK-README linux-2.4.20-wolk4.3-fullkernel/WOLK-README --- linux-2.4.20-wolk4.2-fullkernel/WOLK-README 2003-06-28 09:23:17.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/WOLK-README 2003-07-02 21:31:03.000000000 +0200 @@ -1,4 +1,4 @@ -Kernel - patched - WOLK v4.2s - Base: Linux kernel 2.4.20 +Kernel - patched - WOLK v4.3s - Base: Linux kernel 2.4.20 located at http://sf.net/projects/wolk by Marc-Christian Petersen -------------------------------------------------------------------------- diff -Naurp linux-2.4.20-wolk4.2-fullkernel/WOLK-TODO linux-2.4.20-wolk4.3-fullkernel/WOLK-TODO --- linux-2.4.20-wolk4.2-fullkernel/WOLK-TODO 2003-05-13 13:54:45.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/WOLK-TODO 2003-07-02 21:31:13.000000000 +0200 @@ -1,4 +1,4 @@ -Todo for the next release (2.4.21-wolk5.0): +Todo for the next release (2.4.22-wolk5.0): ------------------------------------------- o < fill in something > diff -Naurp linux-2.4.20-wolk4.2-fullkernel/arch/i386/config.in linux-2.4.20-wolk4.3-fullkernel/arch/i386/config.in --- linux-2.4.20-wolk4.2-fullkernel/arch/i386/config.in 2003-05-08 11:11:44.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/arch/i386/config.in 2003-07-02 14:08:33.000000000 +0200 @@ -430,6 +430,7 @@ fi bool 'Symmetric multi-processing support' CONFIG_SMP if [ "$CONFIG_SMP" != "y" ]; then bool 'Local APIC support on uniprocessors' CONFIG_X86_UP_APIC + dep_bool ' Do not report APIC errors on CPU' CONFIG_X86_UP_APIC_ERRORS $CONFIG_X86_UP_APIC dep_bool 'IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC $CONFIG_X86_UP_APIC if [ "$CONFIG_X86_UP_APIC" = "y" ]; then define_bool CONFIG_X86_LOCAL_APIC y @@ -438,6 +439,7 @@ if [ "$CONFIG_SMP" != "y" ]; then define_bool CONFIG_X86_IO_APIC y fi else + dep_bool 'Do not report APIC errors on CPUs' CONFIG_X86_UP_APIC_ERRORS $CONFIG_X86_UP_APIC int 'Maximum number of CPUs (2-32)' CONFIG_NR_CPUS 2 bool 'Multi-node NUMA system support' CONFIG_X86_NUMA 
if [ "$CONFIG_X86_NUMA" = "y" ]; then diff -Naurp linux-2.4.20-wolk4.2-fullkernel/arch/i386/kernel/apic.c linux-2.4.20-wolk4.3-fullkernel/arch/i386/kernel/apic.c --- linux-2.4.20-wolk4.2-fullkernel/arch/i386/kernel/apic.c 2003-05-03 02:37:14.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/arch/i386/kernel/apic.c 2003-07-02 13:22:11.000000000 +0200 @@ -383,10 +383,10 @@ void __init setup_local_APIC (void) value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; if (!smp_processor_id() && (pic_mode || !value)) { value = APIC_DM_EXTINT; - printk("enabled ExtINT on CPU#%d\n", smp_processor_id()); + printk("CPU#%d: enabled ExtINT.\n", smp_processor_id()); } else { value = APIC_DM_EXTINT | APIC_LVT_MASKED; - printk("masked ExtINT on CPU#%d\n", smp_processor_id()); + printk("CPU#%d: masked ExtINT.\n", smp_processor_id()); } apic_write_around(APIC_LVT0, value); @@ -406,7 +406,8 @@ void __init setup_local_APIC (void) if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); value = apic_read(APIC_ESR); - printk("ESR value before enabling vector: %08lx\n", value); + printk("CPU#%d: ESR value before enabling vector: %08lx\n", + smp_processor_id(), value); value = ERROR_APIC_VECTOR; // enables sending errors apic_write_around(APIC_LVTERR, value); @@ -416,7 +417,8 @@ void __init setup_local_APIC (void) if (maxlvt > 3) apic_write(APIC_ESR, 0); value = apic_read(APIC_ESR); - printk("ESR value after enabling vector: %08lx\n", value); + printk("CPU#%d: ESR value after enabling vector: %08lx\n", + smp_processor_id(), value); } else { if (esr_disable) /* @@ -425,9 +427,9 @@ void __init setup_local_APIC (void) * ESR disabled - we can't do anything useful with the * errors anyway - mbligh */ - printk("Leaving ESR disabled.\n"); + printk("CPU#%d: Leaving ESR disabled.\n", smp_processor_id()); else - printk("No ESR for 82489DX.\n"); + printk("CPU#%d: No ESR for 82489DX.\n", smp_processor_id()); } if (nmi_watchdog == NMI_LOCAL_APIC) @@ -1156,8 +1158,10 @@ asmlinkage void 
smp_error_interrupt(void 6: Received illegal vector 7: Illegal register address */ +#ifndef CONFIG_X86_UP_APIC_ERRORS printk (KERN_INFO "APIC error on CPU%d: %02lx(%02lx)\n", smp_processor_id(), v , v1); +#endif } /* diff -Naurp linux-2.4.20-wolk4.2-fullkernel/arch/i386/kernel/bluesmoke.c linux-2.4.20-wolk4.3-fullkernel/arch/i386/kernel/bluesmoke.c --- linux-2.4.20-wolk4.2-fullkernel/arch/i386/kernel/bluesmoke.c 2003-05-03 02:10:34.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/arch/i386/kernel/bluesmoke.c 2003-07-01 17:31:46.000000000 +0200 @@ -31,7 +31,7 @@ static void intel_machine_check(struct p if(mcgstl&(1<<0)) /* Recoverable ? */ recover=0; - printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl); + printk(KERN_EMERG "CPU#%d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl); for(i=0;ithread.i387.fxsave, &buf->_fxsr_env[0], - sizeof(struct i387_fxsave_struct)); + err = __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0], + sizeof(struct i387_fxsave_struct) ); /* mxcsr bit 6 and 31-16 must be zero for security reasons */ tsk->thread.i387.fxsave.mxcsr &= 0xffbf; - if (err) - return 1; - return convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf ); + return err ? 
1 : convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf ); } int restore_i387( struct _fpstate *buf ) diff -Naurp linux-2.4.20-wolk4.2-fullkernel/arch/i386/kernel/io_apic.c linux-2.4.20-wolk4.3-fullkernel/arch/i386/kernel/io_apic.c --- linux-2.4.20-wolk4.2-fullkernel/arch/i386/kernel/io_apic.c 2003-06-28 09:23:17.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/arch/i386/kernel/io_apic.c 2003-07-01 17:31:46.000000000 +0200 @@ -859,6 +859,7 @@ void __init print_IO_APIC(void) struct IO_APIC_reg_00 reg_00; struct IO_APIC_reg_01 reg_01; struct IO_APIC_reg_02 reg_02; + struct IO_APIC_reg_03 reg_03; unsigned long flags; printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); @@ -879,9 +880,10 @@ void __init print_IO_APIC(void) *(int *)®_01 = io_apic_read(apic, 1); if (reg_01.version >= 0x10) *(int *)®_02 = io_apic_read(apic, 2); + if (reg_01.version >= 0x20) + *(int *)®_03 = io_apic_read(apic, 3); spin_unlock_irqrestore(&ioapic_lock, flags); - printk("\n"); printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)®_00); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.ID); @@ -916,13 +918,31 @@ void __init print_IO_APIC(void) if (reg_01.__reserved_1 || reg_01.__reserved_2) UNEXPECTED_IO_APIC(); - if (reg_01.version >= 0x10) { + /* + * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, + * but the value of reg_02 is read as the previous read register + * value, so ignore it if reg_02 == reg_01. + */ + if (reg_01.version >= 0x10 && *(int *)®_02 != *(int *)®_01) { printk(KERN_DEBUG ".... register #02: %08X\n", *(int *)®_02); printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.arbitration); if (reg_02.__reserved_1 || reg_02.__reserved_2) UNEXPECTED_IO_APIC(); } + /* + * Some Intel chipsets with IO APIC VERSION of 0x2? 
don't have reg_02 + * or reg_03, but the value of reg_0[23] is read as the previous read + * register value, so ignore it if reg_03 == reg_0[12]. + */ + if (reg_01.version >= 0x20 && *(int *)®_03 != *(int *)®_02 && + *(int *)®_03 != *(int *)®_01) { + printk(KERN_DEBUG ".... register #03: %08X\n", *(int *)®_03); + printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.boot_DT); + if (reg_03.__reserved_1) + UNEXPECTED_IO_APIC(); + } + printk(KERN_DEBUG ".... IRQ redirection table:\n"); printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" diff -Naurp linux-2.4.20-wolk4.2-fullkernel/arch/i386/kernel/mpparse.c linux-2.4.20-wolk4.3-fullkernel/arch/i386/kernel/mpparse.c --- linux-2.4.20-wolk4.2-fullkernel/arch/i386/kernel/mpparse.c 2003-05-03 02:36:45.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/arch/i386/kernel/mpparse.c 2003-07-03 08:34:14.000000000 +0200 @@ -986,7 +986,14 @@ void __init mp_register_lapic ( processor.mpc_type = MP_PROCESSOR; processor.mpc_apicid = id; - processor.mpc_apicver = 0x10; /* TBD: lapic version */ + + /* + * mp_register_lapic_address() which is called before the + * current function does the fixmap of FIX_APIC_BASE. + * Read in the correct APIC version from there + */ + processor.mpc_apicver = apic_read(APIC_LVR); + processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); processor.mpc_cpuflag |= (boot_cpu ? 
CPU_BOOTPROCESSOR : 0); processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | diff -Naurp linux-2.4.20-wolk4.2-fullkernel/arch/i386/kernel/pci-irq.c linux-2.4.20-wolk4.3-fullkernel/arch/i386/kernel/pci-irq.c --- linux-2.4.20-wolk4.2-fullkernel/arch/i386/kernel/pci-irq.c 2003-05-03 02:37:12.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/arch/i386/kernel/pci-irq.c 2003-07-03 21:28:43.000000000 +0200 @@ -199,12 +199,27 @@ static int pirq_piix_set(struct pci_dev */ static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq) { - return read_config_nybble(router, 0x55, pirq); + u8 x; + + if ( pirq == 4 ) { + pci_read_config_byte(router, 0x57, &x); + return (x >> 4); + } else { + return read_config_nybble(router, 0x55, pirq); + } } static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { - write_config_nybble(router, 0x55, pirq, irq); + u8 x; + + if ( pirq == 4 ) { + pci_read_config_byte(router, 0x57, &x); + x = (x & 0x0f) | (irq << 4); + pci_write_config_byte(router, 0x57, x); + } else { + write_config_nybble(router, 0x55, pirq, irq); + } return 1; } diff -Naurp linux-2.4.20-wolk4.2-fullkernel/arch/i386/kernel/pci-pc.c linux-2.4.20-wolk4.3-fullkernel/arch/i386/kernel/pci-pc.c --- linux-2.4.20-wolk4.2-fullkernel/arch/i386/kernel/pci-pc.c 2003-05-03 02:37:22.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/arch/i386/kernel/pci-pc.c 2003-07-03 20:28:47.000000000 +0200 @@ -1482,18 +1482,6 @@ void __init pcibios_init(void) pcibios_sort(); #endif -#ifdef CONFIG_GRKERNSEC_PAX_KERNEXEC - /* PaX: nuke __FLAT_KERNEL_CS, no longer needed */ - gdt_table[1].a = 0UL; - gdt_table[1].b = 0UL; - -#ifdef CONFIG_GRKERNSEC_PAX_SEGMEXEC - gdt_table2[1].a = 0UL; - gdt_table2[1].b = 0UL; -#endif - -#endif - } char * __devinit pcibios_setup(char *str) diff -Naurp linux-2.4.20-wolk4.2-fullkernel/arch/i386/kernel/setup.c linux-2.4.20-wolk4.3-fullkernel/arch/i386/kernel/setup.c --- linux-2.4.20-wolk4.2-fullkernel/arch/i386/kernel/setup.c 
2003-05-03 02:37:23.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/arch/i386/kernel/setup.c 2003-07-01 17:31:46.000000000 +0200 @@ -1381,8 +1381,8 @@ static void __init display_cacheinfo(str if (n >= 0x80000005) { cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); + printk(KERN_INFO "CPU#%d: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", + smp_processor_id(), edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size=(ecx>>24)+(edx>>24); } @@ -1416,8 +1416,8 @@ static void __init display_cacheinfo(str c->x86_cache_size = l2size; - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); + printk(KERN_INFO "CPU#%d: L2 Cache: %dK (%d bytes/line)\n", + smp_processor_id(), l2size, ecx & 0xFF); } /* @@ -1472,9 +1472,9 @@ static int __init init_amd(struct cpuinf int n; void (*f_vide)(void); unsigned long d, d2; - - printk(KERN_INFO "AMD K6 stepping B detected - "); - + + printk(KERN_INFO "CPU#%d: AMD K6 stepping B detected - ", smp_processor_id()); + /* * It looks like AMD fixed the 2.6.2 bug and improved indirect * calls at the same time. 
@@ -1514,8 +1514,8 @@ static int __init init_amd(struct cpuinf wbinvd(); wrmsr(MSR_K6_WHCR, l, h); local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", - mbytes); + printk(KERN_INFO "CPU#%d: Enabling old style K6 write allocation for %d Mb\n", + smp_processor_id(), mbytes); } break; } @@ -1535,8 +1535,8 @@ static int __init init_amd(struct cpuinf wbinvd(); wrmsr(MSR_K6_WHCR, l, h); local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", - mbytes); + printk(KERN_INFO "CPU#%d: Enabling new style K6 write allocation for %d Mb\n", + smp_processor_id(), mbytes); } /* Set MTRR capability flag if appropriate */ @@ -1557,8 +1557,8 @@ static int __init init_amd(struct cpuinf if (c->x86_model >= 6 && c->x86_model <= 10) { if (!test_bit(X86_FEATURE_XMM, &c->x86_capability)) { - printk(KERN_INFO - "Enabling Disabled K7/SSE Support...\n"); + printk(KERN_INFO "CPU#%d: Enabling disabled K7/SSE Support.\n", + smp_processor_id()); rdmsr(MSR_K7_HWCR, l, h); l &= ~0x00008000; wrmsr(MSR_K7_HWCR, l, h); @@ -1574,8 +1574,8 @@ static int __init init_amd(struct cpuinf if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) { rdmsr(MSR_K7_CLK_CTL, l, h); if ((l & 0xfff00000) != 0x20000000) { - printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, - ((l & 0x000fffff)|0x20000000)); + printk ("CPU#%d: CLK_CTL MSR was %x. 
Reprogramming to %x\n", + smp_processor_id(), l, ((l & 0x000fffff)|0x20000000)); wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); } } @@ -1676,7 +1676,8 @@ static void __init check_cx686_slop(stru local_irq_restore(flags); if (ccr5 & 2) { /* possible wrong calibration done */ - printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n"); + printk(KERN_INFO "CPU#%d: Recalibrating delay loop with SLOP bit reset\n", + smp_processor_id()); calibrate_delay(); c->loops_per_jiffy = loops_per_jiffy; } @@ -1762,7 +1763,8 @@ static void __init init_cyrix(struct cpu VSA1 we work around however. */ - printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); + printk(KERN_INFO "CPU#%d: Working around Cyrix MediaGX virtual DMA bugs.\n", + smp_processor_id()); isa_dma_bridge_buggy = 2; #endif c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ @@ -2115,7 +2117,7 @@ static void __init init_centaur(struct c name="C6"; fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK; fcr_clr=DPDC; - printk(KERN_NOTICE "Disabling bugged TSC.\n"); + printk(KERN_NOTICE "CPU#%d: Disabling bugged TSC.\n", smp_processor_id()); clear_bit(X86_FEATURE_TSC, &c->x86_capability); #ifdef CONFIG_X86_OOSTORE winchip_create_optimal_mcr(); @@ -2189,10 +2191,12 @@ static void __init init_centaur(struct c newlo=(lo|fcr_set) & (~fcr_clr); if (newlo!=lo) { - printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo ); + printk(KERN_INFO "CPU#%d: Centaur FCR was 0x%X now 0x%X\n", + smp_processor_id(), lo, newlo ); wrmsr(MSR_IDT_FCR1, newlo, hi ); } else { - printk(KERN_INFO "Centaur FCR is 0x%X\n",lo); + printk(KERN_INFO "CPU#%d: Centaur FCR is 0x%X\n", + smp_processor_id(), lo); } /* Emulate MTRRs using Centaur's MCR. 
*/ set_bit(X86_FEATURE_CENTAUR_MCR, &c->x86_capability); @@ -2244,7 +2248,8 @@ static void __init init_transmeta(struct max = cpuid_eax(0x80860000); if ( max >= 0x80860001 ) { cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); - printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n", + printk(KERN_INFO "CPU#%d: Processor revision %u.%u.%u.%u, %u MHz\n", + smp_processor_id(), (cpu_rev >> 24) & 0xff, (cpu_rev >> 16) & 0xff, (cpu_rev >> 8) & 0xff, @@ -2253,7 +2258,8 @@ static void __init init_transmeta(struct } if ( max >= 0x80860002 ) { cpuid(0x80860002, &dummy, &cms_rev1, &cms_rev2, &dummy); - printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n", + printk(KERN_INFO "CPU#%d: Code Morphing Software revision %u.%u.%u-%u-%u\n", + smp_processor_id(), (cms_rev1 >> 24) & 0xff, (cms_rev1 >> 16) & 0xff, (cms_rev1 >> 8) & 0xff, @@ -2282,7 +2288,7 @@ static void __init init_transmeta(struct (void *)&cpu_info[56], (void *)&cpu_info[60]); cpu_info[64] = '\0'; - printk(KERN_INFO "CPU: %s\n", cpu_info); + printk(KERN_INFO "CPU#%d: %s\n", smp_processor_id(), cpu_info); } /* Unhide possibly hidden capability flags */ @@ -2300,7 +2306,7 @@ static void __init init_transmeta(struct static void __init init_rise(struct cpuinfo_x86 *c) { - printk("CPU: Rise iDragon"); + printk("CPU#%d: Rise iDragon", smp_processor_id()); if (c->x86_model > 2) printk(" II"); printk("\n"); @@ -2398,7 +2404,8 @@ static void __init init_intel(struct cpu c->f00f_bug = 1; if (!f00f_workaround_enabled) { trap_init_f00f_bug(); - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + printk(KERN_NOTICE "CPU#%d: Intel Pentium with F0 0F bug - workaround enabled.\n", + smp_processor_id()); f00f_workaround_enabled = 1; } } @@ -2469,16 +2476,17 @@ static void __init init_intel(struct cpu l2 = cachesize_override; if ( trace ) - printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); + printk (KERN_INFO "CPU#%d: Trace cache: %dK uops", + smp_processor_id(), 
trace); else if ( l1i ) - printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); + printk (KERN_INFO "CPU#%d: L1 I cache: %dK", smp_processor_id(), l1i); if ( l1d ) printk(", L1 D cache: %dK\n", l1d); if ( l2 ) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); + printk(KERN_INFO "CPU#%d: L2 cache: %dK\n", smp_processor_id(), l2); if ( l3 ) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + printk(KERN_INFO "CPU#%d: L3 cache: %dK\n", smp_processor_id(), l3); /* * This assumes the L3 cache is shared; it typically lives in @@ -2533,7 +2541,7 @@ static void __init init_intel(struct cpu smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + printk(KERN_INFO "CPU#%d: Hyper-Threading is disabled\n", cpu); } else if (smp_num_siblings > 1 ) { index_lsb = 0; index_msb = 31; @@ -2543,7 +2551,8 @@ static void __init init_intel(struct cpu */ #define NR_SIBLINGS 2 if (smp_num_siblings != NR_SIBLINGS) { - printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); + printk(KERN_WARNING "CPU#%d: Unsupported number of the siblings %d", + cpu, smp_num_siblings); smp_num_siblings = 1; return; } @@ -2562,8 +2571,8 @@ static void __init init_intel(struct cpu initial_apic_id = ebx >> 24 & 0xff; phys_proc_id[cpu] = initial_apic_id >> index_msb; - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - phys_proc_id[cpu]); + printk(KERN_INFO "CPU#%d: Physical Processor ID: %d\n", + cpu, phys_proc_id[cpu]); } } @@ -2696,7 +2705,7 @@ static void __init squash_the_stupid_ser rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi); lo |= 0x200000; wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); + printk(KERN_NOTICE "CPU#%d: serial number disabled.\n", smp_processor_id()); clear_bit(X86_FEATURE_PN, &c->x86_capability); /* Disabling the serial number may affect the cpuid level */ @@ -2985,7 +2994,8 @@ void __init identify_cpu(struct cpuinfo_ /* Now the feature flags better reflect actual 
CPU features! */ - printk(KERN_DEBUG "CPU: After generic, caps: %08x %08x %08x %08x\n", + printk(KERN_DEBUG "CPU#%d: After generic, caps: %08lx %08lx %08lx %08lx\n", + smp_processor_id(), c->x86_capability[0], c->x86_capability[1], c->x86_capability[2], @@ -3003,7 +3013,8 @@ void __init identify_cpu(struct cpuinfo_ boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } - printk(KERN_DEBUG "CPU: Common caps: %08x %08x %08x %08x\n", + printk(KERN_DEBUG "CPU#%d: Common caps: %08x %08x %08x %08x\n", + smp_processor_id(), boot_cpu_data.x86_capability[0], boot_cpu_data.x86_capability[1], boot_cpu_data.x86_capability[2], @@ -3192,16 +3203,16 @@ void __init cpu_init (void) struct tss_struct * t = &init_tss[nr]; if (test_and_set_bit(nr, &cpu_initialized)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", nr); + printk(KERN_WARNING "CPU#%d: already initialized!\n", nr); for (;;) __sti(); } - printk(KERN_INFO "Initializing CPU#%d\n", nr); + printk(KERN_INFO "CPU#%d: Initializing\n", nr); if (cpu_has_vme || cpu_has_tsc || cpu_has_de) clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); #ifndef CONFIG_X86_TSC if (tsc_disable && cpu_has_tsc) { - printk(KERN_NOTICE "Disabling TSC...\n"); + printk(KERN_NOTICE "CPU#%d: Disabling TSC...\n", nr); /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/ clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); set_in_cr4(X86_CR4_TSD); @@ -3399,10 +3410,11 @@ int __init ppro_with_ram_bug(void) if((ident&15) < 8) { - printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); + printk(KERN_INFO "CPU#%d: Pentium Pro with Errata#50 detected. 
Taking evasive action.\n", + smp_processor_id()); return 1; } - printk(KERN_INFO "Your Pentium Pro seems ok.\n"); + printk(KERN_INFO "CPU#%d: Your Pentium Pro seems ok.\n", smp_processor_id()); return 0; } diff -Naurp linux-2.4.20-wolk4.2-fullkernel/arch/i386/mm/init.c linux-2.4.20-wolk4.3-fullkernel/arch/i386/mm/init.c --- linux-2.4.20-wolk4.2-fullkernel/arch/i386/mm/init.c 2003-06-28 09:23:18.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/arch/i386/mm/init.c 2003-06-19 14:16:28.000000000 +0200 @@ -497,6 +497,7 @@ static int __init free_pages_init(void) #ifdef CONFIG_HIGHMEM for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--) one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro); + reset_highmem_zone(totalhigh_pages); totalram_pages += totalhigh_pages; #endif return reservedpages; diff -Naurp linux-2.4.20-wolk4.2-fullkernel/drivers/block/ll_rw_blk.c linux-2.4.20-wolk4.3-fullkernel/drivers/block/ll_rw_blk.c --- linux-2.4.20-wolk4.2-fullkernel/drivers/block/ll_rw_blk.c 2003-06-28 09:23:18.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/drivers/block/ll_rw_blk.c 2003-07-03 21:30:56.000000000 +0200 @@ -596,7 +596,7 @@ static void blk_init_free_list(request_q if (nr_requests > 1024) nr_requests = 1024; #else - nr_requests = 32; + nr_requests = 16; #endif blk_grow_request_list(q, nr_requests); @@ -811,12 +811,24 @@ static struct request *__get_request_wai unsigned long time_waited; DECLARE_WAITQUEUE(wait, current); +#ifdef CONFIG_SCHED_DESKTOP add_wait_queue(&q->wait_for_requests[rw], &wait); +#else + add_wait_queue_exclusive(&q->wait_for_requests[rw], &wait); +#endif do { set_current_state(TASK_UNINTERRUPTIBLE); - generic_unplug_device(q); - if (q->rq[rw].count == 0) + if (q->rq[rw].count == 0) { + /* + * All we care about is not to stall if any request + * is been released after we set TASK_UNINTERRUPTIBLE. + * This is the most efficient place to unplug the queue + * in case we hit the race and we can get the request + * without waiting. 
+ */ + generic_unplug_device(q); schedule(); + } spin_lock_irq(q->queue_lock); rq = get_request(q, rw); spin_unlock_irq(q->queue_lock); @@ -845,8 +857,8 @@ static void get_request_wait_wakeup(requ * generic_unplug_device while our __get_request_wait was running * w/o the queue_lock held and w/ our request out of the queue. */ - if (waitqueue_active(&q->wait_for_requests[rw])) - wake_up(&q->wait_for_requests[rw]); + if (q->rq[rw].count == 0 && waitqueue_active(&q->wait_for_requests[rw])) + __generic_unplug_device(q); } /* RO fail safe mechanism */ @@ -1071,10 +1083,22 @@ void blkdev_release_request(struct reque */ if (q && (rw == READ || rw == WRITE)) { list_add(&req->queue, &q->rq[rw].free); + +#ifdef CONFIG_SCHED_DESKTOP if (++q->rq[rw].count >= q->batch_requests) { smp_mb(); wake_up(&q->wait_for_requests[rw]); } + +#else + if (++q->rq[rw].count >= q->batch_requests) { + smp_mb(); + if (waitqueue_active(&q->wait_for_requests[rw])) + wake_up(&q->wait_for_requests[rw]); + } + +#endif /* CONFIG_SCHED_DESKTOP */ + } } @@ -1148,6 +1172,7 @@ static inline void attempt_front_merge(r static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh) { + int need_unplug = 0; unsigned int sector, count; int max_segments = MAX_SEGMENTS; struct request * req, *freereq = NULL; @@ -1296,8 +1321,8 @@ get_rq: blk_refile_atomic_queue(atomic); freereq = __get_request_wait(q, rw); head = real_head; + need_unplug = 1; spin_lock_irq(q->queue_lock); - get_request_wait_wakeup(q, rw); goto again; } } @@ -1326,6 +1351,8 @@ get_rq: out: if (freereq) blkdev_release_request(freereq); + if (need_unplug) + get_request_wait_wakeup(q, rw); spin_unlock_irq(q->queue_lock); if (atomic_add) blk_atomic_add(q); @@ -1463,12 +1490,20 @@ void __submit_bh(int rw, struct buffer_h bh->b_rdev = bh->b_dev; bh->b_rsector = blocknr; + /* + * Really we could read random memory in the waitqueue + * check and as worse we would trigger a false positive + * queue unplug, however getting the reference + 
* on the bh and reading allocated memory is cleaner. + */ + get_bh(bh); generic_make_request(rw, bh); /* fix race condition with wait_on_buffer() */ smp_mb(); /* spin_unlock may have inclusive semantics */ if (waitqueue_active(&bh->b_wait)) - wake_up(&bh->b_wait); + run_task_queue(&tq_disk); + put_bh(bh); switch (rw) { case WRITE: diff -Naurp linux-2.4.20-wolk4.2-fullkernel/drivers/video/vesafb.c linux-2.4.20-wolk4.3-fullkernel/drivers/video/vesafb.c --- linux-2.4.20-wolk4.2-fullkernel/drivers/video/vesafb.c 2003-06-28 09:23:23.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/drivers/video/vesafb.c 2003-06-19 14:16:19.000000000 +0200 @@ -95,7 +95,7 @@ static union { static int inverse = 0; static int mtrr = 1; -static int vram __initdata = 0; /* needed for vram boot option */ +static int vram __initdata = 0; /* needed for vram boot option */ static int currcon = 0; static int pmi_setpal = 0; /* pmi for palette changes ??? */ @@ -642,12 +642,18 @@ int __init vesafb_init(void) video_width = screen_info.lfb_width; video_height = screen_info.lfb_height; video_linelength = screen_info.lfb_linelength; - video_size = screen_info.lfb_width * screen_info.lfb_height * video_bpp / 8; + + /* remap memory according to videomode, multiply by 2 to get space for doublebuffering */ + video_size = screen_info.lfb_width * screen_info.lfb_height * video_bpp / 8 * 2; + + /* check that we don't remap more memory than old cards have */ + if (video_size > screen_info.lfb_size * 65536) + video_size = screen_info.lfb_size * 65536; /* FIXME: Should we clip against declared size for banked devices ? */ /* sets video_size according to vram boot option */ - if (vram && vram * 1024 * 1024 > video_size) + if (vram && vram * 1024 * 1024 != video_size) video_size = vram * 1024 * 1024; video_visual = (video_bpp == 8) ? 
diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/buffer.c linux-2.4.20-wolk4.3-fullkernel/fs/buffer.c --- linux-2.4.20-wolk4.2-fullkernel/fs/buffer.c 2003-06-28 09:23:23.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/buffer.c 2003-07-02 21:17:29.000000000 +0200 @@ -99,9 +99,9 @@ static unsigned long bdflush_needs_wakin static struct timer_list bdflush_timer; #ifdef CONFIG_SCHED_DESKTOP - union bdflush_param bdf_prm = {{30, 500, 0, 0, 5*HZ, 30*HZ, 60, 20, 0}}; + union bdflush_param bdf_prm = {{30, 500, 0, 0, 5*HZ, 30*HZ, 80, 30, 0}}; #else - union bdflush_param bdf_prm = {{50, 500, 0, 0, 5*HZ, 30*HZ, 60, 20, 0}}; + union bdflush_param bdf_prm = {{50, 500, 0, 0, 5*HZ, 30*HZ, 80, 50, 0}}; #endif /* These are the min and max parameter values that we will allow to be assigned */ @@ -520,26 +520,18 @@ out: return ret; } -asmlinkage long sys_fdatasync(unsigned int fd) +int do_fdatasync(struct file *file) { - struct file * file; - struct dentry * dentry; - struct inode * inode; int ret, err; + struct dentry *dentry; + struct inode *inode; - ret = -EBADF; - file = fget(fd); - if (!file) - goto out; - + if (unlikely(!file->f_op || !file->f_op->fsync)) + return -EINVAL; + dentry = file->f_dentry; inode = dentry->d_inode; - ret = -EINVAL; - if (!file->f_op || !file->f_op->fsync) - goto out_putf; - - down(&inode->i_sem); ret = filemap_fdatasync(inode->i_mapping); err = file->f_op->fsync(file, dentry, 1); if (err && !ret) @@ -547,6 +539,23 @@ asmlinkage long sys_fdatasync(unsigned i err = filemap_fdatawait(inode->i_mapping); if (err && !ret) ret = err; + return ret; +} + +asmlinkage long sys_fdatasync(unsigned int fd) +{ + struct file * file; + struct inode *inode; + int ret; + + ret = -EBADF; + file = fget(fd); + if (!file) + goto out; + + inode = file->f_dentry->d_inode; + down(&inode->i_sem); + ret = do_fdatasync(file); up(&inode->i_sem); out_putf: @@ -687,7 +696,7 @@ void buffer_insert_list(struct buffer_he if (buffer_attached(bh)) list_del(&bh->b_inode_buffers); 
set_buffer_attached(bh); - list_add(&bh->b_inode_buffers, list); + list_add_tail(&bh->b_inode_buffers, list); spin_unlock(&lru_list_lock); } @@ -1109,21 +1118,6 @@ static int balance_dirty_state(void) return -1; } -static int bdflush_stop(void) -{ - unsigned long dirty, tot, dirty_limit; - - dirty = size_buffers_type[BUF_DIRTY] >> PAGE_SHIFT; - tot = nr_free_buffer_pages(); - - dirty *= 100; - dirty_limit = tot * bdf_prm.b_un.nfract_stop_bdflush; - - if (dirty > dirty_limit) - return 0; - return 1; -} - /* * if a new dirty buffer is created we need to balance bdflush. * @@ -1480,6 +1474,7 @@ try_to_free: * We rotate the buffers on the buffer_lru list, trying to reclaim * them. */ +#ifdef CONFIG_HIGHMEM int try_to_reclaim_buffers(int priority, unsigned int gfp_mask) { int todo = nr_used_buffer_heads >> priority; @@ -1487,6 +1482,18 @@ int try_to_reclaim_buffers(int priority, struct buffer_head * bh; struct page * page; int reclaimed = 0; + + if (RATE_LIMIT(HZ)) + kmem_cache_shrink(kiobuf_cachep); + + /* + * Since removing buffer heads can be bad for performance, we + * don't bother reclaiming any if the buffer heads take up less + * than 10% of pageable low memory. 
+ */ + if (nr_unused_buffer_heads * sizeof(struct buffer_head) * 10 < + freeable_lowmem() * PAGE_SIZE) + return 0; spin_lock(&unused_list_lock); while (todo-- && !list_empty(&buffer_lru)) { @@ -1512,6 +1519,7 @@ int try_to_reclaim_buffers(int priority, return reclaimed; } +#endif /* CONFIG_HIGHMEM */ /* * We don't have to release all buffers here, but @@ -1639,6 +1647,7 @@ static int __block_write_full_page(struc if (!page->buffers) create_empty_buffers(page, inode->i_dev, 1 << inode->i_blkbits); + BUG_ON(page_count(page) < 3); head = page->buffers; block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); @@ -1893,6 +1902,7 @@ int block_read_full_page(struct page *pa blocksize = 1 << inode->i_blkbits; if (!page->buffers) create_empty_buffers(page, inode->i_dev, blocksize); + BUG_ON(page_count(page) < 3); head = page->buffers; blocks = PAGE_CACHE_SIZE >> inode->i_blkbits; @@ -2303,6 +2313,7 @@ int generic_direct_IO(int rw, struct ino int i, nr_blocks, retval; unsigned long * blocks = iobuf->blocks; int length; + int beyond_eof = 0; length = iobuf->length; nr_blocks = (length + blocksize - 1) / blocksize; @@ -2315,13 +2326,19 @@ int generic_direct_IO(int rw, struct ino bh.b_size = blocksize; bh.b_page = NULL; - retval = get_block(inode, blocknr, &bh, rw == READ ? 0 : 1); + if (((loff_t) blocknr) * blocksize >= inode->i_size) + beyond_eof = 1; + + /* Only allow get_block to create new blocks if we are safely + beyond EOF. O_DIRECT is unsafe inside sparse files. */ + retval = get_block(inode, blocknr, &bh, + ((rw != READ) && beyond_eof)); if (retval) { if (!i) /* report error to userspace */ goto out; else - /* do short I/O utill 'i' */ + /* do short I/O until 'i' */ break; } @@ -2337,14 +2354,20 @@ int generic_direct_IO(int rw, struct ino if (buffer_new(&bh)) unmap_underlying_metadata(&bh); if (!buffer_mapped(&bh)) - BUG(); + /* upper layers need to pass the error on or + * fall back to buffered IO. 
*/ + return -ENOTBLK; } blocks[i] = bh.b_blocknr; } /* patch length to handle short I/O */ iobuf->length = i * blocksize; + if (!beyond_eof) + up(&inode->i_sem); retval = brw_kiovec(rw, 1, &iobuf, inode->i_dev, iobuf->blocks, blocksize); + if (!beyond_eof) + down(&inode->i_sem); /* restore orig length */ iobuf->length = length; @@ -2591,6 +2614,7 @@ int brw_page(int rw, struct page *page, if (!page->buffers) create_empty_buffers(page, dev, size); + BUG_ON(page_count(page) < 3); head = bh = page->buffers; /* Stage 1: lock all the buffers */ @@ -3185,7 +3209,7 @@ int bdflush(void *startup) break; ndirty -= NRSYNC; } - if (ndirty > 0 || bdflush_stop()) { + if (ndirty > 0 || balance_dirty_state() < 1) { run_task_queue(&tq_disk); interruptible_sleep_on(&bdflush_wait); } diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/coda/cache.c linux-2.4.20-wolk4.3-fullkernel/fs/coda/cache.c --- linux-2.4.20-wolk4.2-fullkernel/fs/coda/cache.c 2002-09-27 23:25:57.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/coda/cache.c 2003-06-29 22:48:36.000000000 +0200 @@ -29,8 +29,8 @@ void coda_cache_enter(struct inode *inod { struct coda_inode_info *cii = ITOC(inode); - if ( !coda_cred_ok(&cii->c_cached_cred) ) { - coda_load_creds(&cii->c_cached_cred); + if ( cii->c_cached_uid != current->fsuid ) { + cii->c_cached_uid = current->fsuid; cii->c_cached_perm = mask; } else cii->c_cached_perm |= mask; @@ -43,8 +43,8 @@ void coda_cache_clear_inode(struct inode cii->c_cached_perm = 0; } -/* remove all acl caches for a principal (or all principals when cred == NULL)*/ -void coda_cache_clear_all(struct super_block *sb, struct coda_cred *cred) +/* remove all acl caches for a principal (or all principals when uid == NULL)*/ +void coda_cache_clear_all(struct super_block *sb, uid_t *uid) { struct coda_sb_info *sbi; struct coda_inode_info *cii; @@ -56,7 +56,7 @@ void coda_cache_clear_all(struct super_b list_for_each(tmp, &sbi->sbi_cihead) { cii = list_entry(tmp, struct coda_inode_info, c_cilist); - if 
(!cred || coda_cred_eq(cred, &cii->c_cached_cred)) + if (!uid || cii->c_cached_uid == *uid) cii->c_cached_perm = 0; } } @@ -68,8 +68,8 @@ int coda_cache_check(struct inode *inode struct coda_inode_info *cii = ITOC(inode); int hit; - hit = ((mask & cii->c_cached_perm) == mask) && - coda_cred_ok(&cii->c_cached_cred); + hit = ((mask & cii->c_cached_perm) == mask && + cii->c_cached_uid == current->fsuid); CDEBUG(D_CACHE, "%s for ino %ld\n", hit ? "HIT" : "MISS", inode->i_ino); return hit; diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/coda/cnode.c linux-2.4.20-wolk4.3-fullkernel/fs/coda/cnode.c --- linux-2.4.20-wolk4.2-fullkernel/fs/coda/cnode.c 2002-09-27 23:25:57.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/coda/cnode.c 2003-06-29 22:48:36.000000000 +0200 @@ -13,23 +13,20 @@ extern int coda_debug; -inline int coda_fideq(ViceFid *fid1, ViceFid *fid2) +inline int coda_fideq(struct CodaFid *fid1, struct CodaFid *fid2) { - if (fid1->Vnode != fid2->Vnode) return 0; - if (fid1->Volume != fid2->Volume) return 0; - if (fid1->Unique != fid2->Unique) return 0; - return 1; + return memcmp(fid1, fid2, sizeof(struct CodaFid)) == 0; } -inline int coda_isnullfid(ViceFid *fid) +static struct CodaFid NullFid; +inline int coda_isnullfid(struct CodaFid *fid) { - if (fid->Vnode || fid->Volume || fid->Unique) return 0; - return 1; + return coda_fideq(fid, &NullFid); } static int coda_inocmp(struct inode *inode, unsigned long ino, void *opaque) { - return (coda_fideq((ViceFid *)opaque, &(ITOC(inode)->c_fid))); + return (coda_fideq((struct CodaFid *)opaque, &(ITOC(inode)->c_fid))); } static struct inode_operations coda_symlink_inode_operations = { @@ -62,7 +59,7 @@ static void coda_fill_inode(struct inode init_special_inode(inode, inode->i_mode, attr->va_rdev); } -struct inode * coda_iget(struct super_block * sb, ViceFid * fid, +struct inode * coda_iget(struct super_block * sb, struct CodaFid * fid, struct coda_vattr * attr) { struct inode *inode; @@ -97,7 +94,7 @@ struct inode * 
coda_iget(struct super_bl - link the two up if this is needed - fill in the attributes */ -int coda_cnode_make(struct inode **inode, ViceFid *fid, struct super_block *sb) +int coda_cnode_make(struct inode **inode, struct CodaFid *fid, struct super_block *sb) { struct coda_vattr attr; int error; @@ -125,8 +122,8 @@ int coda_cnode_make(struct inode **inode } -void coda_replace_fid(struct inode *inode, struct ViceFid *oldfid, - struct ViceFid *newfid) +void coda_replace_fid(struct inode *inode, struct CodaFid *oldfid, + struct CodaFid *newfid) { struct coda_inode_info *cii; @@ -144,7 +141,7 @@ void coda_replace_fid(struct inode *inod } /* convert a fid to an inode. */ -struct inode *coda_fid_to_inode(ViceFid *fid, struct super_block *sb) +struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb) { ino_t nr; struct inode *inode; diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/coda/coda_linux.c linux-2.4.20-wolk4.3-fullkernel/fs/coda/coda_linux.c --- linux-2.4.20-wolk4.2-fullkernel/fs/coda/coda_linux.c 2002-09-27 23:25:57.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/coda/coda_linux.c 2003-06-29 22:48:36.000000000 +0200 @@ -29,10 +29,10 @@ int coda_access_cache = 1; int coda_fake_statfs; /* print a fid */ -char * coda_f2s(ViceFid *f) +char * coda_f2s(struct CodaFid *f) { static char s[60]; - sprintf(s, "(%-#lx.%-#lx.%-#lx)", f->Volume, f->Vnode, f->Unique); + sprintf(s, "(%-#x.%-#x.%-#x)", f->opaque[1], f->opaque[2], f->opaque[3]); return s; } @@ -49,30 +49,6 @@ int coda_isroot(struct inode *i) return ( i->i_sb->s_root->d_inode == i ); } -/* put the current process credentials in the cred */ -void coda_load_creds(struct coda_cred *cred) -{ - cred->cr_uid = (vuid_t) current->uid; - cred->cr_euid = (vuid_t) current->euid; - cred->cr_suid = (vuid_t) current->suid; - cred->cr_fsuid = (vuid_t) current->fsuid; - - cred->cr_groupid = (vgid_t) current->gid; - cred->cr_egid = (vgid_t) current->egid; - cred->cr_sgid = (vgid_t) current->sgid; - 
cred->cr_fsgid = (vgid_t) current->fsgid; -} - -int coda_cred_ok(struct coda_cred *cred) -{ - return(current->fsuid == cred->cr_fsuid); -} - -int coda_cred_eq(struct coda_cred *cred1, struct coda_cred *cred2) -{ - return (cred1->cr_fsuid == cred2->cr_fsuid); -} - unsigned short coda_flags_to_cflags(unsigned short flags) { unsigned short coda_flags = 0; @@ -173,8 +149,8 @@ void coda_iattr_to_vattr(struct iattr *i /* clean out */ vattr->va_mode = (umode_t) -1; - vattr->va_uid = (vuid_t) -1; - vattr->va_gid = (vgid_t) -1; + vattr->va_uid = (uid_t) -1; + vattr->va_gid = (gid_t) -1; vattr->va_size = (off_t) -1; vattr->va_atime.tv_sec = (time_t) -1; vattr->va_mtime.tv_sec = (time_t) -1; @@ -212,10 +188,10 @@ void coda_iattr_to_vattr(struct iattr *i vattr->va_mode = iattr->ia_mode; } if ( valid & ATTR_UID ) { - vattr->va_uid = (vuid_t) iattr->ia_uid; + vattr->va_uid = iattr->ia_uid; } if ( valid & ATTR_GID ) { - vattr->va_gid = (vgid_t) iattr->ia_gid; + vattr->va_gid = iattr->ia_gid; } if ( valid & ATTR_SIZE ) { vattr->va_size = iattr->ia_size; diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/coda/dir.c linux-2.4.20-wolk4.3-fullkernel/fs/coda/dir.c --- linux-2.4.20-wolk4.2-fullkernel/fs/coda/dir.c 2002-09-27 23:25:57.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/coda/dir.c 2003-06-29 22:48:36.000000000 +0200 @@ -79,6 +79,7 @@ struct inode_operations coda_dir_inode_o }; struct file_operations coda_dir_operations = { + llseek: generic_file_llseek, read: generic_read_dir, readdir: coda_readdir, open: coda_open, @@ -93,12 +94,14 @@ struct file_operations coda_dir_operatio static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry) { struct inode *res_inode = NULL; - struct ViceFid resfid = {0,0,0}; + struct CodaFid resfid; int dropme = 0; /* to indicate entry should not be cached */ int type = 0; int error = 0; const char *name = entry->d_name.name; size_t length = entry->d_name.len; + + memset(&resfid, 0, sizeof(struct CodaFid)); if ( length > 
CODA_MAXNAMLEN ) { printk("name too long: lookup, %s (%*s)\n", @@ -207,7 +210,7 @@ static int coda_create(struct inode *dir const char *name=de->d_name.name; int length=de->d_name.len; struct inode *inode; - struct ViceFid newfid; + struct CodaFid newfid; struct coda_vattr attrs; coda_vfs_stat.create++; @@ -245,7 +248,7 @@ static int coda_mknod(struct inode *dir, const char *name=de->d_name.name; int length=de->d_name.len; struct inode *inode; - struct ViceFid newfid; + struct CodaFid newfid; struct coda_vattr attrs; if ( coda_hasmknod == 0 ) @@ -288,7 +291,7 @@ static int coda_mkdir(struct inode *dir, const char *name = de->d_name.name; int len = de->d_name.len; int error; - struct ViceFid newfid; + struct CodaFid newfid; coda_vfs_stat.mkdir++; @@ -507,7 +510,11 @@ int coda_readdir(struct file *coda_file, ret = coda_venus_readdir(host_file, filldir, dirent, coda_dentry); } else { /* potemkin case: we were handed a directory inode */ - ret = vfs_readdir(host_file, filldir, dirent); + /* Yuk, we can't call vfs_readdir because we are already + * holding the inode semaphore. 
*/ + ret = -ENOENT; + if (!IS_DEADDIR(host_file->f_dentry->d_inode)) + ret = host_file->f_op->readdir(host_file, filldir, dirent); } coda_file->f_pos = host_file->f_pos; diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/coda/file.c linux-2.4.20-wolk4.3-fullkernel/fs/coda/file.c --- linux-2.4.20-wolk4.2-fullkernel/fs/coda/file.c 2002-09-27 23:25:57.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/coda/file.c 2003-06-29 22:48:36.000000000 +0200 @@ -117,10 +117,8 @@ int coda_open(struct inode *coda_inode, coda_vfs_stat.open++; cfi = kmalloc(sizeof(struct coda_file_info), GFP_KERNEL); - if (!cfi) { - unlock_kernel(); + if (!cfi) return -ENOMEM; - } lock_kernel(); @@ -137,7 +135,6 @@ int coda_open(struct inode *coda_inode, cfi->cfi_magic = CODA_MAGIC; cfi->cfi_mapcount = 0; cfi->cfi_container = host_file; - coda_load_creds(&cfi->cfi_cred); host_inode = host_file->f_dentry->d_inode; if (coda_inode->i_mapping == &coda_inode->i_data) @@ -185,7 +182,7 @@ int coda_flush(struct file *coda_file) if (!cfi || cfi->cfi_magic != CODA_MAGIC) BUG(); err = venus_store(coda_inode->i_sb, coda_i2f(coda_inode), coda_flags, - &cfi->cfi_cred); + coda_file->f_uid); if (err == -EOPNOTSUPP) { use_coda_close = 1; @@ -221,7 +218,7 @@ int coda_release(struct inode *coda_inod if (use_coda_close) err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode), - coda_flags, &cfi->cfi_cred); + coda_flags, coda_file->f_uid); host_inode = cfi->cfi_container->f_dentry->d_inode; cii = ITOC(coda_inode); diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/coda/inode.c linux-2.4.20-wolk4.3-fullkernel/fs/coda/inode.c --- linux-2.4.20-wolk4.2-fullkernel/fs/coda/inode.c 2002-09-27 23:25:57.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/coda/inode.c 2003-06-29 22:48:36.000000000 +0200 @@ -95,7 +95,7 @@ static struct super_block * coda_read_su struct inode *root = 0; struct coda_sb_info *sbi = NULL; struct venus_comm *vc = NULL; - ViceFid fid; + struct CodaFid fid; int error; int idx; diff -Naurp 
linux-2.4.20-wolk4.2-fullkernel/fs/coda/psdev.c linux-2.4.20-wolk4.3-fullkernel/fs/coda/psdev.c --- linux-2.4.20-wolk4.2-fullkernel/fs/coda/psdev.c 2002-09-27 23:25:57.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/coda/psdev.c 2003-06-29 22:48:36.000000000 +0200 @@ -114,7 +114,7 @@ static ssize_t coda_psdev_write(struct f if (copy_from_user(&hdr, buf, 2 * sizeof(u_long))) return -EFAULT; - CDEBUG(D_PSDEV, "(process,opc,uniq)=(%d,%ld,%ld), nbytes %ld\n", + CDEBUG(D_PSDEV, "(process,opc,uniq)=(%d,%d,%d), nbytes %ld\n", current->pid, hdr.opcode, hdr.unique, (long)nbytes); if (DOWNCALL(hdr.opcode)) { @@ -131,13 +131,13 @@ static ssize_t coda_psdev_write(struct f CDEBUG(D_PSDEV, "handling downcall\n"); if ( nbytes < sizeof(struct coda_out_hdr) ) { - printk("coda_downcall opc %ld uniq %ld, not enough!\n", + printk("coda_downcall opc %d uniq %d, not enough!\n", hdr.opcode, hdr.unique); count = nbytes; goto out; } if ( nbytes > size ) { - printk("Coda: downcall opc %ld, uniq %ld, too much!", + printk("Coda: downcall opc %d, uniq %d, too much!", hdr.opcode, hdr.unique); nbytes = size; } @@ -176,17 +176,17 @@ static ssize_t coda_psdev_write(struct f unlock_kernel(); if (!req) { - printk("psdev_write: msg (%ld, %ld) not found\n", + printk("psdev_write: msg (%d, %d) not found\n", hdr.opcode, hdr.unique); retval = -ESRCH; goto out; } - CDEBUG(D_PSDEV,"Eureka: uniq %ld on queue!\n", hdr.unique); + CDEBUG(D_PSDEV,"Eureka: uniq %d on queue!\n", hdr.unique); /* move data into response buffer. */ if (req->uc_outSize < nbytes) { - printk("psdev_write: too much cnt: %d, cnt: %ld, opc: %ld, uniq: %ld.\n", + printk("psdev_write: too much cnt: %d, cnt: %ld, opc: %d, uniq: %d.\n", req->uc_outSize, (long)nbytes, hdr.opcode, hdr.unique); nbytes = req->uc_outSize; /* don't have more space! */ } @@ -210,7 +210,7 @@ static ssize_t coda_psdev_write(struct f } CDEBUG(D_PSDEV, - "Found! Count %ld for (opc,uniq)=(%ld,%ld), upc_req at %p\n", + "Found! 
Count %ld for (opc,uniq)=(%d,%d), upc_req at %p\n", (long)count, hdr.opcode, hdr.unique, &req); wake_up(&req->uc_sleep); diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/coda/upcall.c linux-2.4.20-wolk4.3-fullkernel/fs/coda/upcall.c --- linux-2.4.20-wolk4.2-fullkernel/fs/coda/upcall.c 2002-09-27 23:25:57.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/coda/upcall.c 2003-06-29 22:48:36.000000000 +0200 @@ -55,7 +55,7 @@ static void *alloc_upcall(int opcode, in inp->ih.opcode = opcode; inp->ih.pid = current->pid; inp->ih.pgid = current->pgrp; - coda_load_creds(&(inp->ih.cred)); + inp->ih.uid = current->fsuid; return (void*)inp; } @@ -74,7 +74,7 @@ do {\ /* the upcalls */ -int venus_rootfid(struct super_block *sb, ViceFid *fidp) +int venus_rootfid(struct super_block *sb, struct CodaFid *fidp) { union inputArgs *inp; union outputArgs *outp; @@ -88,16 +88,14 @@ int venus_rootfid(struct super_block *sb if (error) { printk("coda_get_rootfid: error %d\n", error); } else { - *fidp = (ViceFid) outp->coda_root.VFid; - CDEBUG(D_SUPER, "VolumeId: %lx, VnodeId: %lx.\n", - fidp->Volume, fidp->Vnode); + *fidp = (struct CodaFid) outp->coda_root.Fid; } CODA_FREE(inp, insize); return error; } -int venus_getattr(struct super_block *sb, struct ViceFid *fid, +int venus_getattr(struct super_block *sb, struct CodaFid *fid, struct coda_vattr *attr) { union inputArgs *inp; @@ -106,7 +104,7 @@ int venus_getattr(struct super_block *sb insize = SIZE(getattr); UPARG(CODA_GETATTR); - inp->coda_getattr.VFid = *fid; + inp->coda_getattr.Fid = *fid; error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); @@ -116,7 +114,7 @@ int venus_getattr(struct super_block *sb return error; } -int venus_setattr(struct super_block *sb, struct ViceFid *fid, +int venus_setattr(struct super_block *sb, struct CodaFid *fid, struct coda_vattr *vattr) { union inputArgs *inp; @@ -126,7 +124,7 @@ int venus_setattr(struct super_block *sb insize = SIZE(setattr); UPARG(CODA_SETATTR); - inp->coda_setattr.VFid = *fid; + 
inp->coda_setattr.Fid = *fid; inp->coda_setattr.attr = *vattr; error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); @@ -136,9 +134,9 @@ int venus_setattr(struct super_block *sb return error; } -int venus_lookup(struct super_block *sb, struct ViceFid *fid, +int venus_lookup(struct super_block *sb, struct CodaFid *fid, const char *name, int length, int * type, - struct ViceFid *resfid) + struct CodaFid *resfid) { union inputArgs *inp; union outputArgs *outp; @@ -149,7 +147,7 @@ int venus_lookup(struct super_block *sb, insize = max_t(unsigned int, offset + length +1, OUTSIZE(lookup)); UPARG(CODA_LOOKUP); - inp->coda_lookup.VFid = *fid; + inp->coda_lookup.Fid = *fid; inp->coda_lookup.name = offset; inp->coda_lookup.flags = CLU_CASE_SENSITIVE; /* send Venus a null terminated string */ @@ -158,15 +156,14 @@ int venus_lookup(struct super_block *sb, error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); - *resfid = outp->coda_lookup.VFid; + *resfid = outp->coda_lookup.Fid; *type = outp->coda_lookup.vtype; CODA_FREE(inp, insize); return error; } -int venus_store(struct super_block *sb, struct ViceFid *fid, int flags, - struct coda_cred *cred) +int venus_store(struct super_block *sb, struct CodaFid *fid, int flags, uid_t uid) { union inputArgs *inp; union outputArgs *outp; @@ -175,9 +172,8 @@ int venus_store(struct super_block *sb, insize = SIZE(store); UPARG(CODA_STORE); - memcpy(&(inp->ih.cred), cred, sizeof(*cred)); - - inp->coda_store.VFid = *fid; + inp->ih.uid = uid; + inp->coda_store.Fid = *fid; inp->coda_store.flags = flags; error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); @@ -186,7 +182,7 @@ int venus_store(struct super_block *sb, return error; } -int venus_release(struct super_block *sb, struct ViceFid *fid, int flags) +int venus_release(struct super_block *sb, struct CodaFid *fid, int flags) { union inputArgs *inp; union outputArgs *outp; @@ -195,7 +191,7 @@ int venus_release(struct super_block *sb insize = SIZE(release); UPARG(CODA_RELEASE); - 
inp->coda_release.VFid = *fid; + inp->coda_release.Fid = *fid; inp->coda_release.flags = flags; error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); @@ -204,8 +200,7 @@ int venus_release(struct super_block *sb return error; } -int venus_close(struct super_block *sb, struct ViceFid *fid, int flags, - struct coda_cred *cred) +int venus_close(struct super_block *sb, struct CodaFid *fid, int flags, uid_t uid) { union inputArgs *inp; union outputArgs *outp; @@ -214,9 +209,8 @@ int venus_close(struct super_block *sb, insize = SIZE(release); UPARG(CODA_CLOSE); - memcpy(&(inp->ih.cred), cred, sizeof(*cred)); - - inp->coda_close.VFid = *fid; + inp->ih.uid = uid; + inp->coda_close.Fid = *fid; inp->coda_close.flags = flags; error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); @@ -225,7 +219,7 @@ int venus_close(struct super_block *sb, return error; } -int venus_open(struct super_block *sb, struct ViceFid *fid, +int venus_open(struct super_block *sb, struct CodaFid *fid, int flags, struct file **fh) { union inputArgs *inp; @@ -235,7 +229,7 @@ int venus_open(struct super_block *sb, s insize = SIZE(open_by_fd); UPARG(CODA_OPEN_BY_FD); - inp->coda_open.VFid = *fid; + inp->coda_open.Fid = *fid; inp->coda_open.flags = flags; error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); @@ -246,9 +240,9 @@ int venus_open(struct super_block *sb, s return error; } -int venus_mkdir(struct super_block *sb, struct ViceFid *dirfid, +int venus_mkdir(struct super_block *sb, struct CodaFid *dirfid, const char *name, int length, - struct ViceFid *newfid, struct coda_vattr *attrs) + struct CodaFid *newfid, struct coda_vattr *attrs) { union inputArgs *inp; union outputArgs *outp; @@ -259,7 +253,7 @@ int venus_mkdir(struct super_block *sb, insize = max_t(unsigned int, offset + length + 1, OUTSIZE(mkdir)); UPARG(CODA_MKDIR); - inp->coda_mkdir.VFid = *dirfid; + inp->coda_mkdir.Fid = *dirfid; inp->coda_mkdir.attr = *attrs; inp->coda_mkdir.name = offset; /* Venus must get null terminated 
string */ @@ -269,15 +263,15 @@ int venus_mkdir(struct super_block *sb, error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); *attrs = outp->coda_mkdir.attr; - *newfid = outp->coda_mkdir.VFid; + *newfid = outp->coda_mkdir.Fid; CODA_FREE(inp, insize); return error; } -int venus_rename(struct super_block *sb, struct ViceFid *old_fid, - struct ViceFid *new_fid, size_t old_length, +int venus_rename(struct super_block *sb, struct CodaFid *old_fid, + struct CodaFid *new_fid, size_t old_length, size_t new_length, const char *old_name, const char *new_name) { @@ -315,9 +309,9 @@ int venus_rename(struct super_block *sb, return error; } -int venus_create(struct super_block *sb, struct ViceFid *dirfid, +int venus_create(struct super_block *sb, struct CodaFid *dirfid, const char *name, int length, int excl, int mode, int rdev, - struct ViceFid *newfid, struct coda_vattr *attrs) + struct CodaFid *newfid, struct coda_vattr *attrs) { union inputArgs *inp; union outputArgs *outp; @@ -328,7 +322,7 @@ int venus_create(struct super_block *sb, insize = max_t(unsigned int, offset + length + 1, OUTSIZE(create)); UPARG(CODA_CREATE); - inp->coda_create.VFid = *dirfid; + inp->coda_create.Fid = *dirfid; inp->coda_create.attr.va_mode = mode; inp->coda_create.attr.va_rdev = rdev; inp->coda_create.excl = excl; @@ -342,13 +336,13 @@ int venus_create(struct super_block *sb, error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); *attrs = outp->coda_create.attr; - *newfid = outp->coda_create.VFid; + *newfid = outp->coda_create.Fid; CODA_FREE(inp, insize); return error; } -int venus_rmdir(struct super_block *sb, struct ViceFid *dirfid, +int venus_rmdir(struct super_block *sb, struct CodaFid *dirfid, const char *name, int length) { union inputArgs *inp; @@ -360,7 +354,7 @@ int venus_rmdir(struct super_block *sb, insize = max_t(unsigned int, offset + length + 1, OUTSIZE(rmdir)); UPARG(CODA_RMDIR); - inp->coda_rmdir.VFid = *dirfid; + inp->coda_rmdir.Fid = *dirfid; inp->coda_rmdir.name = offset; 
memcpy((char *)(inp) + offset, name, length); *((char *)inp + offset + length) = '\0'; @@ -371,7 +365,7 @@ int venus_rmdir(struct super_block *sb, return error; } -int venus_remove(struct super_block *sb, struct ViceFid *dirfid, +int venus_remove(struct super_block *sb, struct CodaFid *dirfid, const char *name, int length) { union inputArgs *inp; @@ -382,7 +376,7 @@ int venus_remove(struct super_block *sb, insize = max_t(unsigned int, offset + length + 1, OUTSIZE(remove)); UPARG(CODA_REMOVE); - inp->coda_remove.VFid = *dirfid; + inp->coda_remove.Fid = *dirfid; inp->coda_remove.name = offset; memcpy((char *)(inp) + offset, name, length); *((char *)inp + offset + length) = '\0'; @@ -393,7 +387,7 @@ int venus_remove(struct super_block *sb, return error; } -int venus_readlink(struct super_block *sb, struct ViceFid *fid, +int venus_readlink(struct super_block *sb, struct CodaFid *fid, char *buffer, int *length) { union inputArgs *inp; @@ -406,7 +400,7 @@ int venus_readlink(struct super_block *s INSIZE(readlink), OUTSIZE(readlink)+ *length + 1); UPARG(CODA_READLINK); - inp->coda_readlink.VFid = *fid; + inp->coda_readlink.Fid = *fid; error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); @@ -427,8 +421,8 @@ int venus_readlink(struct super_block *s -int venus_link(struct super_block *sb, struct ViceFid *fid, - struct ViceFid *dirfid, const char *name, int len ) +int venus_link(struct super_block *sb, struct CodaFid *fid, + struct CodaFid *dirfid, const char *name, int len ) { union inputArgs *inp; union outputArgs *outp; @@ -454,7 +448,7 @@ int venus_link(struct super_block *sb, s return error; } -int venus_symlink(struct super_block *sb, struct ViceFid *fid, +int venus_symlink(struct super_block *sb, struct CodaFid *fid, const char *name, int len, const char *symname, int symlen) { @@ -468,7 +462,7 @@ int venus_symlink(struct super_block *sb UPARG(CODA_SYMLINK); /* inp->coda_symlink.attr = *tva; XXXXXX */ - inp->coda_symlink.VFid = *fid; + inp->coda_symlink.Fid = *fid; 
/* Round up to word boundary and null terminate */ inp->coda_symlink.srcname = offset; @@ -490,7 +484,7 @@ int venus_symlink(struct super_block *sb return error; } -int venus_fsync(struct super_block *sb, struct ViceFid *fid) +int venus_fsync(struct super_block *sb, struct CodaFid *fid) { union inputArgs *inp; union outputArgs *outp; @@ -499,7 +493,7 @@ int venus_fsync(struct super_block *sb, insize=SIZE(fsync); UPARG(CODA_FSYNC); - inp->coda_fsync.VFid = *fid; + inp->coda_fsync.Fid = *fid; error = coda_upcall(coda_sbp(sb), sizeof(union inputArgs), &outsize, inp); @@ -507,7 +501,7 @@ int venus_fsync(struct super_block *sb, return error; } -int venus_access(struct super_block *sb, struct ViceFid *fid, int mask) +int venus_access(struct super_block *sb, struct CodaFid *fid, int mask) { union inputArgs *inp; union outputArgs *outp; @@ -516,7 +510,7 @@ int venus_access(struct super_block *sb, insize = SIZE(access); UPARG(CODA_ACCESS); - inp->coda_access.VFid = *fid; + inp->coda_access.Fid = *fid; inp->coda_access.flags = mask; error = coda_upcall(coda_sbp(sb), insize, &outsize, inp); @@ -526,7 +520,7 @@ int venus_access(struct super_block *sb, } -int venus_pioctl(struct super_block *sb, struct ViceFid *fid, +int venus_pioctl(struct super_block *sb, struct CodaFid *fid, unsigned int cmd, struct PioctlData *data) { union inputArgs *inp; @@ -543,7 +537,7 @@ int venus_pioctl(struct super_block *sb, goto exit; } - inp->coda_ioctl.VFid = *fid; + inp->coda_ioctl.Fid = *fid; /* the cmd field was mutated by increasing its size field to * reflect the path and follow args. 
We need to subtract that @@ -763,7 +757,7 @@ static int coda_upcall(struct coda_sb_in /* here we map positive Venus errors to kernel errors */ error = -out->oh.result; CDEBUG(D_UPCALL, - "upcall: (u,o,r) (%ld, %ld, %ld) out at %p\n", + "upcall: (u,o,r) (%d, %d, %d) out at %p\n", out->oh.unique, out->oh.opcode, out->oh.result, out); *outSize = req->uc_outSize; goto exit; @@ -865,7 +859,7 @@ static int coda_upcall(struct coda_sb_in * The last allows Venus to replace local fids with global ones * during reintegration. * - * CODA_REPLACE -- replace one ViceFid with another throughout the name cache */ + * CODA_REPLACE -- replace one CodaFid with another throughout the name cache */ int coda_downcall(int opcode, union outputArgs * out, struct super_block *sb) { @@ -887,20 +881,20 @@ int coda_downcall(int opcode, union outp } case CODA_PURGEUSER : { - struct coda_cred *cred = &out->coda_purgeuser.cred; + uid_t *uid = &out->coda_purgeuser.uid; CDEBUG(D_DOWNCALL, "CODA_PURGEUSER\n"); - if ( !cred ) { - printk("PURGEUSER: null cred!\n"); + if ( !uid ) { + printk("PURGEUSER: no user!\n"); return 0; } clstats(CODA_PURGEUSER); - coda_cache_clear_all(sb, cred); + coda_cache_clear_all(sb, uid); return(0); } case CODA_ZAPDIR : { struct inode *inode; - ViceFid *fid = &out->coda_zapdir.CodaFid; + struct CodaFid *fid = &out->coda_zapdir.Fid; CDEBUG(D_DOWNCALL, "zapdir: fid = %s...\n", coda_f2s(fid)); clstats(CODA_ZAPDIR); @@ -920,7 +914,7 @@ int coda_downcall(int opcode, union outp case CODA_ZAPFILE : { struct inode *inode; - struct ViceFid *fid = &out->coda_zapfile.CodaFid; + struct CodaFid *fid = &out->coda_zapfile.Fid; clstats(CODA_ZAPFILE); CDEBUG(D_DOWNCALL, "zapfile: fid = %s\n", coda_f2s(fid)); inode = coda_fid_to_inode(fid, sb); @@ -936,7 +930,7 @@ int coda_downcall(int opcode, union outp case CODA_PURGEFID : { struct inode *inode; - ViceFid *fid = &out->coda_purgefid.CodaFid; + struct CodaFid *fid = &out->coda_purgefid.Fid; CDEBUG(D_DOWNCALL, "purgefid: fid = %s\n", 
coda_f2s(fid)); clstats(CODA_PURGEFID); inode = coda_fid_to_inode(fid, sb); @@ -957,8 +951,8 @@ int coda_downcall(int opcode, union outp case CODA_REPLACE : { struct inode *inode; - ViceFid *oldfid = &out->coda_replace.OldFid; - ViceFid *newfid = &out->coda_replace.NewFid; + struct CodaFid *oldfid = &out->coda_replace.OldFid; + struct CodaFid *newfid = &out->coda_replace.NewFid; clstats(CODA_REPLACE); CDEBUG(D_DOWNCALL, "CODA_REPLACE\n"); inode = coda_fid_to_inode(oldfid, sb); diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/dcache.c linux-2.4.20-wolk4.3-fullkernel/fs/dcache.c --- linux-2.4.20-wolk4.2-fullkernel/fs/dcache.c 2003-05-03 02:37:24.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/dcache.c 2003-07-02 20:16:53.000000000 +0200 @@ -571,7 +571,9 @@ int shrink_dcache_memory(int priority, u count = dentry_stat.nr_unused / priority; prune_dcache(count); - return kmem_cache_shrink(dentry_cache); + if (RATE_LIMIT(HZ)) + return kmem_cache_shrink(dentry_cache); + return 0; } #define NAME_ALLOC_LEN(len) ((len+16) & ~15) @@ -1349,7 +1351,7 @@ void __init vfs_caches_init(unsigned lon { bh_cachep = kmem_cache_create("buffer_head", sizeof(struct buffer_head), 0, - SLAB_HWCACHE_ALIGN, init_buffer_head, NULL); + 0, init_buffer_head, NULL); if(!bh_cachep) panic("Cannot create buffer head SLAB cache"); diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/ext3/namei.c linux-2.4.20-wolk4.3-fullkernel/fs/ext3/namei.c --- linux-2.4.20-wolk4.2-fullkernel/fs/ext3/namei.c 2003-05-03 02:37:07.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/ext3/namei.c 2003-06-27 12:34:47.000000000 +0200 @@ -1265,12 +1265,12 @@ static int make_indexed_dir(handle_t *ha } root = (struct dx_root *) bh->b_data; - EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; bh2 = ext3_append (handle, dir, &block, &retval); if (!(bh2)) { brelse(bh); return retval; } + EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; data1 = bh2->b_data; /* The 0th block becomes the root, move the dirents out */ diff -Naurp 
linux-2.4.20-wolk4.2-fullkernel/fs/inode.c linux-2.4.20-wolk4.3-fullkernel/fs/inode.c --- linux-2.4.20-wolk4.2-fullkernel/fs/inode.c 2003-06-28 09:23:23.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/inode.c 2003-07-02 20:28:09.000000000 +0200 @@ -156,6 +156,7 @@ void inode_init_once(struct inode *inode INIT_LIST_HEAD(&inode->i_devices); sema_init(&inode->i_sem, 1); sema_init(&inode->i_zombie, 1); + init_rwsem(&inode->i_alloc_sem); spin_lock_init(&inode->i_data.i_shared_lock); i_size_ordered_init(inode); } @@ -805,7 +806,8 @@ static void _prune_icache(int goal) spin_unlock(&inode_lock); dispose_list(freeable); - kmem_cache_shrink(inode_cachep); + if (RATE_LIMIT(HZ)) + kmem_cache_shrink(inode_cachep); /* * If we didn't freed enough clean inodes @@ -818,8 +820,8 @@ static void _prune_icache(int goal) /* Excuse the double negative; the code below is emergency. */ if (goal <= 0) return; - if (freeable_lowmem() * PAGE_SIZE < 10 * inodes_stat.nr_unused * - sizeof(struct inode)) + if (inodes_stat.nr_unused * sizeof(struct inode) * 10 < + freeable_lowmem() * PAGE_SIZE) return; wakeup_bdflush(); /* @@ -875,7 +877,8 @@ static void _prune_icache(int goal) #endif /* CONFIG_HIGHMEM */ } -void prune_icache(int goal) { +void prune_icache(int goal) +{ atomic_add(goal, &kinoded_goal); if (atomic_read(&kinoded_goal) > 16) { wake_up_interruptible(&kinoded_wait); diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/iobuf.c linux-2.4.20-wolk4.3-fullkernel/fs/iobuf.c --- linux-2.4.20-wolk4.2-fullkernel/fs/iobuf.c 2003-05-05 19:47:41.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/iobuf.c 2003-06-23 17:29:25.000000000 +0200 @@ -6,11 +6,11 @@ * */ -#include #include +#include -static kmem_cache_t *kiobuf_cachep; +kmem_cache_t *kiobuf_cachep; void end_kio_request(struct kiobuf *kiobuf, int uptodate) { diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/namei.c linux-2.4.20-wolk4.3-fullkernel/fs/namei.c --- linux-2.4.20-wolk4.2-fullkernel/fs/namei.c 2003-05-06 15:52:44.000000000 +0200 
+++ linux-2.4.20-wolk4.3-fullkernel/fs/namei.c 2003-06-29 22:48:36.000000000 +0200 @@ -750,6 +750,8 @@ lookup_parent: nd->last_type = LAST_DOT; else if (this.len == 2 && this.name[1] == '.') nd->last_type = LAST_DOTDOT; + else + goto return_base; return_reval: /* * We bypassed the ordinary revalidation routines. diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/open.c linux-2.4.20-wolk4.3-fullkernel/fs/open.c --- linux-2.4.20-wolk4.2-fullkernel/fs/open.c 2003-05-03 02:37:07.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/open.c 2003-06-19 14:12:03.000000000 +0200 @@ -221,11 +221,13 @@ int do_truncate(struct dentry *dentry, l if (!gr_acl_handle_truncate(dentry, mnt)) return -EACCES; + down_write(&inode->i_alloc_sem); down(&inode->i_sem); newattrs.ia_size = length; newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; error = notify_change(dentry, &newattrs); up(&inode->i_sem); + up_write(&inode->i_alloc_sem); return error; } diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/proc/generic.c linux-2.4.20-wolk4.3-fullkernel/fs/proc/generic.c --- linux-2.4.20-wolk4.2-fullkernel/fs/proc/generic.c 2003-06-28 09:23:23.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/proc/generic.c 2003-07-03 20:32:29.000000000 +0200 @@ -518,7 +518,10 @@ struct proc_dir_entry *proc_priv_mkdir(c ent->proc_fops = &proc_dir_operations; ent->proc_iops = &proc_dir_inode_operations; - proc_register(parent, ent); + if (proc_register(parent, ent) < 0) { + kfree(ent); + ent = NULL; + } } return ent; } diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/proc/proc_misc.c linux-2.4.20-wolk4.3-fullkernel/fs/proc/proc_misc.c --- linux-2.4.20-wolk4.2-fullkernel/fs/proc/proc_misc.c 2003-06-28 09:23:23.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/proc/proc_misc.c 2003-06-19 14:18:35.000000000 +0200 @@ -52,9 +52,6 @@ */ extern int get_hardware_list(char *); extern int get_stram_list(char *); -#ifdef CONFIG_MODULES -extern int get_module_list(char *); -#endif extern int get_device_list(char *); extern int 
get_filesystem_list(char *); extern int get_exec_domain_list(char *); @@ -341,13 +338,19 @@ static int config_read_proc(char *page, #endif #ifdef CONFIG_MODULES -static int modules_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +extern struct seq_operations modules_op; +static int modules_open(struct inode *inode, struct file *file) { - int len = get_module_list(page); - return proc_calc_metrics(page, start, off, count, eof, len); + return seq_open(file, &modules_op); } +static struct file_operations proc_modules_operations = { + open: modules_open, + read: seq_read, + llseek: seq_lseek, + release: seq_release, +}; + extern struct seq_operations ksyms_op; static int ksyms_open(struct inode *inode, struct file *file) { @@ -702,9 +705,6 @@ void __init proc_misc_init(void) #ifdef CONFIG_STRAM_PROC {"stram", stram_read_proc}, #endif -#if defined(CONFIG_MODULES) && !defined(CONFIG_GRKERNSEC_PROC) - {"modules", modules_read_proc}, -#endif {"stat", kstat_read_proc}, #ifndef CONFIG_GRKERNSEC_PROC_ADD {"devices", devices_read_proc}, @@ -739,7 +739,7 @@ void __init proc_misc_init(void) #endif #if defined(CONFIG_GRKERNSEC_PROC) && defined(CONFIG_MODULES) - create_proc_read_entry("modules", gr_mode, NULL, &modules_read_proc, NULL); + create_seq_entry("modules", gr_mode, &proc_modules_operations); #endif #ifdef CONFIG_GRKERNSEC_PROC_ADD create_proc_read_entry("devices", gr_mode, NULL, &devices_read_proc, NULL); @@ -770,6 +770,9 @@ void __init proc_misc_init(void) create_seq_entry("partitions", 0, &proc_partitions_operations); #ifdef CONFIG_MODULES create_seq_entry("ksyms", gr_mode, &proc_ksyms_operations); +#if !defined(CONFIG_GRKERNSEC_PROC) + create_seq_entry("modules", 0, &proc_modules_operations); +#endif #endif #ifndef CONFIG_GRKERNSEC_PROC_ADD proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/read_write.c linux-2.4.20-wolk4.3-fullkernel/fs/read_write.c --- 
linux-2.4.20-wolk4.2-fullkernel/fs/read_write.c 2003-05-03 02:37:07.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/read_write.c 2003-06-20 23:46:25.000000000 +0200 @@ -642,7 +642,7 @@ out_nofree: /* VERIFY_WRITE actually means a read, as we write to user space */ if ((ret + (type == VERIFY_WRITE)) > 0) dnotify_parent(file->f_dentry, - (type == VERIFY_WRITE) ? DN_MODIFY : DN_ACCESS); + (type == VERIFY_WRITE) ? DN_ACCESS : DN_MODIFY); return ret; } diff -Naurp linux-2.4.20-wolk4.2-fullkernel/fs/reiserfs/stree.c linux-2.4.20-wolk4.3-fullkernel/fs/reiserfs/stree.c --- linux-2.4.20-wolk4.2-fullkernel/fs/reiserfs/stree.c 2003-05-03 01:58:42.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/fs/reiserfs/stree.c 2003-06-19 14:05:00.000000000 +0200 @@ -1893,6 +1893,8 @@ void reiserfs_do_truncate (struct reiser reiserfs_restart_transaction(th, jbegin_count) ; reiserfs_update_inode_transaction(p_s_inode) ; } + if (current->need_resched) + schedule() ; } while ( n_file_size > ROUND_UP (n_new_file_size) && search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path) == POSITION_FOUND ) ; diff -Naurp linux-2.4.20-wolk4.2-fullkernel/grsecurity/gracl.c linux-2.4.20-wolk4.3-fullkernel/grsecurity/gracl.c --- linux-2.4.20-wolk4.2-fullkernel/grsecurity/gracl.c 2003-05-10 10:41:41.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/grsecurity/gracl.c 2003-07-03 20:28:41.000000000 +0200 @@ -689,6 +689,7 @@ gracl_init(struct gr_arg *args) GR_VERSION); error = -ENOMEM; free_variables(); + goto out; } error = copy_user_acl(args); diff -Naurp linux-2.4.20-wolk4.2-fullkernel/include/asm-i386/bugs.h linux-2.4.20-wolk4.3-fullkernel/include/asm-i386/bugs.h --- linux-2.4.20-wolk4.2-fullkernel/include/asm-i386/bugs.h 2003-06-28 09:23:23.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/include/asm-i386/bugs.h 2003-07-01 17:31:46.000000000 +0200 @@ -84,12 +84,14 @@ static void __init check_fpu(void) __buggy_fxsr_alignment(); } if (cpu_has_fxsr) { - printk(KERN_INFO "Enabling 
fast FPU save and restore... "); + printk(KERN_INFO "CPU#%d: Enabling fast FPU save and restore... ", + smp_processor_id()); set_in_cr4(X86_CR4_OSFXSR); printk("done.\n"); } if (cpu_has_xmm) { - printk(KERN_INFO "Enabling unmasked SIMD FPU exception support... "); + printk(KERN_INFO "CPU#%d: Enabling unmasked SIMD FPU exception support... ", + smp_processor_id()); set_in_cr4(X86_CR4_OSXMMEXCPT); printk("done.\n"); } @@ -112,7 +114,7 @@ static void __init check_fpu(void) static void __init check_hlt(void) { - printk(KERN_INFO "Checking 'hlt' instruction... "); + printk(KERN_INFO "CPU#%d: Checking 'hlt' instruction... ", smp_processor_id()); if (!boot_cpu_data.hlt_works_ok) { printk("disabled\n"); return; @@ -131,7 +133,7 @@ static void __init check_popad(void) #ifndef CONFIG_X86_POPAD_OK int res, inp = (int) &res; - printk(KERN_INFO "Checking for popad bug... "); + printk(KERN_INFO "CPU#%d: Checking for popad bug... ", smp_processor_id()); __asm__ __volatile__( "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " : "=&a" (res) @@ -215,7 +217,7 @@ static void __init check_bugs(void) identify_cpu(&boot_cpu_data); boot_init_fpu(); #ifndef CONFIG_SMP - printk("CPU: "); + printk("CPU#%d: ", smp_processor_id()); print_cpu_info(&boot_cpu_data); #endif check_config(); diff -Naurp linux-2.4.20-wolk4.2-fullkernel/include/asm-i386/byteorder.h linux-2.4.20-wolk4.3-fullkernel/include/asm-i386/byteorder.h --- linux-2.4.20-wolk4.2-fullkernel/include/asm-i386/byteorder.h 2003-05-13 12:18:44.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/include/asm-i386/byteorder.h 2003-06-27 20:18:00.000000000 +0200 @@ -34,7 +34,7 @@ static __inline__ __const__ __u16 ___arc return x; } - +#ifndef __STRICT_ANSI__ static inline __u64 ___arch__swab64(__u64 val) { union { @@ -54,12 +54,14 @@ static inline __u64 ___arch__swab64(__u6 return v.u; } +#define __BYTEORDER_HAS_U64__ #define __arch__swab64(x) ___arch__swab64(x) + +#endif /* !__STRICT_ANSI__ */ + #define 
__arch__swab32(x) ___arch__swab32(x) #define __arch__swab16(x) ___arch__swab16(x) -#define __BYTEORDER_HAS_U64__ - #endif /* __GNUC__ */ #include diff -Naurp linux-2.4.20-wolk4.2-fullkernel/include/asm-i386/io_apic.h linux-2.4.20-wolk4.3-fullkernel/include/asm-i386/io_apic.h --- linux-2.4.20-wolk4.2-fullkernel/include/asm-i386/io_apic.h 2003-06-28 09:23:23.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/include/asm-i386/io_apic.h 2003-06-25 09:43:06.000000000 +0200 @@ -45,6 +45,11 @@ struct IO_APIC_reg_02 { __reserved_1 : 4; } __attribute__ ((packed)); +struct IO_APIC_reg_03 { + __u32 boot_DT : 1, + __reserved_1 : 31; +} __attribute__ ((packed)); + /* * # of IO-APICs and # of IRQ routing registers */ diff -Naurp linux-2.4.20-wolk4.2-fullkernel/include/asm-i386/pgtable.h linux-2.4.20-wolk4.3-fullkernel/include/asm-i386/pgtable.h --- linux-2.4.20-wolk4.2-fullkernel/include/asm-i386/pgtable.h 2003-05-13 12:18:45.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/include/asm-i386/pgtable.h 2003-06-19 14:09:07.000000000 +0200 @@ -317,7 +317,6 @@ static inline pte_t pte_mkwrite(pte_t pt static inline int ptep_test_and_clear_dirty(pte_t *ptep) { return test_and_clear_bit(_PAGE_BIT_DIRTY, ptep); } static inline int ptep_test_and_clear_young(pte_t *ptep) { return test_and_clear_bit(_PAGE_BIT_ACCESSED, ptep); } -static inline int ptep_test_young(pte_t *ptep) { return test_bit(_PAGE_BIT_ACCESSED, ptep); } static inline void ptep_set_wrprotect(pte_t *ptep) { clear_bit(_PAGE_BIT_RW, ptep); } static inline void ptep_mkdirty(pte_t *ptep) { set_bit(_PAGE_BIT_DIRTY, ptep); } diff -Naurp linux-2.4.20-wolk4.2-fullkernel/include/asm-i386/types.h linux-2.4.20-wolk4.3-fullkernel/include/asm-i386/types.h --- linux-2.4.20-wolk4.2-fullkernel/include/asm-i386/types.h 2003-05-13 12:18:44.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/include/asm-i386/types.h 2003-06-27 20:16:31.000000000 +0200 @@ -17,10 +17,8 @@ typedef unsigned short __u16; typedef __signed__ int __s32; typedef 
unsigned int __u32; -#if defined(__GNUC__) && !defined(__STRICT_ANSI__) typedef __signed__ long long __s64; typedef unsigned long long __u64; -#endif /* * These aren't exported outside the kernel to avoid name space clashes diff -Naurp linux-2.4.20-wolk4.2-fullkernel/include/asm-x86_64/pgtable.h linux-2.4.20-wolk4.3-fullkernel/include/asm-x86_64/pgtable.h --- linux-2.4.20-wolk4.2-fullkernel/include/asm-x86_64/pgtable.h 2003-05-03 02:37:11.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/include/asm-x86_64/pgtable.h 2003-06-19 13:37:25.000000000 +0200 @@ -329,7 +329,6 @@ extern inline pte_t pte_mkyoung(pte_t pt extern inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; } static inline int ptep_test_and_clear_dirty(pte_t *ptep) { return test_and_clear_bit(_PAGE_BIT_DIRTY, ptep); } static inline int ptep_test_and_clear_young(pte_t *ptep) { return test_and_clear_bit(_PAGE_BIT_ACCESSED, ptep); } -static inline int ptep_test_young(pte_t *ptep) { return test_bit(_PAGE_BIT_ACCESSED, ptep); } static inline void ptep_set_wrprotect(pte_t *ptep) { clear_bit(_PAGE_BIT_RW, ptep); } static inline void ptep_mkdirty(pte_t *ptep) { set_bit(_PAGE_BIT_DIRTY, ptep); } diff -Naurp linux-2.4.20-wolk4.2-fullkernel/include/linux/coda.h linux-2.4.20-wolk4.3-fullkernel/include/linux/coda.h --- linux-2.4.20-wolk4.2-fullkernel/include/linux/coda.h 2001-04-26 01:18:54.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/include/linux/coda.h 2003-06-29 22:48:36.000000000 +0200 @@ -112,13 +112,6 @@ typedef unsigned long long u_quad_t; #define cdev_t dev_t #endif -#ifdef __CYGWIN32__ -struct timespec { - time_t tv_sec; /* seconds */ - long tv_nsec; /* nanoseconds */ -}; -#endif - #ifndef __BIT_TYPES_DEFINED__ #define __BIT_TYPES_DEFINED__ typedef signed char int8_t; @@ -193,54 +186,16 @@ struct venus_dirent { #endif -#ifndef _FID_T_ -#define _FID_T_ 1 -typedef u_long VolumeId; -typedef u_long VnodeId; -typedef u_long Unique_t; -typedef u_long 
FileVersion; -#endif +struct CodaFid { + u_int32_t opaque[4]; +}; -#ifndef _VICEFID_T_ -#define _VICEFID_T_ 1 -typedef struct ViceFid { - VolumeId Volume; - VnodeId Vnode; - Unique_t Unique; -} ViceFid; -#endif /* VICEFID */ - - -#ifdef __linux__ -static __inline__ ino_t coda_f2i(struct ViceFid *fid) +static __inline__ ino_t coda_f2i(struct CodaFid *fid) { - if ( ! fid ) - return 0; - if (fid->Vnode == 0xfffffffe || fid->Vnode == 0xffffffff) - return ((fid->Volume << 20) | (fid->Unique & 0xfffff)); - else - return (fid->Unique + (fid->Vnode<<10) + (fid->Volume<<20)); + if (!fid) return 0; + return (fid->opaque[3] ^ (fid->opaque[2]<<10) ^ (fid->opaque[1]<<20) ^ + fid->opaque[0]); } - -#else -#define coda_f2i(fid)\ - ((fid) ? ((fid)->Unique + ((fid)->Vnode<<10) + ((fid)->Volume<<20)) : 0) -#endif - - -#ifndef _VUID_T_ -#define _VUID_T_ -typedef u_int32_t vuid_t; -typedef u_int32_t vgid_t; -#endif /*_VUID_T_ */ - -#ifndef _CODACRED_T_ -#define _CODACRED_T_ -struct coda_cred { - vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, efftve, set, fs uid*/ - vgid_t cr_groupid, cr_egid, cr_sgid, cr_fsgid; /* same for groups */ -}; -#endif #ifndef _VENUS_VATTR_T_ #define _VENUS_VATTR_T_ @@ -253,8 +208,8 @@ struct coda_vattr { long va_type; /* vnode type (for create) */ u_short va_mode; /* files access mode and type */ short va_nlink; /* number of references to file */ - vuid_t va_uid; /* owner user id */ - vgid_t va_gid; /* owner group id */ + uid_t va_uid; /* owner user id */ + gid_t va_gid; /* owner group id */ long va_fileid; /* file id */ u_quad_t va_size; /* file size in bytes */ long va_blocksize; /* blocksize preferred for i/o */ @@ -328,32 +283,32 @@ struct coda_statfs { #if 0 #define CODA_KERNEL_VERSION 0 /* don't care about kernel version number */ #define CODA_KERNEL_VERSION 1 /* The old venus 4.6 compatible interface */ -#endif #define CODA_KERNEL_VERSION 2 /* venus_lookup gets an extra parameter */ +#endif +#define CODA_KERNEL_VERSION 3 /* 128-bit file 
identifiers */ /* * Venus <-> Coda RPC arguments */ struct coda_in_hdr { - unsigned long opcode; - unsigned long unique; /* Keep multiple outstanding msgs distinct */ - u_short pid; /* Common to all */ - u_short pgid; /* Common to all */ - u_short sid; /* Common to all */ - struct coda_cred cred; /* Common to all */ + u_int32_t opcode; + u_int32_t unique; /* Keep multiple outstanding msgs distinct */ + pid_t pid; + pid_t pgid; + uid_t uid; }; /* Really important that opcode and unique are 1st two fields! */ struct coda_out_hdr { - unsigned long opcode; - unsigned long unique; - unsigned long result; + u_int32_t opcode; + u_int32_t unique; + u_int32_t result; }; /* coda_root: NO_IN */ struct coda_root_out { struct coda_out_hdr oh; - ViceFid VFid; + struct CodaFid Fid; }; struct coda_root_in { @@ -363,7 +318,7 @@ struct coda_root_in { /* coda_open: */ struct coda_open_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; int flags; }; @@ -377,7 +332,7 @@ struct coda_open_out { /* coda_store: */ struct coda_store_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; int flags; }; @@ -388,7 +343,7 @@ struct coda_store_out { /* coda_release: */ struct coda_release_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; int flags; }; @@ -399,7 +354,7 @@ struct coda_release_out { /* coda_close: */ struct coda_close_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; int flags; }; @@ -410,7 +365,7 @@ struct coda_close_out { /* coda_ioctl: */ struct coda_ioctl_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; int cmd; int len; int rwflag; @@ -427,7 +382,7 @@ struct coda_ioctl_out { /* coda_getattr: */ struct coda_getattr_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; }; struct coda_getattr_out { @@ -439,7 +394,7 @@ struct coda_getattr_out { /* coda_setattr: NO_OUT */ struct coda_setattr_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; struct coda_vattr attr; }; @@ -450,7 
+405,7 @@ struct coda_setattr_out { /* coda_access: NO_OUT */ struct coda_access_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; int flags; }; @@ -466,14 +421,14 @@ struct coda_access_out { /* coda_lookup: */ struct coda_lookup_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; int name; /* Place holder for data. */ int flags; }; struct coda_lookup_out { struct coda_out_hdr oh; - ViceFid VFid; + struct CodaFid Fid; int vtype; }; @@ -481,7 +436,7 @@ struct coda_lookup_out { /* coda_create: */ struct coda_create_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; struct coda_vattr attr; int excl; int mode; @@ -490,7 +445,7 @@ struct coda_create_in { struct coda_create_out { struct coda_out_hdr oh; - ViceFid VFid; + struct CodaFid Fid; struct coda_vattr attr; }; @@ -498,7 +453,7 @@ struct coda_create_out { /* coda_remove: NO_OUT */ struct coda_remove_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; int name; /* Place holder for data. */ }; @@ -509,8 +464,8 @@ struct coda_remove_out { /* coda_link: NO_OUT */ struct coda_link_in { struct coda_in_hdr ih; - ViceFid sourceFid; /* cnode to link *to* */ - ViceFid destFid; /* Directory in which to place link */ + struct CodaFid sourceFid; /* cnode to link *to* */ + struct CodaFid destFid; /* Directory in which to place link */ int tname; /* Place holder for data. */ }; @@ -522,9 +477,9 @@ struct coda_link_out { /* coda_rename: NO_OUT */ struct coda_rename_in { struct coda_in_hdr ih; - ViceFid sourceFid; + struct CodaFid sourceFid; int srcname; - ViceFid destFid; + struct CodaFid destFid; int destname; }; @@ -535,14 +490,14 @@ struct coda_rename_out { /* coda_mkdir: */ struct coda_mkdir_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; struct coda_vattr attr; int name; /* Place holder for data. 
*/ }; struct coda_mkdir_out { struct coda_out_hdr oh; - ViceFid VFid; + struct CodaFid Fid; struct coda_vattr attr; }; @@ -550,7 +505,7 @@ struct coda_mkdir_out { /* coda_rmdir: NO_OUT */ struct coda_rmdir_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; int name; /* Place holder for data. */ }; @@ -561,7 +516,7 @@ struct coda_rmdir_out { /* coda_symlink: NO_OUT */ struct coda_symlink_in { struct coda_in_hdr ih; - ViceFid VFid; /* Directory to put symlink in */ + struct CodaFid Fid; /* Directory to put symlink in */ int srcname; struct coda_vattr attr; int tname; @@ -574,7 +529,7 @@ struct coda_symlink_out { /* coda_readlink: */ struct coda_readlink_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; }; struct coda_readlink_out { @@ -587,7 +542,7 @@ struct coda_readlink_out { /* coda_fsync: NO_OUT */ struct coda_fsync_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; }; struct coda_fsync_out { @@ -597,12 +552,12 @@ struct coda_fsync_out { /* coda_vget: */ struct coda_vget_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; }; struct coda_vget_out { struct coda_out_hdr oh; - ViceFid VFid; + struct CodaFid Fid; int vtype; }; @@ -615,50 +570,49 @@ struct coda_vget_out { /* CODA_PURGEUSER is a venus->kernel call */ struct coda_purgeuser_out { struct coda_out_hdr oh; - struct coda_cred cred; + uid_t uid; }; /* coda_zapfile: */ /* CODA_ZAPFILE is a venus->kernel call */ struct coda_zapfile_out { struct coda_out_hdr oh; - ViceFid CodaFid; + struct CodaFid Fid; }; /* coda_zapdir: */ /* CODA_ZAPDIR is a venus->kernel call */ struct coda_zapdir_out { struct coda_out_hdr oh; - ViceFid CodaFid; + struct CodaFid Fid; }; /* coda_zapnode: */ /* CODA_ZAPVNODE is a venus->kernel call */ struct coda_zapvnode_out { struct coda_out_hdr oh; - struct coda_cred cred; - ViceFid VFid; + struct CodaFid Fid; }; /* coda_purgefid: */ /* CODA_PURGEFID is a venus->kernel call */ struct coda_purgefid_out { struct coda_out_hdr oh; - 
ViceFid CodaFid; + struct CodaFid Fid; }; /* coda_replace: */ /* CODA_REPLACE is a venus->kernel call */ struct coda_replace_out { /* coda_replace is a venus->kernel call */ struct coda_out_hdr oh; - ViceFid NewFid; - ViceFid OldFid; + struct CodaFid NewFid; + struct CodaFid OldFid; }; /* coda_open_by_fd: */ struct coda_open_by_fd_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; int flags; }; @@ -674,7 +628,7 @@ struct coda_open_by_fd_out { /* coda_open_by_path: */ struct coda_open_by_path_in { struct coda_in_hdr ih; - ViceFid VFid; + struct CodaFid Fid; int flags; }; @@ -785,10 +739,9 @@ struct PioctlData { #define CTL_INO -1 #define CTL_FILE "/coda/.CONTROL" - -#define IS_CTL_FID(fidp) ((fidp)->Volume == CTL_VOL &&\ - (fidp)->Vnode == CTL_VNO &&\ - (fidp)->Unique == CTL_UNI) +#define IS_CTL_FID(fidp) ((fidp)->opaque[1] == CTL_VOL &&\ + (fidp)->opaque[2] == CTL_VNO &&\ + (fidp)->opaque[3] == CTL_UNI) /* Data passed to mount */ diff -Naurp linux-2.4.20-wolk4.2-fullkernel/include/linux/coda_cache.h linux-2.4.20-wolk4.3-fullkernel/include/linux/coda_cache.h --- linux-2.4.20-wolk4.2-fullkernel/include/linux/coda_cache.h 2001-06-12 20:06:54.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/include/linux/coda_cache.h 2003-06-29 22:48:36.000000000 +0200 @@ -13,7 +13,7 @@ /* credential cache */ void coda_cache_enter(struct inode *inode, int mask); void coda_cache_clear_inode(struct inode *); -void coda_cache_clear_all(struct super_block *sb, struct coda_cred *cred); +void coda_cache_clear_all(struct super_block *sb, uid_t *uid); int coda_cache_check(struct inode *inode, int mask); /* for downcalls and attributes and lookups */ diff -Naurp linux-2.4.20-wolk4.2-fullkernel/include/linux/coda_fs_i.h linux-2.4.20-wolk4.3-fullkernel/include/linux/coda_fs_i.h --- linux-2.4.20-wolk4.2-fullkernel/include/linux/coda_fs_i.h 2003-05-13 12:18:44.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/include/linux/coda_fs_i.h 2003-06-29 22:48:36.000000000 +0200 @@ 
-17,11 +17,11 @@ * coda fs inode data */ struct coda_inode_info { - struct ViceFid c_fid; /* Coda identifier */ + struct CodaFid c_fid; /* Coda identifier */ u_short c_flags; /* flags (see below) */ struct list_head c_cilist; /* list of all coda inodes */ int c_mapcount; /* how often is this inode mmapped */ - struct coda_cred c_cached_cred; /* credentials of cached perms */ + uid_t c_cached_uid; /* credentials of cached perms */ unsigned int c_cached_perm; /* cached access permissions */ }; @@ -33,7 +33,6 @@ struct coda_file_info { int cfi_magic; /* magic number */ int cfi_mapcount; /* how often this file is mapped */ struct file *cfi_container; /* container file for this cnode */ - struct coda_cred cfi_cred; /* credentials of opener */ }; #define CODA_FTOC(file) ((struct coda_file_info *)((file)->private_data)) @@ -44,11 +43,11 @@ struct coda_file_info { #define C_DYING 0x4 /* from venus (which died) */ #define C_PURGE 0x8 -int coda_cnode_make(struct inode **, struct ViceFid *, struct super_block *); -struct inode *coda_iget(struct super_block *sb, struct ViceFid *fid, struct coda_vattr *attr); +int coda_cnode_make(struct inode **, struct CodaFid *, struct super_block *); +struct inode *coda_iget(struct super_block *sb, struct CodaFid *fid, struct coda_vattr *attr); int coda_cnode_makectl(struct inode **inode, struct super_block *sb); -struct inode *coda_fid_to_inode(ViceFid *fid, struct super_block *sb); -void coda_replace_fid(struct inode *, ViceFid *, ViceFid *); +struct inode *coda_fid_to_inode(struct CodaFid *fid, struct super_block *sb); +void coda_replace_fid(struct inode *, struct CodaFid *, struct CodaFid *); #endif #endif diff -Naurp linux-2.4.20-wolk4.2-fullkernel/include/linux/coda_linux.h linux-2.4.20-wolk4.3-fullkernel/include/linux/coda_linux.h --- linux-2.4.20-wolk4.2-fullkernel/include/linux/coda_linux.h 2002-09-27 23:26:08.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/include/linux/coda_linux.h 2003-06-29 22:48:36.000000000 +0200 @@ -42,7 
+42,7 @@ int coda_release(struct inode *i, struct int coda_permission(struct inode *inode, int mask); int coda_revalidate_inode(struct dentry *); int coda_notify_change(struct dentry *, struct iattr *); -int coda_isnullfid(ViceFid *fid); +int coda_isnullfid(struct CodaFid *fid); /* global variables */ extern int coda_debug; @@ -50,20 +50,17 @@ extern int coda_access_cache; extern int coda_fake_statfs; /* this file: heloers */ -static __inline__ struct ViceFid *coda_i2f(struct inode *); +static __inline__ struct CodaFid *coda_i2f(struct inode *); static __inline__ char *coda_i2s(struct inode *); static __inline__ void coda_flag_inode(struct inode *, int flag); -char *coda_f2s(ViceFid *f); +char *coda_f2s(struct CodaFid *f); int coda_isroot(struct inode *i); int coda_iscontrol(const char *name, size_t length); -void coda_load_creds(struct coda_cred *cred); void coda_vattr_to_iattr(struct inode *, struct coda_vattr *); void coda_iattr_to_vattr(struct iattr *, struct coda_vattr *); unsigned short coda_flags_to_cflags(unsigned short); void print_vattr( struct coda_vattr *attr ); -int coda_cred_ok(struct coda_cred *cred); -int coda_cred_eq(struct coda_cred *cred1, struct coda_cred *cred2); /* sysctl.h */ void coda_sysctl_init(void); @@ -109,7 +106,7 @@ void coda_sysctl_clean(void); #define ITOC(inode) (&((inode)->u.coda_i)) -static __inline__ struct ViceFid *coda_i2f(struct inode *inode) +static __inline__ struct CodaFid *coda_i2f(struct inode *inode) { return &(ITOC(inode)->c_fid); } diff -Naurp linux-2.4.20-wolk4.2-fullkernel/include/linux/coda_psdev.h linux-2.4.20-wolk4.3-fullkernel/include/linux/coda_psdev.h --- linux-2.4.20-wolk4.2-fullkernel/include/linux/coda_psdev.h 2002-09-27 23:26:08.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/include/linux/coda_psdev.h 2003-06-29 22:48:36.000000000 +0200 @@ -32,46 +32,44 @@ static inline struct coda_sb_info *coda_ /* upcalls */ -int venus_rootfid(struct super_block *sb, ViceFid *fidp); -int venus_getattr(struct 
super_block *sb, struct ViceFid *fid, +int venus_rootfid(struct super_block *sb, struct CodaFid *fidp); +int venus_getattr(struct super_block *sb, struct CodaFid *fid, struct coda_vattr *attr); -int venus_setattr(struct super_block *, struct ViceFid *, +int venus_setattr(struct super_block *, struct CodaFid *, struct coda_vattr *); -int venus_lookup(struct super_block *sb, struct ViceFid *fid, +int venus_lookup(struct super_block *sb, struct CodaFid *fid, const char *name, int length, int *type, - struct ViceFid *resfid); -int venus_store(struct super_block *sb, struct ViceFid *fid, int flags, - struct coda_cred *); -int venus_release(struct super_block *sb, struct ViceFid *fid, int flags); -int venus_close(struct super_block *sb, struct ViceFid *fid, int flags, - struct coda_cred *); -int venus_open(struct super_block *sb, struct ViceFid *fid, + struct CodaFid *resfid); +int venus_store(struct super_block *sb, struct CodaFid *fid, int flags, uid_t); +int venus_release(struct super_block *sb, struct CodaFid *fid, int flags); +int venus_close(struct super_block *sb, struct CodaFid *fid, int flags, uid_t); +int venus_open(struct super_block *sb, struct CodaFid *fid, int flags, struct file **f); -int venus_mkdir(struct super_block *sb, struct ViceFid *dirfid, +int venus_mkdir(struct super_block *sb, struct CodaFid *dirfid, const char *name, int length, - struct ViceFid *newfid, struct coda_vattr *attrs); -int venus_create(struct super_block *sb, struct ViceFid *dirfid, + struct CodaFid *newfid, struct coda_vattr *attrs); +int venus_create(struct super_block *sb, struct CodaFid *dirfid, const char *name, int length, int excl, int mode, int rdev, - struct ViceFid *newfid, struct coda_vattr *attrs) ; -int venus_rmdir(struct super_block *sb, struct ViceFid *dirfid, + struct CodaFid *newfid, struct coda_vattr *attrs) ; +int venus_rmdir(struct super_block *sb, struct CodaFid *dirfid, const char *name, int length); -int venus_remove(struct super_block *sb, struct ViceFid 
*dirfid, +int venus_remove(struct super_block *sb, struct CodaFid *dirfid, const char *name, int length); -int venus_readlink(struct super_block *sb, struct ViceFid *fid, +int venus_readlink(struct super_block *sb, struct CodaFid *fid, char *buffer, int *length); -int venus_rename(struct super_block *, struct ViceFid *new_fid, - struct ViceFid *old_fid, size_t old_length, +int venus_rename(struct super_block *, struct CodaFid *new_fid, + struct CodaFid *old_fid, size_t old_length, size_t new_length, const char *old_name, const char *new_name); -int venus_link(struct super_block *sb, struct ViceFid *fid, - struct ViceFid *dirfid, const char *name, int len ); -int venus_symlink(struct super_block *sb, struct ViceFid *fid, +int venus_link(struct super_block *sb, struct CodaFid *fid, + struct CodaFid *dirfid, const char *name, int len ); +int venus_symlink(struct super_block *sb, struct CodaFid *fid, const char *name, int len, const char *symname, int symlen); -int venus_access(struct super_block *sb, struct ViceFid *fid, int mask); -int venus_pioctl(struct super_block *sb, struct ViceFid *fid, +int venus_access(struct super_block *sb, struct CodaFid *fid, int mask); +int venus_pioctl(struct super_block *sb, struct CodaFid *fid, unsigned int cmd, struct PioctlData *data); int coda_downcall(int opcode, union outputArgs *out, struct super_block *sb); -int venus_fsync(struct super_block *sb, struct ViceFid *fid); +int venus_fsync(struct super_block *sb, struct CodaFid *fid); int venus_statfs(struct super_block *sb, struct statfs *sfs); diff -Naurp linux-2.4.20-wolk4.2-fullkernel/include/linux/compiler.h linux-2.4.20-wolk4.3-fullkernel/include/linux/compiler.h --- linux-2.4.20-wolk4.2-fullkernel/include/linux/compiler.h 2003-05-03 02:37:54.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/include/linux/compiler.h 2003-06-19 14:09:08.000000000 +0200 @@ -1,12 +1,6 @@ #ifndef __LINUX_COMPILER_H #define __LINUX_COMPILER_H -#if (__GNUC__ > 3) || (__GNUC__ == 3 && 
__GNUC_MINOR__ >= 1) -#define inline __inline__ __attribute__((always_inline)) -#define __inline__ __inline__ __attribute__((always_inline)) -#define __inline __inline__ __attribute__((always_inline)) -#endif - /* Somewhere in the middle of the GCC 2.96 development cycle, we implemented a mechanism by which the user can annotate likely branch directions and expect the blocks to be reordered appropriately. Define __builtin_expect diff -Naurp linux-2.4.20-wolk4.2-fullkernel/include/linux/fs.h linux-2.4.20-wolk4.3-fullkernel/include/linux/fs.h --- linux-2.4.20-wolk4.2-fullkernel/include/linux/fs.h 2003-06-28 09:23:23.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/include/linux/fs.h 2003-07-03 21:15:07.000000000 +0200 @@ -502,6 +502,7 @@ struct inode { unsigned long i_version; unsigned short i_bytes; struct semaphore i_sem; + struct rw_semaphore i_alloc_sem; struct semaphore i_zombie; struct inode_operations *i_op; struct file_operations *i_fop; /* former ->i_op->default_file_ops */ @@ -1431,6 +1432,7 @@ static inline int fsync_inode_data_buffe return fsync_buffers_list(&inode->i_dirty_data_buffers); } extern int inode_has_buffers(struct inode *); +extern int do_fdatasync(struct file *); extern int filemap_fdatasync(struct address_space *); extern int filemap_fdatawait(struct address_space *); extern void sync_supers(kdev_t dev, int wait); @@ -1651,9 +1653,13 @@ extern int brw_page(int, struct page *, typedef int (get_block_t)(struct inode*,long,struct buffer_head*,int); +#ifdef CONFIG_HIGHMEM +extern int try_to_reclaim_buffers(int, unsigned int); +#else +#define try_to_reclaim_buffers(x, y) do { ; } while(0) +#endif /* Generic buffer handling for block filesystems.. 
*/ extern int try_to_release_page(struct page * page, int gfp_mask); -extern int try_to_reclaim_buffers(int, unsigned int); extern int discard_bh_page(struct page *, unsigned long, int); extern void discard_buffer(struct buffer_head *bh) ; #define block_flushpage(page, offset) discard_bh_page(page, offset, 1) diff -Naurp linux-2.4.20-wolk4.2-fullkernel/include/linux/iobuf.h linux-2.4.20-wolk4.3-fullkernel/include/linux/iobuf.h --- linux-2.4.20-wolk4.2-fullkernel/include/linux/iobuf.h 2003-06-28 09:23:23.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/include/linux/iobuf.h 2003-06-23 17:29:31.000000000 +0200 @@ -11,6 +11,7 @@ #include #include #include +#include #include /* @@ -76,6 +77,7 @@ int expand_kiobuf(struct kiobuf *, int); void kiobuf_wait_for_io(struct kiobuf *); extern int alloc_kiobuf_bhs(struct kiobuf *); extern void free_kiobuf_bhs(struct kiobuf *); +extern kmem_cache_t *kiobuf_cachep; /* fs/buffer.c */ diff -Naurp linux-2.4.20-wolk4.2-fullkernel/include/linux/sysctl.h linux-2.4.20-wolk4.3-fullkernel/include/linux/sysctl.h --- linux-2.4.20-wolk4.2-fullkernel/include/linux/sysctl.h 2003-06-28 09:23:23.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/include/linux/sysctl.h 2003-06-21 12:17:34.000000000 +0200 @@ -156,6 +156,8 @@ enum VM_MAX_READAHEAD=13, /* Max file readahead */ VM_HEAP_STACK_GAP=14, /* int: page gap between heap and stack */ VM_PAGEBUF=15, /* struct: Control pagebuf parameters */ + VM_OOM_PARENT_MAX=16, /* Max childs per parent oom-killed before we kill the parent */ + VM_OOM_PARENT_EXPIRE=17 /* Min numbers of seconds before we forget about parents sins */ }; /* Tunable scheduler parameters in /proc/sys/sched/ */ diff -Naurp linux-2.4.20-wolk4.2-fullkernel/include/linux/timer.h linux-2.4.20-wolk4.3-fullkernel/include/linux/timer.h --- linux-2.4.20-wolk4.2-fullkernel/include/linux/timer.h 2003-05-13 12:18:44.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/include/linux/timer.h 2003-07-02 20:16:53.000000000 +0200 @@ -143,4 
+143,13 @@ static inline int timer_pending(const ti #define time_after_eq(a,b) ((long)(a) - (long)(b) >= 0) #define time_before_eq(a,b) time_after_eq(b,a) +#define RATE_LIMIT(interval) \ +({ \ + static unsigned long expires; \ + int ok = time_after(jiffies, expires); \ + if (ok) \ + expires = jiffies + (interval); \ + ok; \ +}) + #endif diff -Naurp linux-2.4.20-wolk4.2-fullkernel/init/main.c linux-2.4.20-wolk4.3-fullkernel/init/main.c --- linux-2.4.20-wolk4.2-fullkernel/init/main.c 2003-06-28 09:23:24.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/init/main.c 2003-07-01 17:31:46.000000000 +0200 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -224,7 +225,7 @@ void __init calibrate_delay(void) loops_per_jiffy = (1<<12); - printk("Calibrating delay loop... "); + printk("CPU#%d: Calibrating delay loop... ", smp_processor_id()); while (loops_per_jiffy <<= 1) { /* wait for "start of" clock tick */ ticks = jiffies; diff -Naurp linux-2.4.20-wolk4.2-fullkernel/kernel/module.c linux-2.4.20-wolk4.3-fullkernel/kernel/module.c --- linux-2.4.20-wolk4.2-fullkernel/kernel/module.c 2003-06-28 09:23:24.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/kernel/module.c 2003-06-19 14:16:34.000000000 +0200 @@ -1289,6 +1289,89 @@ fini: return PAGE_SIZE - left; } +static void *s_modules_start(struct seq_file *m, loff_t *pos) +{ + struct module *v; + loff_t n = *pos; + + lock_kernel(); + for (v = module_list, n = *pos; v != &kernel_module; n--, v = v->next) { + if (n == 0) + return v; + } + unlock_kernel(); + return NULL; +} + +static void *s_modules_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct module *v = p; + (*pos)++; + v = v->next; + if (v == &kernel_module) { + unlock_kernel(); + return NULL; + } + return v; +} + +static void s_modules_stop(struct seq_file *m, void *p) +{ + if (p) + unlock_kernel(); +} + +static int s_modules_show(struct seq_file *m, void *p) +{ + struct module *v = p; + struct module_ref *ref; + + seq_printf(m, 
"%-20s%8lu", v->name, v->size); + + if (v->flags & MOD_RUNNING) + seq_printf(m, "%4ld", + (mod_member_present(v, can_unload) + && v->can_unload + ? -1L : (long)atomic_read(&v->uc.usecount))); + + if (v->flags & MOD_DELETED) + seq_puts(m, " (deleted)"); + else if (v->flags & MOD_RUNNING) { + if (v->flags & MOD_AUTOCLEAN) + seq_puts(m, " (autoclean)"); + if (!(v->flags & MOD_USED_ONCE)) + seq_puts(m, " (unused)"); + } + else if (v->flags & MOD_INITIALIZING) + seq_puts(m, " (initializing)"); + else + seq_puts(m, " (uninitialized)"); + + if ((ref = v->refs) != NULL) { + seq_puts(m, " ["); + while (1) { + seq_puts(m, ref->ref->name); + + if ((ref = ref->next_ref) != NULL) + seq_putc(m, ' '); + else + break; + } + seq_putc(m, ']'); + } + + seq_putc(m, '\n'); + + return 0; +} + +struct seq_operations modules_op = { + start: s_modules_start, + next: s_modules_next, + stop: s_modules_stop, + show: s_modules_show +}; + /* * Called by the /proc file system to return a current list of ksyms. */ diff -Naurp linux-2.4.20-wolk4.2-fullkernel/kernel/sched.c linux-2.4.20-wolk4.3-fullkernel/kernel/sched.c --- linux-2.4.20-wolk4.2-fullkernel/kernel/sched.c 2003-06-28 09:23:24.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/kernel/sched.c 2003-06-26 10:19:04.000000000 +0200 @@ -60,7 +60,7 @@ #warning INFO: Desktop Scheduler Tweaks will be used. int min_timeslice = (( 10 * HZ) / 1000 ?: 1); -int max_timeslice = ( 40 * HZ) / 1000; +int max_timeslice = (( 40 * HZ) / 1000 ?: 1); int child_penalty = 95; int parent_penalty = 100; int exit_weight = 3; @@ -73,7 +73,7 @@ int starvation_limit = 2 * HZ; #warning INFO: Server Scheduler Tweaks will be used. 
int min_timeslice = (( 10 * HZ) / 1000 ?: 1); -int max_timeslice = ( 200 * HZ) / 1000; +int max_timeslice = ((200 * HZ) / 1000 ?: 1); int child_penalty = 50; int parent_penalty = 100; int exit_weight = 3; @@ -934,6 +934,30 @@ void scheduler_tick(int user_tick, int s enqueue_task(p, rq->expired); } else enqueue_task(p, rq->active); +#if defined (CONFIG_SCHED_DESKTOP) + } else { + /* + * Prevent a too long timeslice allowing a task to monopolize + * the CPU. We do this by splitting up the timeslice into + * smaller pieces. + * + * Note: this does not mean the task's timeslices expire or + * get lost in any way, they just might be preempted by + * another task of equal priority. (one with higher + * priority would have preempted this task already.) We + * requeue this task to the end of the list on this priority + * level, which is in essence a round-robin of tasks with + * equal priority. + */ + if (!(p->time_slice % MIN_TIMESLICE) && + (p->array == rq->active)) { + dequeue_task(p, rq->active); + set_tsk_need_resched(p); + p->prio = effective_prio(p); + enqueue_task(p, rq->active); + } + +#endif /* CONFIG_SCHED_DESKTOP */ } out: #ifdef CONFIG_SMP @@ -1461,10 +1485,14 @@ static int setscheduler(pid_t pid, int p if (array) { activate_task(p, task_rq(p)); /* - * Reschedule if on a CPU and the priority dropped, or not on - * a CPU and the priority rose above the currently running task. + * Reschedule if we are currently running on this runqueue and + * our priority decreased, or if we are not currently running on + * this runqueue and our priority is higher than the current's */ - if ((rq->curr == p) ? 
(p->prio > oldprio) : (p->prio < rq->curr->prio)) + if (rq->curr == p) { + if (p->prio > oldprio) + resched_task(rq->curr); + } else if (p->prio < rq->curr->prio) resched_task(rq->curr); } diff -Naurp linux-2.4.20-wolk4.2-fullkernel/kernel/sysctl.c linux-2.4.20-wolk4.3-fullkernel/kernel/sysctl.c --- linux-2.4.20-wolk4.2-fullkernel/kernel/sysctl.c 2003-06-28 09:23:24.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/kernel/sysctl.c 2003-06-21 12:17:34.000000000 +0200 @@ -74,6 +74,8 @@ extern char core_name_format []; extern int cad_pid; extern int sysctl_sched_yield_scale; extern int allow_setid_core; +extern unsigned int oom_parent_max; +extern unsigned int oom_parent_expire; /* Tunable scheduler parameters */ extern int min_timeslice; @@ -539,11 +541,11 @@ static ctl_table vm_table[] = { sizeof(struct cache_limits), 0644, NULL, &proc_dointvec}, {VM_PAGERDAEMON, "kswapd", &pager_daemon, sizeof(pager_daemon_t), 0644, NULL, &proc_dointvec}, - {VM_PGT_CACHE, "pagetable_cache", + {VM_PGT_CACHE, "pagetable_cache", &pgt_cache_water, 2*sizeof(int), 0644, NULL, &proc_dointvec}, - {VM_PAGE_CLUSTER, "page-cluster", + {VM_PAGE_CLUSTER, "page-cluster", &page_cluster, sizeof(int), 0644, NULL, &proc_dointvec}, - {VM_HEAP_STACK_GAP, "heap-stack-gap", + {VM_HEAP_STACK_GAP, "heap-stack-gap", &heap_stack_gap, sizeof(int), 0644, NULL, &proc_dointvec}, {VM_MIN_READAHEAD, "min-readahead", &vm_min_readahead,sizeof(int), 0644, NULL, &proc_dointvec}, @@ -551,6 +553,10 @@ static ctl_table vm_table[] = { &vm_max_readahead,sizeof(int), 0644, NULL, &proc_dointvec}, {VM_MAX_MAP_COUNT, "max_map_count", &max_map_count, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_OOM_PARENT_MAX, "oom_parent_max", + &oom_parent_max, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_OOM_PARENT_EXPIRE, "oom_parent_expire", + &oom_parent_expire, sizeof(int), 0644, NULL, &proc_dointvec}, {0} }; diff -Naurp linux-2.4.20-wolk4.2-fullkernel/mm/filemap.c linux-2.4.20-wolk4.3-fullkernel/mm/filemap.c --- 
linux-2.4.20-wolk4.2-fullkernel/mm/filemap.c 2003-06-28 09:23:15.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/mm/filemap.c 2003-06-27 10:48:06.000000000 +0200 @@ -829,7 +829,7 @@ void wakeup_page_waiters(struct page * p head = page_waitqueue(page); if (waitqueue_active(head)) - wake_up(head); + sync_page(page); } /* @@ -1728,6 +1728,12 @@ no_cached_page: UPDATE_ATIME(inode); } +/* + * i_sem and i_alloc_sem should be held already. i_sem may be dropped + * later once we've mapped the new IO. i_alloc_sem is kept until the IO + * completes. + */ + static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset) { ssize_t retval; @@ -1838,6 +1844,9 @@ ssize_t generic_file_read(struct file * if (filp->f_flags & O_DIRECT) goto o_direct; + if (filp->f_flags & O_STREAMING) + shrink_pagecache(filp, (*ppos) >> PAGE_CACHE_SHIFT); + retval = -EFAULT; if (access_ok(VERIFY_WRITE, buf, count)) { retval = 0; @@ -1868,12 +1877,16 @@ ssize_t generic_file_read(struct file * retval = 0; if (!count) goto out; /* skip atime */ + down_read(&inode->i_alloc_sem); + down(&inode->i_sem); size = i_size_read(inode); if (pos < size) { retval = generic_file_direct_IO(READ, filp, buf, count, pos); if (retval > 0) *ppos = pos + retval; } + up(&inode->i_sem); + up_read(&inode->i_alloc_sem); UPDATE_ATIME(filp->f_dentry->d_inode); goto out; } @@ -3139,42 +3152,18 @@ static void update_inode_times(struct in } } /* - * Write to a file through the page cache. - * - * We currently put everything into the page cache prior to writing it. - * This is not a problem when writing full pages. With partial pages, - * however, we first have to read the data into the cache, then - * dirty the page, and finally schedule it for writing. Alternatively, we - * could write-through just the portion of data that would go into that - * page, but that would kill performance for applications that write data - * line by line, and it's prone to race conditions. 
- * - * Note that this routine doesn't try to keep track of dirty pages. Each - * file system has to do this all by itself, unfortunately. - * okir@monad.swb.de + * precheck_file_write(): + * Check the conditions on a file descriptor prior to beginning a write + * on it. Contains the common precheck code for both buffered and direct + * IO. */ -ssize_t generic_file_write_nolock(struct file * file, const char *buf, - size_t count, loff_t *ppos) +static int precheck_file_write(struct file *file, struct inode *inode, + size_t *count, loff_t *ppos) { - struct address_space *mapping = file->f_dentry->d_inode->i_mapping; - struct inode *inode = mapping->host; + int err; unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur; - loff_t pos; - struct page *page, *cached_page; - ssize_t written; - long status = 0; - ssize_t err; - unsigned bytes; - - if ((ssize_t) count < 0) - return -EINVAL; - - if (!access_ok(VERIFY_READ, buf, count)) - return -EFAULT; - - cached_page = NULL; - - pos = *ppos; + loff_t pos = *ppos; + err = -EINVAL; if (pos < 0) goto out; @@ -3185,11 +3174,9 @@ ssize_t generic_file_write_nolock(struct goto out; } - written = 0; - /* FIXME: this is for backwards compatibility with 2.4 */ if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND) - pos = inode->i_size; + *ppos = pos = inode->i_size; /* * Check whether we've reached the file size limit. 
@@ -3202,23 +3189,23 @@ ssize_t generic_file_write_nolock(struct send_sig(SIGXFSZ, current, 0); goto out; } - if (pos > 0xFFFFFFFFULL || count > limit - (u32)pos) { + if (pos > 0xFFFFFFFFULL || *count > limit - (u32)pos) { /* send_sig(SIGXFSZ, current, 0); */ - count = limit - (u32)pos; + *count = limit - (u32)pos; } } /* * LFS rule */ - if ( pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) { + if ( pos + *count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) { if (pos >= MAX_NON_LFS) { send_sig(SIGXFSZ, current, 0); goto out; } - if (count > MAX_NON_LFS - (u32)pos) { + if (*count > MAX_NON_LFS - (u32)pos) { /* send_sig(SIGXFSZ, current, 0); */ - count = MAX_NON_LFS - (u32)pos; + *count = MAX_NON_LFS - (u32)pos; } } @@ -3236,7 +3223,7 @@ ssize_t generic_file_write_nolock(struct gr_learn_resource(current, RLIMIT_FSIZE, count + (u32)pos); if (pos >= inode->i_sb->s_maxbytes) { - if (count || pos > inode->i_sb->s_maxbytes) { + if (*count || pos > inode->i_sb->s_maxbytes) { send_sig(SIGXFSZ, current, 0); err = -EFBIG; goto out; @@ -3244,37 +3231,70 @@ ssize_t generic_file_write_nolock(struct /* zero-length writes at ->s_maxbytes are OK */ } - if (pos + count > inode->i_sb->s_maxbytes) - count = inode->i_sb->s_maxbytes - pos; + if (pos + *count > inode->i_sb->s_maxbytes) + *count = inode->i_sb->s_maxbytes - pos; } else { if (is_read_only(inode->i_rdev)) { err = -EPERM; goto out; } if (pos >= inode->i_size) { - if (count || pos > inode->i_size) { + if (*count || pos > inode->i_size) { err = -ENOSPC; goto out; } } - if (pos + count > inode->i_size) - count = inode->i_size - pos; + if (pos + *count > inode->i_size) + *count = inode->i_size - pos; } err = 0; - if (count == 0) + if (*count == 0) goto out; remove_suid(inode); update_inode_times(inode); - if (file->f_flags & O_DIRECT) - goto o_direct; +out: + return err; +} - if (file->f_flags & O_STREAMING) - shrink_pagecache(file, pos >> PAGE_CACHE_SHIFT); +/* + * Write to a file through the page cache. 
+ * + * We currently put everything into the page cache prior to writing it. + * This is not a problem when writing full pages. With partial pages, + * however, we first have to read the data into the cache, then + * dirty the page, and finally schedule it for writing. Alternatively, we + * could write-through just the portion of data that would go into that + * page, but that would kill performance for applications that write data + * line by line, and it's prone to race conditions. + * + * Note that this routine doesn't try to keep track of dirty pages. Each + * file system has to do this all by itself, unfortunately. + * okir@monad.swb.de + */ +ssize_t generic_file_write_nolock(struct file * file, const char *buf, + size_t count, loff_t *ppos) +{ + struct address_space *mapping = file->f_dentry->d_inode->i_mapping; + struct inode *inode = mapping->host; + loff_t pos; + struct page *page, *cached_page; + ssize_t written; + long status = 0; + ssize_t err; + unsigned bytes; + cached_page = NULL; + pos = *ppos; + written = 0; + + err = precheck_file_write(file, inode, &count, &pos); + if (err != 0 || count == 0) + goto out; + do { unsigned long index, offset; long page_fault; @@ -3361,10 +3381,8 @@ done: status = generic_osync_inode(inode, OSYNC_METADATA|OSYNC_DATA); } -out_status: err = written ? 
written : status; out: - return err; fail_write: status = -EFAULT; @@ -3381,8 +3399,28 @@ sync_failure: if (pos + bytes > inode->i_size) vmtruncate(inode, inode->i_size); goto done; +} + +ssize_t +generic_direct_write(struct file *file,const char *buf,size_t count, loff_t *ppos) +{ + struct address_space *mapping = file->f_dentry->d_inode->i_mapping; + struct inode *inode = mapping->host; + loff_t pos; + ssize_t written; + long status = 0; + int err; + + pos = *ppos; + written = 0; + + err = precheck_file_write(file, inode, &count, &pos); + if (err != 0 || count == 0) + goto out; + + if (!(file->f_flags & O_DIRECT)) + BUG(); -o_direct: written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos); if (written > 0) { loff_t end = pos + written; @@ -3399,18 +3437,55 @@ o_direct: */ if (written >= 0 && file->f_flags & O_SYNC) status = generic_osync_inode(inode, OSYNC_METADATA); - goto out_status; + + err = written ? written : status; +out: + return err; } -ssize_t generic_file_write(struct file *file, const char *buf, - size_t count, loff_t *ppos) +static int do_odirect_fallback(struct file *file, struct inode *inode, + const char *buf, size_t count, loff_t *ppos) { - struct inode *inode = file->f_dentry->d_inode->i_mapping->host; - int err; + int ret, err; down(&inode->i_sem); - err = generic_file_write_nolock(file, buf, count, ppos); + ret = generic_file_write_nolock(file, buf, count, ppos); + if (ret > 0) { + err = do_fdatasync(file); + if (err) + ret = err; + } up(&inode->i_sem); + return ret; +} + +ssize_t +generic_file_write(struct file *file,const char *buf, size_t count, loff_t *ppos) +{ + struct inode *inode = file->f_dentry->d_inode->i_mapping->host; + int err; + + if ((ssize_t) count < 0) + return -EINVAL; + + if (!access_ok(VERIFY_READ, buf, count)) + return -EFAULT; + + if (file->f_flags & O_DIRECT) { + /* generic_direct_write may drop i_sem during the + actual IO */ + down_read(&inode->i_alloc_sem); + down(&inode->i_sem); + err = 
generic_direct_write(file, buf, count, ppos); + up(&inode->i_sem); + up_read(&inode->i_alloc_sem); + if (unlikely(err == -ENOTBLK)) + err = do_odirect_fallback(file, inode, buf, count, ppos); + } else { + down(&inode->i_sem); + err = generic_file_write_nolock(file, buf, count, ppos); + up(&inode->i_sem); + } return err; } diff -Naurp linux-2.4.20-wolk4.2-fullkernel/mm/oom_kill.c linux-2.4.20-wolk4.3-fullkernel/mm/oom_kill.c --- linux-2.4.20-wolk4.2-fullkernel/mm/oom_kill.c 2003-05-03 02:37:46.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/mm/oom_kill.c 2003-06-21 12:17:34.000000000 +0200 @@ -1,10 +1,13 @@ /* * linux/mm/oom_kill.c - * + * * Copyright (C) 1998,2000 Rik van Riel * Thanks go out to Claus Fischer for some serious inspiration and * for goading me into coding this file... * + * June 2003 Tvrtko A. Ursulin (tvrtko.ursulin@zg.htnet.hr) + * Extended with parent process statistics and appropriate actions + * * The routines in this file are used to kill a process when * we're seriously out of memory. This gets called from kswapd() * in linux/mm/vmscan.c when we really run out of memory. 
@@ -21,12 +24,132 @@ #include #include +#define OOM_HISTORY_SIZE 32 + +#define OOM_DEFAULT_VALUE (10) +#define OOM_DEFAULT_EXPIRE (5*60) + +struct parent_record +{ + pid_t pid; + struct task_struct *task; + unsigned long first_kill; + unsigned long last_kill; + unsigned long value; +}; + +unsigned int oom_parent_max = OOM_DEFAULT_VALUE; +unsigned int oom_parent_expire = OOM_DEFAULT_EXPIRE; + +static struct parent_record kill_history[OOM_HISTORY_SIZE]; + /* #define DEBUG */ +void oom_kill_task(struct task_struct *p); + +static void process_kill_history(void) +{ + struct parent_record *p; + struct task_struct *task; + + unsigned int i; + + for ( i = 0; i < OOM_HISTORY_SIZE; i++ ) + { + p = &kill_history[i]; + if ( p->pid ) + { + task = find_task_by_pid(p->pid); + if ( task != p->task ) + { +#ifdef DEBUG + printk(KERN_DEBUG "OOMkill: parent %d (%p) removed from list - does not exist\n",p->pid, p->task); +#endif + p->pid = 0; + } + else if ( abs(jiffies - p->last_kill) >= (oom_parent_expire*HZ) ) + { +#ifdef DEBUG + printk(KERN_DEBUG "OOMkill: parent %d (%p) removed from list - expired\n",p->pid, p->task); +#endif + p->pid = 0; + } + else if ( p->value >= oom_parent_max ) + { + printk(KERN_ERR "Out of Memory: Will kill parent process %d (%s).\n",p->pid,p->task->comm); + p->pid = 0; + oom_kill_task(p->task); + } + } + } +} + +static int find_free_record(void) +{ + struct parent_record *p; + + unsigned int i; + + for ( i = 0; i < OOM_HISTORY_SIZE; i++ ) + { + p = &kill_history[i]; + if ( !p->pid ) + return i; + } + + return -1; +} + +static struct parent_record *find_in_kill_history(struct task_struct *task) +{ + struct parent_record *p = NULL; + unsigned int i; + + if ( !task ) + return NULL; + + for ( i = 0; i < OOM_HISTORY_SIZE; i++ ) + { + p = &kill_history[i]; + if ( p->pid ) + { + if ( (task->pid == p->pid) && (task == p->task) ) + return p; + } + } + + return NULL; +} + +static struct parent_record *new_parent(struct task_struct *task) +{ + struct parent_record 
*p; + int i; + + if ( !task ) + return NULL; + + i = find_free_record(); + + if ( i < 0 ) + return NULL; + + p = &kill_history[i]; + + p->pid= task->pid; + p->task = task; + p->first_kill = jiffies; + p->last_kill = jiffies; + p->value = 0; + + return p; +} + + /** * int_sqrt - oom_kill.c internal function, rough approximation to sqrt * @x: integer of which to calculate the sqrt - * + * * A very rough approximation to the sqrt() function. */ static unsigned int int_sqrt(unsigned int x) @@ -35,7 +158,7 @@ static unsigned int int_sqrt(unsigned in while (x & ~(unsigned int)1) x >>=2, out >>=1; if (x) out -= out >> 2; return (out ? out : 1); -} +} /** * oom_badness - calculate a numeric value for how bad this task has been @@ -181,6 +304,7 @@ void oom_kill_task(struct task_struct *p static void oom_kill(void) { struct task_struct *p, *q; + struct parent_record *parent; extern wait_queue_head_t kswapd_done; #ifdef CONFIG_NO_OOM @@ -195,6 +319,19 @@ static void oom_kill(void) if (p == NULL) panic("Out of memory and no killable processes...\n"); + /* Add or update statistics for a parent process */ + if ( p->p_opptr->pid > 1 ) + { + parent = find_in_kill_history(p->p_opptr); + if ( !parent ) + parent = new_parent(p->p_opptr); + else + { + parent->value++; + parent->last_kill = jiffies; + } + } + /* kill all processes that share the ->mm (i.e. all threads) */ for_each_task(q) { if (q->mm == p->mm) @@ -222,6 +359,11 @@ void out_of_memory(void) static unsigned long first, last, count, lastkill; unsigned long now, since; + /* + * Process kill history...
+ */ + process_kill_history(); + now = jiffies; since = now - last; last = now; diff -Naurp linux-2.4.20-wolk4.2-fullkernel/mm/page_alloc.c linux-2.4.20-wolk4.3-fullkernel/mm/page_alloc.c --- linux-2.4.20-wolk4.2-fullkernel/mm/page_alloc.c 2003-06-28 09:23:24.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/mm/page_alloc.c 2003-06-20 20:18:10.000000000 +0200 @@ -373,7 +373,7 @@ static struct page * __alloc_pages_limit if (!z) break; if (!z->size) - BUG(); + continue; /* * We allocate if the number of (free + inactive_clean) @@ -456,7 +456,7 @@ try_again: if (!z) break; if (!z->size) - BUG(); + continue; min += z->pages_min; if (z->free_pages > min) { @@ -1318,6 +1318,39 @@ static int __init setup_mem_frac(char *s __setup("memfrac=", setup_mem_frac); +#ifdef CONFIG_HIGHMEM +void __init reset_highmem_zone(int highmempages) { + + pg_data_t *pgdat; + int sum; + + sum = 0; + pgdat = pgdat_list; + + /* sum up the highpages */ + while (pgdat) { + sum += (pgdat->node_zones+ZONE_HIGHMEM)->pages_high; + pgdat = pgdat->node_next; + } + + pgdat = pgdat_list; + /* zero the watermarks and the free count if there's no at least high pages */ + if (highmempages <= sum) { + + while (pgdat) { + (pgdat->node_zones+ZONE_HIGHMEM)->size = 0; + (pgdat->node_zones+ZONE_HIGHMEM)->pages_min = 0; + (pgdat->node_zones+ZONE_HIGHMEM)->pages_low = 0; + (pgdat->node_zones+ZONE_HIGHMEM)->pages_high = 0; + (pgdat->node_zones+ZONE_HIGHMEM)->pages_plenty = 0; + (pgdat->node_zones+ZONE_HIGHMEM)->free_pages = 0; + pgdat = pgdat->node_next; + } + } + +} +#endif + #ifdef CONFIG_PROC_FS #include diff -Naurp linux-2.4.20-wolk4.2-fullkernel/mm/rmap.c linux-2.4.20-wolk4.3-fullkernel/mm/rmap.c --- linux-2.4.20-wolk4.2-fullkernel/mm/rmap.c 2003-06-28 09:23:24.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/mm/rmap.c 2003-06-19 14:07:18.000000000 +0200 @@ -13,11 +13,9 @@ /* * Locking: - * - the page->pte_chain is protected by the PG_chainlock bit, - * which nests within the zone lru_lock, then the * - the 
page->pte.chain is protected by the PG_chainlock bit, - * which nests within the lru lock, then the - * mm->page_table_lock, and then the page lock. + * which nests within the the mm->page_table_lock, + * which nests within the page lock. * - because swapout locking is opposite to the locking order * in the page fault path, the swapout path uses trylocks * on the mm->page_table_lock @@ -41,33 +39,56 @@ * here, the page struct for the page table page contains the process * it belongs to and the offset within that process. * - * We use an array of pte pointers in this structure to minimise cache - * misses while traversing reverse maps. + * We use an array of pte pointers in this structure to minimise cache misses + * while traversing reverse maps. */ -#define NRPTE ((L1_CACHE_BYTES - sizeof(void *))/sizeof(pte_addr_t)) +#define NRPTE ((L1_CACHE_BYTES - sizeof(unsigned long))/sizeof(pte_addr_t)) +/* + * next_and_idx encodes both the address of the next pte_chain and the + * offset of the highest-index used pte in ptes[]. + */ struct pte_chain { - struct pte_chain *next; + unsigned long next_and_idx; pte_addr_t ptes[NRPTE]; } ____cacheline_aligned; static kmem_cache_t *pte_chain_cache; +static inline struct pte_chain *pte_chain_next(struct pte_chain *pte_chain) +{ + return (struct pte_chain *)(pte_chain->next_and_idx & ~NRPTE); +} + +static inline struct pte_chain *pte_chain_ptr(unsigned long pte_chain_addr) +{ + return (struct pte_chain *)(pte_chain_addr & ~NRPTE); +} + +static inline int pte_chain_idx(struct pte_chain *pte_chain) +{ + return pte_chain->next_and_idx & NRPTE; +} + +static inline unsigned long +pte_chain_encode(struct pte_chain *pte_chain, int idx) +{ + return (unsigned long)pte_chain | idx; +} + /* * pte_chain list management policy: * - * - If a page has a pte_chain list then it is shared by at least two - * processes, or by a process which has recently done a fork+exec, - * because a single sharing uses PageDirect. 
- * - The pageout code collapses pte_chains with a single user back into - * PageDirect pointers. This is done lazily so a process can do a number - * of fork+exec sequences without having to allocate and free pte_chains. + * - If a page has a pte_chain list then it is shared by at least two processes, + * because a single sharing uses PageDirect. (Well, this isn't true yet, + * coz this code doesn't collapse singletons back to PageDirect on the remove + * path). * - A pte_chain list has free space only in the head member - all succeeding * members are 100% full. * - If the head element has free space, it occurs in its leading slots. * - All free space in the pte_chain is at the start of the head member. - * - Insertion into the pte_chain puts a pte pointer in the last free slot - * of the head member. + * - Insertion into the pte_chain puts a pte pointer in the last free slot of + * the head member. * - Removal from a pte chain moves the head pte of the head member onto the * victim pte and frees the head member if it became empty. */ @@ -102,7 +123,8 @@ struct pte_chain * pte_chain_alloc(int g */ void __pte_chain_free(struct pte_chain *pte_chain) { - pte_chain->next = NULL; + if (pte_chain->next_and_idx) + pte_chain->next_and_idx = 0; kmem_cache_free(pte_chain_cache, pte_chain); } @@ -121,9 +143,8 @@ void __pte_chain_free(struct pte_chain * * page are over or under their RSS limit. * Caller needs to hold the pte_chain_lock. * - * If the page has a single-entry pte_chain, collapse that back to a - * PageDirect representation. This way, it's only done under memory - * pressure, giving a slight speedup to fork+exec for active forkers. + * If the page has a single-entry pte_chain, collapse that back to a PageDirect + * representation. This way, it's only done under memory pressure. 
*/ int page_referenced(struct page * page, int * rsslimit) { @@ -136,12 +157,7 @@ int page_referenced(struct page * page, if (PageDirect(page)) { pte_t *pte = rmap_ptep_map(page->pte.direct); - /* - * First we do a non-locked, read only test to avoid - * unneeded (locked) dirtying of the pagetable in the case - * where the pte isn't referenced. - */ - if (ptep_test_young(pte) && ptep_test_and_clear_young(pte)) + if (pte_young(*pte) && ptep_test_and_clear_young(pte)) referenced++; mm = ptep_to_mm(pte); @@ -152,7 +168,7 @@ int page_referenced(struct page * page, int nr_chains = 0; /* Check all the page tables mapping this page. */ - for (pc = page->pte.chain; pc; pc = pc->next) { + for (pc = page->pte.chain; pc; pc = pte_chain_next(pc)) { int i; for (i = NRPTE-1; i >= 0; i--) { @@ -162,12 +178,7 @@ int page_referenced(struct page * page, if (!pte_paddr) break; pte = rmap_ptep_map(pte_paddr); - /* - * First we do a non-locked, read only test to avoid - * unneeded (locked) dirtying of the pagetable in the case - * where the pte isn't referenced. 
- */ - if (ptep_test_young(pte) && ptep_test_and_clear_young(pte)) + if (ptep_test_and_clear_young(pte)) referenced++; mm = ptep_to_mm(pte); if (mm->rss < mm->rlimit_rss) @@ -207,7 +218,6 @@ page_add_rmap(struct page * page, pte_t { pte_addr_t pte_paddr = ptep_to_paddr(ptep); struct pte_chain * cur_pte_chain; - int i; #ifdef DEBUG_RMAP if (!page || !ptep) @@ -229,6 +239,7 @@ page_add_rmap(struct page * page, pte_t */ { struct pte_chain * pc; + int i; if (PageDirect(page)) { if (page->pte.direct == pte_paddr) BUG(); @@ -256,6 +267,7 @@ page_add_rmap(struct page * page, pte_t ClearPageDirect(page); pte_chain->ptes[NRPTE-1] = page->pte.direct; pte_chain->ptes[NRPTE-2] = pte_paddr; + pte_chain->next_and_idx = pte_chain_encode(NULL, NRPTE-2); page->pte.direct = 0; page->pte.chain = pte_chain; pte_chain = NULL; /* We consumed it */ @@ -264,22 +276,15 @@ page_add_rmap(struct page * page, pte_t cur_pte_chain = page->pte.chain; if (cur_pte_chain->ptes[0]) { /* It's full */ - pte_chain->next = cur_pte_chain; + pte_chain->next_and_idx = pte_chain_encode(cur_pte_chain, + NRPTE - 1); page->pte.chain = pte_chain; pte_chain->ptes[NRPTE-1] = pte_paddr; pte_chain = NULL; /* We consumed it */ goto out; } - - BUG_ON(!cur_pte_chain->ptes[NRPTE-1]); - - for (i = NRPTE-2; i >= 0; i--) { - if (!cur_pte_chain->ptes[i]) { - cur_pte_chain->ptes[i] = pte_paddr; - goto out; - } - } - BUG(); + cur_pte_chain->ptes[pte_chain_idx(cur_pte_chain) - 1] = pte_paddr; + cur_pte_chain->next_and_idx--; out: pte_chain_unlock(page); return pte_chain; @@ -321,18 +326,18 @@ void page_remove_rmap(struct page * page } } else { struct pte_chain *start = page->pte.chain; + struct pte_chain *next; int victim_i = -1; - for (pc = start; pc; pc = pc->next) { + for (pc = start; pc; pc = next) { int i; - if (pc->next) - prefetch(pc->next); - for (i = 0; i < NRPTE; i++) { + next = pte_chain_next(pc); + if (next) + prefetch(next); + for (i = pte_chain_idx(pc); i < NRPTE; i++) { pte_addr_t pa = pc->ptes[i]; - if (!pa) - 
continue; if (victim_i == -1) victim_i = i; if (pa != pte_paddr) @@ -341,8 +346,10 @@ void page_remove_rmap(struct page * page start->ptes[victim_i] = 0; if (victim_i == NRPTE-1) { /* Emptied a pte_chain */ - page->pte.chain = start->next; + page->pte.chain = pte_chain_next(start); __pte_chain_free(start); + } else { + start->next_and_idx++; } goto out; } @@ -451,8 +458,8 @@ out_unlock: * @page: the page to get unmapped * * Tries to remove all the page table entries which are mapping this - * page, used in the pageout path. Caller must hold the zone lru lock - * and the page lock. Return values are: + * page, used in the pageout path. Caller must hold the page lock + * and its pte chain lock. Return values are: * * SWAP_SUCCESS - we succeeded in removing all mappings * SWAP_AGAIN - we missed a trylock, try again later @@ -487,10 +494,10 @@ int try_to_unmap(struct page * page) for (pc = start; pc; pc = next_pc) { int i; - next_pc = pc->next; + next_pc = pte_chain_next(pc); if (next_pc) prefetch(next_pc); - for (i = 0; i < NRPTE; i++) { + for (i = pte_chain_idx(pc); i < NRPTE; i++) { pte_addr_t pte_paddr = pc->ptes[i]; if (!pte_paddr) @@ -510,10 +517,12 @@ int try_to_unmap(struct page * page) start->ptes[victim_i] = 0; victim_i++; if (victim_i == NRPTE) { - page->pte.chain = start->next; + page->pte.chain = pte_chain_next(start); __pte_chain_free(start); start = page->pte.chain; victim_i = 0; + } else { + start->next_and_idx++; } break; case SWAP_AGAIN: @@ -550,7 +559,7 @@ void __init pte_chain_init(void) pte_chain_cache = kmem_cache_create( "pte_chain", sizeof(struct pte_chain), 0, - 0, + SLAB_MUST_HWCACHE_ALIGN, pte_chain_ctor, NULL); diff -Naurp linux-2.4.20-wolk4.2-fullkernel/mm/swapfile.c linux-2.4.20-wolk4.3-fullkernel/mm/swapfile.c --- linux-2.4.20-wolk4.2-fullkernel/mm/swapfile.c 2003-05-03 02:37:56.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/mm/swapfile.c 2003-06-19 14:18:35.000000000 +0200 @@ -1033,8 +1033,10 @@ asmlinkage long sys_swapon(const char 
* goto bad_swap; } + get_page(virt_to_page(swap_header)); lock_page(virt_to_page(swap_header)); rw_swap_page_nolock(READ, SWP_ENTRY(type,0), (char *) swap_header); + put_page(virt_to_page(swap_header)); if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10)) swap_header_version = 1; diff -Naurp linux-2.4.20-wolk4.2-fullkernel/mm/vmscan.c linux-2.4.20-wolk4.3-fullkernel/mm/vmscan.c --- linux-2.4.20-wolk4.2-fullkernel/mm/vmscan.c 2003-06-28 09:23:24.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/mm/vmscan.c 2003-07-02 20:14:19.000000000 +0200 @@ -792,7 +792,7 @@ int rebalance_laundry_zone(struct zone_s UnlockPage(page); page_cache_release(page); lru_lock(zone); - if (unlikely(page->buffers) && + if (unlikely((page->buffers != NULL)) && PageInactiveLaundry(page)) { del_page_from_inactive_laundry_list(page); add_page_to_inactive_dirty_list(page); @@ -951,10 +951,10 @@ static int do_try_to_free_pages(unsigned * Eat memory from filesystem page cache, buffer cache, * dentry, inode and filesystem quota caches. */ - ret += rebalance_inactive(25); + rebalance_inactive(25); for_each_zone(zone) { if (need_rebalance_dirty(zone)) - ret += rebalance_dirty_zone(zone, BATCH_WORK_AMOUNT, gfp_mask); + rebalance_dirty_zone(zone, BATCH_WORK_AMOUNT, gfp_mask); if (need_rebalance_laundry(zone)) ret += rebalance_laundry_zone(zone, BATCH_WORK_AMOUNT, gfp_mask); @@ -971,6 +971,12 @@ static int do_try_to_free_pages(unsigned */ ret += kmem_cache_reap(gfp_mask); + /* + * Mhwahahhaha! This is the part I really like. Giggle. + */ + if (!ret && free_min(ANY_ZONE) > 0) + out_of_memory(); + return ret; } @@ -1020,12 +1026,6 @@ static int do_try_to_free_pages_kswapd(u refill_freelist(); - /* - * Mhwahahhaha! This is the part I really like. Giggle. - */ - if (ret < free_min(ANY_ZONE)) - out_of_memory(); - return ret; } @@ -1097,7 +1097,7 @@ int kswapd(void *unused) * Kswapd main loop. 
*/ for (;;) { - static long recalc = 0; + static unsigned long recalc = 0; /* * We try to rebalance the VM either when we have a @@ -1140,7 +1140,7 @@ void wakeup_kswapd(unsigned int gfp_mask /* If we're in the memory freeing business ourself, don't sleep * but just wake kswapd and go back to businesss. */ - if (current->flags & PF_MEMALLOC) { + if (current->flags & (PF_MEMALLOC|PF_MEMDIE)) { wake_up_interruptible(&kswapd_wait); return; } diff -Naurp linux-2.4.20-wolk4.2-fullkernel/net/ipv4/Config.in linux-2.4.20-wolk4.3-fullkernel/net/ipv4/Config.in --- linux-2.4.20-wolk4.2-fullkernel/net/ipv4/Config.in 2003-05-03 02:33:23.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/net/ipv4/Config.in 2003-06-23 08:21:12.000000000 +0200 @@ -14,7 +14,6 @@ if [ "$CONFIG_IP_ADVANCED_ROUTER" = "y" bool ' IP: equal cost multipath' CONFIG_IP_ROUTE_MULTIPATH bool ' IP: use TOS value as routing key' CONFIG_IP_ROUTE_TOS bool ' IP: verbose route monitoring' CONFIG_IP_ROUTE_VERBOSE - bool ' IP: large routing tables' CONFIG_IP_ROUTE_LARGE_TABLES fi bool ' IP: kernel level autoconfiguration' CONFIG_IP_PNP if [ "$CONFIG_IP_PNP" = "y" ]; then diff -Naurp linux-2.4.20-wolk4.2-fullkernel/net/ipv4/fib_hash.c linux-2.4.20-wolk4.3-fullkernel/net/ipv4/fib_hash.c --- linux-2.4.20-wolk4.2-fullkernel/net/ipv4/fib_hash.c 2003-05-03 02:00:11.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/net/ipv4/fib_hash.c 2003-06-23 08:21:12.000000000 +0200 @@ -90,7 +90,7 @@ struct fn_zone int fz_nent; /* Number of entries */ int fz_divisor; /* Hash divisor */ - u32 fz_hashmask; /* (1<fz_hashmask) int fz_order; /* Zone order */ @@ -150,9 +150,30 @@ extern __inline__ int fn_key_leq(fn_key_ static rwlock_t fib_hash_lock = RW_LOCK_UNLOCKED; -#define FZ_MAX_DIVISOR 1024 +#define FZ_MAX_DIVISOR ((PAGE_SIZE<= size) + break; + } + return order; +} + +static struct fib_node **fz_hash_alloc(int divisor) +{ + unsigned long size = divisor * sizeof(struct fib_node *); + + if (divisor <= 1024) { + return kmalloc(size, 
GFP_KERNEL); + } else { + return (struct fib_node **) + __get_free_pages(GFP_KERNEL, size_to_order(size)); + } +} /* The fib hash lock must be held when this is called. */ static __inline__ void fn_rebuild_zone(struct fn_zone *fz, @@ -175,6 +196,15 @@ static __inline__ void fn_rebuild_zone(s } } +static void fz_hash_free(struct fib_node **hash, int divisor) +{ + if (divisor <= 1024) + kfree(hash); + else + free_pages((unsigned long) hash, + size_to_order(divisor * sizeof(struct fib_node *))); +} + static void fn_rehash_zone(struct fn_zone *fz) { struct fib_node **ht, **old_ht; @@ -186,24 +216,30 @@ static void fn_rehash_zone(struct fn_zon switch (old_divisor) { case 16: new_divisor = 256; - new_hashmask = 0xFF; break; case 256: new_divisor = 1024; - new_hashmask = 0x3FF; break; default: - printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor); - return; + if ((old_divisor << 1) > FZ_MAX_DIVISOR) { + printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor); + return; + } + new_divisor = (old_divisor << 1); + break; } + + new_hashmask = (new_divisor - 1); + #if RT_CACHE_DEBUG >= 2 printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor); #endif - ht = kmalloc(new_divisor*sizeof(struct fib_node*), GFP_KERNEL); + ht = fz_hash_alloc(new_divisor); if (ht) { memset(ht, 0, new_divisor*sizeof(struct fib_node*)); + write_lock_bh(&fib_hash_lock); old_ht = fz->fz_hash; fz->fz_hash = ht; @@ -211,10 +247,10 @@ static void fn_rehash_zone(struct fn_zon fz->fz_divisor = new_divisor; fn_rebuild_zone(fz, old_ht, old_divisor); write_unlock_bh(&fib_hash_lock); - kfree(old_ht); + + fz_hash_free(old_ht, old_divisor); } } -#endif /* CONFIG_IP_ROUTE_LARGE_TABLES */ static void fn_free_node(struct fib_node * f) { @@ -234,12 +270,11 @@ fn_new_zone(struct fn_hash *table, int z memset(fz, 0, sizeof(struct fn_zone)); if (z) { fz->fz_divisor = 16; - fz->fz_hashmask = 0xF; } else { fz->fz_divisor = 1; - fz->fz_hashmask = 0; } - fz->fz_hash = 
kmalloc(fz->fz_divisor*sizeof(struct fib_node*), GFP_KERNEL); + fz->fz_hashmask = (fz->fz_divisor - 1); + fz->fz_hash = fz_hash_alloc(fz->fz_divisor); if (!fz->fz_hash) { kfree(fz); return NULL; @@ -531,12 +566,10 @@ rta->rta_prefsrc ? *(u32*)rta->rta_prefs if ((fi = fib_create_info(r, rta, n, &err)) == NULL) return err; -#ifdef CONFIG_IP_ROUTE_LARGE_TABLES - if (fz->fz_nent > (fz->fz_divisor<<2) && + if (fz->fz_nent > (fz->fz_divisor<<1) && fz->fz_divisor < FZ_MAX_DIVISOR && (z==32 || (1< fz->fz_divisor)) fn_rehash_zone(fz); -#endif fp = fz_chain_p(key, fz); diff -Naurp linux-2.4.20-wolk4.2-fullkernel/net/sched/sch_htb.c linux-2.4.20-wolk4.3-fullkernel/net/sched/sch_htb.c --- linux-2.4.20-wolk4.2-fullkernel/net/sched/sch_htb.c 2003-05-03 02:37:02.000000000 +0200 +++ linux-2.4.20-wolk4.3-fullkernel/net/sched/sch_htb.c 2003-07-03 11:58:50.000000000 +0200 @@ -9,6 +9,8 @@ * Authors: Martin Devera, * * Credits (in time order) for older HTB versions: + * Stef Coene + * HTB support at LARTC mailing list * Ondrej Kraus, * found missing INIT_QDISC(htb) * Vladimir Smelhaus, Aamer Akhter, Bert Hubert @@ -19,7 +21,7 @@ * created test case so that I was able to fix nasty bug * and many others. thanks. 
* - * $Id: sch_htb.c,v 1.17 2003/01/29 09:22:18 devik Exp devik $ + * $Id: sch_htb.c,v 1.20 2003/06/18 19:55:49 devik Exp devik $ */ #include #include @@ -71,7 +73,7 @@ #define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */ #define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock) #define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock) -#define HTB_VER 0x3000a /* major must be matched with number suplied by TC as version */ +#define HTB_VER 0x3000c /* major must be matched with number suplied by TC as version */ #if HTB_VER >> 16 != TC_HTB_PROTOVER #error "Mismatched sch_htb.c and pkt_sch.h" @@ -215,6 +217,9 @@ struct htb_sched /* time of nearest event per level (row) */ unsigned long near_ev_cache[TC_HTB_MAXDEPTH]; + /* cached value of jiffies in dequeue */ + unsigned long jiffies; + /* whether we hit non-work conserving class during this dequeue; we use */ int nwc_hit; /* this to disable mindelay complaint in dequeue */ @@ -334,7 +339,7 @@ static void htb_next_rb_node(rb_node_t * static void htb_debug_dump (struct htb_sched *q) { int i,p; - printk(KERN_DEBUG "htb*g j=%lu\n",jiffies); + printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies); /* rows */ for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) { printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]); @@ -417,8 +422,8 @@ static void htb_add_to_wait_tree (struct if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit()) printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint); #endif - cl->pq_key = jiffies + PSCHED_US2JIFFIE(delay); - if (cl->pq_key == jiffies) + cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay); + if (cl->pq_key == q->jiffies) cl->pq_key++; /* update the nearest event cache */ @@ -596,7 +601,7 @@ htb_class_mode(struct htb_class *cl,long long toks; if ((toks = (cl->ctokens + *diff)) < ( -#ifdef HTB_HYSTERESIS +#if HTB_HYSTERESIS cl->cmode != HTB_CANT_SEND ? 
-cl->cbuffer : #endif 0)) { @@ -604,7 +609,7 @@ htb_class_mode(struct htb_class *cl,long return HTB_CANT_SEND; } if ((toks = (cl->tokens + *diff)) >= ( -#ifdef HTB_HYSTERESIS +#if HTB_HYSTERESIS cl->cmode == HTB_CAN_SEND ? -cl->buffer : #endif 0)) @@ -807,7 +812,7 @@ static void htb_charge_class(struct htb_ cl->classid, diff, (unsigned long long) q->now, (unsigned long long) cl->t_c, - jiffies); + q->jiffies); diff = 1000; } #endif @@ -850,6 +855,7 @@ static void htb_charge_class(struct htb_ * * Scans event queue for pending events and applies them. Returns jiffies to * next pending event (0 for no event in pq). + * Note: Aplied are events whose have cl->pq_key <= jiffies. */ static long htb_do_events(struct htb_sched *q,int level) { @@ -864,9 +870,9 @@ static long htb_do_events(struct htb_sch while (p->rb_left) p = p->rb_left; cl = rb_entry(p, struct htb_class, pq_node); - if (cl->pq_key - (jiffies+1) < 0x80000000) { - HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - jiffies); - return cl->pq_key - jiffies; + if (cl->pq_key - (q->jiffies+1) < 0x80000000) { + HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies); + return cl->pq_key - q->jiffies; } htb_safe_rb_erase(p,q->wait_pq+level); diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0); @@ -877,7 +883,7 @@ static long htb_do_events(struct htb_sch cl->classid, diff, (unsigned long long) q->now, (unsigned long long) cl->t_c, - jiffies); + q->jiffies); diff = 1000; } #endif @@ -985,7 +991,8 @@ static void htb_delay_by(struct Qdisc *s delay = 5*HZ; } del_timer(&q->timer); - q->timer.expires = jiffies + delay; + /* why don't use jiffies here ? 
because expires can be in past */ + q->timer.expires = q->jiffies + delay; add_timer(&q->timer); sch->flags |= TCQ_F_THROTTLED; sch->stats.overlimits++; @@ -1002,6 +1009,7 @@ static struct sk_buff *htb_dequeue(struc int evs_used = 0; #endif + q->jiffies = jiffies; HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue), sch->q.qlen); @@ -1021,14 +1029,14 @@ static struct sk_buff *htb_dequeue(struc /* common case optimization - skip event handler quickly */ int m; long delay; - if (jiffies - q->near_ev_cache[level] < 0x80000000 || 0) { + if (q->jiffies - q->near_ev_cache[level] < 0x80000000 || 0) { delay = htb_do_events(q,level); - q->near_ev_cache[level] += delay ? delay : HZ; + q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ); #ifdef HTB_DEBUG evs_used++; #endif } else - delay = q->near_ev_cache[level] - jiffies; + delay = q->near_ev_cache[level] - q->jiffies; if (delay && min_delay > delay) min_delay = delay; @@ -1047,8 +1055,8 @@ static struct sk_buff *htb_dequeue(struc #ifdef HTB_DEBUG if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) { if (min_delay == LONG_MAX) { - printk(KERN_ERR "HTB: dequeue bug (%d), report it please !\n", - evs_used); + printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n", + evs_used,q->jiffies,jiffies); htb_debug_dump(q); } else printk(KERN_WARNING "HTB: mindelay=%ld, some class has " @@ -1057,7 +1065,7 @@ static struct sk_buff *htb_dequeue(struc #endif htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay); fin: - HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,jiffies,skb); + HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb); return skb; } @@ -1422,7 +1430,7 @@ static int htb_change_class(struct Qdisc parent = parentid == TC_H_ROOT ? 
NULL : htb_find (parentid,sch); hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]); - HTB_DBG(0,1,"htb_chg cl=%p, clid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum); + HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum); rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]); ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]); if (!rtab || !ctab) goto failure;