## Automatically generated incremental diff ## From: linux-2.5.71-bk2 ## To: linux-2.5.72 ## Robot: $Id: make-incremental-diff,v 1.11 2002/02/20 02:59:33 hpa Exp $ diff -urN linux-2.5.71-bk2/Makefile linux-2.5.72/Makefile --- linux-2.5.71-bk2/Makefile 2003-06-16 21:54:50.000000000 -0700 +++ linux-2.5.72/Makefile 2003-06-16 21:54:53.000000000 -0700 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 5 -SUBLEVEL = 71 -EXTRAVERSION = -bk2 +SUBLEVEL = 72 +EXTRAVERSION = # *DOCUMENTATION* # To see a list of typical targets execute "make help" diff -urN linux-2.5.71-bk2/arch/alpha/kernel/osf_sys.c linux-2.5.72/arch/alpha/kernel/osf_sys.c --- linux-2.5.71-bk2/arch/alpha/kernel/osf_sys.c 2003-06-14 12:17:55.000000000 -0700 +++ linux-2.5.72/arch/alpha/kernel/osf_sys.c 2003-06-16 21:54:53.000000000 -0700 @@ -822,7 +822,6 @@ affects all sorts of things, like timeval and itimerval. */ extern struct timezone sys_tz; -extern int do_sys_settimeofday(struct timeval *tv, struct timezone *tz); extern int do_getitimer(int which, struct itimerval *value); extern int do_setitimer(int which, struct itimerval *, struct itimerval *); extern asmlinkage int sys_utimes(char *, struct timeval *); @@ -901,11 +900,11 @@ asmlinkage int osf_settimeofday(struct timeval32 *tv, struct timezone *tz) { - struct timeval ktv; + struct timespec kts; struct timezone ktz; if (tv) { - if (get_tv32(&ktv, tv)) + if (get_tv32((struct timeval *)&kts, tv)) return -EFAULT; } if (tz) { @@ -913,7 +912,9 @@ return -EFAULT; } - return do_sys_settimeofday(tv ? &ktv : NULL, tz ? &ktz : NULL); + kts.tv_nsec *= 1000; + + return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL); } asmlinkage int diff -urN linux-2.5.71-bk2/arch/sparc64/kernel/sys_sparc32.c linux-2.5.72/arch/sparc64/kernel/sys_sparc32.c --- linux-2.5.71-bk2/arch/sparc64/kernel/sys_sparc32.c 2003-06-16 21:54:50.000000000 -0700 +++ linux-2.5.72/arch/sparc64/kernel/sys_sparc32.c 2003-06-16 21:54:53.000000000 -0700 @@ -1531,7 +1531,11 @@ u32 totalswap; u32 freeswap; unsigned short procs; - char _f[22]; + unsigned short pad; + u32 totalhigh; + u32 freehigh; + u32 mem_unit; + char _f[20-2*sizeof(int)-sizeof(int)]; }; extern asmlinkage int sys_sysinfo(struct sysinfo *info); @@ -1540,11 +1544,30 @@ { struct sysinfo s; int ret, err; + int bitcount = 0; mm_segment_t old_fs = get_fs (); - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); ret = sys_sysinfo(&s); - set_fs (old_fs); + set_fs(old_fs); + /* Check to see if any memory value is too large for 32-bit and + * scale down if needed. 
+ */ + if ((s.totalram >> 32) || (s.totalswap >> 32)) { + while (s.mem_unit < PAGE_SIZE) { + s.mem_unit <<= 1; + bitcount++; + } + s.totalram >>= bitcount; + s.freeram >>= bitcount; + s.sharedram >>= bitcount; + s.bufferram >>= bitcount; + s.totalswap >>= bitcount; + s.freeswap >>= bitcount; + s.totalhigh >>= bitcount; + s.freehigh >>= bitcount; + } + err = put_user (s.uptime, &info->uptime); err |= __put_user (s.loads[0], &info->loads[0]); err |= __put_user (s.loads[1], &info->loads[1]); @@ -1556,6 +1579,9 @@ err |= __put_user (s.totalswap, &info->totalswap); err |= __put_user (s.freeswap, &info->freeswap); err |= __put_user (s.procs, &info->procs); + err |= __put_user (s.totalhigh, &info->totalhigh); + err |= __put_user (s.freehigh, &info->freehigh); + err |= __put_user (s.mem_unit, &info->mem_unit); if (err) return -EFAULT; return ret; diff -urN linux-2.5.71-bk2/arch/x86_64/Makefile linux-2.5.72/arch/x86_64/Makefile --- linux-2.5.71-bk2/arch/x86_64/Makefile 2003-06-14 12:18:33.000000000 -0700 +++ linux-2.5.72/arch/x86_64/Makefile 2003-06-16 21:54:53.000000000 -0700 @@ -42,7 +42,6 @@ CFLAGS += -mcmodel=kernel CFLAGS += -pipe # this makes reading assembly source easier, but produces worse code -# disable for production kernel CFLAGS += -fno-reorder-blocks # should lower this a lot and see how much .text is saves CFLAGS += -finline-limit=2000 diff -urN linux-2.5.71-bk2/arch/x86_64/ia32/vsyscall.S linux-2.5.72/arch/x86_64/ia32/vsyscall.S --- linux-2.5.71-bk2/arch/x86_64/ia32/vsyscall.S 2003-06-14 12:18:21.000000000 -0700 +++ linux-2.5.72/arch/x86_64/ia32/vsyscall.S 2003-06-16 21:54:53.000000000 -0700 @@ -35,7 +35,7 @@ .section .text.rtsigreturn,"ax" .balign 32 - .globl __kernel_rt_sigreturn,"ax" + .globl __kernel_rt_sigreturn .type __kernel_rt_sigreturn,@function __kernel_rt_sigreturn: .LSTART_rt_sigreturn: diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/Makefile linux-2.5.72/arch/x86_64/kernel/Makefile --- linux-2.5.71-bk2/arch/x86_64/kernel/Makefile 2003-06-14 12:18:29.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/Makefile 2003-06-16 21:54:53.000000000 -0700 @@ -24,6 +24,8 @@ $(obj)/bootflag.c: @ln -sf ../../i386/kernel/bootflag.c $(obj)/bootflag.c +$(obj)/cpuid.c: + @ln -sf ../../i386/kernel/cpuid.c $(obj)/cpuid.c -clean-files += bootflag.c +clean-files += bootflag.c cpuid.c diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/aperture.c linux-2.5.72/arch/x86_64/kernel/aperture.c --- linux-2.5.71-bk2/arch/x86_64/kernel/aperture.c 2003-06-14 12:18:29.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/aperture.c 2003-06-16 21:54:53.000000000 -0700 @@ -25,8 +25,6 @@ int fallback_aper_order __initdata = 1; /* 64MB */ int fallback_aper_force __initdata = 0; -extern int no_iommu, force_mmu; - /* This code runs before the PCI subsystem is initialized, so just access the northbridge directly. 
*/ diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/apic.c linux-2.5.72/arch/x86_64/kernel/apic.c --- linux-2.5.71-bk2/arch/x86_64/kernel/apic.c 2003-06-14 12:17:58.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/apic.c 2003-06-16 21:54:53.000000000 -0700 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -464,13 +465,11 @@ unsigned int apic_thmr; } apic_pm_state; -static int lapic_suspend(struct device *dev, u32 state, u32 level) +static int lapic_suspend(struct sys_device *dev, u32 state) { unsigned int l, h; unsigned long flags; - if (level != SUSPEND_POWER_DOWN) - return 0; if (!apic_pm_state.active) return 0; @@ -497,13 +496,11 @@ return 0; } -static int lapic_resume(struct device *dev, u32 level) +static int lapic_resume(struct sys_device *dev) { unsigned int l, h; unsigned long flags; - if (level != RESUME_POWER_ON) - return 0; if (!apic_pm_state.active) return 0; @@ -537,38 +534,35 @@ return 0; } -static struct device_driver lapic_driver = { - .name = "lapic", - .bus = &system_bus_type, +static struct sysdev_class lapic_sysclass = { + set_kset_name("lapic"), .resume = lapic_resume, .suspend = lapic_suspend, }; /* not static, needed by child devices */ -struct sys_device device_lapic = { - .name = "lapic", +static struct sys_device device_lapic = { .id = 0, - .dev = { - .name = "lapic", - .driver = &lapic_driver, - }, + .cls = &lapic_sysclass, }; -EXPORT_SYMBOL(device_lapic); static void __init apic_pm_activate(void) { apic_pm_state.active = 1; } -static int __init init_lapic_devicefs(void) +static int __init init_lapic_sysfs(void) { + int error; if (!cpu_has_apic) return 0; /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ - driver_register(&lapic_driver); - return sys_device_register(&device_lapic); + error = sysdev_class_register(&lapic_sysclass); + if (!error) + error = sys_device_register(&device_lapic); + return error; } -device_initcall(init_lapic_devicefs); +device_initcall(init_lapic_sysfs); #else /* CONFIG_PM */ diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/bluesmoke.c linux-2.5.72/arch/x86_64/kernel/bluesmoke.c --- linux-2.5.71-bk2/arch/x86_64/kernel/bluesmoke.c 2003-06-14 12:18:33.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/bluesmoke.c 2003-06-16 21:54:53.000000000 -0700 @@ -303,7 +303,7 @@ wrmsrl(MSR_IA32_MCG_STATUS, 0); if (regs && (status & (1<<1))) - printk(KERN_EMERG "MCE at EIP %lx ESP %lx\n", regs->rip, regs->rsp); + printk(KERN_EMERG "MCE at RIP %lx RSP %lx\n", regs->rip, regs->rsp); others: generic_machine_check(regs, error_code); @@ -352,7 +352,6 @@ { u64 cap; int i; - struct pci_dev *nb; if (!test_bit(X86_FEATURE_MCE, &c->x86_capability) || !test_bit(X86_FEATURE_MCA, &c->x86_capability)) diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/cpuid.c linux-2.5.72/arch/x86_64/kernel/cpuid.c --- linux-2.5.71-bk2/arch/x86_64/kernel/cpuid.c 2003-06-14 12:18:04.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/cpuid.c 1969-12-31 16:00:00.000000000 -0800 @@ -1,178 +0,0 @@ -#ident "$Id: cpuid.c,v 1.4 2001/10/24 23:58:53 ak Exp $" -/* ----------------------------------------------------------------------- * - * - * Copyright 2000 H. Peter Anvin - All Rights Reserved - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, - * USA; either version 2 of the License, or (at your option) any later - * version; incorporated herein by reference. 
- * - * ----------------------------------------------------------------------- */ - - -/* - * cpuid.c - * - * x86 CPUID access device - * - * This device is accessed by lseek() to the appropriate CPUID level - * and then read in chunks of 16 bytes. A larger size means multiple - * reads of consecutive levels. - * - * This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on - * an SMP box will direct the access to CPU %d. - */ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#ifdef CONFIG_SMP - -struct cpuid_command { - int cpu; - u32 reg; - u32 *data; -}; - -static void cpuid_smp_cpuid(void *cmd_block) -{ - struct cpuid_command *cmd = (struct cpuid_command *) cmd_block; - - if ( cmd->cpu == smp_processor_id() ) - cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2], &cmd->data[3]); -} - -static inline void do_cpuid(int cpu, u32 reg, u32 *data) -{ - struct cpuid_command cmd; - - preempt_disable(); - if ( cpu == smp_processor_id() ) { - cpuid(reg, &data[0], &data[1], &data[2], &data[3]); - } else { - cmd.cpu = cpu; - cmd.reg = reg; - cmd.data = data; - - smp_call_function(cpuid_smp_cpuid, &cmd, 1, 1); - } - preempt_enable(); -} -#else /* ! CONFIG_SMP */ - -static inline void do_cpuid(int cpu, u32 reg, u32 *data) -{ - cpuid(reg, &data[0], &data[1], &data[2], &data[3]); -} - -#endif /* ! CONFIG_SMP */ - -static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) -{ - loff_t ret; - - lock_kernel(); - - switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - break; - default: - ret = -EINVAL; - } - - unlock_kernel(); - return ret; -} - -static ssize_t cpuid_read(struct file * file, char * buf, - size_t count, loff_t *ppos) -{ - u32 *tmp = (u32 *)buf; - u32 data[4]; - size_t rv; - u32 reg = *ppos; - int cpu = minor(file->f_dentry->d_inode->i_rdev); - - if ( count % 16 ) - return -EINVAL; /* Invalid chunk size */ - - for ( rv = 0 ; count ; count -= 16 ) { - do_cpuid(cpu, reg, data); - if ( copy_to_user(tmp,&data,16) ) - return -EFAULT; - tmp += 4; - *ppos = reg++; - } - - return ((char *)tmp) - buf; -} - -static int cpuid_open(struct inode *inode, struct file *file) -{ - int cpu = minor(file->f_dentry->d_inode->i_rdev); - struct cpuinfo_x86 *c = &(cpu_data)[cpu]; - - if ( !(cpu_online_map & (1UL << cpu)) ) - return -ENXIO; /* No such CPU */ - if ( c->cpuid_level < 0 ) - return -EIO; /* CPUID not supported */ - - return 0; -} - -/* - * File operations we support - */ -static struct file_operations cpuid_fops = { - .owner = THIS_MODULE, - .llseek = cpuid_seek, - .read = cpuid_read, - .open = cpuid_open, -}; - -int __init cpuid_init(void) -{ - if (register_chrdev(CPUID_MAJOR, "cpu/cpuid", &cpuid_fops)) { - printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n", - CPUID_MAJOR); - return -EBUSY; - } - - return 0; -} - -void __exit cpuid_exit(void) -{ - unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); -} - -module_init(cpuid_init); -module_exit(cpuid_exit) - -MODULE_AUTHOR("H. 
Peter Anvin "); -MODULE_DESCRIPTION("x86 generic CPUID driver"); -MODULE_LICENSE("GPL"); diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/ldt.c linux-2.5.72/arch/x86_64/kernel/ldt.c --- linux-2.5.71-bk2/arch/x86_64/kernel/ldt.c 2003-06-14 12:18:34.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/ldt.c 2003-06-16 21:54:53.000000000 -0700 @@ -21,8 +21,7 @@ #include #include #include - -extern void load_gs_index(unsigned gs); +#include #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ static void flush_ldt(void *null) diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/msr.c linux-2.5.72/arch/x86_64/kernel/msr.c --- linux-2.5.71-bk2/arch/x86_64/kernel/msr.c 2003-06-14 12:17:59.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/msr.c 2003-06-16 21:54:53.000000000 -0700 @@ -1,4 +1,4 @@ -#ident "$Id: msr.c,v 1.6 2001/10/24 23:58:53 ak Exp $" +#ident "$Id$" /* ----------------------------------------------------------------------- * * * Copyright 2000 H. Peter Anvin - All Rights Reserved @@ -22,9 +22,6 @@ * * This driver uses /dev/cpu/%d/msr where %d is the minor number, and on * an SMP box will direct the access to CPU %d. - -RED-PEN: need to get power management for S3 restore - */ #include @@ -44,7 +41,6 @@ #include #include #include -#include /* Note: "err" is handled in a funny way below. Otherwise one version of gcc or another breaks. */ @@ -119,12 +115,11 @@ static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) { struct msr_command cmd; + int ret; preempt_disable(); if ( cpu == smp_processor_id() ) { - int ret = wrmsr_eio(reg, eax, edx); - preempt_enable(); - return ret; + ret = wrmsr_eio(reg, eax, edx); } else { cmd.cpu = cpu; cmd.reg = reg; @@ -132,17 +127,20 @@ cmd.data[1] = edx; smp_call_function(msr_smp_wrmsr, &cmd, 1, 1); - preempt_enable(); - return cmd.err; + ret = cmd.err; } + preempt_enable(); + return ret; } static inline int do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx) { struct msr_command cmd; + int ret; + preempt_disable(); if ( cpu == smp_processor_id() ) { - return rdmsr_eio(reg, eax, edx); + ret = rdmsr_eio(reg, eax, edx); } else { cmd.cpu = cpu; cmd.reg = reg; @@ -152,8 +150,10 @@ *eax = cmd.data[0]; *edx = cmd.data[1]; - return cmd.err; + ret = cmd.err; } + preempt_enable(); + return ret; } #else /* ! 
CONFIG_SMP */ diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/nmi.c linux-2.5.72/arch/x86_64/kernel/nmi.c --- linux-2.5.71-bk2/arch/x86_64/kernel/nmi.c 2003-06-14 12:18:09.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/nmi.c 2003-06-16 21:54:53.000000000 -0700 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -152,50 +153,45 @@ #include -static int lapic_nmi_suspend(struct device *dev, u32 state, u32 level) - { - if (level != SUSPEND_POWER_DOWN) - return 0; +static int lapic_nmi_suspend(struct sys_device *dev, u32 state) +{ disable_lapic_nmi_watchdog(); return 0; - } +} -static int lapic_nmi_resume(struct device *dev, u32 level) - { - if (level != RESUME_POWER_ON) - return 0; +static int lapic_nmi_resume(struct sys_device *dev) +{ #if 0 enable_lapic_nmi_watchdog(); #endif return 0; - } +} -static struct device_driver lapic_nmi_driver = { - .name = "lapic_nmi", - .bus = &system_bus_type, +static struct sysdev_class nmi_sysclass = { + set_kset_name("lapic_nmi"), .resume = lapic_nmi_resume, .suspend = lapic_nmi_suspend, }; static struct sys_device device_lapic_nmi = { - .name = "lapic_nmi", .id = 0, - .dev = { - .name = "lapic_nmi", - .driver = &lapic_nmi_driver, - .parent = &device_lapic.dev, - }, + .cls = &nmi_sysclass, }; -static int __init init_lapic_nmi_devicefs(void) +static int __init init_lapic_nmi_sysfs(void) { + int error; + if (nmi_active == 0) return 0; - driver_register(&lapic_nmi_driver); - return sys_device_register(&device_lapic_nmi); + + error = sysdev_class_register(&nmi_sysclass); + if (!error) + error = sys_device_register(&device_lapic_nmi); + return error; } /* must come after the local APIC's device_initcall() */ -late_initcall(init_lapic_nmi_devicefs); +late_initcall(init_lapic_nmi_sysfs); #endif /* CONFIG_PM */ @@ -332,13 +328,11 @@ { int cpu = safe_smp_processor_id(); - init_tss[cpu].ist[NMI_STACK] -= 2048; /* this shouldn't be needed. 
*/ nmi_enter(); add_pda(__nmi_count,1); if (!nmi_callback(regs, cpu)) default_do_nmi(regs); nmi_exit(); - init_tss[cpu].ist[NMI_STACK] += 2048; } void set_nmi_callback(nmi_callback_t callback) diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/pci-gart.c linux-2.5.72/arch/x86_64/kernel/pci-gart.c --- linux-2.5.71-bk2/arch/x86_64/kernel/pci-gart.c 2003-06-14 12:18:08.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/pci-gart.c 2003-06-16 21:54:53.000000000 -0700 @@ -38,6 +38,8 @@ #include #include #include +#include +#include unsigned long iommu_bus_base; /* GART remapping area (physical) */ static unsigned long iommu_size; /* size of remapping area bytes */ @@ -53,9 +55,6 @@ int force_mmu = 0; #endif -extern int fallback_aper_order; -extern int fallback_aper_force; - /* Allocation bitmap for the remapping area */ static spinlock_t iommu_bitmap_lock = SPIN_LOCK_UNLOCKED; static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ @@ -135,10 +134,19 @@ void *memory; int gfp = GFP_ATOMIC; int i; - unsigned long iommu_page; int flush = 0; + unsigned long iommu_page; + unsigned long dma_mask; - if (hwdev == NULL || hwdev->dma_mask < 0xffffffff || no_iommu) + if (hwdev == NULL) { + gfp |= GFP_DMA; + dma_mask = 0xffffffff; + } else { + dma_mask = hwdev->consistent_dma_mask; + } + if (dma_mask == 0) + dma_mask = 0xffffffff; + if (dma_mask < 0xffffffff || no_iommu) gfp |= GFP_DMA; /* @@ -151,7 +159,7 @@ return NULL; } else { int high = 0, mmu; - if (((unsigned long)virt_to_bus(memory) + size) > 0xffffffffUL) + if (((unsigned long)virt_to_bus(memory) + size) > dma_mask) high = 1; mmu = 1; if (force_mmu && !(gfp & GFP_DMA)) @@ -222,7 +230,6 @@ static void **iommu_leak_tab; static int leak_trace; int iommu_leak_pages = 20; -extern unsigned long printk_address(unsigned long); void dump_leak(void) { int i; @@ -459,7 +466,7 @@ extern int agp_amdk8_init(void); -void __init pci_iommu_init(void) +int __init pci_iommu_init(void) { struct agp_kern_info info; unsigned long aper_size; @@ -468,6 +475,7 @@ #ifndef CONFIG_AGP_AMD_8151 no_agp = 1; #else + /* Makefile puts PCI initialization via subsys_initcall first. */ /* Add other K8 AGP bridge drivers here */ no_agp = no_agp || (agp_amdk8_init() < 0) || @@ -477,7 +485,7 @@ if (no_iommu || (!force_mmu && end_pfn < 0xffffffff>>PAGE_SHIFT)) { printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); no_iommu = 1; - return; + return -1; } if (no_agp) { @@ -489,7 +497,7 @@ if (err < 0) { printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); no_iommu = 1; - return; + return -1; } } @@ -540,8 +548,13 @@ clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size); flush_gart(); + + return 0; } +/* Must execute after PCI subsystem */ +fs_initcall(pci_iommu_init); + /* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]] size set size of iommu (in bytes) noagp don't initialize the AGP driver and use full aperture. 
@@ -589,4 +602,3 @@ } return 1; } - diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/pci-nommu.c linux-2.5.72/arch/x86_64/kernel/pci-nommu.c --- linux-2.5.71-bk2/arch/x86_64/kernel/pci-nommu.c 2003-06-14 12:17:55.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/pci-nommu.c 2003-06-16 21:54:53.000000000 -0700 @@ -2,13 +2,12 @@ #include #include #include +#include /* * Dummy IO MMU functions */ -extern unsigned long end_pfn; - void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle) { diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/process.c linux-2.5.72/arch/x86_64/kernel/process.c --- linux-2.5.71-bk2/arch/x86_64/kernel/process.c 2003-06-14 12:18:22.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/process.c 2003-06-16 21:54:53.000000000 -0700 @@ -200,8 +200,6 @@ show_trace(®s->rsp); } -extern void load_gs_index(unsigned); - /* * Free current thread data structures etc.. */ diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/setup.c linux-2.5.72/arch/x86_64/kernel/setup.c --- linux-2.5.71-bk2/arch/x86_64/kernel/setup.c 2003-06-14 12:17:57.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/setup.c 2003-06-16 21:54:53.000000000 -0700 @@ -585,7 +585,7 @@ void __init print_cpu_info(struct cpuinfo_x86 *c) { if (c->x86_model_id[0]) - printk("AMD %s", c->x86_model_id); + printk("%s", c->x86_model_id); if (c->x86_mask || c->cpuid_level >= 0) printk(" stepping %02x\n", c->x86_mask); diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/signal.c linux-2.5.72/arch/x86_64/kernel/signal.c --- linux-2.5.71-bk2/arch/x86_64/kernel/signal.c 2003-06-14 12:18:25.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/signal.c 2003-06-16 21:54:53.000000000 -0700 @@ -475,8 +475,6 @@ do_signal(regs,oldset); } -extern int exception_trace; - void signal_fault(struct pt_regs *regs, void *frame, char *where) { struct task_struct *me = current; diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/smpboot.c linux-2.5.72/arch/x86_64/kernel/smpboot.c --- linux-2.5.71-bk2/arch/x86_64/kernel/smpboot.c 2003-06-14 12:18:51.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/smpboot.c 2003-06-16 21:54:53.000000000 -0700 @@ -51,8 +51,7 @@ #include #include #include - -extern int disable_apic; +#include /* Bitmask of currently online CPUs */ unsigned long cpu_online_map = 1; @@ -67,8 +66,6 @@ /* Set when the idlers are all forked */ int smp_threads_ready; -extern void time_init_smp(void); - /* * Trampoline 80x86 program as an array. */ @@ -128,7 +125,6 @@ long long delta; long one_usec; int buggy = 0; - extern unsigned cpu_khz; printk(KERN_INFO "checking TSC synchronization across %u CPUs: ",num_booting_cpus()); @@ -242,8 +238,6 @@ } #undef NR_LOOPS -extern void calibrate_delay(void); - static atomic_t init_deasserted; void __init smp_callin(void) @@ -337,8 +331,6 @@ int cpucount; -extern int cpu_idle(void); - /* * Activate a secondary processor. 
*/ @@ -560,8 +552,6 @@ return (send_status | accept_status); } -extern unsigned long cpu_initialized; - static void __init do_boot_cpu (int apicid) { struct task_struct *idle; diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/time.c linux-2.5.72/arch/x86_64/kernel/time.c --- linux-2.5.71-bk2/arch/x86_64/kernel/time.c 2003-06-16 21:54:50.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/time.c 2003-06-16 21:54:53.000000000 -0700 @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef CONFIG_X86_LOCAL_APIC #include #endif @@ -38,9 +39,6 @@ spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED; -extern int using_apic_timer; -extern void smp_local_timer_interrupt(struct pt_regs * regs); - #undef HPET_HACK_ENABLE_DANGEROUS diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/traps.c linux-2.5.72/arch/x86_64/kernel/traps.c --- linux-2.5.71-bk2/arch/x86_64/kernel/traps.c 2003-06-14 12:17:56.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/traps.c 2003-06-16 21:54:53.000000000 -0700 @@ -41,13 +41,10 @@ #include #include #include +#include #include -asmlinkage int system_call(void); -asmlinkage int kernel_syscall(void); -extern void ia32_syscall(void); - extern struct gate_struct idt_table[256]; asmlinkage void divide_error(void); @@ -73,8 +70,6 @@ asmlinkage void spurious_interrupt_bug(void); asmlinkage void call_debug(void); -extern int exception_trace; - struct notifier_block *die_chain; static inline void conditional_sti(struct pt_regs *regs) @@ -457,8 +452,6 @@ DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, get_cr2()) DO_ERROR(18, SIGSEGV, "reserved", reserved) -extern void dump_pagetable(unsigned long); - asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) { conditional_sti(regs); diff -urN linux-2.5.71-bk2/arch/x86_64/kernel/wakeup.S linux-2.5.72/arch/x86_64/kernel/wakeup.S --- linux-2.5.71-bk2/arch/x86_64/kernel/wakeup.S 2003-06-14 12:18:30.000000000 -0700 +++ linux-2.5.72/arch/x86_64/kernel/wakeup.S 1969-12-31 16:00:00.000000000 -0800 @@ -1,306 +0,0 @@ -/* - * ACPI S3 entry/exit handling. - * - * Notes: - * Relies on kernel being loaded below 4GB. - * Needs restore_low_mappings called before. - * - * Copyright 2003 by Andi Kleen, SuSE Labs. - * - * Long mode entry loosely based on example code in chapter 14 of the x86-64 system - * programmer's manual. - * - * Notebook: - - FIXME need to interface with suspend.c properly. do_magic. check i386. rename to suspend64.S - - Need to fix vgacon,mtrr,bluesmoke to do resume - - Interrupts should be off until the io-apic code has reinited the APIC. - Need support for that in the pm frame work or a special hack? - - SMP support is non existent. Need to somehow restart the other CPUs again. - If CPU hotplug was working it could be used. Save/Restore needs to run on the same CPU. - - Should check magic like i386 code - - suspend code copies something. check what it is. - */ - -#include - -#include -#include -#include - -#define O(x) (x-acpi_wakeup) - - .text - .code16 -ENTRY(acpi_wakeup) - /* 16bit real mode entered from ACPI BIOS */ - /* The machine is just through BIOS setup after power down and everything set up - by Linux needs to be restored. 
*/ - /* The code here needs to be position independent or manually relocated, - because it is copied to a <1MB page for real mode execution */ - - /* A20 enabled (according to ACPI spec) */ - /* cs = acpi_wakeup >> 4 ; eip = acpi_wakeup & 0xF */ - - movw %cs,%ax - movw %ax,%ds /* make %ds point to acpi_wakeup */ - movw %ax,%ss - movw $O(wakeup_stack),%sp /* setup stack */ - - pushl $0 - popfl /* clear EFLAGS */ - - lgdt %ds:O(pGDT) /* load kernel GDT */ - - movl $0x1,%eax /* enable protected mode */ - movl %eax,%cr0 - - movl %ds:O(wakeup_page_table),%edi - ljmpl $__KERNEL16_CS,$0 /* -> s3_prot16 (filled in earlier by caller) */ - - /* patched by s3_restore_state below */ -pGDT: - .short 0 - .quad 0 - - .align 4 - .globl wakeup_page_table -wakeup_page_table: - .long 0 - - .align 8 -wakeup_stack: - .fill 128,1,0 - .globl acpi_wakeup_end -acpi_wakeup_end: - /* end of real mode trampoline */ - - /* pointed to by __KERNEL16_CS:0 */ - .code16 -ENTRY(s3_prot16) - /* Now in 16bit protected mode, still no paging, stack/data segments invalid */ - - /* Prepare everything for 64bit paging, but still keep it turned off */ - movl %cr4,%eax - bts $5,%eax /* set PAE bit */ - movl %eax,%cr4 - - movl %edi,%cr3 /* load kernel page table */ - - movl $0x80000001,%eax - cpuid /* no execute supported ? */ - movl %edx,%esi - - movl $MSR_EFER,%ecx - rdmsr - bts $8,%eax /* long mode */ - bt $20,%esi /* NX supported ? */ - jnc 1f - bt $_EFER_NX,%eax -1: - wrmsr /* set temporary efer - real one is restored a bit later */ - - movl %cr0,%eax - bts $31,%eax /* paging */ - movl %eax,%cr0 - - /* running in identity mapping now */ - - /* go to 64bit code segment */ - ljmpl $__KERNEL_CS,$s3_restore_state-__START_KERNEL_map - - .code64 - .macro SAVEMSR msr,target - movl $\msr,%ecx - rdmsr - shlq $32,%rdx - orq %rax,%rdx - movq %rdx,\target(%rip) - .endm - - .macro RESTMSR msr,src - movl $\msr,%ecx - movq \src(%rip),%rax - movq %rax,%rdx - shrq $32,%rdx - wrmsr - .endm - - .macro SAVECTL reg - movq %\reg,%rax - movq %rax,saved_\reg(%rip) - .endm - - .macro RESTCTL reg - movq saved_\reg(%rip),%rax - movq %rax,%\reg - .endm - - /* Running in identity mapping, long mode */ -s3_restore_state_low: - movq $s3_restore_state,%rax - jmpq *%rax - - /* Running in real kernel mapping now */ -s3_restore_state: - xorl %eax,%eax - movl %eax,%ds - movq saved_rsp(%rip),%rsp - movw saved_ss(%rip),%ss - movw saved_fs(%rip),%fs - movw saved_gs(%rip),%gs - movw saved_es(%rip),%es - movw saved_ds(%rip),%ds - - lidt saved_idt - ltr saved_tr - lldt saved_ldt - /* gdt is already loaded */ - - RESTCTL cr0 - RESTCTL cr4 - /* cr3 is already loaded */ - - RESTMSR MSR_EFER,saved_efer - RESTMSR MSR_LSTAR,saved_lstar - RESTMSR MSR_CSTAR,saved_cstar - RESTMSR MSR_FS_BASE,saved_fs_base - RESTMSR MSR_GS_BASE,saved_gs_base - RESTMSR MSR_KERNEL_GS_BASE,saved_kernel_gs_base - RESTMSR MSR_SYSCALL_MASK,saved_syscall_mask - - fxrstor fpustate(%rip) - - RESTCTL dr0 - RESTCTL dr1 - RESTCTL dr2 - RESTCTL dr3 - RESTCTL dr6 - RESTCTL dr7 - - movq saved_rflags(%rip),%rax - pushq %rax - popfq - - movq saved_rbp(%rip),%rbp - movq saved_rbx(%rip),%rbx - movq saved_r12(%rip),%r12 - movq saved_r13(%rip),%r13 - movq saved_r14(%rip),%r14 - movq saved_r15(%rip),%r15 - ret - -ENTRY(acpi_prepare_wakeup) - sgdt saved_gdt - - /* copy gdt descr and page table to low level wakeup code so that it can - reload them early. 
*/ - movq acpi_wakeup_address(%rip),%rax - movw saved_gdt+8(%rip),%cx - movw %cx,O(pGDT)+8(%rax) - movq saved_gdt(%rip),%rcx - movq %rcx,O(pGDT)(%rax) - - movq %cr3,%rdi - movl %edi,O(wakeup_page_table)(%rax) - ret - - /* Save CPU state. */ - /* Everything saved here needs to be restored above. */ -ENTRY(do_suspend_lowlevel) - testl %edi,%edi - jnz s3_restore_state - - SAVECTL cr0 - SAVECTL cr4 - SAVECTL cr3 - - str saved_tr - sidt saved_idt - sgdt saved_gdt - sldt saved_ldt - - SAVEMSR MSR_EFER,saved_efer - SAVEMSR MSR_LSTAR,saved_lstar - SAVEMSR MSR_CSTAR,saved_cstar - SAVEMSR MSR_FS_BASE,saved_fs_base - SAVEMSR MSR_GS_BASE,saved_gs_base - SAVEMSR MSR_KERNEL_GS_BASE,saved_kernel_gs_base - SAVEMSR MSR_SYSCALL_MASK,saved_syscall_mask - - movw %ds,saved_ds(%rip) - movw %es,saved_es(%rip) - movw %fs,saved_fs(%rip) - movw %gs,saved_gs(%rip) - movw %ss,saved_ss(%rip) - movq %rsp,saved_rsp(%rip) - - pushfq - popq %rax - movq %rax,saved_rflags(%rip) - - SAVECTL dr0 - SAVECTL dr1 - SAVECTL dr2 - SAVECTL dr3 - SAVECTL dr6 - SAVECTL dr7 - - fxsave fpustate(%rip) - - /* finally save callee saved registers */ - movq %rbp,saved_rbp(%rip) - movq %rbx,saved_rbx(%rip) - movq %r12,saved_r12(%rip) - movq %r13,saved_r13(%rip) - movq %r14,saved_r14(%rip) - movq %r15,saved_r15(%rip) - movq $3,%rdi - call acpi_enter_sleep_state - ret /* should not happen */ - - .data - .align 8 -saved_efer: .quad 0 -saved_lstar: .quad 0 -saved_cstar: .quad 0 -saved_cr4: .quad 0 -saved_cr3: .quad 0 -saved_cr0: .quad 0 -saved_rbp: .quad 0 -saved_rbx: .quad 0 -saved_rsp: .quad 0 -saved_r12: .quad 0 -saved_r13: .quad 0 -saved_r14: .quad 0 -saved_r15: .quad 0 -saved_rflags: .quad 0 -saved_gs_base: .quad 0 -saved_fs_base: .quad 0 -saved_kernel_gs_base: .quad 0 -saved_syscall_mask: .quad 0 -saved_dr0: .quad 0 -saved_dr1: .quad 0 -saved_dr2: .quad 0 -saved_dr3: .quad 0 -saved_dr6: .quad 0 -saved_dr7: .quad 0 -saved_ds: .short 0 -saved_fs: .short 0 -saved_gs: .short 0 -saved_es: .short 0 -saved_ss: .short 0 -saved_idt: .short 0 - .quad 0 -saved_ldt: .short 0 -saved_gdt: .short 0 - .quad 0 -saved_tr: .short 0 - - .align 16 -fpustate: .fill 512,1,0 diff -urN linux-2.5.71-bk2/arch/x86_64/lib/delay.c linux-2.5.72/arch/x86_64/lib/delay.c --- linux-2.5.71-bk2/arch/x86_64/lib/delay.c 2003-06-14 12:18:07.000000000 -0700 +++ linux-2.5.72/arch/x86_64/lib/delay.c 2003-06-16 21:54:53.000000000 -0700 @@ -21,7 +21,6 @@ void __delay(unsigned long loops) { -#ifndef CONFIG_SIMNOW unsigned long bclock, now; rdtscl(bclock); @@ -31,7 +30,6 @@ rdtscl(now); } while((now-bclock) < loops); -#endif } inline void __const_udelay(unsigned long xloops) diff -urN linux-2.5.71-bk2/arch/x86_64/mm/init.c linux-2.5.72/arch/x86_64/mm/init.c --- linux-2.5.71-bk2/arch/x86_64/mm/init.c 2003-06-14 12:18:00.000000000 -0700 +++ linux-2.5.72/arch/x86_64/mm/init.c 2003-06-16 21:54:53.000000000 -0700 @@ -24,6 +24,7 @@ #endif #include #include +#include #include #include @@ -41,6 +42,8 @@ #define Dprintk(x...) 
+extern char _stext; + DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); /* @@ -393,6 +396,9 @@ return 0; } +static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, + kcore_vsyscall; + void __init mem_init(void) { int codesize, reservedpages, datasize, initsize; @@ -434,6 +440,15 @@ datasize = (unsigned long) &_edata - (unsigned long) &_etext; initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; + /* Register memory areas for /proc/kcore */ + kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT); + kclist_add(&kcore_vmalloc, (void *)VMALLOC_START, + VMALLOC_END-VMALLOC_START); + kclist_add(&kcore_kernel, &_stext, &_end - &_stext); + kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN); + kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, + VSYSCALL_END - VSYSCALL_START); + printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), end_pfn << (PAGE_SHIFT-10), @@ -462,7 +477,7 @@ ClearPageReserved(virt_to_page(addr)); set_page_count(virt_to_page(addr), 1); #ifdef CONFIG_INIT_DEBUG - memset(addr & ~(PAGE_SIZE-1), 0xcc, PAGE_SIZE); + memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE); #endif free_page(addr); totalram_pages++; @@ -497,3 +512,29 @@ reserve_bootmem(phys, len); #endif } + +int kern_addr_valid(unsigned long addr) +{ + unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT; + if (above != 0 && above != -1UL) + return 0; + + pml4_t *pml4 = pml4_offset_k(addr); + if (pml4_none(*pml4)) + return 0; + + pgd_t *pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return 0; + + pmd_t *pmd = pmd_offset(pgd, addr); + if (pmd_none(*pmd)) + return 0; + if (pmd_large(*pmd)) + return pfn_valid(pmd_pfn(*pmd)); + + pte_t *pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) + return 0; + return pfn_valid(pte_pfn(*pte)); +} diff -urN linux-2.5.71-bk2/arch/x86_64/pci/Makefile linux-2.5.72/arch/x86_64/pci/Makefile --- linux-2.5.71-bk2/arch/x86_64/pci/Makefile 2003-06-14 12:17:59.000000000 -0700 +++ linux-2.5.72/arch/x86_64/pci/Makefile 2003-06-16 21:54:53.000000000 -0700 @@ -1,8 +1,29 @@ # # Makefile for X86_64 specific PCI routines # -obj-y := x86-64.o +# Reuse the i386 PCI subsystem using symlinks +# +obj-y := i386.o obj-$(CONFIG_PCI_DIRECT)+= direct.o obj-y += fixup.o obj-$(CONFIG_ACPI_PCI) += acpi.o obj-y += legacy.o irq.o common.o + +$(obj)/direct.c: $(obj)/pci.h + @ln -sf ../../i386/pci/direct.c $(obj)/direct.c +$(obj)/legacy.c: $(obj)/pci.h + @ln -sf ../../i386/pci/legacy.c $(obj)/legacy.c +$(obj)/common.c: $(obj)/pci.h + @ln -sf ../../i386/pci/common.c $(obj)/common.c +$(obj)/acpi.c: $(obj)/pci.h + @ln -sf ../../i386/pci/acpi.c $(obj)/acpi.c +$(obj)/pci.h: + @ln -sf ../../i386/pci/pci.h $(obj)/pci.h +$(obj)/irq.c: $(obj)/pci.h + @ln -sf ../../i386/pci/irq.c $(obj)/irq.c +$(obj)/fixup.c: $(obj)/pci.h + @ln -sf ../../i386/pci/fixup.c $(obj)/fixup.c +$(obj)/i386.c: $(obj)/pci.h + @ln -sf ../../i386/pci/i386.c $(obj)/i386.c + +clean-files += i386.c legacy.c fixup.c acpi.c irq.c pci.h common.c direct.c diff -urN linux-2.5.71-bk2/arch/x86_64/pci/acpi.c linux-2.5.72/arch/x86_64/pci/acpi.c --- linux-2.5.71-bk2/arch/x86_64/pci/acpi.c 2003-06-14 12:18:52.000000000 -0700 +++ linux-2.5.72/arch/x86_64/pci/acpi.c 1969-12-31 16:00:00.000000000 -0800 @@ -1,27 +0,0 @@ -#include -#include -#include -#include "pci.h" - -static int __init pci_acpi_init(void) -{ - if (pcibios_scanned) - return 0; - - if (!(pci_probe & PCI_NO_ACPI_ROUTING)) { - if 
(!acpi_pci_irq_init()) { - printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); - printk(KERN_INFO "PCI: if you experience problems, try using option 'pci=noacpi'\n"); - pcibios_scanned++; - pcibios_enable_irq = acpi_pci_irq_enable; - } else - printk(KERN_WARNING "PCI: Invalid ACPI-PCI IRQ routing table\n"); - - /* still scan manually in case ACPI forgot some bus */ - pcibios_fixup_peer_bridges(); - } - - return 0; -} - -subsys_initcall(pci_acpi_init); diff -urN linux-2.5.71-bk2/arch/x86_64/pci/changelog linux-2.5.72/arch/x86_64/pci/changelog --- linux-2.5.71-bk2/arch/x86_64/pci/changelog 2003-06-14 12:18:07.000000000 -0700 +++ linux-2.5.72/arch/x86_64/pci/changelog 1969-12-31 16:00:00.000000000 -0800 @@ -1 +0,0 @@ -See arch/i386/pci/changelog for early changelog. diff -urN linux-2.5.71-bk2/arch/x86_64/pci/common.c linux-2.5.72/arch/x86_64/pci/common.c --- linux-2.5.71-bk2/arch/x86_64/pci/common.c 2003-06-14 12:17:59.000000000 -0700 +++ linux-2.5.72/arch/x86_64/pci/common.c 1969-12-31 16:00:00.000000000 -0800 @@ -1,212 +0,0 @@ -/* - * Low-Level PCI Support for PC - * - * (c) 1999--2000 Martin Mares - - Note: on x86-64 there is no PCI BIOS so there is no way to sort in the - same order as 32bit Linux. This could cause grief for dualbooting because - devices may wander. May want to use ACPI for sorting eventually. - - */ - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "pci.h" - -unsigned int pci_probe = PCI_PROBE_CONF1 | PCI_PROBE_CONF2; - -int pcibios_last_bus = 0xff; /* XXX */ -struct pci_bus *pci_root_bus = NULL; -struct pci_ops *pci_root_ops = NULL; - -int (*pci_config_read)(int seg, int bus, int dev, int fn, int reg, int len, u32 *value) = NULL; -int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value) = NULL; - -/* - * legacy, numa, and acpi all want to call pcibios_scan_root - * from their initcalls. This flag prevents that. - */ -int pcibios_scanned; - -/* - * This interrupt-safe spinlock protects all accesses to PCI - * configuration space. - */ -spinlock_t pci_config_lock = SPIN_LOCK_UNLOCKED; - -/* - * Several buggy motherboards address only 16 devices and mirror - * them to next 16 IDs. We try to detect this `feature' on all - * primary buses (those containing host bridges as they are - * expected to be unique) and remove the ghost devices. 
- */ - -static void __devinit pcibios_fixup_ghosts(struct pci_bus *b) -{ - struct list_head *ln, *mn; - struct pci_dev *d, *e; - int mirror = PCI_DEVFN(16,0); - int seen_host_bridge = 0; - int i; - - DBG("PCI: Scanning for ghost devices on bus %d\n", b->number); - for (ln=b->devices.next; ln != &b->devices; ln=ln->next) { - d = pci_dev_b(ln); - if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST) - seen_host_bridge++; - for (mn=ln->next; mn != &b->devices; mn=mn->next) { - e = pci_dev_b(mn); - if (e->devfn != d->devfn + mirror || - e->vendor != d->vendor || - e->device != d->device || - e->class != d->class) - continue; - for(i=0; iresource[i].start != d->resource[i].start || - e->resource[i].end != d->resource[i].end || - e->resource[i].flags != d->resource[i].flags) - continue; - break; - } - if (mn == &b->devices) - return; - } - if (!seen_host_bridge) - return; - printk(KERN_WARNING "PCI: Ignoring ghost devices on bus %02x\n", b->number); - - ln = &b->devices; - while (ln->next != &b->devices) { - d = pci_dev_b(ln->next); - if (d->devfn >= mirror) { - list_del(&d->global_list); - list_del(&d->bus_list); - kfree(d); - } else - ln = ln->next; - } -} - -struct pbus_set_ranges_data; - -void __devinit -pcibios_fixup_pbus_ranges (struct pci_bus *bus, struct pbus_set_ranges_data *ranges) -{ -} - -/* - * Called after each bus is probed, but before its children - * are examined. - */ - -void __devinit pcibios_fixup_bus(struct pci_bus *b) -{ - pcibios_fixup_ghosts(b); - pci_read_bridge_bases(b); -} - - -struct pci_bus * __devinit pcibios_scan_root(int busnum) -{ - struct list_head *list; - struct pci_bus *bus; - - list_for_each(list, &pci_root_buses) { - bus = pci_bus_b(list); - if (bus->number == busnum) { - /* Already scanned */ - return bus; - } - } - - printk("PCI: Probing PCI hardware (bus %02x)\n", busnum); - - return pci_scan_bus(busnum, pci_root_ops, NULL); -} - -extern u8 pci_cache_line_size; - -static int __init pcibios_init(void) -{ - if (!pci_root_ops) { - printk("PCI: System does not support PCI\n"); - return 0; - } - - pci_cache_line_size = boot_cpu_data.x86_clflush_size >> 2; - - pcibios_resource_survey(); - -#ifdef CONFIG_GART_IOMMU - pci_iommu_init(); -#endif - - /* may eventually need to do ACPI sort here. */ - return 0; -} - -subsys_initcall(pcibios_init); - -char * __devinit pcibios_setup(char *str) -{ - if (!strcmp(str, "off")) { - pci_probe = 0; - return NULL; - } -#ifdef CONFIG_PCI_DIRECT - else if (!strcmp(str, "conf1")) { - pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS; - return NULL; - } - else if (!strcmp(str, "conf2")) { - pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS; - return NULL; - } -#endif -#ifdef CONFIG_ACPI_PCI - else if (!strcmp(str, "noacpi")) { - pci_probe |= PCI_NO_ACPI_ROUTING; - return NULL; - } -#endif - else if (!strcmp(str, "rom")) { - pci_probe |= PCI_ASSIGN_ROMS; - return NULL; - } else if (!strcmp(str, "assign-busses")) { - pci_probe |= PCI_ASSIGN_ALL_BUSSES; - return NULL; - } else if (!strcmp(str, "usepirqmask")) { - pci_probe |= PCI_USE_PIRQ_MASK; - return NULL; - } else if (!strncmp(str, "irqmask=", 8)) { - pcibios_irq_mask = simple_strtol(str+8, NULL, 0); - return NULL; - } else if (!strncmp(str, "lastbus=", 8)) { - pcibios_last_bus = simple_strtol(str+8, NULL, 0); - return NULL; - } - return str; -} - -unsigned int pcibios_assign_all_busses(void) -{ - return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 
1 : 0; -} - -int pcibios_enable_device(struct pci_dev *dev, int mask) -{ - int err; - - if ((err = pcibios_enable_resources(dev, mask)) < 0) - return err; - - return pcibios_enable_irq(dev); -} diff -urN linux-2.5.71-bk2/arch/x86_64/pci/direct.c linux-2.5.72/arch/x86_64/pci/direct.c --- linux-2.5.71-bk2/arch/x86_64/pci/direct.c 2003-06-14 12:18:52.000000000 -0700 +++ linux-2.5.72/arch/x86_64/pci/direct.c 1969-12-31 16:00:00.000000000 -0800 @@ -1,281 +0,0 @@ -/* - * direct.c - Low-level direct PCI config space access - */ - -#include -#include -#include "pci.h" - -/* - * Functions for accessing PCI configuration space with type 1 accesses - */ - -#define PCI_CONF1_ADDRESS(bus, dev, fn, reg) \ - (0x80000000 | (bus << 16) | (dev << 11) | (fn << 8) | (reg & ~3)) - -static int __pci_conf1_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) -{ - unsigned long flags; - - if (!value || (bus > 255) || (dev > 31) || (fn > 7) || (reg > 255)) - return -EINVAL; - - spin_lock_irqsave(&pci_config_lock, flags); - - outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8); - - switch (len) { - case 1: - *value = inb(0xCFC + (reg & 3)); - break; - case 2: - *value = inw(0xCFC + (reg & 2)); - break; - case 4: - *value = inl(0xCFC); - break; - } - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -static int __pci_conf1_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) -{ - unsigned long flags; - - if ((bus > 255) || (dev > 31) || (fn > 7) || (reg > 255)) - return -EINVAL; - - spin_lock_irqsave(&pci_config_lock, flags); - - outl(PCI_CONF1_ADDRESS(bus, dev, fn, reg), 0xCF8); - - switch (len) { - case 1: - outb((u8)value, 0xCFC + (reg & 3)); - break; - case 2: - outw((u16)value, 0xCFC + (reg & 2)); - break; - case 4: - outl((u32)value, 0xCFC); - break; - } - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -#undef PCI_CONF1_ADDRESS - -static int pci_conf1_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) -{ - return __pci_conf1_read(0, bus->number, PCI_SLOT(devfn), - PCI_FUNC(devfn), where, size, value); -} - -static int pci_conf1_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value) -{ - return __pci_conf1_write(0, bus->number, PCI_SLOT(devfn), - PCI_FUNC(devfn), where, size, value); -} - -static struct pci_ops pci_direct_conf1 = { - .read = pci_conf1_read, - .write = pci_conf1_write, -}; - - -/* - * Functions for accessing PCI configuration space with type 2 accesses - */ - -#define PCI_CONF2_ADDRESS(dev, reg) (u16)(0xC000 | (dev << 8) | reg) - -static int __pci_conf2_read (int seg, int bus, int dev, int fn, int reg, int len, u32 *value) -{ - unsigned long flags; - - if (!value || (bus > 255) || (dev > 31) || (fn > 7) || (reg > 255)) - return -EINVAL; - - if (dev & 0x10) - return PCIBIOS_DEVICE_NOT_FOUND; - - spin_lock_irqsave(&pci_config_lock, flags); - - outb((u8)(0xF0 | (fn << 1)), 0xCF8); - outb((u8)bus, 0xCFA); - - switch (len) { - case 1: - *value = inb(PCI_CONF2_ADDRESS(dev, reg)); - break; - case 2: - *value = inw(PCI_CONF2_ADDRESS(dev, reg)); - break; - case 4: - *value = inl(PCI_CONF2_ADDRESS(dev, reg)); - break; - } - - outb (0, 0xCF8); - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -static int __pci_conf2_write (int seg, int bus, int dev, int fn, int reg, int len, u32 value) -{ - unsigned long flags; - - if ((bus > 255) || (dev > 31) || (fn > 7) || (reg > 255)) - return -EINVAL; - - if (dev & 0x10) - return PCIBIOS_DEVICE_NOT_FOUND; - - 
spin_lock_irqsave(&pci_config_lock, flags); - - outb((u8)(0xF0 | (fn << 1)), 0xCF8); - outb((u8)bus, 0xCFA); - - switch (len) { - case 1: - outb ((u8)value, PCI_CONF2_ADDRESS(dev, reg)); - break; - case 2: - outw ((u16)value, PCI_CONF2_ADDRESS(dev, reg)); - break; - case 4: - outl ((u32)value, PCI_CONF2_ADDRESS(dev, reg)); - break; - } - - outb (0, 0xCF8); - - spin_unlock_irqrestore(&pci_config_lock, flags); - - return 0; -} - -#undef PCI_CONF2_ADDRESS - -static int pci_conf2_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value) -{ - return __pci_conf2_read(0, bus->number, PCI_SLOT(devfn), - PCI_FUNC(devfn), where, size, value); -} - -static int pci_conf2_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value) -{ - return __pci_conf2_write(0, bus->number, PCI_SLOT(devfn), - PCI_FUNC(devfn), where, size, value); -} - -static struct pci_ops pci_direct_conf2 = { - .read = pci_conf2_read, - .write = pci_conf2_write, -}; - - -/* - * Before we decide to use direct hardware access mechanisms, we try to do some - * trivial checks to ensure it at least _seems_ to be working -- we just test - * whether bus 00 contains a host bridge (this is similar to checking - * techniques used in XFree86, but ours should be more reliable since we - * attempt to make use of direct access hints provided by the PCI BIOS). - * - * This should be close to trivial, but it isn't, because there are buggy - * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID. - */ -static int __devinit pci_sanity_check(struct pci_ops *o) -{ - u32 x = 0; - int retval = 0; - struct pci_bus *bus; /* Fake bus and device */ - struct pci_dev *dev; - - if (pci_probe & PCI_NO_CHECKS) - return 1; - - bus = kmalloc(sizeof(*bus), GFP_ATOMIC); - dev = kmalloc(sizeof(*dev), GFP_ATOMIC); - if (!bus || !dev) { - printk(KERN_ERR "Out of memory in %s\n", __FUNCTION__); - goto exit; - } - - bus->number = 0; - dev->bus = bus; - for(dev->devfn=0; dev->devfn < 0x100; dev->devfn++) - if ((!o->read(bus, dev->devfn, PCI_CLASS_DEVICE, 2, &x) && - (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA)) || - (!o->read(bus, dev->devfn, PCI_VENDOR_ID, 2, &x) && - (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ))) { - retval = 1; - goto exit; - } - DBG("PCI: Sanity check failed\n"); -exit: - kfree(dev); - kfree(bus); - return retval; -} - -static int __init pci_direct_init(void) -{ - unsigned int tmp; - unsigned long flags; - - local_irq_save(flags); - - /* - * Check if configuration type 1 works. - */ - if (pci_probe & PCI_PROBE_CONF1) { - outb (0x01, 0xCFB); - tmp = inl (0xCF8); - outl (0x80000000, 0xCF8); - if (inl (0xCF8) == 0x80000000 && - pci_sanity_check(&pci_direct_conf1)) { - outl (tmp, 0xCF8); - local_irq_restore(flags); - printk(KERN_INFO "PCI: Using configuration type 1\n"); - if (!request_region(0xCF8, 8, "PCI conf1")) - pci_root_ops = NULL; - else - pci_root_ops = &pci_direct_conf1; - return 0; - } - outl (tmp, 0xCF8); - } - - /* - * Check if configuration type 2 works. 
- */ - if (pci_probe & PCI_PROBE_CONF2) { - outb (0x00, 0xCFB); - outb (0x00, 0xCF8); - outb (0x00, 0xCFA); - if (inb (0xCF8) == 0x00 && inb (0xCFA) == 0x00 && - pci_sanity_check(&pci_direct_conf2)) { - local_irq_restore(flags); - printk(KERN_INFO "PCI: Using configuration type 2\n"); - if (!request_region(0xCF8, 4, "PCI conf2")) - pci_root_ops = NULL; - else - pci_root_ops = &pci_direct_conf2; - return 0; - } - } - - local_irq_restore(flags); - return 0; -} - -arch_initcall(pci_direct_init); diff -urN linux-2.5.71-bk2/arch/x86_64/pci/fixup.c linux-2.5.72/arch/x86_64/pci/fixup.c --- linux-2.5.71-bk2/arch/x86_64/pci/fixup.c 2003-06-14 12:17:58.000000000 -0700 +++ linux-2.5.72/arch/x86_64/pci/fixup.c 1969-12-31 16:00:00.000000000 -0800 @@ -1,48 +0,0 @@ -/* - * Exceptions for specific devices. Usually work-arounds for fatal design flaws. - * - -Short list on x86-64........so far. - - */ - -#include -#include -#include "pci.h" - -static void __devinit pci_fixup_ncr53c810(struct pci_dev *d) -{ - /* - * NCR 53C810 returns class code 0 (at least on some systems). - * Fix class to be PCI_CLASS_STORAGE_SCSI - */ - if (!d->class) { - printk(KERN_WARNING "PCI: fixing NCR 53C810 class code for %s\n", d->slot_name); - d->class = PCI_CLASS_STORAGE_SCSI << 8; - } -} - -static void __devinit pci_fixup_ide_bases(struct pci_dev *d) -{ - int i; - - /* - * PCI IDE controllers use non-standard I/O port decoding, respect it. - */ - if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE) - return; - DBG("PCI: IDE base address fixup for %s\n", d->slot_name); - for(i=0; i<4; i++) { - struct resource *r = &d->resource[i]; - if ((r->start & ~0x80) == 0x374) { - r->start |= 2; - r->end = r->start; - } - } -} - -struct pci_fixup pcibios_fixups[] = { - { PCI_FIXUP_HEADER, PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases }, - { PCI_FIXUP_HEADER, PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810 }, - { 0 } -}; diff -urN linux-2.5.71-bk2/arch/x86_64/pci/irq.c linux-2.5.72/arch/x86_64/pci/irq.c --- linux-2.5.71-bk2/arch/x86_64/pci/irq.c 2003-06-14 12:18:23.000000000 -0700 +++ linux-2.5.72/arch/x86_64/pci/irq.c 1969-12-31 16:00:00.000000000 -0800 @@ -1,639 +0,0 @@ -/* - * Low-Level PCI Support for PC -- Routing of Interrupts - * - * (c) 1999--2000 Martin Mares - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "pci.h" - -#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) -#define PIRQ_VERSION 0x0100 - -int broken_hp_bios_irq9; - -static struct irq_routing_table *pirq_table; - -/* - * Never use: 0, 1, 2 (timer, keyboard, and cascade) - * Avoid using: 13, 14 and 15 (FP error and IDE). - * Penalize: 3, 4, 6, 7, 12 (known ISA uses: serial, floppy, parallel and mouse) - */ -unsigned int pcibios_irq_mask = 0xfff8; - -static int pirq_penalty[16] = { - 1000000, 1000000, 1000000, 1000, 1000, 0, 1000, 1000, - 0, 0, 0, 0, 1000, 100000, 100000, 100000 -}; - -struct irq_router { - char *name; - u16 vendor, device; - int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq); - int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new); -}; - -int (*pcibios_enable_irq)(struct pci_dev *dev) = NULL; - -/* - * Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table. 
- */ - -static struct irq_routing_table * __init pirq_find_routing_table(void) -{ - u8 *addr; - struct irq_routing_table *rt; - int i; - u8 sum; - - for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) { - rt = (struct irq_routing_table *) addr; - if (rt->signature != PIRQ_SIGNATURE || - rt->version != PIRQ_VERSION || - rt->size % 16 || - rt->size < sizeof(struct irq_routing_table)) - continue; - sum = 0; - for(i=0; isize; i++) - sum += addr[i]; - if (!sum) { - DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt); - return rt; - } - } - return NULL; -} - -/* - * If we have a IRQ routing table, use it to search for peer host - * bridges. It's a gross hack, but since there are no other known - * ways how to get a list of buses, we have to go this way. - */ - -static void __init pirq_peer_trick(void) -{ - struct irq_routing_table *rt = pirq_table; - u8 busmap[256]; - int i; - struct irq_info *e; - - memset(busmap, 0, sizeof(busmap)); - for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) { - e = &rt->slots[i]; -#ifdef DEBUG - { - int j; - DBG("%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot); - for(j=0; j<4; j++) - DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap); - DBG("\n"); - } -#endif - busmap[e->bus] = 1; - } - for(i=1; i<256; i++) - /* - * It might be a secondary bus, but in this case its parent is already - * known (ascending bus order) and therefore pci_scan_bus returns immediately. - */ - if (busmap[i] && pci_scan_bus(i, pci_root_bus->ops, NULL)) - printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i); - //pcibios_last_bus = -1; -} - -/* - * Code for querying and setting of IRQ routes on various interrupt routers. - */ - -void eisa_set_level_irq(unsigned int irq) -{ - unsigned char mask = 1 << (irq & 7); - unsigned int port = 0x4d0 + (irq >> 3); - unsigned char val = inb(port); - - if (!(val & mask)) { - DBG(" -> edge"); - outb(val | mask, port); - } -} - -/* - * Common IRQ routing practice: nybbles in config space, - * offset by some magic constant. - */ -static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr) -{ - u8 x; - unsigned reg = offset + (nr >> 1); - - pci_read_config_byte(router, reg, &x); - return (nr & 1) ? (x >> 4) : (x & 0xf); -} - -static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val) -{ - u8 x; - unsigned reg = offset + (nr >> 1); - - pci_read_config_byte(router, reg, &x); - x = (nr & 1) ? ((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val); - pci_write_config_byte(router, reg, x); -} - -#if 0 /* enable when pci ids ae known */ -/* - * The VIA pirq rules are nibble-based, like ALI, - * but without the ugly irq number munging. 
- */ -static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - return read_config_nybble(router, 0x55, pirq); -} - -static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - write_config_nybble(router, 0x55, pirq, irq); - return 1; -} - -/* - * PIRQ routing for SiS 85C503 router used in several SiS chipsets - * According to the SiS 5595 datasheet (preliminary V1.0, 12/24/1997) - * the related registers work as follows: - * - * general: one byte per re-routable IRQ, - * bit 7 IRQ mapping enabled (0) or disabled (1) - * bits [6:4] reserved - * bits [3:0] IRQ to map to - * allowed: 3-7, 9-12, 14-15 - * reserved: 0, 1, 2, 8, 13 - * - * individual registers in device config space: - * - * 0x41/0x42/0x43/0x44: PCI INT A/B/C/D - bits as in general case - * - * 0x61: IDEIRQ: bits as in general case - but: - * bits [6:5] must be written 01 - * bit 4 channel-select primary (0), secondary (1) - * - * 0x62: USBIRQ: bits as in general case - but: - * bit 4 OHCI function disabled (0), enabled (1) - * - * 0x6a: ACPI/SCI IRQ - bits as in general case - * - * 0x7e: Data Acq. Module IRQ - bits as in general case - * - * Apparently there are systems implementing PCI routing table using both - * link values 0x01-0x04 and 0x41-0x44 for PCI INTA..D, but register offsets - * like 0x62 as link values for USBIRQ e.g. So there is no simple - * "register = offset + pirq" relation. - * Currently we support PCI INTA..D and USBIRQ and try our best to handle - * both link mappings. - * IDE/ACPI/DAQ mapping is currently unsupported (left untouched as set by BIOS). - */ - -static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - u8 x; - int reg = pirq; - - switch(pirq) { - case 0x01: - case 0x02: - case 0x03: - case 0x04: - reg += 0x40; - case 0x41: - case 0x42: - case 0x43: - case 0x44: - case 0x62: - pci_read_config_byte(router, reg, &x); - if (reg != 0x62) - break; - if (!(x & 0x40)) - return 0; - break; - case 0x61: - case 0x6a: - case 0x7e: - printk(KERN_INFO "SiS pirq: advanced IDE/ACPI/DAQ mapping not yet implemented\n"); - return 0; - default: - printk(KERN_INFO "SiS router pirq escape (%d)\n", pirq); - return 0; - } - return (x & 0x80) ? 0 : (x & 0x0f); -} - -static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - u8 x; - int reg = pirq; - - switch(pirq) { - case 0x01: - case 0x02: - case 0x03: - case 0x04: - reg += 0x40; - case 0x41: - case 0x42: - case 0x43: - case 0x44: - case 0x62: - x = (irq&0x0f) ? (irq&0x0f) : 0x80; - if (reg != 0x62) - break; - /* always mark OHCI enabled, as nothing else knows about this */ - x |= 0x40; - break; - case 0x61: - case 0x6a: - case 0x7e: - printk(KERN_INFO "advanced SiS pirq mapping not yet implemented\n"); - return 0; - default: - printk(KERN_INFO "SiS router pirq escape (%d)\n", pirq); - return 0; - } - pci_write_config_byte(router, reg, x); - - return 1; -} - -#endif - -/* Support for AMD756 PCI IRQ Routing - * Jhon H. Caicedo - * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... 
(jhcaiced) - * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced) - * The AMD756 pirq rules are nibble-based - * offset 0x56 0-3 PIRQA 4-7 PIRQB - * offset 0x57 0-3 PIRQC 4-7 PIRQD - */ -static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq) -{ - u8 irq; - irq = 0; - if (pirq <= 4) - { - irq = read_config_nybble(router, 0x56, pirq - 1); - } - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", - dev->vendor, dev->device, pirq, irq); - return irq; -} - -static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) -{ - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", - dev->vendor, dev->device, pirq, irq); - if (pirq <= 4) - { - write_config_nybble(router, 0x56, pirq - 1, irq); - } - return 1; -} - -static struct irq_router pirq_routers[] = { -#if 0 /* all these do not exist on Hammer currently, but keep one example - for each. All these vendors have announced K8 chipsets, so we'll - eventually need a router for them. Luckily they tend to use the - same ones, so with luck just enabling the existing ones will work - when you know the final PCI ids. */ - - { "ALI", PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, pirq_ali_get, pirq_ali_set }, - - { "VIA", PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0, pirq_via_get, pirq_via_set }, - - { "SIS", PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503, pirq_sis_get, pirq_sis_set }, - -#endif - - { "AMD756 VIPER", PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_740B, - pirq_amd756_get, pirq_amd756_set }, - - { "default", 0, 0, NULL, NULL } -}; - -static struct irq_router *pirq_router; -static struct pci_dev *pirq_router_dev; - -static void __init pirq_find_router(void) -{ - struct irq_routing_table *rt = pirq_table; - struct irq_router *r; - - DBG("PCI: Attempting to find IRQ router for %04x:%04x\n", - rt->rtr_vendor, rt->rtr_device); - - /* fall back to default router if nothing else found */ - pirq_router = &pirq_routers[ARRAY_SIZE(pirq_routers) - 1]; - - pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn); - if (!pirq_router_dev) { - DBG("PCI: Interrupt router not found at %02x:%02x\n", rt->rtr_bus, rt->rtr_devfn); - return; - } - - for(r=pirq_routers; r->vendor; r++) { - /* Exact match against router table entry? Use it! */ - if (r->vendor == rt->rtr_vendor && r->device == rt->rtr_device) { - pirq_router = r; - break; - } - /* Match against router device entry? 
Use it as a fallback */ - if (r->vendor == pirq_router_dev->vendor && r->device == pirq_router_dev->device) { - pirq_router = r; - } - } - printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", - pirq_router->name, - pirq_router_dev->vendor, - pirq_router_dev->device, - pirq_router_dev->slot_name); -} - -static struct irq_info *pirq_get_info(struct pci_dev *dev) -{ - struct irq_routing_table *rt = pirq_table; - int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); - struct irq_info *info; - - for (info = rt->slots; entries--; info++) - if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn)) - return info; - return NULL; -} - -static irqreturn_t pcibios_test_irq_handler(int irq, void *dev_id, struct pt_regs *regs) -{ - return IRQ_NONE; -} - -static int pcibios_lookup_irq(struct pci_dev *dev, int assign) -{ - u8 pin; - struct irq_info *info; - int i, pirq, newirq; - int irq = 0; - u32 mask; - struct irq_router *r = pirq_router; - struct pci_dev *dev2 = NULL; - char *msg = NULL; - - /* Find IRQ pin */ - pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); - if (!pin) { - DBG(" -> no interrupt pin\n"); - return 0; - } - pin = pin - 1; - - /* Find IRQ routing entry */ - - if (!pirq_table) - return 0; - - DBG("IRQ for %s:%d", dev->slot_name, pin); - info = pirq_get_info(dev); - if (!info) { - DBG(" -> not found in routing table\n"); - return 0; - } - pirq = info->irq[pin].link; - mask = info->irq[pin].bitmap; - if (!pirq) { - DBG(" -> not routed\n"); - return 0; - } - DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs); - mask &= pcibios_irq_mask; - - /* Work around broken HP Pavilion Notebooks which assign USB to - IRQ 9 even though it is actually wired to IRQ 11 */ - - if (broken_hp_bios_irq9 && pirq == 0x59 && dev->irq == 9) { - dev->irq = 11; - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 11); - r->set(pirq_router_dev, dev, pirq, 11); - } - - /* - * Find the best IRQ to assign: use the one - * reported by the device if possible. - */ - newirq = dev->irq; - if (!((1 << newirq) & mask)) { - if ( pci_probe & PCI_USE_PIRQ_MASK) newirq = 0; - else printk(KERN_WARNING "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n", newirq, dev->slot_name); - } - if (!newirq && assign) { - for (i = 0; i < 16; i++) { - if (!(mask & (1 << i))) - continue; - if (pirq_penalty[i] < pirq_penalty[newirq] && - !request_irq(i, pcibios_test_irq_handler, SA_SHIRQ, "pci-test", dev)) { - free_irq(i, dev); - newirq = i; - } - } - } - DBG(" -> newirq=%d", newirq); - - /* Check if it is hardcoded */ - if ((pirq & 0xf0) == 0xf0) { - irq = pirq & 0xf; - DBG(" -> hardcoded IRQ %d\n", irq); - msg = "Hardcoded"; - } else if ( r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ - ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask)) ) { - DBG(" -> got IRQ %d\n", irq); - msg = "Found"; - } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { - DBG(" -> assigning IRQ %d", newirq); - if (r->set(pirq_router_dev, dev, pirq, newirq)) { - eisa_set_level_irq(newirq); - DBG(" ... OK\n"); - msg = "Assigned"; - irq = newirq; - } - } - - if (!irq) { - DBG(" ... 
failed\n"); - if (newirq && mask == (1 << newirq)) { - msg = "Guessed"; - irq = newirq; - } else - return 0; - } - printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, dev->slot_name); - - /* Update IRQ for all devices with the same pirq value */ - while ((dev2 = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { - pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin); - if (!pin) - continue; - pin--; - info = pirq_get_info(dev2); - if (!info) - continue; - if (info->irq[pin].link == pirq) { - /* We refuse to override the dev->irq information. Give a warning! */ - if ( dev2->irq && dev2->irq != irq && \ - (!(pci_probe & PCI_USE_PIRQ_MASK) || \ - ((1 << dev2->irq) & mask)) ) { - printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", - dev2->slot_name, dev2->irq, irq); - continue; - } - dev2->irq = irq; - pirq_penalty[irq]++; - if (dev != dev2) - printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, dev2->slot_name); - } - } - return 1; -} - -void __init pcibios_fixup_irqs(void) -{ - struct pci_dev *dev = NULL; - u8 pin; - - DBG("PCI: IRQ fixup\n"); - while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - /* - * If the BIOS has set an out of range IRQ number, just ignore it. - * Also keep track of which IRQ's are already in use. - */ - if (dev->irq >= 16) { - DBG("%s: ignoring bogus IRQ %d\n", dev->slot_name, dev->irq); - dev->irq = 0; - } - /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */ - if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000) - pirq_penalty[dev->irq] = 0; - pirq_penalty[dev->irq]++; - } - - dev = NULL; - while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); -#ifdef CONFIG_X86_IO_APIC - /* - * Recalculate IRQ numbers if we use the I/O APIC. - */ - if (io_apic_assign_pci_irqs) - { - int irq; - - if (pin) { - pin--; /* interrupt pins are numbered starting from 1 */ - irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); - /* - * Busses behind bridges are typically not listed in the MP-table. - * In this case we have to look up the IRQ based on the parent bus, - * parent slot, and pin number. The SMP code detects such bridged - * busses itself so we should get into this branch reliably. - */ - if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ - struct pci_dev * bridge = dev->bus->self; - - pin = (pin + PCI_SLOT(dev->devfn)) % 4; - irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, - PCI_SLOT(bridge->devfn), pin); - if (irq >= 0) - printk(KERN_WARNING "PCI: using PPB(B%d,I%d,P%d) to get irq %d\n", - bridge->bus->number, PCI_SLOT(bridge->devfn), pin, irq); - } - if (irq >= 0) { - printk(KERN_INFO "PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %d\n", - dev->bus->number, PCI_SLOT(dev->devfn), pin, irq); - dev->irq = irq; - } - } - } -#endif - /* - * Still no IRQ? Try to lookup one... 
- */ - if (pin && !dev->irq) - pcibios_lookup_irq(dev, 0); - } -} - -static int __init pcibios_irq_init(void) -{ - DBG("PCI: IRQ init\n"); - - if (pcibios_enable_irq) - return 0; - - pirq_table = pirq_find_routing_table(); - - if (pirq_table) { - pirq_peer_trick(); - pirq_find_router(); - if (pirq_table->exclusive_irqs) { - int i; - for (i=0; i<16; i++) - if (!(pirq_table->exclusive_irqs & (1 << i))) - pirq_penalty[i] += 100; - } - /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */ - if (io_apic_assign_pci_irqs) - pirq_table = NULL; - } - - pcibios_enable_irq = pirq_enable_irq; - - pcibios_fixup_irqs(); - return 0; -} - -subsys_initcall(pcibios_irq_init); - - -void pcibios_penalize_isa_irq(int irq) -{ - /* - * If any ISAPnP device reports an IRQ in its list of possible - * IRQ's, we try to avoid assigning it to PCI devices. - */ - pirq_penalty[irq] += 100; -} - -int pirq_enable_irq(struct pci_dev *dev) -{ - u8 pin; - extern int interrupt_line_quirk; - pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); - if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { - /* With IDE legacy devices the IRQ lookup failure is not a problem.. */ - if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5)) - return 0; - - printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.\n", - 'A' + pin - 1, dev->slot_name); - } - /* VIA bridges use interrupt line for apic/pci steering across - the V-Link */ - else if (interrupt_line_quirk) - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); - - return 0; -} diff -urN linux-2.5.71-bk2/arch/x86_64/pci/legacy.c linux-2.5.72/arch/x86_64/pci/legacy.c --- linux-2.5.71-bk2/arch/x86_64/pci/legacy.c 2003-06-14 12:17:56.000000000 -0700 +++ linux-2.5.72/arch/x86_64/pci/legacy.c 1969-12-31 16:00:00.000000000 -0800 @@ -1,68 +0,0 @@ -/* - * legacy.c - traditional, old school PCI bus probing - */ -#include -#include -#include "pci.h" - -/* - * Discover remaining PCI buses in case there are peer host bridges. - * We use the number of last PCI bus provided by the PCI BIOS. 
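The VIA and AMD756 router hooks removed above both pack two 4-bit IRQ values into each PCI config byte and call read_config_nybble()/write_config_nybble(), which this hunk only references. A minimal sketch of such helpers, assuming the usual layout (even index in the low nibble, odd index in the high nibble) -- an illustration, not the verbatim deleted code:

static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr)
{
	u8 x;

	pci_read_config_byte(router, offset + (nr >> 1), &x);
	return (nr & 1) ? (x >> 4) : (x & 0xf);	/* odd index -> high nibble */
}

static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned val)
{
	u8 x;

	pci_read_config_byte(router, offset + (nr >> 1), &x);
	x = (nr & 1) ? ((x & 0x0f) | ((val & 0xf) << 4)) : ((x & 0xf0) | (val & 0xf));
	pci_write_config_byte(router, offset + (nr >> 1), x);
}

With that layout, pirq_via_get() above reads nibble "pirq" of the byte pair at 0x55, and the AMD756 hooks use the 0x56/0x57 pair exactly as described in the deleted comment (0x56 low/high = PIRQA/PIRQB, 0x57 low/high = PIRQC/PIRQD).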
- */ -void __devinit pcibios_fixup_peer_bridges(void) -{ - int n; - struct pci_bus *bus; - struct pci_dev *dev; - u16 l; - - if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) - return; - DBG("PCI: Peer bridge fixup\n"); - - bus = kmalloc(sizeof(*bus), GFP_ATOMIC); - dev = kmalloc(sizeof(*dev), GFP_ATOMIC); - if (!bus || !dev) { - printk(KERN_ERR "Out of memory in %s\n", __FUNCTION__); - goto exit; - } - - for (n=0; n <= pcibios_last_bus; n++) { - if (pci_bus_exists(&pci_root_buses, n)) - continue; - bus->number = n; - bus->ops = pci_root_ops; - dev->bus = bus; - for (dev->devfn=0; dev->devfn<256; dev->devfn += 8) - if (!pci_read_config_word(dev, PCI_VENDOR_ID, &l) && - l != 0x0000 && l != 0xffff) { - DBG("Found device at %02x:%02x [%04x]\n", n, dev->devfn, l); - printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); - pci_scan_bus(n, pci_root_ops, NULL); - break; - } - } -exit: - kfree(dev); - kfree(bus); -} - -static int __init pci_legacy_init(void) -{ - if (!pci_root_ops) { - printk("PCI: System does not support PCI\n"); - return 0; - } - - if (pcibios_scanned++) - return 0; - - printk("PCI: Probing PCI hardware\n"); - pci_root_bus = pcibios_scan_root(0); - - pcibios_fixup_peer_bridges(); - - return 0; -} - -subsys_initcall(pci_legacy_init); diff -urN linux-2.5.71-bk2/arch/x86_64/pci/pci.h linux-2.5.72/arch/x86_64/pci/pci.h --- linux-2.5.71-bk2/arch/x86_64/pci/pci.h 2003-06-14 12:18:07.000000000 -0700 +++ linux-2.5.72/arch/x86_64/pci/pci.h 1969-12-31 16:00:00.000000000 -0800 @@ -1,75 +0,0 @@ -/* - * Low-Level PCI Access for x86-64 machines. - * - * (c) 1999 Martin Mares - */ - -#undef DEBUG - -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) -#endif - -#define PCI_PROBE_BIOS 0x0001 -#define PCI_PROBE_CONF1 0x0002 -#define PCI_PROBE_CONF2 0x0004 -#define PCI_NO_SORT 0x0100 -#define PCI_BIOS_SORT 0x0200 -#define PCI_NO_CHECKS 0x0400 -#define PCI_USE_PIRQ_MASK 0x0800 -#define PCI_ASSIGN_ROMS 0x1000 -#define PCI_BIOS_IRQ_SCAN 0x2000 -#define PCI_ASSIGN_ALL_BUSSES 0x4000 -#define PCI_NO_ACPI_ROUTING 0x8000 - -extern unsigned int pci_probe; - -extern unsigned int pcibios_max_latency; - -void pcibios_resource_survey(void); -int pcibios_enable_resources(struct pci_dev *, int); - -/* pci-pc.c */ - -extern int pcibios_last_bus; -extern struct pci_bus *pci_root_bus; -extern struct pci_ops *pci_root_ops; - -/* pci-irq.c */ - -struct irq_info { - u8 bus, devfn; /* Bus, device and function */ - struct { - u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ - u16 bitmap; /* Available IRQs */ - } __attribute__((packed)) irq[4]; - u8 slot; /* Slot number, 0=onboard */ - u8 rfu; -} __attribute__((packed)); - -struct irq_routing_table { - u32 signature; /* PIRQ_SIGNATURE should be here */ - u16 version; /* PIRQ_VERSION */ - u16 size; /* Table size in bytes */ - u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ - u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ - u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ - u32 miniport_data; /* Crap */ - u8 rfu[11]; - u8 checksum; /* Modulo 256 checksum must give zero */ - struct irq_info slots[0]; -} __attribute__((packed)); - -extern unsigned int pcibios_irq_mask; - -extern int pcibios_scanned; -extern spinlock_t pci_config_lock; - -int pirq_enable_irq(struct pci_dev *dev); - -extern int (*pcibios_enable_irq)(struct pci_dev *dev); - -/* legacy.c */ -extern void pcibios_fixup_peer_bridges(void); diff -urN linux-2.5.71-bk2/arch/x86_64/pci/x86-64.c 
linux-2.5.72/arch/x86_64/pci/x86-64.c --- linux-2.5.71-bk2/arch/x86_64/pci/x86-64.c 2003-06-14 12:18:34.000000000 -0700 +++ linux-2.5.72/arch/x86_64/pci/x86-64.c 1969-12-31 16:00:00.000000000 -0800 @@ -1,299 +0,0 @@ -/* - * Low-Level PCI Access for x86-64 machines - * - * Copyright 1993, 1994 Drew Eckhardt - * Visionary Computing - * (Unix and Linux consulting and custom programming) - * Drew@Colorado.EDU - * +1 (303) 786-7975 - * - * Drew's work was sponsored by: - * iX Multiuser Multitasking Magazine - * Hannover, Germany - * hm@ix.de - * - * Copyright 1997--2000 Martin Mares - * - * For more information, please consult the following manuals (look at - * http://www.pcisig.com/ for how to get them): - * - * PCI BIOS Specification - * PCI Local Bus Specification - * PCI to PCI Bridge Specification - * PCI System Design Guide - * - */ - -#include -#include -#include -#include -#include -#include - -#include "pci.h" - -/* - * We need to avoid collisions with `mirrored' VGA ports - * and other strange ISA hardware, so we always want the - * addresses to be allocated in the 0x000-0x0ff region - * modulo 0x400. - * - * Why? Because some silly external IO cards only decode - * the low 10 bits of the IO address. The 0x00-0xff region - * is reserved for motherboard devices that decode all 16 - * bits, so it's ok to allocate at, say, 0x2800-0x28ff, - * but we want to try to avoid allocating at 0x2900-0x2bff - * which might have be mirrored at 0x0100-0x03ff.. - */ -void -pcibios_align_resource(void *data, struct resource *res, - unsigned long size, unsigned long align) -{ - if (res->flags & IORESOURCE_IO) { - unsigned long start = res->start; - - if (start & 0x300) { - start = (start + 0x3ff) & ~0x3ff; - res->start = start; - } - } -} - - -/* - * Handle resources of PCI devices. If the world were perfect, we could - * just allocate all the resource regions and do nothing more. It isn't. - * On the other hand, we cannot just re-allocate all devices, as it would - * require us to know lots of host bridge internals. So we attempt to - * keep as much of the original configuration as possible, but tweak it - * when it's found to be wrong. - * - * Known BIOS problems we have to work around: - * - I/O or memory regions not configured - * - regions configured, but not enabled in the command register - * - bogus I/O addresses above 64K used - * - expansion ROMs left enabled (this may sound harmless, but given - * the fact the PCI specs explicitly allow address decoders to be - * shared between expansion ROMs and other resource regions, it's - * at least dangerous) - * - * Our solution: - * (1) Allocate resources for all buses behind PCI-to-PCI bridges. - * This gives us fixed barriers on where we can allocate. - * (2) Allocate resources for all enabled devices. If there is - * a collision, just mark the resource as unallocated. Also - * disable expansion ROMs during this step. - * (3) Try to allocate resources for disabled devices. If the - * resources were assigned correctly, everything goes well, - * if they weren't, they won't disturb allocation of other - * resources. - * (4) Assign new addresses to resources which were either - * not configured at all or misconfigured. If explicitly - * requested by the user, configure expansion ROM address - * as well. 
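The rounding rule in pcibios_align_resource() above is easier to see on a concrete value; a small illustrative sketch (the standalone helper name is invented for the example):

/* Illustrative only: the I/O alignment rule used by
 * pcibios_align_resource() above, shown as a standalone helper. */
static unsigned long align_io_start(unsigned long start)
{
	if (start & 0x300)		/* low 10 bits fall in the mirror-prone 0x100-0x3ff range */
		start = (start + 0x3ff) & ~0x3ff;	/* push up to the next 1K boundary */
	return start;
}

/* align_io_start(0x2900) == 0x2c00, while align_io_start(0x2810) == 0x2810 */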
- */ - -static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) -{ - struct list_head *ln; - struct pci_bus *bus; - struct pci_dev *dev; - int idx; - struct resource *r, *pr; - - /* Depth-First Search on bus tree */ - for (ln=bus_list->next; ln != bus_list; ln=ln->next) { - bus = pci_bus_b(ln); - if ((dev = bus->self)) { - for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) { - r = &dev->resource[idx]; - if (!r->start) - continue; - pr = pci_find_parent_resource(dev, r); - if (!pr || request_resource(pr, r) < 0) - printk(KERN_ERR "PCI: Cannot allocate resource region %d of bridge %s\n", idx, dev->slot_name); - } - } - pcibios_allocate_bus_resources(&bus->children); - } -} - -static void __init pcibios_allocate_resources(int pass) -{ - struct pci_dev *dev = NULL; - int idx, disabled; - u16 command; - struct resource *r, *pr; - - while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - pci_read_config_word(dev, PCI_COMMAND, &command); - for(idx = 0; idx < 6; idx++) { - r = &dev->resource[idx]; - if (r->parent) /* Already allocated */ - continue; - if (!r->start) /* Address not assigned at all */ - continue; - if (r->flags & IORESOURCE_IO) - disabled = !(command & PCI_COMMAND_IO); - else - disabled = !(command & PCI_COMMAND_MEMORY); - if (pass == disabled) { - DBG("PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d)\n", - r->start, r->end, r->flags, disabled, pass); - pr = pci_find_parent_resource(dev, r); - if (!pr || request_resource(pr, r) < 0) { - printk(KERN_ERR "PCI: Cannot allocate resource region %d of device %s\n", idx, dev->slot_name); - /* We'll assign a new address later */ - r->end -= r->start; - r->start = 0; - } - } - } - if (!pass) { - r = &dev->resource[PCI_ROM_RESOURCE]; - if (r->flags & PCI_ROM_ADDRESS_ENABLE) { - /* Turn the ROM off, leave the resource region, but keep it unregistered. */ - u32 reg; - DBG("PCI: Switching off ROM of %s\n", dev->slot_name); - r->flags &= ~PCI_ROM_ADDRESS_ENABLE; - pci_read_config_dword(dev, dev->rom_base_reg, ®); - pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE); - } - } - } -} - -static void __init pcibios_assign_resources(void) -{ - struct pci_dev *dev = NULL; - int idx; - struct resource *r; - - while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - int class = dev->class >> 8; - - /* Don't touch classless devices and host bridges */ - if (!class || class == PCI_CLASS_BRIDGE_HOST) - continue; - - for(idx=0; idx<6; idx++) { - r = &dev->resource[idx]; - - /* - * Don't touch IDE controllers and I/O ports of video cards! - */ - if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) || - (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO))) - continue; - - /* - * We shall assign a new address to this resource, either because - * the BIOS forgot to do so or because we have decided the old - * address was unusable for some reason. 
- */ - if (!r->start && r->end) - pci_assign_resource(dev, idx); - } - - if (pci_probe & PCI_ASSIGN_ROMS) { - r = &dev->resource[PCI_ROM_RESOURCE]; - r->end -= r->start; - r->start = 0; - if (r->end) - pci_assign_resource(dev, PCI_ROM_RESOURCE); - } - } -} - -void __init pcibios_resource_survey(void) -{ - DBG("PCI: Allocating resources\n"); - pcibios_allocate_bus_resources(&pci_root_buses); - pcibios_allocate_resources(0); - pcibios_allocate_resources(1); - pcibios_assign_resources(); -} - -int pcibios_enable_resources(struct pci_dev *dev, int mask) -{ - u16 cmd, old_cmd; - int idx; - struct resource *r; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; - for(idx=0; idx<6; idx++) { - if (!(mask & (1<resource[idx]; - if (!r->start && r->end) { - printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - if (dev->resource[PCI_ROM_RESOURCE].start) - cmd |= PCI_COMMAND_MEMORY; - if (cmd != old_cmd) { - printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - -/* - * If we set up a device for bus mastering, we need to check the latency - * timer as certain crappy BIOSes forget to set it properly. - */ -unsigned int pcibios_max_latency = 255; - -void pcibios_set_master(struct pci_dev *dev) -{ - u8 lat; - pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); - if (lat < 16) - lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; - else if (lat > pcibios_max_latency) - lat = pcibios_max_latency; - else - return; - printk("PCI: Setting latency timer of device %s to %d\n", dev->slot_name, lat); - pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); -} - -int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine) -{ - unsigned long prot; - - /* I/O space cannot be accessed via normal processor loads and - * stores on this platform. - */ - if (mmap_state == pci_mmap_io) - return -EINVAL; - - /* Leave vm_pgoff as-is, the PCI space address is the physical - * address on this platform. - */ - vma->vm_flags |= (VM_SHM | VM_LOCKED | VM_IO); - - prot = pgprot_val(vma->vm_page_prot); - if (boot_cpu_data.x86 > 3) - prot |= _PAGE_PCD | _PAGE_PWT; - vma->vm_page_prot = __pgprot(prot); - - /* Write-combine setting is ignored, it is changed via the mtrr - * interfaces on this platform. 
- */ - if (remap_page_range(vma, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot)) - return -EAGAIN; - - return 0; -} diff -urN linux-2.5.71-bk2/drivers/char/rtc.c linux-2.5.72/drivers/char/rtc.c --- linux-2.5.71-bk2/drivers/char/rtc.c 2003-06-14 12:18:08.000000000 -0700 +++ linux-2.5.72/drivers/char/rtc.c 2003-06-16 21:54:53.000000000 -0700 @@ -279,7 +279,7 @@ if (rtc_has_irq == 0) return -EIO; - if (count < sizeof(unsigned long)) + if (count < sizeof(unsigned)) return -EINVAL; add_wait_queue(&rtc_wait, &wait); @@ -310,9 +310,10 @@ schedule(); } while (1); - retval = put_user(data, (unsigned long *)buf); - if (!retval) - retval = sizeof(unsigned long); + if (count < sizeof(unsigned long)) + retval = put_user(data, (unsigned int *)buf) ?: sizeof(int); + else + retval = put_user(data, (unsigned long *)buf) ?: sizeof(long); out: current->state = TASK_RUNNING; remove_wait_queue(&rtc_wait, &wait); diff -urN linux-2.5.71-bk2/drivers/net/Space.c linux-2.5.72/drivers/net/Space.c --- linux-2.5.71-bk2/drivers/net/Space.c 2003-06-14 12:17:58.000000000 -0700 +++ linux-2.5.72/drivers/net/Space.c 2003-06-16 21:54:53.000000000 -0700 @@ -398,24 +398,6 @@ return -ENODEV; } -#ifdef CONFIG_NET_FC -static int fcif_probe(struct net_device *dev) -{ - if (dev->base_addr == -1) - return 1; - - if (1 -#ifdef CONFIG_IPHASE5526 - && iph5526_probe(dev) -#endif - && 1 ) { - return 1; /* -ENODEV or -EAGAIN would be more accurate. */ - } - return 0; -} -#endif /* CONFIG_NET_FC */ - - #ifdef CONFIG_ETHERTAP static struct net_device tap0_dev = { .name = "tap0", @@ -589,22 +571,6 @@ #endif -#ifdef CONFIG_NET_FC -static struct net_device fc1_dev = { - .name = "fc1", - .next = NEXT_DEV, - .init = fcif_probe -}; -static struct net_device fc0_dev = { - .name = "fc0", - .next = &fc1_dev, - .init = fcif_probe -}; -#undef NEXT_DEV -#define NEXT_DEV (&fc0_dev) -#endif - - #ifdef CONFIG_SBNI static struct net_device sbni7_dev = { .name = "sbni7", diff -urN linux-2.5.71-bk2/drivers/net/fc/iph5526.c linux-2.5.72/drivers/net/fc/iph5526.c --- linux-2.5.71-bk2/drivers/net/fc/iph5526.c 2003-06-14 12:18:02.000000000 -0700 +++ linux-2.5.72/drivers/net/fc/iph5526.c 2003-06-16 21:54:53.000000000 -0700 @@ -239,19 +239,7 @@ static int __init iph5526_probe_pci(struct net_device *dev) { -#ifdef MODULE struct fc_info *fi = (struct fc_info *)dev->priv; -#else - struct fc_info *fi = fc[count]; - static int count; - int err; - - if (!fi) - return -ENODEV; - - fc_setup(dev); - count++; -#endif fi->dev = dev; dev->base_addr = fi->base_addr; dev->irq = fi->irq; @@ -4479,8 +4467,6 @@ return buf; } -#ifdef MODULE - #define NAMELEN 8 /* # of chars for storing dev->name */ static struct net_device *dev_fc[MAX_FC_CARDS]; @@ -4491,7 +4477,7 @@ static int scsi_registered; -int init_module(void) +static int __init iph5526_init(void) { int i = 0; @@ -4530,7 +4516,7 @@ return 0; } -void cleanup_module(void) +static void __exit iph5526_exit(void) { int i = 0; while(fc[i] != NULL) { @@ -4549,7 +4535,9 @@ if (scsi_registered == TRUE) scsi_unregister_host(&driver_template); } -#endif /* MODULE */ + +module_init(iph5526_init); +module_exit(iph5526_exit); void clean_up_memory(struct fc_info *fi) { diff -urN linux-2.5.71-bk2/drivers/net/tun.c linux-2.5.72/drivers/net/tun.c --- linux-2.5.71-bk2/drivers/net/tun.c 2003-06-16 21:54:50.000000000 -0700 +++ linux-2.5.72/drivers/net/tun.c 2003-06-16 21:54:53.000000000 -0700 @@ -404,6 +404,7 @@ return -ENOMEM; tun = dev->priv; + tun->dev = dev; tun->flags = flags; if 
(strchr(dev->name, '%')) { @@ -566,8 +567,6 @@ rtnl_unlock(); - if (!(tun->flags & TUN_PERSIST)) - kfree(tun); return 0; } diff -urN linux-2.5.71-bk2/drivers/net/wan/dscc4.c linux-2.5.72/drivers/net/wan/dscc4.c --- linux-2.5.71-bk2/drivers/net/wan/dscc4.c 2003-06-14 12:18:04.000000000 -0700 +++ linux-2.5.72/drivers/net/wan/dscc4.c 2003-06-16 21:54:53.000000000 -0700 @@ -164,7 +164,14 @@ #define SOURCE_ID(flags) (((flags) >> 28) & 0x03) #define TO_SIZE(state) (((state) >> 16) & 0x1fff) -#define TO_STATE(len) cpu_to_le32(((len) & TxSizeMax) << 16) + +/* + * Given the operating range of Linux HDLC, the 2 defines below could be + * made simpler. However they are a fine reminder for the limitations of + * the driver: it's better to stay < TxSizeMax and < RxSizeMax. + */ +#define TO_STATE_TX(len) cpu_to_le32(((len) & TxSizeMax) << 16) +#define TO_STATE_RX(len) cpu_to_le32((RX_MAX(len) % RxSizeMax) << 16) #define RX_MAX(len) ((((len) >> 5) + 1) << 5) #define SCC_REG_START(dpriv) (SCC_START+(dpriv->dev_id)*SCC_OFFSET) @@ -272,7 +279,8 @@ #define Idt 0x00080000 #define TxSccRes 0x01000000 #define RxSccRes 0x00010000 -#define TxSizeMax 0x1fff +#define TxSizeMax 0x1fff /* Datasheet DS1 - 11.1.1.1 */ +#define RxSizeMax 0x1ffc /* Datasheet DS1 - 11.1.2.1 */ #define Ccr0ClockMask 0x0000003f #define Ccr1LoopMask 0x00000200 @@ -467,8 +475,8 @@ skbuff = dpriv->rx_skbuff; for (i = 0; i < RX_RING_SIZE; i++) { if (*skbuff) { - pci_unmap_single(pdev, rx_fd->data, (*skbuff)->len, - PCI_DMA_FROMDEVICE); + pci_unmap_single(pdev, rx_fd->data, + RX_MAX(HDLC_MAX_MRU), PCI_DMA_FROMDEVICE); dev_kfree_skb(*skbuff); } skbuff++; @@ -480,17 +488,18 @@ { unsigned int dirty = dpriv->rx_dirty%RX_RING_SIZE; struct RxFD *rx_fd = dpriv->rx_fd + dirty; + const int len = RX_MAX(HDLC_MAX_MRU); struct sk_buff *skb; int ret = 0; - skb = dev_alloc_skb(RX_MAX(HDLC_MAX_MRU)); + skb = dev_alloc_skb(len); dpriv->rx_skbuff[dirty] = skb; if (skb) { skb->dev = dev; - skb->protocol = htons(ETH_P_IP); + skb->protocol = hdlc_type_trans(skb, dev); skb->mac.raw = skb->data; rx_fd->data = pci_map_single(dpriv->pci_priv->pdev, skb->data, - skb->len, PCI_DMA_FROMDEVICE); + len, PCI_DMA_FROMDEVICE); } else { rx_fd->data = (u32) NULL; ret = -1; @@ -613,13 +622,12 @@ } pkt_len = TO_SIZE(rx_fd->state2); pci_dma_sync_single(pdev, rx_fd->data, pkt_len, PCI_DMA_FROMDEVICE); - pci_unmap_single(pdev, rx_fd->data, pkt_len, PCI_DMA_FROMDEVICE); + pci_unmap_single(pdev, rx_fd->data, RX_MAX(HDLC_MAX_MRU), PCI_DMA_FROMDEVICE); if ((skb->data[--pkt_len] & FrameOk) == FrameOk) { stats->rx_packets++; stats->rx_bytes += pkt_len; - skb->tail += pkt_len; - skb->len = pkt_len; - if (netif_running(dev)) + skb_put(skb, pkt_len); + if (netif_running(dev)) skb->protocol = hdlc_type_trans(skb, dev); skb->dev->last_rx = jiffies; netif_rx(skb); @@ -1029,7 +1037,7 @@ next = dpriv->tx_current%TX_RING_SIZE; dpriv->tx_skbuff[next] = skb; tx_fd = dpriv->tx_fd + next; - tx_fd->state = FrameEnd | TO_STATE(skb->len); + tx_fd->state = FrameEnd | TO_STATE_TX(skb->len); tx_fd->data = pci_map_single(ppriv->pdev, skb->data, skb->len, PCI_DMA_TODEVICE); tx_fd->complete = 0x00000000; @@ -1223,9 +1231,9 @@ if (bps) { /* DCE */ printk(KERN_DEBUG "%s: generated RxClk (DCE)\n", dev->name); if (settings->clock_rate != bps) { - settings->clock_rate = bps; printk(KERN_DEBUG "%s: clock adjusted (%08d -> %08d)\n", - dev->name, dpriv->settings.clock_rate, bps); + dev->name, settings->clock_rate, bps); + settings->clock_rate = bps; } } else { /* DTE */ state = 0x80001000; @@ -1436,7 +1444,7 @@ * 
random freeze induced by null sized tx frames. */ tx_fd->data = tx_fd->next; - tx_fd->state = FrameEnd | TO_STATE(2*DUMMY_SKB_SIZE); + tx_fd->state = FrameEnd | TO_STATE_TX(2*DUMMY_SKB_SIZE); tx_fd->complete = 0x00000000; tx_fd->jiffies = 0; @@ -1723,7 +1731,7 @@ skb->len = DUMMY_SKB_SIZE; memcpy(skb->data, version, strlen(version)%DUMMY_SKB_SIZE); - tx_fd->state = FrameEnd | TO_STATE(DUMMY_SKB_SIZE); + tx_fd->state = FrameEnd | TO_STATE_TX(DUMMY_SKB_SIZE); tx_fd->data = pci_map_single(dpriv->pci_priv->pdev, skb->data, DUMMY_SKB_SIZE, PCI_DMA_TODEVICE); dpriv->tx_skbuff[last] = skb; @@ -1754,7 +1762,7 @@ dpriv->tx_dirty = 0xffffffff; i = dpriv->tx_current = 0; do { - tx_fd->state = FrameEnd | TO_STATE(2*DUMMY_SKB_SIZE); + tx_fd->state = FrameEnd | TO_STATE_TX(2*DUMMY_SKB_SIZE); tx_fd->complete = 0x00000000; /* FIXME: NULL should be ok - to be tried */ tx_fd->data = dpriv->tx_fd_dma; @@ -1772,7 +1780,7 @@ rx_fd->state1 = HiDesc; rx_fd->state2 = 0x00000000; rx_fd->end = 0xbabeface; - rx_fd->state1 |= (RX_MAX(HDLC_MAX_MRU) << 16); + rx_fd->state1 |= TO_STATE_RX(HDLC_MAX_MRU); // FIXME: return value verifiee mais traitement suspect if (try_get_rx_skb(dpriv, dev) >= 0) dpriv->rx_dirty++; diff -urN linux-2.5.71-bk2/include/asm-alpha/ide.h linux-2.5.72/include/asm-alpha/ide.h --- linux-2.5.71-bk2/include/asm-alpha/ide.h 2003-06-14 12:18:32.000000000 -0700 +++ linux-2.5.72/include/asm-alpha/ide.h 2003-06-16 21:54:53.000000000 -0700 @@ -75,7 +75,7 @@ for (index = 0; index < MAX_HWIFS; index++) { ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL); hw.irq = ide_default_irq(ide_default_io_base(index)); - ide_register_hw(&hw); + ide_register_hw(&hw, NULL); } #endif } diff -urN linux-2.5.71-bk2/include/asm-x86_64/fixmap.h linux-2.5.72/include/asm-x86_64/fixmap.h --- linux-2.5.71-bk2/include/asm-x86_64/fixmap.h 2003-06-14 12:17:59.000000000 -0700 +++ linux-2.5.72/include/asm-x86_64/fixmap.h 2003-06-16 21:54:53.000000000 -0700 @@ -16,6 +16,7 @@ #include #include #include +#include /* * Here we define all the compile-time 'special' virtual @@ -62,6 +63,10 @@ #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +/* Only covers 32bit vsyscalls currently. Need another set for 64bit. 
*/ +#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) +#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) + #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) extern void __this_fixmap_does_not_exist(void); diff -urN linux-2.5.71-bk2/include/asm-x86_64/io.h linux-2.5.72/include/asm-x86_64/io.h --- linux-2.5.71-bk2/include/asm-x86_64/io.h 2003-06-14 12:18:08.000000000 -0700 +++ linux-2.5.72/include/asm-x86_64/io.h 2003-06-16 21:54:53.000000000 -0700 @@ -105,7 +105,7 @@ #define IO_SPACE_LIMIT 0xffff -#ifdef __KERNEL__ +#if defined(__KERNEL__) && __x86_64__ #include diff -urN linux-2.5.71-bk2/include/asm-x86_64/mmzone.h linux-2.5.72/include/asm-x86_64/mmzone.h --- linux-2.5.71-bk2/include/asm-x86_64/mmzone.h 2003-06-14 12:18:30.000000000 -0700 +++ linux-2.5.72/include/asm-x86_64/mmzone.h 2003-06-16 21:54:53.000000000 -0700 @@ -23,7 +23,6 @@ extern struct pglist_data *node_data[]; -/* kern_addr_valid below hardcodes the same algorithm*/ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) { int nid; @@ -46,19 +45,6 @@ #define local_mapnr(kvaddr) \ ( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) ) -#define kern_addr_valid(kvaddr) ({ \ - int ok = 0; \ - unsigned long index = __pa(kvaddr) >> memnode_shift; \ - if (index <= NODEMAPSIZE) { \ - unsigned nodeid = memnodemap[index]; \ - unsigned long pfn = __pa(kvaddr) >> PAGE_SHIFT; \ - unsigned long start_pfn = node_start_pfn(nodeid); \ - ok = (nodeid != 0xff) && \ - (pfn >= start_pfn) && \ - (pfn < start_pfn + node_size(nodeid)); \ - } \ - ok; \ -}) /* AK: this currently doesn't deal with invalid addresses. We'll see if the 2.5 kernel doesn't pass them diff -urN linux-2.5.71-bk2/include/asm-x86_64/page.h linux-2.5.72/include/asm-x86_64/page.h --- linux-2.5.71-bk2/include/asm-x86_64/page.h 2003-06-14 12:17:57.000000000 -0700 +++ linux-2.5.72/include/asm-x86_64/page.h 2003-06-16 21:54:53.000000000 -0700 @@ -76,6 +76,8 @@ #define __PAGE_OFFSET 0x0000010000000000 /* 1 << 40 */ #define __PHYSICAL_MASK_SHIFT 40 #define __PHYSICAL_MASK ((1UL << __PHYSICAL_MASK_SHIFT) - 1) +#define __VIRTUAL_MASK_SHIFT 48 +#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) #define KERNEL_TEXT_SIZE (40UL*1024*1024) #define KERNEL_TEXT_START 0xffffffff80000000UL diff -urN linux-2.5.71-bk2/include/asm-x86_64/pci.h linux-2.5.72/include/asm-x86_64/pci.h --- linux-2.5.71-bk2/include/asm-x86_64/pci.h 2003-06-14 12:18:08.000000000 -0700 +++ linux-2.5.72/include/asm-x86_64/pci.h 2003-06-16 21:54:53.000000000 -0700 @@ -45,7 +45,6 @@ struct pci_dev; extern int iommu_setup(char *opt); -extern void pci_iommu_init(void); /* Allocate and map kernel buffer using consistent mode DMA for a device. 
* hwdev should be valid struct pci_dev pointer for PCI devices, diff -urN linux-2.5.71-bk2/include/asm-x86_64/pgtable.h linux-2.5.72/include/asm-x86_64/pgtable.h --- linux-2.5.71-bk2/include/asm-x86_64/pgtable.h 2003-06-14 12:18:29.000000000 -0700 +++ linux-2.5.72/include/asm-x86_64/pgtable.h 2003-06-16 21:54:53.000000000 -0700 @@ -339,7 +339,7 @@ #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) #define pmd_bad(x) ((pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE ) #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot))) - +#define pmd_pfn(x) ((pmd_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK) #define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) #define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE }) @@ -392,9 +392,7 @@ #endif /* !__ASSEMBLY__ */ -#ifndef CONFIG_DISCONTIGMEM -#define kern_addr_valid(addr) (1) -#endif +extern int kern_addr_valid(unsigned long addr); #define io_remap_page_range remap_page_range @@ -403,4 +401,9 @@ #define pgtable_cache_init() do { } while (0) #define check_pgt_cache() do { } while (0) +/* fs/proc/kcore.c */ +#define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK) +#define kc_offset_to_vaddr(o) \ + (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o)) + #endif /* _X86_64_PGTABLE_H */ diff -urN linux-2.5.71-bk2/include/asm-x86_64/proto.h linux-2.5.72/include/asm-x86_64/proto.h --- linux-2.5.71-bk2/include/asm-x86_64/proto.h 2003-06-14 12:18:33.000000000 -0700 +++ linux-2.5.72/include/asm-x86_64/proto.h 2003-06-16 21:54:53.000000000 -0700 @@ -16,7 +16,12 @@ extern void init_memory_mapping(void); extern void system_call(void); +extern int kernel_syscall(void); +extern void syscall_init(void); + +extern void ia32_syscall(void); extern void ia32_cstar_target(void); + extern void calibrate_delay(void); extern void cpu_idle(void); extern void sys_ni_syscall(void); @@ -24,6 +29,8 @@ extern void ia32_syscall(void); extern void iommu_hole_init(void); +extern void time_init_smp(void); + extern void do_softirq_thunk(void); extern int numa_setup(char *opt); @@ -39,8 +46,12 @@ extern void reserve_bootmem_generic(unsigned long phys, unsigned len); extern void free_bootmem_generic(unsigned long phys, unsigned len); +extern void load_gs_index(unsigned gs); + extern unsigned long end_pfn_map; +extern unsigned long cpu_initialized; + extern void show_stack(unsigned long * rsp); extern void show_trace(unsigned long * rsp); extern void show_registers(struct pt_regs *regs); @@ -66,9 +77,16 @@ extern unsigned long end_pfn; extern unsigned long table_start, table_end; -extern void syscall_init(void); +extern int exception_trace; +extern int no_iommu, force_mmu; +extern int using_apic_timer; +extern int disable_apic; +extern unsigned cpu_khz; + +extern int fallback_aper_order; +extern int fallback_aper_force; -struct pt_regs; +extern void smp_local_timer_interrupt(struct pt_regs * regs); long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); diff -urN linux-2.5.71-bk2/include/linux/atalk.h linux-2.5.72/include/linux/atalk.h --- linux-2.5.71-bk2/include/linux/atalk.h 2003-06-14 12:18:30.000000000 -0700 +++ linux-2.5.72/include/linux/atalk.h 2003-06-16 21:54:53.000000000 -0700 @@ -198,7 +198,7 @@ #define at_sk(__sk) ((struct atalk_sock *)(__sk)->sk_protinfo) -extern struct sock *atalk_sockets; +extern struct hlist_head atalk_sockets; extern rwlock_t atalk_sockets_lock; extern struct atalk_route *atalk_routes; diff -urN linux-2.5.71-bk2/include/linux/list.h 
linux-2.5.72/include/linux/list.h --- linux-2.5.71-bk2/include/linux/list.h 2003-06-14 12:18:22.000000000 -0700 +++ linux-2.5.72/include/linux/list.h 2003-06-16 21:54:53.000000000 -0700 @@ -380,7 +380,7 @@ /** * list_for_each_continue_rcu - iterate over an rcu-protected list - * continuing from existing point. + * continuing after existing point. * @pos: the &struct list_head to use as a loop counter. * @head: the head for your list. */ @@ -522,6 +522,30 @@ pos && ({ prefetch(pos->next); 1;}) && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ pos = pos->next) + +/** + * hlist_for_each_entry_continue - iterate over a hlist continuing after existing point + * @tpos: the type * to use as a loop counter. + * @pos: the &struct hlist_node to use as a loop counter. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_continue(tpos, pos, member) \ + for (pos = (pos)->next; \ + pos && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_for_each_entry_from - iterate over a hlist continuing from existing point + * @tpos: the type * to use as a loop counter. + * @pos: the &struct hlist_node to use as a loop counter. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_from(tpos, pos, member) \ + for (; pos && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + /** * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry * @tpos: the type * to use as a loop counter. diff -urN linux-2.5.71-bk2/include/net/af_unix.h linux-2.5.72/include/net/af_unix.h --- linux-2.5.71-bk2/include/net/af_unix.h 2003-06-14 12:18:29.000000000 -0700 +++ linux-2.5.72/include/net/af_unix.h 2003-06-16 21:54:53.000000000 -0700 @@ -2,34 +2,34 @@ #define __LINUX_NET_AFUNIX_H extern void unix_inflight(struct file *fp); extern void unix_notinflight(struct file *fp); -typedef struct sock unix_socket; extern void unix_gc(void); #define UNIX_HASH_SIZE 256 -extern unix_socket *unix_socket_table[UNIX_HASH_SIZE+1]; +extern struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; extern rwlock_t unix_table_lock; extern atomic_t unix_tot_inflight; -static inline unix_socket *first_unix_socket(int *i) +static inline struct sock *first_unix_socket(int *i) { for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) { - if (unix_socket_table[*i]) - return unix_socket_table[*i]; + if (!hlist_empty(&unix_socket_table[*i])) + return __sk_head(&unix_socket_table[*i]); } return NULL; } -static inline unix_socket *next_unix_socket(int *i, unix_socket *s) +static inline struct sock *next_unix_socket(int *i, struct sock *s) { + struct sock *next = sk_next(s); /* More in this chain? */ - if (s->sk_next) - return s->sk_next; + if (next) + return next; /* Look for next non-empty chain. 
*/ for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) { - if (unix_socket_table[*i]) - return unix_socket_table[*i]; + if (!hlist_empty(&unix_socket_table[*i])) + return __sk_head(&unix_socket_table[*i]); } return NULL; } @@ -69,7 +69,6 @@ struct vfsmount *mnt; struct semaphore readsem; struct sock *other; - struct sock **list; struct sock *gc_tree; atomic_t inflight; rwlock_t lock; diff -urN linux-2.5.71-bk2/include/net/bluetooth/bluetooth.h linux-2.5.72/include/net/bluetooth/bluetooth.h --- linux-2.5.71-bk2/include/net/bluetooth/bluetooth.h 2003-06-14 12:18:08.000000000 -0700 +++ linux-2.5.72/include/net/bluetooth/bluetooth.h 2003-06-16 21:54:53.000000000 -0700 @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -119,8 +120,8 @@ }; struct bt_sock_list { - struct sock *head; - rwlock_t lock; + struct hlist_head head; + rwlock_t lock; }; int bt_sock_register(int proto, struct net_proto_family *ops); diff -urN linux-2.5.71-bk2/include/net/ipx.h linux-2.5.72/include/net/ipx.h --- linux-2.5.71-bk2/include/net/ipx.h 2003-06-14 12:17:57.000000000 -0700 +++ linux-2.5.72/include/net/ipx.h 2003-06-16 21:54:53.000000000 -0700 @@ -59,7 +59,7 @@ /* socket support */ unsigned short if_sknum; - struct sock *if_sklist; + struct hlist_head if_sklist; spinlock_t if_sklist_lock; /* administrative overhead */ diff -urN linux-2.5.71-bk2/include/net/llc_sap.h linux-2.5.72/include/net/llc_sap.h --- linux-2.5.71-bk2/include/net/llc_sap.h 2003-06-14 12:17:56.000000000 -0700 +++ linux-2.5.72/include/net/llc_sap.h 2003-06-16 21:54:53.000000000 -0700 @@ -35,8 +35,8 @@ struct llc_addr laddr; struct list_head node; struct { - rwlock_t lock; - struct sock *list; + rwlock_t lock; + struct hlist_head list; } sk_list; }; diff -urN linux-2.5.71-bk2/include/net/raw.h linux-2.5.72/include/net/raw.h --- linux-2.5.71-bk2/include/net/raw.h 2003-06-14 12:18:34.000000000 -0700 +++ linux-2.5.72/include/net/raw.h 2003-06-16 21:54:53.000000000 -0700 @@ -28,7 +28,7 @@ * hashing mechanism, make sure you update icmp.c as well. 
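The converted first_unix_socket()/next_unix_socket() helpers above keep the old calling convention while walking hlist chains underneath; a minimal usage sketch (the counting function is hypothetical, shown only to illustrate the iteration pattern):

/* Illustrative only: walk every AF_UNIX socket across the hash table
 * using the helpers converted above. */
static int count_unix_sockets(void)
{
	struct sock *s;
	int i, count = 0;

	read_lock(&unix_table_lock);
	for (s = first_unix_socket(&i); s; s = next_unix_socket(&i, s))
		count++;
	read_unlock(&unix_table_lock);
	return count;
}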
*/ #define RAWV4_HTABLE_SIZE MAX_INET_PROTOS -extern struct sock *raw_v4_htable[RAWV4_HTABLE_SIZE]; +extern struct hlist_head raw_v4_htable[RAWV4_HTABLE_SIZE]; extern rwlock_t raw_v4_lock; diff -urN linux-2.5.71-bk2/include/net/rawv6.h linux-2.5.72/include/net/rawv6.h --- linux-2.5.71-bk2/include/net/rawv6.h 2003-06-14 12:18:08.000000000 -0700 +++ linux-2.5.72/include/net/rawv6.h 2003-06-16 21:54:53.000000000 -0700 @@ -4,7 +4,7 @@ #ifdef __KERNEL__ #define RAWV6_HTABLE_SIZE MAX_INET_PROTOS -extern struct sock *raw_v6_htable[RAWV6_HTABLE_SIZE]; +extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; extern rwlock_t raw_v6_lock; extern void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); diff -urN linux-2.5.71-bk2/include/net/sctp/structs.h linux-2.5.72/include/net/sctp/structs.h --- linux-2.5.71-bk2/include/net/sctp/structs.h 2003-06-14 12:18:51.000000000 -0700 +++ linux-2.5.72/include/net/sctp/structs.h 2003-06-16 21:54:53.000000000 -0700 @@ -97,7 +97,7 @@ unsigned short fastreuse; struct sctp_bind_bucket *next; struct sctp_bind_bucket **pprev; - struct sock *sk; + struct hlist_head sk_list; }; struct sctp_bind_hashbucket { diff -urN linux-2.5.71-bk2/include/net/sock.h linux-2.5.72/include/net/sock.h --- linux-2.5.71-bk2/include/net/sock.h 2003-06-14 12:18:29.000000000 -0700 +++ linux-2.5.72/include/net/sock.h 2003-06-16 21:54:53.000000000 -0700 @@ -41,6 +41,7 @@ #define _SOCK_H #include +#include #include #include #include @@ -93,10 +94,8 @@ * @skc_state - Connection state * @skc_reuse - %SO_REUSEADDR setting * @skc_bound_dev_if - bound device index if != 0 - * @skc_next - main hash linkage for various protocol lookup tables - * @skc_pprev - main hash linkage for various protocol lookup tables - * @skc_bind_next - main hash linkage for various protocol lookup tables - * @skc_bind_pprev - main hash linkage for various protocol lookup tables + * @skc_node - main hash linkage for various protocol lookup tables + * @skc_bind_node - bind hash linkage for various protocol lookup tables * @skc_refcnt - reference count * * This is the minimal network layer representation of sockets, the header @@ -107,10 +106,8 @@ volatile unsigned char skc_state; unsigned char skc_reuse; int skc_bound_dev_if; - struct sock *skc_next; - struct sock **skc_pprev; - struct sock *skc_bind_next; - struct sock **skc_bind_pprev; + struct hlist_node skc_node; + struct hlist_node skc_bind_node; atomic_t skc_refcnt; }; @@ -187,10 +184,8 @@ #define sk_state __sk_common.skc_state #define sk_reuse __sk_common.skc_reuse #define sk_bound_dev_if __sk_common.skc_bound_dev_if -#define sk_next __sk_common.skc_next -#define sk_pprev __sk_common.skc_pprev -#define sk_bind_next __sk_common.skc_bind_next -#define sk_bind_pprev __sk_common.skc_bind_pprev +#define sk_node __sk_common.skc_node +#define sk_bind_node __sk_common.skc_bind_node #define sk_refcnt __sk_common.skc_refcnt volatile unsigned char sk_zapped; unsigned char sk_shutdown; @@ -262,6 +257,74 @@ void (*sk_destruct)(struct sock *sk); }; +/* + * Hashed lists helper routines + */ +static inline struct sock *__sk_head(struct hlist_head *head) +{ + return hlist_entry(head->first, struct sock, sk_node); +} + +static inline struct sock *sk_head(struct hlist_head *head) +{ + return hlist_empty(head) ? NULL : __sk_head(head); +} + +static inline struct sock *sk_next(struct sock *sk) +{ + return sk->sk_node.next ? 
+ hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; +} + +static inline int sk_unhashed(struct sock *sk) +{ + return hlist_unhashed(&sk->sk_node); +} + +static inline int sk_hashed(struct sock *sk) +{ + return sk->sk_node.pprev != NULL; +} + +static __inline__ void sk_node_init(struct hlist_node *node) +{ + node->pprev = NULL; +} + +static __inline__ int sk_del_node_init(struct sock *sk) +{ + if (sk_hashed(sk)) { + __hlist_del(&sk->sk_node); + sk_node_init(&sk->sk_node); + return 1; + } + return 0; +} + +static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list) +{ + hlist_add_head(&sk->sk_node, list); +} + +static __inline__ void sk_add_bind_node(struct sock *sk, + struct hlist_head *list) +{ + hlist_add_head(&sk->sk_bind_node, list); +} + +#define sk_for_each(__sk, node, list) \ + hlist_for_each_entry(__sk, node, list, sk_node) +#define sk_for_each_from(__sk, node) \ + if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ + hlist_for_each_entry_from(__sk, node, sk_node) +#define sk_for_each_continue(__sk, node) \ + if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ + hlist_for_each_entry_continue(__sk, node, sk_node) +#define sk_for_each_safe(__sk, node, tmp, list) \ + hlist_for_each_entry_safe(__sk, node, tmp, list, sk_node) +#define sk_for_each_bound(__sk, node, list) \ + hlist_for_each_entry(__sk, node, list, sk_bind_node) + /* Sock flags */ enum sock_flags { SOCK_DEAD, diff -urN linux-2.5.71-bk2/include/net/tcp.h linux-2.5.72/include/net/tcp.h --- linux-2.5.71-bk2/include/net/tcp.h 2003-06-16 21:54:50.000000000 -0700 +++ linux-2.5.72/include/net/tcp.h 2003-06-16 21:54:53.000000000 -0700 @@ -25,6 +25,7 @@ #undef TCP_CLEAR_TIMERS #include +#include #include #include #include @@ -42,8 +43,8 @@ * for the rest. I'll experiment with dynamic table growth later. */ struct tcp_ehash_bucket { - rwlock_t lock; - struct sock *chain; + rwlock_t lock; + struct hlist_head chain; } __attribute__((__aligned__(8))); /* This is for listening sockets, thus all sockets which possess wildcards. */ @@ -83,16 +84,27 @@ struct tcp_bind_bucket { unsigned short port; signed short fastreuse; - struct tcp_bind_bucket *next; - struct sock *owners; - struct tcp_bind_bucket **pprev; + struct hlist_node node; + struct hlist_head owners; }; +#define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node) + struct tcp_bind_hashbucket { spinlock_t lock; - struct tcp_bind_bucket *chain; + struct hlist_head chain; }; +static inline struct tcp_bind_bucket *__tb_head(struct tcp_bind_hashbucket *head) +{ + return hlist_entry(head->chain.first, struct tcp_bind_bucket, node); +} + +static inline struct tcp_bind_bucket *tb_head(struct tcp_bind_hashbucket *head) +{ + return hlist_empty(&head->chain) ? NULL : __tb_head(head); +} + extern struct tcp_hashinfo { /* This is for sockets with full identity only. Sockets here will * always be without wildcards and will have the following invariant: @@ -116,7 +128,7 @@ * table where wildcard'd TCP sockets can exist. Hash function here * is just local port number. */ - struct sock *__tcp_listening_hash[TCP_LHTABLE_SIZE]; + struct hlist_head __tcp_listening_hash[TCP_LHTABLE_SIZE]; /* All the above members are written once at bootup and * never written again _or_ are predominantly read-access. 
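The helpers and sk_for_each*() macros added above are what the per-protocol conversions later in this patch build on. A before/after sketch of a typical hash-bucket lookup (function and variable names are illustrative only, not taken from the patch):

/* Old style (2.5.71): walk a singly linked chain via sk->sk_next. */
static struct sock *find_old(struct sock *head, unsigned short num)
{
	struct sock *sk;

	for (sk = head; sk; sk = sk->sk_next)
		if (inet_sk(sk)->num == num)
			return sk;
	return NULL;
}

/* New style (2.5.72): the chain is an hlist_head and iteration goes
 * through sk_for_each(), which needs a scratch hlist_node cursor. */
static struct sock *find_new(struct hlist_head *head, unsigned short num)
{
	struct sock *sk;
	struct hlist_node *node;

	sk_for_each(sk, node, head)
		if (inet_sk(sk)->num == num)
			return sk;
	return NULL;
}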
@@ -180,10 +192,8 @@ #define tw_state __tw_common.skc_state #define tw_reuse __tw_common.skc_reuse #define tw_bound_dev_if __tw_common.skc_bound_dev_if -#define tw_next __tw_common.skc_next -#define tw_pprev __tw_common.skc_pprev -#define tw_bind_next __tw_common.skc_bind_next -#define tw_bind_pprev __tw_common.skc_bind_pprev +#define tw_node __tw_common.skc_node +#define tw_bind_node __tw_common.skc_bind_node #define tw_refcnt __tw_common.skc_refcnt volatile unsigned char tw_substate; unsigned char tw_rcv_wscale; @@ -205,15 +215,56 @@ long tw_ts_recent_stamp; unsigned long tw_ttd; struct tcp_bind_bucket *tw_tb; - struct tcp_tw_bucket *tw_next_death; - struct tcp_tw_bucket **tw_pprev_death; - + struct hlist_node tw_death_node; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct in6_addr tw_v6_daddr; struct in6_addr tw_v6_rcv_saddr; #endif }; +static __inline__ void tw_add_node(struct tcp_tw_bucket *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_node, list); +} + +static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_bind_node, list); +} + +static inline int tw_dead_hashed(struct tcp_tw_bucket *tw) +{ + return tw->tw_death_node.pprev != NULL; +} + +static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw) +{ + tw->tw_death_node.pprev = NULL; +} + +static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw) +{ + __hlist_del(&tw->tw_death_node); + tw_dead_node_init(tw); +} + +static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw) +{ + if (tw_dead_hashed(tw)) { + __tw_del_dead_node(tw); + return 1; + } + return 0; +} + +#define tw_for_each(tw, node, head) \ + hlist_for_each_entry(tw, node, head, tw_node) + +#define tw_for_each_inmate(tw, node, safe, jail) \ + hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) + #define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk)) extern kmem_cache_t *tcp_timewait_cachep; diff -urN linux-2.5.71-bk2/include/net/udp.h linux-2.5.72/include/net/udp.h --- linux-2.5.71-bk2/include/net/udp.h 2003-06-14 12:18:30.000000000 -0700 +++ linux-2.5.72/include/net/udp.h 2003-06-16 21:54:53.000000000 -0700 @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -34,16 +35,17 @@ * and hashing code needs to work with different AF's yet * the port space is shared. 
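tw_for_each_inmate() above wraps hlist_for_each_entry_safe(), so time-wait buckets can be unlinked while the death-row chain is being walked; a hedged sketch of that pattern (the drain function itself is invented for illustration):

/* Illustrative only: unlink every bucket on a death-row chain while
 * iterating; this is safe because the _safe iterator caches the next
 * node before the loop body runs. */
static int drain_death_row(struct hlist_head *jail)
{
	struct tcp_tw_bucket *tw;
	struct hlist_node *node, *safe;
	int unlinked = 0;

	tw_for_each_inmate(tw, node, safe, jail) {
		__tw_del_dead_node(tw);
		unlinked++;
	}
	return unlinked;
}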
*/ -extern struct sock *udp_hash[UDP_HTABLE_SIZE]; +extern struct hlist_head udp_hash[UDP_HTABLE_SIZE]; extern rwlock_t udp_hash_lock; extern int udp_port_rover; static inline int udp_lport_inuse(u16 num) { - struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)]; + struct sock *sk; + struct hlist_node *node; - for (; sk; sk = sk->sk_next) + sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) if (inet_sk(sk)->num == num) return 1; return 0; diff -urN linux-2.5.71-bk2/include/net/x25.h linux-2.5.72/include/net/x25.h --- linux-2.5.71-bk2/include/net/x25.h 2003-06-14 12:18:24.000000000 -0700 +++ linux-2.5.72/include/net/x25.h 2003-06-16 21:54:53.000000000 -0700 @@ -263,7 +263,7 @@ }; #define X25_SKB_CB(s) ((struct x25_skb_cb *) ((s)->cb)) -extern struct sock *x25_list; +extern struct hlist_head x25_list; extern rwlock_t x25_list_lock; extern struct list_head x25_route_list; extern rwlock_t x25_route_list_lock; diff -urN linux-2.5.71-bk2/net/appletalk/atalk_proc.c linux-2.5.72/net/appletalk/atalk_proc.c --- linux-2.5.71-bk2/net/appletalk/atalk_proc.c 2003-06-14 12:18:33.000000000 -0700 +++ linux-2.5.72/net/appletalk/atalk_proc.c 2003-06-16 21:54:53.000000000 -0700 @@ -143,10 +143,13 @@ static __inline__ struct sock *atalk_get_socket_idx(loff_t pos) { struct sock *s; + struct hlist_node *node; - for (s = atalk_sockets; pos && s; s = s->sk_next) - --pos; - + sk_for_each(s, node, &atalk_sockets) + if (!pos--) + goto found; + s = NULL; +found: return s; } @@ -164,13 +167,10 @@ ++*pos; if (v == (void *)1) { - i = NULL; - if (atalk_sockets) - i = atalk_sockets; + i = sk_head(&atalk_sockets); goto out; } - i = v; - i = i->sk_next; + i = sk_next(v); out: return i; } diff -urN linux-2.5.71-bk2/net/appletalk/ddp.c linux-2.5.72/net/appletalk/ddp.c --- linux-2.5.71-bk2/net/appletalk/ddp.c 2003-06-14 12:18:51.000000000 -0700 +++ linux-2.5.72/net/appletalk/ddp.c 2003-06-16 21:54:53.000000000 -0700 @@ -85,31 +85,25 @@ * * \**************************************************************************/ -struct sock *atalk_sockets; +HLIST_HEAD(atalk_sockets); rwlock_t atalk_sockets_lock = RW_LOCK_UNLOCKED; -#if 0 /* currently unused -DaveM */ +static inline void __atalk_insert_socket(struct sock *sk) +{ + sk_add_node(sk, &atalk_sockets); +} + static inline void atalk_insert_socket(struct sock *sk) { write_lock_bh(&atalk_sockets_lock); - sk->sk_next = atalk_sockets; - if (sk->sk_next) - atalk_sockets->sk_pprev = &sk->sk_next; - atalk_sockets = sk; - sk->sk_pprev = &atalk_sockets; + __atalk_insert_socket(sk); write_unlock_bh(&atalk_sockets_lock); } -#endif static inline void atalk_remove_socket(struct sock *sk) { write_lock_bh(&atalk_sockets_lock); - if (sk->sk_pprev) { - if (sk->sk_next) - sk->sk_next->sk_pprev = sk->sk_pprev; - *sk->sk_pprev = sk->sk_next; - sk->sk_pprev = NULL; - } + sk_del_node_init(sk); write_unlock_bh(&atalk_sockets_lock); } @@ -117,9 +111,10 @@ struct atalk_iface *atif) { struct sock *s; + struct hlist_node *node; read_lock_bh(&atalk_sockets_lock); - for (s = atalk_sockets; s; s = s->sk_next) { + sk_for_each(s, node, &atalk_sockets) { struct atalk_sock *at = at_sk(s); if (to->sat_port != at->src_port) @@ -128,13 +123,13 @@ if (to->sat_addr.s_net == ATADDR_ANYNET && to->sat_addr.s_node == ATADDR_BCAST && at->src_net == atif->address.s_net) - break; + goto found; if (to->sat_addr.s_net == at->src_net && (to->sat_addr.s_node == at->src_node || to->sat_addr.s_node == ATADDR_BCAST || to->sat_addr.s_node == ATADDR_ANYNODE)) - break; + goto found; /* XXXX.0 -- we got a request for this 
router. make sure * that the node is appropriately set. */ @@ -142,9 +137,11 @@ to->sat_addr.s_net != ATADDR_ANYNET && atif->address.s_node == at->src_node) { to->sat_addr.s_node = atif->address.s_node; - break; + goto found; } } + s = NULL; +found: read_unlock_bh(&atalk_sockets_lock); return s; } @@ -163,26 +160,21 @@ struct sockaddr_at *sat) { struct sock *s; + struct hlist_node *node; + struct atalk_sock *at; write_lock_bh(&atalk_sockets_lock); - for (s = atalk_sockets; s; s = s->sk_next) { - struct atalk_sock *at = at_sk(s); + sk_for_each(s, node, &atalk_sockets) { + at = at_sk(s); if (at->src_net == sat->sat_addr.s_net && at->src_node == sat->sat_addr.s_node && at->src_port == sat->sat_port) - break; - } - - if (!s) { - /* Wheee, it's free, assign and insert. */ - sk->sk_next = atalk_sockets; - if (sk->sk_next) - atalk_sockets->sk_pprev = &sk->sk_next; - atalk_sockets = sk; - sk->sk_pprev = &atalk_sockets; + goto found; } - + s = NULL; + __atalk_insert_socket(sk); /* Wheee, it's free, assign and insert. */ +found: write_unlock_bh(&atalk_sockets_lock); return s; } @@ -1028,7 +1020,6 @@ */ static int atalk_pick_and_bind_port(struct sock *sk, struct sockaddr_at *sat) { - struct sock *s; int retval; write_lock_bh(&atalk_sockets_lock); @@ -1036,7 +1027,10 @@ for (sat->sat_port = ATPORT_RESERVED; sat->sat_port < ATPORT_LAST; sat->sat_port++) { - for (s = atalk_sockets; s; s = s->sk_next) { + struct sock *s; + struct hlist_node *node; + + sk_for_each(s, node, &atalk_sockets) { struct atalk_sock *at = at_sk(s); if (at->src_net == sat->sat_addr.s_net && @@ -1046,11 +1040,7 @@ } /* Wheee, it's free, assign and insert. */ - sk->sk_next = atalk_sockets; - if (sk->sk_next) - atalk_sockets->sk_pprev = &sk->sk_next; - atalk_sockets = sk; - sk->sk_pprev = &atalk_sockets; + __atalk_insert_socket(sk); at_sk(sk)->src_port = sat->sat_port; retval = 0; goto out; diff -urN linux-2.5.71-bk2/net/ax25/af_ax25.c linux-2.5.72/net/ax25/af_ax25.c --- linux-2.5.71-bk2/net/ax25/af_ax25.c 2003-06-14 12:18:33.000000000 -0700 +++ linux-2.5.72/net/ax25/af_ax25.c 2003-06-16 21:54:53.000000000 -0700 @@ -275,8 +275,9 @@ void ax25_send_to_raw(struct sock *sk, struct sk_buff *skb, int proto) { struct sk_buff *copy; + struct hlist_node *node; - while (sk != NULL) { + sk_for_each_from(sk, node) if (sk->sk_type == SOCK_RAW && sk->sk_protocol == proto && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) { @@ -286,9 +287,6 @@ if (sock_queue_rcv_skb(sk, copy) != 0) kfree_skb(copy); } - - sk = sk->sk_next; - } } /* diff -urN linux-2.5.71-bk2/net/bluetooth/af_bluetooth.c linux-2.5.72/net/bluetooth/af_bluetooth.c --- linux-2.5.71-bk2/net/bluetooth/af_bluetooth.c 2003-06-14 12:17:59.000000000 -0700 +++ linux-2.5.72/net/bluetooth/af_bluetooth.c 2003-06-16 21:54:53.000000000 -0700 @@ -142,24 +142,16 @@ void bt_sock_link(struct bt_sock_list *l, struct sock *sk) { write_lock_bh(&l->lock); - sk->sk_next = l->head; - l->head = sk; + sk_add_node(sk, &l->head); sock_hold(sk); write_unlock_bh(&l->lock); } void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk) { - struct sock **skp; - write_lock_bh(&l->lock); - for (skp = &l->head; *skp; skp = &((*skp)->sk_next)) { - if (*skp == sk) { - *skp = sk->sk_next; - __sock_put(sk); - break; - } - } + if (sk_del_node_init(sk)) + __sock_put(sk); write_unlock_bh(&l->lock); } diff -urN linux-2.5.71-bk2/net/bluetooth/hci_sock.c linux-2.5.72/net/bluetooth/hci_sock.c --- linux-2.5.71-bk2/net/bluetooth/hci_sock.c 2003-06-14 12:18:22.000000000 -0700 +++ linux-2.5.72/net/bluetooth/hci_sock.c 2003-06-16 
21:54:53.000000000 -0700 @@ -91,11 +91,12 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) { struct sock *sk; + struct hlist_node *node; BT_DBG("hdev %p len %d", hdev, skb->len); read_lock(&hci_sk_list.lock); - for (sk = hci_sk_list.head; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &hci_sk_list.head) { struct hci_filter *flt; struct sk_buff *nskb; @@ -607,10 +608,11 @@ if (event == HCI_DEV_UNREG) { struct sock *sk; + struct hlist_node *node; /* Detach sockets from device */ read_lock(&hci_sk_list.lock); - for (sk = hci_sk_list.head; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &hci_sk_list.head) { bh_lock_sock(sk); if (hci_pi(sk)->hdev == hdev) { hci_pi(sk)->hdev = NULL; diff -urN linux-2.5.71-bk2/net/bluetooth/l2cap.c linux-2.5.72/net/bluetooth/l2cap.c --- linux-2.5.71-bk2/net/bluetooth/l2cap.c 2003-06-14 12:18:25.000000000 -0700 +++ linux-2.5.72/net/bluetooth/l2cap.c 2003-06-16 21:54:53.000000000 -0700 @@ -186,10 +186,12 @@ static struct sock *__l2cap_get_sock_by_addr(u16 psm, bdaddr_t *src) { struct sock *sk; - for (sk = l2cap_sk_list.head; sk; sk = sk->sk_next) { + struct hlist_node *node; + sk_for_each(sk, node, &l2cap_sk_list.head) if (l2cap_pi(sk)->sport == psm && !bacmp(&bt_sk(sk)->src, src)) - break; - } + goto found; + sk = NULL; +found: return sk; } @@ -199,8 +201,9 @@ static struct sock *__l2cap_get_sock_by_psm(int state, u16 psm, bdaddr_t *src) { struct sock *sk, *sk1 = NULL; + struct hlist_node *node; - for (sk = l2cap_sk_list.head; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &l2cap_sk_list.head) { if (state && sk->sk_state != state) continue; @@ -214,7 +217,7 @@ sk1 = sk; } } - return sk ? sk : sk1; + return node ? sk : sk1; } /* Find socket with given address (psm, src). @@ -1773,6 +1776,7 @@ { int exact = 0, lm1 = 0, lm2 = 0; register struct sock *sk; + struct hlist_node *node; if (type != ACL_LINK) return 0; @@ -1781,7 +1785,7 @@ /* Find listening sockets and check their link_mode */ read_lock(&l2cap_sk_list.lock); - for (sk = l2cap_sk_list.head; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &l2cap_sk_list.head) { if (sk->sk_state != BT_LISTEN) continue; @@ -2004,21 +2008,23 @@ static void *l2cap_seq_start(struct seq_file *seq, loff_t *pos) { struct sock *sk; + struct hlist_node *node; loff_t l = *pos; read_lock_bh(&l2cap_sk_list.lock); - for (sk = l2cap_sk_list.head; sk; sk = sk->sk_next) + sk_for_each(sk, node, &l2cap_sk_list.head) if (!l--) - return sk; - return NULL; + goto found; + sk = NULL; +found: + return sk; } static void *l2cap_seq_next(struct seq_file *seq, void *e, loff_t *pos) { - struct sock *sk = e; (*pos)++; - return sk->sk_next; + return sk_next(e); } static void l2cap_seq_stop(struct seq_file *seq, void *e) diff -urN linux-2.5.71-bk2/net/bluetooth/rfcomm/sock.c linux-2.5.72/net/bluetooth/rfcomm/sock.c --- linux-2.5.71-bk2/net/bluetooth/rfcomm/sock.c 2003-06-14 12:18:24.000000000 -0700 +++ linux-2.5.72/net/bluetooth/rfcomm/sock.c 2003-06-16 21:54:53.000000000 -0700 @@ -115,14 +115,15 @@ static struct sock *__rfcomm_get_sock_by_addr(u8 channel, bdaddr_t *src) { struct sock *sk; + struct hlist_node *node; - for (sk = rfcomm_sk_list.head; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &rfcomm_sk_list.head) { if (rfcomm_pi(sk)->channel == channel && !bacmp(&bt_sk(sk)->src, src)) break; } - return sk; + return node ? sk : NULL; } /* Find socket with channel and source bdaddr. 
@@ -131,8 +132,9 @@ static struct sock *__rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src) { struct sock *sk, *sk1 = NULL; + struct hlist_node *node; - for (sk = rfcomm_sk_list.head; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &rfcomm_sk_list.head) { if (state && sk->sk_state != state) continue; @@ -146,7 +148,7 @@ sk1 = sk; } } - return sk ? sk : sk1; + return node ? sk : sk1; } /* Find socket with given address (channel, src). @@ -775,11 +777,12 @@ static void *rfcomm_seq_start(struct seq_file *seq, loff_t *pos) { struct sock *sk; + struct hlist_node *node; loff_t l = *pos; read_lock_bh(&rfcomm_sk_list.lock); - for (sk = rfcomm_sk_list.head; sk; sk = sk->sk_next) + sk_for_each(sk, node, &rfcomm_sk_list.head) if (!l--) return sk; return NULL; @@ -789,7 +792,7 @@ { struct sock *sk = e; (*pos)++; - return sk->sk_next; + return sk_next(sk); } static void rfcomm_seq_stop(struct seq_file *seq, void *e) diff -urN linux-2.5.71-bk2/net/bluetooth/sco.c linux-2.5.72/net/bluetooth/sco.c --- linux-2.5.71-bk2/net/bluetooth/sco.c 2003-06-14 12:18:04.000000000 -0700 +++ linux-2.5.72/net/bluetooth/sco.c 2003-06-16 21:54:53.000000000 -0700 @@ -298,12 +298,13 @@ static struct sock *__sco_get_sock_by_addr(bdaddr_t *ba) { struct sock *sk; + struct hlist_node *node; - for (sk = sco_sk_list.head; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &sco_sk_list.head) if (!bacmp(&bt_sk(sk)->src, ba)) - break; - } - + goto found; + sk = NULL; +found: return sk; } @@ -312,11 +313,12 @@ */ static struct sock *sco_get_sock_listen(bdaddr_t *src) { - struct sock *sk, *sk1 = NULL; + struct sock *sk = NULL, *sk1 = NULL; + struct hlist_node *node; read_lock(&sco_sk_list.lock); - for (sk = sco_sk_list.head; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &sco_sk_list.head) { if (sk->sk_state != BT_LISTEN) continue; @@ -331,7 +333,7 @@ read_unlock(&sco_sk_list.lock); - return sk ? sk : sk1; + return node ? sk : sk1; } static void sco_sock_destruct(struct sock *sk) @@ -884,21 +886,24 @@ static void *sco_seq_start(struct seq_file *seq, loff_t *pos) { struct sock *sk; + struct hlist_node *node; loff_t l = *pos; read_lock_bh(&sco_sk_list.lock); - for (sk = sco_sk_list.head; sk; sk = sk->sk_next) + sk_for_each(sk, node, &sco_sk_list.head) if (!l--) - return sk; - return NULL; + goto found; + sk = NULL; +found: + return sk; } static void *sco_seq_next(struct seq_file *seq, void *e, loff_t *pos) { struct sock *sk = e; (*pos)++; - return sk->sk_next; + return sk_next(sk); } static void sco_seq_stop(struct seq_file *seq, void *e) diff -urN linux-2.5.71-bk2/net/decnet/af_decnet.c linux-2.5.72/net/decnet/af_decnet.c --- linux-2.5.71-bk2/net/decnet/af_decnet.c 2003-06-16 21:54:50.000000000 -0700 +++ linux-2.5.72/net/decnet/af_decnet.c 2003-06-16 21:54:53.000000000 -0700 @@ -152,18 +152,18 @@ static kmem_cache_t *dn_sk_cachep; static struct proto_ops dn_proto_ops; static rwlock_t dn_hash_lock = RW_LOCK_UNLOCKED; -static struct sock *dn_sk_hash[DN_SK_HASH_SIZE]; -static struct sock *dn_wild_sk; +static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE]; +static struct hlist_head dn_wild_sk; static int __dn_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen, int flags); static int __dn_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen, int flags); -static struct sock **dn_find_list(struct sock *sk) +static struct hlist_head *dn_find_list(struct sock *sk) { struct dn_scp *scp = DN_SK(sk); if (scp->addr.sdn_flags & SDF_WILD) - return dn_wild_sk ? 
NULL : &dn_wild_sk; + return hlist_empty(&dn_wild_sk) ? NULL : &dn_wild_sk; return &dn_sk_hash[scp->addrloc & DN_SK_HASH_MASK]; } @@ -173,14 +173,16 @@ */ static int check_port(unsigned short port) { - struct sock *sk = dn_sk_hash[port & DN_SK_HASH_MASK]; + struct sock *sk; + struct hlist_node *node; + if (port == 0) return -1; - while(sk) { + + sk_for_each(sk, node, &dn_sk_hash[port & DN_SK_HASH_MASK]) { struct dn_scp *scp = DN_SK(sk); if (scp->addrloc == port) return -1; - sk = sk->sk_next; } return 0; } @@ -209,13 +211,10 @@ static int dn_hash_sock(struct sock *sk) { struct dn_scp *scp = DN_SK(sk); - struct sock **skp; + struct hlist_head *list; int rv = -EUSERS; - if (sk->sk_next) - BUG(); - if (sk->sk_pprev) - BUG(); + BUG_ON(sk_hashed(sk)); write_lock_bh(&dn_hash_lock); @@ -223,12 +222,10 @@ goto out; rv = -EADDRINUSE; - if ((skp = dn_find_list(sk)) == NULL) + if ((list = dn_find_list(sk)) == NULL) goto out; - sk->sk_next = *skp; - sk->sk_pprev = skp; - *skp = sk; + sk_add_node(sk, list); rv = 0; out: write_unlock_bh(&dn_hash_lock); @@ -237,39 +234,19 @@ static void dn_unhash_sock(struct sock *sk) { - struct sock **skp = sk->sk_pprev; - - if (skp == NULL) - return; - write_lock(&dn_hash_lock); - while(*skp != sk) - skp = &((*skp)->sk_next); - *skp = sk->sk_next; + sk_del_node_init(sk); write_unlock(&dn_hash_lock); - - sk->sk_next = NULL; - sk->sk_pprev = NULL; } static void dn_unhash_sock_bh(struct sock *sk) { - struct sock **skp = sk->sk_pprev; - - if (skp == NULL) - return; - write_lock_bh(&dn_hash_lock); - while(*skp != sk) - skp = &((*skp)->sk_next); - *skp = sk->sk_next; + sk_del_node_init(sk); write_unlock_bh(&dn_hash_lock); - - sk->sk_next = NULL; - sk->sk_pprev = NULL; } -struct sock **listen_hash(struct sockaddr_dn *addr) +struct hlist_head *listen_hash(struct sockaddr_dn *addr) { int i; unsigned hash = addr->sdn_objnum; @@ -292,23 +269,17 @@ */ static void dn_rehash_sock(struct sock *sk) { - struct sock **skp = sk->sk_pprev; + struct hlist_head *list; struct dn_scp *scp = DN_SK(sk); if (scp->addr.sdn_flags & SDF_WILD) return; write_lock_bh(&dn_hash_lock); - while(*skp != sk) - skp = &((*skp)->sk_next); - *skp = sk->sk_next; - + hlist_del(&sk->sk_node); DN_SK(sk)->addrloc = 0; - skp = listen_hash(&DN_SK(sk)->addr); - - sk->sk_next = *skp; - sk->sk_pprev = skp; - *skp = sk; + list = listen_hash(&DN_SK(sk)->addr); + sk_add_node(sk, list); write_unlock_bh(&dn_hash_lock); } @@ -401,11 +372,12 @@ struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr) { - struct sock **skp = listen_hash(addr); + struct hlist_head *list = listen_hash(addr); + struct hlist_node *node; struct sock *sk; read_lock(&dn_hash_lock); - for(sk = *skp; sk; sk = sk->sk_next) { + sk_for_each(sk, node, list) { struct dn_scp *scp = DN_SK(sk); if (sk->sk_state != TCP_LISTEN) continue; @@ -425,8 +397,13 @@ return sk; } - if (dn_wild_sk && (dn_wild_sk->sk_state == TCP_LISTEN)) - sock_hold((sk = dn_wild_sk)); + sk = sk_head(&dn_wild_sk); + if (sk) { + if (sk->sk_state == TCP_LISTEN) + sock_hold(sk); + else + sk = NULL; + } read_unlock(&dn_hash_lock); return sk; @@ -436,11 +413,11 @@ { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct sock *sk; + struct hlist_node *node; struct dn_scp *scp; read_lock(&dn_hash_lock); - sk = dn_sk_hash[cb->dst_port & DN_SK_HASH_MASK]; - for (; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &dn_sk_hash[cb->dst_port & DN_SK_HASH_MASK]) { scp = DN_SK(sk); if (cb->src != dn_saddr2dn(&scp->peer)) continue; @@ -448,14 +425,12 @@ continue; if (scp->addrrem && (cb->src_port != 
scp->addrrem)) continue; - break; - } - - if (sk) sock_hold(sk); - + goto found; + } + sk = NULL; +found: read_unlock(&dn_hash_lock); - return sk; } @@ -2122,7 +2097,7 @@ for(state->bucket = 0; state->bucket < DN_SK_HASH_SIZE; ++state->bucket) { - n = dn_sk_hash[state->bucket]; + n = sk_head(&dn_sk_hash[state->bucket]); if (n) break; } @@ -2135,13 +2110,13 @@ { struct dn_iter_state *state = seq->private; - n = n->sk_next; + n = sk_next(n); try_again: if (n) goto out; if (++state->bucket >= DN_SK_HASH_SIZE) goto out; - n = dn_sk_hash[state->bucket]; + n = sk_head(&dn_sk_hash[state->bucket]); goto try_again; out: return n; diff -urN linux-2.5.71-bk2/net/econet/af_econet.c linux-2.5.72/net/econet/af_econet.c --- linux-2.5.71-bk2/net/econet/af_econet.c 2003-06-14 12:18:21.000000000 -0700 +++ linux-2.5.72/net/econet/af_econet.c 2003-06-16 21:54:53.000000000 -0700 @@ -45,7 +45,7 @@ #include static struct proto_ops econet_ops; -static struct sock *econet_sklist; +static struct hlist_head econet_sklist; static rwlock_t econet_lock = RW_LOCK_UNLOCKED; /* Since there are only 256 possible network numbers (or fewer, depends @@ -93,29 +93,18 @@ #endif }; -static void econet_remove_socket(struct sock **list, struct sock *sk) +static void econet_remove_socket(struct hlist_head *list, struct sock *sk) { - struct sock *s; - write_lock_bh(&econet_lock); - - while ((s = *list) != NULL) { - if (s == sk) { - *list = s->sk_next; - break; - } - list = &s->sk_next; - } - + if (sk_del_node_init(sk)) + sock_put(sk); write_unlock_bh(&econet_lock); - if (s) - sock_put(s); } -static void econet_insert_socket(struct sock **list, struct sock *sk) +static void econet_insert_socket(struct hlist_head *list, struct sock *sk) { write_lock_bh(&econet_lock); - sk->sk_next = *list; + sk_add_node(sk, list); sock_hold(sk); write_unlock_bh(&econet_lock); } @@ -726,20 +715,19 @@ static struct sock *ec_listening_socket(unsigned char port, unsigned char station, unsigned char net) { - struct sock *sk = econet_sklist; + struct sock *sk; + struct hlist_node *node; - while (sk) - { + sk_for_each(sk, node, &econet_sklist) { struct econet_opt *opt = ec_sk(sk); if ((opt->port == port || opt->port == 0) && (opt->station == station || opt->station == 0) && (opt->net == net || opt->net == 0)) - return sk; - - sk = sk->sk_next; + goto found; } - - return NULL; + sk = NULL; +found: + return sk; } /* diff -urN linux-2.5.71-bk2/net/ipv4/icmp.c linux-2.5.72/net/ipv4/icmp.c --- linux-2.5.71-bk2/net/ipv4/icmp.c 2003-06-14 12:18:34.000000000 -0700 +++ linux-2.5.72/net/ipv4/icmp.c 2003-06-16 21:54:53.000000000 -0700 @@ -680,12 +680,12 @@ /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */ hash = protocol & (MAX_INET_PROTOS - 1); read_lock(&raw_v4_lock); - if ((raw_sk = raw_v4_htable[hash]) != NULL) { + if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) { while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr, iph->saddr, skb->dev->ifindex)) != NULL) { raw_err(raw_sk, skb, info); - raw_sk = raw_sk->sk_next; + raw_sk = sk_next(raw_sk); iph = (struct iphdr *)skb->data; } } diff -urN linux-2.5.71-bk2/net/ipv4/ip_input.c linux-2.5.72/net/ipv4/ip_input.c --- linux-2.5.71-bk2/net/ipv4/ip_input.c 2003-06-14 12:17:57.000000000 -0700 +++ linux-2.5.72/net/ipv4/ip_input.c 2003-06-16 21:54:53.000000000 -0700 @@ -225,7 +225,7 @@ resubmit: hash = protocol & (MAX_INET_PROTOS - 1); - raw_sk = raw_v4_htable[hash]; + raw_sk = sk_head(&raw_v4_htable[hash]); /* If there maybe a raw socket we must check - if not we * don't care less diff -urN 
linux-2.5.71-bk2/net/ipv4/raw.c linux-2.5.72/net/ipv4/raw.c --- linux-2.5.71-bk2/net/ipv4/raw.c 2003-06-14 12:18:22.000000000 -0700 +++ linux-2.5.72/net/ipv4/raw.c 2003-06-16 21:54:53.000000000 -0700 @@ -80,19 +80,16 @@ #include #include -struct sock *raw_v4_htable[RAWV4_HTABLE_SIZE]; +struct hlist_head raw_v4_htable[RAWV4_HTABLE_SIZE]; rwlock_t raw_v4_lock = RW_LOCK_UNLOCKED; static void raw_v4_hash(struct sock *sk) { - struct sock **skp = &raw_v4_htable[inet_sk(sk)->num & - (RAWV4_HTABLE_SIZE - 1)]; + struct hlist_head *head = &raw_v4_htable[inet_sk(sk)->num & + (RAWV4_HTABLE_SIZE - 1)]; write_lock_bh(&raw_v4_lock); - if ((sk->sk_next = *skp) != NULL) - (*skp)->sk_pprev = &sk->sk_next; - *skp = sk; - sk->sk_pprev = skp; + sk_add_node(sk, head); sock_prot_inc_use(sk->sk_prot); sock_hold(sk); write_unlock_bh(&raw_v4_lock); @@ -101,11 +98,7 @@ static void raw_v4_unhash(struct sock *sk) { write_lock_bh(&raw_v4_lock); - if (sk->sk_pprev) { - if (sk->sk_next) - sk->sk_next->sk_pprev = sk->sk_pprev; - *sk->sk_pprev = sk->sk_next; - sk->sk_pprev = NULL; + if (sk_del_node_init(sk)) { sock_prot_dec_use(sk->sk_prot); __sock_put(sk); } @@ -116,18 +109,20 @@ unsigned long raddr, unsigned long laddr, int dif) { - struct sock *s = sk; + struct hlist_node *node; - for (; s; s = s->sk_next) { - struct inet_opt *inet = inet_sk(s); + sk_for_each_from(sk, node) { + struct inet_opt *inet = inet_sk(sk); if (inet->num == num && !(inet->daddr && inet->daddr != raddr) && !(inet->rcv_saddr && inet->rcv_saddr != laddr) && - !(s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) - break; /* gotcha */ + !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + goto found; /* gotcha */ } - return s; + sk = NULL; +found: + return sk; } /* @@ -158,11 +153,13 @@ void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) { struct sock *sk; + struct hlist_head *head; read_lock(&raw_v4_lock); - if ((sk = raw_v4_htable[hash]) == NULL) + head = &raw_v4_htable[hash]; + if (hlist_empty(head)) goto out; - sk = __raw_v4_lookup(sk, iph->protocol, + sk = __raw_v4_lookup(__sk_head(head), iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); @@ -174,7 +171,7 @@ if (clone) raw_rcv(sk, clone); } - sk = __raw_v4_lookup(sk->sk_next, iph->protocol, + sk = __raw_v4_lookup(sk_next(sk), iph->protocol, iph->saddr, iph->daddr, skb->dev->ifindex); } @@ -697,16 +694,18 @@ static struct sock *raw_get_first(struct seq_file *seq) { - struct sock *sk = NULL; + struct sock *sk; struct raw_iter_state* state = raw_seq_private(seq); for (state->bucket = 0; state->bucket < RAWV4_HTABLE_SIZE; ++state->bucket) { - sk = raw_v4_htable[state->bucket]; - while (sk && sk->sk_family != PF_INET) - sk = sk->sk_next; - if (sk) - break; + struct hlist_node *node; + + sk_for_each(sk, node, &raw_v4_htable[state->bucket]) + if (sk->sk_family == PF_INET) + goto found; } + sk = NULL; +found: return sk; } @@ -715,13 +714,13 @@ struct raw_iter_state* state = raw_seq_private(seq); do { - sk = sk->sk_next; + sk = sk_next(sk); try_again: ; } while (sk && sk->sk_family != PF_INET); if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) { - sk = raw_v4_htable[state->bucket]; + sk = sk_head(&raw_v4_htable[state->bucket]); goto try_again; } return sk; diff -urN linux-2.5.71-bk2/net/ipv4/route.c linux-2.5.72/net/ipv4/route.c --- linux-2.5.71-bk2/net/ipv4/route.c 2003-06-16 21:54:50.000000000 -0700 +++ linux-2.5.72/net/ipv4/route.c 2003-06-16 21:54:53.000000000 -0700 @@ -111,7 +111,7 @@ int ip_rt_max_size; int ip_rt_gc_timeout = RT_GC_TIMEOUT; int ip_rt_gc_interval = 60 * HZ; 
-int ip_rt_gc_min_interval = 5 * HZ; +int ip_rt_gc_min_interval = HZ / 2; int ip_rt_redirect_number = 9; int ip_rt_redirect_load = HZ / 50; int ip_rt_redirect_silence = ((HZ / 50) << (9 + 1)); @@ -456,6 +456,25 @@ out: return ret; } +/* Bits of score are: + * 31: very valuable + * 30: not quite useless + * 29..0: usage counter + */ +static inline u32 rt_score(struct rtable *rt) +{ + u32 score = rt->u.dst.__use; + + if (rt_valuable(rt)) + score |= (1<<31); + + if (!rt->fl.iif || + !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL))) + score |= (1<<30); + + return score; +} + /* This runs via a timer and thus is always in BH context. */ static void rt_check_expire(unsigned long dummy) { @@ -720,10 +739,19 @@ static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) { struct rtable *rth, **rthp; - unsigned long now = jiffies; + unsigned long now; + struct rtable *cand, **candp; + u32 min_score; + int chain_length; int attempts = !in_softirq(); restart: + chain_length = 0; + min_score = ~(u32)0; + cand = NULL; + candp = NULL; + now = jiffies; + rthp = &rt_hash_table[hash].chain; spin_lock_bh(&rt_hash_table[hash].lock); @@ -755,9 +783,35 @@ return 0; } + if (!atomic_read(&rth->u.dst.__refcnt)) { + u32 score = rt_score(rth); + + if (score <= min_score) { + cand = rth; + candp = rthp; + min_score = score; + } + } + + chain_length++; + rthp = &rth->u.rt_next; } + if (cand) { + /* ip_rt_gc_elasticity used to be average length of chain + * length, when exceeded gc becomes really aggressive. + * + * The second limit is less certain. At the moment it allows + * only 2 entries per bucket. We will see. + */ + if (chain_length > ip_rt_gc_elasticity || + (chain_length > 1 && !(min_score & (1<<31)))) { + *candp = cand->u.rt_next; + rt_free(cand); + } + } + /* Try to bind route to arp only if it is output route or unicast forwarding path. */ diff -urN linux-2.5.71-bk2/net/ipv4/tcp.c linux-2.5.72/net/ipv4/tcp.c --- linux-2.5.71-bk2/net/ipv4/tcp.c 2003-06-14 12:18:05.000000000 -0700 +++ linux-2.5.72/net/ipv4/tcp.c 2003-06-16 21:54:53.000000000 -0700 @@ -1906,7 +1906,7 @@ BUG_TRAP(sock_flag(sk, SOCK_DEAD)); /* It cannot be in hash table! 
*/ - BUG_TRAP(!sk->sk_pprev); + BUG_TRAP(sk_unhashed(sk)); /* If it has not 0 inet_sk(sk)->num, it must be bound */ BUG_TRAP(!inet_sk(sk)->num || sk->sk_prev); @@ -2625,7 +2625,7 @@ panic("Failed to allocate TCP established hash table\n"); for (i = 0; i < (tcp_ehash_size << 1); i++) { tcp_ehash[i].lock = RW_LOCK_UNLOCKED; - tcp_ehash[i].chain = NULL; + INIT_HLIST_HEAD(&tcp_ehash[i].chain); } do { @@ -2641,7 +2641,7 @@ panic("Failed to allocate TCP bind hash table\n"); for (i = 0; i < tcp_bhash_size; i++) { tcp_bhash[i].lock = SPIN_LOCK_UNLOCKED; - tcp_bhash[i].chain = NULL; + INIT_HLIST_HEAD(&tcp_bhash[i].chain); } /* Try to be a bit smarter and adjust defaults depending diff -urN linux-2.5.71-bk2/net/ipv4/tcp_diag.c linux-2.5.72/net/ipv4/tcp_diag.c --- linux-2.5.71-bk2/net/ipv4/tcp_diag.c 2003-06-14 12:17:56.000000000 -0700 +++ linux-2.5.72/net/ipv4/tcp_diag.c 2003-06-16 21:54:53.000000000 -0700 @@ -461,13 +461,13 @@ tcp_listen_lock(); for (i = s_i; i < TCP_LHTABLE_SIZE; i++) { struct sock *sk; + struct hlist_node *node; if (i > s_i) s_num = 0; - for (sk = tcp_listening_hash[i], num = 0; - sk != NULL; - sk = sk->sk_next, num++) { + num = 0; + sk_for_each(sk, node, &tcp_listening_hash[i]) { struct inet_opt *inet = inet_sk(sk); if (num < s_num) continue; @@ -485,6 +485,7 @@ tcp_listen_unlock(); goto done; } + ++num; } } tcp_listen_unlock(); @@ -499,15 +500,15 @@ for (i = s_i; i < tcp_ehash_size; i++) { struct tcp_ehash_bucket *head = &tcp_ehash[i]; struct sock *sk; + struct hlist_node *node; if (i > s_i) s_num = 0; read_lock_bh(&head->lock); - for (sk = head->chain, num = 0; - sk != NULL; - sk = sk->sk_next, num++) { + num = 0; + sk_for_each(sk, node, &head->chain) { struct inet_opt *inet = inet_sk(sk); if (num < s_num) @@ -527,12 +528,12 @@ read_unlock_bh(&head->lock); goto done; } + ++num; } if (r->tcpdiag_states&TCPF_TIME_WAIT) { - for (sk = tcp_ehash[i+tcp_ehash_size].chain; - sk != NULL; - sk = sk->sk_next, num++) { + sk_for_each(sk, node, + &tcp_ehash[i + tcp_ehash_size].chain) { struct inet_opt *inet = inet_sk(sk); if (num < s_num) @@ -553,6 +554,7 @@ read_unlock_bh(&head->lock); goto done; } + ++num; } } read_unlock_bh(&head->lock); diff -urN linux-2.5.71-bk2/net/ipv4/tcp_ipv4.c linux-2.5.72/net/ipv4/tcp_ipv4.c --- linux-2.5.71-bk2/net/ipv4/tcp_ipv4.c 2003-06-16 21:54:50.000000000 -0700 +++ linux-2.5.72/net/ipv4/tcp_ipv4.c 2003-06-16 21:54:53.000000000 -0700 @@ -133,11 +133,8 @@ if (tb) { tb->port = snum; tb->fastreuse = 0; - tb->owners = NULL; - if ((tb->next = head->chain) != NULL) - tb->next->pprev = &tb->next; - head->chain = tb; - tb->pprev = &head->chain; + INIT_HLIST_HEAD(&tb->owners); + hlist_add_head(&tb->node, &head->chain); } return tb; } @@ -145,10 +142,8 @@ /* Caller must hold hashbucket lock for this tb with local BH disabled */ void tcp_bucket_destroy(struct tcp_bind_bucket *tb) { - if (!tb->owners) { - if (tb->next) - tb->next->pprev = tb->pprev; - *(tb->pprev) = tb->next; + if (hlist_empty(&tb->owners)) { + __hlist_del(&tb->node); kmem_cache_free(tcp_bucket_cachep, tb); } } @@ -162,10 +157,7 @@ spin_lock(&head->lock); tb = (struct tcp_bind_bucket *)sk->sk_prev; - if ((child->sk_bind_next = tb->owners) != NULL) - tb->owners->sk_bind_pprev = &child->sk_bind_next; - tb->owners = child; - child->sk_bind_pprev = &tb->owners; + sk_add_bind_node(child, &tb->owners); child->sk_prev = (struct sock *)tb; spin_unlock(&head->lock); } @@ -181,20 +173,18 @@ unsigned short snum) { inet_sk(sk)->num = snum; - if ((sk->sk_bind_next = tb->owners) != NULL) - tb->owners->sk_bind_pprev = 
&sk->sk_bind_next; - tb->owners = sk; - sk->sk_bind_pprev = &tb->owners; + sk_add_bind_node(sk, &tb->owners); sk->sk_prev = (struct sock *)tb; } static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) { struct inet_opt *inet = inet_sk(sk); - struct sock *sk2 = tb->owners; + struct sock *sk2; + struct hlist_node *node; int reuse = sk->sk_reuse; - for (; sk2; sk2 = sk2->sk_bind_next) { + sk_for_each_bound(sk2, node, &tb->owners) { if (sk != sk2 && !ipv6_only_sock(sk2) && sk->sk_bound_dev_if == sk2->sk_bound_dev_if) { @@ -207,7 +197,7 @@ } } } - return sk2 != NULL; + return node != NULL; } /* Obtain a reference to a local port for the given sock, @@ -216,6 +206,7 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) { struct tcp_bind_hashbucket *head; + struct hlist_node *node; struct tcp_bind_bucket *tb; int ret; @@ -234,7 +225,7 @@ rover = low; head = &tcp_bhash[tcp_bhashfn(rover)]; spin_lock(&head->lock); - for (tb = head->chain; tb; tb = tb->next) + tb_for_each(tb, node, &head->chain) if (tb->port == rover) goto next; break; @@ -253,15 +244,17 @@ * non-NULL and we hold it's mutex. */ snum = rover; - tb = NULL; } else { head = &tcp_bhash[tcp_bhashfn(snum)]; spin_lock(&head->lock); - for (tb = head->chain; tb; tb = tb->next) + tb_for_each(tb, node, &head->chain) if (tb->port == snum) - break; + goto tb_found; } - if (tb && tb->owners) { + tb = NULL; + goto tb_not_found; +tb_found: + if (!hlist_empty(&tb->owners)) { if (sk->sk_reuse > 1) goto success; if (tb->fastreuse > 0 && @@ -273,10 +266,11 @@ goto fail_unlock; } } +tb_not_found: ret = 1; if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL) goto fail_unlock; - if (!tb->owners) { + if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) tb->fastreuse = 1; else @@ -308,9 +302,7 @@ spin_lock(&head->lock); tb = (struct tcp_bind_bucket *)sk->sk_prev; - if (sk->sk_bind_next) - sk->sk_bind_next->sk_bind_pprev = sk->sk_bind_pprev; - *(sk->sk_bind_pprev) = sk->sk_bind_next; + __hlist_del(&sk->sk_bind_node); sk->sk_prev = NULL; inet->num = 0; tcp_bucket_destroy(tb); @@ -354,23 +346,20 @@ static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) { - struct sock **skp; + struct hlist_head *list; rwlock_t *lock; - BUG_TRAP(!sk->sk_pprev); + BUG_TRAP(sk_unhashed(sk)); if (listen_possible && sk->sk_state == TCP_LISTEN) { - skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; + list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; lock = &tcp_lhash_lock; tcp_listen_wlock(); } else { - skp = &tcp_ehash[(sk->sk_hashent = tcp_sk_hashfn(sk))].chain; + list = &tcp_ehash[(sk->sk_hashent = tcp_sk_hashfn(sk))].chain; lock = &tcp_ehash[sk->sk_hashent].lock; write_lock(lock); } - if ((sk->sk_next = *skp) != NULL) - (*skp)->sk_pprev = &sk->sk_next; - *skp = sk; - sk->sk_pprev = skp; + sk_add_node(sk, list); sock_prot_inc_use(sk->sk_prot); write_unlock(lock); if (listen_possible && sk->sk_state == TCP_LISTEN) @@ -390,7 +379,7 @@ { rwlock_t *lock; - if (!sk->sk_pprev) + if (sk_unhashed(sk)) goto ende; if (sk->sk_state == TCP_LISTEN) { @@ -403,13 +392,8 @@ write_lock_bh(&head->lock); } - if (sk->sk_pprev) { - if (sk->sk_next) - sk->sk_next->sk_pprev = sk->sk_pprev; - *sk->sk_pprev = sk->sk_next; - sk->sk_pprev = NULL; + if (sk_del_node_init(sk)) sock_prot_dec_use(sk->sk_prot); - } write_unlock_bh(lock); ende: @@ -423,14 +407,15 @@ * connection. So always assume those are both wildcarded * during the search since they can never be otherwise. 
*/ -static struct sock *__tcp_v4_lookup_listener(struct sock *sk, u32 daddr, +static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr, unsigned short hnum, int dif) { - struct sock *result = NULL; + struct sock *result = NULL, *sk; + struct hlist_node *node; int score, hiscore; hiscore=-1; - for (; sk; sk = sk->sk_next) { + sk_for_each(sk, node, head) { struct inet_opt *inet = inet_sk(sk); if (inet->num == hnum && !ipv6_only_sock(sk)) { @@ -462,19 +447,20 @@ inline struct sock *tcp_v4_lookup_listener(u32 daddr, unsigned short hnum, int dif) { - struct sock *sk; + struct sock *sk = NULL; + struct hlist_head *head; read_lock(&tcp_lhash_lock); - sk = tcp_listening_hash[tcp_lhashfn(hnum)]; - if (sk) { - struct inet_opt *inet = inet_sk(sk); + head = &tcp_listening_hash[tcp_lhashfn(hnum)]; + if (!hlist_empty(head)) { + struct inet_opt *inet = inet_sk((sk = __sk_head(head))); - if (inet->num == hnum && !sk->sk_next && + if (inet->num == hnum && !sk->sk_node.next && (!inet->rcv_saddr || inet->rcv_saddr == daddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && !sk->sk_bound_dev_if) goto sherry_cache; - sk = __tcp_v4_lookup_listener(sk, daddr, hnum, dif); + sk = __tcp_v4_lookup_listener(head, daddr, hnum, dif); } if (sk) { sherry_cache: @@ -498,21 +484,24 @@ TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) __u32 ports = TCP_COMBINED_PORTS(sport, hnum); struct sock *sk; + struct hlist_node *node; /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ int hash = tcp_hashfn(daddr, hnum, saddr, sport); head = &tcp_ehash[hash]; read_lock(&head->lock); - for (sk = head->chain; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &head->chain) { if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ - for (sk = (head + tcp_ehash_size)->chain; sk; sk = sk->sk_next) + sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) { if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; + } + sk = NULL; out: read_unlock(&head->lock); return sk; @@ -562,14 +551,14 @@ __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); int hash = tcp_hashfn(daddr, lport, saddr, inet->dport); struct tcp_ehash_bucket *head = &tcp_ehash[hash]; - struct sock *sk2, **skp; + struct sock *sk2; + struct hlist_node *node; struct tcp_tw_bucket *tw; write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ - for (skp = &(head + tcp_ehash_size)->chain; (sk2 = *skp) != NULL; - skp = &sk2->sk_next) { + sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) { tw = (struct tcp_tw_bucket *)sk2; if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { @@ -599,7 +588,6 @@ tp->ts_recent = tw->tw_ts_recent; tp->ts_recent_stamp = tw->tw_ts_recent_stamp; sock_hold(sk2); - skp = &head->chain; goto unique; } else goto not_unique; @@ -608,7 +596,7 @@ tw = NULL; /* And established part... */ - for (skp = &head->chain; (sk2 = *skp) != NULL; skp = &sk2->sk_next) { + sk_for_each(sk2, node, &head->chain) { if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) goto not_unique; } @@ -618,13 +606,9 @@ * in hash table socket with a funny identity. 
*/ inet->num = lport; inet->sport = htons(lport); - BUG_TRAP(!sk->sk_pprev); - if ((sk->sk_next = *skp) != NULL) - (*skp)->sk_pprev = &sk->sk_next; - - *skp = sk; - sk->sk_pprev = skp; sk->sk_hashent = hash; + BUG_TRAP(sk_unhashed(sk)); + sk_add_node(sk, &head->chain); sock_prot_inc_use(sk->sk_prot); write_unlock(&head->lock); @@ -661,6 +645,7 @@ int low = sysctl_local_port_range[0]; int high = sysctl_local_port_range[1]; int remaining = (high - low) + 1; + struct hlist_node *node; struct tcp_tw_bucket *tw = NULL; local_bh_disable(); @@ -692,9 +677,9 @@ * because the established check is already * unique enough. */ - for (tb = head->chain; tb; tb = tb->next) { + tb_for_each(tb, node, &head->chain) { if (tb->port == rover) { - BUG_TRAP(tb->owners); + BUG_TRAP(!hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; if (!__tcp_v4_check_established(sk, @@ -729,7 +714,7 @@ spin_unlock(&tcp_portalloc_lock); tcp_bind_hash(sk, tb, rover); - if (!sk->sk_pprev) { + if (sk_unhashed(sk)) { inet_sk(sk)->sport = htons(rover); __tcp_v4_hash(sk, 0); } @@ -747,7 +732,7 @@ head = &tcp_bhash[tcp_bhashfn(snum)]; tb = (struct tcp_bind_bucket *)sk->sk_prev; spin_lock_bh(&head->lock); - if (tb->owners == sk && !sk->sk_bind_next) { + if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { __tcp_v4_hash(sk, 0); spin_unlock_bh(&head->lock); return 0; @@ -2131,6 +2116,18 @@ #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. */ +static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head) +{ + return hlist_empty(head) ? NULL : + list_entry(head->first, struct tcp_tw_bucket, tw_node); +} + +static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw) +{ + return tw->tw_node.next ? + hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; +} + static void *listening_get_first(struct seq_file *seq) { struct tcp_iter_state* st = seq->private; @@ -2139,7 +2136,7 @@ for (st->bucket = 0; st->bucket < TCP_LHTABLE_SIZE; ++st->bucket) { struct open_request *req; struct tcp_opt *tp; - struct sock *sk = tcp_listening_hash[st->bucket]; + struct sock *sk = sk_head(&tcp_listening_hash[st->bucket]); if (!sk) continue; @@ -2175,6 +2172,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) { struct tcp_opt *tp; + struct hlist_node *node; struct sock *sk = cur; struct tcp_iter_state* st = seq->private; @@ -2197,13 +2195,13 @@ get_req: req = tp->listen_opt->syn_table[st->sbucket]; } - sk = st->syn_wait_sk->sk_next; + sk = sk_next(st->syn_wait_sk); st->state = TCP_SEQ_STATE_LISTENING; read_unlock_bh(&tp->syn_wait_lock); } else - sk = sk->sk_next; + sk = sk_next(sk); get_sk: - while (sk) { + sk_for_each_from(sk, node) { if (sk->sk_family == st->family) { cur = sk; goto out; @@ -2218,10 +2216,9 @@ goto get_req; } read_unlock_bh(&tp->syn_wait_lock); - sk = sk->sk_next; } if (++st->bucket < TCP_LHTABLE_SIZE) { - sk = tcp_listening_hash[st->bucket]; + sk = sk_head(&tcp_listening_hash[st->bucket]); goto get_sk; } cur = NULL; @@ -2246,22 +2243,25 @@ for (st->bucket = 0; st->bucket < tcp_ehash_size; ++st->bucket) { struct sock *sk; + struct hlist_node *node; struct tcp_tw_bucket *tw; read_lock(&tcp_ehash[st->bucket].lock); - for (sk = tcp_ehash[st->bucket].chain; sk; - sk = sk->sk_next, ++st->num) { - if (sk->sk_family != st->family) + sk_for_each(sk, node, &tcp_ehash[st->bucket].chain) { + if (sk->sk_family != st->family) { + ++st->num; continue; + } rc = sk; goto out; } st->state = TCP_SEQ_STATE_TIME_WAIT; - for (tw = (struct tcp_tw_bucket *) - tcp_ehash[st->bucket + 
tcp_ehash_size].chain; - tw; tw = (struct tcp_tw_bucket *)tw->tw_next, ++st->num) { - if (tw->tw_family != st->family) + tw_for_each(tw, node, + &tcp_ehash[st->bucket + tcp_ehash_size].chain) { + if (tw->tw_family != st->family) { + ++st->num; continue; + } rc = tw; goto out; } @@ -2276,15 +2276,16 @@ { struct sock *sk = cur; struct tcp_tw_bucket *tw; + struct hlist_node *node; struct tcp_iter_state* st = seq->private; if (st->state == TCP_SEQ_STATE_TIME_WAIT) { tw = cur; - tw = (struct tcp_tw_bucket *)tw->tw_next; + tw = tw_next(tw); get_tw: while (tw && tw->tw_family != st->family) { ++st->num; - tw = (struct tcp_tw_bucket *)tw->tw_next; + tw = tw_next(tw); } if (tw) { cur = tw; @@ -2294,24 +2295,24 @@ st->state = TCP_SEQ_STATE_ESTABLISHED; if (++st->bucket < tcp_ehash_size) { read_lock(&tcp_ehash[st->bucket].lock); - sk = tcp_ehash[st->bucket].chain; + sk = sk_head(&tcp_ehash[st->bucket].chain); } else { cur = NULL; goto out; } } else - sk = sk->sk_next; + sk = sk_next(sk); - while (sk && sk->sk_family != st->family) { + sk_for_each_from(sk, node) { + if (sk->sk_family == st->family) + goto found; ++st->num; - sk = sk->sk_next; - } - if (!sk) { - st->state = TCP_SEQ_STATE_TIME_WAIT; - tw = (struct tcp_tw_bucket *) - tcp_ehash[st->bucket + tcp_ehash_size].chain; - goto get_tw; } + + st->state = TCP_SEQ_STATE_TIME_WAIT; + tw = tw_head(&tcp_ehash[st->bucket + tcp_ehash_size].chain); + goto get_tw; +found: cur = sk; out: return cur; diff -urN linux-2.5.71-bk2/net/ipv4/tcp_minisocks.c linux-2.5.72/net/ipv4/tcp_minisocks.c --- linux-2.5.71-bk2/net/ipv4/tcp_minisocks.c 2003-06-16 21:54:50.000000000 -0700 +++ linux-2.5.72/net/ipv4/tcp_minisocks.c 2003-06-16 21:54:53.000000000 -0700 @@ -63,23 +63,19 @@ /* Unlink from established hashes. */ ehead = &tcp_ehash[tw->tw_hashent]; write_lock(&ehead->lock); - if (!tw->tw_pprev) { + if (hlist_unhashed(&tw->tw_node)) { write_unlock(&ehead->lock); return; } - if (tw->tw_next) - tw->tw_next->sk_pprev = tw->tw_pprev; - *(tw->tw_pprev) = tw->tw_next; - tw->tw_pprev = NULL; + __hlist_del(&tw->tw_node); + sk_node_init(&tw->tw_node); write_unlock(&ehead->lock); /* Disassociate with bind bucket. */ bhead = &tcp_bhash[tcp_bhashfn(tw->tw_num)]; spin_lock(&bhead->lock); tb = tw->tw_tb; - if (tw->tw_bind_next) - tw->tw_bind_next->sk_bind_pprev = tw->tw_bind_pprev; - *(tw->tw_bind_pprev) = tw->tw_bind_next; + __hlist_del(&tw->tw_bind_node); tw->tw_tb = NULL; tcp_bucket_destroy(tb); spin_unlock(&bhead->lock); @@ -298,7 +294,6 @@ { struct tcp_ehash_bucket *ehead = &tcp_ehash[sk->sk_hashent]; struct tcp_bind_hashbucket *bhead; - struct sock **head, *sktw; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet_sk(sk)->num != 0 MUST be bound in @@ -308,30 +303,17 @@ spin_lock(&bhead->lock); tw->tw_tb = (struct tcp_bind_bucket *)sk->sk_prev; BUG_TRAP(sk->sk_prev); - if ((tw->tw_bind_next = tw->tw_tb->owners) != NULL) - tw->tw_tb->owners->sk_bind_pprev = &tw->tw_bind_next; - tw->tw_tb->owners = (struct sock *)tw; - tw->tw_bind_pprev = &tw->tw_tb->owners; + tw_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); write_lock(&ehead->lock); /* Step 2: Remove SK from established hash. */ - if (sk->sk_pprev) { - if (sk->sk_next) - sk->sk_next->sk_pprev = sk->sk_pprev; - *sk->sk_pprev = sk->sk_next; - sk->sk_pprev = NULL; + if (sk_del_node_init(sk)) sock_prot_dec_use(sk->sk_prot); - } /* Step 3: Hash TW into TIMEWAIT half of established hash table. 
*/ - head = &(ehead + tcp_ehash_size)->chain; - sktw = (struct sock *)tw; - if ((sktw->sk_next = *head) != NULL) - (*head)->sk_pprev = &sktw->sk_next; - *head = sktw; - sktw->sk_pprev = head; + tw_add_node(tw, &(ehead + tcp_ehash_size)->chain); atomic_inc(&tw->tw_refcnt); write_unlock(&ehead->lock); @@ -376,7 +358,7 @@ tw->tw_rcv_wnd = tcp_receive_window(tp); tw->tw_ts_recent = tp->ts_recent; tw->tw_ts_recent_stamp = tp->ts_recent_stamp; - tw->tw_pprev_death = NULL; + tw_dead_node_init(tw); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (tw->tw_family == PF_INET6) { @@ -417,7 +399,7 @@ } /* Kill off TIME_WAIT sockets once their lifetime has expired. */ -static int tcp_tw_death_row_slot = 0; +static int tcp_tw_death_row_slot; static void tcp_twkill(unsigned long); @@ -425,13 +407,14 @@ #define TCP_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ #define TCP_TWKILL_PERIOD (TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS) -static struct tcp_tw_bucket *tcp_tw_death_row[TCP_TWKILL_SLOTS]; +static struct hlist_head tcp_tw_death_row[TCP_TWKILL_SLOTS]; static spinlock_t tw_death_lock = SPIN_LOCK_UNLOCKED; static struct timer_list tcp_tw_timer = TIMER_INITIALIZER(tcp_twkill, 0, 0); static void tcp_twkill(unsigned long dummy) { struct tcp_tw_bucket *tw; + struct hlist_node *node, *safe; int killed = 0; /* NOTE: compare this to previous version where lock @@ -445,18 +428,13 @@ if (tcp_tw_count == 0) goto out; - while((tw = tcp_tw_death_row[tcp_tw_death_row_slot]) != NULL) { - tcp_tw_death_row[tcp_tw_death_row_slot] = tw->tw_next_death; - if (tw->tw_next_death) - tw->tw_next_death->tw_pprev_death = tw->tw_pprev_death; - tw->tw_pprev_death = NULL; + tw_for_each_inmate(tw, node, safe, + &tcp_tw_death_row[tcp_tw_death_row_slot]) { + __tw_del_dead_node(tw); spin_unlock(&tw_death_lock); - tcp_timewait_kill(tw); tcp_tw_put(tw); - killed++; - spin_lock(&tw_death_lock); } tcp_tw_death_row_slot = @@ -477,11 +455,7 @@ void tcp_tw_deschedule(struct tcp_tw_bucket *tw) { spin_lock(&tw_death_lock); - if (tw->tw_pprev_death) { - if (tw->tw_next_death) - tw->tw_next_death->tw_pprev_death = tw->tw_pprev_death; - *tw->tw_pprev_death = tw->tw_next_death; - tw->tw_pprev_death = NULL; + if (tw_del_dead_node(tw)) { tcp_tw_put(tw); if (--tcp_tw_count == 0) del_timer(&tcp_tw_timer); @@ -497,11 +471,11 @@ static void tcp_twcal_tick(unsigned long); static struct timer_list tcp_twcal_timer = TIMER_INITIALIZER(tcp_twcal_tick, 0, 0); -static struct tcp_tw_bucket *tcp_twcal_row[TCP_TW_RECYCLE_SLOTS]; +static struct hlist_head tcp_twcal_row[TCP_TW_RECYCLE_SLOTS]; void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo) { - struct tcp_tw_bucket **tpp; + struct hlist_head *list; int slot; /* timeout := RTO * 3.5 @@ -533,13 +507,9 @@ spin_lock(&tw_death_lock); /* Unlink it, if it was scheduled */ - if (tw->tw_pprev_death) { - if (tw->tw_next_death) - tw->tw_next_death->tw_pprev_death = tw->tw_pprev_death; - *tw->tw_pprev_death = tw->tw_next_death; - tw->tw_pprev_death = NULL; + if (tw_del_dead_node(tw)) tcp_tw_count--; - } else + else atomic_inc(&tw->tw_refcnt); if (slot >= TCP_TW_RECYCLE_SLOTS) { @@ -553,7 +523,7 @@ } tw->tw_ttd = jiffies + timeo; slot = (tcp_tw_death_row_slot + slot) & (TCP_TWKILL_SLOTS - 1); - tpp = &tcp_tw_death_row[slot]; + list = &tcp_tw_death_row[slot]; } else { tw->tw_ttd = jiffies + (slot << TCP_TW_RECYCLE_TICK); @@ -567,13 +537,10 @@ mod_timer(&tcp_twcal_timer, jiffies + (slot<tw_next_death = *tpp) != NULL) - (*tpp)->tw_pprev_death = &tw->tw_next_death; - *tpp = tw; - tw->tw_pprev_death = tpp; + 
hlist_add_head(&tw->tw_death_node, list); if (tcp_tw_count++ == 0) mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD); @@ -597,12 +564,12 @@ for (n=0; ntw_next_death; - tw->tw_pprev_death = NULL; - + tw_for_each_inmate(tw, node, safe, + &tcp_twcal_row[slot]) { + __tw_del_dead_node(tw); tcp_timewait_kill(tw); tcp_tw_put(tw); killed++; @@ -614,7 +581,7 @@ tcp_twcal_hand = slot; } - if (tcp_twcal_row[slot] != NULL) { + if (!hlist_empty(&tcp_twcal_row[slot])) { mod_timer(&tcp_twcal_timer, j); goto out; } @@ -652,7 +619,7 @@ newsk->sk_state = TCP_SYN_RECV; /* SANITY */ - newsk->sk_pprev = NULL; + sk_node_init(&newsk->sk_node); newsk->sk_prev = NULL; /* Clone the TCP header template */ diff -urN linux-2.5.71-bk2/net/ipv4/udp.c linux-2.5.72/net/ipv4/udp.c --- linux-2.5.71-bk2/net/ipv4/udp.c 2003-06-14 12:17:59.000000000 -0700 +++ linux-2.5.72/net/ipv4/udp.c 2003-06-16 21:54:53.000000000 -0700 @@ -113,7 +113,7 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics); -struct sock *udp_hash[UDP_HTABLE_SIZE]; +struct hlist_head udp_hash[UDP_HTABLE_SIZE]; rwlock_t udp_hash_lock = RW_LOCK_UNLOCKED; /* Shared by v4/v6 udp. */ @@ -121,6 +121,8 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) { + struct hlist_node *node; + struct sock *sk2; struct inet_opt *inet = inet_sk(sk); write_lock_bh(&udp_hash_lock); @@ -133,11 +135,11 @@ best_size_so_far = 32767; best = result = udp_port_rover; for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { - struct sock *sk2; + struct hlist_head *list; int size; - sk2 = udp_hash[result & (UDP_HTABLE_SIZE - 1)]; - if (!sk2) { + list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + if (hlist_empty(list)) { if (result > sysctl_local_port_range[1]) result = sysctl_local_port_range[0] + ((result - sysctl_local_port_range[0]) & @@ -145,10 +147,9 @@ goto gotit; } size = 0; - do { + sk_for_each(sk2, node, list) if (++size >= best_size_so_far) goto next; - } while ((sk2 = sk2->sk_next) != NULL); best_size_so_far = size; best = result; next:; @@ -167,11 +168,8 @@ gotit: udp_port_rover = snum = result; } else { - struct sock *sk2; - - for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; - sk2 != NULL; - sk2 = sk2->sk_next) { + sk_for_each(sk2, node, + &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { struct inet_opt *inet2 = inet_sk(sk2); if (inet2->num == snum && @@ -186,12 +184,10 @@ } } inet->num = snum; - if (!sk->sk_pprev) { - struct sock **skp = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; - if ((sk->sk_next = *skp) != NULL) - (*skp)->sk_pprev = &sk->sk_next; - *skp = sk; - sk->sk_pprev = skp; + if (sk_unhashed(sk)) { + struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + + sk_add_node(sk, h); sock_prot_inc_use(sk->sk_prot); sock_hold(sk); } @@ -211,11 +207,7 @@ static void udp_v4_unhash(struct sock *sk) { write_lock_bh(&udp_hash_lock); - if (sk->sk_pprev) { - if (sk->sk_next) - sk->sk_next->sk_pprev = sk->sk_pprev; - *sk->sk_pprev = sk->sk_next; - sk->sk_pprev = NULL; + if (sk_del_node_init(sk)) { inet_sk(sk)->num = 0; sock_prot_dec_use(sk->sk_prot); __sock_put(sk); @@ -229,11 +221,11 @@ struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif) { struct sock *sk, *result = NULL; + struct hlist_node *node; unsigned short hnum = ntohs(dport); int badness = -1; - for (sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk; - sk = sk->sk_next) { + sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { struct inet_opt *inet = inet_sk(sk); if (inet->num == hnum && !ipv6_only_sock(sk)) { @@ -287,10 +279,11 @@ u16 rmt_port, u32 rmt_addr, int 
dif) { + struct hlist_node *node; struct sock *s = sk; unsigned short hnum = ntohs(loc_port); - for (; s; s = s->sk_next) { + sk_for_each_from(s, node) { struct inet_opt *inet = inet_sk(s); if (inet->num != hnum || @@ -302,8 +295,10 @@ continue; if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif)) continue; - break; + goto found; } + s = NULL; +found: return s; } @@ -1088,7 +1083,7 @@ int dif; read_lock(&udp_hash_lock); - sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]; + sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); dif = skb->dev->ifindex; sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { @@ -1097,7 +1092,7 @@ do { struct sk_buff *skb1 = skb; - sknext = udp_v4_mcast_next(sk->sk_next, uh->dest, daddr, + sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, uh->source, saddr, dif); if(sknext) skb1 = skb_clone(skb, GFP_ATOMIC); @@ -1354,20 +1349,27 @@ static __inline__ struct sock *udp_get_bucket(struct seq_file *seq, loff_t *pos) { int i; - struct sock *sk = NULL; + struct sock *sk; + struct hlist_node *node; loff_t l = *pos; struct udp_iter_state *state = seq->private; - for (; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) - for (i = 0, sk = udp_hash[state->bucket]; sk; - ++i, sk = sk->sk_next) { - if (sk->sk_family != state->family) + for (; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { + i = 0; + sk_for_each(sk, node, &udp_hash[state->bucket]) { + if (sk->sk_family != state->family) { + ++i; continue; - if (l--) + } + if (l--) { + ++i; continue; + } *pos = i; goto out; } + } + sk = NULL; out: return sk; } @@ -1381,6 +1383,7 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock *sk; + struct hlist_node *node; struct udp_iter_state *state; if (v == (void *)1) { @@ -1391,9 +1394,7 @@ state = seq->private; sk = v; - sk = sk->sk_next; - - for (; sk; sk = sk->sk_next) + sk_for_each_continue(sk, node) if (sk->sk_family == state->family) goto out; diff -urN linux-2.5.71-bk2/net/ipv6/icmp.c linux-2.5.72/net/ipv6/icmp.c --- linux-2.5.71-bk2/net/ipv6/icmp.c 2003-06-14 12:18:06.000000000 -0700 +++ linux-2.5.72/net/ipv6/icmp.c 2003-06-16 21:54:53.000000000 -0700 @@ -516,10 +516,10 @@ rcu_read_unlock(); read_lock(&raw_v6_lock); - if ((sk = raw_v6_htable[hash]) != NULL) { + if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) { while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr))) { rawv6_err(sk, skb, NULL, type, code, inner_offset, info); - sk = sk->sk_next; + sk = sk_next(sk); } } read_unlock(&raw_v6_lock); diff -urN linux-2.5.71-bk2/net/ipv6/ip6_input.c linux-2.5.72/net/ipv6/ip6_input.c --- linux-2.5.71-bk2/net/ipv6/ip6_input.c 2003-06-14 12:18:29.000000000 -0700 +++ linux-2.5.72/net/ipv6/ip6_input.c 2003-06-16 21:54:53.000000000 -0700 @@ -158,7 +158,7 @@ goto discard; nexthdr = skb->nh.raw[nhoff]; - raw_sk = raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]; + raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]); if (raw_sk) ipv6_raw_deliver(skb, nexthdr); diff -urN linux-2.5.71-bk2/net/ipv6/raw.c linux-2.5.72/net/ipv6/raw.c --- linux-2.5.71-bk2/net/ipv6/raw.c 2003-06-14 12:18:07.000000000 -0700 +++ linux-2.5.72/net/ipv6/raw.c 2003-06-16 21:54:53.000000000 -0700 @@ -53,19 +53,16 @@ #include #include -struct sock *raw_v6_htable[RAWV6_HTABLE_SIZE]; +struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; rwlock_t raw_v6_lock = RW_LOCK_UNLOCKED; static void raw_v6_hash(struct sock *sk) { - struct sock **skp = &raw_v6_htable[inet_sk(sk)->num & - (RAWV6_HTABLE_SIZE - 1)]; + struct hlist_head *list = 
&raw_v6_htable[inet_sk(sk)->num & + (RAWV6_HTABLE_SIZE - 1)]; write_lock_bh(&raw_v6_lock); - if ((sk->sk_next = *skp) != NULL) - (*skp)->sk_pprev = &sk->sk_next; - *skp = sk; - sk->sk_pprev = skp; + sk_add_node(sk, list); sock_prot_inc_use(sk->sk_prot); sock_hold(sk); write_unlock_bh(&raw_v6_lock); @@ -74,11 +71,7 @@ static void raw_v6_unhash(struct sock *sk) { write_lock_bh(&raw_v6_lock); - if (sk->sk_pprev) { - if (sk->sk_next) - sk->sk_next->sk_pprev = sk->sk_pprev; - *sk->sk_pprev = sk->sk_next; - sk->sk_pprev = NULL; + if (sk_del_node_init(sk)) { sock_prot_dec_use(sk->sk_prot); __sock_put(sk); } @@ -90,12 +83,12 @@ struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, struct in6_addr *loc_addr, struct in6_addr *rmt_addr) { - struct sock *s = sk; + struct hlist_node *node; int addr_type = ipv6_addr_type(loc_addr); - for (s = sk; s; s = s->sk_next) { - if (inet_sk(s)->num == num) { - struct ipv6_pinfo *np = inet6_sk(s); + sk_for_each_from(sk, node) + if (inet_sk(sk)->num == num) { + struct ipv6_pinfo *np = inet6_sk(sk); if (!ipv6_addr_any(&np->daddr) && ipv6_addr_cmp(&np->daddr, rmt_addr)) @@ -103,16 +96,17 @@ if (!ipv6_addr_any(&np->rcv_saddr)) { if (!ipv6_addr_cmp(&np->rcv_saddr, loc_addr)) - break; + goto found; if ((addr_type & IPV6_ADDR_MULTICAST) && - inet6_mc_check(s, loc_addr, rmt_addr)) - break; + inet6_mc_check(sk, loc_addr, rmt_addr)) + goto found; continue; } - break; + goto found; } - } - return s; + sk = NULL; +found: + return sk; } /* @@ -156,7 +150,7 @@ hash = nexthdr & (MAX_INET_PROTOS - 1); read_lock(&raw_v6_lock); - sk = raw_v6_htable[hash]; + sk = sk_head(&raw_v6_htable[hash]); /* * The first socket found will be delivered after @@ -176,7 +170,7 @@ if (clone) rawv6_rcv(sk, clone); } - sk = __raw_v6_lookup(sk->sk_next, nexthdr, daddr, saddr); + sk = __raw_v6_lookup(sk_next(sk), nexthdr, daddr, saddr); } out: read_unlock(&raw_v6_lock); @@ -926,16 +920,16 @@ static struct sock *raw6_get_first(struct seq_file *seq) { - struct sock *sk = NULL; + struct sock *sk; + struct hlist_node *node; struct raw6_iter_state* state = raw6_seq_private(seq); - for (state->bucket = 0; state->bucket < RAWV6_HTABLE_SIZE; ++state->bucket) { - sk = raw_v6_htable[state->bucket]; - while (sk && sk->sk_family != PF_INET6) - sk = sk->sk_next; - if (sk) - break; - } + for (state->bucket = 0; state->bucket < RAWV6_HTABLE_SIZE; ++state->bucket) + sk_for_each(sk, node, &raw_v6_htable[state->bucket]) + if (sk->sk_family == PF_INET6) + goto out; + sk = NULL; +out: return sk; } @@ -944,13 +938,13 @@ struct raw6_iter_state* state = raw6_seq_private(seq); do { - sk = sk->sk_next; + sk = sk_next(sk); try_again: ; } while (sk && sk->sk_family != PF_INET6); if (!sk && ++state->bucket < RAWV6_HTABLE_SIZE) { - sk = raw_v6_htable[state->bucket]; + sk = sk_head(&raw_v6_htable[state->bucket]); goto try_again; } return sk; diff -urN linux-2.5.71-bk2/net/ipv6/tcp_ipv6.c linux-2.5.72/net/ipv6/tcp_ipv6.c --- linux-2.5.71-bk2/net/ipv6/tcp_ipv6.c 2003-06-14 12:18:22.000000000 -0700 +++ linux-2.5.72/net/ipv6/tcp_ipv6.c 2003-06-16 21:54:53.000000000 -0700 @@ -133,17 +133,19 @@ static inline int tcp_v6_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) { - struct sock *sk2 = tb->owners; + struct sock *sk2; + struct hlist_node *node; /* We must walk the whole port owner list in this case. 
-DaveM */ - for (; sk2; sk2 = sk2->sk_bind_next) + sk_for_each_bound(sk2, node, &tb->owners) { if (sk != sk2 && sk->sk_bound_dev_if == sk2->sk_bound_dev_if && (!sk->sk_reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) && ipv6_rcv_saddr_equal(sk, sk2)) break; + } - return sk2 != NULL; + return node != NULL; } /* Grrr, addr_type already calculated by caller, but I don't want @@ -155,6 +157,7 @@ { struct tcp_bind_hashbucket *head; struct tcp_bind_bucket *tb; + struct hlist_node *node; int ret; local_bh_disable(); @@ -171,7 +174,7 @@ rover = low; head = &tcp_bhash[tcp_bhashfn(rover)]; spin_lock(&head->lock); - for (tb = head->chain; tb; tb = tb->next) + tb_for_each(tb, node, &head->chain) if (tb->port == rover) goto next; break; @@ -188,15 +191,17 @@ /* OK, here is the one we will use. */ snum = rover; - tb = NULL; } else { head = &tcp_bhash[tcp_bhashfn(snum)]; spin_lock(&head->lock); - for (tb = head->chain; tb != NULL; tb = tb->next) + tb_for_each(tb, node, &head->chain) if (tb->port == snum) - break; + goto tb_found; } - if (tb != NULL && tb->owners != NULL) { + tb = NULL; + goto tb_not_found; +tb_found: + if (tb && !hlist_empty(&tb->owners)) { if (tb->fastreuse > 0 && sk->sk_reuse && sk->sk_state != TCP_LISTEN) { goto success; @@ -206,11 +211,11 @@ goto fail_unlock; } } +tb_not_found: ret = 1; - if (tb == NULL && - (tb = tcp_bucket_create(head, snum)) == NULL) + if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL) goto fail_unlock; - if (tb->owners == NULL) { + if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) tb->fastreuse = 1; else @@ -234,25 +239,23 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) { - struct sock **skp; + struct hlist_head *list; rwlock_t *lock; - BUG_TRAP(!sk->sk_pprev); + BUG_TRAP(sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { - skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; + list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; lock = &tcp_lhash_lock; tcp_listen_wlock(); } else { - skp = &tcp_ehash[(sk->sk_hashent = tcp_v6_sk_hashfn(sk))].chain; + sk->sk_hashent = tcp_v6_sk_hashfn(sk); + list = &tcp_ehash[sk->sk_hashent].chain; lock = &tcp_ehash[sk->sk_hashent].lock; write_lock(lock); } - if ((sk->sk_next = *skp) != NULL) - (*skp)->sk_pprev = &sk->sk_next; - *skp = sk; - sk->sk_pprev = skp; + sk_add_node(sk, list); sock_prot_inc_use(sk->sk_prot); write_unlock(lock); } @@ -276,13 +279,13 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif) { struct sock *sk; + struct hlist_node *node; struct sock *result = NULL; int score, hiscore; hiscore=0; read_lock(&tcp_lhash_lock); - sk = tcp_listening_hash[tcp_lhashfn(hnum)]; - for (; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) { if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -325,6 +328,7 @@ { struct tcp_ehash_bucket *head; struct sock *sk; + struct hlist_node *node; __u32 ports = TCP_COMBINED_PORTS(sport, hnum); int hash; @@ -334,13 +338,13 @@ hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); head = &tcp_ehash[hash]; read_lock(&head->lock); - for (sk = head->chain; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. 
*/ - for (sk = (head + tcp_ehash_size)->chain; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) { /* FIXME: acme: check this... */ struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; @@ -474,13 +478,14 @@ u32 ports = TCP_COMBINED_PORTS(inet->dport, inet->num); int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); struct tcp_ehash_bucket *head = &tcp_ehash[hash]; - struct sock *sk2, **skp; + struct sock *sk2; + struct hlist_node *node; struct tcp_tw_bucket *tw; write_lock_bh(&head->lock); - for (skp = &(head + tcp_ehash_size)->chain; (sk2 = *skp) != NULL; - skp = &sk2->sk_next) { + /* Check TIME-WAIT sockets first. */ + sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) { tw = (struct tcp_tw_bucket*)sk2; if(*((__u32 *)&(tw->tw_dport)) == ports && @@ -498,7 +503,6 @@ tp->ts_recent = tw->tw_ts_recent; tp->ts_recent_stamp = tw->tw_ts_recent_stamp; sock_hold(sk2); - skp = &head->chain; goto unique; } else goto not_unique; @@ -506,18 +510,15 @@ } tw = NULL; - for (skp = &head->chain; (sk2 = *skp) != NULL; skp = &sk2->sk_next) { + /* And established part... */ + sk_for_each(sk2, node, &head->chain) { if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif)) goto not_unique; } unique: - BUG_TRAP(!sk->sk_pprev); - if ((sk->sk_next = *skp) != NULL) - (*skp)->sk_pprev = &sk->sk_next; - - *skp = sk; - sk->sk_pprev = skp; + BUG_TRAP(sk_unhashed(sk)); + sk_add_node(sk, &head->chain); sk->sk_hashent = hash; sock_prot_inc_use(sk->sk_prot); write_unlock_bh(&head->lock); @@ -552,11 +553,11 @@ } head = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)]; - tb = head->chain; + tb = tb_head(head); spin_lock_bh(&head->lock); - if (tb->owners == sk && !sk->sk_bind_next) { + if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { __tcp_v6_hash(sk); spin_unlock_bh(&head->lock); return 0; diff -urN linux-2.5.71-bk2/net/ipv6/udp.c linux-2.5.72/net/ipv6/udp.c --- linux-2.5.71-bk2/net/ipv6/udp.c 2003-06-14 12:18:30.000000000 -0700 +++ linux-2.5.72/net/ipv6/udp.c 2003-06-16 21:54:53.000000000 -0700 @@ -101,6 +101,9 @@ */ static int udp_v6_get_port(struct sock *sk, unsigned short snum) { + struct sock *sk2; + struct hlist_node *node; + write_lock_bh(&udp_hash_lock); if (snum == 0) { int best_size_so_far, best, result, i; @@ -111,11 +114,11 @@ best_size_so_far = 32767; best = result = udp_port_rover; for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { - struct sock *sk2; int size; + struct hlist_head *list; - sk2 = udp_hash[result & (UDP_HTABLE_SIZE - 1)]; - if (!sk2) { + list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + if (hlist_empty(list)) { if (result > sysctl_local_port_range[1]) result = sysctl_local_port_range[0] + ((result - sysctl_local_port_range[0]) & @@ -123,10 +126,9 @@ goto gotit; } size = 0; - do { + sk_for_each(sk2, node, list) if (++size >= best_size_so_far) goto next; - } while ((sk2 = sk2->sk_next) != NULL); best_size_so_far = size; best = result; next:; @@ -143,11 +145,8 @@ gotit: udp_port_rover = snum = result; } else { - struct sock *sk2; - - for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; - sk2 != NULL; - sk2 = sk2->sk_next) { + sk_for_each(sk2, node, + &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { if (inet_sk(sk2)->num == snum && sk2 != sk && sk2->sk_bound_dev_if == sk->sk_bound_dev_if && @@ -158,12 +157,8 @@ } inet_sk(sk)->num = snum; - if (!sk->sk_pprev) { - struct sock **skp = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; - if ((sk->sk_next = *skp) != NULL) - (*skp)->sk_pprev = &sk->sk_next; - *skp = sk; - sk->sk_pprev = skp; + if (sk_unhashed(sk)) { + 
sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]); sock_prot_inc_use(sk->sk_prot); sock_hold(sk); } @@ -183,11 +178,7 @@ static void udp_v6_unhash(struct sock *sk) { write_lock_bh(&udp_hash_lock); - if (sk->sk_pprev) { - if (sk->sk_next) - sk->sk_next->sk_pprev = sk->sk_pprev; - *sk->sk_pprev = sk->sk_next; - sk->sk_pprev = NULL; + if (sk_del_node_init(sk)) { inet_sk(sk)->num = 0; sock_prot_dec_use(sk->sk_prot); __sock_put(sk); @@ -199,12 +190,12 @@ struct in6_addr *daddr, u16 dport, int dif) { struct sock *sk, *result = NULL; + struct hlist_node *node; unsigned short hnum = ntohs(dport); int badness = -1; read_lock(&udp_hash_lock); - for (sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk; - sk = sk->sk_next) { + sk_for_each(sk, node, &udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]) { struct inet_opt *inet = inet_sk(sk); if (inet->num == hnum && sk->sk_family == PF_INET6) { @@ -299,9 +290,10 @@ if (addr_type == IPV6_ADDR_MAPPED) { struct sockaddr_in sin; - if (__ipv6_only_sock(sk)) - return -ENETUNREACH; - + if (__ipv6_only_sock(sk)) { + err = -ENETUNREACH; + goto out; + } sin.sin_family = AF_INET; sin.sin_addr.s_addr = daddr->s6_addr32[3]; sin.sin_port = usin->sin6_port; @@ -309,8 +301,8 @@ err = udp_connect(sk, (struct sockaddr*) &sin, sizeof(sin)); ipv4_connected: - if (err < 0) - return err; + if (err) + goto out; ipv6_addr_set(&np->daddr, 0, 0, htonl(0x0000ffff), inet->daddr); @@ -323,7 +315,7 @@ ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000ffff), inet->rcv_saddr); } - return 0; + goto out; } if (addr_type&IPV6_ADDR_LINKLOCAL) { @@ -331,8 +323,8 @@ usin->sin6_scope_id) { if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != usin->sin6_scope_id) { - fl6_sock_release(flowlabel); - return -EINVAL; + err = -EINVAL; + goto out; } sk->sk_bound_dev_if = usin->sin6_scope_id; if (!sk->sk_bound_dev_if && @@ -341,8 +333,10 @@ } /* Connect to link-local address requires an interface */ - if (!sk->sk_bound_dev_if) - return -EINVAL; + if (!sk->sk_bound_dev_if) { + err = -EINVAL; + goto out; + } } ipv6_addr_copy(&np->daddr, daddr); @@ -379,31 +373,33 @@ if ((err = dst->error) != 0) { dst_release(dst); - fl6_sock_release(flowlabel); - return err; + goto out; } /* get the source address used in the appropriate device */ err = ipv6_get_saddr(dst, daddr, &fl.fl6_src); - if (err == 0) { - if (ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&np->saddr, &fl.fl6_src); - - if (ipv6_addr_any(&np->rcv_saddr)) { - ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); - inet->rcv_saddr = LOOPBACK4_IPV6; - } + if (err) { + dst_release(dst); + goto out; + } - ip6_dst_store(sk, dst, - !ipv6_addr_cmp(&fl.fl6_dst, &np->daddr) ? - &np->daddr : NULL); + if (ipv6_addr_any(&np->saddr)) + ipv6_addr_copy(&np->saddr, &fl.fl6_src); - sk->sk_state = TCP_ESTABLISHED; + if (ipv6_addr_any(&np->rcv_saddr)) { + ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); + inet->rcv_saddr = LOOPBACK4_IPV6; } - fl6_sock_release(flowlabel); + ip6_dst_store(sk, dst, + !ipv6_addr_cmp(&fl.fl6_dst, &np->daddr) ? 
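The udp_v6_connect() hunks above are not only list work: the early returns in the IPv4-mapped and link-local branches are rewritten as "err = ...; goto out;" so that fl6_sock_release(flowlabel), at the single exit label visible just below, runs on every path. Previously the link-local branch released the flow label by hand and the IPv4-mapped early returns did not. The shape of the pattern, reduced to essentials; bad_address() and do_connect() are placeholders for this sketch, not kernel functions, and fl6_sock_release() accepts a NULL argument:

static int connect_shape(struct sock *sk, struct sockaddr *uaddr)
{
        struct ip6_flowlabel *flowlabel = NULL;
        int err;

        /* ... flowlabel may have been looked up and held here ... */

        if (bad_address(uaddr)) {
                err = -EINVAL;
                goto out;               /* was "return -EINVAL": flowlabel leaked */
        }

        err = do_connect(sk, uaddr);    /* success and failure leave the same way */
out:
        fl6_sock_release(flowlabel);    /* safe on NULL */
        return err;
}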
+ &np->daddr : NULL); + + sk->sk_state = TCP_ESTABLISHED; +out: + fl6_sock_release(flowlabel); return err; } @@ -579,9 +575,11 @@ u16 rmt_port, struct in6_addr *rmt_addr, int dif) { + struct hlist_node *node; struct sock *s = sk; unsigned short num = ntohs(loc_port); - for (; s; s = s->sk_next) { + + sk_for_each_from(s, node) { struct inet_opt *inet = inet_sk(s); if (inet->num == num && s->sk_family == PF_INET6) { @@ -622,7 +620,7 @@ int dif; read_lock(&udp_hash_lock); - sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]; + sk = sk_head(&udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); dif = skb->dev->ifindex; sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (!sk) @@ -630,7 +628,7 @@ buff = NULL; sk2 = sk; - while ((sk2 = udp_v6_mcast_next(sk2->sk_next, uh->dest, daddr, + while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr, uh->source, saddr, dif))) { if (!buff) { buff = skb_clone(skb, GFP_ATOMIC); diff -urN linux-2.5.71-bk2/net/ipx/af_ipx.c linux-2.5.72/net/ipx/af_ipx.c --- linux-2.5.71-bk2/net/ipx/af_ipx.c 2003-06-14 12:17:59.000000000 -0700 +++ linux-2.5.72/net/ipx/af_ipx.c 2003-06-16 21:54:53.000000000 -0700 @@ -132,7 +132,6 @@ static void ipx_remove_socket(struct sock *sk) { - struct sock *s; /* Determine interface with which socket is associated */ struct ipx_interface *intrfc = ipx_sk(sk)->intrfc; @@ -141,20 +140,7 @@ ipxitf_hold(intrfc); spin_lock_bh(&intrfc->if_sklist_lock); - s = intrfc->if_sklist; - if (s == sk) { - intrfc->if_sklist = s->sk_next; - goto out_unlock; - } - - while (s && s->sk_next) { - if (s->sk_next == sk) { - s->sk_next = sk->sk_next; - goto out_unlock; - } - s = s->sk_next; - } -out_unlock: + sk_del_node_init(sk); spin_unlock_bh(&intrfc->if_sklist_lock); sock_put(sk); ipxitf_put(intrfc); @@ -246,15 +232,7 @@ sock_hold(sk); spin_lock_bh(&intrfc->if_sklist_lock); ipx_sk(sk)->intrfc = intrfc; - sk->sk_next = NULL; - if (!intrfc->if_sklist) - intrfc->if_sklist = sk; - else { - struct sock *s = intrfc->if_sklist; - while (s->sk_next) - s = s->sk_next; - s->sk_next = sk; - } + sk_add_node(sk, &intrfc->if_sklist); spin_unlock_bh(&intrfc->if_sklist_lock); ipxitf_put(intrfc); } @@ -263,11 +241,14 @@ static struct sock *__ipxitf_find_socket(struct ipx_interface *intrfc, unsigned short port) { - struct sock *s = intrfc->if_sklist; - - while (s && ipx_sk(s)->port != port) - s = s->sk_next; + struct sock *s; + struct hlist_node *node; + sk_for_each(s, node, &intrfc->if_sklist) + if (ipx_sk(s)->port == port) + goto found; + s = NULL; +found: return s; } @@ -292,36 +273,37 @@ unsigned short port) { struct sock *s; + struct hlist_node *node; ipxitf_hold(intrfc); spin_lock_bh(&intrfc->if_sklist_lock); - s = intrfc->if_sklist; - while (s) { + sk_for_each(s, node, &intrfc->if_sklist) { struct ipx_opt *ipxs = ipx_sk(s); if (ipxs->port == port && !memcmp(node, ipxs->node, IPX_NODE_LEN)) - break; - s = s->sk_next; + goto found; } + s = NULL; +found: spin_unlock_bh(&intrfc->if_sklist_lock); ipxitf_put(intrfc); - return s; } #endif void __ipxitf_down(struct ipx_interface *intrfc) { - struct sock *s, *t; + struct sock *s; + struct hlist_node *node, *t; /* Delete all routes associated with this interface */ ipxrtr_del_routes(intrfc); spin_lock_bh(&intrfc->if_sklist_lock); /* error sockets */ - for (s = intrfc->if_sklist; s;) { + sk_for_each_safe(s, node, t, &intrfc->if_sklist) { struct ipx_opt *ipxs = ipx_sk(s); s->sk_err = ENOLINK; @@ -329,11 +311,9 @@ ipxs->intrfc = NULL; ipxs->port = 0; s->sk_zapped = 1; /* Indicates it is no longer bound */ - 
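udp_v6_mcast_next() above keeps its old contract of "first match at or after this socket", now expressed with sk_for_each_from(), while the delivery loop in udpv6_mcast_deliver() starts from sk_head(&udp_hash[...]) and advances past each hit with sk_next(). A paraphrase of the resumable-walk helper, on the same assumption as the sketches above, with hlist_for_each_entry_from() being the list.h primitive it wraps:

/* Sketch, not the real sock.h: resume an hlist walk at an arbitrary
 * socket instead of at the chain head. */
#define sk_for_each_from(sk, node)                              \
        if ((sk) && ((node) = &(sk)->sk_node))                  \
                hlist_for_each_entry_from(sk, node, sk_node)

/* ...which lets the caller hand udp_v6_mcast_next() sk_next(sk2) to
 * continue the scan one entry past the previous match. */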
t = s; - s = s->sk_next; - t->sk_next = NULL; + sk_del_node_init(s); } - intrfc->if_sklist = NULL; + INIT_HLIST_HEAD(&intrfc->if_sklist); spin_unlock_bh(&intrfc->if_sklist_lock); /* remove this interface from list */ @@ -400,12 +380,12 @@ int is_broadcast = !memcmp(ipx->ipx_dest.node, ipx_broadcast_node, IPX_NODE_LEN); struct sock *s; + struct hlist_node *node; int rc; spin_lock_bh(&intrfc->if_sklist_lock); - s = intrfc->if_sklist; - while (s) { + sk_for_each(s, node, &intrfc->if_sklist) { struct ipx_opt *ipxs = ipx_sk(s); if (ipxs->port == ipx->ipx_dest.sock && @@ -429,7 +409,6 @@ if (intrfc != ipx_internal_net) break; } - s = s->sk_next; } /* skb was solely for us, and we did not make a copy, so free it. */ @@ -462,15 +441,18 @@ connection = (((int) *(ncphdr + 9)) << 8) | (int) *(ncphdr + 8); if (connection) { + struct hlist_node *node; /* Now we have to look for a special NCP connection handling * socket. Only these sockets have ipx_ncp_conn != 0, set by * SIOCIPXNCPCONN. */ spin_lock_bh(&intrfc->if_sklist_lock); - for (sk = intrfc->if_sklist; - sk && ipx_sk(sk)->ipx_ncp_conn != connection; - sk = sk->sk_next); - if (sk) - sock_hold(sk); + sk_for_each(sk, node, &intrfc->if_sklist) + if (ipx_sk(sk)->ipx_ncp_conn == connection) { + sock_hold(sk); + goto found; + } + sk = NULL; + found: spin_unlock_bh(&intrfc->if_sklist_lock); } return sk; @@ -905,7 +887,7 @@ intrfc->if_internal = internal; intrfc->if_ipx_offset = ipx_offset; intrfc->if_sknum = IPX_MIN_EPHEMERAL_SOCKET; - intrfc->if_sklist = NULL; + INIT_HLIST_HEAD(&intrfc->if_sklist); atomic_set(&intrfc->refcnt, 1); spin_lock_init(&intrfc->if_sklist_lock); __module_get(THIS_MODULE); diff -urN linux-2.5.71-bk2/net/ipx/ipx_proc.c linux-2.5.72/net/ipx/ipx_proc.c --- linux-2.5.71-bk2/net/ipx/ipx_proc.c 2003-06-14 12:17:56.000000000 -0700 +++ linux-2.5.72/net/ipx/ipx_proc.c 2003-06-16 21:54:53.000000000 -0700 @@ -168,22 +168,25 @@ static __inline__ struct sock *ipx_get_socket_idx(loff_t pos) { struct sock *s = NULL; + struct hlist_node *node; struct ipx_interface *i; list_for_each_entry(i, &ipx_interfaces, node) { - if (!pos) - break; spin_lock_bh(&i->if_sklist_lock); - for (s = i->if_sklist; pos && s; s = s->sk_next) + sk_for_each(s, node, &i->if_sklist) { + if (!pos) + break; --pos; + } + spin_unlock_bh(&i->if_sklist_lock); if (!pos) { - if (!s) - spin_unlock_bh(&i->if_sklist_lock); + if (node) + goto found; break; } - spin_unlock_bh(&i->if_sklist_lock); } - + s = NULL; +found: return s; } @@ -197,7 +200,7 @@ static void *ipx_seq_socket_next(struct seq_file *seq, void *v, loff_t *pos) { - struct sock* sk; + struct sock* sk, *next; struct ipx_interface *i; struct ipx_opt *ipxs; @@ -207,14 +210,15 @@ i = ipx_interfaces_head(); if (!i) goto out; - sk = i->if_sklist; + sk = sk_head(&i->if_sklist); if (sk) spin_lock_bh(&i->if_sklist_lock); goto out; } sk = v; - if (sk->sk_next) { - sk = sk->sk_next; + next = sk_next(sk); + if (next) { + sk = next; goto out; } ipxs = ipx_sk(sk); @@ -226,8 +230,8 @@ if (!i) break; spin_lock_bh(&i->if_sklist_lock); - if (i->if_sklist) { - sk = i->if_sklist; + if (!hlist_empty(&i->if_sklist)) { + sk = sk_head(&i->if_sklist); break; } spin_unlock_bh(&i->if_sklist_lock); diff -urN linux-2.5.71-bk2/net/key/af_key.c linux-2.5.72/net/key/af_key.c --- linux-2.5.71-bk2/net/key/af_key.c 2003-06-14 12:18:04.000000000 -0700 +++ linux-2.5.72/net/key/af_key.c 2003-06-16 21:54:53.000000000 -0700 @@ -35,7 +35,7 @@ /* List of all pfkey sockets. 
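__ipxitf_down() above is the delete-while-walking case: sk_for_each_safe() caches the next cursor in a temporary before the loop body runs, so sk_del_node_init(s) can unlink the current socket without derailing the walk. In outline (a sketch; sklist stands in for intrfc->if_sklist):

static void drop_all(struct hlist_head *sklist)
{
        struct sock *s;
        struct hlist_node *node, *tmp;

        sk_for_each_safe(s, node, tmp, sklist) {
                /* mark s dead, clear its interface back-pointers ... */
                sk_del_node_init(s);    /* safe: tmp already points past s */
        }
        INIT_HLIST_HEAD(sklist);        /* chain is already empty; belt and braces */
}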
*/ -static struct sock * pfkey_table; +HLIST_HEAD(pfkey_table); static DECLARE_WAIT_QUEUE_HEAD(pfkey_table_wait); static rwlock_t pfkey_table_lock = RW_LOCK_UNLOCKED; static atomic_t pfkey_table_users = ATOMIC_INIT(0); @@ -114,24 +114,16 @@ static void pfkey_insert(struct sock *sk) { pfkey_table_grab(); - sk->sk_next = pfkey_table; - pfkey_table = sk; + sk_add_node(sk, &pfkey_table); sock_hold(sk); pfkey_table_ungrab(); } static void pfkey_remove(struct sock *sk) { - struct sock **skp; - pfkey_table_grab(); - for (skp = &pfkey_table; *skp; skp = &((*skp)->sk_next)) { - if (*skp == sk) { - *skp = sk->sk_next; - __sock_put(sk); - break; - } - } + if (sk_del_node_init(sk)) + __sock_put(sk); pfkey_table_ungrab(); } @@ -231,6 +223,7 @@ int broadcast_flags, struct sock *one_sk) { struct sock *sk; + struct hlist_node *node; struct sk_buff *skb2 = NULL; int err = -ESRCH; @@ -241,7 +234,7 @@ return -ENOMEM; pfkey_lock_table(); - for (sk = pfkey_table; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &pfkey_table) { struct pfkey_opt *pfk = pfkey_sk(sk); int err2; @@ -2799,12 +2792,13 @@ off_t begin = 0; int len = 0; struct sock *s; + struct hlist_node *node; len += sprintf(buffer,"sk RefCnt Rmem Wmem User Inode\n"); read_lock(&pfkey_table_lock); - for (s = pfkey_table; s; s = s->sk_next) { + sk_for_each(s, node, &pfkey_table) { len += sprintf(buffer+len,"%p %-6d %-6u %-6u %-6u %-6lu", s, atomic_read(&s->sk_refcnt), diff -urN linux-2.5.71-bk2/net/llc/af_llc.c linux-2.5.72/net/llc/af_llc.c --- linux-2.5.71-bk2/net/llc/af_llc.c 2003-06-14 12:17:56.000000000 -0700 +++ linux-2.5.72/net/llc/af_llc.c 2003-06-16 21:54:53.000000000 -0700 @@ -184,7 +184,7 @@ if (!sk->sk_zapped) llc_sap_unassign_sock(llc->sap, sk); release_sock(sk); - if (llc->sap && !llc->sap->sk_list.list) + if (llc->sap && hlist_empty(&llc->sap->sk_list.list)) llc_sap_close(llc->sap); sock_put(sk); llc_sk_free(sk); diff -urN linux-2.5.71-bk2/net/llc/llc_conn.c linux-2.5.72/net/llc/llc_conn.c --- linux-2.5.71-bk2/net/llc/llc_conn.c 2003-06-14 12:17:57.000000000 -0700 +++ linux-2.5.72/net/llc/llc_conn.c 2003-06-16 21:54:53.000000000 -0700 @@ -489,19 +489,22 @@ struct llc_addr *laddr) { struct sock *rc; + struct hlist_node *node; read_lock_bh(&sap->sk_list.lock); - for (rc = sap->sk_list.list; rc; rc = rc->sk_next) { + sk_for_each(rc, node, &sap->sk_list.list) { struct llc_opt *llc = llc_sk(rc); if (llc->laddr.lsap == laddr->lsap && llc->daddr.lsap == daddr->lsap && llc_mac_match(llc->laddr.mac, laddr->mac) && - llc_mac_match(llc->daddr.mac, daddr->mac)) - break; + llc_mac_match(llc->daddr.mac, daddr->mac)) { + sock_hold(rc); + goto found; + } } - if (rc) - sock_hold(rc); + rc = NULL; +found: read_unlock_bh(&sap->sk_list.lock); return rc; } @@ -518,18 +521,21 @@ struct sock *llc_lookup_listener(struct llc_sap *sap, struct llc_addr *laddr) { struct sock *rc; + struct hlist_node *node; read_lock_bh(&sap->sk_list.lock); - for (rc = sap->sk_list.list; rc; rc = rc->sk_next) { + sk_for_each(rc, node, &sap->sk_list.list) { struct llc_opt *llc = llc_sk(rc); if (rc->sk_type == SOCK_STREAM && rc->sk_state == TCP_LISTEN && llc->laddr.lsap == laddr->lsap && - llc_mac_match(llc->laddr.mac, laddr->mac)) - break; + llc_mac_match(llc->laddr.mac, laddr->mac)) { + sock_hold(rc); + goto found; + } } - if (rc) - sock_hold(rc); + rc = NULL; +found: read_unlock_bh(&sap->sk_list.lock); return rc; } @@ -545,18 +551,21 @@ struct sock *llc_lookup_dgram(struct llc_sap *sap, struct llc_addr *laddr) { struct sock *rc; + struct hlist_node *node; 
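pfkey_table above, like nr_list, packet_sklist, rose_list, wanpipe_sklist and x25_list later in the patch, turns from a bare struct sock pointer into a statically initialised list head. One side effect of the conversion as written: HLIST_HEAD(name) is a definition with no storage-class specifier, so tables that used to be "static struct sock *" end up with external linkage unless spelled "static HLIST_HEAD(name)". Assuming the stock list.h macros:

#define HLIST_HEAD_INIT         { .first = NULL }
#define HLIST_HEAD(name)        struct hlist_head name = { .first = NULL }
#define INIT_HLIST_HEAD(ptr)    ((ptr)->first = NULL)

/* before */ static struct sock *pfkey_table;
/* after  */ HLIST_HEAD(pfkey_table);           /* file scope, external linkage */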
read_lock_bh(&sap->sk_list.lock); - for (rc = sap->sk_list.list; rc; rc = rc->sk_next) { + sk_for_each(rc, node, &sap->sk_list.list) { struct llc_opt *llc = llc_sk(rc); if (rc->sk_type == SOCK_DGRAM && llc->laddr.lsap == laddr->lsap && - llc_mac_match(llc->laddr.mac, laddr->mac)) - break; + llc_mac_match(llc->laddr.mac, laddr->mac)) { + sock_hold(rc); + goto found; + } } - if (rc) - sock_hold(rc); + rc = NULL; +found: read_unlock_bh(&sap->sk_list.lock); return rc; } diff -urN linux-2.5.71-bk2/net/llc/llc_main.c linux-2.5.72/net/llc/llc_main.c --- linux-2.5.71-bk2/net/llc/llc_main.c 2003-06-14 12:18:07.000000000 -0700 +++ linux-2.5.72/net/llc/llc_main.c 2003-06-16 21:54:54.000000000 -0700 @@ -320,10 +320,11 @@ { int rc = 0; struct sock *sk; + struct hlist_node *node; write_lock_bh(&sap->sk_list.lock); - for (sk = sap->sk_list.list; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &sap->sk_list.list) { llc_sk(sk)->state = LLC_CONN_STATE_TEMP; if (llc_send_disc(sk)) diff -urN linux-2.5.71-bk2/net/llc/llc_proc.c linux-2.5.72/net/llc/llc_proc.c --- linux-2.5.71-bk2/net/llc/llc_proc.c 2003-06-14 12:17:57.000000000 -0700 +++ linux-2.5.72/net/llc/llc_proc.c 2003-06-16 21:54:54.000000000 -0700 @@ -38,21 +38,27 @@ { struct list_head *sap_entry; struct llc_sap *sap; + struct hlist_node *node; struct sock *sk = NULL; list_for_each(sap_entry, &llc_main_station.sap_list.list) { sap = list_entry(sap_entry, struct llc_sap, node); read_lock_bh(&sap->sk_list.lock); - for (sk = sap->sk_list.list; sk; sk = sk->sk_next) - if (!pos--) { - if (!sk) - read_unlock_bh(&sap->sk_list.lock); - goto out; - } + sk_for_each(sk, node, &sap->sk_list.list) { + if (!pos) + break; + --pos; + } read_unlock_bh(&sap->sk_list.lock); + if (!pos) { + if (node) + goto found; + break; + } } -out: + sk = NULL; +found: return sk; } @@ -66,7 +72,7 @@ static void *llc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct sock* sk; + struct sock* sk, *next; struct llc_opt *llc; struct llc_sap *sap; @@ -76,8 +82,9 @@ goto out; } sk = v; - if (sk->sk_next) { - sk = sk->sk_next; + next = sk_next(sk); + if (next) { + sk = next; goto out; } llc = llc_sk(sk); @@ -89,8 +96,8 @@ break; sap = list_entry(sap->node.next, struct llc_sap, node); read_lock_bh(&sap->sk_list.lock); - if (sap->sk_list.list) { - sk = sap->sk_list.list; + if (!hlist_empty(&sap->sk_list.list)) { + sk = sk_head(&sap->sk_list.list); break; } read_unlock_bh(&sap->sk_list.lock); diff -urN linux-2.5.71-bk2/net/llc/llc_sap.c linux-2.5.72/net/llc/llc_sap.c --- linux-2.5.71-bk2/net/llc/llc_sap.c 2003-06-14 12:18:29.000000000 -0700 +++ linux-2.5.72/net/llc/llc_sap.c 2003-06-16 21:54:54.000000000 -0700 @@ -34,11 +34,7 @@ { write_lock_bh(&sap->sk_list.lock); llc_sk(sk)->sap = sap; - sk->sk_next = sap->sk_list.list; - if (sk->sk_next) - sap->sk_list.list->sk_pprev = &sk->sk_next; - sap->sk_list.list = sk; - sk->sk_pprev = &sap->sk_list.list; + sk_add_node(sk, &sap->sk_list.list); sock_hold(sk); write_unlock_bh(&sap->sk_list.lock); } @@ -48,22 +44,14 @@ * @sap: SAP * @sk: pointer to connection * - * This function removes a connection from sk_list.list of a SAP. + * This function removes a connection from sk_list.list of a SAP if + * the connection was in this list. 
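The llc_lookup_established()/llc_lookup_listener()/llc_lookup_dgram() hunks show the lookup idiom used throughout the patch: take the reference with sock_hold() inside the loop, while the lock is still held, then jump to a found: label; on fall-through the result is set to NULL explicitly. That explicit "rc = NULL" matters because, unlike the old "for (rc = list; rc; rc = rc->sk_next)" form, the entry pointer is not left NULL when an hlist iteration exhausts the chain. In outline, with matches() standing for the per-lookup comparison:

        struct sock *rc;
        struct hlist_node *node;

        read_lock_bh(&sap->sk_list.lock);
        sk_for_each(rc, node, &sap->sk_list.list) {
                if (matches(rc)) {              /* laddr/daddr/state test */
                        sock_hold(rc);          /* take the ref before dropping the lock */
                        goto found;
                }
        }
        rc = NULL;                              /* fell off the end: no match */
found:
        read_unlock_bh(&sap->sk_list.lock);
        return rc;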
*/ void llc_sap_unassign_sock(struct llc_sap *sap, struct sock *sk) { write_lock_bh(&sap->sk_list.lock); - if (sk->sk_pprev) { - if (sk->sk_next) - sk->sk_next->sk_pprev = sk->sk_pprev; - *sk->sk_pprev = sk->sk_next; - sk->sk_pprev = NULL; - /* - * This only makes sense if the socket was inserted on the - * list, if sk->sk_pprev is NULL it wasn't - */ + if (sk_del_node_init(sk)) sock_put(sk); - } write_unlock_bh(&sap->sk_list.lock); } diff -urN linux-2.5.71-bk2/net/netlink/af_netlink.c linux-2.5.72/net/netlink/af_netlink.c --- linux-2.5.71-bk2/net/netlink/af_netlink.c 2003-06-14 12:18:21.000000000 -0700 +++ linux-2.5.72/net/netlink/af_netlink.c 2003-06-16 21:54:54.000000000 -0700 @@ -68,7 +68,7 @@ #define nlk_sk(__sk) ((struct netlink_opt *)(__sk)->sk_protinfo) -static struct sock *nl_table[MAX_LINKS]; +static struct hlist_head nl_table[MAX_LINKS]; static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); static unsigned nl_nonroot[MAX_LINKS]; @@ -160,18 +160,19 @@ static __inline__ struct sock *netlink_lookup(int protocol, u32 pid) { struct sock *sk; + struct hlist_node *node; read_lock(&nl_table_lock); - for (sk = nl_table[protocol]; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &nl_table[protocol]) { if (nlk_sk(sk)->pid == pid) { sock_hold(sk); - read_unlock(&nl_table_lock); - return sk; + goto found; } } - + sk = NULL; +found: read_unlock(&nl_table_lock); - return NULL; + return sk; } extern struct proto_ops netlink_ops; @@ -180,18 +181,18 @@ { int err = -EADDRINUSE; struct sock *osk; + struct hlist_node *node; netlink_table_grab(); - for (osk = nl_table[sk->sk_protocol]; osk; osk = osk->sk_next) { + sk_for_each(osk, node, &nl_table[sk->sk_protocol]) { if (nlk_sk(osk)->pid == pid) break; } - if (osk == NULL) { + if (!node) { err = -EBUSY; if (nlk_sk(sk)->pid == 0) { nlk_sk(sk)->pid = pid; - sk->sk_next = nl_table[sk->sk_protocol]; - nl_table[sk->sk_protocol] = sk; + sk_add_node(sk, &nl_table[sk->sk_protocol]); sock_hold(sk); err = 0; } @@ -202,16 +203,9 @@ static void netlink_remove(struct sock *sk) { - struct sock **skp; - netlink_table_grab(); - for (skp = &nl_table[sk->sk_protocol]; *skp; skp = &((*skp)->sk_next)) { - if (*skp == sk) { - *skp = sk->sk_next; - __sock_put(sk); - break; - } - } + if (sk_del_node_init(sk)) + __sock_put(sk); netlink_table_ungrab(); } @@ -298,12 +292,13 @@ { struct sock *sk = sock->sk; struct sock *osk; + struct hlist_node *node; s32 pid = current->pid; int err; retry: netlink_table_grab(); - for (osk = nl_table[sk->sk_protocol]; osk; osk = osk->sk_next) { + sk_for_each(osk, node, &nl_table[sk->sk_protocol]) { if (nlk_sk(osk)->pid == pid) { /* Bind collision, search negative pid values. 
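The netlink conversion replaces the static array of struct sock pointers with an array of hlist heads. A detail the patch leans on: a static array of struct hlist_head sits in the BSS and is zero-initialised, and an all-NULL .first is exactly the empty-list state, so nl_table (and unix_socket_table later) need no INIT_HLIST_HEAD() pass, whereas heads embedded in kmalloc'ed objects (ipx_interface->if_sklist, sctp_bind_bucket->sk_list) are initialised explicitly. netlink_insert() above also shows the "no collision" test done on the cursor. In outline:

static struct hlist_head nl_table[MAX_LINKS];   /* BSS: every .first already NULL */

        struct sock *osk;
        struct hlist_node *node;

        sk_for_each(osk, node, &nl_table[sk->sk_protocol])
                if (nlk_sk(osk)->pid == pid)
                        break;                  /* collision */
        if (!node) {
                /* scanned the whole chain, pid is free:
                 * claim it and sk_add_node(sk, &nl_table[sk->sk_protocol]) */
        }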
*/ if (pid > 0) @@ -512,6 +507,7 @@ u32 group, int allocation) { struct sock *sk; + struct hlist_node *node; struct sk_buff *skb2 = NULL; int protocol = ssk->sk_protocol; int failure = 0, delivered = 0; @@ -520,7 +516,7 @@ netlink_lock_table(); - for (sk = nl_table[protocol]; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &nl_table[protocol]) { struct netlink_opt *nlk = nlk_sk(sk); if (ssk == sk) @@ -572,10 +568,11 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) { struct sock *sk; + struct hlist_node *node; int protocol = ssk->sk_protocol; read_lock(&nl_table_lock); - for (sk = nl_table[protocol]; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &nl_table[protocol]) { struct netlink_opt *nlk = nlk_sk(sk); if (ssk == sk) continue; @@ -975,13 +972,14 @@ int len=0; int i; struct sock *s; + struct hlist_node *node; len+= sprintf(buffer,"sk Eth Pid Groups " "Rmem Wmem Dump Locks\n"); for (i=0; isk_next) { + sk_for_each(s, node, &nl_table[i]) { struct netlink_opt *nlk = nlk_sk(s); len+=sprintf(buffer+len,"%p %-3d %-6d %08x %-8d %-8d %p %d", diff -urN linux-2.5.71-bk2/net/netrom/af_netrom.c linux-2.5.72/net/netrom/af_netrom.c --- linux-2.5.71-bk2/net/netrom/af_netrom.c 2003-06-14 12:18:21.000000000 -0700 +++ linux-2.5.72/net/netrom/af_netrom.c 2003-06-16 21:54:54.000000000 -0700 @@ -57,7 +57,7 @@ static unsigned short circuit = 0x101; -static struct sock *nr_list; +HLIST_HEAD(nr_list); static spinlock_t nr_list_lock; static struct proto_ops nr_proto_ops; @@ -89,26 +89,8 @@ */ static void nr_remove_socket(struct sock *sk) { - struct sock *s; - spin_lock_bh(&nr_list_lock); - - if ((s = nr_list) == sk) { - nr_list = s->sk_next; - spin_unlock_bh(&nr_list_lock); - return; - } - - while (s && s->sk_next) { - if (s->sk_next == sk) { - s->sk_next = sk->sk_next; - spin_unlock_bh(&nr_list_lock); - return; - } - - s = s->sk_next; - } - + sk_del_node_init(sk); spin_unlock_bh(&nr_list_lock); } @@ -118,12 +100,12 @@ static void nr_kill_by_device(struct net_device *dev) { struct sock *s; + struct hlist_node *node; spin_lock_bh(&nr_list_lock); - for (s = nr_list; s; s = s->sk_next) { + sk_for_each(s, node, &nr_list) if (nr_sk(s)->device == dev) nr_disconnect(s, ENETUNREACH); - } spin_unlock_bh(&nr_list_lock); } @@ -149,8 +131,7 @@ static void nr_insert_socket(struct sock *sk) { spin_lock_bh(&nr_list_lock); - sk->sk_next = nr_list; - nr_list = sk; + sk_add_node(sk, &nr_list); spin_unlock_bh(&nr_list_lock); } @@ -161,18 +142,17 @@ static struct sock *nr_find_listener(ax25_address *addr) { struct sock *s; + struct hlist_node *node; spin_lock_bh(&nr_list_lock); - for (s = nr_list; s; s = s->sk_next) { + sk_for_each(s, node, &nr_list) if (!ax25cmp(&nr_sk(s)->source_addr, addr) && - s->sk_state == TCP_LISTEN) { - spin_unlock_bh(&nr_list_lock); - return s; - } - } + s->sk_state == TCP_LISTEN) + goto found; + s = NULL; +found: spin_unlock_bh(&nr_list_lock); - - return NULL; + return s; } /* @@ -181,19 +161,19 @@ static struct sock *nr_find_socket(unsigned char index, unsigned char id) { struct sock *s; + struct hlist_node *node; spin_lock_bh(&nr_list_lock); - for (s = nr_list; s; s = s->sk_next) { + sk_for_each(s, node, &nr_list) { nr_cb *nr = nr_sk(s); - if (nr->my_index == index && nr->my_id == id) { - spin_unlock_bh(&nr_list_lock); - return s; - } + if (nr->my_index == index && nr->my_id == id) + goto found; } + s = NULL; +found: spin_unlock_bh(&nr_list_lock); - - return NULL; + return s; } /* @@ -203,20 +183,20 @@ ax25_address *dest) { struct sock *s; + struct hlist_node *node; 
spin_lock_bh(&nr_list_lock); - for (s = nr_list; s; s = s->sk_next) { + sk_for_each(s, node, &nr_list) { nr_cb *nr = nr_sk(s); if (nr->your_index == index && nr->your_id == id && - !ax25cmp(&nr->dest_addr, dest)) { - spin_unlock_bh(&nr_list_lock); - return s; - } + !ax25cmp(&nr->dest_addr, dest)) + goto found; } + s = NULL; +found: spin_unlock_bh(&nr_list_lock); - - return NULL; + return s; } /* @@ -1152,6 +1132,7 @@ static int nr_get_info(char *buffer, char **start, off_t offset, int length) { struct sock *s; + struct hlist_node *node; struct net_device *dev; const char *devname; int len = 0; @@ -1162,7 +1143,7 @@ len += sprintf(buffer, "user_addr dest_node src_node dev my your st vs vr va t1 t2 t4 idle n2 wnd Snd-Q Rcv-Q inode\n"); - for (s = nr_list; s; s = s->sk_next) { + sk_for_each(s, node, &nr_list) { nr_cb *nr = nr_sk(s); if ((dev = nr->device) == NULL) diff -urN linux-2.5.71-bk2/net/packet/af_packet.c linux-2.5.72/net/packet/af_packet.c --- linux-2.5.71-bk2/net/packet/af_packet.c 2003-06-14 12:18:25.000000000 -0700 +++ linux-2.5.72/net/packet/af_packet.c 2003-06-16 21:54:54.000000000 -0700 @@ -140,7 +140,7 @@ */ /* List of all packet sockets. */ -static struct sock * packet_sklist; +HLIST_HEAD(packet_sklist); static rwlock_t packet_sklist_lock = RW_LOCK_UNLOCKED; atomic_t packet_socks_nr; @@ -753,19 +753,13 @@ { struct sock *sk = sock->sk; struct packet_opt *po = pkt_sk(sk); - struct sock **skp; if (!sk) return 0; write_lock_bh(&packet_sklist_lock); - for (skp = &packet_sklist; *skp; skp = &(*skp)->sk_next) { - if (*skp == sk) { - *skp = sk->sk_next; - __sock_put(sk); - break; - } - } + if (sk_del_node_init(sk)) + __sock_put(sk); write_unlock_bh(&packet_sklist_lock); /* @@ -989,8 +983,7 @@ } write_lock_bh(&packet_sklist_lock); - sk->sk_next = packet_sklist; - packet_sklist = sk; + sk_add_node(sk, &packet_sklist); sock_hold(sk); write_unlock_bh(&packet_sklist_lock); return(0); @@ -1369,10 +1362,11 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data) { struct sock *sk; + struct hlist_node *node; struct net_device *dev = (struct net_device*)data; read_lock(&packet_sklist_lock); - for (sk = packet_sklist; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &packet_sklist) { struct packet_opt *po = pkt_sk(sk); switch (msg) { @@ -1777,12 +1771,13 @@ off_t begin=0; int len=0; struct sock *s; + struct hlist_node *node; len+= sprintf(buffer,"sk RefCnt Type Proto Iface R Rmem User Inode\n"); read_lock(&packet_sklist_lock); - for (s = packet_sklist; s; s = s->sk_next) { + sk_for_each(s, node, &packet_sklist) { struct packet_opt *po = pkt_sk(s); len+=sprintf(buffer+len,"%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu", diff -urN linux-2.5.71-bk2/net/rose/af_rose.c linux-2.5.72/net/rose/af_rose.c --- linux-2.5.71-bk2/net/rose/af_rose.c 2003-06-14 12:18:21.000000000 -0700 +++ linux-2.5.72/net/rose/af_rose.c 2003-06-16 21:54:54.000000000 -0700 @@ -56,7 +56,7 @@ int sysctl_rose_maximum_vcs = ROSE_DEFAULT_MAXVC; int sysctl_rose_window_size = ROSE_DEFAULT_WINDOW_SIZE; -static struct sock *rose_list; +HLIST_HEAD(rose_list); static spinlock_t rose_list_lock = SPIN_LOCK_UNLOCKED; static struct proto_ops rose_proto_ops; @@ -151,24 +151,8 @@ */ static void rose_remove_socket(struct sock *sk) { - struct sock *s; - spin_lock_bh(&rose_list_lock); - if ((s = rose_list) == sk) { - rose_list = s->sk_next; - spin_unlock_bh(&rose_list_lock); - return; - } - - while (s && s->sk_next) { - if (s->sk_next == sk) { - s->sk_next = sk->sk_next; - spin_unlock_bh(&rose_list_lock); - return; - 
} - - s = s->sk_next; - } + sk_del_node_init(sk); spin_unlock_bh(&rose_list_lock); } @@ -179,9 +163,10 @@ void rose_kill_by_neigh(struct rose_neigh *neigh) { struct sock *s; + struct hlist_node *node; spin_lock_bh(&rose_list_lock); - for (s = rose_list; s; s = s->sk_next) { + sk_for_each(s, node, &rose_list) { rose_cb *rose = rose_sk(s); if (rose->neighbour == neigh) { @@ -199,9 +184,10 @@ static void rose_kill_by_device(struct net_device *dev) { struct sock *s; + struct hlist_node *node; spin_lock_bh(&rose_list_lock); - for (s = rose_list; s; s = s->sk_next) { + sk_for_each(s, node, &rose_list) { rose_cb *rose = rose_sk(s); if (rose->device == dev) { @@ -244,8 +230,7 @@ { spin_lock_bh(&rose_list_lock); - sk->sk_next = rose_list; - rose_list = sk; + sk_add_node(sk, &rose_list); spin_unlock_bh(&rose_list_lock); } @@ -256,32 +241,30 @@ static struct sock *rose_find_listener(rose_address *addr, ax25_address *call) { struct sock *s; + struct hlist_node *node; spin_lock_bh(&rose_list_lock); - for (s = rose_list; s; s = s->sk_next) { + sk_for_each(s, node, &rose_list) { rose_cb *rose = rose_sk(s); if (!rosecmp(&rose->source_addr, addr) && !ax25cmp(&rose->source_call, call) && - !rose->source_ndigis && s->sk_state == TCP_LISTEN) { - spin_unlock_bh(&rose_list_lock); - return s; - } + !rose->source_ndigis && s->sk_state == TCP_LISTEN) + goto found; } - for (s = rose_list; s; s = s->sk_next) { + sk_for_each(s, node, &rose_list) { rose_cb *rose = rose_sk(s); if (!rosecmp(&rose->source_addr, addr) && !ax25cmp(&rose->source_call, &null_ax25_address) && - s->sk_state == TCP_LISTEN) { - spin_unlock_bh(&rose_list_lock); - return s; - } + s->sk_state == TCP_LISTEN) + goto found; } + s = NULL; +found: spin_unlock_bh(&rose_list_lock); - - return NULL; + return s; } /* @@ -290,19 +273,19 @@ struct sock *rose_find_socket(unsigned int lci, struct rose_neigh *neigh) { struct sock *s; + struct hlist_node *node; spin_lock_bh(&rose_list_lock); - for (s = rose_list; s; s = s->sk_next) { + sk_for_each(s, node, &rose_list) { rose_cb *rose = rose_sk(s); - if (rose->lci == lci && rose->neighbour == neigh) { - spin_unlock_bh(&rose_list_lock); - return s; - } + if (rose->lci == lci && rose->neighbour == neigh) + goto found; } + s = NULL; +found: spin_unlock_bh(&rose_list_lock); - - return NULL; + return s; } /* @@ -1352,6 +1335,7 @@ static int rose_get_info(char *buffer, char **start, off_t offset, int length) { struct sock *s; + struct hlist_node *node; struct net_device *dev; const char *devname, *callsign; int len = 0; @@ -1362,7 +1346,7 @@ len += sprintf(buffer, "dest_addr dest_call src_addr src_call dev lci neigh st vs vr va t t1 t2 t3 hb idle Snd-Q Rcv-Q inode\n"); - for (s = rose_list; s; s = s->sk_next) { + sk_for_each(s, node, &rose_list) { rose_cb *rose = rose_sk(s); if ((dev = rose->device) == NULL) diff -urN linux-2.5.71-bk2/net/sctp/socket.c linux-2.5.72/net/sctp/socket.c --- linux-2.5.71-bk2/net/sctp/socket.c 2003-06-14 12:18:52.000000000 -0700 +++ linux-2.5.72/net/sctp/socket.c 2003-06-16 21:54:54.000000000 -0700 @@ -3016,14 +3016,15 @@ } - if (pp && pp->sk) { + if (pp && !hlist_empty(&pp->sk_list)) { /* We had a port hash table hit - there is an * available port (pp != NULL) and it is being - * used by other socket (pp->sk != NULL); that other + * used by other socket (pp->sk_list not empty); that other * socket is going to be sk2. 
*/ int reuse = sk->sk_reuse; - struct sock *sk2 = pp->sk; + struct sock *sk2; + struct hlist_node *node; SCTP_DEBUG_PRINTK("sctp_get_port() found a " "possible match\n"); @@ -3040,7 +3041,7 @@ * that this port/socket (sk) combination are already * in an endpoint. */ - for (; sk2; sk2 = sk2->sk_bind_next) { + sk_for_each_bound(sk2, node, &pp->sk_list) { struct sctp_endpoint *ep2; ep2 = sctp_sk(sk2)->ep; @@ -3048,15 +3049,10 @@ continue; if (sctp_bind_addr_match(&ep2->base.bind_addr, addr, - sctp_sk(sk))) - goto found; - } - - found: - /* If we found a conflict, fail. */ - if (sk2 != NULL) { - ret = (long) sk2; - goto fail_unlock; + sctp_sk(sk))) { + ret = (long)sk2; + goto fail_unlock; + } } SCTP_DEBUG_PRINTK("sctp_get_port(): Found a match\n"); } @@ -3071,7 +3067,7 @@ * if sk->sk_reuse is too (that is, if the caller requested * SO_REUSEADDR on this socket -sk-). */ - if (!pp->sk) + if (hlist_empty(&pp->sk_list)) pp->fastreuse = sk->sk_reuse ? 1 : 0; else if (pp->fastreuse && !sk->sk_reuse) pp->fastreuse = 0; @@ -3083,10 +3079,7 @@ success: inet_sk(sk)->num = snum; if (!sk->sk_prev) { - if ((sk->sk_bind_next = pp->sk) != NULL) - pp->sk->sk_bind_pprev = &sk->sk_bind_next; - pp->sk = sk; - sk->sk_bind_pprev = &pp->sk; + sk_add_bind_node(sk, &pp->sk_list); sk->sk_prev = (struct sock *) pp; } ret = 0; @@ -3323,7 +3316,7 @@ if (pp) { pp->port = snum; pp->fastreuse = 0; - pp->sk = NULL; + INIT_HLIST_HEAD(&pp->sk_list); if ((pp->next = head->chain) != NULL) pp->next->pprev = &pp->next; head->chain = pp; @@ -3335,7 +3328,7 @@ /* Caller must hold hashbucket lock for this tb with local BH disabled */ static void sctp_bucket_destroy(struct sctp_bind_bucket *pp) { - if (!pp->sk) { + if (!hlist_empty(&pp->sk_list)) { if (pp->next) pp->next->pprev = pp->pprev; *(pp->pprev) = pp->next; @@ -3353,9 +3346,7 @@ sctp_spin_lock(&head->lock); pp = (struct sctp_bind_bucket *)sk->sk_prev; - if (sk->sk_bind_next) - sk->sk_bind_next->sk_bind_pprev = sk->sk_bind_pprev; - *(sk->sk_bind_pprev) = sk->sk_bind_next; + hlist_del(&sk->sk_bind_node); sk->sk_prev = NULL; inet_sk(sk)->num = 0; sctp_bucket_destroy(pp); diff -urN linux-2.5.71-bk2/net/socket.c linux-2.5.72/net/socket.c --- linux-2.5.71-bk2/net/socket.c 2003-06-14 12:18:07.000000000 -0700 +++ linux-2.5.72/net/socket.c 2003-06-16 21:54:54.000000000 -0700 @@ -79,10 +79,7 @@ #include #include #include - -#if defined(CONFIG_KMOD) && defined(CONFIG_NET) #include -#endif #ifdef CONFIG_NET_RADIO #include /* Note : will define WIRELESS_EXT */ @@ -770,11 +767,9 @@ unlock_kernel(); sock = SOCKET_I(inode); -#ifdef CONFIG_NET if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { err = dev_ioctl(cmd, (void *)arg); } else -#endif /* CONFIG_NET */ #ifdef WIRELESS_EXT if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { err = dev_ioctl(cmd, (void *)arg); @@ -795,11 +790,8 @@ case SIOCGIFBR: case SIOCSIFBR: err = -ENOPKG; - -#ifdef CONFIG_KMOD if (!br_ioctl_hook) request_module("bridge"); -#endif down(&br_ioctl_mutex); if (br_ioctl_hook) @@ -809,10 +801,9 @@ case SIOCGIFVLAN: case SIOCSIFVLAN: err = -ENOPKG; -#ifdef CONFIG_KMOD if (!vlan_ioctl_hook) request_module("8021q"); -#endif + down(&vlan_ioctl_mutex); if (vlan_ioctl_hook) err = vlan_ioctl_hook(arg); @@ -826,10 +817,9 @@ case SIOCADDDLCI: case SIOCDELDLCI: err = -ENOPKG; -#ifdef CONFIG_KMOD if (!dlci_ioctl_hook) request_module("dlci"); -#endif + if (dlci_ioctl_hook) { down(&dlci_ioctl_mutex); err = dlci_ioctl_hook(cmd, (void *)arg); @@ -1021,7 +1011,7 @@ if (err) return err; -#if defined(CONFIG_KMOD) && defined(CONFIG_NET) 
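For SCTP the bucket's owner list moves to pp->sk_list and is walked with the bind-node variants: sk_for_each_bound() and sk_add_bind_node() hang off sk->sk_bind_node rather than sk->sk_node, so a socket can sit on a bind bucket and a lookup hash chain at the same time. One hunk above reads oddly: sctp_bucket_destroy() used to free the bucket when "!pp->sk", i.e. when it had no owners, but the converted guard shown here is "!hlist_empty(&pp->sk_list)"; the direct translation would be "hlist_empty(&pp->sk_list)", so the test as it stands looks inverted. A sketch of the bind-node helpers, paraphrased on the same assumptions as before:

static inline void sk_add_bind_node(struct sock *sk, struct hlist_head *list)
{
        hlist_add_head(&sk->sk_bind_node, list);
}

#define sk_for_each_bound(sk, node, list) \
        hlist_for_each_entry(sk, node, list, sk_bind_node)

/* intended empty-bucket test, matching the old "!pp->sk":
 *      if (hlist_empty(&pp->sk_list)) { unlink pp; kfree(pp); }
 */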
+#if defined(CONFIG_KMOD) /* Attempt to load a protocol module if the find failed. * * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user diff -urN linux-2.5.71-bk2/net/unix/af_unix.c linux-2.5.72/net/unix/af_unix.c --- linux-2.5.71-bk2/net/unix/af_unix.c 2003-06-14 12:18:24.000000000 -0700 +++ linux-2.5.72/net/unix/af_unix.c 2003-06-16 21:54:54.000000000 -0700 @@ -124,11 +124,11 @@ kmem_cache_t *unix_sk_cachep; -unix_socket *unix_socket_table[UNIX_HASH_SIZE+1]; +struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; rwlock_t unix_table_lock = RW_LOCK_UNLOCKED; static atomic_t unix_nr_socks = ATOMIC_INIT(0); -#define unix_sockets_unbound (unix_socket_table[UNIX_HASH_SIZE]) +#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE]) #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) @@ -147,19 +147,19 @@ #define unix_peer(sk) ((sk)->sk_pair) -static inline int unix_our_peer(unix_socket *sk, unix_socket *osk) +static inline int unix_our_peer(struct sock *sk, struct sock *osk) { return unix_peer(osk) == sk; } -static inline int unix_may_send(unix_socket *sk, unix_socket *osk) +static inline int unix_may_send(struct sock *sk, struct sock *osk) { return (unix_peer(osk) == NULL || unix_our_peer(sk, osk)); } -static unix_socket *unix_peer_get(unix_socket *s) +static struct sock *unix_peer_get(struct sock *s) { - unix_socket *peer; + struct sock *peer; unix_state_rlock(s); peer = unix_peer(s); @@ -209,73 +209,56 @@ return len; } -static void __unix_remove_socket(unix_socket *sk) +static void __unix_remove_socket(struct sock *sk) { - struct unix_sock *u = unix_sk(sk); - unix_socket **list = u->list; - - if (list) { - if (sk->sk_next) - sk->sk_next->sk_prev = sk->sk_prev; - if (sk->sk_prev) - sk->sk_prev->sk_next = sk->sk_next; - if (*list == sk) - *list = sk->sk_next; - u->list = NULL; - sk->sk_prev = NULL; - sk->sk_next = NULL; + if (sk_del_node_init(sk)) __sock_put(sk); - } } -static void __unix_insert_socket(unix_socket **list, unix_socket *sk) +static void __unix_insert_socket(struct hlist_head *list, struct sock *sk) { - struct unix_sock *u = unix_sk(sk); - BUG_TRAP(!u->list); - - u->list = list; - sk->sk_prev = NULL; - sk->sk_next = *list; - if (*list) - (*list)->sk_prev = sk; - *list=sk; + BUG_TRAP(sk_unhashed(sk)); + sk_add_node(sk, list); sock_hold(sk); } -static inline void unix_remove_socket(unix_socket *sk) +static inline void unix_remove_socket(struct sock *sk) { write_lock(&unix_table_lock); __unix_remove_socket(sk); write_unlock(&unix_table_lock); } -static inline void unix_insert_socket(unix_socket **list, unix_socket *sk) +static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk) { write_lock(&unix_table_lock); __unix_insert_socket(list, sk); write_unlock(&unix_table_lock); } -static unix_socket *__unix_find_socket_byname(struct sockaddr_un *sunname, +static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, int len, int type, unsigned hash) { - unix_socket *s; + struct sock *s; + struct hlist_node *node; - for (s = unix_socket_table[hash ^ type]; s; s = s->sk_next) { + sk_for_each(s, node, &unix_socket_table[hash ^ type]) { struct unix_sock *u = unix_sk(s); if (u->addr->len == len && !memcmp(u->addr->name, sunname, len)) - break; + goto found; } + s = NULL; +found: return s; } -static inline unix_socket * -unix_find_socket_byname(struct sockaddr_un *sunname, - int len, int type, unsigned hash) +static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname, + int len, int type, + 
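In af_unix the table itself becomes an array of hlist heads, which is why unix_sockets_unbound changes from naming the last slot's value to taking its address: the old macro expanded to the struct sock pointer stored in unix_socket_table[UNIX_HASH_SIZE] and callers passed &unix_sockets_unbound, the new one already is the struct hlist_head pointer, so the socket-creation path further down drops the explicit "&". Side by side:

/* before: a slot holding the chain's first socket */
unix_socket *unix_socket_table[UNIX_HASH_SIZE + 1];
#define unix_sockets_unbound    (unix_socket_table[UNIX_HASH_SIZE])
        /* ... unix_insert_socket(&unix_sockets_unbound, sk); */

/* after: a slot that is the chain head */
struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
#define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])
        /* ... unix_insert_socket(unix_sockets_unbound, sk); */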
unsigned hash) { - unix_socket *s; + struct sock *s; read_lock(&unix_table_lock); s = __unix_find_socket_byname(sunname, len, type, hash); @@ -285,21 +268,24 @@ return s; } -static unix_socket *unix_find_socket_byinode(struct inode *i) +static struct sock *unix_find_socket_byinode(struct inode *i) { - unix_socket *s; + struct sock *s; + struct hlist_node *node; read_lock(&unix_table_lock); - for (s = unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]; s; - s = s->sk_next) { + sk_for_each(s, node, + &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->dentry; if(dentry && dentry->d_inode == i) { sock_hold(s); - break; + goto found; } } + s = NULL; +found: read_unlock(&unix_table_lock); return s; } @@ -348,7 +334,7 @@ skb_queue_purge(&sk->sk_receive_queue); BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); - BUG_TRAP(!u->list); + BUG_TRAP(sk_unhashed(sk)); BUG_TRAP(!sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { printk("Attempt to release alive unix socket: %p\n", sk); @@ -364,12 +350,12 @@ #endif } -static int unix_release_sock (unix_socket *sk, int embrion) +static int unix_release_sock (struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); struct dentry *dentry; struct vfsmount *mnt; - unix_socket *skpair; + struct sock *skpair; struct sk_buff *skb; int state; @@ -501,12 +487,11 @@ u = unix_sk(sk); u->dentry = NULL; u->mnt = NULL; - u->list = NULL; rwlock_init(&u->lock); atomic_set(&u->inflight, sock ? 0 : -1); init_MUTEX(&u->readsem); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); - unix_insert_socket(&unix_sockets_unbound, sk); + unix_insert_socket(unix_sockets_unbound, sk); out: return sk; } @@ -540,7 +525,7 @@ static int unix_release(struct socket *sock) { - unix_socket *sk = sock->sk; + struct sock *sk = sock->sk; if (!sk) return 0; @@ -600,10 +585,10 @@ return err; } -static unix_socket *unix_find_other(struct sockaddr_un *sunname, int len, +static struct sock *unix_find_other(struct sockaddr_un *sunname, int len, int type, unsigned hash, int *error) { - unix_socket *u; + struct sock *u; struct nameidata nd; int err = 0; @@ -663,7 +648,7 @@ int err; unsigned hash; struct unix_address *addr; - unix_socket **list; + struct hlist_head *list; err = -EINVAL; if (sunaddr->sun_family != AF_UNIX) @@ -854,7 +839,7 @@ return err; } -static long unix_wait_for_peer(unix_socket *other, long timeo) +static long unix_wait_for_peer(struct sock *other, long timeo) { struct unix_sock *u = unix_sk(other); int sched; @@ -883,7 +868,7 @@ struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk), *newu, *otheru; struct sock *newsk = NULL; - unix_socket *other = NULL; + struct sock *other = NULL; struct sk_buff *skb = NULL; unsigned hash; int st; @@ -1076,8 +1061,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags) { - unix_socket *sk = sock->sk; - unix_socket *tsk; + struct sock *sk = sock->sk; + struct sock *tsk; struct sk_buff *skb; int err; @@ -1198,7 +1183,7 @@ struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); struct sockaddr_un *sunaddr=msg->msg_name; - unix_socket *other = NULL; + struct sock *other = NULL; int namelen = 0; /* fake GCC */ int err; unsigned hash; @@ -1344,7 +1329,7 @@ { struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; - unix_socket *other = NULL; + struct sock *other = NULL; struct sockaddr_un *sunaddr=msg->msg_name; int err,size; struct sk_buff *skb; @@ -1540,7 +1525,7 @@ * Sleep until data has arrive. But check for races.. 
*/ -static long unix_stream_data_wait(unix_socket * sk, long timeo) +static long unix_stream_data_wait(struct sock * sk, long timeo) { DEFINE_WAIT(wait); @@ -1714,7 +1699,7 @@ static int unix_shutdown(struct socket *sock, int mode) { struct sock *sk = sock->sk; - unix_socket *other; + struct sock *other; mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN); @@ -1829,7 +1814,7 @@ off_t begin=0; int len=0; int i; - unix_socket *s; + struct sock *s; len+= sprintf(buffer,"Num RefCount Protocol Flags Type St " "Inode Path\n"); diff -urN linux-2.5.71-bk2/net/unix/garbage.c linux-2.5.72/net/unix/garbage.c --- linux-2.5.71-bk2/net/unix/garbage.c 2003-06-14 12:18:06.000000000 -0700 +++ linux-2.5.72/net/unix/garbage.c 2003-06-16 21:54:54.000000000 -0700 @@ -84,17 +84,17 @@ /* Internal data structures and random procedures: */ -#define GC_HEAD ((unix_socket *)(-1)) -#define GC_ORPHAN ((unix_socket *)(-3)) +#define GC_HEAD ((struct sock *)(-1)) +#define GC_ORPHAN ((struct sock *)(-3)) -static unix_socket *gc_current=GC_HEAD; /* stack of objects to mark */ +static struct sock *gc_current = GC_HEAD; /* stack of objects to mark */ atomic_t unix_tot_inflight = ATOMIC_INIT(0); -static unix_socket *unix_get_socket(struct file *filp) +static struct sock *unix_get_socket(struct file *filp) { - unix_socket * u_sock = NULL; + struct sock *u_sock = NULL; struct inode *inode = filp->f_dentry->d_inode; /* @@ -120,7 +120,7 @@ void unix_inflight(struct file *fp) { - unix_socket *s=unix_get_socket(fp); + struct sock *s = unix_get_socket(fp); if(s) { atomic_inc(&unix_sk(s)->inflight); atomic_inc(&unix_tot_inflight); @@ -129,7 +129,7 @@ void unix_notinflight(struct file *fp) { - unix_socket *s=unix_get_socket(fp); + struct sock *s = unix_get_socket(fp); if(s) { atomic_dec(&unix_sk(s)->inflight); atomic_dec(&unix_tot_inflight); @@ -141,9 +141,9 @@ * Garbage Collector Support Functions */ -static inline unix_socket *pop_stack(void) +static inline struct sock *pop_stack(void) { - unix_socket *p=gc_current; + struct sock *p = gc_current; gc_current = unix_sk(p)->gc_tree; return p; } @@ -153,7 +153,7 @@ return gc_current == GC_HEAD; } -static void maybe_unmark_and_push(unix_socket *x) +static void maybe_unmark_and_push(struct sock *x) { struct unix_sock *u = unix_sk(x); @@ -171,7 +171,7 @@ { static DECLARE_MUTEX(unix_gc_sem); int i; - unix_socket *s; + struct sock *s; struct sk_buff_head hitlist; struct sk_buff *skb; @@ -231,8 +231,8 @@ while (!empty_stack()) { - unix_socket *x = pop_stack(); - unix_socket *sk; + struct sock *x = pop_stack(); + struct sock *sk; spin_lock(&x->sk_receive_queue.lock); skb = skb_peek(&x->sk_receive_queue); diff -urN linux-2.5.71-bk2/net/wanrouter/af_wanpipe.c linux-2.5.72/net/wanrouter/af_wanpipe.c --- linux-2.5.71-bk2/net/wanrouter/af_wanpipe.c 2003-06-14 12:18:35.000000000 -0700 +++ linux-2.5.72/net/wanrouter/af_wanpipe.c 2003-06-16 21:54:54.000000000 -0700 @@ -158,7 +158,7 @@ /* List of all wanpipe sockets. 
*/ -struct sock* wanpipe_sklist; +HLIST_HEAD(wanpipe_sklist); static rwlock_t wanpipe_sklist_lock = RW_LOCK_UNLOCKED; atomic_t wanpipe_socks_nr; @@ -949,7 +949,6 @@ { wanpipe_opt *wp; struct sock *sk = sock->sk; - struct sock **skp; if (!sk) return 0; @@ -983,13 +982,8 @@ set_bit(1,&wanpipe_tx_critical); write_lock(&wanpipe_sklist_lock); - for (skp = &wanpipe_sklist; *skp; skp = &(*skp)->sk_next) { - if (*skp == sk) { - *skp = sk->sk_next; - __sock_put(sk); - break; - } - } + if (sk_del_node_init(sk)) + __sock_put(sk); write_unlock(&wanpipe_sklist_lock); clear_bit(1,&wanpipe_tx_critical); @@ -1149,13 +1143,8 @@ } write_lock(&wanpipe_sklist_lock); - for (skp = &wanpipe_sklist; *skp; skp = &(*skp)->sk_next) { - if (*skp == sk) { - *skp = sk->sk_next; - __sock_put(sk); - break; - } - } + if (sk_del_node_init(sk)) + __sock_put(sk); write_unlock(&wanpipe_sklist_lock); @@ -1217,13 +1206,8 @@ * appropriate locks */ write_lock(&wanpipe_sklist_lock); - for (skp = &wanpipe_sklist; *skp; skp = &(*skp)->sk_next) { - if (*skp == sk) { - *skp = sk->sk_next; - __sock_put(sk); - break; - } - } + if (sk_del_node_init(init)) + __sock_put(sk); write_unlock(&wanpipe_sklist_lock); sk->sk_socket = NULL; @@ -1551,8 +1535,7 @@ * can also change the list */ set_bit(1,&wanpipe_tx_critical); write_lock(&wanpipe_sklist_lock); - sk->sk_next = wanpipe_sklist; - wanpipe_sklist = sk; + sk_add_node(sk, &wanpipe_sklist); sock_hold(sk); write_unlock(&wanpipe_sklist_lock); clear_bit(1,&wanpipe_tx_critical); @@ -1730,12 +1713,13 @@ static int wanpipe_notifier(struct notifier_block *this, unsigned long msg, void *data) { struct sock *sk; + hlist_node *node; struct net_device *dev = (struct net_device *)data; - struct wanpipe_opt *po; - for (sk = wanpipe_sklist; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &wanpipe_sklist) { + struct wanpipe_opt *po = wp_sk(sk); - if ((po = wp_sk(sk)) == NULL) + if (!po) continue; if (dev == NULL) continue; @@ -1879,13 +1863,14 @@ static int wanpipe_debug (struct sock *origsk, void *arg) { - struct sock *sk=NULL; + struct sock *sk; + struct hlist_node *node; struct net_device *dev = NULL; wanpipe_common_t *chan=NULL; int cnt=0, err=0; wan_debug_t *dbg_data = (wan_debug_t *)arg; - for (sk = wanpipe_sklist; sk; sk = sk->sk_next) { + sk_for_each(sk, node, &wanpipe_sklist) { wanpipe_opt *wp = wp_sk(sk); if (sk == origsk){ @@ -2448,8 +2433,7 @@ set_bit(1,&wanpipe_tx_critical); write_lock(&wanpipe_sklist_lock); - newsk->sk_next = wanpipe_sklist; - wanpipe_sklist = newsk; + sk_add_node(newsk, &wanpipe_sklist); sock_hold(sk); write_unlock(&wanpipe_sklist_lock); clear_bit(1,&wanpipe_tx_critical); diff -urN linux-2.5.71-bk2/net/x25/af_x25.c linux-2.5.72/net/x25/af_x25.c --- linux-2.5.71-bk2/net/x25/af_x25.c 2003-06-14 12:18:34.000000000 -0700 +++ linux-2.5.72/net/x25/af_x25.c 2003-06-16 21:54:54.000000000 -0700 @@ -66,7 +66,7 @@ int sysctl_x25_clear_request_timeout = X25_DEFAULT_T23; int sysctl_x25_ack_holdback_timeout = X25_DEFAULT_T2; -struct sock *x25_list; +HLIST_HEAD(x25_list); rwlock_t x25_list_lock = RW_LOCK_UNLOCKED; static struct proto_ops x25_proto_ops; @@ -153,22 +153,9 @@ */ static void x25_remove_socket(struct sock *sk) { - struct sock *s; - write_lock_bh(&x25_list_lock); - - if ((s = x25_list) == sk) - x25_list = s->sk_next; - else while (s && s->sk_next) { - if (s->sk_next == sk) { - s->sk_next = sk->sk_next; - sock_put(sk); - break; - } - - s = s->sk_next; - } - + if (sk_del_node_init(sk)) + sock_put(sk); write_unlock_bh(&x25_list_lock); } @@ -178,10 +165,11 @@ static void 
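Two spots in the af_wanpipe.c conversion above look like slips rather than intent, so the lines below show the presumably intended form; this is an assumption on my part, not something the diff states. wanpipe_notifier() declares "hlist_node *node" without the struct keyword, and the third unlink site passes sk_del_node_init(init) where every comparable site in the patch, including the two earlier wanpipe ones, passes the socket being released:

        struct hlist_node *node;                /* rather than: hlist_node *node; */

        write_lock(&wanpipe_sklist_lock);
        if (sk_del_node_init(sk))               /* rather than: sk_del_node_init(init) */
                __sock_put(sk);
        write_unlock(&wanpipe_sklist_lock);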
x25_kill_by_device(struct net_device *dev) { struct sock *s; + struct hlist_node *node; write_lock_bh(&x25_list_lock); - for (s = x25_list; s; s = s->sk_next) + sk_for_each(s, node, &x25_list) if (x25_sk(s)->neighbour && x25_sk(s)->neighbour->dev == dev) x25_disconnect(s, ENETUNREACH, 0, 0); @@ -230,8 +218,7 @@ static void x25_insert_socket(struct sock *sk) { write_lock_bh(&x25_list_lock); - sk->sk_next = x25_list; - x25_list = sk; + sk_add_node(sk, &x25_list); sock_hold(sk); write_unlock_bh(&x25_list_lock); } @@ -243,19 +230,21 @@ static struct sock *x25_find_listener(struct x25_address *addr) { struct sock *s; + struct hlist_node *node; read_lock_bh(&x25_list_lock); - for (s = x25_list; s; s = s->sk_next) + sk_for_each(s, node, &x25_list) if ((!strcmp(addr->x25_addr, x25_sk(s)->source_addr.x25_addr) || !strcmp(addr->x25_addr, null_x25_address.x25_addr)) && - s->sk_state == TCP_LISTEN) - break; - - if (s) - sock_hold(s); + s->sk_state == TCP_LISTEN) { + sock_hold(s); + goto found; + } + s = NULL; +found: read_unlock_bh(&x25_list_lock); return s; } @@ -266,12 +255,15 @@ struct sock *__x25_find_socket(unsigned int lci, struct x25_neigh *nb) { struct sock *s; + struct hlist_node *node; - for (s = x25_list; s; s = s->sk_next) - if (x25_sk(s)->lci == lci && x25_sk(s)->neighbour == nb) - break; - if (s) - sock_hold(s); + sk_for_each(s, node, &x25_list) + if (x25_sk(s)->lci == lci && x25_sk(s)->neighbour == nb) { + sock_hold(s); + goto found; + } + s = NULL; +found: return s; } @@ -1359,10 +1351,11 @@ void x25_kill_by_neigh(struct x25_neigh *nb) { struct sock *s; + struct hlist_node *node; write_lock_bh(&x25_list_lock); - for (s = x25_list; s; s = s->sk_next) + sk_for_each(s, node, &x25_list) if (x25_sk(s)->neighbour == nb) x25_disconnect(s, ENETUNREACH, 0, 0); diff -urN linux-2.5.71-bk2/net/x25/x25_proc.c linux-2.5.72/net/x25/x25_proc.c --- linux-2.5.71-bk2/net/x25/x25_proc.c 2003-06-14 12:17:59.000000000 -0700 +++ linux-2.5.72/net/x25/x25_proc.c 2003-06-16 21:54:54.000000000 -0700 @@ -93,10 +93,13 @@ static __inline__ struct sock *x25_get_socket_idx(loff_t pos) { struct sock *s; + struct hlist_node *node; - for (s = x25_list; pos && s; s = s->sk_next) - --pos; - + sk_for_each(s, node, &x25_list) + if (!pos--) + goto found; + s = NULL; +found: return s; } @@ -114,13 +117,10 @@ ++*pos; if (v == (void *)1) { - s = NULL; - if (x25_list) - s = x25_list; + s = sk_head(&x25_list); goto out; } - s = v; - s = s->sk_next; + s = sk_next(v); out: return s; }
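x25_get_socket_idx() and x25_seq_next() above close the pattern out with the two small accessors the /proc and seq_file code leans on throughout (ipx_proc.c and llc_proc.c use them the same way): sk_head() for "first socket on this chain or NULL" and sk_next() for "following socket or NULL". Paraphrased under the same assumptions as the earlier helper sketches:

static inline struct sock *sk_head(struct hlist_head *head)
{
        return hlist_empty(head) ?
               NULL : hlist_entry(head->first, struct sock, sk_node);
}

static inline struct sock *sk_next(struct sock *sk)
{
        return sk->sk_node.next ?
               hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL;
}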