## Automatically generated incremental diff ## From: linux-2.4.22-bk15 ## To: linux-2.4.22-bk16 ## Robot: $Id: make-incremental-diff,v 1.11 2002/02/20 02:59:33 hpa Exp $ diff -urN linux-2.4.22-bk15/Documentation/Configure.help linux-2.4.22-bk16/Documentation/Configure.help --- linux-2.4.22-bk15/Documentation/Configure.help 2003-09-13 03:06:16.000000000 -0700 +++ linux-2.4.22-bk16/Documentation/Configure.help 2003-09-13 03:06:32.000000000 -0700 @@ -27081,12 +27081,31 @@ and restore instructions. It's useful for tracking down spinlock problems, but slow! If you're unsure, select N. -Early printk support (requires VGA!) +Early printk support CONFIG_IA64_EARLY_PRINTK - Selecting this option uses the VGA screen for printk() output before - the consoles are initialised. It is useful for debugging problems - early in the boot process, but only if you have a VGA screen - attached. If you're unsure, select N. + Selecting this option uses a UART or VGA screen (or both) for + printk() output before the consoles are initialised. It is useful + for debugging problems early in the boot process, but only if you + have a serial terminal or a VGA screen attached. If you're unsure, + select N. + +Early printk on serial port +CONFIG_IA64_EARLY_PRINTK_UART + Select this option to use a serial port for early printk() output. + You must also select either CONFIG_IA64_EARLY_PRINTK_UART_BASE or + CONFIG_SERIAL_HCDP. If you select CONFIG_SERIAL_HCDP, early + printk() output will appear on the first console device described by + the HCDP. If you set CONFIG_IA64_EARLY_PRINTK_UART_BASE, the HCDP + will be ignored. + +UART base address +CONFIG_IA64_EARLY_PRINTK_UART_BASE + The physical MMIO address of the UART to use for early printk(). + This overrides any UART located using the EFI HCDP table. + +Early printk on VGA +CONFIG_IA64_EARLY_PRINTK_VGA + Select this option to use VGA for early printk() output. Print possible IA64 hazards to console CONFIG_IA64_PRINT_HAZARDS diff -urN linux-2.4.22-bk15/Makefile linux-2.4.22-bk16/Makefile --- linux-2.4.22-bk15/Makefile 2003-09-13 03:06:16.000000000 -0700 +++ linux-2.4.22-bk16/Makefile 2003-09-13 03:06:32.000000000 -0700 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 22 -EXTRAVERSION = -bk15 +EXTRAVERSION = -bk16 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) diff -urN linux-2.4.22-bk15/arch/i386/kernel/mpparse.c linux-2.4.22-bk16/arch/i386/kernel/mpparse.c --- linux-2.4.22-bk15/arch/i386/kernel/mpparse.c 2003-09-13 03:06:16.000000000 -0700 +++ linux-2.4.22-bk16/arch/i386/kernel/mpparse.c 2003-09-13 03:06:33.000000000 -0700 @@ -683,6 +683,24 @@ struct mpc_config_lintsrc lintsrc; int linttypes[2] = { mp_ExtINT, mp_NMI }; int i; + struct { + int mp_bus_id_to_type[MAX_MP_BUSSES]; + int mp_bus_id_to_node[MAX_MP_BUSSES]; + int mp_bus_id_to_local[MAX_MP_BUSSES]; + int mp_bus_id_to_pci_bus[MAX_MP_BUSSES]; + struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; + } *bus_data; + + bus_data = alloc_bootmem(sizeof(*bus_data)); + if (!bus_data) + panic("SMP mptable: out of memory!\n"); + mp_bus_id_to_type = bus_data->mp_bus_id_to_type; + mp_bus_id_to_node = bus_data->mp_bus_id_to_node; + mp_bus_id_to_local = bus_data->mp_bus_id_to_local; + mp_bus_id_to_pci_bus = bus_data->mp_bus_id_to_pci_bus; + mp_irqs = bus_data->mp_irqs; + for (i = 0; i < MAX_MP_BUSSES; ++i) + mp_bus_id_to_pci_bus[i] = -1; /* * local APIC has default address diff -urN linux-2.4.22-bk15/arch/sparc/boot/btfixupprep.c linux-2.4.22-bk16/arch/sparc/boot/btfixupprep.c --- linux-2.4.22-bk15/arch/sparc/boot/btfixupprep.c 2001-08-28 07:09:44.000000000 -0700 +++ linux-2.4.22-bk16/arch/sparc/boot/btfixupprep.c 2003-09-13 03:06:35.000000000 -0700 @@ -167,6 +167,8 @@ } } else if (buffer[nbase+4] != '_') continue; + if (!strcmp (sect, ".text.exit")) + continue; if (strcmp (sect, ".text") && strcmp (sect, ".text.init") && strcmp (sect, ".fixup") && (strcmp (sect, "__ksymtab") || buffer[nbase+3] != 'f')) { if (buffer[nbase+3] == 'f') fprintf(stderr, "Wrong use of '%s' in '%s' section. It can be only used in .text, .text.init, .fixup and __ksymtab\n", buffer + shift, sect); diff -urN linux-2.4.22-bk15/arch/sparc/kernel/sys_sunos.c linux-2.4.22-bk16/arch/sparc/kernel/sys_sunos.c --- linux-2.4.22-bk15/arch/sparc/kernel/sys_sunos.c 2002-11-28 15:53:12.000000000 -0800 +++ linux-2.4.22-bk16/arch/sparc/kernel/sys_sunos.c 2003-09-13 03:06:35.000000000 -0700 @@ -193,7 +193,7 @@ * fool it, but this should catch most mistakes. */ freepages = atomic_read(&buffermem_pages) >> PAGE_SHIFT; - freepages += atomic_read(&page_cache_size); + freepages += page_cache_size; freepages >>= 1; freepages += nr_free_pages(); freepages += nr_swap_pages; diff -urN linux-2.4.22-bk15/arch/sparc64/kernel/sys_sunos32.c linux-2.4.22-bk16/arch/sparc64/kernel/sys_sunos32.c --- linux-2.4.22-bk15/arch/sparc64/kernel/sys_sunos32.c 2002-11-28 15:53:12.000000000 -0800 +++ linux-2.4.22-bk16/arch/sparc64/kernel/sys_sunos32.c 2003-09-13 03:06:35.000000000 -0700 @@ -157,7 +157,7 @@ * fool it, but this should catch most mistakes. */ freepages = atomic_read(&buffermem_pages) >> PAGE_SHIFT; - freepages += atomic_read(&page_cache_size); + freepages += page_cache_size; freepages >>= 1; freepages += nr_free_pages(); freepages += nr_swap_pages; diff -urN linux-2.4.22-bk15/arch/x86_64/kernel/irq.c linux-2.4.22-bk16/arch/x86_64/kernel/irq.c --- linux-2.4.22-bk15/arch/x86_64/kernel/irq.c 2002-11-28 15:53:12.000000000 -0800 +++ linux-2.4.22-bk16/arch/x86_64/kernel/irq.c 2003-09-13 03:06:36.000000000 -0700 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -154,57 +155,56 @@ * Generic, controller-independent functions: */ -int get_irq_list(char *buf) +int show_interrupts(struct seq_file *p, void *v) { int i, j; struct irqaction * action; - char *p = buf; - p += sprintf(p, " "); + seq_printf(p, " "); for (j=0; jtypename); - p += sprintf(p, " %s", action->name); + seq_printf(p, " %14s", irq_desc[i].handler->typename); + seq_printf(p, " %s", action->name); for (action=action->next; action; action = action->next) - p += sprintf(p, ", %s", action->name); - *p++ = '\n'; + seq_printf(p, ", %s", action->name); + seq_putc(p,'\n'); } - p += sprintf(p, "NMI: "); + seq_printf(p, "NMI: "); for (j = 0; j < smp_num_cpus; j++) - p += sprintf(p, "%10u ", + seq_printf(p, "%10u ", nmi_count(cpu_logical_map(j))); - p += sprintf(p, "\n"); + seq_printf(p, "\n"); #if CONFIG_X86_LOCAL_APIC - p += sprintf(p, "LOC: "); + seq_printf(p, "LOC: "); for (j = 0; j < smp_num_cpus; j++) - p += sprintf(p, "%10u ", + seq_printf(p, "%10u ", apic_timer_irqs[cpu_logical_map(j)]); - p += sprintf(p, "\n"); + seq_printf(p, "\n"); #endif - p += sprintf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); + seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); #ifdef CONFIG_X86_IO_APIC #ifdef APIC_MISMATCH_DEBUG - p += sprintf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); + seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); #endif #endif - return p - buf; -} + return 0; +} /* * Global interrupt locks for SMP. Allow interrupts to come in on any diff -urN linux-2.4.22-bk15/crypto/proc.c linux-2.4.22-bk16/crypto/proc.c --- linux-2.4.22-bk15/crypto/proc.c 2003-08-25 04:44:40.000000000 -0700 +++ linux-2.4.22-bk16/crypto/proc.c 2003-09-13 03:06:36.000000000 -0700 @@ -60,6 +60,7 @@ switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_CIPHER: + seq_printf(m, "type : cipher\n"); seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); seq_printf(m, "min keysize : %u\n", alg->cra_cipher.cia_min_keysize); @@ -70,10 +71,17 @@ break; case CRYPTO_ALG_TYPE_DIGEST: + seq_printf(m, "type : digest\n"); seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); seq_printf(m, "digestsize : %u\n", alg->cra_digest.dia_digestsize); break; + case CRYPTO_ALG_TYPE_COMPRESS: + seq_printf(m, "type : compression\n"); + break; + default: + seq_printf(m, "type : unknown\n"); + break; } seq_putc(m, '\n'); diff -urN linux-2.4.22-bk15/drivers/char/hcdp_serial.c linux-2.4.22-bk16/drivers/char/hcdp_serial.c --- linux-2.4.22-bk15/drivers/char/hcdp_serial.c 2002-11-28 15:53:12.000000000 -0800 +++ linux-2.4.22-bk16/drivers/char/hcdp_serial.c 2003-09-13 03:06:37.000000000 -0700 @@ -219,3 +219,41 @@ printk("Leaving setup_serial_hcdp()\n"); #endif } + +#ifdef CONFIG_IA64_EARLY_PRINTK_UART +unsigned long hcdp_early_uart(void) +{ + efi_system_table_t *systab; + efi_config_table_t *config_tables; + hcdp_t *hcdp = 0; + hcdp_dev_t *dev; + int i; + + systab = (efi_system_table_t *) ia64_boot_param->efi_systab; + if (!systab) + return 0; + systab = __va(systab); + + config_tables = (efi_config_table_t *) systab->tables; + if (!config_tables) + return 0; + config_tables = __va(config_tables); + + for (i = 0; i < systab->nr_tables; i++) { + if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { + hcdp = (hcdp_t *) config_tables[i].table; + break; + } + } + if (!hcdp) + return 0; + hcdp = __va(hcdp); + + for (i = 0, dev = hcdp->hcdp_dev; i < hcdp->num_entries; i++, dev++) { + if (dev->type == HCDP_DEV_CONSOLE) + return (u64) dev->base_addr.addrhi << 32 + | dev->base_addr.addrlo; + } + return 0; +} +#endif diff -urN linux-2.4.22-bk15/drivers/char/rocket.c linux-2.4.22-bk16/drivers/char/rocket.c --- linux-2.4.22-bk15/drivers/char/rocket.c 2003-09-13 03:06:20.000000000 -0700 +++ linux-2.4.22-bk16/drivers/char/rocket.c 2003-09-13 03:06:38.000000000 -0700 @@ -1052,7 +1052,7 @@ restore_flags(flags); return; } - if ((atomic_read(&tty->count) == 1) && (info->count != 1)) { + if ((tty->count == 1) && (info->count != 1)) { /* * Uh, oh. tty->count is 1, which means that the tty * structure will be freed. Info->count should always diff -urN linux-2.4.22-bk15/drivers/scsi/scsi_scan.c linux-2.4.22-bk16/drivers/scsi/scsi_scan.c --- linux-2.4.22-bk15/drivers/scsi/scsi_scan.c 2003-09-13 03:06:27.000000000 -0700 +++ linux-2.4.22-bk16/drivers/scsi/scsi_scan.c 2003-09-13 03:06:45.000000000 -0700 @@ -205,6 +205,7 @@ {"HP", "C7200", "*", BLIST_SPARSELUN}, /* Medium Changer */ {"SMSC", "USB 2 HS", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, {"XYRATEX", "RS", "*", BLIST_SPARSELUN | BLIST_LARGELUN}, + {"NEC", "iStorage", "*", BLIST_SPARSELUN | BLIST_LARGELUN | BLIST_FORCELUN}, /* * Must be at end of list... diff -urN linux-2.4.22-bk15/fs/buffer.c linux-2.4.22-bk16/fs/buffer.c --- linux-2.4.22-bk15/fs/buffer.c 2003-09-13 03:06:30.000000000 -0700 +++ linux-2.4.22-bk16/fs/buffer.c 2003-09-13 03:06:46.000000000 -0700 @@ -2752,10 +2752,10 @@ #endif printk("Buffer memory: %6dkB\n", - atomic_read(&buffermem_pages) << (PAGE_SHIFT-10)); + atomic_read(&buffermem_pages) << (PAGE_SHIFT-10)); - printk("Cache memory: %6dkB\n", - (atomic_read(&page_cache_size)- atomic_read(&buffermem_pages)) << (PAGE_SHIFT-10)); + printk("Cache memory: %6ldkB\n", + (page_cache_size - atomic_read(&buffermem_pages)) << (PAGE_SHIFT-10)); #ifdef CONFIG_SMP /* trylock does nothing on UP and so we could deadlock */ if (!spin_trylock(&lru_list_lock)) diff -urN linux-2.4.22-bk15/fs/proc/proc_misc.c linux-2.4.22-bk16/fs/proc/proc_misc.c --- linux-2.4.22-bk15/fs/proc/proc_misc.c 2003-09-13 03:06:30.000000000 -0700 +++ linux-2.4.22-bk16/fs/proc/proc_misc.c 2003-09-13 03:06:46.000000000 -0700 @@ -166,7 +166,7 @@ #define B(x) ((unsigned long long)(x) << PAGE_SHIFT) si_meminfo(&i); si_swapinfo(&i); - pg_size = atomic_read(&page_cache_size) - i.bufferram ; + pg_size = page_cache_size - i.bufferram; len = sprintf(page, " total: used: free: shared: buffers: cached:\n" "Mem: %8Lu %8Lu %8Lu %8Lu %8Lu %8Lu\n" diff -urN linux-2.4.22-bk15/include/asm-x86_64/msr.h linux-2.4.22-bk16/include/asm-x86_64/msr.h --- linux-2.4.22-bk15/include/asm-x86_64/msr.h 2002-11-28 15:53:15.000000000 -0800 +++ linux-2.4.22-bk16/include/asm-x86_64/msr.h 2003-09-13 03:06:46.000000000 -0700 @@ -173,6 +173,9 @@ /* VIA Cyrix defined MSRs*/ #define MSR_VIA_FCR 0x1107 +#define MSR_VIA_LONGHAUL 0x110a +#define MSR_VIA_RNG 0x110b +#define MSR_VIA_BCR2 0x1147 /* Intel defined MSRs. */ #define MSR_IA32_P5_MC_ADDR 0 diff -urN linux-2.4.22-bk15/include/linux/mm.h linux-2.4.22-bk16/include/linux/mm.h --- linux-2.4.22-bk15/include/linux/mm.h 2003-08-25 04:44:44.000000000 -0700 +++ linux-2.4.22-bk16/include/linux/mm.h 2003-09-13 03:06:46.000000000 -0700 @@ -535,8 +535,8 @@ return page_count(page) - !!page->buffers == 1; } -extern int can_share_swap_page(struct page *); -extern int remove_exclusive_swap_page(struct page *); +extern int FASTCALL(can_share_swap_page(struct page *)); +extern int FASTCALL(remove_exclusive_swap_page(struct page *)); extern void __free_pte(pte_t); diff -urN linux-2.4.22-bk15/include/linux/mmzone.h linux-2.4.22-bk16/include/linux/mmzone.h --- linux-2.4.22-bk15/include/linux/mmzone.h 2002-11-28 15:53:15.000000000 -0800 +++ linux-2.4.22-bk16/include/linux/mmzone.h 2003-09-13 03:06:46.000000000 -0700 @@ -19,6 +19,11 @@ #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER #endif +#define ZONE_DMA 0 +#define ZONE_NORMAL 1 +#define ZONE_HIGHMEM 2 +#define MAX_NR_ZONES 3 + typedef struct free_area_struct { struct list_head free_list; unsigned long *map; @@ -26,6 +31,11 @@ struct pglist_data; +typedef struct zone_watermarks_s { + unsigned long min, low, high; +} zone_watermarks_t; + + /* * On machines where it is needed (eg PCs) we divide physical memory * into multiple physical zones. On a PC we have 3 zones: @@ -40,8 +50,27 @@ */ spinlock_t lock; unsigned long free_pages; - unsigned long pages_min, pages_low, pages_high; - int need_balance; + /* + * We don't know if the memory that we're going to allocate will be freeable + * or/and it will be released eventually, so to avoid totally wasting several + * GB of ram we must reserve some of the lower zone memory (otherwise we risk + * to run OOM on the lower zones despite there's tons of freeable ram + * on the higher zones). + */ + zone_watermarks_t watermarks[MAX_NR_ZONES]; + + /* + * The below fields are protected by different locks (or by + * no lock at all like need_balance), so they're longs to + * provide an atomic granularity against each other on + * all architectures. + */ + unsigned long need_balance; + /* protected by the pagemap_lru_lock */ + unsigned long nr_active_pages, nr_inactive_pages; + /* protected by the pagecache_lock */ + unsigned long nr_cache_pages; + /* * free areas of different sizes @@ -90,13 +119,9 @@ */ char *name; unsigned long size; + unsigned long realsize; } zone_t; -#define ZONE_DMA 0 -#define ZONE_NORMAL 1 -#define ZONE_HIGHMEM 2 -#define MAX_NR_ZONES 3 - /* * One allocation request operates on a zonelist. A zonelist * is a list of zones, the first one is the 'goal' of the @@ -143,8 +168,8 @@ extern int numnodes; extern pg_data_t *pgdat_list; -#define memclass(pgzone, classzone) (((pgzone)->zone_pgdat == (classzone)->zone_pgdat) \ - && ((pgzone) <= (classzone))) +#define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones) +#define memclass(pgzone, classzone) (zone_idx(pgzone) <= zone_idx(classzone)) /* * The following two are not meant for general usage. They are here as diff -urN linux-2.4.22-bk15/include/linux/pagemap.h linux-2.4.22-bk16/include/linux/pagemap.h --- linux-2.4.22-bk15/include/linux/pagemap.h 2003-08-25 04:44:44.000000000 -0700 +++ linux-2.4.22-bk16/include/linux/pagemap.h 2003-09-13 03:06:46.000000000 -0700 @@ -45,7 +45,7 @@ #define PAGE_HASH_BITS (page_hash_bits) #define PAGE_HASH_SIZE (1 << PAGE_HASH_BITS) -extern atomic_t page_cache_size; /* # of pages currently in the hash table */ +extern unsigned long page_cache_size; /* # of pages currently in the hash table */ extern struct page **page_hash_table; extern void page_cache_init(unsigned long); diff -urN linux-2.4.22-bk15/include/linux/sched.h linux-2.4.22-bk16/include/linux/sched.h --- linux-2.4.22-bk15/include/linux/sched.h 2003-09-13 03:06:30.000000000 -0700 +++ linux-2.4.22-bk16/include/linux/sched.h 2003-09-13 03:06:46.000000000 -0700 @@ -429,7 +429,6 @@ #define PF_DUMPCORE 0x00000200 /* dumped core */ #define PF_SIGNALED 0x00000400 /* killed by a signal */ #define PF_MEMALLOC 0x00000800 /* Allocating memory */ -#define PF_MEMDIE 0x00001000 /* Killed for out-of-memory */ #define PF_FREE_PAGES 0x00002000 /* per process page freeing */ #define PF_NOIO 0x00004000 /* avoid generating further I/O */ diff -urN linux-2.4.22-bk15/include/linux/swap.h linux-2.4.22-bk16/include/linux/swap.h --- linux-2.4.22-bk15/include/linux/swap.h 2003-08-25 04:44:44.000000000 -0700 +++ linux-2.4.22-bk16/include/linux/swap.h 2003-09-13 03:06:46.000000000 -0700 @@ -87,7 +87,7 @@ extern unsigned int nr_free_buffer_pages(void); extern int nr_active_pages; extern int nr_inactive_pages; -extern atomic_t page_cache_size; +extern unsigned long page_cache_size; extern atomic_t buffermem_pages; extern spinlock_cacheline_t pagecache_lock_cacheline; @@ -115,6 +115,7 @@ extern wait_queue_head_t kswapd_wait; extern int FASTCALL(try_to_free_pages_zone(zone_t *, unsigned int)); extern int FASTCALL(try_to_free_pages(unsigned int)); +extern int vm_vfs_scan_ratio, vm_cache_scan_ratio, vm_lru_balance_ratio, vm_passes, vm_gfp_debug, vm_mapped_ratio; /* linux/mm/page_io.c */ extern void rw_swap_page(int, struct page *); @@ -175,34 +176,46 @@ BUG(); \ } while (0) +extern void delta_nr_active_pages(struct page *page, long delta); +#define inc_nr_active_pages(page) delta_nr_active_pages(page, 1) +#define dec_nr_active_pages(page) delta_nr_active_pages(page, -1) + +extern void delta_nr_inactive_pages(struct page *page, long delta); +#define inc_nr_inactive_pages(page) delta_nr_inactive_pages(page, 1) +#define dec_nr_inactive_pages(page) delta_nr_inactive_pages(page, -1) + #define add_page_to_active_list(page) \ do { \ DEBUG_LRU_PAGE(page); \ SetPageActive(page); \ list_add(&(page)->lru, &active_list); \ - nr_active_pages++; \ + inc_nr_active_pages(page); \ } while (0) #define add_page_to_inactive_list(page) \ do { \ DEBUG_LRU_PAGE(page); \ list_add(&(page)->lru, &inactive_list); \ - nr_inactive_pages++; \ + inc_nr_inactive_pages(page); \ } while (0) #define del_page_from_active_list(page) \ do { \ list_del(&(page)->lru); \ ClearPageActive(page); \ - nr_active_pages--; \ + dec_nr_active_pages(page); \ } while (0) #define del_page_from_inactive_list(page) \ do { \ list_del(&(page)->lru); \ - nr_inactive_pages--; \ + dec_nr_inactive_pages(page); \ } while (0) +extern void delta_nr_cache_pages(struct page *page, long delta); +#define inc_nr_cache_pages(page) delta_nr_cache_pages(page, 1) +#define dec_nr_cache_pages(page) delta_nr_cache_pages(page, -1) + extern spinlock_t swaplock; #define swap_list_lock() spin_lock(&swaplock) diff -urN linux-2.4.22-bk15/include/linux/sysctl.h linux-2.4.22-bk16/include/linux/sysctl.h --- linux-2.4.22-bk15/include/linux/sysctl.h 2003-09-13 03:06:30.000000000 -0700 +++ linux-2.4.22-bk16/include/linux/sysctl.h 2003-09-13 03:06:46.000000000 -0700 @@ -147,7 +147,13 @@ VM_MAX_MAP_COUNT=11, /* int: Maximum number of active map areas */ VM_MIN_READAHEAD=12, /* Min file readahead */ VM_MAX_READAHEAD=13, /* Max file readahead */ + VM_VFS_SCAN_RATIO=14, /* part of the inactive vfs lists to scan */ + VM_LRU_BALANCE_RATIO=15,/* balance active and inactive caches */ + VM_PASSES=16, /* number of vm passes before failing */ VM_PAGEBUF=17, /* struct: Control pagebuf parameters */ + VM_GFP_DEBUG=18, /* debug GFP failures */ + VM_CACHE_SCAN_RATIO=19, /* part of the inactive cache list to scan */ + VM_MAPPED_RATIO=20, /* amount of unfreeable pages that triggers swapout */ }; diff -urN linux-2.4.22-bk15/kernel/sysctl.c linux-2.4.22-bk16/kernel/sysctl.c --- linux-2.4.22-bk15/kernel/sysctl.c 2003-09-13 03:06:30.000000000 -0700 +++ linux-2.4.22-bk16/kernel/sysctl.c 2003-09-13 03:06:46.000000000 -0700 @@ -30,6 +30,7 @@ #include #include #include +#include #include @@ -276,6 +277,18 @@ }; static ctl_table vm_table[] = { + {VM_GFP_DEBUG, "vm_gfp_debug", + &vm_gfp_debug, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_VFS_SCAN_RATIO, "vm_vfs_scan_ratio", + &vm_vfs_scan_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_CACHE_SCAN_RATIO, "vm_cache_scan_ratio", + &vm_cache_scan_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_MAPPED_RATIO, "vm_mapped_ratio", + &vm_mapped_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_LRU_BALANCE_RATIO, "vm_lru_balance_ratio", + &vm_lru_balance_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_PASSES, "vm_passes", + &vm_passes, sizeof(int), 0644, NULL, &proc_dointvec}, {VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0644, NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL, &bdflush_min, &bdflush_max}, diff -urN linux-2.4.22-bk15/mm/filemap.c linux-2.4.22-bk16/mm/filemap.c --- linux-2.4.22-bk15/mm/filemap.c 2003-09-13 03:06:30.000000000 -0700 +++ linux-2.4.22-bk16/mm/filemap.c 2003-09-13 03:06:46.000000000 -0700 @@ -42,7 +42,7 @@ * SMP-threaded pagemap-LRU 1999, Andrea Arcangeli */ -atomic_t page_cache_size = ATOMIC_INIT(0); +unsigned long page_cache_size; unsigned int page_hash_bits; struct page **page_hash_table; @@ -79,7 +79,7 @@ next->pprev_hash = &page->next_hash; if (page->buffers) PAGE_BUG(page); - atomic_inc(&page_cache_size); + inc_nr_cache_pages(page); } static inline void add_page_to_inode_queue(struct address_space *mapping, struct page * page) @@ -113,7 +113,7 @@ next->pprev_hash = pprev; *pprev = next; page->pprev_hash = NULL; - atomic_dec(&page_cache_size); + dec_nr_cache_pages(page); } /* diff -urN linux-2.4.22-bk15/mm/mmap.c linux-2.4.22-bk16/mm/mmap.c --- linux-2.4.22-bk15/mm/mmap.c 2003-06-13 07:51:39.000000000 -0700 +++ linux-2.4.22-bk16/mm/mmap.c 2003-09-13 03:06:46.000000000 -0700 @@ -69,7 +69,7 @@ return 1; /* The page cache contains buffer pages these days.. */ - free = atomic_read(&page_cache_size); + free = page_cache_size; free += nr_free_pages(); free += nr_swap_pages; diff -urN linux-2.4.22-bk15/mm/oom_kill.c linux-2.4.22-bk16/mm/oom_kill.c --- linux-2.4.22-bk15/mm/oom_kill.c 2003-08-25 04:44:44.000000000 -0700 +++ linux-2.4.22-bk16/mm/oom_kill.c 2003-09-13 03:06:46.000000000 -0700 @@ -21,6 +21,8 @@ #include #include +#if 0 /* Nothing in this file is used */ + /* #define DEBUG */ /** @@ -151,7 +153,6 @@ * exit() and clear out its resources quickly... */ p->counter = 5 * HZ; - p->flags |= PF_MEMALLOC | PF_MEMDIE; /* This process has hardware access, be more careful. */ if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) { @@ -256,3 +257,5 @@ first = now; count = 0; } + +#endif /* Unused file */ diff -urN linux-2.4.22-bk15/mm/page_alloc.c linux-2.4.22-bk16/mm/page_alloc.c --- linux-2.4.22-bk15/mm/page_alloc.c 2002-11-28 15:53:15.000000000 -0800 +++ linux-2.4.22-bk16/mm/page_alloc.c 2003-09-13 03:06:46.000000000 -0700 @@ -42,6 +42,9 @@ static int zone_balance_ratio[MAX_NR_ZONES] __initdata = { 128, 128, 128, }; static int zone_balance_min[MAX_NR_ZONES] __initdata = { 20 , 20, 20, }; static int zone_balance_max[MAX_NR_ZONES] __initdata = { 255 , 255, 255, }; +static int lower_zone_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 }; + +int vm_gfp_debug = 0; /* * Temporary debugging check. @@ -253,7 +256,7 @@ static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed) { struct page * page = NULL; - int __freed = 0; + int __freed; if (!(gfp_mask & __GFP_WAIT)) goto out; @@ -321,28 +324,31 @@ return page; } +static inline unsigned long zone_free_pages(zone_t * zone, unsigned int order) +{ + long free = zone->free_pages - (1UL << order); + return free >= 0 ? free : 0; +} + /* * This is the 'heart' of the zoned buddy allocator: */ struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist) { - unsigned long min; zone_t **zone, * classzone; struct page * page; - int freed; + int freed, class_idx; zone = zonelist->zones; classzone = *zone; - if (classzone == NULL) - return NULL; - min = 1UL << order; + class_idx = zone_idx(classzone); + for (;;) { zone_t *z = *(zone++); if (!z) break; - min += z->pages_low; - if (z->free_pages > min) { + if (zone_free_pages(z, order) > z->watermarks[class_idx].low) { page = rmqueue(z, order); if (page) return page; @@ -355,18 +361,16 @@ wake_up_interruptible(&kswapd_wait); zone = zonelist->zones; - min = 1UL << order; for (;;) { - unsigned long local_min; + unsigned long min; zone_t *z = *(zone++); if (!z) break; - local_min = z->pages_min; + min = z->watermarks[class_idx].min; if (!(gfp_mask & __GFP_WAIT)) - local_min >>= 2; - min += local_min; - if (z->free_pages > min) { + min >>= 2; + if (zone_free_pages(z, order) > min) { page = rmqueue(z, order); if (page) return page; @@ -375,8 +379,7 @@ /* here we're in the low on memory slow path */ -rebalance: - if (current->flags & (PF_MEMALLOC | PF_MEMDIE)) { + if (current->flags & PF_MEMALLOC && !in_interrupt()) { zone = zonelist->zones; for (;;) { zone_t *z = *(zone++); @@ -392,34 +395,51 @@ /* Atomic allocations - we can't balance anything */ if (!(gfp_mask & __GFP_WAIT)) - return NULL; + goto out; + rebalance: page = balance_classzone(classzone, gfp_mask, order, &freed); if (page) return page; zone = zonelist->zones; - min = 1UL << order; - for (;;) { - zone_t *z = *(zone++); - if (!z) - break; + if (likely(freed)) { + for (;;) { + zone_t *z = *(zone++); + if (!z) + break; - min += z->pages_min; - if (z->free_pages > min) { - page = rmqueue(z, order); - if (page) - return page; + if (zone_free_pages(z, order) > z->watermarks[class_idx].min) { + page = rmqueue(z, order); + if (page) + return page; + } } - } + goto rebalance; + } else { + /* + * Check that no other task is been killed meanwhile, + * in such a case we can succeed the allocation. + */ + for (;;) { + zone_t *z = *(zone++); + if (!z) + break; - /* Don't let big-order allocations loop */ - if (order > 3) - return NULL; + if (zone_free_pages(z, order) > z->watermarks[class_idx].high) { + page = rmqueue(z, order); + if (page) + return page; + } + } + } - /* Yield for kswapd, and try again */ - yield(); - goto rebalance; + out: + printk(KERN_NOTICE "__alloc_pages: %u-order allocation failed (gfp=0x%x/%i)\n", + order, gfp_mask, !!(current->flags & PF_MEMALLOC)); + if (unlikely(vm_gfp_debug)) + dump_stack(); + return NULL; } /* @@ -481,18 +501,25 @@ { pg_data_t *pgdat; unsigned int sum = 0; + zonelist_t *zonelist; + zone_t **zonep, *zone; for_each_pgdat(pgdat) { - zonelist_t *zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK); - zone_t **zonep = zonelist->zones; - zone_t *zone; - - for (zone = *zonep++; zone; zone = *zonep++) { - unsigned long size = zone->size; - unsigned long high = zone->pages_high; - if (size > high) - sum += size - high; - } + int class_idx; + zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK); + zonep = zonelist->zones; + zone = *zonep; + class_idx = zone_idx(zone); + + sum += zone->nr_cache_pages; + do { + unsigned int free = zone->free_pages - zone->watermarks[class_idx].high; + zonep++; + zone = *zonep; + if (free <= 0) + continue; + sum += free; + } while (zone); } return sum; @@ -532,13 +559,9 @@ zone_t *zone; for (zone = tmpdat->node_zones; zone < tmpdat->node_zones + MAX_NR_ZONES; zone++) - printk("Zone:%s freepages:%6lukB min:%6lukB low:%6lukB " - "high:%6lukB\n", + printk("Zone:%s freepages:%6lukB\n", zone->name, - K(zone->free_pages), - K(zone->pages_min), - K(zone->pages_low), - K(zone->pages_high)); + K(zone->free_pages)); tmpdat = tmpdat->node_next; } @@ -729,6 +752,7 @@ zone_t *zone = pgdat->node_zones + j; unsigned long mask; unsigned long size, realsize; + int idx; zone_table[nid * MAX_NR_ZONES + j] = zone; realsize = size = zones_size[j]; @@ -737,11 +761,15 @@ printk("zone(%lu): %lu pages.\n", j, size); zone->size = size; + zone->realsize = realsize; zone->name = zone_names[j]; zone->lock = SPIN_LOCK_UNLOCKED; zone->zone_pgdat = pgdat; zone->free_pages = 0; zone->need_balance = 0; + zone->nr_active_pages = zone->nr_inactive_pages = 0; + + if (!size) continue; @@ -766,9 +794,29 @@ mask = zone_balance_min[j]; else if (mask > zone_balance_max[j]) mask = zone_balance_max[j]; - zone->pages_min = mask; - zone->pages_low = mask*2; - zone->pages_high = mask*3; + zone->watermarks[j].min = mask; + zone->watermarks[j].low = mask*2; + zone->watermarks[j].high = mask*3; + /* now set the watermarks of the lower zones in the "j" classzone */ + for (idx = j-1; idx >= 0; idx--) { + zone_t * lower_zone = pgdat->node_zones + idx; + unsigned long lower_zone_reserve; + if (!lower_zone->size) + continue; + + mask = lower_zone->watermarks[idx].min; + lower_zone->watermarks[j].min = mask; + lower_zone->watermarks[j].low = mask*2; + lower_zone->watermarks[j].high = mask*3; + + /* now the brainer part */ + lower_zone_reserve = realsize / lower_zone_reserve_ratio[idx]; + lower_zone->watermarks[j].min += lower_zone_reserve; + lower_zone->watermarks[j].low += lower_zone_reserve; + lower_zone->watermarks[j].high += lower_zone_reserve; + + realsize += lower_zone->realsize; + } zone->zone_mem_map = mem_map + offset; zone->zone_start_mapnr = offset; @@ -852,3 +900,16 @@ } __setup("memfrac=", setup_mem_frac); + +static int __init setup_lower_zone_reserve(char *str) +{ + int j = 0; + + while (get_option(&str, &lower_zone_reserve_ratio[j++]) == 2); + printk("setup_lower_zone_reserve: "); + for (j = 0; j < MAX_NR_ZONES-1; j++) printk("%d ", lower_zone_reserve_ratio[j]); + printk("\n"); + return 1; +} + +__setup("lower_zone_reserve=", setup_lower_zone_reserve); diff -urN linux-2.4.22-bk15/mm/swap.c linux-2.4.22-bk16/mm/swap.c --- linux-2.4.22-bk15/mm/swap.c 2002-11-28 15:53:15.000000000 -0800 +++ linux-2.4.22-bk16/mm/swap.c 2003-09-13 03:06:46.000000000 -0700 @@ -94,6 +94,78 @@ spin_unlock(&pagemap_lru_lock); } +/** + * delta_nr_active_pages: alter the number of active pages. + * + * @page: the page which is being activated/deactivated + * @delta: +1 for activation, -1 for deactivation + * + * Called under pagecache_lock + */ +void delta_nr_active_pages(struct page *page, long delta) +{ + pg_data_t *pgdat; + zone_t *classzone, *overflow; + + classzone = page_zone(page); + pgdat = classzone->zone_pgdat; + overflow = pgdat->node_zones + pgdat->nr_zones; + + while (classzone < overflow) { + classzone->nr_active_pages += delta; + classzone++; + } + nr_active_pages += delta; +} + +/** + * delta_nr_inactive_pages: alter the number of inactive pages. + * + * @page: the page which is being deactivated/activated + * @delta: +1 for deactivation, -1 for activation + * + * Called under pagecache_lock + */ +void delta_nr_inactive_pages(struct page *page, long delta) +{ + pg_data_t *pgdat; + zone_t *classzone, *overflow; + + classzone = page_zone(page); + pgdat = classzone->zone_pgdat; + overflow = pgdat->node_zones + pgdat->nr_zones; + + while (classzone < overflow) { + classzone->nr_inactive_pages += delta; + classzone++; + } + nr_inactive_pages += delta; +} + +/** + * delta_nr_cache_pages: alter the number of pages in the pagecache + * + * @page: the page which is being added/removed + * @delta: +1 for addition, -1 for removal + * + * Called under pagecache_lock + */ +void delta_nr_cache_pages(struct page *page, long delta) +{ + pg_data_t *pgdat; + zone_t *classzone, *overflow; + + classzone = page_zone(page); + pgdat = classzone->zone_pgdat; + overflow = pgdat->node_zones + pgdat->nr_zones; + + while (classzone < overflow) { + classzone->nr_cache_pages += delta; + classzone++; + } + page_cache_size += delta; +} + /* * Perform any setup for the swap system */ diff -urN linux-2.4.22-bk15/mm/vmscan.c linux-2.4.22-bk16/mm/vmscan.c --- linux-2.4.22-bk15/mm/vmscan.c 2002-11-28 15:53:15.000000000 -0800 +++ linux-2.4.22-bk16/mm/vmscan.c 2003-09-13 03:06:46.000000000 -0700 @@ -27,12 +27,42 @@ #include /* - * The "priority" of VM scanning is how much of the queues we - * will scan in one go. A value of 6 for DEF_PRIORITY implies - * that we'll scan 1/64th of the queues ("queue_length >> 6") - * during a normal aging round. + * "vm_passes" is the number of vm passes before failing the + * memory balancing. Take into account 3 passes are needed + * for a flush/wait/free cycle and that we only scan 1/vm_cache_scan_ratio + * of the inactive list at each pass. */ -#define DEF_PRIORITY (6) +int vm_passes = 60; + +/* + * "vm_cache_scan_ratio" is how much of the inactive LRU queue we will scan + * in one go. A value of 6 for vm_cache_scan_ratio implies that we'll + * scan 1/6 of the inactive lists during a normal aging round. + */ +int vm_cache_scan_ratio = 6; + +/* + * "vm_mapped_ratio" controls the pageout rate, the smaller, the earlier + * we'll start to pageout. + */ +int vm_mapped_ratio = 100; + +/* + * "vm_lru_balance_ratio" controls the balance between active and + * inactive cache. The bigger vm_balance is, the easier the + * active cache will grow, because we'll rotate the active list + * slowly. A value of 2 means we'll go towards a balance of + * 1/3 of the cache being inactive. + */ +int vm_lru_balance_ratio = 2; + +/* + * "vm_vfs_scan_ratio" is what proportion of the VFS queues we will scan + * in one go. A value of 6 for vm_vfs_scan_ratio implies that 1/6th of + * the unused-inode, dentry and dquot caches will be freed during a normal + * aging round. + */ +int vm_vfs_scan_ratio = 6; /* * The swap-out function returns 1 if it successfully @@ -292,13 +322,13 @@ return count; } -static int FASTCALL(swap_out(unsigned int priority, unsigned int gfp_mask, zone_t * classzone)); -static int swap_out(unsigned int priority, unsigned int gfp_mask, zone_t * classzone) +static int FASTCALL(swap_out(zone_t * classzone)); +static int swap_out(zone_t * classzone) { int counter, nr_pages = SWAP_CLUSTER_MAX; struct mm_struct *mm; - counter = mmlist_nr; + counter = mmlist_nr << 1; do { if (unlikely(current->need_resched)) { __set_current_state(TASK_RUNNING); @@ -334,15 +364,15 @@ return 0; } -static int FASTCALL(shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority)); -static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority) +static void FASTCALL(refill_inactive(int nr_pages, zone_t * classzone)); +static int FASTCALL(shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout)); +static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout) { struct list_head * entry; - int max_scan = nr_inactive_pages / priority; - int max_mapped = min((nr_pages << (10 - priority)), max_scan / 10); + int max_scan = (classzone->nr_inactive_pages + classzone->nr_active_pages) / vm_cache_scan_ratio; + int max_mapped = vm_mapped_ratio * nr_pages; - spin_lock(&pagemap_lru_lock); - while (--max_scan >= 0 && (entry = inactive_list.prev) != &inactive_list) { + while (max_scan && classzone->nr_inactive_pages && (entry = inactive_list.prev) != &inactive_list) { struct page * page; if (unlikely(current->need_resched)) { @@ -468,34 +498,49 @@ spin_lock(&pagecache_lock); /* - * this is the non-racy check for busy page. + * This is the non-racy check for busy page. + * It is critical to check PageDirty _after_ we made sure + * the page is freeable so not in use by anybody. + * At this point we're guaranteed that page->buffers is NULL, + * nobody can refill page->buffers under us because we still + * hold the page lock. */ - if (!page->mapping || !is_page_cache_freeable(page)) { + if (!page->mapping || page_count(page) > 1) { spin_unlock(&pagecache_lock); UnlockPage(page); page_mapped: - if (--max_mapped >= 0) - continue; + if (--max_mapped < 0) { + spin_unlock(&pagemap_lru_lock); - /* - * Alert! We've found too many mapped pages on the - * inactive list, so we start swapping out now! - */ - spin_unlock(&pagemap_lru_lock); - swap_out(priority, gfp_mask, classzone); - return nr_pages; - } + nr_pages -= kmem_cache_reap(gfp_mask); + if (nr_pages <= 0) + goto out; - /* - * It is critical to check PageDirty _after_ we made sure - * the page is freeable* so not in use by anybody. - */ + shrink_dcache_memory(vm_vfs_scan_ratio, gfp_mask); + shrink_icache_memory(vm_vfs_scan_ratio, gfp_mask); +#ifdef CONFIG_QUOTA + shrink_dqcache_memory(vm_vfs_scan_ratio, gfp_mask); +#endif + + if (!*failed_swapout) + *failed_swapout = !swap_out(classzone); + + max_mapped = nr_pages * vm_mapped_ratio; + + spin_lock(&pagemap_lru_lock); + refill_inactive(nr_pages, classzone); + } + continue; + + } if (PageDirty(page)) { spin_unlock(&pagecache_lock); UnlockPage(page); continue; } + __lru_cache_del(page); + /* point of no return */ if (likely(!PageSwapCache(page))) { __remove_inode_page(page); @@ -508,7 +553,6 @@ swap_free(swap); } - __lru_cache_del(page); UnlockPage(page); /* effectively free the page here */ @@ -520,6 +564,7 @@ } spin_unlock(&pagemap_lru_lock); + out: return nr_pages; } @@ -530,13 +575,15 @@ * We move them the other way when we see the * reference bit on the page. */ -static void refill_inactive(int nr_pages) +static void refill_inactive(int nr_pages, zone_t * classzone) { struct list_head * entry; + unsigned long ratio; + + ratio = (unsigned long) nr_pages * classzone->nr_active_pages / (((unsigned long) classzone->nr_inactive_pages * vm_lru_balance_ratio) + 1); - spin_lock(&pagemap_lru_lock); entry = active_list.prev; - while (nr_pages && entry != &active_list) { + while (ratio && entry != &active_list) { struct page * page; page = list_entry(entry, struct page, lru); @@ -553,54 +600,62 @@ add_page_to_inactive_list(page); SetPageReferenced(page); } - spin_unlock(&pagemap_lru_lock); + + if (entry != &active_list) { + list_del(&active_list); + list_add(&active_list, entry); + } } -static int FASTCALL(shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask, int nr_pages)); -static int shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask, int nr_pages) +static int FASTCALL(shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * failed_swapout)); +static int shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * failed_swapout) { - int chunk_size = nr_pages; - unsigned long ratio; - nr_pages -= kmem_cache_reap(gfp_mask); if (nr_pages <= 0) - return 0; + goto out; - nr_pages = chunk_size; - /* try to keep the active list 2/3 of the size of the cache */ - ratio = (unsigned long) nr_pages * nr_active_pages / ((nr_inactive_pages + 1) * 2); - refill_inactive(ratio); - - nr_pages = shrink_cache(nr_pages, classzone, gfp_mask, priority); - if (nr_pages <= 0) - return 0; + spin_lock(&pagemap_lru_lock); + refill_inactive(nr_pages, classzone); - shrink_dcache_memory(priority, gfp_mask); - shrink_icache_memory(priority, gfp_mask); -#ifdef CONFIG_QUOTA - shrink_dqcache_memory(DEF_PRIORITY, gfp_mask); -#endif + nr_pages = shrink_cache(nr_pages, classzone, gfp_mask, failed_swapout); - return nr_pages; +out: + return nr_pages; } +static int check_classzone_need_balance(zone_t * classzone); + int try_to_free_pages_zone(zone_t *classzone, unsigned int gfp_mask) { - int priority = DEF_PRIORITY; - int nr_pages = SWAP_CLUSTER_MAX; - gfp_mask = pf_gfp_mask(gfp_mask); - do { - nr_pages = shrink_caches(classzone, priority, gfp_mask, nr_pages); - if (nr_pages <= 0) - return 1; - } while (--priority); - /* - * Hmm.. Cache shrink failed - time to kill something? - * Mhwahahhaha! This is the part I really like. Giggle. - */ - out_of_memory(); + for (;;) { + int tries = vm_passes; + int failed_swapout = !(gfp_mask & __GFP_IO); + int nr_pages = SWAP_CLUSTER_MAX; + + do { + nr_pages = shrink_caches(classzone, gfp_mask, nr_pages, &failed_swapout); + if (nr_pages <= 0) + return 1; + shrink_dcache_memory(vm_vfs_scan_ratio, gfp_mask); + shrink_icache_memory(vm_vfs_scan_ratio, gfp_mask); +#ifdef CONFIG_QUOTA + shrink_dqcache_memory(vm_vfs_scan_ratio, gfp_mask); +#endif + if (!failed_swapout) + failed_swapout = !swap_out(classzone); + } while (--tries); + + if (likely(current->pid != 1)) + break; + if (!check_classzone_need_balance(classzone)) + break; + + __set_current_state(TASK_RUNNING); + yield(); + } + return 0; } @@ -627,11 +682,12 @@ static int check_classzone_need_balance(zone_t * classzone) { - zone_t * first_classzone; + zone_t * first_zone; + int class_idx = zone_idx(classzone); - first_classzone = classzone->zone_pgdat->node_zones; - while (classzone >= first_classzone) { - if (classzone->free_pages > classzone->pages_high) + first_zone = classzone->zone_pgdat->node_zones; + while (classzone >= first_zone) { + if (classzone->free_pages > classzone->watermarks[class_idx].high) return 0; classzone--; } @@ -647,12 +703,12 @@ zone = pgdat->node_zones + i; if (unlikely(current->need_resched)) schedule(); - if (!zone->need_balance) + if (!zone->need_balance || !zone->size) continue; if (!try_to_free_pages_zone(zone, GFP_KSWAPD)) { zone->need_balance = 0; __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ); + schedule_timeout(HZ*5); continue; } if (check_classzone_need_balance(zone)) @@ -684,7 +740,7 @@ for (i = pgdat->nr_zones-1; i >= 0; i--) { zone = pgdat->node_zones + i; - if (!zone->need_balance) + if (!zone->need_balance || !zone->size) continue; return 0; } diff -urN linux-2.4.22-bk15/net/atm/common.c linux-2.4.22-bk16/net/atm/common.c --- linux-2.4.22-bk15/net/atm/common.c 2003-09-13 03:06:30.000000000 -0700 +++ linux-2.4.22-bk16/net/atm/common.c 2003-09-13 03:06:46.000000000 -0700 @@ -33,21 +33,61 @@ #include #include "lec.h" #include "lec_arpc.h" -struct atm_lane_ops atm_lane_ops; -#endif -#ifdef CONFIG_ATM_LANE_MODULE +struct atm_lane_ops *atm_lane_ops; +static DECLARE_MUTEX(atm_lane_ops_mutex); + +void atm_lane_ops_set(struct atm_lane_ops *hook) +{ + down(&atm_lane_ops_mutex); + atm_lane_ops = hook; + up(&atm_lane_ops_mutex); +} + +int try_atm_lane_ops(void) +{ + down(&atm_lane_ops_mutex); + if (atm_lane_ops && try_inc_mod_count(atm_lane_ops->owner)) { + up(&atm_lane_ops_mutex); + return 1; + } + up(&atm_lane_ops_mutex); + return 0; +} + +#if defined(CONFIG_ATM_LANE_MODULE) || defined(CONFIG_ATM_MPOA_MODULE) EXPORT_SYMBOL(atm_lane_ops); +EXPORT_SYMBOL(try_atm_lane_ops); +EXPORT_SYMBOL(atm_lane_ops_set); +#endif #endif #if defined(CONFIG_ATM_MPOA) || defined(CONFIG_ATM_MPOA_MODULE) #include #include "mpc.h" -struct atm_mpoa_ops atm_mpoa_ops; -#endif +struct atm_mpoa_ops *atm_mpoa_ops; +static DECLARE_MUTEX(atm_mpoa_ops_mutex); + +void atm_mpoa_ops_set(struct atm_mpoa_ops *hook) +{ + down(&atm_mpoa_ops_mutex); + atm_mpoa_ops = hook; + up(&atm_mpoa_ops_mutex); +} + +int try_atm_mpoa_ops(void) +{ + down(&atm_mpoa_ops_mutex); + if (atm_mpoa_ops && try_inc_mod_count(atm_mpoa_ops->owner)) { + up(&atm_mpoa_ops_mutex); + return 1; + } + up(&atm_mpoa_ops_mutex); + return 0; +} #ifdef CONFIG_ATM_MPOA_MODULE EXPORT_SYMBOL(atm_mpoa_ops); -#ifndef CONFIG_ATM_LANE_MODULE -EXPORT_SYMBOL(atm_lane_ops); +EXPORT_SYMBOL(try_atm_mpoa_ops); +EXPORT_SYMBOL(atm_mpoa_ops_set); #endif #endif @@ -739,27 +779,43 @@ ret_val = -EPERM; goto done; } - if (atm_lane_ops.lecd_attach == NULL) - atm_lane_init(); - if (atm_lane_ops.lecd_attach == NULL) { /* try again */ +#if defined(CONFIG_ATM_LANE_MODULE) + if (!atm_lane_ops) + request_module("lec"); +#endif + if (try_atm_lane_ops()) { + error = atm_lane_ops->lecd_attach(vcc, (int) arg); + if (atm_lane_ops->owner) + __MOD_DEC_USE_COUNT(atm_lane_ops->owner); + if (error >= 0) + sock->state = SS_CONNECTED; + ret_val = error; + } else ret_val = -ENOSYS; - goto done; - } - error = atm_lane_ops.lecd_attach(vcc, (int)arg); - if (error >= 0) sock->state = SS_CONNECTED; - ret_val = error; goto done; case ATMLEC_MCAST: - if (!capable(CAP_NET_ADMIN)) + if (!capable(CAP_NET_ADMIN)) { ret_val = -EPERM; - else - ret_val = atm_lane_ops.mcast_attach(vcc, (int)arg); + goto done; + } + if (try_atm_lane_ops()) { + ret_val = atm_lane_ops->mcast_attach(vcc, (int) arg); + if (atm_lane_ops->owner) + __MOD_DEC_USE_COUNT(atm_lane_ops->owner); + } else + ret_val = -ENOSYS; goto done; case ATMLEC_DATA: - if (!capable(CAP_NET_ADMIN)) + if (!capable(CAP_NET_ADMIN)) { ret_val = -EPERM; - else - ret_val = atm_lane_ops.vcc_attach(vcc, (void*)arg); + goto done; + } + if (try_atm_lane_ops()) { + ret_val = atm_lane_ops->vcc_attach(vcc, (void *) arg); + if (atm_lane_ops->owner) + __MOD_DEC_USE_COUNT(atm_lane_ops->owner); + } else + ret_val = -ENOSYS; goto done; #endif #if defined(CONFIG_ATM_MPOA) || defined(CONFIG_ATM_MPOA_MODULE) @@ -768,21 +824,31 @@ ret_val = -EPERM; goto done; } - if (atm_mpoa_ops.mpoad_attach == NULL) - atm_mpoa_init(); - if (atm_mpoa_ops.mpoad_attach == NULL) { /* try again */ +#if defined(CONFIG_ATM_MPOA_MODULE) + if (!atm_mpoa_ops) + request_module("mpoa"); +#endif + if (try_atm_mpoa_ops()) { + error = atm_mpoa_ops->mpoad_attach(vcc, (int) arg); + if (atm_mpoa_ops->owner) + __MOD_DEC_USE_COUNT(atm_mpoa_ops->owner); + if (error >= 0) + sock->state = SS_CONNECTED; + ret_val = error; + } else ret_val = -ENOSYS; - goto done; - } - error = atm_mpoa_ops.mpoad_attach(vcc, (int)arg); - if (error >= 0) sock->state = SS_CONNECTED; - ret_val = error; goto done; case ATMMPC_DATA: - if (!capable(CAP_NET_ADMIN)) + if (!capable(CAP_NET_ADMIN)) { ret_val = -EPERM; - else - ret_val = atm_mpoa_ops.vcc_attach(vcc, arg); + goto done; + } + if (try_atm_mpoa_ops()) { + ret_val = atm_mpoa_ops->vcc_attach(vcc, arg); + if (atm_mpoa_ops->owner) + __MOD_DEC_USE_COUNT(atm_mpoa_ops->owner); + } else + ret_val = -ENOSYS; goto done; #endif #if defined(CONFIG_ATM_TCP) || defined(CONFIG_ATM_TCP_MODULE) @@ -1167,40 +1233,6 @@ } -/* - * lane_mpoa_init.c: A couple of helper functions - * to make modular LANE and MPOA client easier to implement - */ - -/* - * This is how it goes: - * - * if xxxx is not compiled as module, call atm_xxxx_init_ops() - * from here - * else call atm_mpoa_init_ops() from init_module() within - * the kernel when xxxx module is loaded - * - * In either case function pointers in struct atm_xxxx_ops - * are initialized to their correct values. Either they - * point to functions in the module or in the kernel - */ - -extern struct atm_mpoa_ops atm_mpoa_ops; /* in common.c */ -extern struct atm_lane_ops atm_lane_ops; /* in common.c */ - -#if defined(CONFIG_ATM_MPOA) || defined(CONFIG_ATM_MPOA_MODULE) -void atm_mpoa_init(void) -{ -#ifndef CONFIG_ATM_MPOA_MODULE /* not module */ - atm_mpoa_init_ops(&atm_mpoa_ops); -#else - request_module("mpoa"); -#endif - - return; -} -#endif - #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, @@ -1211,18 +1243,8 @@ EXPORT_SYMBOL(br_fdb_put_hook); #endif /* defined(CONFIG_ATM_LANE_MODULE) || defined(CONFIG_BRIDGE_MODULE) */ #endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */ +#endif /* defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) */ -void atm_lane_init(void) -{ -#ifndef CONFIG_ATM_LANE_MODULE /* not module */ - atm_lane_init_ops(&atm_lane_ops); -#else - request_module("lec"); -#endif - - return; -} -#endif static int __init atm_init(void) { diff -urN linux-2.4.22-bk15/net/atm/lec.c linux-2.4.22-bk16/net/atm/lec.c --- linux-2.4.22-bk15/net/atm/lec.c 2003-08-25 04:44:44.000000000 -0700 +++ linux-2.4.22-bk16/net/atm/lec.c 2003-09-13 03:06:46.000000000 -0700 @@ -11,6 +11,7 @@ /* We are ethernet device */ #include #include +#include #include #include #include @@ -57,8 +58,6 @@ unsigned char *addr); extern void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); -static spinlock_t lec_arp_spinlock = SPIN_LOCK_UNLOCKED; - #define DUMP_PACKETS 0 /* 0 = None, * 1 = 30 first bytes * 2 = Whole packet @@ -72,9 +71,9 @@ static int lec_close(struct net_device *dev); static struct net_device_stats *lec_get_stats(struct net_device *dev); static void lec_init(struct net_device *dev); -static __inline__ struct lec_arp_table* lec_arp_find(struct lec_priv *priv, +static inline struct lec_arp_table* lec_arp_find(struct lec_priv *priv, unsigned char *mac_addr); -static __inline__ int lec_arp_remove(struct lec_arp_table **lec_arp_tables, +static inline int lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove); /* LANE2 functions */ static void lane2_associate_ind (struct net_device *dev, u8 *mac_address, @@ -96,8 +95,18 @@ static struct net_device *dev_lec[MAX_LEC_ITF]; /* This will be called from proc.c via function pointer */ -struct net_device **get_dev_lec (void) { - return &dev_lec[0]; +struct net_device *get_dev_lec(int itf) +{ + struct net_device *dev; + + if (itf >= MAX_LEC_ITF) + return NULL; + rtnl_lock(); + dev = dev_lec[itf]; + if (dev) + dev_hold(dev); + rtnl_unlock(); + return dev; } #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) @@ -433,7 +442,7 @@ break; case l_narp_req: /* LANE2: see 7.1.35 in the lane2 spec */ entry = lec_arp_find(priv, mesg->content.normal.mac_addr); - lec_arp_remove(priv->lec_arp_tables, entry); + lec_arp_remove(priv, entry); if (mesg->content.normal.no_source_le_narp) break; @@ -834,37 +843,28 @@ return i; } -void atm_lane_init_ops(struct atm_lane_ops *ops) +static struct atm_lane_ops __atm_lane_ops = { - ops->lecd_attach = lecd_attach; - ops->mcast_attach = lec_mcast_attach; - ops->vcc_attach = lec_vcc_attach; - ops->get_lecs = get_dev_lec; - - printk("lec.c: " __DATE__ " " __TIME__ " initialized\n"); - - return; -} + .lecd_attach = lecd_attach, + .mcast_attach = lec_mcast_attach, + .vcc_attach = lec_vcc_attach, + .get_lec = get_dev_lec, + .owner = THIS_MODULE +}; static int __init lane_module_init(void) { - extern struct atm_lane_ops atm_lane_ops; - - atm_lane_init_ops(&atm_lane_ops); - + atm_lane_ops_set(&__atm_lane_ops); + printk("lec.c: " __DATE__ " " __TIME__ " initialized\n"); return 0; } static void __exit lane_module_cleanup(void) { int i; - extern struct atm_lane_ops atm_lane_ops; struct lec_priv *priv; - atm_lane_ops.lecd_attach = NULL; - atm_lane_ops.mcast_attach = NULL; - atm_lane_ops.vcc_attach = NULL; - atm_lane_ops.get_lecs = NULL; + atm_lane_ops_set(NULL); for (i = 0; i < MAX_LEC_ITF; i++) { if (dev_lec[i] != NULL) { @@ -874,7 +874,7 @@ unregister_trdev(dev_lec[i]); else #endif - unregister_netdev(dev_lec[i]); + unregister_netdev(dev_lec[i]); kfree(dev_lec[i]); dev_lec[i] = NULL; } @@ -1074,6 +1074,7 @@ for (i=0;ilec_arp_tables[i] = NULL; } + spin_lock_init(&priv->lec_arp_lock); init_timer(&priv->lec_arp_timer); priv->lec_arp_timer.expires = jiffies+LEC_ARP_REFRESH_INTERVAL; priv->lec_arp_timer.data = (unsigned long)priv; @@ -1110,21 +1111,20 @@ * Insert entry to lec_arp_table * LANE2: Add to the end of the list to satisfy 8.1.13 */ -static __inline__ void -lec_arp_add(struct lec_arp_table **lec_arp_tables, - struct lec_arp_table *to_add) +static inline void +lec_arp_add(struct lec_priv *priv, struct lec_arp_table *to_add) { unsigned long flags; unsigned short place; struct lec_arp_table *tmp; - spin_lock_irqsave(&lec_arp_spinlock, flags); + spin_lock_irqsave(&priv->lec_arp_lock, flags); place = HASH(to_add->mac_addr[ETH_ALEN-1]); - tmp = lec_arp_tables[place]; + tmp = priv->lec_arp_tables[place]; to_add->next = NULL; if (tmp == NULL) - lec_arp_tables[place] = to_add; + priv->lec_arp_tables[place] = to_add; else { /* add to the end */ while (tmp->next) @@ -1132,7 +1132,7 @@ tmp->next = to_add; } - spin_unlock_irqrestore(&lec_arp_spinlock, flags); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); DPRINTK("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", 0xff&to_add->mac_addr[0], 0xff&to_add->mac_addr[1], @@ -1143,8 +1143,8 @@ /* * Remove entry from lec_arp_table */ -static __inline__ int -lec_arp_remove(struct lec_arp_table **lec_arp_tables, +static inline int +lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) { unsigned long flags; @@ -1152,22 +1152,22 @@ struct lec_arp_table *tmp; int remove_vcc=1; - spin_lock_irqsave(&lec_arp_spinlock, flags); + spin_lock_irqsave(&priv->lec_arp_lock, flags); if (!to_remove) { - spin_unlock_irqrestore(&lec_arp_spinlock, flags); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); return -1; } place = HASH(to_remove->mac_addr[ETH_ALEN-1]); - tmp = lec_arp_tables[place]; + tmp = priv->lec_arp_tables[place]; if (tmp == to_remove) { - lec_arp_tables[place] = tmp->next; + priv->lec_arp_tables[place] = tmp->next; } else { while(tmp && tmp->next != to_remove) { tmp = tmp->next; } if (!tmp) {/* Entry was not found */ - spin_unlock_irqrestore(&lec_arp_spinlock, flags); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); return -1; } } @@ -1181,7 +1181,7 @@ * ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT */ for(place=0;placenext){ + for(tmp = priv->lec_arp_tables[place]; tmp != NULL; tmp = tmp->next) { if (memcmp(tmp->atm_addr, to_remove->atm_addr, ATM_ESA_LEN)==0) { remove_vcc=0; @@ -1194,7 +1194,7 @@ } skb_queue_purge(&to_remove->tx_wait); /* FIXME: good place for this? */ - spin_unlock_irqrestore(&lec_arp_spinlock, flags); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); DPRINTK("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", 0xff&to_remove->mac_addr[0], 0xff&to_remove->mac_addr[1], @@ -1390,7 +1390,7 @@ for (i=0;ilec_arp_tables[i];entry != NULL; entry=next) { next = entry->next; - lec_arp_remove(priv->lec_arp_tables, entry); + lec_arp_remove(priv, entry); kfree(entry); } } @@ -1430,7 +1430,7 @@ /* * Find entry by mac_address */ -static __inline__ struct lec_arp_table* +static inline struct lec_arp_table* lec_arp_find(struct lec_priv *priv, unsigned char *mac_addr) { @@ -1568,8 +1568,6 @@ lec_arp_check_expire(unsigned long data) { struct lec_priv *priv = (struct lec_priv *)data; - struct lec_arp_table **lec_arp_tables = - (struct lec_arp_table **)priv->lec_arp_tables; struct lec_arp_table *entry, *next; unsigned long now; unsigned long time_to_check; @@ -1585,7 +1583,7 @@ lec_arp_get(priv); now = jiffies; for(i=0;ilec_arp_tables[i]; entry != NULL; ) { if ((entry->flags) & LEC_REMOTE_FLAG && priv->topology_change) time_to_check=priv->forward_delay_time; @@ -1601,7 +1599,7 @@ /* Remove entry */ DPRINTK("LEC:Entry timed out\n"); next = entry->next; - lec_arp_remove(lec_arp_tables, entry); + lec_arp_remove(priv, entry); kfree(entry); entry = next; } else { @@ -1690,7 +1688,7 @@ if (!entry) { return priv->mcast_vcc; } - lec_arp_add(priv->lec_arp_tables, entry); + lec_arp_add(priv, entry); /* We want arp-request(s) to be sent */ entry->packets_flooded =1; entry->status = ESI_ARP_PENDING; @@ -1723,7 +1721,7 @@ if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN) && (permanent || !(entry->flags & LEC_PERMANENT_FLAG))) { - lec_arp_remove(priv->lec_arp_tables, entry); + lec_arp_remove(priv, entry); kfree(entry); } lec_arp_put(priv); @@ -1789,7 +1787,7 @@ entry->status = ESI_FORWARD_DIRECT; memcpy(entry->mac_addr, mac_addr, ETH_ALEN); entry->last_used = jiffies; - lec_arp_add(priv->lec_arp_tables, entry); + lec_arp_add(priv, entry); } if (remoteflag) entry->flags|=LEC_REMOTE_FLAG; @@ -1809,7 +1807,7 @@ return; } entry->status = ESI_UNKNOWN; - lec_arp_add(priv->lec_arp_tables, entry); + lec_arp_add(priv, entry); /* Temporary, changes before end of function */ } memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN); @@ -2056,7 +2054,7 @@ to_add->old_push = vcc->push; vcc->push = lec_push; priv->mcast_vcc = vcc; - lec_arp_add(priv->lec_arp_tables, to_add); + lec_arp_add(priv, to_add); lec_arp_put(priv); return 0; } @@ -2074,7 +2072,7 @@ for(entry = priv->lec_arp_tables[i];entry; entry=next) { next = entry->next; if (vcc == entry->vcc) { - lec_arp_remove(priv->lec_arp_tables,entry); + lec_arp_remove(priv, entry); kfree(entry); if (priv->mcast_vcc == vcc) { priv->mcast_vcc = NULL; @@ -2154,23 +2152,23 @@ lec_arp_get(priv); entry = priv->lec_arp_empty_ones; if (vcc == entry->vcc) { - spin_lock_irqsave(&lec_arp_spinlock, flags); + spin_lock_irqsave(&priv->lec_arp_lock, flags); del_timer(&entry->timer); memcpy(entry->mac_addr, src, ETH_ALEN); entry->status = ESI_FORWARD_DIRECT; entry->last_used = jiffies; priv->lec_arp_empty_ones = entry->next; - spin_unlock_irqrestore(&lec_arp_spinlock, flags); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); /* We might have got an entry */ if ((prev=lec_arp_find(priv,src))) { - lec_arp_remove(priv->lec_arp_tables, prev); + lec_arp_remove(priv, prev); kfree(prev); } - lec_arp_add(priv->lec_arp_tables, entry); + lec_arp_add(priv, entry); lec_arp_put(priv); return; } - spin_lock_irqsave(&lec_arp_spinlock, flags); + spin_lock_irqsave(&priv->lec_arp_lock, flags); prev = entry; entry = entry->next; while (entry && entry->vcc != vcc) { @@ -2180,7 +2178,7 @@ if (!entry) { DPRINTK("LEC_ARP: Arp_check_empties: entry not found!\n"); lec_arp_put(priv); - spin_unlock_irqrestore(&lec_arp_spinlock, flags); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); return; } del_timer(&entry->timer); @@ -2188,12 +2186,12 @@ entry->status = ESI_FORWARD_DIRECT; entry->last_used = jiffies; prev->next = entry->next; - spin_unlock_irqrestore(&lec_arp_spinlock, flags); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); if ((prev = lec_arp_find(priv, src))) { - lec_arp_remove(priv->lec_arp_tables,prev); + lec_arp_remove(priv, prev); kfree(prev); } - lec_arp_add(priv->lec_arp_tables,entry); + lec_arp_add(priv, entry); lec_arp_put(priv); } MODULE_LICENSE("GPL"); diff -urN linux-2.4.22-bk15/net/atm/lec.h linux-2.4.22-bk16/net/atm/lec.h --- linux-2.4.22-bk15/net/atm/lec.h 2003-08-25 04:44:44.000000000 -0700 +++ linux-2.4.22-bk16/net/atm/lec.h 2003-09-13 03:06:46.000000000 -0700 @@ -64,7 +64,8 @@ int (*lecd_attach)(struct atm_vcc *vcc, int arg); int (*mcast_attach)(struct atm_vcc *vcc, int arg); int (*vcc_attach)(struct atm_vcc *vcc, void *arg); - struct net_device **(*get_lecs)(void); + struct net_device * (*get_lec)(int itf); + struct module *owner; }; /* @@ -102,6 +103,7 @@ collects all those VCCs. LANEv1 client has only one item in this list. These entries are not aged out. */ atomic_t lec_arp_users; + spinlock_t lec_arp_lock; struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */ struct atm_vcc *lecd; struct timer_list lec_arp_timer; @@ -148,14 +150,16 @@ int lecd_attach(struct atm_vcc *vcc, int arg); int lec_vcc_attach(struct atm_vcc *vcc, void *arg); int lec_mcast_attach(struct atm_vcc *vcc, int arg); -struct net_device **get_dev_lec(void); +struct net_device *get_dev_lec(int itf); int make_lec(struct atm_vcc *vcc); int send_to_lecd(struct lec_priv *priv, atmlec_msg_type type, unsigned char *mac_addr, unsigned char *atm_addr, struct sk_buff *data); void lec_push(struct atm_vcc *vcc, struct sk_buff *skb); -void atm_lane_init(void); -void atm_lane_init_ops(struct atm_lane_ops *ops); +extern struct atm_lane_ops *atm_lane_ops; +void atm_lane_ops_set(struct atm_lane_ops *hook); +int try_atm_lane_ops(void); + #endif /* _LEC_H_ */ diff -urN linux-2.4.22-bk15/net/atm/mpc.c linux-2.4.22-bk16/net/atm/mpc.c --- linux-2.4.22-bk15/net/atm/mpc.c 2003-08-25 04:44:44.000000000 -0700 +++ linux-2.4.22-bk16/net/atm/mpc.c 2003-09-13 03:06:46.000000000 -0700 @@ -251,12 +251,14 @@ static struct net_device *find_lec_by_itfnum(int itf) { - extern struct atm_lane_ops atm_lane_ops; /* in common.c */ - - if (atm_lane_ops.get_lecs == NULL) + struct net_device *dev; + if (!try_atm_lane_ops()) return NULL; - return atm_lane_ops.get_lecs()[itf]; /* FIXME: something better */ + dev = atm_lane_ops->get_lec(itf); + if (atm_lane_ops->owner) + __MOD_DEC_USE_COUNT(atm_lane_ops->owner); + return dev; } static struct mpoa_client *alloc_mpc(void) @@ -777,9 +779,10 @@ if (mpc->dev) { /* check if the lec is LANE2 capable */ priv = (struct lec_priv *)mpc->dev->priv; - if (priv->lane_version < 2) + if (priv->lane_version < 2) { + dev_put(mpc->dev); mpc->dev = NULL; - else + } else priv->lane2_ops->associate_indicator = lane2_assoc_ind; } @@ -837,6 +840,7 @@ struct lec_priv *priv = (struct lec_priv *)mpc->dev->priv; priv->lane2_ops->associate_indicator = NULL; stop_mpc(mpc); + dev_put(mpc->dev); } mpc->in_ops->destroy_cache(mpc); @@ -973,6 +977,7 @@ } mpc->dev_num = priv->itfnum; mpc->dev = dev; + dev_hold(dev); dprintk("mpoa: (%s) was initialized\n", dev->name); break; case NETDEV_UNREGISTER: @@ -982,6 +987,7 @@ break; dprintk("mpoa: device (%s) was deallocated\n", dev->name); stop_mpc(mpc); + dev_put(mpc->dev); mpc->dev = NULL; break; case NETDEV_UP: @@ -1391,13 +1397,18 @@ return; } -void atm_mpoa_init_ops(struct atm_mpoa_ops *ops) +static struct atm_mpoa_ops __atm_mpoa_ops = { + .mpoad_attach = atm_mpoa_mpoad_attach, + .vcc_attach = atm_mpoa_vcc_attach, + .owner = THIS_MODULE +}; + +static __init int atm_mpoa_init(void) { - ops->mpoad_attach = atm_mpoa_mpoad_attach; - ops->vcc_attach = atm_mpoa_vcc_attach; + atm_mpoa_ops_set(&__atm_mpoa_ops); #ifdef CONFIG_PROC_FS - if(mpc_proc_init() != 0) + if (mpc_proc_init() != 0) printk(KERN_INFO "mpoa: failed to initialize /proc/mpoa\n"); else printk(KERN_INFO "mpoa: /proc/mpoa initialized\n"); @@ -1405,22 +1416,11 @@ printk("mpc.c: " __DATE__ " " __TIME__ " initialized\n"); - return; -} - -#ifdef MODULE -int init_module(void) -{ - extern struct atm_mpoa_ops atm_mpoa_ops; - - atm_mpoa_init_ops(&atm_mpoa_ops); - return 0; } -void cleanup_module(void) +void __exit atm_mpoa_cleanup(void) { - extern struct atm_mpoa_ops atm_mpoa_ops; struct mpoa_client *mpc, *tmp; struct atm_mpoa_qos *qos, *nextqos; struct lec_priv *priv; @@ -1435,8 +1435,7 @@ del_timer(&mpc_timer); unregister_netdevice_notifier(&mpoa_notifier); - atm_mpoa_ops.mpoad_attach = NULL; - atm_mpoa_ops.vcc_attach = NULL; + atm_mpoa_ops_set(NULL); mpc = mpcs; mpcs = NULL; @@ -1471,5 +1470,8 @@ return; } -#endif /* MODULE */ + +module_init(atm_mpoa_init); +module_exit(atm_mpoa_cleanup); + MODULE_LICENSE("GPL"); diff -urN linux-2.4.22-bk15/net/atm/mpc.h linux-2.4.22-bk16/net/atm/mpc.h --- linux-2.4.22-bk15/net/atm/mpc.h 2000-12-11 13:33:43.000000000 -0800 +++ linux-2.4.22-bk16/net/atm/mpc.h 2003-09-13 03:06:46.000000000 -0700 @@ -48,11 +48,13 @@ struct atm_mpoa_ops { int (*mpoad_attach)(struct atm_vcc *vcc, int arg); /* attach mpoa daemon */ int (*vcc_attach)(struct atm_vcc *vcc, long arg); /* attach shortcut vcc */ + struct module *owner; }; /* Boot/module initialization function */ -void atm_mpoa_init(void); -void atm_mpoa_init_ops(struct atm_mpoa_ops *ops); +extern struct atm_mpoa_ops *atm_mpoa_ops; +int try_atm_mpoa_ops(void); +void atm_mpoa_ops_set(struct atm_mpoa_ops *hook); /* MPOA QoS operations */ struct atm_mpoa_qos *atm_mpoa_add_qos(uint32_t dst_ip, struct atm_qos *qos); diff -urN linux-2.4.22-bk15/net/atm/proc.c linux-2.4.22-bk16/net/atm/proc.c --- linux-2.4.22-bk15/net/atm/proc.c 2003-09-13 03:06:30.000000000 -0700 +++ linux-2.4.22-bk16/net/atm/proc.c 2003-09-13 03:06:46.000000000 -0700 @@ -47,7 +47,6 @@ #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) #include "lec.h" #include "lec_arpc.h" -extern struct atm_lane_ops atm_lane_ops; /* in common.c */ #endif static ssize_t proc_dev_atm_read(struct file *file,char *buf,size_t count, @@ -482,57 +481,77 @@ #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) static int atm_lec_info(loff_t pos,char *buf) { + unsigned long flags; struct lec_priv *priv; struct lec_arp_table *entry; int i, count, d, e; - struct net_device **dev_lec; + struct net_device *dev; if (!pos) { return sprintf(buf,"Itf MAC ATM destination" " Status Flags " "VPI/VCI Recv VPI/VCI\n"); } - if (atm_lane_ops.get_lecs == NULL) + if (!try_atm_lane_ops()) return 0; /* the lane module is not there yet */ - else - dev_lec = atm_lane_ops.get_lecs(); count = pos; - for(d=0;dpriv)) continue; - for(i=0;ilec_arp_tables[i]; - for(;entry;entry=entry->next) { - if (--count) continue; - e=sprintf(buf,"%s ", - dev_lec[d]->name); - lec_info(entry,buf+e); + for(d = 0; d < MAX_LEC_ITF; d++) { + dev = atm_lane_ops->get_lec(d); + if (!dev || !(priv = (struct lec_priv *) dev->priv)) + continue; + spin_lock_irqsave(&priv->lec_arp_lock, flags); + for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + for(entry = priv->lec_arp_tables[i]; entry; entry = entry->next) { + if (--count) + continue; + e = sprintf(buf,"%s ", dev->name); + lec_info(entry, buf+e); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); + dev_put(dev); + if (atm_lane_ops->owner) + __MOD_DEC_USE_COUNT(atm_lane_ops->owner); return strlen(buf); } } - for(entry=priv->lec_arp_empty_ones; entry; - entry=entry->next) { - if (--count) continue; - e=sprintf(buf,"%s ",dev_lec[d]->name); + for(entry = priv->lec_arp_empty_ones; entry; entry = entry->next) { + if (--count) + continue; + e = sprintf(buf,"%s ", dev->name); lec_info(entry, buf+e); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); + dev_put(dev); + if (atm_lane_ops->owner) + __MOD_DEC_USE_COUNT(atm_lane_ops->owner); return strlen(buf); } - for(entry=priv->lec_no_forward; entry; - entry=entry->next) { - if (--count) continue; - e=sprintf(buf,"%s ",dev_lec[d]->name); + for(entry = priv->lec_no_forward; entry; entry=entry->next) { + if (--count) + continue; + e = sprintf(buf,"%s ", dev->name); lec_info(entry, buf+e); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); + dev_put(dev); + if (atm_lane_ops->owner) + __MOD_DEC_USE_COUNT(atm_lane_ops->owner); return strlen(buf); } - for(entry=priv->mcast_fwds; entry; - entry=entry->next) { - if (--count) continue; - e=sprintf(buf,"%s ",dev_lec[d]->name); + for(entry = priv->mcast_fwds; entry; entry = entry->next) { + if (--count) + continue; + e = sprintf(buf,"%s ", dev->name); lec_info(entry, buf+e); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); + dev_put(dev); + if (atm_lane_ops->owner) + __MOD_DEC_USE_COUNT(atm_lane_ops->owner); return strlen(buf); } + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); + dev_put(dev); } + if (atm_lane_ops->owner) + __MOD_DEC_USE_COUNT(atm_lane_ops->owner); return 0; } #endif diff -urN linux-2.4.22-bk15/net/bridge/br_forward.c linux-2.4.22-bk16/net/bridge/br_forward.c --- linux-2.4.22-bk15/net/bridge/br_forward.c 2002-08-02 17:39:46.000000000 -0700 +++ linux-2.4.22-bk16/net/bridge/br_forward.c 2003-09-13 03:06:46.000000000 -0700 @@ -59,6 +59,7 @@ indev = skb->dev; skb->dev = to->dev; + skb->ip_summed = CHECKSUM_NONE; NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev, __br_forward_finish);