diff -Nru a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking --- a/Documentation/filesystems/Locking Sun May 25 20:03:27 2003 +++ b/Documentation/filesystems/Locking Wed Jul 2 21:46:34 2003 @@ -318,7 +318,7 @@ ioctl: yes (see below) mmap: no open: maybe (see below) -flush: yes +flush: no release: no fsync: yes (see below) fasync: yes (see below) diff -Nru a/Documentation/pci.txt b/Documentation/pci.txt --- a/Documentation/pci.txt Tue Jun 24 15:30:22 2003 +++ b/Documentation/pci.txt Wed Jul 2 18:11:39 2003 @@ -7,14 +7,14 @@ Different PCI devices have different requirements and different bugs -- because of this, the PCI support layer in Linux kernel is not as trivial as one would wish. This short pamphlet tries to help all potential driver -authors to find their way through the deep forests of PCI handling. +authors find their way through the deep forests of PCI handling. 0. Structure of PCI drivers ~~~~~~~~~~~~~~~~~~~~~~~~~~~ There exist two kinds of PCI drivers: new-style ones (which leave most of probing for devices to the PCI layer and support online insertion and removal -of devices [thus supporting PCI, hot-pluggable PCI and CardBus in single +of devices [thus supporting PCI, hot-pluggable PCI and CardBus in a single driver]) and old-style ones which just do all the probing themselves. Unless you have a very good reason to do so, please don't use the old way of probing in any new code. After the driver finds the devices it wishes to operate @@ -174,7 +174,7 @@ the latency timer value if it's set to something bogus by the BIOS. If you want to use the PCI Memory-Write-Invalidate transaction, -call pci_set_mwi(). This enables bit PCI_COMMAND bit for Mem-Wr-Inval +call pci_set_mwi(). This enables the PCI_COMMAND bit for Mem-Wr-Inval and also ensures that the cache line size register is set correctly. Make sure to check the return value of pci_set_mwi(), not all architectures may support Memory-Write-Invalidate. @@ -236,7 +236,7 @@ 7. Miscellaneous hints ~~~~~~~~~~~~~~~~~~~~~~ When displaying PCI slot names to the user (for example when a driver wants -to tell the user what card has it found), please use pci_dev->slot_name +to tell the user what card has it found), please use pci_name(pci_dev) for this purpose. Always refer to the PCI devices by a pointer to the pci_dev structure. @@ -247,6 +247,10 @@ If you're going to use PCI bus mastering DMA, take a look at Documentation/DMA-mapping.txt. + +Don't try to turn on Fast Back to Back writes in your driver. All devices +on the bus need to be capable of doing it, so this is something which needs +to be handled by platform and generic code, not individual drivers. 8. Obsolete functions diff -Nru a/Makefile b/Makefile --- a/Makefile Wed Jul 2 11:16:44 2003 +++ b/Makefile Fri Jul 4 04:42:14 2003 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 5 SUBLEVEL = 74 -EXTRAVERSION = +EXTRAVERSION = -bk2 # *DOCUMENTATION* # To see a list of typical targets execute "make help" diff -Nru a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c --- a/arch/alpha/mm/numa.c Sun Apr 20 09:14:40 2003 +++ b/arch/alpha/mm/numa.c Wed Jul 2 21:20:53 2003 @@ -338,7 +338,7 @@ lmem_map = node_mem_map(nid); pfn = NODE_DATA(nid)->node_start_pfn; - for (i = 0; i < node_size(nid); i++, pfn++) + for (i = 0; i < node_spanned_pages(nid); i++, pfn++) if (page_is_ram(pfn) && PageReserved(lmem_map+i)) reservedpages++; } @@ -372,7 +372,7 @@ printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for (nid = 0; nid < numnodes; nid++) { struct page * lmem_map = node_mem_map(nid); - i = node_size(nid); + i = node_spanned_pages(nid); while (i-- > 0) { total++; if (PageReserved(lmem_map+i)) diff -Nru a/arch/arm/mm/init.c b/arch/arm/mm/init.c --- a/arch/arm/mm/init.c Wed May 28 06:36:31 2003 +++ b/arch/arm/mm/init.c Wed Jul 2 21:20:53 2003 @@ -79,7 +79,7 @@ struct page *page, *end; page = NODE_MEM_MAP(node); - end = page + NODE_DATA(node)->node_size; + end = page + NODE_DATA(node)->node_spanned_pages; do { total++; @@ -576,7 +576,7 @@ for (node = 0; node < numnodes; node++) { pg_data_t *pgdat = NODE_DATA(node); - if (pgdat->node_size != 0) + if (pgdat->node_spanned_pages != 0) totalram_pages += free_all_bootmem_node(pgdat); } diff -Nru a/arch/arm26/mm/init.c b/arch/arm26/mm/init.c --- a/arch/arm26/mm/init.c Wed Jun 4 04:15:46 2003 +++ b/arch/arm26/mm/init.c Wed Jul 2 21:20:53 2003 @@ -68,7 +68,7 @@ page = NODE_MEM_MAP(0); - end = page + NODE_DATA(0)->node_size; + end = page + NODE_DATA(0)->node_spanned_pages; do { total++; @@ -353,7 +353,7 @@ max_mapnr = virt_to_page(high_memory) - mem_map; /* this will put all unused low memory onto the freelists */ - if (pgdat->node_size != 0) + if (pgdat->node_spanned_pages != 0) totalram_pages += free_all_bootmem_node(pgdat); printk(KERN_INFO "Memory:"); diff -Nru a/arch/i386/Kconfig b/arch/i386/Kconfig --- a/arch/i386/Kconfig Thu Jun 26 23:05:44 2003 +++ b/arch/i386/Kconfig Wed Jul 2 21:19:42 2003 @@ -1339,6 +1339,14 @@ best used in conjunction with the NMI watchdog so that spinlock deadlocks are also debuggable. +config DEBUG_PAGEALLOC + bool "Page alloc debugging" + depends on DEBUG_KERNEL + help + Unmap pages from the kernel linear mapping after free_pages(). + This results in a large slowdown, but helps to find certain types + of memory corruptions. + config DEBUG_HIGHMEM bool "Highmem debugging" depends on DEBUG_KERNEL && HIGHMEM diff -Nru a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c --- a/arch/i386/kernel/cpu/common.c Sun Mar 23 07:55:48 2003 +++ b/arch/i386/kernel/cpu/common.c Wed Jul 2 21:19:42 2003 @@ -430,6 +430,14 @@ rise_init_cpu(); nexgen_init_cpu(); umc_init_cpu(); + +#ifdef CONFIG_DEBUG_PAGEALLOC + /* pse is not compatible with on-the-fly unmapping, + * disable it even if the cpus claim to support it. + */ + clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); + disable_pse = 1; +#endif } /* * cpu_init() initializes state that is per-CPU. Some data is already diff -Nru a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c --- a/arch/i386/kernel/io_apic.c Thu Jun 26 21:46:00 2003 +++ b/arch/i386/kernel/io_apic.c Wed Jul 2 21:21:32 2003 @@ -35,6 +35,7 @@ #include #include #include +#include #include @@ -2052,7 +2053,6 @@ */ static inline void check_timer(void) { - extern int timer_ack; int pin1, pin2; int vector; diff -Nru a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c --- a/arch/i386/kernel/time.c Mon Jun 16 12:48:20 2003 +++ b/arch/i386/kernel/time.c Wed Jul 2 21:21:32 2003 @@ -80,8 +80,7 @@ spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED; EXPORT_SYMBOL(i8253_lock); -extern struct timer_opts timer_none; -struct timer_opts* timer = &timer_none; +struct timer_opts *cur_timer = &timer_none; /* * This version of gettimeofday has microsecond resolution @@ -93,14 +92,14 @@ unsigned long usec, sec; do { + unsigned long lost; + seq = read_seqbegin(&xtime_lock); - usec = timer->get_offset(); - { - unsigned long lost = jiffies - wall_jiffies; - if (lost) - usec += lost * (1000000 / HZ); - } + usec = cur_timer->get_offset(); + lost = jiffies - wall_jiffies; + if (lost) + usec += lost * (1000000 / HZ); sec = xtime.tv_sec; usec += (xtime.tv_nsec / 1000); } while (read_seqretry(&xtime_lock, seq)); @@ -126,7 +125,7 @@ * wall time. Discover what correction gettimeofday() would have * made, and then undo it! */ - tv->tv_nsec -= timer->get_offset() * NSEC_PER_USEC; + tv->tv_nsec -= cur_timer->get_offset() * NSEC_PER_USEC; tv->tv_nsec -= (jiffies - wall_jiffies) * TICK_NSEC; while (tv->tv_nsec < 0) { @@ -180,7 +179,7 @@ */ unsigned long long monotonic_clock(void) { - return timer->monotonic_clock(); + return cur_timer->monotonic_clock(); } EXPORT_SYMBOL(monotonic_clock); @@ -189,7 +188,8 @@ * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick */ -static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static inline void do_timer_interrupt(int irq, void *dev_id, + struct pt_regs *regs) { #ifdef CONFIG_X86_IO_APIC if (timer_ack) { @@ -259,7 +259,7 @@ */ write_seqlock(&xtime_lock); - timer->mark_offset(); + cur_timer->mark_offset(); do_timer_interrupt(irq, NULL, regs); @@ -301,16 +301,13 @@ device_initcall(time_init_device); - void __init time_init(void) { - xtime.tv_sec = get_cmos_time(); wall_to_monotonic.tv_sec = -xtime.tv_sec; xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); wall_to_monotonic.tv_nsec = -xtime.tv_nsec; - - timer = select_timer(); + cur_timer = select_timer(); time_init_hook(); } diff -Nru a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c --- a/arch/i386/kernel/timers/timer.c Sun May 25 17:00:00 2003 +++ b/arch/i386/kernel/timers/timer.c Wed Jul 2 21:21:34 2003 @@ -3,12 +3,6 @@ #include #include -/* list of externed timers */ -extern struct timer_opts timer_pit; -extern struct timer_opts timer_tsc; -#ifdef CONFIG_X86_CYCLONE_TIMER -extern struct timer_opts timer_cyclone; -#endif /* list of timers, ordered by preference, NULL terminated */ static struct timer_opts* timers[] = { #ifdef CONFIG_X86_CYCLONE_TIMER @@ -28,6 +22,15 @@ return 1; } __setup("clock=", clock_setup); + + +/* The chosen timesource has been found to be bad. + * Fall back to a known good timesource (the PIT) + */ +void clock_fallback(void) +{ + cur_timer = &timer_pit; +} /* iterates through the list of timers, returning the first * one that initializes successfully. diff -Nru a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c --- a/arch/i386/kernel/timers/timer_cyclone.c Mon Apr 21 08:11:07 2003 +++ b/arch/i386/kernel/timers/timer_cyclone.c Thu Jul 3 17:52:12 2003 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -18,7 +19,6 @@ #include extern spinlock_t i8253_lock; -extern unsigned long jiffies; extern unsigned long calibrate_tsc(void); /* Number of usecs that the last interrupt was delayed */ @@ -88,7 +88,7 @@ * between cyclone and pit reads (as noted when * usec delta is > 90% # of usecs/tick) */ - if (abs(delay - delay_at_last_interrupt) > (900000/HZ)) + if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) jiffies++; } diff -Nru a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c --- a/arch/i386/kernel/timers/timer_tsc.c Mon Apr 21 08:11:07 2003 +++ b/arch/i386/kernel/timers/timer_tsc.c Thu Jul 3 17:52:12 2003 @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -21,7 +22,6 @@ int tsc_disable __initdata = 0; extern spinlock_t i8253_lock; -extern unsigned long jiffies; static int use_tsc; /* Number of usecs that the last interrupt was delayed */ @@ -124,6 +124,7 @@ int countmp; static int count1 = 0; unsigned long long this_offset, last_offset; + static int lost_count = 0; write_lock(&monotonic_lock); last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; @@ -178,9 +179,19 @@ delta += delay_at_last_interrupt; lost = delta/(1000000/HZ); delay = delta%(1000000/HZ); - if (lost >= 2) + if (lost >= 2) { jiffies += lost-1; + /* sanity check to ensure we're not always loosing ticks */ + if (lost_count++ > 100) { + printk(KERN_WARNING "Loosing too many ticks!\n"); + printk(KERN_WARNING "TSC cannot be used as a timesource." + " (Are you running with SpeedStep?)\n"); + printk(KERN_WARNING "Falling back to a sane timesource.\n"); + clock_fallback(); + } + } else + lost_count = 0; /* update the monotonic base value */ this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; monotonic_base += cycles_2_ns(this_offset - last_offset); @@ -194,7 +205,7 @@ * between tsc and pit reads (as noted when * usec delta is > 90% # of usecs/tick) */ - if (abs(delay - delay_at_last_interrupt) > (900000/HZ)) + if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ)) jiffies++; } diff -Nru a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c --- a/arch/i386/lib/delay.c Tue Feb 18 06:41:35 2003 +++ b/arch/i386/lib/delay.c Wed Jul 2 21:21:32 2003 @@ -25,7 +25,7 @@ void __delay(unsigned long loops) { - timer->delay(loops); + cur_timer->delay(loops); } inline void __const_udelay(unsigned long xloops) diff -Nru a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c --- a/arch/i386/mm/pageattr.c Sun Mar 2 18:13:32 2003 +++ b/arch/i386/mm/pageattr.c Wed Jul 2 21:19:42 2003 @@ -13,6 +13,10 @@ #include #include +static spinlock_t cpa_lock = SPIN_LOCK_UNLOCKED; +static struct list_head df_list = LIST_HEAD_INIT(df_list); + + static inline pte_t *lookup_address(unsigned long address) { pgd_t *pgd = pgd_offset_k(address); @@ -31,10 +35,15 @@ { int i; unsigned long addr; - struct page *base = alloc_pages(GFP_KERNEL, 0); + struct page *base; pte_t *pbase; + + spin_unlock_irq(&cpa_lock); + base = alloc_pages(GFP_KERNEL, 0); + spin_lock_irq(&cpa_lock); if (!base) return NULL; + address = __pa(address); addr = address & LARGE_PAGE_MASK; pbase = (pte_t *)page_address(base); @@ -87,7 +96,7 @@ } static int -__change_page_attr(struct page *page, pgprot_t prot, struct page **oldpage) +__change_page_attr(struct page *page, pgprot_t prot) { pte_t *kpte; unsigned long address; @@ -123,7 +132,7 @@ } if (cpu_has_pse && (atomic_read(&kpte_page->count) == 1)) { - *oldpage = kpte_page; + list_add(&kpte_page->list, &df_list); revert_page(kpte_page, address); } return 0; @@ -134,12 +143,6 @@ on_each_cpu(flush_kernel_map, NULL, 1, 1); } -struct deferred_page { - struct deferred_page *next; - struct page *fpage; -}; -static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */ - /* * Change the page attributes of an page in the linear mapping. * @@ -156,47 +159,54 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot) { int err = 0; - struct page *fpage; int i; + unsigned long flags; - down_write(&init_mm.mmap_sem); + spin_lock_irqsave(&cpa_lock, flags); for (i = 0; i < numpages; i++, page++) { - fpage = NULL; - err = __change_page_attr(page, prot, &fpage); + err = __change_page_attr(page, prot); if (err) break; - if (fpage) { - struct deferred_page *df; - df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL); - if (!df) { - flush_map(); - __free_page(fpage); - } else { - df->next = df_list; - df->fpage = fpage; - df_list = df; - } - } } - up_write(&init_mm.mmap_sem); + spin_unlock_irqrestore(&cpa_lock, flags); return err; } void global_flush_tlb(void) { - struct deferred_page *df, *next_df; + LIST_HEAD(l); + struct list_head* n; - down_read(&init_mm.mmap_sem); - df = xchg(&df_list, NULL); - up_read(&init_mm.mmap_sem); + BUG_ON(irqs_disabled()); + + spin_lock_irq(&cpa_lock); + list_splice_init(&df_list, &l); + spin_unlock_irq(&cpa_lock); flush_map(); - for (; df; df = next_df) { - next_df = df->next; - if (df->fpage) - __free_page(df->fpage); - kfree(df); - } + n = l.next; + while (n != &l) { + struct page *pg = list_entry(n, struct page, list); + n = n->next; + __free_page(pg); + } } + +#ifdef CONFIG_DEBUG_PAGEALLOC +void kernel_map_pages(struct page *page, int numpages, int enable) +{ + if (PageHighMem(page)) + return; + /* the return value is ignored - the calls cannot fail, + * large pages are disabled at boot time. + */ + change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); + /* we should perform an IPI and flush all tlbs, + * but that can deadlock->flush only current cpu. + */ + __flush_tlb_all(); +} +EXPORT_SYMBOL(kernel_map_pages); +#endif EXPORT_SYMBOL(change_page_attr); EXPORT_SYMBOL(global_flush_tlb); diff -Nru a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c --- a/arch/i386/mm/pgtable.c Mon May 12 11:11:38 2003 +++ b/arch/i386/mm/pgtable.c Wed Jul 2 21:20:53 2003 @@ -34,7 +34,7 @@ show_free_areas(); printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_size; ++i) { + for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat->node_mem_map + i; total++; if (PageHighMem(page)) diff -Nru a/arch/i386/pci/direct.c b/arch/i386/pci/direct.c --- a/arch/i386/pci/direct.c Mon Jun 23 17:12:27 2003 +++ b/arch/i386/pci/direct.c Wed Jul 2 18:11:41 2003 @@ -177,7 +177,7 @@ * This should be close to trivial, but it isn't, because there are buggy * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID. */ -static int __devinit pci_sanity_check(struct pci_raw_ops *o) +static int __init pci_sanity_check(struct pci_raw_ops *o) { u32 x = 0; int devfn; diff -Nru a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c --- a/arch/i386/pci/irq.c Thu Jun 19 14:58:11 2003 +++ b/arch/i386/pci/irq.c Thu Jul 3 12:49:14 2003 @@ -102,13 +102,12 @@ #endif busmap[e->bus] = 1; } - for(i=1; i<256; i++) - /* - * It might be a secondary bus, but in this case its parent is already - * known (ascending bus order) and therefore pci_scan_bus returns immediately. - */ - if (busmap[i] && pci_scan_bus(i, &pci_root_ops, NULL)) + for(i = 1; i < 256; i++) { + if (!busmap[i] || pci_find_bus(0, i)) + continue; + if (pci_scan_bus(i, &pci_root_ops, NULL)) printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i); + } pcibios_last_bus = -1; } @@ -196,15 +195,16 @@ /* * The VIA pirq rules are nibble-based, like ALI, * but without the ugly irq number munging. + * However, PIRQD is in the upper instead of lower 4 bits. */ static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq) { - return read_config_nybble(router, 0x55, pirq); + return read_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq); } static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { - write_config_nybble(router, 0x55, pirq, irq); + write_config_nybble(router, 0x55, pirq == 4 ? 5 : pirq, irq); return 1; } diff -Nru a/arch/i386/pci/legacy.c b/arch/i386/pci/legacy.c --- a/arch/i386/pci/legacy.c Thu Jun 19 14:58:11 2003 +++ b/arch/i386/pci/legacy.c Wed Jul 2 19:45:28 2003 @@ -11,40 +11,26 @@ */ static void __devinit pcibios_fixup_peer_bridges(void) { - int n; - struct pci_bus *bus; - struct pci_dev *dev; - u16 l; + int n, devfn; if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) return; DBG("PCI: Peer bridge fixup\n"); - bus = kmalloc(sizeof(*bus), GFP_ATOMIC); - dev = kmalloc(sizeof(*dev), GFP_ATOMIC); - if (!bus || !dev) { - printk(KERN_ERR "Out of memory in %s\n", __FUNCTION__); - goto exit; - } - for (n=0; n <= pcibios_last_bus; n++) { - if (pci_bus_exists(&pci_root_buses, n)) + u32 l; + if (pci_find_bus(0, n)) continue; - bus->number = n; - bus->ops = &pci_root_ops; - dev->bus = bus; - for (dev->devfn=0; dev->devfn<256; dev->devfn += 8) - if (!pci_read_config_word(dev, PCI_VENDOR_ID, &l) && + for (devfn = 0; devfn < 256; devfn += 8) { + if (!raw_pci_ops->read(0, n, devfn, PCI_VENDOR_ID, 2, &l) && l != 0x0000 && l != 0xffff) { DBG("Found device at %02x:%02x [%04x]\n", n, dev->devfn, l); printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); pci_scan_bus(n, &pci_root_ops, NULL); break; } + } } -exit: - kfree(dev); - kfree(bus); } static int __init pci_legacy_init(void) diff -Nru a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c --- a/arch/ia64/ia32/binfmt_elf32.c Mon Jun 16 17:03:24 2003 +++ b/arch/ia64/ia32/binfmt_elf32.c Wed Jul 2 21:22:38 2003 @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -177,7 +178,7 @@ if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } diff -Nru a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c --- a/arch/ia64/kernel/sys_ia64.c Tue Jun 10 14:00:47 2003 +++ b/arch/ia64/kernel/sys_ia64.c Wed Jul 2 21:22:38 2003 @@ -100,7 +100,6 @@ asmlinkage unsigned long ia64_brk (unsigned long brk) { - extern int vm_enough_memory (long pages); unsigned long rlim, retval, newbrk, oldbrk; struct mm_struct *mm = current->mm; diff -Nru a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c --- a/arch/ia64/mm/init.c Tue Jun 17 23:50:17 2003 +++ b/arch/ia64/mm/init.c Wed Jul 2 21:20:53 2003 @@ -232,7 +232,7 @@ printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { printk("Node ID: %d\n", pgdat->node_id); - for(i = 0; i < pgdat->node_size; i++) { + for(i = 0; i < pgdat->node_spanned_pages; i++) { if (PageReserved(pgdat->node_mem_map+i)) reserved++; else if (PageSwapCache(pgdat->node_mem_map+i)) @@ -240,7 +240,7 @@ else if (page_count(pgdat->node_mem_map + i)) shared += page_count(pgdat->node_mem_map + i) - 1; } - printk("\t%d pages of RAM\n", pgdat->node_size); + printk("\t%d pages of RAM\n", pgdat->node_spanned_pages); printk("\t%d reserved pages\n", reserved); printk("\t%d pages shared\n", shared); printk("\t%d pages swap cached\n", cached); diff -Nru a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c --- a/arch/mips/kernel/sysirix.c Sun Jun 15 16:54:33 2003 +++ b/arch/mips/kernel/sysirix.c Wed Jul 2 21:22:38 2003 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -527,8 +528,6 @@ return get_seconds(); } -int vm_enough_memory(long pages); - /* * IRIX is completely broken... it returns 0 on success, otherwise * ENOMEM. @@ -585,7 +584,7 @@ /* * Check if we have enough memory.. */ - if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) { + if (security_vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) { ret = -ENOMEM; goto out; } diff -Nru a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c --- a/arch/ppc64/mm/init.c Fri Jun 6 18:19:27 2003 +++ b/arch/ppc64/mm/init.c Wed Jul 2 21:20:53 2003 @@ -109,7 +109,7 @@ show_free_areas(); printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_size; i++) { + for (i = 0; i < pgdat->node_spanned_pages; i++) { page = pgdat->node_mem_map + i; total++; if (PageReserved(page)) @@ -564,7 +564,7 @@ int nid; for (nid = 0; nid < numnodes; nid++) { - if (node_data[nid].node_size != 0) { + if (node_data[nid].node_spanned_pages != 0) { printk("freeing bootmem node %x\n", nid); totalram_pages += free_all_bootmem_node(NODE_DATA(nid)); diff -Nru a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c --- a/arch/ppc64/mm/numa.c Sat Jun 7 13:07:24 2003 +++ b/arch/ppc64/mm/numa.c Wed Jul 2 21:20:53 2003 @@ -160,21 +160,21 @@ * this simple case and complain if there is a gap in * memory */ - if (node_data[numa_domain].node_size) { + if (node_data[numa_domain].node_spanned_pages) { unsigned long shouldstart = node_data[numa_domain].node_start_pfn + - node_data[numa_domain].node_size; + node_data[numa_domain].node_spanned_pages; if (shouldstart != (start / PAGE_SIZE)) { printk(KERN_ERR "Hole in node, disabling " "region start %lx length %lx\n", start, size); continue; } - node_data[numa_domain].node_size += size / PAGE_SIZE; + node_data[numa_domain].node_spanned_pages += size / PAGE_SIZE; } else { node_data[numa_domain].node_start_pfn = start / PAGE_SIZE; - node_data[numa_domain].node_size = size / PAGE_SIZE; + node_data[numa_domain].node_spanned_pages = size / PAGE_SIZE; } for (i = start ; i < (start+size); i += MEMORY_INCREMENT) @@ -202,7 +202,7 @@ map_cpu_to_node(i, 0); node_data[0].node_start_pfn = 0; - node_data[0].node_size = lmb_end_of_DRAM() / PAGE_SIZE; + node_data[0].node_spanned_pages = lmb_end_of_DRAM() / PAGE_SIZE; for (i = 0 ; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT) numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; @@ -224,12 +224,12 @@ unsigned long bootmem_paddr; unsigned long bootmap_pages; - if (node_data[nid].node_size == 0) + if (node_data[nid].node_spanned_pages == 0) continue; start_paddr = node_data[nid].node_start_pfn * PAGE_SIZE; end_paddr = start_paddr + - (node_data[nid].node_size * PAGE_SIZE); + (node_data[nid].node_spanned_pages * PAGE_SIZE); dbg("node %d\n", nid); dbg("start_paddr = %lx\n", start_paddr); @@ -311,7 +311,7 @@ unsigned long start_pfn; unsigned long end_pfn; - if (node_data[nid].node_size == 0) + if (node_data[nid].node_spanned_pages == 0) continue; start_pfn = plat_node_bdata[nid].node_boot_start >> PAGE_SHIFT; diff -Nru a/arch/s390/kernel/compat_exec.c b/arch/s390/kernel/compat_exec.c --- a/arch/s390/kernel/compat_exec.c Wed May 14 23:43:46 2003 +++ b/arch/s390/kernel/compat_exec.c Wed Jul 2 21:22:38 2003 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -55,7 +56,7 @@ if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } diff -Nru a/arch/sh/kernel/cpu/sh4/pci-sh7751.c b/arch/sh/kernel/cpu/sh4/pci-sh7751.c --- a/arch/sh/kernel/cpu/sh4/pci-sh7751.c Tue May 6 15:06:40 2003 +++ b/arch/sh/kernel/cpu/sh4/pci-sh7751.c Thu Jul 3 08:08:45 2003 @@ -200,7 +200,7 @@ return; PCIDBG(2,"PCI: Peer bridge fixup\n"); for (n=0; n <= pcibios_last_bus; n++) { - if (pci_bus_exists(&pci_root_buses, n)) + if (pci_find_bus(0, n)) continue; bus.number = n; bus.ops = pci_root_ops; diff -Nru a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c --- a/arch/x86_64/ia32/ia32_binfmt.c Thu Jun 26 02:32:32 2003 +++ b/arch/x86_64/ia32/ia32_binfmt.c Wed Jul 2 21:22:38 2003 @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include #include @@ -339,7 +341,7 @@ if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } diff -Nru a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c --- a/arch/x86_64/mm/init.c Sun Jun 15 07:12:07 2003 +++ b/arch/x86_64/mm/init.c Wed Jul 2 21:20:53 2003 @@ -64,7 +64,7 @@ printk("Free swap: %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { - for (i = 0; i < pgdat->node_size; ++i) { + for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat->node_mem_map + i; total++; if (PageReserved(page)) diff -Nru a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c --- a/arch/x86_64/mm/numa.c Fri May 23 03:22:07 2003 +++ b/arch/x86_64/mm/numa.c Wed Jul 2 21:20:53 2003 @@ -86,7 +86,7 @@ memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; NODE_DATA(nodeid)->node_start_pfn = start_pfn; - NODE_DATA(nodeid)->node_size = end_pfn - start_pfn; + NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; /* Find a place for the bootmem map */ bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); diff -Nru a/drivers/base/class.c b/drivers/base/class.c --- a/drivers/base/class.c Wed Jun 11 15:40:05 2003 +++ b/drivers/base/class.c Thu Jul 3 17:51:00 2003 @@ -3,6 +3,8 @@ * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs + * Copyright (c) 2003 Greg Kroah-Hartman + * Copyright (c) 2003 IBM Corp. * * This file is released under the GPLv2 * @@ -337,6 +339,24 @@ class_dev->class_id); class_device_del(class_dev); class_device_put(class_dev); +} + +int class_device_rename(struct class_device *class_dev, char *new_name) +{ + class_dev = class_device_get(class_dev); + if (!class_dev) + return -EINVAL; + + pr_debug("CLASS: renaming '%s' to '%s'\n", class_dev->class_id, + new_name); + + strlcpy(class_dev->class_id, new_name, KOBJ_NAME_LEN); + + kobject_rename(&class_dev->kobj, new_name); + + class_device_put(class_dev); + + return 0; } struct class_device * class_device_get(struct class_device *class_dev) diff -Nru a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c --- a/drivers/base/firmware_class.c Tue Jun 17 02:10:01 2003 +++ b/drivers/base/firmware_class.c Thu Jul 3 14:02:41 2003 @@ -149,7 +149,7 @@ if (offset + count > fw->size) count = fw->size - offset; - memcpy(buffer, fw->data + offset, count); + memcpy(buffer + offset, fw->data + offset, count); return count; } static int @@ -198,7 +198,7 @@ if (retval) return retval; - memcpy(fw->data + offset, buffer, count); + memcpy(fw->data + offset, buffer + offset, count); fw->size = max_t(size_t, offset + count, fw->size); diff -Nru a/drivers/block/cciss.c b/drivers/block/cciss.c --- a/drivers/block/cciss.c Thu Jun 5 06:17:28 2003 +++ b/drivers/block/cciss.c Wed Jul 2 21:23:13 2003 @@ -1887,7 +1887,7 @@ BUG(); if (( c = cmd_alloc(h, 1)) == NULL) - goto startio; + goto full; blkdev_dequeue_request(creq); @@ -1960,8 +1960,9 @@ h->maxQsinceinit = h->Qdepth; goto queue; -startio: +full: blk_stop_queue(q); +startio: start_io(h); } diff -Nru a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c --- a/drivers/block/ll_rw_blk.c Mon Jun 2 18:32:46 2003 +++ b/drivers/block/ll_rw_blk.c Wed Jul 2 21:23:13 2003 @@ -1072,8 +1072,8 @@ **/ void blk_start_queue(request_queue_t *q) { - if (test_and_clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) - schedule_work(&q->unplug_work); + clear_bit(QUEUE_FLAG_STOPPED, &q->queue_flags); + schedule_work(&q->unplug_work); } /** diff -Nru a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c --- a/drivers/ieee1394/sbp2.c Tue Jun 24 20:47:32 2003 +++ b/drivers/ieee1394/sbp2.c Thu Jul 3 00:39:27 2003 @@ -56,6 +56,8 @@ #include #include #include +#include + #include #include #include diff -Nru a/drivers/net/Kconfig b/drivers/net/Kconfig --- a/drivers/net/Kconfig Wed Jun 25 14:16:04 2003 +++ b/drivers/net/Kconfig Wed Jul 2 21:21:29 2003 @@ -1397,7 +1397,7 @@ config TC35815 tristate "TOSHIBA TC35815 Ethernet support" - depends on NET_PCI && PCI + depends on NET_PCI && PCI && TOSHIBA_JMR3927 config DGRS tristate "Digi Intl. RightSwitch SE-X support" diff -Nru a/drivers/net/e100/e100_main.c b/drivers/net/e100/e100_main.c --- a/drivers/net/e100/e100_main.c Sun Jun 1 15:50:09 2003 +++ b/drivers/net/e100/e100_main.c Thu Jun 26 17:35:25 2003 @@ -1085,9 +1085,9 @@ goto exit1; } - e100_prepare_xmit_buff(bdp, skb); - bdp->drv_stats.net_stats.tx_bytes += skb->len; + + e100_prepare_xmit_buff(bdp, skb); dev->trans_start = jiffies; diff -Nru a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c --- a/drivers/pci/hotplug/acpiphp_glue.c Tue Jun 17 17:36:42 2003 +++ b/drivers/pci/hotplug/acpiphp_glue.c Thu Jul 3 04:46:01 2003 @@ -385,7 +385,7 @@ bridge->seg = seg; bridge->bus = bus; - bridge->pci_bus = pci_find_bus(bus); + bridge->pci_bus = pci_find_bus(seg, bus); bridge->res_lock = SPIN_LOCK_UNLOCKED; diff -Nru a/drivers/pci/hotplug/cpci_hotplug_pci.c b/drivers/pci/hotplug/cpci_hotplug_pci.c --- a/drivers/pci/hotplug/cpci_hotplug_pci.c Tue Jun 3 15:44:23 2003 +++ b/drivers/pci/hotplug/cpci_hotplug_pci.c Thu Jul 3 04:46:06 2003 @@ -395,7 +395,7 @@ /* Scan behind bridge */ n = pci_scan_bridge(bus, dev, max, 2); - child = pci_find_bus(max + 1); + child = pci_find_bus(0, max + 1); if (!child) return -ENODEV; pci_proc_attach_bus(child); diff -Nru a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c --- a/drivers/pci/hotplug/ibmphp_core.c Thu Jun 26 08:35:33 2003 +++ b/drivers/pci/hotplug/ibmphp_core.c Thu Jul 3 04:46:23 2003 @@ -774,7 +774,7 @@ struct pci_dev *dev; u16 l; - if (pci_find_bus(busno) || !(ibmphp_find_same_bus_num (busno))) + if (pci_find_bus(0, busno) || !(ibmphp_find_same_bus_num (busno))) return 1; bus = kmalloc (sizeof (*bus), GFP_KERNEL); @@ -819,7 +819,7 @@ func->dev = pci_find_slot (func->busno, PCI_DEVFN(func->device, func->function)); if (func->dev == NULL) { - struct pci_bus *bus = pci_find_bus(func->busno); + struct pci_bus *bus = pci_find_bus(0, func->busno); if (!bus) return 0; @@ -1335,7 +1335,7 @@ goto exit; } - bus = pci_find_bus(0); + bus = pci_find_bus(0, 0); if (!bus) { err ("Can't find the root pci bus, can not continue\n"); rc = -ENODEV; diff -Nru a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c --- a/drivers/pci/pci-sysfs.c Wed Jun 11 04:46:21 2003 +++ b/drivers/pci/pci-sysfs.c Thu Jul 3 13:32:04 2003 @@ -3,6 +3,8 @@ * * (C) Copyright 2002 Greg Kroah-Hartman * (C) Copyright 2002 IBM Corp. + * (C) Copyright 2003 Matthew Wilcox + * (C) Copyright 2003 Hewlett-Packard * * File attributes for PCI devices * @@ -60,6 +62,108 @@ static DEVICE_ATTR(resource,S_IRUGO,pci_show_resources,NULL); +static ssize_t +pci_read_config(struct kobject *kobj, char *buf, loff_t off, size_t count) +{ + struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj)); + unsigned int size = 64; + + /* Several chips lock up trying to read undefined config space */ + if (capable(CAP_SYS_ADMIN)) { + size = 256; + } else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) { + size = 128; + } + + if (off > size) + return 0; + if (off + count > size) { + size -= off; + count = size; + } else { + size = count; + } + + while (off & 3) { + unsigned char val; + pci_read_config_byte(dev, off, &val); + buf[off] = val; + off++; + if (--size == 0) + break; + } + + while (size > 3) { + unsigned int val; + pci_read_config_dword(dev, off, &val); + buf[off] = val & 0xff; + buf[off + 1] = (val >> 8) & 0xff; + buf[off + 2] = (val >> 16) & 0xff; + buf[off + 3] = (val >> 24) & 0xff; + off += 4; + size -= 4; + } + + while (size > 0) { + unsigned char val; + pci_read_config_byte(dev, off, &val); + buf[off] = val; + off++; + --size; + } + + return count; +} + +static ssize_t +pci_write_config(struct kobject *kobj, char *buf, loff_t off, size_t count) +{ + struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj)); + unsigned int size = count; + + if (off > 256) + return 0; + if (off + count > 256) { + size = 256 - off; + count = size; + } + + while (off & 3) { + pci_write_config_byte(dev, off, buf[off]); + off++; + if (--size == 0) + break; + } + + while (size > 3) { + unsigned int val = buf[off]; + val |= (unsigned int) buf[off + 1] << 8; + val |= (unsigned int) buf[off + 2] << 16; + val |= (unsigned int) buf[off + 3] << 24; + pci_write_config_dword(dev, off, val); + off += 4; + size -= 4; + } + + while (size > 0) { + pci_write_config_byte(dev, off, buf[off]); + off++; + --size; + } + + return count; +} + +static struct bin_attribute pci_config_attr = { + .attr = { + .name = "config", + .mode = S_IRUGO | S_IWUSR, + }, + .size = 256, + .read = pci_read_config, + .write = pci_write_config, +}; + void pci_create_sysfs_dev_files (struct pci_dev *pdev) { struct device *dev = &pdev->dev; @@ -72,4 +176,5 @@ device_create_file (dev, &dev_attr_class); device_create_file (dev, &dev_attr_irq); device_create_file (dev, &dev_attr_resource); + sysfs_create_bin_file(&dev->kobj, &pci_config_attr); } diff -Nru a/drivers/pci/pci.h b/drivers/pci/pci.h --- a/drivers/pci/pci.h Thu Jun 19 15:02:58 2003 +++ b/drivers/pci/pci.h Thu Jul 3 04:43:34 2003 @@ -29,7 +29,6 @@ extern unsigned char pci_max_busnr(void); extern unsigned char pci_bus_max_busnr(struct pci_bus *bus); extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, int cap); -extern struct pci_bus *pci_find_bus(unsigned char busnr); struct pci_dev_wrapped { struct pci_dev *dev; diff -Nru a/drivers/pci/probe.c b/drivers/pci/probe.c --- a/drivers/pci/probe.c Wed Jun 25 19:45:53 2003 +++ b/drivers/pci/probe.c Thu Jul 3 08:08:45 2003 @@ -633,22 +633,11 @@ return max; } -int __devinit pci_bus_exists(const struct list_head *list, int nr) -{ - const struct pci_bus *b; - - list_for_each_entry(b, list, node) { - if (b->number == nr || pci_bus_exists(&b->children, nr)) - return 1; - } - return 0; -} - struct pci_bus * __devinit pci_scan_bus_parented(struct device *parent, int bus, struct pci_ops *ops, void *sysdata) { struct pci_bus *b; - if (pci_bus_exists(&pci_root_buses, bus)) { + if (pci_find_bus(0, bus)) { /* If we already got to this bus through a different bridge, ignore it */ DBG("PCI: Bus %02x already known\n", bus); return NULL; diff -Nru a/drivers/pci/search.c b/drivers/pci/search.c --- a/drivers/pci/search.c Thu Jun 19 09:14:03 2003 +++ b/drivers/pci/search.c Thu Jul 3 08:09:17 2003 @@ -7,12 +7,14 @@ * Copyright 2003 -- Greg Kroah-Hartman */ +#include #include #include +#include spinlock_t pci_bus_lock = SPIN_LOCK_UNLOCKED; -static struct pci_bus * +static struct pci_bus * __devinit pci_do_find_bus(struct pci_bus* bus, unsigned char busnr) { struct pci_bus* child; @@ -30,22 +32,24 @@ } /** - * pci_find_bus - locate PCI bus from a given bus number + * pci_find_bus - locate PCI bus from a given domain and bus number + * @domain: number of PCI domain to search * @busnr: number of desired PCI bus * - * Given a PCI bus number, the desired PCI bus is located in system - * global list of PCI buses. If the bus is found, a pointer to its + * Given a PCI bus number and domain number, the desired PCI bus is located + * in the global list of PCI buses. If the bus is found, a pointer to its * data structure is returned. If no bus is found, %NULL is returned. */ -struct pci_bus * -pci_find_bus(unsigned char busnr) +struct pci_bus * __devinit pci_find_bus(int domain, int busnr) { - struct pci_bus* bus = NULL; - struct pci_bus* tmp_bus; + struct pci_bus *bus = NULL; + struct pci_bus *tmp_bus; while ((bus = pci_find_next_bus(bus)) != NULL) { + if (pci_domain_nr(bus) != domain) + continue; tmp_bus = pci_do_find_bus(bus, busnr); - if(tmp_bus) + if (tmp_bus) return tmp_bus; } return NULL; @@ -66,7 +70,7 @@ struct list_head *n; struct pci_bus *b = NULL; - WARN_ON(irqs_disabled()); + WARN_ON(in_interrupt()); spin_lock(&pci_bus_lock); n = from ? from->node.next : pci_root_buses.next; if (n != &pci_root_buses) @@ -125,7 +129,7 @@ struct list_head *n; struct pci_dev *dev; - WARN_ON(irqs_disabled()); + WARN_ON(in_interrupt()); spin_lock(&pci_bus_lock); n = from ? from->global_list.next : pci_devices.next; @@ -190,7 +194,7 @@ struct list_head *n; struct pci_dev *dev; - WARN_ON(irqs_disabled()); + WARN_ON(in_interrupt()); spin_lock(&pci_bus_lock); n = from ? from->global_list.next : pci_devices.next; @@ -256,7 +260,7 @@ struct list_head *n; struct pci_dev *dev; - WARN_ON(irqs_disabled()); + WARN_ON(in_interrupt()); spin_lock(&pci_bus_lock); n = from ? from->global_list.prev : pci_devices.prev; diff -Nru a/drivers/pnp/interface.c b/drivers/pnp/interface.c --- a/drivers/pnp/interface.c Tue Jun 24 13:17:01 2003 +++ b/drivers/pnp/interface.c Thu Jul 3 08:38:43 2003 @@ -259,7 +259,10 @@ for (i = 0; i < PNP_MAX_PORT; i++) { if (pnp_port_valid(dev, i)) { pnp_printf(buffer,"io"); - pnp_printf(buffer," 0x%lx-0x%lx \n", + if (pnp_port_flags(dev, i) & IORESOURCE_DISABLED) + pnp_printf(buffer," disabled\n"); + else + pnp_printf(buffer," 0x%lx-0x%lx\n", pnp_port_start(dev, i), pnp_port_end(dev, i)); } @@ -267,7 +270,10 @@ for (i = 0; i < PNP_MAX_MEM; i++) { if (pnp_mem_valid(dev, i)) { pnp_printf(buffer,"mem"); - pnp_printf(buffer," 0x%lx-0x%lx \n", + if (pnp_mem_flags(dev, i) & IORESOURCE_DISABLED) + pnp_printf(buffer," disabled\n"); + else + pnp_printf(buffer," 0x%lx-0x%lx\n", pnp_mem_start(dev, i), pnp_mem_end(dev, i)); } @@ -275,13 +281,21 @@ for (i = 0; i < PNP_MAX_IRQ; i++) { if (pnp_irq_valid(dev, i)) { pnp_printf(buffer,"irq"); - pnp_printf(buffer," %ld \n", pnp_irq(dev, i)); + if (pnp_irq_flags(dev, i) & IORESOURCE_DISABLED) + pnp_printf(buffer," disabled\n"); + else + pnp_printf(buffer," %ld\n", + pnp_irq(dev, i)); } } for (i = 0; i < PNP_MAX_DMA; i++) { if (pnp_dma_valid(dev, i)) { pnp_printf(buffer,"dma"); - pnp_printf(buffer," %ld \n", pnp_dma(dev, i)); + if (pnp_dma_flags(dev, i) & IORESOURCE_DISABLED) + pnp_printf(buffer," disabled\n"); + else + pnp_printf(buffer," %ld\n", + pnp_dma(dev, i)); } } ret = (buffer->curr - buf); diff -Nru a/drivers/pnp/manager.c b/drivers/pnp/manager.c --- a/drivers/pnp/manager.c Tue Jun 24 13:24:06 2003 +++ b/drivers/pnp/manager.c Thu Jul 3 08:45:32 2003 @@ -45,9 +45,15 @@ flags = &dev->res.port_resource[idx].flags; /* set the initial values */ + *flags = *flags | rule->flags | IORESOURCE_IO; + + if (!rule->size) { + *flags |= IORESOURCE_DISABLED; + return 1; /* skip disabled resource requests */ + } + *start = rule->min; *end = *start + rule->size - 1; - *flags = *flags | rule->flags | IORESOURCE_IO; /* run through until pnp_check_port is happy */ while (!pnp_check_port(dev, idx)) { @@ -81,8 +87,6 @@ flags = &dev->res.mem_resource[idx].flags; /* set the initial values */ - *start = rule->min; - *end = *start + rule->size -1; *flags = *flags | rule->flags | IORESOURCE_MEM; /* convert pnp flags to standard Linux flags */ @@ -95,6 +99,14 @@ if (rule->flags & IORESOURCE_MEM_SHADOWABLE) *flags |= IORESOURCE_SHADOWABLE; + if (!rule->size) { + *flags |= IORESOURCE_DISABLED; + return 1; /* skip disabled resource requests */ + } + + *start = rule->min; + *end = *start + rule->size -1; + /* run through until pnp_check_mem is happy */ while (!pnp_check_mem(dev, idx)) { *start += rule->align; @@ -135,6 +147,11 @@ /* set the initial values */ *flags = *flags | rule->flags | IORESOURCE_IRQ; + if (!rule->map) { + *flags |= IORESOURCE_DISABLED; + return 1; /* skip disabled resource requests */ + } + for (i = 0; i < 16; i++) { if(rule->map & (1<flags | IORESOURCE_DMA; + if (!rule->map) { + *flags |= IORESOURCE_DISABLED; + return 1; /* skip disabled resource requests */ + } + for (i = 0; i < 8; i++) { if(rule->map & (1<res = *res; if (!(mode & PNP_CONFIG_FORCE)) { for (i = 0; i < PNP_MAX_PORT; i++) { - if(pnp_check_port(dev,i)) + if(!pnp_check_port(dev,i)) goto fail; } for (i = 0; i < PNP_MAX_MEM; i++) { - if(pnp_check_mem(dev,i)) + if(!pnp_check_mem(dev,i)) goto fail; } for (i = 0; i < PNP_MAX_IRQ; i++) { - if(pnp_check_irq(dev,i)) + if(!pnp_check_irq(dev,i)) goto fail; } for (i = 0; i < PNP_MAX_DMA; i++) { - if(pnp_check_dma(dev,i)) + if(!pnp_check_dma(dev,i)) goto fail; } } up(&pnp_res_mutex); - pnp_auto_config_dev(dev); kfree(bak); return 0; diff -Nru a/drivers/pnp/resource.c b/drivers/pnp/resource.c --- a/drivers/pnp/resource.c Wed Jun 25 16:30:51 2003 +++ b/drivers/pnp/resource.c Thu Jul 3 08:38:43 2003 @@ -286,6 +286,8 @@ continue; for (tmp = 0; tmp < PNP_MAX_PORT; tmp++) { if (tdev->res.port_resource[tmp].flags & IORESOURCE_IO) { + if (pnp_port_flags(dev, tmp) & IORESOURCE_DISABLED) + continue; tport = &tdev->res.port_resource[tmp].start; tend = &tdev->res.port_resource[tmp].end; if (ranged_conflict(port,end,tport,tend)) @@ -340,6 +342,8 @@ continue; for (tmp = 0; tmp < PNP_MAX_MEM; tmp++) { if (tdev->res.mem_resource[tmp].flags & IORESOURCE_MEM) { + if (pnp_mem_flags(dev, tmp) & IORESOURCE_DISABLED) + continue; taddr = &tdev->res.mem_resource[tmp].start; tend = &tdev->res.mem_resource[tmp].end; if (ranged_conflict(addr,end,taddr,tend)) @@ -409,6 +413,8 @@ continue; for (tmp = 0; tmp < PNP_MAX_IRQ; tmp++) { if (tdev->res.irq_resource[tmp].flags & IORESOURCE_IRQ) { + if (pnp_irq_flags(dev, tmp) & IORESOURCE_DISABLED) + continue; if ((tdev->res.irq_resource[tmp].start == *irq)) return 0; } @@ -462,6 +468,8 @@ continue; for (tmp = 0; tmp < PNP_MAX_DMA; tmp++) { if (tdev->res.dma_resource[tmp].flags & IORESOURCE_DMA) { + if (pnp_dma_flags(dev, tmp) & IORESOURCE_DISABLED) + continue; if ((tdev->res.dma_resource[tmp].start == *dma)) return 0; } diff -Nru a/drivers/pnp/support.c b/drivers/pnp/support.c --- a/drivers/pnp/support.c Tue Jun 24 13:17:01 2003 +++ b/drivers/pnp/support.c Thu Jul 3 08:38:43 2003 @@ -68,9 +68,13 @@ int i = 0; while ((res->irq_resource[i].flags & IORESOURCE_IRQ) && i < PNP_MAX_IRQ) i++; if (i < PNP_MAX_IRQ) { + res->irq_resource[i].flags = IORESOURCE_IRQ; // Also clears _UNSET flag + if (irq == -1) { + res->irq_resource[i].flags |= IORESOURCE_DISABLED; + return; + } res->irq_resource[i].start = res->irq_resource[i].end = (unsigned long) irq; - res->irq_resource[i].flags = IORESOURCE_IRQ; // Also clears _UNSET flag } } @@ -79,9 +83,13 @@ int i = 0; while ((res->dma_resource[i].flags & IORESOURCE_DMA) && i < PNP_MAX_DMA) i++; if (i < PNP_MAX_DMA) { + res->dma_resource[i].flags = IORESOURCE_DMA; // Also clears _UNSET flag + if (dma == -1) { + res->dma_resource[i].flags |= IORESOURCE_DISABLED; + return; + } res->dma_resource[i].start = res->dma_resource[i].end = (unsigned long) dma; - res->dma_resource[i].flags = IORESOURCE_DMA; // Also clears _UNSET flag } } @@ -90,9 +98,13 @@ int i = 0; while ((res->port_resource[i].flags & IORESOURCE_IO) && i < PNP_MAX_PORT) i++; if (i < PNP_MAX_PORT) { + res->port_resource[i].flags = IORESOURCE_IO; // Also clears _UNSET flag + if (len <= 0 || (io + len -1) >= 0x10003) { + res->port_resource[i].flags |= IORESOURCE_DISABLED; + return; + } res->port_resource[i].start = (unsigned long) io; res->port_resource[i].end = (unsigned long)(io + len - 1); - res->port_resource[i].flags = IORESOURCE_IO; // Also clears _UNSET flag } } @@ -101,9 +113,13 @@ int i = 0; while ((res->mem_resource[i].flags & IORESOURCE_MEM) && i < PNP_MAX_MEM) i++; if (i < PNP_MAX_MEM) { + res->mem_resource[i].flags = IORESOURCE_MEM; // Also clears _UNSET flag + if (len <= 0) { + res->mem_resource[i].flags |= IORESOURCE_DISABLED; + return; + } res->mem_resource[i].start = (unsigned long) mem; res->mem_resource[i].end = (unsigned long)(mem + len - 1); - res->mem_resource[i].flags = IORESOURCE_MEM; // Also clears _UNSET flag } } diff -Nru a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c --- a/drivers/scsi/scsi.c Wed Jun 25 04:47:15 2003 +++ b/drivers/scsi/scsi.c Wed Jul 2 00:19:28 2003 @@ -582,7 +582,7 @@ local_irq_save(flags); cpu = smp_processor_id(); list_add_tail(&cmd->eh_entry, &done_q[cpu]); - cpu_raise_softirq(cpu, SCSI_SOFTIRQ); + raise_softirq_irqoff(SCSI_SOFTIRQ); local_irq_restore(flags); } diff -Nru a/fs/attr.c b/fs/attr.c --- a/fs/attr.c Thu May 29 22:42:39 2003 +++ b/fs/attr.c Wed Jul 2 21:21:22 2003 @@ -22,8 +22,6 @@ int retval = -EPERM; unsigned int ia_valid = attr->ia_valid; - lock_kernel(); - /* If force is set do it anyway. */ if (ia_valid & ATTR_FORCE) goto fine; @@ -58,7 +56,6 @@ fine: retval = 0; error: - unlock_kernel(); return retval; } diff -Nru a/fs/block_dev.c b/fs/block_dev.c --- a/fs/block_dev.c Sun May 25 18:29:01 2003 +++ b/fs/block_dev.c Wed Jul 2 21:21:28 2003 @@ -155,11 +155,13 @@ */ static loff_t block_llseek(struct file *file, loff_t offset, int origin) { - /* ewww */ - loff_t size = file->f_dentry->d_inode->i_bdev->bd_inode->i_size; + struct inode *bd_inode; + loff_t size; loff_t retval; - lock_kernel(); + bd_inode = file->f_dentry->d_inode->i_bdev->bd_inode; + down(&bd_inode->i_sem); + size = bd_inode->i_size; switch (origin) { case 2: @@ -175,7 +177,7 @@ } retval = offset; } - unlock_kernel(); + up(&bd_inode->i_sem); return retval; } diff -Nru a/fs/buffer.c b/fs/buffer.c --- a/fs/buffer.c Fri Jun 27 01:46:43 2003 +++ b/fs/buffer.c Thu Jul 3 18:54:38 2003 @@ -1447,6 +1447,28 @@ } EXPORT_SYMBOL(__getblk); +/* + * Do async read-ahead on a buffer.. + */ +void +__breadahead(struct block_device *bdev, sector_t block, int size) +{ + struct buffer_head *bh = __getblk(bdev, block, size); + if (!test_set_buffer_locked(bh)) { + if (!buffer_uptodate(bh)) { + /* + * This eats the bh count from __getblk() and + * unlocks when the read is done. + */ + bh->b_end_io = end_buffer_io_sync; + submit_bh(READ, bh); + return; + } + unlock_buffer(bh); + } + brelse(bh); +} + /** * __bread() - reads a specified block and returns the bh * @block: number of block diff -Nru a/fs/coda/file.c b/fs/coda/file.c --- a/fs/coda/file.c Sat Oct 5 15:33:50 2002 +++ b/fs/coda/file.c Wed Jul 2 21:46:34 2003 @@ -153,19 +153,22 @@ struct inode *coda_inode; int err = 0, fcnt; + lock_kernel(); + coda_vfs_stat.flush++; /* last close semantics */ fcnt = file_count(coda_file); - if (fcnt > 1) return 0; + if (fcnt > 1) + goto out; /* No need to make an upcall when we have not made any modifications * to the file */ if ((coda_file->f_flags & O_ACCMODE) == O_RDONLY) - return 0; + goto out; if (use_coda_close) - return 0; + goto out; cfi = CODA_FTOC(coda_file); BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); @@ -180,6 +183,8 @@ err = 0; } +out: + unlock_kernel(); return err; } diff -Nru a/fs/exec.c b/fs/exec.c --- a/fs/exec.c Thu Jun 5 23:36:53 2003 +++ b/fs/exec.c Wed Jul 2 21:53:42 2003 @@ -392,7 +392,7 @@ if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (security_vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } @@ -441,9 +441,9 @@ { int i; - for (i = 0 ; i < MAX_ARG_PAGES ; i++) { + for (i = 0; i < MAX_ARG_PAGES; i++) { if (bprm->page[i]) - __free_page(bprm->page[i]); + __free_page(bprm->page[i]); bprm->page[i] = NULL; } } @@ -772,6 +772,8 @@ if (retval) goto out; + bprm->mm = NULL; /* We're using it now */ + /* This is the point of no return */ current->sas_ss_sp = current->sas_ss_size = 0; @@ -999,7 +1001,7 @@ } read_lock(&binfmt_lock); put_binfmt(fmt); - if (retval != -ENOEXEC) + if (retval != -ENOEXEC || bprm->mm == NULL) break; if (!bprm->file) { read_unlock(&binfmt_lock); @@ -1007,7 +1009,7 @@ } } read_unlock(&binfmt_lock); - if (retval != -ENOEXEC) { + if (retval != -ENOEXEC || bprm->mm == NULL) { break; #ifdef CONFIG_KMOD }else{ @@ -1035,7 +1037,6 @@ struct linux_binprm bprm; struct file *file; int retval; - int i; sched_balance_exec(); @@ -1103,17 +1104,14 @@ out: /* Something went wrong, return the inode and free the argument pages*/ - for (i = 0 ; i < MAX_ARG_PAGES ; i++) { - struct page * page = bprm.page[i]; - if (page) - __free_page(page); - } + free_arg_pages(&bprm); if (bprm.security) security_bprm_free(&bprm); out_mm: - mmdrop(bprm.mm); + if (bprm.mm) + mmdrop(bprm.mm); out_file: if (bprm.file) { diff -Nru a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c --- a/fs/ext2/ialloc.c Thu Apr 17 12:14:09 2003 +++ b/fs/ext2/ialloc.c Wed Jul 2 21:22:40 2003 @@ -489,17 +489,18 @@ return group; } -struct inode * ext2_new_inode(struct inode * dir, int mode) +struct inode *ext2_new_inode(struct inode *dir, int mode) { struct super_block *sb; struct buffer_head *bitmap_bh = NULL; struct buffer_head *bh2; int group, i; - ino_t ino; + ino_t ino = 0; struct inode * inode; - struct ext2_group_desc * desc; - struct ext2_super_block * es; + struct ext2_group_desc *gdp; + struct ext2_super_block *es; struct ext2_inode_info *ei; + struct ext2_sb_info *sbi; int err; sb = dir->i_sb; @@ -508,36 +509,62 @@ return ERR_PTR(-ENOMEM); ei = EXT2_I(inode); - es = EXT2_SB(sb)->s_es; + sbi = EXT2_SB(sb); + es = sbi->s_es; repeat: if (S_ISDIR(mode)) { - if (test_opt (sb, OLDALLOC)) + if (test_opt(sb, OLDALLOC)) group = find_group_dir(sb, dir); else group = find_group_orlov(sb, dir); } else group = find_group_other(sb, dir); - err = -ENOSPC; - if (group == -1) + if (group == -1) { + err = -ENOSPC; goto fail; + } - err = -EIO; - bitmap_bh = read_inode_bitmap(sb, group); - if (!bitmap_bh) - goto fail2; - - i = ext2_find_first_zero_bit((unsigned long *)bitmap_bh->b_data, - EXT2_INODES_PER_GROUP(sb)); - if (i >= EXT2_INODES_PER_GROUP(sb)) - goto bad_count; - if (ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group), - i, (void *) bitmap_bh->b_data)) { + for (i = 0; i < sbi->s_groups_count; i++) { + gdp = ext2_get_group_desc(sb, group, &bh2); brelse(bitmap_bh); - ext2_release_inode(sb, group, S_ISDIR(mode)); - goto repeat; + bitmap_bh = read_inode_bitmap(sb, group); + if (!bitmap_bh) { + err = -EIO; + goto fail2; + } + + i = ext2_find_first_zero_bit((unsigned long *)bitmap_bh->b_data, + EXT2_INODES_PER_GROUP(sb)); + if (i >= EXT2_INODES_PER_GROUP(sb)) { + /* + * Rare race: find_group_xx() decided that there were + * free inodes in this group, but by the time we tried + * to allocate one, they're all gone. This can also + * occur because the counters which find_group_orlov() + * uses are approximate. So just go and search the + * next block group. + */ + if (++group == sbi->s_groups_count) + group = 0; + continue; + } + if (ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group), + i, bitmap_bh->b_data)) { + brelse(bitmap_bh); + bitmap_bh = NULL; + ext2_release_inode(sb, group, S_ISDIR(mode)); + goto repeat; + } + goto got; } + /* + * Scanned all blockgroups. + */ + err = -ENOSPC; + goto fail2; +got: mark_buffer_dirty(bitmap_bh); if (sb->s_flags & MS_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); @@ -605,8 +632,9 @@ inode->i_generation = EXT2_SB(sb)->s_next_generation++; insert_inode_hash(inode); - if(DQUOT_ALLOC_INODE(inode)) { + if (DQUOT_ALLOC_INODE(inode)) { DQUOT_DROP(inode); + err = -ENOSPC; goto fail3; } err = ext2_init_acl(inode, dir); @@ -631,21 +659,6 @@ make_bad_inode(inode); iput(inode); return ERR_PTR(err); - -bad_count: - brelse(bitmap_bh); - ext2_error (sb, "ext2_new_inode", - "Free inodes count corrupted in group %d", - group); - /* Is it really ENOSPC? */ - err = -ENOSPC; - if (sb->s_flags & MS_RDONLY) - goto fail; - - desc = ext2_get_group_desc (sb, group, &bh2); - desc->bg_free_inodes_count = 0; - mark_buffer_dirty(bh2); - goto repeat; } unsigned long ext2_count_free_inodes (struct super_block * sb) diff -Nru a/fs/ext3/inode.c b/fs/ext3/inode.c --- a/fs/ext3/inode.c Wed Jun 25 16:30:56 2003 +++ b/fs/ext3/inode.c Thu Jul 3 19:20:48 2003 @@ -2290,68 +2290,72 @@ ext3_journal_stop(handle); } -/* - * ext3_get_inode_loc returns with an extra refcount against the - * inode's underlying buffer_head on success. - */ - -int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) +static unsigned long ext3_get_inode_block(struct super_block *sb, + unsigned long ino, struct ext3_iloc *iloc) { - struct buffer_head *bh = 0; - unsigned long block; - unsigned long block_group; - unsigned long group_desc; - unsigned long desc; - unsigned long offset; + unsigned long desc, group_desc, block_group; + unsigned long offset, block; + struct buffer_head *bh; struct ext3_group_desc * gdp; - if ((inode->i_ino != EXT3_ROOT_INO && - inode->i_ino != EXT3_JOURNAL_INO && - inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu( - EXT3_SB(inode->i_sb)->s_es->s_inodes_count)) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", - "bad inode number: %lu", inode->i_ino); - goto bad_inode; - } - block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb); - if (block_group >= EXT3_SB(inode->i_sb)->s_groups_count) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", + if ((ino != EXT3_ROOT_INO && + ino != EXT3_JOURNAL_INO && + ino < EXT3_FIRST_INO(sb)) || + ino > le32_to_cpu( + EXT3_SB(sb)->s_es->s_inodes_count)) { + ext3_error (sb, "ext3_get_inode_block", + "bad inode number: %lu", ino); + return 0; + } + block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); + if (block_group >= EXT3_SB(sb)->s_groups_count) { + ext3_error (sb, "ext3_get_inode_block", "group >= groups count"); - goto bad_inode; + return 0; } - group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb); - desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1); - bh = EXT3_SB(inode->i_sb)->s_group_desc[group_desc]; + group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb); + desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1); + bh = EXT3_SB(sb)->s_group_desc[group_desc]; if (!bh) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", + ext3_error (sb, "ext3_get_inode_block", "Descriptor not loaded"); - goto bad_inode; + return 0; } gdp = (struct ext3_group_desc *) bh->b_data; /* * Figure out the offset within the block group inode table */ - offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) * - EXT3_INODE_SIZE(inode->i_sb); + offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) * + EXT3_INODE_SIZE(sb); block = le32_to_cpu(gdp[desc].bg_inode_table) + - (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); - if (!(bh = sb_bread(inode->i_sb, block))) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", - "unable to read inode block - " - "inode=%lu, block=%lu", inode->i_ino, block); - goto bad_inode; - } - offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1); + (offset >> EXT3_BLOCK_SIZE_BITS(sb)); - iloc->bh = bh; - iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset); iloc->block_group = block_group; + iloc->offset = offset & (EXT3_BLOCK_SIZE(sb) - 1); + return block; +} - return 0; +/* + * ext3_get_inode_loc returns with an extra refcount against the + * inode's underlying buffer_head on success. + */ + +int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) +{ + unsigned long block; - bad_inode: + block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc); + if (block) { + struct buffer_head *bh = sb_bread(inode->i_sb, block); + if (bh) { + iloc->bh = bh; + return 0; + } + ext3_error (inode->i_sb, "ext3_get_inode_loc", + "unable to read inode block - " + "inode=%lu, block=%lu", inode->i_ino, block); + } return -EIO; } @@ -2388,7 +2392,7 @@ if (ext3_get_inode_loc(inode, &iloc)) goto bad_inode; bh = iloc.bh; - raw_inode = iloc.raw_inode; + raw_inode = ext3_raw_inode(&iloc); inode->i_mode = le16_to_cpu(raw_inode->i_mode); inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); @@ -2454,11 +2458,9 @@ * even on big-endian machines: we do NOT byteswap the block numbers! */ for (block = 0; block < EXT3_N_BLOCKS; block++) - ei->i_data[block] = iloc.raw_inode->i_block[block]; + ei->i_data[block] = raw_inode->i_block[block]; INIT_LIST_HEAD(&ei->i_orphan); - brelse (iloc.bh); - if (S_ISREG(inode->i_mode)) { inode->i_op = &ext3_file_inode_operations; inode->i_fop = &ext3_file_operations; @@ -2476,8 +2478,9 @@ } else { inode->i_op = &ext3_special_inode_operations; init_special_inode(inode, inode->i_mode, - le32_to_cpu(iloc.raw_inode->i_block[0])); + le32_to_cpu(raw_inode->i_block[0])); } + brelse (iloc.bh); ext3_set_inode_flags(inode); return; @@ -2497,7 +2500,7 @@ struct inode *inode, struct ext3_iloc *iloc) { - struct ext3_inode *raw_inode = iloc->raw_inode; + struct ext3_inode *raw_inode = ext3_raw_inode(iloc); struct ext3_inode_info *ei = EXT3_I(inode); struct buffer_head *bh = iloc->bh; int err = 0, rc, block; diff -Nru a/fs/jbd/commit.c b/fs/jbd/commit.c --- a/fs/jbd/commit.c Wed Jun 25 16:30:56 2003 +++ b/fs/jbd/commit.c Wed Jul 2 21:23:09 2003 @@ -169,10 +169,23 @@ * that multiple journal_get_write_access() calls to the same * buffer are perfectly permissable. */ - while (commit_transaction->t_reserved_list) { - jh = commit_transaction->t_reserved_list; - JBUFFER_TRACE(jh, "reserved, unused: refile"); - journal_refile_buffer(journal, jh); + { + int nr = 0; + while (commit_transaction->t_reserved_list) { + jh = commit_transaction->t_reserved_list; + JBUFFER_TRACE(jh, "reserved, unused: refile"); + journal_refile_buffer(journal, jh); + nr++; + } + if (nr) { + static int noisy; + + if (noisy < 10) { + noisy++; + printk("%s: freed %d reserved buffers\n", + __FUNCTION__, nr); + } + } } /* diff -Nru a/fs/jbd/transaction.c b/fs/jbd/transaction.c --- a/fs/jbd/transaction.c Fri Jun 20 13:16:32 2003 +++ b/fs/jbd/transaction.c Wed Jul 2 21:23:09 2003 @@ -1168,37 +1168,24 @@ * journal_release_buffer: undo a get_write_access without any buffer * updates, if the update decided in the end that it didn't need access. * - * journal_get_write_access() can block, so it is quite possible for a - * journaling component to decide after the write access is returned - * that global state has changed and the update is no longer required. - * * The caller passes in the number of credits which should be put back for * this buffer (zero or one). + * + * We leave the buffer attached to t_reserved_list because even though this + * handle doesn't want it, some other concurrent handle may want to journal + * this buffer. If that handle is curently in between get_write_access() and + * journal_dirty_metadata() then it expects the buffer to be reserved. If + * we were to rip it off t_reserved_list here, the other handle will explode + * when journal_dirty_metadata is presented with a non-reserved buffer. + * + * If nobody really wants to journal this buffer then it will be thrown + * away at the start of commit. */ void journal_release_buffer(handle_t *handle, struct buffer_head *bh, int credits) { - transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - struct journal_head *jh = bh2jh(bh); - - JBUFFER_TRACE(jh, "entry"); - - /* If the buffer is reserved but not modified by this - * transaction, then it is safe to release it. In all other - * cases, just leave the buffer as it is. */ - - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - if (jh->b_jlist == BJ_Reserved && jh->b_transaction == transaction && - !buffer_jbddirty(jh2bh(jh))) { - JBUFFER_TRACE(jh, "unused: refiling it"); - __journal_refile_buffer(jh); - } - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); + BUFFER_TRACE(bh, "entry"); handle->h_buffer_credits += credits; - JBUFFER_TRACE(jh, "exit"); } /** diff -Nru a/fs/nfs/file.c b/fs/nfs/file.c --- a/fs/nfs/file.c Wed May 7 03:34:41 2003 +++ b/fs/nfs/file.c Wed Jul 2 21:46:34 2003 @@ -104,11 +104,13 @@ dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + lock_kernel(); status = nfs_wb_file(inode, file); if (!status) { status = file->f_error; file->f_error = 0; } + unlock_kernel(); return status; } diff -Nru a/fs/open.c b/fs/open.c --- a/fs/open.c Fri Jun 20 13:16:06 2003 +++ b/fs/open.c Wed Jul 2 21:46:34 2003 @@ -952,11 +952,8 @@ return 0; } retval = 0; - if (filp->f_op && filp->f_op->flush) { - lock_kernel(); + if (filp->f_op && filp->f_op->flush) retval = filp->f_op->flush(filp); - unlock_kernel(); - } dnotify_flush(filp, id); locks_remove_posix(filp, id); fput(filp); diff -Nru a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c --- a/fs/proc/proc_misc.c Tue Jun 24 08:42:14 2003 +++ b/fs/proc/proc_misc.c Wed Jul 2 21:21:25 2003 @@ -497,11 +497,10 @@ static int locks_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len; - lock_kernel(); - len = get_locks_status(page, start, off, count); - unlock_kernel(); - if (len < count) *eof = 1; + int len = get_locks_status(page, start, off, count); + + if (len < count) + *eof = 1; return len; } diff -Nru a/fs/proc/root.c b/fs/proc/root.c --- a/fs/proc/root.c Sun May 25 18:48:57 2003 +++ b/fs/proc/root.c Wed Jul 2 21:21:25 2003 @@ -81,11 +81,13 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry) { - if (dir->i_ino == PROC_ROOT_INO) { /* check for safety... */ - lock_kernel(); + /* + * nr_threads is actually protected by the tasklist_lock; + * however, it's conventional to do reads, especially for + * reporting, without any locking whatsoever. + */ + if (dir->i_ino == PROC_ROOT_INO) /* check for safety... */ dir->i_nlink = proc_root.nlink + nr_threads; - unlock_kernel(); - } if (!proc_lookup(dir, dentry)) { return NULL; diff -Nru a/fs/ramfs/inode.c b/fs/ramfs/inode.c --- a/fs/ramfs/inode.c Sun May 25 19:19:54 2003 +++ b/fs/ramfs/inode.c Wed Jul 2 21:21:20 2003 @@ -146,6 +146,7 @@ .mmap = generic_file_mmap, .fsync = simple_sync_file, .sendfile = generic_file_sendfile, + .llseek = generic_file_llseek, }; static struct inode_operations ramfs_file_inode_operations = { diff -Nru a/fs/sysfs/bin.c b/fs/sysfs/bin.c --- a/fs/sysfs/bin.c Wed Jun 11 14:48:45 2003 +++ b/fs/sysfs/bin.c Thu Jul 3 09:05:47 2003 @@ -2,6 +2,8 @@ * bin.c - binary file operations for sysfs. */ +#undef DEBUG + #include #include #include @@ -42,18 +44,17 @@ ret = fill_read(dentry, buffer, offs, count); if (ret < 0) - goto Done; + return ret; count = ret; - ret = -EFAULT; - if (copy_to_user(userbuf, buffer, count) != 0) - goto Done; + if (copy_to_user(userbuf, buffer + offs, count) != 0) + return -EINVAL; + + pr_debug("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count); *off = offs + count; - ret = count; - Done: - return ret; + return count; } static int @@ -72,7 +73,6 @@ struct dentry *dentry = file->f_dentry; int size = dentry->d_inode->i_size; loff_t offs = *off; - int ret; if (count > PAGE_SIZE) count = PAGE_SIZE; @@ -83,16 +83,13 @@ count = size - offs; } - ret = -EFAULT; - if (copy_from_user(buffer, userbuf, count)) - goto Done; + if (copy_from_user(buffer + offs, userbuf, count)) + return -EFAULT; count = flush_write(dentry, buffer, offs, count); if (count > 0) *off = offs + count; - ret = count; - Done: - return ret; + return count; } static int open(struct inode * inode, struct file * file) diff -Nru a/fs/sysfs/dir.c b/fs/sysfs/dir.c --- a/fs/sysfs/dir.c Tue Mar 11 13:30:18 2003 +++ b/fs/sysfs/dir.c Thu Jul 3 10:41:51 2003 @@ -121,7 +121,29 @@ dput(parent); } +void sysfs_rename_dir(struct kobject * kobj, char *new_name) +{ + struct dentry * new_dentry, * parent; + + if (!strcmp(kobj->name, new_name)) + return; + + if (!kobj->parent) + return; + + parent = kobj->parent->dentry; + + down(&parent->d_inode->i_sem); + + new_dentry = sysfs_get_dentry(parent, new_name); + d_move(kobj->dentry, new_dentry); + + strlcpy(kobj->name, new_name, KOBJ_NAME_LEN); + + up(&parent->d_inode->i_sem); +} EXPORT_SYMBOL(sysfs_create_dir); EXPORT_SYMBOL(sysfs_remove_dir); +EXPORT_SYMBOL(sysfs_rename_dir); diff -Nru a/fs/sysfs/file.c b/fs/sysfs/file.c --- a/fs/sysfs/file.c Wed Jun 11 14:48:45 2003 +++ b/fs/sysfs/file.c Thu Jul 3 09:27:08 2003 @@ -247,6 +247,12 @@ if (!kobj || !attr) goto Einval; + /* Grab the module reference for this attribute if we have one */ + if (!try_module_get(attr->owner)) { + error = -ENODEV; + goto Done; + } + /* if the kobject has no ktype, then we assume that it is a subsystem * itself, and use ops for it. */ @@ -300,6 +306,7 @@ goto Done; Eaccess: error = -EACCES; + module_put(attr->owner); Done: if (error && kobj) kobject_put(kobj); @@ -314,10 +321,12 @@ static int sysfs_release(struct inode * inode, struct file * filp) { struct kobject * kobj = filp->f_dentry->d_parent->d_fsdata; + struct attribute * attr = filp->f_dentry->d_fsdata; struct sysfs_buffer * buffer = filp->private_data; if (kobj) kobject_put(kobj); + module_put(attr->owner); if (buffer) { if (buffer->page) diff -Nru a/include/asm-alpha/mmzone.h b/include/asm-alpha/mmzone.h --- a/include/asm-alpha/mmzone.h Thu Apr 3 14:49:57 2003 +++ b/include/asm-alpha/mmzone.h Wed Jul 2 21:20:53 2003 @@ -31,7 +31,6 @@ #define pa_to_nid(pa) alpha_pa_to_nid(pa) #define NODE_DATA(nid) (&node_data[(nid)]) -#define node_size(nid) (NODE_DATA(nid)->node_size) #define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn) @@ -124,7 +123,7 @@ #define pfn_to_nid(pfn) pa_to_nid(((u64)pfn << PAGE_SHIFT)) #define pfn_valid(pfn) \ (((pfn) - node_start_pfn(pfn_to_nid(pfn))) < \ - node_size(pfn_to_nid(pfn))) \ + node_spanned_pages(pfn_to_nid(pfn))) \ #define virt_addr_valid(kaddr) pfn_valid((__pa(kaddr) >> PAGE_SHIFT)) diff -Nru a/include/asm-i386/cacheflush.h b/include/asm-i386/cacheflush.h --- a/include/asm-i386/cacheflush.h Sat Apr 12 16:21:01 2003 +++ b/include/asm-i386/cacheflush.h Wed Jul 2 21:19:42 2003 @@ -17,4 +17,9 @@ void global_flush_tlb(void); int change_page_attr(struct page *page, int numpages, pgprot_t prot); +#ifdef CONFIG_DEBUG_PAGEALLOC +/* internal debugging function */ +void kernel_map_pages(struct page *page, int numpages, int enable); +#endif + #endif /* _I386_CACHEFLUSH_H */ diff -Nru a/include/asm-i386/hardirq.h b/include/asm-i386/hardirq.h --- a/include/asm-i386/hardirq.h Mon Mar 31 14:29:49 2003 +++ b/include/asm-i386/hardirq.h Mon Jun 30 22:26:58 2003 @@ -7,8 +7,6 @@ typedef struct { unsigned int __softirq_pending; - unsigned int __syscall_count; - struct task_struct * __ksoftirqd_task; /* waitqueue is too large */ unsigned long idle_timestamp; unsigned int __nmi_count; /* arch dependent */ unsigned int apic_timer_irqs; /* arch dependent */ diff -Nru a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h --- a/include/asm-i386/mmzone.h Sun May 4 23:38:34 2003 +++ b/include/asm-i386/mmzone.h Wed Jul 2 21:20:53 2003 @@ -32,8 +32,7 @@ #define alloc_bootmem_low_pages_node(ignore, x) \ __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) -#define node_size(nid) (node_data[nid]->node_size) -#define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn) +#define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn) /* * Following are macros that each numa implmentation must define. @@ -54,7 +53,7 @@ #define node_end_pfn(nid) \ ({ \ pg_data_t *__pgdat = NODE_DATA(nid); \ - __pgdat->node_start_pfn + __pgdat->node_size; \ + __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ }) #define local_mapnr(kvaddr) \ diff -Nru a/include/asm-i386/timer.h b/include/asm-i386/timer.h --- a/include/asm-i386/timer.h Mon Mar 31 08:03:54 2003 +++ b/include/asm-i386/timer.h Wed Jul 2 21:21:34 2003 @@ -21,8 +21,21 @@ #define TICK_SIZE (tick_nsec / 1000) extern struct timer_opts* select_timer(void); +extern void clock_fallback(void); /* Modifiers for buggy PIT handling */ extern int pit_latch_buggy; + +extern struct timer_opts *cur_timer; +extern int timer_ack; + +/* list of externed timers */ +extern struct timer_opts timer_none; +extern struct timer_opts timer_pit; +extern struct timer_opts timer_tsc; +#ifdef CONFIG_X86_CYCLONE_TIMER +extern struct timer_opts timer_cyclone; +#endif + #endif diff -Nru a/include/asm-mips64/mmzone.h b/include/asm-mips64/mmzone.h --- a/include/asm-mips64/mmzone.h Sun Jun 15 16:56:22 2003 +++ b/include/asm-mips64/mmzone.h Wed Jul 2 21:20:53 2003 @@ -24,7 +24,7 @@ #define PHYSADDR_TO_NID(pa) NASID_TO_COMPACT_NODEID(NASID_GET(pa)) #define PLAT_NODE_DATA(n) (plat_node_data[n]) -#define PLAT_NODE_DATA_SIZE(n) (PLAT_NODE_DATA(n)->gendata.node_size) +#define PLAT_NODE_DATA_SIZE(n) (PLAT_NODE_DATA(n)->gendata.node_spanned_pages) #define PLAT_NODE_DATA_LOCALNR(p, n) \ (((p) >> PAGE_SHIFT) - PLAT_NODE_DATA(n)->gendata.node_start_pfn) diff -Nru a/include/asm-ppc64/mmzone.h b/include/asm-ppc64/mmzone.h --- a/include/asm-ppc64/mmzone.h Thu Jun 5 23:37:10 2003 +++ b/include/asm-ppc64/mmzone.h Wed Jul 2 21:20:53 2003 @@ -54,7 +54,6 @@ */ #define NODE_DATA(nid) (&node_data[nid]) -#define node_size(nid) (NODE_DATA(nid)->node_size) #define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn) /* diff -Nru a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h --- a/include/asm-x86_64/mmzone.h Sun Jun 15 02:06:56 2003 +++ b/include/asm-x86_64/mmzone.h Wed Jul 2 21:20:53 2003 @@ -40,8 +40,7 @@ #define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ - NODE_DATA(nid)->node_size) -#define node_size(nid) (NODE_DATA(nid)->node_size) + NODE_DATA(nid)->node_spanned_pages) #define local_mapnr(kvaddr) \ ( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) ) diff -Nru a/include/linux/buffer_head.h b/include/linux/buffer_head.h --- a/include/linux/buffer_head.h Thu Apr 24 03:30:41 2003 +++ b/include/linux/buffer_head.h Thu Jul 3 18:54:38 2003 @@ -167,6 +167,7 @@ struct buffer_head * __getblk(struct block_device *, sector_t, int); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); +void __breadahead(struct block_device *, sector_t block, int size); struct buffer_head *__bread(struct block_device *, sector_t block, int size); struct buffer_head *alloc_buffer_head(int gfp_flags); void free_buffer_head(struct buffer_head * bh); @@ -239,6 +240,12 @@ sb_bread(struct super_block *sb, sector_t block) { return __bread(sb->s_bdev, block, sb->s_blocksize); +} + +static inline void +sb_breadahead(struct super_block *sb, sector_t block) +{ + __breadahead(sb->s_bdev, block, sb->s_blocksize); } static inline struct buffer_head * diff -Nru a/include/linux/device.h b/include/linux/device.h --- a/include/linux/device.h Tue Jun 10 12:01:32 2003 +++ b/include/linux/device.h Thu Jul 3 10:42:01 2003 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -95,7 +96,7 @@ #define BUS_ATTR(_name,_mode,_show,_store) \ struct bus_attribute bus_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ .show = _show, \ .store = _store, \ }; @@ -136,7 +137,7 @@ #define DRIVER_ATTR(_name,_mode,_show,_store) \ struct driver_attribute driver_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ .show = _show, \ .store = _store, \ }; @@ -176,7 +177,7 @@ #define CLASS_ATTR(_name,_mode,_show,_store) \ struct class_attribute class_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ .show = _show, \ .store = _store, \ }; @@ -215,6 +216,8 @@ extern int class_device_add(struct class_device *); extern void class_device_del(struct class_device *); +extern int class_device_rename(struct class_device *, char *); + extern struct class_device * class_device_get(struct class_device *); extern void class_device_put(struct class_device *); @@ -226,7 +229,7 @@ #define CLASS_DEVICE_ATTR(_name,_mode,_show,_store) \ struct class_device_attribute class_device_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ .show = _show, \ .store = _store, \ }; @@ -324,7 +327,7 @@ #define DEVICE_ATTR(_name,_mode,_show,_store) \ struct device_attribute dev_attr_##_name = { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ .show = _show, \ .store = _store, \ }; diff -Nru a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h --- a/include/linux/ext3_fs.h Fri Jun 27 23:20:44 2003 +++ b/include/linux/ext3_fs.h Thu Jul 3 19:20:48 2003 @@ -636,10 +636,14 @@ struct ext3_iloc { struct buffer_head *bh; - struct ext3_inode *raw_inode; + unsigned long offset; unsigned long block_group; }; +static inline struct ext3_inode *ext3_raw_inode(struct ext3_iloc *iloc) +{ + return (struct ext3_inode *) (iloc->bh->b_data + iloc->offset); +} /* * This structure is stuffed into the struct file's private_data field diff -Nru a/include/linux/interrupt.h b/include/linux/interrupt.h --- a/include/linux/interrupt.h Thu May 22 23:53:24 2003 +++ b/include/linux/interrupt.h Wed Jul 2 00:17:58 2003 @@ -94,8 +94,8 @@ asmlinkage void do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); extern void softirq_init(void); -#define __cpu_raise_softirq(cpu, nr) do { softirq_pending(cpu) |= 1UL << (nr); } while (0) -extern void FASTCALL(cpu_raise_softirq(unsigned int cpu, unsigned int nr)); +#define __raise_softirq_irqoff(nr) do { local_softirq_pending() |= 1UL << (nr); } while (0) +extern void FASTCALL(raise_softirq_irqoff(unsigned int nr)); extern void FASTCALL(raise_softirq(unsigned int nr)); #ifndef invoke_softirq diff -Nru a/include/linux/ioport.h b/include/linux/ioport.h --- a/include/linux/ioport.h Thu Jun 12 03:02:55 2003 +++ b/include/linux/ioport.h Thu Jul 3 08:38:43 2003 @@ -43,6 +43,7 @@ #define IORESOURCE_SHADOWABLE 0x00010000 #define IORESOURCE_BUS_HAS_VGA 0x00080000 +#define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 #define IORESOURCE_AUTO 0x40000000 #define IORESOURCE_BUSY 0x80000000 /* Driver has marked this resource busy */ diff -Nru a/include/linux/irq_cpustat.h b/include/linux/irq_cpustat.h --- a/include/linux/irq_cpustat.h Wed Jun 11 19:53:38 2003 +++ b/include/linux/irq_cpustat.h Mon Jun 30 22:26:58 2003 @@ -29,10 +29,6 @@ /* arch independent irq_stat fields */ #define softirq_pending(cpu) __IRQ_STAT((cpu), __softirq_pending) #define local_softirq_pending() softirq_pending(smp_processor_id()) -#define syscall_count(cpu) __IRQ_STAT((cpu), __syscall_count) -#define local_syscall_count() syscall_count(smp_processor_id()) -#define ksoftirqd_task(cpu) __IRQ_STAT((cpu), __ksoftirqd_task) -#define local_ksoftirqd_task() ksoftirqd_task(smp_processor_id()) /* arch dependent irq_stat fields */ #define nmi_count(cpu) __IRQ_STAT((cpu), __nmi_count) /* i386 */ diff -Nru a/include/linux/kobject.h b/include/linux/kobject.h --- a/include/linux/kobject.h Mon Jun 9 16:41:19 2003 +++ b/include/linux/kobject.h Thu Jul 3 10:41:56 2003 @@ -39,6 +39,8 @@ extern int kobject_add(struct kobject *); extern void kobject_del(struct kobject *); +extern void kobject_rename(struct kobject *, char *new_name); + extern int kobject_register(struct kobject *); extern void kobject_unregister(struct kobject *); diff -Nru a/include/linux/mm.h b/include/linux/mm.h --- a/include/linux/mm.h Sat May 17 14:09:59 2003 +++ b/include/linux/mm.h Wed Jul 2 21:22:37 2003 @@ -339,9 +339,14 @@ page->flags |= zone_num << ZONE_SHIFT; } -static inline void * lowmem_page_address(struct page *page) +#ifndef CONFIG_DISCONTIGMEM +/* The array of struct pages - for discontigmem use pgdat->lmem_map */ +extern struct page *mem_map; +#endif + +static inline void *lowmem_page_address(struct page *page) { - return __va( ( (page - page_zone(page)->zone_mem_map) + page_zone(page)->zone_start_pfn) << PAGE_SHIFT); + return __va(page_to_pfn(page) << PAGE_SHIFT); } #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) @@ -395,11 +400,6 @@ #define VM_FAULT_MINOR 1 #define VM_FAULT_MAJOR 2 -#ifndef CONFIG_DISCONTIGMEM -/* The array of struct pages - for discontigmem use pgdat->lmem_map */ -extern struct page *mem_map; -#endif - extern void show_free_areas(void); struct page *shmem_nopage(struct vm_area_struct * vma, @@ -609,5 +609,13 @@ int write); extern int remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); + +#ifndef CONFIG_DEBUG_PAGEALLOC +static inline void +kernel_map_pages(struct page *page, int numpages, int enable) +{ +} +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff -Nru a/include/linux/mman.h b/include/linux/mman.h --- a/include/linux/mman.h Tue Mar 18 01:44:12 2003 +++ b/include/linux/mman.h Wed Jul 2 21:22:38 2003 @@ -9,7 +9,8 @@ #define MREMAP_MAYMOVE 1 #define MREMAP_FIXED 2 -extern int vm_enough_memory(long pages); +extern int sysctl_overcommit_memory; +extern int sysctl_overcommit_ratio; extern atomic_t vm_committed_space; #ifdef CONFIG_SMP diff -Nru a/include/linux/mmzone.h b/include/linux/mmzone.h --- a/include/linux/mmzone.h Thu Jun 5 23:37:53 2003 +++ b/include/linux/mmzone.h Wed Jul 2 21:20:53 2003 @@ -184,11 +184,16 @@ unsigned long *valid_addr_bitmap; struct bootmem_data *bdata; unsigned long node_start_pfn; - unsigned long node_size; + unsigned long node_present_pages; /* total number of physical pages */ + unsigned long node_spanned_pages; /* total size of physical page + range, including holes */ int node_id; struct pglist_data *pgdat_next; wait_queue_head_t kswapd_wait; } pg_data_t; + +#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) +#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) extern int numnodes; extern struct pglist_data *pgdat_list; diff -Nru a/include/linux/netdevice.h b/include/linux/netdevice.h --- a/include/linux/netdevice.h Wed Jun 25 21:48:56 2003 +++ b/include/linux/netdevice.h Wed Jul 2 00:20:08 2003 @@ -561,7 +561,7 @@ cpu = smp_processor_id(); dev->next_sched = softnet_data[cpu].output_queue; softnet_data[cpu].output_queue = dev; - cpu_raise_softirq(cpu, NET_TX_SOFTIRQ); + raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } } @@ -612,7 +612,7 @@ cpu = smp_processor_id(); skb->next = softnet_data[cpu].completion_queue; softnet_data[cpu].completion_queue = skb; - cpu_raise_softirq(cpu, NET_TX_SOFTIRQ); + raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); } } @@ -779,7 +779,7 @@ dev->quota += dev->weight; else dev->quota = dev->weight; - __cpu_raise_softirq(cpu, NET_RX_SOFTIRQ); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); local_irq_restore(flags); } @@ -805,7 +805,7 @@ local_irq_save(flags); cpu = smp_processor_id(); list_add_tail(&dev->poll_list, &softnet_data[cpu].poll_list); - __cpu_raise_softirq(cpu, NET_RX_SOFTIRQ); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); local_irq_restore(flags); return 1; } diff -Nru a/include/linux/pci.h b/include/linux/pci.h --- a/include/linux/pci.h Wed Jun 25 19:45:54 2003 +++ b/include/linux/pci.h Thu Jul 3 08:08:44 2003 @@ -543,7 +543,7 @@ /* Generic PCI functions used internally */ -int pci_bus_exists(const struct list_head *list, int nr); +extern struct pci_bus *pci_find_bus(int domain, int busnr); struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus, struct pci_ops *ops, void *sysdata); static inline struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops, void *sysdata) { diff -Nru a/include/linux/security.h b/include/linux/security.h --- a/include/linux/security.h Wed Jun 25 16:30:52 2003 +++ b/include/linux/security.h Wed Jul 2 21:22:38 2003 @@ -49,6 +49,7 @@ extern int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); extern void cap_task_reparent_to_init (struct task_struct *p); extern int cap_syslog (int type); +extern int cap_vm_enough_memory (long pages); static inline int cap_netlink_send (struct sk_buff *skb) { @@ -958,6 +959,10 @@ * See the syslog(2) manual page for an explanation of the @type values. * @type contains the type of action. * Return 0 if permission is granted. + * @vm_enough_memory: + * Check permissions for allocating a new virtual mapping. + * @pages contains the number of pages. + * Return 0 if permission is granted. * * @register_security: * allow module stacking. @@ -989,6 +994,7 @@ int (*quotactl) (int cmds, int type, int id, struct super_block * sb); int (*quota_on) (struct file * f); int (*syslog) (int type); + int (*vm_enough_memory) (long pages); int (*bprm_alloc_security) (struct linux_binprm * bprm); void (*bprm_free_security) (struct linux_binprm * bprm); @@ -1238,6 +1244,11 @@ return security_ops->syslog(type); } +static inline int security_vm_enough_memory(long pages) +{ + return security_ops->vm_enough_memory(pages); +} + static inline int security_bprm_alloc (struct linux_binprm *bprm) { return security_ops->bprm_alloc_security (bprm); @@ -1896,6 +1907,11 @@ static inline int security_syslog(int type) { return cap_syslog(type); +} + +static inline int security_vm_enough_memory(long pages) +{ + return cap_vm_enough_memory(pages); } static inline int security_bprm_alloc (struct linux_binprm *bprm) diff -Nru a/include/linux/slab.h b/include/linux/slab.h --- a/include/linux/slab.h Sat Jun 14 20:55:10 2003 +++ b/include/linux/slab.h Wed Jul 2 21:22:38 2003 @@ -114,6 +114,10 @@ extern kmem_cache_t *sighand_cachep; extern kmem_cache_t *bio_cachep; +void ptrinfo(unsigned long addr); + +extern atomic_t slab_reclaim_pages; + #endif /* __KERNEL__ */ #endif /* _LINUX_SLAB_H */ diff -Nru a/include/linux/sysfs.h b/include/linux/sysfs.h --- a/include/linux/sysfs.h Thu May 22 16:35:57 2003 +++ b/include/linux/sysfs.h Thu Jul 3 10:41:51 2003 @@ -10,9 +10,11 @@ #define _SYSFS_H_ struct kobject; +struct module; struct attribute { char * name; + struct module * owner; mode_t mode; }; @@ -36,6 +38,9 @@ extern void sysfs_remove_dir(struct kobject *); + +extern void +sysfs_rename_dir(struct kobject *, char *new_name); extern int sysfs_create_file(struct kobject *, struct attribute *); diff -Nru a/init/Kconfig b/init/Kconfig --- a/init/Kconfig Sun May 25 14:08:02 2003 +++ b/init/Kconfig Wed Jul 2 21:23:11 2003 @@ -93,7 +93,8 @@ limited in memory. config LOG_BUF_SHIFT - int "Kernel log buffer size" if DEBUG_KERNEL + int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL + range 12 20 default 17 if ARCH_S390 default 16 if X86_NUMAQ || IA64 default 15 if SMP diff -Nru a/kernel/exit.c b/kernel/exit.c --- a/kernel/exit.c Fri Jun 27 09:43:50 2003 +++ b/kernel/exit.c Wed Jul 2 21:21:31 2003 @@ -651,6 +651,8 @@ if (tsk->exit_signal != -1) { int signal = tsk->parent == tsk->real_parent ? tsk->exit_signal : SIGCHLD; do_notify_parent(tsk, signal); + } else if (tsk->ptrace) { + do_notify_parent(tsk, SIGCHLD); } tsk->state = TASK_ZOMBIE; @@ -715,7 +717,7 @@ tsk->exit_code = code; exit_notify(tsk); - if (tsk->exit_signal == -1) + if (tsk->exit_signal == -1 && tsk->ptrace == 0) release_task(tsk); schedule(); @@ -859,7 +861,7 @@ BUG_ON(state != TASK_DEAD); return 0; } - if (unlikely(p->exit_signal == -1)) + if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) /* * This can only happen in a race with a ptraced thread * dying on another processor. @@ -889,8 +891,12 @@ /* Double-check with lock held. */ if (p->real_parent != p->parent) { __ptrace_unlink(p); - do_notify_parent(p, p->exit_signal); p->state = TASK_ZOMBIE; + /* If this is a detached thread, this is where it goes away. */ + if (p->exit_signal == -1) + release_task (p); + else + do_notify_parent(p, p->exit_signal); p = NULL; } write_unlock_irq(&tasklist_lock); diff -Nru a/kernel/fork.c b/kernel/fork.c --- a/kernel/fork.c Fri Jun 27 09:43:50 2003 +++ b/kernel/fork.c Wed Jul 2 21:22:38 2003 @@ -286,7 +286,7 @@ continue; if (mpnt->vm_flags & VM_ACCOUNT) { unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; - if (!vm_enough_memory(len)) + if (security_vm_enough_memory(len)) goto fail_nomem; charge += len; } diff -Nru a/kernel/ksyms.c b/kernel/ksyms.c --- a/kernel/ksyms.c Tue Jun 24 14:26:24 2003 +++ b/kernel/ksyms.c Thu Jul 3 00:32:44 2003 @@ -462,6 +462,7 @@ #endif EXPORT_SYMBOL(schedule_timeout); EXPORT_SYMBOL(yield); +EXPORT_SYMBOL(io_schedule); EXPORT_SYMBOL(__cond_resched); EXPORT_SYMBOL(set_user_nice); EXPORT_SYMBOL(task_nice); @@ -586,7 +587,7 @@ EXPORT_SYMBOL(do_softirq); EXPORT_SYMBOL(raise_softirq); EXPORT_SYMBOL(open_softirq); -EXPORT_SYMBOL(cpu_raise_softirq); +EXPORT_SYMBOL(raise_softirq_irqoff); EXPORT_SYMBOL(__tasklet_schedule); EXPORT_SYMBOL(__tasklet_hi_schedule); diff -Nru a/kernel/softirq.c b/kernel/softirq.c --- a/kernel/softirq.c Wed Jun 11 19:53:38 2003 +++ b/kernel/softirq.c Mon Jun 30 22:26:58 2003 @@ -14,6 +14,7 @@ #include #include #include +#include #include /* @@ -41,15 +42,18 @@ static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp; +static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); + /* * we cannot loop indefinitely here to avoid userspace starvation, * but we also don't want to introduce a worst case 1/HZ latency * to the pending events, so lets the scheduler to balance * the softirq load for us. */ -static inline void wakeup_softirqd(unsigned cpu) +static inline void wakeup_softirqd(void) { - struct task_struct * tsk = ksoftirqd_task(cpu); + /* Interrupts are disabled: no need to stop preemption */ + struct task_struct *tsk = __get_cpu_var(ksoftirqd); if (tsk && tsk->state != TASK_RUNNING) wake_up_process(tsk); @@ -96,7 +100,7 @@ goto restart; } if (pending) - wakeup_softirqd(smp_processor_id()); + wakeup_softirqd(); __local_bh_enable(); } @@ -117,9 +121,9 @@ /* * This function must run with irqs disabled! */ -inline void cpu_raise_softirq(unsigned int cpu, unsigned int nr) +inline void raise_softirq_irqoff(unsigned int nr) { - __cpu_raise_softirq(cpu, nr); + __raise_softirq_irqoff(nr); /* * If we're in an interrupt or softirq, we're done @@ -131,7 +135,7 @@ * schedule the softirq soon. */ if (!in_interrupt()) - wakeup_softirqd(cpu); + wakeup_softirqd(); } void raise_softirq(unsigned int nr) @@ -139,7 +143,7 @@ unsigned long flags; local_irq_save(flags); - cpu_raise_softirq(smp_processor_id(), nr); + raise_softirq_irqoff(nr); local_irq_restore(flags); } @@ -168,7 +172,7 @@ local_irq_save(flags); t->next = __get_cpu_var(tasklet_vec).list; __get_cpu_var(tasklet_vec).list = t; - cpu_raise_softirq(smp_processor_id(), TASKLET_SOFTIRQ); + raise_softirq_irqoff(TASKLET_SOFTIRQ); local_irq_restore(flags); } @@ -179,7 +183,7 @@ local_irq_save(flags); t->next = __get_cpu_var(tasklet_hi_vec).list; __get_cpu_var(tasklet_hi_vec).list = t; - cpu_raise_softirq(smp_processor_id(), HI_SOFTIRQ); + raise_softirq_irqoff(HI_SOFTIRQ); local_irq_restore(flags); } @@ -211,7 +215,7 @@ local_irq_disable(); t->next = __get_cpu_var(tasklet_vec).list; __get_cpu_var(tasklet_vec).list = t; - __cpu_raise_softirq(smp_processor_id(), TASKLET_SOFTIRQ); + __raise_softirq_irqoff(TASKLET_SOFTIRQ); local_irq_enable(); } } @@ -244,7 +248,7 @@ local_irq_disable(); t->next = __get_cpu_var(tasklet_hi_vec).list; __get_cpu_var(tasklet_hi_vec).list = t; - __cpu_raise_softirq(smp_processor_id(), HI_SOFTIRQ); + __raise_softirq_irqoff(HI_SOFTIRQ); local_irq_enable(); } } @@ -325,7 +329,7 @@ __set_current_state(TASK_INTERRUPTIBLE); mb(); - local_ksoftirqd_task() = current; + __get_cpu_var(ksoftirqd) = current; for (;;) { if (!local_softirq_pending()) @@ -354,7 +358,7 @@ return NOTIFY_BAD; } - while (!ksoftirqd_task(hotcpu)) + while (!per_cpu(ksoftirqd, hotcpu)) yield(); } return NOTIFY_OK; diff -Nru a/lib/kobject.c b/lib/kobject.c --- a/lib/kobject.c Mon Jun 16 09:02:18 2003 +++ b/lib/kobject.c Thu Jul 3 10:41:56 2003 @@ -314,6 +314,21 @@ } /** + * kobject_rename - change the name of an object + * @kobj: object in question. + * @new_name: object's new name + */ + +void kobject_rename(struct kobject * kobj, char *new_name) +{ + kobj = kobject_get(kobj); + if (!kobj) + return; + sysfs_rename_dir(kobj, new_name); + kobject_put(kobj); +} + +/** * kobject_del - unlink kobject from hierarchy. * @kobj: object. */ diff -Nru a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c Mon Jun 30 03:56:22 2003 +++ b/mm/mmap.c Wed Jul 2 21:22:38 2003 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -53,65 +54,9 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */ atomic_t vm_committed_space = ATOMIC_INIT(0); -/* - * Check that a process has enough memory to allocate a new virtual - * mapping. 1 means there is enough memory for the allocation to - * succeed and 0 implies there is not. - * - * We currently support three overcommit policies, which are set via the - * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-acounting - * - * Strict overcommit modes added 2002 Feb 26 by Alan Cox. - * Additional code 2002 Jul 20 by Robert Love. - */ -extern atomic_t slab_reclaim_pages; -int vm_enough_memory(long pages) -{ - unsigned long free, allowed; - - vm_acct_memory(pages); - - /* - * Sometimes we want to use more memory than we have - */ - if (sysctl_overcommit_memory == 1) - return 1; - - if (sysctl_overcommit_memory == 0) { - free = get_page_cache_size(); - free += nr_free_pages(); - free += nr_swap_pages; - - /* - * Any slabs which are created with the - * SLAB_RECLAIM_ACCOUNT flag claim to have contents - * which are reclaimable, under pressure. The dentry - * cache and most inode caches should fall into this - */ - free += atomic_read(&slab_reclaim_pages); - - /* - * Leave the last 3% for root - */ - if (!capable(CAP_SYS_ADMIN)) - free -= free / 32; - - if (free > pages) - return 1; - vm_unacct_memory(pages); - return 0; - } - - allowed = totalram_pages * sysctl_overcommit_ratio / 100; - allowed += total_swap_pages; - - if (atomic_read(&vm_committed_space) < allowed) - return 1; - - vm_unacct_memory(pages); - - return 0; -} +EXPORT_SYMBOL(sysctl_overcommit_memory); +EXPORT_SYMBOL(sysctl_overcommit_ratio); +EXPORT_SYMBOL(vm_committed_space); /* * Requires inode->i_mapping->i_shared_sem @@ -646,7 +591,7 @@ * Private writable mapping: check memory availability */ charged = len >> PAGE_SHIFT; - if (!vm_enough_memory(charged)) + if (security_vm_enough_memory(charged)) return -ENOMEM; vm_flags |= VM_ACCOUNT; } @@ -950,7 +895,7 @@ grow = (address - vma->vm_end) >> PAGE_SHIFT; /* Overcommit.. */ - if (!vm_enough_memory(grow)) { + if (security_vm_enough_memory(grow)) { spin_unlock(&vma->vm_mm->page_table_lock); return -ENOMEM; } @@ -1004,7 +949,7 @@ grow = (vma->vm_start - address) >> PAGE_SHIFT; /* Overcommit.. */ - if (!vm_enough_memory(grow)) { + if (security_vm_enough_memory(grow)) { spin_unlock(&vma->vm_mm->page_table_lock); return -ENOMEM; } @@ -1376,7 +1321,7 @@ if (mm->map_count > MAX_MAP_COUNT) return -ENOMEM; - if (!vm_enough_memory(len >> PAGE_SHIFT)) + if (security_vm_enough_memory(len >> PAGE_SHIFT)) return -ENOMEM; flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; diff -Nru a/mm/mprotect.c b/mm/mprotect.c --- a/mm/mprotect.c Wed Nov 27 15:12:41 2002 +++ b/mm/mprotect.c Wed Jul 2 21:22:38 2003 @@ -175,7 +175,7 @@ if (newflags & VM_WRITE) { if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) { charged = (end - start) >> PAGE_SHIFT; - if (!vm_enough_memory(charged)) + if (security_vm_enough_memory(charged)) return -ENOMEM; newflags |= VM_ACCOUNT; } diff -Nru a/mm/mremap.c b/mm/mremap.c --- a/mm/mremap.c Thu Feb 20 08:37:02 2003 +++ b/mm/mremap.c Wed Jul 2 21:22:38 2003 @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -244,9 +245,7 @@ } if (!move_page_tables(vma, new_addr, addr, old_len)) { - unsigned long must_fault_in; - unsigned long fault_in_start; - unsigned long fault_in_end; + unsigned long vm_locked = vma->vm_flags & VM_LOCKED; if (allocated_vma) { *new_vma = *vma; @@ -272,14 +271,8 @@ } else vma = NULL; /* nothing more to do */ - must_fault_in = new_vma->vm_flags & VM_LOCKED; - fault_in_start = new_vma->vm_start; - fault_in_end = new_vma->vm_end; - do_munmap(current->mm, addr, old_len); - /* new_vma could have been invalidated by do_munmap */ - /* Restore VM_ACCOUNT if one or two pieces of vma left */ if (vma) { vma->vm_flags |= VM_ACCOUNT; @@ -288,9 +281,11 @@ } current->mm->total_vm += new_len >> PAGE_SHIFT; - if (must_fault_in) { + if (vm_locked) { current->mm->locked_vm += new_len >> PAGE_SHIFT; - make_pages_present(fault_in_start, fault_in_end); + if (new_len > old_len) + make_pages_present(new_addr + old_len, + new_addr + new_len); } return new_addr; } @@ -391,7 +386,7 @@ if (vma->vm_flags & VM_ACCOUNT) { charged = (new_len - old_len) >> PAGE_SHIFT; - if (!vm_enough_memory(charged)) + if (security_vm_enough_memory(charged)) goto out_nc; } diff -Nru a/mm/nommu.c b/mm/nommu.c --- a/mm/nommu.c Thu May 8 15:49:08 2003 +++ b/mm/nommu.c Wed Jul 2 21:21:24 2003 @@ -62,11 +62,8 @@ inode->i_size = offset; out_truncate: - if (inode->i_op && inode->i_op->truncate) { - lock_kernel(); + if (inode->i_op && inode->i_op->truncate) inode->i_op->truncate(inode); - unlock_kernel(); - } return 0; out_sig: send_sig(SIGXFSZ, current, 0); diff -Nru a/mm/page_alloc.c b/mm/page_alloc.c --- a/mm/page_alloc.c Tue Jun 10 23:33:21 2003 +++ b/mm/page_alloc.c Wed Jul 2 21:22:38 2003 @@ -32,6 +32,8 @@ #include #include +#include + DECLARE_BITMAP(node_online_map, MAX_NUMNODES); DECLARE_BITMAP(memblk_online_map, MAX_NR_MEMBLKS); struct pglist_data *pgdat_list; @@ -41,6 +43,9 @@ int numnodes = 1; int sysctl_lower_zone_protection = 0; +EXPORT_SYMBOL(totalram_pages); +EXPORT_SYMBOL(nr_swap_pages); + /* * Used by page_zone() to look up the address of the struct zone whose * id is encoded in the upper bits of page->flags @@ -265,6 +270,7 @@ mod_page_state(pgfree, 1 << order); free_pages_check(__FUNCTION__, page); list_add(&page->list, &list); + kernel_map_pages(page, 1<pageset[get_cpu()].pcp[cold]; @@ -556,7 +563,7 @@ (!wait && z->free_pages >= z->pages_high)) { page = buffered_rmqueue(z, order, cold); if (page) - return page; + goto got_pg; } min += z->pages_low * sysctl_lower_zone_protection; } @@ -579,7 +586,7 @@ (!wait && z->free_pages >= z->pages_high)) { page = buffered_rmqueue(z, order, cold); if (page) - return page; + goto got_pg; } min += local_min * sysctl_lower_zone_protection; } @@ -594,7 +601,7 @@ page = buffered_rmqueue(z, order, cold); if (page) - return page; + goto got_pg; } goto nopage; } @@ -622,7 +629,7 @@ (!wait && z->free_pages >= z->pages_high)) { page = buffered_rmqueue(z, order, cold); if (page) - return page; + goto got_pg; } min += z->pages_low * sysctl_lower_zone_protection; } @@ -653,6 +660,9 @@ current->comm, order, gfp_mask); } return NULL; +got_pg: + kernel_map_pages(page, 1 << order, 1); + return page; } /* @@ -726,6 +736,7 @@ return sum; } +EXPORT_SYMBOL(nr_free_pages); unsigned int nr_used_zone_pages(void) { @@ -818,6 +829,7 @@ EXPORT_PER_CPU_SYMBOL(page_states); atomic_t nr_pagecache = ATOMIC_INIT(0); +EXPORT_SYMBOL(nr_pagecache); #ifdef CONFIG_SMP DEFINE_PER_CPU(long, nr_pagecache_local) = 0; #endif @@ -896,7 +908,7 @@ { pg_data_t *pgdat = NODE_DATA(nid); - val->totalram = pgdat->node_size; + val->totalram = pgdat->node_present_pages; val->freeram = nr_free_pages_pgdat(pgdat); val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages; val->freehigh = pgdat->node_zones[ZONE_HIGHMEM].free_pages; @@ -1131,12 +1143,13 @@ for (i = 0; i < MAX_NR_ZONES; i++) totalpages += zones_size[i]; - pgdat->node_size = totalpages; + pgdat->node_spanned_pages = totalpages; realtotalpages = totalpages; if (zholes_size) for (i = 0; i < MAX_NR_ZONES; i++) realtotalpages -= zholes_size[i]; + pgdat->node_present_pages = realtotalpages; printk("On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages); } @@ -1342,7 +1355,7 @@ pgdat->node_start_pfn = node_start_pfn; calculate_zone_totalpages(pgdat, zones_size, zholes_size); if (!node_mem_map) { - size = (pgdat->node_size + 1) * sizeof(struct page); + size = (pgdat->node_spanned_pages + 1) * sizeof(struct page); node_mem_map = alloc_bootmem_node(pgdat, size); } pgdat->node_mem_map = node_mem_map; diff -Nru a/mm/shmem.c b/mm/shmem.c --- a/mm/shmem.c Fri Jun 20 13:16:19 2003 +++ b/mm/shmem.c Wed Jul 2 21:22:38 2003 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -507,7 +508,7 @@ */ change = VM_ACCT(attr->ia_size) - VM_ACCT(inode->i_size); if (change > 0) { - if (!vm_enough_memory(change)) + if (security_vm_enough_memory(change)) return -ENOMEM; } else if (attr->ia_size < inode->i_size) { vm_unacct_memory(-change); @@ -1139,7 +1140,7 @@ maxpos = inode->i_size; if (maxpos < pos + count) { maxpos = pos + count; - if (!vm_enough_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size))) { + if (security_vm_enough_memory(VM_ACCT(maxpos) - VM_ACCT(inode->i_size))) { err = -ENOMEM; goto out; } @@ -1493,7 +1494,7 @@ memcpy(info, symname, len); inode->i_op = &shmem_symlink_inline_operations; } else { - if (!vm_enough_memory(VM_ACCT(1))) { + if (security_vm_enough_memory(VM_ACCT(1))) { iput(inode); return -ENOMEM; } @@ -1887,7 +1888,7 @@ if (size > SHMEM_MAX_BYTES) return ERR_PTR(-EINVAL); - if ((flags & VM_ACCOUNT) && !vm_enough_memory(VM_ACCT(size))) + if ((flags & VM_ACCOUNT) && security_vm_enough_memory(VM_ACCT(size))) return ERR_PTR(-ENOMEM); error = -ENOMEM; diff -Nru a/mm/slab.c b/mm/slab.c --- a/mm/slab.c Fri Jun 27 23:20:42 2003 +++ b/mm/slab.c Wed Jul 2 18:34:36 2003 @@ -89,7 +89,12 @@ #include #include #include +#include +#include + #include +#include +#include /* * DEBUG - 1 for kmem_cache_create() to honour; SLAB_DEBUG_INITIAL, @@ -351,6 +356,34 @@ #define POISON_AFTER 0x6b /* for use-after-free poisoning */ #define POISON_END 0xa5 /* end-byte of poisoning */ +static inline int obj_dbghead(kmem_cache_t *cachep) +{ + if (cachep->flags & SLAB_RED_ZONE) + return BYTES_PER_WORD; + return 0; +} + +static inline int obj_dbglen(kmem_cache_t *cachep) +{ + int len = 0; + + if (cachep->flags & SLAB_RED_ZONE) { + len += 2*BYTES_PER_WORD; + } + if (cachep->flags & SLAB_STORE_USER) { + len += BYTES_PER_WORD; + } + return len; +} +#else +static inline int obj_dbghead(kmem_cache_t *cachep) +{ + return 0; +} +static inline int obj_dbglen(kmem_cache_t *cachep) +{ + return 0; +} #endif /* @@ -430,6 +463,7 @@ * SLAB_RECLAIM_ACCOUNT turns this on per-slab */ atomic_t slab_reclaim_pages; +EXPORT_SYMBOL(slab_reclaim_pages); /* * chicken and egg problem: delay the per-cpu array allocation @@ -441,7 +475,7 @@ FULL } g_cpucache_up; -static struct timer_list reap_timers[NR_CPUS]; +static DEFINE_PER_CPU(struct timer_list, reap_timers); static void reap_timer_fnc(unsigned long data); @@ -491,7 +525,7 @@ */ static void start_cpu_timer(int cpu) { - struct timer_list *rt = &reap_timers[cpu]; + struct timer_list *rt = &per_cpu(reap_timers, cpu); if (rt->function == NULL) { init_timer(rt); @@ -765,16 +799,45 @@ } #if DEBUG -static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val) + +#ifdef CONFIG_DEBUG_PAGEALLOC +static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr, unsigned long caller) { - int size = cachep->objsize; - if (cachep->flags & SLAB_RED_ZONE) { - addr += BYTES_PER_WORD; - size -= 2*BYTES_PER_WORD; - } - if (cachep->flags & SLAB_STORE_USER) { - size -= BYTES_PER_WORD; + int size = cachep->objsize-obj_dbglen(cachep); + + addr = (unsigned long *)&((char*)addr)[obj_dbghead(cachep)]; + + if (size < 5*sizeof(unsigned long)) + return; + + *addr++=0x12345678; + *addr++=caller; + *addr++=smp_processor_id(); + size -= 3*sizeof(unsigned long); + { + unsigned long *sptr = &caller; + unsigned long svalue; + + while (((long) sptr & (THREAD_SIZE-1)) != 0) { + svalue = *sptr++; + if (kernel_text_address(svalue)) { + *addr++=svalue; + size -= sizeof(unsigned long); + if (size <= sizeof(unsigned long)) + break; + } + } + } + *addr++=0x87654321; +} +#endif + +static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val) +{ + int size = cachep->objsize-obj_dbglen(cachep); + addr = &((char*)addr)[obj_dbghead(cachep)]; + memset(addr, val, size); *(unsigned char *)(addr+size-1) = POISON_END; } @@ -796,15 +859,11 @@ static void check_poison_obj(kmem_cache_t *cachep, void *addr) { - int size = cachep->objsize; void *end; - if (cachep->flags & SLAB_RED_ZONE) { - addr += BYTES_PER_WORD; - size -= 2*BYTES_PER_WORD; - } - if (cachep->flags & SLAB_STORE_USER) { - size -= BYTES_PER_WORD; - } + int size = cachep->objsize-obj_dbglen(cachep); + + addr = &((char*)addr)[obj_dbghead(cachep)]; + end = scan_poisoned_obj(addr, size); if (end) { int s; @@ -858,8 +917,16 @@ void *objp = slabp->s_mem + cachep->objsize * i; int objlen = cachep->objsize; - if (cachep->flags & SLAB_POISON) + if (cachep->flags & SLAB_POISON) { +#ifdef CONFIG_DEBUG_PAGEALLOC + if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep)) + kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE,1); + else + check_poison_obj(cachep, objp); +#else check_poison_obj(cachep, objp); +#endif + } if (cachep->flags & SLAB_STORE_USER) objlen -= BYTES_PER_WORD; @@ -952,6 +1019,10 @@ } #if FORCED_DEBUG +#ifdef CONFIG_DEBUG_PAGEALLOC + if (size < PAGE_SIZE-3*BYTES_PER_WORD && size > 128) + size = PAGE_SIZE-3*BYTES_PER_WORD; +#endif /* * Enable redzoning and last user accounting, except * - for caches with forced alignment: redzoning would violate the @@ -1404,6 +1475,8 @@ slab_error(cachep, "constructor overwrote the" " start of an object"); } + if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) + kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0); #else if (cachep->ctor) cachep->ctor(objp, cachep, ctor_flags); @@ -1584,25 +1657,28 @@ * caller can perform a verify of its state (debugging). * Called without the cache-lock held. */ - if (cachep->flags & SLAB_RED_ZONE) { - cachep->ctor(objp+BYTES_PER_WORD, + cachep->ctor(objp+obj_dbghead(cachep), cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY); - } else { - cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY); - } } if (cachep->flags & SLAB_POISON && cachep->dtor) { /* we want to cache poison the object, * call the destruction callback */ - if (cachep->flags & SLAB_RED_ZONE) - cachep->dtor(objp+BYTES_PER_WORD, cachep, 0); - else - cachep->dtor(objp, cachep, 0); + cachep->dtor(objp+obj_dbghead(cachep), cachep, 0); } - if (cachep->flags & SLAB_POISON) + if (cachep->flags & SLAB_POISON) { +#ifdef CONFIG_DEBUG_PAGEALLOC + if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) { + store_stackinfo(cachep, objp, POISON_AFTER); + kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0); + } else { + poison_obj(cachep, objp, POISON_AFTER); + } +#else poison_obj(cachep, objp, POISON_AFTER); #endif + } +#endif return objp; } @@ -1617,6 +1693,7 @@ for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { entries++; BUG_ON(entries > cachep->num); + BUG_ON(i < 0 || i >= cachep->num); } BUG_ON(entries != cachep->num - slabp->inuse); #endif @@ -1746,9 +1823,16 @@ if (!objp) return objp; - if (cachep->flags & SLAB_POISON) { + if (cachep->flags & SLAB_POISON) { +#ifdef CONFIG_DEBUG_PAGEALLOC + if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) + kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 1); + else + check_poison_obj(cachep, objp); +#else check_poison_obj(cachep, objp); - poison_obj(cachep, objp, POISON_BEFORE); +#endif + poison_obj(cachep, objp, POISON_BEFORE); } if (cachep->flags & SLAB_STORE_USER) { objlen -= BYTES_PER_WORD; @@ -2085,16 +2169,7 @@ unsigned int kmem_cache_size(kmem_cache_t *cachep) { - unsigned int objlen = cachep->objsize; - -#if DEBUG - if (cachep->flags & SLAB_RED_ZONE) - objlen -= 2*BYTES_PER_WORD; - if (cachep->flags & SLAB_STORE_USER) - objlen -= BYTES_PER_WORD; -#endif - - return objlen; + return cachep->objsize-obj_dbglen(cachep); } kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags) @@ -2382,7 +2457,7 @@ static void reap_timer_fnc(unsigned long data) { int cpu = smp_processor_id(); - struct timer_list *rt = &reap_timers[cpu]; + struct timer_list *rt = &__get_cpu_var(reap_timers); cache_reap(); mod_timer(rt, jiffies + REAPTIMEOUT_CPUC + cpu); @@ -2626,3 +2701,70 @@ return size; } +void ptrinfo(unsigned long addr) +{ + struct page *page; + + printk("Dumping data about address %p.\n", (void*)addr); + if (!virt_addr_valid((void*)addr)) { + printk("virt addr invalid.\n"); + return; + } + do { + pgd_t *pgd = pgd_offset_k(addr); + pmd_t *pmd; + if (pgd_none(*pgd)) { + printk("No pgd.\n"); + break; + } + pmd = pmd_offset(pgd, addr); + if (pmd_none(*pmd)) { + printk("No pmd.\n"); + break; + } +#ifdef CONFIG_X86 + if (pmd_large(*pmd)) { + printk("Large page.\n"); + break; + } +#endif + printk("normal page, pte_val 0x%llx\n", + (unsigned long long)pte_val(*pte_offset_kernel(pmd, addr))); + } while(0); + + page = virt_to_page((void*)addr); + printk("struct page at %p, flags %lxh.\n", page, page->flags); + if (PageSlab(page)) { + kmem_cache_t *c; + struct slab *s; + unsigned long flags; + int objnr; + void *objp; + + c = GET_PAGE_CACHE(page); + printk("belongs to cache %s.\n",c->name); + + spin_lock_irqsave(&c->spinlock, flags); + s = GET_PAGE_SLAB(page); + printk("slabp %p with %d inuse objects (from %d).\n", + s, s->inuse, c->num); + check_slabp(c,s); + + objnr = (addr-(unsigned long)s->s_mem)/c->objsize; + objp = s->s_mem+c->objsize*objnr; + printk("points into object no %d, starting at %p, len %d.\n", + objnr, objp, c->objsize); + if (objnr >= c->num) { + printk("Bad obj number.\n"); + } else { + kernel_map_pages(virt_to_page(objp), c->objsize/PAGE_SIZE, 1); + + printk("redzone: %lxh/%lxh/%lxh.\n", + ((unsigned long*)objp)[0], + ((unsigned long*)(objp+c->objsize))[-2], + ((unsigned long*)(objp+c->objsize))[-1]); + } + spin_unlock_irqrestore(&c->spinlock, flags); + + } +} diff -Nru a/mm/swap.c b/mm/swap.c --- a/mm/swap.c Fri Jun 27 01:46:43 2003 +++ b/mm/swap.c Wed Jul 2 21:22:38 2003 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include /* for try_to_release_page() */ #include @@ -370,6 +371,7 @@ } preempt_enable(); } +EXPORT_SYMBOL(vm_acct_memory); #endif diff -Nru a/mm/swapfile.c b/mm/swapfile.c --- a/mm/swapfile.c Mon May 19 09:02:40 2003 +++ b/mm/swapfile.c Wed Jul 2 21:22:38 2003 @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include @@ -30,6 +32,8 @@ int total_swap_pages; static int swap_overflow; +EXPORT_SYMBOL(total_swap_pages); + static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; @@ -1042,7 +1046,7 @@ swap_list_unlock(); goto out_dput; } - if (vm_enough_memory(p->pages)) + if (!security_vm_enough_memory(p->pages)) vm_unacct_memory(p->pages); else { err = -ENOMEM; diff -Nru a/net/core/dev.c b/net/core/dev.c --- a/net/core/dev.c Sat Jun 28 00:10:39 2003 +++ b/net/core/dev.c Thu Jul 3 00:32:44 2003 @@ -1712,7 +1712,7 @@ softnet_break: netdev_rx_stat[this_cpu].time_squeeze++; - __cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); goto out; } diff -Nru a/security/capability.c b/security/capability.c --- a/security/capability.c Wed Jun 25 16:30:52 2003 +++ b/security/capability.c Wed Jul 2 21:22:38 2003 @@ -15,6 +15,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -275,6 +278,65 @@ return 0; } +/* + * Check that a process has enough memory to allocate a new virtual + * mapping. 0 means there is enough memory for the allocation to + * succeed and -ENOMEM implies there is not. + * + * We currently support three overcommit policies, which are set via the + * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-acounting + * + * Strict overcommit modes added 2002 Feb 26 by Alan Cox. + * Additional code 2002 Jul 20 by Robert Love. + */ +int cap_vm_enough_memory(long pages) +{ + unsigned long free, allowed; + + vm_acct_memory(pages); + + /* + * Sometimes we want to use more memory than we have + */ + if (sysctl_overcommit_memory == 1) + return 0; + + if (sysctl_overcommit_memory == 0) { + free = get_page_cache_size(); + free += nr_free_pages(); + free += nr_swap_pages; + + /* + * Any slabs which are created with the + * SLAB_RECLAIM_ACCOUNT flag claim to have contents + * which are reclaimable, under pressure. The dentry + * cache and most inode caches should fall into this + */ + free += atomic_read(&slab_reclaim_pages); + + /* + * Leave the last 3% for root + */ + if (!capable(CAP_SYS_ADMIN)) + free -= free / 32; + + if (free > pages) + return 0; + vm_unacct_memory(pages); + return -ENOMEM; + } + + allowed = totalram_pages * sysctl_overcommit_ratio / 100; + allowed += total_swap_pages; + + if (atomic_read(&vm_committed_space) < allowed) + return 0; + + vm_unacct_memory(pages); + + return -ENOMEM; +} + EXPORT_SYMBOL(cap_capable); EXPORT_SYMBOL(cap_ptrace); EXPORT_SYMBOL(cap_capget); @@ -286,6 +348,7 @@ EXPORT_SYMBOL(cap_task_post_setuid); EXPORT_SYMBOL(cap_task_reparent_to_init); EXPORT_SYMBOL(cap_syslog); +EXPORT_SYMBOL(cap_vm_enough_memory); #ifdef CONFIG_SECURITY @@ -307,6 +370,8 @@ .task_reparent_to_init = cap_task_reparent_to_init, .syslog = cap_syslog, + + .vm_enough_memory = cap_vm_enough_memory, }; #if defined(CONFIG_SECURITY_CAPABILITIES_MODULE) diff -Nru a/security/dummy.c b/security/dummy.c --- a/security/dummy.c Wed Jun 25 16:30:52 2003 +++ b/security/dummy.c Wed Jul 2 21:22:38 2003 @@ -17,6 +17,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -97,6 +100,54 @@ return 0; } +static int dummy_vm_enough_memory(long pages) +{ + unsigned long free, allowed; + + vm_acct_memory(pages); + + /* + * Sometimes we want to use more memory than we have + */ + if (sysctl_overcommit_memory == 1) + return 0; + + if (sysctl_overcommit_memory == 0) { + free = get_page_cache_size(); + free += nr_free_pages(); + free += nr_swap_pages; + + /* + * Any slabs which are created with the + * SLAB_RECLAIM_ACCOUNT flag claim to have contents + * which are reclaimable, under pressure. The dentry + * cache and most inode caches should fall into this + */ + free += atomic_read(&slab_reclaim_pages); + + /* + * Leave the last 3% for root + */ + if (current->euid) + free -= free / 32; + + if (free > pages) + return 0; + vm_unacct_memory(pages); + return -ENOMEM; + } + + allowed = totalram_pages * sysctl_overcommit_ratio / 100; + allowed += total_swap_pages; + + if (atomic_read(&vm_committed_space) < allowed) + return 0; + + vm_unacct_memory(pages); + + return -ENOMEM; +} + static int dummy_bprm_alloc_security (struct linux_binprm *bprm) { return 0; @@ -793,6 +844,7 @@ set_to_dummy_if_null(ops, quota_on); set_to_dummy_if_null(ops, sysctl); set_to_dummy_if_null(ops, syslog); + set_to_dummy_if_null(ops, vm_enough_memory); set_to_dummy_if_null(ops, bprm_alloc_security); set_to_dummy_if_null(ops, bprm_free_security); set_to_dummy_if_null(ops, bprm_compute_creds);