## Automatically generated incremental diff ## From: linux-2.5.67-bk4 ## To: linux-2.5.67-bk5 ## Robot: $Id: make-incremental-diff,v 1.11 2002/02/20 02:59:33 hpa Exp $ diff -urN linux-2.5.67-bk4/Documentation/cachetlb.txt linux-2.5.67-bk5/Documentation/cachetlb.txt --- linux-2.5.67-bk4/Documentation/cachetlb.txt 2003-04-07 10:31:03.000000000 -0700 +++ linux-2.5.67-bk5/Documentation/cachetlb.txt 2003-04-13 04:36:10.000000000 -0700 @@ -75,7 +75,7 @@ Platform developers note that generic code will always invoke this interface with mm->page_table_lock held. -4) void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +4) void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) This time we need to remove the PAGE_SIZE sized translation from the TLB. The 'vma' is the backing structure used by @@ -87,9 +87,9 @@ After running, this interface must make sure that any previous page table modification for address space 'vma->vm_mm' for - user virtual address 'page' will be visible to the cpu. That + user virtual address 'addr' will be visible to the cpu. That is, after running, there will be no entries in the TLB for - 'vma->vm_mm' for virtual address 'page'. + 'vma->vm_mm' for virtual address 'addr'. This is used primarily during fault processing. @@ -144,9 +144,9 @@ change_range_of_page_tables(mm, start, end); flush_tlb_range(vma, start, end); - 3) flush_cache_page(vma, page); + 3) flush_cache_page(vma, addr); set_pte(pte_pointer, new_pte_val); - flush_tlb_page(vma, page); + flush_tlb_page(vma, addr); The cache level flush will always be first, because this allows us to properly handle systems whose caches are strict and require @@ -200,7 +200,7 @@ call flush_cache_page (see below) for each entry which may be modified. -4) void flush_cache_page(struct vm_area_struct *vma, unsigned long page) +4) void flush_cache_page(struct vm_area_struct *vma, unsigned long addr) This time we need to remove a PAGE_SIZE sized range from the cache. 
The 'vma' is the backing structure used by @@ -211,7 +211,7 @@ "Harvard" type cache layouts). After running, there will be no entries in the cache for - 'vma->vm_mm' for virtual address 'page'. + 'vma->vm_mm' for virtual address 'addr'. This is used primarily during fault processing. @@ -235,7 +235,7 @@ NOTE: This does not fix shared mmaps, check out the sparc64 port for one way to solve this (in particular SPARC_FLAG_MMAPSHARED). -Next, you have two methods to solve the D-cache aliasing issue for all +Next, you have to solve the D-cache aliasing issue for all other cases. Please keep in mind that fact that, for a given page mapped into some user address space, there is always at least one more mapping, that of the kernel in it's linear mapping starting at @@ -244,35 +244,8 @@ aliasing problem has the potential to exist since the kernel already maps this page at its virtual address. -First, I describe the old method to deal with this problem. I am -describing it for documentation purposes, but it is deprecated and the -latter method I describe next should be used by all new ports and all -existing ports should move over to the new mechanism as well. - - flush_page_to_ram(struct page *page) - - The physical page 'page' is about to be place into the - user address space of a process. If it is possible for - stores done recently by the kernel into this physical - page, to not be visible to an arbitrary mapping in userspace, - you must flush this page from the D-cache. - - If the D-cache is writeback in nature, the dirty data (if - any) for this physical page must be written back to main - memory before the cache lines are invalidated. - -Admittedly, the author did not think very much when designing this -interface. It does not give the architecture enough information about -what exactly is going on, and there is no context to base a judgment -on about whether an alias is possible at all. 
The new interfaces to -deal with D-cache aliasing are meant to address this by telling the -architecture specific code exactly which is going on at the proper points -in time. - -Here is the new interface: - - void copy_user_page(void *to, void *from, unsigned long address) - void clear_user_page(void *to, unsigned long address) + void copy_user_page(void *to, void *from, unsigned long addr, struct page *page) + void clear_user_page(void *to, unsigned long addr, struct page *page) These two routines store data in user anonymous or COW pages. It allows a port to efficiently avoid D-cache alias @@ -285,8 +258,9 @@ of the same "color" as the user mapping of the page. Sparc64 for example, uses this technique. - The "address" parameter tells the virtual address where the - user will ultimately have this page mapped. + The 'addr' parameter tells the virtual address where the + user will ultimately have this page mapped, and the 'page' + parameter gives a pointer to the struct page of the target. If D-cache aliasing is not an issue, these two routines may simply call memcpy/memset directly and do nothing more. @@ -363,5 +337,5 @@ void flush_icache_page(struct vm_area_struct *vma, struct page *page) All the functionality of flush_icache_page can be implemented in - flush_dcache_page and update_mmu_cache. In 2.5 the hope is to + flush_dcache_page and update_mmu_cache. In 2.7 the hope is to remove this interface completely. diff -urN linux-2.5.67-bk4/Documentation/filesystems/proc.txt linux-2.5.67-bk5/Documentation/filesystems/proc.txt --- linux-2.5.67-bk4/Documentation/filesystems/proc.txt 2003-04-07 10:30:59.000000000 -0700 +++ linux-2.5.67-bk5/Documentation/filesystems/proc.txt 2003-04-13 04:36:10.000000000 -0700 @@ -362,6 +362,93 @@ ide-cdrom version 4.53 ide-disk version 1.08 +.............................................................................. + +meminfo: + +Provides information about distribution and utilization of memory. 
This +varies by architecture and compile options. The following is from a +16GB PIII, which has highmem enabled. You may not have all of these fields. + +> cat /proc/meminfo + + +MemTotal: 16344972 kB +MemFree: 13634064 kB +Buffers: 3656 kB +Cached: 1195708 kB +SwapCached: 0 kB +Active: 891636 kB +Inactive: 1077224 kB +HighTotal: 15597528 kB +HighFree: 13629632 kB +LowTotal: 747444 kB +LowFree: 4432 kB +SwapTotal: 0 kB +SwapFree: 0 kB +Dirty: 968 kB +Writeback: 0 kB +Mapped: 280372 kB +Slab: 684068 kB +Committed_AS: 1576424 kB +PageTables: 24448 kB +ReverseMaps: 1080904 +VmallocTotal: 112216 kB +VmallocUsed: 428 kB +VmallocChunk: 111088 kB + + MemTotal: Total usable ram (i.e. physical ram minus a few reserved + bits and the kernel binary code) + MemFree: The sum of LowFree+HighFree + Buffers: Relatively temporary storage for raw disk blocks + shouldn't get tremendously large (20MB or so) + Cached: in-memory cache for files read from the disk (the + pagecache). Doesn't include SwapCached + SwapCached: Memory that once was swapped out, is swapped back in but + still also is in the swapfile (if memory is needed it + doesn't need to be swapped out AGAIN because it is already + in the swapfile. This saves I/O) + Active: Memory that has been used more recently and usually not + reclaimed unless absolutely necessary. + Inactive: Memory which has been less recently used. It is more + eligible to be reclaimed for other purposes + HighTotal: + HighFree: Highmem is all memory above ~860MB of physical memory + Highmem areas are for use by userspace programs, or + for the pagecache. The kernel must use tricks to access + this memory, making it slower to access than lowmem. + LowTotal: + LowFree: Lowmem is memory which can be used for everything that + highmem can be used for, but it is also available for the + kernel's use for its own data structures. Among many + other things, it is where everything from the Slab is + allocated. Bad things happen when you're out of lowmem. 
+ SwapTotal: total amount of swap space available + SwapFree: Memory which has been evicted from RAM, and is temporarily + on the disk + Dirty: Memory which is waiting to get written back to the disk + Writeback: Memory which is actively being written back to the disk + Mapped: files which have been mmapped, such as libraries + Slab: in-kernel data structures cache +Committed_AS: An estimate of how much RAM you would need to make a + 99.99% guarantee that there never is OOM (out of memory) + for this workload. Normally the kernel will overcommit + memory. That means, say you do a 1GB malloc, nothing + happens, really. Only when you start USING that malloc + memory you will get real memory on demand, and just as + much as you use. So you sort of take a mortgage and hope + the bank doesn't go bust. Other cases might include when + you mmap a file that's shared only when you write to it + and you get a private copy of that data. While it normally + is shared between processes. The Committed_AS is a + guesstimate of how much RAM/swap you would need + worst-case. + PageTables: amount of memory dedicated to the lowest level of page + tables. + ReverseMaps: number of reverse mappings performed +VmallocTotal: total size of vmalloc memory area + VmallocUsed: amount of vmalloc area which is used +VmallocChunk: largest contiguous block of vmalloc area which is free More detailed information can be found in the controller specific subdirectories. These are named ide0, ide1 and so on. 
Each of these diff -urN linux-2.5.67-bk4/Makefile linux-2.5.67-bk5/Makefile --- linux-2.5.67-bk4/Makefile 2003-04-13 04:36:05.000000000 -0700 +++ linux-2.5.67-bk5/Makefile 2003-04-13 04:36:10.000000000 -0700 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 5 SUBLEVEL = 67 -EXTRAVERSION = -bk4 +EXTRAVERSION = -bk5 # *DOCUMENTATION* # To see a list of typical targets execute "make help" diff -urN linux-2.5.67-bk4/arch/alpha/vmlinux.lds.S linux-2.5.67-bk5/arch/alpha/vmlinux.lds.S --- linux-2.5.67-bk4/arch/alpha/vmlinux.lds.S 2003-04-07 10:31:24.000000000 -0700 +++ linux-2.5.67-bk5/arch/alpha/vmlinux.lds.S 2003-04-13 04:36:10.000000000 -0700 @@ -32,7 +32,11 @@ /* Will be freed after init */ . = ALIGN(8192); /* Init code and data */ __init_begin = .; - .init.text : { *(.init.text) } + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } .init.data : { *(.init.data) } . = ALIGN(16); diff -urN linux-2.5.67-bk4/arch/arm/vmlinux-armo.lds.in linux-2.5.67-bk5/arch/arm/vmlinux-armo.lds.in --- linux-2.5.67-bk4/arch/arm/vmlinux-armo.lds.in 2003-04-07 10:32:52.000000000 -0700 +++ linux-2.5.67-bk5/arch/arm/vmlinux-armo.lds.in 2003-04-13 04:36:10.000000000 -0700 @@ -14,7 +14,9 @@ .init : { /* Init code and data */ _stext = .; __init_begin = .; + _sinittext = .; *(.init.text) + _einittext = .; __proc_info_begin = .; *(.proc.info) __proc_info_end = .; diff -urN linux-2.5.67-bk4/arch/arm/vmlinux-armv.lds.in linux-2.5.67-bk5/arch/arm/vmlinux-armv.lds.in --- linux-2.5.67-bk4/arch/arm/vmlinux-armv.lds.in 2003-04-07 10:31:03.000000000 -0700 +++ linux-2.5.67-bk5/arch/arm/vmlinux-armv.lds.in 2003-04-13 04:36:10.000000000 -0700 @@ -18,7 +18,9 @@ .init : { /* Init code and data */ _stext = .; __init_begin = .; + _sinittext = .; *(.init.text) + _einittext = .; __proc_info_begin = .; *(.proc.info) __proc_info_end = .; diff -urN linux-2.5.67-bk4/arch/i386/vmlinux.lds.S linux-2.5.67-bk5/arch/i386/vmlinux.lds.S --- linux-2.5.67-bk4/arch/i386/vmlinux.lds.S 2003-04-07 10:30:44.000000000 
-0700 +++ linux-2.5.67-bk5/arch/i386/vmlinux.lds.S 2003-04-13 04:36:10.000000000 -0700 @@ -54,7 +54,11 @@ /* will be freed after init */ . = ALIGN(4096); /* Init code and data */ __init_begin = .; - .init.text : { *(.init.text) } + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } .init.data : { *(.init.data) } . = ALIGN(16); __setup_start = .; diff -urN linux-2.5.67-bk4/arch/ia64/mm/init.c linux-2.5.67-bk5/arch/ia64/mm/init.c --- linux-2.5.67-bk4/arch/ia64/mm/init.c 2003-04-07 10:31:09.000000000 -0700 +++ linux-2.5.67-bk5/arch/ia64/mm/init.c 2003-04-13 04:36:10.000000000 -0700 @@ -251,7 +251,6 @@ pte_unmap(pte); goto out; } - flush_page_to_ram(page); set_pte(pte, mk_pte(page, PAGE_GATE)); pte_unmap(pte); } diff -urN linux-2.5.67-bk4/arch/ia64/vmlinux.lds.S linux-2.5.67-bk5/arch/ia64/vmlinux.lds.S --- linux-2.5.67-bk4/arch/ia64/vmlinux.lds.S 2003-04-07 10:32:27.000000000 -0700 +++ linux-2.5.67-bk5/arch/ia64/vmlinux.lds.S 2003-04-13 04:36:10.000000000 -0700 @@ -96,7 +96,11 @@ . = ALIGN(PAGE_SIZE); __init_begin = .; .init.text : AT(ADDR(.init.text) - PAGE_OFFSET) - { *(.init.text) } + { + _sinittext = .; + *(.init.text) + _einittext = .; + } .init.data : AT(ADDR(.init.data) - PAGE_OFFSET) { *(.init.data) } diff -urN linux-2.5.67-bk4/arch/m68k/vmlinux-std.lds linux-2.5.67-bk5/arch/m68k/vmlinux-std.lds --- linux-2.5.67-bk4/arch/m68k/vmlinux-std.lds 2003-04-07 10:32:29.000000000 -0700 +++ linux-2.5.67-bk5/arch/m68k/vmlinux-std.lds 2003-04-13 04:36:10.000000000 -0700 @@ -40,7 +40,11 @@ /* will be freed after init */ . = ALIGN(4096); /* Init code and data */ __init_begin = .; - .init.text : { *(.init.text) } + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } .init.data : { *(.init.data) } . 
= ALIGN(16); __setup_start = .; diff -urN linux-2.5.67-bk4/arch/m68k/vmlinux-sun3.lds linux-2.5.67-bk5/arch/m68k/vmlinux-sun3.lds --- linux-2.5.67-bk4/arch/m68k/vmlinux-sun3.lds 2003-04-07 10:30:33.000000000 -0700 +++ linux-2.5.67-bk5/arch/m68k/vmlinux-sun3.lds 2003-04-13 04:36:10.000000000 -0700 @@ -34,7 +34,11 @@ /* will be freed after init */ . = ALIGN(8192); /* Init code and data */ __init_begin = .; - .init.text : { *(.init.text) } + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } .init.data : { *(.init.data) } . = ALIGN(16); __setup_start = .; diff -urN linux-2.5.67-bk4/arch/m68knommu/vmlinux.lds.S linux-2.5.67-bk5/arch/m68knommu/vmlinux.lds.S --- linux-2.5.67-bk4/arch/m68knommu/vmlinux.lds.S 2003-04-07 10:31:07.000000000 -0700 +++ linux-2.5.67-bk5/arch/m68knommu/vmlinux.lds.S 2003-04-13 04:36:10.000000000 -0700 @@ -282,7 +282,9 @@ .init : { . = ALIGN(4096); __init_begin = .; + _sinittext = .; *(.init.text) + _einittext = .; *(.init.data) . = ALIGN(16); __setup_start = .; diff -urN linux-2.5.67-bk4/arch/mips64/kernel/linux32.c linux-2.5.67-bk5/arch/mips64/kernel/linux32.c --- linux-2.5.67-bk4/arch/mips64/kernel/linux32.c 2003-04-07 10:31:51.000000000 -0700 +++ linux-2.5.67-bk5/arch/mips64/kernel/linux32.c 2003-04-13 04:36:10.000000000 -0700 @@ -195,7 +195,7 @@ } err = copy_from_user(kaddr + offset, (char *)A(str), bytes_to_copy); - flush_page_to_ram(page); + flush_dcache_page(page); kunmap(page); if (err) diff -urN linux-2.5.67-bk4/arch/parisc/kernel/sys_parisc32.c linux-2.5.67-bk5/arch/parisc/kernel/sys_parisc32.c --- linux-2.5.67-bk4/arch/parisc/kernel/sys_parisc32.c 2003-04-13 04:36:05.000000000 -0700 +++ linux-2.5.67-bk5/arch/parisc/kernel/sys_parisc32.c 2003-04-13 04:36:10.000000000 -0700 @@ -183,7 +183,6 @@ } err = copy_from_user(kaddr + offset, (char *)A(str), bytes_to_copy); flush_dcache_page(page); - flush_page_to_ram(page); kunmap(page); if (err) diff -urN linux-2.5.67-bk4/arch/parisc/vmlinux.lds.S 
linux-2.5.67-bk5/arch/parisc/vmlinux.lds.S --- linux-2.5.67-bk4/arch/parisc/vmlinux.lds.S 2003-04-07 10:30:45.000000000 -0700 +++ linux-2.5.67-bk5/arch/parisc/vmlinux.lds.S 2003-04-13 04:36:10.000000000 -0700 @@ -53,7 +53,11 @@ . = ALIGN(16384); __init_begin = .; - .init.text : { *(.init.text) } + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } .init.data : { *(.init.data) } . = ALIGN(16); __setup_start = .; diff -urN linux-2.5.67-bk4/arch/ppc/vmlinux.lds.S linux-2.5.67-bk5/arch/ppc/vmlinux.lds.S --- linux-2.5.67-bk4/arch/ppc/vmlinux.lds.S 2003-04-07 10:33:04.000000000 -0700 +++ linux-2.5.67-bk5/arch/ppc/vmlinux.lds.S 2003-04-13 04:36:10.000000000 -0700 @@ -78,7 +78,11 @@ . = ALIGN(4096); __init_begin = .; - .init.text : { *(.init.text) } + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } .init.data : { *(.init.data); __vtop_table_begin = .; diff -urN linux-2.5.67-bk4/arch/ppc64/kernel/sys_ppc32.c linux-2.5.67-bk5/arch/ppc64/kernel/sys_ppc32.c --- linux-2.5.67-bk4/arch/ppc64/kernel/sys_ppc32.c 2003-04-13 04:36:05.000000000 -0700 +++ linux-2.5.67-bk5/arch/ppc64/kernel/sys_ppc32.c 2003-04-13 04:36:10.000000000 -0700 @@ -2077,7 +2077,6 @@ err = copy_from_user(kaddr + offset, (char *)A(str), bytes_to_copy); - flush_page_to_ram(page); kunmap((unsigned long)kaddr); if (err) diff -urN linux-2.5.67-bk4/arch/ppc64/vmlinux.lds.S linux-2.5.67-bk5/arch/ppc64/vmlinux.lds.S --- linux-2.5.67-bk4/arch/ppc64/vmlinux.lds.S 2003-04-07 10:32:18.000000000 -0700 +++ linux-2.5.67-bk5/arch/ppc64/vmlinux.lds.S 2003-04-13 04:36:10.000000000 -0700 @@ -77,7 +77,11 @@ /* will be freed after init */ . = ALIGN(4096); __init_begin = .; - .init.text : { *(.init.text) } + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } .init.data : { *(.init.data) } . 
= ALIGN(16); __setup_start = .; diff -urN linux-2.5.67-bk4/arch/s390/vmlinux.lds.S linux-2.5.67-bk5/arch/s390/vmlinux.lds.S --- linux-2.5.67-bk4/arch/s390/vmlinux.lds.S 2003-04-07 10:31:03.000000000 -0700 +++ linux-2.5.67-bk5/arch/s390/vmlinux.lds.S 2003-04-13 04:36:10.000000000 -0700 @@ -58,7 +58,11 @@ /* will be freed after init */ . = ALIGN(4096); /* Init code and data */ __init_begin = .; - .init.text : { *(.init.text) } + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } .init.data : { *(.init.data) } . = ALIGN(256); __setup_start = .; diff -urN linux-2.5.67-bk4/arch/s390x/kernel/linux32.c linux-2.5.67-bk5/arch/s390x/kernel/linux32.c --- linux-2.5.67-bk4/arch/s390x/kernel/linux32.c 2003-04-07 10:32:16.000000000 -0700 +++ linux-2.5.67-bk5/arch/s390x/kernel/linux32.c 2003-04-13 04:36:10.000000000 -0700 @@ -1888,7 +1888,6 @@ err = copy_from_user(kaddr + offset, (char *)A(str), bytes_to_copy); - flush_page_to_ram(page); kunmap(page); if (err) diff -urN linux-2.5.67-bk4/arch/s390x/vmlinux.lds.S linux-2.5.67-bk5/arch/s390x/vmlinux.lds.S --- linux-2.5.67-bk4/arch/s390x/vmlinux.lds.S 2003-04-07 10:31:00.000000000 -0700 +++ linux-2.5.67-bk5/arch/s390x/vmlinux.lds.S 2003-04-13 04:36:10.000000000 -0700 @@ -58,7 +58,11 @@ /* will be freed after init */ . = ALIGN(4096); /* Init code and data */ __init_begin = .; - .init.text : { *(.init.text) } + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } .init.data : { *(.init.data) } . = ALIGN(256); __setup_start = .; diff -urN linux-2.5.67-bk4/arch/sparc/vmlinux.lds.S linux-2.5.67-bk5/arch/sparc/vmlinux.lds.S --- linux-2.5.67-bk4/arch/sparc/vmlinux.lds.S 2003-04-07 10:30:39.000000000 -0700 +++ linux-2.5.67-bk5/arch/sparc/vmlinux.lds.S 2003-04-13 04:36:10.000000000 -0700 @@ -34,7 +34,11 @@ . = ALIGN(4096); __init_begin = .; - .init.text : { *(.init.text) } + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } __init_text_end = .; .init.data : { *(.init.data) } . 
= ALIGN(16); diff -urN linux-2.5.67-bk4/arch/sparc64/vmlinux.lds.S linux-2.5.67-bk5/arch/sparc64/vmlinux.lds.S --- linux-2.5.67-bk4/arch/sparc64/vmlinux.lds.S 2003-04-07 10:32:27.000000000 -0700 +++ linux-2.5.67-bk5/arch/sparc64/vmlinux.lds.S 2003-04-13 04:36:10.000000000 -0700 @@ -41,7 +41,11 @@ . = ALIGN(8192); __init_begin = .; - .init.text : { *(.init.text) } + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } .init.data : { *(.init.data) } . = ALIGN(16); __setup_start = .; diff -urN linux-2.5.67-bk4/arch/v850/vmlinux.lds.S linux-2.5.67-bk5/arch/v850/vmlinux.lds.S --- linux-2.5.67-bk4/arch/v850/vmlinux.lds.S 2003-04-07 10:31:53.000000000 -0700 +++ linux-2.5.67-bk5/arch/v850/vmlinux.lds.S 2003-04-13 04:36:10.000000000 -0700 @@ -105,7 +105,9 @@ #define RAMK_INIT_CONTENTS_NO_END \ . = ALIGN (4096) ; \ __init_start = . ; \ + _sinittext = .; \ *(.init.text) /* 2.5 convention */ \ + _einittext = .; \ *(.init.data) \ *(.text.init) /* 2.4 convention */ \ *(.data.init) \ @@ -125,7 +127,9 @@ /* The contents of `init' section for a ROM-resident kernel which should go into ROM. */ #define ROMK_INIT_ROM_CONTENTS \ + _sinittext = .; \ *(.init.text) /* 2.5 convention */ \ + _einittext = .; \ *(.text.init) /* 2.4 convention */ \ INITCALL_CONTENTS \ INITRAMFS_CONTENTS diff -urN linux-2.5.67-bk4/arch/x86_64/vmlinux.lds.S linux-2.5.67-bk5/arch/x86_64/vmlinux.lds.S --- linux-2.5.67-bk4/arch/x86_64/vmlinux.lds.S 2003-04-07 10:30:38.000000000 -0700 +++ linux-2.5.67-bk5/arch/x86_64/vmlinux.lds.S 2003-04-13 04:36:10.000000000 -0700 @@ -78,7 +78,11 @@ . = ALIGN(4096); /* Init code and data */ __init_begin = .; - .init.text : { *(.init.text) } + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } .init.data : { *(.init.data) } . 
= ALIGN(16); __setup_start = .; diff -urN linux-2.5.67-bk4/drivers/char/tty_io.c linux-2.5.67-bk5/drivers/char/tty_io.c --- linux-2.5.67-bk4/drivers/char/tty_io.c 2003-04-13 04:36:06.000000000 -0700 +++ linux-2.5.67-bk5/drivers/char/tty_io.c 2003-04-13 04:36:10.000000000 -0700 @@ -1874,7 +1874,7 @@ } task_lock(p); if (p->files) { - read_lock(&p->files->file_lock); + spin_lock(&p->files->file_lock); for (i=0; i < p->files->max_fds; i++) { filp = fcheck_files(p->files, i); if (filp && (filp->f_op == &tty_fops) && @@ -1886,7 +1886,7 @@ break; } } - read_unlock(&p->files->file_lock); + spin_unlock(&p->files->file_lock); } task_unlock(p); } diff -urN linux-2.5.67-bk4/drivers/ieee1394/pcilynx.c linux-2.5.67-bk5/drivers/ieee1394/pcilynx.c --- linux-2.5.67-bk4/drivers/ieee1394/pcilynx.c 2003-04-13 04:36:06.000000000 -0700 +++ linux-2.5.67-bk5/drivers/ieee1394/pcilynx.c 2003-04-13 04:36:10.000000000 -0700 @@ -72,9 +72,9 @@ /* Module Parameters */ +static int skip_eeprom = 0; module_param(skip_eeprom, int, 0444); MODULE_PARM_DESC(skip_eeprom, "Use generic bus info block instead of serial eeprom (default = 0)."); -static int skip_eeprom = 0; static struct hpsb_host_driver lynx_driver; diff -urN linux-2.5.67-bk4/drivers/md/raid1.c linux-2.5.67-bk5/drivers/md/raid1.c --- linux-2.5.67-bk4/drivers/md/raid1.c 2003-04-07 10:31:41.000000000 -0700 +++ linux-2.5.67-bk5/drivers/md/raid1.c 2003-04-13 04:36:10.000000000 -0700 @@ -238,11 +238,12 @@ * operation and are ready to return a success/failure code to the buffer * cache layer. */ -static void raid_end_bio_io(r1bio_t *r1_bio, int uptodate) +static void raid_end_bio_io(r1bio_t *r1_bio) { struct bio *bio = r1_bio->master_bio; - bio_endio(bio, bio->bi_size, uptodate ? 0 : -EIO); + bio_endio(bio, bio->bi_size, + test_bit(R1BIO_Uptodate, &r1_bio->state) ? 
0 : -EIO); free_r1bio(r1_bio); } @@ -299,7 +300,7 @@ * we have only one bio on the read side */ if (uptodate) - raid_end_bio_io(r1_bio, uptodate); + raid_end_bio_io(r1_bio); else { /* * oops, read error: @@ -320,7 +321,7 @@ */ if (atomic_dec_and_test(&r1_bio->remaining)) { md_write_end(r1_bio->mddev); - raid_end_bio_io(r1_bio, uptodate); + raid_end_bio_io(r1_bio); } } atomic_dec(&conf->mirrors[mirror].rdev->nr_pending); @@ -542,10 +543,10 @@ * then return an IO error: */ md_write_end(mddev); - raid_end_bio_io(r1_bio, 0); + raid_end_bio_io(r1_bio); return 0; } - atomic_set(&r1_bio->remaining, sum_bios); + atomic_set(&r1_bio->remaining, sum_bios+1); /* * We have to be a bit careful about the semaphore above, thats @@ -567,6 +568,12 @@ generic_make_request(mbio); } + + if (atomic_dec_and_test(&r1_bio->remaining)) { + md_write_end(mddev); + raid_end_bio_io(r1_bio); + } + return 0; } @@ -917,7 +924,7 @@ " read error for block %llu\n", bdev_partition_name(bio->bi_bdev), (unsigned long long)r1_bio->sector); - raid_end_bio_io(r1_bio, 0); + raid_end_bio_io(r1_bio); break; } printk(KERN_ERR "raid1: %s: redirecting sector %llu to" diff -urN linux-2.5.67-bk4/fs/binfmt_elf.c linux-2.5.67-bk5/fs/binfmt_elf.c --- linux-2.5.67-bk4/fs/binfmt_elf.c 2003-04-07 10:31:14.000000000 -0700 +++ linux-2.5.67-bk5/fs/binfmt_elf.c 2003-04-13 04:36:10.000000000 -0700 @@ -1378,7 +1378,6 @@ flush_cache_page(vma, addr); kaddr = kmap(page); DUMP_WRITE(kaddr, PAGE_SIZE); - flush_page_to_ram(page); kunmap(page); } page_cache_release(page); diff -urN linux-2.5.67-bk4/fs/buffer.c linux-2.5.67-bk5/fs/buffer.c --- linux-2.5.67-bk4/fs/buffer.c 2003-04-13 04:36:06.000000000 -0700 +++ linux-2.5.67-bk5/fs/buffer.c 2003-04-13 04:36:10.000000000 -0700 @@ -1754,7 +1754,6 @@ * exposing stale data. 
* The page is currently locked and not marked for writeback */ - ClearPageUptodate(page); bh = head; /* Recovery: lock and submit the mapped buffers */ do { diff -urN linux-2.5.67-bk4/fs/dquot.c linux-2.5.67-bk5/fs/dquot.c --- linux-2.5.67-bk4/fs/dquot.c 2003-04-07 10:31:57.000000000 -0700 +++ linux-2.5.67-bk5/fs/dquot.c 2003-04-13 04:36:10.000000000 -0700 @@ -326,7 +326,7 @@ if (!dquot_dirty(dquot)) continue; spin_unlock(&dq_list_lock); - commit_dqblk(dquot); + sb->dq_op->sync_dquot(dquot); goto restart; } spin_unlock(&dq_list_lock); @@ -1072,9 +1072,16 @@ .alloc_inode = dquot_alloc_inode, .free_space = dquot_free_space, .free_inode = dquot_free_inode, - .transfer = dquot_transfer + .transfer = dquot_transfer, + .sync_dquot = commit_dqblk }; +/* Function used by filesystems for initializing the dquot_operations structure */ +void init_dquot_operations(struct dquot_operations *fsdqops) +{ + memcpy(fsdqops, &dquot_operations, sizeof(dquot_operations)); +} + static inline void set_enable_flags(struct quota_info *dqopt, int type) { switch (type) { @@ -1432,3 +1439,4 @@ EXPORT_SYMBOL(dqstats); EXPORT_SYMBOL(dq_list_lock); EXPORT_SYMBOL(dq_data_lock); +EXPORT_SYMBOL(init_dquot_operations); diff -urN linux-2.5.67-bk4/fs/exec.c linux-2.5.67-bk5/fs/exec.c --- linux-2.5.67-bk4/fs/exec.c 2003-04-13 04:36:06.000000000 -0700 +++ linux-2.5.67-bk5/fs/exec.c 2003-04-13 04:36:10.000000000 -0700 @@ -314,7 +314,6 @@ } lru_cache_add_active(page); flush_dcache_page(page); - flush_page_to_ram(page); set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, PAGE_COPY)))); pte_chain = page_add_rmap(page, pte, pte_chain); pte_unmap(pte); @@ -407,7 +406,7 @@ mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; mpnt->vm_end = STACK_TOP; #endif - mpnt->vm_page_prot = PAGE_COPY; + mpnt->vm_page_prot = protection_map[VM_STACK_FLAGS & 0x7]; mpnt->vm_flags = VM_STACK_FLAGS; mpnt->vm_ops = NULL; mpnt->vm_pgoff = 0; @@ -750,7 +749,7 @@ { long j = -1; - write_lock(&files->file_lock); + 
spin_lock(&files->file_lock); for (;;) { unsigned long set, i; @@ -762,16 +761,16 @@ if (!set) continue; files->close_on_exec->fds_bits[j] = 0; - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); for ( ; set ; i++,set >>= 1) { if (set & 1) { sys_close(i); } } - write_lock(&files->file_lock); + spin_lock(&files->file_lock); } - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); } int flush_old_exec(struct linux_binprm * bprm) diff -urN linux-2.5.67-bk4/fs/ext2/balloc.c linux-2.5.67-bk5/fs/ext2/balloc.c --- linux-2.5.67-bk4/fs/ext2/balloc.c 2003-04-07 10:31:04.000000000 -0700 +++ linux-2.5.67-bk5/fs/ext2/balloc.c 2003-04-13 04:36:10.000000000 -0700 @@ -94,12 +94,19 @@ return bh; } -static inline int reserve_blocks(struct super_block *sb, int count) +/* + * Set sb->s_dirt here because the superblock was "logically" altered. We + * need to recalculate its free blocks count and flush it out. + */ +static int reserve_blocks(struct super_block *sb, int count) { - struct ext2_sb_info * sbi = EXT2_SB(sb); - struct ext2_super_block * es = sbi->s_es; - unsigned free_blocks = le32_to_cpu(es->s_free_blocks_count); - unsigned root_blocks = le32_to_cpu(es->s_r_blocks_count); + struct ext2_sb_info *sbi = EXT2_SB(sb); + struct ext2_super_block *es = sbi->s_es; + unsigned free_blocks; + unsigned root_blocks; + + free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); + root_blocks = le32_to_cpu(es->s_r_blocks_count); if (free_blocks < count) count = free_blocks; @@ -117,46 +124,51 @@ return 0; } - es->s_free_blocks_count = cpu_to_le32(free_blocks - count); - mark_buffer_dirty(sbi->s_sbh); + percpu_counter_mod(&sbi->s_freeblocks_counter, -count); sb->s_dirt = 1; return count; } -static inline void release_blocks(struct super_block *sb, int count) +static void release_blocks(struct super_block *sb, int count) { if (count) { - struct ext2_sb_info * sbi = EXT2_SB(sb); - struct ext2_super_block * es = sbi->s_es; - unsigned free_blocks = 
le32_to_cpu(es->s_free_blocks_count); - es->s_free_blocks_count = cpu_to_le32(free_blocks + count); - mark_buffer_dirty(sbi->s_sbh); + struct ext2_sb_info *sbi = EXT2_SB(sb); + + percpu_counter_mod(&sbi->s_freeblocks_counter, count); sb->s_dirt = 1; } } -static inline int group_reserve_blocks(struct ext2_group_desc *desc, - struct buffer_head *bh, int count) +static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no, + struct ext2_group_desc *desc, struct buffer_head *bh, int count) { unsigned free_blocks; if (!desc->bg_free_blocks_count) return 0; + spin_lock(sb_bgl_lock(sbi, group_no)); free_blocks = le16_to_cpu(desc->bg_free_blocks_count); if (free_blocks < count) count = free_blocks; desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count); + spin_unlock(sb_bgl_lock(sbi, group_no)); mark_buffer_dirty(bh); return count; } -static inline void group_release_blocks(struct ext2_group_desc *desc, - struct buffer_head *bh, int count) +static void group_release_blocks(struct super_block *sb, int group_no, + struct ext2_group_desc *desc, struct buffer_head *bh, int count) { if (count) { - unsigned free_blocks = le16_to_cpu(desc->bg_free_blocks_count); + struct ext2_sb_info *sbi = EXT2_SB(sb); + unsigned free_blocks; + + spin_lock(sb_bgl_lock(sbi, group_no)); + free_blocks = le16_to_cpu(desc->bg_free_blocks_count); desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count); + spin_unlock(sb_bgl_lock(sbi, group_no)); + sb->s_dirt = 1; mark_buffer_dirty(bh); } } @@ -172,12 +184,11 @@ unsigned long i; unsigned long overflow; struct super_block * sb = inode->i_sb; + struct ext2_sb_info * sbi = EXT2_SB(sb); struct ext2_group_desc * desc; - struct ext2_super_block * es; + struct ext2_super_block * es = sbi->s_es; unsigned freed = 0, group_freed; - lock_super (sb); - es = EXT2_SB(sb)->s_es; if (block < le32_to_cpu(es->s_first_data_block) || block + count < block || block + count > le32_to_cpu(es->s_blocks_count)) { @@ -215,16 +226,17 @@ if (in_range 
(le32_to_cpu(desc->bg_block_bitmap), block, count) || in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) || in_range (block, le32_to_cpu(desc->bg_inode_table), - EXT2_SB(sb)->s_itb_per_group) || + sbi->s_itb_per_group) || in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), - EXT2_SB(sb)->s_itb_per_group)) + sbi->s_itb_per_group)) ext2_error (sb, "ext2_free_blocks", "Freeing blocks in system zones - " "Block = %lu, count = %lu", block, count); for (i = 0, group_freed = 0; i < count; i++) { - if (!ext2_clear_bit(bit + i, bitmap_bh->b_data)) + if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group), + bit + i, (void *) bitmap_bh->b_data)) ext2_error (sb, "ext2_free_blocks", "bit already cleared for block %lu", block + i); @@ -236,7 +248,7 @@ if (sb->s_flags & MS_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); - group_release_blocks(desc, bh2, group_freed); + group_release_blocks(sb, block_group, desc, bh2, group_freed); freed += group_freed; if (overflow) { @@ -247,17 +259,18 @@ error_return: brelse(bitmap_bh); release_blocks(sb, freed); - unlock_super (sb); DQUOT_FREE_BLOCK(inode, freed); } -static int grab_block(char *map, unsigned size, int goal) +static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal) { int k; char *p, *r; if (!ext2_test_bit(goal, map)) goto got_it; + +repeat: if (goal) { /* * The goal was occupied; search forward for a free @@ -297,7 +310,8 @@ } return -1; got_it: - ext2_set_bit(goal, map); + if (ext2_set_bit_atomic(lock, goal, (void *) map)) + goto repeat; return goal; } @@ -309,15 +323,15 @@ * bitmap, and then for any free bit if that fails. * This function also updates quota and i_blocks field. 
*/ -int ext2_new_block (struct inode * inode, unsigned long goal, - u32 * prealloc_count, u32 * prealloc_block, int * err) +int ext2_new_block(struct inode *inode, unsigned long goal, + u32 *prealloc_count, u32 *prealloc_block, int *err) { struct buffer_head *bitmap_bh = NULL; struct buffer_head *gdp_bh; /* bh2 */ struct ext2_group_desc *desc; int group_no; /* i */ int ret_block; /* j */ - int bit; /* k */ + int bit; /* k */ int target_block; /* tmp */ int block = 0; struct super_block *sb = inode->i_sb; @@ -341,13 +355,10 @@ prealloc_goal--; dq_alloc = prealloc_goal + 1; - - lock_super (sb); - es_alloc = reserve_blocks(sb, dq_alloc); if (!es_alloc) { *err = -ENOSPC; - goto out_unlock; + goto out_dquot; } ext2_debug ("goal=%lu.\n", goal); @@ -357,10 +368,16 @@ goal = le32_to_cpu(es->s_first_data_block); group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size; desc = ext2_get_group_desc (sb, group_no, &gdp_bh); - if (!desc) + if (!desc) { + /* + * gdp_bh may still be uninitialised. But group_release_blocks + * will not touch it because group_alloc is zero. 
+ */ goto io_error; + } - group_alloc = group_reserve_blocks(desc, gdp_bh, es_alloc); + group_alloc = group_reserve_blocks(sbi, group_no, desc, + gdp_bh, es_alloc); if (group_alloc) { ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) % group_size); @@ -371,11 +388,11 @@ ext2_debug("goal is at %d:%d.\n", group_no, ret_block); - ret_block = grab_block(bitmap_bh->b_data, - group_size, ret_block); + ret_block = grab_block(sb_bgl_lock(sbi, group_no), + bitmap_bh->b_data, group_size, ret_block); if (ret_block >= 0) goto got_block; - group_release_blocks(desc, gdp_bh, group_alloc); + group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); group_alloc = 0; } @@ -393,7 +410,8 @@ desc = ext2_get_group_desc(sb, group_no, &gdp_bh); if (!desc) goto io_error; - group_alloc = group_reserve_blocks(desc, gdp_bh, es_alloc); + group_alloc = group_reserve_blocks(sbi, group_no, desc, + gdp_bh, es_alloc); } if (!group_alloc) { *err = -ENOSPC; @@ -404,7 +422,8 @@ if (!bitmap_bh) goto io_error; - ret_block = grab_block(bitmap_bh->b_data, group_size, 0); + ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data, + group_size, 0); if (ret_block < 0) { ext2_error (sb, "ext2_new_block", "Free blocks count corrupted for block group %d", @@ -452,7 +471,9 @@ unsigned n; for (n = 0; n < group_alloc && ++ret_block < group_size; n++) { - if (ext2_set_bit(ret_block, bitmap_bh->b_data)) + if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no), + ret_block, + (void*) bitmap_bh->b_data)) break; } *prealloc_block = block + 1; @@ -471,10 +492,9 @@ *err = 0; out_release: - group_release_blocks(desc, gdp_bh, group_alloc); + group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); release_blocks(sb, es_alloc); -out_unlock: - unlock_super (sb); +out_dquot: DQUOT_FREE_BLOCK(inode, dq_alloc); out: brelse(bitmap_bh); @@ -487,11 +507,11 @@ unsigned long ext2_count_free_blocks (struct super_block * sb) { -#ifdef EXT2FS_DEBUG - struct ext2_super_block * es; - unsigned long desc_count, 
bitmap_count, x; struct ext2_group_desc * desc; + unsigned long desc_count = 0; int i; +#ifdef EXT2FS_DEBUG + unsigned long bitmap_count, x; lock_super (sb); es = EXT2_SB(sb)->s_es; @@ -519,13 +539,18 @@ unlock_super (sb); return bitmap_count; #else - return le32_to_cpu(EXT2_SB(sb)->s_es->s_free_blocks_count); + for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { + desc = ext2_get_group_desc (sb, i, NULL); + if (!desc) + continue; + desc_count += le16_to_cpu(desc->bg_free_blocks_count); + } + return desc_count; #endif } -static inline int block_in_use (unsigned long block, - struct super_block * sb, - unsigned char * map) +static inline int +block_in_use(unsigned long block, struct super_block *sb, unsigned char *map) { return ext2_test_bit ((block - le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) % EXT2_BLOCKS_PER_GROUP(sb), map); diff -urN linux-2.5.67-bk4/fs/ext2/ialloc.c linux-2.5.67-bk5/fs/ext2/ialloc.c --- linux-2.5.67-bk4/fs/ext2/ialloc.c 2003-04-07 10:31:22.000000000 -0700 +++ linux-2.5.67-bk5/fs/ext2/ialloc.c 2003-04-13 04:36:10.000000000 -0700 @@ -64,6 +64,66 @@ } /* + * Speculatively reserve an inode in a blockgroup which used to have some + * spare ones. Later, when we come to actually claim the inode in the bitmap + * it may be that it was taken. In that case the allocator will undo this + * reservation and try again. + * + * The inode allocator does not physically alter the superblock. But we still + * set sb->s_dirt, because the superblock was "logically" altered - we need to + * go and add up the free inodes counts again and flush out the superblock. 
+ */ +static void ext2_reserve_inode(struct super_block *sb, int group, int dir) +{ + struct ext2_group_desc * desc; + struct buffer_head *bh; + + desc = ext2_get_group_desc(sb, group, &bh); + if (!desc) { + ext2_error(sb, "ext2_reserve_inode", + "can't get descriptor for group %d", group); + return; + } + + spin_lock(sb_bgl_lock(EXT2_SB(sb), group)); + desc->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1); + if (dir) + desc->bg_used_dirs_count = + cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) + 1); + spin_unlock(sb_bgl_lock(EXT2_SB(sb), group)); + if (dir) + percpu_counter_inc(&EXT2_SB(sb)->s_dirs_counter); + sb->s_dirt = 1; + mark_buffer_dirty(bh); +} + +static void ext2_release_inode(struct super_block *sb, int group, int dir) +{ + struct ext2_group_desc * desc; + struct buffer_head *bh; + + desc = ext2_get_group_desc(sb, group, &bh); + if (!desc) { + ext2_error(sb, "ext2_release_inode", + "can't get descriptor for group %d", group); + return; + } + + spin_lock(sb_bgl_lock(EXT2_SB(sb), group)); + desc->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1); + if (dir) + desc->bg_used_dirs_count = + cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1); + spin_unlock(sb_bgl_lock(EXT2_SB(sb), group)); + if (dir) + percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter); + sb->s_dirt = 1; + mark_buffer_dirty(bh); +} + +/* * NOTE! When we get the inode, we're the only people * that have access to it, and as such there are no * race conditions we have to worry about. 
The inode @@ -85,10 +145,8 @@ int is_directory; unsigned long ino; struct buffer_head *bitmap_bh = NULL; - struct buffer_head *bh2; unsigned long block_group; unsigned long bit; - struct ext2_group_desc * desc; struct ext2_super_block * es; ino = inode->i_ino; @@ -105,7 +163,6 @@ DQUOT_DROP(inode); } - lock_super (sb); es = EXT2_SB(sb)->s_es; is_directory = S_ISDIR(inode->i_mode); @@ -126,32 +183,17 @@ goto error_return; /* Ok, now we can actually update the inode bitmaps.. */ - if (!ext2_clear_bit(bit, bitmap_bh->b_data)) + if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group), + bit, (void *) bitmap_bh->b_data)) ext2_error (sb, "ext2_free_inode", "bit already cleared for inode %lu", ino); - else { - desc = ext2_get_group_desc (sb, block_group, &bh2); - if (desc) { - desc->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1); - if (is_directory) { - desc->bg_used_dirs_count = - cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1); - EXT2_SB(sb)->s_dir_count--; - } - } - mark_buffer_dirty(bh2); - es->s_free_inodes_count = - cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1); - mark_buffer_dirty(EXT2_SB(sb)->s_sbh); - } + else + ext2_release_inode(sb, block_group, is_directory); mark_buffer_dirty(bitmap_bh); if (sb->s_flags & MS_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); - sb->s_dirt = 1; error_return: brelse(bitmap_bh); - unlock_super (sb); } /* @@ -211,9 +253,8 @@ */ static int find_group_dir(struct super_block *sb, struct inode *parent) { - struct ext2_super_block * es = EXT2_SB(sb)->s_es; int ngroups = EXT2_SB(sb)->s_groups_count; - int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups; + int avefreei = ext2_count_free_inodes(sb) / ngroups; struct ext2_group_desc *desc, *best_desc = NULL; struct buffer_head *bh, *best_bh = NULL; int group, best_group = -1; @@ -234,11 +275,9 @@ } if (!best_desc) return -1; - best_desc->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(best_desc->bg_free_inodes_count) - 1); - 
best_desc->bg_used_dirs_count = - cpu_to_le16(le16_to_cpu(best_desc->bg_used_dirs_count) + 1); - mark_buffer_dirty(best_bh); + + ext2_reserve_inode(sb, best_group, 1); + return best_group; } @@ -277,15 +316,23 @@ struct ext2_super_block *es = sbi->s_es; int ngroups = sbi->s_groups_count; int inodes_per_group = EXT2_INODES_PER_GROUP(sb); - int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups; - int avefreeb = le32_to_cpu(es->s_free_blocks_count) / ngroups; + int freei; + int avefreei; + int free_blocks; + int avefreeb; int blocks_per_dir; - int ndirs = sbi->s_dir_count; + int ndirs; int max_debt, max_dirs, min_blocks, min_inodes; int group = -1, i; struct ext2_group_desc *desc; struct buffer_head *bh; + freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); + avefreei = freei / ngroups; + free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); + avefreeb = free_blocks / ngroups; + ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); + if ((parent == sb->s_root->d_inode) || (parent->i_flags & EXT2_TOPDIR_FL)) { struct ext2_group_desc *best_desc = NULL; @@ -320,8 +367,10 @@ goto fallback; } - blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - - le32_to_cpu(es->s_free_blocks_count)) / ndirs; + if (ndirs == 0) + ndirs = 1; /* percpu_counters are approximate... 
*/ + + blocks_per_dir = (le32_to_cpu(es->s_blocks_count)-free_blocks) / ndirs; max_dirs = ndirs / ngroups + inodes_per_group / 16; min_inodes = avefreei - inodes_per_group / 4; @@ -364,12 +413,7 @@ return -1; found: - desc->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1); - desc->bg_used_dirs_count = - cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) + 1); - sbi->s_dir_count++; - mark_buffer_dirty(bh); + ext2_reserve_inode(sb, group, 1); return group; } @@ -431,9 +475,8 @@ return -1; found: - desc->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1); - mark_buffer_dirty(bh); + ext2_reserve_inode(sb, group, 0); + return group; } @@ -456,7 +499,6 @@ return ERR_PTR(-ENOMEM); ei = EXT2_I(inode); - lock_super (sb); es = EXT2_SB(sb)->s_es; repeat: if (S_ISDIR(mode)) { @@ -480,7 +522,12 @@ EXT2_INODES_PER_GROUP(sb)); if (i >= EXT2_INODES_PER_GROUP(sb)) goto bad_count; - ext2_set_bit(i, bitmap_bh->b_data); + if (ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group), + i, (void *) bitmap_bh->b_data)) { + brelse(bitmap_bh); + ext2_release_inode(sb, group, S_ISDIR(mode)); + goto repeat; + } mark_buffer_dirty(bitmap_bh); if (sb->s_flags & MS_SYNCHRONOUS) @@ -497,9 +544,9 @@ goto fail2; } - es->s_free_inodes_count = - cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1); + percpu_counter_mod(&EXT2_SB(sb)->s_freeinodes_counter, -1); + spin_lock(sb_bgl_lock(EXT2_SB(sb), group)); if (S_ISDIR(mode)) { if (EXT2_SB(sb)->s_debts[group] < 255) EXT2_SB(sb)->s_debts[group]++; @@ -507,8 +554,8 @@ if (EXT2_SB(sb)->s_debts[group]) EXT2_SB(sb)->s_debts[group]--; } + spin_unlock(sb_bgl_lock(EXT2_SB(sb), group)); - mark_buffer_dirty(EXT2_SB(sb)->s_sbh); sb->s_dirt = 1; inode->i_uid = current->fsuid; if (test_opt (sb, GRPID)) @@ -549,7 +596,6 @@ inode->i_generation = EXT2_SB(sb)->s_next_generation++; insert_inode_hash(inode); - unlock_super(sb); if(DQUOT_ALLOC_INODE(inode)) { DQUOT_DROP(inode); goto fail3; @@ -571,15 +617,8 @@ 
return ERR_PTR(err); fail2: - desc = ext2_get_group_desc (sb, group, &bh2); - desc->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1); - if (S_ISDIR(mode)) - desc->bg_used_dirs_count = - cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1); - mark_buffer_dirty(bh2); + ext2_release_inode(sb, group, S_ISDIR(mode)); fail: - unlock_super(sb); make_bad_inode(inode); iput(inode); return ERR_PTR(err); @@ -602,16 +641,19 @@ unsigned long ext2_count_free_inodes (struct super_block * sb) { + struct ext2_group_desc *desc; + unsigned long desc_count = 0; + int i; + #ifdef EXT2FS_DEBUG struct ext2_super_block * es; - unsigned long desc_count = 0, bitmap_count = 0; + unsigned long bitmap_count = 0; struct buffer_head *bitmap_bh = NULL; int i; lock_super (sb); es = EXT2_SB(sb)->s_es; for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { - struct ext2_group_desc *desc; unsigned x; desc = ext2_get_group_desc (sb, i, NULL); @@ -630,11 +672,18 @@ } brelse(bitmap_bh); printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n", - le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); + percpu_counter_read(EXT2_SB(sb)->s_freeinodes_counter), + desc_count, bitmap_count); unlock_super(sb); return desc_count; #else - return le32_to_cpu(EXT2_SB(sb)->s_es->s_free_inodes_count); + for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { + desc = ext2_get_group_desc (sb, i, NULL); + if (!desc) + continue; + desc_count += le16_to_cpu(desc->bg_free_inodes_count); + } + return desc_count; #endif } @@ -684,7 +733,8 @@ bitmap_count += x; } brelse(bitmap_bh); - if (le32_to_cpu(es->s_free_inodes_count) != bitmap_count) + if (percpu_counter_read(EXT2_SB(sb)->s_freeinodes_counter) != + bitmap_count) ext2_error(sb, "ext2_check_inodes_bitmap", "Wrong free inodes count in super block, " "stored = %lu, counted = %lu", diff -urN linux-2.5.67-bk4/fs/ext2/super.c linux-2.5.67-bk5/fs/ext2/super.c --- linux-2.5.67-bk4/fs/ext2/super.c 2003-04-07 10:31:55.000000000 
-0700 +++ linux-2.5.67-bk5/fs/ext2/super.c 2003-04-13 04:36:10.000000000 -0700 @@ -769,6 +769,10 @@ printk ("EXT2-fs: not enough memory\n"); goto failed_mount; } + percpu_counter_init(&sbi->s_freeblocks_counter); + percpu_counter_init(&sbi->s_freeinodes_counter); + percpu_counter_init(&sbi->s_dirs_counter); + bgl_lock_init(&sbi->s_blockgroup_lock); sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts), GFP_KERNEL); if (!sbi->s_debts) { @@ -792,7 +796,6 @@ goto failed_mount2; } sbi->s_gdb_count = db_count; - sbi->s_dir_count = ext2_count_dirs(sb); get_random_bytes(&sbi->s_next_generation, sizeof(u32)); /* * set up enough so that it can read an inode @@ -814,6 +817,12 @@ ext2_warning(sb, __FUNCTION__, "mounting ext3 filesystem as ext2\n"); ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY); + percpu_counter_mod(&sbi->s_freeblocks_counter, + ext2_count_free_blocks(sb)); + percpu_counter_mod(&sbi->s_freeinodes_counter, + ext2_count_free_inodes(sb)); + percpu_counter_mod(&sbi->s_dirs_counter, + ext2_count_dirs(sb)); return 0; failed_mount2: for (i = 0; i < db_count; i++) @@ -840,6 +849,8 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es) { + es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); + es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); es->s_wtime = cpu_to_le32(get_seconds()); mark_buffer_dirty(EXT2_SB(sb)->s_sbh); sync_dirty_buffer(EXT2_SB(sb)->s_sbh); @@ -868,6 +879,8 @@ ext2_debug ("setting valid to 0\n"); es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT2_VALID_FS); + es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); + es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); es->s_mtime = cpu_to_le32(get_seconds()); ext2_sync_super(sb, es); } else @@ -965,7 +978,7 @@ buf->f_type = EXT2_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = le32_to_cpu(sbi->s_es->s_blocks_count) - overhead; - buf->f_bfree = ext2_count_free_blocks (sb); + 
buf->f_bfree = ext2_count_free_blocks(sb); buf->f_bavail = buf->f_bfree - le32_to_cpu(sbi->s_es->s_r_blocks_count); if (buf->f_bfree < le32_to_cpu(sbi->s_es->s_r_blocks_count)) buf->f_bavail = 0; diff -urN linux-2.5.67-bk4/fs/ext3/super.c linux-2.5.67-bk5/fs/ext3/super.c --- linux-2.5.67-bk4/fs/ext3/super.c 2003-04-07 10:32:32.000000000 -0700 +++ linux-2.5.67-bk5/fs/ext3/super.c 2003-04-13 04:36:10.000000000 -0700 @@ -566,6 +566,8 @@ # define ext3_clear_inode NULL #endif +static struct dquot_operations ext3_qops; + static struct super_operations ext3_sops = { .alloc_inode = ext3_alloc_inode, .destroy_inode = ext3_destroy_inode, @@ -1337,6 +1339,7 @@ */ sb->s_op = &ext3_sops; sb->s_export_op = &ext3_export_ops; + sb->dq_op = &ext3_qops; INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ sb->s_root = 0; @@ -1977,6 +1980,56 @@ return 0; } +/* Helper function for writing quotas on sync - we need to start transaction before quota file + * is locked for write. Otherwise the are possible deadlocks: + * Process 1 Process 2 + * ext3_create() quota_sync() + * journal_start() write_dquot() + * DQUOT_INIT() down(dqio_sem) + * down(dqio_sem) journal_start() + * + */ + +#ifdef CONFIG_QUOTA + +#define EXT3_OLD_QFMT_BLOCKS 2 +#define EXT3_V0_QFMT_BLOCKS 6 + +static int (*old_sync_dquot)(struct dquot *dquot); + +static int ext3_sync_dquot(struct dquot *dquot) +{ + int nblocks, ret; + handle_t *handle; + struct quota_info *dqops = sb_dqopt(dquot->dq_sb); + struct inode *qinode; + + switch (dqops->info[dquot->dq_type].dqi_format->qf_fmt_id) { + case QFMT_VFS_OLD: + nblocks = EXT3_OLD_QFMT_BLOCKS; + break; + case QFMT_VFS_V0: + nblocks = EXT3_V0_QFMT_BLOCKS; + break; + default: + nblocks = EXT3_MAX_TRANS_DATA; + } + lock_kernel(); + qinode = dqops->files[dquot->dq_type]->f_dentry->d_inode; + handle = ext3_journal_start(qinode, nblocks); + if (IS_ERR(handle)) { + unlock_kernel(); + return PTR_ERR(handle); + } + unlock_kernel(); + ret = old_sync_dquot(dquot); + lock_kernel(); 
+ ret = ext3_journal_stop(handle); + unlock_kernel(); + return ret; +} +#endif + static struct super_block *ext3_get_sb(struct file_system_type *fs_type, int flags, char *dev_name, void *data) { @@ -1999,6 +2052,11 @@ err = init_inodecache(); if (err) goto out1; +#ifdef CONFIG_QUOTA + init_dquot_operations(&ext3_qops); + old_sync_dquot = ext3_qops.sync_dquot; + ext3_qops.sync_dquot = ext3_sync_dquot; +#endif err = register_filesystem(&ext3_fs_type); if (err) goto out; diff -urN linux-2.5.67-bk4/fs/fcntl.c linux-2.5.67-bk5/fs/fcntl.c --- linux-2.5.67-bk4/fs/fcntl.c 2003-04-07 10:31:56.000000000 -0700 +++ linux-2.5.67-bk5/fs/fcntl.c 2003-04-13 04:36:10.000000000 -0700 @@ -23,21 +23,21 @@ void set_close_on_exec(unsigned int fd, int flag) { struct files_struct *files = current->files; - write_lock(&files->file_lock); + spin_lock(&files->file_lock); if (flag) FD_SET(fd, files->close_on_exec); else FD_CLR(fd, files->close_on_exec); - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); } static inline int get_close_on_exec(unsigned int fd) { struct files_struct *files = current->files; int res; - read_lock(&files->file_lock); + spin_lock(&files->file_lock); res = FD_ISSET(fd, files->close_on_exec); - read_unlock(&files->file_lock); + spin_unlock(&files->file_lock); return res; } @@ -134,15 +134,15 @@ struct files_struct * files = current->files; int fd; - write_lock(&files->file_lock); + spin_lock(&files->file_lock); fd = locate_fd(files, file, start); if (fd >= 0) { FD_SET(fd, files->open_fds); FD_CLR(fd, files->close_on_exec); - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); fd_install(fd, file); } else { - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); fput(file); } @@ -155,7 +155,7 @@ struct file * file, *tofree; struct files_struct * files = current->files; - write_lock(&files->file_lock); + spin_lock(&files->file_lock); if (!(file = fcheck(oldfd))) goto out_unlock; err = newfd; @@ -186,7 +186,7 @@ 
files->fd[newfd] = file; FD_SET(newfd, files->open_fds); FD_CLR(newfd, files->close_on_exec); - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); if (tofree) filp_close(tofree, files); @@ -194,11 +194,11 @@ out: return err; out_unlock: - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); goto out; out_fput: - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); fput(file); goto out; } diff -urN linux-2.5.67-bk4/fs/file.c linux-2.5.67-bk5/fs/file.c --- linux-2.5.67-bk4/fs/file.c 2003-04-07 10:30:41.000000000 -0700 +++ linux-2.5.67-bk5/fs/file.c 2003-04-13 04:36:10.000000000 -0700 @@ -65,7 +65,7 @@ goto out; nfds = files->max_fds; - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); /* * Expand to the max in easy steps, and keep expanding it until @@ -89,7 +89,7 @@ error = -ENOMEM; new_fds = alloc_fd_array(nfds); - write_lock(&files->file_lock); + spin_lock(&files->file_lock); if (!new_fds) goto out; @@ -110,15 +110,15 @@ memset(&new_fds[i], 0, (nfds-i) * sizeof(struct file *)); - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); free_fd_array(old_fds, i); - write_lock(&files->file_lock); + spin_lock(&files->file_lock); } } else { /* Somebody expanded the array while we slept ... 
*/ - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); free_fd_array(new_fds, nfds); - write_lock(&files->file_lock); + spin_lock(&files->file_lock); } error = 0; out: @@ -167,7 +167,7 @@ goto out; nfds = files->max_fdset; - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); /* Expand to the max in easy steps */ do { @@ -183,7 +183,7 @@ error = -ENOMEM; new_openset = alloc_fdset(nfds); new_execset = alloc_fdset(nfds); - write_lock(&files->file_lock); + spin_lock(&files->file_lock); if (!new_openset || !new_execset) goto out; @@ -208,21 +208,21 @@ nfds = xchg(&files->max_fdset, nfds); new_openset = xchg(&files->open_fds, new_openset); new_execset = xchg(&files->close_on_exec, new_execset); - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); free_fdset (new_openset, nfds); free_fdset (new_execset, nfds); - write_lock(&files->file_lock); + spin_lock(&files->file_lock); return 0; } /* Somebody expanded the array while we slept ... */ out: - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); if (new_openset) free_fdset(new_openset, nfds); if (new_execset) free_fdset(new_execset, nfds); - write_lock(&files->file_lock); + spin_lock(&files->file_lock); return error; } diff -urN linux-2.5.67-bk4/fs/file_table.c linux-2.5.67-bk5/fs/file_table.c --- linux-2.5.67-bk4/fs/file_table.c 2003-04-07 10:30:38.000000000 -0700 +++ linux-2.5.67-bk5/fs/file_table.c 2003-04-13 04:36:10.000000000 -0700 @@ -182,11 +182,11 @@ struct file *file; struct files_struct *files = current->files; - read_lock(&files->file_lock); + spin_lock(&files->file_lock); file = fcheck(fd); if (file) get_file(file); - read_unlock(&files->file_lock); + spin_unlock(&files->file_lock); return file; } diff -urN linux-2.5.67-bk4/fs/nfsd/Makefile linux-2.5.67-bk5/fs/nfsd/Makefile --- linux-2.5.67-bk4/fs/nfsd/Makefile 2003-04-07 10:31:04.000000000 -0700 +++ linux-2.5.67-bk5/fs/nfsd/Makefile 2003-04-13 04:36:10.000000000 -0700 @@ -7,5 +7,5 @@ nfsd-y := 
nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o -nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o +nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfsd-objs := $(nfsd-y) diff -urN linux-2.5.67-bk4/fs/nfsd/export.c linux-2.5.67-bk5/fs/nfsd/export.c --- linux-2.5.67-bk4/fs/nfsd/export.c 2003-04-07 10:31:46.000000000 -0700 +++ linux-2.5.67-bk5/fs/nfsd/export.c 2003-04-13 04:36:10.000000000 -0700 @@ -175,7 +175,7 @@ ek = svc_expkey_lookup(&key, 2); if (ek) expkey_put(&ek->h, &svc_expkey_cache); - svc_export_put(&exp->h, &svc_export_cache); + exp_put(exp); err = 0; out_nd: path_release(&nd); @@ -648,7 +648,6 @@ struct svc_export new; struct svc_expkey *fsid_key = NULL; struct nameidata nd; - struct inode *inode = NULL; int err; /* Consistency check */ @@ -674,7 +673,6 @@ err = path_lookup(nxp->ex_path, 0, &nd); if (err) goto out_unlock; - inode = nd.dentry->d_inode; err = -EINVAL; exp = exp_get_by_name(clp, nd.mnt, nd.dentry, NULL); @@ -687,7 +685,7 @@ fsid_key->ek_export != exp) goto finish; - if (exp != NULL) { + if (exp) { /* just a flags/id/fsid update */ exp_fsid_unhash(exp); @@ -700,7 +698,7 @@ goto finish; } - err = check_export(inode, nxp->ex_flags); + err = check_export(nd.dentry->d_inode, nxp->ex_flags); if (err) goto finish; err = -ENOMEM; @@ -838,7 +836,7 @@ err = 0; memcpy(f, &fh.fh_handle, sizeof(struct knfsd_fh)); fh_put(&fh); - + exp_put(exp); out: path_release(&nd); return err; diff -urN linux-2.5.67-bk4/fs/nfsd/nfs4proc.c linux-2.5.67-bk5/fs/nfsd/nfs4proc.c --- linux-2.5.67-bk4/fs/nfsd/nfs4proc.c 2003-04-07 10:33:02.000000000 -0700 +++ linux-2.5.67-bk5/fs/nfsd/nfs4proc.c 2003-04-13 04:36:10.000000000 -0700 @@ -173,20 +173,6 @@ return nfs_ok; } -static inline int -nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclientid) -{ - memset(&setclientid->se_clientid, 0, sizeof(clientid_t)); - memset(&setclientid->se_confirm, 0, 
sizeof(nfs4_verifier)); - return nfs_ok; -} - -static inline int -nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm) -{ - return nfs_ok; -} - /* * filehandle-manipulating ops. */ diff -urN linux-2.5.67-bk4/fs/nfsd/nfs4state.c linux-2.5.67-bk5/fs/nfsd/nfs4state.c --- linux-2.5.67-bk4/fs/nfsd/nfs4state.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.5.67-bk5/fs/nfsd/nfs4state.c 2003-04-13 04:36:10.000000000 -0700 @@ -0,0 +1,634 @@ +/* +* linux/fs/nfsd/nfs4state.c +* +* Copyright (c) 2001 The Regents of the University of Michigan. +* All rights reserved. +* +* Kendrick Smith +* Andy Adamson +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* 1. Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* 2. Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* 3. Neither the name of the University nor the names of its +* contributors may be used to endorse or promote products derived +* from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED +* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +*/ + +#include +#include +#include + + +#include +#include +#include +#include +#include +#include + +#define NFSDDBG_FACILITY NFSDDBG_PROC + +/* Globals */ +time_t boot_time; +static u32 current_clientid = 1; + +/* Locking: + * + * client_sema: + * protects clientid_hashtbl[], clientstr_hashtbl[], + * unconfstr_hashtbl[], uncofid_hashtbl[]. + */ +static struct semaphore client_sema; + +static inline u32 +opaque_hashval(const void *ptr, int nbytes) +{ + unsigned char *cptr = (unsigned char *) ptr; + + u32 x = 0; + while (nbytes--) { + x *= 37; + x += *cptr++; + } + return x; +} + +/* Hash tables for nfs4_clientid state */ +#define CLIENT_HASH_BITS 4 +#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) +#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) + +#define clientid_hashval(id) \ + ((id) & CLIENT_HASH_MASK) +#define clientstr_hashval(name, namelen) \ + (opaque_hashval((name), (namelen)) & CLIENT_HASH_MASK) + +/* conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed + * setclientid_confirmed info. + * + * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed + * setclientid info. 
+ */ +static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE]; +static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE]; +static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE]; +static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE]; + +/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ +static int +STALE_CLIENTID(clientid_t *clid) +{ + if (clid->cl_boot == boot_time) + return 0; + printk("NFSD stale clientid (%08x/%08x)\n", clid->cl_boot, clid->cl_id); + return 1; +} + +/* + * XXX Should we use a slab cache ? + * This type of memory management is somewhat inefficient, but we use it + * anyway since SETCLIENTID is not a common operation. + */ +static inline struct nfs4_client * +alloc_client(struct xdr_netobj name) +{ + struct nfs4_client *clp; + + if ((clp = kmalloc(sizeof(struct nfs4_client), GFP_KERNEL))!= NULL) { + memset(clp, 0, sizeof(*clp)); + if ((clp->cl_name.data = kmalloc(name.len, GFP_KERNEL)) != NULL) { + memcpy(clp->cl_name.data, name.data, name.len); + clp->cl_name.len = name.len; + } + else { + kfree(clp); + clp = NULL; + } + } + return clp; +} + +static inline void +free_client(struct nfs4_client *clp) +{ + kfree(clp->cl_name.data); + kfree(clp); +} + +static void +expire_client(struct nfs4_client *clp) +{ + dprintk("NFSD: expire_client\n"); + list_del(&clp->cl_idhash); + list_del(&clp->cl_strhash); + free_client(clp); +} + +static struct nfs4_client * +create_client(struct xdr_netobj name) { + struct nfs4_client *clp; + + if(!(clp = alloc_client(name))) + goto out; + INIT_LIST_HEAD(&clp->cl_idhash); + INIT_LIST_HEAD(&clp->cl_strhash); +out: + return clp; +} + +static void +copy_verf(struct nfs4_client *target, nfs4_verifier source) { + memcpy(&target->cl_verifier, source, sizeof(nfs4_verifier)); +} + +static void +copy_clid(struct nfs4_client *target, struct nfs4_client *source) { + target->cl_clientid.cl_boot = source->cl_clientid.cl_boot; + target->cl_clientid.cl_id = source->cl_clientid.cl_id; +} + +static void 
+copy_cred(struct svc_cred *target, struct svc_cred *source) { + int i; + + target->cr_uid = source->cr_uid; + target->cr_gid = source->cr_gid; + for(i = 0; i < NGROUPS; i++) + target->cr_groups[i] = source->cr_groups[i]; +} + +static int +cmp_name(struct xdr_netobj *n1, struct xdr_netobj *n2) { + if(!n1 || !n2) + return 0; + return((n1->len == n2->len) && !memcmp(n1->data, n2->data, n2->len)); +} + +static int +cmp_verf(nfs4_verifier v1, nfs4_verifier v2) { + return(!memcmp(v1,v2,sizeof(nfs4_verifier))); +} + +static int +cmp_clid(clientid_t * cl1, clientid_t * cl2) { + return((cl1->cl_boot == cl2->cl_boot) && + (cl1->cl_id == cl2->cl_id)); +} + +/* XXX what about NGROUP */ +static int +cmp_creds(struct svc_cred *cr1, struct svc_cred *cr2){ + return((cr1->cr_uid == cr2->cr_uid) && + (cr1->cr_gid == cr2->cr_gid)); + +} + +static void +gen_clid(struct nfs4_client *clp) { + clp->cl_clientid.cl_boot = boot_time; + clp->cl_clientid.cl_id = current_clientid++; +} + +static void +gen_confirm(struct nfs4_client *clp) { + struct timespec tv; + u32 * p; + + tv = CURRENT_TIME; + p = (u32 *)clp->cl_confirm; + *p++ = tv.tv_sec; + *p++ = tv.tv_nsec; +} + +static int +check_name(struct xdr_netobj name) { + + if (name.len == 0) + return 0; + if (name.len > NFSD4_CLIENT_MAXNAME) { + printk("NFSD: check_name: name too long(%d)!\n", name.len); + return 0; + } + return 1; +} + +void +add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval) +{ + unsigned int idhashval; + + list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]); + idhashval = clientid_hashval(clp->cl_clientid.cl_id); + list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]); +} + +void +move_to_confirmed(struct nfs4_client *clp, unsigned int idhashval) +{ + unsigned int strhashval; + + printk("ANDROS: move_to_confirm nfs4_client %p\n", clp); + list_del_init(&clp->cl_strhash); + list_del_init(&clp->cl_idhash); + list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]); + strhashval = 
clientstr_hashval(clp->cl_name.data, + clp->cl_name.len); + list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); +} + +/* + * RFC 3010 has a complex implmentation description of processing a + * SETCLIENTID request consisting of 5 bullets, labeled as + * CASE0 - CASE4 below. + * + * NOTES: + * callback information will be processed in a future patch + * + * an unconfirmed record is added when: + * NORMAL (part of CASE 4): there is no confirmed nor unconfirmed record. + * CASE 1: confirmed record found with matching name, principal, + * verifier, and clientid. + * CASE 2: confirmed record found with matching name, principal, + * and there is no unconfirmed record with matching + * name and principal + * + * an unconfirmed record is replaced when: + * CASE 3: confirmed record found with matching name, principal, + * and an unconfirmed record is found with matching + * name, principal, and with clientid and + * confirm that does not match the confirmed record. + * CASE 4: there is no confirmed record with matching name and + * principal. there is an unconfirmed record with + * matching name, principal. + * + * an unconfirmed record is deleted when: + * CASE 1: an unconfirmed record that matches input name, verifier, + * and confirmed clientid. + * CASE 4: any unconfirmed records with matching name and principal + * that exist after an unconfirmed record has been replaced + * as described above. + * + */ +int +nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid) +{ + u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr; + struct xdr_netobj clname = { + .len = setclid->se_namelen, + .data = setclid->se_name, + }; + char * clverifier = setclid->se_verf; + unsigned int strhashval; + struct nfs4_client * conf, * unconf, * new, * clp; + int status; + struct list_head *pos, *next; + + status = nfserr_inval; + if (!check_name(clname)) + goto out; + + /* + * XXX The Duplicate Request Cache (DRC) has been checked (??) + * We get here on a DRC miss. 
+ */ + + strhashval = clientstr_hashval(clname.data, clname.len); + + conf = NULL; + down(&client_sema); + list_for_each_safe(pos, next, &conf_str_hashtbl[strhashval]) { + clp = list_entry(pos, struct nfs4_client, cl_strhash); + if (!cmp_name(&clp->cl_name, &clname)) + continue; + /* + * CASE 0: + * clname match, confirmed, different principal + * or different ip_address + */ + status = nfserr_clid_inuse; + if (!cmp_creds(&clp->cl_cred,&rqstp->rq_cred)) { + printk("NFSD: setclientid: string in use by client" + "(clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); + goto out; + } + if (clp->cl_addr != ip_addr) { + printk("NFSD: setclientid: string in use by client" + "(clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); + goto out; + } + + /* + * cl_name match from a previous SETCLIENTID operation + * XXX check for additional matches? + */ + conf = clp; + break; + } + unconf = NULL; + list_for_each_safe(pos, next, &unconf_str_hashtbl[strhashval]) { + clp = list_entry(pos, struct nfs4_client, cl_strhash); + if (!cmp_name(&clp->cl_name, &clname)) + continue; + /* cl_name match from a previous SETCLIENTID operation */ + unconf = clp; + break; + } + status = nfserr_resource; + if (!conf) { + /* + * CASE 4: + * placed first, because it is the normal case. 
+ */ + if (unconf) + expire_client(unconf); + if (!(new = create_client(clname))) + goto out; + copy_verf(new,clverifier); + new->cl_addr = ip_addr; + copy_cred(&new->cl_cred,&rqstp->rq_cred); + gen_clid(new); + gen_confirm(new); + add_to_unconfirmed(new, strhashval); + } else if (cmp_verf(conf->cl_verifier, clverifier)) { + /* + * CASE 1: + * cl_name match, confirmed, principal match + * verifier match: probable callback update + * + * remove any unconfirmed nfs4_client with + * matching cl_name, cl_verifier, and cl_clientid + * + * create and insert an unconfirmed nfs4_client with same + * cl_name, cl_verifier, and cl_clientid as existing + * nfs4_client, but with the new callback info and a + * new cl_confirm + */ + if ((unconf) && + cmp_verf(unconf->cl_verifier, conf->cl_verifier) && + cmp_clid(&unconf->cl_clientid, &conf->cl_clientid)) { + expire_client(unconf); + } + if (!(new = create_client(clname))) + goto out; + copy_verf(new,conf->cl_verifier); + new->cl_addr = ip_addr; + copy_cred(&new->cl_cred,&rqstp->rq_cred); + copy_clid(new, conf); + gen_confirm(new); + add_to_unconfirmed(new,strhashval); + } else if (!unconf) { + /* + * CASE 2: + * clname match, confirmed, principal match + * verfier does not match + * no unconfirmed. create a new unconfirmed nfs4_client + * using input clverifier, clname, and callback info + * and generate a new cl_clientid and cl_confirm. 
+ */ + if (!(new = create_client(clname))) + goto out; + copy_verf(new,clverifier); + new->cl_addr = ip_addr; + copy_cred(&new->cl_cred,&rqstp->rq_cred); + gen_clid(new); + gen_confirm(new); + add_to_unconfirmed(new, strhashval); + } else if (!cmp_clid(&conf->cl_clientid, &unconf->cl_clientid) && + !cmp_verf(conf->cl_confirm, unconf->cl_confirm)) { + /* + * CASE3: + * confirmed found (name, principal match) + * confirmed verifier does not match input clverifier + * + * unconfirmed found (name match) + * confirmed->cl_clientid != unconfirmed->cl_clientid and + * confirmed->cl_confirm != unconfirmed->cl_confirm + * + * remove unconfirmed. + * + * create an unconfirmed nfs4_client + * with same cl_name as existing confirmed nfs4_client, + * but with new callback info, new cl_clientid, + * new cl_verifier and a new cl_confirm + */ + expire_client(unconf); + if (!(new = create_client(clname))) + goto out; + copy_verf(new,clverifier); + new->cl_addr = ip_addr; + copy_cred(&new->cl_cred,&rqstp->rq_cred); + gen_clid(new); + gen_confirm(new); + add_to_unconfirmed(new, strhashval); + } else { + /* No cases hit !!! */ + status = nfserr_inval; + goto out; + + } + setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; + setclid->se_clientid.cl_id = new->cl_clientid.cl_id; + memcpy(&setclid->se_confirm, new->cl_confirm, sizeof(nfs4_verifier)); + printk(KERN_INFO "NFSD: this client will not receive delegations\n"); + status = nfs_ok; +out: + up(&client_sema); + return status; +} + + +/* + * RFC 3010 has a complex implmentation description of processing a + * SETCLIENTID_CONFIRM request consisting of 4 bullets describing + * processing on a DRC miss, labeled as CASE1 - CASE4 below. 
+ * + * NOTE: callback information will be processed here in a future patch + */ +int +nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm) +{ + u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr; + unsigned int idhashval; + struct nfs4_client *clp, *conf = NULL, *unconf = NULL; + char * confirm = setclientid_confirm->sc_confirm; + clientid_t * clid = &setclientid_confirm->sc_clientid; + struct list_head *pos, *next; + int status; + + status = nfserr_stale_clientid; + if (STALE_CLIENTID(clid)) + return status; /* client_sema is not held yet, so skip the up() at out: */ + /* + * XXX The Duplicate Request Cache (DRC) has been checked (??) + * We get here on a DRC miss. + */ + + idhashval = clientid_hashval(clid->cl_id); + down(&client_sema); + list_for_each_safe(pos, next, &conf_id_hashtbl[idhashval]) { + clp = list_entry(pos, struct nfs4_client, cl_idhash); + if (!cmp_clid(&clp->cl_clientid, clid)) + continue; + + status = nfserr_inval; + /* + * Found a record for this clientid. If the IP addresses + * don't match, return ERR_INVAL just as if the record had + * not been found. + */ + if (clp->cl_addr != ip_addr) { + printk("NFSD: setclientid: string in use by client" + "(clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); + goto out; + } + conf = clp; + break; + } + list_for_each_safe(pos, next, &unconf_id_hashtbl[idhashval]) { + clp = list_entry(pos, struct nfs4_client, cl_idhash); + if (!cmp_clid(&clp->cl_clientid, clid)) + continue; + status = nfserr_inval; + if (clp->cl_addr != ip_addr) { + printk("NFSD: setclientid: string in use by client" + "(clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); + goto out; + } + unconf = clp; + break; + } + /* CASE 1: + * unconf record that matches input clientid and input confirm. + * conf record that matches input clientid. 
+ * conf and unconf records match names, verifiers + */ + if ((conf && unconf) && + (cmp_verf(unconf->cl_confirm, confirm)) && + (cmp_verf(conf->cl_verifier, unconf->cl_verifier)) && + (cmp_name(&conf->cl_name,&unconf->cl_name)) && + (!cmp_verf(conf->cl_confirm, unconf->cl_confirm))) { + if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) + status = nfserr_clid_inuse; + else { + expire_client(conf); + move_to_confirmed(unconf, idhashval); + status = nfs_ok; + } + goto out; + } + /* CASE 2: + * conf record that matches input clientid. + * if unconf record that matches input clientid, then unconf->cl_name + * or unconf->cl_verifier don't match the conf record. + */ + if ((conf && !unconf) || + ((conf && unconf) && + (!cmp_verf(conf->cl_verifier, unconf->cl_verifier) || + !cmp_name(&conf->cl_name, &unconf->cl_name)))) { + if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) { + status = nfserr_clid_inuse; + } else { + status = nfs_ok; + } + goto out; + } + /* CASE 3: + * conf record not found. + * unconf record found. + * unconf->cl_confirm matches input confirm + */ + if (!conf && unconf && cmp_verf(unconf->cl_confirm, confirm)) { + if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) { + status = nfserr_clid_inuse; + } else { + status = nfs_ok; + move_to_confirmed(unconf, idhashval); + } + goto out; + } + /* CASE 4: + * conf record not found, or if conf, then conf->cl_confirm does not + * match input confirm. + * unconf record not found, or if unconf, then unconf->cl_confirm + * does not match input confirm. 
+ */ + if ((!conf || (conf && !cmp_verf(conf->cl_confirm, confirm))) && + (!unconf || (unconf && !cmp_verf(unconf->cl_confirm, confirm)))) { + status = nfserr_stale_clientid; + goto out; + } + /* check that we have hit one of the cases...*/ + status = nfserr_inval; + goto out; +out: + /* XXX if status == nfs_ok, probe callback path */ + up(&client_sema); + return status; +} + +void +nfs4_state_init(void) +{ + struct timespec tv; + int i; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + INIT_LIST_HEAD(&conf_id_hashtbl[i]); + INIT_LIST_HEAD(&conf_str_hashtbl[i]); + INIT_LIST_HEAD(&unconf_str_hashtbl[i]); + INIT_LIST_HEAD(&unconf_id_hashtbl[i]); + } + init_MUTEX(&client_sema); + tv = CURRENT_TIME; + boot_time = tv.tv_sec; +} + +static void +__nfs4_state_shutdown(void) +{ + int i; + struct nfs4_client *clp = NULL; + + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + while (!list_empty(&conf_id_hashtbl[i])) { + clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); + expire_client(clp); + } + while (!list_empty(&unconf_str_hashtbl[i])) { + clp = list_entry(unconf_str_hashtbl[i].next, struct nfs4_client, cl_strhash); + expire_client(clp); + } + } +} + +void +nfs4_state_shutdown(void) +{ + down(&client_sema); + __nfs4_state_shutdown(); + up(&client_sema); +} diff -urN linux-2.5.67-bk4/fs/nfsd/nfsctl.c linux-2.5.67-bk5/fs/nfsd/nfsctl.c --- linux-2.5.67-bk4/fs/nfsd/nfsctl.c 2003-04-07 10:30:44.000000000 -0700 +++ linux-2.5.67-bk5/fs/nfsd/nfsctl.c 2003-04-13 04:36:10.000000000 -0700 @@ -512,6 +512,7 @@ nfsd_cache_init(); /* RPC reply cache */ nfsd_export_init(); /* Exports table */ nfsd_lockd_init(); /* lockd->nfsd callbacks */ + nfs4_state_init(); /* NFSv4 State */ if (proc_mkdir("fs/nfs", 0)) { struct proc_dir_entry *entry; entry = create_proc_entry("fs/nfs/exports", 0, NULL); @@ -530,6 +531,7 @@ remove_proc_entry("fs/nfs", NULL); nfsd_stat_shutdown(); nfsd_lockd_shutdown(); + nfs4_state_shutdown(); unregister_filesystem(&nfsd_fs_type); } diff -urN 
linux-2.5.67-bk4/fs/nfsd/vfs.c linux-2.5.67-bk5/fs/nfsd/vfs.c --- linux-2.5.67-bk4/fs/nfsd/vfs.c 2003-04-07 10:31:04.000000000 -0700 +++ linux-2.5.67-bk5/fs/nfsd/vfs.c 2003-04-13 04:36:10.000000000 -0700 @@ -1568,13 +1568,11 @@ inode->i_uid == current->fsuid) return 0; - acc &= ~ MAY_OWNER_OVERRIDE; /* This bit is no longer needed, - and gets in the way later */ - err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC)); /* Allow read access to binaries even when mode 111 */ - if (err == -EACCES && S_ISREG(inode->i_mode) && acc == MAY_READ) + if (err == -EACCES && S_ISREG(inode->i_mode) && + acc == (MAY_READ | MAY_OWNER_OVERRIDE)) err = permission(inode, MAY_EXEC); return err? nfserrno(err) : 0; diff -urN linux-2.5.67-bk4/fs/open.c linux-2.5.67-bk5/fs/open.c --- linux-2.5.67-bk4/fs/open.c 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/fs/open.c 2003-04-13 04:36:10.000000000 -0700 @@ -702,7 +702,7 @@ int fd, error; error = -EMFILE; - write_lock(&files->file_lock); + spin_lock(&files->file_lock); repeat: fd = find_next_zero_bit(files->open_fds->fds_bits, @@ -751,7 +751,7 @@ error = fd; out: - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); return error; } @@ -765,9 +765,9 @@ void put_unused_fd(unsigned int fd) { struct files_struct *files = current->files; - write_lock(&files->file_lock); + spin_lock(&files->file_lock); __put_unused_fd(files, fd); - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); } /* @@ -786,11 +786,11 @@ void fd_install(unsigned int fd, struct file * file) { struct files_struct *files = current->files; - write_lock(&files->file_lock); + spin_lock(&files->file_lock); if (unlikely(files->fd[fd] != NULL)) BUG(); files->fd[fd] = file; - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); } asmlinkage long sys_open(const char __user * filename, int flags, int mode) @@ -870,7 +870,7 @@ struct file * filp; struct files_struct *files = current->files; - write_lock(&files->file_lock); + 
spin_lock(&files->file_lock); if (fd >= files->max_fds) goto out_unlock; filp = files->fd[fd]; @@ -879,11 +879,11 @@ files->fd[fd] = NULL; FD_CLR(fd, files->close_on_exec); __put_unused_fd(files, fd); - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); return filp_close(filp, files); out_unlock: - write_unlock(&files->file_lock); + spin_unlock(&files->file_lock); return -EBADF; } diff -urN linux-2.5.67-bk4/fs/proc/base.c linux-2.5.67-bk5/fs/proc/base.c --- linux-2.5.67-bk4/fs/proc/base.c 2003-04-07 10:31:51.000000000 -0700 +++ linux-2.5.67-bk5/fs/proc/base.c 2003-04-13 04:36:10.000000000 -0700 @@ -117,16 +117,16 @@ atomic_inc(&files->count); task_unlock(task); if (files) { - read_lock(&files->file_lock); + spin_lock(&files->file_lock); file = fcheck_files(files, fd); if (file) { *mnt = mntget(file->f_vfsmnt); *dentry = dget(file->f_dentry); - read_unlock(&files->file_lock); + spin_unlock(&files->file_lock); put_files_struct(files); return 0; } - read_unlock(&files->file_lock); + spin_unlock(&files->file_lock); put_files_struct(files); } return -ENOENT; @@ -655,7 +655,7 @@ task_unlock(p); if (!files) goto out; - read_lock(&files->file_lock); + spin_lock(&files->file_lock); for (fd = filp->f_pos-2; fd < files->max_fds; fd++, filp->f_pos++) { @@ -663,7 +663,7 @@ if (!fcheck_files(files, fd)) continue; - read_unlock(&files->file_lock); + spin_unlock(&files->file_lock); j = NUMBUF; i = fd; @@ -675,12 +675,12 @@ ino = fake_ino(pid, PROC_PID_FD_DIR + fd); if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { - read_lock(&files->file_lock); + spin_lock(&files->file_lock); break; } - read_lock(&files->file_lock); + spin_lock(&files->file_lock); } - read_unlock(&files->file_lock); + spin_unlock(&files->file_lock); put_files_struct(files); } out: @@ -824,13 +824,13 @@ atomic_inc(&files->count); task_unlock(task); if (files) { - read_lock(&files->file_lock); + spin_lock(&files->file_lock); if (fcheck_files(files, fd)) { - 
read_unlock(&files->file_lock); + spin_unlock(&files->file_lock); put_files_struct(files); return 1; } - read_unlock(&files->file_lock); + spin_unlock(&files->file_lock); put_files_struct(files); } d_drop(dentry); @@ -920,7 +920,7 @@ if (!files) goto out_unlock; inode->i_mode = S_IFLNK; - read_lock(&files->file_lock); + spin_lock(&files->file_lock); file = fcheck_files(files, fd); if (!file) goto out_unlock2; @@ -928,7 +928,7 @@ inode->i_mode |= S_IRUSR | S_IXUSR; if (file->f_mode & 2) inode->i_mode |= S_IWUSR | S_IXUSR; - read_unlock(&files->file_lock); + spin_unlock(&files->file_lock); put_files_struct(files); inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; @@ -940,7 +940,7 @@ return NULL; out_unlock2: - read_unlock(&files->file_lock); + spin_unlock(&files->file_lock); put_files_struct(files); out_unlock: iput(inode); diff -urN linux-2.5.67-bk4/fs/proc/proc_misc.c linux-2.5.67-bk5/fs/proc/proc_misc.c --- linux-2.5.67-bk4/fs/proc/proc_misc.c 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/fs/proc/proc_misc.c 2003-04-13 04:36:10.000000000 -0700 @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -98,6 +99,41 @@ return proc_calc_metrics(page, start, off, count, eof, len); } +struct vmalloc_info { + unsigned long used; + unsigned long largest_chunk; +}; + +static struct vmalloc_info get_vmalloc_info(void) +{ + unsigned long prev_end = VMALLOC_START; + struct vm_struct* vma; + struct vmalloc_info vmi; + vmi.used = 0; + + read_lock(&vmlist_lock); + + if(!vmlist) + vmi.largest_chunk = (VMALLOC_END-VMALLOC_START); + else + vmi.largest_chunk = 0; + + for (vma = vmlist; vma; vma = vma->next) { + unsigned long free_area_size = + (unsigned long)vma->addr - prev_end; + vmi.used += vma->size; + if (vmi.largest_chunk < free_area_size ) + + vmi.largest_chunk = free_area_size; + prev_end = vma->size + (unsigned long)vma->addr; + } + if(VMALLOC_END-prev_end > vmi.largest_chunk) + vmi.largest_chunk = VMALLOC_END-prev_end; 
+ + read_unlock(&vmlist_lock); + return vmi; +} + static int uptime_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -143,6 +179,8 @@ unsigned long inactive; unsigned long active; unsigned long free; + unsigned long vmtot; + struct vmalloc_info vmi; get_page_state(&ps); get_zone_counts(&active, &inactive, &free); @@ -155,6 +193,11 @@ si_swapinfo(&i); committed = atomic_read(&vm_committed_space); + vmtot = (VMALLOC_END-VMALLOC_START)>>10; + vmi = get_vmalloc_info(); + vmi.used >>= 10; + vmi.largest_chunk >>= 10; + /* * Tagged format, for easy grepping and expansion. */ @@ -177,7 +220,10 @@ "Mapped: %8lu kB\n" "Slab: %8lu kB\n" "Committed_AS: %8u kB\n" - "PageTables: %8lu kB\n", + "PageTables: %8lu kB\n" + "VmallocTotal: %8lu kB\n" + "VmallocUsed: %8lu kB\n" + "VmallocChunk: %8lu kB\n", K(i.totalram), K(i.freeram), K(i.bufferram), @@ -196,7 +242,10 @@ K(ps.nr_mapped), K(ps.nr_slab), K(committed), - K(ps.nr_page_table_pages) + K(ps.nr_page_table_pages), + vmtot, + vmi.used, + vmi.largest_chunk ); len += hugetlb_report_meminfo(page + len); @@ -386,7 +435,7 @@ extern int show_interrupts(struct seq_file *p, void *v); static int interrupts_open(struct inode *inode, struct file *file) { - unsigned size = PAGE_SIZE * (1 + NR_CPUS / 8); + unsigned size = 4096 * (1 + num_online_cpus() / 8); char *buf = kmalloc(size, GFP_KERNEL); struct seq_file *m; int res; diff -urN linux-2.5.67-bk4/fs/select.c linux-2.5.67-bk5/fs/select.c --- linux-2.5.67-bk4/fs/select.c 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/fs/select.c 2003-04-13 04:36:10.000000000 -0700 @@ -179,9 +179,9 @@ int retval, i, off; long __timeout = *timeout; - read_lock(¤t->files->file_lock); + spin_lock(¤t->files->file_lock); retval = max_select_fd(n, fds); - read_unlock(¤t->files->file_lock); + spin_unlock(¤t->files->file_lock); if (retval < 0) return retval; diff -urN linux-2.5.67-bk4/include/asm-alpha/bitops.h linux-2.5.67-bk5/include/asm-alpha/bitops.h --- 
linux-2.5.67-bk4/include/asm-alpha/bitops.h 2003-04-07 10:31:20.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-alpha/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -487,7 +487,9 @@ #define ext2_set_bit __test_and_set_bit +#define ext2_set_bit_atomic(l,n,a) test_and_set_bit(n,a) #define ext2_clear_bit __test_and_clear_bit +#define ext2_clear_bit_atomic(l,n,a) test_and_clear_bit(n,a) #define ext2_test_bit test_bit #define ext2_find_first_zero_bit find_first_zero_bit #define ext2_find_next_zero_bit find_next_zero_bit diff -urN linux-2.5.67-bk4/include/asm-alpha/cacheflush.h linux-2.5.67-bk5/include/asm-alpha/cacheflush.h --- linux-2.5.67-bk4/include/asm-alpha/cacheflush.h 2003-04-07 10:31:59.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-alpha/cacheflush.h 2003-04-13 04:36:10.000000000 -0700 @@ -9,7 +9,6 @@ #define flush_cache_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr) do { } while (0) -#define flush_page_to_ram(page) do { } while (0) #define flush_dcache_page(page) do { } while (0) /* Note that the following two definitions are _highly_ dependent diff -urN linux-2.5.67-bk4/include/asm-arm/bitops.h linux-2.5.67-bk5/include/asm-arm/bitops.h --- linux-2.5.67-bk4/include/asm-arm/bitops.h 2003-04-07 10:30:35.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-arm/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -357,8 +357,12 @@ */ #define ext2_set_bit(nr,p) \ __test_and_set_bit(WORD_BITOFF_TO_LE(nr), (unsigned long *)(p)) +#define ext2_set_bit_atomic(lock,nr,p) \ + test_and_set_bit(WORD_BITOFF_TO_LE(nr), (unsigned long *)(p)) #define ext2_clear_bit(nr,p) \ __test_and_clear_bit(WORD_BITOFF_TO_LE(nr), (unsigned long *)(p)) +#define ext2_clear_bit_atomic(lock,nr,p) \ + test_and_clear_bit(WORD_BITOFF_TO_LE(nr), (unsigned long *)(p)) #define ext2_test_bit(nr,p) \ __test_bit(WORD_BITOFF_TO_LE(nr), (unsigned long *)(p)) #define ext2_find_first_zero_bit(p,sz) \ diff -urN 
linux-2.5.67-bk4/include/asm-arm/proc-armo/cache.h linux-2.5.67-bk5/include/asm-arm/proc-armo/cache.h --- linux-2.5.67-bk4/include/asm-arm/proc-armo/cache.h 2003-04-07 10:30:44.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-arm/proc-armo/cache.h 2003-04-13 04:36:10.000000000 -0700 @@ -13,7 +13,6 @@ #define flush_cache_mm(mm) do { } while (0) #define flush_cache_range(vma,start,end) do { } while (0) #define flush_cache_page(vma,vmaddr) do { } while (0) -#define flush_page_to_ram(page) do { } while (0) #define invalidate_dcache_range(start,end) do { } while (0) #define clean_dcache_range(start,end) do { } while (0) diff -urN linux-2.5.67-bk4/include/asm-arm/proc-armv/cache.h linux-2.5.67-bk5/include/asm-arm/proc-armv/cache.h --- linux-2.5.67-bk4/include/asm-arm/proc-armv/cache.h 2003-04-07 10:31:14.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-arm/proc-armv/cache.h 2003-04-13 04:36:10.000000000 -0700 @@ -71,13 +71,6 @@ ((unsigned long)start) + size, 0); /* - * This is an obsolete interface; the functionality that was provided by this - * function is now merged into our flush_dcache_page, flush_icache_page, - * copy_user_page and clear_user_page functions. - */ -#define flush_page_to_ram(page) do { } while (0) - -/* * flush_dcache_page is used when the kernel has written to the page * cache page at virtual address page->virtual. 
* diff -urN linux-2.5.67-bk4/include/asm-cris/bitops.h linux-2.5.67-bk5/include/asm-cris/bitops.h --- linux-2.5.67-bk4/include/asm-cris/bitops.h 2003-04-07 10:32:50.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-cris/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -360,7 +360,9 @@ #define hweight8(x) generic_hweight8(x) #define ext2_set_bit test_and_set_bit +#define ext2_set_bit_atomic(l,n,a) test_and_set_bit(n,a) #define ext2_clear_bit test_and_clear_bit +#define ext2_clear_bit_atomic(l,n,a) test_and_clear_bit(n,a) #define ext2_test_bit test_bit #define ext2_find_first_zero_bit find_first_zero_bit #define ext2_find_next_zero_bit find_next_zero_bit diff -urN linux-2.5.67-bk4/include/asm-cris/pgtable.h linux-2.5.67-bk5/include/asm-cris/pgtable.h --- linux-2.5.67-bk4/include/asm-cris/pgtable.h 2003-04-07 10:30:34.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-cris/pgtable.h 2003-04-13 04:36:10.000000000 -0700 @@ -121,7 +121,6 @@ #define flush_cache_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr) do { } while (0) -#define flush_page_to_ram(page) do { } while (0) #define flush_dcache_page(page) do { } while (0) #define flush_icache_range(start, end) do { } while (0) #define flush_icache_page(vma,pg) do { } while (0) diff -urN linux-2.5.67-bk4/include/asm-generic/rtc.h linux-2.5.67-bk5/include/asm-generic/rtc.h --- linux-2.5.67-bk4/include/asm-generic/rtc.h 2003-04-07 10:31:12.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-generic/rtc.h 2003-04-13 04:36:10.000000000 -0700 @@ -22,9 +22,8 @@ #define RTC_AIE 0x20 /* alarm interrupt enable */ #define RTC_UIE 0x10 /* update-finished interrupt enable */ -extern void gen_rtc_interrupt(unsigned long); - /* some dummy definitions */ +#define RTC_BATT_BAD 0x100 /* battery bad */ #define RTC_SQWE 0x08 /* enable square-wave output */ #define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */ #define RTC_24H 0x02 /* 24 hour mode - else hours 
bit 7 means pm */ @@ -43,7 +42,7 @@ return uip; } -static inline void get_rtc_time(struct rtc_time *time) +static inline unsigned int get_rtc_time(struct rtc_time *time) { unsigned long uip_watchdog = jiffies; unsigned char ctrl; @@ -108,6 +107,8 @@ time->tm_year += 100; time->tm_mon--; + + return RTC_24H; } /* Set the current date and time in the real time clock. */ diff -urN linux-2.5.67-bk4/include/asm-i386/bitops.h linux-2.5.67-bk5/include/asm-i386/bitops.h --- linux-2.5.67-bk4/include/asm-i386/bitops.h 2003-04-07 10:31:55.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-i386/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -479,8 +479,12 @@ #define ext2_set_bit(nr,addr) \ __test_and_set_bit((nr),(unsigned long*)addr) +#define ext2_set_bit_atomic(lock,nr,addr) \ + test_and_set_bit((nr),(unsigned long*)addr) #define ext2_clear_bit(nr, addr) \ __test_and_clear_bit((nr),(unsigned long*)addr) +#define ext2_clear_bit_atomic(lock,nr, addr) \ + test_and_clear_bit((nr),(unsigned long*)addr) #define ext2_test_bit(nr, addr) test_bit((nr),(unsigned long*)addr) #define ext2_find_first_zero_bit(addr, size) \ find_first_zero_bit((unsigned long*)addr, size) diff -urN linux-2.5.67-bk4/include/asm-i386/cacheflush.h linux-2.5.67-bk5/include/asm-i386/cacheflush.h --- linux-2.5.67-bk4/include/asm-i386/cacheflush.h 2003-04-07 10:31:24.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-i386/cacheflush.h 2003-04-13 04:36:10.000000000 -0700 @@ -9,7 +9,6 @@ #define flush_cache_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr) do { } while (0) -#define flush_page_to_ram(page) do { } while (0) #define flush_dcache_page(page) do { } while (0) #define flush_icache_range(start, end) do { } while (0) #define flush_icache_page(vma,pg) do { } while (0) diff -urN linux-2.5.67-bk4/include/asm-ia64/bitops.h linux-2.5.67-bk5/include/asm-ia64/bitops.h --- linux-2.5.67-bk4/include/asm-ia64/bitops.h 2003-04-13 04:36:07.000000000 
-0700 +++ linux-2.5.67-bk5/include/asm-ia64/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -453,7 +453,9 @@ #define __clear_bit(nr, addr) clear_bit(nr, addr) #define ext2_set_bit test_and_set_bit +#define ext2_set_bit_atomic(l,n,a) test_and_set_bit(n,a) /* lock argument 'l' is not used */ #define ext2_clear_bit test_and_clear_bit +#define ext2_clear_bit_atomic(l,n,a) test_and_clear_bit(n,a) /* lock argument 'l' is not used */ #define ext2_test_bit test_bit #define ext2_find_first_zero_bit find_first_zero_bit #define ext2_find_next_zero_bit find_next_zero_bit diff -urN linux-2.5.67-bk4/include/asm-ia64/cacheflush.h linux-2.5.67-bk5/include/asm-ia64/cacheflush.h --- linux-2.5.67-bk4/include/asm-ia64/cacheflush.h 2003-04-07 10:30:45.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-ia64/cacheflush.h 2003-04-13 04:36:10.000000000 -0700 @@ -20,7 +20,6 @@ #define flush_cache_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr) do { } while (0) -#define flush_page_to_ram(page) do { } while (0) #define flush_icache_page(vma,page) do { } while (0) #define flush_dcache_page(page) \ diff -urN linux-2.5.67-bk4/include/asm-m68k/bitops.h linux-2.5.67-bk5/include/asm-m68k/bitops.h --- linux-2.5.67-bk4/include/asm-m68k/bitops.h 2003-04-07 10:33:04.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-m68k/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -365,6 +365,24 @@ return retval; } +#define ext2_set_bit_atomic(lock, nr, addr) \ + ({ \ + int ret; \ + spin_lock(lock); \ + ret = ext2_set_bit((nr), (addr)); \ + spin_unlock(lock); \ + ret; \ + }) + +#define ext2_clear_bit_atomic(lock, nr, addr) \ + ({ \ + int ret; \ + spin_lock(lock); \ + ret = ext2_clear_bit((nr), (addr)); \ + spin_unlock(lock); \ + ret; \ + }) + extern __inline__ int ext2_test_bit (int nr, const volatile void *vaddr) { diff -urN linux-2.5.67-bk4/include/asm-m68k/cacheflush.h linux-2.5.67-bk5/include/asm-m68k/cacheflush.h --- linux-2.5.67-bk4/include/asm-m68k/cacheflush.h 2003-04-07 10:31:55.000000000 -0700 +++ 
linux-2.5.67-bk5/include/asm-m68k/cacheflush.h 2003-04-13 04:36:10.000000000 -0700 @@ -106,7 +106,6 @@ /* Push the page at kernel virtual address and clear the icache */ /* RZ: use cpush %bc instead of cpush %dc, cinv %ic */ -#define flush_page_to_ram(page) __flush_page_to_ram(page_address(page)) extern inline void __flush_page_to_ram(void *vaddr) { if (CPU_IS_040_OR_060) { @@ -125,7 +124,7 @@ } } -#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_page(page) __flush_page_to_ram(page_address(page)) #define flush_icache_page(vma,pg) do { } while (0) #define flush_icache_user_range(vma,pg,adr,len) do { } while (0) diff -urN linux-2.5.67-bk4/include/asm-m68k/page.h linux-2.5.67-bk5/include/asm-m68k/page.h --- linux-2.5.67-bk4/include/asm-m68k/page.h 2003-04-07 10:33:02.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-m68k/page.h 2003-04-13 04:36:10.000000000 -0700 @@ -79,8 +79,14 @@ #define copy_page(to,from) memcpy((to), (from), PAGE_SIZE) #endif -#define clear_user_page(page, vaddr, pg) clear_page(page) -#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) +#define clear_user_page(addr, vaddr, page) \ + do { clear_page(addr); \ + flush_dcache_page(page); \ + } while (0) +#define copy_user_page(to, from, vaddr, page) \ + do { copy_page(to, from); \ + flush_dcache_page(page); \ + } while (0) /* * These are used to make use of C type-checking.. 
diff -urN linux-2.5.67-bk4/include/asm-m68knommu/bitops.h linux-2.5.67-bk5/include/asm-m68knommu/bitops.h --- linux-2.5.67-bk4/include/asm-m68knommu/bitops.h 2003-04-07 10:30:42.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-m68knommu/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -402,6 +402,24 @@ return retval; } +#define ext2_set_bit_atomic(lock, nr, addr) \ + ({ \ + int ret; \ + spin_lock(lock); \ + ret = ext2_set_bit((nr), (addr)); \ + spin_unlock(lock); \ + ret; \ + }) + +#define ext2_clear_bit_atomic(lock, nr, addr) \ + ({ \ + int ret; \ + spin_lock(lock); \ + ret = ext2_clear_bit((nr), (addr)); \ + spin_unlock(lock); \ + ret; \ + }) + extern __inline__ int ext2_test_bit(int nr, const volatile void * addr) { int mask; diff -urN linux-2.5.67-bk4/include/asm-m68knommu/cacheflush.h linux-2.5.67-bk5/include/asm-m68knommu/cacheflush.h --- linux-2.5.67-bk4/include/asm-m68knommu/cacheflush.h 2003-04-07 10:32:49.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-m68knommu/cacheflush.h 2003-04-13 04:36:10.000000000 -0700 @@ -10,7 +10,6 @@ #define flush_cache_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr) do { } while (0) -#define flush_page_to_ram(page) do { } while (0) #define flush_dcache_range(start,len) do { } while (0) #define flush_dcache_page(page) do { } while (0) #define flush_icache_range(start,len) __flush_cache_all() diff -urN linux-2.5.67-bk4/include/asm-mips/bitops.h linux-2.5.67-bk5/include/asm-mips/bitops.h --- linux-2.5.67-bk4/include/asm-mips/bitops.h 2003-04-07 10:30:40.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-mips/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -824,6 +824,24 @@ return retval; } +#define ext2_set_bit_atomic(lock, nr, addr) \ + ({ \ + int ret; \ + spin_lock(lock); \ + ret = ext2_set_bit((nr), (addr)); \ + spin_unlock(lock); \ + ret; \ + }) + +#define ext2_clear_bit_atomic(lock, nr, addr) \ + ({ \ + int ret; \ + spin_lock(lock); \ + ret = 
ext2_clear_bit((nr), (addr)); \ + spin_unlock(lock); \ + ret; \ + }) + extern __inline__ int ext2_test_bit(int nr, const void * addr) { int mask; @@ -890,7 +908,9 @@ /* Native ext2 byte ordering, just collapse using defines. */ #define ext2_set_bit(nr, addr) test_and_set_bit((nr), (addr)) +#define ext2_set_bit_atomic(lock, nr, addr) test_and_set_bit((nr), (addr)) #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr), (addr)) +#define ext2_clear_bit_atomic(lock, nr, addr) test_and_clear_bit((nr), (addr)) #define ext2_test_bit(nr, addr) test_bit((nr), (addr)) #define ext2_find_first_zero_bit(addr, size) find_first_zero_bit((addr), (size)) #define ext2_find_next_zero_bit(addr, size, offset) \ diff -urN linux-2.5.67-bk4/include/asm-mips/page.h linux-2.5.67-bk5/include/asm-mips/page.h --- linux-2.5.67-bk4/include/asm-mips/page.h 2003-04-07 10:31:04.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-mips/page.h 2003-04-13 04:36:10.000000000 -0700 @@ -25,8 +25,15 @@ #define clear_page(page) _clear_page(page) #define copy_page(to, from) _copy_page(to, from) -#define clear_user_page(page, vaddr) clear_page(page) -#define copy_user_page(to, from, vaddr) copy_page(to, from) + +#define clear_user_page(addr, vaddr, page) \ + do { clear_page(addr); \ + flush_dcache_page(page); \ + } while (0) +#define copy_user_page(to, from, vaddr, page) \ + do { copy_page(to, from); \ + flush_dcache_page(page); \ + } while (0) /* * These are used to make use of C type-checking.. 
diff -urN linux-2.5.67-bk4/include/asm-mips/pgtable.h linux-2.5.67-bk5/include/asm-mips/pgtable.h --- linux-2.5.67-bk4/include/asm-mips/pgtable.h 2003-04-07 10:30:59.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-mips/pgtable.h 2003-04-13 04:36:10.000000000 -0700 @@ -24,7 +24,6 @@ * - flush_cache_mm(mm) flushes the specified mm context's cache lines * - flush_cache_page(mm, vmaddr) flushes a single page * - flush_cache_range(vma, start, end) flushes a range of pages - * - flush_page_to_ram(page) write back kernel page to ram * - flush_icache_range(start, end) flush a range of instructions */ extern void (*_flush_cache_all)(void); @@ -39,15 +38,13 @@ extern void (*_flush_icache_page)(struct vm_area_struct *vma, struct page *page); -#define flush_dcache_page(page) do { } while (0) - #define flush_cache_all() _flush_cache_all() #define __flush_cache_all() ___flush_cache_all() #define flush_cache_mm(mm) _flush_cache_mm(mm) #define flush_cache_range(vma,start,end) _flush_cache_range(vma,start,end) #define flush_cache_page(vma,page) _flush_cache_page(vma, page) #define flush_cache_sigtramp(addr) _flush_cache_sigtramp(addr) -#define flush_page_to_ram(page) _flush_page_to_ram(page) +#define flush_dcache_page(page) _flush_page_to_ram(page) #define flush_icache_range(start, end) _flush_icache_range(start,end) #define flush_icache_page(vma, page) _flush_icache_page(vma, page) diff -urN linux-2.5.67-bk4/include/asm-mips64/bitops.h linux-2.5.67-bk5/include/asm-mips64/bitops.h --- linux-2.5.67-bk4/include/asm-mips64/bitops.h 2003-04-07 10:32:28.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-mips64/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -531,6 +531,24 @@ return retval; } +#define ext2_set_bit_atomic(lock, nr, addr) \ + ({ \ + int ret; \ + spin_lock(lock); \ + ret = ext2_set_bit((nr), (addr)); \ + spin_unlock(lock); \ + ret; \ + }) + +#define ext2_clear_bit_atomic(lock, nr, addr) \ + ({ \ + int ret; \ + spin_lock(lock); \ + ret = ext2_clear_bit((nr), (addr)); \ + 
spin_unlock(lock); \ + ret; \ + }) + extern inline int ext2_test_bit(int nr, const void * addr) { @@ -599,7 +617,9 @@ /* Native ext2 byte ordering, just collapse using defines. */ #define ext2_set_bit(nr, addr) test_and_set_bit((nr), (addr)) +#define ext2_set_bit_atomic(lock, nr, addr) test_and_set_bit((nr), (addr)) #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr), (addr)) +#define ext2_clear_bit_atomic(lock, nr, addr) test_and_clear_bit((nr), (addr)) #define ext2_test_bit(nr, addr) test_bit((nr), (addr)) #define ext2_find_first_zero_bit(addr, size) find_first_zero_bit((addr), (size)) #define ext2_find_next_zero_bit(addr, size, offset) \ diff -urN linux-2.5.67-bk4/include/asm-mips64/page.h linux-2.5.67-bk5/include/asm-mips64/page.h --- linux-2.5.67-bk4/include/asm-mips64/page.h 2003-04-07 10:30:34.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-mips64/page.h 2003-04-13 04:36:10.000000000 -0700 @@ -25,8 +25,15 @@ #define clear_page(page) _clear_page(page) #define copy_page(to, from) _copy_page(to, from) -#define clear_user_page(page, vaddr) clear_page(page) -#define copy_user_page(to, from, vaddr) copy_page(to, from) + +#define clear_user_page(addr, vaddr, page) \ + do { clear_page(addr); \ + flush_dcache_page(page); \ + } while (0) +#define copy_user_page(to, from, vaddr, page) \ + do { copy_page(to, from); \ + flush_dcache_page(page); \ + } while (0) /* * These are used to make use of C type-checking.. 
diff -urN linux-2.5.67-bk4/include/asm-mips64/pgtable.h linux-2.5.67-bk5/include/asm-mips64/pgtable.h --- linux-2.5.67-bk4/include/asm-mips64/pgtable.h 2003-04-07 10:32:55.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-mips64/pgtable.h 2003-04-13 04:36:10.000000000 -0700 @@ -25,7 +25,6 @@ * - flush_cache_mm(mm) flushes the specified mm context's cache lines * - flush_cache_page(mm, vmaddr) flushes a single page * - flush_cache_range(vma, start, end) flushes a range of pages - * - flush_page_to_ram(page) write back kernel page to ram */ extern void (*_flush_cache_mm)(struct mm_struct *mm); extern void (*_flush_cache_range)(struct vm_area_struct *vma, unsigned long start, @@ -34,14 +33,12 @@ extern void (*_flush_page_to_ram)(struct page * page); #define flush_cache_all() do { } while(0) -#define flush_dcache_page(page) do { } while (0) #ifndef CONFIG_CPU_R10000 #define flush_cache_mm(mm) _flush_cache_mm(mm) #define flush_cache_range(vma,start,end) _flush_cache_range(vma,start,end) #define flush_cache_page(vma,page) _flush_cache_page(vma, page) -#define flush_page_to_ram(page) _flush_page_to_ram(page) - +#define flush_dcache_page(page) _flush_page_to_ram(page) #define flush_icache_range(start, end) _flush_cache_l1() #define flush_icache_user_range(vma, page, addr, len) \ flush_icache_page((vma), (page)) @@ -66,7 +63,7 @@ #define flush_cache_mm(mm) do { } while(0) #define flush_cache_range(vma,start,end) do { } while(0) #define flush_cache_page(vma,page) do { } while(0) -#define flush_page_to_ram(page) do { } while(0) +#define flush_dcache_page(page) do { } while(0) #define flush_icache_range(start, end) _flush_cache_l1() #define flush_icache_user_range(vma, page, addr, len) \ flush_icache_page((vma), (page)) diff -urN linux-2.5.67-bk4/include/asm-parisc/bitops.h linux-2.5.67-bk5/include/asm-parisc/bitops.h --- linux-2.5.67-bk4/include/asm-parisc/bitops.h 2003-04-07 10:30:34.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-parisc/bitops.h 2003-04-13 
04:36:10.000000000 -0700 @@ -389,10 +389,14 @@ */ #ifdef __LP64__ #define ext2_set_bit(nr, addr) test_and_set_bit((nr) ^ 0x38, addr) +#define ext2_set_bit_atomic(l,nr,addr) test_and_set_bit((nr) ^ 0x38, addr) #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr) ^ 0x38, addr) +#define ext2_clear_bit_atomic(l,nr,addr) test_and_clear_bit((nr) ^ 0x38, addr) #else #define ext2_set_bit(nr, addr) test_and_set_bit((nr) ^ 0x18, addr) +#define ext2_set_bit_atomic(l,nr,addr) test_and_set_bit((nr) ^ 0x18, addr) #define ext2_clear_bit(nr, addr) test_and_clear_bit((nr) ^ 0x18, addr) +#define ext2_clear_bit_atomic(l,nr,addr) test_and_clear_bit((nr) ^ 0x18, addr) #endif #endif /* __KERNEL__ */ diff -urN linux-2.5.67-bk4/include/asm-parisc/cacheflush.h linux-2.5.67-bk5/include/asm-parisc/cacheflush.h --- linux-2.5.67-bk4/include/asm-parisc/cacheflush.h 2003-04-07 10:32:52.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-parisc/cacheflush.h 2003-04-13 04:36:10.000000000 -0700 @@ -18,11 +18,6 @@ #define flush_kernel_dcache_range(start,size) \ flush_kernel_dcache_range_asm((start), (start)+(size)); -static inline void -flush_page_to_ram(struct page *page) -{ -} - extern void flush_cache_all_local(void); static inline void cacheflush_h_tmp_function(void *dummy) diff -urN linux-2.5.67-bk4/include/asm-parisc/rtc.h linux-2.5.67-bk5/include/asm-parisc/rtc.h --- linux-2.5.67-bk4/include/asm-parisc/rtc.h 2003-04-07 10:31:09.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-parisc/rtc.h 2003-04-13 04:36:10.000000000 -0700 @@ -24,7 +24,7 @@ #define RTC_AIE 0x20 /* alarm interrupt enable */ #define RTC_UIE 0x10 /* update-finished interrupt enable */ -extern void gen_rtc_interrupt(unsigned long); +#define RTC_BATT_BAD 0x100 /* battery bad */ /* some dummy definitions */ #define RTC_SQWE 0x08 /* enable square-wave output */ @@ -44,16 +44,16 @@ { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } }; -static int get_rtc_time(struct rtc_time *wtime) +static inline unsigned int 
get_rtc_time(struct rtc_time *wtime) { struct pdc_tod tod_data; long int days, rem, y; const unsigned short int *ip; if(pdc_tod_read(&tod_data) < 0) - return -1; + return RTC_24H | RTC_BATT_BAD; + - // most of the remainder of this function is: // Copyright (C) 1991, 1993, 1997, 1998 Free Software Foundation, Inc. // This was originally a part of the GNU C Library. @@ -69,7 +69,7 @@ wtime->tm_sec = rem % 60; y = 1970; - + #define DIV(a, b) ((a) / (b) - ((a) % (b) < 0)) #define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400)) @@ -92,8 +92,8 @@ days -= ip[y]; wtime->tm_mon = y; wtime->tm_mday = days + 1; - - return 0; + + return RTC_24H; } static int set_rtc_time(struct rtc_time *wtime) diff -urN linux-2.5.67-bk4/include/asm-ppc/bitops.h linux-2.5.67-bk5/include/asm-ppc/bitops.h --- linux-2.5.67-bk4/include/asm-ppc/bitops.h 2003-04-07 10:31:20.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-ppc/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -392,7 +392,9 @@ #define ext2_set_bit(nr, addr) __test_and_set_bit((nr) ^ 0x18, (unsigned long *)(addr)) +#define ext2_set_bit_atomic(lock, nr, addr) test_and_set_bit((nr) ^ 0x18, (unsigned long *)(addr)) #define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 0x18, (unsigned long *)(addr)) +#define ext2_clear_bit_atomic(lock, nr, addr) test_and_clear_bit((nr) ^ 0x18, (unsigned long *)(addr)) static __inline__ int ext2_test_bit(int nr, __const__ void * addr) { diff -urN linux-2.5.67-bk4/include/asm-ppc/cacheflush.h linux-2.5.67-bk5/include/asm-ppc/cacheflush.h --- linux-2.5.67-bk4/include/asm-ppc/cacheflush.h 2003-04-07 10:32:18.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-ppc/cacheflush.h 2003-04-13 04:36:10.000000000 -0700 @@ -23,7 +23,6 @@ #define flush_cache_mm(mm) do { } while (0) #define flush_cache_range(vma, a, b) do { } while (0) #define flush_cache_page(vma, p) do { } while (0) -#define flush_page_to_ram(page) do { } while (0) #define flush_icache_page(vma, page) do { } while (0) extern void 
flush_dcache_page(struct page *page); diff -urN linux-2.5.67-bk4/include/asm-ppc64/bitops.h linux-2.5.67-bk5/include/asm-ppc64/bitops.h --- linux-2.5.67-bk4/include/asm-ppc64/bitops.h 2003-04-07 10:32:20.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-ppc64/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -338,6 +338,25 @@ __test_and_set_le_bit((nr),(unsigned long*)addr) #define ext2_clear_bit(nr, addr) \ __test_and_clear_le_bit((nr),(unsigned long*)addr) + +#define ext2_set_bit_atomic(lock, nr, addr) \ + ({ \ + int ret; \ + spin_lock(lock); \ + ret = ext2_set_bit((nr), (addr)); \ + spin_unlock(lock); \ + ret; \ + }) + +#define ext2_clear_bit_atomic(lock, nr, addr) \ + ({ \ + int ret; \ + spin_lock(lock); \ + ret = ext2_clear_bit((nr), (addr)); \ + spin_unlock(lock); \ + ret; \ + }) + #define ext2_test_bit(nr, addr) test_le_bit((nr),(unsigned long*)addr) #define ext2_find_first_zero_bit(addr, size) \ find_first_zero_le_bit((unsigned long*)addr, size) diff -urN linux-2.5.67-bk4/include/asm-ppc64/cacheflush.h linux-2.5.67-bk5/include/asm-ppc64/cacheflush.h --- linux-2.5.67-bk4/include/asm-ppc64/cacheflush.h 2003-04-07 10:30:39.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-ppc64/cacheflush.h 2003-04-13 04:36:10.000000000 -0700 @@ -13,7 +13,6 @@ #define flush_cache_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr) do { } while (0) -#define flush_page_to_ram(page) do { } while (0) #define flush_icache_page(vma, page) do { } while (0) extern void flush_dcache_page(struct page *page); diff -urN linux-2.5.67-bk4/include/asm-s390/bitops.h linux-2.5.67-bk5/include/asm-s390/bitops.h --- linux-2.5.67-bk4/include/asm-s390/bitops.h 2003-04-07 10:30:35.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-s390/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -805,8 +805,12 @@ #define ext2_set_bit(nr, addr) \ test_and_set_bit((nr)^24, (unsigned long *)addr) +#define ext2_set_bit_atomic(lock, nr, addr) \ + 
test_and_set_bit((nr)^24, (unsigned long *)addr) #define ext2_clear_bit(nr, addr) \ test_and_clear_bit((nr)^24, (unsigned long *)addr) +#define ext2_clear_bit_atomic(lock, nr, addr) \ + test_and_clear_bit((nr)^24, (unsigned long *)addr) #define ext2_test_bit(nr, addr) \ test_bit((nr)^24, (unsigned long *)addr) diff -urN linux-2.5.67-bk4/include/asm-s390/cacheflush.h linux-2.5.67-bk5/include/asm-s390/cacheflush.h --- linux-2.5.67-bk4/include/asm-s390/cacheflush.h 2003-04-07 10:33:02.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-s390/cacheflush.h 2003-04-13 04:36:10.000000000 -0700 @@ -9,7 +9,6 @@ #define flush_cache_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr) do { } while (0) -#define flush_page_to_ram(page) do { } while (0) #define flush_dcache_page(page) do { } while (0) #define flush_icache_range(start, end) do { } while (0) #define flush_icache_page(vma,pg) do { } while (0) diff -urN linux-2.5.67-bk4/include/asm-s390x/bitops.h linux-2.5.67-bk5/include/asm-s390x/bitops.h --- linux-2.5.67-bk4/include/asm-s390x/bitops.h 2003-04-07 10:32:51.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-s390x/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -838,8 +838,12 @@ #define ext2_set_bit(nr, addr) \ test_and_set_bit((nr)^56, (unsigned long *)addr) +#define ext2_set_bit_atomic(lock, nr, addr) \ + test_and_set_bit((nr)^56, (unsigned long *)addr) #define ext2_clear_bit(nr, addr) \ test_and_clear_bit((nr)^56, (unsigned long *)addr) +#define ext2_clear_bit_atomic(lock, nr, addr) \ + test_and_clear_bit((nr)^56, (unsigned long *)addr) #define ext2_test_bit(nr, addr) \ test_bit((nr)^56, (unsigned long *)addr) diff -urN linux-2.5.67-bk4/include/asm-s390x/cacheflush.h linux-2.5.67-bk5/include/asm-s390x/cacheflush.h --- linux-2.5.67-bk4/include/asm-s390x/cacheflush.h 2003-04-07 10:31:20.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-s390x/cacheflush.h 2003-04-13 04:36:10.000000000 -0700 @@ -9,7 +9,6 @@ 
#define flush_cache_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr) do { } while (0) -#define flush_page_to_ram(page) do { } while (0) #define flush_dcache_page(page) do { } while (0) #define flush_icache_range(start, end) do { } while (0) #define flush_icache_page(vma,pg) do { } while (0) diff -urN linux-2.5.67-bk4/include/asm-sh/bitops.h linux-2.5.67-bk5/include/asm-sh/bitops.h --- linux-2.5.67-bk4/include/asm-sh/bitops.h 2003-04-07 10:30:34.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-sh/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -344,6 +344,24 @@ } #endif +#define ext2_set_bit_atomic(lock, nr, addr) \ + ({ \ + int ret; \ + spin_lock(lock); \ + ret = ext2_set_bit((nr), (addr)); \ + spin_unlock(lock); \ + ret; \ + }) + +#define ext2_clear_bit_atomic(lock, nr, addr) \ + ({ \ + int ret; \ + spin_lock(lock); \ + ret = ext2_clear_bit((nr), (addr)); \ + spin_unlock(lock); \ + ret; \ + }) + /* Bitmap functions for the minix filesystem. 
*/ #define minix_test_and_set_bit(nr,addr) test_and_set_bit(nr,addr) #define minix_set_bit(nr,addr) set_bit(nr,addr) diff -urN linux-2.5.67-bk4/include/asm-sh/pgtable.h linux-2.5.67-bk5/include/asm-sh/pgtable.h --- linux-2.5.67-bk4/include/asm-sh/pgtable.h 2003-04-07 10:33:03.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-sh/pgtable.h 2003-04-13 04:36:10.000000000 -0700 @@ -26,7 +26,6 @@ * - flush_cache_range(vma, start, end) flushes a range of pages * * - flush_dcache_page(pg) flushes(wback&invalidates) a page for dcache - * - flush_page_to_ram(page) write back kernel page to ram * - flush_icache_range(start, end) flushes(invalidates) a range for icache * - flush_icache_page(vma, pg) flushes(invalidates) a page for icache * @@ -37,7 +36,6 @@ #define flush_cache_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr) do { } while (0) -#define flush_page_to_ram(page) do { } while (0) #define flush_dcache_page(page) do { } while (0) #define flush_icache_range(start, end) do { } while (0) #define flush_icache_page(vma,pg) do { } while (0) @@ -63,7 +61,6 @@ extern void flush_icache_range(unsigned long start, unsigned long end); extern void flush_cache_sigtramp(unsigned long addr); -#define flush_page_to_ram(page) do { } while (0) #define flush_icache_page(vma,pg) do { } while (0) #define flush_icache_user_range(vma,pg,adr,len) do { } while (0) diff -urN linux-2.5.67-bk4/include/asm-sparc/bitops.h linux-2.5.67-bk5/include/asm-sparc/bitops.h --- linux-2.5.67-bk4/include/asm-sparc/bitops.h 2003-04-07 10:32:30.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-sparc/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -455,6 +455,25 @@ #define ext2_set_bit __test_and_set_le_bit #define ext2_clear_bit __test_and_clear_le_bit + +#define ext2_set_bit_atomic(lock, nr, addr) \ + ({ \ + int ret; \ + spin_lock(lock); \ + ret = ext2_set_bit((nr), (addr)); \ + spin_unlock(lock); \ + ret; \ + }) + +#define 
ext2_clear_bit_atomic(lock, nr, addr) \ + ({ \ + int ret; \ + spin_lock(lock); \ + ret = ext2_clear_bit((nr), (addr)); \ + spin_unlock(lock); \ + ret; \ + }) + #define ext2_test_bit test_le_bit #define ext2_find_first_zero_bit find_first_zero_le_bit #define ext2_find_next_zero_bit find_next_zero_le_bit diff -urN linux-2.5.67-bk4/include/asm-sparc/cacheflush.h linux-2.5.67-bk5/include/asm-sparc/cacheflush.h --- linux-2.5.67-bk4/include/asm-sparc/cacheflush.h 2003-04-07 10:31:00.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-sparc/cacheflush.h 2003-04-13 04:36:10.000000000 -0700 @@ -64,7 +64,6 @@ extern void sparc_flush_page_to_ram(struct page *page); -#define flush_page_to_ram(page) do { } while (0) #define flush_dcache_page(page) sparc_flush_page_to_ram(page) #endif /* _SPARC_CACHEFLUSH_H */ diff -urN linux-2.5.67-bk4/include/asm-sparc64/bitops.h linux-2.5.67-bk5/include/asm-sparc64/bitops.h --- linux-2.5.67-bk4/include/asm-sparc64/bitops.h 2003-04-07 10:30:43.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-sparc64/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -351,7 +351,9 @@ #ifdef __KERNEL__ #define ext2_set_bit(nr,addr) test_and_set_le_bit((nr),(unsigned long *)(addr)) +#define ext2_set_bit_atomic(lock,nr,addr) test_and_set_le_bit((nr),(unsigned long *)(addr)) #define ext2_clear_bit(nr,addr) test_and_clear_le_bit((nr),(unsigned long *)(addr)) +#define ext2_clear_bit_atomic(lock,nr,addr) test_and_clear_le_bit((nr),(unsigned long *)(addr)) #define ext2_test_bit(nr,addr) test_le_bit((nr),(unsigned long *)(addr)) #define ext2_find_first_zero_bit(addr, size) \ find_first_zero_le_bit((unsigned long *)(addr), (size)) diff -urN linux-2.5.67-bk4/include/asm-sparc64/cacheflush.h linux-2.5.67-bk5/include/asm-sparc64/cacheflush.h --- linux-2.5.67-bk4/include/asm-sparc64/cacheflush.h 2003-04-07 10:32:54.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-sparc64/cacheflush.h 2003-04-13 04:36:10.000000000 -0700 @@ -50,7 +50,4 @@ extern void flush_dcache_page(struct page 
*page); -/* This is unnecessary on the SpitFire since D-CACHE is write-through. */ -#define flush_page_to_ram(page) do { } while (0) - #endif /* _SPARC64_CACHEFLUSH_H */ diff -urN linux-2.5.67-bk4/include/asm-v850/bitops.h linux-2.5.67-bk5/include/asm-v850/bitops.h --- linux-2.5.67-bk4/include/asm-v850/bitops.h 2003-04-07 10:31:45.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-v850/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -252,7 +252,9 @@ #define hweight8(x) generic_hweight8 (x) #define ext2_set_bit test_and_set_bit +#define ext2_set_bit_atomic(l,n,a) test_and_set_bit(n,a) #define ext2_clear_bit test_and_clear_bit +#define ext2_clear_bit_atomic(l,n,a) test_and_clear_bit(n,a) #define ext2_test_bit test_bit #define ext2_find_first_zero_bit find_first_zero_bit #define ext2_find_next_zero_bit find_next_zero_bit diff -urN linux-2.5.67-bk4/include/asm-v850/cacheflush.h linux-2.5.67-bk5/include/asm-v850/cacheflush.h --- linux-2.5.67-bk4/include/asm-v850/cacheflush.h 2003-04-07 10:31:02.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-v850/cacheflush.h 2003-04-13 04:36:10.000000000 -0700 @@ -29,7 +29,6 @@ #define flush_cache_mm(mm) ((void)0) #define flush_cache_range(vma, start, end) ((void)0) #define flush_cache_page(vma, vmaddr) ((void)0) -#define flush_page_to_ram(page) ((void)0) #define flush_dcache_page(page) ((void)0) #define flush_icache() ((void)0) #define flush_icache_range(start, end) ((void)0) diff -urN linux-2.5.67-bk4/include/asm-v850/nb85e_cache.h linux-2.5.67-bk5/include/asm-v850/nb85e_cache.h --- linux-2.5.67-bk4/include/asm-v850/nb85e_cache.h 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-v850/nb85e_cache.h 2003-04-13 04:36:10.000000000 -0700 @@ -62,7 +62,6 @@ unsigned long adr, int len); extern void nb85e_cache_flush_sigtramp (unsigned long addr); -#define flush_page_to_ram(x) ((void)0) #define flush_cache_all nb85e_cache_flush_all #define flush_cache_mm nb85e_cache_flush_mm #define flush_cache_range nb85e_cache_flush_range 
diff -urN linux-2.5.67-bk4/include/asm-v850/page.h linux-2.5.67-bk5/include/asm-v850/page.h --- linux-2.5.67-bk4/include/asm-v850/page.h 2003-04-07 10:31:50.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-v850/page.h 2003-04-13 04:36:10.000000000 -0700 @@ -40,8 +40,14 @@ #define clear_page(page) memset ((void *)(page), 0, PAGE_SIZE) #define copy_page(to, from) memcpy ((void *)(to), (void *)from, PAGE_SIZE) -#define clear_user_page(page, vaddr, pg) clear_page (page) -#define copy_user_page(to, from, vaddr,pg) copy_page (to, from) +#define clear_user_page(addr, vaddr, page) \ + do { clear_page(addr); \ + flush_dcache_page(page); \ + } while (0) +#define copy_user_page(to, from, vaddr, page) \ + do { copy_page(to, from); \ + flush_dcache_page(page); \ + } while (0) #ifdef STRICT_MM_TYPECHECKS /* diff -urN linux-2.5.67-bk4/include/asm-x86_64/bitops.h linux-2.5.67-bk5/include/asm-x86_64/bitops.h --- linux-2.5.67-bk4/include/asm-x86_64/bitops.h 2003-04-07 10:31:05.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-x86_64/bitops.h 2003-04-13 04:36:10.000000000 -0700 @@ -487,8 +487,12 @@ #define ext2_set_bit(nr,addr) \ __test_and_set_bit((nr),(unsigned long*)addr) +#define ext2_set_bit_atomic(lock,nr,addr) \ + test_and_set_bit((nr),(unsigned long*)addr) #define ext2_clear_bit(nr, addr) \ __test_and_clear_bit((nr),(unsigned long*)addr) +#define ext2_clear_bit_atomic(lock,nr,addr) \ + test_and_clear_bit((nr),(unsigned long*)addr) #define ext2_test_bit(nr, addr) test_bit((nr),(unsigned long*)addr) #define ext2_find_first_zero_bit(addr, size) \ find_first_zero_bit((unsigned long*)addr, size) diff -urN linux-2.5.67-bk4/include/asm-x86_64/cacheflush.h linux-2.5.67-bk5/include/asm-x86_64/cacheflush.h --- linux-2.5.67-bk4/include/asm-x86_64/cacheflush.h 2003-04-07 10:30:44.000000000 -0700 +++ linux-2.5.67-bk5/include/asm-x86_64/cacheflush.h 2003-04-13 04:36:10.000000000 -0700 @@ -9,7 +9,6 @@ #define flush_cache_mm(mm) do { } while (0) #define flush_cache_range(vma, start, end) do { 
} while (0) #define flush_cache_page(vma, vmaddr) do { } while (0) -#define flush_page_to_ram(page) do { } while (0) #define flush_dcache_page(page) do { } while (0) #define flush_icache_range(start, end) do { } while (0) #define flush_icache_page(vma,pg) do { } while (0) diff -urN linux-2.5.67-bk4/include/linux/blockgroup_lock.h linux-2.5.67-bk5/include/linux/blockgroup_lock.h --- linux-2.5.67-bk4/include/linux/blockgroup_lock.h 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.5.67-bk5/include/linux/blockgroup_lock.h 2003-04-13 04:36:10.000000000 -0700 @@ -0,0 +1,58 @@ +/* + * Per-blockgroup locking for ext2 and ext3. + * + * Simple hashed spinlocking. + */ + +#include +#include +#include + +#ifdef CONFIG_SMP + +/* + * We want a power-of-two. Is there a better way than this? + */ + +#if NR_CPUS >= 32 +#define NR_BG_LOCKS 128 +#elif NR_CPUS >= 16 +#define NR_BG_LOCKS 64 +#elif NR_CPUS >= 8 +#define NR_BG_LOCKS 32 +#elif NR_CPUS >= 4 +#define NR_BG_LOCKS 16 +#elif NR_CPUS >= 2 +#define NR_BG_LOCKS 8 +#else +#define NR_BG_LOCKS 4 +#endif + +#else /* CONFIG_SMP */ +#define NR_BG_LOCKS 1 +#endif /* CONFIG_SMP */ + +struct bgl_lock { + spinlock_t lock; +} ____cacheline_aligned_in_smp; + +struct blockgroup_lock { + struct bgl_lock locks[NR_BG_LOCKS]; +}; + +static inline void bgl_lock_init(struct blockgroup_lock *bgl) +{ + int i; + + for (i = 0; i < NR_BG_LOCKS; i++) + spin_lock_init(&bgl->locks[i].lock); +} + +/* + * The accessor is a macro so we can embed a blockgroup_lock into different + * superblock types + */ +#define sb_bgl_lock(sb, block_group) \ + (&(sb)->s_blockgroup_lock.locks[(block_group) & (NR_BG_LOCKS-1)].lock) + + diff -urN linux-2.5.67-bk4/include/linux/bootmem.h linux-2.5.67-bk5/include/linux/bootmem.h --- linux-2.5.67-bk4/include/linux/bootmem.h 2003-04-07 10:32:28.000000000 -0700 +++ linux-2.5.67-bk5/include/linux/bootmem.h 2003-04-13 04:36:10.000000000 -0700 @@ -32,6 +32,8 @@ void *node_bootmem_map; unsigned long last_offset; unsigned long last_pos; + 
unsigned long last_success; /* Previous allocation point. To speed + * up searching */ } bootmem_data_t; extern unsigned long __init bootmem_bootmap_pages (unsigned long); diff -urN linux-2.5.67-bk4/include/linux/ext2_fs_sb.h linux-2.5.67-bk5/include/linux/ext2_fs_sb.h --- linux-2.5.67-bk4/include/linux/ext2_fs_sb.h 2003-04-07 10:32:30.000000000 -0700 +++ linux-2.5.67-bk5/include/linux/ext2_fs_sb.h 2003-04-13 04:36:10.000000000 -0700 @@ -16,6 +16,9 @@ #ifndef _LINUX_EXT2_FS_SB #define _LINUX_EXT2_FS_SB +#include +#include + /* * second extended-fs super-block data in memory */ @@ -45,6 +48,10 @@ u32 s_next_generation; unsigned long s_dir_count; u8 *s_debts; + struct percpu_counter s_freeblocks_counter; + struct percpu_counter s_freeinodes_counter; + struct percpu_counter s_dirs_counter; + struct blockgroup_lock s_blockgroup_lock; }; #endif /* _LINUX_EXT2_FS_SB */ diff -urN linux-2.5.67-bk4/include/linux/file.h linux-2.5.67-bk5/include/linux/file.h --- linux-2.5.67-bk4/include/linux/file.h 2003-04-07 10:30:33.000000000 -0700 +++ linux-2.5.67-bk5/include/linux/file.h 2003-04-13 04:36:10.000000000 -0700 @@ -21,7 +21,7 @@ */ struct files_struct { atomic_t count; - rwlock_t file_lock; /* Protects all the below members. Nests inside tsk->alloc_lock */ + spinlock_t file_lock; /* Protects all the below members. 
Nests inside tsk->alloc_lock */ int max_fds; int max_fdset; int next_fd; diff -urN linux-2.5.67-bk4/include/linux/highmem.h linux-2.5.67-bk5/include/linux/highmem.h --- linux-2.5.67-bk4/include/linux/highmem.h 2003-04-07 10:32:18.000000000 -0700 +++ linux-2.5.67-bk5/include/linux/highmem.h 2003-04-13 04:36:10.000000000 -0700 @@ -67,7 +67,6 @@ kaddr = kmap_atomic(page, KM_USER0); memset((char *)kaddr + offset, 0, size); flush_dcache_page(page); - flush_page_to_ram(page); kunmap_atomic(kaddr, KM_USER0); } diff -urN linux-2.5.67-bk4/include/linux/init_task.h linux-2.5.67-bk5/include/linux/init_task.h --- linux-2.5.67-bk4/include/linux/init_task.h 2003-04-07 10:30:41.000000000 -0700 +++ linux-2.5.67-bk5/include/linux/init_task.h 2003-04-13 04:36:10.000000000 -0700 @@ -6,7 +6,7 @@ #define INIT_FILES \ { \ .count = ATOMIC_INIT(1), \ - .file_lock = RW_LOCK_UNLOCKED, \ + .file_lock = SPIN_LOCK_UNLOCKED, \ .max_fds = NR_OPEN_DEFAULT, \ .max_fdset = __FD_SETSIZE, \ .next_fd = 0, \ diff -urN linux-2.5.67-bk4/include/linux/mm.h linux-2.5.67-bk5/include/linux/mm.h --- linux-2.5.67-bk4/include/linux/mm.h 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/include/linux/mm.h 2003-04-13 04:36:10.000000000 -0700 @@ -486,6 +486,8 @@ extern void free_area_init_node(int nid, pg_data_t *pgdat, struct page *pmap, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); +extern void memmap_init_zone(struct page *, unsigned long, int, + unsigned long, unsigned long); extern void mem_init(void); extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); diff -urN linux-2.5.67-bk4/include/linux/nfsd/nfsd.h linux-2.5.67-bk5/include/linux/nfsd/nfsd.h --- linux-2.5.67-bk4/include/linux/nfsd/nfsd.h 2003-04-07 10:32:53.000000000 -0700 +++ linux-2.5.67-bk5/include/linux/nfsd/nfsd.h 2003-04-13 04:36:10.000000000 -0700 @@ -117,6 +117,17 @@ int nfsd_permission(struct svc_export *, struct dentry *, int); +/* + * NFSv4 State + */ +#ifdef CONFIG_NFSD_V4 
+void nfs4_state_init(void); +void nfs4_state_shutdown(void); +#else +void static inline nfs4_state_init(void){} +void static inline nfs4_state_shutdown(void){} +#endif + /* * lockd binding */ @@ -162,6 +173,7 @@ #define nfserr_bad_cookie __constant_htonl(NFSERR_BAD_COOKIE) #define nfserr_same __constant_htonl(NFSERR_SAME) #define nfserr_clid_inuse __constant_htonl(NFSERR_CLID_INUSE) +#define nfserr_stale_clientid __constant_htonl(NFSERR_STALE_CLIENTID) #define nfserr_resource __constant_htonl(NFSERR_RESOURCE) #define nfserr_nofilehandle __constant_htonl(NFSERR_NOFILEHANDLE) #define nfserr_minor_vers_mismatch __constant_htonl(NFSERR_MINOR_VERS_MISMATCH) diff -urN linux-2.5.67-bk4/include/linux/nfsd/state.h linux-2.5.67-bk5/include/linux/nfsd/state.h --- linux-2.5.67-bk4/include/linux/nfsd/state.h 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.5.67-bk5/include/linux/nfsd/state.h 2003-04-13 04:36:10.000000000 -0700 @@ -0,0 +1,63 @@ +/* + * linux/include/nfsd/state.h + * + * Copyright (c) 2001 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith + * Andy Adamson + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _NFSD4_STATE_H +#define _NFSD4_STATE_H + +#include + +#define NFSD4_CLIENT_MAXNAME 1024 + +extern int nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid); + +/* + * struct nfs4_client - one per client. Clientids live here. + * o Each nfs4_client is hashed by clientid. + * + * o Each nfs4_clients is also hashed by name + * (the opaque quantity initially sent by the client to identify itself). 
+ */ +struct nfs4_client { + struct list_head cl_idhash; /* hash by cl_clientid.id */ + struct list_head cl_strhash; /* hash by cl_name */ + struct xdr_netobj cl_name; /* id generated by client */ + nfs4_verifier cl_verifier; /* generated by client */ + u32 cl_addr; /* client ipaddress */ + struct svc_cred cl_cred; /* setclientid principal */ + clientid_t cl_clientid; /* generated by server */ + nfs4_verifier cl_confirm; /* generated by server */ +}; +#endif /* NFSD4_STATE_H */ diff -urN linux-2.5.67-bk4/include/linux/nfsd/syscall.h linux-2.5.67-bk5/include/linux/nfsd/syscall.h --- linux-2.5.67-bk4/include/linux/nfsd/syscall.h 2003-04-07 10:32:59.000000000 -0700 +++ linux-2.5.67-bk5/include/linux/nfsd/syscall.h 2003-04-13 04:36:10.000000000 -0700 @@ -91,6 +91,13 @@ struct nfsctl_export u_export; struct nfsctl_fdparm u_getfd; struct nfsctl_fsparm u_getfs; + /* + * The following dummy member is needed to preserve binary compatibility + * on platforms where alignof(void*)>alignof(int). It's needed because + * this union used to contain a member (u_umap) which contained a + * pointer. + */ + void *u_ptr; } u; #define ca_svc u.u_svc #define ca_client u.u_client diff -urN linux-2.5.67-bk4/include/linux/percpu_counter.h linux-2.5.67-bk5/include/linux/percpu_counter.h --- linux-2.5.67-bk4/include/linux/percpu_counter.h 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.5.67-bk5/include/linux/percpu_counter.h 2003-04-13 04:36:10.000000000 -0700 @@ -0,0 +1,100 @@ +/* + * A simple "approximate counter" for use in ext2 and ext3 superblocks. + * + * WARNING: these things are HUGE. 4 kbytes per counter on 32-way P4. 
+ */ + +#include +#include +#include +#include + +#ifdef CONFIG_SMP + +struct __percpu_counter { + long count; +} ____cacheline_aligned; + +struct percpu_counter { + spinlock_t lock; + long count; + struct __percpu_counter counters[NR_CPUS]; +}; + +#if NR_CPUS >= 16 +#define FBC_BATCH (NR_CPUS*2) +#else +#define FBC_BATCH (NR_CPUS*4) +#endif + +static inline void percpu_counter_init(struct percpu_counter *fbc) +{ + int i; + + spin_lock_init(&fbc->lock); + fbc->count = 0; + for (i = 0; i < NR_CPUS; i++) + fbc->counters[i].count = 0; +} + +void percpu_counter_mod(struct percpu_counter *fbc, long amount); + +static inline long percpu_counter_read(struct percpu_counter *fbc) +{ + return fbc->count; +} + +/* + * It is possible for the percpu_counter_read() to return a small negative + * number for some counter which should never be negative. + */ +static inline long percpu_counter_read_positive(struct percpu_counter *fbc) +{ + long ret = fbc->count; + + barrier(); /* Prevent reloads of fbc->count */ + if (ret > 0) + return ret; + return 1; +} + +#else + +struct percpu_counter { + long count; +}; + +static inline void percpu_counter_init(struct percpu_counter *fbc) +{ + fbc->count = 0; +} + +static inline void +percpu_counter_mod(struct percpu_counter *fbc, long amount) +{ + preempt_disable(); + fbc->count += amount; + preempt_enable(); +} + +static inline long percpu_counter_read(struct percpu_counter *fbc) +{ + return fbc->count; +} + +static inline long percpu_counter_read_positive(struct percpu_counter *fbc) +{ + return fbc->count; +} + +#endif /* CONFIG_SMP */ + +static inline void percpu_counter_inc(struct percpu_counter *fbc) +{ + percpu_counter_mod(fbc, 1); +} + +static inline void percpu_counter_dec(struct percpu_counter *fbc) +{ + percpu_counter_mod(fbc, -1); +} diff -urN linux-2.5.67-bk4/include/linux/quota.h linux-2.5.67-bk5/include/linux/quota.h --- linux-2.5.67-bk4/include/linux/quota.h 2003-04-07 10:32:17.000000000 -0700 +++ 
linux-2.5.67-bk5/include/linux/quota.h 2003-04-13 04:36:10.000000000 -0700 @@ -250,6 +250,7 @@ void (*free_space) (struct inode *, qsize_t); void (*free_inode) (const struct inode *, unsigned long); int (*transfer) (struct inode *, struct iattr *); + int (*sync_dquot) (struct dquot *); }; /* Operations handling requests from userspace */ @@ -303,6 +304,7 @@ int register_quota_format(struct quota_format_type *fmt); void unregister_quota_format(struct quota_format_type *fmt); +void init_dquot_operations(struct dquot_operations *fsdqops); #else diff -urN linux-2.5.67-bk4/include/linux/radix-tree.h linux-2.5.67-bk5/include/linux/radix-tree.h --- linux-2.5.67-bk4/include/linux/radix-tree.h 2003-04-07 10:30:58.000000000 -0700 +++ linux-2.5.67-bk5/include/linux/radix-tree.h 2003-04-13 04:36:10.000000000 -0700 @@ -43,7 +43,7 @@ extern int radix_tree_insert(struct radix_tree_root *, unsigned long, void *); extern void *radix_tree_lookup(struct radix_tree_root *, unsigned long); -extern int radix_tree_delete(struct radix_tree_root *, unsigned long); +extern void *radix_tree_delete(struct radix_tree_root *, unsigned long); extern unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, void **results, unsigned long first_index, unsigned int max_items); diff -urN linux-2.5.67-bk4/include/net/xfrm.h linux-2.5.67-bk5/include/net/xfrm.h --- linux-2.5.67-bk4/include/net/xfrm.h 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/include/net/xfrm.h 2003-04-13 04:36:11.000000000 -0700 @@ -723,6 +723,12 @@ struct sadb_alg desc; }; +/* XFRM tunnel handlers. 
*/ +struct xfrm_tunnel { + int (*handler)(struct sk_buff *skb); + void (*err_handler)(struct sk_buff *skb, void *info); +}; + extern void xfrm_init(void); extern void xfrm4_init(void); extern void xfrm4_fini(void); @@ -752,6 +758,8 @@ extern int xfrm_check_selectors(struct xfrm_state **x, int n, struct flowi *fl); extern int xfrm4_rcv(struct sk_buff *skb); extern int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type); +extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); +extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler); extern int xfrm6_rcv(struct sk_buff **pskb); extern int xfrm6_clear_mutable_options(struct sk_buff *skb, u16 *nh_offset, int dir); extern int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen); diff -urN linux-2.5.67-bk4/kernel/extable.c linux-2.5.67-bk5/kernel/extable.c --- linux-2.5.67-bk4/kernel/extable.c 2003-04-07 10:31:00.000000000 -0700 +++ linux-2.5.67-bk5/kernel/extable.c 2003-04-13 04:36:11.000000000 -0700 @@ -20,7 +20,7 @@ extern const struct exception_table_entry __start___ex_table[]; extern const struct exception_table_entry __stop___ex_table[]; -extern char _stext[], _etext[]; +extern char _stext[], _etext[], _sinittext[], _einittext[]; /* Given an address, look for it in the exception tables. 
*/ const struct exception_table_entry *search_exception_tables(unsigned long addr) @@ -39,5 +39,9 @@ addr <= (unsigned long)_etext) return 1; + if (addr >= (unsigned long)_sinittext && + addr <= (unsigned long)_einittext) + return 1; + return module_text_address(addr) != NULL; } diff -urN linux-2.5.67-bk4/kernel/fork.c linux-2.5.67-bk5/kernel/fork.c --- linux-2.5.67-bk4/kernel/fork.c 2003-04-07 10:30:42.000000000 -0700 +++ linux-2.5.67-bk5/kernel/fork.c 2003-04-13 04:36:11.000000000 -0700 @@ -603,7 +603,7 @@ atomic_set(&newf->count, 1); - newf->file_lock = RW_LOCK_UNLOCKED; + newf->file_lock = SPIN_LOCK_UNLOCKED; newf->next_fd = 0; newf->max_fds = NR_OPEN_DEFAULT; newf->max_fdset = __FD_SETSIZE; @@ -616,13 +616,13 @@ size = oldf->max_fdset; if (size > __FD_SETSIZE) { newf->max_fdset = 0; - write_lock(&newf->file_lock); + spin_lock(&newf->file_lock); error = expand_fdset(newf, size-1); - write_unlock(&newf->file_lock); + spin_unlock(&newf->file_lock); if (error) goto out_release; } - read_lock(&oldf->file_lock); + spin_lock(&oldf->file_lock); open_files = count_open_files(oldf, size); @@ -633,15 +633,15 @@ */ nfds = NR_OPEN_DEFAULT; if (open_files > nfds) { - read_unlock(&oldf->file_lock); + spin_unlock(&oldf->file_lock); newf->max_fds = 0; - write_lock(&newf->file_lock); + spin_lock(&newf->file_lock); error = expand_fd_array(newf, open_files-1); - write_unlock(&newf->file_lock); + spin_unlock(&newf->file_lock); if (error) goto out_release; nfds = newf->max_fds; - read_lock(&oldf->file_lock); + spin_lock(&oldf->file_lock); } old_fds = oldf->fd; @@ -656,7 +656,7 @@ get_file(f); *new_fds++ = f; } - read_unlock(&oldf->file_lock); + spin_unlock(&oldf->file_lock); /* compute the remainder to be cleared */ size = (newf->max_fds - open_files) * sizeof(struct file *); diff -urN linux-2.5.67-bk4/kernel/kallsyms.c linux-2.5.67-bk5/kernel/kallsyms.c --- linux-2.5.67-bk4/kernel/kallsyms.c 2003-04-07 10:32:54.000000000 -0700 +++ linux-2.5.67-bk5/kernel/kallsyms.c 2003-04-13 
04:36:11.000000000 -0700 @@ -15,7 +15,22 @@ extern char kallsyms_names[] __attribute__((weak)); /* Defined by the linker script. */ -extern char _stext[], _etext[]; +extern char _stext[], _etext[], _sinittext[], _einittext[]; + +static inline int is_kernel_inittext(unsigned long addr) +{ + if (addr >= (unsigned long)_sinittext + && addr <= (unsigned long)_einittext) + return 1; + return 0; +} + +static inline int is_kernel_text(unsigned long addr) +{ + if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) + return 1; + return 0; +} /* Lookup an address. modname is set to NULL if it's in the kernel. */ const char *kallsyms_lookup(unsigned long addr, @@ -31,7 +46,7 @@ namebuf[127] = 0; namebuf[0] = 0; - if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) { + if (is_kernel_text(addr) || is_kernel_inittext(addr)) { unsigned long symbol_end; char *name = kallsyms_names; @@ -52,6 +67,8 @@ /* Base symbol size on next symbol. */ if (best + 1 < kallsyms_num_syms) symbol_end = kallsyms_addresses[best + 1]; + else if (is_kernel_inittext(addr)) + symbol_end = (unsigned long)_einittext; else symbol_end = (unsigned long)_etext; diff -urN linux-2.5.67-bk4/kernel/ksyms.c linux-2.5.67-bk5/kernel/ksyms.c --- linux-2.5.67-bk4/kernel/ksyms.c 2003-04-07 10:30:34.000000000 -0700 +++ linux-2.5.67-bk5/kernel/ksyms.c 2003-04-13 04:36:11.000000000 -0700 @@ -58,6 +58,7 @@ #include #include #include +#include #include #if defined(CONFIG_PROC_FS) @@ -100,6 +101,7 @@ #ifdef CONFIG_SMP EXPORT_SYMBOL(kmalloc_percpu); EXPORT_SYMBOL(kfree_percpu); +EXPORT_SYMBOL(percpu_counter_mod); #endif EXPORT_SYMBOL(vfree); EXPORT_SYMBOL(__vmalloc); diff -urN linux-2.5.67-bk4/kernel/ptrace.c linux-2.5.67-bk5/kernel/ptrace.c --- linux-2.5.67-bk4/kernel/ptrace.c 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/kernel/ptrace.c 2003-04-13 04:36:11.000000000 -0700 @@ -179,14 +179,18 @@ flush_cache_page(vma, addr); + /* + * FIXME! 
We used to have flush_page_to_ram() in here, but + * that was wrong. davem says we need a new per-arch primitive + * to handle this correctly. + */ + maddr = kmap(page); if (write) { memcpy(maddr + offset, buf, bytes); - flush_page_to_ram(page); flush_icache_user_range(vma, page, addr, bytes); } else { memcpy(buf, maddr + offset, bytes); - flush_page_to_ram(page); } kunmap(page); page_cache_release(page); diff -urN linux-2.5.67-bk4/kernel/sched.c linux-2.5.67-bk5/kernel/sched.c --- linux-2.5.67-bk4/kernel/sched.c 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/kernel/sched.c 2003-04-13 04:36:11.000000000 -0700 @@ -1427,9 +1427,6 @@ { unsigned long flags; - if (unlikely(!q)) - return; - spin_lock_irqsave(&q->lock, flags); __wake_up_common(q, mode, nr_exclusive, 0); spin_unlock_irqrestore(&q->lock, flags); diff -urN linux-2.5.67-bk4/lib/Makefile linux-2.5.67-bk5/lib/Makefile --- linux-2.5.67-bk4/lib/Makefile 2003-04-07 10:30:38.000000000 -0700 +++ linux-2.5.67-bk5/lib/Makefile 2003-04-13 04:36:11.000000000 -0700 @@ -14,6 +14,7 @@ obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o +obj-$(CONFIG_SMP) += percpu_counter.o ifneq ($(CONFIG_HAVE_DEC_LOCK),y) obj-y += dec_and_lock.o diff -urN linux-2.5.67-bk4/lib/kobject.c linux-2.5.67-bk5/lib/kobject.c --- linux-2.5.67-bk4/lib/kobject.c 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/lib/kobject.c 2003-04-13 04:36:11.000000000 -0700 @@ -98,13 +98,13 @@ struct kobject *kobj) { char *argv [3]; - char **envp; - char *buffer; + char **envp = NULL; + char *buffer = NULL; char *scratch; int i = 0; int retval; int kobj_path_length; - char *kobj_path; + char *kobj_path = NULL; char *name = NULL; /* If the kset has a filter operation, call it. 
If it returns @@ -119,16 +119,14 @@ if (!hotplug_path[0]) return; - envp = (char **)kmalloc(NUM_ENVP * sizeof (char *), GFP_KERNEL); + envp = kmalloc(NUM_ENVP * sizeof (char *), GFP_KERNEL); if (!envp) return; memset (envp, 0x00, NUM_ENVP * sizeof (char *)); buffer = kmalloc(BUFFER_SIZE, GFP_KERNEL); - if (!buffer) { - kfree(envp); - return; - } + if (!buffer) + goto exit; if (kset->hotplug_ops->name) name = kset->hotplug_ops->name(kset, kobj); @@ -150,11 +148,8 @@ kobj_path_length = get_kobj_path_length (kset, kobj); kobj_path = kmalloc (kobj_path_length, GFP_KERNEL); - if (!kobj_path) { - kfree (buffer); - kfree (envp); - return; - } + if (!kobj_path) + goto exit; memset (kobj_path, 0x00, kobj_path_length); fill_kobj_path (kset, kobj, kobj_path, kobj_path_length); @@ -181,15 +176,16 @@ __FUNCTION__, retval); exit: - kfree (kobj_path); - kfree (buffer); + kfree(kobj_path); + kfree(buffer); + kfree(envp); return; } #else static void kset_hotplug(const char *action, struct kset *kset, struct kobject *kobj) { - return 0; + return; } #endif /* CONFIG_HOTPLUG */ diff -urN linux-2.5.67-bk4/lib/percpu_counter.c linux-2.5.67-bk5/lib/percpu_counter.c --- linux-2.5.67-bk4/lib/percpu_counter.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.5.67-bk5/lib/percpu_counter.c 2003-04-13 04:36:11.000000000 -0700 @@ -0,0 +1,18 @@ + +#include + +void percpu_counter_mod(struct percpu_counter *fbc, long amount) +{ + int cpu = get_cpu(); + long count = fbc->counters[cpu].count; + + count += amount; + if (count >= FBC_BATCH || count <= -FBC_BATCH) { + spin_lock(&fbc->lock); + fbc->count += count; + spin_unlock(&fbc->lock); + count = 0; + } + fbc->counters[cpu].count = count; + put_cpu(); +} diff -urN linux-2.5.67-bk4/lib/radix-tree.c linux-2.5.67-bk5/lib/radix-tree.c --- linux-2.5.67-bk4/lib/radix-tree.c 2003-04-07 10:30:57.000000000 -0700 +++ linux-2.5.67-bk5/lib/radix-tree.c 2003-04-13 04:36:11.000000000 -0700 @@ -349,15 +349,18 @@ * @index: index key * * Remove the item at @index from 
the radix tree rooted at @root. + * + * Returns the address of the deleted item, or NULL if it was not present. */ -int radix_tree_delete(struct radix_tree_root *root, unsigned long index) +void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) { struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path; unsigned int height, shift; + void *ret = NULL; height = root->height; if (index > radix_tree_maxindex(height)) - return -ENOENT; + goto out; shift = (height-1) * RADIX_TREE_MAP_SHIFT; pathp->node = NULL; @@ -365,7 +368,7 @@ while (height > 0) { if (*pathp->slot == NULL) - return -ENOENT; + goto out; pathp[1].node = *pathp[0].slot; pathp[1].slot = (struct radix_tree_node **) @@ -375,8 +378,9 @@ height--; } - if (*pathp[0].slot == NULL) - return -ENOENT; + ret = *pathp[0].slot; + if (ret == NULL) + goto out; *pathp[0].slot = NULL; while (pathp[0].node && --pathp[0].node->count == 0) { @@ -387,8 +391,8 @@ if (root->rnode == NULL) root->height = 0; /* Empty tree, we can reset the height */ - - return 0; +out: + return ret; } EXPORT_SYMBOL(radix_tree_delete); diff -urN linux-2.5.67-bk4/mm/bootmem.c linux-2.5.67-bk5/mm/bootmem.c --- linux-2.5.67-bk4/mm/bootmem.c 2003-04-07 10:31:00.000000000 -0700 +++ linux-2.5.67-bk5/mm/bootmem.c 2003-04-13 04:36:11.000000000 -0700 @@ -115,6 +115,9 @@ if (end > bdata->node_low_pfn) BUG(); + if (addr < bdata->last_success) + bdata->last_success = addr; + /* * Round up the beginning of the address. */ @@ -135,26 +138,23 @@ * is not a problem. * * On low memory boxes we get it right in 100% of the cases. - */ - -/* + * * alignment has to be a power of 2 value. + * + * NOTE: This function is _not_ reenetrant. 
*/ -static void * __init __alloc_bootmem_core (bootmem_data_t *bdata, - unsigned long size, unsigned long align, unsigned long goal) +static void * __init +__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, + unsigned long align, unsigned long goal) { - unsigned long i, start = 0; + unsigned long offset, remaining_size, areasize, preferred; + unsigned long i, start = 0, incr, eidx; void *ret; - unsigned long offset, remaining_size; - unsigned long areasize, preferred, incr; - unsigned long eidx = bdata->node_low_pfn - (bdata->node_boot_start >> - PAGE_SHIFT); - - if (!size) BUG(); - if (align & (align-1)) - BUG(); + BUG_ON(!size); + BUG_ON(align & (align-1)); + eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); offset = 0; if (align && (bdata->node_boot_start & (align - 1UL)) != 0) @@ -166,8 +166,11 @@ * first, then we try to allocate lower pages. */ if (goal && (goal >= bdata->node_boot_start) && - ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) { + ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) { preferred = goal - bdata->node_boot_start; + + if (bdata->last_success >= preferred) + preferred = bdata->last_success; } else preferred = 0; @@ -179,6 +182,8 @@ restart_scan: for (i = preferred; i < eidx; i += incr) { unsigned long j; + i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i); + i = (i + incr - 1) & -incr; if (test_bit(i, bdata->node_bootmem_map)) continue; for (j = i + 1; j < i + areasize; ++j) { @@ -189,31 +194,33 @@ } start = i; goto found; - fail_block:; + fail_block: + ; } + if (preferred) { preferred = offset; goto restart_scan; } return NULL; + found: - if (start >= eidx) - BUG(); + bdata->last_success = start << PAGE_SHIFT; + BUG_ON(start >= eidx); /* * Is the next page of the previous allocation-end the start * of this allocation's buffer? If yes then we can 'merge' * the previous partial page with this allocation. 
*/ - if (align < PAGE_SIZE - && bdata->last_offset && bdata->last_pos+1 == start) { + if (align < PAGE_SIZE && + bdata->last_offset && bdata->last_pos+1 == start) { offset = (bdata->last_offset+align-1) & ~(align-1); - if (offset > PAGE_SIZE) - BUG(); + BUG_ON(offset > PAGE_SIZE); remaining_size = PAGE_SIZE-offset; if (size < remaining_size) { areasize = 0; - // last_pos unchanged + /* last_pos unchanged */ bdata->last_offset = offset+size; ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + bdata->node_boot_start); @@ -231,11 +238,12 @@ bdata->last_offset = size & ~PAGE_MASK; ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start); } + /* * Reserve the area now: */ for (i = start; i < start+areasize; i++) - if (test_and_set_bit(i, bdata->node_bootmem_map)) + if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map))) BUG(); memset(ret, 0, size); return ret; @@ -256,21 +264,21 @@ map = bdata->node_bootmem_map; for (i = 0; i < idx; ) { unsigned long v = ~map[i / BITS_PER_LONG]; - if (v) { + if (v) { unsigned long m; - for (m = 1; m && i < idx; m<<=1, page++, i++) { + for (m = 1; m && i < idx; m<<=1, page++, i++) { if (v & m) { - count++; - ClearPageReserved(page); - set_page_count(page, 1); - __free_page(page); - } - } + count++; + ClearPageReserved(page); + set_page_count(page, 1); + __free_page(page); + } + } } else { i+=BITS_PER_LONG; - page+=BITS_PER_LONG; - } - } + page += BITS_PER_LONG; + } + } total += count; /* diff -urN linux-2.5.67-bk4/mm/filemap.c linux-2.5.67-bk5/mm/filemap.c --- linux-2.5.67-bk4/mm/filemap.c 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/mm/filemap.c 2003-04-13 04:36:11.000000000 -0700 @@ -1008,11 +1008,9 @@ success: /* - * Found the page and have a reference on it, need to check sharing - * and possibly copy it over to another page.. + * Found the page and have a reference on it. 
*/ mark_page_accessed(page); - flush_page_to_ram(page); return page; no_cached_page: @@ -1124,12 +1122,9 @@ success: /* - * Found the page and have a reference on it, need to check sharing - * and possibly copy it over to another page.. + * Found the page and have a reference on it. */ mark_page_accessed(page); - flush_page_to_ram(page); - return page; no_cached_page: diff -urN linux-2.5.67-bk4/mm/fremap.c linux-2.5.67-bk5/mm/fremap.c --- linux-2.5.67-bk4/mm/fremap.c 2003-04-07 10:30:34.000000000 -0700 +++ linux-2.5.67-bk5/mm/fremap.c 2003-04-13 04:36:11.000000000 -0700 @@ -78,7 +78,6 @@ flush = zap_pte(mm, vma, addr, pte); mm->rss++; - flush_page_to_ram(page); flush_icache_page(vma, page); set_pte(pte, mk_pte(page, prot)); pte_chain = page_add_rmap(page, pte, pte_chain); diff -urN linux-2.5.67-bk4/mm/memory.c linux-2.5.67-bk5/mm/memory.c --- linux-2.5.67-bk4/mm/memory.c 2003-04-07 10:31:14.000000000 -0700 +++ linux-2.5.67-bk5/mm/memory.c 2003-04-13 04:36:11.000000000 -0700 @@ -916,7 +916,6 @@ pte_t *page_table) { invalidate_vcache(address, vma->vm_mm, new_page); - flush_page_to_ram(new_page); flush_cache_page(vma, address); establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)))); } @@ -1206,7 +1205,6 @@ pte = pte_mkdirty(pte_mkwrite(pte)); unlock_page(page); - flush_page_to_ram(page); flush_icache_page(vma, page); set_pte(page_table, pte); pte_chain = page_add_rmap(page, page_table, pte_chain); @@ -1271,7 +1269,6 @@ goto out; } mm->rss++; - flush_page_to_ram(page); entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); lru_cache_add_active(page); mark_page_accessed(page); @@ -1365,7 +1362,6 @@ /* Only go through if we didn't race with anybody else... 
*/ if (pte_none(*page_table)) { ++mm->rss; - flush_page_to_ram(new_page); flush_icache_page(vma, new_page); entry = mk_pte(new_page, vma->vm_page_prot); if (write_access) diff -urN linux-2.5.67-bk4/mm/page_alloc.c linux-2.5.67-bk5/mm/page_alloc.c --- linux-2.5.67-bk4/mm/page_alloc.c 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/mm/page_alloc.c 2003-04-13 04:36:11.000000000 -0700 @@ -1142,6 +1142,35 @@ } /* + * Initially all pages are reserved - free ones are freed + * up by free_all_bootmem() once the early boot process is + * done. Non-atomic initialization, single-pass. + */ +void __init memmap_init_zone(struct page *start, unsigned long size, int nid, + unsigned long zone, unsigned long start_pfn) +{ + struct page *page; + + for (page = start; page < (start + size); page++) { + set_page_zone(page, nid * MAX_NR_ZONES + zone); + set_page_count(page, 0); + SetPageReserved(page); + INIT_LIST_HEAD(&page->list); +#ifdef WANT_PAGE_VIRTUAL + /* The shift won't overflow because ZONE_NORMAL is below 4G. 
*/ + if (zone != ZONE_HIGHMEM) + set_page_address(page, __va(start_pfn << PAGE_SHIFT)); +#endif + start_pfn++; + } +} + +#ifndef __HAVE_ARCH_MEMMAP_INIT +#define memmap_init(start, size, nid, zone, start_pfn) \ + memmap_init_zone((start), (size), (nid), (zone), (start_pfn)) +#endif + +/* * Set up the zone data structures: * - mark all pages reserved * - mark all memory queues empty @@ -1151,7 +1180,6 @@ unsigned long *zones_size, unsigned long *zholes_size) { unsigned long i, j; - unsigned long local_offset; const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1); int cpu, nid = pgdat->node_id; struct page *lmem_map = pgdat->node_mem_map; @@ -1160,7 +1188,6 @@ pgdat->nr_zones = 0; init_waitqueue_head(&pgdat->kswapd_wait); - local_offset = 0; /* offset within lmem_map */ for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; unsigned long mask; @@ -1246,36 +1273,17 @@ zone->pages_low = mask*2; zone->pages_high = mask*3; - zone->zone_mem_map = lmem_map + local_offset; + zone->zone_mem_map = lmem_map; zone->zone_start_pfn = zone_start_pfn; if ((zone_start_pfn) & (zone_required_alignment-1)) printk("BUG: wrong zone alignment, it will crash\n"); - /* - * Initially all pages are reserved - free ones are freed - * up by free_all_bootmem() once the early boot process is - * done. Non-atomic initialization, single-pass. - */ - for (i = 0; i < size; i++) { - struct page *page = lmem_map + local_offset + i; - set_page_zone(page, nid * MAX_NR_ZONES + j); - set_page_count(page, 0); - SetPageReserved(page); - INIT_LIST_HEAD(&page->list); -#ifdef WANT_PAGE_VIRTUAL - if (j != ZONE_HIGHMEM) - /* - * The shift left won't overflow because the - * ZONE_NORMAL is below 4G. 
- */ - set_page_address(page, - __va(zone_start_pfn << PAGE_SHIFT)); -#endif - zone_start_pfn++; - } + memmap_init(lmem_map, size, nid, j, zone_start_pfn); + + zone_start_pfn += size; + lmem_map += size; - local_offset += size; for (i = 0; ; i++) { unsigned long bitmap_size; diff -urN linux-2.5.67-bk4/mm/shmem.c linux-2.5.67-bk5/mm/shmem.c --- linux-2.5.67-bk4/mm/shmem.c 2003-04-07 10:31:09.000000000 -0700 +++ linux-2.5.67-bk5/mm/shmem.c 2003-04-13 04:36:11.000000000 -0700 @@ -832,7 +832,6 @@ shmem_swp_unmap(entry); delete_from_swap_cache(swappage); spin_unlock(&info->lock); - flush_page_to_ram(swappage); copy_highpage(filepage, swappage); unlock_page(swappage); page_cache_release(swappage); @@ -953,7 +952,6 @@ return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS; mark_page_accessed(page); - flush_page_to_ram(page); return page; } @@ -981,7 +979,6 @@ return err; if (page) { mark_page_accessed(page); - flush_page_to_ram(page); err = install_page(mm, vma, addr, page, prot); if (err) { page_cache_release(page); diff -urN linux-2.5.67-bk4/mm/slab.c linux-2.5.67-bk5/mm/slab.c --- linux-2.5.67-bk4/mm/slab.c 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/mm/slab.c 2003-04-13 04:36:11.000000000 -0700 @@ -383,11 +383,12 @@ } malloc_sizes[] = { #define CACHE(x) { .cs_size = (x) }, #include + { 0, } #undef CACHE }; /* Must match cache_sizes above. Out of line to keep cache footprint low. */ -static struct { +static struct cache_names { char *name; char *name_dma; } cache_names[] = { @@ -596,7 +597,9 @@ */ void __init kmem_cache_sizes_init(void) { - int i; + struct cache_sizes *sizes = malloc_sizes; + struct cache_names *names = cache_names; + /* * Fragmentation resistance on low memory - only use bigger * page orders on machines with more than 32MB of memory. 
@@ -604,15 +607,14 @@ if (num_physpages > (32 << 20) >> PAGE_SHIFT) slab_break_gfp_order = BREAK_GFP_ORDER_HI; - for (i = 0; i < ARRAY_SIZE(malloc_sizes); i++) { - struct cache_sizes *sizes = malloc_sizes + i; + while (sizes->cs_size) { /* For performance, all the general caches are L1 aligned. * This should be particularly beneficial on SMP boxes, as it * eliminates "false sharing". * Note for systems short on memory removing the alignment will * allow tighter packing of the smaller caches. */ sizes->cs_cachep = kmem_cache_create( - cache_names[i].name, sizes->cs_size, + names->name, sizes->cs_size, 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (!sizes->cs_cachep) BUG(); @@ -624,10 +626,13 @@ } sizes->cs_dmacachep = kmem_cache_create( - cache_names[i].name_dma, sizes->cs_size, + names->name_dma, sizes->cs_size, 0, SLAB_CACHE_DMA|SLAB_HWCACHE_ALIGN, NULL, NULL); if (!sizes->cs_dmacachep) BUG(); + + sizes++; + names++; } /* * The generic caches are running - time to kick out the diff -urN linux-2.5.67-bk4/mm/swapfile.c linux-2.5.67-bk5/mm/swapfile.c --- linux-2.5.67-bk4/mm/swapfile.c 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/mm/swapfile.c 2003-04-13 04:36:11.000000000 -0700 @@ -641,7 +641,6 @@ shmem = 0; swcount = *swap_map; if (swcount > 1) { - flush_page_to_ram(page); if (start_mm == &init_mm) shmem = shmem_unuse(entry, page); else diff -urN linux-2.5.67-bk4/net/bridge/br_ioctl.c linux-2.5.67-bk5/net/bridge/br_ioctl.c --- linux-2.5.67-bk4/net/bridge/br_ioctl.c 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/net/bridge/br_ioctl.c 2003-04-13 04:36:11.000000000 -0700 @@ -84,17 +84,20 @@ case BRCTL_GET_PORT_LIST: { - int i; - int indices[256]; + int *indices; + int ret = 0; - for (i=0;i<256;i++) - indices[i] = 0; + indices = kmalloc(256*sizeof(int), GFP_KERNEL); + if (indices == NULL) + return -ENOMEM; + + memset(indices, 0, 256*sizeof(int)); br_get_port_ifindices(br, indices); if (copy_to_user((void *)arg0, indices, 256*sizeof(int))) - return 
-EFAULT; - - return 0; + ret = -EFAULT; + kfree(indices); + return ret; } case BRCTL_SET_BRIDGE_FORWARD_DELAY: @@ -212,19 +215,24 @@ case BRCTL_GET_BRIDGES: { - int i; - int indices[64]; - - for (i=0;i<64;i++) - indices[i] = 0; + int *indices; + int ret = 0; if (arg1 > 64) arg1 = 64; + + indices = kmalloc(arg1*sizeof(int), GFP_KERNEL); + if (indices == NULL) + return -ENOMEM; + + memset(indices, 0, arg1*sizeof(int)); arg1 = br_get_bridge_ifindices(indices, arg1); - if (copy_to_user((void *)arg0, indices, arg1*sizeof(int))) - return -EFAULT; - return arg1; + ret = copy_to_user((void *)arg0, indices, arg1*sizeof(int)) + ? -EFAULT : arg1; + + kfree(indices); + return ret; } case BRCTL_ADD_BRIDGE: diff -urN linux-2.5.67-bk4/net/bridge/netfilter/ebtable_broute.c linux-2.5.67-bk5/net/bridge/netfilter/ebtable_broute.c --- linux-2.5.67-bk4/net/bridge/netfilter/ebtable_broute.c 2003-04-07 10:30:33.000000000 -0700 +++ linux-2.5.67-bk5/net/bridge/netfilter/ebtable_broute.c 2003-04-13 04:36:11.000000000 -0700 @@ -14,7 +14,6 @@ #include #include #include -#include /* EBT_ACCEPT means the frame will be bridged * EBT_DROP means the frame will be routed @@ -70,18 +69,15 @@ ret = ebt_register_table(&broute_table); if (ret < 0) return ret; - br_write_lock_bh(BR_NETPROTO_LOCK); /* see br_input.c */ br_should_route_hook = ebt_broute; - br_write_unlock_bh(BR_NETPROTO_LOCK); return ret; } static void __exit fini(void) { - br_write_lock_bh(BR_NETPROTO_LOCK); br_should_route_hook = NULL; - br_write_unlock_bh(BR_NETPROTO_LOCK); + synchronize_net(); ebt_unregister_table(&broute_table); } diff -urN linux-2.5.67-bk4/net/ipv4/Makefile linux-2.5.67-bk5/net/ipv4/Makefile --- linux-2.5.67-bk4/net/ipv4/Makefile 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/net/ipv4/Makefile 2003-04-13 04:36:11.000000000 -0700 @@ -22,4 +22,4 @@ obj-$(CONFIG_IP_PNP) += ipconfig.o obj-$(CONFIG_NETFILTER) += netfilter/ -obj-y += xfrm4_policy.o xfrm4_state.o xfrm4_input.o +obj-y += xfrm4_policy.o 
xfrm4_state.o xfrm4_input.o xfrm4_tunnel.o diff -urN linux-2.5.67-bk4/net/ipv4/igmp.c linux-2.5.67-bk5/net/ipv4/igmp.c --- linux-2.5.67-bk4/net/ipv4/igmp.c 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/net/ipv4/igmp.c 2003-04-13 04:36:11.000000000 -0700 @@ -1161,12 +1161,14 @@ ASSERT_RTNL(); +#ifdef CONFIG_IP_MULTICAST in_dev->mr_ifc_count = 0; if (del_timer(&in_dev->mr_ifc_timer)) atomic_dec(&in_dev->refcnt); in_dev->mr_gq_running = 0; if (del_timer(&in_dev->mr_gq_timer)) atomic_dec(&in_dev->refcnt); +#endif for (i=in_dev->mc_list; i; i=i->next) igmp_group_dropped(i); @@ -1185,7 +1187,6 @@ ASSERT_RTNL(); #ifdef CONFIG_IP_MULTICAST - in_dev->mc_lock = RW_LOCK_UNLOCKED; in_dev->mr_gq_running = 0; init_timer(&in_dev->mr_gq_timer); in_dev->mr_gq_timer.data=(unsigned long) in_dev; @@ -1198,6 +1199,7 @@ in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; #endif + in_dev->mc_lock = RW_LOCK_UNLOCKED; ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); for (i=in_dev->mc_list; i; i=i->next) diff -urN linux-2.5.67-bk4/net/ipv4/ipip.c linux-2.5.67-bk5/net/ipv4/ipip.c --- linux-2.5.67-bk4/net/ipv4/ipip.c 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/net/ipv4/ipip.c 2003-04-13 04:36:11.000000000 -0700 @@ -115,6 +115,7 @@ #include #include #include +#include #define HASH_SIZE 16 #define HASH(addr) ((addr^(addr>>4))&0xF) @@ -285,7 +286,7 @@ dev_put(dev); } -void ipip_err(struct sk_buff *skb, u32 info) +void ipip_err(struct sk_buff *skb, void *__unused) { #ifndef I_WISH_WORLD_WERE_PERFECT @@ -468,11 +469,13 @@ #endif } -static inline void ipip_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) +static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb) { - if (INET_ECN_is_ce(iph->tos) && - INET_ECN_is_not_ce(skb->nh.iph->tos)) - IP_ECN_set_ce(iph); + struct iphdr *inner_iph = skb->nh.iph; + + if (INET_ECN_is_ce(outer_iph->tos) && + INET_ECN_is_not_ce(inner_iph->tos)) + IP_ECN_set_ce(inner_iph); } int ipip_rcv(struct sk_buff *skb) @@ -511,10 
+514,8 @@ } read_unlock(&ipip_lock); - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); out: - kfree_skb(skb); - return 0; + return -1; } /* @@ -867,7 +868,7 @@ return 0; } -static struct inet_protocol ipip_protocol = { +static struct xfrm_tunnel ipip_handler = { .handler = ipip_rcv, .err_handler = ipip_err, }; @@ -879,8 +880,8 @@ { printk(banner); - if (inet_add_protocol(&ipip_protocol, IPPROTO_IPIP) < 0) { - printk(KERN_INFO "ipip init: can't add protocol\n"); + if (xfrm4_tunnel_register(&ipip_handler) < 0) { + printk(KERN_INFO "ipip init: can't register tunnel\n"); return -EAGAIN; } @@ -892,8 +893,8 @@ static void __exit ipip_fini(void) { - if (inet_del_protocol(&ipip_protocol, IPPROTO_IPIP) < 0) - printk(KERN_INFO "ipip close: can't remove protocol\n"); + if (xfrm4_tunnel_deregister(&ipip_handler) < 0) + printk(KERN_INFO "ipip close: can't deregister tunnel\n"); unregister_netdev(&ipip_fb_tunnel_dev); } diff -urN linux-2.5.67-bk4/net/ipv4/xfrm4_tunnel.c linux-2.5.67-bk5/net/ipv4/xfrm4_tunnel.c --- linux-2.5.67-bk4/net/ipv4/xfrm4_tunnel.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.5.67-bk5/net/ipv4/xfrm4_tunnel.c 2003-04-13 04:36:11.000000000 -0700 @@ -0,0 +1,225 @@ +/* xfrm4_tunnel.c: Generic IP tunnel transformer. + * + * Copyright (C) 2003 David S. 
Miller (davem@redhat.com) + */ + /* NOTE(review): the header names that should follow each #include below are missing from this copy of the patch - restore them from the upstream file before applying. */ +#include +#include +#include +#include +#include + /* ipip_output - xfrm output hook (ipip_type.output): wrap the packet in an outer IPv4 header. Pushes x->props.header_len bytes, fills the new header from the inner header (tos via INET_ECN_encapsulate, ttl, frag_off with the MF and offset bits cleared) and from the state (saddr/daddr), selects an IP ident only when the inner header has no DF bit, and accounts bytes/packets in x->curlft while holding x->lock. Finally advances skb->dst with dst_pop(). Returns NET_XMIT_BYPASS, or -EHOSTUNREACH (after freeing the skb) when dst_pop() returns NULL. */ +static int ipip_output(struct sk_buff *skb) +{ + struct dst_entry *dst = skb->dst; + struct xfrm_state *x = dst->xfrm; + struct iphdr *iph, *top_iph; + int tos; + + iph = skb->nh.iph; + + spin_lock_bh(&x->lock); + + tos = iph->tos; + + top_iph = (struct iphdr *) skb_push(skb, x->props.header_len); + top_iph->ihl = 5; + top_iph->version = 4; + top_iph->tos = INET_ECN_encapsulate(tos, iph->tos); + top_iph->tot_len = htons(skb->len); + top_iph->frag_off = iph->frag_off & ~htons(IP_MF|IP_OFFSET); + if (!(iph->frag_off & htons(IP_DF))) + __ip_select_ident(top_iph, dst, 0); + top_iph->ttl = iph->ttl; + top_iph->protocol = IPPROTO_IPIP; + top_iph->check = 0; + top_iph->saddr = x->props.saddr.a4; + top_iph->daddr = x->id.daddr.a4; + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + ip_send_check(top_iph); + + skb->nh.raw = skb->data; + x->curlft.bytes += skb->len; + x->curlft.packets++; + + spin_unlock_bh(&x->lock); + + if ((skb->dst = dst_pop(dst)) == NULL) { + kfree_skb(skb); + return -EHOSTUNREACH; + } + return NET_XMIT_BYPASS; +} + /* Propagate a congestion mark inward: when the outer tos is CE and the tos of the header at skb->nh.iph is not, set CE on that inner header. */ +static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb) +{ + struct iphdr *inner_iph = skb->nh.iph; + + if (INET_ECN_is_ce(outer_iph->tos) && + INET_ECN_is_not_ce(inner_iph->tos)) + IP_ECN_set_ce(inner_iph); +} + /* ipip_xfrm_rcv - xfrm input hook (ipip_type.input): hand the inner packet back to the stack. Requires sizeof(struct iphdr) bytes to be pullable (else returns -EINVAL), points mac.raw at the old network header and nh.raw at skb->data, clears the IP options control block, releases the dst, tags the skb as ETH_P_IP / PACKET_HOST, propagates ECN and queues it with netif_rx(). Returns 0 on success. The x and decap arguments are not used here. NOTE(review): outer_iph is captured before pskb_may_pull(); if that call can move the header data the pointer would be stale when ipip_ecn_decapsulate() reads it - confirm. */ +static int ipip_xfrm_rcv(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb) +{ + struct iphdr *outer_iph = skb->nh.iph; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return -EINVAL; + skb->mac.raw = skb->nh.raw; + skb->nh.raw = skb->data; + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + dst_release(skb->dst); + skb->dst = NULL; + skb->protocol = htons(ETH_P_IP); + skb->pkt_type = PACKET_HOST; + ipip_ecn_decapsulate(outer_iph, skb); + netif_rx(skb); + + return 0; +} + /* The single registered tunnel handler (NULL when none). Writers below hold xfrm4_tunnel_sem; ipip_rcv() and ipip_err() read it without the semaphore. */ +static struct xfrm_tunnel *ipip_handler; +static DECLARE_MUTEX(xfrm4_tunnel_sem); + /* Install handler as the tunnel handler. Returns 0, or -EINVAL if a handler is already registered. */ +int 
xfrm4_tunnel_register(struct xfrm_tunnel *handler) +{ + int ret; + + down(&xfrm4_tunnel_sem); + ret = 0; + if (ipip_handler != NULL) + ret = -EINVAL; + if (!ret) + ipip_handler = handler; + up(&xfrm4_tunnel_sem); + + return ret; +} + /* Clear the registered handler if it is the one passed in. Returns 0, or -EINVAL if handler is not the currently registered one. synchronize_net() is called after the semaphore is released, whether or not the handler was removed. */ +int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler) +{ + int ret; + + down(&xfrm4_tunnel_sem); + ret = 0; + if (ipip_handler != handler) + ret = -EINVAL; + if (!ret) + ipip_handler = NULL; + up(&xfrm4_tunnel_sem); + + synchronize_net(); + + return ret; +} + /* ipip_rcv - protocol receive entry point (ipip_protocol.handler). Offers the skb to the registered tunnel handler first and stops if it returns 0. Otherwise looks up an xfrm state and, if one is found, requires it to be XFRM_STATE_VALID; the packet is then passed to ipip_xfrm_rcv() - also when no state was found. On success the state's curlft counters are updated; on failure the skb is freed. Every path returns 0. NOTE(review): skb->len is read after ipip_xfrm_rcv() has passed the skb to netif_rx() - confirm the skb may still be touched at that point. NOTE(review): the source address is passed as the second argument of xfrm_state_lookup(), presumably standing in for an SPI - verify against that function's signature. */ +static int ipip_rcv(struct sk_buff *skb) +{ + struct xfrm_tunnel *handler = ipip_handler; + struct xfrm_state *x = NULL; + int err; + + /* Tunnel devices take precedence. */ + if (handler) { + err = handler->handler(skb); + if (!err) + goto out; + } + + x = xfrm_state_lookup((xfrm_address_t *)&skb->nh.iph->daddr, + skb->nh.iph->saddr, + IPPROTO_IPIP, AF_INET); + + if (x) { + spin_lock(&x->lock); + + if (unlikely(x->km.state != XFRM_STATE_VALID)) + goto drop_unlock; + } + + err = ipip_xfrm_rcv(x, NULL, skb); + if (err) + goto drop_unlock; + + if (x) { + x->curlft.bytes += skb->len; + x->curlft.packets++; + + spin_unlock(&x->lock); + + xfrm_state_put(x); + } + + return 0; + +drop_unlock: + if (x) { + spin_unlock(&x->lock); + xfrm_state_put(x); + } + kfree_skb(skb); +out: + return 0; +} + /* Forward an error notification to the registered tunnel handler, if any, passing the address of a local copy of info. */ +void ipip_err(struct sk_buff *skb, u32 info) +{ + struct xfrm_tunnel *handler = ipip_handler; + u32 arg = info; + + if (handler) + handler->err_handler(skb, &arg); +} + /* State init hook: the only per-state setup is the header length, one struct iphdr. args is unused. Always returns 0. */ +static int ipip_init_state(struct xfrm_state *x, void *args) +{ + x->props.header_len = sizeof(struct iphdr); + + return 0; +} + /* State destructor hook: nothing to release. */ +static void ipip_destroy(struct xfrm_state *x) +{ +} + /* xfrm type descriptor wiring the hooks above to IPPROTO_IPIP. */ +static struct xfrm_type ipip_type = { + .description = "IPIP", + .proto = IPPROTO_IPIP, + .init_state = ipip_init_state, + .destructor = ipip_destroy, + .input = ipip_xfrm_rcv, + .output = ipip_output +}; + /* inet protocol hooks; registered for IPPROTO_IPIP in ipip_init(). */ +static struct inet_protocol ipip_protocol = { + .handler = ipip_rcv, + .err_handler = ipip_err, +}; + /* Module init: register the xfrm type for AF_INET, then the inet protocol; if the second step fails the type is unregistered again. Returns 0 on success or -EAGAIN on either failure. */ +static int __init ipip_init(void) 
+{ + SET_MODULE_OWNER(&ipip_type); + if (xfrm_register_type(&ipip_type, AF_INET) < 0) { + printk(KERN_INFO "ipip init: can't add xfrm type\n"); + return -EAGAIN; + } + if (inet_add_protocol(&ipip_protocol, IPPROTO_IPIP) < 0) { + printk(KERN_INFO "ipip init: can't add protocol\n"); + xfrm_unregister_type(&ipip_type, AF_INET); + return -EAGAIN; + } + return 0; +} + /* Module exit: undo ipip_init() in reverse order (protocol first, then xfrm type); failures are logged and otherwise ignored. */ +static void __exit ipip_fini(void) +{ + if (inet_del_protocol(&ipip_protocol, IPPROTO_IPIP) < 0) + printk(KERN_INFO "ipip close: can't remove protocol\n"); + if (xfrm_unregister_type(&ipip_type, AF_INET) < 0) + printk(KERN_INFO "ipip close: can't remove xfrm type\n"); +} + +module_init(ipip_init); +module_exit(ipip_fini); +MODULE_LICENSE("GPL"); diff -urN linux-2.5.67-bk4/net/netsyms.c linux-2.5.67-bk5/net/netsyms.c --- linux-2.5.67-bk4/net/netsyms.c 2003-04-13 04:36:07.000000000 -0700 +++ linux-2.5.67-bk5/net/netsyms.c 2003-04-13 04:36:11.000000000 -0700 @@ -318,6 +318,8 @@ EXPORT_SYMBOL(xfrm_get_acqseq); EXPORT_SYMBOL(xfrm_parse_spi); EXPORT_SYMBOL(xfrm4_rcv); +EXPORT_SYMBOL(xfrm4_tunnel_register); +EXPORT_SYMBOL(xfrm4_tunnel_deregister); EXPORT_SYMBOL(xfrm_register_type); EXPORT_SYMBOL(xfrm_unregister_type); EXPORT_SYMBOL(xfrm_get_type); diff -urN linux-2.5.67-bk4/scripts/kallsyms.c linux-2.5.67-bk5/scripts/kallsyms.c --- linux-2.5.67-bk4/scripts/kallsyms.c 2003-04-07 10:32:58.000000000 -0700 +++ linux-2.5.67-bk5/scripts/kallsyms.c 2003-04-13 04:36:11.000000000 -0700 @@ -21,7 +21,7 @@ static struct sym_entry *table; static int size, cnt; -static unsigned long long _stext, _etext; +static unsigned long long _stext, _etext, _sinittext, _einittext; static void usage(void) @@ -51,10 +51,8 @@ static int symbol_valid(struct sym_entry *s) { - if (s->addr < _stext) - return 0; - - if (s->addr > _etext) + if ((s->addr < _stext || s->addr > _etext) + && (s->addr < _sinittext || s->addr > _einittext)) return 0; if (strstr(s->sym, "_compiled.")) @@ -85,6 +83,10 @@ _stext = table[i].addr; if (strcmp(table[i].sym, 
"_etext") == 0) _etext = table[i].addr; + if (strcmp(table[i].sym, "_sinittext") == 0) + _sinittext = table[i].addr; + if (strcmp(table[i].sym, "_einittext") == 0) + _einittext = table[i].addr; } }