Brute force merge to 2.5.61. This doesn't boot on anything AFAICT.
Something severe went wrong in the merge from 2.5.59 to 2.5.60.

 Makefile | 2
 arch/i386/boot/setup.S | 3
 arch/i386/kernel/apic.c | 4
 arch/i386/kernel/cpu/amd.c | 2
 arch/i386/kernel/cpu/mtrr/amd.c | 4
 arch/i386/kernel/cpu/mtrr/generic.c | 16 +-
 arch/i386/kernel/entry.S | 2
 arch/i386/kernel/head.S | 4
 arch/i386/kernel/microcode.c | 2
 arch/i386/kernel/mpparse.c | 5
 arch/i386/kernel/numaq.c | 10 +
 arch/i386/kernel/setup.c | 18 ++-
 arch/i386/kernel/smpboot.c | 2
 arch/i386/kernel/sys_i386.c | 4
 arch/i386/kernel/sysenter.c | 2
 arch/i386/kernel/traps.c | 2
 arch/i386/lib/getuser.S | 2
 arch/i386/mm/discontig.c | 61 ++++++-----
 arch/i386/mm/fault.c | 178 ++++++++++++++++++++++++++-------
 arch/i386/mm/highmem.c | 56 +++++++---
 arch/i386/mm/init.c | 157 +++++++++++++++++------------
 arch/i386/mm/ioremap.c | 30 ++---
 arch/i386/mm/pageattr.c | 13 +-
 arch/i386/mm/pgtable.c | 8 +
 arch/i386/pci/i386.c | 2
 arch/i386/pci/numa.c | 2
 drivers/block/ll_rw_blk.c | 2
 drivers/char/agp/backend.c | 8 -
 drivers/char/agp/generic.c | 12 +-
 drivers/char/mem.c | 42 +++----
 drivers/scsi/qlogicisp.c | 2
 fs/aio.c | 39 ++++---
 fs/binfmt_elf.c | 22 ++--
 fs/bio.c | 28 +++--
 fs/direct-io.c | 24 +++-
 fs/exec.c | 74 +++++++------
 fs/file_table.c | 2
 fs/inode.c | 11 +-
 fs/proc/base.c | 46 +++++---
 fs/proc/task_mmu.c | 2
 include/asm-generic/rmap.h | 28 ++++-
 include/asm-i386/dma-mapping.h | 2
 include/asm-i386/fixmap.h | 28 ++++-
 include/asm-i386/highmem.h | 19 ---
 include/asm-i386/io.h | 2
 include/asm-i386/io_apic.h | 2
 include/asm-i386/mmzone.h | 39 +++++--
 include/asm-i386/numaq.h | 2
 include/asm-i386/page.h | 53 +++++++--
 include/asm-i386/pgalloc.h | 2
 include/asm-i386/pgtable-2level.h | 11 +-
 include/asm-i386/pgtable-3level.h | 15 +-
 include/asm-i386/pgtable.h | 34 +++---
 include/asm-i386/rmap.h | 11 +-
 include/asm-i386/setup.h | 8 -
 include/asm-i386/shmparam.h | 2
 include/asm-i386/thread_info.h | 10 -
 include/asm-i386/tlbflush.h | 4
 include/linux/aio.h | 4
 include/linux/highmem.h | 56 ++++++++++
 include/linux/ide.h | 4
 include/linux/mm.h | 48 ++++++--
 include/linux/mmzone.h | 2
 include/linux/pagemap.h | 3
 include/linux/sched.h | 9 -
 include/linux/shm.h | 2
 include/linux/swap.h | 6 -
 init/main.c | 6 -
 ipc/shm.c | 8 -
 kernel/fork.c | 14 +-
 kernel/futex.c | 26 +++-
 kernel/ksyms.c | 1
 kernel/ptrace.c | 25 ++--
 mm/bootmem.c | 116 +++++++++------------
 mm/filemap.c | 44 ++++----
 mm/fremap.c | 10 +
 mm/highmem.c | 55 +++++++---
 mm/madvise.c | 8 -
 mm/memory.c | 176 ++++++++++++++++++++++----------
 mm/mincore.c | 32 +++--
 mm/mlock.c | 18 +--
 mm/mmap.c | 83 +++++++--------
 mm/mprotect.c | 12 +-
 mm/mremap.c | 30 ++---
 mm/msync.c | 6 -
 mm/page-writeback.c | 4
 mm/page_alloc.c | 12 +-
 mm/shmem.c | 75 +++++++------
 mm/slab.c | 4
 mm/swap.c | 2
 mm/swapfile.c | 2
 mm/vcache.c | 2
 mm/vmalloc.c | 131 +++++++++---------------
 net/ipv4/netfilter/ip_conntrack_core.c | 4
 net/ipv4/tcp.c | 4
 95 files changed, 1360 insertions(+), 861 deletions(-)

diff -urpN linux-2.5.61/Makefile pgcl-2.5.61-1/Makefile
--- linux-2.5.61/Makefile	2003-02-14 15:51:13.000000000 -0800
+++ pgcl-2.5.61-1/Makefile	2003-02-14 20:44:43.000000000 -0800
@@ -174,7 +174,7 @@ NOSTDINC_FLAGS = -nostdinc -iwithprefix
 CPPFLAGS := -D__KERNEL__ -Iinclude
 CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \
-	  -fno-strict-aliasing -fno-common
+	  -g -fno-strict-aliasing -fno-common
 AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS)
 export VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION KERNELRELEASE ARCH \
diff -urpN 
linux-2.5.61/arch/i386/boot/setup.S pgcl-2.5.61-1/arch/i386/boot/setup.S --- linux-2.5.61/arch/i386/boot/setup.S 2003-02-14 15:51:47.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/boot/setup.S 2003-02-14 20:44:43.000000000 -0800 @@ -58,6 +58,9 @@ #include #include #include + +#define VMALLOC_START (-0xC0000000 - 128*1024*1024) +#include #include /* Signature words to ensure LILO loaded us right */ diff -urpN linux-2.5.61/arch/i386/kernel/apic.c pgcl-2.5.61-1/arch/i386/kernel/apic.c --- linux-2.5.61/arch/i386/kernel/apic.c 2003-02-14 15:53:02.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/kernel/apic.c 2003-02-14 20:44:43.000000000 -0800 @@ -678,7 +678,7 @@ void __init init_apic_mappings(void) * one for the IO-APIC. */ if (!smp_found_config && detect_init_APIC()) { - apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); + apic_phys = (unsigned long) alloc_bootmem_pages(MMUPAGE_SIZE); apic_phys = __pa(apic_phys); } else apic_phys = mp_lapic_addr; @@ -710,7 +710,7 @@ void __init init_apic_mappings(void) } } else { fake_ioapic_page: - ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = (unsigned long) alloc_bootmem_pages(MMUPAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); diff -urpN linux-2.5.61/arch/i386/kernel/cpu/amd.c pgcl-2.5.61-1/arch/i386/kernel/cpu/amd.c --- linux-2.5.61/arch/i386/kernel/cpu/amd.c 2003-02-14 15:52:40.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/kernel/cpu/amd.c 2003-02-14 20:44:43.000000000 -0800 @@ -25,7 +25,7 @@ __asm__(".align 4\nvide: ret"); static void __init init_amd(struct cpuinfo_x86 *c) { u32 l, h; - int mbytes = num_physpages >> (20-PAGE_SHIFT); + int mbytes = num_physpages >> (20-MMUPAGE_SHIFT); int r; /* diff -urpN linux-2.5.61/arch/i386/kernel/cpu/mtrr/amd.c pgcl-2.5.61-1/arch/i386/kernel/cpu/mtrr/amd.c --- linux-2.5.61/arch/i386/kernel/cpu/mtrr/amd.c 2003-02-14 15:51:46.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/kernel/cpu/mtrr/amd.c 2003-02-14 20:44:43.000000000 -0800 @@ -42,7 +42,7 @@ amd_get_mtrr(unsigned int reg, unsigned * *128K ... */ low = (~low) & 0x1FFFC; - *size = (low + 4) << (15 - PAGE_SHIFT); + *size = (low + 4) << (15 - MMUPAGE_SHIFT); return; } @@ -77,7 +77,7 @@ static void amd_set_mtrr(unsigned int re desired 111 1111 1111 1100 mask But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */ - regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC) + regs[reg] = (-size >> (15 - MMUPAGE_SHIFT) & 0x0001FFFC) | (base << PAGE_SHIFT) | (type + 1); /* diff -urpN linux-2.5.61/arch/i386/kernel/cpu/mtrr/generic.c pgcl-2.5.61-1/arch/i386/kernel/cpu/mtrr/generic.c --- linux-2.5.61/arch/i386/kernel/cpu/mtrr/generic.c 2003-02-14 15:51:49.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/kernel/cpu/mtrr/generic.c 2003-02-14 20:44:43.000000000 -0800 @@ -133,13 +133,13 @@ void generic_get_mtrr(unsigned int reg, rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); /* Work out the shifted address mask. */ - mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) - | mask_lo >> PAGE_SHIFT; + mask_lo = size_or_mask | mask_hi << (32 - MMUPAGE_SHIFT) + | mask_lo >> MMUPAGE_SHIFT; /* This works correctly if size is a power of two, i.e. a contiguous range. */ *size = -mask_lo; - *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; + *base = base_hi << (32 - MMUPAGE_SHIFT) | base_lo >> MMUPAGE_SHIFT; *type = base_lo & 0xff; } @@ -319,10 +319,10 @@ static void generic_set_mtrr(unsigned in relevant mask register to disable a range. 
*/ wrmsr(MTRRphysMask_MSR(reg), 0, 0); } else { - wrmsr(MTRRphysBase_MSR(reg), base << PAGE_SHIFT | type, - (base & size_and_mask) >> (32 - PAGE_SHIFT)); - wrmsr(MTRRphysMask_MSR(reg), -size << PAGE_SHIFT | 0x800, - (-size & size_and_mask) >> (32 - PAGE_SHIFT)); + wrmsr(MTRRphysBase_MSR(reg), base << MMUPAGE_SHIFT | type, + (base & size_and_mask) >> (32 - MMUPAGE_SHIFT)); + wrmsr(MTRRphysMask_MSR(reg), -size << MMUPAGE_SHIFT | 0x800, + (-size & size_and_mask) >> (32 - MMUPAGE_SHIFT)); } post_set(); @@ -337,7 +337,7 @@ int generic_validate_add_page(unsigned l if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && boot_cpu_data.x86_mask <= 7) { - if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { + if (base & ((1 << (22 - MMUPAGE_SHIFT)) - 1)) { printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); diff -urpN linux-2.5.61/arch/i386/kernel/entry.S pgcl-2.5.61-1/arch/i386/kernel/entry.S --- linux-2.5.61/arch/i386/kernel/entry.S 2003-02-14 15:51:23.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/kernel/entry.S 2003-02-14 20:44:43.000000000 -0800 @@ -155,7 +155,7 @@ do_lcall: movl %eax,EFLAGS(%ebp) # movl %edx,EIP(%ebp) # Now we move them to their "normal" places movl %ecx,CS(%ebp) # - andl $-8192, %ebp # GET_THREAD_INFO + andl $~(THREAD_SIZE-1), %ebp # GET_THREAD_INFO movl TI_EXEC_DOMAIN(%ebp), %edx # Get the execution domain call *4(%edx) # Call the lcall7 handler for the domain addl $4, %esp diff -urpN linux-2.5.61/arch/i386/kernel/head.S pgcl-2.5.61-1/arch/i386/kernel/head.S --- linux-2.5.61/arch/i386/kernel/head.S 2003-02-14 15:51:12.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/kernel/head.S 2003-02-14 20:44:43.000000000 -0800 @@ -16,6 +16,7 @@ #include #include #include +#include #define OLD_CL_MAGIC_ADDR 0x90020 #define OLD_CL_MAGIC 0xA33F @@ -309,13 +310,12 @@ rp_sidt: ret ENTRY(stack_start) - .long init_thread_union+8192 + .long init_thread_union+THREAD_SIZE .long __BOOT_DS /* This is the default interrupt "handler" :-) */ int_msg: .asciz "Unknown interrupt\n" - ALIGN ignore_int: cld pushl %eax diff -urpN linux-2.5.61/arch/i386/kernel/microcode.c pgcl-2.5.61-1/arch/i386/kernel/microcode.c --- linux-2.5.61/arch/i386/kernel/microcode.c 2003-02-14 15:51:21.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/kernel/microcode.c 2003-02-14 20:44:43.000000000 -0800 @@ -338,7 +338,7 @@ static ssize_t microcode_write(struct fi sizeof(struct microcode)); return -EINVAL; } - if ((len >> PAGE_SHIFT) > num_physpages) { + if ((len >> MMUPAGE_SHIFT) > num_physpages) { printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages); return -EINVAL; } diff -urpN linux-2.5.61/arch/i386/kernel/mpparse.c pgcl-2.5.61-1/arch/i386/kernel/mpparse.c --- linux-2.5.61/arch/i386/kernel/mpparse.c 2003-02-14 15:51:26.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/kernel/mpparse.c 2003-02-14 20:44:43.000000000 -0800 @@ -443,6 +443,7 @@ static int __init smp_read_mpc(struct mp } ++mpc_record; } + nr_ioapics = min(2, nr_ioapics); clustered_apic_check(); if (!num_processors) printk(KERN_ERR "SMP mptable: no processors registered!\n"); @@ -704,9 +705,9 @@ static int __init smp_scan_config (unsig smp_found_config = 1; printk("found SMP MP-table at %08lx\n", virt_to_phys(mpf)); - reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); + reserve_bootmem(virt_to_phys(mpf), MMUPAGE_SIZE); if (mpf->mpf_physptr) - reserve_bootmem(mpf->mpf_physptr, PAGE_SIZE); + reserve_bootmem(mpf->mpf_physptr, MMUPAGE_SIZE); mpf_found = mpf; return 1; } diff -urpN linux-2.5.61/arch/i386/kernel/numaq.c 
pgcl-2.5.61-1/arch/i386/kernel/numaq.c --- linux-2.5.61/arch/i386/kernel/numaq.c 2003-02-14 15:52:09.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/kernel/numaq.c 2003-02-14 20:44:43.000000000 -0800 @@ -33,7 +33,7 @@ unsigned long node_start_pfn[MAX_NUMNODES]; unsigned long node_end_pfn[MAX_NUMNODES]; -#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) +#define MB_TO_PAGES(addr) ((addr) << (20 - MMUPAGE_SHIFT)) /* * Function: smp_dump_qct() @@ -83,15 +83,17 @@ static void __init smp_dump_qct(void) */ int physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1}; -#define PFN_TO_ELEMENT(pfn) (pfn / PAGES_PER_ELEMENT) -#define PA_TO_ELEMENT(pa) (PFN_TO_ELEMENT(pa >> PAGE_SHIFT)) +#define PFN_TO_ELEMENT(pfn) ((pfn) / PAGES_PER_ELEMENT) +#define PA_TO_ELEMENT(pa) (PFN_TO_ELEMENT((pa) >> MMUPAGE_SHIFT)) int pfn_to_nid(unsigned long pfn) { int nid = physnode_map[PFN_TO_ELEMENT(pfn)]; - if (nid == -1) + if (nid == -1) { + printk("pfn %lx not on any node\n", pfn); BUG(); /* address is not present */ + } return nid; } diff -urpN linux-2.5.61/arch/i386/kernel/setup.c pgcl-2.5.61-1/arch/i386/kernel/setup.c --- linux-2.5.61/arch/i386/kernel/setup.c 2003-02-14 15:51:44.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/kernel/setup.c 2003-02-14 20:44:43.000000000 -0800 @@ -599,6 +599,8 @@ void __init find_max_pfn(void) continue; if (end > max_pfn) max_pfn = end; + + max_pfn &= ~(PAGE_MMUCOUNT - 1); } } @@ -609,6 +611,8 @@ unsigned long __init find_max_low_pfn(vo { unsigned long max_low_pfn; + printk("MAXMEM = %p\n", (void *)MAXMEM); + max_low_pfn = max_pfn; if (max_low_pfn > MAXMEM_PFN) { if (highmem_pages == -1) @@ -722,10 +726,10 @@ static unsigned long __init setup_memory highstart_pfn = max_low_pfn; } printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", - pages_to_mb(highend_pfn - highstart_pfn)); + (highend_pfn - highstart_pfn) >> (20 - MMUPAGE_SHIFT)); #endif printk(KERN_NOTICE "%ldMB LOWMEM available.\n", - pages_to_mb(max_low_pfn)); + max_low_pfn >> (20 - MMUPAGE_SHIFT)); /* * Initialize the boot-time allocator (with low memory only): */ @@ -746,7 +750,7 @@ static unsigned long __init setup_memory * reserve physical page 0 - it's a special BIOS page on many boxes, * enabling clean reboots, SMP operation, laptop functions. */ - reserve_bootmem(0, PAGE_SIZE); + reserve_bootmem(0, MMUPAGE_SIZE); #ifdef CONFIG_SMP /* @@ -754,7 +758,7 @@ static unsigned long __init setup_memory * FIXME: Don't need the extra page at 4K, but need to fix * trampoline before removing it. (see the GDT stuff) */ - reserve_bootmem(PAGE_SIZE, PAGE_SIZE); + reserve_bootmem(MMUPAGE_SIZE, MMUPAGE_SIZE); #endif #ifdef CONFIG_ACPI_SLEEP /* @@ -771,7 +775,7 @@ static unsigned long __init setup_memory #ifdef CONFIG_BLK_DEV_INITRD if (LOADER_TYPE && INITRD_START) { - if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { + if (INITRD_START + INITRD_SIZE <= PFN_PHYS(max_low_pfn)) { reserve_bootmem(INITRD_START, INITRD_SIZE); initrd_start = INITRD_START ? INITRD_START + PAGE_OFFSET : 0; @@ -781,7 +785,7 @@ static unsigned long __init setup_memory printk(KERN_ERR "initrd extends beyond end of memory " "(0x%08lx > 0x%08lx)\ndisabling initrd\n", INITRD_START + INITRD_SIZE, - max_low_pfn << PAGE_SHIFT); + PFN_PHYS(max_low_pfn)); initrd_start = 0; } } @@ -834,7 +838,7 @@ static void __init register_memory(unsig request_resource(&ioport_resource, standard_io_resources+i); /* Tell the PCI layer not to allocate too close to the RAM area.. 
*/ - low_mem_size = ((max_low_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff; + low_mem_size = ((max_low_pfn << MMUPAGE_SHIFT) + 0xfffff) & ~0xfffff; if (low_mem_size > pci_mem_start) pci_mem_start = low_mem_size; } diff -urpN linux-2.5.61/arch/i386/kernel/smpboot.c pgcl-2.5.61-1/arch/i386/kernel/smpboot.c --- linux-2.5.61/arch/i386/kernel/smpboot.c 2003-02-14 15:51:44.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/kernel/smpboot.c 2003-02-14 20:44:43.000000000 -0800 @@ -100,7 +100,7 @@ static unsigned long __init setup_trampo */ void __init smp_alloc_memory(void) { - trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE); + trampoline_base = (void *) alloc_bootmem_low_pages(MMUPAGE_SIZE); /* * Has to be in very low memory so we can execute * real-mode AP code. diff -urpN linux-2.5.61/arch/i386/kernel/sys_i386.c pgcl-2.5.61-1/arch/i386/kernel/sys_i386.c --- linux-2.5.61/arch/i386/kernel/sys_i386.c 2003-02-14 15:52:44.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/kernel/sys_i386.c 2003-02-14 20:44:43.000000000 -0800 @@ -97,10 +97,10 @@ asmlinkage int old_mmap(struct mmap_arg_ goto out; err = -EINVAL; - if (a.offset & ~PAGE_MASK) + if (a.offset & ~MMUPAGE_MASK) goto out; - err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); + err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> MMUPAGE_SHIFT); out: return err; } diff -urpN linux-2.5.61/arch/i386/kernel/sysenter.c pgcl-2.5.61-1/arch/i386/kernel/sysenter.c --- linux-2.5.61/arch/i386/kernel/sysenter.c 2003-02-14 15:53:01.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/kernel/sysenter.c 2003-02-14 20:44:43.000000000 -0800 @@ -33,7 +33,7 @@ struct fake_sep_struct { struct task_struct task; unsigned char trampoline[32] __attribute__((aligned(1024))); unsigned char stack[0]; -} __attribute__((aligned(8192))); +} __attribute__((aligned(THREAD_SIZE))); void enable_sep_cpu(void *info) { diff -urpN linux-2.5.61/arch/i386/kernel/traps.c pgcl-2.5.61-1/arch/i386/kernel/traps.c --- linux-2.5.61/arch/i386/kernel/traps.c 2003-02-14 15:51:19.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/kernel/traps.c 2003-02-14 20:44:43.000000000 -0800 @@ -115,7 +115,7 @@ void show_trace_task(struct task_struct unsigned long esp = tsk->thread.esp; /* User space on another CPU? */ - if ((esp ^ (unsigned long)tsk->thread_info) & (PAGE_MASK<<1)) + if ((esp ^ (unsigned long)tsk->thread_info) & ~(THREAD_SIZE-1)) return; show_trace((unsigned long *)esp); } diff -urpN linux-2.5.61/arch/i386/lib/getuser.S pgcl-2.5.61-1/arch/i386/lib/getuser.S --- linux-2.5.61/arch/i386/lib/getuser.S 2003-02-14 15:51:06.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/lib/getuser.S 2003-02-14 20:44:43.000000000 -0800 @@ -8,9 +8,9 @@ * return an error value in addition to the "real" * return value. 
*/ +#include #include - /* * __get_user_X * diff -urpN linux-2.5.61/arch/i386/mm/discontig.c pgcl-2.5.61-1/arch/i386/mm/discontig.c --- linux-2.5.61/arch/i386/mm/discontig.c 2003-02-14 15:51:47.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/mm/discontig.c 2003-02-14 20:44:43.000000000 -0800 @@ -68,7 +68,7 @@ static void __init allocate_pgdat(int ni unsigned long node_datasz; node_datasz = PFN_UP(sizeof(struct pglist_data)); - NODE_DATA(nid) = (pg_data_t *)(__va(min_low_pfn << PAGE_SHIFT)); + NODE_DATA(nid) = (pg_data_t *)(__va(min_low_pfn << MMUPAGE_SHIFT)); min_low_pfn += node_datasz; memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); } @@ -113,8 +113,6 @@ static void __init register_bootmem_low_ } } -#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) - unsigned long node_remap_start_pfn[MAX_NUMNODES]; unsigned long node_remap_size[MAX_NUMNODES]; unsigned long node_remap_offset[MAX_NUMNODES]; @@ -128,8 +126,8 @@ void __init remap_numa_kva(void) int node; for (node = 1; node < numnodes; ++node) { - for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { - vaddr = node_remap_start_vaddr[node]+(pfn< system_max_low_pfn) highstart_pfn = system_max_low_pfn; printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", - pages_to_mb(highend_pfn - highstart_pfn)); + (highend_pfn - highstart_pfn) >> (20 - MMUPAGE_SHIFT)); #endif system_max_low_pfn = max_low_pfn = max_low_pfn - reserve_pages; printk(KERN_NOTICE "%ldMB LOWMEM available.\n", - pages_to_mb(system_max_low_pfn)); + system_max_low_pfn >> (20 - MMUPAGE_SHIFT)); printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n", min_low_pfn, max_low_pfn, highstart_pfn); @@ -207,6 +205,11 @@ unsigned long __init setup_memory(void) (ulong) pfn_to_kaddr(highstart_pfn)); for (nid = 0; nid < numnodes; nid++) find_max_pfn_node(nid); + printk("vmallocspace = [0x%lx, 0x%lx)\n", + VMALLOC_START, VMALLOC_END); + printk("fixmapspace = [0x%lx, 0x%lx)\n", + FIXADDR_START, FIXADDR_TOP); + printk("MAXMEM = 0x%lx\n", MAXMEM); NODE_DATA(0)->bdata = &node0_bdata; @@ -223,21 +226,21 @@ unsigned long __init setup_memory(void) * the (very unlikely) case of us accidentally initializing the * bootmem allocator with an invalid RAM area. */ - reserve_bootmem_node(NODE_DATA(0), HIGH_MEMORY, (PFN_PHYS(min_low_pfn) + - bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY)); + reserve_bootmem_node(NODE_DATA(0), HIGH_MEMORY, PFN_PHYS(min_low_pfn) + + bootmap_size - HIGH_MEMORY); /* * reserve physical page 0 - it's a special BIOS page on many boxes, * enabling clean reboots, SMP operation, laptop functions. */ - reserve_bootmem_node(NODE_DATA(0), 0, PAGE_SIZE); + reserve_bootmem_node(NODE_DATA(0), 0, MMUPAGE_SIZE); /* * But first pinch a few for the stack/trampoline stuff * FIXME: Don't need the extra page at 4K, but need to fix * trampoline before removing it. (see the GDT stuff) */ - reserve_bootmem_node(NODE_DATA(0), PAGE_SIZE, PAGE_SIZE); + reserve_bootmem_node(NODE_DATA(0), MMUPAGE_SIZE, MMUPAGE_SIZE); #ifdef CONFIG_ACPI_SLEEP /* @@ -260,7 +263,7 @@ unsigned long __init setup_memory(void) #ifdef CONFIG_BLK_DEV_INITRD if (LOADER_TYPE && INITRD_START) { - if (INITRD_START + INITRD_SIZE <= (system_max_low_pfn << PAGE_SHIFT)) { + if (INITRD_START + INITRD_SIZE <= (system_max_low_pfn << MMUPAGE_SHIFT)) { reserve_bootmem_node(NODE_DATA(0), INITRD_START, INITRD_SIZE); initrd_start = INITRD_START ? 
INITRD_START + PAGE_OFFSET : 0; @@ -270,7 +273,7 @@ unsigned long __init setup_memory(void) printk(KERN_ERR "initrd extends beyond end of memory " "(0x%08lx > 0x%08lx)\ndisabling initrd\n", INITRD_START + INITRD_SIZE, - system_max_low_pfn << PAGE_SHIFT); + system_max_low_pfn << MMUPAGE_SHIFT); initrd_start = 0; } } @@ -290,20 +293,20 @@ void __init zone_sizes_init(void) unsigned long start = node_start_pfn[nid]; unsigned long high = node_end_pfn[nid]; - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> MMUPAGE_SHIFT; if (start > low) { #ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = high - start; + zones_size[ZONE_HIGHMEM] = (high - start) >> PAGE_MMUSHIFT; #endif } else { if (low < max_dma) - zones_size[ZONE_DMA] = low; + zones_size[ZONE_DMA] = low >> PAGE_MMUSHIFT; else { - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = low - max_dma; + zones_size[ZONE_DMA] = max_dma >> PAGE_MMUSHIFT; + zones_size[ZONE_NORMAL] = (low - max_dma) >> PAGE_MMUSHIFT; #ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = high - low; + zones_size[ZONE_HIGHMEM] = (high - low) >> PAGE_MMUSHIFT; #endif } } @@ -337,10 +340,14 @@ void __init set_highmem_pages_init(int b zone_start_pfn = NODE_DATA(nid)->node_zones[ZONE_HIGHMEM].zone_start_pfn; printk("Initializing highpages for node %d\n", nid); - for (node_pfn = 0; node_pfn < node_high_size; node_pfn++) { - one_highpage_init((struct page *)(zone_mem_map + node_pfn), - zone_start_pfn + node_pfn, bad_ppro); - } + + /* + * Note: zone->spanned_pages is in PAGE_SIZE units. + */ + for (node_pfn = 0; node_pfn < node_high_size; node_pfn++) + one_highpage_init(&zone_mem_map[node_pfn], + zone_start_pfn + node_pfn*PAGE_MMUCOUNT, + bad_ppro); } totalram_pages += totalhigh_pages; #endif diff -urpN linux-2.5.61/arch/i386/mm/fault.c pgcl-2.5.61-1/arch/i386/mm/fault.c --- linux-2.5.61/arch/i386/mm/fault.c 2003-02-14 15:51:05.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/mm/fault.c 2003-02-14 20:44:43.000000000 -0800 @@ -20,6 +20,8 @@ #include #include /* For unblank_screen() */ #include +#include /* for max_low_pfn */ +#include #include #include @@ -53,9 +55,9 @@ good_area: if (!(vma->vm_flags & VM_WRITE)) goto bad_area; size--; - size += start & ~PAGE_MASK; - size >>= PAGE_SHIFT; - start &= PAGE_MASK; + size += start & ~MMUPAGE_MASK; + size >>= MMUPAGE_SHIFT; + start &= MMUPAGE_MASK; for (;;) { survive: @@ -73,7 +75,7 @@ good_area: if (!size) break; size--; - start += PAGE_SIZE; + start += MMUPAGE_SIZE; if (start < vma->vm_end) continue; vma = vma->vm_next; @@ -154,19 +156,22 @@ asmlinkage void do_page_fault(struct pt_ struct mm_struct *mm; struct vm_area_struct * vma; unsigned long address; - unsigned long page; int write; siginfo_t info; /* get the address */ __asm__("movl %%cr2,%0":"=r" (address)); + pr_debug("faulted on %lx,", address); + /* It's safe to allow irq's after cr2 has been saved */ if (regs->eflags & X86_EFLAGS_IF) local_irq_enable(); tsk = current; + pr_debug(" pid = %d\n", current->pid); + /* * We fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. 
@@ -185,7 +190,20 @@ asmlinkage void do_page_fault(struct pt_ mm = tsk->mm; info.si_code = SEGV_MAPERR; - + if (1) { + pgd_t *pgd = pgd_offset(mm, address); + pmd_t *pmd = pmd_offset(pgd, address); + pr_debug("fault handled by PGD at vaddr %p, %Lx\n", + pgd, pgd_val(*pgd)); + pr_debug("fault handled by PMD at vaddr %p, %Lx\n", + pmd, pmd_val(*pmd)); + if (pmd_present(*pmd)) { + pr_debug("fault will be handled by PTE at paddr %Lx\n", + (pmd_val(*pmd) & MMUPAGE_MASK) + +__pte_offset(address)*sizeof(pte_t)); + } else + pr_debug("pmd not present\n"); + } /* * If we're in an interrupt, have no user context or are running in an * atomic region then we must not take the fault.. @@ -196,12 +214,16 @@ asmlinkage void do_page_fault(struct pt_ down_read(&mm->mmap_sem); vma = find_vma(mm, address); - if (!vma) + if (!vma) { + pr_debug("no vma, goto bad_area\n"); goto bad_area; + } if (vma->vm_start <= address) goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) + if (!(vma->vm_flags & VM_GROWSDOWN)) { + pr_debug("VM_GROWSDOWN not in vma->vm_flags, goto bad_area\n"); goto bad_area; + } if (error_code & 4) { /* * accessing the stack below %esp is always a bug. @@ -209,11 +231,15 @@ asmlinkage void do_page_fault(struct pt_ * pusha) doing post-decrement on the stack and that * doesn't show up until later.. */ - if (address + 32 < regs->esp) + if (address + 32 < regs->esp) { + pr_debug("postdecrement on stack, goto bad_area\n"); goto bad_area; + } } - if (expand_stack(vma, address)) + if (expand_stack(vma, address)) { + pr_debug("expand_stack() failed, goto bad_area\n"); goto bad_area; + } /* * Ok, we have a good vm_area for this memory access, so * we can handle it.. @@ -225,19 +251,24 @@ good_area: default: /* 3: write, present */ #ifdef TEST_VERIFY_AREA if (regs->cs == KERNEL_CS) - printk("WP fault at %08lx\n", regs->eip); + pr_debug("WP fault at %08lx\n", regs->eip); #endif /* fall through */ case 2: /* write, not present */ - if (!(vma->vm_flags & VM_WRITE)) + if (!(vma->vm_flags & VM_WRITE)) { + pr_debug("vma not writable, goto bad_area\n"); goto bad_area; + } write++; break; case 1: /* read, present */ + pr_debug("NFI what happened, goto bad_area\n"); goto bad_area; case 0: /* read, not present */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) { + pr_debug("vma not read/exec, goto bad_area\n"); goto bad_area; + } } survive: @@ -265,7 +296,7 @@ good_area: * Did it hit the DOS screen memory VA from vm86 mode? 
*/ if (regs->eflags & VM_MASK) { - unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT; + unsigned long bit = (address - 0xA0000) >> MMUPAGE_SHIFT; if (bit < 32) tsk->thread.screen_bitmap |= 1 << bit; } @@ -281,6 +312,44 @@ bad_area: /* User mode accesses just cause a SIGSEGV */ if (error_code & 4) { + printk("user mode SIGSEGV, pid = %d, comm = %16s, EIP = %p, ESP = %p, CR2 = %p\n", + current->pid, current->comm, (void *)regs->eip, (void *)regs->esp, (void *)address); + spin_lock(&mm->page_table_lock); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long addr; + + printk("vma = [%lx,%lx) prot=%lx flags=%lx\n", + vma->vm_start, vma->vm_end, + vma->vm_page_prot.pgprot, vma->vm_flags); + + for (addr = vma->vm_start; addr < vma->vm_end; addr += MMUPAGE_SIZE) { + pgd_t *pgd = pgd_offset(mm, addr); + pmd_t *pmd; + pte_t *pte; + struct page *page; + void *mem; + + if (pgd_none(*pgd) || pgd_bad(*pgd)) + continue; + + pmd = pmd_offset(pgd, addr); + if (pmd_none(*pmd) || pmd_bad(*pmd)) + continue; + + pte = pte_offset_map(pmd, addr); + if (pte_none(*pte) || !pte_present(*pte)) { + pte_unmap(pte); + continue; + } + page = pte_page(*pte); + mem = kmap_atomic(page, KM_USER0); + if (!memcmp(mem, page_address(ZERO_PAGE(0)), PAGE_SIZE)) + printk("page at 0x%lx zero!\n", addr); + kunmap_atomic(mem, KM_USER0); + pte_unmap(pte); + } + } + spin_unlock(&mm->page_table_lock); tsk->thread.cr2 = address; tsk->thread.error_code = error_code; tsk->thread.trap_no = 14; @@ -288,6 +357,13 @@ bad_area: info.si_errno = 0; /* info.si_code has been set above */ info.si_addr = (void *)address; +#if 0 + if (current->pid >= 1024) { + while (1) { + schedule_timeout(HZ); + } + } +#endif force_sig_info(SIGSEGV, &info, tsk); return; } @@ -320,30 +396,53 @@ no_context: bust_spinlocks(1); - if (address < PAGE_SIZE) + if (address < MMUPAGE_SIZE) printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); else printk(KERN_ALERT "Unable to handle kernel paging request"); - printk(" at virtual address %08lx\n",address); + printk(" at virtual address %08lx\n", address); printk(" printing eip:\n"); printk("%08lx\n", regs->eip); - asm("movl %%cr3,%0":"=r" (page)); - page = ((unsigned long *) __va(page))[address >> 22]; - printk(KERN_ALERT "*pde = %08lx\n", page); - /* - * We must not directly access the pte in the highpte - * case, the page table might be allocated in highmem. - * And lets rather not kmap-atomic the pte, just in case - * it's allocated already. - */ -#ifndef CONFIG_HIGHPTE - if (page & 1) { - page &= PAGE_MASK; - address &= 0x003ff000; - page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; - printk(KERN_ALERT "*pte = %08lx\n", page); + { + unsigned long cr3; + pgd_t *pgd; + pmd_t *pmd; + char *fmt; + + asm("movl %%cr3,%0":"=r" (cr3)); + cr3 &= ~0x1f; /* lower 5 bits of %cr3 are flags */ + /* pgd's in lowmem, but only need to be < 4G (32-bit %cr3) */ + pgd = (pgd_t *)__va(cr3); + fmt = PTRS_PER_PMD > 1 ? 
KERN_ALERT "*pdpte = %Lx\n" : NULL; + if (PTRS_PER_PMD > 1) + printk(fmt, pgd_val(*pgd)); + + /* pmd's in lowmem, but can be anywhere (64-bit PDPTE) */ + pmd = pmd_offset(pgd, address); + if (PTRS_PER_PMD > 1) + fmt = KERN_ALERT "*pde = %Lx\n"; + else + fmt = KERN_ALERT "*pde = %08lx\n"; + printk(fmt, pmd_val(*pmd)); + + /* + * this is getting at what are potentially user + * PTE's with pte_offset_kernel(); it's mostly + * unsafe to try editing kernel PTE's at this + * point for kmap_atomic() so just drop out of it + * if pmd_val(*pmd)/MMUPAGE_SIZE > max_low_pfn + */ + + if (pmd_present(*pmd) && !pmd_large(*pmd) + && pmd_val(*pmd)/MMUPAGE_SIZE <= max_low_pfn) { + pte_t *pte = pte_offset_kernel(pmd, address); + if (PTRS_PER_PMD > 1) + fmt = KERN_ALERT "*pte = %Lx\n"; + else + fmt = KERN_ALERT "*pte = %08lx\n"; + printk(fmt, pte_val(*pte)); + } } -#endif die("Oops", regs, error_code); bust_spinlocks(0); do_exit(SIGKILL); @@ -371,6 +470,7 @@ do_sigbus: * Send a sigbus, regardless of whether we were in kernel * or user mode. */ + pr_debug("sending SIGBUS\n"); tsk->thread.cr2 = address; tsk->thread.error_code = error_code; tsk->thread.trap_no = 14; @@ -399,23 +499,31 @@ vmalloc_fault: pmd_t *pmd, *pmd_k; pte_t *pte_k; + printk("took vmalloc_fault on address %lx\n", address); + asm("movl %%cr3,%0":"=r" (pgd)); pgd = offset + (pgd_t *)__va(pgd); pgd_k = init_mm.pgd + offset; - if (!pgd_present(*pgd_k)) + if (!pgd_present(*pgd_k)) { + printk("missing pgd in vmalloc_fault()!\n"); goto no_context; + } set_pgd(pgd, *pgd_k); pmd = pmd_offset(pgd, address); pmd_k = pmd_offset(pgd_k, address); - if (!pmd_present(*pmd_k)) + if (!pmd_present(*pmd_k)) { + printk("missing pmd in vmalloc_fault()!\n"); goto no_context; + } set_pmd(pmd, *pmd_k); pte_k = pte_offset_kernel(pmd_k, address); - if (!pte_present(*pte_k)) + if (!pte_present(*pte_k)) { + printk("missing pte in vmalloc_fault()!\n"); goto no_context; + } return; } } diff -urpN linux-2.5.61/arch/i386/mm/highmem.c pgcl-2.5.61-1/arch/i386/mm/highmem.c --- linux-2.5.61/arch/i386/mm/highmem.c 2003-02-14 15:51:46.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/mm/highmem.c 2003-02-14 20:44:43.000000000 -0800 @@ -29,20 +29,27 @@ void kunmap(struct page *page) void *kmap_atomic(struct page *page, enum km_type type) { enum fixed_addresses idx; - unsigned long vaddr; + unsigned long vaddr, pfn; + int k; inc_preempt_count(); if (page < highmem_start_page) return page_address(page); idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + vaddr = __fix_to_virt(FIX_KMAP_END) + idx*PAGE_SIZE; + pfn = page_to_pfn(page); + for (k = 0; k < PAGE_MMUCOUNT; ++k) { + unsigned long addr = vaddr + k*MMUPAGE_SIZE; + pgd_t *pgd = pgd_offset_k(addr); + pmd_t *pmd = pmd_offset(pgd, addr); + pte_t *pte = pte_offset_kernel(pmd, addr); #if CONFIG_DEBUG_HIGHMEM - if (!pte_none(*(kmap_pte-idx))) - BUG(); + BUG_ON(!pte_none(*pte)); #endif - set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); - __flush_tlb_one(vaddr); + set_pte(pte, pfn_pte(pfn + k, kmap_prot)); + __flush_tlb_one(addr); + } return (void*) vaddr; } @@ -50,23 +57,36 @@ void *kmap_atomic(struct page *page, enu void kunmap_atomic(void *kvaddr, enum km_type type) { #if CONFIG_DEBUG_HIGHMEM - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); + unsigned long vaddr = (unsigned long) kvaddr & MMUPAGE_MASK; + /* enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); */ + enum fixed_addresses idx = (vaddr - 
__fix_to_virt(FIX_KMAP_END))/PAGE_SIZE; + unsigned long lower_bound = __fix_to_virt(FIX_KMAP_END) + idx*PAGE_SIZE; + unsigned long upper_bound = lower_bound + PAGE_SIZE; + int k; if (vaddr < FIXADDR_START) { // FIXME dec_preempt_count(); return; } - if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) + if (vaddr < lower_bound || vaddr > upper_bound) { + printk("vaddr %lx outside [%lx,%lx)\n", vaddr, lower_bound, upper_bound); BUG(); + } /* * force other mappings to Oops if they'll try to access * this pte without first remap it */ - pte_clear(kmap_pte-idx); - __flush_tlb_one(vaddr); + vaddr = __fix_to_virt(FIX_KMAP_END) + idx*PAGE_SIZE; + for (k = 0; k < PAGE_MMUCOUNT; ++k) { + unsigned long addr = vaddr + k*MMUPAGE_SIZE; + pgd_t *pgd = pgd_offset_k(addr); + pmd_t *pmd = pmd_offset(pgd, addr); + pte_t *pte = pte_offset_kernel(pmd, addr); + pte_clear(pte); + __flush_tlb_one(addr); + } #endif dec_preempt_count(); @@ -74,14 +94,22 @@ void kunmap_atomic(void *kvaddr, enum km struct page *kmap_atomic_to_page(void *ptr) { - unsigned long idx, vaddr = (unsigned long)ptr; + unsigned long vaddr = (unsigned long)ptr; + pgd_t *pgd; + pmd_t *pmd; pte_t *pte; if (vaddr < FIXADDR_START) return virt_to_page(ptr); - idx = virt_to_fix(vaddr); - pte = kmap_pte - (idx - FIX_KMAP_BEGIN); + pgd = pgd_offset_k(vaddr); + pmd = pmd_offset(pgd, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + + /* + * unsigned long idx = virt_to_fix(vaddr); + * pte = &kmap_pte[idx*PAGE_MMUCOUNT]; + */ return pte_page(*pte); } diff -urpN linux-2.5.61/arch/i386/mm/init.c pgcl-2.5.61-1/arch/i386/mm/init.c --- linux-2.5.61/arch/i386/mm/init.c 2003-02-14 15:52:25.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/mm/init.c 2003-02-14 20:55:01.000000000 -0800 @@ -43,6 +43,7 @@ struct mmu_gather mmu_gathers[NR_CPUS]; unsigned long highstart_pfn, highend_pfn; +struct page *zero_page; /* * Creates a middle page table and puts a pointer to it in the @@ -54,7 +55,7 @@ static pmd_t * __init one_md_table_init( pmd_t *pmd_table; #if CONFIG_X86_PAE - pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); + pmd_table = (pmd_t *) alloc_bootmem_low_pages(MMUPAGE_SIZE); set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); if (pmd_table != pmd_offset(pgd, 0)) BUG(); @@ -72,7 +73,7 @@ static pmd_t * __init one_md_table_init( static pte_t * __init one_page_table_init(pmd_t *pmd) { if (pmd_none(*pmd)) { - pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(MMUPAGE_SIZE); set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); if (page_table != pte_offset_kernel(pmd, 0)) BUG(); @@ -109,7 +110,14 @@ static void __init page_table_range_init for ( ; (pgd_ofs < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_ofs++) { if (pgd_none(*pgd)) one_md_table_init(pgd); + } + vaddr = start; + pgd_ofs = __pgd_offset(vaddr); + pmd_ofs = __pmd_offset(vaddr); + pgd = pgd_base + pgd_ofs; + + for ( ; (pgd_ofs < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_ofs++) { pmd = pmd_offset(pgd, vaddr); for (; (pmd_ofs < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_ofs++) { if (pmd_none(*pmd)) @@ -178,8 +186,8 @@ static inline int page_is_ram(unsigned l * are not. Notably the 640->1Mb area. We need a sanity * check here. 
*/ - addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT; - end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT; + addr = (e820.map[i].addr+MMUPAGE_SIZE-1) >> MMUPAGE_SHIFT; + end = (e820.map[i].addr+e820.map[i].size) >> MMUPAGE_SHIFT; if ((pagenr >= addr) && (pagenr < end)) return 1; } @@ -187,37 +195,12 @@ static inline int page_is_ram(unsigned l } #if CONFIG_HIGHMEM -pte_t *kmap_pte; pgprot_t kmap_prot; - -#define kmap_get_fixmap_pte(vaddr) \ - pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) - -void __init kmap_init(void) -{ - unsigned long kmap_vstart; - - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - - kmap_prot = PAGE_KERNEL; -} +#define kmap_init() do { kmap_prot = PAGE_KERNEL; } while (0) void __init permanent_kmaps_init(pgd_t *pgd_base) { - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - unsigned long vaddr; - - vaddr = PKMAP_BASE; - page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); - - pgd = swapper_pg_dir + __pgd_offset(vaddr); - pmd = pmd_offset(pgd, vaddr); - pte = pte_offset_kernel(pmd, vaddr); - pkmap_page_table = pte; + page_table_range_init(PKMAP_BASE, PKMAP_BASE + PAGE_SIZE*LAST_PKMAP, pgd_base); } void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) @@ -236,7 +219,7 @@ void __init one_highpage_init(struct pag void __init set_highmem_pages_init(int bad_ppro) { int pfn; - for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) + for (pfn = highstart_pfn; pfn < highend_pfn; pfn += PAGE_MMUCOUNT) one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); totalram_pages += totalhigh_pages; } @@ -245,9 +228,9 @@ extern void set_highmem_pages_init(int); #endif /* !CONFIG_DISCONTIGMEM */ #else -#define kmap_init() do { } while (0) -#define permanent_kmaps_init(pgd_base) do { } while (0) -#define set_highmem_pages_init(bad_ppro) do { } while (0) +#define kmap_init() do { } while (0) +#define permanent_kmaps_init(pgd_base) do { } while (0) +#define set_highmem_pages_init(bad_ppro) do { } while (0) #endif /* CONFIG_HIGHMEM */ unsigned long __PAGE_KERNEL = _PAGE_KERNEL; @@ -303,6 +286,34 @@ static void __init pagetable_init (void) */ pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; #endif + { + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + unsigned long addr = VMALLOC_START; + + do { + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd) || pgd_bad(*pgd)) { + addr += MMUPAGE_SIZE; + continue; + } + do { + pmd = pmd_offset(pgd, addr); + if (pmd_none(*pmd) || pmd_bad(*pmd)) { + addr += MMUPAGE_SIZE; + continue; + } + do { + pte = pte_offset_kernel(pmd, addr); + if (!pte_none(*pte) || pte_present(*pte)) { + printk("bad vmallocspace PTE at vaddr 0x%lx\n", addr); + } + addr += MMUPAGE_SIZE; + } while (addr < VMALLOC_END); + } while (addr < VMALLOC_END); + } while (addr < VMALLOC_END); + } } void zap_low_mappings (void) @@ -329,17 +340,17 @@ void __init zone_sizes_init(void) unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; unsigned int max_dma, high, low; - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> MMUPAGE_SHIFT; low = max_low_pfn; high = highend_pfn; if (low < max_dma) - zones_size[ZONE_DMA] = low; + zones_size[ZONE_DMA] = low >> PAGE_MMUSHIFT; else { - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = low - max_dma; + zones_size[ZONE_DMA] = max_dma >> PAGE_MMUSHIFT; + zones_size[ZONE_NORMAL] = (low - max_dma) >> PAGE_MMUSHIFT; #ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = high - low; + 
zones_size[ZONE_HIGHMEM] = (high - low) >> PAGE_MMUSHIFT; #endif } free_area_init(zones_size); @@ -370,7 +381,6 @@ void __init paging_init(void) set_in_cr4(X86_CR4_PAE); #endif __flush_tlb_all(); - kmap_init(); zone_sizes_init(); } @@ -421,6 +431,7 @@ static void __init set_max_mapnr_init(vo #else max_mapnr = num_physpages = max_low_pfn; #endif + max_mapnr /= PAGE_MMUCOUNT; } #define __free_all_bootmem() free_all_bootmem() #else @@ -428,11 +439,14 @@ static void __init set_max_mapnr_init(vo extern void set_max_mapnr_init(void); #endif /* !CONFIG_DISCONTIGMEM */ +/* + * Most of the reporting here needs doublechecking. + */ void __init mem_init(void) { extern int ppro_with_ram_bug(void); int codesize, reservedpages, datasize, initsize; - int tmp; + int pfn; int bad_ppro; #ifndef CONFIG_DISCONTIGMEM @@ -442,36 +456,31 @@ void __init mem_init(void) bad_ppro = ppro_with_ram_bug(); -#ifdef CONFIG_HIGHMEM - /* check that fixmap and pkmap do not overlap */ - if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { - printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n"); - printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", - PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START); - BUG(); - } -#endif - set_max_mapnr_init(); #ifdef CONFIG_HIGHMEM - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE); + high_memory = (void *) __va(highstart_pfn * MMUPAGE_SIZE); #else - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + high_memory = (void *) __va(max_low_pfn * MMUPAGE_SIZE); #endif /* clear the zero-page */ - memset(empty_zero_page, 0, PAGE_SIZE); + memset(empty_zero_page, 0, MMUPAGE_SIZE); /* this will put all low memory onto the freelists */ totalram_pages += __free_all_bootmem(); + zero_page = alloc_page(GFP_ATOMIC|GFP_DMA); + clear_page(page_address(zero_page)); + SetPageReserved(zero_page); + totalram_pages--; + reservedpages = 0; - for (tmp = 0; tmp < max_low_pfn; tmp++) + for (pfn = 0; pfn < max_low_pfn; pfn += PAGE_MMUCOUNT) /* * Only count reserved RAM pages */ - if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp))) + if (page_is_ram(pfn) && PageReserved(pfn_to_page(pfn))) reservedpages++; set_highmem_pages_init(bad_ppro); @@ -482,13 +491,18 @@ void __init mem_init(void) printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), + num_physpages << (MMUPAGE_SHIFT-10), codesize >> 10, reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) ); + printk("MAXMEM=0x%lx\n", MAXMEM); + printk("vmalloc: start = 0x%lx, end = 0x%lx\n", + VMALLOC_START, VMALLOC_END); + printk("fixaddr: start = 0x%lx, end = 0x%lx\n", + FIXADDR_START, FIXADDR_TOP); #if CONFIG_X86_PAE if (!cpu_has_pae) @@ -565,28 +579,43 @@ static int do_test_wp_bit(void) void free_initmem(void) { - unsigned long addr; + unsigned long addr, freed = 0;; addr = (unsigned long)(&__init_begin); - for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { + addr = (addr + PAGE_SIZE - 1) & PAGE_MASK; + while(addr < (((unsigned long)(&__init_end)) & PAGE_MASK)) { ClearPageReserved(virt_to_page(addr)); set_page_count(virt_to_page(addr), 1); free_page(addr); totalram_pages++; + freed++; + addr += PAGE_SIZE; } - printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10); + printk(KERN_INFO "Freeing unused kernel memory: %ldk freed\n", + freed*(PAGE_SIZE/1024)); 
} #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - if (start < end) - printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10); - for (; start < end; start += PAGE_SIZE) { + unsigned long freed = 0; + + start = (start + PAGE_SIZE - 1) & PAGE_MASK; + end &= PAGE_MASK; + + if (start >= end) + return; + + while (start < end) { ClearPageReserved(virt_to_page(start)); set_page_count(virt_to_page(start), 1); free_page(start); totalram_pages++; + freed++; + start += PAGE_SIZE; } + + printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", + freed*(PAGE_SIZE/1024)); } #endif diff -urpN linux-2.5.61/arch/i386/mm/ioremap.c pgcl-2.5.61-1/arch/i386/mm/ioremap.c --- linux-2.5.61/arch/i386/mm/ioremap.c 2003-02-14 15:51:23.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/mm/ioremap.c 2003-02-14 20:44:43.000000000 -0800 @@ -30,7 +30,7 @@ static inline void remap_area_pte(pte_t end = PMD_SIZE; if (address >= end) BUG(); - pfn = phys_addr >> PAGE_SHIFT; + pfn = phys_addr >> MMUPAGE_SHIFT; do { if (!pte_none(*pte)) { printk("remap_area_pte: page already exists\n"); @@ -38,7 +38,7 @@ static inline void remap_area_pte(pte_t } set_pte(pte, pfn_pte(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | flags))); - address += PAGE_SIZE; + address += MMUPAGE_SIZE; pfn++; pte++; } while (address && (address < end)); @@ -146,9 +146,9 @@ void * __ioremap(unsigned long phys_addr /* * Mappings have to be page-aligned */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr) - phys_addr; + offset = phys_addr & ~MMUPAGE_MASK; + phys_addr &= MMUPAGE_MASK; + size = MMUPAGE_ALIGN(last_addr) - phys_addr; /* * Ok, go for it.. @@ -196,7 +196,7 @@ void *ioremap_nocache (unsigned long phy if (phys_addr + size < virt_to_phys(high_memory)) { struct page *ppage = virt_to_page(__va(phys_addr)); - unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long npages = (size + MMUPAGE_SIZE - 1) >> MMUPAGE_SHIFT; BUG_ON(phys_addr+size > (unsigned long)high_memory); BUG_ON(phys_addr + size < phys_addr); @@ -215,7 +215,7 @@ void iounmap(void *addr) struct vm_struct *p; if (addr <= high_memory) return; - p = remove_vm_area((void *) (PAGE_MASK & (unsigned long) addr)); + p = remove_vm_area((void *) (MMUPAGE_MASK & (unsigned long) addr)); if (!p) { printk("__iounmap: bad address %p\n", addr); return; @@ -224,7 +224,7 @@ void iounmap(void *addr) unmap_vm_area(p); if (p->flags && p->phys_addr < virt_to_phys(high_memory)) { change_page_attr(virt_to_page(__va(p->phys_addr)), - p->size >> PAGE_SHIFT, + p->size >> MMUPAGE_SHIFT, PAGE_KERNEL); } kfree(p); @@ -250,14 +250,14 @@ void __init *bt_ioremap(unsigned long ph /* * Mappings have to be page-aligned */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr) - phys_addr; + offset = phys_addr & ~MMUPAGE_MASK; + phys_addr &= MMUPAGE_MASK; + size = MMUPAGE_ALIGN(last_addr) - phys_addr; /* * Mappings have to fit in the FIX_BTMAP area. 
*/ - nrpages = size >> PAGE_SHIFT; + nrpages = size >> MMUPAGE_SHIFT; if (nrpages > NR_FIX_BTMAPS) return NULL; @@ -267,7 +267,7 @@ void __init *bt_ioremap(unsigned long ph idx = FIX_BTMAP_BEGIN; while (nrpages > 0) { set_fixmap(idx, phys_addr); - phys_addr += PAGE_SIZE; + phys_addr += MMUPAGE_SIZE; --idx; --nrpages; } @@ -284,8 +284,8 @@ void __init bt_iounmap(void *addr, unsig virt_addr = (unsigned long)addr; if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) return; - offset = virt_addr & ~PAGE_MASK; - nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; + offset = virt_addr & ~MMUPAGE_MASK; + nrpages = MMUPAGE_ALIGN(offset + size - 1) >> MMUPAGE_SHIFT; idx = FIX_BTMAP_BEGIN; while (nrpages > 0) { diff -urpN linux-2.5.61/arch/i386/mm/pageattr.c pgcl-2.5.61-1/arch/i386/mm/pageattr.c --- linux-2.5.61/arch/i386/mm/pageattr.c 2003-02-14 15:51:33.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/mm/pageattr.c 2003-02-14 20:44:43.000000000 -0800 @@ -38,8 +38,8 @@ static struct page *split_large_page(uns address = __pa(address); addr = address & LARGE_PAGE_MASK; pbase = (pte_t *)page_address(base); - for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { - pbase[i] = pfn_pte(addr >> PAGE_SHIFT, + for (i = 0; i < PTRS_PER_PTE; i++, addr += MMUPAGE_SIZE) { + pbase[i] = pfn_pte(addr/MMUPAGE_SIZE, addr == address ? prot : PAGE_KERNEL); } return base; @@ -82,7 +82,7 @@ static inline void revert_page(struct pa pte_t *linear = (pte_t *) pmd_offset(pgd_offset(&init_mm, address), address); set_pmd_pte(linear, address, - pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT, + pfn_pte((__pa(address) & LARGE_PAGE_MASK)/MMUPAGE_SIZE, PAGE_KERNEL_LARGE)); } @@ -94,15 +94,14 @@ __change_page_attr(struct page *page, pg struct page *kpte_page; #ifdef CONFIG_HIGHMEM - if (page >= highmem_start_page) - BUG(); + BUG_ON(page >= highmem_start_page); #endif address = (unsigned long)page_address(page); kpte = lookup_address(address); if (!kpte) return -EINVAL; - kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK); + kpte_page = virt_to_page(((unsigned long)kpte) & MMUPAGE_MASK); if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { if ((pte_val(*kpte) & _PAGE_PSE) == 0) { pte_t old = *kpte; @@ -162,6 +161,8 @@ int change_page_attr(struct page *page, struct page *fpage; int i; + numpages = (numpages + PAGE_MMUCOUNT - 1)& ~(PAGE_MMUCOUNT-1); + down_write(&init_mm.mmap_sem); for (i = 0; i < numpages; i++, page++) { fpage = NULL; diff -urpN linux-2.5.61/arch/i386/mm/pgtable.c pgcl-2.5.61-1/arch/i386/mm/pgtable.c --- linux-2.5.61/arch/i386/mm/pgtable.c 2003-02-14 15:53:03.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/mm/pgtable.c 2003-02-14 20:44:43.000000000 -0800 @@ -97,10 +97,12 @@ void set_pmd_pfn(unsigned long vaddr, un if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */ printk ("set_pmd_pfn: vaddr misaligned\n"); + printk ("vaddr = %lx, pfn = %lx\n", vaddr, pfn); return; /* BUG(); */ } - if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */ + if (pfn & (PMD_SIZE/MMUPAGE_SIZE-1)) { /* pfn is misaligned */ printk ("set_pmd_pfn: pfn misaligned\n"); + printk ("vaddr = %lx, pfn = %lx\n", vaddr, pfn); return; /* BUG(); */ } pgd = swapper_pg_dir + __pgd_offset(vaddr); @@ -121,11 +123,13 @@ void __set_fixmap (enum fixed_addresses { unsigned long address = __fix_to_virt(idx); + printk("__set_fixmap(%d,%lx)\n", idx, phys); + if (idx >= __end_of_fixed_addresses) { BUG(); return; } - set_pte_pfn(address, phys >> PAGE_SHIFT, flags); + set_pte_pfn(address, phys >> MMUPAGE_SHIFT, flags); } pte_t *pte_alloc_one_kernel(struct 
mm_struct *mm, unsigned long address) diff -urpN linux-2.5.61/arch/i386/pci/i386.c pgcl-2.5.61-1/arch/i386/pci/i386.c --- linux-2.5.61/arch/i386/pci/i386.c 2003-02-14 15:51:21.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/pci/i386.c 2003-02-14 20:44:43.000000000 -0800 @@ -319,7 +319,7 @@ int pci_mmap_page_range(struct pci_dev * /* Write-combine setting is ignored, it is changed via the mtrr * interfaces on this platform. */ - if (remap_page_range(vma, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT, + if (remap_page_range(vma, vma->vm_start, vma->vm_pgoff << MMUPAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot)) return -EAGAIN; diff -urpN linux-2.5.61/arch/i386/pci/numa.c pgcl-2.5.61-1/arch/i386/pci/numa.c --- linux-2.5.61/arch/i386/pci/numa.c 2003-02-14 15:51:20.000000000 -0800 +++ pgcl-2.5.61-1/arch/i386/pci/numa.c 2003-02-14 20:44:43.000000000 -0800 @@ -127,7 +127,7 @@ static int __init pci_numa_init(void) return 0; pci_root_bus = pcibios_scan_root(0); - if (numnodes > 1) { + if (0 && numnodes > 1) { for (quad = 1; quad < numnodes; ++quad) { printk("Scanning PCI bus %d for quad %d\n", QUADLOCAL2BUS(quad,0), quad); diff -urpN linux-2.5.61/drivers/block/ll_rw_blk.c pgcl-2.5.61-1/drivers/block/ll_rw_blk.c --- linux-2.5.61/drivers/block/ll_rw_blk.c 2003-02-14 15:51:12.000000000 -0800 +++ pgcl-2.5.61-1/drivers/block/ll_rw_blk.c 2003-02-14 20:44:43.000000000 -0800 @@ -274,7 +274,7 @@ void blk_queue_make_request(request_queu **/ void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) { - unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; + unsigned long bounce_pfn = dma_addr >> MMUPAGE_SHIFT; unsigned long mb = dma_addr >> 20; static request_queue_t *last_q; diff -urpN linux-2.5.61/drivers/char/agp/backend.c pgcl-2.5.61-1/drivers/char/agp/backend.c --- linux-2.5.61/drivers/char/agp/backend.c 2003-02-14 15:52:06.000000000 -0800 +++ pgcl-2.5.61-1/drivers/char/agp/backend.c 2003-02-14 20:44:43.000000000 -0800 @@ -89,7 +89,7 @@ static int agp_find_max (void) { long memory, index, result; - memory = (num_physpages << PAGE_SHIFT) >> 20; + memory = (num_physpages << MMUPAGE_SHIFT) >> 20; index = 1; while ((memory > maxes_table[index].mem) && (index < 8)) @@ -101,7 +101,7 @@ static int agp_find_max (void) (maxes_table[index].mem - maxes_table[index - 1].mem); printk(KERN_INFO PFX "Maximum main memory to use for agp memory: %ldM\n", result); - result = result << (20 - PAGE_SHIFT); + result = result << (20 - MMUPAGE_SHIFT); return result; } @@ -145,7 +145,7 @@ static int agp_backend_initialize(struct } got_gatt = 1; - agp_bridge->key_list = vmalloc(PAGE_SIZE * 4); + agp_bridge->key_list = vmalloc(MMUPAGE_SIZE * 4); if (agp_bridge->key_list == NULL) { printk(KERN_ERR PFX "error allocating memory for key lists.\n"); rc = -ENOMEM; @@ -154,7 +154,7 @@ static int agp_backend_initialize(struct got_keylist = 1; /* FIXME vmalloc'd memory not guaranteed contiguous */ - memset(agp_bridge->key_list, 0, PAGE_SIZE * 4); + memset(agp_bridge->key_list, 0, MMUPAGE_SIZE * 4); if (agp_bridge->configure()) { printk(KERN_ERR PFX "error configuring host chipset.\n"); diff -urpN linux-2.5.61/drivers/char/agp/generic.c pgcl-2.5.61-1/drivers/char/agp/generic.c --- linux-2.5.61/drivers/char/agp/generic.c 2003-02-14 15:51:28.000000000 -0800 +++ pgcl-2.5.61-1/drivers/char/agp/generic.c 2003-02-14 20:44:43.000000000 -0800 @@ -83,7 +83,7 @@ agp_memory *agp_create_memory(int scratc kfree(new); return NULL; } - new->memory = vmalloc(PAGE_SIZE * scratch_pages); + new->memory = vmalloc(MMUPAGE_SIZE * scratch_pages); if 
(new->memory == NULL) { agp_free_key(new->key); @@ -118,7 +118,7 @@ void agp_free_memory(agp_memory * curr) kfree(curr); } -#define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) +#define ENTRIES_PER_PAGE (MMUPAGE_SIZE / sizeof(unsigned long)) agp_memory *agp_allocate_memory(size_t page_count, u32 type) { @@ -498,7 +498,7 @@ int agp_generic_create_gatt_table(void) if (table == NULL) return -ENOMEM; - table_end = table + ((PAGE_SIZE * (1 << page_order)) - 1); + table_end = table + ((MMUPAGE_SIZE * (1 << page_order)) - 1); for (page = virt_to_page(table); page <= virt_to_page(table_end); page++) SetPageReserved(page); @@ -507,7 +507,7 @@ int agp_generic_create_gatt_table(void) agp_gatt_table = (void *)table; CACHE_FLUSH(); agp_bridge->gatt_table = ioremap_nocache(virt_to_phys(table), - (PAGE_SIZE * (1 << page_order))); + (MMUPAGE_SIZE * (1 << page_order))); CACHE_FLUSH(); if (agp_bridge->gatt_table == NULL) { @@ -575,7 +575,7 @@ int agp_generic_free_gatt_table(void) iounmap(agp_bridge->gatt_table); table = (char *) agp_bridge->gatt_table_real; - table_end = table + ((PAGE_SIZE * (1 << page_order)) - 1); + table_end = table + ((MMUPAGE_SIZE * (1 << page_order)) - 1); for (page = virt_to_page(table); page <= virt_to_page(table_end); page++) ClearPageReserved(page); @@ -615,7 +615,7 @@ int agp_generic_insert_memory(agp_memory break; } - num_entries -= agp_memory_reserved/PAGE_SIZE; + num_entries -= agp_memory_reserved/MMUPAGE_SIZE; if (num_entries < 0) num_entries = 0; if (type != 0 || mem->type != 0) { diff -urpN linux-2.5.61/drivers/char/mem.c pgcl-2.5.61-1/drivers/char/mem.c --- linux-2.5.61/drivers/char/mem.c 2003-02-14 15:51:46.000000000 -0800 +++ pgcl-2.5.61-1/drivers/char/mem.c 2003-02-14 20:44:43.000000000 -0800 @@ -42,8 +42,8 @@ static ssize_t do_write_mem(struct file written = 0; #if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU)) /* we don't have page 0 mapped on sparc and m68k.. */ - if (realp < PAGE_SIZE) { - unsigned long sz = PAGE_SIZE-realp; + if (realp < MMUPAGE_SIZE) { + unsigned long sz = MMUPAGE_SIZE-realp; if (sz > count) sz = count; /* Hmm. Do something? */ buf+=sz; @@ -79,8 +79,8 @@ static ssize_t read_mem(struct file * fi read = 0; #if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU)) /* we don't have page 0 mapped on sparc and m68k.. */ - if (p < PAGE_SIZE) { - unsigned long sz = PAGE_SIZE-p; + if (p < MMUPAGE_SIZE) { + unsigned long sz = MMUPAGE_SIZE-p; if (sz > count) sz = count; if (sz > 0) { @@ -176,7 +176,7 @@ static inline int noncached_address(unsi static int mmap_mem(struct file * file, struct vm_area_struct * vma) { - unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; + unsigned long offset = vma->vm_pgoff << MMUPAGE_SHIFT; /* * Accessing memory above the top the kernel knows about or @@ -222,8 +222,8 @@ static ssize_t read_kmem(struct file *fi #if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU)) /* we don't have page 0 mapped on sparc and m68k.. 
*/ - if (p < PAGE_SIZE && read > 0) { - size_t tmp = PAGE_SIZE - p; + if (p < MMUPAGE_SIZE && read > 0) { + size_t tmp = MMUPAGE_SIZE - p; if (tmp > read) tmp = read; if (clear_user(buf, tmp)) return -EFAULT; @@ -247,8 +247,8 @@ static ssize_t read_kmem(struct file *fi while (count > 0) { int len = count; - if (len > PAGE_SIZE) - len = PAGE_SIZE; + if (len > MMUPAGE_SIZE) + len = MMUPAGE_SIZE; len = vread(kbuf, (char *)p, len); if (!len) break; @@ -297,8 +297,8 @@ static ssize_t write_kmem(struct file * while (count > 0) { int len = count; - if (len > PAGE_SIZE) - len = PAGE_SIZE; + if (len > MMUPAGE_SIZE) + len = MMUPAGE_SIZE; if (len && copy_from_user(kbuf, buf, len)) { free_page((unsigned long)kbuf); return -EFAULT; @@ -408,12 +408,12 @@ static inline size_t read_zero_pagealign /* The shared case is hard. Let's do the conventional zeroing. */ do { - unsigned long unwritten = clear_user(buf, PAGE_SIZE); + unsigned long unwritten = clear_user(buf, MMUPAGE_SIZE); if (unwritten) - return size + unwritten - PAGE_SIZE; + return size + unwritten - MMUPAGE_SIZE; cond_resched(); - buf += PAGE_SIZE; - size -= PAGE_SIZE; + buf += MMUPAGE_SIZE; + size -= MMUPAGE_SIZE; } while (size); return size; @@ -436,23 +436,23 @@ static ssize_t read_zero(struct file * f left = count; /* do we want to be clever? Arbitrary cut-off */ - if (count >= PAGE_SIZE*4) { + if (count >= MMUPAGE_SIZE*4) { unsigned long partial; /* How much left of the page? */ - partial = (PAGE_SIZE-1) & -(unsigned long) buf; + partial = (MMUPAGE_SIZE-1) & -(unsigned long) buf; unwritten = clear_user(buf, partial); written = partial - unwritten; if (unwritten) goto out; left -= partial; buf += partial; - unwritten = read_zero_pagealigned(buf, left & PAGE_MASK); - written += (left & PAGE_MASK) - unwritten; + unwritten = read_zero_pagealigned(buf, left & MMUPAGE_MASK); + written += (left & MMUPAGE_MASK) - unwritten; if (unwritten) goto out; - buf += left & PAGE_MASK; - left &= ~PAGE_MASK; + buf += left & MMUPAGE_MASK; + left &= ~MMUPAGE_MASK; } unwritten = clear_user(buf, left); written += left - unwritten; diff -urpN linux-2.5.61/drivers/scsi/qlogicisp.c pgcl-2.5.61-1/drivers/scsi/qlogicisp.c --- linux-2.5.61/drivers/scsi/qlogicisp.c 2003-02-14 15:52:45.000000000 -0800 +++ pgcl-2.5.61-1/drivers/scsi/qlogicisp.c 2003-02-14 20:44:43.000000000 -0800 @@ -1413,7 +1413,7 @@ static int isp1020_init(struct Scsi_Host if ((command & PCI_COMMAND_MEMORY) && ((mem_flags & 1) == 0)) { - mem_base = (u_long) ioremap(mem_base, PAGE_SIZE); + mem_base = (u_long) ioremap(mem_base, MMUPAGE_SIZE); if (!mem_base) { printk("qlogicisp : i/o remapping failed.\n"); goto out_release; diff -urpN linux-2.5.61/fs/aio.c pgcl-2.5.61-1/fs/aio.c --- linux-2.5.61/fs/aio.c 2003-02-14 15:51:43.000000000 -0800 +++ pgcl-2.5.61-1/fs/aio.c 2003-02-14 20:44:43.000000000 -0800 @@ -86,8 +86,8 @@ static void aio_free_ring(struct kioctx struct aio_ring_info *info = &ctx->ring_info; long i; - for (i=0; inr_pages; i++) - put_page(info->ring_pages[i]); + for (i=0; i < info->nr_pages; i++) + put_page(pfn_to_page(info->ring_pages[i])); if (info->mmap_size) { down_write(&ctx->mm->mmap_sem); @@ -114,25 +114,25 @@ static int aio_setup_ring(struct kioctx size = sizeof(struct aio_ring); size += sizeof(struct io_event) * nr_events; - nr_pages = (size + PAGE_SIZE-1) >> PAGE_SHIFT; + nr_pages = (size + MMUPAGE_SIZE-1) >> MMUPAGE_SHIFT; if (nr_pages < 0) return -EINVAL; info->nr_pages = nr_pages; - nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event); + nr_events = 
(MMUPAGE_SIZE*nr_pages - sizeof(struct aio_ring))/sizeof(struct io_event); info->nr = 0; info->ring_pages = info->internal_pages; if (nr_pages > AIO_RING_PAGES) { - info->ring_pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_KERNEL); + info->ring_pages = kmalloc(sizeof(unsigned long)*nr_pages, GFP_KERNEL); if (!info->ring_pages) return -ENOMEM; - memset(info->ring_pages, 0, sizeof(struct page *) * nr_pages); + memset(info->ring_pages, 0, sizeof(unsigned long)*nr_pages); } - info->mmap_size = nr_pages * PAGE_SIZE; + info->mmap_size = nr_pages*MMUPAGE_SIZE; dprintk("attempting mmap of %lu bytes\n", info->mmap_size); down_write(&ctx->mm->mmap_sem); info->mmap_base = do_mmap(NULL, 0, info->mmap_size, @@ -161,7 +161,8 @@ static int aio_setup_ring(struct kioctx info->nr = nr_events; /* trusted copy */ - ring = kmap_atomic(info->ring_pages[0], KM_USER0); + ring = kmap_atomic(pfn_to_page(info->ring_pages[0]), KM_USER0) + + (info->ring_pages[0] % PAGE_MMUCOUNT)*MMUPAGE_SIZE; ring->nr = nr_events; /* user copy */ ring->id = ctx->user_id; ring->head = ring->tail = 0; @@ -178,15 +179,17 @@ static int aio_setup_ring(struct kioctx /* aio_ring_event: returns a pointer to the event at the given index from * kmap_atomic(, km). Release the pointer with put_aio_ring_event(); */ -#define AIO_EVENTS_PER_PAGE (PAGE_SIZE / sizeof(struct io_event)) -#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) +#define AIO_EVENTS_PER_PAGE (MMUPAGE_SIZE/sizeof(struct io_event)) +#define AIO_EVENTS_FIRST_PAGE ((MMUPAGE_SIZE-sizeof(struct aio_ring))/sizeof(struct io_event)) #define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) #define aio_ring_event(info, nr, km) ({ \ unsigned pos = (nr) + AIO_EVENTS_OFFSET; \ struct io_event *__event; \ - __event = kmap_atomic( \ - (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE], km); \ + unsigned long pfn; \ + pfn = (info)->ring_pages[pos/AIO_EVENTS_PER_PAGE]; \ + __event = kmap_atomic(pfn_to_page(pfn), km); \ + __event += (pfn % PAGE_MMUCOUNT) * MMUPAGE_SIZE; \ __event += pos % AIO_EVENTS_PER_PAGE; \ __event; \ }) @@ -194,7 +197,7 @@ static int aio_setup_ring(struct kioctx #define put_aio_ring_event(event, km) do { \ struct io_event *__event = (event); \ (void)__event; \ - kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ + kunmap_atomic((void *)((unsigned long)__event & MMUPAGE_MASK), km); \ } while(0) /* ioctx_alloc @@ -400,7 +403,8 @@ static struct kiocb *__aio_get_req(struc * accept an event from this io. 
*/ spin_lock_irq(&ctx->ctx_lock); - ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0); + ring = kmap_atomic(pfn_to_page(ctx->ring_info.ring_pages[0]), KM_USER0) + + (ctx->ring_info.ring_pages[0] % PAGE_MMUCOUNT)*MMUPAGE_SIZE; if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) { list_add(&req->ki_list, &ctx->active_reqs); get_ioctx(ctx); @@ -664,8 +668,8 @@ int aio_complete(struct kiocb *iocb, lon */ spin_lock_irqsave(&ctx->ctx_lock, flags); - ring = kmap_atomic(info->ring_pages[0], KM_IRQ1); - + ring = kmap_atomic(pfn_to_page(info->ring_pages[0]), KM_IRQ1) + + (info->ring_pages[0] % PAGE_MMUCOUNT)*MMUPAGE_SIZE; tail = info->tail; event = aio_ring_event(info, tail, KM_IRQ0); tail = (tail + 1) % info->nr; @@ -720,7 +724,8 @@ static int aio_read_evt(struct kioctx *i unsigned long head; int ret = 0; - ring = kmap_atomic(info->ring_pages[0], KM_USER0); + ring = kmap_atomic(pfn_to_page(info->ring_pages[0]), KM_USER0) + + (info->ring_pages[0] % PAGE_MMUCOUNT)*MMUPAGE_SIZE; dprintk("in aio_read_evt h%lu t%lu m%lu\n", (unsigned long)ring->head, (unsigned long)ring->tail, (unsigned long)ring->nr); diff -urpN linux-2.5.61/fs/binfmt_elf.c pgcl-2.5.61-1/fs/binfmt_elf.c --- linux-2.5.61/fs/binfmt_elf.c 2003-02-14 15:51:42.000000000 -0800 +++ pgcl-2.5.61-1/fs/binfmt_elf.c 2003-02-14 20:44:43.000000000 -0800 @@ -61,10 +61,10 @@ static int elf_core_dump(long signr, str #define elf_core_dump NULL #endif -#if ELF_EXEC_PAGESIZE > PAGE_SIZE +#if ELF_EXEC_PAGESIZE > MMUPAGE_SIZE # define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE #else -# define ELF_MIN_ALIGN PAGE_SIZE +# define ELF_MIN_ALIGN MMUPAGE_SIZE #endif #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1)) @@ -780,9 +780,8 @@ static int load_elf_binary(struct linux_ and some applications "depend" upon this behavior. Since we do not have the power to recompile these, we emulate the SVr4 behavior. Sigh. */ - /* N.B. Shouldn't the size here be PAGE_SIZE?? */ down_write(¤t->mm->mmap_sem); - error = do_mmap(NULL, 0, 4096, PROT_READ | PROT_EXEC, + error = do_mmap(NULL, 0, MMUPAGE_SIZE, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE, 0); up_write(¤t->mm->mmap_sem); } @@ -1334,21 +1333,26 @@ static int elf_core_dump(long signr, str for (addr = vma->vm_start; addr < vma->vm_end; - addr += PAGE_SIZE) { + addr += MMUPAGE_SIZE) { struct page* page; + unsigned long pfn = 0; struct vm_area_struct *vma; if (get_user_pages(current, current->mm, addr, 1, 0, 1, - &page, &vma) <= 0) { - DUMP_SEEK (file->f_pos + PAGE_SIZE); + &pfn, &vma) <= 0) { + DUMP_SEEK (file->f_pos + MMUPAGE_SIZE); } else { + page = pfn_to_page(pfn); if (page == ZERO_PAGE(addr)) { - DUMP_SEEK (file->f_pos + PAGE_SIZE); + DUMP_SEEK (file->f_pos + MMUPAGE_SIZE); } else { void *kaddr; + unsigned long subpfn; + subpfn = pfn % PAGE_MMUCOUNT; flush_cache_page(vma, addr); kaddr = kmap(page); - DUMP_WRITE(kaddr, PAGE_SIZE); + kaddr += subpfn * MMUPAGE_SIZE; + DUMP_WRITE(kaddr, MMUPAGE_SIZE); flush_page_to_ram(page); kunmap(page); } diff -urpN linux-2.5.61/fs/bio.c pgcl-2.5.61-1/fs/bio.c --- linux-2.5.61/fs/bio.c 2003-02-14 15:51:53.000000000 -0800 +++ pgcl-2.5.61-1/fs/bio.c 2003-02-14 20:44:43.000000000 -0800 @@ -454,16 +454,26 @@ retry_segments: * Map the user space address into a bio suitable for io to a block * device. Caller should check the size of the returned bio, we might * not have mapped the entire range specified. + * + * XXX: + * I broke this. I need a wee bit of handholding to figure out how + * to assemble a bio. 
There are some obvious bits missing as the + * alignments etc. aren't compatible with MMUPAGE_SIZE == PAGE_SIZE. + * It probably needs a similar fix to direct io, wrt. merging etc. etc. + * I've done enough to make the get_user_pages() stuff _look_ okay, + * but this is obviously total shite wrt. performance. Which may be + * okay, as the primary users (AFAICT) are ioctl's. + * -- wli */ struct bio *bio_map_user(struct block_device *bdev, unsigned long uaddr, unsigned int len, int write_to_vm) { - unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = uaddr >> PAGE_SHIFT; + unsigned long end = (uaddr + len + MMUPAGE_SIZE - 1) >> MMUPAGE_SHIFT; + unsigned long start = uaddr >> MMUPAGE_SHIFT; const int nr_pages = end - start; request_queue_t *q = bdev_get_queue(bdev); int ret, offset, i; - struct page **pages; + unsigned long *pages; struct bio *bio; /* @@ -477,7 +487,7 @@ struct bio *bio_map_user(struct block_de if (!bio) return NULL; - pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); + pages = kmalloc(nr_pages * sizeof(unsigned long), GFP_KERNEL); if (!pages) goto out; @@ -491,9 +501,11 @@ struct bio *bio_map_user(struct block_de bio->bi_bdev = bdev; - offset = uaddr & ~PAGE_MASK; + offset = uaddr & ~MMUPAGE_MASK; for (i = 0; i < nr_pages; i++) { - unsigned int bytes = PAGE_SIZE - offset; + unsigned int bytes = MMUPAGE_SIZE - offset; + int suboff = (pages[i] % PAGE_MMUCOUNT)*MMUPAGE_SIZE; + struct page *pg = pfn_to_page(pages[i]); if (len <= 0) break; @@ -504,7 +516,7 @@ struct bio *bio_map_user(struct block_de /* * sorry... */ - if (bio_add_page(bio, pages[i], bytes, offset) < bytes) + if (bio_add_page(bio, pg, bytes, offset + suboff) < bytes) break; len -= bytes; @@ -515,7 +527,7 @@ struct bio *bio_map_user(struct block_de * release the pages we didn't map into the bio, if any */ while (i < nr_pages) - page_cache_release(pages[i++]); + page_cache_release(pfn_to_page(pages[i++])); kfree(pages); diff -urpN linux-2.5.61/fs/direct-io.c pgcl-2.5.61-1/fs/direct-io.c --- linux-2.5.61/fs/direct-io.c 2003-02-14 15:51:49.000000000 -0800 +++ pgcl-2.5.61-1/fs/direct-io.c 2003-02-14 20:44:43.000000000 -0800 @@ -35,7 +35,9 @@ /* * How many user pages to map in one call to get_user_pages(). This determines - * the size of a structure on the stack. + * the size of a structure on the stack. But these are mmupages; this + * will _not_ even be able to see a whole PAGE_SIZE area if you make + * PAGE_MMUCOUNT > DIO_PAGES. */ #define DIO_PAGES 64 @@ -49,6 +51,20 @@ * * If blkfactor is zero then the user's request was aligned to the filesystem's * blocksize. + * + * XXX: + * Okay, I just broke this and I'm not sure how to put it back together. + * Basically the issue is that we're pointed at _pfn's_ only by + * get_user_pages() so the assumption of virtual contiguity doesn't even + * guarantee PAGE_SIZE -aligned physical contiguity. + * + * AFAICT the fixup is to "opportunistically" merge all this stuff together + * into PAGE_SIZE-aligned contiguous bits and either special-case or be + * able to handle the rest as they come. I've left this broken for now. + * I'm relatively fearful of eating stackspace to keep count of the number + * mmupages starting at a given pfn there are while merging. + * + * -- wli */ struct dio { @@ -100,7 +116,7 @@ struct dio { * Page queue. These variables belong to dio_refill_pages() and * dio_get_page(). 
*/ - struct page *pages[DIO_PAGES]; /* page buffer */ + unsigned long pages[DIO_PAGES]; /* page buffer */ unsigned head; /* next page to process */ unsigned tail; /* last valid page + 1 */ int page_errors; /* errno from get_user_pages() */ @@ -155,7 +171,7 @@ static int dio_refill_pages(struct dio * */ if (dio->page_errors == 0) dio->page_errors = ret; - dio->pages[0] = ZERO_PAGE(dio->curr_user_address); + dio->pages[0] = page_to_pfn(ZERO_PAGE(dio->curr_user_address)); dio->head = 0; dio->tail = 1; ret = 0; @@ -189,7 +205,7 @@ static struct page *dio_get_page(struct return ERR_PTR(ret); BUG_ON(dio_pages_present(dio) == 0); } - return dio->pages[dio->head++]; + return pfn_to_page(dio->pages[dio->head++]); } /* diff -urpN linux-2.5.61/fs/exec.c pgcl-2.5.61-1/fs/exec.c --- linux-2.5.61/fs/exec.c 2003-02-14 15:51:30.000000000 -0800 +++ pgcl-2.5.61-1/fs/exec.c 2003-02-14 20:44:43.000000000 -0800 @@ -288,46 +288,50 @@ int copy_strings_kernel(int argc,char ** * * tsk->mmap_sem is held for writing. */ -void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address) +static void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address) { - pgd_t * pgd; - pmd_t * pmd; - pte_t * pte; + unsigned long page_pfn, subpfn; struct pte_chain *pte_chain; - if (page_count(page) != 1) - printk(KERN_ERR "mem_map disagrees with %p at %08lx\n", page, address); + page_pfn = page_to_pfn(page); - pgd = pgd_offset(tsk->mm, address); - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) - goto out_sig; - spin_lock(&tsk->mm->page_table_lock); - pmd = pmd_alloc(tsk->mm, pgd, address); - if (!pmd) - goto out; - pte = pte_alloc_map(tsk->mm, pmd, address); - if (!pte) - goto out; - if (!pte_none(*pte)) { + for (subpfn = 0; subpfn < PAGE_MMUCOUNT; ++subpfn) { + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + unsigned long pfn, vaddr = address + subpfn*MMUPAGE_SIZE; + + pgd = pgd_offset(tsk->mm, vaddr); + pte_chain = pte_chain_alloc(GFP_KERNEL); + if (!pte_chain) + goto out_nolock; + spin_lock(&tsk->mm->page_table_lock); + pmd = pmd_alloc(tsk->mm, pgd, vaddr); + if (!pmd) + goto out; + pte = pte_alloc_map(tsk->mm, pmd, vaddr); + if (!pte) + goto out; + pfn = page_pfn + subpfn; + set_pte(pte, pte_mkdirty(pte_mkwrite(pfn_pte(pfn, PAGE_COPY)))); + page_cache_get(page); + pte_chain = page_add_rmap(page, pte, pte_chain); pte_unmap(pte); - goto out; + tsk->mm->rss++; + spin_unlock(&tsk->mm->page_table_lock); + + /* no need for flush_tlb */ + pte_chain_free(pte_chain); } lru_cache_add_active(page); flush_dcache_page(page); flush_page_to_ram(page); - set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, PAGE_COPY)))); - pte_chain = page_add_rmap(page, pte, pte_chain); - pte_unmap(pte); - tsk->mm->rss++; - spin_unlock(&tsk->mm->page_table_lock); - /* no need for flush_tlb */ - pte_chain_free(pte_chain); + page_cache_release(page); /* want to add PAGE_MMUCOUNT-1 */ return; out: spin_unlock(&tsk->mm->page_table_lock); -out_sig: +out_nolock: __free_page(page); force_sig(SIGKILL, tsk); pte_chain_free(pte_chain); @@ -392,7 +396,7 @@ int setup_arg_pages(struct linux_binprm if (!mpnt) return -ENOMEM; - if (!vm_enough_memory((STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) { + if (!vm_enough_memory((STACK_TOP - (MMUPAGE_MASK & (unsigned long) bprm->p))>>MMUPAGE_SHIFT)) { kmem_cache_free(vm_area_cachep, mpnt); return -ENOMEM; } @@ -402,9 +406,12 @@ int setup_arg_pages(struct linux_binprm mpnt->vm_mm = mm; #ifdef CONFIG_STACK_GROWSUP mpnt->vm_start = stack_base; - mpnt->vm_end = 
PAGE_MASK & - (PAGE_SIZE - 1 + (unsigned long) bprm->p); + mpnt->vm_end = MMUPAGE_MASK & + (MMUPAGE_SIZE - 1 + (unsigned long) bprm->p); #else + /* + * wild guess. NFI if this is remotely sound. + */ mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; mpnt->vm_end = STACK_TOP; #endif @@ -414,16 +421,16 @@ int setup_arg_pages(struct linux_binprm mpnt->vm_pgoff = 0; mpnt->vm_file = NULL; INIT_LIST_HEAD(&mpnt->shared); - mpnt->vm_private_data = (void *) 0; + mpnt->vm_private_data = NULL; insert_vm_struct(mm, mpnt); - mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> MMUPAGE_SHIFT; } for (i = 0 ; i < MAX_ARG_PAGES ; i++) { struct page *page = bprm->page[i]; if (page) { bprm->page[i] = NULL; - put_dirty_page(current,page,stack_base); + put_dirty_page(current, page, stack_base); } stack_base += PAGE_SIZE; } @@ -1140,6 +1147,7 @@ out_file: allow_write_access(bprm.file); fput(bprm.file); } + return retval; } diff -urpN linux-2.5.61/fs/file_table.c pgcl-2.5.61-1/fs/file_table.c --- linux-2.5.61/fs/file_table.c 2003-02-14 15:51:09.000000000 -0800 +++ pgcl-2.5.61-1/fs/file_table.c 2003-02-14 20:44:43.000000000 -0800 @@ -220,7 +220,7 @@ void __init files_init(unsigned long mem * Per default don't use more than 10% of our memory for files. */ - n = (mempages * (PAGE_SIZE / 1024)) / 10; + n = (mempages * (MMUPAGE_SIZE / 1024)) / 10; files_stat.max_files = n; if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; diff -urpN linux-2.5.61/fs/inode.c pgcl-2.5.61-1/fs/inode.c --- linux-2.5.61/fs/inode.c 2003-02-14 15:52:59.000000000 -0800 +++ pgcl-2.5.61-1/fs/inode.c 2003-02-14 20:44:43.000000000 -0800 @@ -1228,16 +1228,19 @@ void __init inode_init(unsigned long mem for (i = 0; i < ARRAY_SIZE(i_wait_queue_heads); i++) init_waitqueue_head(&i_wait_queue_heads[i].wqh); - mempages >>= (14 - PAGE_SHIFT); +#if PAGE_SHIFT <= 14 + mempages >>= 14 - PAGE_SHIFT; +#else + mempages <<= PAGE_SHIFT - 14; +#endif mempages *= sizeof(struct list_head); - for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++) + for (order = 0; (PAGE_SIZE << order) < mempages; order++) ; do { unsigned long tmp; - nr_hash = (1UL << order) * PAGE_SIZE / - sizeof(struct list_head); + nr_hash = (PAGE_SIZE << order)/sizeof(struct list_head); i_hash_mask = (nr_hash - 1); tmp = nr_hash; diff -urpN linux-2.5.61/fs/proc/base.c pgcl-2.5.61-1/fs/proc/base.c --- linux-2.5.61/fs/proc/base.c 2003-02-14 15:51:59.000000000 -0800 +++ pgcl-2.5.61-1/fs/proc/base.c 2003-02-14 20:44:43.000000000 -0800 @@ -31,6 +31,7 @@ #include #include #include +#include /* * For hysterical raisins we keep the same inumbers as in the old procfs. 
@@ -432,29 +433,37 @@ static ssize_t mem_read(struct file * fi size_t count, loff_t *ppos) { struct task_struct *task = proc_task(file->f_dentry->d_inode); - char *page; + char *kbuf; + struct page *page; unsigned long src = *ppos; int ret = -ESRCH; struct mm_struct *mm; - if (!MAY_PTRACE(task)) + if (0 && !MAY_PTRACE(task)) goto out; ret = -ENOMEM; - page = (char *)__get_free_page(GFP_USER); - if (!page) + page = alloc_page(GFP_HIGHUSER); + if (!page) { + printk("alloc_page() failed in mem_read()\n"); goto out; + } + kbuf = kmap(page); ret = 0; mm = get_task_mm(task); - if (!mm) + if (!mm) { + printk("get_task_mm() failed in mem_read()\n"); goto out_free; + } +#if 0 ret = -EIO; if (file->private_data != (void*)((long)current->self_exec_id)) goto out_put; +#endif ret = 0; @@ -462,14 +471,16 @@ static ssize_t mem_read(struct file * fi int this_len, retval; this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - retval = access_process_vm(task, src, page, this_len, 0); + retval = access_process_vm(task, src, kbuf, this_len, 0); if (!retval) { + printk("access_process_vm() failed in mem_read()\n"); if (!ret) ret = -EIO; break; } - if (copy_to_user(buf, page, retval)) { + if (copy_to_user(buf, kbuf, retval)) { + printk("copy_to_user() failed in mem_read()\n"); ret = -EFAULT; break; } @@ -481,15 +492,17 @@ static ssize_t mem_read(struct file * fi } *ppos = src; -out_put: mmput(mm); out_free: - free_page((unsigned long) page); + kunmap(page); + __free_page(page); out: return ret; } +#if 0 #define mem_write NULL +#endif #ifndef mem_write /* This is a security hazard */ @@ -497,26 +510,28 @@ static ssize_t mem_write(struct file * f size_t count, loff_t *ppos) { int copied = 0; - char *page; + char *kbuf; + struct page *page; struct task_struct *task = proc_task(file->f_dentry->d_inode); unsigned long dst = *ppos; - if (!MAY_PTRACE(task)) + if (0 && !MAY_PTRACE(task)) return -ESRCH; - page = (char *)__get_free_page(GFP_USER); + page = alloc_page(GFP_HIGHUSER); if (!page) return -ENOMEM; + kbuf = kmap(page); while (count > 0) { int this_len, retval; this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - if (copy_from_user(page, buf, this_len)) { + if (copy_from_user(kbuf, buf, this_len)) { copied = -EFAULT; break; } - retval = access_process_vm(task, dst, page, this_len, 1); + retval = access_process_vm(task, dst, kbuf, this_len, 1); if (!retval) { if (!copied) copied = -EIO; @@ -528,7 +543,8 @@ static ssize_t mem_write(struct file * f count -= retval; } *ppos = dst; - free_page((unsigned long) page); + kunmap(page); + __free_page(page); return copied; } #endif diff -urpN linux-2.5.61/fs/proc/task_mmu.c pgcl-2.5.61-1/fs/proc/task_mmu.c --- linux-2.5.61/fs/proc/task_mmu.c 2003-02-14 15:51:18.000000000 -0800 +++ pgcl-2.5.61-1/fs/proc/task_mmu.c 2003-02-14 20:45:38.000000000 -0800 @@ -62,7 +62,7 @@ int task_statm(struct mm_struct *mm, int *resident = mm->rss; for (vma = mm->mmap; vma; vma = vma->vm_next) { - int pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int pages = (vma->vm_end - vma->vm_start) >> MMUPAGE_SHIFT; size += pages; if (is_vm_hugetlb_page(vma)) { diff -urpN linux-2.5.61/include/asm-generic/rmap.h pgcl-2.5.61-1/include/asm-generic/rmap.h --- linux-2.5.61/include/asm-generic/rmap.h 2003-02-14 15:51:56.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-generic/rmap.h 2003-02-14 20:44:43.000000000 -0800 @@ -15,7 +15,7 @@ * offset of the page table entry within the page table page * * For CONFIG_HIGHPTE, we need to represent the address of a pte in a - * scalar pte_addr_t. 
The pfn of the pte's page is shifted left by PAGE_SIZE + * scalar pte_addr_t. The pfn of the pte's page is shifted left by MMUPAGE_SIZE * bits and is then ORed with the byte offset of the pte within its page. * * For CONFIG_HIGHMEM4G, the pte_addr_t is 32 bits. 20 for the pfn, 12 for @@ -36,7 +36,7 @@ static inline void pgtable_add_rmap(stru return; #endif page->mapping = (void *)mm; - page->index = address & ~((PTRS_PER_PTE * PAGE_SIZE) - 1); + page->index = address & ~((PTRS_PER_PTE * MMUPAGE_SIZE) - 1); inc_page_state(nr_page_table_pages); } @@ -57,16 +57,32 @@ static inline unsigned long ptep_to_addr { struct page * page = kmap_atomic_to_page(ptep); unsigned long low_bits; - low_bits = ((unsigned long)ptep & ~PAGE_MASK) * PTRS_PER_PTE; + low_bits = ((unsigned long)ptep & ~MMUPAGE_MASK) * PTRS_PER_PTE; return page->index + low_bits; } #if CONFIG_HIGHPTE +/* + * kmap_atomic() virtual regions are not necessarily PAGE_SIZE-aligned + * even though they're of size PAGE_SIZE. So to find the subpfn the + * difference of vaddrs is divided by MMUPAGE_SIZE to determine its + * offset in mmupages into the whole of the kmap_atomic() region. + * Within the kmap_atomic() region, the offset of a given kmap_atomic() + * slot from FIX_KMAP_END is aligned to a PAGE_MMUCOUNT boundary and + * the subpfn is found with the remainder dividing by PAGE_MMUCOUNT. + */ static inline pte_addr_t ptep_to_paddr(pte_t *ptep) { - pte_addr_t paddr; - paddr = ((pte_addr_t)page_to_pfn(kmap_atomic_to_page(ptep))) << PAGE_SHIFT; - return paddr + (pte_addr_t)((unsigned long)ptep & ~PAGE_MASK); + unsigned long pfn, subpfn, vaddr = (unsigned long)ptep; + WARN_ON(vaddr < fix_to_virt(FIX_KMAP_END)); + WARN_ON(vaddr > fix_to_virt(FIX_KMAP_BEGIN)); + if (vaddr < fix_to_virt(FIX_KMAP_END) + || vaddr > fix_to_virt(FIX_KMAP_BEGIN)) + dump_stack(); + subpfn = (vaddr - fix_to_virt(FIX_KMAP_END))/MMUPAGE_SIZE; /*vpfndiff*/ + subpfn &= PAGE_MMUCOUNT-1; /* vpfndiff in Z/nZ */ + pfn = page_to_pfn(kmap_atomic_to_page(ptep)) + subpfn; + return (pte_addr_t)pfn*MMUPAGE_SIZE + (vaddr & ~MMUPAGE_MASK); } #else static inline pte_addr_t ptep_to_paddr(pte_t *ptep) diff -urpN linux-2.5.61/include/asm-i386/dma-mapping.h pgcl-2.5.61-1/include/asm-i386/dma-mapping.h --- linux-2.5.61/include/asm-i386/dma-mapping.h 2003-02-14 15:53:02.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-i386/dma-mapping.h 2003-02-14 20:44:43.000000000 -0800 @@ -51,7 +51,7 @@ dma_map_page(struct device *dev, struct size_t size, enum dma_data_direction direction) { BUG_ON(direction == DMA_NONE); - return (dma_addr_t)(page_to_pfn(page)) * PAGE_SIZE + offset; + return (dma_addr_t)(page_to_pfn(page)) * MMUPAGE_SIZE + offset; } static inline void diff -urpN linux-2.5.61/include/asm-i386/fixmap.h pgcl-2.5.61-1/include/asm-i386/fixmap.h --- linux-2.5.61/include/asm-i386/fixmap.h 2003-02-14 15:51:10.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-i386/fixmap.h 2003-02-14 20:44:43.000000000 -0800 @@ -41,6 +41,17 @@ * TLB entries of such buffers will not be flushed across * task switches. */ + +/* + * Right now we initialize only a single pte table. It can be extended + * easily, subsequent pte tables have to be allocated in one physical + * chunk of RAM. 
+ */ +#define PKMAP_NR(virt) (((virt) - PKMAP_BASE) >> PAGE_SHIFT) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) +#define LAST_PKMAP 1024 +#define LAST_PKMAP_MASK (LAST_PKMAP-1) + enum fixed_addresses { FIX_HOLE, FIX_VSYSCALL, @@ -65,7 +76,9 @@ enum fixed_addresses { #endif #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS*PAGE_MMUCOUNT)-1, + FIX_PKMAP_BEGIN, + FIX_PKMAP_END = FIX_PKMAP_BEGIN + LAST_PKMAP*PAGE_MMUCOUNT - 1, #endif #ifdef CONFIG_ACPI_BOOT FIX_ACPI_BEGIN, @@ -101,11 +114,11 @@ extern void __set_fixmap (enum fixed_add * the start of the fixmap. */ #define FIXADDR_TOP (0xfffff000UL) -#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << MMUPAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << MMUPAGE_SHIFT)) +#define __virt_to_fix(x) ((FIXADDR_TOP - ((x) & MMUPAGE_MASK)) >> MMUPAGE_SHIFT) extern void __this_fixmap_does_not_exist(void); @@ -133,8 +146,13 @@ static inline unsigned long fix_to_virt( static inline unsigned long virt_to_fix(const unsigned long vaddr) { - BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); + if (vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START) { + printk("bad vaddr in virt_to_fix 0x%lx\n", vaddr); + BUG(); + } return __virt_to_fix(vaddr); } +#define PKMAP_BASE fix_to_virt(FIX_PKMAP_END) + #endif diff -urpN linux-2.5.61/include/asm-i386/highmem.h pgcl-2.5.61-1/include/asm-i386/highmem.h --- linux-2.5.61/include/asm-i386/highmem.h 2003-02-14 15:51:11.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-i386/highmem.h 2003-02-14 20:44:43.000000000 -0800 @@ -34,23 +34,8 @@ extern pte_t *pkmap_page_table; extern void kmap_init(void); -/* - * Right now we initialize only a single pte table. It can be extended - * easily, subsequent pte tables have to be allocated in one physical - * chunk of RAM. - */ -#define PKMAP_BASE (0xff800000UL) -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else -#define LAST_PKMAP 1024 -#endif -#define LAST_PKMAP_MASK (LAST_PKMAP-1) -#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) -#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) - -extern void * FASTCALL(kmap_high(struct page *page)); -extern void FASTCALL(kunmap_high(struct page *page)); +void *FASTCALL(kmap_high(struct page *page)); +void FASTCALL(kunmap_high(struct page *page)); void *kmap(struct page *page); void kunmap(struct page *page); diff -urpN linux-2.5.61/include/asm-i386/io.h pgcl-2.5.61-1/include/asm-i386/io.h --- linux-2.5.61/include/asm-i386/io.h 2003-02-14 15:53:01.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-i386/io.h 2003-02-14 20:44:43.000000000 -0800 @@ -95,7 +95,7 @@ static inline void * phys_to_virt(unsign /* * Change "struct page" to physical address. 
*/ -#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) +#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << MMUPAGE_SHIFT) extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); diff -urpN linux-2.5.61/include/asm-i386/io_apic.h pgcl-2.5.61-1/include/asm-i386/io_apic.h --- linux-2.5.61/include/asm-i386/io_apic.h 2003-02-14 15:52:03.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-i386/io_apic.h 2003-02-14 20:44:43.000000000 -0800 @@ -17,7 +17,7 @@ #define IO_APIC_BASE(idx) \ ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \ - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK))) + + (mp_ioapics[idx].mpc_apicaddr & ~MMUPAGE_MASK))) /* * The structure of the IO-APIC: diff -urpN linux-2.5.61/include/asm-i386/mmzone.h pgcl-2.5.61-1/include/asm-i386/mmzone.h --- linux-2.5.61/include/asm-i386/mmzone.h 2003-02-14 15:51:45.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-i386/mmzone.h 2003-02-14 20:44:43.000000000 -0800 @@ -28,18 +28,18 @@ extern struct pglist_data *node_data[]; #define alloc_bootmem_low(x) \ __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, 0) #define alloc_bootmem_pages(x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(NODE_DATA(0), (x), MMUPAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages(x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) + __alloc_bootmem_node(NODE_DATA(0), (x), MMUPAGE_SIZE, 0) #define alloc_bootmem_node(ignore, x) \ __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(ignore, x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(NODE_DATA(0), (x), MMUPAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(ignore, x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) + __alloc_bootmem_node(NODE_DATA(0), (x), MMUPAGE_SIZE, 0) #define node_size(nid) (node_data[nid]->node_size) -#define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn) +#define node_localnr(pfn, nid) (((pfn) - node_data[nid]->node_start_pfn) / PAGE_MMUCOUNT) /* * Following are macros that each numa implmentation must define. @@ -48,25 +48,41 @@ extern struct pglist_data *node_data[]; /* * Given a kernel address, find the home node of the underlying memory. */ -#define kvaddr_to_nid(kaddr) pfn_to_nid(__pa(kaddr) >> PAGE_SHIFT) +#define kvaddr_to_nid(kaddr) pfn_to_nid(__pa(kaddr) >> MMUPAGE_SHIFT) /* * Return a pointer to the node data for node n. */ #define NODE_DATA(nid) (node_data[nid]) +/* + * These names clash. I blame mbligh. + */ #define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) + +/* + * pgdat->node_size is calculated from zone_sizes[], which is in + * units of PAGE_SIZE. I don't trust this. 
+ */ #define node_end_pfn(nid) \ ({ \ pg_data_t *__pgdat = NODE_DATA(nid); \ - __pgdat->node_start_pfn + __pgdat->node_size; \ + __pgdat->node_start_pfn + __pgdat->node_size*PAGE_MMUCOUNT; \ }) #define local_mapnr(kvaddr) \ ({ \ - unsigned long __pfn = __pa(kvaddr) >> PAGE_SHIFT; \ - (__pfn - node_start_pfn(pfn_to_nid(__pfn))); \ + unsigned long __pfn = __pa(kvaddr) >> MMUPAGE_SHIFT; \ + (__pfn - node_start_pfn(pfn_to_nid(__pfn)))/PAGE_MMUCOUNT; \ +}) + +#define local_pfn(pg) \ +({ \ + struct page *__pg = pg; \ + unsigned long __nr; \ + __nr = (unsigned long)(__pg - page_zone(__pg)->zone_mem_map); \ + __nr*PAGE_MMUCOUNT; \ }) #define kern_addr_valid(kaddr) \ @@ -87,10 +103,9 @@ extern struct pglist_data *node_data[]; ({ \ struct page *__page = pg; \ struct zone *__zone = page_zone(__page); \ - (unsigned long)(__page - __zone->zone_mem_map) \ - + __zone->zone_start_pfn; \ + local_pfn(__page) + __zone->zone_start_pfn; \ }) -#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) +#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> MMUPAGE_SHIFT)) /* * pfn_valid should be made as fast as possible, and the current definition * is valid for machines that are NUMA, but still contiguous, which is what diff -urpN linux-2.5.61/include/asm-i386/numaq.h pgcl-2.5.61-1/include/asm-i386/numaq.h --- linux-2.5.61/include/asm-i386/numaq.h 2003-02-14 15:51:47.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-i386/numaq.h 2003-02-14 20:44:43.000000000 -0800 @@ -37,7 +37,7 @@ #define PAGES_PER_ELEMENT (16777216/256) #define pfn_to_pgdat(pfn) NODE_DATA(pfn_to_nid(pfn)) -#define PHYSADDR_TO_NID(pa) pfn_to_nid(pa >> PAGE_SHIFT) +#define PHYSADDR_TO_NID(pa) pfn_to_nid((pa) >> MMUPAGE_SHIFT) #define MAX_NUMNODES 8 extern int pfn_to_nid(unsigned long); extern void get_memcfg_numaq(void); diff -urpN linux-2.5.61/include/asm-i386/page.h pgcl-2.5.61-1/include/asm-i386/page.h --- linux-2.5.61/include/asm-i386/page.h 2003-02-14 15:51:19.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-i386/page.h 2003-02-14 20:44:43.000000000 -0800 @@ -1,13 +1,34 @@ #ifndef _I386_PAGE_H #define _I386_PAGE_H -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 +/* + * One mmupage is represented by one Page Table Entry at the MMU level, + * and corresponds to one page at the user process level: its size is + * the same as param.h EXEC_PAGESIZE (for getpagesize(2) and mmap(2)). + */ +#define MMUPAGE_SHIFT 12 +#define MMUPAGE_SIZE (1 << MMUPAGE_SHIFT) +#define MMUPAGE_MASK (~(MMUPAGE_SIZE-1)) + +/* + * 2**N adjacent mmupages may be clustered to make up one kernel page. + * Reasonable and tested values for PAGE_MMUSHIFT are 0 (4k page), + * 1 (8k page), 2 (16k page), 3 (32k page). Higher values will not + * work without further changes e.g. to unsigned short b_size. + */ +#define PAGE_MMUSHIFT 4 +#define PAGE_MMUCOUNT (1 << PAGE_MMUSHIFT) + +/* + * One kernel page is represented by one struct page (see mm.h), + * and is the kernel's principal unit of memory allocation. 
+ */ +#define PAGE_SHIFT (PAGE_MMUSHIFT + MMUPAGE_SHIFT) #define PAGE_SIZE (1UL << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) -#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) +#define LARGE_PAGE_SIZE (1 << PMD_SHIFT) #ifdef __KERNEL__ #ifndef __ASSEMBLY__ @@ -75,6 +96,7 @@ typedef struct { unsigned long pgprot; } /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) +#define MMUPAGE_ALIGN(addr) (((addr)+MMUPAGE_SIZE-1)&MMUPAGE_MASK) /* * This handles the memory map.. We could make this a config @@ -117,19 +139,28 @@ static __inline__ int get_order(unsigned #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) -#define __MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) -#define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)) #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) + +#if 0 +#define __MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) +#define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)) +#else +#define __MAXMEM \ + ((VMALLOC_START-2*MMUPAGE_SIZE-__PAGE_OFFSET) & LARGE_PAGE_MASK) +#define MAXMEM \ + __pa((VMALLOC_START-2*MMUPAGE_SIZE) & LARGE_PAGE_MASK) +#endif +#define pfn_to_kaddr(pfn) __va((pfn)*MMUPAGE_SIZE) #ifndef CONFIG_DISCONTIGMEM -#define pfn_to_page(pfn) (mem_map + (pfn)) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map)) -#define pfn_valid(pfn) ((pfn) < max_mapnr) +#define pfn_to_page(pfn) (&mem_map[(pfn)/PAGE_MMUCOUNT]) +#define page_to_mapnr(page) ((unsigned long)((page)-mem_map)) +#define page_to_pfn(page) (page_to_mapnr(page)*PAGE_MMUCOUNT) +#define pfn_valid(pfn) ((pfn) < max_mapnr*PAGE_MMUCOUNT) #endif /* !CONFIG_DISCONTIGMEM */ -#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr)/MMUPAGE_SIZE) -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr)/MMUPAGE_SIZE) #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) diff -urpN linux-2.5.61/include/asm-i386/pgalloc.h pgcl-2.5.61-1/include/asm-i386/pgalloc.h --- linux-2.5.61/include/asm-i386/pgalloc.h 2003-02-14 15:51:10.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-i386/pgalloc.h 2003-02-14 20:44:43.000000000 -0800 @@ -14,7 +14,7 @@ static inline void pmd_populate(struct m { set_pmd(pmd, __pmd(_PAGE_TABLE + ((unsigned long long)page_to_pfn(pte) << - (unsigned long long) PAGE_SHIFT))); + (unsigned long long) MMUPAGE_SHIFT))); } /* * Allocate and free page tables. 
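For reference, a freestanding sketch (not part of the patch) of the clustering arithmetic the asm-i386/page.h hunk above establishes. The patch sets PAGE_MMUSHIFT to 4; the value 2 below is assumed only to keep the numbers small, and the pfn is arbitrary.

#include <stdio.h>

/* Standalone restatement of the patch's definitions, for illustration only. */
#define MMUPAGE_SHIFT	12				/* one hardware pte maps 4KiB */
#define MMUPAGE_SIZE	(1UL << MMUPAGE_SHIFT)
#define PAGE_MMUSHIFT	2				/* assumed here; the patch uses 4 */
#define PAGE_MMUCOUNT	(1UL << PAGE_MMUSHIFT)		/* mmupages clustered per kernel page */
#define PAGE_SHIFT	(PAGE_MMUSHIFT + MMUPAGE_SHIFT)
#define PAGE_SIZE	(1UL << PAGE_SHIFT)		/* 16KiB kernel page in this sketch */

int main(void)
{
	unsigned long pfn = 0x12345;			/* arbitrary mmupage frame number */
	unsigned long mapnr = pfn / PAGE_MMUCOUNT;	/* mem_map[] index, as pfn_to_page() computes */
	unsigned long base = mapnr * PAGE_MMUCOUNT;	/* what page_to_pfn() hands back */
	unsigned long off = (pfn % PAGE_MMUCOUNT) * MMUPAGE_SIZE; /* byte offset inside the kernel page */

	printf("pfn %#lx -> mem_map[%#lx], base pfn %#lx, sub-offset %#lx\n",
	       pfn, mapnr, base, off);
	return 0;
}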
diff -urpN linux-2.5.61/include/asm-i386/pgtable-2level.h pgcl-2.5.61-1/include/asm-i386/pgtable-2level.h --- linux-2.5.61/include/asm-i386/pgtable-2level.h 2003-02-14 15:51:47.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-i386/pgtable-2level.h 2003-02-14 20:44:43.000000000 -0800 @@ -17,6 +17,7 @@ #define PTRS_PER_PTE 1024 +#ifndef __ASSEMBLY__ #define pte_ERROR(e) \ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) #define pmd_ERROR(e) \ @@ -49,7 +50,7 @@ static inline int pgd_present(pgd_t pgd) #define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) #define pgd_page(pgd) \ -((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +((unsigned long) __va(pgd_val(pgd) & MMUPAGE_MASK)) static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) { @@ -59,8 +60,10 @@ static inline pmd_t * pmd_offset(pgd_t * #define pte_same(a, b) ((a).pte_low == (b).pte_low) #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pte_none(x) (!(x).pte_low) -#define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT))) -#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) -#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) +#define pte_pfn(x) ((unsigned long)(((x).pte_low>>MMUPAGE_SHIFT))) +#define pfn_pte(pfn, prot) __pte(((pfn)<> PAGE_SHIFT) | - (pte.pte_high << (32 - PAGE_SHIFT)); + return (pte.pte_low >> MMUPAGE_SHIFT) | + (pte.pte_high << (32 - MMUPAGE_SHIFT)); } static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { pte_t pte; - pte.pte_high = page_nr >> (32 - PAGE_SHIFT); - pte.pte_low = (page_nr << PAGE_SHIFT) | pgprot_val(pgprot); + pte.pte_high = page_nr >> (32 - MMUPAGE_SHIFT); + pte.pte_low = (page_nr << MMUPAGE_SHIFT) | pgprot_val(pgprot); return pte; } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - return __pmd(((unsigned long long)page_nr << PAGE_SHIFT) | pgprot_val(pgprot)); + return __pmd(((unsigned long long)page_nr << MMUPAGE_SHIFT) | pgprot_val(pgprot)); } +#endif /* !__ASSEMBLY__ */ + #endif /* _I386_PGTABLE_3LEVEL_H */ diff -urpN linux-2.5.61/include/asm-i386/pgtable.h pgcl-2.5.61-1/include/asm-i386/pgtable.h --- linux-2.5.61/include/asm-i386/pgtable.h 2003-02-14 15:52:24.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-i386/pgtable.h 2003-02-14 20:44:43.000000000 -0800 @@ -28,8 +28,9 @@ extern void paging_init(void); * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -extern unsigned long empty_zero_page[1024]; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) +extern unsigned long empty_zero_page[]; +extern struct page *zero_page; +#define ZERO_PAGE(vaddr) (zero_page) #endif /* !__ASSEMBLY__ */ @@ -38,16 +39,15 @@ extern unsigned long empty_zero_page[102 * implements both the traditional 2-level x86 page tables and the * newer 3-level PAE-mode page tables. */ -#ifndef __ASSEMBLY__ #if CONFIG_X86_PAE # include #else # include #endif +#ifndef __ASSEMBLY__ void pgtable_cache_init(void); - -#endif +#endif /* !__ASSEMBLY__ */ #define __beep() asm("movb $0x3,%al; outb %al,$0x61") @@ -75,15 +75,15 @@ void pgtable_cache_init(void); * The vmalloc() routines leaves a hole of 4kB between each vmalloced * area for the same reason. 
;) */ -#define VMALLOC_OFFSET (8*1024*1024) -#define VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \ - ~(VMALLOC_OFFSET-1)) #define VMALLOC_VMADDR(x) ((unsigned long)(x)) -#if CONFIG_HIGHMEM -# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) -#else -# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) -#endif +#define VMALLOC_END (FIXADDR_START-2*MMUPAGE_SIZE) + +#define __VMALLOC_START (VMALLOC_END - VMALLOC_RESERVE - 2*MMUPAGE_SIZE) +#define VMALLOC_START \ + (high_memory \ + ? max(__VMALLOC_START, (unsigned long)high_memory) \ + : __VMALLOC_START \ + ) /* * The 4MB page is guessing.. Detailed in the infamous "Chapter H" @@ -175,7 +175,7 @@ extern unsigned long pg0[1024]; #define pmd_none(x) (!pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) +#define pmd_bad(x) ((pmd_val(x) & (~MMUPAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) @@ -223,10 +223,10 @@ static inline pte_t pte_modify(pte_t pte #define page_pte(page) page_pte_prot(page, __pgprot(0)) #define pmd_page_kernel(pmd) \ -((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) +((unsigned long) __va(pmd_val(pmd) & MMUPAGE_MASK)) #ifndef CONFIG_DISCONTIGMEM -#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) +#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> MMUPAGE_SHIFT)) #endif /* !CONFIG_DISCONTIGMEM */ #define pmd_large(pmd) \ @@ -247,7 +247,7 @@ static inline pte_t pte_modify(pte_t pte /* Find an entry in the third-level page table.. */ #define __pte_offset(address) \ - (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + (((address) >> MMUPAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) \ ((pte_t *) pmd_page_kernel(*(dir)) + __pte_offset(address)) diff -urpN linux-2.5.61/include/asm-i386/rmap.h pgcl-2.5.61-1/include/asm-i386/rmap.h --- linux-2.5.61/include/asm-i386/rmap.h 2003-02-14 15:51:57.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-i386/rmap.h 2003-02-14 20:44:43.000000000 -0800 @@ -5,10 +5,17 @@ #include #ifdef CONFIG_HIGHPTE +/* + * The byte offset needs to be relative to PAGE_SIZE, the pfn will be + * implicitly truncated to a PAGE_SIZE boundary, the mapping will be + * returned rounded downward, and will need compensation by adding in + * the paddr's offset within the PAGE_SIZE-aligned region to the vaddr + * returned from kmap_atomic(). 
+ */ static inline pte_t *rmap_ptep_map(pte_addr_t pte_paddr) { - unsigned long pfn = (unsigned long)(pte_paddr >> PAGE_SHIFT); - unsigned long off = ((unsigned long)pte_paddr) & ~PAGE_MASK; + unsigned long pfn = (unsigned long)(pte_paddr/MMUPAGE_SIZE); + unsigned long off = (unsigned long)pte_paddr & ~PAGE_MASK; return (pte_t *)((char *)kmap_atomic(pfn_to_page(pfn), KM_PTE2) + off); } diff -urpN linux-2.5.61/include/asm-i386/setup.h pgcl-2.5.61-1/include/asm-i386/setup.h --- linux-2.5.61/include/asm-i386/setup.h 2003-02-14 15:52:03.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-i386/setup.h 2003-02-14 20:44:43.000000000 -0800 @@ -6,15 +6,15 @@ #ifndef _i386_SETUP_H #define _i386_SETUP_H -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -#define PFN_PHYS(x) ((x) << PAGE_SHIFT) +#define PFN_UP(x) (((x) + MMUPAGE_SIZE-1) >> MMUPAGE_SHIFT) +#define PFN_DOWN(x) ((x) >> MMUPAGE_SHIFT) +#define PFN_PHYS(x) ((x) << MMUPAGE_SHIFT) /* * Reserved space for vmalloc and iomap - defined in asm/page.h */ #define MAXMEM_PFN PFN_DOWN(MAXMEM) -#define MAX_NONPAE_PFN (1 << 20) +#define MAX_NONPAE_PFN (1 << (32 - MMUPAGE_SHIFT)) /* * This is set up by the setup-routine at boot-time diff -urpN linux-2.5.61/include/asm-i386/shmparam.h pgcl-2.5.61-1/include/asm-i386/shmparam.h --- linux-2.5.61/include/asm-i386/shmparam.h 2003-02-14 15:51:27.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-i386/shmparam.h 2003-02-14 20:44:43.000000000 -0800 @@ -1,6 +1,6 @@ #ifndef _ASMI386_SHMPARAM_H #define _ASMI386_SHMPARAM_H -#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ +#define SHMLBA MMUPAGE_SIZE /* attach addr a multiple of this */ #endif /* _ASMI386_SHMPARAM_H */ diff -urpN linux-2.5.61/include/asm-i386/thread_info.h pgcl-2.5.61-1/include/asm-i386/thread_info.h --- linux-2.5.61/include/asm-i386/thread_info.h 2003-02-14 15:51:17.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-i386/thread_info.h 2003-02-14 20:44:43.000000000 -0800 @@ -51,6 +51,7 @@ struct thread_info { #endif #define PREEMPT_ACTIVE 0x4000000 +#define THREAD_SIZE (2*MMUPAGE_SIZE) /* * macros/functions for gaining access to the thread information structure @@ -79,14 +80,13 @@ struct thread_info { static inline struct thread_info *current_thread_info(void) { struct thread_info *ti; - __asm__("andl %%esp,%0; ":"=r" (ti) : "0" (~8191UL)); + __asm__("andl %%esp,%0; ":"=r" (ti) : "0" (~(THREAD_SIZE - 1))); return ti; } /* thread information allocation */ -#define THREAD_SIZE (2*PAGE_SIZE) -#define alloc_thread_info() ((struct thread_info *) __get_free_pages(GFP_KERNEL,1)) -#define free_thread_info(ti) free_pages((unsigned long) (ti), 1) +#define alloc_thread_info() ((struct thread_info *) kmalloc(THREAD_SIZE, SLAB_KERNEL)) +#define free_thread_info(ti) kfree(ti) #define get_thread_info(ti) get_task_struct((ti)->task) #define put_thread_info(ti) put_task_struct((ti)->task) @@ -94,7 +94,7 @@ static inline struct thread_info *curren /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg) \ - movl $-8192, reg; \ + movl $~(THREAD_SIZE-1), reg; \ andl %esp, reg #endif diff -urpN linux-2.5.61/include/asm-i386/tlbflush.h pgcl-2.5.61-1/include/asm-i386/tlbflush.h --- linux-2.5.61/include/asm-i386/tlbflush.h 2003-02-14 15:52:43.000000000 -0800 +++ pgcl-2.5.61-1/include/asm-i386/tlbflush.h 2003-02-14 20:44:43.000000000 -0800 @@ -92,8 +92,10 @@ static inline void flush_tlb_mm(struct m static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { + int k; if (vma->vm_mm 
== current->active_mm) - __flush_tlb_one(addr); + for (k = 0; k < PAGE_MMUCOUNT; ++k) + __flush_tlb_one(addr + k*MMUPAGE_SIZE); } static inline void flush_tlb_range(struct vm_area_struct *vma, diff -urpN linux-2.5.61/include/linux/aio.h pgcl-2.5.61-1/include/linux/aio.h --- linux-2.5.61/include/linux/aio.h 2003-02-14 15:51:08.000000000 -0800 +++ pgcl-2.5.61-1/include/linux/aio.h 2003-02-14 20:44:43.000000000 -0800 @@ -104,13 +104,13 @@ struct aio_ring_info { unsigned long mmap_base; unsigned long mmap_size; - struct page **ring_pages; + unsigned long *ring_pages; spinlock_t ring_lock; long nr_pages; unsigned nr, tail; - struct page *internal_pages[AIO_RING_PAGES]; + unsigned long internal_pages[AIO_RING_PAGES]; /* pfn's */ }; struct kioctx { diff -urpN linux-2.5.61/include/linux/highmem.h pgcl-2.5.61-1/include/linux/highmem.h --- linux-2.5.61/include/linux/highmem.h 2003-02-14 15:52:25.000000000 -0800 +++ pgcl-2.5.61-1/include/linux/highmem.h 2003-02-14 20:44:43.000000000 -0800 @@ -39,6 +39,11 @@ static inline void check_highmem_ptes(vo static inline void clear_user_highpage(struct page *page, unsigned long vaddr) { void *addr = kmap_atomic(page, KM_USER0); + if ((unsigned long)addr < PAGE_OFFSET) { + void dump_stack(void); + printk("bad kmap_atomic() addr %p!\n", addr); + dump_stack(); + } clear_user_page(addr, vaddr, page); kunmap_atomic(addr, KM_USER0); } @@ -46,6 +51,11 @@ static inline void clear_user_highpage(s static inline void clear_highpage(struct page *page) { void *kaddr = kmap_atomic(page, KM_USER0); + if ((unsigned long)kaddr < PAGE_OFFSET) { + void dump_stack(void); + printk("bad kmap_atomic() addr %p!\n", kaddr); + dump_stack(); + } clear_page(kaddr); kunmap_atomic(kaddr, KM_USER0); } @@ -61,6 +71,11 @@ static inline void memclear_highpage_flu BUG(); kaddr = kmap_atomic(page, KM_USER0); + if ((unsigned long)kaddr < PAGE_OFFSET) { + void dump_stack(void); + printk("bad kmap_atomic() addr %p!\n", kaddr); + dump_stack(); + } memset((char *)kaddr + offset, 0, size); flush_dcache_page(page); flush_page_to_ram(page); @@ -73,17 +88,58 @@ static inline void copy_user_highpage(st vfrom = kmap_atomic(from, KM_USER0); vto = kmap_atomic(to, KM_USER1); + if ((unsigned long)vfrom < PAGE_OFFSET) { + void dump_stack(void); + printk("bad kmap_atomic() addr %p!\n", vfrom); + dump_stack(); + } + if ((unsigned long)vto < PAGE_OFFSET) { + void dump_stack(void); + printk("bad kmap_atomic() addr %p!\n", vto); + dump_stack(); + } copy_user_page(vto, vfrom, vaddr, to); kunmap_atomic(vfrom, KM_USER0); kunmap_atomic(vto, KM_USER1); } +static inline void copy_user_mmupages(struct page *dst, struct page *src, int offset, int size) +{ + char *vfrom, *vto; + + vfrom = kmap_atomic(src, KM_USER0); + vto = kmap_atomic(dst, KM_USER1); + if ((unsigned long)vfrom < PAGE_OFFSET) { + void dump_stack(void); + printk("bad kmap_atomic() addr %p!\n", vfrom); + dump_stack(); + } + if ((unsigned long)vto < PAGE_OFFSET) { + void dump_stack(void); + printk("bad kmap_atomic() addr %p!\n", vto); + dump_stack(); + } + memcpy(&vto[offset], &vfrom[offset], size); + kunmap_atomic(src, KM_USER0); + kunmap_atomic(dst, KM_USER1); +} + static inline void copy_highpage(struct page *to, struct page *from) { char *vfrom, *vto; vfrom = kmap_atomic(from, KM_USER0); vto = kmap_atomic(to, KM_USER1); + if ((unsigned long)vfrom < PAGE_OFFSET) { + void dump_stack(void); + printk("bad kmap_atomic() addr %p!\n", vfrom); + dump_stack(); + } + if ((unsigned long)vto < PAGE_OFFSET) { + void dump_stack(void); + printk("bad kmap_atomic() addr 
%p!\n", vto); + dump_stack(); + } copy_page(vto, vfrom); kunmap_atomic(vfrom, KM_USER0); kunmap_atomic(vto, KM_USER1); diff -urpN linux-2.5.61/include/linux/ide.h pgcl-2.5.61-1/include/linux/ide.h --- linux-2.5.61/include/linux/ide.h 2003-02-14 15:51:30.000000000 -0800 +++ pgcl-2.5.61-1/include/linux/ide.h 2003-02-14 20:44:43.000000000 -0800 @@ -218,7 +218,7 @@ typedef unsigned char byte; /* used ever * allowing each to have about 256 entries (8 bytes each) from this. */ #define PRD_BYTES 8 -#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES)) +#define PRD_ENTRIES (MMUPAGE_SIZE / (2 * PRD_BYTES)) /* * Our Physical Region Descriptor (PRD) table should be large enough @@ -237,7 +237,7 @@ typedef unsigned char byte; /* used ever * allowing each to have about 256 entries (8 bytes each) from this. */ #define PRD_BYTES 8 -#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES)) +#define PRD_ENTRIES (MMUPAGE_SIZE / (2 * PRD_BYTES)) /* * Some more useful definitions diff -urpN linux-2.5.61/include/linux/mm.h pgcl-2.5.61-1/include/linux/mm.h --- linux-2.5.61/include/linux/mm.h 2003-02-14 15:51:09.000000000 -0800 +++ pgcl-2.5.61-1/include/linux/mm.h 2003-02-14 20:44:43.000000000 -0800 @@ -70,7 +70,7 @@ struct vm_area_struct { struct vm_operations_struct * vm_ops; /* Information about our backing store: */ - unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE + unsigned long vm_pgoff; /* Offset (within vm_file) in MMUPAGE_SIZE units, *not* PAGE_CACHE_SIZE */ struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */ @@ -164,7 +164,8 @@ struct page { atomic_t count; /* Usage count, see below. */ struct list_head list; /* ->mapping has some page lists. */ struct address_space *mapping; /* The inode (or ...) we belong to. */ - unsigned long index; /* Our offset within mapping. */ + unsigned long index; /* Our offset within mapping. + * in PAGE_CACHE_SIZE units. */ struct list_head lru; /* Pageout list, eg. active_list; protected by zone->lru_lock !! 
*/ union { @@ -327,10 +328,19 @@ static inline void set_page_zone(struct page->flags |= zone_num << ZONE_SHIFT; } -static inline void * lowmem_page_address(struct page *page) -{ - return __va( ( (page - page_zone(page)->zone_mem_map) + page_zone(page)->zone_start_pfn) << PAGE_SHIFT); -} + +#if 0 +#define lowmem_page_address(page) __va(page_to_pfn(page)*MMUPAGE_SIZE) +#else + #define lowmem_page_address(page) \ +({ \ + extern unsigned long max_low_pfn; \ + const unsigned long __lpa_pfn = page_to_pfn(page); \ + BUG_ON(max_low_pfn && __lpa_pfn > max_low_pfn); \ + BUG_ON(__lpa_pfn >= (~PAGE_OFFSET+1)/MMUPAGE_SIZE); \ + __va(__lpa_pfn*MMUPAGE_SIZE); \ +}) +#endif #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) #define HASHED_PAGE_VIRTUAL @@ -415,16 +425,18 @@ extern int vmtruncate(struct inode * ino extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)); extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); -extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, unsigned long prot); +extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, unsigned long prot, int subpfn); extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access); extern int make_pages_present(unsigned long addr, unsigned long end); extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); extern int sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long nonblock); -extern struct page * follow_page(struct mm_struct *mm, unsigned long address, int write); -int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, - int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); +/* follow_page()/get_user_pages() works with pfn's now */ +unsigned long follow_page(struct mm_struct *mm, unsigned long address, int write); +int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, int len, int write, int force, + unsigned long *pfns, struct vm_area_struct **vmas); int __set_page_dirty_buffers(struct page *page); int __set_page_dirty_nobuffers(struct page *page); @@ -510,10 +522,10 @@ static inline unsigned long do_mmap(stru unsigned long flag, unsigned long offset) { unsigned long ret = -EINVAL; - if ((offset + PAGE_ALIGN(len)) < offset) + if ((offset + MMUPAGE_ALIGN(len)) < offset) goto out; - if (!(offset & ~PAGE_MASK)) - ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); + if (!(offset & ~MMUPAGE_MASK)) + ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> MMUPAGE_SHIFT); out: return ret; } @@ -590,6 +602,16 @@ static inline struct vm_area_struct * fi } extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr); +/* + * Return byte offset from start of page containing virtual address in + * vma, to start of mmupage containing it: 0 if PAGE_MMUSHIFT 0. 
+ */ +static inline unsigned long vma_suboffset(struct vm_area_struct *vma, unsigned long address) +{ + return (address - vma->vm_start + vma->vm_pgoff * MMUPAGE_SIZE) + & (MMUPAGE_MASK - PAGE_MASK); +} + extern struct page * vmalloc_to_page(void *addr); extern unsigned long get_page_cache_size(void); diff -urpN linux-2.5.61/include/linux/mmzone.h pgcl-2.5.61-1/include/linux/mmzone.h --- linux-2.5.61/include/linux/mmzone.h 2003-02-14 15:51:47.000000000 -0800 +++ pgcl-2.5.61-1/include/linux/mmzone.h 2003-02-14 20:44:43.000000000 -0800 @@ -20,7 +20,7 @@ /* Free memory management - zoned buddy allocator. */ #ifndef CONFIG_FORCE_MAX_ZONEORDER -#define MAX_ORDER 11 +#define MAX_ORDER (11 - PAGE_MMUSHIFT) #else #define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER #endif diff -urpN linux-2.5.61/include/linux/pagemap.h pgcl-2.5.61-1/include/linux/pagemap.h --- linux-2.5.61/include/linux/pagemap.h 2003-02-14 15:51:06.000000000 -0800 +++ pgcl-2.5.61-1/include/linux/pagemap.h 2003-02-14 20:44:43.000000000 -0800 @@ -22,6 +22,9 @@ #define PAGE_CACHE_MASK PAGE_MASK #define PAGE_CACHE_ALIGN(addr) (((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK) +#define PAGE_CACHE_MMUSHIFT (PAGE_CACHE_SHIFT - MMUPAGE_SHIFT) +#define PAGE_CACHE_MMUCOUNT (PAGE_CACHE_SIZE/MMUPAGE_SIZE) + #define page_cache_get(page) get_page(page) #define page_cache_release(page) put_page(page) void release_pages(struct page **pages, int nr, int cold); diff -urpN linux-2.5.61/include/linux/sched.h pgcl-2.5.61-1/include/linux/sched.h --- linux-2.5.61/include/linux/sched.h 2003-02-14 15:51:12.000000000 -0800 +++ pgcl-2.5.61-1/include/linux/sched.h 2003-02-14 20:44:43.000000000 -0800 @@ -195,7 +195,7 @@ struct mm_struct { unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; unsigned long arg_start, arg_end, env_start, env_end; - unsigned long rss, total_vm, locked_vm; + unsigned long rss, total_vm, locked_vm; /* in MMUPAGE_SIZE units */ unsigned long def_flags; unsigned long cpu_vm_mask; unsigned long swap_address; @@ -590,12 +590,7 @@ extern struct mm_struct * start_lazy_tlb extern void end_lazy_tlb(struct mm_struct *mm); /* mmdrop drops the mm and the page tables */ -extern inline void FASTCALL(__mmdrop(struct mm_struct *)); -static inline void mmdrop(struct mm_struct * mm) -{ - if (atomic_dec_and_test(&mm->mm_count)) - __mmdrop(mm); -} +void mmdrop(struct mm_struct * mm); /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); diff -urpN linux-2.5.61/include/linux/shm.h pgcl-2.5.61-1/include/linux/shm.h --- linux-2.5.61/include/linux/shm.h 2003-02-14 15:52:10.000000000 -0800 +++ pgcl-2.5.61-1/include/linux/shm.h 2003-02-14 20:44:43.000000000 -0800 @@ -12,7 +12,7 @@ #define SHMMAX 0x2000000 /* max shared seg size (bytes) */ #define SHMMIN 1 /* min shared seg size (bytes) */ #define SHMMNI 4096 /* max num of segs system wide */ -#define SHMALL (SHMMAX/PAGE_SIZE*(SHMMNI/16)) /* max shm system wide (pages) */ +#define SHMALL (SHMMAX/MMUPAGE_SIZE*(SHMMNI/16)) /* max shm system wide (mmupages) */ #define SHMSEG SHMMNI /* max shared segs per process */ #include diff -urpN linux-2.5.61/include/linux/swap.h pgcl-2.5.61-1/include/linux/swap.h --- linux-2.5.61/include/linux/swap.h 2003-02-14 15:51:08.000000000 -0800 +++ pgcl-2.5.61-1/include/linux/swap.h 2003-02-14 20:44:43.000000000 -0800 @@ -45,7 +45,7 @@ static inline int current_is_kswapd(void */ union swap_header { struct { - char reserved[PAGE_SIZE - 10]; + char reserved[MMUPAGE_SIZE - 10]; char magic[10]; /* SWAP-SPACE 
or SWAPSPACE2 */ } magic; struct { @@ -103,8 +103,8 @@ enum { #define SWAP_CLUSTER_MAX 32 -#define SWAP_MAP_MAX 0x7fff -#define SWAP_MAP_BAD 0x8000 +#define SWAP_MAP_MAX 0xfffe +#define SWAP_MAP_BAD 0xffff /* * The in-memory structure used to track swap areas. diff -urpN linux-2.5.61/init/main.c pgcl-2.5.61-1/init/main.c --- linux-2.5.61/init/main.c 2003-02-14 15:51:19.000000000 -0800 +++ pgcl-2.5.61-1/init/main.c 2003-02-14 20:44:43.000000000 -0800 @@ -359,6 +359,7 @@ static void rest_init(void) cpu_idle(); } + /* * Activate the first processor. */ @@ -409,9 +410,9 @@ asmlinkage void __init start_kernel(void calibrate_delay(); #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start && !initrd_below_start_ok && - initrd_start < min_low_pfn << PAGE_SHIFT) { + initrd_start < min_low_pfn << MMUPAGE_SHIFT) { printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - " - "disabling it.\n",initrd_start,min_low_pfn << PAGE_SHIFT); + "disabling it.\n",initrd_start,min_low_pfn << MMUPAGE_SHIFT); initrd_start = 0; } #endif @@ -459,6 +460,7 @@ static void __init do_initcalls(void) call = &__initcall_start; do { + printk("about to call initcall 0x%p\n", *call); (*call)(); call++; } while (call < &__initcall_end); diff -urpN linux-2.5.61/ipc/shm.c pgcl-2.5.61-1/ipc/shm.c --- linux-2.5.61/ipc/shm.c 2003-02-14 15:51:45.000000000 -0800 +++ pgcl-2.5.61-1/ipc/shm.c 2003-02-14 20:44:43.000000000 -0800 @@ -110,7 +110,7 @@ static void shm_open (struct vm_area_str */ static void shm_destroy (struct shmid_kernel *shp) { - shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; + shm_tot -= (shp->shm_segsz + MMUPAGE_SIZE - 1) >> MMUPAGE_SHIFT; shm_rmid (shp->id); shm_unlock(shp); if (!is_file_hugepages(shp->shm_file)) @@ -169,7 +169,7 @@ static int newseg (key_t key, int shmflg { int error; struct shmid_kernel *shp; - int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; + int numpages = (size + MMUPAGE_SIZE -1) >> MMUPAGE_SHIFT; struct file * file; char name[13]; int id; @@ -705,7 +705,7 @@ asmlinkage long sys_shmat (int shmid, ch * space left for the stack to grow (at least 4 pages). */ if (addr < current->mm->start_stack && - addr > current->mm->start_stack - size - PAGE_SIZE * 5) + addr > current->mm->start_stack - size - MMUPAGE_SIZE * 5) goto invalid; } @@ -747,7 +747,7 @@ asmlinkage long sys_shmdt (char *shmaddr for (shmd = mm->mmap; shmd; shmd = shmdnext) { shmdnext = shmd->vm_next; if ((shmd->vm_ops == &shm_vm_ops || (shmd->vm_flags & VM_HUGETLB)) - && shmd->vm_start - (shmd->vm_pgoff << PAGE_SHIFT) == (ulong) shmaddr) { + && shmd->vm_start - (shmd->vm_pgoff << MMUPAGE_SHIFT) == (ulong) shmaddr) { do_munmap(mm, shmd->vm_start, shmd->vm_end - shmd->vm_start); retval = 0; } diff -urpN linux-2.5.61/kernel/fork.c pgcl-2.5.61-1/kernel/fork.c --- linux-2.5.61/kernel/fork.c 2003-02-14 15:51:12.000000000 -0800 +++ pgcl-2.5.61-1/kernel/fork.c 2003-02-14 20:44:43.000000000 -0800 @@ -171,16 +171,17 @@ void __init fork_init(unsigned long memp task_struct_cachep = kmem_cache_create("task_struct", sizeof(struct task_struct),0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_MUST_HWCACHE_ALIGN, + NULL, NULL); if (!task_struct_cachep) panic("fork_init(): cannot create task_struct SLAB cache"); /* * The default maximum number of threads is set to a safe * value: the thread structures can take up at most half - * of memory. + * of low memory. 
*/ - max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8; + max_threads = mempages / 8; /* * we need to allow at least 20 threads to boot a system */ @@ -256,7 +257,7 @@ static inline int dup_mmap(struct mm_str if(mpnt->vm_flags & VM_DONTCOPY) continue; if (mpnt->vm_flags & VM_ACCOUNT) { - unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; + unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> MMUPAGE_SHIFT; if (!vm_enough_memory(len)) goto fail_nomem; charge += len; @@ -378,8 +379,11 @@ struct mm_struct * mm_alloc(void) * is dropped: either by a lazy thread or by * mmput. Free the page directory and the mm. */ -inline void __mmdrop(struct mm_struct *mm) +void mmdrop(struct mm_struct *mm) { + if (!atomic_dec_and_test(&mm->mm_count)) + return; + BUG_ON(mm == &init_mm); mm_free_pgd(mm); destroy_context(mm); diff -urpN linux-2.5.61/kernel/futex.c pgcl-2.5.61-1/kernel/futex.c --- linux-2.5.61/kernel/futex.c 2003-02-14 15:51:22.000000000 -0800 +++ pgcl-2.5.61-1/kernel/futex.c 2003-02-14 20:44:43.000000000 -0800 @@ -109,14 +109,15 @@ static inline void tell_waiter(struct fu static struct page *__pin_page(unsigned long addr) { struct mm_struct *mm = current->mm; - struct page *page, *tmp; + unsigned long pfn, tmp; int err; /* * Do a quick atomic lookup first - this is the fastpath. */ - page = follow_page(mm, addr, 0); - if (likely(page != NULL)) { + pfn = follow_page(mm, addr, 0); + if (likely(pfn != 0)) { + struct page *page = pfn_to_page(pfn); if (!PageReserved(page)) get_page(page); return page; @@ -130,7 +131,7 @@ repeat_lookup: unlock_futex_mm(); down_read(&mm->mmap_sem); - err = get_user_pages(current, mm, addr, 1, 0, 0, &page, NULL); + err = get_user_pages(current, mm, addr, 1, 0, 0, &pfn, NULL); up_read(&mm->mmap_sem); lock_futex_mm(); @@ -142,12 +143,18 @@ repeat_lookup: * check for races: */ tmp = follow_page(mm, addr, 0); - if (tmp != page) { + + /* + * XXX: this is weird, it can refer to a different pfn in the + * same page. Counts as a race in my book. + */ + if (tmp != pfn) { + struct page *page = pfn_to_page(pfn); put_page(page); goto repeat_lookup; } - return page; + return pfn ? pfn_to_page(pfn) : NULL; } static inline void unpin_page(struct page *page) @@ -173,6 +180,11 @@ static int futex_wake(unsigned long uadd return -EFAULT; } + /* + * XXX: I broke this. + * This needs to include a suboffset w/in the struct page's area. 
+ * -- wli + */ head = hash_futex(page, offset); list_for_each_safe(i, next, head) { @@ -442,7 +454,7 @@ asmlinkage int sys_futex(unsigned long u unsigned long pos_in_page; int ret; - pos_in_page = uaddr % PAGE_SIZE; + pos_in_page = uaddr % MMUPAGE_SIZE; /* Must be "naturally" aligned */ if (pos_in_page % sizeof(int)) diff -urpN linux-2.5.61/kernel/ksyms.c pgcl-2.5.61-1/kernel/ksyms.c --- linux-2.5.61/kernel/ksyms.c 2003-02-14 15:51:06.000000000 -0800 +++ pgcl-2.5.61-1/kernel/ksyms.c 2003-02-14 20:44:43.000000000 -0800 @@ -122,7 +122,6 @@ EXPORT_SYMBOL(kmap_high); EXPORT_SYMBOL(kunmap_high); EXPORT_SYMBOL(highmem_start_page); EXPORT_SYMBOL(kmap_prot); -EXPORT_SYMBOL(kmap_pte); #endif #ifdef HASHED_PAGE_VIRTUAL EXPORT_SYMBOL(page_address); diff -urpN linux-2.5.61/kernel/ptrace.c pgcl-2.5.61-1/kernel/ptrace.c --- linux-2.5.61/kernel/ptrace.c 2003-02-14 15:53:02.000000000 -0800 +++ pgcl-2.5.61-1/kernel/ptrace.c 2003-02-14 20:44:43.000000000 -0800 @@ -155,37 +155,44 @@ int access_process_vm(struct task_struct struct mm_struct *mm; struct vm_area_struct *vma; struct page *page; + unsigned long pfn = 0; void *old_buf = buf; mm = get_task_mm(tsk); - if (!mm) + if (!mm) { + printk("get_task_mm() failed in access_process_vm()\n"); return 0; + } down_read(&mm->mmap_sem); /* ignore errors, just check how much was sucessfully transfered */ while (len) { int bytes, ret, offset; + unsigned long dst_off; void *maddr; - ret = get_user_pages(current, mm, addr, 1, - write, 1, &page, &vma); - if (ret <= 0) + ret = get_user_pages(current, mm, addr, 1, write, 1, &pfn, &vma); + if (ret <= 0) { + printk("get_uesr_pages() failed in access_process_vm()\n"); break; + } bytes = len; - offset = addr & (PAGE_SIZE-1); - if (bytes > PAGE_SIZE-offset) - bytes = PAGE_SIZE-offset; + offset = addr & ~MMUPAGE_MASK; + if (bytes > MMUPAGE_SIZE-offset) + bytes = MMUPAGE_SIZE-offset; flush_cache_page(vma, addr); + page = pfn_to_page(pfn); maddr = kmap(page); + dst_off = (pfn % PAGE_MMUCOUNT)*MMUPAGE_SIZE; if (write) { - memcpy(maddr + offset, buf, bytes); + memcpy(maddr + offset + dst_off, buf, bytes); flush_page_to_ram(page); flush_icache_user_range(vma, page, addr, bytes); } else { - memcpy(buf, maddr + offset, bytes); + memcpy(buf, maddr + offset + dst_off, bytes); flush_page_to_ram(page); } kunmap(page); diff -urpN linux-2.5.61/mm/bootmem.c pgcl-2.5.61-1/mm/bootmem.c --- linux-2.5.61/mm/bootmem.c 2003-02-14 15:51:26.000000000 -0800 +++ pgcl-2.5.61-1/mm/bootmem.c 2003-02-14 20:44:43.000000000 -0800 @@ -33,10 +33,7 @@ unsigned long __init bootmem_bootmap_pag unsigned long mapsize; mapsize = (pages+7)/8; - mapsize = (mapsize + ~PAGE_MASK) & PAGE_MASK; - mapsize >>= PAGE_SHIFT; - - return mapsize; + return (mapsize + MMUPAGE_SIZE - 1) >> MMUPAGE_SHIFT; } /* @@ -46,14 +43,17 @@ static unsigned long __init init_bootmem unsigned long mapstart, unsigned long start, unsigned long end) { bootmem_data_t *bdata = pgdat->bdata; - unsigned long mapsize = ((end - start)+7)/8; + unsigned long mapsize; pgdat->pgdat_next = pgdat_list; pgdat_list = pgdat; + /* round start down to simplify free_all_bootmem_core() */ + start &= ~(PAGE_MMUCOUNT - 1); + mapsize = ((end - start)+7)/8; mapsize = (mapsize + (sizeof(long) - 1UL)) & ~(sizeof(long) - 1UL); - bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT); - bdata->node_boot_start = (start << PAGE_SHIFT); + bdata->node_bootmem_map = phys_to_virt(mapstart << MMUPAGE_SHIFT); + bdata->node_boot_start = (start << MMUPAGE_SHIFT); bdata->node_low_pfn = end; /* @@ -77,10 +77,10 @@ static void 
__init reserve_bootmem_core( * round up, partially reserved pages are considered * fully reserved. */ - unsigned long sidx = (addr - bdata->node_boot_start)/PAGE_SIZE; + unsigned long sidx = (addr - bdata->node_boot_start)/MMUPAGE_SIZE; unsigned long eidx = (addr + size - bdata->node_boot_start + - PAGE_SIZE-1)/PAGE_SIZE; - unsigned long end = (addr + size + PAGE_SIZE-1)/PAGE_SIZE; + MMUPAGE_SIZE-1)/MMUPAGE_SIZE; + unsigned long end_pfn = (addr + size + MMUPAGE_SIZE-1)/MMUPAGE_SIZE; if (!size) BUG(); @@ -90,13 +90,11 @@ static void __init reserve_bootmem_core( BUG(); if (sidx >= eidx) BUG(); - if ((addr >> PAGE_SHIFT) >= bdata->node_low_pfn) - BUG(); - if (end > bdata->node_low_pfn) + if (end_pfn > bdata->node_low_pfn) BUG(); for (i = sidx; i < eidx; i++) if (test_and_set_bit(i, bdata->node_bootmem_map)) - printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE); + printk("hm, page %08lx reserved twice.\n", i*MMUPAGE_SIZE); } static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) @@ -108,18 +106,18 @@ static void __init free_bootmem_core(boo * considered reserved. */ unsigned long sidx; - unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE; - unsigned long end = (addr + size)/PAGE_SIZE; + unsigned long eidx = (addr + size - bdata->node_boot_start)/MMUPAGE_SIZE; + unsigned long end_pfn = (addr + size)/MMUPAGE_SIZE; if (!size) BUG(); - if (end > bdata->node_low_pfn) + if (end_pfn > bdata->node_low_pfn) BUG(); /* * Round up the beginning of the address. */ - start = (addr + PAGE_SIZE-1) / PAGE_SIZE; - sidx = start - (bdata->node_boot_start/PAGE_SIZE); + start = (addr + MMUPAGE_SIZE-1) / MMUPAGE_SIZE; + sidx = start - (bdata->node_boot_start/MMUPAGE_SIZE); for (i = sidx; i < eidx; i++) { if (!test_and_clear_bit(i, bdata->node_bootmem_map)) @@ -148,7 +146,7 @@ static void * __init __alloc_bootmem_cor unsigned long offset, remaining_size; unsigned long areasize, preferred, incr; unsigned long eidx = bdata->node_low_pfn - (bdata->node_boot_start >> - PAGE_SHIFT); + MMUPAGE_SHIFT); if (!size) BUG(); @@ -159,22 +157,22 @@ static void * __init __alloc_bootmem_cor if (align && (bdata->node_boot_start & (align - 1UL)) != 0) offset = (align - (bdata->node_boot_start & (align - 1UL))); - offset >>= PAGE_SHIFT; + offset >>= MMUPAGE_SHIFT; /* * We try to allocate bootmem pages above 'goal' * first, then we try to allocate lower pages. */ if (goal && (goal >= bdata->node_boot_start) && - ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) { + ((goal >> MMUPAGE_SHIFT) < bdata->node_low_pfn)) { preferred = goal - bdata->node_boot_start; } else preferred = 0; - preferred = ((preferred + align - 1) & ~(align - 1)) >> PAGE_SHIFT; + preferred = ((preferred + align - 1) & ~(align - 1)) >> MMUPAGE_SHIFT; preferred += offset; - areasize = (size+PAGE_SIZE-1)/PAGE_SIZE; - incr = align >> PAGE_SHIFT ? : 1; + areasize = (size+MMUPAGE_SIZE-1)/MMUPAGE_SIZE; + incr = align >> MMUPAGE_SHIFT ? : 1; restart_scan: for (i = preferred; i < eidx; i += incr) { @@ -205,31 +203,31 @@ found: * of this allocation's buffer? If yes then we can 'merge' * the previous partial page with this allocation. 
*/ - if (align < PAGE_SIZE + if (align < MMUPAGE_SIZE && bdata->last_offset && bdata->last_pos+1 == start) { offset = (bdata->last_offset+align-1) & ~(align-1); - if (offset > PAGE_SIZE) + if (offset > MMUPAGE_SIZE) BUG(); - remaining_size = PAGE_SIZE-offset; + remaining_size = MMUPAGE_SIZE-offset; if (size < remaining_size) { areasize = 0; // last_pos unchanged bdata->last_offset = offset+size; - ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + + ret = phys_to_virt(bdata->last_pos*MMUPAGE_SIZE + offset + bdata->node_boot_start); } else { remaining_size = size - remaining_size; - areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE; - ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + + areasize = (remaining_size+MMUPAGE_SIZE-1)/MMUPAGE_SIZE; + ret = phys_to_virt(bdata->last_pos*MMUPAGE_SIZE + offset + bdata->node_boot_start); bdata->last_pos = start+areasize-1; bdata->last_offset = remaining_size; } - bdata->last_offset &= ~PAGE_MASK; + bdata->last_offset &= ~MMUPAGE_MASK; } else { bdata->last_pos = start + areasize - 1; - bdata->last_offset = size & ~PAGE_MASK; - ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start); + bdata->last_offset = size & ~MMUPAGE_MASK; + ret = phys_to_virt(start * MMUPAGE_SIZE + bdata->node_boot_start); } /* * Reserve the area now: @@ -245,49 +243,37 @@ static unsigned long __init free_all_boo { struct page *page = pgdat->node_mem_map; bootmem_data_t *bdata = pgdat->bdata; - unsigned long i, count, total = 0; - unsigned long idx; + unsigned long i, total = 0; + unsigned long idx, mapnr, node_low_mapnr; unsigned long *map; - if (!bdata->node_bootmem_map) BUG(); - - count = 0; - idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); + BUG_ON(!bdata->node_bootmem_map); map = bdata->node_bootmem_map; - for (i = 0; i < idx; ) { - unsigned long v = ~map[i / BITS_PER_LONG]; - if (v) { - unsigned long m; - for (m = 1; m && i < idx; m<<=1, page++, i++) { - if (v & m) { - count++; + i = 0; + idx = bdata->node_low_pfn - (bdata->node_boot_start >> MMUPAGE_SHIFT); + + node_low_mapnr = (bdata->node_low_pfn - bdata->node_boot_start/MMUPAGE_SIZE)/PAGE_MMUCOUNT; + for (mapnr = 0; mapnr < node_low_mapnr; ++mapnr) { + int k, should_free = 1; + for (k = 0; k < PAGE_MMUCOUNT; ++k) + if (test_bit(mapnr*PAGE_MMUCOUNT + k, map)) + should_free = 0; + if (should_free) { + page = &pgdat->node_mem_map[mapnr]; ClearPageReserved(page); set_page_count(page, 1); __free_page(page); + ++total; } } - } else { - i+=BITS_PER_LONG; - page+=BITS_PER_LONG; - } - } - total += count; /* - * Now free the allocator bitmap itself, it's not - * needed anymore: - */ - page = virt_to_page(bdata->node_bootmem_map); - count = 0; - for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) { - count++; - ClearPageReserved(page); - set_page_count(page, 1); - __free_page(page); } - total += count; - bdata->node_bootmem_map = NULL; + * Leak the allocator bitmap; it's not worth saving. + */ + bdata->node_bootmem_map = NULL; + printk("bootmem: freed %lx pages in node %d\n", total, pgdat->node_id); return total; } diff -urpN linux-2.5.61/mm/filemap.c pgcl-2.5.61-1/mm/filemap.c --- linux-2.5.61/mm/filemap.c 2003-02-14 15:51:27.000000000 -0800 +++ pgcl-2.5.61-1/mm/filemap.c 2003-02-14 20:44:43.000000000 -0800 @@ -700,8 +700,8 @@ static inline int fault_in_pages_writeab * If the page was already mapped, this will get a cache miss * for sure, so try to avoid doing it. 
*/ - if (((unsigned long)uaddr & PAGE_MASK) != - ((unsigned long)end & PAGE_MASK)) + if (((unsigned long)uaddr & MMUPAGE_MASK) != + ((unsigned long)end & MMUPAGE_MASK)) ret = __put_user(0, end); } return ret; @@ -716,8 +716,8 @@ static void fault_in_pages_readable(cons if (ret == 0) { const char *end = uaddr + size - 1; - if (((unsigned long)uaddr & PAGE_MASK) != - ((unsigned long)end & PAGE_MASK)) + if (((unsigned long)uaddr & MMUPAGE_MASK) != + ((unsigned long)end & MMUPAGE_MASK)) __get_user(c, (char *)end); } } @@ -978,7 +978,7 @@ static int page_cache_read(struct file * * it in the page cache, and handles the special cases reasonably without * having a lot of duplicated code. */ -struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address, int unused) +struct page *filemap_nopage(struct vm_area_struct * area, unsigned long address, int unused) { int error; struct file *file = area->vm_file; @@ -989,8 +989,8 @@ struct page * filemap_nopage(struct vm_a unsigned long size, pgoff, endoff; int did_readahead; - pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; - endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; + pgoff = (address - area->vm_start)/MMUPAGE_SIZE + area->vm_pgoff; + endoff = (area->vm_end - area->vm_start + MMUPAGE_SIZE - 1)/MMUPAGE_SIZE + area->vm_pgoff; retry_all: /* @@ -998,15 +998,15 @@ retry_all: * accessible.. */ size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if ((pgoff >= size) && (area->vm_mm == current->mm)) + if ((pgoff/PAGE_CACHE_MMUCOUNT >= size) && (area->vm_mm == current->mm)) return NULL; /* * The "size" of the file, as far as mmap is concerned, isn't bigger * than the mapping */ - if (size > endoff) - size = endoff; + if (size > endoff/PAGE_CACHE_MMUCOUNT) + size = endoff/PAGE_CACHE_MMUCOUNT; did_readahead = 0; @@ -1016,23 +1016,23 @@ retry_all: */ if (VM_SequentialReadHint(area)) { did_readahead = 1; - page_cache_readahead(mapping, ra, file, pgoff); + page_cache_readahead(mapping, ra, file, pgoff/PAGE_CACHE_MMUCOUNT); } /* * If the offset is outside the mapping size we're off the end * of a privately mapped file, so we need to map a zero page. */ - if ((pgoff < size) && !VM_RandomReadHint(area)) { + if ((pgoff/PAGE_CACHE_MMUCOUNT < size) && !VM_RandomReadHint(area)) { did_readahead = 1; - page_cache_readaround(mapping, ra, file, pgoff); + page_cache_readaround(mapping, ra, file, pgoff/PAGE_CACHE_MMUCOUNT); } /* * Do we have something in the page cache already? */ retry_find: - page = find_get_page(mapping, pgoff); + page = find_get_page(mapping, pgoff/PAGE_CACHE_MMUCOUNT); if (!page) { if (did_readahead) { handle_ra_miss(mapping,ra); @@ -1062,7 +1062,7 @@ no_cached_page: * We're only likely to ever get here if MADV_RANDOM is in * effect. */ - error = page_cache_read(file, pgoff); + error = page_cache_read(file, pgoff/PAGE_CACHE_MMUCOUNT); /* * The page we want has now been added to the page cache. @@ -1150,7 +1150,7 @@ static struct page * filemap_getpage(str * Do we have something in the page cache already? 
*/ retry_find: - page = find_get_page(mapping, pgoff); + page = find_get_page(mapping, pgoff/PAGE_CACHE_MMUCOUNT); if (!page) { if (nonblock) return NULL; @@ -1266,22 +1266,26 @@ static int filemap_populate(struct vm_ar repeat: size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if (pgoff + (len >> PAGE_CACHE_SHIFT) > size) + if (pgoff + len/MMUPAGE_SIZE > size) return -EINVAL; page = filemap_getpage(file, pgoff, nonblock); if (!page && !nonblock) return -ENOMEM; if (page) { - err = install_page(mm, vma, addr, page, prot); + /* + * page caches bytes index*PAGE_SIZE to index*(PAGE_SIZE+1)-1 + * pgoff % PAGE_MMUCOUNT is the subpfn w/in the page + */ + err = install_page(mm, vma, addr, page, prot, pgoff % PAGE_MMUCOUNT); if (err) { page_cache_release(page); return err; } } - len -= PAGE_SIZE; - addr += PAGE_SIZE; + len -= MMUPAGE_SIZE; + addr += MMUPAGE_SIZE; pgoff++; if (len) goto repeat; diff -urpN linux-2.5.61/mm/fremap.c pgcl-2.5.61-1/mm/fremap.c --- linux-2.5.61/mm/fremap.c 2003-02-14 15:51:07.000000000 -0800 +++ pgcl-2.5.61-1/mm/fremap.c 2003-02-14 20:44:43.000000000 -0800 @@ -47,13 +47,15 @@ static inline void zap_pte(struct mm_str * previously existing mapping. */ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, struct page *page, unsigned long prot) + unsigned long addr, struct page *page, unsigned long prot, + int subpfn) { int err = -ENOMEM; pte_t *pte, entry; pgd_t *pgd; pmd_t *pmd; struct pte_chain *pte_chain; + unsigned long pfn = page_to_pfn(page) + subpfn; pte_chain = pte_chain_alloc(GFP_KERNEL); if (!pte_chain) @@ -74,7 +76,7 @@ int install_page(struct mm_struct *mm, s mm->rss++; flush_page_to_ram(page); flush_icache_page(vma, page); - entry = mk_pte(page, protection_map[prot]); + entry = pfn_pte(pfn, protection_map[prot]); if (prot & PROT_WRITE) entry = pte_mkwrite(pte_mkdirty(entry)); set_pte(pte, entry); @@ -121,8 +123,8 @@ int sys_remap_file_pages(unsigned long s /* * Sanitize the syscall parameters: */ - start = PAGE_ALIGN(start); - size = PAGE_ALIGN(size); + start = MMUPAGE_ALIGN(start); + size = MMUPAGE_ALIGN(size); prot &= 0xf; down_read(&mm->mmap_sem); diff -urpN linux-2.5.61/mm/highmem.c pgcl-2.5.61-1/mm/highmem.c --- linux-2.5.61/mm/highmem.c 2003-02-14 15:51:59.000000000 -0800 +++ pgcl-2.5.61-1/mm/highmem.c 2003-02-14 20:44:43.000000000 -0800 @@ -53,8 +53,6 @@ static int pkmap_count[LAST_PKMAP]; static unsigned int last_pkmap_nr; static spinlock_t kmap_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; -pte_t * pkmap_page_table; - static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); static void flush_all_zero_pkmaps(void) @@ -64,6 +62,8 @@ static void flush_all_zero_pkmaps(void) flush_cache_all(); for (i = 0; i < LAST_PKMAP; i++) { + int j; + unsigned long vaddr = PKMAP_ADDR(i); struct page *page; /* @@ -77,8 +77,14 @@ static void flush_all_zero_pkmaps(void) pkmap_count[i] = 0; /* sanity check */ - if (pte_none(pkmap_page_table[i])) - BUG(); + for (j = 0; j < PAGE_MMUCOUNT; ++j) { + unsigned long addr = vaddr + j*MMUPAGE_SIZE; + pgd_t *pgd = pgd_offset_k(addr); + pmd_t *pmd = pmd_offset(pgd, addr); + pte_t *pte = pte_offset_kernel(pmd, addr); + + BUG_ON(pte_none(*pte)); + } /* * Don't need an atomic fetch-and-clear op here; @@ -87,8 +93,20 @@ static void flush_all_zero_pkmaps(void) * getting the kmap_lock (which is held here). * So no dangers, even with speculative execution. 
*/ - page = pte_page(pkmap_page_table[i]); - pte_clear(&pkmap_page_table[i]); + { + pgd_t *pgd = pgd_offset_k(vaddr); + pmd_t *pmd = pmd_offset(pgd, vaddr); + pte_t *pte = pte_offset_kernel(pmd, vaddr); + page = pte_page(*pte); + } + + for (j = 0; j < PAGE_MMUCOUNT; ++j) { + unsigned long addr = vaddr + j*MMUPAGE_SIZE; + pgd_t *pgd = pgd_offset_k(addr); + pmd_t *pmd = pmd_offset(pgd, addr); + pte_t *pte = pte_offset_kernel(pmd, addr); + pte_clear(pte); + } set_page_address(page, NULL); } @@ -98,7 +116,7 @@ static void flush_all_zero_pkmaps(void) static inline unsigned long map_new_virtual(struct page *page) { unsigned long vaddr; - int count; + int k, count; start: count = LAST_PKMAP; @@ -136,7 +154,13 @@ start: } } vaddr = PKMAP_ADDR(last_pkmap_nr); - set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot)); + for (k = 0; k < PAGE_MMUCOUNT; ++k) { + unsigned long addr = vaddr + k * MMUPAGE_SIZE; + pgd_t *pgd = pgd_offset_k(addr); + pmd_t *pmd = pmd_offset(pgd, addr); + pte_t *pte = pte_offset_kernel(pmd, addr); + set_pte(pte, pfn_pte(page_to_pfn(page) + k, kmap_prot)); + } pkmap_count[last_pkmap_nr] = 1; set_page_address(page, (void *)vaddr); @@ -380,7 +404,7 @@ static void __blk_queue_bounce(request_q /* * is destination page below bounce pfn? */ - if ((page - page_zone(page)->zone_mem_map) + (page_zone(page)->zone_start_pfn) < q->bounce_pfn) + if (page_to_pfn(page) < q->bounce_pfn) continue; /* @@ -478,10 +502,17 @@ void check_highmem_ptes(void) preempt_disable(); for (type = 0; type < KM_TYPE_NR; type++) { + int k; idx = type + KM_TYPE_NR*smp_processor_id(); - if (!pte_none(*(kmap_pte-idx))) { - printk("scheduling with KM_TYPE %d held!\n", type); - BUG(); + for (k = 0; k < PAGE_MMUCOUNT; ++k) { + unsigned long addr = __fix_to_virt(FIX_KMAP_END) + idx*PAGE_SIZE + k*MMUPAGE_SIZE; + pgd_t *pgd = pgd_offset_k(addr); + pmd_t *pmd = pmd_offset(pgd, addr); + pte_t *pte = pte_offset_kernel(pmd, addr); + if (!pte_none(*pte)) { + printk("scheduling with KM_TYPE %d held!\n", type); + BUG(); + } } } preempt_enable(); diff -urpN linux-2.5.61/mm/madvise.c pgcl-2.5.61-1/mm/madvise.c --- linux-2.5.61/mm/madvise.c 2003-02-14 15:51:22.000000000 -0800 +++ pgcl-2.5.61-1/mm/madvise.c 2003-02-14 20:44:43.000000000 -0800 @@ -60,10 +60,10 @@ static long madvise_willneed(struct vm_a if (!file) return -EBADF; - start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + start = ((start - vma->vm_start) >> MMUPAGE_SHIFT) + vma->vm_pgoff; if (end > vma->vm_end) end = vma->vm_end; - end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + end = ((end - vma->vm_start) >> MMUPAGE_SHIFT) + vma->vm_pgoff; do_page_cache_readahead(file->f_dentry->d_inode->i_mapping, file, start, max_sane_readahead(end - start)); @@ -170,9 +170,9 @@ asmlinkage long sys_madvise(unsigned lon down_write(&current->mm->mmap_sem); - if (start & ~PAGE_MASK) goto out; - len = (len + ~PAGE_MASK) & PAGE_MASK; + len = (len + ~MMUPAGE_MASK) & MMUPAGE_MASK; end = start + len; if (end < start) goto out; diff -urpN linux-2.5.61/mm/memory.c pgcl-2.5.61-1/mm/memory.c --- linux-2.5.61/mm/memory.c 2003-02-14 15:51:42.000000000 -0800 +++ pgcl-2.5.61-1/mm/memory.c 2003-02-14 20:50:49.000000000 -0800 @@ -340,7 +340,7 @@ cont_copy_pte_range: src_pte = pte_offset_map_nested(src_pmd, address); cont_copy_pte_range_noset: - address += PAGE_SIZE; + address += MMUPAGE_SIZE; if (address >= end) { pte_unmap_nested(src_pte); pte_unmap(dst_pte); @@ -386,8 +386,8 @@ zap_pte_range(struct mmu_gather *tlb, pm offset = address 
& ~PMD_MASK; if (offset + size > PMD_SIZE) size = PMD_SIZE - offset; - size &= PAGE_MASK; - for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) { + size &= MMUPAGE_MASK; + for (offset=0; offset < size; ptep++, offset += MMUPAGE_SIZE) { pte_t pte = *ptep; if (pte_none(pte)) continue; @@ -466,12 +466,12 @@ void unmap_page_range(struct mmu_gather /* Dispose of an entire struct mmu_gather per rescheduling point */ #if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) -#define ZAP_BLOCK_SIZE (FREE_PTE_NR * PAGE_SIZE) +#define ZAP_BLOCK_SIZE (FREE_PTE_NR * MMUPAGE_SIZE) #endif /* For UP, 256 pages at a time gives nice low latency */ #if !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) -#define ZAP_BLOCK_SIZE (256 * PAGE_SIZE) +#define ZAP_BLOCK_SIZE (256 * MMUPAGE_SIZE) #endif /* No preempt: go for the best straight-line efficiency */ @@ -516,10 +516,10 @@ int unmap_vmas(struct mmu_gather **tlbp, if (vma) { /* debug. killme. */ if (end_addr <= vma->vm_start) - printk("%s: end_addr(0x%08lx) <= vm_start(0x%08lx)\n", + pr_debug("%s: end_addr(0x%08lx) <= vm_start(0x%08lx)\n", __FUNCTION__, end_addr, vma->vm_start); if (start_addr >= vma->vm_end) - printk("%s: start_addr(0x%08lx) <= vm_end(0x%08lx)\n", + pr_debug("%s: start_addr(0x%08lx) <= vm_end(0x%08lx)\n", __FUNCTION__, start_addr, vma->vm_end); } @@ -535,7 +535,7 @@ int unmap_vmas(struct mmu_gather **tlbp, continue; if (vma->vm_flags & VM_ACCOUNT) - *nr_accounted += (end - start) >> PAGE_SHIFT; + *nr_accounted += (end - start) >> MMUPAGE_SHIFT; ret++; while (start != end) { @@ -560,7 +560,7 @@ int unmap_vmas(struct mmu_gather **tlbp, zap_bytes = ZAP_BLOCK_SIZE; } if (vma->vm_next && vma->vm_next->vm_start < vma->vm_end) - printk("%s: VMA list is not sorted correctly!\n", + pr_debug("%s: VMA list is not sorted correctly!\n", __FUNCTION__); } return ret; @@ -600,18 +600,19 @@ void zap_page_range(struct vm_area_struc * Do a quick page-table lookup for a single page. * mm->page_table_lock must be held. */ -struct page * -follow_page(struct mm_struct *mm, unsigned long address, int write) +unsigned long follow_page(struct mm_struct *mm, unsigned long address, int write) { pgd_t *pgd; pmd_t *pmd; pte_t *ptep, pte; unsigned long pfn; - struct vm_area_struct *vma; +#if 0 + struct vm_area_struct *vma; vma = hugepage_vma(mm, address); if (vma) return follow_huge_addr(mm, vma, address, write); +#endif pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || pgd_bad(*pgd)) @@ -620,8 +621,15 @@ follow_page(struct mm_struct *mm, unsign pmd = pmd_offset(pgd, address); if (pmd_none(*pmd)) goto out; + + /* + * hugetlb's still broken in pgcl; not difficult to fix, + * but an unnecessary distraction while it's in flux + */ +#if 0 if (pmd_huge(*pmd)) return follow_huge_pmd(mm, address, pmd, write); +#endif if (pmd_bad(*pmd)) goto out; @@ -635,12 +643,12 @@ follow_page(struct mm_struct *mm, unsign if (!write || (pte_write(pte) && pte_dirty(pte))) { pfn = pte_pfn(pte); if (pfn_valid(pfn)) - return pfn_to_page(pfn); + return pfn; /* pfn_to_page(pfn) */ } } out: - return NULL; + return 0; /* NULL */ } /* @@ -652,14 +660,23 @@ out: static inline struct page *get_page_map(struct page *page) { if (!pfn_valid(page_to_pfn(page))) - return 0; + return NULL; return page; } +static inline unsigned long get_pfn_map(unsigned long pfn) +{ + return pfn_valid(pfn) ? pfn : 0; +} + +/* + * This puppy is handing back MMUPAGE_SIZE -sized slots. + * Callers need auditing. 
+ */ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, int write, int force, - struct page **pages, struct vm_area_struct **vmas) + unsigned long *pfns, struct vm_area_struct **vmas) { int i; unsigned int flags; @@ -673,23 +690,46 @@ int get_user_pages(struct task_struct *t i = 0; do { - struct vm_area_struct * vma; + struct vm_area_struct *vma; vma = find_extend_vma(mm, start); - if (!vma || (pages && (vma->vm_flags & VM_IO)) - || !(flags & vma->vm_flags)) - return i ? : -EFAULT; + if (!vma || (pfns && (vma->vm_flags & VM_IO)) + || !(flags & vma->vm_flags)) { + if (!i) { + if (!vma) { + printk("get_user_pages(): no vma" + "for address 0x%lx\n", start); + for (vma=mm->mmap;vma;vma=vma->vm_next) { + printk("[0x%lx,0x%lx)\n", + vma->vm_start, + vma->vm_end); + if (start >= vma->vm_start + && start < vma->vm_end) + printk("bogon!!!\n"); + } + } else if (pfns && (vma->vm_flags & VM_IO)) + printk("get_user_pages(): VM_IO!\n"); + else if (!(flags & vma->vm_flags)) + printk("get_user_pages(): no flags\n"); + else + printk("get_user_pages(): unknown!\n"); + } + return i ? i : -EFAULT; + } +#if 0 + /* I just broke this. Fix it eventually. */ if (is_vm_hugetlb_page(vma)) { i = follow_hugetlb_page(mm, vma, pages, vmas, &start, &len, i); continue; } +#endif spin_lock(&mm->page_table_lock); do { - struct page *map; - while (!(map = follow_page(mm, start, write))) { + unsigned long map_pfn; + while (!(map_pfn = follow_page(mm, start, write))) { spin_unlock(&mm->page_table_lock); switch (handle_mm_fault(mm,vma,start,write)) { case VM_FAULT_MINOR: @@ -699,36 +739,50 @@ int get_user_pages(struct task_struct *t tsk->maj_flt++; break; case VM_FAULT_SIGBUS: + if (!i) + printk("get_user_pages(): VM_FAULT_SIGBUS\n"); return i ? i : -EFAULT; case VM_FAULT_OOM: + if (!i) + printk("get_user_pages(): VM_FAULT_OOM\n"); return i ? 
i : -ENOMEM; default: BUG(); } spin_lock(&mm->page_table_lock); } - if (pages) { - pages[i] = get_page_map(map); - if (!pages[i]) { + if (pfns) { + pfns[i] = get_pfn_map(map_pfn); + if (!pfns[i]) { spin_unlock(&mm->page_table_lock); - while (i--) - page_cache_release(pages[i]); + while (i--) { + struct page *map; + map = pfn_to_page(pfns[i]); + page_cache_release(map); + } i = -EFAULT; + printk("get_user_pages(): saw a zero pfn\n"); goto out; } - flush_dcache_page(pages[i]); - if (!PageReserved(pages[i])) - page_cache_get(pages[i]); + if (1) { + struct page *map; + map = pfn_to_page(pfns[i]); + flush_dcache_page(map); + if (!PageReserved(map)) + page_cache_get(map); + } } if (vmas) vmas[i] = vma; i++; - start += PAGE_SIZE; + start += MMUPAGE_SIZE; len--; } while(len && start < vma->vm_end); spin_unlock(&mm->page_table_lock); } while(len); out: + if (i < 0) + printk("get_user_pages() returning an error\n"); return i; } @@ -745,7 +799,7 @@ static void zeromap_pte_range(pte_t * pt pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot)); BUG_ON(!pte_none(*pte)); set_pte(pte, zero_pte); - address += PAGE_SIZE; + address += MMUPAGE_SIZE; pte++; } while (address && (address < end)); } @@ -781,8 +835,7 @@ int zeromap_page_range(struct vm_area_st dir = pgd_offset(mm, address); flush_cache_range(vma, beg, end); - if (address >= end) - BUG(); + BUG_ON(address >= end); spin_lock(&mm->page_table_lock); do { @@ -816,12 +869,12 @@ static inline void remap_pte_range(pte_t end = address + size; if (end > PMD_SIZE) end = PMD_SIZE; - pfn = phys_addr >> PAGE_SHIFT; + pfn = phys_addr >> MMUPAGE_SHIFT; do { BUG_ON(!pte_none(*pte)); if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn))) set_pte(pte, pfn_pte(pfn, prot)); - address += PAGE_SIZE; + address += MMUPAGE_SIZE; pfn++; pte++; } while (address && (address < end)); @@ -862,8 +915,7 @@ int remap_page_range(struct vm_area_stru phys_addr -= from; dir = pgd_offset(mm, from); flush_cache_range(vma, beg, end); - if (from >= end) - BUG(); + BUG_ON(from >= end); spin_lock(&mm->page_table_lock); do { @@ -900,13 +952,13 @@ static inline void establish_pte(struct /* * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock */ -static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, - pte_t *page_table) +static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, pte_t *page_table, unsigned long subpfn) { + pte_t pte = pfn_pte(page_to_pfn(new_page) + subpfn, vma->vm_page_prot); invalidate_vcache(address, vma->vm_mm, new_page); flush_page_to_ram(new_page); flush_cache_page(vma, address); - establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)))); + establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(pte))); } /* @@ -984,10 +1036,11 @@ static int do_wp_page(struct mm_struct * spin_lock(&mm->page_table_lock); page_table = pte_offset_map(pmd, address); if (pte_same(*page_table, pte)) { + unsigned long subpfn = pfn & (PAGE_MMUCOUNT-1); if (PageReserved(old_page)) ++mm->rss; page_remove_rmap(old_page, page_table); - break_cow(vma, new_page, address, page_table); + break_cow(vma, new_page, address, page_table, subpfn); pte_chain = page_add_rmap(new_page, page_table, pte_chain); lru_cache_add_active(new_page); @@ -1029,14 +1082,14 @@ static void vmtruncate_list(struct list_ } /* mapping wholly unaffected? 
*/ - len = len >> PAGE_SHIFT; + len = len >> MMUPAGE_SHIFT; diff = pgoff - vma->vm_pgoff; if (diff >= len) continue; /* Ok, partially affected.. */ - start += diff << PAGE_SHIFT; - len = (len - diff) << PAGE_SHIFT; + start += diff << MMUPAGE_SHIFT; + len = (len - diff) << MMUPAGE_SHIFT; zap_page_range(vma, start, len); } } @@ -1062,7 +1115,7 @@ int vmtruncate(struct inode * inode, lof if (list_empty(&mapping->i_mmap) && list_empty(&mapping->i_mmap_shared)) goto out_unlock; - pgoff = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + pgoff = (offset + MMUPAGE_SIZE - 1) / MMUPAGE_SIZE; if (!list_empty(&mapping->i_mmap)) vmtruncate_list(&mapping->i_mmap, pgoff); if (!list_empty(&mapping->i_mmap_shared)) @@ -1189,6 +1242,9 @@ static int do_swap_page(struct mm_struct remove_exclusive_swap_page(page); mm->rss++; + /* + * This is obviously wrong. How to fix? + */ pte = mk_pte(page, vma->vm_page_prot); if (write_access && can_share_swap_page(page)) pte = pte_mkdirty(pte_mkwrite(pte)); @@ -1234,7 +1290,11 @@ do_anonymous_page(struct mm_struct *mm, page_table = pte_offset_map(pmd, addr); } - /* Read-only mapping of ZERO_PAGE. */ + /* + * Read-only mapping of ZERO_PAGE. + * This is fine. It doesn't matter which piece of + * the zero page is involved. + */ entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); /* ..except if it's a write access */ @@ -1246,6 +1306,7 @@ do_anonymous_page(struct mm_struct *mm, page = alloc_page(GFP_HIGHUSER); if (!page) goto no_mem; + clear_user_highpage(page, addr); spin_lock(&mm->page_table_lock); @@ -1260,6 +1321,10 @@ do_anonymous_page(struct mm_struct *mm, } mm->rss++; flush_page_to_ram(page); + + /* + * NFI if this is correct either. + */ entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); lru_cache_add_active(page); mark_page_accessed(page); @@ -1310,12 +1375,12 @@ do_no_page(struct mm_struct *mm, struct pte_unmap(page_table); spin_unlock(&mm->page_table_lock); - new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0); + new_page = vma->vm_ops->nopage(vma, address & MMUPAGE_MASK, 0); /* no page was available -- either SIGBUS or OOM */ if (new_page == NOPAGE_SIGBUS) return VM_FAULT_SIGBUS; - if (new_page == NOPAGE_OOM) + else if (new_page == NOPAGE_OOM) return VM_FAULT_OOM; pte_chain = pte_chain_alloc(GFP_KERNEL); @@ -1352,10 +1417,13 @@ do_no_page(struct mm_struct *mm, struct */ /* Only go through if we didn't race with anybody else... */ if (pte_none(*page_table)) { + unsigned long pfn; ++mm->rss; flush_page_to_ram(new_page); flush_icache_page(vma, new_page); - entry = mk_pte(new_page, vma->vm_page_prot); + pfn = page_to_pfn(new_page) + + vma_suboffset(vma, address)/MMUPAGE_SIZE; + entry = pfn_pte(pfn, vma->vm_page_prot); if (write_access) entry = pte_mkwrite(pte_mkdirty(entry)); set_pte(page_table, entry); @@ -1506,11 +1574,9 @@ int make_pages_present(unsigned long add vma = find_vma(current->mm, addr); write = (vma->vm_flags & VM_WRITE) != 0; - if (addr >= end) - BUG(); - if (end > vma->vm_end) - BUG(); - len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE; + BUG_ON(addr >= end); + BUG_ON(end > vma->vm_end); + len = (end+MMUPAGE_SIZE-1)/MMUPAGE_SIZE-addr/MMUPAGE_SIZE; ret = get_user_pages(current, current->mm, addr, len, write, 0, NULL, NULL); return ret == len ? 
0 : -1; diff -urpN linux-2.5.61/mm/mincore.c pgcl-2.5.61-1/mm/mincore.c --- linux-2.5.61/mm/mincore.c 2003-02-14 15:52:59.000000000 -0800 +++ pgcl-2.5.61-1/mm/mincore.c 2003-02-14 20:44:43.000000000 -0800 @@ -29,7 +29,7 @@ static unsigned char mincore_page(struct struct address_space * as = vma->vm_file->f_dentry->d_inode->i_mapping; struct page * page; - page = find_get_page(as, pgoff); + page = find_get_page(as, pgoff/PAGE_CACHE_MMUCOUNT); if (page) { present = PageUptodate(page); page_cache_release(page); @@ -42,41 +42,43 @@ static long mincore_vma(struct vm_area_s unsigned long start, unsigned long end, unsigned char * vec) { long error, i, remaining; - unsigned char * tmp; + unsigned char *kaddr; + struct page *page; error = -ENOMEM; if (!vma->vm_file) return error; - start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + start = ((start - vma->vm_start) >> MMUPAGE_SHIFT) + vma->vm_pgoff; if (end > vma->vm_end) end = vma->vm_end; - end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + end = ((end - vma->vm_start) >> MMUPAGE_SHIFT) + vma->vm_pgoff; error = -EAGAIN; - tmp = (unsigned char *) __get_free_page(GFP_KERNEL); - if (!tmp) + page = alloc_page(GFP_HIGHUSER); + if (!page) return error; /* (end - start) is # of pages, and also # of bytes in "vec */ - remaining = (end - start), + remaining = end - start; error = 0; + kaddr = kmap_atomic(page, KM_USER0); for (i = 0; remaining > 0; remaining -= PAGE_SIZE, i++) { int j = 0; long thispiece = (remaining < PAGE_SIZE) ? remaining : PAGE_SIZE; while (j < thispiece) - tmp[j++] = mincore_page(vma, start++); + kaddr[j++] = mincore_page(vma, start++); - if (copy_to_user(vec + PAGE_SIZE * i, tmp, thispiece)) { + if (copy_to_user(vec + PAGE_SIZE * i, kaddr, thispiece)) { error = -EFAULT; break; } } - - free_page((unsigned long) tmp); + kunmap_atomic(kaddr, KM_USER0); + __free_page(page); return error; } @@ -116,15 +118,15 @@ asmlinkage long sys_mincore(unsigned lon down_read(&current->mm->mmap_sem); - if (start & ~PAGE_CACHE_MASK) goto out; - len = (len + ~PAGE_CACHE_MASK) & PAGE_CACHE_MASK; + len = (len + ~MMUPAGE_MASK) & MMUPAGE_MASK; end = start + len; if (end < start) goto out; error = -EFAULT; - if (!access_ok(VERIFY_WRITE, (unsigned long) vec, len >> PAGE_SHIFT)) + if (!access_ok(VERIFY_WRITE, (unsigned long) vec, len >> MMUPAGE_SHIFT)) goto out; error = 0; @@ -164,7 +166,7 @@ asmlinkage long sys_mincore(unsigned lon error = mincore_vma(vma, start, vma->vm_end, &vec[index]); if (error) goto out; - index += (vma->vm_end - start) >> PAGE_CACHE_SHIFT; + index += (vma->vm_end - start)/MMUPAGE_SIZE; start = vma->vm_end; vma = vma->vm_next; } diff -urpN linux-2.5.61/mm/mlock.c pgcl-2.5.61-1/mm/mlock.c --- linux-2.5.61/mm/mlock.c 2003-02-14 15:51:10.000000000 -0800 +++ pgcl-2.5.61-1/mm/mlock.c 2003-02-14 20:44:43.000000000 -0800 @@ -37,7 +37,7 @@ static int mlock_fixup(struct vm_area_st /* * Keep track of amount of locked VM. 
*/ - pages = (end - start) >> PAGE_SHIFT; + pages = (end - start) >> MMUPAGE_SHIFT; if (newflags & VM_LOCKED) { pages = -pages; make_pages_present(start, end); @@ -55,7 +55,7 @@ static int do_mlock(unsigned long start, if (on && !capable(CAP_IPC_LOCK)) return -EPERM; - len = PAGE_ALIGN(len); + len = MMUPAGE_ALIGN(len); end = start + len; if (end < start) return -EINVAL; @@ -101,14 +101,14 @@ asmlinkage long sys_mlock(unsigned long int error = -ENOMEM; down_write(&current->mm->mmap_sem); - len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); - start &= PAGE_MASK; + len = MMUPAGE_ALIGN(len + (start & ~MMUPAGE_MASK)); + start &= MMUPAGE_MASK; - locked = len >> PAGE_SHIFT; + locked = len >> MMUPAGE_SHIFT; locked += current->mm->locked_vm; lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; - lock_limit >>= PAGE_SHIFT; + lock_limit >>= MMUPAGE_SHIFT; /* check against resource limits */ if (locked > lock_limit) @@ -130,8 +130,8 @@ asmlinkage long sys_munlock(unsigned lon int ret; down_write(&current->mm->mmap_sem); - len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); - start &= PAGE_MASK; + len = MMUPAGE_ALIGN(len + (start & ~MMUPAGE_MASK)); + start &= MMUPAGE_MASK; ret = do_mlock(start, len, 0); up_write(&current->mm->mmap_sem); return ret; @@ -175,7 +175,7 @@ asmlinkage long sys_mlockall(int flags) goto out; lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur; - lock_limit >>= PAGE_SHIFT; + lock_limit >>= MMUPAGE_SHIFT; ret = -ENOMEM; if (current->mm->total_vm > lock_limit) diff -urpN linux-2.5.61/mm/mmap.c pgcl-2.5.61-1/mm/mmap.c --- linux-2.5.61/mm/mmap.c 2003-02-14 15:51:59.000000000 -0800 +++ pgcl-2.5.61-1/mm/mmap.c 2003-02-14 20:45:56.000000000 -0800 @@ -73,7 +73,7 @@ int vm_enough_memory(long pages) { unsigned long free, allowed; - atomic_add(pages, &vm_committed_space); + atomic_add((pages+PAGE_MMUCOUNT-1)/PAGE_MMUCOUNT, &vm_committed_space); /* * Sometimes we want to use more memory than we have @@ -102,9 +102,9 @@ int vm_enough_memory(long pages) * factors balance out... */ free += (dentry_stat.nr_unused * sizeof(struct dentry)) >> - PAGE_SHIFT; + MMUPAGE_SHIFT; free += (inodes_stat.nr_unused * sizeof(struct inode)) >> - PAGE_SHIFT; + MMUPAGE_SHIFT; if (free > pages) return 1; @@ -119,7 +119,6 @@ int vm_enough_memory(long pages) return 1; vm_unacct_memory(pages); - return 0; } @@ -169,8 +168,8 @@ asmlinkage unsigned long sys_brk(unsigne if (brk < mm->end_code) goto out; - newbrk = PAGE_ALIGN(brk); - oldbrk = PAGE_ALIGN(mm->brk); + newbrk = MMUPAGE_ALIGN(brk); + oldbrk = MMUPAGE_ALIGN(mm->brk); if (oldbrk == newbrk) goto set_brk; @@ -187,7 +186,7 @@ asmlinkage unsigned long sys_brk(unsigne goto out; /* Check against existing mmap mappings. */ - if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE)) + if (find_vma_intersection(mm, oldbrk, newbrk+MMUPAGE_SIZE)) goto out; /* Ok, looks good - let it rip. */ @@ -514,10 +513,10 @@ unsigned long do_mmap_pgoff(struct file if (len > TASK_SIZE) return -EINVAL; - len = PAGE_ALIGN(len); + len = MMUPAGE_ALIGN(len); /* offset overflow? */ - if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) + if ((pgoff + (len >> MMUPAGE_SHIFT)) < pgoff) return -EINVAL; /* Too many mappings? */ @@ -528,7 +527,7 @@ unsigned long do_mmap_pgoff(struct file * that it represents a valid section of the address space. */ addr = get_unmapped_area(file, addr, len, pgoff, flags); - if (addr & ~PAGE_MASK) + if (addr & ~MMUPAGE_MASK) return addr; /* Do simple checking here so the lower-level routines won't have @@ -545,7 +544,7 @@ unsigned long do_mmap_pgoff(struct file } /* mlock MCL_FUTURE? 
*/ if (vm_flags & VM_LOCKED) { - unsigned long locked = mm->locked_vm << PAGE_SHIFT; + unsigned long locked = mm->locked_vm << MMUPAGE_SHIFT; locked += len; if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN; @@ -613,7 +612,7 @@ munmap_back: } /* Check against address space limit. */ - if ((mm->total_vm << PAGE_SHIFT) + len + if ((mm->total_vm << MMUPAGE_SHIFT) + len > current->rlim[RLIMIT_AS].rlim_cur) return -ENOMEM; @@ -625,7 +624,7 @@ munmap_back: /* * Private writable mapping: check memory availability */ - charged = len >> PAGE_SHIFT; + charged = len >> MMUPAGE_SHIFT; if (!vm_enough_memory(charged)) return -ENOMEM; vm_flags |= VM_ACCOUNT; @@ -709,9 +708,9 @@ munmap_back: kmem_cache_free(vm_area_cachep, vma); } out: - mm->total_vm += len >> PAGE_SHIFT; + mm->total_vm += len >> MMUPAGE_SHIFT; if (vm_flags & VM_LOCKED) { - mm->locked_vm += len >> PAGE_SHIFT; + mm->locked_vm += len >> MMUPAGE_SHIFT; make_pages_present(addr, addr + len); } if (flags & MAP_POPULATE) { @@ -744,7 +743,7 @@ unacct_error: * Ugly calling convention alert: * Return value with the low bits set means error value, * ie - * if (ret & ~PAGE_MASK) + * if (ret & ~MMUPAGE_MASK) * error = ret; * * This function "knows" that -ENOMEM has the bits set. @@ -762,7 +761,7 @@ arch_get_unmapped_area(struct file *filp return -ENOMEM; if (addr) { - addr = PAGE_ALIGN(addr); + addr = MMUPAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && (!vma || addr + len <= vma->vm_start)) @@ -799,7 +798,7 @@ get_unmapped_area(struct file *file, uns if (flags & MAP_FIXED) { if (addr > TASK_SIZE - len) return -ENOMEM; - if (addr & ~PAGE_MASK) + if (addr & ~MMUPAGE_MASK) return -EINVAL; if (file && is_file_hugepages(file)) { unsigned long ret; @@ -897,18 +896,20 @@ int expand_stack(struct vm_area_struct * { unsigned long grow; - if (!(vma->vm_flags & VM_GROWSUP)) + if (!(vma->vm_flags & VM_GROWSUP)) { + printk("bad vma flags in expand_stack()\n"); return -EFAULT; + } /* * vma->vm_start/vm_end cannot change under us because the caller * is required to hold the mmap_sem in read mode. We need to get * the spinlock only before relocating the vma range ourself. */ - address += 4 + PAGE_SIZE - 1; - address &= PAGE_MASK; + address += 4 + MMUPAGE_SIZE - 1; + address &= MMUPAGE_MASK; spin_lock(&vma->vm_mm->page_table_lock); - grow = (address - vma->vm_end) >> PAGE_SHIFT; + grow = (address - vma->vm_end) >> MMUPAGE_SHIFT; /* Overcommit.. */ if (!vm_enough_memory(grow)) { @@ -917,7 +918,7 @@ int expand_stack(struct vm_area_struct * } if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur || - ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > + ((vma->vm_mm->total_vm + grow) << MMUPAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur) { spin_unlock(&vma->vm_mm->page_table_lock); vm_unacct_memory(grow); @@ -936,7 +937,7 @@ find_extend_vma(struct mm_struct *mm, un { struct vm_area_struct *vma, *prev; - addr &= PAGE_MASK; + addr &= MMUPAGE_MASK; vma = find_vma_prev(mm, addr, &prev); if (vma && (vma->vm_start <= addr)) return vma; @@ -960,9 +961,9 @@ int expand_stack(struct vm_area_struct * * is required to hold the mmap_sem in read mode. We need to get * the spinlock only before relocating the vma range ourself. */ - address &= PAGE_MASK; + address &= MMUPAGE_MASK; spin_lock(&vma->vm_mm->page_table_lock); - grow = (vma->vm_start - address) >> PAGE_SHIFT; + grow = (vma->vm_start - address) >> MMUPAGE_SHIFT; /* Overcommit.. 
*/ if (!vm_enough_memory(grow)) { @@ -971,7 +972,7 @@ int expand_stack(struct vm_area_struct * } if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur || - ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > + ((vma->vm_mm->total_vm + grow) << MMUPAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur) { spin_unlock(&vma->vm_mm->page_table_lock); vm_unacct_memory(grow); @@ -992,7 +993,7 @@ find_extend_vma(struct mm_struct * mm, u struct vm_area_struct * vma; unsigned long start; - addr &= PAGE_MASK; + addr &= MMUPAGE_MASK; vma = find_vma(mm,addr); if (!vma) return NULL; @@ -1084,9 +1085,9 @@ static void unmap_vma(struct mm_struct * { size_t len = area->vm_end - area->vm_start; - area->vm_mm->total_vm -= len >> PAGE_SHIFT; + area->vm_mm->total_vm -= len >> MMUPAGE_SHIFT; if (area->vm_flags & VM_LOCKED) - area->vm_mm->locked_vm -= len >> PAGE_SHIFT; + area->vm_mm->locked_vm -= len >> MMUPAGE_SHIFT; /* * Is this a new hole at the lowest possible address? */ @@ -1191,11 +1192,11 @@ int split_vma(struct mm_struct * mm, str if (new_below) { new->vm_end = addr; vma->vm_start = addr; - vma->vm_pgoff += ((addr - new->vm_start) >> PAGE_SHIFT); + vma->vm_pgoff += ((addr - new->vm_start) >> MMUPAGE_SHIFT); } else { vma->vm_end = addr; new->vm_start = addr; - new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT); + new->vm_pgoff += ((addr - vma->vm_start) >> MMUPAGE_SHIFT); } if (new->vm_file) @@ -1218,10 +1219,10 @@ int do_munmap(struct mm_struct *mm, unsi unsigned long end; struct vm_area_struct *mpnt, *prev, *last; - if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start) + if ((start & ~MMUPAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start) return -EINVAL; - if ((len = PAGE_ALIGN(len)) == 0) + if ((len = MMUPAGE_ALIGN(len)) == 0) return -EINVAL; /* Find the first overlapping VMA */ @@ -1300,7 +1301,7 @@ unsigned long do_brk(unsigned long addr, unsigned long flags; struct rb_node ** rb_link, * rb_parent; - len = PAGE_ALIGN(len); + len = MMUPAGE_ALIGN(len); if (!len) return addr; @@ -1308,7 +1309,7 @@ unsigned long do_brk(unsigned long addr, * mlock MCL_FUTURE? */ if (mm->def_flags & VM_LOCKED) { - unsigned long locked = mm->locked_vm << PAGE_SHIFT; + unsigned long locked = mm->locked_vm << MMUPAGE_SHIFT; locked += len; if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN; @@ -1326,14 +1327,14 @@ unsigned long do_brk(unsigned long addr, } /* Check against address space limits *after* clearing old maps... 
*/ - if ((mm->total_vm << PAGE_SHIFT) + len + if ((mm->total_vm << MMUPAGE_SHIFT) + len > current->rlim[RLIMIT_AS].rlim_cur) return -ENOMEM; if (mm->map_count > MAX_MAP_COUNT) return -ENOMEM; - if (!vm_enough_memory(len >> PAGE_SHIFT)) + if (!vm_enough_memory(len >> MMUPAGE_SHIFT)) return -ENOMEM; flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; @@ -1348,7 +1349,7 @@ unsigned long do_brk(unsigned long addr, */ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); if (!vma) { - vm_unacct_memory(len >> PAGE_SHIFT); + vm_unacct_memory(len >> MMUPAGE_SHIFT); return -ENOMEM; } @@ -1366,9 +1367,9 @@ unsigned long do_brk(unsigned long addr, vma_link(mm, vma, prev, rb_link, rb_parent); out: - mm->total_vm += len >> PAGE_SHIFT; + mm->total_vm += len >> MMUPAGE_SHIFT; if (flags & VM_LOCKED) { - mm->locked_vm += len >> PAGE_SHIFT; + mm->locked_vm += len >> MMUPAGE_SHIFT; make_pages_present(addr, addr + len); } return addr; diff -urpN linux-2.5.61/mm/mprotect.c pgcl-2.5.61-1/mm/mprotect.c --- linux-2.5.61/mm/mprotect.c 2003-02-14 15:51:31.000000000 -0800 +++ pgcl-2.5.61-1/mm/mprotect.c 2003-02-14 20:44:43.000000000 -0800 @@ -53,7 +53,7 @@ change_pte_range(pmd_t *pmd, unsigned lo entry = ptep_get_and_clear(pte); set_pte(pte, pte_modify(entry, newprot)); } - address += PAGE_SIZE; + address += MMUPAGE_SIZE; pte++; } while (address && (address < end)); pte_unmap(pte - 1); @@ -174,9 +174,11 @@ mprotect_fixup(struct vm_area_struct *vm */ if (newflags & VM_WRITE) { if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) { - charged = (end - start) >> PAGE_SHIFT; - if (!vm_enough_memory(charged)) + charged = (end - start) >> MMUPAGE_SHIFT; + if (!vm_enough_memory(charged)) { + printk("mprotect_fixup(): OOM\n"); return -ENOMEM; + } newflags |= VM_ACCOUNT; } } @@ -228,9 +230,9 @@ sys_mprotect(unsigned long start, size_t struct vm_area_struct * vma, * next, * prev; int error = -EINVAL; - if (start & ~PAGE_MASK) + if (start & ~MMUPAGE_MASK) return -EINVAL; - len = PAGE_ALIGN(len); + len = MMUPAGE_ALIGN(len); end = start + len; if (end < start) return -EINVAL; diff -urpN linux-2.5.61/mm/mremap.c pgcl-2.5.61-1/mm/mremap.c --- linux-2.5.61/mm/mremap.c 2003-02-14 15:51:47.000000000 -0800 +++ pgcl-2.5.61-1/mm/mremap.c 2003-02-14 20:44:43.000000000 -0800 @@ -162,7 +162,7 @@ static int move_page_tables(struct vm_ar * only a few pages.. This also makes error recovery easier. 
*/ while (offset) { - offset -= PAGE_SIZE; + offset -= MMUPAGE_SIZE; if (move_one_page(vma, old_addr + offset, new_addr + offset)) goto oops_we_failed; } @@ -177,7 +177,7 @@ static int move_page_tables(struct vm_ar */ oops_we_failed: flush_cache_range(vma, new_addr, new_addr + len); - while ((offset += PAGE_SIZE) < len) + while ((offset += MMUPAGE_SIZE) < len) move_one_page(vma, new_addr + offset, old_addr + offset); zap_page_range(vma, new_addr, len); return -1; @@ -253,7 +253,7 @@ static unsigned long move_vma(struct vm_ INIT_LIST_HEAD(&new_vma->shared); new_vma->vm_start = new_addr; new_vma->vm_end = new_addr+new_len; - new_vma->vm_pgoff += (addr-vma->vm_start) >> PAGE_SHIFT; + new_vma->vm_pgoff += (addr - vma->vm_start) >> MMUPAGE_SHIFT; if (new_vma->vm_file) get_file(new_vma->vm_file); if (new_vma->vm_ops && new_vma->vm_ops->open) @@ -287,9 +287,9 @@ static unsigned long move_vma(struct vm_ vma->vm_next->vm_flags |= VM_ACCOUNT; } - current->mm->total_vm += new_len >> PAGE_SHIFT; + current->mm->total_vm += new_len >> MMUPAGE_SHIFT; if (must_fault_in) { - current->mm->locked_vm += new_len >> PAGE_SHIFT; + current->mm->locked_vm += new_len >> MMUPAGE_SHIFT; make_pages_present(fault_in_start, fault_in_end); } return new_addr; @@ -318,15 +318,15 @@ unsigned long do_mremap(unsigned long ad if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) goto out; - if (addr & ~PAGE_MASK) + if (addr & ~MMUPAGE_MASK) goto out; - old_len = PAGE_ALIGN(old_len); - new_len = PAGE_ALIGN(new_len); + old_len = MMUPAGE_ALIGN(old_len); + new_len = MMUPAGE_ALIGN(new_len); /* new_addr is only valid if MREMAP_FIXED is specified */ if (flags & MREMAP_FIXED) { - if (new_addr & ~PAGE_MASK) + if (new_addr & ~MMUPAGE_MASK) goto out; if (!(flags & MREMAP_MAYMOVE)) goto out; @@ -378,19 +378,19 @@ unsigned long do_mremap(unsigned long ad goto out; } if (vma->vm_flags & VM_LOCKED) { - unsigned long locked = current->mm->locked_vm << PAGE_SHIFT; + unsigned long locked = current->mm->locked_vm << MMUPAGE_SHIFT; locked += new_len - old_len; ret = -EAGAIN; if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) goto out; } ret = -ENOMEM; - if ((current->mm->total_vm << PAGE_SHIFT) + (new_len - old_len) + if ((current->mm->total_vm << MMUPAGE_SHIFT) + (new_len - old_len) > current->rlim[RLIMIT_AS].rlim_cur) goto out; if (vma->vm_flags & VM_ACCOUNT) { - charged = (new_len - old_len) >> PAGE_SHIFT; + charged = (new_len - old_len) >> MMUPAGE_SHIFT; if (!vm_enough_memory(charged)) goto out_nc; } @@ -406,7 +406,7 @@ unsigned long do_mremap(unsigned long ad max_addr = vma->vm_next->vm_start; /* can we just expand the current mapping? 
*/ if (max_addr - addr >= new_len) { - int pages = (new_len - old_len) >> PAGE_SHIFT; + int pages = (new_len - old_len) >> MMUPAGE_SHIFT; spin_lock(&vma->vm_mm->page_table_lock); vma->vm_end = addr + new_len; spin_unlock(&vma->vm_mm->page_table_lock); @@ -435,13 +435,13 @@ unsigned long do_mremap(unsigned long ad new_addr = get_unmapped_area(vma->vm_file, 0, new_len, vma->vm_pgoff, map_flags); ret = new_addr; - if (new_addr & ~PAGE_MASK) + if (new_addr & ~MMUPAGE_MASK) goto out; } ret = move_vma(vma, addr, old_len, new_len, new_addr); } out: - if (ret & ~PAGE_MASK) + if (ret & ~MMUPAGE_MASK) vm_unacct_memory(charged); out_nc: return ret; diff -urpN linux-2.5.61/mm/msync.c pgcl-2.5.61-1/mm/msync.c --- linux-2.5.61/mm/msync.c 2003-02-14 15:51:30.000000000 -0800 +++ pgcl-2.5.61-1/mm/msync.c 2003-02-14 20:44:43.000000000 -0800 @@ -59,7 +59,7 @@ static int filemap_sync_pte_range(pmd_t error = 0; do { error |= filemap_sync_pte(pte, vma, address, flags); - address += PAGE_SIZE; + address += MMUPAGE_SIZE; pte++; } while (address && (address < end)); @@ -174,12 +174,12 @@ asmlinkage long sys_msync(unsigned long down_read(¤t->mm->mmap_sem); if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC)) goto out; - if (start & ~PAGE_MASK) + if (start & ~MMUPAGE_MASK) goto out; if ((flags & MS_ASYNC) && (flags & MS_SYNC)) goto out; error = -ENOMEM; - len = (len + ~PAGE_MASK) & PAGE_MASK; + len = (len + ~MMUPAGE_MASK) & MMUPAGE_MASK; end = start + len; if (end < start) goto out; diff -urpN linux-2.5.61/mm/page-writeback.c pgcl-2.5.61-1/mm/page-writeback.c --- linux-2.5.61/mm/page-writeback.c 2003-02-14 15:52:04.000000000 -0800 +++ pgcl-2.5.61-1/mm/page-writeback.c 2003-02-14 20:44:43.000000000 -0800 @@ -341,8 +341,8 @@ static void set_ratelimit(void) ratelimit_pages = total_pages / (num_online_cpus() * 32); if (ratelimit_pages < 16) ratelimit_pages = 16; - if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024) - ratelimit_pages = (4096 * 1024) / PAGE_CACHE_SIZE; + if (ratelimit_pages * PAGE_CACHE_SIZE > PAGE_SIZE * 1024) + ratelimit_pages = (PAGE_SIZE * 1024) / PAGE_CACHE_SIZE; } static int diff -urpN linux-2.5.61/mm/page_alloc.c pgcl-2.5.61-1/mm/page_alloc.c --- linux-2.5.61/mm/page_alloc.c 2003-02-14 15:51:10.000000000 -0800 +++ pgcl-2.5.61-1/mm/page_alloc.c 2003-02-14 20:44:43.000000000 -0800 @@ -57,7 +57,7 @@ static int zone_balance_max[MAX_NR_ZONES */ static int bad_range(struct zone *zone, struct page *page) { - if (page_to_pfn(page) >= zone->zone_start_pfn + zone->spanned_pages) + if (page_to_pfn(page) >= zone->zone_start_pfn + zone->spanned_pages*PAGE_MMUCOUNT) return 1; if (page_to_pfn(page) < zone->zone_start_pfn) return 1; @@ -1155,7 +1155,7 @@ static void __init free_area_init_core(s { unsigned long i, j; unsigned long local_offset; - const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1); + const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-PAGE_MMUSHIFT-1); int cpu, nid = pgdat->node_id; struct page *lmem_map = pgdat->node_mem_map; unsigned long zone_start_pfn = pgdat->node_start_pfn; @@ -1215,7 +1215,7 @@ static void __init free_area_init_core(s INIT_LIST_HEAD(&pcp->list); } printk(" %s zone: %lu pages, LIFO batch:%lu\n", - zone_names[j], realsize, batch); + zone_names[j], realsize*PAGE_MMUCOUNT, batch); INIT_LIST_HEAD(&zone->active_list); INIT_LIST_HEAD(&zone->inactive_list); atomic_set(&zone->refill_counter, 0); @@ -1273,9 +1273,9 @@ static void __init free_area_init_core(s * ZONE_NORMAL is below 4G. 
*/ set_page_address(page, - __va(zone_start_pfn << PAGE_SHIFT)); + __va(zone_start_pfn << MMUPAGE_SHIFT)); #endif - zone_start_pfn++; + zone_start_pfn += PAGE_MMUCOUNT; } local_offset += size; @@ -1347,7 +1347,7 @@ struct pglist_data contig_page_data = { void __init free_area_init(unsigned long *zones_size) { free_area_init_node(0, &contig_page_data, NULL, zones_size, - __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); + __pa(PAGE_OFFSET) >> MMUPAGE_SHIFT, NULL); mem_map = contig_page_data.node_mem_map; } #endif diff -urpN linux-2.5.61/mm/shmem.c pgcl-2.5.61-1/mm/shmem.c --- linux-2.5.61/mm/shmem.c 2003-02-14 15:51:33.000000000 -0800 +++ pgcl-2.5.61-1/mm/shmem.c 2003-02-14 20:44:43.000000000 -0800 @@ -38,14 +38,14 @@ /* This magic number is used in glibc for posix shared memory */ #define TMPFS_MAGIC 0x01021994 -#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) -#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) -#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) +#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) +#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) +#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) #define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) #define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT) -#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) +#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size)/MMUPAGE_SIZE) /* Pretend that each entry is of this size in directory's i_size */ #define BOGO_DIRENT_SIZE 20 @@ -69,14 +69,14 @@ static inline struct page *shmem_dir_all /* * The above definition of ENTRIES_PER_PAGE, and the use of * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE: - * might be reconsidered if it ever diverges from PAGE_SIZE. + * might be reconsidered if it ever diverges from MMUPAGE_SIZE. */ - return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT); + return alloc_pages(gfp_mask, PAGE_CACHE_MMUSHIFT); } static inline void shmem_dir_free(struct page *page) { - __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT); + __free_pages(page, PAGE_CACHE_MMUSHIFT); } static struct page **shmem_dir_map(struct page *page) @@ -295,7 +295,7 @@ static swp_entry_t *shmem_swp_alloc(stru static const swp_entry_t unswapped = {0}; if (sgp != SGP_WRITE && - ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size) + (loff_t)index*PAGE_CACHE_SIZE >= inode->i_size) return ERR_PTR(-EINVAL); while (!(entry = shmem_swp_entry(info, index, &page))) { @@ -328,7 +328,7 @@ static swp_entry_t *shmem_swp_alloc(stru return ERR_PTR(-ENOMEM); } if (sgp != SGP_WRITE && - ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size) { + (loff_t)index*PAGE_CACHE_SIZE >= inode->i_size) { entry = ERR_PTR(-EINVAL); break; } @@ -381,7 +381,7 @@ static void shmem_truncate(struct inode int freed; inode->i_ctime = inode->i_mtime = CURRENT_TIME; - idx = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + idx = (inode->i_size + PAGE_CACHE_SIZE - 1)/PAGE_CACHE_SIZE; if (idx >= info->next_index) return; @@ -497,7 +497,7 @@ static int shmem_notify_change(struct de long change = 0; int error; - if ((attr->ia_valid & ATTR_SIZE) && (attr->ia_size <= SHMEM_MAX_BYTES)) { + if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size <= SHMEM_MAX_BYTES) { /* * Account swap file usage based on new file size, * but just let vmtruncate fail on out-of-range sizes. @@ -515,9 +515,9 @@ static int shmem_notify_change(struct de * truncate_partial_page cannnot miss it were * it assigned to swap. 
@@ -928,16 +928,14 @@ struct page *shmem_nopage(struct vm_area
 {
 	struct inode *inode = vma->vm_file->f_dentry->d_inode;
 	struct page *page = NULL;
-	unsigned long idx;
+	unsigned long pgoff;
 	int error;
 
-	idx = (address - vma->vm_start) >> PAGE_SHIFT;
-	idx += vma->vm_pgoff;
-	idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
+	pgoff = (address - vma->vm_start)/MMUPAGE_SIZE + vma->vm_pgoff;
 
-	error = shmem_getpage(inode, idx, &page, SGP_CACHE);
+	error = shmem_getpage(inode, pgoff/PAGE_CACHE_MMUCOUNT, &page, SGP_CACHE);
 	if (error)
-		return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
+		return error == -ENOMEM ? NOPAGE_OOM : NOPAGE_SIGBUS;
 
 	flush_page_to_ram(page);
 	return page;
@@ -952,28 +950,31 @@ static int shmem_populate(struct vm_area
 	enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
 	unsigned long size;
 
-	size = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
+	size = (inode->i_size + MMUPAGE_SIZE - 1)/MMUPAGE_SIZE;
+	if (pgoff >= size || pgoff + len/MMUPAGE_SIZE > size)
 		return -EINVAL;
 
 	while ((long) len > 0) {
 		struct page *page = NULL;
 		int err;
 		/*
-		 * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE
+		 * Will need changing if PAGE_CACHE_SIZE != MMUPAGE_SIZE
 		 */
-		err = shmem_getpage(inode, pgoff, &page, sgp);
+		err = shmem_getpage(inode, pgoff/PAGE_CACHE_MMUCOUNT, &page, sgp);
 		if (err)
 			return err;
 		if (page) {
-			err = install_page(mm, vma, addr, page, prot);
+			/*
+			 * XXX: I probably just broke this, too. -- wli
+			 */
+			err = install_page(mm, vma, addr, page, prot, pgoff % PAGE_MMUCOUNT);
 			if (err) {
 				page_cache_release(page);
 				return err;
 			}
 		}
-		len -= PAGE_SIZE;
-		addr += PAGE_SIZE;
+		len -= MMUPAGE_SIZE;
+		addr += MMUPAGE_SIZE;
 		pgoff++;
 	}
 	return 0;
@@ -1217,8 +1218,8 @@ shmem_file_write(struct file *file, cons
 		char *kaddr;
 		int left;
 
-		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
-		index = pos >> PAGE_CACHE_SHIFT;
+		offset = pos % PAGE_CACHE_SIZE; /* Within page */
+		index = pos/PAGE_CACHE_SIZE;
 		bytes = PAGE_CACHE_SIZE - offset;
 		if (bytes > count)
 			bytes = count;
@@ -1287,18 +1288,18 @@ static void do_shmem_file_read(struct fi
 	struct address_space *mapping = inode->i_mapping;
 	unsigned long index, offset;
 
-	index = *ppos >> PAGE_CACHE_SHIFT;
-	offset = *ppos & ~PAGE_CACHE_MASK;
+	index = *ppos/PAGE_CACHE_SIZE;
+	offset = *ppos % PAGE_CACHE_SIZE;
 
 	for (;;) {
 		struct page *page = NULL;
 		unsigned long end_index, nr, ret;
 
-		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+		end_index = inode->i_size/PAGE_CACHE_SIZE;
 		if (index > end_index)
 			break;
 		if (index == end_index) {
-			nr = inode->i_size & ~PAGE_CACHE_MASK;
+			nr = inode->i_size % PAGE_CACHE_SIZE;
 			if (nr <= offset)
 				break;
 		}
@@ -1315,9 +1316,9 @@ static void do_shmem_file_read(struct fi
 		 * are called without i_sem protection against truncate
 		 */
 		nr = PAGE_CACHE_SIZE;
-		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+		end_index = inode->i_size/PAGE_CACHE_SIZE;
 		if (index == end_index) {
-			nr = inode->i_size & ~PAGE_CACHE_MASK;
+			nr = inode->i_size % PAGE_CACHE_SIZE;
 			if (nr <= offset) {
 				page_cache_release(page);
 				break;
 			}
@@ -1345,15 +1346,15 @@ static void do_shmem_file_read(struct fi
 		 */
 		ret = actor(desc, page, offset, nr);
 		offset += ret;
-		index += offset >> PAGE_CACHE_SHIFT;
-		offset &= ~PAGE_CACHE_MASK;
+		index += offset/PAGE_CACHE_SIZE;
+		offset %= PAGE_CACHE_SIZE;
 
 		page_cache_release(page);
 		if (ret != nr || !desc->count)
 			break;
 	}
 
-	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
+	*ppos = (loff_t)index*PAGE_CACHE_SIZE + offset;
 	UPDATE_ATIME(inode);
 }
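In shmem_nopage() and shmem_populate() above, faulting addresses and ->vm_pgoff are counted in MMUPAGE_SIZE units, so the page cache index is pgoff/PAGE_CACHE_MMUCOUNT and the remainder names the MMU page within the cache page (the extra argument install_page() grows elsewhere in this patch, which I may well have broken, per the XXX). Roughly, in user space with made-up numbers:

#include <stdio.h>

/* illustrative configuration: 4K MMU pages, 16K page cache pages */
#define MMUPAGE_SIZE		4096UL
#define PAGE_CACHE_SIZE		16384UL
#define PAGE_CACHE_MMUCOUNT	(PAGE_CACHE_SIZE / MMUPAGE_SIZE)

int main(void)
{
	unsigned long vm_start = 0x08048000UL, vm_pgoff = 3;	/* MMUPAGE units */
	unsigned long address  = 0x08052000UL;			/* faulting address */
	unsigned long pgoff, index, subpg;

	pgoff = (address - vm_start) / MMUPAGE_SIZE + vm_pgoff;
	index = pgoff / PAGE_CACHE_MMUCOUNT;	/* which page cache page */
	subpg = pgoff % PAGE_CACHE_MMUCOUNT;	/* which 4K piece of it */
	printf("pgoff=%lu index=%lu subpage=%lu\n", pgoff, index, subpg);
	return 0;
}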
diff -urpN linux-2.5.61/mm/slab.c pgcl-2.5.61-1/mm/slab.c
--- linux-2.5.61/mm/slab.c	2003-02-14 15:52:26.000000000 -0800
+++ pgcl-2.5.61-1/mm/slab.c	2003-02-14 20:44:43.000000000 -0800
@@ -637,7 +637,7 @@ void __init kmem_cache_sizes_init(void)
 	 * Fragmentation resistance on low memory - only use bigger
 	 * page orders on machines with more than 32MB of memory.
 	 */
-	if (num_physpages > (32 << 20) >> PAGE_SHIFT)
+	if (num_physpages > (32 << 20) >> MMUPAGE_SHIFT)
 		slab_break_gfp_order = BREAK_GFP_ORDER_HI;
 	do {
 		/* For performance, all the general caches are L1 aligned.
@@ -934,7 +934,7 @@ kmem_cache_create (const char *name, siz
 		align = L1_CACHE_BYTES;
 
 	/* Determine if the slab management is 'on' or 'off' slab. */
-	if (size >= (PAGE_SIZE>>3))
+	if (size >= PAGE_SIZE/8 || ((flags & SLAB_MUST_HWCACHE_ALIGN) && size >= MMUPAGE_SIZE))
 		/*
 		 * Size is large, assume best to place the slab management obj
 		 * off-slab (should allow better packing of objs).
diff -urpN linux-2.5.61/mm/swap.c pgcl-2.5.61-1/mm/swap.c
--- linux-2.5.61/mm/swap.c	2003-02-14 15:51:31.000000000 -0800
+++ pgcl-2.5.61-1/mm/swap.c	2003-02-14 20:44:43.000000000 -0800
@@ -352,7 +352,7 @@ unsigned int pagevec_lookup(struct pagev
  */
 void __init swap_setup(void)
 {
-	unsigned long megs = num_physpages >> (20 - PAGE_SHIFT);
+	unsigned long megs = num_physpages >> (20 - MMUPAGE_SHIFT);
 
 	/* Use a smaller cluster for small-memory machines */
 	if (megs < 16)
diff -urpN linux-2.5.61/mm/swapfile.c pgcl-2.5.61-1/mm/swapfile.c
--- linux-2.5.61/mm/swapfile.c	2003-02-14 15:51:20.000000000 -0800
+++ pgcl-2.5.61-1/mm/swapfile.c	2003-02-14 20:44:43.000000000 -0800
@@ -425,7 +425,7 @@ static void unuse_pmd(struct vm_area_str
 			pte_chain = pte_chain_alloc(GFP_ATOMIC);
 		unuse_pte(vma, offset+address-vma->vm_start, pte, entry, page, &pte_chain);
-		address += PAGE_SIZE;
+		address += MMUPAGE_SIZE;
 		pte++;
 	} while (address && (address < end));
 	pte_unmap(pte - 1);
diff -urpN linux-2.5.61/mm/vcache.c pgcl-2.5.61-1/mm/vcache.c
--- linux-2.5.61/mm/vcache.c	2003-02-14 15:51:21.000000000 -0800
+++ pgcl-2.5.61-1/mm/vcache.c	2003-02-14 20:44:43.000000000 -0800
@@ -34,7 +34,7 @@ void __attach_vcache(vcache_t *vcache, 
 {
 	struct list_head *hash_head;
 
-	address &= PAGE_MASK;
+	address &= MMUPAGE_MASK;
 	vcache->address = address;
 	vcache->mm = mm;
 	vcache->callback = callback;
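The sizing heuristics above (slab_break_gfp_order, swap_setup()) and the conntrack/tcp hash sizing at the end of the patch all key off num_physpages, which is MMUPAGE-granular here, so every "20 - PAGE_SHIFT" style shift becomes "20 - MMUPAGE_SHIFT". Worked through in user space for 512MB of RAM with 4K MMU pages (example numbers only):

#include <stdio.h>

#define MMUPAGE_SHIFT	12	/* example: 4K MMU pages */

int main(void)
{
	/* num_physpages counts MMU pages in this patch */
	unsigned long num_physpages = (512UL << 20) >> MMUPAGE_SHIFT;

	/* swap_setup(): machine size in megabytes */
	printf("megs = %lu\n", num_physpages >> (20 - MMUPAGE_SHIFT));
	/* kmem_cache_sizes_init(): allow higher slab orders above 32MB */
	printf("big slab orders: %s\n",
	       num_physpages > ((32UL << 20) >> MMUPAGE_SHIFT) ? "yes" : "no");
	return 0;
}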
diff -urpN linux-2.5.61/mm/vmalloc.c pgcl-2.5.61-1/mm/vmalloc.c
--- linux-2.5.61/mm/vmalloc.c	2003-02-14 15:51:49.000000000 -0800
+++ pgcl-2.5.61-1/mm/vmalloc.c	2003-02-14 20:44:43.000000000 -0800
@@ -44,15 +44,12 @@ static void unmap_area_pte(pmd_t *pmd, u
 		end = PMD_SIZE;
 
 	do {
-		pte_t page;
-		page = ptep_get_and_clear(pte);
-		address += PAGE_SIZE;
+		if (pte_present(*pte))
+			pte_clear(pte);
+		else if (!pte_none(*pte))
+			printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n");
 		pte++;
-		if (pte_none(page))
-			continue;
-		if (pte_present(page))
-			continue;
-		printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n");
+		address += MMUPAGE_SIZE;
 	} while (address < end);
 }
@@ -83,57 +80,6 @@ static void unmap_area_pmd(pgd_t *dir, u
 	} while (address < end);
 }
 
-static int map_area_pte(pte_t *pte, unsigned long address,
-			unsigned long size, pgprot_t prot,
-			struct page ***pages)
-{
-	unsigned long end;
-
-	address &= ~PMD_MASK;
-	end = address + size;
-	if (end > PMD_SIZE)
-		end = PMD_SIZE;
-
-	do {
-		struct page *page = **pages;
-
-		if (!pte_none(*pte))
-			printk(KERN_ERR "alloc_area_pte: page already exists\n");
-		if (!page)
-			return -ENOMEM;
-
-		set_pte(pte, mk_pte(page, prot));
-		address += PAGE_SIZE;
-		pte++;
-		(*pages)++;
-	} while (address < end);
-	return 0;
-}
-
-static int map_area_pmd(pmd_t *pmd, unsigned long address,
-			unsigned long size, pgprot_t prot,
-			struct page ***pages)
-{
-	unsigned long end;
-
-	address &= ~PGDIR_MASK;
-	end = address + size;
-	if (end > PGDIR_SIZE)
-		end = PGDIR_SIZE;
-
-	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
-		if (!pte)
-			return -ENOMEM;
-		if (map_area_pte(pte, address, end - address, prot, pages))
-			return -ENOMEM;
-		address = (address + PMD_SIZE) & PMD_MASK;
-		pmd++;
-	} while (address < end);
-
-	return 0;
-}
-
 void unmap_vm_area(struct vm_struct *area)
 {
 	unsigned long address = VMALLOC_VMADDR(area->addr);
@@ -150,30 +96,47 @@ void unmap_vm_area(struct vm_struct *are
 	flush_tlb_kernel_range(VMALLOC_VMADDR(area->addr), end);
 }
 
+#define PTE_TABLE_MASK ((PTRS_PER_PTE-1) * sizeof(pte_t))
+#define PMD_TABLE_MASK ((PTRS_PER_PMD-1) * sizeof(pmd_t))
+
 int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
 {
 	unsigned long address = VMALLOC_VMADDR(area->addr);
-	unsigned long end = address + (area->size-PAGE_SIZE);
-	pgd_t *dir;
+	unsigned long end = address + (area->size-MMUPAGE_SIZE);
+	unsigned long voffset = 0;
+	pgd_t *pgd;
 	int err = 0;
 
-	dir = pgd_offset_k(address);
+	pgd = pgd_offset_k(address);
 	spin_lock(&init_mm.page_table_lock);
 	do {
-		pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
+		pmd_t *pmd = pmd_alloc(&init_mm, pgd, address);
 		if (!pmd) {
 			err = -ENOMEM;
-			break;
-		}
-		if (map_area_pmd(pmd, address, end - address, prot, pages)) {
-			err = -ENOMEM;
-			break;
+			goto out;
 		}
-		address = (address + PGDIR_SIZE) & PGDIR_MASK;
-		dir++;
-	} while (address && (address < end));
-
+		do {
+			pte_t *pte = pte_alloc_kernel(&init_mm, pmd, address);
+			if (!pte) {
+				err = -ENOMEM;
+				goto out;
+			}
+			do {
+				unsigned long pfn;
+				pfn = page_to_pfn((*pages)[voffset/PAGE_SIZE]);
+				pfn += (voffset/MMUPAGE_SIZE) % PAGE_MMUCOUNT;
+				set_pte(pte, pfn_pte(pfn, prot));
+				++pte;
+				address += MMUPAGE_SIZE;
+				voffset += MMUPAGE_SIZE;
+			} while (((unsigned long)pte & PTE_TABLE_MASK) && address < end);
+			++pmd;
+		} while (((unsigned long)pmd & PMD_TABLE_MASK) && address < end);
+		++pgd;
+		/* presumably address could wrap to 0, but I doubt it */
+	} while (address && address < end);
+out:
 	spin_unlock(&init_mm.page_table_lock);
 	flush_cache_all();
 	return err;
 }
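The open-coded loop that replaces map_area_pte()/map_area_pmd() above installs one pte per MMUPAGE, while the pages[] array still holds PAGE_SIZE pages, hence pfn = page_to_pfn(pages[voffset/PAGE_SIZE]) plus the (voffset/MMUPAGE_SIZE) % PAGE_MMUCOUNT subpage offset. Pulled out as a user-space sketch with made-up pfns so the indexing is visible:

#include <stdio.h>

/* illustrative sizes: 4K MMU pages, 4 of them per struct page */
#define MMUPAGE_SIZE	4096UL
#define PAGE_MMUCOUNT	4UL
#define PAGE_SIZE	(MMUPAGE_SIZE * PAGE_MMUCOUNT)

int main(void)
{
	/* pretend pages[0] starts at pfn 1000, pages[1] at 2000, pages[2] at 3000 */
	unsigned long page_pfn[] = { 1000, 2000, 3000 };
	unsigned long voffset;

	for (voffset = 0; voffset < 3 * PAGE_SIZE; voffset += MMUPAGE_SIZE) {
		/* one pte per MMUPAGE, stepping through each pages[] entry */
		unsigned long pfn = page_pfn[voffset / PAGE_SIZE]
				+ (voffset / MMUPAGE_SIZE) % PAGE_MMUCOUNT;
		printf("voffset=%#lx -> pfn %lu\n", voffset, pfn);
	}
	return 0;
}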
@@ -202,7 +165,7 @@ struct vm_struct *get_vm_area(unsigned l
 	/*
 	 * We always allocate a guard page.
 	 */
-	size += PAGE_SIZE;
+	size += MMUPAGE_SIZE;
 	if (unlikely(!size)) {
 		kfree (area);
 		return NULL;
 	}
@@ -231,6 +194,9 @@ found:
 	area->phys_addr = 0;
 	write_unlock(&vmlist_lock);
 
+	printk("vmalloc, returning [0x%p, 0x%p)\n",
+		area->addr, ((char *)area->addr) + area->size);
+
 	return area;
 
 out:
@@ -273,7 +239,7 @@ void __vunmap(void *addr, int deallocate
 	if (!addr)
 		return;
 
-	if ((PAGE_SIZE-1) & (unsigned long)addr) {
+	if ((MMUPAGE_SIZE-1) & (unsigned long)addr) {
 		printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
 		return;
 	}
@@ -291,8 +257,7 @@ void __vunmap(void *addr, int deallocate
 		int i;
 
 		for (i = 0; i < area->nr_pages; i++) {
-			if (unlikely(!area->pages[i]))
-				BUG();
+			BUG_ON(unlikely(!area->pages[i]));
 			__free_page(area->pages[i]);
 		}
 
@@ -379,15 +344,15 @@ void *__vmalloc(unsigned long size, int 
 	struct page **pages;
 	unsigned int nr_pages, array_size, i;
 
-	size = PAGE_ALIGN(size);
-	if (!size || (size >> PAGE_SHIFT) > num_physpages)
+	size = MMUPAGE_ALIGN(size);
+	if (!size || (size >> MMUPAGE_SHIFT) > num_physpages)
 		return NULL;
 
 	area = get_vm_area(size, VM_ALLOC);
 	if (!area)
 		return NULL;
 
-	nr_pages = size >> PAGE_SHIFT;
+	nr_pages = size >> MMUPAGE_SHIFT;
 	array_size = (nr_pages * sizeof(struct page *));
 
 	area->nr_pages = nr_pages;
@@ -446,6 +411,10 @@ void *vmalloc_32(unsigned long size)
 	return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
 }
 
+/*
+ * XXX: broken, fix eventually
+ * -- wli
+ */
 long vread(char *buf, char *addr, unsigned long count)
 {
 	struct vm_struct *tmp;
@@ -484,6 +453,10 @@ finished:
 	return buf - buf_start;
 }
 
+/*
+ * XXX: broken, fix eventually
+ * -- wli
+ */
 long vwrite(char *buf, char *addr, unsigned long count)
 {
 	struct vm_struct *tmp;
diff -urpN linux-2.5.61/net/ipv4/netfilter/ip_conntrack_core.c pgcl-2.5.61-1/net/ipv4/netfilter/ip_conntrack_core.c
--- linux-2.5.61/net/ipv4/netfilter/ip_conntrack_core.c	2003-02-14 15:51:04.000000000 -0800
+++ pgcl-2.5.61-1/net/ipv4/netfilter/ip_conntrack_core.c	2003-02-14 20:44:43.000000000 -0800
@@ -1440,9 +1440,9 @@ int __init ip_conntrack_init(void)
 		ip_conntrack_htable_size = hashsize;
 	} else {
 		ip_conntrack_htable_size
-			= (((num_physpages << PAGE_SHIFT) / 16384)
+			= (((num_physpages << MMUPAGE_SHIFT) / 16384)
 			   / sizeof(struct list_head));
-		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
+		if (num_physpages > (1024 * 1024 * 1024 / MMUPAGE_SIZE))
 			ip_conntrack_htable_size = 8192;
 		if (ip_conntrack_htable_size < 16)
 			ip_conntrack_htable_size = 16;
diff -urpN linux-2.5.61/net/ipv4/tcp.c pgcl-2.5.61-1/net/ipv4/tcp.c
--- linux-2.5.61/net/ipv4/tcp.c	2003-02-14 15:51:24.000000000 -0800
+++ pgcl-2.5.61-1/net/ipv4/tcp.c	2003-02-14 20:44:43.000000000 -0800
@@ -2615,9 +2615,9 @@ void __init tcp_init(void)
 	 * The methodology is similar to that of the buffer cache.
 	 */
 	if (num_physpages >= (128 * 1024))
-		goal = num_physpages >> (21 - PAGE_SHIFT);
+		goal = num_physpages >> (21 - MMUPAGE_SHIFT);
 	else
-		goal = num_physpages >> (23 - PAGE_SHIFT);
+		goal = num_physpages >> (23 - MMUPAGE_SHIFT);
 
 	for (order = 0; (1UL << order) < goal; order++)
 		;
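For what it's worth, with num_physpages in MMU pages the tcp_init() hash goal works out the same as it did before; e.g. 1GB of RAM with 4K MMU pages (example numbers only):

#include <stdio.h>

#define MMUPAGE_SHIFT	12	/* example: 4K MMU pages */

int main(void)
{
	unsigned long num_physpages = (1UL << 30) >> MMUPAGE_SHIFT;	/* 262144 */
	unsigned long goal, order;

	/* same threshold and shifts as the tcp_init() hunk above */
	if (num_physpages >= 128 * 1024)
		goal = num_physpages >> (21 - MMUPAGE_SHIFT);
	else
		goal = num_physpages >> (23 - MMUPAGE_SHIFT);
	for (order = 0; (1UL << order) < goal; order++)
		;
	printf("goal=%lu order=%lu\n", goal, order);	/* goal=512 order=9 */
	return 0;
}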