Page clustering, essentially a forward port of hugh's 2.4.7 code at ftp://ftp.veritas.com/linux/ This is currently NUMA-Q only, though it seems to compile for Pee Cees. Userspace runs until the first fork(), where a bad pmd is encountered. arch/i386/kernel/apic.c | 4 - arch/i386/kernel/cpu/mtrr/generic.c | 16 ++-- arch/i386/kernel/entry.S | 2 arch/i386/kernel/head.S | 3 arch/i386/kernel/irq.c | 2 arch/i386/kernel/mpparse.c | 4 - arch/i386/kernel/numaq.c | 6 - arch/i386/kernel/setup.c | 16 ++-- arch/i386/kernel/smpboot.c | 2 arch/i386/kernel/sys_i386.c | 4 - arch/i386/kernel/sysenter.c | 2 arch/i386/kernel/traps.c | 2 arch/i386/lib/getuser.S | 2 arch/i386/mm/discontig.c | 52 ++++++++------- arch/i386/mm/highmem.c | 57 +++++++++++++---- arch/i386/mm/init.c | 78 ++++++++++++++--------- arch/i386/mm/ioremap.c | 30 ++++----- arch/i386/mm/pgtable.c | 8 +- drivers/block/ll_rw_blk.c | 2 fs/binfmt_elf.c | 24 +++---- fs/exec.c | 3 include/asm-i386/fixmap.h | 23 +++++- include/asm-i386/highmem.h | 19 ----- include/asm-i386/io.h | 2 include/asm-i386/io_apic.h | 2 include/asm-i386/mmzone.h | 33 ++++++--- include/asm-i386/numaq.h | 2 include/asm-i386/page.h | 39 +++++++++-- include/asm-i386/pgtable-3level.h | 10 +-- include/asm-i386/pgtable.h | 16 +--- include/asm-i386/setup.h | 8 +- include/asm-i386/thread_info.h | 10 +-- include/linux/blkdev.h | 4 - include/linux/highmem.h | 18 +++++ include/linux/mm.h | 34 ++++++++-- include/linux/pagemap.h | 3 include/linux/sched.h | 2 include/linux/shm.h | 2 include/linux/swap.h | 6 - init/main.c | 4 - ipc/shm.c | 6 - kernel/fork.c | 7 +- mm/bootmem.c | 120 +++++++++++++++--------------------- mm/filemap.c | 34 +++++----- mm/highmem.c | 44 ++++++++++--- mm/memory.c | 52 ++++++++++----- mm/mmap.c | 80 ++++++++++++------------ mm/page_alloc.c | 10 +-- mm/shmem.c | 22 +++--- mm/swapfile.c | 37 +++++++++++ mm/vmalloc.c | 23 +++--- 51 files changed, 594 insertions(+), 397 deletions(-) diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/kernel/apic.c pgcl-bk/arch/i386/kernel/apic.c --- linux-2.5-bk/arch/i386/kernel/apic.c Tue Jan 7 19:07:11 2003 +++ pgcl-bk/arch/i386/kernel/apic.c Mon Jan 6 22:17:04 2003 @@ -678,7 +678,7 @@ * one for the IO-APIC. */ if (!smp_found_config && detect_init_APIC()) { - apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); + apic_phys = (unsigned long) alloc_bootmem_pages(MMUPAGE_SIZE); apic_phys = __pa(apic_phys); } else apic_phys = mp_lapic_addr; @@ -710,7 +710,7 @@ } } else { fake_ioapic_page: - ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = (unsigned long) alloc_bootmem_pages(MMUPAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/kernel/cpu/mtrr/generic.c pgcl-bk/arch/i386/kernel/cpu/mtrr/generic.c --- linux-2.5-bk/arch/i386/kernel/cpu/mtrr/generic.c Tue Jan 7 19:07:12 2003 +++ pgcl-bk/arch/i386/kernel/cpu/mtrr/generic.c Mon Jan 6 22:17:05 2003 @@ -133,13 +133,13 @@ rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); /* Work out the shifted address mask. */ - mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) - | mask_lo >> PAGE_SHIFT; + mask_lo = size_or_mask | mask_hi << (32 - MMUPAGE_SHIFT) + | mask_lo >> MMUPAGE_SHIFT; /* This works correctly if size is a power of two, i.e. a contiguous range. 
*/ *size = -mask_lo; - *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; + *base = base_hi << (32 - MMUPAGE_SHIFT) | base_lo >> MMUPAGE_SHIFT; *type = base_lo & 0xff; } @@ -319,10 +319,10 @@ relevant mask register to disable a range. */ wrmsr(MTRRphysMask_MSR(reg), 0, 0); } else { - wrmsr(MTRRphysBase_MSR(reg), base << PAGE_SHIFT | type, - (base & size_and_mask) >> (32 - PAGE_SHIFT)); - wrmsr(MTRRphysMask_MSR(reg), -size << PAGE_SHIFT | 0x800, - (-size & size_and_mask) >> (32 - PAGE_SHIFT)); + wrmsr(MTRRphysBase_MSR(reg), base << MMUPAGE_SHIFT | type, + (base & size_and_mask) >> (32 - MMUPAGE_SHIFT)); + wrmsr(MTRRphysMask_MSR(reg), -size << MMUPAGE_SHIFT | 0x800, + (-size & size_and_mask) >> (32 - MMUPAGE_SHIFT)); } post_set(); @@ -337,7 +337,7 @@ if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && boot_cpu_data.x86_mask <= 7) { - if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { + if (base & ((1 << (22 - MMUPAGE_SHIFT)) - 1)) { printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/kernel/entry.S pgcl-bk/arch/i386/kernel/entry.S --- linux-2.5-bk/arch/i386/kernel/entry.S Tue Jan 7 19:07:11 2003 +++ pgcl-bk/arch/i386/kernel/entry.S Tue Jan 7 18:17:22 2003 @@ -166,7 +166,7 @@ pushl %eax popfl - andl $-8192, %ebx # GET_THREAD_INFO + andl $~(THREAD_SIZE-1), %ebx # GET_THREAD_INFO movl TI_EXEC_DOMAIN(%ebx), %edx # Get the execution domain call *4(%edx) # Call the lcall7 handler for the domain addl $4, %esp diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/kernel/head.S pgcl-bk/arch/i386/kernel/head.S --- linux-2.5-bk/arch/i386/kernel/head.S Tue Jan 7 19:07:11 2003 +++ pgcl-bk/arch/i386/kernel/head.S Mon Jan 6 22:17:04 2003 @@ -16,6 +16,7 @@ #include #include #include +#include #define OLD_CL_MAGIC_ADDR 0x90020 #define OLD_CL_MAGIC 0xA33F @@ -309,7 +310,7 @@ ret ENTRY(stack_start) - .long init_thread_union+8192 + .long init_thread_union+THREAD_SIZE .long __BOOT_DS /* This is the default interrupt "handler" :-) */ diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/kernel/irq.c pgcl-bk/arch/i386/kernel/irq.c --- linux-2.5-bk/arch/i386/kernel/irq.c Tue Jan 7 19:07:11 2003 +++ pgcl-bk/arch/i386/kernel/irq.c Mon Jan 6 22:17:04 2003 @@ -337,7 +337,7 @@ long esp; __asm__ __volatile__("andl %%esp,%0" : - "=r" (esp) : "0" (8191)); + "=r" (esp) : "0" (THREAD_SIZE-1)); if (unlikely(esp < (sizeof(struct task_struct) + 1024))) { printk("do_IRQ: stack overflow: %ld\n", esp - sizeof(struct task_struct)); diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/kernel/mpparse.c pgcl-bk/arch/i386/kernel/mpparse.c --- linux-2.5-bk/arch/i386/kernel/mpparse.c Mon Jan 6 20:23:07 2003 +++ pgcl-bk/arch/i386/kernel/mpparse.c Mon Jan 6 22:17:04 2003 @@ -706,9 +706,9 @@ smp_found_config = 1; printk("found SMP MP-table at %08lx\n", virt_to_phys(mpf)); - reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE); + reserve_bootmem(virt_to_phys(mpf), MMUPAGE_SIZE); if (mpf->mpf_physptr) - reserve_bootmem(mpf->mpf_physptr, PAGE_SIZE); + reserve_bootmem(mpf->mpf_physptr, MMUPAGE_SIZE); mpf_found = mpf; return 1; } diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/kernel/numaq.c pgcl-bk/arch/i386/kernel/numaq.c --- linux-2.5-bk/arch/i386/kernel/numaq.c Tue Jan 7 19:07:11 2003 +++ pgcl-bk/arch/i386/kernel/numaq.c Mon Jan 6 22:17:04 2003 @@ -33,7 +33,7 @@ 
unsigned long node_start_pfn[MAX_NUMNODES]; unsigned long node_end_pfn[MAX_NUMNODES]; -#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) +#define MB_TO_PAGES(addr) ((addr) << (20 - MMUPAGE_SHIFT)) /* * Function: smp_dump_qct() @@ -83,8 +83,8 @@ */ int physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1}; -#define PFN_TO_ELEMENT(pfn) (pfn / PAGES_PER_ELEMENT) -#define PA_TO_ELEMENT(pa) (PFN_TO_ELEMENT(pa >> PAGE_SHIFT)) +#define PFN_TO_ELEMENT(pfn) ((pfn) / PAGES_PER_ELEMENT) +#define PA_TO_ELEMENT(pa) (PFN_TO_ELEMENT((pa) >> MMUPAGE_SHIFT)) int pfn_to_nid(unsigned long pfn) { diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/kernel/setup.c pgcl-bk/arch/i386/kernel/setup.c --- linux-2.5-bk/arch/i386/kernel/setup.c Tue Jan 7 19:07:11 2003 +++ pgcl-bk/arch/i386/kernel/setup.c Mon Jan 6 22:17:04 2003 @@ -599,6 +599,8 @@ continue; if (end > max_pfn) max_pfn = end; + + max_pfn &= ~(PAGE_MMUCOUNT - 1); } } @@ -722,10 +724,10 @@ highstart_pfn = max_low_pfn; } printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", - pages_to_mb(highend_pfn - highstart_pfn)); + (highend_pfn - highstart_pfn) >> (20 - MMUPAGE_SHIFT)); #endif printk(KERN_NOTICE "%ldMB LOWMEM available.\n", - pages_to_mb(max_low_pfn)); + max_low_pfn >> (20 - MMUPAGE_SHIFT)); /* * Initialize the boot-time allocator (with low memory only): */ @@ -746,7 +748,7 @@ * reserve physical page 0 - it's a special BIOS page on many boxes, * enabling clean reboots, SMP operation, laptop functions. */ - reserve_bootmem(0, PAGE_SIZE); + reserve_bootmem(0, MMUPAGE_SIZE); #ifdef CONFIG_SMP /* @@ -754,7 +756,7 @@ * FIXME: Don't need the extra page at 4K, but need to fix * trampoline before removing it. (see the GDT stuff) */ - reserve_bootmem(PAGE_SIZE, PAGE_SIZE); + reserve_bootmem(MMUPAGE_SIZE, MMUPAGE_SIZE); #endif #ifdef CONFIG_ACPI_SLEEP /* @@ -771,7 +773,7 @@ #ifdef CONFIG_BLK_DEV_INITRD if (LOADER_TYPE && INITRD_START) { - if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { + if (INITRD_START + INITRD_SIZE <= PFN_PHYS(max_low_pfn)) { reserve_bootmem(INITRD_START, INITRD_SIZE); initrd_start = INITRD_START ? INITRD_START + PAGE_OFFSET : 0; @@ -781,7 +783,7 @@ printk(KERN_ERR "initrd extends beyond end of memory " "(0x%08lx > 0x%08lx)\ndisabling initrd\n", INITRD_START + INITRD_SIZE, - max_low_pfn << PAGE_SHIFT); + PFN_PHYS(max_low_pfn)); initrd_start = 0; } } @@ -834,7 +836,7 @@ request_resource(&ioport_resource, standard_io_resources+i); /* Tell the PCI layer not to allocate too close to the RAM area.. */ - low_mem_size = ((max_low_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff; + low_mem_size = ((max_low_pfn << MMUPAGE_SHIFT) + 0xfffff) & ~0xfffff; if (low_mem_size > pci_mem_start) pci_mem_start = low_mem_size; } diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/kernel/smpboot.c pgcl-bk/arch/i386/kernel/smpboot.c --- linux-2.5-bk/arch/i386/kernel/smpboot.c Tue Jan 7 19:07:11 2003 +++ pgcl-bk/arch/i386/kernel/smpboot.c Mon Jan 6 22:17:04 2003 @@ -100,7 +100,7 @@ */ void __init smp_alloc_memory(void) { - trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE); + trampoline_base = (void *) alloc_bootmem_low_pages(MMUPAGE_SIZE); /* * Has to be in very low memory so we can execute * real-mode AP code. 
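The 8191/-8192/8192 literals replaced in the entry.S, head.S and irq.c hunks above simply encoded THREAD_SIZE; with THREAD_SIZE defined as 2*MMUPAGE_SIZE in the asm-i386/thread_info.h hunk further down, the stack masks still come out to the same 8K values. A quick illustrative check (not part of the patch; stack_to_thread_info is a made-up name):

    /* sketch: the THREAD_SIZE mask is identical to the old -8192 literal */
    #define MMUPAGE_SIZE    4096UL
    #define THREAD_SIZE     (2 * MMUPAGE_SIZE)      /* still 8192 */

    static unsigned long stack_to_thread_info(unsigned long esp)
    {
            /* ~(8192 - 1) == 0xffffe000 == (unsigned long)-8192 */
            return esp & ~(THREAD_SIZE - 1);
    }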
diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/kernel/sys_i386.c pgcl-bk/arch/i386/kernel/sys_i386.c --- linux-2.5-bk/arch/i386/kernel/sys_i386.c Tue Jan 7 19:07:11 2003 +++ pgcl-bk/arch/i386/kernel/sys_i386.c Mon Jan 6 22:17:04 2003 @@ -97,10 +97,10 @@ goto out; err = -EINVAL; - if (a.offset & ~PAGE_MASK) + if (a.offset & ~MMUPAGE_MASK) goto out; - err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); + err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> MMUPAGE_SHIFT); out: return err; } diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/kernel/sysenter.c pgcl-bk/arch/i386/kernel/sysenter.c --- linux-2.5-bk/arch/i386/kernel/sysenter.c Tue Jan 7 19:07:11 2003 +++ pgcl-bk/arch/i386/kernel/sysenter.c Mon Jan 6 22:17:04 2003 @@ -32,7 +32,7 @@ struct task_struct task; unsigned char trampoline[32] __attribute__((aligned(1024))); unsigned char stack[0]; -} __attribute__((aligned(8192))); +} __attribute__((aligned(THREAD_SIZE))); static void __init enable_sep_cpu(void *info) { diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/kernel/traps.c pgcl-bk/arch/i386/kernel/traps.c --- linux-2.5-bk/arch/i386/kernel/traps.c Tue Jan 7 19:07:12 2003 +++ pgcl-bk/arch/i386/kernel/traps.c Tue Jan 7 18:17:22 2003 @@ -115,7 +115,7 @@ unsigned long esp = tsk->thread.esp; /* User space on another CPU? */ - if ((esp ^ (unsigned long)tsk->thread_info) & (PAGE_MASK<<1)) + if ((esp ^ (unsigned long)tsk->thread_info) & ~(THREAD_SIZE-1)) return; show_trace((unsigned long *)esp); } diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/lib/getuser.S pgcl-bk/arch/i386/lib/getuser.S --- linux-2.5-bk/arch/i386/lib/getuser.S Tue Jan 7 19:07:12 2003 +++ pgcl-bk/arch/i386/lib/getuser.S Mon Jan 6 22:17:05 2003 @@ -8,8 +8,8 @@ * return an error value in addition to the "real" * return value. 
*/ +#include #include - /* * __get_user_X diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/mm/discontig.c pgcl-bk/arch/i386/mm/discontig.c --- linux-2.5-bk/arch/i386/mm/discontig.c Tue Jan 7 19:07:12 2003 +++ pgcl-bk/arch/i386/mm/discontig.c Mon Jan 6 22:17:05 2003 @@ -68,7 +68,7 @@ unsigned long node_datasz; node_datasz = PFN_UP(sizeof(struct pglist_data)); - NODE_DATA(nid) = (pg_data_t *)(__va(min_low_pfn << PAGE_SHIFT)); + NODE_DATA(nid) = (pg_data_t *)(__va(min_low_pfn << MMUPAGE_SHIFT)); min_low_pfn += node_datasz; memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); } @@ -113,8 +113,6 @@ } } -#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) - unsigned long node_remap_start_pfn[MAX_NUMNODES]; unsigned long node_remap_size[MAX_NUMNODES]; unsigned long node_remap_offset[MAX_NUMNODES]; @@ -128,8 +126,8 @@ int node; for (node = 1; node < numnodes; ++node) { - for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { - vaddr = node_remap_start_vaddr[node]+(pfn< system_max_low_pfn) highstart_pfn = system_max_low_pfn; printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", - pages_to_mb(highend_pfn - highstart_pfn)); + (highend_pfn - highstart_pfn) >> (20 - MMUPAGE_SHIFT)); #endif system_max_low_pfn = max_low_pfn = max_low_pfn - reserve_pages; printk(KERN_NOTICE "%ldMB LOWMEM available.\n", - pages_to_mb(system_max_low_pfn)); + system_max_low_pfn >> (20 - MMUPAGE_SHIFT)); printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n", min_low_pfn, max_low_pfn, highstart_pfn); @@ -223,21 +221,21 @@ * the (very unlikely) case of us accidentally initializing the * bootmem allocator with an invalid RAM area. */ - reserve_bootmem_node(NODE_DATA(0), HIGH_MEMORY, (PFN_PHYS(min_low_pfn) + - bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY)); + reserve_bootmem_node(NODE_DATA(0), HIGH_MEMORY, PFN_PHYS(min_low_pfn) + + bootmap_size - HIGH_MEMORY); /* * reserve physical page 0 - it's a special BIOS page on many boxes, * enabling clean reboots, SMP operation, laptop functions. */ - reserve_bootmem_node(NODE_DATA(0), 0, PAGE_SIZE); + reserve_bootmem_node(NODE_DATA(0), 0, MMUPAGE_SIZE); /* * But first pinch a few for the stack/trampoline stuff * FIXME: Don't need the extra page at 4K, but need to fix * trampoline before removing it. (see the GDT stuff) */ - reserve_bootmem_node(NODE_DATA(0), PAGE_SIZE, PAGE_SIZE); + reserve_bootmem_node(NODE_DATA(0), MMUPAGE_SIZE, MMUPAGE_SIZE); #ifdef CONFIG_ACPI_SLEEP /* @@ -260,7 +258,7 @@ #ifdef CONFIG_BLK_DEV_INITRD if (LOADER_TYPE && INITRD_START) { - if (INITRD_START + INITRD_SIZE <= (system_max_low_pfn << PAGE_SHIFT)) { + if (INITRD_START + INITRD_SIZE <= (system_max_low_pfn << MMUPAGE_SHIFT)) { reserve_bootmem_node(NODE_DATA(0), INITRD_START, INITRD_SIZE); initrd_start = INITRD_START ? 
INITRD_START + PAGE_OFFSET : 0; @@ -270,7 +268,7 @@ printk(KERN_ERR "initrd extends beyond end of memory " "(0x%08lx > 0x%08lx)\ndisabling initrd\n", INITRD_START + INITRD_SIZE, - system_max_low_pfn << PAGE_SHIFT); + system_max_low_pfn << MMUPAGE_SHIFT); initrd_start = 0; } } @@ -290,20 +288,20 @@ unsigned long start = node_start_pfn[nid]; unsigned long high = node_end_pfn[nid]; - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> MMUPAGE_SHIFT; if (start > low) { #ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = high - start; + zones_size[ZONE_HIGHMEM] = (high - start) >> PAGE_MMUSHIFT; #endif } else { if (low < max_dma) - zones_size[ZONE_DMA] = low; + zones_size[ZONE_DMA] = low >> PAGE_MMUSHIFT; else { - zones_size[ZONE_DMA] = max_dma; - zones_size[ZONE_NORMAL] = low - max_dma; + zones_size[ZONE_DMA] = max_dma >> PAGE_MMUSHIFT; + zones_size[ZONE_NORMAL] = (low - max_dma) >> PAGE_MMUSHIFT; #ifdef CONFIG_HIGHMEM - zones_size[ZONE_HIGHMEM] = high - low; + zones_size[ZONE_HIGHMEM] = (high - low) >> PAGE_MMUSHIFT; #endif } } @@ -337,10 +335,14 @@ zone_start_pfn = NODE_DATA(nid)->node_zones[ZONE_HIGHMEM].zone_start_pfn; printk("Initializing highpages for node %d\n", nid); - for (node_pfn = 0; node_pfn < node_high_size; node_pfn++) { - one_highpage_init((struct page *)(zone_mem_map + node_pfn), - zone_start_pfn + node_pfn, bad_ppro); - } + + /* + * Note: zone->spanned_pages is in PAGE_SIZE units. + */ + for (node_pfn = 0; node_pfn < node_high_size; node_pfn++) + one_highpage_init(&zone_mem_map[node_pfn], + zone_start_pfn + node_pfn*PAGE_MMUCOUNT, + bad_ppro); } totalram_pages += totalhigh_pages; #endif diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/mm/highmem.c pgcl-bk/arch/i386/mm/highmem.c --- linux-2.5-bk/arch/i386/mm/highmem.c Tue Jan 7 19:07:12 2003 +++ pgcl-bk/arch/i386/mm/highmem.c Tue Jan 7 19:06:35 2003 @@ -29,19 +29,29 @@ void *kmap_atomic(struct page *page, enum km_type type) { enum fixed_addresses idx; - unsigned long vaddr; + unsigned long vaddr, pfn; + int k; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; inc_preempt_count(); if (page < highmem_start_page) return page_address(page); idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + vaddr = __fix_to_virt(FIX_KMAP_END) + idx*PAGE_SIZE; + pgd = pgd_offset_k(vaddr); + pmd = pmd_offset(pgd, vaddr); + pte = pte_offset_kernel(pmd, vaddr); #if CONFIG_DEBUG_HIGHMEM - if (!pte_none(*(kmap_pte-idx))) - BUG(); + for (k = 0; k < PAGE_MMUCOUNT; ++k) + BUG_ON(!pte_none(pte[k])); #endif - set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); + pfn = page_to_pfn(page); + for (k = 0; k < PAGE_MMUCOUNT; ++k) + set_pte(&pte[k], pfn_pte(pfn + k, kmap_prot)); + /* set_pte(&kmap_pte[idx*PAGE_MMUCOUNT+k], pfn_pte(pfn + k, kmap_prot)); */ __flush_tlb_one(vaddr); return (void*) vaddr; @@ -50,22 +60,37 @@ void kunmap_atomic(void *kvaddr, enum km_type type) { #if CONFIG_DEBUG_HIGHMEM - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); + unsigned long vaddr = (unsigned long) kvaddr & MMUPAGE_MASK; + /* enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); */ + enum fixed_addresses idx = (vaddr - __fix_to_virt(FIX_KMAP_END))/PAGE_SIZE; + unsigned long lower_bound = __fix_to_virt(FIX_KMAP_END) + idx*PAGE_SIZE; + unsigned long upper_bound = lower_bound + PAGE_SIZE; + int k; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; if (vaddr < FIXADDR_START) 
{ // FIXME dec_preempt_count(); return; } - if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) + if (vaddr < lower_bound || vaddr > upper_bound) { + printk("vaddr %lx outside [%lx,%lx)\n", vaddr, lower_bound, upper_bound); BUG(); + } /* * force other mappings to Oops if they'll try to access * this pte without first remap it */ - pte_clear(kmap_pte-idx); + vaddr = __fix_to_virt(FIX_KMAP_END) + idx*PAGE_SIZE; + pgd = pgd_offset_k(vaddr); + pmd = pmd_offset(pgd, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + for (k = 0; k < PAGE_MMUCOUNT; ++k) + pte_clear(&pte[k]); + /* pte_clear(&kmap_pte[idx*PAGE_MMUCOUNT+k]); */ __flush_tlb_one(vaddr); #endif @@ -74,14 +99,22 @@ struct page *kmap_atomic_to_page(void *ptr) { - unsigned long idx, vaddr = (unsigned long)ptr; + unsigned long vaddr = (unsigned long)ptr; + pgd_t *pgd; + pmd_t *pmd; pte_t *pte; if (vaddr < FIXADDR_START) return virt_to_page(ptr); - idx = virt_to_fix(vaddr); - pte = kmap_pte - (idx - FIX_KMAP_BEGIN); + pgd = pgd_offset_k(vaddr); + pmd = pmd_offset(pgd, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + + /* + * unsigned long idx = virt_to_fix(vaddr); + * pte = &kmap_pte[idx*PAGE_MMUCOUNT]; + */ return pte_page(*pte); } diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/mm/init.c pgcl-bk/arch/i386/mm/init.c --- linux-2.5-bk/arch/i386/mm/init.c Tue Jan 7 19:07:12 2003 +++ pgcl-bk/arch/i386/mm/init.c Mon Jan 6 22:17:05 2003 @@ -54,7 +54,7 @@ pmd_t *pmd_table; #if CONFIG_X86_PAE - pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); + pmd_table = (pmd_t *) alloc_bootmem_low_pages(MMUPAGE_SIZE); set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); if (pmd_table != pmd_offset(pgd, 0)) BUG(); @@ -71,7 +71,7 @@ */ static pte_t * __init one_page_table_init(pmd_t *pmd) { - pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(MMUPAGE_SIZE); set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); if (page_table != pte_offset_kernel(pmd, 0)) BUG(); @@ -105,7 +105,14 @@ for ( ; (pgd_ofs < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_ofs++) { if (pgd_none(*pgd)) one_md_table_init(pgd); + } + + vaddr = start; + pgd_ofs = __pgd_offset(vaddr); + pmd_ofs = __pmd_offset(vaddr); + pgd = pgd_base + pgd_ofs; + for ( ; (pgd_ofs < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_ofs++) { pmd = pmd_offset(pgd, vaddr); for (; (pmd_ofs < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_ofs++) { if (pmd_none(*pmd)) @@ -174,8 +181,8 @@ * are not. Notably the 640->1Mb area. We need a sanity * check here. */ - addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT; - end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT; + addr = (e820.map[i].addr+MMUPAGE_SIZE-1) >> MMUPAGE_SHIFT; + end = (e820.map[i].addr+e820.map[i].size) >> MMUPAGE_SHIFT; if ((pagenr >= addr) && (pagenr < end)) return 1; } @@ -184,6 +191,9 @@ #if CONFIG_HIGHMEM pte_t *kmap_pte; +unsigned long kmap_vstart, kmap_vend; +int kmap_begin_idx, kmap_end_idx, pkmap_begin_idx, pkmap_end_idx; +int apic_base_idx, ioapic_base_idx, ioapic_end_idx; pgprot_t kmap_prot; #define kmap_get_fixmap_pte(vaddr) \ @@ -191,10 +201,22 @@ void __init kmap_init(void) { - unsigned long kmap_vstart; - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); + /* + * cache the first kmap pte + * The PTE's had better be physically contiguous + * or indexing in k*map_atomic() will oops. 
+ */ + kmap_begin_idx = FIX_KMAP_END; + kmap_end_idx = FIX_KMAP_BEGIN; + pkmap_begin_idx = FIX_PKMAP_END; + pkmap_end_idx = FIX_PKMAP_BEGIN; + apic_base_idx = FIX_APIC_BASE; + ioapic_base_idx = FIX_IO_APIC_BASE_END; + ioapic_end_idx = FIX_IO_APIC_BASE_0; + + kmap_vstart = __fix_to_virt(FIX_KMAP_END); + kmap_vend = __fix_to_virt(FIX_KMAP_BEGIN); kmap_pte = kmap_get_fixmap_pte(kmap_vstart); kmap_prot = PAGE_KERNEL; @@ -202,18 +224,22 @@ void __init permanent_kmaps_init(pgd_t *pgd_base) { - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - unsigned long vaddr; + /* + * pgd_t *pgd; + * pmd_t *pmd; + * pte_t *pte; + */ + unsigned long vaddr; vaddr = PKMAP_BASE; page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); - pgd = swapper_pg_dir + __pgd_offset(vaddr); - pmd = pmd_offset(pgd, vaddr); - pte = pte_offset_kernel(pmd, vaddr); - pkmap_page_table = pte; + /* + * pgd = swapper_pg_dir + __pgd_offset(vaddr); + * pmd = pmd_offset(pgd, vaddr); + * pte = pte_offset_kernel(pmd, vaddr); + * pkmap_page_table = pte; + */ } void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) @@ -325,7 +351,7 @@ unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; unsigned int max_dma, high, low; - max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; + max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> MMUPAGE_SHIFT; low = max_low_pfn; high = highend_pfn; @@ -438,32 +464,22 @@ bad_ppro = ppro_with_ram_bug(); -#ifdef CONFIG_HIGHMEM - /* check that fixmap and pkmap do not overlap */ - if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) { - printk(KERN_ERR "fixmap and kmap areas overlap - this will crash\n"); - printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n", - PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, FIXADDR_START); - BUG(); - } -#endif - set_max_mapnr_init(); #ifdef CONFIG_HIGHMEM - high_memory = (void *) __va(highstart_pfn * PAGE_SIZE); + high_memory = (void *) __va(highstart_pfn * MMUPAGE_SIZE); #else - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + high_memory = (void *) __va(max_low_pfn * MMUPAGE_SIZE); #endif /* clear the zero-page */ - memset(empty_zero_page, 0, PAGE_SIZE); + memset(empty_zero_page, 0, MMUPAGE_SIZE); /* this will put all low memory onto the freelists */ totalram_pages += __free_all_bootmem(); reservedpages = 0; - for (tmp = 0; tmp < max_low_pfn; tmp++) + for (tmp = 0; tmp < max_low_pfn/PAGE_MMUCOUNT; tmp++) /* * Only count reserved RAM pages */ @@ -478,7 +494,7 @@ printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), - num_physpages << (PAGE_SHIFT-10), + num_physpages << (MMUPAGE_SHIFT-10), codesize >> 10, reservedpages << (PAGE_SHIFT-10), datasize >> 10, diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/mm/ioremap.c pgcl-bk/arch/i386/mm/ioremap.c --- linux-2.5-bk/arch/i386/mm/ioremap.c Tue Jan 7 19:07:12 2003 +++ pgcl-bk/arch/i386/mm/ioremap.c Mon Jan 6 22:17:05 2003 @@ -30,7 +30,7 @@ end = PMD_SIZE; if (address >= end) BUG(); - pfn = phys_addr >> PAGE_SHIFT; + pfn = phys_addr >> MMUPAGE_SHIFT; do { if (!pte_none(*pte)) { printk("remap_area_pte: page already exists\n"); @@ -38,7 +38,7 @@ } set_pte(pte, pfn_pte(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | flags))); - address += PAGE_SIZE; + address += MMUPAGE_SIZE; pfn++; pte++; } while (address && (address < end)); @@ -146,9 +146,9 @@ /* * Mappings have to be page-aligned */ - offset = phys_addr & ~PAGE_MASK; - 
phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr) - phys_addr; + offset = phys_addr & ~MMUPAGE_MASK; + phys_addr &= MMUPAGE_MASK; + size = MMUPAGE_ALIGN(last_addr) - phys_addr; /* * Ok, go for it.. @@ -196,7 +196,7 @@ if (phys_addr + size < virt_to_phys(high_memory)) { struct page *ppage = virt_to_page(__va(phys_addr)); - unsigned long npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long npages = (size + MMUPAGE_SIZE - 1) >> MMUPAGE_SHIFT; BUG_ON(phys_addr+size > (unsigned long)high_memory); BUG_ON(phys_addr + size < phys_addr); @@ -215,7 +215,7 @@ struct vm_struct *p; if (addr <= high_memory) return; - p = remove_vm_area((void *) (PAGE_MASK & (unsigned long) addr)); + p = remove_vm_area((void *) (MMUPAGE_MASK & (unsigned long) addr)); if (!p) { printk("__iounmap: bad address %p\n", addr); return; @@ -224,7 +224,7 @@ unmap_vm_area(p); if (p->flags && p->phys_addr < virt_to_phys(high_memory)) { change_page_attr(virt_to_page(__va(p->phys_addr)), - p->size >> PAGE_SHIFT, + p->size >> MMUPAGE_SHIFT, PAGE_KERNEL); } kfree(p); @@ -250,14 +250,14 @@ /* * Mappings have to be page-aligned */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr) - phys_addr; + offset = phys_addr & ~MMUPAGE_MASK; + phys_addr &= MMUPAGE_MASK; + size = MMUPAGE_ALIGN(last_addr) - phys_addr; /* * Mappings have to fit in the FIX_BTMAP area. */ - nrpages = size >> PAGE_SHIFT; + nrpages = size >> MMUPAGE_SHIFT; if (nrpages > NR_FIX_BTMAPS) return NULL; @@ -267,7 +267,7 @@ idx = FIX_BTMAP_BEGIN; while (nrpages > 0) { set_fixmap(idx, phys_addr); - phys_addr += PAGE_SIZE; + phys_addr += MMUPAGE_SIZE; --idx; --nrpages; } @@ -284,8 +284,8 @@ virt_addr = (unsigned long)addr; if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) return; - offset = virt_addr & ~PAGE_MASK; - nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; + offset = virt_addr & ~MMUPAGE_MASK; + nrpages = MMUPAGE_ALIGN(offset + size - 1) >> MMUPAGE_SHIFT; idx = FIX_BTMAP_BEGIN; while (nrpages > 0) { diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/arch/i386/mm/pgtable.c pgcl-bk/arch/i386/mm/pgtable.c --- linux-2.5-bk/arch/i386/mm/pgtable.c Tue Jan 7 19:07:12 2003 +++ pgcl-bk/arch/i386/mm/pgtable.c Mon Jan 6 22:17:05 2003 @@ -97,10 +97,12 @@ if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */ printk ("set_pmd_pfn: vaddr misaligned\n"); + printk ("vaddr = %lx, pfn = %lx\n", vaddr, pfn); return; /* BUG(); */ } - if (pfn & (PTRS_PER_PTE-1)) { /* pfn is misaligned */ + if (pfn & (PMD_SIZE/MMUPAGE_SIZE-1)) { /* pfn is misaligned */ printk ("set_pmd_pfn: pfn misaligned\n"); + printk ("vaddr = %lx, pfn = %lx\n", vaddr, pfn); return; /* BUG(); */ } pgd = swapper_pg_dir + __pgd_offset(vaddr); @@ -121,11 +123,13 @@ { unsigned long address = __fix_to_virt(idx); + printk("__set_fixmap(%d,%lx)\n", idx, phys); + if (idx >= __end_of_fixed_addresses) { BUG(); return; } - set_pte_pfn(address, phys >> PAGE_SHIFT, flags); + set_pte_pfn(address, phys >> MMUPAGE_SHIFT, flags); } pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/drivers/block/ll_rw_blk.c pgcl-bk/drivers/block/ll_rw_blk.c --- linux-2.5-bk/drivers/block/ll_rw_blk.c Tue Jan 7 19:07:36 2003 +++ pgcl-bk/drivers/block/ll_rw_blk.c Mon Jan 6 22:17:13 2003 @@ -260,7 +260,7 @@ **/ void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) { - unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; + unsigned long bounce_pfn = dma_addr >> MMUPAGE_SHIFT; 
unsigned long mb = dma_addr >> 20; static request_queue_t *last_q; diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/fs/binfmt_elf.c pgcl-bk/fs/binfmt_elf.c --- linux-2.5-bk/fs/binfmt_elf.c Tue Jan 7 19:08:40 2003 +++ pgcl-bk/fs/binfmt_elf.c Tue Jan 7 19:06:39 2003 @@ -61,10 +61,10 @@ #define elf_core_dump NULL #endif -#if ELF_EXEC_PAGESIZE > PAGE_SIZE +#if ELF_EXEC_PAGESIZE > MMUPAGE_SIZE # define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE #else -# define ELF_MIN_ALIGN PAGE_SIZE +# define ELF_MIN_ALIGN MMUPAGE_SIZE #endif #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1)) @@ -231,8 +231,8 @@ while (argc-- > 0) { size_t len; __put_user((elf_addr_t)p, argv++); - len = strnlen_user((void *)p, PAGE_SIZE*MAX_ARG_PAGES); - if (!len || len > PAGE_SIZE*MAX_ARG_PAGES) + len = strnlen_user((void *)p, MMUPAGE_SIZE*MAX_ARG_PAGES); + if (!len || len > MMUPAGE_SIZE*MAX_ARG_PAGES) return; p += len; } @@ -241,8 +241,8 @@ while (envc-- > 0) { size_t len; __put_user((elf_addr_t)p, envp++); - len = strnlen_user((void *)p, PAGE_SIZE*MAX_ARG_PAGES); - if (!len || len > PAGE_SIZE*MAX_ARG_PAGES) + len = strnlen_user((void *)p, MMUPAGE_SIZE*MAX_ARG_PAGES); + if (!len || len > MMUPAGE_SIZE*MAX_ARG_PAGES) return; p += len; } @@ -763,9 +763,9 @@ and some applications "depend" upon this behavior. Since we do not have the power to recompile these, we emulate the SVr4 behavior. Sigh. */ - /* N.B. Shouldn't the size here be PAGE_SIZE?? */ + /* N.B. Shouldn't the size here be MMUPAGE_SIZE?? */ down_write(¤t->mm->mmap_sem); - error = do_mmap(NULL, 0, 4096, PROT_READ | PROT_EXEC, + error = do_mmap(NULL, 0, MMUPAGE_SIZE, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE, 0); up_write(¤t->mm->mmap_sem); } @@ -1317,21 +1317,21 @@ for (addr = vma->vm_start; addr < vma->vm_end; - addr += PAGE_SIZE) { + addr += MMUPAGE_SIZE) { struct page* page; struct vm_area_struct *vma; if (get_user_pages(current, current->mm, addr, 1, 0, 1, &page, &vma) <= 0) { - DUMP_SEEK (file->f_pos + PAGE_SIZE); + DUMP_SEEK (file->f_pos + MMUPAGE_SIZE); } else { if (page == ZERO_PAGE(addr)) { - DUMP_SEEK (file->f_pos + PAGE_SIZE); + DUMP_SEEK (file->f_pos + MMUPAGE_SIZE); } else { void *kaddr; flush_cache_page(vma, addr); kaddr = kmap(page); - DUMP_WRITE(kaddr, PAGE_SIZE); + DUMP_WRITE(kaddr, MMUPAGE_SIZE); flush_page_to_ram(page); kunmap(page); } diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/fs/exec.c pgcl-bk/fs/exec.c --- linux-2.5-bk/fs/exec.c Tue Jan 7 19:08:41 2003 +++ pgcl-bk/fs/exec.c Tue Jan 7 19:06:39 2003 @@ -1033,7 +1033,7 @@ if (IS_ERR(file)) return retval; - bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); + bprm.p = MMUPAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); bprm.file = file; @@ -1108,6 +1108,7 @@ allow_write_access(bprm.file); fput(bprm.file); } + return retval; } diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/asm-i386/fixmap.h pgcl-bk/include/asm-i386/fixmap.h --- linux-2.5-bk/include/asm-i386/fixmap.h Tue Jan 7 19:09:00 2003 +++ pgcl-bk/include/asm-i386/fixmap.h Mon Jan 6 22:17:40 2003 @@ -41,6 +41,17 @@ * TLB entries of such buffers will not be flushed across * task switches. */ + +/* + * Right now we initialize only a single pte table. It can be extended + * easily, subsequent pte tables have to be allocated in one physical + * chunk of RAM. 
+ */ +#define PKMAP_NR(virt) (((virt) - PKMAP_BASE) >> PAGE_SHIFT) +#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) +#define LAST_PKMAP 1024 +#define LAST_PKMAP_MASK (LAST_PKMAP-1) + enum fixed_addresses { FIX_HOLE, FIX_VSYSCALL, @@ -65,7 +76,9 @@ #endif #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ - FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS*PAGE_MMUCOUNT)-1, + FIX_PKMAP_BEGIN, + FIX_PKMAP_END = FIX_PKMAP_BEGIN + LAST_PKMAP*PAGE_MMUCOUNT - 1, #endif #ifdef CONFIG_ACPI_BOOT FIX_ACPI_BEGIN, @@ -101,11 +114,11 @@ * the start of the fixmap. */ #define FIXADDR_TOP (0xfffff000UL) -#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << MMUPAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) -#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) +#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << MMUPAGE_SHIFT)) +#define __virt_to_fix(x) ((FIXADDR_TOP - ((x) & MMUPAGE_MASK)) >> MMUPAGE_SHIFT) extern void __this_fixmap_does_not_exist(void); @@ -136,5 +149,7 @@ BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); return __virt_to_fix(vaddr); } + +#define PKMAP_BASE fix_to_virt(FIX_PKMAP_END) #endif diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/asm-i386/highmem.h pgcl-bk/include/asm-i386/highmem.h --- linux-2.5-bk/include/asm-i386/highmem.h Tue Jan 7 19:09:00 2003 +++ pgcl-bk/include/asm-i386/highmem.h Mon Jan 6 22:17:40 2003 @@ -34,23 +34,8 @@ extern void kmap_init(void); -/* - * Right now we initialize only a single pte table. It can be extended - * easily, subsequent pte tables have to be allocated in one physical - * chunk of RAM. - */ -#define PKMAP_BASE (0xff800000UL) -#ifdef CONFIG_X86_PAE -#define LAST_PKMAP 512 -#else -#define LAST_PKMAP 1024 -#endif -#define LAST_PKMAP_MASK (LAST_PKMAP-1) -#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) -#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) - -extern void * FASTCALL(kmap_high(struct page *page)); -extern void FASTCALL(kunmap_high(struct page *page)); +void *FASTCALL(kmap_high(struct page *page)); +void FASTCALL(kunmap_high(struct page *page)); void *kmap(struct page *page); void kunmap(struct page *page); diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/asm-i386/io.h pgcl-bk/include/asm-i386/io.h --- linux-2.5-bk/include/asm-i386/io.h Tue Jan 7 19:09:01 2003 +++ pgcl-bk/include/asm-i386/io.h Mon Jan 6 22:17:40 2003 @@ -95,7 +95,7 @@ /* * Change "struct page" to physical address. 
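Because fixmap slots are MMUPAGE_SIZE apiece, one atomic kmap type now spans PAGE_MMUCOUNT consecutive slots, which is why the FIX_KMAP range above is multiplied out by PAGE_MMUCOUNT. The address arithmetic the reworked kmap_atomic() relies on then looks roughly like this (illustration only; kmap_slot_vaddr is a made-up name):

    /*
     * Sketch of the per-cpu atomic kmap window layout: each km_type
     * slot is one kernel page, i.e. PAGE_MMUCOUNT ptes/mmupages.
     */
    static unsigned long kmap_slot_vaddr(enum km_type type, int cpu)
    {
            int idx = type + KM_TYPE_NR * cpu;

            /* __fix_to_virt() grows downward, so FIX_KMAP_END (the larger
             * index) is the lowest virtual address and acts as the base. */
            return __fix_to_virt(FIX_KMAP_END) + idx * PAGE_SIZE;
    }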
*/ -#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) +#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << MMUPAGE_SHIFT) extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/asm-i386/io_apic.h pgcl-bk/include/asm-i386/io_apic.h --- linux-2.5-bk/include/asm-i386/io_apic.h Tue Jan 7 19:09:01 2003 +++ pgcl-bk/include/asm-i386/io_apic.h Mon Jan 6 22:17:40 2003 @@ -17,7 +17,7 @@ #define IO_APIC_BASE(idx) \ ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \ - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK))) + + (mp_ioapics[idx].mpc_apicaddr & ~MMUPAGE_MASK))) /* * The structure of the IO-APIC: diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/asm-i386/mmzone.h pgcl-bk/include/asm-i386/mmzone.h --- linux-2.5-bk/include/asm-i386/mmzone.h Tue Jan 7 19:09:01 2003 +++ pgcl-bk/include/asm-i386/mmzone.h Tue Jan 7 19:06:40 2003 @@ -28,18 +28,18 @@ #define alloc_bootmem_low(x) \ __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, 0) #define alloc_bootmem_pages(x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(NODE_DATA(0), (x), MMUPAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages(x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) + __alloc_bootmem_node(NODE_DATA(0), (x), MMUPAGE_SIZE, 0) #define alloc_bootmem_node(ignore, x) \ __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(ignore, x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(NODE_DATA(0), (x), MMUPAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(ignore, x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) + __alloc_bootmem_node(NODE_DATA(0), (x), MMUPAGE_SIZE, 0) #define node_size(nid) (node_data[nid]->node_size) -#define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn) +#define node_localnr(pfn, nid) (((pfn) - node_data[nid]->node_start_pfn) / PAGE_MMUCOUNT) /* * Following are macros that each numa implmentation must define. @@ -48,27 +48,38 @@ /* * Given a kernel address, find the home node of the underlying memory. */ -#define kvaddr_to_nid(kaddr) pfn_to_nid(__pa(kaddr) >> PAGE_SHIFT) +#define kvaddr_to_nid(kaddr) pfn_to_nid(__pa(kaddr) >> MMUPAGE_SHIFT) /* * Return a pointer to the node data for node n. */ #define NODE_DATA(nid) (node_data[nid]) +/* + * Um, these things have name clashes. I blame mbligh + */ #define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) + +/* + * pgdat->node_size is calculated from zone_sizes[], which is in + * units of PAGE_SIZE. I don't really trust this... 
+ */ #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ - NODE_DATA(nid)->node_size) + NODE_DATA(nid)->node_size*PAGE_MMUCOUNT) #define local_mapnr(kvaddr) \ - ( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) ) + (((__pa(kvaddr)/MMUPAGE_SIZE) \ + - node_start_pfn(kvaddr_to_nid(kvaddr)))/PAGE_MMUCOUNT) + +#define local_pfn(page) (((page)-page_zone(page)->zone_mem_map)*PAGE_MMUCOUNT) #define kern_addr_valid(kaddr) test_bit(local_mapnr(kaddr), \ NODE_DATA(kvaddr_to_nid(kaddr))->valid_addr_bitmap) #define pfn_to_page(pfn) (node_mem_map(pfn_to_nid(pfn)) + node_localnr(pfn, pfn_to_nid(pfn))) -#define page_to_pfn(page) ((page - page_zone(page)->zone_mem_map) + page_zone(page)->zone_start_pfn) -#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) +#define page_to_pfn(page) (local_pfn(page) + page_zone(page)->zone_start_pfn) +#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> MMUPAGE_SHIFT)) /* * pfn_valid should be made as fast as possible, and the current definition * is valid for machines that are NUMA, but still contiguous, which is what @@ -76,6 +87,6 @@ * be something like this - mbligh: * ( pfn_to_pgdat(pfn) && (pfn < node_end_pfn(pfn_to_nid(pfn))) ) */ -#define pfn_valid(pfn) (pfn < num_physpages) +#define pfn_valid(pfn) ((pfn) < num_physpages) #endif /* CONFIG_DISCONTIGMEM */ #endif /* _ASM_MMZONE_H_ */ diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/asm-i386/numaq.h pgcl-bk/include/asm-i386/numaq.h --- linux-2.5-bk/include/asm-i386/numaq.h Tue Jan 7 19:09:01 2003 +++ pgcl-bk/include/asm-i386/numaq.h Mon Jan 6 22:17:40 2003 @@ -39,7 +39,7 @@ #define PAGES_PER_ELEMENT (16777216/256) #define pfn_to_pgdat(pfn) NODE_DATA(pfn_to_nid(pfn)) -#define PHYSADDR_TO_NID(pa) pfn_to_nid(pa >> PAGE_SHIFT) +#define PHYSADDR_TO_NID(pa) pfn_to_nid((pa) >> MMUPAGE_SHIFT) #define MAX_NUMNODES 8 extern int pfn_to_nid(unsigned long); extern void get_memcfg_numaq(void); diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/asm-i386/page.h pgcl-bk/include/asm-i386/page.h --- linux-2.5-bk/include/asm-i386/page.h Tue Jan 7 19:09:01 2003 +++ pgcl-bk/include/asm-i386/page.h Tue Jan 7 19:06:40 2003 @@ -1,13 +1,34 @@ #ifndef _I386_PAGE_H #define _I386_PAGE_H -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 +/* + * One mmupage is represented by one Page Table Entry at the MMU level, + * and corresponds to one page at the user process level: its size is + * the same as param.h EXEC_PAGESIZE (for getpagesize(2) and mmap(2)). + */ +#define MMUPAGE_SHIFT 12 +#define MMUPAGE_SIZE (1 << MMUPAGE_SHIFT) +#define MMUPAGE_MASK (~(MMUPAGE_SIZE-1)) + +/* + * 2**N adjacent mmupages may be clustered to make up one kernel page. + * Reasonable and tested values for PAGE_MMUSHIFT are 0 (4k page), + * 1 (8k page), 2 (16k page), 3 (32k page). Higher values will not + * work without further changes e.g. to unsigned short b_size. + */ +#define PAGE_MMUSHIFT 2 +#define PAGE_MMUCOUNT (1 << PAGE_MMUSHIFT) + +/* + * One kernel page is represented by one struct page (see mm.h), + * and is the kernel's principal unit of memory allocation. 
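With the PAGE_MMUSHIFT of 2 configured above, the arithmetic works out as follows (the pfn values are only example numbers; shifts 0 through 3 behave analogously):

    /*
     * Illustration (PAGE_MMUSHIFT == 2):
     *
     *      MMUPAGE_SHIFT = 12      MMUPAGE_SIZE = 4KB      (one pte)
     *      PAGE_MMUCOUNT = 4       mmupages per kernel page
     *      PAGE_SHIFT    = 14      PAGE_SIZE    = 16KB     (one struct page)
     *
     * pfns remain in MMUPAGE_SIZE units, so the pfn_to_page()/page_to_pfn()
     * definitions that follow scale by PAGE_MMUCOUNT:
     *
     *      pfn_to_page(0x1002) == &mem_map[0x1002 / 4] == &mem_map[0x400]
     *      page_to_pfn(&mem_map[0x400]) == 0x400 * 4 == 0x1000
     */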
+ */ +#define PAGE_SHIFT (PAGE_MMUSHIFT + MMUPAGE_SHIFT) #define PAGE_SIZE (1UL << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) -#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) +#define LARGE_PAGE_SIZE (1 << PMD_SHIFT) #ifdef __KERNEL__ #ifndef __ASSEMBLY__ @@ -75,6 +96,7 @@ /* to align the pointer to the (next) page boundary */ #define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) +#define MMUPAGE_ALIGN(addr) (((addr)+MMUPAGE_SIZE-1)&MMUPAGE_MASK) /* * This handles the memory map.. We could make this a config @@ -142,15 +164,16 @@ #define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)) #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +#define pfn_to_kaddr(pfn) __va((pfn)*MMUPAGE_SIZE) #ifndef CONFIG_DISCONTIGMEM -#define pfn_to_page(pfn) (mem_map + (pfn)) -#define page_to_pfn(page) ((unsigned long)((page) - mem_map)) +#define pfn_to_page(pfn) (&mem_map[(pfn)/PAGE_MMUCOUNT]) +#define page_to_mapnr(page) ((unsigned long)((page)-mem_map)) +#define page_to_pfn(page) (page_to_mapnr(page)*PAGE_MMUCOUNT) #define pfn_valid(pfn) ((pfn) < max_mapnr) #endif /* !CONFIG_DISCONTIGMEM */ -#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr)/MMUPAGE_SIZE) -#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr)/MMUPAGE_SIZE) #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/asm-i386/pgtable-3level.h pgcl-bk/include/asm-i386/pgtable-3level.h --- linux-2.5-bk/include/asm-i386/pgtable-3level.h Tue Jan 7 19:09:01 2003 +++ pgcl-bk/include/asm-i386/pgtable-3level.h Mon Jan 6 22:17:40 2003 @@ -65,7 +65,7 @@ static inline void pgd_clear (pgd_t * pgd) { } #define pgd_page(pgd) \ -((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +((unsigned long) __va(pgd_val(pgd) & MMUPAGE_MASK)) /* Find an entry in the second-level page table.. 
*/ #define pmd_offset(dir, address) ((pmd_t *) pgd_page(*(dir)) + \ @@ -90,20 +90,20 @@ #define pte_page(x) pfn_to_page(pte_pfn(x)) #define pte_none(x) (!(x).pte_low && !(x).pte_high) -#define pte_pfn(x) (((x).pte_low >> PAGE_SHIFT) | ((x).pte_high << (32 - PAGE_SHIFT))) +#define pte_pfn(x) (((x).pte_low >> MMUPAGE_SHIFT) | ((x).pte_high << (32 - MMUPAGE_SHIFT))) static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { pte_t pte; - pte.pte_high = page_nr >> (32 - PAGE_SHIFT); - pte.pte_low = (page_nr << PAGE_SHIFT) | pgprot_val(pgprot); + pte.pte_high = page_nr >> (32 - MMUPAGE_SHIFT); + pte.pte_low = (page_nr << MMUPAGE_SHIFT) | pgprot_val(pgprot); return pte; } static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) { - return __pmd(((unsigned long long)page_nr << PAGE_SHIFT) | pgprot_val(pgprot)); + return __pmd(((unsigned long long)page_nr << MMUPAGE_SHIFT) | pgprot_val(pgprot)); } extern struct kmem_cache_s *pae_pgd_cachep; diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/asm-i386/pgtable.h pgcl-bk/include/asm-i386/pgtable.h --- linux-2.5-bk/include/asm-i386/pgtable.h Tue Jan 7 19:09:01 2003 +++ pgcl-bk/include/asm-i386/pgtable.h Mon Jan 6 22:17:40 2003 @@ -88,11 +88,7 @@ #define VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \ ~(VMALLOC_OFFSET-1)) #define VMALLOC_VMADDR(x) ((unsigned long)(x)) -#if CONFIG_HIGHMEM -# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) -#else -# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) -#endif +# define VMALLOC_END (FIXADDR_START-2*MMUPAGE_SIZE) /* * The 4MB page is guessing.. Detailed in the infamous "Chapter H" @@ -184,7 +180,7 @@ #define pmd_none(x) (!pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) -#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) +#define pmd_bad(x) ((pmd_val(x) & (~MMUPAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) @@ -232,17 +228,17 @@ #define page_pte(page) page_pte_prot(page, __pgprot(0)) #define pmd_page_kernel(pmd) \ -((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) +((unsigned long) __va(pmd_val(pmd) & MMUPAGE_MASK)) #ifndef CONFIG_DISCONTIGMEM -#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) +#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> MMUPAGE_SHIFT)) #endif /* !CONFIG_DISCONTIGMEM */ #define pmd_large(pmd) \ ((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT)) /* to find an entry in a page-table-directory. */ -#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) +#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) #define __pgd_offset(address) pgd_index(address) @@ -256,7 +252,7 @@ /* Find an entry in the third-level page table.. 
*/ #define __pte_offset(address) \ - (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + (((address) >> MMUPAGE_SHIFT) & (PTRS_PER_PTE - 1)) #define pte_offset_kernel(dir, address) \ ((pte_t *) pmd_page_kernel(*(dir)) + __pte_offset(address)) diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/asm-i386/setup.h pgcl-bk/include/asm-i386/setup.h --- linux-2.5-bk/include/asm-i386/setup.h Tue Jan 7 19:09:01 2003 +++ pgcl-bk/include/asm-i386/setup.h Mon Jan 6 22:17:40 2003 @@ -6,15 +6,15 @@ #ifndef _i386_SETUP_H #define _i386_SETUP_H -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -#define PFN_PHYS(x) ((x) << PAGE_SHIFT) +#define PFN_UP(x) (((x) + MMUPAGE_SIZE-1) >> MMUPAGE_SHIFT) +#define PFN_DOWN(x) ((x) >> MMUPAGE_SHIFT) +#define PFN_PHYS(x) ((x) << MMUPAGE_SHIFT) /* * Reserved space for vmalloc and iomap - defined in asm/page.h */ #define MAXMEM_PFN PFN_DOWN(MAXMEM) -#define MAX_NONPAE_PFN (1 << 20) +#define MAX_NONPAE_PFN (1 << (32 - MMUPAGE_SHIFT)) /* * This is set up by the setup-routine at boot-time diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/asm-i386/thread_info.h pgcl-bk/include/asm-i386/thread_info.h --- linux-2.5-bk/include/asm-i386/thread_info.h Tue Jan 7 19:09:01 2003 +++ pgcl-bk/include/asm-i386/thread_info.h Mon Jan 6 22:17:40 2003 @@ -51,6 +51,7 @@ #endif #define PREEMPT_ACTIVE 0x4000000 +#define THREAD_SIZE (2*MMUPAGE_SIZE) /* * macros/functions for gaining access to the thread information structure @@ -79,14 +80,13 @@ static inline struct thread_info *current_thread_info(void) { struct thread_info *ti; - __asm__("andl %%esp,%0; ":"=r" (ti) : "0" (~8191UL)); + __asm__("andl %%esp,%0; ":"=r" (ti) : "0" (~(THREAD_SIZE - 1))); return ti; } /* thread information allocation */ -#define THREAD_SIZE (2*PAGE_SIZE) -#define alloc_thread_info() ((struct thread_info *) __get_free_pages(GFP_KERNEL,1)) -#define free_thread_info(ti) free_pages((unsigned long) (ti), 1) +#define alloc_thread_info() ((struct thread_info *) kmalloc(THREAD_SIZE, SLAB_KERNEL)) +#define free_thread_info(ti) kfree(ti) #define get_thread_info(ti) get_task_struct((ti)->task) #define put_thread_info(ti) put_task_struct((ti)->task) @@ -94,7 +94,7 @@ /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg) \ - movl $-8192, reg; \ + movl $~(THREAD_SIZE-1), reg; \ andl %esp, reg #endif diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/linux/blkdev.h pgcl-bk/include/linux/blkdev.h --- linux-2.5-bk/include/linux/blkdev.h Tue Jan 7 19:09:32 2003 +++ pgcl-bk/include/linux/blkdev.h Mon Jan 6 22:17:45 2003 @@ -287,8 +287,8 @@ * BLK_BOUNCE_ANY : don't bounce anything * BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary */ -#define BLK_BOUNCE_HIGH (blk_max_low_pfn << PAGE_SHIFT) -#define BLK_BOUNCE_ANY (blk_max_pfn << PAGE_SHIFT) +#define BLK_BOUNCE_HIGH ((dma_addr_t)blk_max_low_pfn << MMUPAGE_SHIFT) +#define BLK_BOUNCE_ANY ((dma_addr_t)blk_max_pfn << MMUPAGE_SHIFT) #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) extern int init_emergency_isa_pool(void); diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/linux/highmem.h pgcl-bk/include/linux/highmem.h --- linux-2.5-bk/include/linux/highmem.h Tue Jan 7 19:09:33 2003 +++ pgcl-bk/include/linux/highmem.h Mon Jan 6 22:17:46 2003 @@ -43,6 +43,13 @@ kunmap_atomic(addr, KM_USER0); } +static inline void clear_user_mmupages(struct page *page, int offset, int size) +{ + char 
*addr = kmap_atomic(page, KM_USER0); + memset(&addr[offset], 0, size); + kunmap_atomic(addr, KM_USER0); +} + static inline void clear_highpage(struct page *page) { void *kaddr = kmap_atomic(page, KM_USER0); @@ -76,6 +83,17 @@ copy_user_page(vto, vfrom, vaddr, to); kunmap_atomic(vfrom, KM_USER0); kunmap_atomic(vto, KM_USER1); +} + +static inline void copy_user_mmupages(struct page *dst, struct page *src, int offset, int size) +{ + char *vfrom, *vto; + + vfrom = kmap_atomic(src, KM_USER0); + vto = kmap_atomic(dst, KM_USER1); + memcpy(&vto[offset], &vfrom[offset], size); + kunmap_atomic(src, KM_USER0); + kunmap_atomic(dst, KM_USER1); } static inline void copy_highpage(struct page *to, struct page *from) diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/linux/mm.h pgcl-bk/include/linux/mm.h --- linux-2.5-bk/include/linux/mm.h Tue Jan 7 19:09:33 2003 +++ pgcl-bk/include/linux/mm.h Tue Jan 7 19:06:42 2003 @@ -70,7 +70,7 @@ struct vm_operations_struct * vm_ops; /* Information about our backing store: */ - unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE + unsigned long vm_pgoff; /* Offset (within vm_file) in MMUPAGE_SIZE units, *not* PAGE_CACHE_SIZE */ struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */ @@ -163,7 +163,8 @@ atomic_t count; /* Usage count, see below. */ struct list_head list; /* ->mapping has some page lists. */ struct address_space *mapping; /* The inode (or ...) we belong to. */ - unsigned long index; /* Our offset within mapping. */ + unsigned long index; /* Our offset within mapping. + * in PAGE_CACHE_SIZE units. */ struct list_head lru; /* Pageout list, eg. active_list; protected by zone->lru_lock !! */ union { @@ -295,9 +296,18 @@ page->flags |= zone_num << ZONE_SHIFT; } +#if 0 +#define lowmem_page_address(page) __va(page_to_pfn(page)*MMUPAGE_SHIFT) +#else #define lowmem_page_address(page) \ - __va( ( ((page) - page_zone(page)->zone_mem_map) \ - + page_zone(page)->zone_start_pfn) << PAGE_SHIFT) +({ \ + extern unsigned long max_low_pfn; \ + const unsigned long __lpa_pfn = page_to_pfn(page); \ + BUG_ON(max_low_pfn && __lpa_pfn > max_low_pfn); \ + BUG_ON(__lpa_pfn >= (~PAGE_OFFSET+1)/MMUPAGE_SIZE); \ + __va(__lpa_pfn << MMUPAGE_SHIFT); \ +}) +#endif #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) #define HASHED_PAGE_VIRTUAL @@ -465,10 +475,10 @@ unsigned long flag, unsigned long offset) { unsigned long ret = -EINVAL; - if ((offset + PAGE_ALIGN(len)) < offset) + if ((offset + MMUPAGE_ALIGN(len)) < offset) goto out; - if (!(offset & ~PAGE_MASK)) - ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); + if (!(offset & ~MMUPAGE_MASK)) + ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> MMUPAGE_SHIFT); out: return ret; } @@ -543,6 +553,16 @@ } extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr); +/* + * Return byte offset from start of page containing virtual address in + * vma, to start of mmupage containing it: 0 if PAGE_MMUSHIFT 0. 
+ */ +static inline unsigned long vma_suboffset(struct vm_area_struct *vma, unsigned long address) +{ + return (address - vma->vm_start + vma->vm_pgoff * MMUPAGE_SIZE) + & (MMUPAGE_MASK - PAGE_MASK); +} + extern struct page * vmalloc_to_page(void *addr); extern unsigned long get_page_cache_size(void); diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/linux/pagemap.h pgcl-bk/include/linux/pagemap.h --- linux-2.5-bk/include/linux/pagemap.h Tue Jan 7 19:09:35 2003 +++ pgcl-bk/include/linux/pagemap.h Mon Jan 6 22:17:46 2003 @@ -22,6 +22,9 @@ #define PAGE_CACHE_MASK PAGE_MASK #define PAGE_CACHE_ALIGN(addr) (((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK) +#define PAGE_CACHE_MMUSHIFT (PAGE_CACHE_SHIFT - MMUPAGE_SHIFT) +#define PAGE_CACHE_MMUCOUNT (PAGE_CACHE_SIZE/MMUPAGE_SIZE) + #define page_cache_get(page) get_page(page) #define page_cache_release(page) put_page(page) void release_pages(struct page **pages, int nr, int cold); diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/linux/sched.h pgcl-bk/include/linux/sched.h --- linux-2.5-bk/include/linux/sched.h Tue Jan 7 19:09:37 2003 +++ pgcl-bk/include/linux/sched.h Tue Jan 7 18:17:22 2003 @@ -192,7 +192,7 @@ unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; unsigned long arg_start, arg_end, env_start, env_end; - unsigned long rss, total_vm, locked_vm; + unsigned long rss, total_vm, locked_vm; /* in MMUPAGE_SIZE units */ unsigned long def_flags; unsigned long cpu_vm_mask; unsigned long swap_address; diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/linux/shm.h pgcl-bk/include/linux/shm.h --- linux-2.5-bk/include/linux/shm.h Tue Jan 7 19:09:38 2003 +++ pgcl-bk/include/linux/shm.h Mon Jan 6 22:17:46 2003 @@ -12,7 +12,7 @@ #define SHMMAX 0x2000000 /* max shared seg size (bytes) */ #define SHMMIN 1 /* min shared seg size (bytes) */ #define SHMMNI 4096 /* max num of segs system wide */ -#define SHMALL (SHMMAX/PAGE_SIZE*(SHMMNI/16)) /* max shm system wide (pages) */ +#define SHMALL (SHMMAX/MMUPAGE_SIZE*(SHMMNI/16)) /* max shm system wide (mmupages) */ #define SHMSEG SHMMNI /* max shared segs per process */ #include diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/include/linux/swap.h pgcl-bk/include/linux/swap.h --- linux-2.5-bk/include/linux/swap.h Tue Jan 7 19:09:39 2003 +++ pgcl-bk/include/linux/swap.h Mon Jan 6 22:17:47 2003 @@ -45,7 +45,7 @@ */ union swap_header { struct { - char reserved[PAGE_SIZE - 10]; + char reserved[MMUPAGE_SIZE - 10]; char magic[10]; /* SWAP-SPACE or SWAPSPACE2 */ } magic; struct { @@ -103,8 +103,8 @@ #define SWAP_CLUSTER_MAX 32 -#define SWAP_MAP_MAX 0x7fff -#define SWAP_MAP_BAD 0x8000 +#define SWAP_MAP_MAX 0xfffe +#define SWAP_MAP_BAD 0xffff /* * The in-memory structure used to track swap areas. 
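The vma_suboffset() helper added to linux/mm.h above pairs with the copy_user_mmupages()/clear_user_mmupages() additions to linux/highmem.h: a path that only cares about the one 4K mmupage backing a user address can restrict its copy to that sub-block of the kernel page. A rough usage sketch (not a hunk from this patch; copy_one_mmupage is a made-up name):

    /* Copy only the mmupage of src that backs `address' in `vma'. */
    static void copy_one_mmupage(struct page *dst, struct page *src,
                                 struct vm_area_struct *vma,
                                 unsigned long address)
    {
            /* 0, MMUPAGE_SIZE, ... (PAGE_MMUCOUNT-1)*MMUPAGE_SIZE */
            unsigned long offset = vma_suboffset(vma, address);

            copy_user_mmupages(dst, src, offset, MMUPAGE_SIZE);
    }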
diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/init/main.c pgcl-bk/init/main.c --- linux-2.5-bk/init/main.c Tue Jan 7 19:09:54 2003 +++ pgcl-bk/init/main.c Tue Jan 7 18:17:22 2003 @@ -409,9 +409,9 @@ calibrate_delay(); #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start && !initrd_below_start_ok && - initrd_start < min_low_pfn << PAGE_SHIFT) { + initrd_start < min_low_pfn << MMUPAGE_SHIFT) { printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - " - "disabling it.\n",initrd_start,min_low_pfn << PAGE_SHIFT); + "disabling it.\n",initrd_start,min_low_pfn << MMUPAGE_SHIFT); initrd_start = 0; } #endif diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/ipc/shm.c pgcl-bk/ipc/shm.c --- linux-2.5-bk/ipc/shm.c Tue Jan 7 19:09:54 2003 +++ pgcl-bk/ipc/shm.c Mon Jan 6 22:17:48 2003 @@ -110,7 +110,7 @@ */ static void shm_destroy (struct shmid_kernel *shp) { - shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; + shm_tot -= (shp->shm_segsz + MMUPAGE_SIZE - 1) >> MMUPAGE_SHIFT; shm_rmid (shp->id); shm_unlock(shp); if (!is_file_hugepages(shp->shm_file)) @@ -169,7 +169,7 @@ { int error; struct shmid_kernel *shp; - int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; + int numpages = (size + MMUPAGE_SIZE -1) >> MMUPAGE_SHIFT; struct file * file; char name[13]; int id; @@ -710,7 +710,7 @@ for (shmd = mm->mmap; shmd; shmd = shmdnext) { shmdnext = shmd->vm_next; if ((shmd->vm_ops == &shm_vm_ops || (shmd->vm_flags & VM_HUGETLB)) - && shmd->vm_start - (shmd->vm_pgoff << PAGE_SHIFT) == (ulong) shmaddr) { + && shmd->vm_start - (shmd->vm_pgoff << MMUPAGE_SHIFT) == (ulong) shmaddr) { do_munmap(mm, shmd->vm_start, shmd->vm_end - shmd->vm_start); retval = 0; } diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/kernel/fork.c pgcl-bk/kernel/fork.c --- linux-2.5-bk/kernel/fork.c Tue Jan 7 19:09:54 2003 +++ pgcl-bk/kernel/fork.c Mon Jan 6 22:17:48 2003 @@ -156,16 +156,17 @@ task_struct_cachep = kmem_cache_create("task_struct", sizeof(struct task_struct),0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_MUST_HWCACHE_ALIGN, + NULL, NULL); if (!task_struct_cachep) panic("fork_init(): cannot create task_struct SLAB cache"); /* * The default maximum number of threads is set to a safe * value: the thread structures can take up at most half - * of memory. + * of low memory. 
*/ - max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8; + max_threads = mempages / 8; /* * we need to allow at least 20 threads to boot a system */ diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/mm/bootmem.c pgcl-bk/mm/bootmem.c --- linux-2.5-bk/mm/bootmem.c Tue Jan 7 19:09:56 2003 +++ pgcl-bk/mm/bootmem.c Tue Jan 7 19:06:42 2003 @@ -33,10 +33,7 @@ unsigned long mapsize; mapsize = (pages+7)/8; - mapsize = (mapsize + ~PAGE_MASK) & PAGE_MASK; - mapsize >>= PAGE_SHIFT; - - return mapsize; + return (mapsize + MMUPAGE_SIZE - 1) >> MMUPAGE_SHIFT; } /* @@ -46,14 +43,17 @@ unsigned long mapstart, unsigned long start, unsigned long end) { bootmem_data_t *bdata = pgdat->bdata; - unsigned long mapsize = ((end - start)+7)/8; + unsigned long mapsize; pgdat->pgdat_next = pgdat_list; pgdat_list = pgdat; + /* round start down to simplify free_all_bootmem_core() */ + start &= ~(PAGE_MMUCOUNT - 1); + mapsize = ((end - start)+7)/8; mapsize = (mapsize + (sizeof(long) - 1UL)) & ~(sizeof(long) - 1UL); - bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT); - bdata->node_boot_start = (start << PAGE_SHIFT); + bdata->node_bootmem_map = phys_to_virt(mapstart << MMUPAGE_SHIFT); + bdata->node_boot_start = (start << MMUPAGE_SHIFT); bdata->node_low_pfn = end; /* @@ -77,10 +77,10 @@ * round up, partially reserved pages are considered * fully reserved. */ - unsigned long sidx = (addr - bdata->node_boot_start)/PAGE_SIZE; + unsigned long sidx = (addr - bdata->node_boot_start)/MMUPAGE_SIZE; unsigned long eidx = (addr + size - bdata->node_boot_start + - PAGE_SIZE-1)/PAGE_SIZE; - unsigned long end = (addr + size + PAGE_SIZE-1)/PAGE_SIZE; + MMUPAGE_SIZE-1)/MMUPAGE_SIZE; + unsigned long end_pfn = (addr + size + MMUPAGE_SIZE-1)/MMUPAGE_SIZE; if (!size) BUG(); @@ -90,13 +90,11 @@ BUG(); if (sidx >= eidx) BUG(); - if ((addr >> PAGE_SHIFT) >= bdata->node_low_pfn) - BUG(); - if (end > bdata->node_low_pfn) + if (end_pfn > bdata->node_low_pfn) BUG(); for (i = sidx; i < eidx; i++) if (test_and_set_bit(i, bdata->node_bootmem_map)) - printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE); + printk("hm, page %08lx reserved twice.\n", i*MMUPAGE_SIZE); } static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) @@ -108,18 +106,18 @@ * considered reserved. */ unsigned long sidx; - unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE; - unsigned long end = (addr + size)/PAGE_SIZE; + unsigned long eidx = (addr + size - bdata->node_boot_start)/MMUPAGE_SIZE; + unsigned long end_pfn = (addr + size)/MMUPAGE_SIZE; if (!size) BUG(); - if (end > bdata->node_low_pfn) + if (end_pfn > bdata->node_low_pfn) BUG(); /* * Round up the beginning of the address. */ - start = (addr + PAGE_SIZE-1) / PAGE_SIZE; - sidx = start - (bdata->node_boot_start/PAGE_SIZE); + start = (addr + MMUPAGE_SIZE-1) / MMUPAGE_SIZE; + sidx = start - (bdata->node_boot_start/MMUPAGE_SIZE); for (i = sidx; i < eidx; i++) { if (!test_and_clear_bit(i, bdata->node_bootmem_map)) @@ -148,7 +146,7 @@ unsigned long offset, remaining_size; unsigned long areasize, preferred, incr; unsigned long eidx = bdata->node_low_pfn - (bdata->node_boot_start >> - PAGE_SHIFT); + MMUPAGE_SHIFT); if (!size) BUG(); @@ -159,22 +157,22 @@ if (align && (bdata->node_boot_start & (align - 1UL)) != 0) offset = (align - (bdata->node_boot_start & (align - 1UL))); - offset >>= PAGE_SHIFT; + offset >>= MMUPAGE_SHIFT; /* * We try to allocate bootmem pages above 'goal' * first, then we try to allocate lower pages. 
*/ if (goal && (goal >= bdata->node_boot_start) && - ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) { + ((goal >> MMUPAGE_SHIFT) < bdata->node_low_pfn)) { preferred = goal - bdata->node_boot_start; } else preferred = 0; - preferred = ((preferred + align - 1) & ~(align - 1)) >> PAGE_SHIFT; + preferred = ((preferred + align - 1) & ~(align - 1)) >> MMUPAGE_SHIFT; preferred += offset; - areasize = (size+PAGE_SIZE-1)/PAGE_SIZE; - incr = align >> PAGE_SHIFT ? : 1; + areasize = (size+MMUPAGE_SIZE-1)/MMUPAGE_SIZE; + incr = align >> MMUPAGE_SHIFT ? : 1; restart_scan: for (i = preferred; i < eidx; i += incr) { @@ -205,31 +203,31 @@ * of this allocation's buffer? If yes then we can 'merge' * the previous partial page with this allocation. */ - if (align < PAGE_SIZE + if (align < MMUPAGE_SIZE && bdata->last_offset && bdata->last_pos+1 == start) { offset = (bdata->last_offset+align-1) & ~(align-1); - if (offset > PAGE_SIZE) + if (offset > MMUPAGE_SIZE) BUG(); - remaining_size = PAGE_SIZE-offset; + remaining_size = MMUPAGE_SIZE-offset; if (size < remaining_size) { areasize = 0; // last_pos unchanged bdata->last_offset = offset+size; - ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + + ret = phys_to_virt(bdata->last_pos*MMUPAGE_SIZE + offset + bdata->node_boot_start); } else { remaining_size = size - remaining_size; - areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE; - ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + + areasize = (remaining_size+MMUPAGE_SIZE-1)/MMUPAGE_SIZE; + ret = phys_to_virt(bdata->last_pos*MMUPAGE_SIZE + offset + bdata->node_boot_start); bdata->last_pos = start+areasize-1; bdata->last_offset = remaining_size; } - bdata->last_offset &= ~PAGE_MASK; + bdata->last_offset &= ~MMUPAGE_MASK; } else { bdata->last_pos = start + areasize - 1; - bdata->last_offset = size & ~PAGE_MASK; - ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start); + bdata->last_offset = size & ~MMUPAGE_MASK; + ret = phys_to_virt(start * MMUPAGE_SIZE + bdata->node_boot_start); } /* * Reserve the area now: @@ -245,49 +243,37 @@ { struct page *page = pgdat->node_mem_map; bootmem_data_t *bdata = pgdat->bdata; - unsigned long i, count, total = 0; - unsigned long idx; + unsigned long i, total = 0; + unsigned long idx, mapnr, node_low_mapnr; unsigned long *map; - if (!bdata->node_bootmem_map) BUG(); - - count = 0; - idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); + BUG_ON(!bdata->node_bootmem_map); map = bdata->node_bootmem_map; - for (i = 0; i < idx; ) { - unsigned long v = ~map[i / BITS_PER_LONG]; - if (v) { - unsigned long m; - for (m = 1; m && i < idx; m<<=1, page++, i++) { - if (v & m) { - count++; + i = 0; + idx = bdata->node_low_pfn - (bdata->node_boot_start >> MMUPAGE_SHIFT); + + node_low_mapnr = (bdata->node_low_pfn - bdata->node_boot_start/MMUPAGE_SIZE)/PAGE_MMUCOUNT; + for (mapnr = 0; mapnr < node_low_mapnr; ++mapnr) { + int k, should_free = 1; + for (k = 0; k < PAGE_MMUCOUNT; ++k) + if (test_bit(mapnr*PAGE_MMUCOUNT + k, map)) + should_free = 0; + if (should_free) { + page = &pgdat->node_mem_map[mapnr]; ClearPageReserved(page); set_page_count(page, 1); __free_page(page); + ++total; } } - } else { - i+=BITS_PER_LONG; - page+=BITS_PER_LONG; - } - } - total += count; - - /* - * Now free the allocator bitmap itself, it's not - * needed anymore: - */ - page = virt_to_page(bdata->node_bootmem_map); - count = 0; - for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) { - count++; - ClearPageReserved(page); - 
set_page_count(page, 1); - __free_page(page); + + /* } - total += count; - bdata->node_bootmem_map = NULL; + * Leak the allocator bitmap; it's not worth saving. + */ + bdata->node_bootmem_map = NULL; + printk("bootmem: freed %lx pages in node %d\n", total, pgdat->node_id); return total; } diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/mm/filemap.c pgcl-bk/mm/filemap.c --- linux-2.5-bk/mm/filemap.c Tue Jan 7 19:09:56 2003 +++ pgcl-bk/mm/filemap.c Tue Jan 7 19:06:42 2003 @@ -695,8 +695,8 @@ * If the page was already mapped, this will get a cache miss * for sure, so try to avoid doing it. */ - if (((unsigned long)uaddr & PAGE_MASK) != - ((unsigned long)end & PAGE_MASK)) + if (((unsigned long)uaddr & MMUPAGE_MASK) != + ((unsigned long)end & MMUPAGE_MASK)) ret = __put_user(0, end); } return ret; @@ -711,8 +711,8 @@ if (ret == 0) { const char *end = uaddr + size - 1; - if (((unsigned long)uaddr & PAGE_MASK) != - ((unsigned long)end & PAGE_MASK)) + if (((unsigned long)uaddr & MMUPAGE_MASK) != + ((unsigned long)end & MMUPAGE_MASK)) __get_user(c, (char *)end); } } @@ -971,7 +971,7 @@ * it in the page cache, and handles the special cases reasonably without * having a lot of duplicated code. */ -struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address, int unused) +struct page *filemap_nopage(struct vm_area_struct * area, unsigned long address, int unused) { int error; struct file *file = area->vm_file; @@ -982,8 +982,8 @@ unsigned long size, pgoff, endoff; int did_readahead; - pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; - endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; + pgoff = (address - area->vm_start)/MMUPAGE_SIZE + area->vm_pgoff; + endoff = (area->vm_end - area->vm_start + MMUPAGE_SIZE - 1)/MMUPAGE_SIZE + area->vm_pgoff; retry_all: /* @@ -991,15 +991,15 @@ * accessible.. */ size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - if ((pgoff >= size) && (area->vm_mm == current->mm)) + if ((pgoff/PAGE_CACHE_MMUCOUNT >= size) && (area->vm_mm == current->mm)) return NULL; /* * The "size" of the file, as far as mmap is concerned, isn't bigger * than the mapping */ - if (size > endoff) - size = endoff; + if (size > endoff/PAGE_CACHE_MMUCOUNT) + size = endoff/PAGE_CACHE_MMUCOUNT; did_readahead = 0; @@ -1009,23 +1009,23 @@ */ if (VM_SequentialReadHint(area)) { did_readahead = 1; - page_cache_readahead(mapping, ra, file, pgoff); + page_cache_readahead(mapping, ra, file, pgoff/PAGE_CACHE_MMUCOUNT); } /* * If the offset is outside the mapping size we're off the end * of a privately mapped file, so we need to map a zero page. */ - if ((pgoff < size) && !VM_RandomReadHint(area)) { + if ((pgoff/PAGE_CACHE_MMUCOUNT < size) && !VM_RandomReadHint(area)) { did_readahead = 1; - page_cache_readaround(mapping, ra, file, pgoff); + page_cache_readaround(mapping, ra, file, pgoff/PAGE_CACHE_MMUCOUNT); } /* * Do we have something in the page cache already? */ retry_find: - page = find_get_page(mapping, pgoff); + page = find_get_page(mapping, pgoff/PAGE_CACHE_MMUCOUNT); if (!page) { if (did_readahead) { handle_ra_miss(mapping,ra); @@ -1055,7 +1055,7 @@ * We're only likely to ever get here if MADV_RANDOM is in * effect. */ - error = page_cache_read(file, pgoff); + error = page_cache_read(file, pgoff/PAGE_CACHE_MMUCOUNT); /* * The page we want has now been added to the page cache. 
@@ -1273,8 +1273,8 @@ } } - len -= PAGE_SIZE; - addr += PAGE_SIZE; + len -= MMUPAGE_SIZE; + addr += MMUPAGE_SIZE; pgoff++; if (len) goto repeat; diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/mm/highmem.c pgcl-bk/mm/highmem.c --- linux-2.5-bk/mm/highmem.c Tue Jan 7 19:09:56 2003 +++ pgcl-bk/mm/highmem.c Tue Jan 7 19:06:42 2003 @@ -53,7 +53,7 @@ static unsigned int last_pkmap_nr; static spinlock_t kmap_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; -pte_t * pkmap_page_table; +/* pte_t * pkmap_page_table; */ static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait); @@ -64,7 +64,12 @@ flush_cache_all(); for (i = 0; i < LAST_PKMAP; i++) { + int j; + unsigned long vaddr = PKMAP_ADDR(i); struct page *page; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; /* * zero means we don't have anything to do, @@ -76,9 +81,14 @@ continue; pkmap_count[i] = 0; + pgd = pgd_offset_k(vaddr); + pmd = pmd_offset(pgd, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + /* sanity check */ - if (pte_none(pkmap_page_table[i])) - BUG(); + /* buggy, may span discontiguous L3 pagetables */ + for (j = 0; j < PAGE_MMUCOUNT; ++j) + BUG_ON(pte_none(pte[j])); /* * Don't need an atomic fetch-and-clear op here; @@ -87,8 +97,10 @@ * getting the kmap_lock (which is held here). * So no dangers, even with speculative execution. */ - page = pte_page(pkmap_page_table[i]); - pte_clear(&pkmap_page_table[i]); + page = pte_page(*pte); + /* buggy, may span discontiguous L3 pagetables */ + for (j = 0; j < PAGE_MMUCOUNT; ++j) + pte_clear(&pte[j]); set_page_address(page, NULL); } @@ -98,7 +110,10 @@ static inline unsigned long map_new_virtual(struct page *page) { unsigned long vaddr; - int count; + int k, count; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; start: count = LAST_PKMAP; @@ -136,7 +151,12 @@ } } vaddr = PKMAP_ADDR(last_pkmap_nr); - set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot)); + pgd = pgd_offset_k(vaddr); + pmd = pmd_offset(pgd, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + /* buggy, may span discontiguous L3 pagetables */ + for (k = 0; k < PAGE_MMUCOUNT; ++k) + set_pte(&pte[k], pfn_pte(page_to_pfn(page) + k, kmap_prot)); pkmap_count[last_pkmap_nr] = 1; set_page_address(page, (void *)vaddr); @@ -483,10 +503,14 @@ preempt_disable(); for (type = 0; type < KM_TYPE_NR; type++) { + int k; idx = type + KM_TYPE_NR*smp_processor_id(); - if (!pte_none(*(kmap_pte-idx))) { - printk("scheduling with KM_TYPE %d held!\n", type); - BUG(); + /* buggy, kmap_pte may not be aligned/contiguous */ + for (k = 0; k < PAGE_MMUCOUNT; ++k) { + if (!pte_none(kmap_pte[idx*PAGE_MMUCOUNT + k])) { + printk("scheduling with KM_TYPE %d held!\n", type); + BUG(); + } } } preempt_enable(); diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/mm/memory.c pgcl-bk/mm/memory.c --- linux-2.5-bk/mm/memory.c Tue Jan 7 19:09:56 2003 +++ pgcl-bk/mm/memory.c Tue Jan 7 19:06:43 2003 @@ -342,7 +342,7 @@ src_pte = pte_offset_map_nested(src_pmd, address); cont_copy_pte_range_noset: - address += PAGE_SIZE; + address += MMUPAGE_SIZE; if (address >= end) { pte_unmap_nested(src_pte); pte_unmap(dst_pte); @@ -387,8 +387,8 @@ offset = address & ~PMD_MASK; if (offset + size > PMD_SIZE) size = PMD_SIZE - offset; - size &= PAGE_MASK; - for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) { + size &= MMUPAGE_MASK; + for (offset=0; offset < size; ptep++, offset += MMUPAGE_SIZE) { pte_t pte = *ptep; if (pte_none(pte)) continue; @@ -466,12 +466,12 @@ /* Dispose of an entire mmu_gather_t per rescheduling point */ #if 
defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) -#define ZAP_BLOCK_SIZE (FREE_PTE_NR * PAGE_SIZE) +#define ZAP_BLOCK_SIZE (FREE_PTE_NR * MMUPAGE_SIZE) #endif /* For UP, 256 pages at a time gives nice low latency */ #if !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) -#define ZAP_BLOCK_SIZE (256 * PAGE_SIZE) +#define ZAP_BLOCK_SIZE (256 * MMUPAGE_SIZE) #endif /* No preempt: go for the best straight-line efficiency */ @@ -642,7 +642,7 @@ if (vmas) vmas[i] = vma; i++; - start += PAGE_SIZE; + start += MMUPAGE_SIZE; len--; } while(len && start < vma->vm_end); spin_unlock(&mm->page_table_lock); @@ -664,7 +664,7 @@ pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot)); BUG_ON(!pte_none(*pte)); set_pte(pte, zero_pte); - address += PAGE_SIZE; + address += MMUPAGE_SIZE; pte++; } while (address && (address < end)); } @@ -735,12 +735,12 @@ end = address + size; if (end > PMD_SIZE) end = PMD_SIZE; - pfn = phys_addr >> PAGE_SHIFT; + pfn = phys_addr >> MMUPAGE_SHIFT; do { BUG_ON(!pte_none(*pte)); if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn))) set_pte(pte, pfn_pte(pfn, prot)); - address += PAGE_SIZE; + address += MMUPAGE_SIZE; pfn++; pte++; } while (address && (address < end)); @@ -822,10 +822,13 @@ static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address, pte_t *page_table) { + unsigned long pfn = page_to_pfn(new_page) + + vma_suboffset(vma, address)/MMUPAGE_SIZE; + pte_t pte = pfn_pte(pfn, vma->vm_page_prot); invalidate_vcache(address, vma->vm_mm, new_page); flush_page_to_ram(new_page); flush_cache_page(vma, address); - establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)))); + establish_pte(vma, address, page_table, pte_mkwrite(pte_mkdirty(pte))); } /* @@ -941,14 +944,14 @@ } /* mapping wholly unaffected? */ - len = len >> PAGE_SHIFT; + len = len >> MMUPAGE_SHIFT; diff = pgoff - vma->vm_pgoff; if (diff >= len) continue; /* Ok, partially affected.. */ - start += diff << PAGE_SHIFT; - len = (len - diff) << PAGE_SHIFT; + start += diff << MMUPAGE_SHIFT; + len = (len - diff) << MMUPAGE_SHIFT; zap_page_range(vma, start, len); } } @@ -974,7 +977,7 @@ if (list_empty(&mapping->i_mmap) && list_empty(&mapping->i_mmap_shared)) goto out_unlock; - pgoff = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + pgoff = (offset + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_MMUCOUNT; if (!list_empty(&mapping->i_mmap)) vmtruncate_list(&mapping->i_mmap, pgoff); if (!list_empty(&mapping->i_mmap_shared)) @@ -1101,6 +1104,9 @@ remove_exclusive_swap_page(page); mm->rss++; + /* + * This is obviously wrong. How to fix? + */ pte = mk_pte(page, vma->vm_page_prot); if (write_access && can_share_swap_page(page)) pte = pte_mkdirty(pte_mkwrite(pte)); @@ -1146,7 +1152,10 @@ page_table = pte_offset_map(pmd, addr); } - /* Read-only mapping of ZERO_PAGE. */ + /* + * Read-only mapping of ZERO_PAGE. + * Is this correct? NFI... + */ entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); /* ..except if it's a write access */ @@ -1172,6 +1181,10 @@ } mm->rss++; flush_page_to_ram(page); + + /* + * NFI if this is correct either. 
+ */ entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); lru_cache_add_active(page); mark_page_accessed(page); @@ -1221,7 +1234,7 @@ pte_unmap(page_table); spin_unlock(&mm->page_table_lock); - new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0); + new_page = vma->vm_ops->nopage(vma, address & MMUPAGE_MASK, 0); /* no page was available -- either SIGBUS or OOM */ if (new_page == NOPAGE_SIGBUS) @@ -1260,10 +1273,13 @@ */ /* Only go through if we didn't race with anybody else... */ if (pte_none(*page_table)) { + unsigned long pfn; ++mm->rss; flush_page_to_ram(new_page); flush_icache_page(vma, new_page); - entry = mk_pte(new_page, vma->vm_page_prot); + pfn = page_to_pfn(new_page) + + vma_suboffset(vma, address)/MMUPAGE_SIZE; + entry = pfn_pte(pfn, vma->vm_page_prot); if (write_access) entry = pte_mkwrite(pte_mkdirty(entry)); set_pte(page_table, entry); @@ -1409,7 +1425,7 @@ BUG(); if (end > vma->vm_end) BUG(); - len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE; + len = (end+MMUPAGE_SIZE-1)/MMUPAGE_SIZE-addr/MMUPAGE_SIZE; ret = get_user_pages(current, current->mm, addr, len, write, 0, NULL, NULL); return ret == len ? 0 : -1; diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/mm/mmap.c pgcl-bk/mm/mmap.c --- linux-2.5-bk/mm/mmap.c Tue Jan 7 19:09:56 2003 +++ pgcl-bk/mm/mmap.c Tue Jan 7 19:06:43 2003 @@ -106,9 +106,9 @@ * factors balance out... */ free += (dentry_stat.nr_unused * sizeof(struct dentry)) >> - PAGE_SHIFT; + MMUPAGE_SHIFT; free += (inodes_stat.nr_unused * sizeof(struct inode)) >> - PAGE_SHIFT; + MMUPAGE_SHIFT; if (free > pages) return 1; @@ -165,8 +165,8 @@ if (brk < mm->end_code) goto out; - newbrk = PAGE_ALIGN(brk); - oldbrk = PAGE_ALIGN(mm->brk); + newbrk = MMUPAGE_ALIGN(brk); + oldbrk = MMUPAGE_ALIGN(mm->brk); if (oldbrk == newbrk) goto set_brk; @@ -183,7 +183,7 @@ goto out; /* Check against existing mmap mappings. */ - if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE)) + if (find_vma_intersection(mm, oldbrk, newbrk+MMUPAGE_SIZE)) goto out; /* Ok, looks good - let it rip. */ @@ -427,10 +427,10 @@ if (len > TASK_SIZE) return -EINVAL; - len = PAGE_ALIGN(len); + len = MMUPAGE_ALIGN(len); /* offset overflow? */ - if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) + if ((pgoff + (len >> MMUPAGE_SHIFT)) < pgoff) return -EINVAL; /* Too many mappings? */ @@ -441,7 +441,7 @@ * that it represents a valid section of the address space. */ addr = get_unmapped_area(file, addr, len, pgoff, flags); - if (addr & ~PAGE_MASK) + if (addr & ~MMUPAGE_MASK) return addr; /* Do simple checking here so the lower-level routines won't have @@ -457,7 +457,7 @@ } /* mlock MCL_FUTURE? */ if (vm_flags & VM_LOCKED) { - unsigned long locked = mm->locked_vm << PAGE_SHIFT; + unsigned long locked = mm->locked_vm << MMUPAGE_SHIFT; locked += len; if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN; @@ -519,7 +519,7 @@ } /* Check against address space limit. 
*/ - if ((mm->total_vm << PAGE_SHIFT) + len + if ((mm->total_vm << MMUPAGE_SHIFT) + len > current->rlim[RLIMIT_AS].rlim_cur) return -ENOMEM; @@ -529,7 +529,7 @@ vm_flags |= VM_ACCOUNT; } else if (vm_flags & VM_WRITE) { /* Private writable mapping: check memory availability */ - charged = len >> PAGE_SHIFT; + charged = len >> MMUPAGE_SHIFT; if (!vm_enough_memory(charged)) return -ENOMEM; vm_flags |= VM_ACCOUNT; @@ -602,9 +602,9 @@ atomic_inc(&inode->i_writecount); out: - mm->total_vm += len >> PAGE_SHIFT; + mm->total_vm += len >> MMUPAGE_SHIFT; if (vm_flags & VM_LOCKED) { - mm->locked_vm += len >> PAGE_SHIFT; + mm->locked_vm += len >> MMUPAGE_SHIFT; make_pages_present(addr, addr + len); } if (flags & MAP_POPULATE) { @@ -637,7 +637,7 @@ * Ugly calling convention alert: * Return value with the low bits set means error value, * ie - * if (ret & ~PAGE_MASK) + * if (ret & ~MMUPAGE_MASK) * error = ret; * * This function "knows" that -ENOMEM has the bits set. @@ -653,7 +653,7 @@ return -ENOMEM; if (addr) { - addr = PAGE_ALIGN(addr); + addr = MMUPAGE_ALIGN(addr); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && (!vma || addr + len <= vma->vm_start)) @@ -686,7 +686,7 @@ if (flags & MAP_FIXED) { if (addr > TASK_SIZE - len) return -ENOMEM; - if (addr & ~PAGE_MASK) + if (addr & ~MMUPAGE_MASK) return -EINVAL; return addr; } @@ -782,10 +782,10 @@ * is required to hold the mmap_sem in read mode. We need to get * the spinlock only before relocating the vma range ourself. */ - address += 4 + PAGE_SIZE - 1; - address &= PAGE_MASK; + address += 4 + MMUPAGE_SIZE - 1; + address &= MMUPAGE_MASK; spin_lock(&vma->vm_mm->page_table_lock); - grow = (address - vma->vm_end) >> PAGE_SHIFT; + grow = (address - vma->vm_end) >> MMUPAGE_SHIFT; /* Overcommit.. */ if (!vm_enough_memory(grow)) { @@ -794,7 +794,7 @@ } if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur || - ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > + ((vma->vm_mm->total_vm + grow) << MMUPAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur) { spin_unlock(&vma->vm_mm->page_table_lock); vm_unacct_memory(grow); @@ -812,7 +812,7 @@ { struct vm_area_struct *vma, *prev; - addr &= PAGE_MASK; + addr &= MMUPAGE_MASK; vma = find_vma_prev(mm, addr, &prev); if (vma && (vma->vm_start <= addr)) return vma; @@ -836,9 +836,9 @@ * is required to hold the mmap_sem in read mode. We need to get * the spinlock only before relocating the vma range ourself. */ - address &= PAGE_MASK; + address &= MMUPAGE_MASK; spin_lock(&vma->vm_mm->page_table_lock); - grow = (vma->vm_start - address) >> PAGE_SHIFT; + grow = (vma->vm_start - address) >> MMUPAGE_SHIFT; /* Overcommit.. */ if (!vm_enough_memory(grow)) { @@ -847,7 +847,7 @@ } if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur || - ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > + ((vma->vm_mm->total_vm + grow) << MMUPAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur) { spin_unlock(&vma->vm_mm->page_table_lock); vm_unacct_memory(grow); @@ -867,7 +867,7 @@ struct vm_area_struct * vma; unsigned long start; - addr &= PAGE_MASK; + addr &= MMUPAGE_MASK; vma = find_vma(mm,addr); if (!vma) return NULL; @@ -959,9 +959,9 @@ { size_t len = area->vm_end - area->vm_start; - area->vm_mm->total_vm -= len >> PAGE_SHIFT; + area->vm_mm->total_vm -= len >> MMUPAGE_SHIFT; if (area->vm_flags & VM_LOCKED) - area->vm_mm->locked_vm -= len >> PAGE_SHIFT; + area->vm_mm->locked_vm -= len >> MMUPAGE_SHIFT; /* * Is this a new hole at the lowest possible address? 
*/ @@ -1020,7 +1020,7 @@ if (mpnt->vm_flags & VM_ACCOUNT) { len = to - from; - vm_unacct_memory(len >> PAGE_SHIFT); + vm_unacct_memory(len >> MMUPAGE_SHIFT); } } while ((mpnt = mpnt->vm_next) != NULL); @@ -1081,11 +1081,11 @@ if (new_below) { new->vm_end = addr; vma->vm_start = addr; - vma->vm_pgoff += ((addr - new->vm_start) >> PAGE_SHIFT); + vma->vm_pgoff += ((addr - new->vm_start) >> MMUPAGE_SHIFT); } else { vma->vm_end = addr; new->vm_start = addr; - new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT); + new->vm_pgoff += ((addr - vma->vm_start) >> MMUPAGE_SHIFT); } if (new->vm_file) @@ -1108,10 +1108,10 @@ unsigned long end; struct vm_area_struct *mpnt, *prev, *last; - if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start) + if ((start & ~MMUPAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start) return -EINVAL; - if ((len = PAGE_ALIGN(len)) == 0) + if ((len = MMUPAGE_ALIGN(len)) == 0) return -EINVAL; /* Find the first overlapping VMA */ @@ -1183,7 +1183,7 @@ unsigned long flags; struct rb_node ** rb_link, * rb_parent; - len = PAGE_ALIGN(len); + len = MMUPAGE_ALIGN(len); if (!len) return addr; @@ -1191,7 +1191,7 @@ * mlock MCL_FUTURE? */ if (mm->def_flags & VM_LOCKED) { - unsigned long locked = mm->locked_vm << PAGE_SHIFT; + unsigned long locked = mm->locked_vm << MMUPAGE_SHIFT; locked += len; if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur) return -EAGAIN; @@ -1209,14 +1209,14 @@ } /* Check against address space limits *after* clearing old maps... */ - if ((mm->total_vm << PAGE_SHIFT) + len + if ((mm->total_vm << MMUPAGE_SHIFT) + len > current->rlim[RLIMIT_AS].rlim_cur) return -ENOMEM; if (mm->map_count > MAX_MAP_COUNT) return -ENOMEM; - if (!vm_enough_memory(len >> PAGE_SHIFT)) + if (!vm_enough_memory(len >> MMUPAGE_SHIFT)) return -ENOMEM; flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; @@ -1230,7 +1230,7 @@ */ vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); if (!vma) { - vm_unacct_memory(len >> PAGE_SHIFT); + vm_unacct_memory(len >> MMUPAGE_SHIFT); return -ENOMEM; } @@ -1248,9 +1248,9 @@ vma_link(mm, vma, prev, rb_link, rb_parent); out: - mm->total_vm += len >> PAGE_SHIFT; + mm->total_vm += len >> MMUPAGE_SHIFT; if (flags & VM_LOCKED) { - mm->locked_vm += len >> PAGE_SHIFT; + mm->locked_vm += len >> MMUPAGE_SHIFT; make_pages_present(addr, addr + len); } return addr; @@ -1295,7 +1295,7 @@ * removal */ if (mpnt->vm_flags & VM_ACCOUNT) - vm_unacct_memory((end - start) >> PAGE_SHIFT); + vm_unacct_memory((end - start) >> MMUPAGE_SHIFT); mm->map_count--; unmap_page_range(tlb, mpnt, start, end); diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/mm/page_alloc.c pgcl-bk/mm/page_alloc.c --- linux-2.5-bk/mm/page_alloc.c Tue Jan 7 19:09:56 2003 +++ pgcl-bk/mm/page_alloc.c Mon Jan 6 22:17:48 2003 @@ -57,7 +57,7 @@ */ static int bad_range(struct zone *zone, struct page *page) { - if (page_to_pfn(page) >= zone->zone_start_pfn + zone->spanned_pages) + if (page_to_pfn(page) >= zone->zone_start_pfn + zone->spanned_pages*PAGE_MMUCOUNT) return 1; if (page_to_pfn(page) < zone->zone_start_pfn) return 1; @@ -1085,7 +1085,7 @@ { unsigned long i, j; unsigned long local_offset; - const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1); + const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-PAGE_MMUSHIFT-1); int cpu, nid = pgdat->node_id; struct page *lmem_map = pgdat->node_mem_map; unsigned long zone_start_pfn = pgdat->node_start_pfn; @@ -1145,7 +1145,7 @@ INIT_LIST_HEAD(&pcp->list); } printk(" %s zone: %lu pages, 
LIFO batch:%lu\n", - zone_names[j], realsize, batch); + zone_names[j], realsize*PAGE_MMUCOUNT, batch); INIT_LIST_HEAD(&zone->active_list); INIT_LIST_HEAD(&zone->inactive_list); atomic_set(&zone->refill_counter, 0); @@ -1203,9 +1203,9 @@ * ZONE_NORMAL is below 4G. */ set_page_address(page, - __va(zone_start_pfn << PAGE_SHIFT)); + __va(zone_start_pfn << MMUPAGE_SHIFT)); #endif - zone_start_pfn++; + zone_start_pfn += PAGE_MMUCOUNT; } local_offset += size; diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/mm/shmem.c pgcl-bk/mm/shmem.c --- linux-2.5-bk/mm/shmem.c Tue Jan 7 19:09:56 2003 +++ pgcl-bk/mm/shmem.c Tue Jan 7 19:06:43 2003 @@ -45,7 +45,7 @@ #define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) #define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT) -#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) +#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> MMUPAGE_SHIFT) /* Pretend that each entry is of this size in directory's i_size */ #define BOGO_DIRENT_SIZE 20 @@ -69,14 +69,14 @@ /* * The above definition of ENTRIES_PER_PAGE, and the use of * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE: - * might be reconsidered if it ever diverges from PAGE_SIZE. + * might be reconsidered if it ever diverges from MMUPAGE_SIZE. */ - return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT); + return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-MMUPAGE_SHIFT); } static inline void shmem_dir_free(struct page *page) { - __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT); + __free_pages(page, PAGE_CACHE_SHIFT-MMUPAGE_SHIFT); } static struct page **shmem_dir_map(struct page *page) @@ -931,9 +931,9 @@ unsigned long idx; int error; - idx = (address - vma->vm_start) >> PAGE_SHIFT; + idx = (address - vma->vm_start) >> MMUPAGE_SHIFT; idx += vma->vm_pgoff; - idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; + idx >>= PAGE_CACHE_SHIFT - MMUPAGE_SHIFT; error = shmem_getpage(inode, idx, &page, SGP_CACHE); if (error) @@ -952,15 +952,15 @@ enum sgp_type sgp = nonblock? 
SGP_QUICK: SGP_CACHE; unsigned long size; - size = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size) + size = (inode->i_size + MMUPAGE_SIZE - 1) >> MMUPAGE_SHIFT; + if (pgoff >= size || pgoff + (len >> MMUPAGE_SHIFT) > size) return -EINVAL; while ((long) len > 0) { struct page *page = NULL; int err; /* - * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE + * Will need changing if PAGE_CACHE_SIZE != MMUPAGE_SIZE */ err = shmem_getpage(inode, pgoff, &page, sgp); if (err) @@ -972,8 +972,8 @@ return err; } } - len -= PAGE_SIZE; - addr += PAGE_SIZE; + len -= MMUPAGE_SIZE; + addr += MMUPAGE_SIZE; pgoff++; } return 0; diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/mm/swapfile.c pgcl-bk/mm/swapfile.c --- linux-2.5-bk/mm/swapfile.c Tue Jan 7 19:09:56 2003 +++ pgcl-bk/mm/swapfile.c Mon Jan 6 22:17:48 2003 @@ -1513,3 +1513,40 @@ swap_device_unlock(swapdev); return ret; } + +int swap_count(struct page *page) +{ + struct swap_info_struct * p; + unsigned long offset, type; + swp_entry_t entry; + int retval = 0; + + entry.val = page->index; + if (!entry.val) + goto bad_entry; + type = __swp_type(entry); + if (type >= nr_swapfiles) + goto bad_file; + p = type + swap_info; + offset = __swp_offset(entry); + if (offset >= p->max) + goto bad_offset; + if (!p->swap_map[offset]) + goto bad_unused; + retval = p->swap_map[offset]; +out: + return retval; + +bad_entry: + printk(KERN_ERR "swap_count: null entry!\n"); + goto out; +bad_file: + printk(KERN_ERR "swap_count: %s%08lx\n", Bad_file, entry.val); + goto out; +bad_offset: + printk(KERN_ERR "swap_count: %s%08lx\n", Bad_offset, entry.val); + goto out; +bad_unused: + printk(KERN_ERR "swap_count: %s%08lx\n", Unused_offset, entry.val); + goto out; +} diff -Nur --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-2.5-bk/mm/vmalloc.c pgcl-bk/mm/vmalloc.c --- linux-2.5-bk/mm/vmalloc.c Tue Jan 7 19:09:56 2003 +++ pgcl-bk/mm/vmalloc.c Mon Jan 6 22:17:48 2003 @@ -44,15 +44,14 @@ end = PMD_SIZE; do { - pte_t page; - page = ptep_get_and_clear(pte); + if (pte_present(*pte)) { + int k; + for (k = 0; k < PAGE_MMUCOUNT; ++k) + pte_clear(pte); + } else if (!pte_none(*pte)) + printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n"); + pte += PAGE_MMUCOUNT; address += PAGE_SIZE; - pte++; - if (pte_none(page)) - continue; - if (pte_present(page)) - continue; - printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n"); } while (address < end); } @@ -95,16 +94,20 @@ end = PMD_SIZE; do { + int k; struct page *page = **pages; + unsigned long pfn; if (!pte_none(*pte)) printk(KERN_ERR "alloc_area_pte: page already exists\n"); if (!page) return -ENOMEM; - set_pte(pte, mk_pte(page, prot)); + pfn = page_to_pfn(page); + for (k = 0; k < PAGE_MMUCOUNT; ++k) + set_pte(&pte[k], pfn_pte(pfn + k, prot)); address += PAGE_SIZE; - pte++; + pte += PAGE_MMUCOUNT; (*pages)++; } while (address < end); return 0;
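
[ Annotation, not part of the patch: a userspace model of the
  one-struct-page-to-PAGE_MMUCOUNT-ptes fill loop that recurs above,
  e.g. in map_new_virtual() and alloc_area_pte().  pte_t, pfn_pte(),
  set_pte() and pte_clear() are toy stand-ins so the loop can actually
  be compiled and checked outside the kernel; the protection bits and
  pfn are arbitrary demo values.  Note the teardown indexes &pte[k],
  as the pkmap flush loop in mm/highmem.c does; the free_area_pte()
  hunk above appears to pass the same pte to pte_clear() on every
  iteration, which looks like it only clears the first slot. ]

#include <assert.h>
#include <stdio.h>

#define MMUPAGE_SHIFT   12
#define PAGE_MMUSHIFT   2                       /* assumed: 16KiB pages */
#define PAGE_MMUCOUNT   (1 << PAGE_MMUSHIFT)

typedef struct { unsigned long val; } pte_t;    /* toy pte */

static pte_t pfn_pte(unsigned long pfn, unsigned long prot)
{
        pte_t pte = { (pfn << MMUPAGE_SHIFT) | prot };
        return pte;
}

static void set_pte(pte_t *ptep, pte_t pte)     { *ptep = pte; }
static void pte_clear(pte_t *ptep)              { ptep->val = 0; }

int main(void)
{
        pte_t pte[PAGE_MMUCOUNT] = { { 0 } };
        unsigned long pfn = 0x1234 * PAGE_MMUCOUNT; /* first mmupage of the page */
        int k;

        /* map: each pte slot gets the next 4K pfn of the same large page */
        for (k = 0; k < PAGE_MMUCOUNT; ++k)
                set_pte(&pte[k], pfn_pte(pfn + k, 0x63));

        for (k = 0; k < PAGE_MMUCOUNT; ++k)
                assert(pte[k].val >> MMUPAGE_SHIFT == pfn + k);

        /* unmap: clear each slot, not the same slot PAGE_MMUCOUNT times */
        for (k = 0; k < PAGE_MMUCOUNT; ++k)
                pte_clear(&pte[k]);

        printf("mapped and cleared %d mmupage ptes\n", PAGE_MMUCOUNT);
        return 0;
}

Filling consecutive pfns into consecutive pte slots is the whole point
of page clustering: struct page and the page cache keep working in
PAGE_SIZE units while the MMU keeps seeing MMUPAGE_SIZE mappings.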