diff -prauN linux-2.5.70-bk17/Makefile highpmd-2.5.70-bk17-2/Makefile --- linux-2.5.70-bk17/Makefile 2003-06-12 06:21:31.000000000 -0700 +++ highpmd-2.5.70-bk17-2/Makefile 2003-06-13 10:33:29.000000000 -0700 @@ -216,7 +216,7 @@ NOSTDINC_FLAGS = -nostdinc -iwithprefix CPPFLAGS := -D__KERNEL__ -Iinclude CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -Wno-trigraphs -O2 \ - -fno-strict-aliasing -fno-common + -fno-strict-aliasing -fno-common -g AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS) export VERSION PATCHLEVEL SUBLEVEL EXTRAVERSION KERNELRELEASE ARCH \ diff -prauN linux-2.5.70-bk17/arch/i386/Kconfig highpmd-2.5.70-bk17-2/arch/i386/Kconfig --- linux-2.5.70-bk17/arch/i386/Kconfig 2003-06-12 06:21:32.000000000 -0700 +++ highpmd-2.5.70-bk17-2/arch/i386/Kconfig 2003-06-12 20:33:54.000000000 -0700 @@ -709,6 +709,15 @@ config HIGHPTE low memory. Setting this option will put user-space page table entries in high memory. +config HIGHPMD + bool "Allocate 2nd-level pagetables from highmem" + depends on HIGHMEM64G + help + The VM uses one pmd entry for each pagetable page of physical + memory allocated. For systems with extreme amounts of highmem, + this cannot be tolerated. Setting this option will put + userspace 2nd-level pagetables in highmem. 
+ config MATH_EMULATION bool "Math emulation" ---help--- diff -prauN linux-2.5.70-bk17/arch/i386/kernel/vm86.c highpmd-2.5.70-bk17-2/arch/i386/kernel/vm86.c --- linux-2.5.70-bk17/arch/i386/kernel/vm86.c 2003-05-26 18:00:23.000000000 -0700 +++ highpmd-2.5.70-bk17-2/arch/i386/kernel/vm86.c 2003-06-12 20:16:02.000000000 -0700 @@ -144,12 +144,14 @@ static void mark_screen_rdonly(struct ta pgd_clear(pgd); goto out; } - pmd = pmd_offset(pgd, 0xA0000); - if (pmd_none(*pmd)) + pmd = pmd_offset_map(pgd, 0xA0000); + if (pmd_none(*pmd)) { + pmd_unmap(pmd); goto out; - if (pmd_bad(*pmd)) { + } else if (pmd_bad(*pmd)) { pmd_ERROR(*pmd); pmd_clear(pmd); + pmd_unmap(pmd); goto out; } pte = mapped = pte_offset_map(pmd, 0xA0000); @@ -159,6 +161,7 @@ static void mark_screen_rdonly(struct ta pte++; } pte_unmap(mapped); + pmd_unmap(pmd); out: spin_unlock(&tsk->mm->page_table_lock); preempt_enable(); diff -prauN linux-2.5.70-bk17/arch/i386/mm/fault.c highpmd-2.5.70-bk17-2/arch/i386/mm/fault.c --- linux-2.5.70-bk17/arch/i386/mm/fault.c 2003-05-26 18:00:20.000000000 -0700 +++ highpmd-2.5.70-bk17-2/arch/i386/mm/fault.c 2003-06-12 20:16:23.000000000 -0700 @@ -330,8 +330,8 @@ vmalloc_fault: * and redundant with the set_pmd() on non-PAE. 
*/ - pmd = pmd_offset(pgd, address); - pmd_k = pmd_offset(pgd_k, address); + pmd = pmd_offset_kernel(pgd, address); + pmd_k = pmd_offset_kernel(pgd_k, address); if (!pmd_present(*pmd_k)) goto no_context; set_pmd(pmd, *pmd_k); diff -prauN linux-2.5.70-bk17/arch/i386/mm/hugetlbpage.c highpmd-2.5.70-bk17-2/arch/i386/mm/hugetlbpage.c --- linux-2.5.70-bk17/arch/i386/mm/hugetlbpage.c 2003-05-26 18:00:58.000000000 -0700 +++ highpmd-2.5.70-bk17-2/arch/i386/mm/hugetlbpage.c 2003-06-12 20:23:10.000000000 -0700 @@ -57,8 +57,8 @@ static pte_t *huge_pte_alloc(struct mm_s pmd_t *pmd = NULL; pgd = pgd_offset(mm, addr); - pmd = pmd_alloc(mm, pgd, addr); - return (pte_t *) pmd; + pmd = pmd_alloc_map(mm, pgd, addr); + return (pte_t *)pmd; } static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) @@ -67,8 +67,8 @@ static pte_t *huge_pte_offset(struct mm_ pmd_t *pmd = NULL; pgd = pgd_offset(mm, addr); - pmd = pmd_offset(pgd, addr); - return (pte_t *) pmd; + pmd = pmd_offset_map(pgd, addr); + return (pte_t *)pmd; } static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page, pte_t * page_table, int write_access) @@ -115,6 +115,8 @@ int copy_hugetlb_page_range(struct mm_st ptepage = pte_page(entry); get_page(ptepage); set_pte(dst_pte, entry); + pmd_unmap(dst_pte); + pmd_unmap_nested(src_pte); dst->rss += (HPAGE_SIZE / PAGE_SIZE); addr += HPAGE_SIZE; } @@ -152,6 +154,7 @@ follow_hugetlb_page(struct mm_struct *mm get_page(page); pages[i] = page; + pmd_unmap(pte); } if (vmas) @@ -241,6 +244,7 @@ follow_huge_pmd(struct mm_struct *mm, un page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); get_page(page); } + pmd_unmap(pmd); return page; } #endif @@ -284,6 +288,7 @@ void unmap_hugepage_range(struct vm_area page = pte_page(*pte); huge_page_release(page); pte_clear(pte); + pmd_unmap(pte); } mm->rss -= (end - start) >> PAGE_SHIFT; flush_tlb_range(vma, start, end); @@ -328,16 +333,19 @@ int hugetlb_prefault(struct address_spac page = 
alloc_hugetlb_page(); if (!page) { ret = -ENOMEM; + pmd_unmap(pte); goto out; } ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); unlock_page(page); if (ret) { free_huge_page(page); + pmd_unmap(pte); goto out; } } set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE); + pmd_unmap(pte); } out: spin_unlock(&mm->page_table_lock); diff -prauN linux-2.5.70-bk17/arch/i386/mm/init.c highpmd-2.5.70-bk17-2/arch/i386/mm/init.c --- linux-2.5.70-bk17/arch/i386/mm/init.c 2003-05-26 18:00:45.000000000 -0700 +++ highpmd-2.5.70-bk17-2/arch/i386/mm/init.c 2003-06-12 20:19:55.000000000 -0700 @@ -58,10 +58,10 @@ static pmd_t * __init one_md_table_init( #ifdef CONFIG_X86_PAE pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - if (pmd_table != pmd_offset(pgd, 0)) + if (pmd_table != pmd_offset_kernel(pgd, 0)) BUG(); #else - pmd_table = pmd_offset(pgd, 0); + pmd_table = pmd_offset_kernel(pgd, 0); #endif return pmd_table; @@ -112,7 +112,7 @@ static void __init page_table_range_init if (pgd_none(*pgd)) one_md_table_init(pgd); - pmd = pmd_offset(pgd, vaddr); + pmd = pmd_offset_kernel(pgd, vaddr); for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { if (pmd_none(*pmd)) one_page_table_init(pmd); @@ -193,7 +193,7 @@ pte_t *kmap_pte; pgprot_t kmap_prot; #define kmap_get_fixmap_pte(vaddr) \ - pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) + pte_offset_kernel(pmd_offset_kernel(pgd_offset_k(vaddr), (vaddr)), (vaddr)) void __init kmap_init(void) { @@ -217,7 +217,7 @@ void __init permanent_kmaps_init(pgd_t * page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); pgd = swapper_pg_dir + pgd_index(vaddr); - pmd = pmd_offset(pgd, vaddr); + pmd = pmd_offset_kernel(pgd, vaddr); pte = pte_offset_kernel(pmd, vaddr); pkmap_page_table = pte; } diff -prauN linux-2.5.70-bk17/arch/i386/mm/ioremap.c highpmd-2.5.70-bk17-2/arch/i386/mm/ioremap.c --- 
linux-2.5.70-bk17/arch/i386/mm/ioremap.c 2003-05-26 18:00:26.000000000 -0700 +++ highpmd-2.5.70-bk17-2/arch/i386/mm/ioremap.c 2003-06-12 20:16:38.000000000 -0700 @@ -82,7 +82,7 @@ static int remap_area_pages(unsigned lon spin_lock(&init_mm.page_table_lock); do { pmd_t *pmd; - pmd = pmd_alloc(&init_mm, dir, address); + pmd = pmd_alloc_kernel(&init_mm, dir, address); error = -ENOMEM; if (!pmd) break; diff -prauN linux-2.5.70-bk17/arch/i386/mm/pageattr.c highpmd-2.5.70-bk17-2/arch/i386/mm/pageattr.c --- linux-2.5.70-bk17/arch/i386/mm/pageattr.c 2003-05-26 18:00:39.000000000 -0700 +++ highpmd-2.5.70-bk17-2/arch/i386/mm/pageattr.c 2003-06-12 20:17:59.000000000 -0700 @@ -19,7 +19,7 @@ static inline pte_t *lookup_address(unsi pmd_t *pmd; if (pgd_none(*pgd)) return NULL; - pmd = pmd_offset(pgd, address); + pmd = pmd_offset_kernel(pgd, address); if (pmd_none(*pmd)) return NULL; if (pmd_large(*pmd)) @@ -65,7 +65,7 @@ static void set_pmd_pte(pte_t *kpte, uns spin_lock(&mmlist_lock); list_for_each(l, &init_mm.mmlist) { struct mm_struct *mm = list_entry(l, struct mm_struct, mmlist); - pmd_t *pmd = pmd_offset(pgd_offset(mm, address), address); + pmd_t *pmd = pmd_offset_kernel(pgd_offset(mm, address), address); set_pte_atomic((pte_t *)pmd, pte); } spin_unlock(&mmlist_lock); @@ -80,7 +80,7 @@ static void set_pmd_pte(pte_t *kpte, uns static inline void revert_page(struct page *kpte_page, unsigned long address) { pte_t *linear = (pte_t *) - pmd_offset(pgd_offset(&init_mm, address), address); + pmd_offset_kernel(pgd_offset_k(address), address); set_pmd_pte(linear, address, pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); diff -prauN linux-2.5.70-bk17/arch/i386/mm/pgtable.c highpmd-2.5.70-bk17-2/arch/i386/mm/pgtable.c --- linux-2.5.70-bk17/arch/i386/mm/pgtable.c 2003-05-26 18:01:03.000000000 -0700 +++ highpmd-2.5.70-bk17-2/arch/i386/mm/pgtable.c 2003-06-13 09:48:08.000000000 -0700 @@ -69,7 +69,7 @@ static void set_pte_pfn(unsigned long va BUG(); return; } 
- pmd = pmd_offset(pgd, vaddr); + pmd = pmd_offset_kernel(pgd, vaddr); if (pmd_none(*pmd)) { BUG(); return; @@ -109,7 +109,7 @@ void set_pmd_pfn(unsigned long vaddr, un printk ("set_pmd_pfn: pgd_none\n"); return; /* BUG(); */ } - pmd = pmd_offset(pgd, vaddr); + pmd = pmd_offset_kernel(pgd, vaddr); set_pmd(pmd, pfn_pmd(pfn, flags)); /* * It's enough to flush this one mapping. @@ -137,15 +137,17 @@ pte_t *pte_alloc_one_kernel(struct mm_st return pte; } +#ifdef CONFIG_HIGHPTE +#define GFP_PTE (__GFP_REPEAT|__GFP_HIGHMEM|GFP_KERNEL) +#else +#define GFP_PTE (__GFP_REPEAT|GFP_KERNEL) +#endif + struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *pte; -#ifdef CONFIG_HIGHPTE - pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0); -#else - pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); -#endif + pte = alloc_page(GFP_PTE); if (pte) clear_highpage(pte); return pte; @@ -153,6 +155,12 @@ struct page *pte_alloc_one(struct mm_str #ifdef CONFIG_X86_PAE +#ifdef CONFIG_HIGHPMD +#define GFP_PMD (__GFP_REPEAT|__GFP_HIGHMEM|GFP_KERNEL) +#else +#define GFP_PMD (__GFP_REPEAT|GFP_KERNEL) +#endif + pgd_t *pgd_alloc(struct mm_struct *mm) { int i; @@ -160,11 +168,11 @@ pgd_t *pgd_alloc(struct mm_struct *mm) if (pgd) { for (i = 0; i < USER_PTRS_PER_PGD; i++) { - unsigned long pmd = __get_free_page(GFP_KERNEL); + struct page *pmd = alloc_page(GFP_PMD); if (!pmd) goto out_oom; - clear_page(pmd); - set_pgd(pgd + i, __pgd(1 + __pa(pmd))); + clear_highpage(pmd); + set_pgd(&pgd[i], __pgd(1ULL | (u64)page_to_pfn(pmd) << PAGE_SHIFT)); } memcpy(pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, @@ -173,7 +181,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) return pgd; out_oom: for (i--; i >= 0; i--) - free_page((unsigned long)__va(pgd_val(pgd[i])-1)); + __free_page(pgd_page(pgd[i])); kmem_cache_free(pae_pgd_cachep, pgd); return NULL; } @@ -183,7 +191,7 @@ void pgd_free(pgd_t *pgd) int i; for (i = 0; i < USER_PTRS_PER_PGD; i++) - free_page((unsigned 
long)__va(pgd_val(pgd[i])-1)); + __free_page(pgd_page(pgd[i])); kmem_cache_free(pae_pgd_cachep, pgd); } diff -prauN linux-2.5.70-bk17/arch/i386/pci/numa.c highpmd-2.5.70-bk17-2/arch/i386/pci/numa.c --- linux-2.5.70-bk17/arch/i386/pci/numa.c 2003-05-26 18:00:25.000000000 -0700 +++ highpmd-2.5.70-bk17-2/arch/i386/pci/numa.c 2003-06-12 21:15:18.000000000 -0700 @@ -127,7 +127,7 @@ static int __init pci_numa_init(void) return 0; pci_root_bus = pcibios_scan_root(0); - if (numnodes > 1) { + if (0 && numnodes > 1) { for (quad = 1; quad < numnodes; ++quad) { printk("Scanning PCI bus %d for quad %d\n", QUADLOCAL2BUS(quad,0), quad); diff -prauN linux-2.5.70-bk17/arch/sparc/mm/srmmu.c highpmd-2.5.70-bk17-2/arch/sparc/mm/srmmu.c --- linux-2.5.70-bk17/arch/sparc/mm/srmmu.c 2003-05-26 18:00:41.000000000 -0700 +++ highpmd-2.5.70-bk17-2/arch/sparc/mm/srmmu.c 2003-06-12 20:07:37.000000000 -0700 @@ -177,8 +177,10 @@ static struct page *srmmu_pmd_page(pmd_t return pfn_to_page((pmd_val(pmd) & SRMMU_PTD_PMASK) >> (PAGE_SHIFT-4)); } -static inline unsigned long srmmu_pgd_page(pgd_t pgd) -{ return srmmu_device_memory(pgd_val(pgd))?~0:(unsigned long)__nocache_va((pgd_val(pgd) & SRMMU_PTD_PMASK) << 4); } +static inline struct page *srmmu_pgd_page(pgd_t pgd) +{ + return virt_to_page(srmmu_device_memory(pgd_val(pgd))?~0:(unsigned long)__nocache_va((pgd_val(pgd) & SRMMU_PTD_PMASK) << 4)); +} static inline int srmmu_pte_none(pte_t pte) @@ -289,7 +291,7 @@ extern inline pgd_t *srmmu_pgd_offset(st /* Find an entry in the second-level page table.. 
*/ static inline pmd_t *srmmu_pmd_offset(pgd_t * dir, unsigned long address) { - return (pmd_t *) srmmu_pgd_page(*dir) + + return (pmd_t *)page_address(srmmu_pgd_page(*dir)) + ((address >> SRMMU_PMD_SHIFT_SOFT) & (SRMMU_PTRS_PER_PMD_SOFT - 1)); } diff -prauN linux-2.5.70-bk17/arch/sparc/mm/sun4c.c highpmd-2.5.70-bk17-2/arch/sparc/mm/sun4c.c --- linux-2.5.70-bk17/arch/sparc/mm/sun4c.c 2003-05-26 18:00:45.000000000 -0700 +++ highpmd-2.5.70-bk17-2/arch/sparc/mm/sun4c.c 2003-06-12 20:08:24.000000000 -0700 @@ -1839,6 +1839,10 @@ static struct page *sun4c_pmd_page(pmd_t return virt_to_page(sun4c_pmd_page_v(pmd)); } +/* + * I broke this + * -- wli + */ static unsigned long sun4c_pgd_page(pgd_t pgd) { return 0; } /* to find an entry in a page-table-directory */ diff -prauN linux-2.5.70-bk17/fs/exec.c highpmd-2.5.70-bk17-2/fs/exec.c --- linux-2.5.70-bk17/fs/exec.c 2003-06-12 06:21:52.000000000 -0700 +++ highpmd-2.5.70-bk17-2/fs/exec.c 2003-06-12 23:17:37.000000000 -0700 @@ -304,10 +304,10 @@ void put_dirty_page(struct task_struct * if (!pte_chain) goto out_sig; spin_lock(&tsk->mm->page_table_lock); - pmd = pmd_alloc(tsk->mm, pgd, address); + pmd = pmd_alloc_map(tsk->mm, pgd, address); if (!pmd) goto out; - pte = pte_alloc_map(tsk->mm, pmd, address); + pte = pte_alloc_map(tsk->mm, &pmd, address); if (!pte) goto out; if (!pte_none(*pte)) { @@ -319,6 +319,7 @@ void put_dirty_page(struct task_struct * set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot)))); pte_chain = page_add_rmap(page, pte, pte_chain); pte_unmap(pte); + pmd_unmap(pmd); tsk->mm->rss++; spin_unlock(&tsk->mm->page_table_lock); @@ -326,6 +327,8 @@ void put_dirty_page(struct task_struct * pte_chain_free(pte_chain); return; out: + if (pmd) + pmd_unmap(pmd); spin_unlock(&tsk->mm->page_table_lock); out_sig: __free_page(page); diff -prauN linux-2.5.70-bk17/include/asm-alpha/pgtable.h highpmd-2.5.70-bk17-2/include/asm-alpha/pgtable.h --- linux-2.5.70-bk17/include/asm-alpha/pgtable.h 2003-05-26 18:00:27.000000000 
-0700 +++ highpmd-2.5.70-bk17-2/include/asm-alpha/pgtable.h 2003-06-12 19:57:27.000000000 -0700 @@ -230,7 +230,7 @@ pmd_page_kernel(pmd_t pmd) #endif -extern inline unsigned long pgd_page(pgd_t pgd) -{ return PAGE_OFFSET + ((pgd_val(pgd) & _PFN_MASK) >> (32-PAGE_SHIFT)); } +extern inline struct page * pgd_page(pgd_t pgd) +{ return pfn_to_page((pgd_val(pgd) & _PFN_MASK) >> 32); } extern inline int pte_none(pte_t pte) { return !pte_val(pte); } extern inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_VALID; } @@ -280,7 +280,7 @@ extern inline pte_t pte_mkyoung(pte_t pt /* Find an entry in the second-level page table.. */ extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) { - return (pmd_t *) pgd_page(*dir) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); + return (pmd_t *)page_address(pgd_page(*dir)) + ((address >> PMD_SHIFT) & (PTRS_PER_PAGE - 1)); } /* Find an entry in the third-level page table.. */ diff -prauN linux-2.5.70-bk17/include/asm-i386/kmap_types.h highpmd-2.5.70-bk17-2/include/asm-i386/kmap_types.h --- linux-2.5.70-bk17/include/asm-i386/kmap_types.h 2003-05-26 18:00:44.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/asm-i386/kmap_types.h 2003-06-12 18:33:06.000000000 -0700 @@ -17,14 +17,16 @@ D(3) KM_USER0, D(4) KM_USER1, D(5) KM_BIO_SRC_IRQ, D(6) KM_BIO_DST_IRQ, -D(7) KM_PTE0, -D(8) KM_PTE1, -D(9) KM_PTE2, -D(10) KM_IRQ0, -D(11) KM_IRQ1, -D(12) KM_SOFTIRQ0, -D(13) KM_SOFTIRQ1, -D(14) KM_TYPE_NR +D(7) KM_PMD0, +D(8) KM_PMD1, +D(9) KM_PTE0, +D(10) KM_PTE1, +D(11) KM_PTE2, +D(12) KM_IRQ0, +D(13) KM_IRQ1, +D(14) KM_SOFTIRQ0, +D(15) KM_SOFTIRQ1, +D(16) KM_TYPE_NR }; #undef D diff -prauN linux-2.5.70-bk17/include/asm-i386/mach-numaq/mach_apic.h highpmd-2.5.70-bk17-2/include/asm-i386/mach-numaq/mach_apic.h --- linux-2.5.70-bk17/include/asm-i386/mach-numaq/mach_apic.h 2003-06-12 06:21:53.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/asm-i386/mach-numaq/mach_apic.h 2003-06-12 21:15:08.000000000 -0700 @@ -31,6 +31,7 @@ static inline void init_apic_ldr(void) static inline
void clustered_apic_check(void) { + nr_ioapics = min(2, nr_ioapics); printk("Enabling APIC mode: %s. Using %d I/O APICs\n", "NUMA-Q", nr_ioapics); } diff -prauN linux-2.5.70-bk17/include/asm-i386/pgalloc.h highpmd-2.5.70-bk17-2/include/asm-i386/pgalloc.h --- linux-2.5.70-bk17/include/asm-i386/pgalloc.h 2003-05-26 18:00:21.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/asm-i386/pgalloc.h 2003-06-12 21:04:46.000000000 -0700 @@ -46,6 +46,7 @@ static inline void pte_free(struct page */ #define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) +#define pmd_alloc_one_kernel(mm, addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(x) do { } while (0) #define __pmd_free_tlb(tlb,x) do { } while (0) #define pgd_populate(mm, pmd, pte) BUG() diff -prauN linux-2.5.70-bk17/include/asm-i386/pgtable-2level.h highpmd-2.5.70-bk17-2/include/asm-i386/pgtable-2level.h --- linux-2.5.70-bk17/include/asm-i386/pgtable-2level.h 2003-05-26 18:00:40.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/asm-i386/pgtable-2level.h 2003-06-12 20:15:01.000000000 -0700 @@ -48,13 +48,18 @@ static inline int pgd_present(pgd_t pgd) #define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) #define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) -#define pgd_page(pgd) \ -((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) -static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) +static inline pmd_t *pmd_offset_map(pgd_t *pgd, unsigned long address) { - return (pmd_t *) dir; + return (pmd_t *)pgd; } + +static inline pmd_t *pmd_offset_map_nested(pgd_t *pgd, unsigned long address) +{ + return (pmd_t *)pgd; +} + #define ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte_low, 0)) #define pte_same(a, b) ((a).pte_low == (b).pte_low) #define pte_page(x) pfn_to_page(pte_pfn(x)) diff -prauN linux-2.5.70-bk17/include/asm-i386/pgtable-3level.h highpmd-2.5.70-bk17-2/include/asm-i386/pgtable-3level.h --- linux-2.5.70-bk17/include/asm-i386/pgtable-3level.h 
2003-05-26 18:00:46.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/asm-i386/pgtable-3level.h 2003-06-12 20:49:08.000000000 -0700 @@ -64,12 +64,25 @@ static inline void set_pte(pte_t *ptep, */ static inline void pgd_clear (pgd_t * pgd) { } -#define pgd_page(pgd) \ -((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) + +static inline unsigned long pgd_pfn(pgd_t pgd) +{ + return pgd_val(pgd) >> PAGE_SHIFT; +} + +#define pmd_offset_kernel(pgd, addr) \ + ((pmd_t *)__va(pgd_val(*(pgd)) & PAGE_MASK) + pmd_index(addr)) /* Find an entry in the second-level page table.. */ -#define pmd_offset(dir, address) ((pmd_t *) pgd_page(*(dir)) + \ - pmd_index(address)) +#define __pmd_offset(pgd, addr, type) \ + ((pmd_t *)kmap_atomic(pgd_page(*(pgd)), type) + pmd_index(addr)) + +#define pmd_offset_map(pgd, addr) __pmd_offset(pgd, addr, KM_PMD0) +#define pmd_offset_map_nested(pgd, addr) __pmd_offset(pgd, addr, KM_PMD1) + +#define pmd_unmap(pmd) kunmap_atomic(pmd, KM_PMD0) +#define pmd_unmap_nested(pmd) kunmap_atomic(pmd, KM_PMD1) static inline pte_t ptep_get_and_clear(pte_t *ptep) { diff -prauN linux-2.5.70-bk17/include/asm-i386/pgtable.h highpmd-2.5.70-bk17-2/include/asm-i386/pgtable.h --- linux-2.5.70-bk17/include/asm-i386/pgtable.h 2003-06-12 06:21:53.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/asm-i386/pgtable.h 2003-06-12 20:41:40.000000000 -0700 @@ -294,32 +294,25 @@ static inline pte_t pte_modify(pte_t pte #define pte_offset_kernel(dir, address) \ ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) -#if defined(CONFIG_HIGHPTE) -#define pte_offset_map(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE0) + pte_index(address)) -#define pte_offset_map_nested(dir, address) \ - ((pte_t *)kmap_atomic(pmd_page(*(dir)),KM_PTE1) + pte_index(address)) -#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) -#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) -#else -#define pte_offset_map(dir, 
address) \ - ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address)) -#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address) -#define pte_unmap(pte) do { } while (0) -#define pte_unmap_nested(pte) do { } while (0) -#endif +#define __pte_offset(pmd, addr, type) \ + ((pte_t *)kmap_atomic(pmd_page(*pmd), type) + pte_index(addr)) -#if defined(CONFIG_HIGHPTE) && defined(CONFIG_HIGHMEM4G) -typedef u32 pte_addr_t; -#endif +#define pte_offset_map(pmd, addr) __pte_offset(pmd, addr, KM_PTE0) +#define pte_offset_map_nested(pmd, addr) __pte_offset(pmd, addr, KM_PTE1) +#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) +#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) + +#ifdef CONFIG_HIGHPTE -#if defined(CONFIG_HIGHPTE) && defined(CONFIG_HIGHMEM64G) +#ifdef CONFIG_HIGHMEM64G typedef u64 pte_addr_t; -#endif +#else /* CONFIG_HIGHMEM4G */ +typedef u32 pte_addr_t; +#endif /* CONFIG_HIGHMEM4G */ -#if !defined(CONFIG_HIGHPTE) +#else /* !CONFIG_HIGHPTE */ typedef pte_t *pte_addr_t; -#endif +#endif /* !CONFIG_HIGHPTE */ /* * The i386 doesn't have any external MMU info: the kernel page diff -prauN linux-2.5.70-bk17/include/asm-ia64/pgtable.h highpmd-2.5.70-bk17-2/include/asm-ia64/pgtable.h --- linux-2.5.70-bk17/include/asm-ia64/pgtable.h 2003-06-12 06:21:53.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/asm-ia64/pgtable.h 2003-06-12 19:59:36.000000000 -0700 @@ -250,7 +250,7 @@ ia64_phys_addr_valid (unsigned long addr #define pgd_bad(pgd) (!ia64_phys_addr_valid(pgd_val(pgd))) #define pgd_present(pgd) (pgd_val(pgd) != 0UL) #define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL) -#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & _PFN_MASK)) +#define pgd_page(pgd) pfn_to_page((pgd_val(pgd) & _PFN_MASK) >> PAGE_SHIFT) /* * The following have defined behavior only work if pte_present() is true. @@ -319,7 +319,7 @@ pgd_offset (struct mm_struct *mm, unsign /* Find an entry in the second-level page table.. 
*/ #define pmd_offset(dir,addr) \ - ((pmd_t *) pgd_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) + ((pmd_t *)page_address(pgd_page(*(dir))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) /* * Find an entry in the third-level page table. This looks more complicated than it diff -prauN linux-2.5.70-bk17/include/asm-m68k/motorola_pgtable.h highpmd-2.5.70-bk17-2/include/asm-m68k/motorola_pgtable.h --- linux-2.5.70-bk17/include/asm-m68k/motorola_pgtable.h 2003-05-26 18:00:41.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/asm-m68k/motorola_pgtable.h 2003-06-12 20:00:29.000000000 -0700 @@ -115,6 +115,7 @@ extern inline void pgd_set(pgd_t * pgdp, #define __pte_page(pte) ((unsigned long)__va(pte_val(pte) & PAGE_MASK)) #define __pmd_page(pmd) ((unsigned long)__va(pmd_val(pmd) & _TABLE_MASK)) #define __pgd_page(pgd) ((unsigned long)__va(pgd_val(pgd) & _TABLE_MASK)) +#define pgd_page(pgd) pfn_to_page((pgd_val(pgd) & _TABLE_MASK) >> PAGE_SHIFT) #define pte_none(pte) (!pte_val(pte)) diff -prauN linux-2.5.70-bk17/include/asm-mips64/pgtable.h highpmd-2.5.70-bk17-2/include/asm-mips64/pgtable.h --- linux-2.5.70-bk17/include/asm-mips64/pgtable.h 2003-05-26 18:01:00.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/asm-mips64/pgtable.h 2003-06-12 20:00:55.000000000 -0700 @@ -276,7 +276,7 @@ extern inline unsigned long pmd_page(pmd -extern inline unsigned long pgd_page(pgd_t pgd) +extern inline struct page *pgd_page(pgd_t pgd) { - return pgd_val(pgd); + return virt_to_page(pgd_val(pgd)); } extern inline void pmd_set(pmd_t * pmdp, pte_t * ptep) @@ -520,7 +520,7 @@ extern inline pgd_t *pgd_offset(struct m /* Find an entry in the second-level page table.. 
*/ extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) { - return (pmd_t *) pgd_page(*dir) + + return (pmd_t *)page_address(pgd_page(*dir)) + ((address >> PMD_SHIFT) & (PTRS_PER_PMD - 1)); } diff -prauN linux-2.5.70-bk17/include/asm-parisc/pgtable.h highpmd-2.5.70-bk17-2/include/asm-parisc/pgtable.h --- linux-2.5.70-bk17/include/asm-parisc/pgtable.h 2003-05-26 18:00:56.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/asm-parisc/pgtable.h 2003-06-12 20:01:26.000000000 -0700 @@ -228,7 +228,7 @@ extern unsigned long *empty_zero_page; #ifdef __LP64__ -#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) /* For 64 bit we have three level tables */ @@ -323,7 +323,7 @@ extern inline pte_t pte_modify(pte_t pte #ifdef __LP64__ #define pmd_offset(dir,address) \ -((pmd_t *) pgd_page(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1))) +((pmd_t *)page_address(pgd_page(*(dir))) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1))) #else #define pmd_offset(dir,addr) ((pmd_t *) dir) #endif diff -prauN linux-2.5.70-bk17/include/asm-ppc/pgtable.h highpmd-2.5.70-bk17-2/include/asm-ppc/pgtable.h --- linux-2.5.70-bk17/include/asm-ppc/pgtable.h 2003-06-12 06:21:53.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/asm-ppc/pgtable.h 2003-06-12 20:01:50.000000000 -0700 @@ -370,8 +370,7 @@ static inline int pgd_bad(pgd_t pgd) { static inline int pgd_present(pgd_t pgd) { return 1; } #define pgd_clear(xp) do { } while (0) -#define pgd_page(pgd) \ - ((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) /* * The following only work if pte_present() is true. 
diff -prauN linux-2.5.70-bk17/include/asm-ppc64/pgtable.h highpmd-2.5.70-bk17-2/include/asm-ppc64/pgtable.h --- linux-2.5.70-bk17/include/asm-ppc64/pgtable.h 2003-06-12 06:21:53.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/asm-ppc64/pgtable.h 2003-06-12 20:03:25.000000000 -0700 @@ -190,7 +190,7 @@ extern unsigned long empty_zero_page[PAG #define pgd_bad(pgd) ((pgd_val(pgd)) == 0) #define pgd_present(pgd) (pgd_val(pgd) != 0UL) #define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL) -#define pgd_page(pgd) (__bpn_to_ba(pgd_val(pgd))) +#define pgd_page(pgd) virt_to_page(__bpn_to_ba(pgd_val(pgd))) /* * Find an entry in a page-table-directory. We combine the address region @@ -203,7 +203,7 @@ extern unsigned long empty_zero_page[PAG /* Find an entry in the second-level page table.. */ #define pmd_offset(dir,addr) \ - ((pmd_t *) pgd_page(*(dir)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) + ((pmd_t *)page_address(pgd_page(*(dir))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) /* Find an entry in the third-level page table.. 
*/ #define pte_offset_kernel(dir,addr) \ diff -prauN linux-2.5.70-bk17/include/asm-s390/pgtable.h highpmd-2.5.70-bk17-2/include/asm-s390/pgtable.h --- linux-2.5.70-bk17/include/asm-s390/pgtable.h 2003-06-12 06:21:53.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/asm-s390/pgtable.h 2003-06-12 20:04:04.000000000 -0700 @@ -608,6 +608,8 @@ static inline pte_t mk_pte_phys(unsigned #define pmd_page(pmd) (mem_map+(pmd_val(pmd) >> PAGE_SHIFT)) +#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) + #define pgd_page_kernel(pgd) (pgd_val(pgd) & PAGE_MASK) /* to find an entry in a page-table-directory */ diff -prauN linux-2.5.70-bk17/include/asm-sh/pgtable-2level.h highpmd-2.5.70-bk17-2/include/asm-sh/pgtable-2level.h --- linux-2.5.70-bk17/include/asm-sh/pgtable-2level.h 2003-05-26 18:00:59.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/asm-sh/pgtable-2level.h 2003-06-12 20:04:22.000000000 -0700 @@ -48,8 +48,7 @@ static inline void pgd_clear (pgd_t * pg #define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) #define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) -#define pgd_page(pgd) \ -((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) +#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) { diff -prauN linux-2.5.70-bk17/include/asm-sparc64/pgtable.h highpmd-2.5.70-bk17-2/include/asm-sparc64/pgtable.h --- linux-2.5.70-bk17/include/asm-sparc64/pgtable.h 2003-05-26 18:00:20.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/asm-sparc64/pgtable.h 2003-06-12 20:05:08.000000000 -0700 @@ -228,7 +228,7 @@ static inline pte_t pte_modify(pte_t ori (pgd_val(*(pgdp)) = (__pa((unsigned long) (pmdp)) >> 11UL)) #define __pmd_page(pmd) ((unsigned long) __va((pmd_val(pmd)<<11UL))) #define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) -#define pgd_page(pgd) ((unsigned long) __va((pgd_val(pgd)<<11UL))) +#define pgd_page(pgd) virt_to_page(__va((pgd_val(pgd)<<11UL))) #define pte_none(pte) (!pte_val(pte)) 
#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT) #define pte_clear(pte) (pte_val(*(pte)) = 0UL) @@ -270,7 +270,7 @@ static inline pte_t pte_modify(pte_t ori #define pgd_offset_k(address) pgd_offset(&init_mm, address) /* Find an entry in the second-level page table.. */ -#define pmd_offset(dir, address) ((pmd_t *) pgd_page(*(dir)) + \ +#define pmd_offset(dir, address) ((pmd_t *)page_address(pgd_page(*(dir))) + \ ((address >> PMD_SHIFT) & (REAL_PTRS_PER_PMD-1))) /* Find an entry in the third-level page table.. */ diff -prauN linux-2.5.70-bk17/include/asm-x86_64/pgtable.h highpmd-2.5.70-bk17-2/include/asm-x86_64/pgtable.h --- linux-2.5.70-bk17/include/asm-x86_64/pgtable.h 2003-06-12 06:21:53.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/asm-x86_64/pgtable.h 2003-06-12 20:05:51.000000000 -0700 @@ -98,8 +98,7 @@ static inline void set_pml4(pml4_t *dst, pml4_val(*dst) = pml4_val(val); } -#define pgd_page(pgd) \ -((unsigned long) __va(pgd_val(pgd) & PHYSICAL_PAGE_MASK)) +#define pgd_page(pgd) pfn_to_page((pgd_val(pgd) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) #define ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte, 0)) #define pte_same(a, b) ((a).pte == (b).pte) @@ -332,7 +331,7 @@ static inline pgd_t *current_pgd_offset_ #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -#define pmd_offset(dir, address) ((pmd_t *) pgd_page(*(dir)) + \ +#define pmd_offset(dir, address) ((pmd_t *)page_address(pgd_page(*(dir))) + \ pmd_index(address)) #define pmd_none(x) (!pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) diff -prauN linux-2.5.70-bk17/include/linux/mm.h highpmd-2.5.70-bk17-2/include/linux/mm.h --- linux-2.5.70-bk17/include/linux/mm.h 2003-05-26 18:00:21.000000000 -0700 +++ highpmd-2.5.70-bk17-2/include/linux/mm.h 2003-06-12 21:40:29.000000000 -0700 @@ -421,15 +421,16 @@ int copy_page_range(struct mm_struct *ds int zeromap_page_range(struct vm_area_struct *vma, unsigned 
long from, unsigned long size, pgprot_t prot); -extern int vmtruncate(struct inode * inode, loff_t offset); -extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)); -extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); -extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); -extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot); -extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access); -extern int make_pages_present(unsigned long addr, unsigned long end); -extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); -extern long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long nonblock); +int vmtruncate(struct inode * inode, loff_t offset); +pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)); +pmd_t *FASTCALL(__pmd_alloc_kernel(struct mm_struct *mm, pgd_t *pmd, unsigned long address)); +pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); +pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t **pmd, unsigned long address)); +int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot); +int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access); +int make_pages_present(unsigned long addr, unsigned long end); +int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); +long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long nonblock); void put_dirty_page(struct task_struct *tsk, struct page *page, unsigned long 
address, pgprot_t prot); @@ -485,12 +486,11 @@ static inline int set_page_dirty(struct * inlining and the symmetry break with pte_alloc_map() that does all * of this out-of-line. */ -static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) -{ - if (pgd_none(*pgd)) - return __pmd_alloc(mm, pgd, address); - return pmd_offset(pgd, address); -} +#define pmd_alloc_map(mm, pgd, addr) \ + (pgd_none(*(pgd))? __pmd_alloc((mm), (pgd), (addr)): pmd_offset_map((pgd), (addr))) + +#define pmd_alloc_kernel(mm, pgd, addr) \ + (pgd_none(*(pgd))? __pmd_alloc_kernel((mm), (pgd), (addr)): pmd_offset_kernel((pgd), (addr))) extern void free_area_init(unsigned long * zones_size); extern void free_area_init_node(int nid, pg_data_t *pgdat, struct page *pmap, diff -prauN linux-2.5.70-bk17/mm/fremap.c highpmd-2.5.70-bk17-2/mm/fremap.c --- linux-2.5.70-bk17/mm/fremap.c 2003-05-26 18:00:20.000000000 -0700 +++ highpmd-2.5.70-bk17-2/mm/fremap.c 2003-06-13 10:37:25.000000000 -0700 @@ -67,11 +67,11 @@ int install_page(struct mm_struct *mm, s pgd = pgd_offset(mm, addr); spin_lock(&mm->page_table_lock); - pmd = pmd_alloc(mm, pgd, addr); + pmd = pmd_alloc_map(mm, pgd, addr); if (!pmd) goto err_unlock; - pte = pte_alloc_map(mm, pmd, addr); + pte = pte_alloc_map(mm, &pmd, addr); if (!pte) goto err_unlock; @@ -82,6 +82,7 @@ int install_page(struct mm_struct *mm, s set_pte(pte, mk_pte(page, prot)); pte_chain = page_add_rmap(page, pte, pte_chain); pte_unmap(pte); + pmd_unmap(pmd); if (flush) flush_tlb_page(vma, addr); update_mmu_cache(vma, addr, *pte); diff -prauN linux-2.5.70-bk17/mm/memory.c highpmd-2.5.70-bk17-2/mm/memory.c --- linux-2.5.70-bk17/mm/memory.c 2003-05-26 18:00:39.000000000 -0700 +++ highpmd-2.5.70-bk17-2/mm/memory.c 2003-06-13 10:37:00.000000000 -0700 @@ -103,7 +103,8 @@ static inline void free_one_pmd(struct m static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir) { int j; - pmd_t * pmd; + pmd_t *pmd; + struct page *page; if (pgd_none(*dir)) return; @@ -112,11 +113,13 
@@ static inline void free_one_pgd(struct m pgd_clear(dir); return; } - pmd = pmd_offset(dir, 0); + page = pgd_page(*dir); + pmd = pmd_offset_map(dir, 0); pgd_clear(dir); for (j = 0; j < PTRS_PER_PMD ; j++) free_one_pmd(tlb, pmd+j); - pmd_free_tlb(tlb, pmd); + pmd_unmap(pmd); + pmd_free_tlb(tlb, page); } /* @@ -136,30 +139,40 @@ void clear_page_tables(struct mmu_gather } while (--nr); } -pte_t * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address) +/* + * error return happens with pmd unmapped + */ +pte_t *pte_alloc_map(struct mm_struct *mm, pmd_t **pmd, unsigned long address) { - if (!pmd_present(*pmd)) { + if (!pmd_present(**pmd)) { + pgd_t *pgd; struct page *new; + pmd_unmap(*pmd); spin_unlock(&mm->page_table_lock); new = pte_alloc_one(mm, address); spin_lock(&mm->page_table_lock); - if (!new) + if (!new) { + *pmd = NULL; return NULL; + } + + pgd = pgd_offset(mm, address); + *pmd = pmd_offset_map(pgd, address); /* * Because we dropped the lock, we should re-check the * entry, as somebody else could have populated it.. 
*/ - if (pmd_present(*pmd)) { + if (pmd_present(**pmd)) { pte_free(new); goto out; } pgtable_add_rmap(new, mm, address); - pmd_populate(mm, pmd, new); + pmd_populate(mm, *pmd, new); } out: - return pte_offset_map(pmd, address); + return pte_offset_map(*pmd, address); } pte_t * pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address) @@ -244,10 +257,10 @@ skip_copy_pmd_range: address = (address continue; } - src_pmd = pmd_offset(src_pgd, address); - dst_pmd = pmd_alloc(dst, dst_pgd, address); + dst_pmd = pmd_alloc_map(dst, dst_pgd, address); if (!dst_pmd) goto nomem; + src_pmd = pmd_offset_map_nested(src_pgd, address); do { pte_t * src_pte, * dst_pte; @@ -261,15 +274,20 @@ skip_copy_pmd_range: address = (address pmd_clear(src_pmd); skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK; - if (address >= end) + if (address >= end) { + pmd_unmap(dst_pmd); + pmd_unmap_nested(src_pmd); goto out; + } goto cont_copy_pmd_range; } - dst_pte = pte_alloc_map(dst, dst_pmd, address); + pmd_unmap_nested(src_pmd); + dst_pte = pte_alloc_map(dst, &dst_pmd, address); if (!dst_pte) goto nomem; spin_lock(&src->page_table_lock); + src_pmd = pmd_offset_map_nested(src_pgd, address); src_pte = pte_offset_map_nested(src_pmd, address); do { pte_t pte = *src_pte; @@ -336,6 +354,8 @@ skip_copy_pte_range: */ pte_unmap_nested(src_pte); pte_unmap(dst_pte); + pmd_unmap_nested(src_pmd); + pmd_unmap(dst_pmd); spin_unlock(&src->page_table_lock); spin_unlock(&dst->page_table_lock); pte_chain = pte_chain_alloc(GFP_KERNEL); @@ -343,12 +363,16 @@ skip_copy_pte_range: if (!pte_chain) goto nomem; spin_lock(&src->page_table_lock); + dst_pmd = pmd_offset_map(dst_pgd, address); + src_pmd = pmd_offset_map_nested(src_pgd, address); dst_pte = pte_offset_map(dst_pmd, address); src_pte = pte_offset_map_nested(src_pmd, address); cont_copy_pte_range_noset: address += PAGE_SIZE; if (address >= end) { + pmd_unmap(dst_pmd); + pmd_unmap_nested(src_pmd); pte_unmap_nested(src_pte); 
pte_unmap(dst_pte); goto out_unlock; @@ -364,6 +388,8 @@ cont_copy_pmd_range: src_pmd++; dst_pmd++; } while ((unsigned long)src_pmd & PMD_TABLE_MASK); + pmd_unmap_nested(src_pmd-1); + pmd_unmap(dst_pmd-1); } out_unlock: spin_unlock(&src->page_table_lock); @@ -439,7 +465,7 @@ zap_pmd_range(struct mmu_gather *tlb, pg pgd_clear(dir); return; } - pmd = pmd_offset(dir, address); + pmd = pmd_offset_map(dir, address); end = address + size; if (end > ((address + PGDIR_SIZE) & PGDIR_MASK)) end = ((address + PGDIR_SIZE) & PGDIR_MASK); @@ -448,6 +474,7 @@ zap_pmd_range(struct mmu_gather *tlb, pg address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address < end); + pmd_unmap(pmd - 1); } void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, @@ -629,20 +656,24 @@ follow_page(struct mm_struct *mm, unsign if (pgd_none(*pgd) || pgd_bad(*pgd)) goto out; - pmd = pmd_offset(pgd, address); + pmd = pmd_offset_map(pgd, address); if (pmd_none(*pmd)) - goto out; - if (pmd_huge(*pmd)) - return follow_huge_pmd(mm, address, pmd, write); + goto out_unmap; + if (pmd_huge(*pmd)) { + struct page *page = follow_huge_pmd(mm, address, pmd, write); + pmd_unmap(pmd); + return page; + } if (pmd_bad(*pmd)) - goto out; + goto out_unmap; ptep = pte_offset_map(pmd, address); if (!ptep) - goto out; + goto out_unmap; pte = *ptep; pte_unmap(ptep); + pmd_unmap(pmd); if (pte_present(pte)) { if (!write || (pte_write(pte) && pte_dirty(pte))) { pfn = pte_pfn(pte); @@ -653,6 +684,9 @@ follow_page(struct mm_struct *mm, unsign out: return NULL; +out_unmap: + pmd_unmap(pmd); + goto out; } /* @@ -708,7 +742,7 @@ int get_user_pages(struct task_struct *t pgd = pgd_offset_k(pg); if (!pgd) return i ? : -EFAULT; - pmd = pmd_offset(pgd, pg); + pmd = pmd_offset_kernel(pgd, pg); if (!pmd) return i ? 
: -EFAULT; pte = pte_offset_kernel(pmd, pg); @@ -811,7 +845,7 @@ static inline int zeromap_pmd_range(stru if (end > PGDIR_SIZE) end = PGDIR_SIZE; do { - pte_t * pte = pte_alloc_map(mm, pmd, address); + pte_t *pte = pte_alloc_map(mm, &pmd, address); if (!pte) return -ENOMEM; zeromap_pte_range(pte, address, end - address, prot); @@ -837,13 +871,14 @@ int zeromap_page_range(struct vm_area_st spin_lock(&mm->page_table_lock); do { - pmd_t *pmd = pmd_alloc(mm, dir, address); + pmd_t *pmd = pmd_alloc_map(mm, dir, address); error = -ENOMEM; if (!pmd) break; error = zeromap_pmd_range(mm, pmd, address, end - address, prot); if (error) break; + pmd_unmap(pmd); address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); @@ -890,7 +925,7 @@ static inline int remap_pmd_range(struct end = PGDIR_SIZE; phys_addr -= address; do { - pte_t * pte = pte_alloc_map(mm, pmd, base + address); + pte_t *pte = pte_alloc_map(mm, &pmd, base + address); if (!pte) return -ENOMEM; remap_pte_range(pte, base + address, end - address, address + phys_addr, prot); @@ -918,13 +953,14 @@ int remap_page_range(struct vm_area_stru spin_lock(&mm->page_table_lock); do { - pmd_t *pmd = pmd_alloc(mm, dir, from); + pmd_t *pmd = pmd_alloc_map(mm, dir, from); error = -ENOMEM; if (!pmd) break; error = remap_pmd_range(mm, pmd, from, end - from, phys_addr + from, prot); if (error) break; + pmd_unmap(pmd); from = (from + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (from && (from < end)); @@ -994,6 +1030,7 @@ static int do_wp_page(struct mm_struct * * data, but for the moment just pretend this is OOM. 
*/ pte_unmap(page_table); + pmd_unmap(pmd); printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n", address); goto oom; @@ -1008,11 +1045,13 @@ static int do_wp_page(struct mm_struct * establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte)))); pte_unmap(page_table); + pmd_unmap(pmd); ret = VM_FAULT_MINOR; goto out; } } pte_unmap(page_table); + pmd_unmap(pmd); /* * Ok, we need to copy. Oh, well.. @@ -1032,6 +1071,7 @@ static int do_wp_page(struct mm_struct * * Re-check the pte - we dropped the lock */ spin_lock(&mm->page_table_lock); + pmd = pmd_offset_map(pgd_offset(mm, address), address); page_table = pte_offset_map(pmd, address); if (pte_same(*page_table, pte)) { if (PageReserved(old_page)) @@ -1045,6 +1085,7 @@ static int do_wp_page(struct mm_struct * new_page = old_page; } pte_unmap(page_table); + pmd_unmap(pmd); page_cache_release(new_page); page_cache_release(old_page); ret = VM_FAULT_MINOR; @@ -1178,6 +1219,7 @@ static int do_swap_page(struct mm_struct struct pte_chain *pte_chain = NULL; pte_unmap(page_table); + pmd_unmap(pmd); spin_unlock(&mm->page_table_lock); page = lookup_swap_cache(entry); if (!page) { @@ -1189,12 +1231,14 @@ static int do_swap_page(struct mm_struct * we released the page table lock. */ spin_lock(&mm->page_table_lock); + pmd = pmd_offset_map(pgd_offset(mm, address), address); page_table = pte_offset_map(pmd, address); if (pte_same(*page_table, orig_pte)) ret = VM_FAULT_OOM; else ret = VM_FAULT_MINOR; pte_unmap(page_table); + pmd_unmap(pmd); spin_unlock(&mm->page_table_lock); goto out; } @@ -1217,9 +1261,11 @@ static int do_swap_page(struct mm_struct * released the page table lock. 
*/ spin_lock(&mm->page_table_lock); + pmd = pmd_offset_map(pgd_offset(mm, address), address); page_table = pte_offset_map(pmd, address); if (!pte_same(*page_table, orig_pte)) { pte_unmap(page_table); + pmd_unmap(pmd); spin_unlock(&mm->page_table_lock); unlock_page(page); page_cache_release(page); @@ -1245,6 +1291,7 @@ static int do_swap_page(struct mm_struct /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, pte); + pmd_unmap(pmd); pte_unmap(page_table); spin_unlock(&mm->page_table_lock); out: @@ -1270,11 +1317,13 @@ do_anonymous_page(struct mm_struct *mm, pte_chain = pte_chain_alloc(GFP_ATOMIC); if (!pte_chain) { pte_unmap(page_table); + pmd_unmap(pmd); spin_unlock(&mm->page_table_lock); pte_chain = pte_chain_alloc(GFP_KERNEL); if (!pte_chain) goto no_mem; spin_lock(&mm->page_table_lock); + pmd = pmd_offset_map(pgd_offset(mm, addr), addr); page_table = pte_offset_map(pmd, addr); } @@ -1285,6 +1334,7 @@ do_anonymous_page(struct mm_struct *mm, if (write_access) { /* Allocate our own private page. 
*/ pte_unmap(page_table); + pmd_unmap(pmd); spin_unlock(&mm->page_table_lock); page = alloc_page(GFP_HIGHUSER); @@ -1293,9 +1343,11 @@ do_anonymous_page(struct mm_struct *mm, clear_user_highpage(page, addr); spin_lock(&mm->page_table_lock); + pmd = pmd_offset_map(pgd_offset(mm, addr), addr); page_table = pte_offset_map(pmd, addr); if (!pte_none(*page_table)) { + pmd_unmap(pmd); pte_unmap(page_table); page_cache_release(page); spin_unlock(&mm->page_table_lock); @@ -1311,6 +1363,7 @@ do_anonymous_page(struct mm_struct *mm, set_pte(page_table, entry); /* ignores ZERO_PAGE */ pte_chain = page_add_rmap(page, page_table, pte_chain); + pmd_unmap(pmd); pte_unmap(page_table); /* No need to invalidate - it was non-present before */ @@ -1351,6 +1404,7 @@ do_no_page(struct mm_struct *mm, struct return do_anonymous_page(mm, vma, page_table, pmd, write_access, address); pte_unmap(page_table); + pmd_unmap(pmd); spin_unlock(&mm->page_table_lock); new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0); @@ -1381,6 +1435,7 @@ do_no_page(struct mm_struct *mm, struct } spin_lock(&mm->page_table_lock); + pmd = pmd_offset_map(pgd_offset(mm, address), address); page_table = pte_offset_map(pmd, address); /* @@ -1403,9 +1458,11 @@ do_no_page(struct mm_struct *mm, struct set_pte(page_table, entry); pte_chain = page_add_rmap(new_page, page_table, pte_chain); pte_unmap(page_table); + pmd_unmap(pmd); } else { /* One of our sibling threads was faster, back out. */ pte_unmap(page_table); + pmd_unmap(pmd); page_cache_release(new_page); spin_unlock(&mm->page_table_lock); ret = VM_FAULT_MINOR; @@ -1449,6 +1506,7 @@ static int do_file_page(struct mm_struct pgoff = pte_to_pgoff(*pte); pte_unmap(pte); + pmd_unmap(pmd); spin_unlock(&mm->page_table_lock); err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, vma->vm_page_prot, pgoff, 0); @@ -1535,10 +1593,10 @@ int handle_mm_fault(struct mm_struct *mm * and the SMP-safe atomic PTE updates. 
*/ spin_lock(&mm->page_table_lock); - pmd = pmd_alloc(mm, pgd, address); + pmd = pmd_alloc_map(mm, pgd, address); if (pmd) { - pte_t * pte = pte_alloc_map(mm, pmd, address); + pte_t *pte = pte_alloc_map(mm, &pmd, address); if (pte) return handle_pte_fault(mm, vma, address, write_access, pte, pmd); } @@ -1575,7 +1633,30 @@ pmd_t *__pmd_alloc(struct mm_struct *mm, } pgd_populate(mm, pgd, new); out: - return pmd_offset(pgd, address); + return pmd_offset_map(pgd, address); +} + +pmd_t *__pmd_alloc_kernel(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +{ + pmd_t *new; + + spin_unlock(&mm->page_table_lock); + new = pmd_alloc_one_kernel(mm, address); + spin_lock(&mm->page_table_lock); + if (!new) + return NULL; + + /* + * Because we dropped the lock, we should re-check the + * entry, as somebody else could have populated it.. + */ + if (pgd_present(*pgd)) { + pmd_free(new); + goto out; + } + pgd_populate(mm, pgd, new); +out: + return pmd_offset_kernel(pgd, address); } int make_pages_present(unsigned long addr, unsigned long end) @@ -1598,7 +1679,7 @@ int make_pages_present(unsigned long add /* * Map a vmalloc()-space virtual address to the physical page. 
*/ -struct page * vmalloc_to_page(void * vmalloc_addr) +struct page *vmalloc_to_page(void *vmalloc_addr) { unsigned long addr = (unsigned long) vmalloc_addr; struct page *page = NULL; @@ -1607,7 +1688,7 @@ struct page * vmalloc_to_page(void * vma pte_t *ptep, pte; if (!pgd_none(*pgd)) { - pmd = pmd_offset(pgd, addr); + pmd = pmd_offset_map(pgd, addr); if (!pmd_none(*pmd)) { preempt_disable(); ptep = pte_offset_map(pmd, addr); @@ -1617,6 +1698,7 @@ struct page * vmalloc_to_page(void * vma pte_unmap(ptep); preempt_enable(); } + pmd_unmap(pmd); } return page; } diff -prauN linux-2.5.70-bk17/mm/mprotect.c highpmd-2.5.70-bk17-2/mm/mprotect.c --- linux-2.5.70-bk17/mm/mprotect.c 2003-05-26 18:00:38.000000000 -0700 +++ highpmd-2.5.70-bk17-2/mm/mprotect.c 2003-06-12 19:52:22.000000000 -0700 @@ -73,7 +73,7 @@ change_pmd_range(pgd_t *pgd, unsigned lo pgd_clear(pgd); return; } - pmd = pmd_offset(pgd, address); + pmd = pmd_offset_map(pgd, address); address &= ~PGDIR_MASK; end = address + size; if (end > PGDIR_SIZE) @@ -83,6 +83,7 @@ change_pmd_range(pgd_t *pgd, unsigned lo address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); + pmd_unmap(pmd - 1); } static void diff -prauN linux-2.5.70-bk17/mm/mremap.c highpmd-2.5.70-bk17-2/mm/mremap.c --- linux-2.5.70-bk17/mm/mremap.c 2003-05-26 18:00:40.000000000 -0700 +++ highpmd-2.5.70-bk17-2/mm/mremap.c 2003-06-13 10:52:01.000000000 -0700 @@ -22,10 +22,10 @@ #include #include -static pte_t *get_one_pte_map_nested(struct mm_struct *mm, unsigned long addr) +static pte_t *get_one_pte_map_nested(struct mm_struct *mm, + unsigned long addr, pmd_t **pmd) { pgd_t *pgd; - pmd_t *pmd; pte_t *pte = NULL; pgd = pgd_offset(mm, addr); @@ -37,19 +37,23 @@ static pte_t *get_one_pte_map_nested(str goto end; } - pmd = pmd_offset(pgd, addr); - if (pmd_none(*pmd)) + *pmd = pmd_offset_map_nested(pgd, addr); + if (pmd_none(**pmd)) { + pmd_unmap_nested(*pmd); goto end; + } - if (pmd_bad(*pmd)) { - pmd_ERROR(*pmd); - pmd_clear(pmd); + 
if (pmd_bad(**pmd)) { + pmd_ERROR(**pmd); + pmd_clear(*pmd); + pmd_unmap_nested(*pmd); goto end; } - pte = pte_offset_map_nested(pmd, addr); + pte = pte_offset_map_nested(*pmd, addr); if (pte_none(*pte)) { pte_unmap_nested(pte); pte = NULL; + pmd_unmap_nested(*pmd); } end: return pte; @@ -60,24 +63,26 @@ static inline int page_table_present(str { pgd_t *pgd; pmd_t *pmd; + int ret; pgd = pgd_offset(mm, addr); if (pgd_none(*pgd)) return 0; - pmd = pmd_offset(pgd, addr); - return pmd_present(*pmd); + pmd = pmd_offset_map(pgd, addr); + ret = pmd_present(*pmd); + pmd_unmap(pmd); + return ret; } #else #define page_table_present(mm, addr) (1) #endif -static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr) +static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr, pmd_t **pmd) { - pmd_t *pmd; pte_t *pte = NULL; - pmd = pmd_alloc(mm, pgd_offset(mm, addr), addr); - if (pmd) + *pmd = pmd_alloc_map(mm, pgd_offset(mm, addr), addr); + if (*pmd) pte = pte_alloc_map(mm, pmd, addr); return pte; } @@ -116,6 +121,7 @@ move_one_page(struct vm_area_struct *vma struct mm_struct *mm = vma->vm_mm; int error = 0; pte_t *src, *dst; + pmd_t *src_pmd = NULL, *dst_pmd = NULL; struct pte_chain *pte_chain; pte_chain = pte_chain_alloc(GFP_KERNEL); @@ -124,7 +130,7 @@ move_one_page(struct vm_area_struct *vma goto out; } spin_lock(&mm->page_table_lock); - src = get_one_pte_map_nested(mm, old_addr); + src = get_one_pte_map_nested(mm, old_addr, &src_pmd); if (src) { /* * Look to see whether alloc_one_pte_map needs to perform a @@ -133,14 +139,29 @@ move_one_page(struct vm_area_struct *vma */ if (!page_table_present(mm, new_addr)) { pte_unmap_nested(src); + if (src_pmd) { + pmd_unmap_nested(src_pmd); + src_pmd = NULL; + } src = NULL; } - dst = alloc_one_pte_map(mm, new_addr); - if (src == NULL) - src = get_one_pte_map_nested(mm, old_addr); - error = copy_one_pte(mm, src, dst, &pte_chain); - pte_unmap_nested(src); - pte_unmap(dst); + + dst = 
alloc_one_pte_map(mm, new_addr, &dst_pmd); + if (!src) + src = get_one_pte_map_nested(mm, old_addr, &src_pmd); + + if (dst && src) + error = copy_one_pte(mm, src, dst, &pte_chain); + else + error = -ENOMEM; + if (src) + pte_unmap_nested(src); + if (dst) + pte_unmap(dst); + if (src_pmd) + pmd_unmap_nested(src_pmd); + if (dst_pmd) + pmd_unmap(dst_pmd); } flush_tlb_page(vma, old_addr); spin_unlock(&mm->page_table_lock); diff -prauN linux-2.5.70-bk17/mm/msync.c highpmd-2.5.70-bk17-2/mm/msync.c --- linux-2.5.70-bk17/mm/msync.c 2003-05-26 18:00:38.000000000 -0700 +++ highpmd-2.5.70-bk17-2/mm/msync.c 2003-06-12 19:52:42.000000000 -0700 @@ -82,7 +82,7 @@ static inline int filemap_sync_pmd_range pgd_clear(pgd); return 0; } - pmd = pmd_offset(pgd, address); + pmd = pmd_offset_map(pgd, address); if ((address & PGDIR_MASK) != (end & PGDIR_MASK)) end = (address & PGDIR_MASK) + PGDIR_SIZE; error = 0; @@ -91,6 +91,7 @@ static inline int filemap_sync_pmd_range address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); + pmd_unmap(pmd - 1); return error; } diff -prauN linux-2.5.70-bk17/mm/swapfile.c highpmd-2.5.70-bk17-2/mm/swapfile.c --- linux-2.5.70-bk17/mm/swapfile.c 2003-05-26 18:00:25.000000000 -0700 +++ highpmd-2.5.70-bk17-2/mm/swapfile.c 2003-06-12 19:52:00.000000000 -0700 @@ -444,7 +444,7 @@ static int unuse_pgd(struct vm_area_stru pgd_clear(dir); return 0; } - pmd = pmd_offset(dir, address); + pmd = pmd_offset_map(dir, address); offset = address & PGDIR_MASK; address &= ~PGDIR_MASK; end = address + size; @@ -459,6 +459,7 @@ static int unuse_pgd(struct vm_area_stru address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); + pmd_unmap(pmd - 1); return 0; } diff -prauN linux-2.5.70-bk17/mm/vmalloc.c highpmd-2.5.70-bk17-2/mm/vmalloc.c --- linux-2.5.70-bk17/mm/vmalloc.c 2003-05-26 18:00:41.000000000 -0700 +++ highpmd-2.5.70-bk17-2/mm/vmalloc.c 2003-06-12 21:04:29.000000000 -0700 @@ -70,7 +70,7 @@ static void 
unmap_area_pmd(pgd_t *dir, u return; } - pmd = pmd_offset(dir, address); + pmd = pmd_offset_kernel(dir, address); address &= ~PGDIR_MASK; end = address + size; if (end > PGDIR_SIZE) @@ -159,7 +159,7 @@ int map_vm_area(struct vm_struct *area, dir = pgd_offset_k(address); spin_lock(&init_mm.page_table_lock); do { - pmd_t *pmd = pmd_alloc(&init_mm, dir, address); + pmd_t *pmd = pmd_alloc_kernel(&init_mm, dir, address); if (!pmd) { err = -ENOMEM; break;