(1) fix for fault_in_pages_*() not faulting in enough mmupages.
(2) fix for bogus open-coded ptep_to_address()
(3) fix for iounmap() missing its targets

Unfortunately, none of these are the bug we're looking for.

-- wli

diff -prauN pgcl-2.5.70-1/include/linux/pagemap.h pgcl-2.5.70-2/include/linux/pagemap.h
--- pgcl-2.5.70-1/include/linux/pagemap.h	2003-05-26 21:30:23.000000000 -0700
+++ pgcl-2.5.70-2/include/linux/pagemap.h	2003-05-28 02:00:29.000000000 -0700
@@ -177,40 +177,27 @@ extern void end_page_writeback(struct pa
  */
 static inline int fault_in_pages_writeable(char __user *uaddr, int size)
 {
-	int ret;
+	int ret = 0;
+	unsigned long addr, end = (unsigned long)uaddr + size - 1;
 
 	/*
 	 * Writing zeroes into userspace here is OK, because we know that if
 	 * the zero gets there, we'll be overwriting it.
 	 */
-	ret = __put_user(0, uaddr);
-	if (ret == 0) {
-		char __user *end = uaddr + size - 1;
+	for (addr = (unsigned long)uaddr; addr <= MMUPAGE_ALIGN(end); addr += MMUPAGE_SIZE)
+		if ((ret = __put_user(0, (char *)min(addr, end))) != 0)
+			break;
 
-		/*
-		 * If the page was already mapped, this will get a cache miss
-		 * for sure, so try to avoid doing it.
-		 */
-		if (((unsigned long)uaddr & MMUPAGE_MASK) !=
-				((unsigned long)end & MMUPAGE_MASK))
-			ret = __put_user(0, end);
-	}
 	return ret;
 }
 
 static inline void fault_in_pages_readable(const char __user *uaddr, int size)
 {
 	volatile char c;
-	int ret;
+	unsigned long addr, end = (unsigned long)uaddr + size - 1;
 
-	ret = __get_user(c, (char *)uaddr);
-	if (ret == 0) {
-		const char __user *end = uaddr + size - 1;
-
-		if (((unsigned long)uaddr & MMUPAGE_MASK) !=
-				((unsigned long)end & MMUPAGE_MASK))
-			__get_user(c, (char *)end);
-	}
+	for (addr = (unsigned long)uaddr; addr <= MMUPAGE_ALIGN(end); addr += MMUPAGE_SIZE)
+		__get_user(c, (char *)min(addr, end));
 }
 
 #endif /* _LINUX_PAGEMAP_H */
diff -prauN pgcl-2.5.70-1/mm/memory.c pgcl-2.5.70-2/mm/memory.c
--- pgcl-2.5.70-1/mm/memory.c	2003-05-26 21:30:23.000000000 -0700
+++ pgcl-2.5.70-2/mm/memory.c	2003-05-27 12:39:30.000000000 -0700
@@ -1443,7 +1443,8 @@ do_anonymous_page(struct mm_struct *mm,
 	dn_vaddr = addr & MMUPAGE_MASK;
 	up_vaddr = MMUPAGE_ALIGN(addr + 1);
 
-	pr_debug("vma->vm_start = 0x%lx, vma->vm_end = 0x%lx\n", vma->vm_start, vma->vm_end);
+	pr_debug("vma->vm_start = 0x%lx, vma->vm_end = 0x%lx\n",
+			vma->vm_start, vma->vm_end);
 	pr_debug("lo_vaddr = 0x%lx, hi_vaddr = 0x%lx\n", lo_vaddr, hi_vaddr);
 	pr_debug("dn_vaddr = 0x%lx, up_vaddr = 0x%lx\n", dn_vaddr, up_vaddr);
 
@@ -1479,9 +1480,12 @@ do_anonymous_page(struct mm_struct *mm,
 			dn_vaddr -= MMUPAGE_SIZE;
 			dn_pte--;
 		}
-		pr_debug("dn_vaddr = 0x%lx, up_vaddr = 0x%lx\n", dn_vaddr, up_vaddr);
-		pr_debug("dn_subpfn = 0x%lx, up_subpfn = 0x%lx\n", dn_subpfn, up_subpfn);
-	} while ((up_vaddr < hi_vaddr || dn_vaddr >= lo_vaddr) && up_subpfn > dn_subpfn);
+		pr_debug("dn_vaddr = 0x%lx, up_vaddr = 0x%lx\n",
+				dn_vaddr, up_vaddr);
+		pr_debug("dn_subpfn = 0x%lx, up_subpfn = 0x%lx\n",
+				dn_subpfn, up_subpfn);
+	} while ((up_vaddr < hi_vaddr || dn_vaddr >= lo_vaddr) &&
+			up_subpfn > dn_subpfn);
 	pr_debug("finishing PTE search loop\n");
 
 	pr_debug("starting PTE instantiation loop\n");
@@ -1513,29 +1517,10 @@ do_anonymous_page(struct mm_struct *mm,
 			unsigned long vaddr, offset;
 			int k;
 
-			pr_debug("doing sleeping alloc of non-anonymous page\n");
+			pr_debug("doing sleeping alloc of pte_chain"
+				" for non-anonymous page\n");
 
-			/* fugly. wtf else can I do? */
-			vaddr = (unsigned long)ptes[subpfn];
-
-			pr_debug("pte vaddr = 0x%lx\n", vaddr);
-
-			/*
-			 * this computes the offset from the
-			 * PAGE_SIZE-aligned kmap_atomic() aperture
-			 * the PAGE_SIZE-sized pte pages end up
-			 * mapping PAGE_MMUCOUNT*PMD_SIZE; hence...
-			 */
-			vaddr &= PAGE_MASK;
-
-			pr_debug("vaddr offset = 0x%lx\n", vaddr);
-
-			vaddr /= sizeof(pte_t);
-
-			pr_debug("vaddr offset in ptes = 0x%lx\n", vaddr);
-
-			vaddr = (lo_vaddr & ~(PAGE_MMUCOUNT*PMD_SIZE-1))
-				+ vaddr * MMUPAGE_SIZE;
+			vaddr = ptep_to_address(ptes[subpfn]);
 
 			pr_debug("vaddr = 0x%lx\n", vaddr);
 
@@ -1544,7 +1529,7 @@ do_anonymous_page(struct mm_struct *mm,
 			pte_chain = pte_chain_alloc(GFP_KERNEL);
 			if (!pte_chain) {
 				pr_debug("going to out_oom\n");
-				ret = VM_FAULT_OOM; 
+				ret = VM_FAULT_OOM;
 				goto out_oom;
 			}
 			spin_lock(&mm->page_table_lock);
@@ -1552,15 +1537,22 @@ do_anonymous_page(struct mm_struct *mm,
 
 			/* is this safe from gcc? NFI */
 			if (page_table != ptes[subpfn]) {
-				pr_debug("(page_table) 0x%p != 0x%p (ptes[subpfn])\n", page_table, ptes[subpfn]);
+				pr_debug("(page_table) 0x%p != 0x%p"
+					" (ptes[subpfn])\n",
+					page_table,
+					ptes[subpfn]);
 				offset = (unsigned long)
 					(page_table - ptes[subpfn]);
-				pr_debug("adjusting all ptes by offset 0x%lx\n",
-						offset);
+				pr_debug("adjusting all ptes by"
+					" offset 0x%lx\n",
+					offset);
 				for (k = subpfn; k < PAGE_MMUCOUNT; ++k) {
-					pr_debug("pte before 0x%p\n", ptes[k]);
-					ptes[k] += offset;
-					pr_debug("pte after 0x%p\n", ptes[k]);
+					pr_debug("pte before 0x%p\n",
+							ptes[k]);
+					if (ptes[k])
+						ptes[k] += offset;
+					pr_debug("pte after 0x%p\n",
+							ptes[k]);
 				}
 			}
 
@@ -1570,12 +1562,13 @@ do_anonymous_page(struct mm_struct *mm,
 				continue;
 			}
 		}
-		pr_debug("setting pte for anonymous zeroed page thing\n");
+		pr_debug("setting pte for anonymous zeroed page\n");
 		pr_debug("ptep = 0x%p, pte = 0x%Lx\n",
 				ptes[subpfn], (u64)pte_val(pte));
 		set_pte(ptes[subpfn], pte_mkwrite(pte_mkdirty(pte)));
 		pr_debug("about to page_add_rmap()\n");
-		pte_chain = page_add_rmap(page, ptes[subpfn], pte_chain);
+		pte_chain = page_add_rmap(page, ptes[subpfn],
+						pte_chain);
 		pr_debug("about to update_mmu_cache()\n");
 		update_mmu_cache(vma, addr, pte);
 		rss++;
diff -prauN pgcl-2.5.70-1/mm/vmalloc.c pgcl-2.5.70-2/mm/vmalloc.c
--- pgcl-2.5.70-1/mm/vmalloc.c	2003-05-26 21:30:23.000000000 -0700
+++ pgcl-2.5.70-2/mm/vmalloc.c	2003-05-26 22:15:41.000000000 -0700
@@ -211,17 +211,20 @@ out:
  *
  *	@addr:		base address
  *
- *	Search for the kernel VM area starting at @addr, and remove it.
+ *	Search for the kernel VM area containing @addr, and remove it.
  *	This function returns the found VM area, but using it is NOT safe
- *	on SMP machines.
+ *	on SMP machines; the final removal of an area must be serialized
+ *	externally, and those who allocated the area own it.
  */
-struct vm_struct *remove_vm_area(void *addr)
+struct vm_struct *remove_vm_area(void *__addr)
{
 	struct vm_struct **p, *tmp;
+	unsigned long addr = (unsigned long)__addr;
 
 	write_lock(&vmlist_lock);
-	for (p = &vmlist ; (tmp = *p) ;p = &tmp->next) {
-		if (tmp->addr == addr)
+	for (p = &vmlist; (tmp = *p); p = &tmp->next) {
+		unsigned long tmp_addr = (unsigned long)tmp->addr;
+		if (addr >= tmp_addr && addr - tmp_addr < tmp->size)
 			goto found;
 	}
 	write_unlock(&vmlist_lock);
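
For reference, the iounmap() fix above amounts to replacing the exact match
on an area's base address with a range-containment test, so a lookup on an
address that merely falls inside a vm area still finds it. A minimal
userspace sketch of just that check follows; it is illustration only, not
kernel code, and the names in it are made up:

	#include <assert.h>

	/* toy stand-in for struct vm_struct: a base address and a size */
	struct area {
		unsigned long addr;
		unsigned long size;
	};

	/*
	 * Same containment test the patched remove_vm_area() loop uses:
	 * addr is inside the area when it is at or above the base and its
	 * offset from the base is below the size. Writing the upper bound
	 * as (addr - base < size) on unsigned values avoids the wraparound
	 * that (addr < base + size) could hit near the top of the address
	 * space.
	 */
	static int area_contains(const struct area *a, unsigned long addr)
	{
		return addr >= a->addr && addr - a->addr < a->size;
	}

	int main(void)
	{
		struct area a = { 0xe0000000UL, 0x4000UL };

		assert(area_contains(&a, 0xe0000000UL));  /* exact base */
		assert(area_contains(&a, 0xe0000fffUL));  /* interior offset */
		assert(!area_contains(&a, 0xe0004000UL)); /* one past the end */
		assert(!area_contains(&a, 0xdfffffffUL)); /* below the base */
		return 0;
	}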