diff -prauN pgcl-2.6.0-test11-2/include/linux/folio.h pgcl-2.6.0-test11-3/include/linux/folio.h
--- pgcl-2.6.0-test11-2/include/linux/folio.h	2003-11-27 21:55:19.000000000 -0800
+++ pgcl-2.6.0-test11-3/include/linux/folio.h	2003-11-28 16:17:37.000000000 -0800
@@ -450,14 +450,11 @@ static void copy_folio(pte_addr_t paddrs
 static void copy_folio(pte_addr_t paddrs[], struct page *dst_page,
 			struct page *src_page, unsigned long address)
 {
-	pte_t *folio[PAGE_MMUCOUNT+1];
 	char *src, *dst;
 	unsigned int size, offset = 0;
 	unsigned long src_pfn, dst_pfn;
 	int j = 0;
 
-	kmap_atomic_sg(folio, paddrs, KM_FOLIO);
-
 	dst = kmap_atomic(dst_page, KM_USER0);
 	dst_pfn = page_to_pfn(dst_page);
 	if (src_page != ZERO_PAGE(address)) {
@@ -470,14 +467,14 @@ static void copy_folio(pte_addr_t paddrs
 		pr_debug("%d: zeroing out page\n", current->pid);
 	}
 	while (j < PAGE_MMUCOUNT) {
-		if (!folio[j]) {
+		if (!paddrs[j]) {
 			offset += MMUPAGE_SIZE;
 			j++;
 			continue;
 		}
 		size = MMUPAGE_SIZE;
 		while (++j < PAGE_MMUCOUNT) {
-			if (!folio[j])
+			if (!paddrs[j])
 				break;
 			size += MMUPAGE_SIZE;
 		}
@@ -502,7 +499,6 @@ static void copy_folio(pte_addr_t paddrs
 	if (src)
 		kunmap_atomic(src, KM_USER1);
 	kunmap_atomic(dst, KM_USER0);
-	kunmap_atomic_sg(folio, KM_FOLIO);
 }
 
 #endif
diff -prauN pgcl-2.6.0-test11-2/mm/page_alloc.c pgcl-2.6.0-test11-3/mm/page_alloc.c
--- pgcl-2.6.0-test11-2/mm/page_alloc.c	2003-11-27 21:55:21.000000000 -0800
+++ pgcl-2.6.0-test11-3/mm/page_alloc.c	2003-11-28 17:33:14.000000000 -0800
@@ -1330,8 +1330,14 @@ static void __init free_area_init_core(s
 		if (batch * PAGE_SIZE > 256 * 1024)
 			batch = (256 * 1024) / PAGE_SIZE;
 		batch /= 4;		/* We effectively *= 4 below */
-		if (batch < 1)
-			batch = 1;
+
+		/*
+		 * Some of this is to amortize the locking overhead.
+		 * So try to at least get NR_CPUS elements in the pcp
+		 * pool to reduce zone->lock's arrival rate properly.
+		 */
+		if (batch < max(NR_CPUS, 16))
+			batch = max(NR_CPUS, 16);
 
 		for (cpu = 0; cpu < NR_CPUS; cpu++) {
 			struct per_cpu_pages *pcp;
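
As a side note on the page_alloc.c hunk: the effect of the new per-cpu pageset
batch floor can be followed with the small userspace sketch below. This is not
part of the patch; PAGE_SIZE, NR_CPUS, the 1/1024th-of-zone starting value, and
the example zone size are assumptions standing in for what
free_area_init_core() computes, chosen only so the arithmetic runs outside the
kernel.

/*
 * Userspace sketch only -- not kernel code.  The constants below are
 * assumptions used to illustrate the batch sizing, including the new
 * max(NR_CPUS, 16) floor meant to cut zone->lock's arrival rate.
 */
#include <stdio.h>

#define PAGE_SIZE	4096UL		/* assumed */
#define NR_CPUS		8		/* assumed */
#define max(a, b)	((a) > (b) ? (a) : (b))

static unsigned long pcp_batch(unsigned long zone_pages)
{
	/* assumed starting point: roughly 1/1024th of the zone */
	unsigned long batch = zone_pages / 1024;

	/* cap the pool at about 256KB worth of pages */
	if (batch * PAGE_SIZE > 256 * 1024)
		batch = (256 * 1024) / PAGE_SIZE;
	batch /= 4;			/* caller effectively *= 4 below */

	/* new floor from the patch: at least max(NR_CPUS, 16) pages */
	if (batch < max(NR_CPUS, 16))
		batch = max(NR_CPUS, 16);
	return batch;
}

int main(void)
{
	/*
	 * A 128MB zone at the assumed 4K page size is 32768 pages:
	 * the old "if (batch < 1)" floor would leave batch at 8,
	 * the new floor raises it to 16.
	 */
	printf("pcp batch: %lu\n", pcp_batch(32768UL));
	return 0;
}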