diff -prauN pgcl-2.6.0-test11-2/include/linux/folio.h pgcl-2.6.0-test11-3/include/linux/folio.h
--- pgcl-2.6.0-test11-2/include/linux/folio.h	2003-11-27 21:55:19.000000000 -0800
+++ pgcl-2.6.0-test11-3/include/linux/folio.h	2003-11-28 16:17:37.000000000 -0800
@@ -450,14 +450,11 @@ static void copy_folio(pte_addr_t paddrs
 static void copy_folio(pte_addr_t paddrs[], struct page *dst_page,
 			struct page *src_page, unsigned long address)
 {
-	pte_t *folio[PAGE_MMUCOUNT+1];
 	char *src, *dst;
 	unsigned int size, offset = 0;
 	unsigned long src_pfn, dst_pfn;
 	int j = 0;
 
-	kmap_atomic_sg(folio, paddrs, KM_FOLIO);
-
 	dst = kmap_atomic(dst_page, KM_USER0);
 	dst_pfn = page_to_pfn(dst_page);
 	if (src_page != ZERO_PAGE(address)) {
@@ -470,14 +467,14 @@ static void copy_folio(pte_addr_t paddrs
 		pr_debug("%d: zeroing out page\n", current->pid);
 	}
 	while (j < PAGE_MMUCOUNT) {
-		if (!folio[j]) {
+		if (!paddrs[j]) {
 			offset += MMUPAGE_SIZE;
 			j++;
 			continue;
 		}
 		size = MMUPAGE_SIZE;
 		while (++j < PAGE_MMUCOUNT) {
-			if (!folio[j])
+			if (!paddrs[j])
 				break;
 			size += MMUPAGE_SIZE;
 		}
@@ -502,7 +499,6 @@ static void copy_folio(pte_addr_t paddrs
 	if (src)
 		kunmap_atomic(src, KM_USER1);
 	kunmap_atomic(dst, KM_USER0);
-	kunmap_atomic_sg(folio, KM_FOLIO);
 }
 
 #endif
diff -prauN pgcl-2.6.0-test11-2/mm/page_alloc.c pgcl-2.6.0-test11-3/mm/page_alloc.c
--- pgcl-2.6.0-test11-2/mm/page_alloc.c	2003-11-27 21:55:21.000000000 -0800
+++ pgcl-2.6.0-test11-3/mm/page_alloc.c	2003-11-28 17:33:14.000000000 -0800
@@ -1330,8 +1330,14 @@ static void __init free_area_init_core(s
 		if (batch * PAGE_SIZE > 256 * 1024)
 			batch = (256 * 1024) / PAGE_SIZE;
 		batch /= 4;		/* We effectively *= 4 below */
-		if (batch < 1)
-			batch = 1;
+
+		/*
+		 * Some of this is to amortize the locking overhead.
+		 * So try to at least get NR_CPUS elements in the pcp
+		 * pool to reduce zone->lock's arrival rate properly.
+		 */
+		if (batch < max(NR_CPUS, 16))
+			batch = max(NR_CPUS, 16);
 
 		for (cpu = 0; cpu < NR_CPUS; cpu++) {
 			struct per_cpu_pages *pcp;
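
As a side note on the page_alloc.c hunk: the effect of the new per-cpu pageset
batch floor can be followed with the small userspace sketch below. This is not
part of the patch; PAGE_SIZE, NR_CPUS, the 1/1024th-of-zone starting value, and
the example zone size are assumptions standing in for what
free_area_init_core() computes, chosen only so the arithmetic runs outside the
kernel.

/*
 * Userspace sketch only -- not kernel code.  The constants below are
 * assumptions used to illustrate the batch sizing, including the new
 * max(NR_CPUS, 16) floor meant to cut zone->lock's arrival rate.
 */
#include <stdio.h>

#define PAGE_SIZE	4096UL		/* assumed */
#define NR_CPUS		8		/* assumed */
#define max(a, b)	((a) > (b) ? (a) : (b))

static unsigned long pcp_batch(unsigned long zone_pages)
{
	/* assumed starting point: roughly 1/1024th of the zone */
	unsigned long batch = zone_pages / 1024;

	/* cap the pool at about 256KB worth of pages */
	if (batch * PAGE_SIZE > 256 * 1024)
		batch = (256 * 1024) / PAGE_SIZE;
	batch /= 4;			/* caller effectively *= 4 below */

	/* new floor from the patch: at least max(NR_CPUS, 16) pages */
	if (batch < max(NR_CPUS, 16))
		batch = max(NR_CPUS, 16);
	return batch;
}

int main(void)
{
	/*
	 * A 128MB zone at the assumed 4K page size is 32768 pages:
	 * the old "if (batch < 1)" floor would leave batch at 8,
	 * the new floor raises it to 16.
	 */
	printf("pcp batch: %lu\n", pcp_batch(32768UL));
	return 0;
}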