Index: linux-2.6.10-rc2-bk13-Percpu/mm/percpu.c
===================================================================
--- linux-2.6.10-rc2-bk13-Percpu.orig/mm/percpu.c	2004-12-02 15:34:02.000000000 +1100
+++ linux-2.6.10-rc2-bk13-Percpu/mm/percpu.c	2004-12-03 17:48:50.000000000 +1100
@@ -108,6 +108,10 @@
 	return b;
 }
 
+/* Enough for slab.c to bootstrap */
+#define INITIAL_NUM_ALLOCATED 40
+static __initdata int initial_sizes[INITIAL_NUM_ALLOCATED];
+
 /* Done early, so areas can be used. */
 void __init setup_per_cpu_areas(void)
 {
@@ -115,11 +119,15 @@
 	char *ptr;
 
 	/* Copy section for each CPU (we discard the original) */
-	reserved_size = ALIGN(__per_cpu_end - __per_cpu_start,SMP_CACHE_BYTES);
+	reserved_size = __per_cpu_end - __per_cpu_start;
+	/* Extra for initial slab allocations. */
+	reserved_size = ALIGN(reserved_size + 64,SMP_CACHE_BYTES);
+
 #ifdef CONFIG_MODULES
 	/* Enough to cover all DEFINE_PER_CPUs in modules, too. */
-	reserved_size = min(reserved_size, 8192UL * sizeof(unsigned long));
+	reserved_size = max(reserved_size, 8192UL * sizeof(unsigned long));
 #endif
+
 	/* Arch may choose to allocate much more for each CPU
 	 * (eg. large pages). */
 	percpu_size = reserved_size;
@@ -129,40 +137,49 @@
 		__per_cpu_offset[i] = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 	}
-}
-
-static int __init percpu_alloc_init(void)
-{
+
+	/* kmalloc is not available yet: install enough to get slab.c
+	 * to bootstrap. */
 	percpu_core.num_used = 2;
-	percpu_core.num_allocated = 4;
-	percpu_core.size = kmalloc(sizeof(percpu_core.size[0])
-				   * percpu_core.num_allocated,
-				   GFP_KERNEL);
+	percpu_core.num_allocated = INITIAL_NUM_ALLOCATED;
+	percpu_core.size = initial_sizes;
 	/* Static in-kernel percpu data (used, so negative). */
 	percpu_core.size[0] = -(__per_cpu_end - __per_cpu_start);
 	/* Free room. */
 	percpu_core.size[1] = percpu_size + percpu_core.size[0];
 	INIT_LIST_HEAD(&percpu_core.list);
+}
+
+/* Change over to a real sizes array now kmalloc exists. */
+void __init percpu_alloc_init(void)
+{
+	down(&percpu_lock);
+	percpu_core.size = kmalloc(sizeof(initial_sizes), GFP_KERNEL);
+	memcpy(percpu_core.size, initial_sizes, sizeof(initial_sizes));
+
 	/* Arch allocated more than we need for modules? */
 	if (percpu_size > reserved_size) {
+		unsigned long extra = percpu_size - reserved_size;
 		struct percpu_block *b;
 
-		/* Mark out extra space as allocated. */
-		percpu_core.size[1] = reserved_size + percpu_core.size[0];
-		percpu_core.size[2] = -(percpu_size - reserved_size);
+		/* Clip off extra space, mark as allocated. */
+		BUG_ON(percpu_core.size[percpu_core.num_used-1] < 0);
+		BUG_ON(percpu_core.num_used >= percpu_core.num_allocated);
+
+		percpu_core.size[percpu_core.num_used-1] -= extra;
+		percpu_core.size[percpu_core.num_used] = -extra;
 		percpu_core.num_used++;
 
 		/* Duplicate of core block, but with core space allocated. */
 		b = new_block();
-		b->size[0] = -reserved_size;
-		b->size[1] = percpu_size - reserved_size;
+		b->size[0] = -(percpu_size - extra);
+		b->size[1] = extra;
 		b->num_used = 2;
 		b->start = percpu_core.start;
 		list_add(&b->list, &percpu_core.list);
 	}
-	return 0;
+	up(&percpu_lock);
 }
-core_initcall(percpu_alloc_init);
 
 static int split_block(unsigned int i, unsigned short size,
 		       struct percpu_block *pb)
@@ -171,6 +188,7 @@
 {
 	if (pb->num_used + 1 > pb->num_allocated) {
 		int *new = kmalloc(sizeof(new[0]) * pb->num_allocated*2,
 				   GFP_KERNEL);
+
 		if (!new)
 			return 0;
@@ -288,6 +306,13 @@
 	unsigned int cpu;
 
 	down(&percpu_lock);
+	/* Bootstrap mode: allocations for slab.c.
+	 */
+	if (percpu_core.size == initial_sizes) {
+		BUG_ON(percpu_core.num_used == percpu_core.num_allocated);
+		ret = alloc_from_block(size, align, &percpu_core);
+		goto success;
+	}
+
 	/* Cleverly skips over kernel reserved space. */
 	list_for_each_entry(b, &percpu_core.list, list) {
 		ret = alloc_from_block(size, align, b);
@@ -334,7 +359,9 @@
 			goto unlock;
 		}
 	}
-	BUG();
+	if (system_state == SYSTEM_RUNNING)
+		printk("percpu: freeing bootstrap allocation? %p\n", freeme);
+	free_from_block(freeme, &percpu_core);
 unlock:
 	up(&percpu_lock);
 }
@@ -538,11 +565,14 @@
 		       - atomic_read(&percpu_local_ptr_count)
 		       - atomic_read(&percpu_local_count));
 
+	local_irq_disable();
 	atomic_set(&percpu_local_count, 0);
 	atomic_set(&percpu_count, 0);
 	atomic_set(&percpu_local_ptr_count, 0);
 	atomic_set(&percpu_ptr_count, 0);
 	atomic_set(&smp_id_count, 0);
+	local_irq_enable();
+
 	return len;
 }
Index: linux-2.6.10-rc2-bk13-Percpu/include/linux/percpu.h
===================================================================
--- linux-2.6.10-rc2-bk13-Percpu.orig/include/linux/percpu.h	2004-12-02 15:02:31.000000000 +1100
+++ linux-2.6.10-rc2-bk13-Percpu/include/linux/percpu.h	2004-12-03 17:29:25.000000000 +1100
@@ -35,7 +35,7 @@
 extern void free_percpu(const void *);
 extern void *percpu_modalloc(unsigned long size, unsigned long align);
 extern void percpu_modfree(void *freeme);
-
+extern void percpu_alloc_init(void);
 #else /* CONFIG_SMP */
 
 #define per_cpu_ptr(ptr, cpu) (ptr)
@@ -61,6 +61,10 @@
 static inline void percpu_modfree(void *freeme)
 {
 }
+
+static inline void percpu_alloc_init(void)
+{
+}
 #endif /* CONFIG_SMP */
 
 /* Simple wrapper for the common case: zeros memory. */
Index: linux-2.6.10-rc2-bk13-Percpu/mm/slab.c
===================================================================
--- linux-2.6.10-rc2-bk13-Percpu.orig/mm/slab.c	2004-12-03 17:26:46.000000000 +1100
+++ linux-2.6.10-rc2-bk13-Percpu/mm/slab.c	2004-12-03 17:36:54.000000000 +1100
@@ -92,6 +92,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -283,7 +284,7 @@
 struct kmem_cache_s {
 /* 1) per-cpu data, touched during every alloc/free */
-	struct array_cache	*array[NR_CPUS];
+	struct array_cache	*pc_array;
 	unsigned int		batchcount;
 	unsigned int		limit;
 /* 2) touched by every alloc & free from the backend */
@@ -508,9 +509,10 @@
 #undef CACHE
 };
 
-static struct arraycache_init initarray_cache __initdata =
+/* Used during bootstrap.
+ */
+static DEFINE_PER_CPU(struct arraycache_init, initarray_cache) =
 	{ { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
-static struct arraycache_init initarray_generic =
+static DEFINE_PER_CPU(struct arraycache_init, initarray_generic) =
 	{ { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 
 /* internal cache of cache description objs */
@@ -558,7 +560,7 @@
 
 static inline struct array_cache *ac_data(kmem_cache_t *cachep)
 {
-	return cachep->array[smp_processor_id()];
+	return __get_cpu_ptr(cachep->pc_array);
 }
 
 static kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
@@ -636,24 +638,18 @@
 	}
 }
 
-static struct array_cache *alloc_arraycache(int cpu, int entries, int batchcount)
+static void adjust_free_limits(unsigned int num_cpus)
 {
-	int memsize = sizeof(void*)*entries+sizeof(struct array_cache);
-	struct array_cache *nc = NULL;
+	kmem_cache_t* cachep;
 
-	if (cpu != -1) {
-		nc = kmem_cache_alloc_node(kmem_find_general_cachep(memsize,
-			GFP_KERNEL), cpu_to_node(cpu));
-	}
-	if (!nc)
-		nc = kmalloc(memsize, GFP_KERNEL);
-	if (nc) {
-		nc->avail = 0;
-		nc->limit = entries;
-		nc->batchcount = batchcount;
-		nc->touched = 0;
+	down(&cache_chain_sem);
+	list_for_each_entry(cachep, &cache_chain, next) {
+		spin_lock_irq(&cachep->spinlock);
+		cachep->free_limit = num_cpus*cachep->batchcount
+			+ cachep->num;
+		spin_unlock_irq(&cachep->spinlock);
 	}
-	return nc;
+	up(&cache_chain_sem);
 }
 
 static int __devinit cpuup_callback(struct notifier_block *nfb,
@@ -661,26 +657,10 @@
 				  void *hcpu)
 {
 	long cpu = (long)hcpu;
-	kmem_cache_t* cachep;
 
 	switch (action) {
 	case CPU_UP_PREPARE:
-		down(&cache_chain_sem);
-		list_for_each_entry(cachep, &cache_chain, next) {
-			struct array_cache *nc;
-
-			nc = alloc_arraycache(cpu, cachep->limit, cachep->batchcount);
-			if (!nc)
-				goto bad;
-
-			spin_lock_irq(&cachep->spinlock);
-			cachep->array[cpu] = nc;
-			cachep->free_limit = (1+num_online_cpus())*cachep->batchcount
-						+ cachep->num;
-			spin_unlock_irq(&cachep->spinlock);
-
-		}
-		up(&cache_chain_sem);
+		adjust_free_limits(1+num_online_cpus());
 		break;
 	case CPU_ONLINE:
 		start_cpu_timer(cpu);
@@ -689,28 +669,11 @@
 	case CPU_DEAD:
 		/* fall thru */
 	case CPU_UP_CANCELED:
-		down(&cache_chain_sem);
-
-		list_for_each_entry(cachep, &cache_chain, next) {
-			struct array_cache *nc;
-
-			spin_lock_irq(&cachep->spinlock);
-			/* cpu is dead; no one can alloc from it.
-			 */
-			nc = cachep->array[cpu];
-			cachep->array[cpu] = NULL;
-			cachep->free_limit -= cachep->batchcount;
-			free_block(cachep, nc, nc->avail);
-			spin_unlock_irq(&cachep->spinlock);
-			kfree(nc);
-		}
-		up(&cache_chain_sem);
+		adjust_free_limits(num_online_cpus());
 		break;
 #endif
 	}
 	return NOTIFY_OK;
-bad:
-	up(&cache_chain_sem);
-	return NOTIFY_BAD;
 }
 
 static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 };
@@ -753,8 +716,8 @@
 	INIT_LIST_HEAD(&cache_chain);
 	list_add(&cache_cache.next, &cache_chain);
 	cache_cache.colour_off = cache_line_size();
-	cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
-
+	/* FIXME: Tricky, but make a macro to do this --RR */
+	cache_cache.pc_array = &per_cpu__initarray_cache.cache;
 	cache_cache.objsize = ALIGN(cache_cache.objsize, cache_line_size());
 
 	cache_estimate(0, cache_cache.objsize, cache_line_size(), 0,
@@ -797,25 +760,26 @@
 	}
 	/* 4) Replace the bootstrap head arrays */
 	{
-		void * ptr;
-
-		ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
+		struct arraycache_init *ptr;
+
+		ptr = alloc_percpu(struct arraycache_init);
+		memcpy(__get_cpu_ptr(ptr), ac_data(&cache_cache),sizeof(*ptr));
 		local_irq_disable();
-		BUG_ON(ac_data(&cache_cache) != &initarray_cache.cache);
-		memcpy(ptr, ac_data(&cache_cache), sizeof(struct arraycache_init));
-		cache_cache.array[smp_processor_id()] = ptr;
+		cache_cache.pc_array = &ptr->cache;
 		local_irq_enable();
-
-		ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
+
+		ptr = alloc_percpu(struct arraycache_init);
+		memcpy(__get_cpu_ptr(ptr), ac_data(malloc_sizes[0].cs_cachep),
+		       sizeof(*ptr));
 		local_irq_disable();
-		BUG_ON(ac_data(malloc_sizes[0].cs_cachep) != &initarray_generic.cache);
-		memcpy(ptr, ac_data(malloc_sizes[0].cs_cachep),
-				sizeof(struct arraycache_init));
-		malloc_sizes[0].cs_cachep->array[smp_processor_id()] = ptr;
+		malloc_sizes[0].cs_cachep->pc_array = &ptr->cache;
 		local_irq_enable();
 	}
-	/* 5) resize the head arrays to their final sizes */
+	/* 5) Take per-cpu allocation out of bootstrap (now kmalloc works). */
+	percpu_alloc_init();
+
+	/* 6) resize the head arrays to their final sizes */
 	{
 		kmem_cache_t *cachep;
 		down(&cache_chain_sem);
@@ -1396,10 +1360,12 @@
 			 * the cache that's used by kmalloc(24), otherwise
 			 * the creation of further caches will BUG().
 			 */
-			cachep->array[smp_processor_id()] = &initarray_generic.cache;
+			/* FIXME: Tricky, but make a macro to do this --RR */
+			cachep->pc_array = &per_cpu__initarray_generic.cache;
 			g_cpucache_up = PARTIAL;
 		} else {
-			cachep->array[smp_processor_id()] = kmalloc(sizeof(struct arraycache_init),GFP_KERNEL);
+			cachep->pc_array
+				= &alloc_percpu(struct arraycache_init)->cache;
 		}
 		BUG_ON(!ac_data(cachep));
 		ac_data(cachep)->avail = 0;
@@ -1597,8 +1563,6 @@
  */
 int kmem_cache_destroy (kmem_cache_t * cachep)
 {
-	int i;
-
 	if (!cachep || in_interrupt())
 		BUG();
@@ -1628,8 +1592,7 @@
 
 	/* no cpu_online check required here since we clear the percpu
 	 * array on cpu offline and set this to NULL.
 	 */
-	for (i = 0; i < NR_CPUS; i++)
-		kfree(cachep->array[i]);
+	free_percpu(cachep->pc_array);
 
 	/* NUMA: free the list3 structures */
 	kfree(cachep->lists.shared);
@@ -2515,67 +2478,87 @@
 
 struct ccupdate_struct {
 	kmem_cache_t *cachep;
-	struct array_cache *new[NR_CPUS];
+	struct array_cache *pc_array;
};
 
-static void do_ccupdate_local(void *info)
+static int set_pc_array(void *info)
 {
 	struct ccupdate_struct *new = (struct ccupdate_struct *)info;
 	struct array_cache *old;
 
 	check_irq_off();
-	old = ac_data(new->cachep);
-
-	new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
-	new->new[smp_processor_id()] = old;
-}
+	old = new->cachep->pc_array;
+	new->cachep->pc_array = new->pc_array;
+	new->pc_array = old;
+	return 0;
+}
 
-static int do_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount, int shared)
+static int do_tune_cpucache (kmem_cache_t* cachep, int limit, int batchcount, int shared, int init)
 {
 	struct ccupdate_struct new;
-	struct array_cache *new_shared;
-	int i;
+	int err, i, memsize = sizeof(void*)*limit+sizeof(struct array_cache);
+	struct array_cache *share_array;
 
-	memset(&new.new,0,sizeof(new.new));
-	for (i = 0; i < NR_CPUS; i++) {
-		if (cpu_online(i)) {
-			new.new[i] = alloc_arraycache(i, limit, batchcount);
-			if (!new.new[i]) {
-				for (i--; i >= 0; i--) kfree(new.new[i]);
-				return -ENOMEM;
-			}
-		} else {
-			new.new[i] = NULL;
+	new.cachep = cachep;
+	new.pc_array = __alloc_percpu(memsize, __alignof__(*new.pc_array));
+	if (!new.pc_array)
+		return -ENOMEM;
+
+	for_each_cpu(i) {
+		per_cpu_ptr(new.pc_array, i)->avail = 0;
+		per_cpu_ptr(new.pc_array, i)->limit = limit;
+		per_cpu_ptr(new.pc_array, i)->batchcount = batchcount;
+		per_cpu_ptr(new.pc_array, i)->touched = 0;
+	}
+
+	/* Either before other CPUs up, or before cache returned. */
+	if (init) {
+		spin_lock_irq(&cachep->spinlock);
+		set_pc_array(&new);
+		spin_unlock_irq(&cachep->spinlock);
+	} else {
+		/* Do it atomically. */
+		err = stop_machine_run(set_pc_array, &new, NR_CPUS);
+		if (err) {
+			free_percpu(new.pc_array);
+			return err;
 		}
 	}
-	new.cachep = cachep;
-	smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
-
-	check_irq_on();
 	spin_lock_irq(&cachep->spinlock);
 	cachep->batchcount = batchcount;
 	cachep->limit = limit;
 	cachep->free_limit = (1+num_online_cpus())*cachep->batchcount + cachep->num;
 	spin_unlock_irq(&cachep->spinlock);
-	for (i = 0; i < NR_CPUS; i++) {
-		struct array_cache *ccold = new.new[i];
-		if (!ccold)
-			continue;
+	/* Old array is returned in new.pc_array.
+	 */
+	if (new.pc_array) {
 		spin_lock_irq(&cachep->spinlock);
-		free_block(cachep, ccold, ccold->avail);
+		for_each_cpu(i) {
+			struct array_cache *ac;
+
+			ac = per_cpu_ptr(new.pc_array, i);
+			free_block(cachep, ac, ac->avail);
+		}
 		spin_unlock_irq(&cachep->spinlock);
-		kfree(ccold);
+		free_percpu(new.pc_array);
 	}
-	new_shared = alloc_arraycache(-1, batchcount*shared, 0xbaadf00d);
-	if (new_shared) {
+
+	memsize = sizeof(void*)*(batchcount*shared)+sizeof(struct array_cache);
+	share_array = kmalloc(memsize, GFP_KERNEL);
+
+	if (share_array) {
 		struct array_cache *old;
+		share_array->avail = 0;
+		share_array->limit = batchcount*shared;
+		share_array->batchcount = 0xbaadf00d;
+		share_array->touched = 0;
+
 		spin_lock_irq(&cachep->spinlock);
 		old = cachep->lists.shared;
-		cachep->lists.shared = new_shared;
+		cachep->lists.shared = share_array;
 		if (old)
 			free_block(cachep, old, old->avail);
 		spin_unlock_irq(&cachep->spinlock);
@@ -2632,7 +2615,7 @@
 	if (limit > 32)
 		limit = 32;
 #endif
-	err = do_tune_cpucache(cachep, limit, (limit+1)/2, shared);
+	err = do_tune_cpucache(cachep, limit, (limit+1)/2, shared, 1);
 	if (err)
 		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
 					cachep->name, -err);
@@ -2653,7 +2636,7 @@
 		}
 		free_block(cachep, ac, tofree);
 		ac->avail -= tofree;
-		memmove(ac->entries, ac->entires + tofree,
+		memmove(ac->entries, ac->entries + tofree,
 					sizeof(void*)*ac->avail);
 	}
 }
@@ -2940,7 +2923,7 @@
 		    shared < 0) {
 			res = -EINVAL;
 		} else {
-			res = do_tune_cpucache(cachep, limit, batchcount, shared);
+			res = do_tune_cpucache(cachep, limit, batchcount, shared, 0);
 		}
 		break;
 	}
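
For anyone converting other NR_CPUS-indexed arrays the same way, here is a minimal
usage sketch of the dynamic per-cpu interface this patch moves slab.c onto.  It only
uses calls that appear in the diff above (alloc_percpu, __get_cpu_ptr, per_cpu_ptr,
for_each_cpu, free_percpu); the foo_stats structure and foo_* functions are made up
for illustration, not part of the patch.

	#include <linux/percpu.h>
	#include <linux/init.h>
	#include <linux/errno.h>

	/* Hypothetical per-cpu counter, analogous to kmem_cache_s.pc_array above. */
	struct foo_stats {
		unsigned long events;
	};

	static struct foo_stats *foo_stats;	/* handle returned by alloc_percpu() */

	static int __init foo_init(void)
	{
		/* One zeroed instance per CPU (alloc_percpu zeros memory). */
		foo_stats = alloc_percpu(struct foo_stats);
		if (!foo_stats)
			return -ENOMEM;
		return 0;
	}

	/* Caller keeps this CPU stable (irqs or preemption off), as ac_data() does. */
	static void foo_count_event(void)
	{
		__get_cpu_ptr(foo_stats)->events++;
	}

	static unsigned long foo_total(void)
	{
		unsigned long sum = 0;
		int i;

		/* Walk every CPU's instance, as do_tune_cpucache() does. */
		for_each_cpu(i)
			sum += per_cpu_ptr(foo_stats, i)->events;
		return sum;
	}

	static void foo_exit(void)
	{
		free_percpu(foo_stats);
	}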