Name: __alloc_percpu Author: Rusty Russell Status: Tested on 2.5.69 D: By overallocating the per-cpu data at boot, we can make quite an D: efficient allocator, and then use it to support per-cpu data in D: modules (next patch). diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .11647-linux-2.5.69/include/asm-generic/percpu.h .11647-linux-2.5.69.updated/include/asm-generic/percpu.h --- .11647-linux-2.5.69/include/asm-generic/percpu.h 2003-01-02 12:32:47.000000000 +1100 +++ .11647-linux-2.5.69.updated/include/asm-generic/percpu.h 2003-05-07 16:26:42.000000000 +1000 @@ -2,37 +2,10 @@ #define _ASM_GENERIC_PERCPU_H_ #include -#define __GENERIC_PER_CPU +/* Some archs may want to keep __per_cpu_offset for this CPU in a register, + or do their own allocation. */ #ifdef CONFIG_SMP - -extern unsigned long __per_cpu_offset[NR_CPUS]; - -/* Separate out the type, so (int[3], foo) works. */ -#ifndef MODULE -#define DEFINE_PER_CPU(type, name) \ - __attribute__((__section__(".data.percpu"))) __typeof__(type) name##__per_cpu -#endif - -/* var is in discarded region: offset to particular copy we want */ -#define per_cpu(var, cpu) (*RELOC_HIDE(&var##__per_cpu, __per_cpu_offset[cpu])) #define __get_cpu_var(var) per_cpu(var, smp_processor_id()) - -#else /* ! SMP */ - -/* Can't define per-cpu variables in modules. Sorry --RR */ -#ifndef MODULE -#define DEFINE_PER_CPU(type, name) \ - __typeof__(type) name##__per_cpu -#endif - -#define per_cpu(var, cpu) ((void)cpu, var##__per_cpu) -#define __get_cpu_var(var) var##__per_cpu - -#endif /* SMP */ - -#define DECLARE_PER_CPU(type, name) extern __typeof__(type) name##__per_cpu - -#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var##__per_cpu) -#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var##__per_cpu) - +#define __NEED_SETUP_PER_CPU_AREAS +#endif /* SMP */ #endif /* _ASM_GENERIC_PERCPU_H_ */ diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .11647-linux-2.5.69/include/linux/percpu.h .11647-linux-2.5.69.updated/include/linux/percpu.h --- .11647-linux-2.5.69/include/linux/percpu.h 2003-02-07 19:20:01.000000000 +1100 +++ .11647-linux-2.5.69.updated/include/linux/percpu.h 2003-05-07 16:32:37.000000000 +1000 @@ -1,12 +1,16 @@ #ifndef __LINUX_PERCPU_H #define __LINUX_PERCPU_H -#include /* For preempt_disable() */ -#include /* For kmalloc_percpu() */ +#include /* For preempt_disable() */ +#include /* For kmalloc() */ +#include +#include +#include #include -/* Must be an lvalue. */ +/* For variables declared with DECLARE_PER_CPU()/DEFINE_PER_CPU(). */ #define get_cpu_var(var) (*({ preempt_disable(); &__get_cpu_var(var); })) #define put_cpu_var(var) preempt_enable() +/* Also, per_cpu(var, cpu) to get another cpu's value. */ #ifdef CONFIG_SMP @@ -22,7 +26,7 @@ struct percpu_data { */ #define per_cpu_ptr(ptr, cpu) \ ({ \ - struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr); \ + struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr);\ (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) @@ -30,7 +34,32 @@ extern void *kmalloc_percpu(size_t size, extern void kfree_percpu(const void *); extern void kmalloc_percpu_init(void); -#else /* CONFIG_SMP */ +/* For modules. */ +extern void *__alloc_percpu(size_t size, size_t align); + +extern unsigned long __per_cpu_offset[NR_CPUS]; + +/* Separate out the type, so (int[3], foo) works. 
*/ +#ifndef MODULE +#define DEFINE_PER_CPU(type, name) \ + __attribute__((__section__(".data.percpu"))) __typeof__(type) name##__per_cpu +#endif + +/* var is in discarded region: offset to particular copy we want */ +#define per_cpu(var, cpu) (*RELOC_HIDE(&var##__per_cpu, __per_cpu_offset[cpu])) + +extern void setup_per_cpu_areas(void); +#else /* !CONFIG_SMP */ + +/* Can't define per-cpu variables in modules. Sorry --RR */ +#ifndef MODULE +#define DEFINE_PER_CPU(type, name) \ + __typeof__(type) name##__per_cpu +#endif + +#define per_cpu(var, cpu) ((void)(cpu), var##__per_cpu) +#define __get_cpu_var(var) var##__per_cpu +#define per_cpu_ptr(ptr, cpu) ((void)(cpu), (ptr)) #define per_cpu_ptr(ptr, cpu) (ptr) @@ -44,8 +73,22 @@ static inline void kfree_percpu(const vo } static inline void kmalloc_percpu_init(void) { } +/* For modules. */ +static inline void *__alloc_percpu(size_t size, size_t align) +{ + /* kmalloc always cacheline aligns. */ + BUG_ON(align > SMP_CACHE_BYTES); + return kmalloc(size, GFP_KERNEL); +} + +static inline void setup_per_cpu_areas(void) { } #endif /* CONFIG_SMP */ +#define DECLARE_PER_CPU(type, name) extern __typeof__(type) name##__per_cpu + +#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(var##__per_cpu) +#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(var##__per_cpu) + /* * Use these with kmalloc_percpu. If * 1. You want to operate on memory allocated by kmalloc_percpu (dereference diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .11647-linux-2.5.69/init/main.c .11647-linux-2.5.69.updated/init/main.c --- .11647-linux-2.5.69/init/main.c 2003-05-05 12:37:13.000000000 +1000 +++ .11647-linux-2.5.69.updated/init/main.c 2003-05-07 16:26:42.000000000 +1000 @@ -301,35 +301,10 @@ static void __init smp_init(void) #define smp_init() do { } while (0) #endif -static inline void setup_per_cpu_areas(void) { } static inline void smp_prepare_cpus(unsigned int maxcpus) { } #else -#ifdef __GENERIC_PER_CPU -unsigned long __per_cpu_offset[NR_CPUS]; - -static void __init setup_per_cpu_areas(void) -{ - unsigned long size, i; - char *ptr; - /* Created by linker magic */ - extern char __per_cpu_start[], __per_cpu_end[]; - - /* Copy section for each CPU (we discard the original) */ - size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); - if (!size) - return; - - ptr = alloc_bootmem(size * NR_CPUS); - - for (i = 0; i < NR_CPUS; i++, ptr += size) { - __per_cpu_offset[i] = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, size); - } -} -#endif /* !__GENERIC_PER_CPU */ - /* Called by boot processor to activate the rest. 
*/ static void __init smp_init(void) { diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .11647-linux-2.5.69/kernel/ksyms.c .11647-linux-2.5.69.updated/kernel/ksyms.c --- .11647-linux-2.5.69/kernel/ksyms.c 2003-05-05 12:37:13.000000000 +1000 +++ .11647-linux-2.5.69.updated/kernel/ksyms.c 2003-05-07 16:26:42.000000000 +1000 @@ -607,9 +607,6 @@ EXPORT_SYMBOL(init_thread_union); EXPORT_SYMBOL(tasklist_lock); EXPORT_SYMBOL(find_task_by_pid); EXPORT_SYMBOL(next_thread); -#if defined(CONFIG_SMP) && defined(__GENERIC_PER_CPU) -EXPORT_SYMBOL(__per_cpu_offset); -#endif /* debug */ EXPORT_SYMBOL(dump_stack); diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .11647-linux-2.5.69/mm/Makefile .11647-linux-2.5.69.updated/mm/Makefile --- .11647-linux-2.5.69/mm/Makefile 2003-02-11 14:26:20.000000000 +1100 +++ .11647-linux-2.5.69.updated/mm/Makefile 2003-05-07 16:26:42.000000000 +1000 @@ -12,3 +12,4 @@ obj-y := bootmem.o filemap.o mempool.o slab.o swap.o truncate.o vcache.o vmscan.o $(mmu-y) obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o +obj-$(CONFIG_SMP) += percpu.o diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .26261-linux-2.5.69/mm/percpu.c .26261-linux-2.5.69.updated/mm/percpu.c --- .26261-linux-2.5.69/mm/percpu.c 1970-01-01 10:00:00.000000000 +1000 +++ .26261-linux-2.5.69.updated/mm/percpu.c 2003-05-06 18:26:34.000000000 +1000 @@ -0,0 +1,216 @@ +/* + * Dynamic per-cpu allocation. + * This version (C) 2003 Rusty Russell, IBM Corporation. + */ + +/* Simple allocator: we don't stress it hard, but do want it + fairly space-efficient. */ +#include +#include +#include +#include +#include + +static DECLARE_MUTEX(pcpu_lock); + +struct pcpu_block +{ + /* Number of blocks used and allocated. */ + unsigned short num_used, num_allocated; + + /* Size of each block. -ve means used. */ + int size[0]; +}; +static struct pcpu_block *pcpu; /* = NULL */ + +/* Created by linker magic */ +extern char __per_cpu_start[], __per_cpu_end[]; + +/* Splits a block into two. Reallocs pcpu if neccessary. */ +static int split_block(unsigned int i, unsigned short size) +{ + /* Reallocation required? */ + if (pcpu->num_used + 1 > pcpu->num_allocated) { + struct pcpu_block *new; + + new = kmalloc(sizeof(*pcpu) + + sizeof(pcpu->size[0]) * pcpu->num_allocated*2, + GFP_KERNEL); + if (!new) + return 0; + new->num_used = pcpu->num_used; + new->num_allocated = pcpu->num_allocated * 2; + memcpy(new->size, pcpu->size, + sizeof(pcpu->size[0])*pcpu->num_used); + kfree(pcpu); + pcpu = new; + } + + /* Insert a new subblock */ + memmove(&pcpu->size[i+1], &pcpu->size[i], + sizeof(pcpu->size[0]) * (pcpu->num_used - i)); + pcpu->num_used++; + + pcpu->size[i+1] -= size; + pcpu->size[i] = size; + return 1; +} + +static inline unsigned int abs(int val) +{ + if (val < 0) + return -val; + return val; +} + +static inline void zero_all(void *pcpuptr, unsigned int size) +{ + unsigned int i;; + + for (i = 0; i < NR_CPUS; i++) + memset(per_cpu_ptr(pcpuptr, i), 0, size); +} + +static unsigned long pool_size; + +void *__alloc_percpu(size_t size, size_t align) +{ + unsigned long extra; + unsigned int i; + void *ptr; + + BUG_ON(align > SMP_CACHE_BYTES); + BUG_ON(size > pool_size/2); + + down(&pcpu_lock); + ptr = __per_cpu_start; + for (i = 0; i < pcpu->num_used; ptr += abs(pcpu->size[i]), i++) { + /* Extra for alignment requirement. 
*/ + extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr; + + /* Allocated or not large enough? */ + if (pcpu->size[i] < 0 || pcpu->size[i] < extra + size) + continue; + + /* Transfer extra to previous block. */ + if (pcpu->size[i-1] < 0) + pcpu->size[i-1] -= extra; + else + pcpu->size[i-1] += extra; + pcpu->size[i] -= extra; + ptr += extra; + + /* Split block if warranted */ + if (pcpu->size[i] - size > sizeof(unsigned long)) + if (!split_block(i, size)) + break; + + /* Mark allocated */ + pcpu->size[i] = -pcpu->size[i]; + zero_all(ptr, size); + goto out; + } + ptr = NULL; + out: + up(&pcpu_lock); + return ptr; +} + +void kfree_percpu(const void *freeme) +{ + unsigned int i; + void *ptr = __per_cpu_start; + + down(&pcpu_lock); + for (i = 0; i < pcpu->num_used; ptr += abs(pcpu->size[i]), i++) { + if (ptr == freeme) { + /* Double free? */ + BUG_ON(pcpu->size[i] > 0); + /* Block 0 is for non-dynamic per-cpu data. */ + BUG_ON(i == 0); + pcpu->size[i] = -pcpu->size[i]; + goto merge; + } + } + BUG(); + + merge: + /* Merge with previous? */ + if (pcpu->size[i-1] >= 0) { + pcpu->size[i-1] += pcpu->size[i]; + pcpu->num_used--; + memmove(&pcpu->size[i], &pcpu->size[i+1], + (pcpu->num_used - i) * sizeof(pcpu->size[0])); + i--; + } + /* Merge with next? */ + if (i+1 < pcpu->num_used && pcpu->size[i+1] >= 0) { + pcpu->size[i] += pcpu->size[i+1]; + pcpu->num_used--; + memmove(&pcpu->size[i+1], &pcpu->size[i+2], + (pcpu->num_used - (i+1)) * sizeof(pcpu->size[0])); + } + + /* There's always one block: the core kernel one. */ + BUG_ON(pcpu->num_used == 0); + up(&pcpu_lock); +} + +unsigned long __per_cpu_offset[NR_CPUS]; +EXPORT_SYMBOL(__per_cpu_offset); + +#define PERCPU_INIT_BLOCKS 4 + +#ifdef __NEED_SETUP_PER_CPU_AREAS +/* Generic version: allocates for all NR_CPUs. */ +void __init setup_per_cpu_areas(void) +{ + unsigned long i; + void *ptr; + + /* Leave at least 16k for __alloc_percpu */ + pool_size = ALIGN(__per_cpu_end - __per_cpu_start + 16384, + SMP_CACHE_BYTES); + /* Plenty of memory? 1GB = 64k per-cpu. */ + pool_size = max(((long long)num_physpages << PAGE_SHIFT) / 16384, + (long long)pool_size); +#ifdef PERCPU_POOL_MAX + if (pool_size > PERCPU_POOL_MAX) + pool_size = PERCPU_POOL_MAX; +#endif + + ptr = alloc_bootmem(pool_size * NR_CPUS); + + /* Don't panic yet, they won't see it */ + if (__per_cpu_end - __per_cpu_start > pool_size) + return; + + for (i = 0; i < NR_CPUS; i++, ptr += pool_size) { + __per_cpu_offset[i] = ptr - (void *)__per_cpu_start; + /* Copy section for each CPU (we discard the original) */ + memcpy(__per_cpu_start + __per_cpu_offset[i], + __per_cpu_start, + __per_cpu_end - __per_cpu_start); + } +} +#endif + +static int init_alloc_percpu(void) +{ + printk("Per-cpu data: %Zu of %u bytes\n", + __per_cpu_end - __per_cpu_start, pool_size); + + if (__per_cpu_end - __per_cpu_start > pool_size) + panic("Too much per-cpu data.\n"); + + pcpu = kmalloc(sizeof(*pcpu)+sizeof(pcpu->size[0])*PERCPU_INIT_BLOCKS, + GFP_KERNEL); + pcpu->num_allocated = PERCPU_INIT_BLOCKS; + pcpu->num_used = 2; + pcpu->size[0] = -(__per_cpu_end - __per_cpu_start); + pcpu->size[1] = pool_size-(__per_cpu_end - __per_cpu_start); + + return 0; +} + +__initcall(init_alloc_percpu);
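
A note on the bookkeeping in mm/percpu.c: the allocator keeps a single array of
signed block sizes covering the whole pool. A positive entry is a free block, a
negative entry is an allocated one, and entry 0 always covers the static
.data.percpu section, so there is always at least one block. Below is a minimal
user-space model of that scheme (the pool is tracked purely as offsets, there is
no alignment handling or locking, and the split threshold is simplified), just to
show how split on allocation and merge on free keep the array consistent; it is a
sketch, not the kernel code above:

/* User-space model of the signed-size block array in mm/percpu.c.
 * Positive size = free block, negative size = allocated block.
 * Simplified: fixed-capacity array, no alignment, no locking. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define POOL_SIZE 1024		/* pretend pool, in bytes */
#define MAX_BLOCKS 32

static int blk[MAX_BLOCKS];	/* signed sizes, as in struct pcpu_block */
static int num_used;

static void init_pool(int static_size)
{
	blk[0] = -static_size;			/* block 0: static per-cpu data, never freed */
	blk[1] = POOL_SIZE - static_size;	/* rest of the pool: one free block */
	num_used = 2;
}

/* Returns the byte offset of the allocation, or -1 on failure. */
static int model_alloc(int size)
{
	int i, off = 0;

	for (i = 0; i < num_used; off += abs(blk[i]), i++) {
		if (blk[i] < 0 || blk[i] < size)
			continue;		/* allocated, or too small */
		if (blk[i] > size && num_used < MAX_BLOCKS) {
			/* split: new free block keeps the remainder */
			memmove(&blk[i+1], &blk[i],
				(num_used - i) * sizeof(blk[0]));
			num_used++;
			blk[i+1] -= size;
			blk[i] = size;
		}
		blk[i] = -blk[i];		/* mark allocated */
		return off;
	}
	return -1;
}

/* off must be an offset previously returned by model_alloc(). */
static void model_free(int off)
{
	int i, pos = 0;

	for (i = 0; i < num_used; pos += abs(blk[i]), i++)
		if (pos == off)
			break;
	blk[i] = -blk[i];			/* mark free again */
	/* merge with next free block */
	if (i + 1 < num_used && blk[i+1] >= 0) {
		blk[i] += blk[i+1];
		num_used--;
		memmove(&blk[i+1], &blk[i+2],
			(num_used - (i+1)) * sizeof(blk[0]));
	}
	/* merge with previous free block (block 0 is never freed, so i >= 1) */
	if (blk[i-1] >= 0) {
		blk[i-1] += blk[i];
		num_used--;
		memmove(&blk[i], &blk[i+1], (num_used - i) * sizeof(blk[0]));
	}
}

int main(void)
{
	int a, b;

	init_pool(128);		/* pretend 128 bytes of static per-cpu data */
	a = model_alloc(64);
	b = model_alloc(200);
	printf("a at %d, b at %d\n", a, b);
	model_free(a);
	model_free(b);
	printf("blocks after free: %d (static block + one free block)\n",
	       num_used);
	return 0;
}

Running it prints the two offsets and then "blocks after free: 2": once both
allocations are returned, the free blocks coalesce and the pool collapses back to
the static block plus a single free block, which is the invariant the kernel code
relies on (pcpu->num_used never reaches 0).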
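
To make the pool sizing in setup_per_cpu_areas() concrete: the floor is the
static per-cpu section plus 16KB, rounded up to SMP_CACHE_BYTES, and
(num_physpages << PAGE_SHIFT) / 16384 scales the pool with RAM. A 1GB machine
therefore gets 2^30 / 2^14 = 65536 bytes, i.e. 64KB per CPU, which is where the
"1GB = 64k per-cpu" comment comes from. The whole thing is a single
alloc_bootmem() of pool_size * NR_CPUS bytes, the static section is copied into
each CPU's slice, and each CPU's distance from __per_cpu_start is recorded in
__per_cpu_offset[].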
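
Once the module support in the next patch is in place, usage from a module would
look roughly like the sketch below. The foo_* names are invented for
illustration, and it assumes per_cpu_ptr() ends up as the accessor for
__alloc_percpu() pointers, the same way zero_all() in mm/percpu.c already uses
it:

#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/smp.h>

/* Illustrative only: structure and function names are made up. */
struct foo_stats {
	unsigned long packets;
	unsigned long errors;
};
static struct foo_stats *foo_stats;

static int __init foo_init(void)
{
	/* One cacheline-or-less aligned object per possible CPU. */
	foo_stats = __alloc_percpu(sizeof(struct foo_stats),
				   __alignof__(struct foo_stats));
	if (!foo_stats)
		return -ENOMEM;
	return 0;
}

static void __exit foo_exit(void)
{
	kfree_percpu(foo_stats);
}

/* Hot path: bump only this CPU's copy (assumes per_cpu_ptr() resolves an
   __alloc_percpu() pointer to the given CPU's copy, as zero_all() does). */
static void foo_count_packet(void)
{
	struct foo_stats *stats = per_cpu_ptr(foo_stats, get_cpu());
	stats->packets++;
	put_cpu();
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");

The point of the per-object alignment argument is visible here: anything up to
SMP_CACHE_BYTES is honoured, so counters from different CPUs never share a
cacheline and the hot path stays bounce-free.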