Name: Make PPC64 Use r14 For Per-Cpu Data
Status: Untested
Version: ppc64
Signed-off-by: Rusty Russell (authored)

To quote Anton Blanchard: I just had a look at what a percpu data
access looks like on ppc64:

	lhz 0,18(13)		/* smp_processor_id() */
	ld 8,.LC92@toc(2)	/* __per_cpu_offset */
	ld 9,.LC108@toc(2)	/* per_cpu__total_mmio_ffs */
	sldi 0,0,3		/* smp_processor_id() * 8 */
	mr 10,9			/* gcc sucks */
	ldx 11,8,0		/* __per_cpu_offset[smp_processor_id()] */
	ldx 9,10,11		/* finally, load our per cpu variable */
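For comparison, with this patch applied the same access should compile
down to something like the following (a sketch of the expected code for
Anton's example variable, not output from an actual build):

	addi 9,14,per_cpu__total_mmio_ffs@sectoff@l-32768
				/* address of this cpu's copy */
	ld 9,0(9)		/* load our per cpu variable */

No smp_processor_id() load, no TOC loads, no offset-table indexing.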
diff -urNp --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-ppc64-2.5/arch/ppc64/Makefile working-ppc64-2.5-r14/arch/ppc64/Makefile
--- linux-ppc64-2.5/arch/ppc64/Makefile	2004-06-07 12:05:50.000000000 +1000
+++ working-ppc64-2.5-r14/arch/ppc64/Makefile	2004-07-09 13:40:57.394987478 +1000
@@ -27,7 +27,7 @@ CHECK		:= $(CHECK) -m64 -D__powerpc__=1
 LDFLAGS		:= -m elf64ppc
 LDFLAGS_vmlinux	:= -Bstatic -e $(KERNELLOAD) -Ttext $(KERNELLOAD)
 CFLAGS		+= -msoft-float -pipe -Wno-uninitialized -mminimal-toc \
-		   -mtraceback=none
+		   -mtraceback=none -ffixed-r14
 
 ifeq ($(CONFIG_POWER4_ONLY),y)
 CFLAGS		+= $(call check_gcc,-mcpu=power4,)
diff -urNp --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-ppc64-2.5/arch/ppc64/kernel/smp.c working-ppc64-2.5-r14/arch/ppc64/kernel/smp.c
--- linux-ppc64-2.5/arch/ppc64/kernel/smp.c	2004-07-05 15:08:04.000000000 +1000
+++ working-ppc64-2.5-r14/arch/ppc64/kernel/smp.c	2004-07-09 15:43:17.523788809 +1000
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -878,6 +879,35 @@ void __init smp_prepare_cpus(unsigned in
 		smp_create_idle(cpu);
 }
 
+unsigned long __r14[NR_CPUS];
+EXPORT_SYMBOL(__r14);
+
+void __init setup_per_cpu_areas(void)
+{
+	unsigned long size, i;
+	char *ptr;
+	/* Created by linker magic */
+	extern char __per_cpu_start[], __per_cpu_end[];
+
+	/* Copy section for each CPU (we discard the original) */
+	size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
+#ifdef CONFIG_MODULES
+	if (size < PERCPU_ENOUGH_ROOM)
+		size = PERCPU_ENOUGH_ROOM;
+#endif
+
+	ptr = alloc_bootmem(size * NR_CPUS);
+
+	for (i = 0; i < NR_CPUS; i++, ptr += size) {
+		__r14[i] = (unsigned long)ptr + 32768;
+		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+	}
+
+	/* Set up this cpu's r14. */
+	asm("mr 14,%0" : : "r" (__r14[boot_cpuid]));
+}
+
+
 void __devinit smp_prepare_boot_cpu(void)
 {
 	BUG_ON(smp_processor_id() != boot_cpuid);
@@ -971,6 +1001,7 @@ int __devinit start_secondary(void *unus
 	smp_store_cpu_info(cpu);
 	set_dec(paca[cpu].default_decr);
 	cpu_callin_map[cpu] = 1;
+	asm("mr 14,%0" : : "r" (__r14[cpu]));
 
 	smp_ops->setup_cpu(cpu);
 	if (smp_ops->take_timebase)
diff -urNp --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-ppc64-2.5/arch/ppc64/kernel/stab.c working-ppc64-2.5-r14/arch/ppc64/kernel/stab.c
--- linux-ppc64-2.5/arch/ppc64/kernel/stab.c	2004-07-05 15:08:04.000000000 +1000
+++ working-ppc64-2.5-r14/arch/ppc64/kernel/stab.c	2004-07-09 15:33:07.381080622 +1000
@@ -188,7 +188,7 @@ static inline void __ste_allocate(unsign
 
 	offset = __get_cpu_var(stab_cache_ptr);
 	if (offset < NR_STAB_CACHE_ENTRIES)
-		__get_cpu_var(stab_cache[offset++]) = stab_entry;
+		__get_cpu_var(stab_cache)[offset++] = stab_entry;
 	else
 		offset = NR_STAB_CACHE_ENTRIES+1;
 	__get_cpu_var(stab_cache_ptr) = offset;
@@ -286,7 +286,7 @@ void flush_stab(struct task_struct *tsk,
 		int i;
 
 		for (i = 0; i < offset; i++) {
-			ste = stab + __get_cpu_var(stab_cache[i]);
+			ste = stab + __get_cpu_var(stab_cache)[i];
 			ste->dw0.dw0.v = 0;
 		}
 	} else {
@@ -419,7 +419,7 @@ static inline void __slb_allocate(unsign
 
 	offset = __get_cpu_var(stab_cache_ptr);
 	if (offset < NR_STAB_CACHE_ENTRIES)
-		__get_cpu_var(stab_cache[offset++]) = esid;
+		__get_cpu_var(stab_cache)[offset++] = esid;
 	else
 		offset = NR_STAB_CACHE_ENTRIES+1;
 	__get_cpu_var(stab_cache_ptr) = offset;
@@ -515,7 +515,7 @@ void flush_slb(struct task_struct *tsk,
 	asm volatile("isync" : : : "memory");
 	for (i = 0; i < offset; i++) {
 		esid_data.word0 = 0;
-		esid_data.data.esid = __get_cpu_var(stab_cache[i]);
+		esid_data.data.esid = __get_cpu_var(stab_cache)[i];
 		BUG_ON(esid_data.data.esid == GET_ESID(VMALLOCBASE));
 		asm volatile("slbie %0" : : "r" (esid_data));
 	}
diff -urNp --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-ppc64-2.5/include/asm-ppc64/percpu.h working-ppc64-2.5-r14/include/asm-ppc64/percpu.h
--- linux-ppc64-2.5/include/asm-ppc64/percpu.h	2004-02-01 17:04:31.000000000 +1100
+++ working-ppc64-2.5-r14/include/asm-ppc64/percpu.h	2004-07-09 17:34:07.749009651 +1000
@@ -1,6 +1,56 @@
 #ifndef __ARCH_PPC64_PERCPU__
 #define __ARCH_PPC64_PERCPU__
 
-#include
+#include
+#include
+
+#ifdef CONFIG_SMP
+extern void setup_per_cpu_areas(void);
+extern unsigned long __r14[NR_CPUS];
+
+/* Separate out the type, so (int[3], foo) works. */
+/* Needs to be marked used so even static ones don't get optimized away. */
+#define DEFINE_PER_CPU(type, name) \
+	__attribute__((__section__(".data.percpu"), used)) __typeof__(type) per_cpu__##name
+
+/* var is thread-relative: offset to particular copy we want */
+#define per_cpu(var, cpu) (*({						\
+	void *_ret;							\
+	asm("addi %0,%1," __stringify(per_cpu__##var) "@sectoff@l-32768"\
+	    : "=r" (_ret) : "b" (__r14[cpu]));				\
+	(__typeof__(per_cpu__##var) *)_ret;				\
+}))
+
+/* r14 has per-cpu address in it, plus 32768. */
+#define __get_cpu_var(var) (*({						\
+	void *_ret;							\
+	asm("addi %0,14," __stringify(per_cpu__##var) "@sectoff@l-32768"\
+	    : "=r" (_ret));						\
+	(__typeof__(per_cpu__##var) *)_ret;				\
+}))
+
+/* A macro to avoid #include hell... */
+#define percpu_modcopy(pcpudst, src, size)				\
+do {									\
+	unsigned int __i;						\
+	for (__i = 0; __i < NR_CPUS; __i++)				\
+		if (cpu_possible(__i))					\
+			memcpy((pcpudst)+__r14[__i]-32768,		\
+			       (src), (size));				\
+} while (0)
+
+#else /* !SMP */
+
+#define DEFINE_PER_CPU(type, name) \
+	__typeof__(type) per_cpu__##name
+
+#define per_cpu(var, cpu)	(*((void)cpu, &per_cpu__##var))
+#define __get_cpu_var(var)	per_cpu__##var
+
+#endif /* SMP */
+
+#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu__##name
+#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
+#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
 
 #endif /* __ARCH_PPC64_PERCPU__ */
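A note on the 32768 bias, since it appears in three places above: addi
takes a signed 16-bit immediate (-32768..32767), while @sectoff@l is an
unsigned offset into .data.percpu. Storing base+32768 in __r14[] and
subtracting 32768 from every section offset keeps every offset up to
64k within the immediate's range (so the section, including the module
room reserved via PERCPU_ENOUGH_ROOM, must stay under 64k). A
standalone sanity check of that arithmetic, with an invented base
address:

	#include <assert.h>

	int main(void)
	{
		unsigned long base = 0x100000;	/* hypothetical bootmem chunk */
		unsigned long r14 = base + 32768; /* biased, as in setup_per_cpu_areas() */
		unsigned long sectoff;

		for (sectoff = 0; sectoff < 65536; sectoff++) {
			long imm = (long)sectoff - 32768; /* what @sectoff@l-32768 yields */

			assert(imm >= -32768 && imm <= 32767);	/* fits addi's immediate */
			assert(r14 + imm == base + sectoff);	/* addi lands on the variable */
		}
		return 0;
	}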
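The stab.c hunks aren't cosmetic: the new __get_cpu_var() stringifies
its argument into an asm symbol reference, so the argument must be a
bare per-cpu name and any array index has to move outside the macro.
A sketch of the difference (assuming stab_cache is defined with the
array dimension in the type, per the "int[3], foo" comment above):

	DEFINE_PER_CPU(long[NR_STAB_CACHE_ENTRIES], stab_cache);

	/* OK: the asm sees the real symbol "per_cpu__stab_cache", and
	 * the indexing happens in C on the pointer the macro returns. */
	__get_cpu_var(stab_cache)[offset++] = stab_entry;

	/* Broken with the new macro: stringifies to
	 * "per_cpu__stab_cache[offset++]@sectoff@l-32768", which is not
	 * a symbol the assembler can relocate. */
	/* __get_cpu_var(stab_cache[offset++]) = stab_entry; */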
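Finally, why r14 in particular: -ffixed-r14 stops gcc from allocating
it anywhere in the kernel, and r14 is a non-volatile (callee-saved)
register in the ppc64 ELF ABI, so external code that follows the ABI
preserves it across calls anyway. Reading it back is one instruction,
along the lines of the macros above (sketch; the helper name is
hypothetical, and it is only meaningful in code built with
-ffixed-r14):

	/* Return this cpu's biased per-cpu base from r14. */
	static inline unsigned long current_percpu_base(void)
	{
		unsigned long base;

		asm("mr %0,14" : "=r" (base));
		return base;	/* equals __r14[smp_processor_id()] */
	}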