Name: Gather Stats on per-cpu Usage Author: Rusty Russell Status: Experimental Gathers stats on per-cpu variable usage. Hack: the counts are printed whenever /proc/percpu_stats is read. Index: linux-2.6.10-rc2-bk13-Percpu/mm/percpu.c =================================================================== --- linux-2.6.10-rc2-bk13-Percpu.orig/mm/percpu.c 2004-12-02 14:50:04.124129536 +1100 +++ linux-2.6.10-rc2-bk13-Percpu/mm/percpu.c 2004-12-02 14:50:13.980631120 +1100 @@ -62,6 +62,13 @@ /* All blocks have to be the same size per cpu, otherwise span would differ. */ static unsigned long reserved_size, percpu_size; +atomic_t percpu_local_count = ATOMIC_INIT(0), percpu_local_ptr_count = ATOMIC_INIT(0), percpu_count = ATOMIC_INIT(0), percpu_ptr_count = ATOMIC_INIT(0), smp_id_count = ATOMIC_INIT(0); +EXPORT_SYMBOL(percpu_local_count); +EXPORT_SYMBOL(percpu_local_ptr_count); +EXPORT_SYMBOL(percpu_count); +EXPORT_SYMBOL(percpu_ptr_count); +EXPORT_SYMBOL(smp_id_count); + #ifdef __GENERIC_PER_CPU /* Ideally, an arch will sew together pages local to CPUs to form a * continuous allocation.
*/ @@ -518,3 +518,51 @@ } late_initcall(test_percpu); #endif
+
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+
+/*
+ * get_info handler for the percpu_stats proc entry: formats the usage
+ * counters into buffer and returns the number of bytes written.  Each
+ * "non-local" figure is the matching total minus its local counter.  The
+ * start, offset and length arguments are not used.
+ */
+static int percpu_stats(char *buffer, char **start, off_t offset, int length)
+{
+	int len = 0;
+
+	len += sprintf(buffer+len, "percpu: local = %u, non-local = %u\n",
+		       atomic_read(&percpu_local_count),
+		       atomic_read(&percpu_count)
+		       - atomic_read(&percpu_local_count));
+	len += sprintf(buffer+len,
+		       "percpu pointers: local = %u, non-local = %u\n",
+		       atomic_read(&percpu_local_ptr_count),
+		       atomic_read(&percpu_ptr_count) -
+		       atomic_read(&percpu_local_ptr_count));
+	/* Subtract the smp_processor_id() calls made by the local accessors. */
+	len += sprintf(buffer+len, "smp_processor_id(): %u\n",
+		       atomic_read(&smp_id_count)
+		       - atomic_read(&percpu_local_ptr_count)
+		       - atomic_read(&percpu_local_count));
+	return len;
+}
+
+/*
+ * Creates the owner-read-only (0400) percpu_stats proc entry and installs
+ * percpu_stats() as its get_info handler.  Returns 0 on success, -ENOMEM if
+ * the entry could not be created.
+ */
+static int procfile_init(void)
+{
+	struct proc_dir_entry *proc;
+
+	proc = create_proc_entry("percpu_stats", 0400, NULL);
+	/* create_proc_entry() returns NULL on failure; don't dereference it. */
+	if (!proc)
+		return -ENOMEM;
+	proc->get_info = percpu_stats;
+	return 0;
+}
+__initcall(procfile_init);
Index: linux-2.6.10-rc2-bk13-Percpu/include/asm-i386/smp.h =================================================================== --- linux-2.6.10-rc2-bk13-Percpu.orig/include/asm-i386/smp.h 2004-10-19 14:34:18.000000000 +1000 +++ linux-2.6.10-rc2-bk13-Percpu/include/asm-i386/smp.h 2004-12-02 14:49:53.338769160 +1100 @@ -9,6 +9,7 @@ #include #include #include +#include <asm/atomic.h> #endif #ifdef CONFIG_X86_LOCAL_APIC @@ -45,12 +46,13 @@ #define MAX_APICID 256 extern u8 x86_cpu_to_apicid[]; +extern atomic_t smp_id_count; /* * This function is needed by all SMP systems. It must _always_ be valid * from the initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case.
*/ -#define smp_processor_id() (current_thread_info()->cpu) +#define smp_processor_id() ({ atomic_inc(&smp_id_count); (current_thread_info()->cpu); }) extern cpumask_t cpu_callout_map; #define cpu_possible_map cpu_callout_map Index: linux-2.6.10-rc2-bk13-Percpu/include/linux/percpu.h =================================================================== --- linux-2.6.10-rc2-bk13-Percpu.orig/include/linux/percpu.h 2004-12-02 14:21:08.716951528 +1100 +++ linux-2.6.10-rc2-bk13-Percpu/include/linux/percpu.h 2004-12-02 14:49:53.338769160 +1100 @@ -16,8 +16,12 @@ #define put_cpu_var(var) preempt_enable() #ifdef CONFIG_SMP
+#include <asm/atomic.h>
+/* Usage counters for the stats hack (defined in mm/percpu.c); bumped by the accessor macros. */
+extern atomic_t percpu_local_count, percpu_local_ptr_count, percpu_count, percpu_ptr_count;
+
 /* var is in discarded region: offset to particular copy we want */
-#define per_cpu(var, cpu) (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
+#define per_cpu(var, cpu) (*({ atomic_inc(&percpu_count); RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]); }))
extern unsigned long __per_cpu_offset[NR_CPUS]; /* @@ -26,7 +30,7 @@ * to use get_cpu_ptr...
*/ #define per_cpu_ptr(ptr, cpu) \ - ((__typeof__(ptr))((void *)ptr + __per_cpu_offset[(cpu)])) + ({ atomic_inc(&percpu_ptr_count); ((__typeof__(ptr))((void *)ptr + __per_cpu_offset[(cpu)])); }) extern void *__alloc_percpu(unsigned long size, unsigned long align); extern void free_percpu(const void *); Index: linux-2.6.10-rc2-bk13-Percpu/include/asm-generic/percpu.h =================================================================== --- linux-2.6.10-rc2-bk13-Percpu.orig/include/asm-generic/percpu.h 2004-12-02 14:21:08.718951224 +1100 +++ linux-2.6.10-rc2-bk13-Percpu/include/asm-generic/percpu.h 2004-12-02 14:49:53.338769160 +1100 @@ -1,6 +1,7 @@ #ifndef _ASM_GENERIC_PERCPU_H_ #define _ASM_GENERIC_PERCPU_H_ #include +#include <asm/atomic.h> #define __GENERIC_PER_CPU #ifdef CONFIG_SMP @@ -9,9 +10,10 @@ #define DEFINE_PER_CPU(type, name) \ __attribute__((__section__(".data.percpu"))) __typeof__(type) per_cpu__##name
-#define __get_cpu_var(var) per_cpu(var, smp_processor_id())
+#define __get_cpu_var(var) (*({ atomic_inc(&percpu_local_count); &per_cpu(var, smp_processor_id()); }))
+/* Local lookups go through per_cpu_ptr() so that percpu_ptr_count includes them too. */
 #define __get_cpu_ptr(ptr) \
-	((__typeof__(ptr))((void *)ptr + __per_cpu_offset[smp_processor_id()]))
+	({ atomic_inc(&percpu_local_ptr_count); per_cpu_ptr(ptr, smp_processor_id()); })
/* A macro to avoid #include hell... */ #define percpu_modcopy(pcpudst, src, size) \