Name: Extra Debugging Checks for Hotplug CPU
Author: Rusty Russell
Status: Experimental
Depends: Misc/ppc64-debug-spinlock.patch.gz

After boot, if CONFIG_PREEMPT or CONFIG_DEBUG_SPINLOCK_SLEEP is enabled,
and you haven't done lock_cpu_hotplug(), then examining the
cpu_online_map is racy.  Insert checks.

How it works: cpu_online_map becomes a macro that dereferences
get_cpu_online_map().  That function prints a warning and dumps the
stack when it is called from a context that is not atomic, has
interrupts enabled, is not the pid 0 task, and does not have PF_CPULOCK
set, once system_running is true.  lock_cpu_hotplug() and friends become
real functions so that they can set and clear PF_CPULOCK on the caller.

The map itself (real_cpu_online_map) is a global defined in kernel/cpu.c
because ppc64's prom.c sets bits in it directly through an extern
declaration.  lock_cpu_hotplug() checks PF_CPULOCK before sleeping on
the semaphore, like lock_cpu_hotplug_interruptible() does, since a
recursive down() would hang before any later check could fire.

NOTE(review): this copy of the patch had lost its line breaks and
whitespace.  Indentation and blank context lines below were
reconstructed to match the hunk line counts, and the header names on
the "#include" lines of the first kernel/cpu.c hunk are missing.
Restore them from a pristine copy before applying.

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .21056-linux-2.6.2-mm1/arch/i386/kernel/i386_ksyms.c .21056-linux-2.6.2-mm1.updated/arch/i386/kernel/i386_ksyms.c
--- .21056-linux-2.6.2-mm1/arch/i386/kernel/i386_ksyms.c	2004-02-06 12:06:44.000000000 +1100
+++ .21056-linux-2.6.2-mm1.updated/arch/i386/kernel/i386_ksyms.c	2004-02-09 18:18:41.000000000 +1100
@@ -148,7 +148,7 @@ EXPORT_SYMBOL(cpu_sibling_map);
 
 #ifdef CONFIG_SMP
 EXPORT_SYMBOL(cpu_data);
-EXPORT_SYMBOL(cpu_online_map);
+EXPORT_SYMBOL(get_cpu_online_map);
 EXPORT_SYMBOL(cpu_callout_map);
 EXPORT_SYMBOL_NOVERS(__write_lock_failed);
 EXPORT_SYMBOL_NOVERS(__read_lock_failed);
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .21056-linux-2.6.2-mm1/arch/i386/kernel/smpboot.c .21056-linux-2.6.2-mm1.updated/arch/i386/kernel/smpboot.c
--- .21056-linux-2.6.2-mm1/arch/i386/kernel/smpboot.c	2004-02-09 18:18:40.000000000 +1100
+++ .21056-linux-2.6.2-mm1.updated/arch/i386/kernel/smpboot.c	2004-02-09 18:18:41.000000000 +1100
@@ -66,8 +66,6 @@ static int __initdata smp_b_stepping;
 int smp_num_siblings = 1;
 int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
 
-/* bitmap of online cpus */
-cpumask_t cpu_online_map;
 
 static cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .21056-linux-2.6.2-mm1/include/linux/cpu.h .21056-linux-2.6.2-mm1.updated/include/linux/cpu.h
--- .21056-linux-2.6.2-mm1/include/linux/cpu.h	2004-02-09 18:18:40.000000000 +1100
+++ .21056-linux-2.6.2-mm1.updated/include/linux/cpu.h	2004-02-09 18:18:41.000000000 +1100
@@ -56,15 +56,15 @@ extern struct sysdev_class cpu_sysdev_cl
 #ifdef CONFIG_HOTPLUG_CPU
 /* Stop CPUs going up and down. */
 extern struct semaphore cpucontrol;
-#define lock_cpu_hotplug()	down(&cpucontrol)
-#define unlock_cpu_hotplug()	up(&cpucontrol)
-#define lock_cpu_hotplug_interruptible() down_interruptible(&cpucontrol)
+extern void lock_cpu_hotplug(void);
+extern int lock_cpu_hotplug_interruptible(void);
+extern void unlock_cpu_hotplug(void);
 int cpu_down(unsigned int cpu);
 #define hotcpu_notifier(fn, pri) {				\
 	static struct notifier_block fn##_nb = { fn, pri };	\
 	register_cpu_notifier(&fn##_nb);			\
 }
-#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
+extern int cpu_is_offline(unsigned int cpu);
 #else
 #define lock_cpu_hotplug()	do { } while (0)
 #define unlock_cpu_hotplug()	do { } while (0)
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .21056-linux-2.6.2-mm1/include/linux/cpumask.h .21056-linux-2.6.2-mm1.updated/include/linux/cpumask.h
--- .21056-linux-2.6.2-mm1/include/linux/cpumask.h	2004-02-09 18:18:40.000000000 +1100
+++ .21056-linux-2.6.2-mm1.updated/include/linux/cpumask.h	2004-02-09 18:18:41.000000000 +1100
@@ -8,7 +8,8 @@
 
 #ifdef CONFIG_SMP
 
-extern cpumask_t cpu_online_map;
+extern cpumask_t *get_cpu_online_map(void);
+#define cpu_online_map (*get_cpu_online_map())
 extern cpumask_t cpu_possible_map;
 
 #define num_online_cpus()		cpus_weight(cpu_online_map)
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .21056-linux-2.6.2-mm1/include/linux/sched.h .21056-linux-2.6.2-mm1.updated/include/linux/sched.h
--- .21056-linux-2.6.2-mm1/include/linux/sched.h	2004-02-09 18:18:40.000000000 +1100
+++ .21056-linux-2.6.2-mm1.updated/include/linux/sched.h	2004-02-09 18:18:41.000000000 +1100
@@ -528,6 +528,7 @@ do { if (atomic_dec_and_test(&(tsk)->usa
 #define PF_SWAPOFF	0x00080000	/* I am in swapoff */
 #define PF_LESS_THROTTLE 0x00100000	/* Throttle me less: I clean memory */
 #define PF_SYNCWRITE	0x00200000	/* I am doing a sync write */
+#define PF_CPULOCK	0x00400000	/* I hold the cpucontrol lock. */
 
 #ifdef CONFIG_SMP
 #define SD_FLAG_NEWIDLE		1	/* Balance when about to become idle */
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .21056-linux-2.6.2-mm1/kernel/cpu.c .21056-linux-2.6.2-mm1.updated/kernel/cpu.c
--- .21056-linux-2.6.2-mm1/kernel/cpu.c	2004-02-09 18:18:40.000000000 +1100
+++ .21056-linux-2.6.2-mm1.updated/kernel/cpu.c	2004-02-09 18:18:41.000000000 +1100
@@ -14,6 +14,7 @@
 #include
 #include
 #include
+#include
 #include
 
 /* This protects CPUs going up and down... */
@@ -198,3 +199,76 @@ out:
 	unlock_cpu_hotplug();
 	return ret;
 }
+
+/*
+ * The real online map behind the cpu_online_map macro.  Not static:
+ * ppc64's prom.c sets bits in it directly.  Initialised, as the ppc64
+ * definition it replaces was, so it doesn't end up in bss.
+ */
+cpumask_t real_cpu_online_map = CPU_MASK_NONE;
+
+/*
+ * Shouldn't even look at this if we're preemptable: warn and dump the
+ * stack unless the caller is atomic, has irqs disabled, is the pid 0
+ * task, holds the cpucontrol lock, or the system isn't running yet.
+ */
+cpumask_t *get_cpu_online_map(void)
+{
+/* Need one of these, otherwise in_atomic() is always 0. */
+#if defined(CONFIG_DEBUG_SPINLOCK_SLEEP) || defined(CONFIG_PREEMPT)
+	if (!in_atomic()
+	    && !irqs_disabled()
+	    && system_running
+	    && current->pid != 0
+	    && !(current->flags & PF_CPULOCK)) {
+		printk("Loose cpu_online_map on %i\n", smp_processor_id());
+		dump_stack();
+	}
+#endif
+	return &real_cpu_online_map;
+}
+
+/* Returns nonzero when cpu_online(cpu) is false. */
+int cpu_is_offline(unsigned int cpu)
+{
+	int ret;
+
+	/* We can always ask "am I online?" */
+	if (cpu == smp_processor_id()) {
+		preempt_disable();
+		ret = !cpu_online(cpu);
+		preempt_enable();
+	} else
+		ret = !cpu_online(cpu);
+	return ret;
+}
+
+/* Take the cpucontrol semaphore and mark current as its holder. */
+void lock_cpu_hotplug(void)
+{
+	/* Check first: a recursive down() would hang before we got here. */
+	BUG_ON(current->flags & PF_CPULOCK);
+	down(&cpucontrol);
+	current->flags |= PF_CPULOCK;
+}
+
+/* As lock_cpu_hotplug(), but returns down_interruptible()'s error. */
+int lock_cpu_hotplug_interruptible(void)
+{
+	int err;
+
+	BUG_ON(current->flags & PF_CPULOCK);
+
+	err = down_interruptible(&cpucontrol);
+	if (!err)
+		current->flags |= PF_CPULOCK;
+	return err;
+}
+
+/* Drop the holder mark, then release the cpucontrol semaphore. */
+void unlock_cpu_hotplug(void)
+{
+	BUG_ON(!(current->flags & PF_CPULOCK));
+	current->flags &= ~PF_CPULOCK;
+	up(&cpucontrol);
+}
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .24393-2.6.3-rc2-bk4-atomic-cpudown-ppc64.pre/arch/ppc64/kernel/prom.c .24393-2.6.3-rc2-bk4-atomic-cpudown-ppc64/arch/ppc64/kernel/prom.c
--- .24393-2.6.3-rc2-bk4-atomic-cpudown-ppc64.pre/arch/ppc64/kernel/prom.c	2004-02-15 12:42:58.000000000 +1100
+++ .24393-2.6.3-rc2-bk4-atomic-cpudown-ppc64/arch/ppc64/kernel/prom.c	2004-02-15 16:01:33.000000000 +1100
@@ -1106,7 +1112,10 @@ prom_hold_cpus(unsigned long mem)
 				prom_print_nl();
 				cpu_set(cpuid, RELOC(cpu_available_map));
 				cpu_set(cpuid, RELOC(cpu_possible_map));
-				cpu_set(cpuid, RELOC(cpu_online_map));
+				{
+					extern cpumask_t real_cpu_online_map;
+					cpu_set(cpuid, RELOC(real_cpu_online_map));
+				}
 				cpu_set(cpuid, RELOC(cpu_present_at_boot));
 			}
 
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .24393-2.6.3-rc2-bk4-atomic-cpudown-ppc64.pre/arch/ppc64/kernel/smp.c .24393-2.6.3-rc2-bk4-atomic-cpudown-ppc64/arch/ppc64/kernel/smp.c
--- .24393-2.6.3-rc2-bk4-atomic-cpudown-ppc64.pre/arch/ppc64/kernel/smp.c	2004-02-15 12:42:58.000000000 +1100
+++ .24393-2.6.3-rc2-bk4-atomic-cpudown-ppc64/arch/ppc64/kernel/smp.c	2004-02-15 16:01:33.000000000 +1100
@@ -55,13 +56,14 @@ unsigned long cache_decay_ticks;
 
 /* Initialised so it doesn't end up in bss */
 cpumask_t cpu_possible_map = CPU_MASK_NONE;
-cpumask_t cpu_online_map = CPU_MASK_NONE;
 cpumask_t cpu_available_map = CPU_MASK_NONE;
 cpumask_t cpu_present_at_boot = CPU_MASK_NONE;
 
-EXPORT_SYMBOL(cpu_online_map);
+EXPORT_SYMBOL(get_cpu_online_map);
 EXPORT_SYMBOL(cpu_possible_map);
 
+static DEFINE_PER_CPU(struct cpu, cpu_devices);
+
 struct smp_ops_t *smp_ops;
 static volatile unsigned int cpu_callin_map[NR_CPUS];
 
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .24393-2.6.3-rc2-bk4-atomic-cpudown-ppc64.pre/include/asm-ppc64/smp.h .24393-2.6.3-rc2-bk4-atomic-cpudown-ppc64/include/asm-ppc64/smp.h
--- .24393-2.6.3-rc2-bk4-atomic-cpudown-ppc64.pre/include/asm-ppc64/smp.h	2004-02-04 15:39:12.000000000 +1100
+++ .24393-2.6.3-rc2-bk4-atomic-cpudown-ppc64/include/asm-ppc64/smp.h	2004-02-15 16:01:33.000000000 +1100
@@ -48,7 +48,6 @@ extern void smp_message_recv(int, struct
  * code to handle special cases for processor start up.
  */
 extern cpumask_t cpu_present_at_boot;
-extern cpumask_t cpu_online_map;
 extern cpumask_t cpu_possible_map;
 extern cpumask_t cpu_available_map;
 