Name: Hotplug CPU Remove for i386
Author: Rusty Russell
Status: Tested on 2.5.44
Depends: Hotcpu/hotcpu-cpudown.patch.gz
Depends: Hotcpu/wake_idle_cpu.patch.gz
D: This introduces hotplug CPU capability for i386.  It doesn't
D: actually take the CPU down, it just leaves it looping with interrupts off.

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .9779-2.5.44-hotcpu-cpudown-i386.pre/arch/i386/kernel/process.c .9779-2.5.44-hotcpu-cpudown-i386/arch/i386/kernel/process.c
--- .9779-2.5.44-hotcpu-cpudown-i386.pre/arch/i386/kernel/process.c	2002-10-15 15:30:51.000000000 +1000
+++ .9779-2.5.44-hotcpu-cpudown-i386/arch/i386/kernel/process.c	2002-10-28 17:28:35.000000000 +1100
@@ -34,8 +34,11 @@
 #include
 #include
 #include
+#include
+#include
 #include
+#include
 #include
 #include
 #include
@@ -77,6 +80,8 @@ void enable_hlt(void)
 	hlt_counter--;
 }
 
+DECLARE_PER_CPU(int, cpu_die);
+
 /*
  * We use this if we don't have any better
  * idle routine..
  */
@@ -124,6 +129,36 @@ static void poll_idle (void)
 	}
 }
 
+#ifdef CONFIG_HOTPLUG
+static inline void maybe_play_dead(void)
+{
+	if (unlikely(__get_cpu_var(cpu_die))) {
+		printk("Cpu %u Dust Dust Dust\n", smp_processor_id());
+		/* Ack it */
+		__get_cpu_var(cpu_die) = 2;
+
+		/* Death loop */
+		local_irq_disable();
+		while (__get_cpu_var(cpu_die))
+			cpu_relax();
+		local_irq_enable();
+
+		/* Now, we missed any cache flush IPIs, so be safe. */
+		local_flush_tlb();
+
+		/* Ack it by setting online bit */
+		br_write_lock_irq(BR_CPU_LOCK);
+		set_bit(smp_processor_id(), &cpu_online_map);
+		br_write_unlock_irq(BR_CPU_LOCK);
+		printk("Cpu %u arisen\n", smp_processor_id());
+	}
+}
+#else
+static inline void maybe_play_dead(void)
+{
+}
+#endif /*CONFIG_HOTPLUG*/
+
 /*
  * The idle thread. There's no useful work to be
  * done, so just try to conserve power and have a
@@ -138,8 +173,10 @@ void cpu_idle (void)
 		if (!idle)
 			idle = default_idle;
 		irq_stat[smp_processor_id()].idle_timestamp = jiffies;
-		while (!need_resched())
+		while (!need_resched()) {
+			maybe_play_dead();
 			idle();
+		}
 		schedule();
 	}
 }
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .9779-2.5.44-hotcpu-cpudown-i386.pre/arch/i386/kernel/smp.c .9779-2.5.44-hotcpu-cpudown-i386/arch/i386/kernel/smp.c
--- .9779-2.5.44-hotcpu-cpudown-i386.pre/arch/i386/kernel/smp.c	2002-08-28 09:29:40.000000000 +1000
+++ .9779-2.5.44-hotcpu-cpudown-i386/arch/i386/kernel/smp.c	2002-10-28 17:28:35.000000000 +1100
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -397,13 +398,18 @@ static void flush_tlb_others (unsigned l
 	 */
 	if (!cpumask)
 		BUG();
-	if ((cpumask & cpu_online_map) != cpumask)
+	if ((cpumask & cpu_callout_map) != cpumask)
 		BUG();
 	if (cpumask & (1 << smp_processor_id()))
 		BUG();
 	if (!mm)
 		BUG();
 
+	/* CPUs might have gone offline: don't worry about them. */
+	cpumask &= cpu_online_map;
+	if (!cpumask)
+		return;
+
 	/*
 	 * i'm not happy about this global shared spinlock in the
 	 * MM hot path, but we'll see how contended it is.
@@ -562,10 +568,15 @@ int smp_call_function (void (*func) (voi
  */
 {
 	struct call_data_struct data;
-	int cpus = num_online_cpus()-1;
+	int cpus;
 
-	if (!cpus)
+	br_read_lock(BR_CPU_LOCK);
+	cpus = num_online_cpus()-1;
+
+	if (!cpus) {
+		br_read_unlock(BR_CPU_LOCK);
 		return 0;
+	}
 
 	data.func = func;
 	data.info = info;
@@ -589,6 +600,7 @@ int smp_call_function (void (*func) (voi
 	barrier();
 
 	spin_unlock(&call_lock);
+	br_read_unlock(BR_CPU_LOCK);
 	return 0;
 }
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .9779-2.5.44-hotcpu-cpudown-i386.pre/arch/i386/kernel/smpboot.c .9779-2.5.44-hotcpu-cpudown-i386/arch/i386/kernel/smpboot.c
--- .9779-2.5.44-hotcpu-cpudown-i386.pre/arch/i386/kernel/smpboot.c	2002-10-28 17:27:59.000000000 +1100
+++ .9779-2.5.44-hotcpu-cpudown-i386/arch/i386/kernel/smpboot.c	2002-10-28 17:28:35.000000000 +1100
@@ -45,6 +45,8 @@
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -63,9 +65,10 @@ int __initdata phys_proc_id[NR_CPUS]; /*
 
 /* Bitmask of currently online CPUs */
 unsigned long cpu_online_map;
-static volatile unsigned long cpu_callin_map;
+/* Initialize, although master cpu never calls in */
+static volatile unsigned long cpu_callin_map = 1;
 volatile unsigned long cpu_callout_map;
-static unsigned long smp_commenced_mask;
+static unsigned long smp_commenced_mask = 1;
 
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
@@ -460,7 +463,9 @@ int __init start_secondary(void *unused)
 	 * the local TLBs too.
 	 */
 	local_flush_tlb();
+	br_write_lock_irq(BR_CPU_LOCK);
 	set_bit(smp_processor_id(), &cpu_online_map);
+	br_write_unlock_irq(BR_CPU_LOCK);
 	wmb();
 	return cpu_idle();
 }
@@ -1175,16 +1180,25 @@ void __init smp_prepare_cpus(unsigned in
 	smp_boot_cpus(max_cpus);
 }
 
+DEFINE_PER_CPU(int, cpu_die);
+
 int __devinit __cpu_up(unsigned int cpu)
 {
-	/* This only works at boot for x86.  See "rewrite" above. */
-	if (test_bit(cpu, &smp_commenced_mask))
-		return -ENOSYS;
-
 	/* In case one didn't come up */
 	if (!test_bit(cpu, &cpu_callin_map))
 		return -EIO;
 
+	/* Already up, and in maybe_play_dead now? */
+	if (test_bit(cpu, &smp_commenced_mask)) {
+		per_cpu(cpu_die, cpu) = 0;
+		wmb();
+		wake_idle_cpu(cpu);
+		while (!cpu_online(cpu))
+			yield();
+		printk("Cpu %u says it's online\n", cpu);
+		return 0;
+	}
+
 	/* Unleash the CPU! */
 	set_bit(cpu, &smp_commenced_mask);
 	while (!test_bit(cpu, &cpu_online_map))
@@ -1194,13 +1208,33 @@ int __devinit __cpu_up(unsigned int cpu)
 
 int __cpu_disable(void)
 {
-	return -ENOSYS;
+	br_write_lock_irq(BR_CPU_LOCK);
+	cpu_online_map &= ~(1UL << smp_processor_id());
+	br_write_unlock_irq(BR_CPU_LOCK);
+
+	printk("Disabled cpu %u\n", smp_processor_id());
+	return 0;
 }
 
-/* Since we fail __cpu_disable, this is never called. */
 void __cpu_die(unsigned int cpu)
 {
-	BUG();
+	unsigned int start;
+
+	/* Final threads can take some time to actually clean up */
+	while (!idle_cpu(cpu))
+		yield();
+
+	/* FIXME: Rebind interrupts which have affinity. --RR */
+	per_cpu(cpu_die, cpu) = 1;
+	wmb();
+	for (start = jiffies; time_before(jiffies, start + HZ); ) {
+		wake_idle_cpu(cpu);
+		/* They ack this in maybe_play_dead by incrementing it. */
+		if (per_cpu(cpu_die, cpu) == 2)
+			return;
+		yield();
+	}
+	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
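
Illustration, not part of the patch: the cpu_die machinery above is a small two-party protocol.  __cpu_die() sets the target's per-cpu cpu_die flag to 1 and waits up to a second for the ack value 2; maybe_play_dead() acks, spins with interrupts off until the flag is cleared again, then flushes its TLB and re-marks itself online; __cpu_up() on an already-commenced CPU clears the flag to release it.  The standalone userspace sketch below mimics that handshake with a pthread and a C11 atomic.  Every name in it (fake_idle_loop, the controller logic in main) is an assumption made purely for illustration.

/* Illustration only: userspace analogue of the cpu_die handshake. */
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int cpu_die;	/* 0 = run, 1 = please park, 2 = parked (ack) */

static void *fake_idle_loop(void *unused)
{
	(void)unused;
	for (;;) {
		if (atomic_load(&cpu_die) == 1) {
			printf("worker: parking\n");
			atomic_store(&cpu_die, 2);	/* ack, like "cpu_die = 2" */
			while (atomic_load(&cpu_die))	/* the "death loop" */
				sched_yield();
			printf("worker: arisen\n");
		}
		sched_yield();			/* stand-in for idle() */
	}
	return NULL;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, fake_idle_loop, NULL);

	atomic_store(&cpu_die, 1);		/* like __cpu_die(): request the park */
	while (atomic_load(&cpu_die) != 2)	/* wait for the ack */
		sched_yield();
	printf("controller: worker parked\n");

	sleep(1);
	atomic_store(&cpu_die, 0);		/* like re-plugging in __cpu_up() */
	sleep(1);				/* give it time to print "arisen" */
	return 0;				/* exiting main ends the worker too */
}

Build with something like cc -pthread handshake.c (the file name is arbitrary).  The real patch differs in that the parked CPU also disables interrupts, re-flushes its TLB after missing any flush IPIs, and takes BR_CPU_LOCK before setting its bit in cpu_online_map again.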