Name: Add basic support to enable CPU hotplug for IA64 Author: Ashok Raj (Intel Corporation) Depends: Hotcpu/hotcpu-ia64-cpu-topology.patch.gz Depends: Depends: Supports basic ability to enable hotplug functions for IA64. Code is just evolving, and there are several loose ends to tie up. Caveat: disable module_init() in kthread.c, it does not seem to work right - Support logical online and offline - Boot CPU cannot be removed in this release - Handling for CPEI/PMI/MCA special cases not yet done. - Issues with interrupt migration can cause interrupts to be lost in the case of stress. (solution under development) - some more entries in proc should migrate to sysfs (sometime) --- linux-2.6.5-rc2-lhcs-root/arch/ia64/Kconfig | 8 + linux-2.6.5-rc2-lhcs-root/arch/ia64/kernel/irq.c | 36 +++++ linux-2.6.5-rc2-lhcs-root/arch/ia64/kernel/process.c | 43 ++++++ linux-2.6.5-rc2-lhcs-root/arch/ia64/kernel/smp.c | 14 ++ linux-2.6.5-rc2-lhcs-root/arch/ia64/kernel/smpboot.c | 123 +++++++++++++++++-- linux-2.6.5-rc2-lhcs-root/arch/ia64/kernel/time.c | 9 + 6 files changed, 224 insertions(+), 9 deletions(-) diff -puN arch/ia64/Kconfig~hotcpu_ia64 arch/ia64/Kconfig diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .26733-linux-2.6.5-rc2-bk3/arch/ia64/Kconfig .26733-linux-2.6.5-rc2-bk3.updated/arch/ia64/Kconfig --- .26733-linux-2.6.5-rc2-bk3/arch/ia64/Kconfig 2004-03-20 21:20:49.000000000 +1100 +++ .26733-linux-2.6.5-rc2-bk3.updated/arch/ia64/Kconfig 2004-03-24 18:20:56.000000000 +1100 @@ -417,6 +417,14 @@ config PCI_DOMAINS source "drivers/pci/Kconfig" +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" + depends on SMP && HOTPLUG && EXPERIMENTAL + ---help--- + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu. + Say N. 
+ source "drivers/pci/hotplug/Kconfig" source "drivers/pcmcia/Kconfig" diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .26733-linux-2.6.5-rc2-bk3/arch/ia64/kernel/irq.c .26733-linux-2.6.5-rc2-bk3.updated/arch/ia64/kernel/irq.c --- .26733-linux-2.6.5-rc2-bk3/arch/ia64/kernel/irq.c 2004-03-12 07:56:38.000000000 +1100 +++ .26733-linux-2.6.5-rc2-bk3.updated/arch/ia64/kernel/irq.c 2004-03-24 18:20:56.000000000 +1100 @@ -35,6 +35,8 @@ #include #include #include +#include +#include #include #include @@ -45,6 +47,8 @@ #include #include #include +#include +#include @@ -432,6 +436,7 @@ void enable_irq(unsigned int irq) } EXPORT_SYMBOL(enable_irq); + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific @@ -1000,6 +1005,37 @@ static int irq_affinity_write_proc (stru #endif /* CONFIG_SMP */ +#ifdef CONFIG_HOTPLUG_CPU +void fixup_irqs(void) /* retarget every IRQ at CPUs still in cpu_online_map, then mark this CPU CPU_DEAD */ +{ + cpumask_t mask; + unsigned int irq, redir; + irq_desc_t *desc; + static int warned; /* limits the "Cannot set affinity" message to a single print */ + + for (irq = 0; irq < NR_IRQS; irq++) { + cpus_and(mask, irq_affinity[irq], cpu_online_map); + if (any_online_cpu(mask) == NR_CPUS) { /* presumably: no CPU of the old affinity is still online */ + printk("Breaking affinity for irq %u\n", irq); + mask = cpu_online_map; /* must be a cpumask, not a CPU number: fall back to all online CPUs */ + } + desc = irq_descp(irq); + if (desc->handler->set_affinity) { + redir = irq_redir[irq]; + desc->handler->set_affinity(irq | (redir ? 
IA64_IRQ_REDIRECTED : 0), + mask); + } + else if (desc->action && !(warned++)) + printk("Cannot set affinity for irq %u\n", irq); + } + local_flush_tlb_all(); + max_xtp(); + local_irq_disable(); + __get_cpu_var(cpu_state) = CPU_DEAD; +} + +#endif + static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .26733-linux-2.6.5-rc2-bk3/arch/ia64/kernel/process.c .26733-linux-2.6.5-rc2-bk3.updated/arch/ia64/kernel/process.c --- .26733-linux-2.6.5-rc2-bk3/arch/ia64/kernel/process.c 2004-03-20 21:20:50.000000000 +1100 +++ .26733-linux-2.6.5-rc2-bk3.updated/arch/ia64/kernel/process.c 2004-03-24 18:20:56.000000000 +1100 @@ -9,6 +9,8 @@ #include #include +#include +#include #include #include #include @@ -22,6 +24,7 @@ #include #include #include +#include #include #include @@ -30,8 +33,12 @@ #include #include #include +#include +#include +#include #include #include +#include #ifdef CONFIG_PERFMON # include @@ -171,6 +178,39 @@ default_idle (void) #endif } +#ifdef CONFIG_HOTPLUG_CPU +/* We don't actually take CPU down, just spin without interrupts. */ +static inline void play_dead(void) +{ + /* Ack it */ + __get_cpu_var(cpu_state) = CPU_DEAD; + + /* We shouldn't have to disable interrupts while dead, but + * some interrupts just don't seem to go away, and this makes + * it "work" for testing purposes. */ + //printk ("Playdead max_xtp\n"); + max_xtp(); + local_irq_disable(); + /* Death loop */ + while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) + cpu_relax(); + + /* + * Enable timer interrupts from now on + * Not required if we put processor in SAL_BOOT_RENDEZ mode. 
+ */ + ia64_set_itv(IA64_TIMER_VECTOR); + local_flush_tlb_all(); + cpu_set(smp_processor_id(), cpu_online_map); + local_irq_enable(); +} +#else +static inline void play_dead(void) +{ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ + void __attribute__((noreturn)) cpu_idle (void *unused) { @@ -186,7 +226,6 @@ cpu_idle (void *unused) if (!need_resched()) min_xtp(); #endif - while (!need_resched()) { if (mark_idle) (*mark_idle)(1); @@ -201,6 +240,8 @@ cpu_idle (void *unused) #endif schedule(); check_pgt_cache(); + if (cpu_is_offline(smp_processor_id())) + play_dead(); } } diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .26733-linux-2.6.5-rc2-bk3/arch/ia64/kernel/smp.c .26733-linux-2.6.5-rc2-bk3.updated/arch/ia64/kernel/smp.c --- .26733-linux-2.6.5-rc2-bk3/arch/ia64/kernel/smp.c 2004-02-18 23:54:12.000000000 +1100 +++ .26733-linux-2.6.5-rc2-bk3.updated/arch/ia64/kernel/smp.c 2004-03-24 18:20:56.000000000 +1100 @@ -71,10 +71,11 @@ static volatile struct call_data_struct /* This needs to be cacheline aligned because it is written to by *other* CPUs. 
*/ static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned; +extern void cpu_halt (void); + static void stop_this_cpu (void) { - extern void cpu_halt (void); /* * Remove this CPU: */ @@ -84,6 +85,17 @@ stop_this_cpu (void) cpu_halt(); } +void +cpu_die(void) +{ + max_xtp(); + local_irq_disable(); + cpu_halt(); + /* Should never be here */ + BUG(); + for (;;); +} + irqreturn_t handle_IPI (int irq, void *dev_id, struct pt_regs *regs) { diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .26733-linux-2.6.5-rc2-bk3/arch/ia64/kernel/smpboot.c .26733-linux-2.6.5-rc2-bk3.updated/arch/ia64/kernel/smpboot.c --- .26733-linux-2.6.5-rc2-bk3/arch/ia64/kernel/smpboot.c 2004-03-24 18:20:54.000000000 +1100 +++ .26733-linux-2.6.5-rc2-bk3.updated/arch/ia64/kernel/smpboot.c 2004-03-24 18:20:56.000000000 +1100 @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include #include @@ -26,6 +28,7 @@ #include #include #include +#include #include #include @@ -75,6 +78,11 @@ extern unsigned long ia64_iobase; task_t *task_for_booting_cpu; +/* + * Per-CPU hotplug state (CPU_DEAD / CPU_UP_PREPARE handshake) + */ +DEFINE_PER_CPU(int, cpu_state) = { 0 }; + /* Bitmask of currently online CPUs */ cpumask_t cpu_online_map; EXPORT_SYMBOL(cpu_online_map); @@ -358,29 +366,51 @@ fork_by_hand (void) return copy_process(CLONE_VM|CLONE_IDLETASK, 0, 0, 0, NULL, NULL); } +struct create_idle { + struct task_struct *idle; + struct completion done; +}; + +void +do_fork_idle(void *_c_idle) /* work callback: fork the idle task, then signal c_idle->done */ +{ + struct create_idle *c_idle = _c_idle; + + c_idle->idle = fork_by_hand(); + complete(&c_idle->done); +} + static int __devinit do_boot_cpu (int sapicid, int cpu) { - struct task_struct *idle; int timeout; + struct create_idle c_idle; + DECLARE_WORK(work, do_fork_idle, &c_idle); + init_completion(&c_idle.done); /* * We can't use kernel_thread since we must avoid to reschedule the child. 
*/ - idle = fork_by_hand(); - if (IS_ERR(idle)) + if (!keventd_up() || current_is_keventd()) + work.func(work.data); + else { + schedule_work(&work); + wait_for_completion(&c_idle.done); + } + + if (IS_ERR(c_idle.idle)) panic("failed fork for CPU %d", cpu); - wake_up_forked_process(idle); + wake_up_forked_process(c_idle.idle); /* * We remove it from the pidhash and the runqueue * once we got the process: */ - init_idle(idle, cpu); + init_idle(c_idle.idle, cpu); - unhash_process(idle); + unhash_process(c_idle.idle); - task_for_booting_cpu = idle; + task_for_booting_cpu = c_idle.idle; Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid); @@ -544,6 +574,74 @@ void __devinit smp_prepare_boot_cpu(void cpu_set(smp_processor_id(), cpu_callin_map); } +#ifdef CONFIG_HOTPLUG_CPU +extern void fixup_irqs(void); +/* must be called with cpucontrol mutex held */ +static int __devinit cpu_enable(unsigned int cpu) +{ + per_cpu(cpu_state,cpu) = CPU_UP_PREPARE; + wmb(); + + while (!cpu_online(cpu)) + cpu_relax(); + return 0; +} + +int __cpu_disable(void) +{ + int cpu = smp_processor_id(); + + /* + * Don't permit offlining the boot processor for now + */ + if (cpu == 0) + return -EBUSY; + + fixup_irqs(); + ia64_set_itv(1 << 16); /* same value timer_interrupt() writes to disable future timer interrupts */ + printk ("Disabled cpu %u\n", smp_processor_id()); + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + unsigned int i; + + for (i = 0; i < 100; i++) { /* poll up to 100 times, sleeping HZ/10 between polls */ + /* They ack this in play_dead by setting CPU_DEAD */ + if (per_cpu(cpu_state, cpu) == CPU_DEAD) + { + /* + * TBD: Enable this for physical removal + * or when the processor is put in + * SAL_BOOT_RENDEZ mode + * cpu_clear(cpu, cpu_callin_map); + */ + return; + } + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(HZ/10); + } + printk(KERN_ERR "CPU %u didn't die...\n", cpu); +} +#else /* !CONFIG_HOTPLUG_CPU */ +static int __devinit cpu_enable(unsigned int cpu) +{ + return 0; +} + +int __cpu_disable(void) +{ + return -ENOSYS; +} + +void __cpu_die(unsigned int cpu) +{ + /* 
We said "no" in __cpu_disable */ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ + void smp_cpus_done (unsigned int dummy) { @@ -572,6 +670,17 @@ __cpu_up (unsigned int cpu) if (sapicid == -1) return -EINVAL; + /* + * Already booted: just enable it and get it out of the idle loop + */ + if (cpu_isset(cpu, cpu_callin_map)) + { + cpu_enable(cpu); + local_irq_enable(); + while (!cpu_isset(cpu, cpu_online_map)) + mb(); + return 0; + } /* Processor goes to start_secondary(), sets online flag */ ret = do_boot_cpu(sapicid, cpu); if (ret < 0) diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .26733-linux-2.6.5-rc2-bk3/arch/ia64/kernel/time.c .26733-linux-2.6.5-rc2-bk3.updated/arch/ia64/kernel/time.c --- .26733-linux-2.6.5-rc2-bk3/arch/ia64/kernel/time.c 2004-03-24 18:20:54.000000000 +1100 +++ .26733-linux-2.6.5-rc2-bk3.updated/arch/ia64/kernel/time.c 2004-03-24 18:20:56.000000000 +1100 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -254,6 +255,14 @@ timer_interrupt (int irq, void *dev_id, ia64_do_profile(regs); + if (unlikely(cpu_is_offline(smp_processor_id()))) { + /* + * Disable ITV to stop future timer interrupts + */ + ia64_set_itv(1<<16); + return IRQ_HANDLED; + } + while (1) { #ifdef CONFIG_SMP