Name: Hotplug CPU Remove for PPC64
Author: Rusty Russell
Status: Experimental
Depends:
Depends: Hotcpu/hotcpu-cpudown.patch.gz
D: This introduces hotplug CPU capability for PPC64.
D:
D: idle.c:    adds the per-cpu ppc_cpu_die flag; the idle loop re-reads it
D:            on every pass and calls rtas_stop_self() once it is set.
D: irq.c:     irq_affinity[] becomes unsigned long and is printed with
D:            %016lx.
D: rtas.c:    adds rtas_stop_self(), which issues "stop-self" through this
D:            cpu's paca rtas_args without taking the spinlock.
D: smp.c:     adds __cpu_disable()/__cpu_die() and the pSeries cpu_die,
D:            cpu_go_home and kick hooks; __cpu_up() forks the idle task
D:            only once per cpu (idle_task_mask) and restarts cpus recorded
D:            in cpu_stopped_mask via RTAS "start-cpu".
D: xics.c:    adds xics_migrate_irqs_away().
D: machdep.h, rtas.h, smp.h: new smp_ops hooks and prototypes.
D:
D: NOTE(review): this copy went through a lossy text extraction.  The
D: target of every #include line is missing, and so is part of
D: xics_migrate_irqs_away() between "~(1L<" and "set_affinity(" (the hunk
D: header suggests roughly three added lines were lost there).  Whitespace
D: on context lines is reconstructed.  Restore these from the original
D: patch before trying to apply it.

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23604-linux-2.5.33/arch/ppc64/kernel/idle.c .23604-linux-2.5.33.updated/arch/ppc64/kernel/idle.c
--- .23604-linux-2.5.33/arch/ppc64/kernel/idle.c	2002-09-04 15:28:43.000000000 +1000
+++ .23604-linux-2.5.33.updated/arch/ppc64/kernel/idle.c	2002-09-04 15:29:09.000000000 +1000
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -27,6 +28,10 @@
 #include
 #include
 #include
+#include
+
+/* Set to true to kill the cpu */
+DEFINE_PER_CPU(int, ppc_cpu_die);
 
 #ifdef CONFIG_PPC_ISERIES
 
@@ -142,6 +147,11 @@ int cpu_idle(void)
 			set_need_resched();
 		}
 
+		/* We must re-read this each time */
+		rmb();
+		if (__get_cpu_var(ppc_cpu_die))
+			rtas_stop_self();
+
 		schedule();
 	}
 
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23604-linux-2.5.33/arch/ppc64/kernel/irq.c .23604-linux-2.5.33.updated/arch/ppc64/kernel/irq.c
--- .23604-linux-2.5.33/arch/ppc64/kernel/irq.c	2002-09-04 15:28:48.000000000 +1000
+++ .23604-linux-2.5.33.updated/arch/ppc64/kernel/irq.c	2002-09-04 15:29:09.000000000 +1000
@@ -398,7 +398,7 @@ handle_irq_event(int irq, struct pt_regs
 }
 
 #ifdef CONFIG_SMP
-extern unsigned int irq_affinity [NR_IRQS];
+extern unsigned long irq_affinity [NR_IRQS];
 
 typedef struct {
 	unsigned long cpu;
@@ -654,9 +654,9 @@ static struct proc_dir_entry * irq_dir [
 static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
 
 #ifdef CONFIG_IRQ_ALL_CPUS
-unsigned int irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = 0xffffffff};
+unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = 0xffffffff};
 #else /* CONFIG_IRQ_ALL_CPUS */
-unsigned int irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = 0x00000000};
+unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = 0x00000000};
 #endif /* CONFIG_IRQ_ALL_CPUS */
 
 #define HEX_DIGITS 8
@@ -666,7 +666,7 @@ static int irq_affinity_read_proc (char
 {
 	if (count < HEX_DIGITS+1)
 		return -EINVAL;
-	return sprintf (page, "%08x\n", irq_affinity[(int)(long)data]);
+	return sprintf (page, "%016lx\n", irq_affinity[(int)(long)data]);
 }
 
 static unsigned int parse_hex_value (const char *buffer,
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23604-linux-2.5.33/arch/ppc64/kernel/rtas.c .23604-linux-2.5.33.updated/arch/ppc64/kernel/rtas.c
--- .23604-linux-2.5.33/arch/ppc64/kernel/rtas.c	2002-07-25 10:13:04.000000000 +1000
+++ .23604-linux-2.5.33.updated/arch/ppc64/kernel/rtas.c	2002-09-04 15:29:09.000000000 +1000
@@ -186,6 +186,20 @@ rtas_call(int token, int nargs, int nret
 	return (ulong)((nret > 0) ? rtas_args->rets[0] : 0);
 }
 
+/* This version can't take the spinlock. */
+void rtas_stop_self(void)
+{
+	struct rtas_args *rtas_args = &(get_paca()->xRtas);
+
+	rtas_args->token = rtas_token("stop-self");
+	rtas_args->nargs = 0;
+	rtas_args->nret = 1;
+	rtas_args->rets = (rtas_arg_t *)&(rtas_args->args[0]);
+
+	enter_rtas((void *)__pa((unsigned long)rtas_args));
+	panic("Alas, I survived.\n");
+}
+
 #define FLASH_BLOCK_LIST_VERSION (1UL)
 
 static void rtas_flash_firmware(void)
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23604-linux-2.5.33/arch/ppc64/kernel/smp.c .23604-linux-2.5.33.updated/arch/ppc64/kernel/smp.c
--- .23604-linux-2.5.33/arch/ppc64/kernel/smp.c	2002-09-04 15:28:48.000000000 +1000
+++ .23604-linux-2.5.33.updated/arch/ppc64/kernel/smp.c	2002-09-04 15:31:13.000000000 +1000
@@ -29,6 +29,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 
@@ -70,6 +72,7 @@ static unsigned long iSeries_smp_message
 
 void xics_setup_cpu(void);
 void xics_cause_IPI(int cpu);
+void xics_migrate_irqs_away(void);
 
 /*
  * XICS only has a single IPI, so encode the messages per CPU
@@ -292,7 +295,8 @@ smp_xics_message_pass(int target, int ms
 	int i;
 
 	for (i = 0; i < NR_CPUS; ++i) {
-		if (!cpu_online(i))
+		/* We can still target offline CPUs specifically. */
+		if (target != i && !cpu_online(i))
 			continue;
 
 		if (target == MSG_ALL || target == i
@@ -343,6 +347,107 @@ static void __devinit pSeries_take_timeb
 	spin_unlock(&timebase_lock);
 }
 
+/* Kill this cpu: mask local interrupts, then stop via RTAS. */
+void pSeries_cpu_die(void)
+{
+	printk("%u: Dust dust dust.\n", smp_processor_id());
+	local_irq_disable();
+	rtas_stop_self();
+}
+
+/* FIXME: Boot should put CPUs into rtas stopped state so starting
+ * them is always the same. --RR */
+static unsigned long cpu_stopped_mask;
+
+/* Return CPU to its maker: wait until RTAS says it stopped, then record it. */
+void pSeries_cpu_go_home(unsigned int cpu)
+{
+	unsigned long cpu_status;
+	struct device_node *np;
+	unsigned int *ireg, len;
+	int status;
+
+	do {
+		status = rtas_call(rtas_token("query-cpu-stopped-state"),
+				   1, 2,
+				   &cpu_status, (unsigned long)cpu);
+		if (status != 0) {
+			printk(KERN_ERR
+			       "RTAS query-cpu-stopped-state failed: %i\n",
+			       status);
+			return;
+		}
+		/* cpu_status 2 means not doing stop-self. */
+		printk("query-cpu-stopped-state %u : %i, %lu\n",
+		       cpu, status, cpu_status);
+		mdelay(10);
+	} while (cpu_status != 0);
+
+	/* Find the index number it wants to refer to the CPU */
+	np = find_path_device("/cpus");
+	if (!np) {
+		printk(KERN_ERR "Could not find /cpus in device tree!\n");
+		return;
+	}
+	ireg = (unsigned int *)get_property(np, "ibm,drc-indexes", &len);
+	if (!ireg) {
+		printk(KERN_ERR "Could not find ibm,drc-indexes in cpus!\n");
+		return;
+	}
+	/* First element in array is the number of CPUs. */
+	if (cpu >= ireg[0]) {
+		printk(KERN_ERR "Um, cpu %u of %u? Inconceivable!\n",
+		       cpu, ireg[0]);
+		return;
+	}
+
+	printk("About to try doing isolate!\n");
+
+#if 0 /* Not yet... */
+	/* Now isolate it. */
+	do {
+		status = rtas_call(rtas_token("set-indicator"), 3, 1,
+				   NULL,
+				   9001UL, /* isolation-state */
+				   ireg[cpu+1], /* index of cpu "connector" */
+				   0UL); /* = Isolate */
+		printk("set-indicator isolate returned %i\n", status);
+	} while (status == -2);
+
+	/* And put it back in the pool. */
+	do {
+		status = rtas_call(rtas_token("set-indicator"), 3, 1,
+				   NULL,
+				   9003UL, /* allocation-state */
+				   ireg[cpu+1], /* index of cpu "connector" */
+				   0UL); /* = Unusable */
+		printk("set-indicator unusable returned %i\n", status);
+	} while (status == -2);
+#endif
+
+	cpu_stopped_mask |= (1UL << cpu);
+}
+
+DECLARE_PER_CPU(int, ppc_cpu_die);
+
+/* CPU has been stopped (as separate from spinning after boot). */
+static void __devinit pSeries_kick_cpu(unsigned int cpu)
+{
+	int status;
+	extern void (*pseries_secondary_smp_init)(int cpu);
+
+	/* Reset flag so it doesn't die immediately */
+	per_cpu(ppc_cpu_die, cpu) = 0;
+	wmb();
+	status = rtas_call(rtas_token("start-cpu"), 3, 1, NULL,
+			   (unsigned long)cpu,
+			   pseries_secondary_smp_init,
+			   (unsigned long)cpu);
+	if (status != 0)
+		printk(KERN_ERR "start-cpu failed: %i\n", status);
+	/* xProcStart is already initialized, so will fall thru */
+}
+
 /* This is called very early */
 void __init smp_init_pSeries(void)
 {
@@ -351,9 +456,13 @@ void __init smp_init_pSeries(void)
 	if (naca->interrupt_controller == IC_OPEN_PIC) {
 		smp_ops->message_pass = smp_openpic_message_pass;
 		smp_ops->probe = smp_chrp_probe;
+#if 0 /* FIXME: write this. */
+		smp_ops->migrate_irqs_away = smp_openpic_migrate_irqs_away;
+#endif
 	} else {
 		smp_ops->message_pass = smp_xics_message_pass;
 		smp_ops->probe = smp_xics_probe;
+		smp_ops->migrate_irqs_away = xics_migrate_irqs_away;
 	}
 
 	if (naca->platform == PLATFORM_PSERIES) {
@@ -363,6 +472,8 @@ void __init smp_init_pSeries(void)
 
 	smp_ops->kick_cpu = smp_kick_cpu;
 	smp_ops->setup_cpu = pSeries_setup_cpu;
+	smp_ops->cpu_die = pSeries_cpu_die;
+	smp_ops->cpu_go_home = pSeries_cpu_go_home;
 }
 
 void smp_local_timer_interrupt(struct pt_regs * regs)
@@ -607,27 +718,41 @@ void __init smp_prepare_cpus(unsigned in
 	smp_space_timers(max_cpus);
 }
 
+/* FIXME: If we had a decent way of really killing idle task from
+   another cpu, we wouldn't need this. --RR */
+static unsigned long idle_task_mask; /* = 0 */
+
 int __devinit __cpu_up(unsigned int cpu)
 {
 	struct pt_regs regs;
 	struct task_struct *p;
 	int c;
 
-	/* create a process for the processor */
-	/* only regs.msr is actually used, and 0 is OK for it */
-	memset(&regs, 0, sizeof(struct pt_regs));
-	p = do_fork(CLONE_VM|CLONE_IDLETASK, 0, &regs, 0, NULL);
-	if (IS_ERR(p))
-		panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p));
+	if (!(idle_task_mask & (1UL << cpu))) {
+		/* create a process for the processor */
+		/* only regs.msr is actually used, and 0 is OK for it */
+		memset(&regs, 0, sizeof(struct pt_regs));
+		p = do_fork(CLONE_VM|CLONE_IDLETASK, 0, &regs, 0, NULL);
+		if (IS_ERR(p)) {
+			printk("failed fork for CPU %u: %li\n", cpu, PTR_ERR(p));
+			return PTR_ERR(p);
+		}
 
-	init_idle(p, cpu);
-	unhash_process(p);
+		init_idle(p, cpu);
+		unhash_process(p);
 
-	paca[cpu].xCurrent = (u64)p;
-	current_set[cpu] = p->thread_info;
+		paca[cpu].xCurrent = (u64)p;
+		current_set[cpu] = p->thread_info;
+		idle_task_mask |= (1UL << cpu);
+	}
 
-	/* wake up cpus */
-	smp_ops->kick_cpu(cpu);
+	/* FIXME: should really have kick_cpu per-cpu. --RR */
+	if (cpu_stopped_mask & (1UL << cpu))
+		pSeries_kick_cpu(cpu);
+	else {
+		/* wake up cpu */
+		smp_ops->kick_cpu(cpu);
+	}
 
 	/*
 	 * wait to see if the cpu made a callin (is actually up).
@@ -671,6 +796,34 @@ int __devinit start_secondary(void *unus
 	return cpu_idle(NULL);
 }
 
+/* Disable the CPU: drop it from the online map, then move its irqs away. */
+int __devinit __cpu_disable(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	printk("Removing cpu from ONLINE map\n");
+	br_write_lock_irq(BR_CPU_LOCK);
+	clear_bit(cpu, cpu_online_map);
+	br_write_unlock_irq(BR_CPU_LOCK);
+
+	if (smp_ops->migrate_irqs_away)
+		smp_ops->migrate_irqs_away();
+	return 0;
+}
+
+/* Kill the CPU: set its die flag, kick its idle loop, then send it home. */
+void __devinit __cpu_die(unsigned int cpu)
+{
+	printk("Killing the CPU.\n");
+	per_cpu(ppc_cpu_die, cpu) = 1;
+	wmb();
+	/* Wake up idle loop. */
+	smp_send_reschedule(cpu);
+
+	if (smp_ops->cpu_go_home)
+		smp_ops->cpu_go_home(cpu);
+}
+
 int setup_profiling_timer(unsigned int multiplier)
 {
 	return 0;
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23604-linux-2.5.33/arch/ppc64/kernel/xics.c .23604-linux-2.5.33.updated/arch/ppc64/kernel/xics.c
--- .23604-linux-2.5.33/arch/ppc64/kernel/xics.c	2002-09-04 15:28:48.000000000 +1000
+++ .23604-linux-2.5.33.updated/arch/ppc64/kernel/xics.c	2002-09-04 15:29:09.000000000 +1000
@@ -23,6 +23,8 @@
 #include "xics.h"
 #include
 
+extern unsigned long irq_affinity [NR_IRQS];
+
 void xics_enable_irq(u_int irq);
 void xics_disable_irq(u_int irq);
 void xics_mask_and_ack_irq(u_int irq);
@@ -468,3 +470,38 @@ void xics_set_affinity(unsigned int virq
 out:
 	spin_unlock_irqrestore(&desc->lock, flags);
 }
+
+/* Migrate interrupts away from this CPU. */
+void xics_migrate_irqs_away(void)
+{
+	unsigned int virq;
+
+	printk("Migrating IRQs away\n");
+	for (virq = XICS_IRQ_OFFSET; virq < NR_IRQS; virq++) {
+		irq_desc_t *desc = irq_desc + virq;
+
+		/* We need to get IPIs still. */
+		if (virt_irq_to_real(virq - XICS_IRQ_OFFSET) == XICS_IPI) {
+			printk("Not irq %x\n", virq);
+			continue;
+		}
+
+		/* FIXME: Currently affinity is only FF... or one bit. --RR */
+		spin_lock_irq(&desc->lock);
+		if (irq_desc[virq].handler->set_affinity
+		    && (irq_affinity[virq] & (1UL << smp_processor_id()))) {
+			unsigned int new_cpu;
+			irq_affinity[virq] &= ~(1L << smp_processor_id());
+			if (!irq_affinity[virq]) {
+				printk("Irq %x affinity broken off cpu %u\n",
+				       virq, smp_processor_id());
+				irq_affinity[virq] = ~(1L<set_affinity(virq, 1 << new_cpu);
+			}
+		}
+		spin_unlock_irq(&desc->lock);
+	}
+}
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23604-linux-2.5.33/include/asm-ppc64/machdep.h .23604-linux-2.5.33.updated/include/asm-ppc64/machdep.h
--- .23604-linux-2.5.33/include/asm-ppc64/machdep.h	2002-09-04 15:28:44.000000000 +1000
+++ .23604-linux-2.5.33.updated/include/asm-ppc64/machdep.h	2002-09-04 15:29:09.000000000 +1000
@@ -26,6 +26,9 @@ struct smp_ops_t {
 	void (*setup_cpu)(int nr);
 	void (*take_timebase)(void);
 	void (*give_timebase)(void);
+	void (*cpu_die)(void);
+	void (*cpu_go_home)(unsigned int cpu);
+	void (*migrate_irqs_away)(void);
 };
 #endif
 
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23604-linux-2.5.33/include/asm-ppc64/rtas.h .23604-linux-2.5.33.updated/include/asm-ppc64/rtas.h
--- .23604-linux-2.5.33/include/asm-ppc64/rtas.h	2002-06-09 19:37:16.000000000 +1000
+++ .23604-linux-2.5.33.updated/include/asm-ppc64/rtas.h	2002-09-04 15:29:09.000000000 +1000
@@ -163,6 +163,7 @@ extern void call_rtas_display_status(cha
 extern void rtas_restart(char *cmd);
 extern void rtas_power_off(void);
 extern void rtas_halt(void);
+void rtas_stop_self(void) __attribute__((noreturn));
 
 extern struct proc_dir_entry *rtas_proc_dir;
 
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .23604-linux-2.5.33/include/asm-ppc64/smp.h .23604-linux-2.5.33.updated/include/asm-ppc64/smp.h
--- .23604-linux-2.5.33/include/asm-ppc64/smp.h	2002-09-04 15:28:48.000000000 +1000
+++ .23604-linux-2.5.33.updated/include/asm-ppc64/smp.h	2002-09-04 15:29:09.000000000 +1000
@@ -63,6 +63,9 @@ static inline int any_online_cpu(const u
 	return NR_CPUS;
 }
 
+extern int __cpu_disable(void);
+extern void __cpu_die(unsigned int cpu);
+
 extern volatile unsigned long cpu_callin_map[NR_CPUS];
 
 #define smp_processor_id() (get_paca()->xPacaIndex)