Name: Rewritten Hot-plug CPU Core Infrastructure for ia64
Author: Kimio Suganuma
Status: Tested on 2.5.14
Depends: Hotcpu/init-removal-ia64.patch.gz

D: This modifies the ia64 boot sequence to "plug in" CPUs one at a
D: time, and adds support for more general plugging on ia64.

diff -Nur linux-2.5.14-base/arch/ia64/kernel/acpi.c linux-2.5.14-new/arch/ia64/kernel/acpi.c
--- linux-2.5.14-base/arch/ia64/kernel/acpi.c	Sun May  5 20:38:02 2002
+++ linux-2.5.14-new/arch/ia64/kernel/acpi.c	Fri May 24 16:27:39 2002
@@ -621,6 +621,8 @@
 	/* Make boot-up look pretty */
 	printk("%d CPUs available, %d CPUs total\n", available_cpus, total_cpus);
 
+	smp_build_cpu_map();
+
 	return 0;
 }
diff -Nur linux-2.5.14-base/arch/ia64/kernel/irq.c linux-2.5.14-new/arch/ia64/kernel/irq.c
--- linux-2.5.14-base/arch/ia64/kernel/irq.c	Fri May 24 16:23:04 2002
+++ linux-2.5.14-new/arch/ia64/kernel/irq.c	Fri May 24 16:27:39 2002
@@ -1112,7 +1112,7 @@
 
 static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
 
-static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL };
+unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL };
 static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
 
 void set_irq_affinity_info(int irq, int hwid, int redir)
diff -Nur linux-2.5.14-base/arch/ia64/kernel/setup.c linux-2.5.14-new/arch/ia64/kernel/setup.c
--- linux-2.5.14-base/arch/ia64/kernel/setup.c	Fri May 24 16:23:09 2002
+++ linux-2.5.14-new/arch/ia64/kernel/setup.c	Fri May 24 16:27:39 2002
@@ -72,6 +72,7 @@
 
 #define COMMAND_LINE_SIZE	512
 char saved_command_line[COMMAND_LINE_SIZE]; /* used in proc filesystem */
+static void *my_cpu_data[NR_CPUS];
 
 /*
  * Entries defined so far:
@@ -286,8 +287,10 @@
 void __init
 setup_arch (char **cmdline_p)
 {
+	int cpu;
 	extern unsigned long ia64_iobase;
 	unsigned long phys_iobase;
+	extern char __per_cpu_start[], __per_cpu_end[];
 
 	unw_init();
 
@@ -342,6 +345,11 @@
 	cpu_physical_id(0) = hard_smp_processor_id();
 #endif
 
+	/* Move this from cpu_init() to support CPU hot-adding */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		my_cpu_data[cpu] = alloc_bootmem_pages(__per_cpu_end - __per_cpu_start);
+	}
+
 	cpu_init();	/* initialize the bootstrap CPU */
 
 #ifdef CONFIG_ACPI_BOOT
@@ -523,6 +531,8 @@
 	/* start_kernel() requires this... */
 }
 
+unsigned long cpu_initialized __devinitdata = 0;
+
 /*
  * cpu_init() initializes state that is per-CPU.  This function acts
 * as a 'CPU state barrier', nothing should get across.
@@ -536,20 +546,23 @@
 	pal_vm_info_2_u_t vmi;
 	unsigned int max_ctx;
 	struct cpuinfo_ia64 *my_cpu_info;
-	void *my_cpu_data;
 #ifdef CONFIG_SMP
 	extern char __per_cpu_end[];
 	int cpu = smp_processor_id();
 
-	my_cpu_data = alloc_bootmem_pages(__per_cpu_end - __per_cpu_start);
-	memcpy(my_cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
-	__per_cpu_offset[cpu] = (char *) my_cpu_data - __per_cpu_start;
-	my_cpu_info = my_cpu_data + ((char *) &cpu_info - __per_cpu_start);
+	if (test_and_set_bit(cpu, &cpu_initialized)) {
+		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
+		for (;;) __sti();
+	}
+
+	memcpy(my_cpu_data[cpu], __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
+	__per_cpu_offset[cpu] = (char *) my_cpu_data[cpu] - __per_cpu_start;
+	my_cpu_info = my_cpu_data[cpu] + ((char *) &cpu_info - __per_cpu_start);
 #else
 	my_cpu_data = __phys_per_cpu_start;
+	my_cpu_info = my_cpu_data[0] + ((char *) &cpu_info - __per_cpu_start);
 #endif
-	my_cpu_info = my_cpu_data + ((char *) &cpu_info - __per_cpu_start);
 
 	/*
 	 * We can't pass "local_cpu_data" to identify_cpu() because we haven't called
@@ -581,7 +594,7 @@
 	if (current->mm)
 		BUG();
 
-	ia64_mmu_init(my_cpu_data);
+	ia64_mmu_init(my_cpu_data[cpu]);
 
 #ifdef CONFIG_IA32_SUPPORT
 	/* initialize global ia32 state - CR0 and CR4 */
diff -Nur linux-2.5.14-base/arch/ia64/kernel/smp.c linux-2.5.14-new/arch/ia64/kernel/smp.c
--- linux-2.5.14-base/arch/ia64/kernel/smp.c	Fri May 24 16:23:04 2002
+++ linux-2.5.14-new/arch/ia64/kernel/smp.c	Fri May 24 16:27:39 2002
@@ -242,6 +242,9 @@
 		return -EBUSY;
 	}
 
+	if (!cpu_online(cpuid))
+		return -EFAULT;
+
 	data.func = func;
 	data.info = info;
 	atomic_set(&data.started, 0);
diff -Nur linux-2.5.14-base/arch/ia64/kernel/smpboot.c linux-2.5.14-new/arch/ia64/kernel/smpboot.c
--- linux-2.5.14-base/arch/ia64/kernel/smpboot.c	Fri May 24 16:23:09 2002
+++ linux-2.5.14-new/arch/ia64/kernel/smpboot.c	Fri May 24 16:27:39 2002
@@ -70,18 +70,19 @@
 extern void start_ap(void);
 extern unsigned long ia64_iobase;
 
-int cpucount;
+unsigned long phys_cpu_present_map;
 task_t *task_for_booting_cpu;
 
 /* Setup configured maximum number of CPUs to activate */
 static int max_cpus = -1;
 
 /* Bitmask of currently online CPUs */
-volatile unsigned long cpu_online_map;
+volatile unsigned long cpu_online_map = 1;
 
 /* which logical CPU number maps to which CPU (physical APIC ID) */
 volatile int ia64_cpu_to_sapicid[NR_CPUS];
 
+extern unsigned long cpu_initialized;
 static volatile unsigned long cpu_callin_map;
 
 struct smp_boot_data smp_boot_data __initdata;
@@ -95,35 +96,6 @@
 
 unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
 
-/*
- * Setup routine for controlling SMP activation
- *
- * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
- * activation entirely (the MPS table probe still happens, though).
- *
- * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
- * greater than 0, limits the maximum number of CPUs activated in
- * SMP mode to <NUM>.
- */
-static int __init
-nosmp (char *str)
-{
-	max_cpus = 0;
-	return 1;
-}
-
-__setup("nosmp", nosmp);
-
-static int __init
-maxcpus (char *str)
-{
-	get_option(&str, &max_cpus);
-	return 1;
-}
-
-__setup("maxcpus=", maxcpus);
-
 static int __init
 nointroute (char *str)
 {
@@ -316,6 +288,7 @@
 smp_callin (void)
 {
 	int cpuid, phys_id;
+	int master_cpu;
 	extern void ia64_init_itm(void);
 
 #ifdef CONFIG_PERFMON
@@ -337,7 +310,9 @@
 	 */
 	Dprintk("Going to syncup ITC with BP.\n");
 
-	ia64_sync_itc(0);
+	master_cpu = ffz(~cpu_online_map);
+	printk("call ia64_sync_itc(%d)\n", master_cpu);
+	ia64_sync_itc(master_cpu);
 
 	/*
 	 * Get our bogomips.
 	 */
@@ -380,11 +355,7 @@
 	efi_map_pal_code();
 	cpu_init();
 	smp_callin();
-	Dprintk("CPU %d is set to go.\n", smp_processor_id());
-	while (!atomic_read(&smp_commenced))
-		cpu_relax();
-	Dprintk("CPU %d is starting idle.\n", smp_processor_id());
 
 	return cpu_idle();
 }
 
@@ -398,13 +369,12 @@
 	return do_fork(CLONE_VM|CLONE_IDLETASK, 0, 0, 0);
 }
 
-static void __devinit
-do_boot_cpu (int sapicid)
+static int __devinit
+do_boot_cpu (int sapicid, int cpu)
 {
 	struct task_struct *idle;
-	int timeout, cpu;
+	int timeout;
 
-	cpu = ++cpucount;
 	/*
 	 * We can't use kernel_thread since we must avoid to
 	 * reschedule the child.
@@ -421,8 +391,6 @@
 	 */
 	init_idle(idle, cpu);
 
-	ia64_cpu_to_sapicid[cpu] = sapicid;
-
 	unhash_process(idle);
 
 	task_for_booting_cpu = idle;
@@ -445,13 +413,15 @@
 	if (test_bit(cpu, &cpu_callin_map)) {
 		/* number CPUs logically, starting from 1 (BSP is 0) */
 		printk("CPU%d: ", cpu);
-		/*print_cpu_info(&cpu_data[cpu]); */
 		printk("CPU has booted.\n");
 	} else {
 		printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid);
 		ia64_cpu_to_sapicid[cpu] = -1;
-		cpucount--;
+		clear_bit(cpu, &cpu_initialized);	/* was set by cpu_init() */
+		clear_bit(cpu, &cpu_online_map);	/* was set in smp_callin() */
+		return -EINVAL;
 	}
+	return 0;
 }
 
 unsigned long cache_decay_ticks;	/* # of ticks an idle task is considered cache-hot */
@@ -466,21 +436,45 @@
 }
 
 /*
+ * Initialize the logical to physical CPU number mapping
+ */
+void __init
+smp_build_cpu_map(void)
+{
+	int sapicid, cpu, i;
+	int boot_cpu_id = hard_smp_processor_id();
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+		ia64_cpu_to_sapicid[cpu] = -1;
+
+	ia64_cpu_to_sapicid[0] = boot_cpu_id;
+	phys_cpu_present_map = 1;
+
+	for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) {
+		sapicid = smp_boot_data.cpu_phys_id[i];
+		if (sapicid == -1 || sapicid == boot_cpu_id)
+			continue;
+		phys_cpu_present_map |= (1 << cpu);
+		ia64_cpu_to_sapicid[cpu] = sapicid;
+		cpu++;
+	}
+}
+
+/*
  * Cycle through the APs sending Wakeup IPIs to boot each.
  */
 void __init
-smp_boot_cpus (void)
+smp_prepare_cpus (unsigned int max_cpu_num)
 {
-	int sapicid, cpu;
 	int boot_cpu_id = hard_smp_processor_id();
 
 	/*
-	 * Initialize the logical to physical CPU number mapping
-	 * and the per-CPU profiling counter/multiplier
+	 * Initialize the per-CPU profiling counter/multiplier
 	 */
 
-	for (cpu = 0; cpu < NR_CPUS; cpu++)
-		ia64_cpu_to_sapicid[cpu] = -1;
+	if (max_cpu_num != -1)
+		max_cpus = max_cpu_num;
+
 	smp_setup_percpu_timer();
 
 	/*
@@ -490,7 +484,6 @@
 	set_bit(0, &cpu_callin_map);
 
 	local_cpu_data->loops_per_jiffy = loops_per_jiffy;
-	ia64_cpu_to_sapicid[0] = boot_cpu_id;
 
 	printk("Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id);
 
@@ -501,57 +494,34 @@
 	/*
 	 * If SMP should be disabled, then really disable it!
 	 */
-	if (!max_cpus || (max_cpus < -1)) {
+	if (!max_cpus) {
 		printk(KERN_INFO "SMP mode deactivated.\n");
-		cpu_online_map = 1;
-		goto smp_done;
+		cpu_online_map = phys_cpu_present_map = 1;
+		return;
 	}
-	if (max_cpus != -1)
-		printk (KERN_INFO "Limiting CPUs to %d\n", max_cpus);
+}
 
-	if (smp_boot_data.cpu_count > 1) {
+void
+smp_cpus_done(unsigned int dummy)
+{
+	int cpu;
+	unsigned long bogosum = 0;
 
-		printk(KERN_INFO "SMP: starting up secondaries.\n");
-
-		for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++) {
-			/*
-			 * Don't even attempt to start the boot CPU!
-			 */
-			sapicid = smp_boot_data.cpu_phys_id[cpu];
-			if ((sapicid == -1) || (sapicid == hard_smp_processor_id()))
-				continue;
-
-			if ((max_cpus > 0) && (cpucount + 1 >= max_cpus))
-				break;
-
-			do_boot_cpu(sapicid);
-
-			/*
-			 * Make sure we unmap all failed CPUs
-			 */
-			if (ia64_cpu_to_sapicid[cpu] == -1)
-				printk("phys CPU#%d not responding - cannot use it.\n", cpu);
-		}
+	/*
+	 * Allow the user to impress friends.
+	 */
 
-		/*
-		 * Allow the user to impress friends.
-		 */
-
-		printk("Before bogomips.\n");
-		if (!cpucount) {
-			printk(KERN_ERR "Error: only one processor found.\n");
-		} else {
-			unsigned long bogosum = 0;
-			for (cpu = 0; cpu < NR_CPUS; cpu++)
-				if (cpu_online_map & (1<<cpu))
-					bogosum += cpu_data(cpu)->loops_per_jiffy;
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+		if (cpu_online(cpu))
+			bogosum += cpu_data(cpu)->loops_per_jiffy;
 
-			printk(KERN_INFO"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
-			       cpucount + 1, bogosum/(500000/HZ), (bogosum/(5000/HZ))%100);
-		}
-	}
-	smp_done:
-	;
+	printk(KERN_INFO"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+	       num_online_cpus(),
+	       bogosum/(500000/HZ),
+	       (bogosum/(5000/HZ))%100);
+
+	Dprintk("Before bogocount - setting activated=1.\n");
+	Dprintk("Boot done.\n");
 }
 
 /*
@@ -579,3 +549,97 @@
 		max_cpus = 0;
 	}
 }
+
+
+/* Upping and downing of CPUs */
+int
+__cpu_disable(void)
+{
+	int i;
+	unsigned int cpu = smp_processor_id();
+	int new_cpu;
+	unsigned long val;
+	extern volatile int time_keeper_id;
+	extern unsigned long irq_affinity[];
+
+	/* Remove from online map. */
+	clear_bit(cpu, &cpu_online_map);
+	clear_bit(cpu, &cpu_callin_map);
+
+	mb();
+
+	/* Update Time Keeper CPU */
+	if (time_keeper_id == cpu) {
+		/* look for an alternate CPU */
+		new_cpu = any_online_cpu(-1);
+		if (new_cpu == -1) {
+			set_bit(cpu, &cpu_online_map);
+			return -1;
+		}
+		time_keeper_id = new_cpu;
+		printk("time keeper is assigned to CPU %d\n", new_cpu);
+	}
+
+	/* Route IRQs elsewhere */
+	for (i = 0; i < NR_IRQS; i++) {
+		if (irq_desc(i)->handler == &no_irq_type)
+			continue;
+
+		if (!irq_desc(i)->handler->set_affinity) {
+			continue;
+		}
+
+		val = irq_affinity[i];
+
+		if (val & (1 << cpu)) {
+			/* Damn, need to find another CPU for this IRQ :) */
+			new_cpu = any_online_cpu(-1);
+			if (!(val & cpu_online_map)) {
+				val |= (1 << new_cpu);
+			} else {
+				val = val & ~(1 << cpu);
+			}
+			val = val & cpu_online_map;
+			irq_affinity[i] = val;
+			printk("Set IRQ affinity %d -> %x\n", i, val);
+			irq_desc(i)->handler->set_affinity(i, val);
+		}
+	}
+
+	return 0;
+}
+
+void
+__cpu_die(unsigned int cpu)
+{
+	/* FIXME: Send an IPI to clean it up, kill idle process, etc. */
+
+}
+
+int
+__cpu_up(unsigned int cpu)
+{
+	int ret;
+	int sapicid;
+
+	/* We cheat if it's already come up once.
+	 */
+	if (test_bit(cpu, &cpu_initialized)) {
+		set_bit(cpu, &cpu_online_map);
+		return 0;
+	}
+
+	sapicid = ia64_cpu_to_sapicid[cpu];
+	if (sapicid == -1)
+		return -EINVAL;
+
+	printk(KERN_INFO "Processor %d/%d is spinning up...\n",
+	       sapicid, cpu);
+
+	/* Processor goes to start_secondary(), sets online flag */
+	ret = do_boot_cpu(sapicid, cpu);
+	if (ret < 0)
+		return ret;
+
+	printk(KERN_INFO "Processor %d has spun up...\n", cpu);
+	return 0;
+}
diff -Nur linux-2.5.14-base/arch/ia64/kernel/time.c linux-2.5.14-new/arch/ia64/kernel/time.c
--- linux-2.5.14-base/arch/ia64/kernel/time.c	Fri May 24 16:23:09 2002
+++ linux-2.5.14-new/arch/ia64/kernel/time.c	Fri May 24 16:27:39 2002
@@ -33,6 +33,8 @@
 
 #endif
 
+volatile int time_keeper_id = 0;
+
 static void
 do_profile (unsigned long ip)
 {
@@ -65,10 +67,12 @@
 {
 	unsigned long elapsed_cycles, lost = jiffies - wall_jiffies;
 	unsigned long now, last_tick;
-#	define time_keeper_id	0	/* smp_processor_id() of time-keeper */
+	int tkid;
+
+	tkid = time_keeper_id;
 
-	last_tick = (cpu_data(time_keeper_id)->itm_next
-		     - (lost + 1)*cpu_data(time_keeper_id)->itm_delta);
+	last_tick = (cpu_data(tkid)->itm_next
+		     - (lost + 1)*cpu_data(tkid)->itm_delta);
 
 	now = ia64_get_itc();
 	if ((long) (now - last_tick) < 0) {
@@ -170,7 +174,7 @@
 #endif
 		new_itm += local_cpu_data->itm_delta;
 
-		if (smp_processor_id() == 0) {
+		if (smp_processor_id() == time_keeper_id) {
 			/*
 			 * Here we are in the timer irq handler. We have irqs locally
 			 * disabled, but we don't know if the timer_bh is running on
diff -Nur linux-2.5.14-base/include/asm-ia64/smp.h linux-2.5.14-new/include/asm-ia64/smp.h
--- linux-2.5.14-base/include/asm-ia64/smp.h	Fri May 24 16:23:09 2002
+++ linux-2.5.14-new/include/asm-ia64/smp.h	Fri May 24 16:27:39 2002
@@ -36,6 +36,7 @@
 
 extern char no_int_routing __devinitdata;
 
+extern unsigned long phys_cpu_present_map;
 extern volatile unsigned long cpu_online_map;
 extern unsigned long ipi_base_addr;
 extern unsigned char smp_int_redirect;
@@ -119,7 +120,13 @@
 	return lid.f.id << 8 | lid.f.eid;
 }
 
+static inline int cpu_possible(unsigned int cpu)
+{
+	return phys_cpu_present_map & (1 << cpu);
+}
+
 /* Upping and downing of CPUs */
+extern void __init smp_build_cpu_map(void);
 extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
 extern int __cpu_up(unsigned int cpu);
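
For reference, here is a rough sketch of how a generic hotplug layer might drive the entry points this patch adds. __cpu_up(), __cpu_disable(), __cpu_die(), cpu_possible(), cpu_online() and num_online_cpus() are real symbols from the patch or the 2.5.14 tree; the example_cpu_up()/example_cpu_down() wrappers below are hypothetical illustrations only (no generic layer is part of this patch), and they gloss over locking and over arranging for __cpu_disable() to run on the CPU being taken down:

#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/errno.h>

/* Hypothetical wrapper: bring a possible-but-offline CPU online. */
static int example_cpu_up(unsigned int cpu)
{
	if (!cpu_possible(cpu))		/* not in phys_cpu_present_map */
		return -EINVAL;
	if (cpu_online(cpu))		/* already running */
		return 0;

	/*
	 * __cpu_up() either re-onlines a CPU that has booted before
	 * (it just sets the bit in cpu_online_map again) or boots it
	 * for the first time via do_boot_cpu()/smp_callin().
	 */
	return __cpu_up(cpu);
}

/* Hypothetical wrapper: take an online CPU down. */
static int example_cpu_down(unsigned int cpu)
{
	int ret;

	if (!cpu_online(cpu))
		return -EINVAL;
	if (num_online_cpus() == 1)	/* never offline the last CPU */
		return -EBUSY;

	/*
	 * __cpu_disable() uses smp_processor_id(), so a real caller
	 * must run it on 'cpu' itself.  It clears the CPU from
	 * cpu_online_map, hands off the time-keeper role if needed
	 * and reroutes IRQ affinity away from the dying CPU.
	 */
	ret = __cpu_disable();
	if (ret)
		return ret;

	__cpu_die(cpu);			/* survivor-side cleanup (still a stub) */
	return 0;
}

Note the asymmetry this design relies on: setup_arch() now allocates per-CPU data for every slot up to NR_CPUS in advance, so an offlined CPU keeps its per-CPU area and its cpu_initialized bit, and a later __cpu_up() can "cheat" by merely setting the online bit again.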