Name: Rewritten Hot-plug CPU Core Infrastructure for x86 Author: Rusty Russell Status: Experimental Depends: Hotcpu/init-removal-i386.patch.gz D: This modifies the x86 boot sequence to "plug in" CPUs one at a D: time, and adds support for more general plugging on x86. diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.9-fork-nonlinear-init/arch/i386/kernel/apic.c working-2.5.9-hotcpu/arch/i386/kernel/apic.c --- linux-2.5.9-fork-nonlinear-init/arch/i386/kernel/apic.c Fri Apr 26 18:22:52 2002 +++ working-2.5.9-hotcpu/arch/i386/kernel/apic.c Fri Apr 26 18:44:25 2002 @@ -785,9 +785,9 @@ apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); } -void setup_APIC_timer(void * data) +static void setup_APIC_timer(unsigned int calibration_result) { - unsigned int clocks = (unsigned int) data, slice, t0, t1; + unsigned int slice, t0, t1; unsigned long flags; int delta; @@ -803,15 +803,16 @@ * The number of slices within a 'big' timeslice is NR_CPUS+1 */ - slice = clocks / (NR_CPUS+1); - printk("cpu: %d, clocks: %d, slice: %d\n", smp_processor_id(), clocks, slice); + slice = calibration_result / (NR_CPUS+1); + printk("cpu: %d, clocks: %d, slice: %d\n", + smp_processor_id(), calibration_result, slice); /* * Wait for IRQ0's slice: */ wait_8254_wraparound(); - __setup_APIC_LVTT(clocks); + __setup_APIC_LVTT(calibration_result); t0 = apic_read(APIC_TMICT)*APIC_DIVISOR; /* Wait till TMCCT gets reloaded from TMICT... 
*/ @@ -825,11 +826,9 @@ delta = (int)(t0 - t1 - slice*(smp_processor_id()+1)); } while (delta < 0); - __setup_APIC_LVTT(clocks); - - printk("CPU%d<T0:%d,T1:%d,D:%d,S:%d,C:%d>\n", smp_processor_id(), t0, t1, delta, slice, clocks); - - __restore_flags(flags); + __setup_APIC_LVTT(calibration_result); + printk("CPU%d<T0:%d,T1:%d,D:%d,S:%d,C:%d>\n", smp_processor_id(), t0, t1, delta, slice, calibration_result); + __cli(); } /* @@ -908,11 +907,10 @@ return result; } -static unsigned int calibration_result; - int dont_use_local_apic_timer __devinitdata = 0; +static unsigned int calibration_result = 0; -void __devinit setup_APIC_clocks (void) +void __devinit setup_APIC_clock(void) { /* Disabled by DMI scan or kernel option? */ if (dont_use_local_apic_timer) @@ -922,17 +920,13 @@ using_apic_timer = 1; __cli(); - - calibration_result = calibrate_APIC_clock(); + if (!calibration_result) + calibration_result = calibrate_APIC_clock(); /* * Now set up the timer for real. */ - setup_APIC_timer((void *)calibration_result); - + setup_APIC_timer(calibration_result); __sti(); - - /* and update all other cpus */ - smp_call_function(setup_APIC_timer, (void *)calibration_result, 1, 1); } void __devinit disable_APIC_timer(void) @@ -1000,6 +994,11 @@ int user = user_mode(regs); int cpu = smp_processor_id(); +#if 0 /* FIXME: Once we clean up CPUs properly --RR */ + if (!cpu_online(cpu)) + BUG(); +#endif + /* * The profiling function is SMP safe. 
(nothing can mess * around with "current", and the profiling counters are @@ -1164,7 +1163,6 @@ if (!skip_ioapic_setup && nr_ioapics) setup_IO_APIC(); #endif - setup_APIC_clocks(); - + setup_APIC_timer(calibrate_APIC_clock()); return 0; } diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.9-fork-nonlinear-init/arch/i386/kernel/smp.c working-2.5.9-hotcpu/arch/i386/kernel/smp.c --- linux-2.5.9-fork-nonlinear-init/arch/i386/kernel/smp.c Fri Apr 26 18:22:40 2002 +++ working-2.5.9-hotcpu/arch/i386/kernel/smp.c Fri Apr 26 18:24:33 2002 @@ -393,8 +393,11 @@ */ if (!cpumask) BUG(); + /* This can happen if a CPU is taken offline. --RR */ +#if 0 if ((cpumask & cpu_online_map) != cpumask) BUG(); +#endif if (cpumask & (1 << smp_processor_id())) BUG(); if (!mm) diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.9-fork-nonlinear-init/arch/i386/kernel/smpboot.c working-2.5.9-hotcpu/arch/i386/kernel/smpboot.c --- linux-2.5.9-fork-nonlinear-init/arch/i386/kernel/smpboot.c Fri Apr 26 18:22:52 2002 +++ working-2.5.9-hotcpu/arch/i386/kernel/smpboot.c Fri Apr 26 18:41:48 2002 @@ -53,18 +53,17 @@ /* Set if we find a B stepping CPU */ static int smp_b_stepping; -/* Setup configured maximum number of CPUs to activate */ -static int max_cpus = -1; - /* Number of siblings per CPU package */ int smp_num_siblings = 1; int __devinitdata phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ /* Bitmask of currently online CPUs */ -unsigned long cpu_online_map; + /* Bitmask of currently online CPUs */ +unsigned long cpu_online_map = 1; -static volatile unsigned long cpu_callin_map; -static volatile unsigned long cpu_callout_map; +extern unsigned long cpu_initialized; +volatile unsigned long cpu_callin_map; +volatile unsigned long cpu_callout_map; /* Per CPU bogomips and other parameters */ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; @@ -73,33 +72,6 @@ int smp_threads_ready; /* - * Setup routine for 
controlling SMP activation - * - * Command-line option of "nosmp" or "maxcpus=0" will disable SMP - * activation entirely (the MPS table probe still happens, though). - * - * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer - * greater than 0, limits the maximum number of CPUs activated in - * SMP mode to <NUM>. - */ - -static int __init nosmp(char *str) -{ - max_cpus = 0; - return 1; -} - -__setup("nosmp", nosmp); - -static int __init maxcpus(char *str) -{ - get_option(&str, &max_cpus); - return 1; -} - -__setup("maxcpus=", maxcpus); - -/* * Trampoline 80x86 program as an array. */ @@ -187,6 +159,7 @@ } +#if 0 /* * Architecture specific routine called by the kernel just before init is * fired off. This allows the BP to have everything in order [we hope]. @@ -373,6 +346,78 @@ while (atomic_read(&tsc_count_stop) != num_online_cpus()) mb(); } } +#else /* new synchronization code */ + +static unsigned long long shared_tsc; +static atomic_t shared_tsc_waiting = ATOMIC_INIT(0); +static atomic_t shared_tsc_set = ATOMIC_INIT(0); +static atomic_t shared_tsc_reached = ATOMIC_INIT(0); +static atomic_t shared_tsc_taken = ATOMIC_INIT(0); + +/* Give the new CPU our TSC value. If we don't do TSC, these just + serve as synchronization points. */ +static void give_tsc(void) +{ + unsigned long long tsc; + + /* Wait for waiter to arrive */ + while (!atomic_read(&shared_tsc_waiting)) + cpu_relax(); + + if (cpu_has_tsc) { + /* Aim for 10,000 cycles in the future */ + rdtscll(tsc); + shared_tsc = tsc + 10000; + + atomic_set(&shared_tsc_set, 1); + do { + rdtscll(tsc); + } while ((long long)(tsc - shared_tsc) < 0); + atomic_set(&shared_tsc_reached, 1); + } + + /* Wait for them to grab value. */ + while (!atomic_read(&shared_tsc_taken)) + cpu_relax(); + + /* Re-initialize for next time. 
*/ + atomic_set(&shared_tsc_set, 0); + atomic_set(&shared_tsc_reached, 0); + atomic_set(&shared_tsc_taken, 0); + atomic_set(&shared_tsc_waiting, 0); +} + +/* Take the TSC value from another cpu running give_tsc(). */ +static void take_tsc(void) +{ + unsigned int tsc_hi, tsc_low; + + /* Tell giver we are waiting */ + atomic_set(&shared_tsc_waiting, 1); + + if (cpu_has_tsc) { + /* Wait for value to be valid. */ + while (!atomic_read(&shared_tsc_set)) + cpu_relax(); + + rmb(); + tsc_hi = shared_tsc >> 32; + tsc_low = shared_tsc; + rmb(); + + /* Wait for that tsc to be accurate */ + while (!atomic_read(&shared_tsc_reached)) + mb(); + + /* Set it! */ + write_tsc(tsc_low, tsc_hi); + } + + /* Tell other CPU we are done. */ + atomic_set(&shared_tsc_taken, 1); +} +#endif /* TSC synchronization code */ + #undef NR_LOOPS extern void calibrate_delay(void); @@ -467,21 +512,12 @@ */ smp_store_cpu_info(cpuid); - disable_APIC_timer(); /* * Allow the master to continue. */ set_bit(cpuid, &cpu_callin_map); - - /* - * Synchronize the TSC with the BP - */ - if (cpu_has_tsc) - synchronize_tsc_ap(); } -int cpucount; - extern int cpu_idle(void); /* @@ -496,11 +532,16 @@ */ cpu_init(); smp_callin(); - while (!atomic_read(&smp_commenced)) - rep_nop(); + + /* Grab tsc. */ + take_tsc(); + + disable_APIC_timer(); + setup_APIC_clock(); enable_APIC_timer(); + /* - * low-memory mappings have been cleared, flush them from + * low-memory mappings may have been cleared, flush them from * the local TLBs too. */ local_flush_tlb(); @@ -807,9 +848,7 @@ return (send_status | accept_status); } -extern unsigned long cpu_initialized; - -static void __devinit do_boot_cpu (int apicid) +static int __devinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. 
@@ -817,11 +856,10 @@ { struct task_struct *idle; unsigned long boot_error = 0; - int timeout, cpu; + int timeout; unsigned long start_eip; unsigned short nmi_high, nmi_low; - cpu = ++cpucount; /* * We can't use kernel_thread since we must avoid to * reschedule the child. @@ -940,7 +978,7 @@ clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */ clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ clear_bit(cpu, &cpu_online_map); /* was set in smp_callin() */ - cpucount--; + return -EINVAL; } /* mark "stuck" area as not stuck */ @@ -951,6 +989,8 @@ *((volatile unsigned short *) TRAMPOLINE_HIGH) = nmi_high; *((volatile unsigned short *) TRAMPOLINE_LOW) = nmi_low; } + + return 0; } cycles_t cacheflush_time; @@ -1011,9 +1051,9 @@ int cpu_sibling_map[NR_CPUS] __cacheline_aligned; -void __devinit smp_boot_cpus(void) +void __init smp_prepare_cpus(unsigned int max_cpus) { - int apicid, cpu, bit; + int cpu; if (clustered_apic_mode && (numnodes > 1)) { printk("Remapping cross-quad port I/O for %d quads\n", @@ -1052,7 +1092,6 @@ /* * We have the boot CPU online for sure. */ - set_bit(0, &cpu_online_map); boot_cpu_logical_apicid = logical_smp_processor_id(); map_cpu_to_boot_apicid(0, boot_cpu_apicid); @@ -1073,7 +1112,7 @@ if (APIC_init_uniprocessor()) printk(KERN_NOTICE "Local APIC not detected." " Using dummy APIC emulation.\n"); - goto smp_done; + goto out; } /* @@ -1100,7 +1139,7 @@ io_apic_irqs = 0; #endif cpu_online_map = phys_cpu_present_map = 1; - goto smp_done; + goto out; } verify_local_APIC(); @@ -1115,47 +1154,29 @@ io_apic_irqs = 0; #endif cpu_online_map = phys_cpu_present_map = 1; - goto smp_done; - } - - connect_bsp_APIC(); - setup_local_APIC(); - - if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid) - BUG(); - - /* - * Scan the CPU present map and fire up the other CPUs via do_boot_cpu - * - * In clustered apic mode, phys_cpu_present_map is a constructed thus: - * bits 0-3 are quad0, 4-7 are quad1, etc. 
A perverse twist on the - * clustered apic ID. - */ - Dprintk("CPU present map: %lx\n", phys_cpu_present_map); + } else { + connect_bsp_APIC(); + setup_local_APIC(); - for (bit = 0; bit < NR_CPUS; bit++) { - apicid = cpu_present_to_apicid(bit); - /* - * Don't even attempt to start the boot CPU! - */ - if (apicid == boot_cpu_apicid) - continue; + if (GET_APIC_ID(apic_read(APIC_ID)) + != boot_cpu_physical_apicid) + BUG(); - if (!(phys_cpu_present_map & (1 << bit))) - continue; - if ((max_cpus >= 0) && (max_cpus <= cpucount+1)) - continue; + setup_APIC_clock(); + } - do_boot_cpu(apicid); + out: + /* CPU hotplugging - don't clear low mappings. We'll need them + * for additional on/offlining of CPUs */ +#ifndef CONFIG_HOTPLUG + zap_low_mappings(); +#endif +} - /* - * Make sure we unmap all failed CPUs - */ - if ((boot_apicid_to_cpu(apicid) == -1) && - (phys_cpu_present_map & (1 << bit))) - printk("CPU #%d not responding - cannot use it.\n", - apicid); - } +void smp_cpus_done(unsigned int max_cpus) +{ + unsigned int cpu; + unsigned long bogosum = 0; /* * Cleanup possible dangling ends... @@ -1181,20 +1202,14 @@ * Allow the user to impress friends. */ - Dprintk("Before bogomips.\n"); - if (!cpucount) { - printk(KERN_ERR "Error: only one processor found.\n"); - } else { - unsigned long bogosum = 0; - for (cpu = 0; cpu < NR_CPUS; cpu++) - if (cpu_online_map & (1<> 10); +#else + unsigned short *addr; + + addr = (unsigned short *)(&__init_begin); + for (; addr < (unsigned short *)(&__init_end); addr++) + /* BUG()! 
*/ + *addr = 0x0f0b; + + printk ("BUGging unused kernel memory: %dk\n", + (&__init_end - &__init_begin) >> 10); +#endif } #ifdef CONFIG_BLK_DEV_INITRD diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.9-fork-nonlinear-init/include/asm-i386/apic.h working-2.5.9-hotcpu/include/asm-i386/apic.h --- linux-2.5.9-fork-nonlinear-init/include/asm-i386/apic.h Mon Apr 29 11:16:14 2002 +++ working-2.5.9-hotcpu/include/asm-i386/apic.h Tue Apr 30 18:07:09 2002 @@ -9,7 +9,7 @@ #ifdef CONFIG_X86_LOCAL_APIC -#define APIC_DEBUG 0 +#define APIC_DEBUG 1 #if APIC_DEBUG #define Dprintk(x...) printk(x) @@ -76,7 +76,7 @@ extern void setup_local_APIC (void); extern void init_apic_mappings (void); extern void smp_local_timer_interrupt (struct pt_regs * regs); -extern void setup_APIC_clocks (void); +extern void setup_APIC_clock(void); extern void setup_apic_nmi_watchdog (void); extern inline void nmi_watchdog_tick (struct pt_regs * regs); extern int APIC_init_uniprocessor (void); diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.15/include/asm-i386/smp.h working-2.5.15-nonlinear-i386/include/asm-i386/smp.h --- linux-2.5.15/include/asm-i386/smp.h Wed Feb 20 17:56:40 2002 +++ working-2.5.15-nonlinear-i386/include/asm-i386/smp.h Mon May 20 17:51:17 2002 @@ -119,6 +119,15 @@ return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); } +static inline int cpu_possible(unsigned int cpu) +{ + return phys_cpu_present_map & (1 << cpu); +} + +/* Upping and downing of CPUs */ +extern int __cpu_disable(void); +extern void __cpu_die(unsigned int cpu); +extern int __cpu_up(unsigned int cpu); #endif /* !__ASSEMBLY__ */ #define NO_PROC_ID 0xFF /* No processor magic marker */