From: Nick Piggin This is a (somewhat) trivial patch which converts cpu_sibling_map from an array of CPUs to an array of cpumasks. Needed for >2 siblings per package, but it actually can simplify code as it allows the cpu_sibling_map to be set up even when there is 1 sibling per package. Intel want this, I use it in the next patch to build scheduling domains for the P4 HT. DESC p4-clockmod sibling_map fix EDESC From: Thomas Schlichter the patch "sched-sibling-map-to-cpumask.patch" inroduced following compile error on UP machines: CC [M] arch/i386/kernel/cpu/cpufreq/p4-clockmod.o arch/i386/kernel/cpu/cpufreq/p4-clockmod.c: In function `cpufreq_p4_setdc': arch/i386/kernel/cpu/cpufreq/p4-clockmod.c:71: error: `cpu_sibling_map' undeclared (first use in this function) arch/i386/kernel/cpu/cpufreq/p4-clockmod.c:71: error: (Each undeclared identifier is reported only once arch/i386/kernel/cpu/cpufreq/p4-clockmod.c:71: error: for each function it appears in.) DESC p4-clockmod: handle more than two siblings EDESC From: "Pallipadi, Venkatesh" This is required to handle more than 2 siblings per package. --- arch/i386/kernel/cpu/cpufreq/p4-clockmod.c | 33 ++++++++++------------- arch/i386/kernel/io_apic.c | 19 ++++++++----- arch/i386/kernel/smpboot.c | 40 +++++++++++++++-------------- arch/i386/oprofile/op_model_p4.c | 7 +---- include/asm-i386/smp.h | 2 - 5 files changed, 51 insertions(+), 50 deletions(-) diff -puN arch/i386/kernel/cpu/cpufreq/p4-clockmod.c~sched-sibling-map-to-cpumask arch/i386/kernel/cpu/cpufreq/p4-clockmod.c --- 25/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c~sched-sibling-map-to-cpumask 2004-02-18 23:13:34.000000000 -0800 +++ 25-akpm/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2004-02-18 23:14:15.000000000 -0800 @@ -57,8 +57,7 @@ static int cpufreq_p4_setdc(unsigned int u32 l, h; cpumask_t cpus_allowed, affected_cpu_map; struct cpufreq_freqs freqs; - int hyperthreading = 0; - int sibling = 0; + int j; if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV)) @@ -68,13 +67,10 @@ static int cpufreq_p4_setdc(unsigned int cpus_allowed = current->cpus_allowed; /* only run on CPU to be set, or on its sibling */ - affected_cpu_map = cpumask_of_cpu(cpu); -#ifdef CONFIG_X86_HT - hyperthreading = ((cpu_has_ht) && (smp_num_siblings == 2)); - if (hyperthreading) { - sibling = cpu_sibling_map[cpu]; - cpu_set(sibling, affected_cpu_map); - } +#ifdef CONFIG_SMP + affected_cpu_map = cpu_sibling_map[cpu]; +#else + affected_cpu_map = cpumask_of_cpu(cpu); #endif set_cpus_allowed(current, affected_cpu_map); BUG_ON(!cpu_isset(smp_processor_id(), affected_cpu_map)); @@ -97,11 +93,11 @@ static int cpufreq_p4_setdc(unsigned int /* notifiers */ freqs.old = stock_freq * l / 8; freqs.new = stock_freq * newstate / 8; - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - if (hyperthreading) { - freqs.cpu = sibling; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + for_each_cpu(j) { + if (cpu_isset(j, affected_cpu_map)) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } } rdmsr(MSR_IA32_THERM_STATUS, l, h); @@ -132,10 +128,11 @@ static int cpufreq_p4_setdc(unsigned int set_cpus_allowed(current, cpus_allowed); /* notifiers */ - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - if (hyperthreading) { - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + for_each_cpu(j) { + if (cpu_isset(j, affected_cpu_map)) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } } return 0; diff -puN arch/i386/kernel/io_apic.c~sched-sibling-map-to-cpumask arch/i386/kernel/io_apic.c --- 25/arch/i386/kernel/io_apic.c~sched-sibling-map-to-cpumask 2004-02-18 23:13:34.000000000 -0800 +++ 25-akpm/arch/i386/kernel/io_apic.c 2004-02-18 23:13:34.000000000 -0800 @@ -317,8 +317,7 @@ struct irq_cpu_info { #define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) -#define CPU_TO_PACKAGEINDEX(i) \ - ((physical_balance && i > cpu_sibling_map[i]) ? cpu_sibling_map[i] : i) +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) #define MIN_BALANCED_IRQ_INTERVAL (HZ/2) @@ -401,6 +400,7 @@ static void do_irq_balance(void) unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); unsigned long move_this_load = 0; int max_loaded = 0, min_loaded = 0; + int load; unsigned long useful_load_threshold = balanced_irq_interval + 10; int selected_irq; int tmp_loaded, first_attempt = 1; @@ -452,7 +452,7 @@ static void do_irq_balance(void) for (i = 0; i < NR_CPUS; i++) { if (!cpu_online(i)) continue; - if (physical_balance && i > cpu_sibling_map[i]) + if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (min_cpu_irq > CPU_IRQ(i)) { min_cpu_irq = CPU_IRQ(i); @@ -471,7 +471,7 @@ tryanothercpu: for (i = 0; i < NR_CPUS; i++) { if (!cpu_online(i)) continue; - if (physical_balance && i > cpu_sibling_map[i]) + if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (max_cpu_irq <= CPU_IRQ(i)) continue; @@ -551,9 +551,14 @@ tryanotherirq: * We seek the least loaded sibling by making the comparison * (A+B)/2 vs B */ - if (physical_balance && (CPU_IRQ(min_loaded) >> 1) > - CPU_IRQ(cpu_sibling_map[min_loaded])) - min_loaded = cpu_sibling_map[min_loaded]; + load = CPU_IRQ(min_loaded) >> 1; + for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { + if (load > CPU_IRQ(j)) { + /* This won't change cpu_sibling_map[min_loaded] */ + load = CPU_IRQ(j); + min_loaded = j; + } + } cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]); target_cpu_mask = cpumask_of_cpu(min_loaded); diff -puN arch/i386/kernel/smpboot.c~sched-sibling-map-to-cpumask arch/i386/kernel/smpboot.c --- 25/arch/i386/kernel/smpboot.c~sched-sibling-map-to-cpumask 2004-02-18 23:13:34.000000000 -0800 +++ 25-akpm/arch/i386/kernel/smpboot.c 2004-02-18 23:13:34.000000000 -0800 @@ -934,7 +934,7 @@ static int boot_cpu_logical_apicid; /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; -int cpu_sibling_map[NR_CPUS] __cacheline_aligned; +cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; static void __init smp_boot_cpus(unsigned int max_cpus) { @@ -1079,32 +1079,34 @@ static void __init smp_boot_cpus(unsigne Dprintk("Boot done.\n"); /* - * If Hyper-Threading is avaialble, construct cpu_sibling_map[], so - * that we can tell the sibling CPU efficiently. + * construct cpu_sibling_map[], so that we can tell sibling CPUs + * efficiently. */ - if (cpu_has_ht && smp_num_siblings > 1) { - for (cpu = 0; cpu < NR_CPUS; cpu++) - cpu_sibling_map[cpu] = NO_PROC_ID; - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - int i; - if (!cpu_isset(cpu, cpu_callout_map)) - continue; + for (cpu = 0; cpu < NR_CPUS; cpu++) + cpu_sibling_map[cpu] = CPU_MASK_NONE; + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + int siblings = 0; + int i; + if (!cpu_isset(cpu, cpu_callout_map)) + continue; + if (smp_num_siblings > 1) { for (i = 0; i < NR_CPUS; i++) { - if (i == cpu || !cpu_isset(i, cpu_callout_map)) + if (!cpu_isset(i, cpu_callout_map)) continue; if (phys_proc_id[cpu] == phys_proc_id[i]) { - cpu_sibling_map[cpu] = i; - printk("cpu_sibling_map[%d] = %d\n", cpu, cpu_sibling_map[cpu]); - break; + siblings++; + cpu_set(i, cpu_sibling_map[cpu]); } } - if (cpu_sibling_map[cpu] == NO_PROC_ID) { - smp_num_siblings = 1; - printk(KERN_WARNING "WARNING: No sibling found for CPU %d.\n", cpu); - } + } else { + siblings++; + cpu_set(cpu, cpu_sibling_map[cpu]); } + + if (siblings != smp_num_siblings) + printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); } smpboot_setup_io_apic(); diff -puN arch/i386/oprofile/op_model_p4.c~sched-sibling-map-to-cpumask arch/i386/oprofile/op_model_p4.c --- 25/arch/i386/oprofile/op_model_p4.c~sched-sibling-map-to-cpumask 2004-02-18 23:13:34.000000000 -0800 +++ 25-akpm/arch/i386/oprofile/op_model_p4.c 2004-02-18 23:13:34.000000000 -0800 @@ -382,11 +382,8 @@ static struct p4_event_binding p4_events static unsigned int get_stagger(void) { #ifdef CONFIG_SMP - int cpu; - if (smp_num_siblings > 1) { - cpu = smp_processor_id(); - return (cpu_sibling_map[cpu] > cpu) ? 0 : 1; - } + int cpu = smp_processor_id(); + return (cpu != first_cpu(cpu_sibling_map[cpu])); #endif return 0; } diff -puN include/asm-i386/smp.h~sched-sibling-map-to-cpumask include/asm-i386/smp.h --- 25/include/asm-i386/smp.h~sched-sibling-map-to-cpumask 2004-02-18 23:13:34.000000000 -0800 +++ 25-akpm/include/asm-i386/smp.h 2004-02-18 23:13:34.000000000 -0800 @@ -34,7 +34,7 @@ extern void smp_alloc_memory(void); extern int pic_mode; extern int smp_num_siblings; -extern int cpu_sibling_map[]; +extern cpumask_t cpu_sibling_map[]; extern void smp_flush_tlb(void); extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); _