diff -urN 2.2.14/kernel/sched.c sched/kernel/sched.c
--- 2.2.14/kernel/sched.c	Wed Jan  5 14:16:56 2000
+++ sched/kernel/sched.c	Thu Feb 24 20:40:11 2000
@@ -212,101 +212,89 @@
 }
 
 /*
- * If there is a dependency between p1 and p2,
- * don't be too eager to go into the slow schedule.
- * In particular, if p1 and p2 both want the kernel
- * lock, there is no point in trying to make them
- * extremely parallel..
- *
- * (No lock - lock_depth < 0)
- *
- * There are two additional metrics here:
- *
- * first, a 'cutoff' interval, currently 0-200 usecs on
- * x86 CPUs, depending on the size of the 'SMP-local cache'.
- * If the current process has longer average timeslices than
- * this, then we utilize the idle CPU.
- *
- * second, if the wakeup comes from a process context,
- * then the two processes are 'related'. (they form a
- * 'gang')
- *
- * An idle CPU is almost always a bad thing, thus we skip
- * the idle-CPU utilization only if both these conditions
- * are true. (ie. a 'process-gang' rescheduling with rather
- * high frequency should stay on the same CPU).
- *
- * [We can switch to something more finegrained in 2.3.]
- *
- * do not 'guess' if the to-be-scheduled task is RT.
+ * This is ugly, but reschedule_idle() is very timing-critical.
+ * We enter with the runqueue spinlock held, but we might end
+ * up unlocking it early, so the caller must not unlock the
+ * runqueue; the unlock is always done by reschedule_idle().
  */
-#define related(p1,p2) (((p1)->lock_depth >= 0) && (p2)->lock_depth >= 0) && \
-		(((p2)->policy == SCHED_OTHER) && ((p1)->avg_slice < cacheflush_time))
-
-static inline void reschedule_idle_slow(struct task_struct * p)
+static inline void reschedule_idle(struct task_struct * p, unsigned long flags)
 {
 #ifdef __SMP__
-/*
- * (see reschedule_idle() for an explanation first ...)
- *
- * Pass #2
- *
- * We try to find another (idle) CPU for this woken-up process.
- *
- * On SMP, we mostly try to see if the CPU the task used
- * to run on is idle.. but we will use another idle CPU too,
- * at this point we already know that this CPU is not
- * willing to reschedule in the near future.
- *
- * An idle CPU is definitely wasted, especially if this CPU is
- * running long-timeslice processes. The following algorithm is
- * pretty good at finding the best idle CPU to send this process
- * to.
- *
- * [We can try to preempt low-priority processes on other CPUs in
- * 2.3. Also we can try to use the avg_slice value to predict
- * 'likely reschedule' events even on other CPUs.]
- */
 	int this_cpu = smp_processor_id(), target_cpu;
-	struct task_struct *tsk, *target_tsk;
-	int cpu, best_cpu, weight, best_weight, i;
-	unsigned long flags;
-
-	best_weight = 0; /* prevents negative weight */
-
-	spin_lock_irqsave(&runqueue_lock, flags);
+	struct task_struct *tsk;
+	int cpu, best_cpu, i;
 
 	/*
 	 * shortcut if the woken up task's last CPU is
	 * idle now.
	 */
 	best_cpu = p->processor;
-	target_tsk = idle_task(best_cpu);
-	if (cpu_curr(best_cpu) == target_tsk)
+	tsk = idle_task(best_cpu);
+	if (cpu_curr(best_cpu) == tsk)
 		goto send_now;
 
-	target_tsk = NULL;
-	for (i = 0; i < smp_num_cpus; i++) {
+	/*
+	 * We know that the preferred CPU has a cache-affine current
+	 * process; let's try to find a new idle CPU for the woken-up
+	 * process:
+	 */
+	for (i = smp_num_cpus - 1; i >= 0; i--) {
 		cpu = cpu_logical_map(i);
+		if (cpu == best_cpu)
+			continue;
 		tsk = cpu_curr(cpu);
-		if (related(tsk, p))
-			goto out_no_target;
-		weight = preemption_goodness(tsk, p, cpu);
-		if (weight > best_weight) {
-			best_weight = weight;
-			target_tsk = tsk;
-		}
+		/*
+		 * We use the last available idle CPU. This creates
+		 * a priority list between idle CPUs, but this is not
+		 * a problem.
+		 */
+		if (tsk == idle_task(cpu))
+			goto send_now;
 	}
 
 	/*
-	 * found any suitable CPU?
+	 * No CPU is idle, but maybe this process has enough priority
+	 * to preempt its preferred CPU.
+	 */
+	tsk = cpu_curr(best_cpu);
+	if (preemption_goodness(tsk, p, best_cpu) > 0)
+		goto send_now;
+
+	/*
+	 * We will get here often in the high CPU contention case:
+	 * no CPU is idle and this process is either lowprio or the
+	 * preferred CPU is highprio. Try to preempt some other CPU
+	 * only if the process is RT, or if it's interactive and the
+	 * preferred CPU won't reschedule shortly.
 	 */
-	if (!target_tsk)
-		goto out_no_target;
+	if ((p->avg_slice < cacheflush_time && cpu_curr(best_cpu)->avg_slice > cacheflush_time) ||
+			((p->policy & ~SCHED_YIELD) != SCHED_OTHER))
+	{
+		int weight, best_weight = 0;
+		struct task_struct * best_tsk = NULL;
+
+		for (i = smp_num_cpus - 1; i >= 0; i--) {
+			cpu = cpu_logical_map(i);
+			if (cpu == best_cpu)
+				continue;
+			tsk = cpu_curr(cpu);
+			weight = preemption_goodness(tsk, p, cpu);
+			if (weight > best_weight) {
+				best_weight = weight;
+				best_tsk = tsk;
+			}
+		}
+
+		if ((tsk = best_tsk))
+			goto send_now;
+	}
+
+	spin_unlock_irqrestore(&runqueue_lock, flags);
+	return;
 
 send_now:
-	target_cpu = target_tsk->processor;
-	target_tsk->need_resched = 1;
+	target_cpu = tsk->processor;
+	tsk->need_resched = 1;
 	spin_unlock_irqrestore(&runqueue_lock, flags);
 	/*
 	 * the APIC stuff can go outside of the lock because
@@ -315,9 +303,6 @@
 	if (target_cpu != this_cpu)
 		smp_send_reschedule(target_cpu);
 	return;
-out_no_target:
-	spin_unlock_irqrestore(&runqueue_lock, flags);
-	return;
 #else /* UP */
 	int this_cpu = smp_processor_id();
 	struct task_struct *tsk;
@@ -325,38 +310,10 @@
 	tsk = current;
 	if (preemption_goodness(tsk, p, this_cpu) > 0)
 		tsk->need_resched = 1;
+	spin_unlock_irqrestore(&runqueue_lock, flags);
 #endif
 }
 
-static void reschedule_idle(struct task_struct * p)
-{
-#ifdef __SMP__
-	int cpu = smp_processor_id();
-	/*
-	 * ("wakeup()" should not be called before we've initialized
-	 * SMP completely.
-	 * Basically a not-yet initialized SMP subsystem can be
-	 * considered as a not-yet working scheduler, simply dont use
-	 * it before it's up and running ...)
-	 *
-	 * SMP rescheduling is done in 2 passes:
-	 *  - pass #1: faster: 'quick decisions'
-	 *  - pass #2: slower: 'lets try and find a suitable CPU'
-	 */
-
-	/*
-	 * Pass #1. (subtle. We might be in the middle of __switch_to, so
-	 * to preserve scheduling atomicity we have to use cpu_curr)
-	 */
-	if ((p->processor == cpu) && related(cpu_curr(cpu), p))
-		return;
-#endif /* __SMP__ */
-	/*
-	 * Pass #2
-	 */
-	reschedule_idle_slow(p);
-}
-
 /*
  * Careful!
  *
@@ -453,9 +410,8 @@
 	if (p->next_run)
 		goto out;
 	add_to_runqueue(p);
-	spin_unlock_irqrestore(&runqueue_lock, flags);
+	reschedule_idle(p, flags); // spin_unlocks runqueue
 
-	reschedule_idle(p);
 	return;
 out:
 	spin_unlock_irqrestore(&runqueue_lock, flags);
@@ -661,8 +617,12 @@
 {
 #ifdef __SMP__
 	if ((prev->state == TASK_RUNNING) &&
-	    (prev != idle_task(smp_processor_id())))
-		reschedule_idle(prev);
+	    (prev != idle_task(smp_processor_id()))) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&runqueue_lock, flags);
+		reschedule_idle(prev, flags); // spin_unlocks runqueue
+	}
 	wmb();
 	prev->has_cpu = 0;
 #endif /* __SMP__ */
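
For readers who want the control flow without the kernel scaffolding, here is a
minimal user-space model of the decision ladder the patched reschedule_idle()
walks. It is a sketch only: NCPUS, the cpu_idle/curr_slice arrays, struct task
and preempt_gain() are simplified stand-ins, not the real kernel interfaces
(the kernel uses cpu_curr(), idle_task(), preemption_goodness() and the
measured cacheflush_time).

#include <stdio.h>

#define NCPUS 4

struct task {
	int prefers_cpu;		/* last CPU the task ran on */
	unsigned long avg_slice;	/* average timeslice, usecs */
	int realtime;			/* stands in for policy != SCHED_OTHER */
};

static unsigned long cacheflush_time = 100;	/* assumed value, usecs */
static int cpu_idle[NCPUS] = { 0, 0, 0, 0 };	/* which CPUs are idle */
static unsigned long curr_slice[NCPUS] = { 150, 120, 80, 200 };

/* >0 means 'p' deserves to preempt whatever runs on 'cpu'
 * (a toy stand-in for the kernel's preemption_goodness()) */
static int preempt_gain(const struct task *p, int cpu)
{
	(void)p;
	return cpu == 3 ? 2 : 0;
}

static int pick_target_cpu(const struct task *p)
{
	int best_cpu = p->prefers_cpu, cpu, best_weight = 0, target = -1;

	/* 1. shortcut: the task's last CPU is idle, keep cache affinity */
	if (cpu_idle[best_cpu])
		return best_cpu;

	/* 2. any other idle CPU beats leaving the task waiting */
	for (cpu = NCPUS - 1; cpu >= 0; cpu--)
		if (cpu != best_cpu && cpu_idle[cpu])
			return cpu;

	/* 3. no CPU idle: try to preempt the preferred (cache-warm) CPU */
	if (preempt_gain(p, best_cpu) > 0)
		return best_cpu;

	/*
	 * 4. preempt some other CPU only for RT tasks, or for an
	 * interactive task whose preferred CPU runs a cache-hot
	 * process and so won't reschedule soon.
	 */
	if (p->realtime ||
	    (p->avg_slice < cacheflush_time &&
	     curr_slice[best_cpu] > cacheflush_time)) {
		for (cpu = NCPUS - 1; cpu >= 0; cpu--) {
			int w;

			if (cpu == best_cpu)
				continue;
			w = preempt_gain(p, cpu);
			if (w > best_weight) {
				best_weight = w;
				target = cpu;
			}
		}
	}
	return target;	/* -1: leave the task queued, wake nobody */
}

int main(void)
{
	struct task p = { 0, 40, 0 };	/* interactive, prefers busy CPU 0 */

	printf("target cpu: %d\n", pick_target_cpu(&p));
	return 0;
}

Note how step 2 mirrors the patch's backwards scan over cpu_logical_map(): the
last idle CPU found wins, which imposes a fixed priority order among idle CPUs;
the patch's own comment argues that this ordering is harmless.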
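
The other structural change is the locking convention: wake_up_process() and
the schedule tail now take runqueue_lock themselves and hand it to
reschedule_idle(), which always releases it, possibly early. That lets the
cross-CPU kick (smp_send_reschedule()) run after the unlock, outside the lock.
Below is a small pthreads sketch of the same hand-off pattern; the function
names are illustrative, not from the patch.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t runqueue_lock = PTHREAD_MUTEX_INITIALIZER;

/* Enters with runqueue_lock held and ALWAYS releases it before
 * returning -- sometimes early, before the expensive work. */
static void resched_and_unlock(int fast_path)
{
	if (fast_path) {
		pthread_mutex_unlock(&runqueue_lock);
		return;		/* early unlock keeps the hold time short */
	}
	/* ... slow-path scan, still under the lock ... */
	pthread_mutex_unlock(&runqueue_lock);
	/* ... send the cross-CPU kick here, outside the lock ... */
}

static void wake_up_task(void)
{
	pthread_mutex_lock(&runqueue_lock);
	/* ... queue the task ... */
	resched_and_unlock(1);	/* callee unlocks; do NOT unlock again */
}

int main(void)
{
	wake_up_task();
	puts("lock handed off and released by the callee");
	return 0;
}

This asymmetry (lock at the call site, unlock inside the callee) is exactly
what the patch's "// spin_unlocks runqueue" comments flag at each caller; it
trades a little readability for a shorter lock hold time on a hot path.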