diff -urN 2.2.14/kernel/sched.c sched/kernel/sched.c
--- 2.2.14/kernel/sched.c	Wed Jan  5 14:16:56 2000
+++ sched/kernel/sched.c	Thu Feb 24 20:40:11 2000
@@ -212,101 +212,89 @@
 }
 
 /*
- * If there is a dependency between p1 and p2,
- * don't be too eager to go into the slow schedule.
- * In particular, if p1 and p2 both want the kernel
- * lock, there is no point in trying to make them
- * extremely parallel..
- *
- * (No lock - lock_depth < 0)
- *
- * There are two additional metrics here:
- *
- * first, a 'cutoff' interval, currently 0-200 usecs on
- * x86 CPUs, depending on the size of the 'SMP-local cache'.
- * If the current process has longer average timeslices than
- * this, then we utilize the idle CPU.
- *
- * second, if the wakeup comes from a process context,
- * then the two processes are 'related'. (they form a
- * 'gang')
- *
- * An idle CPU is almost always a bad thing, thus we skip
- * the idle-CPU utilization only if both these conditions
- * are true. (ie. a 'process-gang' rescheduling with rather
- * high frequency should stay on the same CPU).
- *
- * [We can switch to something more finegrained in 2.3.]
- *
- * do not 'guess' if the to-be-scheduled task is RT.
+ * This is ugly, but reschedule_idle() is very timing-critical.
+ * We enter with the runqueue spinlock held, but we might end
+ * up unlocking it early, so the caller must not unlock the
+ * runqueue; the unlock is always done by reschedule_idle().
  */
-#define related(p1,p2) (((p1)->lock_depth >= 0) && (p2)->lock_depth >= 0) && \
-		(((p2)->policy == SCHED_OTHER) && ((p1)->avg_slice < cacheflush_time))
-
-static inline void reschedule_idle_slow(struct task_struct * p)
+static inline void reschedule_idle(struct task_struct * p, unsigned long flags)
 {
 #ifdef __SMP__
-/*
- * (see reschedule_idle() for an explanation first ...)
- *
- * Pass #2
- *
- * We try to find another (idle) CPU for this woken-up process.
- *
- * On SMP, we mostly try to see if the CPU the task used
- * to run on is idle.. but we will use another idle CPU too,
- * at this point we already know that this CPU is not
- * willing to reschedule in the near future.
- *
- * An idle CPU is definitely wasted, especially if this CPU is
- * running long-timeslice processes. The following algorithm is
- * pretty good at finding the best idle CPU to send this process
- * to.
- *
- * [We can try to preempt low-priority processes on other CPUs in
- * 2.3. Also we can try to use the avg_slice value to predict
- * 'likely reschedule' events even on other CPUs.]
- */
 	int this_cpu = smp_processor_id(), target_cpu;
-	struct task_struct *tsk, *target_tsk;
-	int cpu, best_cpu, weight, best_weight, i;
-	unsigned long flags;
-
-	best_weight = 0; /* prevents negative weight */
-
-	spin_lock_irqsave(&runqueue_lock, flags);
+	struct task_struct *tsk;
+	int cpu, best_cpu, i;
 
 	/*
 	 * shortcut if the woken up task's last CPU is
	 * idle now.
	 */
 	best_cpu = p->processor;
-	target_tsk = idle_task(best_cpu);
-	if (cpu_curr(best_cpu) == target_tsk)
+	tsk = idle_task(best_cpu);
+	if (cpu_curr(best_cpu) == tsk)
 		goto send_now;
 
-	target_tsk = NULL;
-	for (i = 0; i < smp_num_cpus; i++) {
+	/*
+	 * We know that the preferred CPU has a cache-affine current
+	 * process; let's try to find a new idle CPU for the woken-up
+	 * process:
+	 */
+	for (i = smp_num_cpus - 1; i >= 0; i--) {
 		cpu = cpu_logical_map(i);
+		if (cpu == best_cpu)
+			continue;
 		tsk = cpu_curr(cpu);
-		if (related(tsk, p))
-			goto out_no_target;
-		weight = preemption_goodness(tsk, p, cpu);
-		if (weight > best_weight) {
-			best_weight = weight;
-			target_tsk = tsk;
-		}
+		/*
+		 * We use the last available idle CPU. This creates
+		 * a priority list between idle CPUs, but this is not
+		 * a problem.
+		 */
+		if (tsk == idle_task(cpu))
+			goto send_now;
 	}
 
 	/*
-	 * found any suitable CPU?
+	 * No CPU is idle, but maybe this process has enough priority
+	 * to preempt its preferred CPU.
+	 */
+	tsk = cpu_curr(best_cpu);
+	if (preemption_goodness(tsk, p, best_cpu) > 0)
+		goto send_now;
+
+	/*
+	 * We will get here often in the high CPU contention case:
+	 * no CPU is idle and this process is either lowprio or the
+	 * preferred CPU is highprio. Try to preempt some other CPU
+	 * only if the process is RT, or if it's interactive and the
+	 * preferred CPU won't reschedule shortly.
 	 */
-	if (!target_tsk)
-		goto out_no_target;
+	if ((p->avg_slice < cacheflush_time && cpu_curr(best_cpu)->avg_slice > cacheflush_time) ||
+			((p->policy & ~SCHED_YIELD) != SCHED_OTHER))
+	{
+		int weight, best_weight = 0;
+		struct task_struct * best_tsk = NULL;
+
+		for (i = smp_num_cpus - 1; i >= 0; i--) {
+			cpu = cpu_logical_map(i);
+			if (cpu == best_cpu)
+				continue;
+			tsk = cpu_curr(cpu);
+			weight = preemption_goodness(tsk, p, cpu);
+			if (weight > best_weight) {
+				best_weight = weight;
+				best_tsk = tsk;
+			}
+		}
+
+		if ((tsk = best_tsk))
+			goto send_now;
+	}
+
+	spin_unlock_irqrestore(&runqueue_lock, flags);
+	return;
 
 send_now:
-	target_cpu = target_tsk->processor;
-	target_tsk->need_resched = 1;
+	target_cpu = tsk->processor;
+	tsk->need_resched = 1;
 	spin_unlock_irqrestore(&runqueue_lock, flags);
 	/*
 	 * the APIC stuff can go outside of the lock because
@@ -315,9 +303,6 @@
 	if (target_cpu != this_cpu)
 		smp_send_reschedule(target_cpu);
 	return;
-out_no_target:
-	spin_unlock_irqrestore(&runqueue_lock, flags);
-	return;
 #else /* UP */
 	int this_cpu = smp_processor_id();
 	struct task_struct *tsk;
@@ -325,38 +310,10 @@
 	tsk = current;
 	if (preemption_goodness(tsk, p, this_cpu) > 0)
 		tsk->need_resched = 1;
+	spin_unlock_irqrestore(&runqueue_lock, flags);
 #endif
 }
 
-static void reschedule_idle(struct task_struct * p)
-{
-#ifdef __SMP__
-	int cpu = smp_processor_id();
-	/*
-	 * ("wakeup()" should not be called before we've initialized
-	 * SMP completely.
-	 * Basically a not-yet initialized SMP subsystem can be
-	 * considered as a not-yet working scheduler, simply dont use
-	 * it before it's up and running ...)
-	 *
-	 * SMP rescheduling is done in 2 passes:
-	 *  - pass #1: faster: 'quick decisions'
-	 *  - pass #2: slower: 'lets try and find a suitable CPU'
-	 */
-
-	/*
-	 * Pass #1. (subtle. We might be in the middle of __switch_to, so
-	 * to preserve scheduling atomicity we have to use cpu_curr)
-	 */
-	if ((p->processor == cpu) && related(cpu_curr(cpu), p))
-		return;
-#endif /* __SMP__ */
-	/*
-	 * Pass #2
-	 */
-	reschedule_idle_slow(p);
-}
-
 /*
  * Careful!
  *
@@ -453,9 +410,8 @@
 	if (p->next_run)
 		goto out;
 	add_to_runqueue(p);
-	spin_unlock_irqrestore(&runqueue_lock, flags);
+	reschedule_idle(p, flags); // spin_unlocks runqueue
 
-	reschedule_idle(p);
 	return;
 out:
 	spin_unlock_irqrestore(&runqueue_lock, flags);
@@ -661,8 +617,12 @@
 {
 #ifdef __SMP__
 	if ((prev->state == TASK_RUNNING) &&
-	    (prev != idle_task(smp_processor_id())))
-		reschedule_idle(prev);
+	    (prev != idle_task(smp_processor_id()))) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&runqueue_lock, flags);
+		reschedule_idle(prev, flags); // spin_unlocks runqueue
+	}
 	wmb();
 	prev->has_cpu = 0;
 #endif /* __SMP__ */
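
For readers who want the control flow without the kernel scaffolding, here is a
minimal user-space model of the decision ladder the patched reschedule_idle()
walks. It is a sketch only: NCPUS, the cpu_idle/curr_slice arrays, struct task
and preempt_gain() are simplified stand-ins, not the real kernel interfaces
(the kernel uses cpu_curr(), idle_task(), preemption_goodness() and the
measured cacheflush_time).

#include <stdio.h>

#define NCPUS 4

struct task {
	int prefers_cpu;		/* last CPU the task ran on */
	unsigned long avg_slice;	/* average timeslice, usecs */
	int realtime;			/* stands in for policy != SCHED_OTHER */
};

static unsigned long cacheflush_time = 100;	/* assumed value, usecs */
static int cpu_idle[NCPUS] = { 0, 0, 0, 0 };	/* which CPUs are idle */
static unsigned long curr_slice[NCPUS] = { 150, 120, 80, 200 };

/* >0 means 'p' deserves to preempt whatever runs on 'cpu'
 * (a toy stand-in for the kernel's preemption_goodness()) */
static int preempt_gain(const struct task *p, int cpu)
{
	(void)p;
	return cpu == 3 ? 2 : 0;
}

static int pick_target_cpu(const struct task *p)
{
	int best_cpu = p->prefers_cpu, cpu, best_weight = 0, target = -1;

	/* 1. shortcut: the task's last CPU is idle, keep cache affinity */
	if (cpu_idle[best_cpu])
		return best_cpu;

	/* 2. any other idle CPU beats leaving the task waiting */
	for (cpu = NCPUS - 1; cpu >= 0; cpu--)
		if (cpu != best_cpu && cpu_idle[cpu])
			return cpu;

	/* 3. no CPU idle: try to preempt the preferred (cache-warm) CPU */
	if (preempt_gain(p, best_cpu) > 0)
		return best_cpu;

	/*
	 * 4. preempt some other CPU only for RT tasks, or for an
	 * interactive task whose preferred CPU runs a cache-hot
	 * process and so won't reschedule soon.
	 */
	if (p->realtime ||
	    (p->avg_slice < cacheflush_time &&
	     curr_slice[best_cpu] > cacheflush_time)) {
		for (cpu = NCPUS - 1; cpu >= 0; cpu--) {
			int w;

			if (cpu == best_cpu)
				continue;
			w = preempt_gain(p, cpu);
			if (w > best_weight) {
				best_weight = w;
				target = cpu;
			}
		}
	}
	return target;	/* -1: leave the task queued, wake nobody */
}

int main(void)
{
	struct task p = { 0, 40, 0 };	/* interactive, prefers busy CPU 0 */

	printf("target cpu: %d\n", pick_target_cpu(&p));
	return 0;
}

Note how step 2 mirrors the patch's backwards scan over cpu_logical_map(): the
last idle CPU found wins, which imposes a fixed priority order among idle CPUs;
the patch's own comment argues that this ordering is harmless.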
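
The other structural change is the locking convention: wake_up_process() and
the schedule tail now take runqueue_lock themselves and hand it to
reschedule_idle(), which always releases it, possibly early. That lets the
cross-CPU kick (smp_send_reschedule()) run after the unlock, outside the lock.
Below is a small pthreads sketch of the same hand-off pattern; the function
names are illustrative, not from the patch.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t runqueue_lock = PTHREAD_MUTEX_INITIALIZER;

/* Enters with runqueue_lock held and ALWAYS releases it before
 * returning -- sometimes early, before the expensive work. */
static void resched_and_unlock(int fast_path)
{
	if (fast_path) {
		pthread_mutex_unlock(&runqueue_lock);
		return;		/* early unlock keeps the hold time short */
	}
	/* ... slow-path scan, still under the lock ... */
	pthread_mutex_unlock(&runqueue_lock);
	/* ... send the cross-CPU kick here, outside the lock ... */
}

static void wake_up_task(void)
{
	pthread_mutex_lock(&runqueue_lock);
	/* ... queue the task ... */
	resched_and_unlock(1);	/* callee unlocks; do NOT unlock again */
}

int main(void)
{
	wake_up_task();
	puts("lock handed off and released by the callee");
	return 0;
}

This asymmetry (lock at the call site, unlock inside the callee) is exactly
what the patch's "// spin_unlocks runqueue" comments flag at each caller; it
trades a little readability for a shorter lock hold time on a hot path.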