diff -urN 2.3.36/arch/alpha/kernel/Makefile 2.3.36aa1-whole/arch/alpha/kernel/Makefile --- 2.3.36/arch/alpha/kernel/Makefile Wed Dec 8 00:05:25 1999 +++ 2.3.36aa1-whole/arch/alpha/kernel/Makefile Wed Jan 5 18:22:04 2000 @@ -14,7 +14,7 @@ O_TARGET := kernel.o O_OBJS := entry.o traps.o process.o osf_sys.o irq.o signal.o setup.o \ - ptrace.o time.o semaphore.o + ptrace.o time.o semaphore.o i8259.o rtc_irq.o OX_OBJS := alpha_ksyms.o diff -urN 2.3.36/arch/alpha/kernel/alpha_ksyms.c 2.3.36aa1-whole/arch/alpha/kernel/alpha_ksyms.c --- 2.3.36/arch/alpha/kernel/alpha_ksyms.c Fri Dec 17 18:17:06 1999 +++ 2.3.36aa1-whole/arch/alpha/kernel/alpha_ksyms.c Wed Jan 5 18:22:03 2000 @@ -36,6 +36,7 @@ extern struct hwrpb_struct *hwrpb; extern void dump_thread(struct pt_regs *, struct user *); extern int dump_fpu(struct pt_regs *, elf_fpregset_t *); +extern spinlock_t kernel_flag; /* these are C runtime functions with special calling conventions: */ extern void __divl (void); @@ -155,6 +156,7 @@ */ #ifdef __SMP__ +EXPORT_SYMBOL(kernel_flag); EXPORT_SYMBOL(synchronize_irq); EXPORT_SYMBOL(flush_tlb_all); EXPORT_SYMBOL(flush_tlb_mm); diff -urN 2.3.36/arch/alpha/kernel/i8259.c 2.3.36aa1-whole/arch/alpha/kernel/i8259.c --- 2.3.36/arch/alpha/kernel/i8259.c Thu Jan 1 01:00:00 1970 +++ 2.3.36aa1-whole/arch/alpha/kernel/i8259.c Wed Jan 5 18:22:04 2000 @@ -0,0 +1,123 @@ +/* started hacking from linux-2.3.30pre6/arch/i386/kernel/i8259.c */ + +#include +#include +#include +#include +#include + +#include +#include + +/* + * This is the 'legacy' 8259A Programmable Interrupt Controller, + * present in the majority of PC/AT boxes. + */ + +static void enable_8259A_irq(unsigned int irq); +static void disable_8259A_irq(unsigned int irq); + +/* shutdown is same as "disable" */ +#define end_8259A_irq enable_8259A_irq +#define shutdown_8259A_irq disable_8259A_irq + +static void mask_and_ack_8259A(unsigned int); + +static unsigned int startup_8259A_irq(unsigned int irq) +{ + enable_8259A_irq(irq); + return 0; /* never anything pending */ +} + +static struct hw_interrupt_type i8259A_irq_type = { + "XT-PIC", + startup_8259A_irq, + shutdown_8259A_irq, + enable_8259A_irq, + disable_8259A_irq, + mask_and_ack_8259A, + end_8259A_irq +}; + +/* + * 8259A PIC functions to handle ISA devices: + */ + +/* + * This contains the irq mask for both 8259A irq controllers, + */ +static unsigned int cached_irq_mask = 0xffff; + +#define __byte(x,y) (((unsigned char *)&(y))[x]) +#define cached_21 (__byte(0,cached_irq_mask)) +#define cached_A1 (__byte(1,cached_irq_mask)) + +/* + * These have to be protected by the irq controller spinlock + * before being called. + */ +static void disable_8259A_irq(unsigned int irq) +{ + unsigned int mask = 1 << irq; + cached_irq_mask |= mask; + if (irq & 8) + outb(cached_A1,0xA1); + else + outb(cached_21,0x21); +} + +static void enable_8259A_irq(unsigned int irq) +{ + unsigned int mask = ~(1 << irq); + cached_irq_mask &= mask; + if (irq & 8) + outb(cached_A1,0xA1); + else + outb(cached_21,0x21); +} + +static void mask_and_ack_8259A(unsigned int irq) +{ + disable_8259A_irq(irq); + + /* Ack the interrupt making it the lowest priority */ + /* First the slave .. */ + if (irq > 7) { + outb(0xE0 | (irq - 8), 0xa0); + irq = 2; + } + /* .. 
then the master */ + outb(0xE0 | irq, 0x20); +} + +static void init_8259A(void) +{ + outb(0xff, 0x21); /* mask all of 8259A-1 */ + outb(0xff, 0xA1); /* mask all of 8259A-2 */ +} + +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ +static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL}; + +void __init +init_ISA_irqs (void) +{ + int i; + + for (i = 0; i < NR_IRQS; i++) { + if (i == RTC_IRQ) + continue; + if (i >= 16) + break; + irq_desc[i].status = IRQ_DISABLED; + /* + * 16 old-style INTA-cycle interrupts: + */ + irq_desc[i].handler = &i8259A_irq_type; + } + + init_8259A(); + setup_irq(2, &irq2); +} diff -urN 2.3.36/arch/alpha/kernel/irq.c 2.3.36aa1-whole/arch/alpha/kernel/irq.c --- 2.3.36/arch/alpha/kernel/irq.c Thu Dec 9 02:27:27 1999 +++ 2.3.36aa1-whole/arch/alpha/kernel/irq.c Wed Jan 5 18:22:04 2000 @@ -39,6 +39,7 @@ #ifndef __SMP__ int __local_irq_count; int __local_bh_count; +unsigned long __irq_attempt[NR_IRQS]; #endif #if NR_IRQS > 128 @@ -57,12 +58,6 @@ /* - * Shadow-copy of masked interrupts. - */ - -unsigned long _alpha_irq_masks[2] = { ~0UL, ~0UL }; - -/* * The ack_irq routine used by 80% of the systems. */ @@ -135,7 +130,7 @@ return; } } - handle_irq(j, j, regs); + handle_irq(j, regs); #else unsigned long pic; @@ -169,77 +164,201 @@ void srm_device_interrupt(unsigned long vector, struct pt_regs * regs) { - int irq, ack; + int irq; - ack = irq = (vector - 0x800) >> 4; - handle_irq(irq, ack, regs); + irq = (vector - 0x800) >> 4; + handle_irq(irq, regs); } /* + * Special irq handlers. + */ + +void no_action(int cpl, void *dev_id, struct pt_regs *regs) { } + +/* * Initial irq handlers. */ -static struct irqaction timer_irq = { NULL, 0, 0, NULL, NULL, NULL}; -spinlock_t irq_controller_lock = SPIN_LOCK_UNLOCKED; -irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = { [0 ... NR_IRQS-1] = {0,} }; +static void enable_none(unsigned int irq) { } +static unsigned int startup_none(unsigned int irq) { return 0; } +static void disable_none(unsigned int irq) { } +static void ack_none(unsigned int irq) +{ + printk("unexpected IRQ trap at vector %02x\n", irq); +} + +/* startup is the same as "enable", shutdown is same as "disable" */ +#define shutdown_none disable_none +#define end_none enable_none + +struct hw_interrupt_type no_irq_type = { + "none", + startup_none, + shutdown_none, + enable_none, + disable_none, + ack_none, + end_none +}; +spinlock_t irq_controller_lock = SPIN_LOCK_UNLOCKED; +irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = + { [0 ... 
NR_IRQS-1] = { 0, &no_irq_type, }}; -static inline void -mask_irq(unsigned long irq) +int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action) { - set_bit(irq, _alpha_irq_masks); - alpha_mv.update_irq_hw(irq, alpha_irq_mask, 0); -} + int status; + int cpu = smp_processor_id(); -static inline void -unmask_irq(unsigned long irq) -{ - clear_bit(irq, _alpha_irq_masks); - alpha_mv.update_irq_hw(irq, alpha_irq_mask, 1); + kstat.irqs[cpu][irq]++; + irq_enter(cpu, irq); + + status = 1; /* Force the "do bottom halves" bit */ + + do { + if (!(action->flags & SA_INTERRUPT)) + __sti(); + else + __cli(); + + status |= action->flags; + action->handler(irq, action->dev_id, regs); + action = action->next; + } while (action); + if (status & SA_SAMPLE_RANDOM) + add_interrupt_randomness(irq); + __cli(); + + irq_exit(cpu, irq); + + return status; } +/* + * Generic enable/disable code: this just calls + * down into the PIC-specific version for the actual + * hardware disable after having gotten the irq + * controller lock. + */ void -disable_irq_nosync(unsigned int irq_nr) +disable_irq_nosync(unsigned int irq) { unsigned long flags; - save_and_cli(flags); - mask_irq(irq_nr); - restore_flags(flags); + spin_lock_irqsave(&irq_controller_lock, flags); + if (!irq_desc[irq].depth++) { + irq_desc[irq].status |= IRQ_DISABLED; + irq_desc[irq].handler->disable(irq); + } + spin_unlock_irqrestore(&irq_controller_lock, flags); } +/* + * Synchronous version of the above, making sure the IRQ is + * no longer running on any other IRQ.. + */ void -disable_irq(unsigned int irq_nr) +disable_irq(unsigned int irq) { - /* This works non-SMP, and SMP until we write code to distribute - interrupts to more that cpu 0. */ - disable_irq_nosync(irq_nr); + disable_irq_nosync(irq); + + if (!local_irq_count(smp_processor_id())) { + do { + barrier(); + } while (irq_desc[irq].status & IRQ_INPROGRESS); + } } void -enable_irq(unsigned int irq_nr) +enable_irq(unsigned int irq) { unsigned long flags; - save_and_cli(flags); - unmask_irq(irq_nr); - restore_flags(flags); + spin_lock_irqsave(&irq_controller_lock, flags); + switch (irq_desc[irq].depth) { + case 1: { + unsigned int status = irq_desc[irq].status & ~IRQ_DISABLED; + irq_desc[irq].status = status; + if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { + irq_desc[irq].status = status | IRQ_REPLAY; + hw_resend_irq(irq_desc[irq].handler,irq); /* noop */ + } + irq_desc[irq].handler->enable(irq); + /* fall-through */ + } + default: + irq_desc[irq].depth--; + break; + case 0: + printk("enable_irq() unbalanced from %p\n", + __builtin_return_address(0)); + } + spin_unlock_irqrestore(&irq_controller_lock, flags); } int -check_irq(unsigned int irq) +setup_irq(unsigned int irq, struct irqaction * new) { - return irq_desc[irq].action ? -EBUSY : 0; + int shared = 0; + struct irqaction *old, **p; + unsigned long flags; + + /* + * Some drivers like serial.c use request_irq() heavily, + * so we have to be careful not to interfere with a + * running system. + */ + if (new->flags & SA_SAMPLE_RANDOM) { + /* + * This function might sleep, we want to call it first, + * outside of the atomic block. + * Yes, this might clear the entropy pool if the wrong + * driver is attempted to be loaded, without actually + * installing a new handler, but is this really a problem, + * only the sysadmin is able to do this. 
+ */ + rand_initialize_irq(irq); + } + + /* + * The following block of code has to be executed atomically + */ + spin_lock_irqsave(&irq_controller_lock,flags); + p = &irq_desc[irq].action; + if ((old = *p) != NULL) { + /* Can't share interrupts unless both agree to */ + if (!(old->flags & new->flags & SA_SHIRQ)) { + spin_unlock_irqrestore(&irq_controller_lock,flags); + return -EBUSY; + } + + /* add new interrupt at end of irq queue */ + do { + p = &old->next; + old = *p; + } while (old); + shared = 1; + } + + *p = new; + + if (!shared) { + irq_desc[irq].depth = 0; + irq_desc[irq].status &= ~IRQ_DISABLED; + irq_desc[irq].handler->startup(irq); + } + spin_unlock_irqrestore(&irq_controller_lock,flags); + return 0; } int request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *), unsigned long irqflags, const char * devname, void *dev_id) { - int shared = 0; - struct irqaction * action, **p; - unsigned long flags; + int retval; + struct irqaction * action; if (irq >= ACTUAL_NR_IRQS) return -EINVAL; @@ -248,36 +367,25 @@ if (!handler) return -EINVAL; - p = &irq_desc[irq].action; - action = *p; - if (action) { - /* Can't share interrupts unless both agree to */ - if (!(action->flags & irqflags & SA_SHIRQ)) - return -EBUSY; - - /* Can't share interrupts unless both are same type */ - if ((action->flags ^ irqflags) & SA_INTERRUPT) - return -EBUSY; - - /* Add new interrupt at end of irq queue */ - do { - p = &action->next; - action = *p; - } while (action); - shared = 1; +#if 1 + /* + * Sanity-check: shared interrupts should REALLY pass in + * a real dev-ID, otherwise we'll have trouble later trying + * to figure out which interrupt is which (messes up the + * interrupt freeing logic etc). + */ + if (irqflags & SA_SHIRQ) { + if (!dev_id) + printk("Bad boy: %s (at %p) called us without a dev_id!\n", + devname, __builtin_return_address(0)); } +#endif - action = &timer_irq; - if (irq != TIMER_IRQ) { - action = (struct irqaction *) + action = (struct irqaction *) kmalloc(sizeof(struct irqaction), GFP_KERNEL); - } if (!action) return -ENOMEM; - if (irqflags & SA_SAMPLE_RANDOM) - rand_initialize_irq(irq); - action->handler = handler; action->flags = irqflags; action->mask = 0; @@ -285,20 +393,16 @@ action->next = NULL; action->dev_id = dev_id; - save_and_cli(flags); - *p = action; - - if (!shared) - unmask_irq(irq); - - restore_flags(flags); - return 0; + retval = setup_irq(irq, action); + if (retval) + kfree(action); + return retval; } - + void free_irq(unsigned int irq, void *dev_id) { - struct irqaction * action, **p; + struct irqaction **p; unsigned long flags; if (irq >= ACTUAL_NR_IRQS) { @@ -309,25 +413,39 @@ printk("Trying to free reserved IRQ %d\n", irq); return; } - for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) { - if (action->dev_id != dev_id) - continue; + spin_lock_irqsave(&irq_controller_lock,flags); + p = &irq_desc[irq].action; + for (;;) { + struct irqaction * action = *p; + if (action) { + struct irqaction **pp = p; + p = &action->next; + if (action->dev_id != dev_id) + continue; - /* Found it - now free it */ - save_and_cli(flags); - *p = action->next; - if (!irq_desc[irq].action) - mask_irq(irq); - restore_flags(flags); - kfree(action); + /* Found it - now remove it from the list of entries */ + *pp = action->next; + if (!irq_desc[irq].action) { + irq_desc[irq].status |= IRQ_DISABLED; + irq_desc[irq].handler->shutdown(irq); + } + spin_unlock_irqrestore(&irq_controller_lock,flags); + + /* Wait to make sure it's not being used on another CPU 
*/ + while (irq_desc[irq].status & IRQ_INPROGRESS) + barrier(); + kfree(action); + return; + } + printk("Trying to free free IRQ%d\n",irq); + spin_unlock_irqrestore(&irq_controller_lock,flags); return; } - printk("Trying to free free IRQ%d\n",irq); } int get_irq_list(char *buf) { - int i; + int i, j; struct irqaction * action; char *p = buf; @@ -335,6 +453,8 @@ p += sprintf(p, " "); for (i = 0; i < smp_num_cpus; i++) p += sprintf(p, "CPU%d ", i); + for (i = 0; i < smp_num_cpus; i++) + p += sprintf(p, "TRY%d ", i); *p++ = '\n'; #endif @@ -346,13 +466,14 @@ #ifndef __SMP__ p += sprintf(p, "%10u ", kstat_irqs(i)); #else - { - int j; - for (j = 0; j < smp_num_cpus; j++) - p += sprintf(p, "%10u ", - kstat.irqs[cpu_logical_map(j)][i]); - } + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10u ", + kstat.irqs[cpu_logical_map(j)][i]); + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10lu ", + irq_attempt(cpu_logical_map(j), i)); #endif + p += sprintf(p, " %14s", irq_desc[i].handler->typename); p += sprintf(p, " %c%s", (action->flags & SA_INTERRUPT)?'+':' ', action->name); @@ -364,6 +485,13 @@ } *p++ = '\n'; } +#if CONFIG_SMP + p += sprintf(p, "LOC: "); + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10lu ", + cpu_data[cpu_logical_map(j)].smp_local_irq_count); + p += sprintf(p, "\n"); +#endif return p - buf; } @@ -638,139 +766,157 @@ } #endif /* __SMP__ */ -static void -unexpected_irq(int irq, struct pt_regs * regs) -{ -#if 0 -#if 1 - printk("device_interrupt: unexpected interrupt %d\n", irq); -#else - struct irqaction *action; - int i; - - printk("IO device interrupt, irq = %d\n", irq); - printk("PC = %016lx PS=%04lx\n", regs->pc, regs->ps); - printk("Expecting: "); - for (i = 0; i < ACTUAL_NR_IRQS; i++) - if ((action = irq_desc[i].action)) - while (action->handler) { - printk("[%s:%d] ", action->name, i); - action = action->next; - } - printk("\n"); -#endif -#endif - -#if defined(CONFIG_ALPHA_JENSEN) - /* ??? Is all this just debugging, or are the inb's and outb's - necessary to make things work? */ - printk("64=%02x, 60=%02x, 3fa=%02x 2fa=%02x\n", - inb(0x64), inb(0x60), inb(0x3fa), inb(0x2fa)); - outb(0x0c, 0x3fc); - outb(0x0c, 0x2fc); - outb(0,0x61); - outb(0,0x461); -#endif -} - +/* + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ void -handle_irq(int irq, int ack, struct pt_regs * regs) -{ - struct irqaction * action; +handle_irq(int irq, struct pt_regs * regs) +{ + /* + * We ack quickly, we don't want the irq controller + * thinking we're snobs just because some other CPU has + * disabled global interrupts (we have already done the + * INT_ACK cycles, it's too late to try to pretend to the + * controller that we aren't taking the interrupt). + * + * 0 return value means that this irq is already being + * handled by some other CPU. (or is disabled) + */ int cpu = smp_processor_id(); + irq_desc_t *desc; + struct irqaction * action; + unsigned int status; if ((unsigned) irq > ACTUAL_NR_IRQS) { printk("device_interrupt: illegal interrupt %d\n", irq); return; } -#if 0 - /* A useful bit of code to find out if an interrupt is going wild. 
*/ - { - static unsigned int last_msg, last_cc; - static int last_irq, count; - unsigned int cc; - - __asm __volatile("rpcc %0" : "=r"(cc)); - ++count; - if (cc - last_msg > 150000000 || irq != last_irq) { - printk("handle_irq: irq %d count %d cc %u @ %p\n", - irq, count, cc-last_cc, regs->pc); - count = 0; - last_msg = cc; - last_irq = irq; - } - last_cc = cc; + irq_attempt(cpu, irq)++; + desc = irq_desc + irq; + spin_lock_irq(&irq_controller_lock); /* mask also the RTC */ + desc->handler->ack(irq); + /* + REPLAY is when Linux resends an IRQ that was dropped earlier + WAITING is used by probe to mark irqs that are being tested + */ + status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); + status |= IRQ_PENDING; /* we _want_ to handle it */ + + /* + * If the IRQ is disabled for whatever reason, we cannot + * use the action we have. + */ + action = NULL; + if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { + action = desc->action; + status &= ~IRQ_PENDING; /* we commit to handling */ + status |= IRQ_INPROGRESS; /* we are handling it */ } -#endif + desc->status = status; + spin_unlock(&irq_controller_lock); - irq_enter(cpu, irq); - kstat.irqs[cpu][irq] += 1; - action = irq_desc[irq].action; + /* + * If there is no IRQ handler or it was disabled, exit early. + Since we set PENDING, if another processor is handling + a different instance of this same irq, the other processor + will take care of it. + */ + if (!action) + return; /* - * For normal interrupts, we mask it out, and then ACK it. - * This way another (more timing-critical) interrupt can - * come through while we're doing this one. - * - * Note! An irq without a handler gets masked and acked, but - * never unmasked. The autoirq stuff depends on this (it looks - * at the masks before and after doing the probing). - */ - if (ack >= 0) { - mask_irq(ack); - alpha_mv.ack_irq(ack); - } - if (action) { - if (action->flags & SA_SAMPLE_RANDOM) - add_interrupt_randomness(irq); - do { - action->handler(irq, action->dev_id, regs); - action = action->next; - } while (action); - if (ack >= 0) - unmask_irq(ack); - } else { - unexpected_irq(irq, regs); + * Edge triggered interrupts need to remember + * pending events. + * This applies to any hw interrupts that allow a second + * instance of the same irq to arrive while we are in do_IRQ + * or in the handler. But the code here only handles the _second_ + * instance of the irq, not the third or fourth. So it is mostly + * useful for irq hardware that does not mask cleanly in an + * SMP environment. + */ + for (;;) { + handle_IRQ_event(irq, regs, action); + spin_lock(&irq_controller_lock); + + if (!(desc->status & IRQ_PENDING) + || (desc->status & IRQ_LEVEL)) + break; + desc->status &= ~IRQ_PENDING; + spin_unlock(&irq_controller_lock); } - irq_exit(cpu, irq); + desc->status &= ~IRQ_INPROGRESS; + if (!(desc->status & IRQ_DISABLED)) + desc->handler->end(irq); + spin_unlock(&irq_controller_lock); } - /* - * Start listening for interrupts.. + * IRQ autodetection code.. + * + * This depends on the fact that any interrupt that + * comes in on to an unassigned handler will get stuck + * with "IRQ_WAITING" cleared and the interrupt + * disabled. */ - unsigned long probe_irq_on(void) { - struct irqaction * action; - unsigned long irqs = 0; - unsigned long delay; unsigned int i; + unsigned long delay; - /* Handle only the first 64 IRQs here. This is enough for - [E]ISA, which is the only thing that needs probing anyway. 
*/ - for (i = (ACTUAL_NR_IRQS - 1) & 63; i > 0; i--) { - if (!(PROBE_MASK & (1UL << i))) { - continue; - } - action = irq_desc[i].action; - if (!action) { - enable_irq(i); - irqs |= (1UL << i); + /* Something may have generated an irq long ago and we want to + flush such a longstanding irq before considering it as spurious. */ + spin_lock_irq(&irq_controller_lock); + for (i = NR_IRQS-1; i > 0; i--) + if (!irq_desc[i].action) + irq_desc[i].handler->startup(i); + spin_unlock_irq(&irq_controller_lock); + + /* Wait for longstanding interrupts to trigger. */ + for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) + /* about 20ms delay */ synchronize_irq(); + + /* enable any unassigned irqs (we must startup again here because + if a longstanding irq happened in the previous stage, it may have + masked itself). */ + spin_lock_irq(&irq_controller_lock); + for (i = NR_IRQS-1; i > 0; i--) { + if (!irq_desc[i].action) { + irq_desc[i].status |= IRQ_AUTODETECT | IRQ_WAITING; + if(irq_desc[i].handler->startup(i)) + irq_desc[i].status |= IRQ_PENDING; } } + spin_unlock_irq(&irq_controller_lock); /* - * Wait about 100ms for spurious interrupts to mask themselves - * out again... + * Wait for spurious interrupts to trigger */ - for (delay = jiffies + HZ/10; time_before(jiffies, delay); ) - barrier(); + for (delay = jiffies + HZ/10; time_after(delay, jiffies); ) + /* about 100ms delay */ synchronize_irq(); + + /* + * Now filter out any obviously spurious interrupts + */ + spin_lock_irq(&irq_controller_lock); + for (i=0; i<NR_IRQS; i++) { + unsigned int status = irq_desc[i].status; + + if (!(status & IRQ_AUTODETECT)) + continue; + + /* It triggered already - consider it spurious. */ + if (!(status & IRQ_WAITING)) { + irq_desc[i].status = status & ~IRQ_AUTODETECT; + irq_desc[i].handler->shutdown(i); + } + } + spin_unlock_irq(&irq_controller_lock); + + return 0x12345678; } /* @@ -780,19 +926,35 @@ */ int -probe_irq_off(unsigned long irqs) +probe_irq_off(unsigned long unused) { - int i; - - /* Handle only the first 64 IRQs here. This is enough for - [E]ISA, which is the only thing that needs probing anyway.
*/ - irqs &= alpha_irq_mask; - if (!irqs) - return 0; - i = ffz(~irqs); - if (irqs != (1UL << i)) - i = -i; - return i; + int i, irq_found, nr_irqs; + + if (unused != 0x12345678) + printk("Bad IRQ probe from %lx\n", (&unused)[-1]); + + nr_irqs = 0; + irq_found = 0; + spin_lock_irq(&irq_controller_lock); + for (i=0; i<NR_IRQS; i++) { + unsigned int status = irq_desc[i].status; + + if (!(status & IRQ_AUTODETECT)) + continue; + + if (!(status & IRQ_WAITING)) { + if (!nr_irqs) + irq_found = i; + nr_irqs++; + } + irq_desc[i].status = status & ~IRQ_AUTODETECT; + irq_desc[i].handler->shutdown(i); + } + spin_unlock_irq(&irq_controller_lock); + + if (nr_irqs > 1) + irq_found = -irq_found; + return irq_found; } @@ -815,7 +977,12 @@ #endif break; case 1: - handle_irq(RTC_IRQ, -1, &regs); +#ifdef __SMP__ + cpu_data[smp_processor_id()].smp_local_irq_count++; + smp_percpu_timer_interrupt(&regs); + if (smp_processor_id() == smp_boot_cpuid) +#endif + handle_irq(RTC_IRQ, &regs); return; case 2: alpha_mv.machine_check(vector, la_ptr, &regs); diff -urN 2.3.36/arch/alpha/kernel/rtc_irq.c 2.3.36aa1-whole/arch/alpha/kernel/rtc_irq.c --- 2.3.36/arch/alpha/kernel/rtc_irq.c Thu Jan 1 01:00:00 1970 +++ 2.3.36aa1-whole/arch/alpha/kernel/rtc_irq.c Wed Jan 5 18:22:04 2000 @@ -0,0 +1,26 @@ +/* RTC irq callbacks, 1999 Andrea Arcangeli */ + +#include +#include +#include + +static void enable_rtc(unsigned int irq) { } +static unsigned int startup_rtc(unsigned int irq) { return 0; } +#define shutdown_rtc enable_rtc +#define end_rtc enable_rtc +#define ack_rtc enable_rtc +#define disable_rtc enable_rtc + +void __init +init_RTC_irq(void) +{ + static struct hw_interrupt_type rtc_irq_type = { "RTC", + startup_rtc, + shutdown_rtc, + enable_rtc, + disable_rtc, + ack_rtc, + end_rtc }; + irq_desc[RTC_IRQ].status = IRQ_DISABLED; + irq_desc[RTC_IRQ].handler = &rtc_irq_type; +} diff -urN 2.3.36/arch/alpha/kernel/signal.c 2.3.36aa1-whole/arch/alpha/kernel/signal.c --- 2.3.36/arch/alpha/kernel/signal.c Wed Nov 24 18:22:03 1999 +++ 2.3.36aa1-whole/arch/alpha/kernel/signal.c Wed Jan 5 18:22:04 2000 @@ -437,6 +437,8 @@ err |= __copy_to_user(frame->extramask, &set->sig[1], sizeof(frame->extramask)); } + if (err) + goto give_sigsegv; /* Set up to return from userspace. If provided, use a stub already in userspace. */ @@ -499,6 +501,8 @@ err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, sw, set->sig[0], oldsp); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; /* Set up to return from userspace. If provided, use a stub already in userspace. */ diff -urN 2.3.36/arch/alpha/kernel/smp.c 2.3.36aa1-whole/arch/alpha/kernel/smp.c --- 2.3.36/arch/alpha/kernel/smp.c Wed Dec 8 00:05:25 1999 +++ 2.3.36aa1-whole/arch/alpha/kernel/smp.c Wed Jan 5 18:22:04 2000 @@ -62,6 +62,7 @@ static unsigned long smp_secondary_alive; unsigned long cpu_present_mask; /* Which cpus ids came online. */ +static unsigned long __cpu_present_mask __initdata = 0; /* cpu reported in the hwrpb */ static int max_cpus = -1; /* Command-line limitation. */ int smp_boot_cpuid; /* Which processor we booted from.
*/ @@ -506,7 +507,7 @@ if ((cpu->flags & 0x1cc) == 0x1cc) { smp_num_probed++; /* Assume here that "whami" == index */ - cpu_present_mask |= (1L << i); + __cpu_present_mask |= (1L << i); cpu->pal_revision = boot_cpu_palrev; } @@ -517,11 +518,12 @@ } } else { smp_num_probed = 1; - cpu_present_mask = (1L << smp_boot_cpuid); + __cpu_present_mask = (1L << smp_boot_cpuid); } + cpu_present_mask = 1L << smp_boot_cpuid; printk(KERN_INFO "SMP: %d CPUs probed -- cpu_present_mask = %lx\n", - smp_num_probed, cpu_present_mask); + smp_num_probed, __cpu_present_mask); } /* @@ -565,13 +567,14 @@ if (i == smp_boot_cpuid) continue; - if (((cpu_present_mask >> i) & 1) == 0) + if (((__cpu_present_mask >> i) & 1) == 0) continue; if (smp_boot_one_cpu(i, cpu_count)) continue; cpu_count++; + cpu_present_mask |= 1L << i; } if (cpu_count == 1) { @@ -865,6 +868,22 @@ } return 0; +} + +static void +ipi_imb(void) +{ + imb(); +} + +void +smp_imb(void) +{ + /* Must wait for the other processors to flush their icache before continuing. */ + if (smp_call_function(ipi_imb, NULL, 1, 1)) + printk(KERN_CRIT "smp_imb: timed out\n"); + + imb(); } static void diff -urN 2.3.36/arch/alpha/kernel/sys_dp264.c 2.3.36aa1-whole/arch/alpha/kernel/sys_dp264.c --- 2.3.36/arch/alpha/kernel/sys_dp264.c Wed Dec 8 00:05:25 1999 +++ 2.3.36aa1-whole/arch/alpha/kernel/sys_dp264.c Wed Jan 5 18:22:04 2000 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -36,60 +37,157 @@ * HACK ALERT! only the boot cpu is used for interrupts. */ +static void enable_tsunami_irq(unsigned int irq); +static void disable_tsunami_irq(unsigned int irq); +static void enable_clipper_irq(unsigned int irq); +static void disable_clipper_irq(unsigned int irq); + +#define end_tsunami_irq enable_tsunami_irq +#define shutdown_tsunami_irq disable_tsunami_irq +#define mask_and_ack_tsunami_irq disable_tsunami_irq + +#define end_clipper_irq enable_clipper_irq +#define shutdown_clipper_irq disable_clipper_irq +#define mask_and_ack_clipper_irq disable_clipper_irq + + +static unsigned int +startup_tsunami_irq(unsigned int irq) +{ + enable_tsunami_irq(irq); + return 0; /* never anything pending */ +} + +static unsigned int +startup_clipper_irq(unsigned int irq) +{ + enable_clipper_irq(irq); + return 0; /* never anything pending */ +} + +static struct hw_interrupt_type tsunami_irq_type = { + "TSUNAMI", + startup_tsunami_irq, + shutdown_tsunami_irq, + enable_tsunami_irq, + disable_tsunami_irq, + mask_and_ack_tsunami_irq, + end_tsunami_irq +}; + +static struct hw_interrupt_type clipper_irq_type = { + "CLIPPER", + startup_clipper_irq, + shutdown_clipper_irq, + enable_clipper_irq, + disable_clipper_irq, + mask_and_ack_clipper_irq, + end_clipper_irq +}; + +static unsigned long cached_irq_mask = ~0UL; + +#define TSUNAMI_SET_IRQ_MASK(cpu, value) \ +do { \ + volatile unsigned long *csr; \ + \ + csr = &TSUNAMI_cchip->dim##cpu.csr; \ + *csr = (value); \ + mb(); \ + *csr; \ +} while(0) + +static inline void +do_flush_irq_mask(unsigned long value) +{ + switch (TSUNAMI_bootcpu) + { + case 0: + TSUNAMI_SET_IRQ_MASK(0, value); + break; + case 1: + TSUNAMI_SET_IRQ_MASK(1, value); + break; + case 2: + TSUNAMI_SET_IRQ_MASK(2, value); + break; + case 3: + TSUNAMI_SET_IRQ_MASK(3, value); + break; + } +} + +#ifdef CONFIG_SMP +static void +do_flush_smp_irq_mask(unsigned long value) +{ + extern unsigned long cpu_present_mask; + unsigned long other_cpus = cpu_present_mask & ~(1L << TSUNAMI_bootcpu); + + if (other_cpus & 1) + TSUNAMI_SET_IRQ_MASK(0, value); + if (other_cpus & 2) + TSUNAMI_SET_IRQ_MASK(1,
value); + if (other_cpus & 4) + TSUNAMI_SET_IRQ_MASK(2, value); + if (other_cpus & 8) + TSUNAMI_SET_IRQ_MASK(3, value); +} +#endif + static void -dp264_update_irq_hw(unsigned long irq, unsigned long mask, int unmask_p) +dp264_flush_irq_mask(unsigned long mask) { - volatile unsigned long *csr; + unsigned long value; - if (TSUNAMI_bootcpu < 2) { - if (!TSUNAMI_bootcpu) - csr = &TSUNAMI_cchip->dim0.csr; - else - csr = &TSUNAMI_cchip->dim1.csr; - } else { - if (TSUNAMI_bootcpu == 2) - csr = &TSUNAMI_cchip->dim2.csr; - else - csr = &TSUNAMI_cchip->dim3.csr; - } + value = ~mask | (1UL << 55) | 0xffff; /* isa irqs always enabled */ + do_flush_irq_mask(value); - *csr = ~mask; - mb(); - *csr; - - if (irq < 16) { - if (irq >= 8) - outb(mask >> 8, 0xA1); /* ISA PIC2 */ - else - outb(mask, 0x21); /* ISA PIC1 */ - } +#ifdef CONFIG_SMP + value = ~mask; + do_flush_smp_irq_mask(value); +#endif } static void -clipper_update_irq_hw(unsigned long irq, unsigned long mask, int unmask_p) +enable_tsunami_irq(unsigned int irq) { - if (irq >= 16) { - volatile unsigned long *csr; + cached_irq_mask &= ~(1UL << irq); + dp264_flush_irq_mask(cached_irq_mask); +} - if (TSUNAMI_bootcpu < 2) - if (!TSUNAMI_bootcpu) - csr = &TSUNAMI_cchip->dim0.csr; - else - csr = &TSUNAMI_cchip->dim1.csr; - else - if (TSUNAMI_bootcpu == 2) - csr = &TSUNAMI_cchip->dim2.csr; - else - csr = &TSUNAMI_cchip->dim3.csr; - - *csr = (~mask >> 16) | (1UL << 55); /* master ISA enable */ - mb(); - *csr; - } - else if (irq >= 8) - outb(mask >> 8, 0xA1); /* ISA PIC2 */ - else - outb(mask, 0x21); /* ISA PIC1 */ +static void +disable_tsunami_irq(unsigned int irq) +{ + cached_irq_mask |= 1UL << irq; + dp264_flush_irq_mask(cached_irq_mask); +} + +static void +clipper_flush_irq_mask(unsigned long mask) +{ + unsigned long value; + + value = (~mask >> 16) | (1UL << 55); /* master ISA enable */ + do_flush_irq_mask(value); + +#ifdef CONFIG_SMP + value = ~mask >> 16; + do_flush_smp_irq_mask(value); +#endif +} + +static void +enable_clipper_irq(unsigned int irq) +{ + cached_irq_mask &= ~(1UL << irq); + clipper_flush_irq_mask(cached_irq_mask); +} + +static void +disable_clipper_irq(unsigned int irq) +{ + cached_irq_mask |= 1UL << irq; + clipper_flush_irq_mask(cached_irq_mask); } static void @@ -126,9 +224,9 @@ static void dp264_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) { - int irq, ack; + int irq; - ack = irq = (vector - 0x800) >> 4; + irq = (vector - 0x800) >> 4; /* * The SRM console reports PCI interrupts with a vector calculated by: @@ -142,17 +240,17 @@ * so we don't count them. 
*/ if (irq >= 32) - ack = irq = irq - 16; + irq -= 16; - handle_irq(irq, ack, regs); + handle_irq(irq, regs); } static void clipper_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) { - int irq, ack; + int irq; - ack = irq = (vector - 0x800) >> 4; + irq = (vector - 0x800) >> 4; /* * The SRM console reports PCI interrupts with a vector calculated by: @@ -166,7 +264,22 @@ * * Eg IRQ 24 is DRIR bit 8, etc, etc */ - handle_irq(irq, ack, regs); + handle_irq(irq, regs); +} + +static void __init +init_TSUNAMI_irqs(struct hw_interrupt_type * ops) +{ + int i; + + for (i = 0; i < NR_IRQS; i++) { + if (i == RTC_IRQ) + continue; + if (i < 16) + continue; + irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; + irq_desc[i].handler = ops; + } } static void __init @@ -180,10 +293,11 @@ if (alpha_using_srm) alpha_mv.device_interrupt = dp264_srm_device_interrupt; - dp264_update_irq_hw(16, alpha_irq_mask, 0); + init_ISA_irqs(); + init_RTC_irq(); + init_TSUNAMI_irqs(&tsunami_irq_type); - enable_irq(55); /* Enable ISA interrupt controller. */ - enable_irq(2); + dp264_flush_irq_mask(~0UL); } static void __init @@ -197,10 +311,11 @@ if (alpha_using_srm) alpha_mv.device_interrupt = clipper_srm_device_interrupt; - clipper_update_irq_hw(16, alpha_irq_mask, 0); + init_ISA_irqs(); + init_RTC_irq(); + init_TSUNAMI_irqs(&clipper_irq_type); - enable_irq(55); /* Enable ISA interrupt controller. */ - enable_irq(2); + clipper_flush_irq_mask(~0UL); } @@ -431,9 +546,6 @@ min_mem_address: DEFAULT_MEM_BASE, nr_irqs: 64, - irq_probe_mask: TSUNAMI_PROBE_MASK, - update_irq_hw: dp264_update_irq_hw, - ack_irq: common_ack_irq, device_interrupt: dp264_device_interrupt, init_arch: tsunami_init_arch, @@ -458,9 +570,6 @@ min_mem_address: DEFAULT_MEM_BASE, nr_irqs: 64, - irq_probe_mask: TSUNAMI_PROBE_MASK, - update_irq_hw: dp264_update_irq_hw, - ack_irq: common_ack_irq, device_interrupt: dp264_device_interrupt, init_arch: tsunami_init_arch, @@ -484,9 +593,6 @@ min_mem_address: DEFAULT_MEM_BASE, nr_irqs: 64, - irq_probe_mask: TSUNAMI_PROBE_MASK, - update_irq_hw: dp264_update_irq_hw, - ack_irq: common_ack_irq, device_interrupt: dp264_device_interrupt, init_arch: tsunami_init_arch, @@ -510,9 +616,6 @@ min_mem_address: DEFAULT_MEM_BASE, nr_irqs: 64, - irq_probe_mask: TSUNAMI_PROBE_MASK, - update_irq_hw: clipper_update_irq_hw, - ack_irq: common_ack_irq, device_interrupt: dp264_device_interrupt, init_arch: tsunami_init_arch, diff -urN 2.3.36/arch/alpha/kernel/sys_sx164.c 2.3.36aa1-whole/arch/alpha/kernel/sys_sx164.c --- 2.3.36/arch/alpha/kernel/sys_sx164.c Wed Dec 8 00:05:25 1999 +++ 2.3.36aa1-whole/arch/alpha/kernel/sys_sx164.c Wed Jan 5 18:22:04 2000 @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include @@ -26,47 +28,83 @@ #include #include "proto.h" -#include #include "pci_impl.h" #include "machvec_impl.h" +/* Note invert on MASK bits. */ +static unsigned long cached_irq_mask; + +static inline void +sx164_change_irq_mask(unsigned long mask) +{ + *(vulp)PYXIS_INT_MASK = mask; + mb(); + *(vulp)PYXIS_INT_MASK; +} + +static inline void +sx164_enable_irq(unsigned int irq) +{ + sx164_change_irq_mask(cached_irq_mask |= 1UL << (irq - 16)); +} + static void -sx164_update_irq_hw(unsigned long irq, unsigned long mask, int unmask_p) +sx164_disable_irq(unsigned int irq) { - if (irq >= 16) { - /* Make CERTAIN none of the bogus ints get enabled */ - *(vulp)PYXIS_INT_MASK = - ~((long)mask >> 16) & ~0x000000000000003bUL; - mb(); - /* ... and read it back to make sure it got written. 
*/ - *(vulp)PYXIS_INT_MASK; - } - else if (irq >= 8) - outb(mask >> 8, 0xA1); /* ISA PIC2 */ - else - outb(mask, 0x21); /* ISA PIC1 */ + sx164_change_irq_mask(cached_irq_mask &= ~(1UL << (irq - 16))); +} + +static unsigned int +sx164_startup_irq(unsigned int irq) +{ + sx164_enable_irq(irq); + return 0; +} + +static inline void +sx164_srm_enable_irq(unsigned int irq) +{ + cserve_ena(irq - 16); } static void -sx164_srm_update_irq_hw(unsigned long irq, unsigned long mask, int unmask_p) +sx164_srm_disable_irq(unsigned int irq) { - if (irq >= 16) { - if (unmask_p) - cserve_ena(irq - 16); - else - cserve_dis(irq - 16); - } - else if (irq >= 8) - outb(mask >> 8, 0xA1); /* ISA PIC2 */ - else - outb(mask, 0x21); /* ISA PIC1 */ + cserve_dis(irq - 16); } +static unsigned int +sx164_srm_startup_irq(unsigned int irq) +{ + sx164_srm_enable_irq(irq); + return 0; +} + +static struct hw_interrupt_type sx164_irq_type = { + typename: "SX164", + startup: sx164_startup_irq, + shutdown: sx164_disable_irq, + enable: sx164_enable_irq, + disable: sx164_disable_irq, + ack: sx164_disable_irq, + end: sx164_enable_irq, +}; + +static struct hw_interrupt_type sx164_srm_irq_type = { + typename: "SX164-SRM", + startup: sx164_srm_startup_irq, + shutdown: sx164_srm_disable_irq, + enable: sx164_srm_enable_irq, + disable: sx164_srm_disable_irq, + ack: sx164_srm_disable_irq, + end: sx164_srm_enable_irq, +}; + static void sx164_device_interrupt(unsigned long vector, struct pt_regs *regs) { - unsigned long pld, tmp; + unsigned long pld; unsigned int i; /* Read the interrupt summary register of PYXIS */ @@ -93,35 +131,48 @@ continue; } else { /* if not timer int */ - handle_irq(16 + i, 16 + i, regs); + handle_irq(16 + i, regs); } - *(vulp)PYXIS_INT_REQ = 1UL << i; mb(); - tmp = *(vulp)PYXIS_INT_REQ; + + *(vulp)PYXIS_INT_REQ = 1UL << i; + mb(); + *(vulp)PYXIS_INT_REQ; } } static void sx164_init_irq(void) { + struct hw_interrupt_type *ops; + long i; + outb(0, DMA1_RESET_REG); outb(0, DMA2_RESET_REG); outb(DMA_MODE_CASCADE, DMA2_MODE_REG); outb(0, DMA2_MASK_REG); + init_ISA_irqs(); + init_RTC_irq(); + if (alpha_using_srm) { - alpha_mv.update_irq_hw = sx164_srm_update_irq_hw; alpha_mv.device_interrupt = srm_device_interrupt; + ops = &sx164_srm_irq_type; } else { - /* Note invert on MASK bits. */ - *(vulp)PYXIS_INT_MASK = ~((long)alpha_irq_mask >> 16); - mb(); - *(vulp)PYXIS_INT_MASK; + sx164_change_irq_mask(0); + ops = &sx164_irq_type; + } + + for (i = 16; i < 40; ++i) { + /* Make CERTAIN none of the bogus ints get enabled. */ + if ((0x3b0000 >> i) & 1) + continue; + irq_desc[i].status = IRQ_DISABLED; + irq_desc[i].handler = ops; } - enable_irq(16 + 6); /* enable timer */ - enable_irq(16 + 7); /* enable ISA PIC cascade */ - enable_irq(2); /* enable cascade */ + ops->startup(16 + 6); /* enable timer */ + ops->startup(16 + 7); /* enable ISA PIC cascade */ } /* @@ -202,9 +253,6 @@ min_mem_address: DEFAULT_MEM_BASE, nr_irqs: 40, - irq_probe_mask: _PROBE_MASK(40), - update_irq_hw: sx164_update_irq_hw, - ack_irq: common_ack_irq, device_interrupt: sx164_device_interrupt, init_arch: pyxis_init_arch, diff -urN 2.3.36/arch/alpha/kernel/time.c 2.3.36aa1-whole/arch/alpha/kernel/time.c --- 2.3.36/arch/alpha/kernel/time.c Wed Dec 8 00:05:25 1999 +++ 2.3.36aa1-whole/arch/alpha/kernel/time.c Wed Jan 5 18:22:04 2000 @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include #include @@ -88,13 +90,7 @@ __u32 now; long nticks; -#ifdef __SMP__ - /* When SMP, do this for *all* CPUs, but only do the rest for - the boot CPU. 
*/ - smp_percpu_timer_interrupt(regs); - if (smp_processor_id() != smp_boot_cpuid) - return; -#else +#ifndef __SMP__ /* Not SMP, do kernel PC profiling here. */ if (!user_mode(regs)) alpha_do_profile(regs->pc); @@ -248,10 +244,12 @@ void time_init(void) { - void (*irq_handler)(int, void *, struct pt_regs *); unsigned int year, mon, day, hour, min, sec, cc1, cc2; unsigned long cycle_freq, one_percent; long diff; + static struct irqaction timer_irqaction = { timer_interrupt, + SA_INTERRUPT, 0, "timer", + NULL, NULL}; /* * The Linux interpretation of the CMOS clock register contents: @@ -337,9 +335,7 @@ state.partial_tick = 0L; /* setup timer */ - irq_handler = timer_interrupt; - if (request_irq(TIMER_IRQ, irq_handler, 0, "timer", NULL)) - panic("Could not allocate timer IRQ!"); + setup_irq(TIMER_IRQ, &timer_irqaction); } /* diff -urN 2.3.36/arch/alpha/mm/fault.c 2.3.36aa1-whole/arch/alpha/mm/fault.c --- 2.3.36/arch/alpha/mm/fault.c Wed Nov 24 18:22:03 1999 +++ 2.3.36aa1-whole/arch/alpha/mm/fault.c Wed Jan 5 18:22:04 2000 @@ -130,13 +130,13 @@ * make sure we exit gracefully rather than endlessly redo * the fault. */ +survive: fault = handle_mm_fault(current, vma, address, cause > 0); - up(&mm->mmap_sem); - if (fault < 0) goto out_of_memory; if (fault == 0) goto do_sigbus; + up(&mm->mmap_sem); return; @@ -177,13 +177,23 @@ * us unable to handle the page fault gracefully. */ out_of_memory: - printk(KERN_ALERT "VM: killing process %s(%d)\n", - current->comm, current->pid); - if (!user_mode(regs)) - goto no_context; - do_exit(SIGKILL); + if (current->pid == 1) + { + current->policy |= SCHED_YIELD; + schedule(); + goto survive; + } + up(&mm->mmap_sem); + if (user_mode(regs)) + { + printk(KERN_ALERT "VM: killing process %s(%d)\n", + current->comm, current->pid); + do_exit(SIGKILL); + } + goto no_context; do_sigbus: + up(&mm->mmap_sem); /* * Send a sigbus, regardless of whether we were in kernel * or user mode. diff -urN 2.3.36/arch/i386/kernel/irq.c 2.3.36aa1-whole/arch/i386/kernel/irq.c --- 2.3.36/arch/i386/kernel/irq.c Wed Jan 5 17:42:50 2000 +++ 2.3.36aa1-whole/arch/i386/kernel/irq.c Wed Jan 5 18:22:05 2000 @@ -733,8 +733,24 @@ unsigned long delay; unsigned long val; + /* + * something may have generated an irq long ago and we want to + * flush such a longstanding irq before considering it as spurious. + */ + spin_lock_irq(&irq_controller_lock); + for (i = NR_IRQS-1; i > 0; i--) + if (!irq_desc[i].action) + irq_desc[i].handler->startup(i); + spin_unlock_irq(&irq_controller_lock); + + /* Wait for longstanding interrupts to trigger. */ + for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) + /* about 20ms delay */ synchronize_irq(); + /* - * first, enable any unassigned irqs + * enable any unassigned irqs + * (we must startup again here because if a longstanding irq + * happened in the previous stage, it may have masked itself) */ spin_lock_irq(&irq_controller_lock); for (i = NR_IRQS-1; i > 0; i--) { diff -urN 2.3.36/arch/i386/kernel/pci-i386.c 2.3.36aa1-whole/arch/i386/kernel/pci-i386.c --- 2.3.36/arch/i386/kernel/pci-i386.c Wed Jan 5 17:42:50 2000 +++ 2.3.36aa1-whole/arch/i386/kernel/pci-i386.c Wed Jan 5 18:22:04 2000 @@ -102,7 +102,7 @@ * Expects start=0, end=size-1, flags=resource type. */ -int pci_assign_resource(struct pci_dev *dev, int i) +static int __init pcibios_assign_resource(struct pci_dev *dev, int i) { struct resource *r = &dev->resource[i]; struct resource *pr = pci_find_parent_resource(dev, r); @@ -278,7 +278,7 @@ * address was unusable for some reason. 
*/ if (!r->start && r->end) - pci_assign_resource(dev, idx); + pcibios_assign_resource(dev, idx); } if (pci_probe & PCI_ASSIGN_ROMS) { @@ -286,7 +286,7 @@ r->end -= r->start; r->start = 0; if (r->end) - pci_assign_resource(dev, PCI_ROM_RESOURCE); + pcibios_assign_resource(dev, PCI_ROM_RESOURCE); } } } diff -urN 2.3.36/arch/i386/kernel/signal.c 2.3.36aa1-whole/arch/i386/kernel/signal.c --- 2.3.36/arch/i386/kernel/signal.c Wed Nov 24 18:22:03 1999 +++ 2.3.36aa1-whole/arch/i386/kernel/signal.c Wed Jan 5 18:22:04 2000 @@ -419,13 +419,19 @@ ? current->exec_domain->signal_invmap[sig] : sig), &frame->sig); + if (err) + goto give_sigsegv; err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); + if (err) + goto give_sigsegv; if (_NSIG_WORDS > 1) { err |= __copy_to_user(frame->extramask, &set->sig[1], sizeof(frame->extramask)); } + if (err) + goto give_sigsegv; /* Set up to return from userspace. If provided, use a stub already in userspace. */ @@ -486,6 +492,8 @@ err |= __put_user(&frame->info, &frame->pinfo); err |= __put_user(&frame->uc, &frame->puc); err |= __copy_to_user(&frame->info, info, sizeof(*info)); + if (err) + goto give_sigsegv; /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); @@ -497,6 +505,8 @@ err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; /* Set up to return from userspace. If provided, use a stub already in userspace. */ diff -urN 2.3.36/arch/i386/kernel/traps.c 2.3.36aa1-whole/arch/i386/kernel/traps.c --- 2.3.36/arch/i386/kernel/traps.c Tue Dec 14 15:48:49 1999 +++ 2.3.36aa1-whole/arch/i386/kernel/traps.c Wed Jan 5 18:22:04 2000 @@ -172,9 +172,8 @@ printk("\nCall Trace: "); stack = (unsigned long *) esp; i = 1; - module_start = PAGE_OFFSET + (max_mapnr << PAGE_SHIFT); - module_start = ((module_start + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)); - module_end = module_start + MODULE_RANGE; + module_start = VMALLOC_START; + module_end = VMALLOC_END; while (((long) stack & 4095) != 0) { addr = *stack++; /* diff -urN 2.3.36/arch/i386/mm/fault.c 2.3.36aa1-whole/arch/i386/mm/fault.c --- 2.3.36/arch/i386/mm/fault.c Wed Nov 24 18:22:04 1999 +++ 2.3.36aa1-whole/arch/i386/mm/fault.c Wed Jan 5 18:22:04 2000 @@ -31,6 +31,7 @@ { struct vm_area_struct * vma; unsigned long start = (unsigned long) addr; + int fault; if (!size) return 1; @@ -50,8 +51,12 @@ start &= PAGE_MASK; for (;;) { - if (handle_mm_fault(current, vma, start, 1) <= 0) - goto bad_area; +survive: + fault = handle_mm_fault(current, vma, start, 1); + if (!fault) + goto do_sigbus; + if (fault < 0) + goto out_of_memory; if (!size) break; size--; @@ -74,6 +79,19 @@ bad_area: return 0; + +do_sigbus: + force_sig(SIGBUS, current); + goto bad_area; + +out_of_memory: + if (current->pid == 1) + { + current->policy |= SCHED_YIELD; + schedule(); + goto survive; + } + goto bad_area; } static inline void handle_wp_test (void) @@ -188,6 +206,7 @@ * make sure we exit gracefully rather than endlessly redo * the fault. */ +survive: { int fault = handle_mm_fault(tsk, vma, address, write); if (fault < 0) @@ -280,10 +299,33 @@ * us unable to handle the page fault gracefully. 
*/ out_of_memory: + if (tsk->pid == 1) + { + tsk->policy |= SCHED_YIELD; + schedule(); + goto survive; + } up(&mm->mmap_sem); - printk("VM: killing process %s\n", tsk->comm); if (error_code & 4) - do_exit(SIGKILL); + { + if (!((regs->eflags >> 12) & 3)) + { + printk(KERN_ALERT "VM: killing process %s\n", + tsk->comm); + do_exit(SIGKILL); + } + else + { + /* + * The task is running with privileges and so we + * trust it and give it a chance to die gracefully. + */ + printk(KERN_ALERT "VM: terminating process %s\n", + tsk->comm); + force_sig(SIGTERM, current); + return; + } + } goto no_context; do_sigbus: diff -urN 2.3.36/arch/sparc/mm/asyncd.c 2.3.36aa1-whole/arch/sparc/mm/asyncd.c --- 2.3.36/arch/sparc/mm/asyncd.c Wed Jan 5 17:42:50 2000 +++ 2.3.36aa1-whole/arch/sparc/mm/asyncd.c Wed Jan 5 18:22:04 2000 @@ -260,10 +260,11 @@ save_flags(flags); cli(); while (!async_queue) { - spin_lock_irq(&current->sigmask_lock); + spin_lock(&current->sigmask_lock); flush_signals(current); - spin_unlock_irq(&current->sigmask_lock); + spin_unlock(&current->sigmask_lock); interruptible_sleep_on(&asyncd_wait); + __sti(); cli(); } restore_flags(flags); diff -urN 2.3.36/arch/sparc64/mm/asyncd.c 2.3.36aa1-whole/arch/sparc64/mm/asyncd.c --- 2.3.36/arch/sparc64/mm/asyncd.c Fri Dec 31 16:33:02 1999 +++ 2.3.36aa1-whole/arch/sparc64/mm/asyncd.c Wed Jan 5 18:22:04 2000 @@ -263,10 +263,11 @@ save_flags(flags); cli(); while (!async_queue) { - spin_lock_irq(&current->sigmask_lock); + spin_lock(&current->sigmask_lock); flush_signals(current); - spin_unlock_irq(&current->sigmask_lock); + spin_unlock(&current->sigmask_lock); interruptible_sleep_on(&asyncd_wait); + __sti(); cli(); /* acquire global_irq_lock */ } restore_flags(flags); diff -urN 2.3.36/drivers/ap1000/ddv.c 2.3.36aa1-whole/drivers/ap1000/ddv.c --- 2.3.36/drivers/ap1000/ddv.c Tue Dec 14 15:48:49 1999 +++ 2.3.36aa1-whole/drivers/ap1000/ddv.c Wed Jan 5 18:22:04 2000 @@ -386,10 +386,11 @@ save_flags(flags); cli(); while (!rem_queue) { - spin_lock_irq(&current->sigmask_lock); + spin_lock(&current->sigmask_lock); flush_signals(current); - spin_unlock_irq(&current->sigmask_lock); + spin_unlock(&current->sigmask_lock); interruptible_sleep_on(&ddv_daemon_wait); + __sti(); cli(); } rem = rem_queue; diff -urN 2.3.36/drivers/block/ll_rw_blk.c 2.3.36aa1-whole/drivers/block/ll_rw_blk.c --- 2.3.36/drivers/block/ll_rw_blk.c Tue Dec 14 15:48:49 1999 +++ 2.3.36aa1-whole/drivers/block/ll_rw_blk.c Wed Jan 5 18:22:03 2000 @@ -484,10 +484,6 @@ count = bh->b_size >> 9; sector = bh->b_rsector; - /* It had better not be a new buffer by the time we see it */ - if (buffer_new(bh)) - BUG(); - /* Only one thread can actually submit the I/O. */ if (test_and_set_bit(BH_Lock, &bh->b_state)) return; diff -urN 2.3.36/drivers/block/rd.c 2.3.36aa1-whole/drivers/block/rd.c --- 2.3.36/drivers/block/rd.c Fri Dec 31 16:33:02 1999 +++ 2.3.36aa1-whole/drivers/block/rd.c Wed Jan 5 18:22:05 2000 @@ -270,7 +270,7 @@ } } if (rbh) { - set_bit(BH_Protected, &rbh->b_state); + mark_buffer_protected(rbh); brelse(rbh); } @@ -290,7 +290,10 @@ switch (cmd) { case BLKFLSBUF: if (!capable(CAP_SYS_ADMIN)) return -EACCES; - invalidate_buffers(inode->i_rdev); + /* special: we want to release the ramdisk memory, + unlike the other blockdevices, where + this ioctl only flushes away the buffer cache.
*/ + destroy_buffers(inode->i_rdev); break; case BLKGETSIZE: /* Return device size */ @@ -399,7 +402,7 @@ int i; for (i = 0 ; i < NUM_RAMDISKS; i++) - invalidate_buffers(MKDEV(MAJOR_NR, i)); + destroy_buffers(MKDEV(MAJOR_NR, i)); unregister_blkdev( MAJOR_NR, "ramdisk" ); blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); diff -urN 2.3.36/drivers/char/rtc.c 2.3.36aa1-whole/drivers/char/rtc.c --- 2.3.36/drivers/char/rtc.c Wed Nov 24 18:22:04 1999 +++ 2.3.36aa1-whole/drivers/char/rtc.c Wed Jan 5 18:22:03 2000 @@ -126,6 +126,7 @@ static const unsigned char days_in_mo[] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; +#ifndef __alpha__ /* * A very tiny interrupt handler. It runs with SA_INTERRUPT set, * so that there is no possibility of conflicting with the @@ -150,6 +151,7 @@ if (rtc_status & RTC_TIMER_ON) mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); } +#endif /* * Now all the various file operations that we export. @@ -163,6 +165,9 @@ static ssize_t rtc_read(struct file *file, char *buf, size_t count, loff_t *ppos) { +#ifdef __alpha__ + return -EIO; +#else DECLARE_WAITQUEUE(wait, current); unsigned long data; ssize_t retval; @@ -194,6 +199,7 @@ remove_wait_queue(&rtc_wait, &wait); return retval; +#endif } static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, @@ -204,6 +210,7 @@ struct rtc_time wtime; switch (cmd) { +#ifndef __alpha__ case RTC_AIE_OFF: /* Mask alarm int. enab. bit */ { mask_rtc_irq_bit(RTC_AIE); @@ -251,6 +258,7 @@ set_rtc_irq_bit(RTC_UIE); return 0; } +#endif case RTC_ALM_READ: /* Read the present alarm time */ { /* @@ -386,6 +394,7 @@ restore_flags(flags); return 0; } +#ifndef __alpha__ case RTC_IRQP_READ: /* Read the periodic IRQ rate. */ { return put_user(rtc_freq, (unsigned long *)arg); @@ -426,7 +435,7 @@ restore_flags(flags); return 0; } -#ifdef __alpha__ +#else case RTC_EPOCH_READ: /* Read the epoch. */ { return put_user (epoch, (unsigned long *)arg); @@ -472,6 +481,7 @@ static int rtc_release(struct inode *inode, struct file *file) { +#ifndef __alpha__ /* * Turn off all interrupts once the device is no longer * in use, and clear the data. @@ -495,6 +505,7 @@ del_timer(&rtc_irq_timer); } +#endif MOD_DEC_USE_COUNT; rtc_irq_data = 0; @@ -502,6 +513,7 @@ return 0; } +#ifndef __alpha__ static unsigned int rtc_poll(struct file *file, poll_table *wait) { poll_wait(file, &rtc_wait, wait); @@ -509,6 +521,7 @@ return POLLIN | POLLRDNORM; return 0; } +#endif /* * The various file operations we support. @@ -519,7 +532,11 @@ rtc_read, NULL, /* No write */ NULL, /* No readdir */ +#ifdef __alpha__ + NULL, +#else rtc_poll, +#endif rtc_ioctl, NULL, /* No mmap */ rtc_open, @@ -577,12 +594,14 @@ } misc_register(&rtc_dev); #else +#ifndef __alpha__ if(request_irq(RTC_IRQ, rtc_interrupt, SA_INTERRUPT, "rtc", NULL)) { /* Yeah right, seeing as irq 8 doesn't even hit the bus. */ printk(KERN_ERR "rtc: IRQ %d is not free.\n", RTC_IRQ); return -EIO; } +#endif misc_register(&rtc_dev); create_proc_read_entry ("rtc", 0, NULL, rtc_read_proc, NULL); @@ -619,6 +638,7 @@ if (guess) printk("rtc: %s epoch (%lu) detected\n", guess, epoch); #endif +#ifndef __alpha__ init_timer(&rtc_irq_timer); rtc_irq_timer.function = rtc_dropped_irq; save_flags(flags); @@ -626,6 +646,7 @@ /* Initialize periodic freq. 
to CMOS reset default, which is 1024Hz */ CMOS_WRITE(((CMOS_READ(RTC_FREQ_SELECT) & 0xF0) | 0x06), RTC_FREQ_SELECT); restore_flags(flags); +#endif rtc_freq = 1024; return 0; } @@ -649,6 +670,7 @@ module_exit(rtc_exit); EXPORT_NO_SYMBOLS; +#ifndef __alpha__ /* * At IRQ rates >= 4096Hz, an interrupt may get lost altogether. * (usually during an IDE disk interrupt, with IRQ unmasking off) @@ -675,6 +697,7 @@ rtc_irq_data |= (CMOS_READ(RTC_INTR_FLAGS) & 0xF0); /* restart */ restore_flags(flags); } +#endif /* * Info exported via "/proc/rtc". @@ -863,6 +886,7 @@ } } +#ifndef __alpha__ /* * Used to disable/enable interrupts for any one of UIE, AIE, PIE. * Rumour has it that if you frob the interrupt enable/disable @@ -902,3 +926,4 @@ rtc_irq_data = 0; restore_flags(flags); } +#endif diff -urN 2.3.36/drivers/pci/pcisyms.c 2.3.36aa1-whole/drivers/pci/pcisyms.c --- 2.3.36/drivers/pci/pcisyms.c Fri Dec 31 16:33:03 1999 +++ 2.3.36aa1-whole/drivers/pci/pcisyms.c Wed Jan 5 18:22:04 2000 @@ -26,7 +26,6 @@ EXPORT_SYMBOL(pci_set_master); EXPORT_SYMBOL(pci_simple_probe); EXPORT_SYMBOL(pci_set_power_state); -EXPORT_SYMBOL(pci_assign_resource); #ifdef CONFIG_PROC_FS EXPORT_SYMBOL(pci_proc_attach_device); EXPORT_SYMBOL(pci_proc_detach_device); diff -urN 2.3.36/fs/block_dev.c 2.3.36aa1-whole/fs/block_dev.c --- 2.3.36/fs/block_dev.c Tue Jul 13 02:02:00 1999 +++ 2.3.36aa1-whole/fs/block_dev.c Wed Jan 5 18:22:03 2000 @@ -67,39 +67,50 @@ if (chars != blocksize) fn = bread; bh = fn(dev, block, blocksize); + if (!bh) + return written ? written : -EIO; + if (!buffer_uptodate(bh)) + wait_on_buffer(bh); } #else bh = getblk(dev, block, blocksize); + if (!bh) + return written ? written : -EIO; - if (chars != blocksize && !buffer_uptodate(bh)) { - if(!filp->f_reada || - !read_ahead[MAJOR(dev)]) { - /* We do this to force the read of a single buffer */ - brelse(bh); - bh = bread(dev,block,blocksize); - } else { - /* Read-ahead before write */ - blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9) / 2; - if (block + blocks > size) blocks = size - block; - if (blocks > NBUF) blocks=NBUF; + if (!buffer_uptodate(bh)) + { + if (chars == blocksize) + wait_on_buffer(bh); + else + { bhlist[0] = bh; - for(i=1; i<blocks; i++){ - bhlist[i] = getblk (dev, block+i, blocksize); - if(!bhlist[i]){ - while(i >= 0) brelse(bhlist[i--]); - return written ? written : -EIO; - }; - }; + if (!filp->f_reada || !read_ahead[MAJOR(dev)]) { + /* We do this to force the read of a single buffer */ + blocks = 1; + } else { + /* Read-ahead before write */ + blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9) / 2; + if (block + blocks > size) blocks = size - block; + if (blocks > NBUF) blocks=NBUF; + for(i=1; i<blocks; i++) { + bhlist[i] = getblk (dev, block+i, blocksize); + if (!bhlist[i]) { + while(i >= 0) brelse(bhlist[i--]); + return written ? written : -EIO; + } + } + } ll_rw_block(READ, blocks, bhlist); for(i=1; i<blocks; i++) brelse(bhlist[i]); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { brelse(bh); return written ? written : -EIO; } + } + } } #endif block++; p = offset + bh->b_data; offset = 0; *ppos += chars; diff -urN 2.3.36/fs/buffer.c 2.3.36aa1-whole/fs/buffer.c --- 2.3.36/fs/buffer.c Fri Dec 24 02:00:56 1999 +++ 2.3.36aa1-whole/fs/buffer.c Wed Jan 5 18:22:05 2000 @@ -94,6 +94,7 @@ kmem_cache_t *bh_cachep; static int grow_buffers(int size); +static void __refile_buffer(struct buffer_head *); /* This is used by some architectures to estimate available memory.
*/ atomic_t buffermem_pages = ATOMIC_INIT(0); @@ -277,11 +278,14 @@ void sync_dev(kdev_t dev) { - sync_buffers(dev, 0); sync_supers(dev); sync_inodes(dev); - sync_buffers(dev, 0); DQUOT_SYNC(dev); + /* sync all the dirty buffers out to disk only _after_ all the + high level layers finished generating buffer dirty data + (or we'll return with some buffer still dirty on the blockdevice + so breaking the semantics of this call) */ + sync_buffers(dev, 0); /* * FIXME(eric) we need to sync the physical devices here. * This is because some (scsi) controllers have huge amounts of @@ -412,40 +416,6 @@ return err; } -void invalidate_buffers(kdev_t dev) -{ - int nlist; - - spin_lock(&lru_list_lock); - for(nlist = 0; nlist < NR_LIST; nlist++) { - struct buffer_head * bh; - int i; - retry: - bh = lru_list[nlist]; - if (!bh) - continue; - for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bh->b_next_free) { - if (bh->b_dev != dev) - continue; - if (buffer_locked(bh)) { - atomic_inc(&bh->b_count); - spin_unlock(&lru_list_lock); - wait_on_buffer(bh); - spin_lock(&lru_list_lock); - atomic_dec(&bh->b_count); - goto retry; - } - if (atomic_read(&bh->b_count)) - continue; - clear_bit(BH_Protected, &bh->b_state); - clear_bit(BH_Uptodate, &bh->b_state); - clear_bit(BH_Dirty, &bh->b_state); - clear_bit(BH_Req, &bh->b_state); - } - } - spin_unlock(&lru_list_lock); -} - /* After several hours of tedious analysis, the following hash * function won. Do not mess with it... -DaveM */ @@ -464,10 +434,13 @@ static __inline__ void __hash_unlink(struct buffer_head *bh) { - if (bh->b_next) - bh->b_next->b_pprev = bh->b_pprev; - *(bh->b_pprev) = bh->b_next; - bh->b_pprev = NULL; + if (bh->b_pprev) + { + if (bh->b_next) + bh->b_next->b_pprev = bh->b_pprev; + *(bh->b_pprev) = bh->b_next; + bh->b_pprev = NULL; + } } static void __insert_into_lru_list(struct buffer_head * bh, int blist) @@ -514,17 +487,12 @@ bh->b_next_free = bh->b_prev_free = NULL; } -/* The following two functions must operate atomically - * because they control the visibility of a buffer head - * to the rest of the kernel. - */ -static __inline__ void __remove_from_queues(struct buffer_head *bh) +/* must be called with both the hash_table_lock and the lru_list_lock held */ +static void __remove_from_queues(struct buffer_head *bh) { - write_lock(&hash_table_lock); - if (bh->b_pprev) - __hash_unlink(bh); + __hash_unlink(bh); __remove_from_lru_list(bh, bh->b_list); - write_unlock(&hash_table_lock); } static void insert_into_queues(struct buffer_head *bh) @@ -547,6 +515,8 @@ struct bh_free_head *head = &free_list[BUFSIZE_INDEX(bh->b_size)]; struct buffer_head **bhp = &head->list; + bh->b_state = 0; + spin_lock(&head->lock); bh->b_dev = B_FREE; if(!*bhp) { @@ -604,11 +574,69 @@ return 0; } +/* If invalidate_buffers() will trash dirty buffers, it means some kind + of fs corruption is going on. Trashing dirty data always implies losing + information that was supposed to be just stored on the physical layer + by the user. + + Thus invalidate_buffers in general usage is not allowed to trash dirty + buffers. For example ioctl(BLKFLSBUF) expects dirty data to be preserved.
@@ -604,11 +574,69 @@
 	return 0;
 }
 
+/* If invalidate_buffers() will trash dirty buffers, it means some kind
+   of fs corruption is going on. Trashing dirty data always implies losing
+   information that was supposed to be just stored on the physical layer
+   by the user.
+
+   Thus invalidate_buffers in general usage is not allowed to trash dirty
+   buffers. For example ioctl(BLKFLSBUF) expects dirty data to be preserved.
+
+   NOTE: in the case where the user removed a removable-media disk even
+   though there was still dirty data not synced to disk (due to a bug in
+   the device driver or to a user error), by not destroying the dirty
+   buffers we could generate corruption on the next media inserted as
+   well. Thus a parameter is necessary to handle this case in the safest
+   way possible (trying not to corrupt the newly inserted disk with data
+   belonging to the old, now corrupted, disk). Also for the ramdisk the
+   natural thing to do in order to release the ramdisk memory is to
+   destroy its dirty buffers.
+
+   These are two special cases. Normal usage requires the device driver
+   to issue a sync on the device (without waiting for I/O completion) and
+   then an invalidate_buffers call that doesn't trash dirty buffers. */
+void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers)
+{
+	int i, nlist;
+	struct buffer_head * bh;
+
+	spin_lock(&lru_list_lock);
+	for(nlist = 0; nlist < NR_LIST; nlist++) {
+	retry:
+		bh = lru_list[nlist];
+		if (!bh)
+			continue;
+		for (i = nr_buffers_type[nlist]; --i > 0 ; bh = bh->b_next_free) {
+			if (bh->b_dev != dev)
+				continue;
+			if (buffer_locked(bh)) {
+				atomic_inc(&bh->b_count);
+				spin_unlock(&lru_list_lock);
+				wait_on_buffer(bh);
+				atomic_dec(&bh->b_count);
+				spin_lock(&lru_list_lock);
+				goto retry;
+			}
+			/* We must enforce that nobody will find the buffer
+			   while we are dropping it. This in turn also means
+			   nobody can mark the buffer dirty from under us. */
+			write_lock(&hash_table_lock);
+			if (!atomic_read(&bh->b_count) &&
+			    (destroy_dirty_buffers || !buffer_dirty(bh)))
+			{
+				__remove_from_queues(bh);
+				put_last_free(bh);
+			}
+			write_unlock(&hash_table_lock);
+		}
+	}
+	spin_unlock(&lru_list_lock);
+}
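Given the convention in the comment above, a block driver chooses between the two entry points depending on whether dirty buffers may legitimately be lost. A hedged sketch (both driver functions are hypothetical; invalidate_buffers()/destroy_buffers() are the macros this patch adds to fs.h further down):

	/* Normal case: start write-out first, then drop only clean buffers. */
	static void example_release_device(kdev_t dev)
	{
		sync_buffers(dev, 0);		/* issue I/O, don't wait */
		invalidate_buffers(dev);	/* == __invalidate_buffers(dev, 0) */
	}

	/* Media change or ramdisk teardown: dirty data is unrecoverable
	 * (or must be freed), so trash it explicitly. */
	static void example_media_changed(kdev_t dev)
	{
		destroy_buffers(dev);		/* == __invalidate_buffers(dev, 1) */
	}
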
 
 void set_blocksize(kdev_t dev, int size)
 {
 	extern int *blksize_size[];
 	int i, nlist;
-	struct buffer_head * bh, *bhnext;
+	struct buffer_head * bh;
 
 	if (!blksize_size[MAJOR(dev)])
 		return;
@@ -626,41 +654,50 @@
 	sync_buffers(dev, 2);
 	blksize_size[MAJOR(dev)][MINOR(dev)] = size;
 
-	/* We need to be quite careful how we do this - we are moving entries
-	 * around on the free list, and we can get in a loop if we are not careful.
-	 */
+	spin_lock(&lru_list_lock);
 	for(nlist = 0; nlist < NR_LIST; nlist++) {
-	repeat:
-		spin_lock(&lru_list_lock);
+	retry:
 		bh = lru_list[nlist];
-		for (i = nr_buffers_type[nlist]*2 ; --i > 0 ; bh = bhnext) {
-			if(!bh)
-				break;
-
-			bhnext = bh->b_next_free;
-			if (bh->b_dev != dev)
-				continue;
-			if (bh->b_size == size)
-				continue;
+		if (!bh)
+			continue;
+		for (i = nr_buffers_type[nlist]; --i > 0 ; bh = bh->b_next_free) {
+			if (bh->b_dev != dev || bh->b_size == size)
+				continue;
 			if (buffer_locked(bh)) {
 				atomic_inc(&bh->b_count);
 				spin_unlock(&lru_list_lock);
 				wait_on_buffer(bh);
 				atomic_dec(&bh->b_count);
-				goto repeat;
-			}
-			if (bh->b_dev == dev && bh->b_size != size) {
-				clear_bit(BH_Dirty, &bh->b_state);
-				clear_bit(BH_Uptodate, &bh->b_state);
-				clear_bit(BH_Req, &bh->b_state);
+				spin_lock(&lru_list_lock);
+				goto retry;
 			}
-			if (atomic_read(&bh->b_count) == 0) {
+
+			/* We must enforce that nobody will find the buffer
+			   while we are dropping it. This in turn also means
+			   nobody can mark the buffer dirty from under us. */
+			write_lock(&hash_table_lock);
+			if (!atomic_read(&bh->b_count))
+			{
+				if (buffer_dirty(bh))
+					printk(KERN_WARNING
+					       "set_blocksize: dev %s buffer_dirty %lu size %hu\n",
+					       kdevname(dev), bh->b_blocknr, bh->b_size);
 				__remove_from_queues(bh);
 				put_last_free(bh);
 			}
+			else
+			{
+				if (atomic_set_buffer_clean(bh))
+					__refile_buffer(bh);
+				clear_bit(BH_Uptodate, &bh->b_state);
+				printk(KERN_WARNING
+				       "set_blocksize: "
+				       "b_count %d, dev %s, block %lu, from %p\n",
+				       atomic_read(&bh->b_count), bdevname(bh->b_dev),
+				       bh->b_blocknr, __builtin_return_address(0));
+			}
+			write_unlock(&hash_table_lock);
 		}
-		spin_unlock(&lru_list_lock);
 	}
+	spin_unlock(&lru_list_lock);
 }
 
 /*
@@ -785,30 +822,29 @@
 		atomic_set(&bh->b_count, 1);
 	}
 	spin_unlock(&free_list[isize].lock);
-	if (!bh)
-		goto refill;
-
-	/* OK, FINALLY we know that this buffer is the only one of its kind,
-	 * we hold a reference (b_count>0), it is unlocked, and it is clean.
-	 */
-	init_buffer(bh, end_buffer_io_sync, NULL);
-	bh->b_dev = dev;
-	bh->b_blocknr = block;
-	bh->b_state = 1 << BH_Mapped;
+	if (bh)
+	{
+		/* OK, FINALLY we know that this buffer is the only one of
+		   its kind, we hold a reference (b_count>0), it is unlocked,
+		   and it is clean. */
+		init_buffer(bh, end_buffer_io_sync, NULL);
+		bh->b_dev = dev;
+		bh->b_blocknr = block;
+		bh->b_state = 1 << BH_Mapped;
 
-	/* Insert the buffer into the regular lists */
-	insert_into_queues(bh);
-	goto out;
+		/* Insert the buffer into the regular lists */
+		insert_into_queues(bh);
+	out:
+		touch_buffer(bh);
+		return bh;
+	}
 
 	/*
 	 * If we block while refilling the free list, somebody may
 	 * create the buffer first ... search the hashes again.
 	 */
-refill:
 	refill_freelist(size);
 	goto repeat;
-out:
-	return bh;
 }
 
 /* -1 -> no need to flush
@@ -820,6 +856,7 @@
 	dirty = size_buffers_type[BUF_DIRTY] >> PAGE_SHIFT;
 	tot = nr_free_buffer_pages();
+	tot -= size_buffers_type[BUF_PROTECTED] >> PAGE_SHIFT;
 
 	hard_dirty_limit = tot * bdf_prm.b_un.nfract / 100;
 	soft_dirty_limit = hard_dirty_limit >> 1;
@@ -848,29 +885,46 @@
 	wakeup_bdflush(state);
 }
 
-static inline void __mark_dirty(struct buffer_head *bh, int flag)
+#define set_bh_age(bh, flag) \
+do { \
+	(bh)->b_flushtime = jiffies + \
+		((flag) ? bdf_prm.b_un.age_super : \
+		 bdf_prm.b_un.age_buffer); \
+} while(0)
+
+static __inline__ void __mark_dirty(struct buffer_head *bh, int flag)
 {
-	bh->b_flushtime = jiffies + (flag ? bdf_prm.b_un.age_super : bdf_prm.b_un.age_buffer);
-	clear_bit(BH_New, &bh->b_state);
+	set_bh_age(bh, flag);
 	refile_buffer(bh);
 }
 
+/* atomic version, the user must call balance_dirty() by hand
+   as soon as it becomes possible to block */
 void __mark_buffer_dirty(struct buffer_head *bh, int flag)
 {
-	__mark_dirty(bh, flag);
+	if (!atomic_set_buffer_dirty(bh))
+		__mark_dirty(bh, flag);
+}
+
+void mark_buffer_dirty(struct buffer_head *bh, int flag)
+{
+	__mark_buffer_dirty(bh, flag);
+	balance_dirty(bh->b_dev);
 }
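The split above exists so that code running under a spinlock can mark a buffer dirty without risking a schedule: __mark_buffer_dirty() never blocks, while the new mark_buffer_dirty() additionally throttles the writer via balance_dirty(). The deferred-balance idiom, distilled from the block_write_full_page() hunk further down (the wrapper function here is hypothetical):

	/* Sketch of the deferred-balance idiom used by the page-level
	 * write paths in this patch. */
	static void mark_page_buffers_dirty(struct buffer_head *head)
	{
		struct buffer_head *bh = head;
		int need_balance_dirty = 0;

		do {
			set_bit(BH_Uptodate, &bh->b_state);
			if (!atomic_set_buffer_dirty(bh)) {
				/* wasn't dirty: set the age and refile,
				   but don't block here */
				__mark_dirty(bh, 0);
				need_balance_dirty = 1;
			}
			bh = bh->b_this_page;
		} while (bh != head);

		/* now that blocking is safe again, throttle the writer */
		if (need_balance_dirty)
			balance_dirty(bh->b_dev);
	}
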
 
 /*
  * A buffer may need to be moved from one buffer list to another
  * (e.g. in case it is not shared any more). Handle this.
  */
-static __inline__ void __refile_buffer(struct buffer_head *bh)
+static void __refile_buffer(struct buffer_head *bh)
 {
 	int dispose = BUF_CLEAN;
 	if (buffer_locked(bh))
 		dispose = BUF_LOCKED;
 	if (buffer_dirty(bh))
 		dispose = BUF_DIRTY;
+	if (buffer_protected(bh))
+		dispose = BUF_PROTECTED;
 	if (dispose != bh->b_list) {
 		__remove_from_lru_list(bh, bh->b_list);
 		bh->b_list = dispose;
@@ -890,8 +944,6 @@
  */
 void __brelse(struct buffer_head * buf)
 {
-	touch_buffer(buf);
-
 	if (atomic_read(&buf->b_count)) {
 		atomic_dec(&buf->b_count);
 		return;
@@ -912,12 +964,10 @@
 	write_lock(&hash_table_lock);
 	if (!atomic_dec_and_test(&buf->b_count) || buffer_locked(buf))
 		goto in_use;
-	if (buf->b_pprev)
-		__hash_unlink(buf);
+	__hash_unlink(buf);
 	write_unlock(&hash_table_lock);
 	__remove_from_lru_list(buf, buf->b_list);
 	spin_unlock(&lru_list_lock);
-	buf->b_state = 0;
 	put_last_free(buf);
 	return;
 
@@ -1225,6 +1275,7 @@
 			clear_bit(BH_Uptodate, &bh->b_state);
 			clear_bit(BH_Mapped, &bh->b_state);
 			clear_bit(BH_Req, &bh->b_state);
+			clear_bit(BH_New, &bh->b_state);
 		}
 	}
 
@@ -1303,7 +1354,6 @@
 
 static void unmap_underlying_metadata(struct buffer_head * bh)
 {
-#if 0
 	if (buffer_new(bh)) {
 		struct buffer_head *old_bh;
 
@@ -1316,7 +1366,6 @@
 			__bforget(old_bh);
 		}
 	}
-#endif
 }
 
 /*
@@ -1326,7 +1375,7 @@
 int block_write_full_page(struct dentry *dentry, struct page *page)
 {
 	struct inode *inode = dentry->d_inode;
-	int err, i;
+	int err, i, need_balance_dirty = 0;
 	unsigned long block;
 	struct buffer_head *bh, *head;
 
@@ -1364,12 +1413,19 @@
 			unmap_underlying_metadata(bh);
 		}
 		set_bit(BH_Uptodate, &bh->b_state);
-		mark_buffer_dirty(bh,0);
+		if (!atomic_set_buffer_dirty(bh))
+		{
+			__mark_dirty(bh, 0);
+			need_balance_dirty = 1;
+		}
 
 		bh = bh->b_this_page;
 		block++;
 	} while (bh != head);
 
+	if (need_balance_dirty)
+		balance_dirty(bh->b_dev);
+
 	SetPageUptodate(page);
 	return 0;
 out:
@@ -1416,12 +1472,12 @@
 			if (err)
 				goto out;
 			unmap_underlying_metadata(bh);
-		}
-		if (buffer_new(bh)) {
-			zeroto = block_end;
-			if (block_start < zerofrom)
-				zerofrom = block_start;
-			continue;
+			if (buffer_new(bh)) {
+				zeroto = block_end;
+				if (block_start < zerofrom)
+					zerofrom = block_start;
+				continue;
+			}
 		}
 		if (!buffer_uptodate(bh) &&
 		    (block_start < zerofrom || block_end > to)) {
@@ -1475,7 +1531,7 @@
 			partial = 1;
 		} else {
 			set_bit(BH_Uptodate, &bh->b_state);
-			if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
+			if (!atomic_set_buffer_dirty(bh)) {
 				__mark_dirty(bh, 0);
 				need_balance_dirty = 1;
 			}
@@ -2031,13 +2087,10 @@
 
 		/* The buffer can be either on the regular
 		 * queues or on the free list..
 		 */
-		if (p->b_dev == B_FREE) {
+		if (p->b_dev != B_FREE)
+			__remove_from_queues(p);
+		else
 			__remove_from_free_list(p, index);
-		} else {
-			if (p->b_pprev)
-				__hash_unlink(p);
-			__remove_from_lru_list(p, p->b_list);
-		}
 		__put_unused_buffer_head(p);
 	} while (tmp != bh);
 	spin_unlock(&unused_list_lock);
@@ -2071,7 +2124,7 @@
 	int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
 	int protected = 0;
 	int nlist;
-	static char *buf_types[NR_LIST] = { "CLEAN", "LOCKED", "DIRTY" };
+	static char *buf_types[NR_LIST] = { "CLEAN", "LOCKED", "DIRTY", "PROTECTED", };
 #endif
 
 	printk("Buffer memory:   %6dkB\n",
@@ -2097,10 +2150,16 @@
 				used++, lastused = found;
 			bh = bh->b_next_free;
 		} while (bh != lru_list[nlist]);
-		printk("%8s: %d buffers, %d used (last=%d), "
+		{
+			int tmp = nr_buffers_type[nlist];
+			if (found != tmp)
+				printk("%9s: BUG -> found %d, reported %d\n",
+				       buf_types[nlist], found, tmp);
+		}
+		printk("%9s: %d buffers, %lu kbyte, %d used (last=%d), "
 		       "%d locked, %d protected, %d dirty\n",
-		       buf_types[nlist], found, used, lastused,
-		       locked, protected, dirty);
+		       buf_types[nlist], found, size_buffers_type[nlist]>>10,
+		       used, lastused, locked, protected, dirty);
 	}
 	spin_unlock(&lru_list_lock);
 #endif
@@ -2384,10 +2443,10 @@
 		/*
 		 * If there are still a lot of dirty buffers around,
 		 * skip the sleep and flush some more. Otherwise, we
-		 * sleep for a while.
+		 * go to sleep waiting for a wakeup.
 		 */
 		if (balance_dirty_state(NODEV) < 0)
-			schedule_timeout(5*HZ);
+			schedule();
 		/* Remember to mark us as running otherwise
 		   the next schedule will block. */
 		__set_current_state(TASK_RUNNING);
diff -urN 2.3.36/fs/devices.c 2.3.36aa1-whole/fs/devices.c
--- 2.3.36/fs/devices.c	Wed Jan 5 17:42:51 2000
+++ 2.3.36aa1-whole/fs/devices.c	Wed Jan 5 18:22:03 2000
@@ -216,7 +216,8 @@
 	if (sb && invalidate_inodes(sb))
 		printk("VFS: busy inodes on changed media.\n");
 
-	invalidate_buffers(dev);
+	/* special: trash all dirty data as well, since the media has changed */
+	destroy_buffers(dev);
 
 	if (fops->revalidate)
 		fops->revalidate(dev);
diff -urN 2.3.36/fs/exec.c 2.3.36aa1-whole/fs/exec.c
--- 2.3.36/fs/exec.c	Wed Jan 5 17:42:51 2000
+++ 2.3.36aa1-whole/fs/exec.c	Wed Jan 5 18:22:04 2000
@@ -277,13 +277,13 @@
 	pmd = pmd_alloc(pgd, address);
 	if (!pmd) {
 		__free_page(page);
-		oom(tsk);
+		force_sig(SIGKILL, tsk);
 		return;
 	}
 	pte = pte_alloc(pmd, address);
 	if (!pte) {
 		__free_page(page);
-		oom(tsk);
+		force_sig(SIGKILL, tsk);
 		return;
 	}
 	if (!pte_none(*pte)) {
diff -urN 2.3.36/fs/ext2/fsync.c 2.3.36aa1-whole/fs/ext2/fsync.c
--- 2.3.36/fs/ext2/fsync.c	Wed Dec 8 00:05:27 1999
+++ 2.3.36aa1-whole/fs/ext2/fsync.c	Wed Jan 5 18:22:03 2000
@@ -23,6 +23,7 @@
  */
 
 #include
+#include
 
@@ -40,10 +41,21 @@
 	if (!bh)
 		return 0;
 	if (wait && buffer_req(bh) && !buffer_uptodate(bh)) {
-		brelse(bh);
-		return -1;
+		/* There can be a parallel read(2) that started read-I/O
+		   on the buffer, so we can't assume that there's been
+		   an I/O error without first waiting for I/O completion.
+		 */
+		wait_on_buffer(bh);
+		if (!buffer_uptodate(bh))
+		{
+			brelse (bh);
+			return -1;
+		}
 	}
 	if (wait || !buffer_uptodate(bh) || !buffer_dirty(bh)) {
+		if (wait)
+			/* when we return from fsync all the blocks
+			   must already be stored safely on disk */
+			wait_on_buffer(bh);
 		brelse(bh);
 		return 0;
 	}
diff -urN 2.3.36/fs/ext2/inode.c 2.3.36aa1-whole/fs/ext2/inode.c
--- 2.3.36/fs/ext2/inode.c	Tue Dec 14 15:48:51 1999
+++ 2.3.36aa1-whole/fs/ext2/inode.c	Wed Jan 5 18:22:03 2000
@@ -254,6 +254,8 @@
 	}
 	if (metadata) {
 		result = getblk (inode->i_dev, tmp, blocksize);
+		if (!buffer_uptodate(result))
+			wait_on_buffer(result);
 		memset(result->b_data, 0, blocksize);
 		mark_buffer_uptodate(result, 1);
 		mark_buffer_dirty(result, 1);
@@ -363,6 +365,8 @@
 		goto out;
 	if (metadata) {
 		result = getblk (bh->b_dev, tmp, blocksize);
+		if (!buffer_uptodate(result))
+			wait_on_buffer(result);
 		memset(result->b_data, 0, inode->i_sb->s_blocksize);
 		mark_buffer_uptodate(result, 1);
 		mark_buffer_dirty(result, 1);
@@ -542,6 +546,8 @@
 		struct buffer_head *bh;
 		bh = getblk(dummy.b_dev, dummy.b_blocknr, inode->i_sb->s_blocksize);
 		if (buffer_new(&dummy)) {
+			if (!buffer_uptodate(bh))
+				wait_on_buffer(bh);
 			memset(bh->b_data, 0, inode->i_sb->s_blocksize);
 			mark_buffer_uptodate(bh, 1);
 			mark_buffer_dirty(bh, 1);
diff -urN 2.3.36/fs/super.c 2.3.36aa1-whole/fs/super.c
--- 2.3.36/fs/super.c	Wed Jan 5 17:42:52 2000
+++ 2.3.36aa1-whole/fs/super.c	Wed Jan 5 18:22:03 2000
@@ -1274,7 +1274,10 @@
 	umount_error = do_umount(old_root_dev,1, 0);
 	if (!umount_error) {
 		printk("okay\n");
-		invalidate_buffers(old_root_dev);
+		/* special: the old device driver is going to be
+		   a ramdisk and the point of this call is to free its
+		   protected memory (even if dirty). */
+		destroy_buffers(old_root_dev);
 		return 0;
 	}
 	printk(KERN_ERR "error %d\n",umount_error);
diff -urN 2.3.36/include/asm-alpha/hardirq.h 2.3.36aa1-whole/include/asm-alpha/hardirq.h
--- 2.3.36/include/asm-alpha/hardirq.h	Wed Dec 29 22:55:04 1999
+++ 2.3.36aa1-whole/include/asm-alpha/hardirq.h	Wed Jan 5 18:22:04 2000
@@ -8,8 +8,11 @@
 #ifndef __SMP__
 extern int __local_irq_count;
 #define local_irq_count(cpu)	((void)(cpu), __local_irq_count)
+extern unsigned long __irq_attempt[];
+#define irq_attempt(cpu, irq)	((void)(cpu), __irq_attempt[irq])
 #else
 #define local_irq_count(cpu)	(cpu_data[cpu].irq_count)
+#define irq_attempt(cpu, irq)	(cpu_data[cpu].irq_attempt[irq])
 #endif
 
 /*
diff -urN 2.3.36/include/asm-alpha/hw_irq.h 2.3.36aa1-whole/include/asm-alpha/hw_irq.h
--- 2.3.36/include/asm-alpha/hw_irq.h	Wed Dec 29 22:56:43 1999
+++ 2.3.36aa1-whole/include/asm-alpha/hw_irq.h	Wed Jan 5 18:22:04 2000
@@ -18,14 +18,11 @@
 	outb(0, DMA1_CLR_MASK_REG); \
 	outb(0, DMA2_CLR_MASK_REG)
 
-extern unsigned long _alpha_irq_masks[2];
-#define alpha_irq_mask _alpha_irq_masks[0]
-
 extern void common_ack_irq(unsigned long irq);
 extern void isa_device_interrupt(unsigned long vector, struct pt_regs * regs);
 extern void srm_device_interrupt(unsigned long vector, struct pt_regs * regs);
 
-extern void handle_irq(int irq, int ack, struct pt_regs * regs);
+extern void handle_irq(int irq, struct pt_regs * regs);
 
 #define RTC_IRQ 8
 #ifdef CONFIG_RTC
@@ -71,10 +68,11 @@
 
 #endif
 
-extern char _stext;
 static inline void alpha_do_profile (unsigned long pc)
 {
 	if (prof_buffer && current->pid) {
+		extern char _stext;
+
 		pc -= (unsigned long) &_stext;
 		pc >>= prof_shift;
 		/*
@@ -87,5 +85,10 @@
 			atomic_inc((atomic_t *)&prof_buffer[pc]);
 	}
 }
+
+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {}
+extern void no_action(int cpl, void *dev_id, struct pt_regs *regs);
+extern void init_ISA_irqs(void);
+extern void init_RTC_irq(void);
 
 #endif
diff -urN 2.3.36/include/asm-alpha/pgalloc.h 2.3.36aa1-whole/include/asm-alpha/pgalloc.h
--- 2.3.36/include/asm-alpha/pgalloc.h	Fri Dec 31 16:39:18 1999
+++ 2.3.36aa1-whole/include/asm-alpha/pgalloc.h	Wed Jan 5 18:22:03 2000
@@ -3,13 +3,27 @@
 
 #include
 
-/* Caches aren't brain-dead on the Alpha. */
-#define flush_cache_all()			do { } while (0)
+/* The icache is not coherent with the dcache on alpha, thus before
+   running self-modified code we must always run an imb().
+   Actually flush_cache_all() is real overkill, as it's called from
+   vmalloc() before accessing pagetables, and on the Alpha we are not
+   required to flush the icache before doing that; but the semantics of
+   flush_cache_all() require us to flush _all_ the caches, so we must be
+   correct here. It's instead vmalloc that should be changed to use a
+   more fine-grained cache flush operation (I suspect other archs also
+   don't need an icache flush while handling pagetables). OTOH vmalloc
+   is not a performance critical path, so after all we can live with it
+   for now. */
+#define flush_cache_all()			flush_icache_range(0, 0)
 #define flush_cache_mm(mm)			do { } while (0)
 #define flush_cache_range(mm, start, end)	do { } while (0)
 #define flush_cache_page(vma, vmaddr)		do { } while (0)
 #define flush_page_to_ram(page)			do { } while (0)
-#define flush_icache_range(start, end)		do { } while (0)
+#ifndef __SMP__
+#define flush_icache_range(start, end)		imb()
+#else
+#define flush_icache_range(start, end)		smp_imb()
+extern void smp_imb(void);
+#endif
 
 /*
  * Use a few helper functions to hide the ugly broken ASN
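With flush_icache_range() now expanding to a real imb() (or to smp_imb(), which broadcasts the barrier to all CPUs), any path that generates instructions in memory must flush before jumping into them. A minimal sketch, assuming hypothetical buf/len/src arguments; only flush_icache_range() itself comes from the hunk above:

	/* Sketch: publishing freshly written instructions on alpha.
	 * install_code() and its arguments are hypothetical. */
	static void install_code(void *buf, const void *src, unsigned long len)
	{
		memcpy(buf, src, len);	/* dcache now holds the new code */
		flush_icache_range((unsigned long) buf,
				   (unsigned long) buf + len);	/* imb()/smp_imb() */
		/* only now is it safe to jump into 'buf' */
	}
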
diff -urN 2.3.36/include/asm-alpha/smp.h 2.3.36aa1-whole/include/asm-alpha/smp.h
--- 2.3.36/include/asm-alpha/smp.h	Wed Dec 29 22:55:04 1999
+++ 2.3.36aa1-whole/include/asm-alpha/smp.h	Wed Jan 5 18:22:04 2000
@@ -20,6 +20,7 @@
 #ifdef __SMP__
 
 #include
+#include
 
 struct cpuinfo_alpha {
 	unsigned long loops_per_sec;
@@ -28,6 +29,8 @@
 	unsigned long *pte_cache;
 	unsigned long pgtable_cache_sz;
 	unsigned long ipi_count;
+	unsigned long irq_attempt[NR_IRQS];
+	unsigned long smp_local_irq_count;
 	unsigned long prof_multiplier;
 	unsigned long prof_counter;
 	int irq_count, bh_count;
diff -urN 2.3.36/include/linux/fs.h 2.3.36aa1-whole/include/linux/fs.h
--- 2.3.36/include/linux/fs.h	Wed Jan 5 17:42:52 2000
+++ 2.3.36aa1-whole/include/linux/fs.h	Wed Jan 5 18:22:05 2000
@@ -786,7 +786,8 @@
 #define BUF_CLEAN	0
 #define BUF_LOCKED	1	/* Buffers scheduled for write */
 #define BUF_DIRTY	2	/* Dirty buffers, not yet scheduled for write */
-#define NR_LIST		3
+#define BUF_PROTECTED	3	/* Ramdisk persistent storage */
+#define NR_LIST		4
 
 /*
  * This is called by bh->b_end_io() handlers when I/O has completed.
@@ -812,21 +813,31 @@
 	__mark_buffer_clean(bh);
 }
 
-extern void FASTCALL(__mark_buffer_dirty(struct buffer_head *bh, int flag));
+#define atomic_set_buffer_protected(bh) test_and_set_bit(BH_Protected, &(bh)->b_state)
 
-#define atomic_set_buffer_dirty(bh) test_and_set_bit(BH_Dirty, &(bh)->b_state)
+extern inline void __mark_buffer_protected(struct buffer_head *bh)
+{
+	refile_buffer(bh);
+}
 
-extern inline void mark_buffer_dirty(struct buffer_head * bh, int flag)
+extern inline void mark_buffer_protected(struct buffer_head * bh)
 {
-	if (!atomic_set_buffer_dirty(bh))
-		__mark_buffer_dirty(bh, flag);
+	if (!atomic_set_buffer_protected(bh))
+		__mark_buffer_protected(bh);
 }
 
+extern void FASTCALL(__mark_buffer_dirty(struct buffer_head *bh, int flag));
+extern void FASTCALL(mark_buffer_dirty(struct buffer_head *bh, int flag));
+
+#define atomic_set_buffer_dirty(bh) test_and_set_bit(BH_Dirty, &(bh)->b_state)
+
 extern void balance_dirty(kdev_t);
 extern int check_disk_change(kdev_t);
 extern int invalidate_inodes(struct super_block *);
 extern void invalidate_inode_pages(struct inode *);
-extern void invalidate_buffers(kdev_t);
+#define invalidate_buffers(dev)	__invalidate_buffers((dev), 0)
+#define destroy_buffers(dev)	__invalidate_buffers((dev), 1)
+extern void __invalidate_buffers(kdev_t dev, int);
 extern int floppy_is_wp(int);
 extern void sync_inodes(kdev_t);
 extern void write_inode_now(struct inode *);
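BUF_PROTECTED is the ramdisk's list: buffers refiled there by mark_buffer_protected() are excluded from the dirty-memory accounting above and survive a plain invalidate_buffers(); only destroy_buffers() reclaims them. A sketch of how a ramdisk-style driver might pin a block in core (the helper is hypothetical; the mark_buffer_* calls are the ones this hunk declares):

	/* Sketch: pinning a ramdisk block so the VM never reclaims it. */
	static void rd_pin_block(kdev_t dev, int block, int blocksize)
	{
		struct buffer_head *bh = getblk(dev, block, blocksize);

		if (!bh)
			return;
		mark_buffer_uptodate(bh, 1);
		mark_buffer_protected(bh);	/* refiles onto BUF_PROTECTED */
		brelse(bh);
	}
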
diff -urN 2.3.36/include/linux/irq.h 2.3.36aa1-whole/include/linux/irq.h
--- 2.3.36/include/linux/irq.h	Fri Dec 31 02:43:15 1999
+++ 2.3.36aa1-whole/include/linux/irq.h	Wed Jan 5 18:22:04 2000
@@ -11,6 +11,7 @@
 #define IRQ_REPLAY	8	/* IRQ has been replayed but not acked yet */
 #define IRQ_AUTODETECT	16	/* IRQ is being autodetected */
 #define IRQ_WAITING	32	/* IRQ not yet seen - for autodetection */
+#define IRQ_LEVEL	64	/* IRQ level triggered */
 
 /*
  * Interrupt controller descriptor. This is all we need
diff -urN 2.3.36/include/linux/mm.h 2.3.36aa1-whole/include/linux/mm.h
--- 2.3.36/include/linux/mm.h	Wed Jan 5 17:42:52 2000
+++ 2.3.36aa1-whole/include/linux/mm.h	Wed Jan 5 18:22:04 2000
@@ -400,7 +400,6 @@
 		unsigned int * zones_size, unsigned long zone_start_paddr);
 extern void mem_init(void);
 extern void show_mem(void);
-extern void oom(struct task_struct * tsk);
 extern void si_meminfo(struct sysinfo * val);
 extern void swapin_readahead(swp_entry_t);
diff -urN 2.3.36/include/linux/rbtree.h 2.3.36aa1-whole/include/linux/rbtree.h
--- 2.3.36/include/linux/rbtree.h	Thu Jan 1 01:00:00 1970
+++ 2.3.36aa1-whole/include/linux/rbtree.h	Wed Jan 5 18:22:05 2000
@@ -0,0 +1,128 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+  linux/include/linux/rbtree.h
+
+  To use rbtrees you'll have to implement your own insert and search
+  cores. This avoids the use of callbacks, which would dramatically
+  degrade performance. I know it's not the cleanest way, but in C
+  (not in C++) this is how to get both performance and genericity...
+
+  Examples of insert and search follow here. The search is a plain
+  normal search over an ordered tree. The insert instead must be
+  implemented in two steps: first, the code must insert the element
+  in order as a red leaf in the tree, then the support library function
+  rb_insert_color() must be called. That function will do the
+  non-trivial work of rebalancing the rbtree, if necessary.
+
+-----------------------------------------------------------------------
+static inline struct page * rb_search_page_cache(struct inode * inode,
+						 unsigned long offset)
+{
+	rb_node_t * n = inode->i_rb_page_cache.rb_node;
+	struct page * page;
+
+	while (n)
+	{
+		page = rb_entry(n, struct page, rb_page_cache);
+
+		if (offset < page->offset)
+			n = n->rb_left;
+		else if (offset > page->offset)
+			n = n->rb_right;
+		else
+			return page;
+	}
+	return NULL;
+}
+
+static inline struct page * __rb_insert_page_cache(struct inode * inode,
+						   unsigned long offset,
+						   rb_node_t * node)
+{
+	rb_node_t ** p = &inode->i_rb_page_cache.rb_node;
+	rb_node_t * parent = NULL;
+	struct page * page;
+
+	while (*p)
+	{
+		parent = *p;
+		page = rb_entry(parent, struct page, rb_page_cache);
+
+		if (offset < page->offset)
+			p = &(*p)->rb_left;
+		else if (offset > page->offset)
+			p = &(*p)->rb_right;
+		else
+			return page;
+	}
+
+	node->rb_parent = parent;
+	node->rb_color = RB_RED;
+	node->rb_left = node->rb_right = NULL;
+
+	*p = node;
+
+	return NULL;
+}
+
+static inline struct page * rb_insert_page_cache(struct inode * inode,
+						 unsigned long offset,
+						 rb_node_t * node)
+{
+	struct page * ret;
+	if ((ret = __rb_insert_page_cache(inode, offset, node)))
+		goto out;
+	rb_insert_color(node, &inode->i_rb_page_cache);
+ out:
+	return ret;
+}
+-----------------------------------------------------------------------
+*/
+
+#ifndef _LINUX_RBTREE_H
+#define _LINUX_RBTREE_H
+
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+
+typedef struct rb_node_s
+{
+	struct rb_node_s * rb_parent;
+	int rb_color;
+#define	RB_RED		0
+#define	RB_BLACK	1
+	struct rb_node_s * rb_right;
+	struct rb_node_s * rb_left;
+}
+rb_node_t;
+
+typedef struct rb_root_s
+{
+	struct rb_node_s * rb_node;
+}
+rb_root_t;
+
+#define RB_ROOT	(rb_root_t) { NULL, }
+#define	rb_entry(ptr, type, member) \
+	((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+extern void rb_insert_color(rb_node_t *, rb_root_t *);
+extern void rb_erase(rb_node_t *, rb_root_t *);
+
+#endif	/* _LINUX_RBTREE_H */
diff -urN 2.3.36/kernel/ksyms.c 2.3.36aa1-whole/kernel/ksyms.c
--- 2.3.36/kernel/ksyms.c	Wed Jan 5 17:42:52 2000
+++ 2.3.36aa1-whole/kernel/ksyms.c	Wed Jan 5 18:22:03 2000
@@ -151,6 +151,7 @@
 EXPORT_SYMBOL(d_alloc);
 EXPORT_SYMBOL(d_lookup);
 EXPORT_SYMBOL(d_path);
+EXPORT_SYMBOL(mark_buffer_dirty);
 EXPORT_SYMBOL(__mark_buffer_dirty);
 EXPORT_SYMBOL(__mark_inode_dirty);
 EXPORT_SYMBOL(free_kiovec);
@@ -163,7 +164,7 @@
 EXPORT_SYMBOL(put_filp);
 EXPORT_SYMBOL(files_lock);
 EXPORT_SYMBOL(check_disk_change);
-EXPORT_SYMBOL(invalidate_buffers);
+EXPORT_SYMBOL(__invalidate_buffers);
 EXPORT_SYMBOL(invalidate_inodes);
 EXPORT_SYMBOL(invalidate_inode_pages);
 EXPORT_SYMBOL(truncate_inode_pages);
diff -urN 2.3.36/kernel/ptrace.c 2.3.36aa1-whole/kernel/ptrace.c
--- 2.3.36/kernel/ptrace.c	Sun Nov 21 03:20:20 1999
+++ 2.3.36aa1-whole/kernel/ptrace.c	Wed Jan 5 18:22:04 2000
@@ -26,6 +26,7 @@
 	unsigned long mapnr;
 	unsigned long maddr;
 	struct page *page;
+	int fault;
 
 repeat:
 	pgdir = pgd_offset(vma->vm_mm, addr);
@@ -64,8 +65,12 @@
 
 fault_in_page:
 	/* -1: out of memory. 0 - unmapped page */
-	if (handle_mm_fault(tsk, vma, addr, write) > 0)
+	fault = handle_mm_fault(tsk, vma, addr, write);
+	if (fault > 0)
 		goto repeat;
+	if (fault < 0)
+		/* the out of memory condition has been triggered by the
+		   current task. */
+		force_sig(SIGKILL, current);
 	return 0;
 
 bad_pgd:
diff -urN 2.3.36/lib/Makefile 2.3.36aa1-whole/lib/Makefile
--- 2.3.36/lib/Makefile	Mon Jan 18 02:27:00 1999
+++ 2.3.36aa1-whole/lib/Makefile	Wed Jan 5 18:22:05 2000
@@ -7,6 +7,6 @@
 #
 
 L_TARGET := lib.a
-L_OBJS := errno.o ctype.o string.o vsprintf.o
+L_OBJS := errno.o ctype.o string.o vsprintf.o rbtree.o
 
 include $(TOPDIR)/Rules.make
diff -urN 2.3.36/lib/rbtree.c 2.3.36aa1-whole/lib/rbtree.c
--- 2.3.36/lib/rbtree.c	Thu Jan 1 01:00:00 1970
+++ 2.3.36aa1-whole/lib/rbtree.c	Wed Jan 5 18:22:05 2000
@@ -0,0 +1,293 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+  linux/lib/rbtree.c
+*/
+
+#include <linux/rbtree.h>
+
+static void __rb_rotate_left(rb_node_t * node, rb_root_t * root)
+{
+	rb_node_t * right = node->rb_right;
+
+	if ((node->rb_right = right->rb_left))
+		right->rb_left->rb_parent = node;
+	right->rb_left = node;
+
+	if ((right->rb_parent = node->rb_parent))
+	{
+		if (node == node->rb_parent->rb_left)
+			node->rb_parent->rb_left = right;
+		else
+			node->rb_parent->rb_right = right;
+	}
+	else
+		root->rb_node = right;
+	node->rb_parent = right;
+}
+
+static void __rb_rotate_right(rb_node_t * node, rb_root_t * root)
+{
+	rb_node_t * left = node->rb_left;
+
+	if ((node->rb_left = left->rb_right))
+		left->rb_right->rb_parent = node;
+	left->rb_right = node;
+
+	if ((left->rb_parent = node->rb_parent))
+	{
+		if (node == node->rb_parent->rb_right)
+			node->rb_parent->rb_right = left;
+		else
+			node->rb_parent->rb_left = left;
+	}
+	else
+		root->rb_node = left;
+	node->rb_parent = left;
+}
+
+void rb_insert_color(rb_node_t * node, rb_root_t * root)
+{
+	rb_node_t * parent, * gparent;
+
+	while ((parent = node->rb_parent) && parent->rb_color == RB_RED)
+	{
+		gparent = parent->rb_parent;
+
+		if (parent == gparent->rb_left)
+		{
+			{
+				register rb_node_t * uncle = gparent->rb_right;
+				if (uncle && uncle->rb_color == RB_RED)
+				{
+					uncle->rb_color = RB_BLACK;
+					parent->rb_color = RB_BLACK;
+					gparent->rb_color = RB_RED;
+					node = gparent;
+					continue;
+				}
+			}
+
+			if (parent->rb_right == node)
+			{
+				register rb_node_t * tmp;
+				__rb_rotate_left(parent, root);
+				tmp = parent;
+				parent = node;
+				node = tmp;
+			}
+
+			parent->rb_color = RB_BLACK;
+			gparent->rb_color = RB_RED;
+			__rb_rotate_right(gparent, root);
+		} else {
+			{
+				register rb_node_t * uncle = gparent->rb_left;
+				if (uncle && uncle->rb_color == RB_RED)
+				{
+					uncle->rb_color = RB_BLACK;
+					parent->rb_color = RB_BLACK;
+					gparent->rb_color = RB_RED;
+					node = gparent;
+					continue;
+				}
+			}
+
+			if (parent->rb_left == node)
+			{
+				register rb_node_t * tmp;
+				__rb_rotate_right(parent, root);
+				tmp = parent;
+				parent = node;
+				node = tmp;
+			}
+
+			parent->rb_color = RB_BLACK;
+			gparent->rb_color = RB_RED;
+			__rb_rotate_left(gparent, root);
+		}
+	}
+
+	root->rb_node->rb_color = RB_BLACK;
+}
+
+static void __rb_erase_color(rb_node_t * node, rb_node_t * parent,
+			     rb_root_t * root)
+{
+	rb_node_t * other;
+
+	while ((!node || node->rb_color == RB_BLACK) && node != root->rb_node)
+	{
+		if (parent->rb_left == node)
+		{
+			other = parent->rb_right;
+			if (other->rb_color == RB_RED)
+			{
+				other->rb_color = RB_BLACK;
+				parent->rb_color = RB_RED;
+				__rb_rotate_left(parent, root);
+				other = parent->rb_right;
+			}
+			if ((!other->rb_left ||
+			     other->rb_left->rb_color == RB_BLACK)
+			    && (!other->rb_right ||
+				other->rb_right->rb_color == RB_BLACK))
+			{
+				other->rb_color = RB_RED;
+				node = parent;
+				parent = node->rb_parent;
+			}
+			else
+			{
+				if (!other->rb_right ||
+				    other->rb_right->rb_color == RB_BLACK)
+				{
+					register rb_node_t * o_left;
+					if ((o_left = other->rb_left))
+						o_left->rb_color = RB_BLACK;
+					other->rb_color = RB_RED;
+					__rb_rotate_right(other, root);
+					other = parent->rb_right;
+				}
+				other->rb_color = parent->rb_color;
+				parent->rb_color = RB_BLACK;
+				if (other->rb_right)
+					other->rb_right->rb_color = RB_BLACK;
+				__rb_rotate_left(parent, root);
+				node = root->rb_node;
+				break;
+			}
+		}
+		else
+		{
+			other = parent->rb_left;
+			if (other->rb_color == RB_RED)
+			{
+				other->rb_color = RB_BLACK;
+				parent->rb_color = RB_RED;
+				__rb_rotate_right(parent, root);
+				other = parent->rb_left;
+			}
+			if ((!other->rb_left ||
+			     other->rb_left->rb_color == RB_BLACK)
+			    && (!other->rb_right ||
+				other->rb_right->rb_color == RB_BLACK))
+			{
+				other->rb_color = RB_RED;
+				node = parent;
+				parent = node->rb_parent;
+			}
+			else
+			{
+				if (!other->rb_left ||
+				    other->rb_left->rb_color == RB_BLACK)
+				{
+					register rb_node_t * o_right;
+					if ((o_right = other->rb_right))
+						o_right->rb_color = RB_BLACK;
+					other->rb_color = RB_RED;
+					__rb_rotate_left(other, root);
+					other = parent->rb_left;
+				}
+				other->rb_color = parent->rb_color;
+				parent->rb_color = RB_BLACK;
+				if (other->rb_left)
+					other->rb_left->rb_color = RB_BLACK;
+				__rb_rotate_right(parent, root);
+				node = root->rb_node;
+				break;
+			}
+		}
+	}
+	if (node)
+		node->rb_color = RB_BLACK;
+}
+
+void rb_erase(rb_node_t * node, rb_root_t * root)
+{
+	rb_node_t * child, * parent;
+	int color;
+
+	if (!node->rb_left)
+		child = node->rb_right;
+	else if (!node->rb_right)
+		child = node->rb_left;
+	else
+	{
+		rb_node_t * old = node, * left;
+
+		node = node->rb_right;
+		while ((left = node->rb_left))
+			node = left;
+		child = node->rb_right;
+		parent = node->rb_parent;
+		color = node->rb_color;
+
+		if (child)
+			child->rb_parent = parent;
+		if (parent)
+		{
+			if (parent->rb_left == node)
+				parent->rb_left = child;
+			else
+				parent->rb_right = child;
+		}
+		else
+			root->rb_node = child;
+
+		if (node->rb_parent == old)
+			parent = node;
+		node->rb_parent = old->rb_parent;
+		node->rb_color = old->rb_color;
+		node->rb_right = old->rb_right;
+		node->rb_left = old->rb_left;
+
+		if (old->rb_parent)
+		{
+			if (old->rb_parent->rb_left == old)
+				old->rb_parent->rb_left = node;
+			else
+				old->rb_parent->rb_right = node;
+		} else
+			root->rb_node = node;
+
+		old->rb_left->rb_parent = node;
+		if (old->rb_right)
+			old->rb_right->rb_parent = node;
+		goto color;
+	}
+
+	parent = node->rb_parent;
+	color = node->rb_color;
+
+	if (child)
+		child->rb_parent = parent;
+	if (parent)
+	{
+		if (parent->rb_left == node)
+			parent->rb_left = child;
+		else
+			parent->rb_right = child;
+	}
+	else
+		root->rb_node = child;
+
+ color:
+	if (color == RB_BLACK)
+		__rb_erase_color(child, parent, root);
+}
diff -urN 2.3.36/mm/memory.c 2.3.36aa1-whole/mm/memory.c
--- 2.3.36/mm/memory.c	Wed Jan 5 17:42:52 2000
+++ 2.3.36aa1-whole/mm/memory.c	Wed Jan 5 18:22:04 2000
@@ -70,16 +70,6 @@
 
 mem_map_t * mem_map = NULL;
 
 /*
- * oom() prints a message (so that the user knows why the process died),
- * and gives the process an untrappable SIGKILL.
- */
-void oom(struct task_struct * task)
-{
-	printk("\nOut of memory for %s.\n", task->comm);
-	force_sig(SIGKILL, task);
-}
-
-/*
  * Note: this doesn't free the actual pages themselves. That
  * has been handled earlier when unmapping all the memory regions.
  */
diff -urN 2.3.36/mm/vmscan.c 2.3.36aa1-whole/mm/vmscan.c
--- 2.3.36/mm/vmscan.c	Fri Dec 31 16:33:05 1999
+++ 2.3.36aa1-whole/mm/vmscan.c	Wed Jan 5 18:22:04 2000
@@ -328,6 +328,7 @@
 	struct task_struct * p;
 	int counter;
 	int __ret = 0;
+	int assign = 0;
 
 	lock_kernel();
 	/*
@@ -347,12 +348,9 @@
 	counter = nr_threads / (priority+1);
 	if (counter < 1)
 		counter = 1;
-	if (counter > nr_threads)
-		counter = nr_threads;
 
 	for (; counter >= 0; counter--) {
-		int assign = 0;
-		int max_cnt = 0;
+		unsigned long max_cnt = 0;
 		struct mm_struct *best = NULL;
 		int pid = 0;
 	select:
@@ -365,7 +363,7 @@
 			if (mm->rss <= 0)
 				continue;
 			/* Refresh swap_cnt? */
-			if (assign)
+			if (assign == 1)
 				mm->swap_cnt = mm->rss;
 			if (mm->swap_cnt > max_cnt) {
 				max_cnt = mm->swap_cnt;
@@ -374,6 +372,8 @@
 			}
 		}
 		read_unlock(&tasklist_lock);
+		if (assign == 1)
+			assign = 2;
 		if (!best) {
 			if (!assign) {
 				assign = 1;