diff -urN ref/arch/alpha/kernel/signal.c oom/arch/alpha/kernel/signal.c --- ref/arch/alpha/kernel/signal.c Thu Feb 3 20:49:40 2000 +++ oom/arch/alpha/kernel/signal.c Thu Feb 3 20:49:49 2000 @@ -437,6 +437,8 @@ err |= __copy_to_user(frame->extramask, &set->sig[1], sizeof(frame->extramask)); } + if (err) + goto give_sigsegv; /* Set up to return from userspace. If provided, use a stub already in userspace. */ @@ -499,6 +501,8 @@ err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, sw, set->sig[0], oldsp); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; /* Set up to return from userspace. If provided, use a stub already in userspace. */ diff -urN ref/arch/alpha/mm/fault.c oom/arch/alpha/mm/fault.c --- ref/arch/alpha/mm/fault.c Thu Feb 3 20:49:40 2000 +++ oom/arch/alpha/mm/fault.c Thu Feb 3 20:49:49 2000 @@ -130,13 +130,13 @@ * make sure we exit gracefully rather than endlessly redo * the fault. */ +survive: fault = handle_mm_fault(current, vma, address, cause > 0); - up(&mm->mmap_sem); - if (fault < 0) goto out_of_memory; if (fault == 0) goto do_sigbus; + up(&mm->mmap_sem); return; @@ -177,13 +177,23 @@ * us unable to handle the page fault gracefully. */ out_of_memory: - printk(KERN_ALERT "VM: killing process %s(%d)\n", - current->comm, current->pid); - if (!user_mode(regs)) - goto no_context; - do_exit(SIGKILL); + if (current->pid == 1) + { + current->policy |= SCHED_YIELD; + schedule(); + goto survive; + } + up(&mm->mmap_sem); + if (user_mode(regs)) + { + printk(KERN_ALERT "VM: killing process %s(%d)\n", + current->comm, current->pid); + do_exit(SIGKILL); + } + goto no_context; do_sigbus: + up(&mm->mmap_sem); /* * Send a sigbus, regardless of whether we were in kernel * or user mode. diff -urN ref/arch/i386/kernel/signal.c oom/arch/i386/kernel/signal.c --- ref/arch/i386/kernel/signal.c Thu Feb 3 20:49:40 2000 +++ oom/arch/i386/kernel/signal.c Thu Feb 3 20:49:49 2000 @@ -419,13 +419,19 @@ ? 
current->exec_domain->signal_invmap[sig] : sig), &frame->sig); + if (err) + goto give_sigsegv; err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); + if (err) + goto give_sigsegv; if (_NSIG_WORDS > 1) { err |= __copy_to_user(frame->extramask, &set->sig[1], sizeof(frame->extramask)); } + if (err) + goto give_sigsegv; /* Set up to return from userspace. If provided, use a stub already in userspace. */ @@ -486,6 +492,8 @@ err |= __put_user(&frame->info, &frame->pinfo); err |= __put_user(&frame->uc, &frame->puc); err |= __copy_to_user(&frame->info, info, sizeof(*info)); + if (err) + goto give_sigsegv; /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); @@ -497,6 +505,8 @@ err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; /* Set up to return from userspace. If provided, use a stub already in userspace. */ diff -urN ref/arch/i386/mm/fault.c oom/arch/i386/mm/fault.c --- ref/arch/i386/mm/fault.c Thu Feb 3 20:49:40 2000 +++ oom/arch/i386/mm/fault.c Thu Feb 3 20:49:58 2000 @@ -32,6 +32,7 @@ { struct vm_area_struct * vma; unsigned long start = (unsigned long) addr; + int fault; if (!size) return 1; @@ -51,8 +52,12 @@ start &= PAGE_MASK; for (;;) { - if (handle_mm_fault(current, vma, start, 1) <= 0) - goto bad_area; +survive: + fault = handle_mm_fault(current, vma, start, 1); + if (!fault) + goto do_sigbus; + if (fault < 0) + goto out_of_memory; if (!size) break; size--; @@ -75,6 +80,19 @@ bad_area: return 0; + +do_sigbus: + force_sig(SIGBUS, current); + goto bad_area; + +out_of_memory: + if (current->pid == 1) + { + current->policy |= SCHED_YIELD; + schedule(); + goto survive; + } + goto bad_area; } static void __init handle_wp_test (void) @@ -192,6 +210,7 @@ * make sure we exit gracefully rather than endlessly redo * the fault. 
*/ +survive: { int fault = handle_mm_fault(tsk, vma, address, write); if (fault < 0) @@ -288,10 +307,39 @@ * us unable to handle the page fault gracefully. */ out_of_memory: + if (tsk->pid == 1) + { + tsk->policy |= SCHED_YIELD; + schedule(); + goto survive; + } up(&mm->mmap_sem); - printk("VM: killing process %s\n", tsk->comm); if (error_code & 4) - do_exit(SIGKILL); + { + if (tsk->oom_kill_try++ > 10 || + !((regs->eflags >> 12) & 3)) + { + printk(KERN_ALERT "VM: killing process %s\n", + tsk->comm); + do_exit(SIGKILL); + } + else + { + /* + * The task is running with privileges and so we + * trust it and we give it a chance to die gracefully. + */ + printk(KERN_ALERT "VM: terminating process %s\n", + tsk->comm); + force_sig(SIGTERM, current); + if (tsk->oom_kill_try > 1) + { + tsk->policy |= SCHED_YIELD; + schedule(); + } + return; + } + } goto no_context; do_sigbus: diff -urN ref/fs/exec.c oom/fs/exec.c --- ref/fs/exec.c Thu Feb 3 20:49:40 2000 +++ oom/fs/exec.c Thu Feb 3 20:49:49 2000 @@ -277,13 +277,13 @@ pmd = pmd_alloc(pgd, address); if (!pmd) { __free_page(page); - oom(tsk); + force_sig(SIGKILL, tsk); return; } pte = pte_alloc(pmd, address); if (!pte) { __free_page(page); - oom(tsk); + force_sig(SIGKILL, tsk); return; } if (!pte_none(*pte)) { diff -urN ref/include/linux/mm.h oom/include/linux/mm.h --- ref/include/linux/mm.h Thu Feb 3 20:49:40 2000 +++ oom/include/linux/mm.h Thu Feb 3 20:49:49 2000 @@ -399,7 +399,6 @@ unsigned int * zones_size, unsigned long zone_start_paddr); extern void mem_init(void); extern void show_mem(void); -extern void oom(struct task_struct * tsk); extern void si_meminfo(struct sysinfo * val); extern void swapin_readahead(swp_entry_t); diff -urN ref/include/linux/sched.h oom/include/linux/sched.h --- ref/include/linux/sched.h Sun Jan 30 15:43:30 2000 +++ oom/include/linux/sched.h Thu Feb 3 20:51:08 2000 @@ -357,6 +357,9 @@ u32 self_exec_id; /* Protection of fields allocatio/deallocation */ struct semaphore exit_sem; + +/* oom 
handling, left at the end since it's not critical info */ + int oom_kill_try; }; /* @@ -427,6 +430,7 @@ /* signals */ SPIN_LOCK_UNLOCKED, &init_signals, {{0}}, {{0}}, NULL, &init_task.sigqueue, 0, 0, \ /* exec cts */ 0,0, \ /* exit_sem */ __MUTEX_INITIALIZER(name.exit_sem), \ +/* oom */ 0, \ } #ifndef INIT_TASK_SIZE diff -urN ref/kernel/ptrace.c oom/kernel/ptrace.c --- ref/kernel/ptrace.c Thu Feb 3 20:49:40 2000 +++ oom/kernel/ptrace.c Thu Feb 3 20:49:49 2000 @@ -26,6 +26,7 @@ unsigned long mapnr; unsigned long maddr; struct page *page; + int fault; repeat: pgdir = pgd_offset(vma->vm_mm, addr); @@ -64,8 +65,12 @@ fault_in_page: /* -1: out of memory. 0 - unmapped page */ - if (handle_mm_fault(tsk, vma, addr, write) > 0) + fault = handle_mm_fault(tsk, vma, addr, write); + if (fault > 0) goto repeat; + if (fault < 0) + /* the out-of-memory condition has been triggered by the current task. */ + force_sig(SIGKILL, current); return 0; bad_pgd: diff -urN ref/mm/memory.c oom/mm/memory.c --- ref/mm/memory.c Thu Feb 3 20:49:40 2000 +++ oom/mm/memory.c Thu Feb 3 20:49:49 2000 @@ -70,16 +70,6 @@ mem_map_t * mem_map = NULL; /* - * oom() prints a message (so that the user knows why the process died), - * and gives the process an untrappable SIGKILL. - */ -void oom(struct task_struct * task) -{ - printk("\nOut of memory for %s.\n", task->comm); - force_sig(SIGKILL, task); -} - -/* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. 
*/ diff -urN ref/mm/vmscan.c oom/mm/vmscan.c --- ref/mm/vmscan.c Thu Feb 3 20:49:40 2000 +++ oom/mm/vmscan.c Thu Feb 3 20:49:49 2000 @@ -325,6 +325,7 @@ struct task_struct * p; int counter; int __ret = 0; + int assign = 0; lock_kernel(); /* @@ -344,12 +345,9 @@ counter = nr_threads / (priority+1); if (counter < 1) counter = 1; - if (counter > nr_threads) - counter = nr_threads; for (; counter >= 0; counter--) { - int assign = 0; - int max_cnt = 0; + unsigned long max_cnt = 0; struct mm_struct *best = NULL; int pid = 0; select: @@ -362,7 +360,7 @@ if (mm->rss <= 0) continue; /* Refresh swap_cnt? */ - if (assign) + if (assign == 1) mm->swap_cnt = mm->rss; if (mm->swap_cnt > max_cnt) { max_cnt = mm->swap_cnt; @@ -371,6 +369,8 @@ } } read_unlock(&tasklist_lock); + if (assign == 1) + assign = 2; if (!best) { if (!assign) { assign = 1;