/*
 * Patch summary (unified diff, "diff -urN", from tree 2.2.15pre12 to tree VM).
 *
 * arch/i386/mm/fault.c
 *   In the "error_code & 4" branch the task is now also killed with
 *   do_exit(SIGKILL) when the pre-increment value of tsk->oom_kill_try is
 *   greater than 10, in addition to the existing test on eflags bits 12-13.
 *   After force_sig(SIGTERM, current), a task whose oom_kill_try is above 1
 *   sets SCHED_YIELD in its policy and calls schedule() before returning.
 *
 * include/linux/mm.h
 *   __free_page(page) becomes a macro for __free_pages((page), 0) and
 *   __free_pages() is declared extern.  GFP_BUFFER changes from
 *   (__GFP_LOW | __GFP_WAIT) to (__GFP_MED | __GFP_WAIT).
 *
 * include/linux/sched.h
 *   Adds the one-bit field trashing_mem next to swappable, adds
 *   "int oom_kill_try" at the end of the structure, and adds one zero
 *   initializer for each of them to the static initializer.
 *
 * kernel/ksyms.c
 *   Exports __free_pages instead of __free_page.
 *
 * mm/Makefile
 *   Drops oom_kill.o from O_OBJS.
 *
 * mm/filemap.c
 *   Moves the "page->count != 1" test ahead of the
 *   test_and_clear_bit(PG_referenced, ...) call, so the referenced bit is
 *   no longer cleared on pages that have more than one user.
 *
 * mm/oom_kill.c
 *   Deleted in full (hunk -1,188 +0,0): int_sqrt(), badness(),
 *   select_bad_process(), oom_kill() and out_of_memory() all go away.
 *
 * mm/page_alloc.c
 *   Removes low_on_memory and the out_of_memory() declaration, makes
 *   __free_pages() non-static and removes the __free_page() function.
 *   The non-PF_MEMALLOC allocation path becomes:
 *     - nr_free_pages >= freepages.high: clear current->trashing_mem if it
 *       is set, then allocate;
 *     - otherwise wake kswapd_wait when nr_free_pages < freepages.low, and
 *       allocate when nr_free_pages > freepages.min and trashing_mem is
 *       not set;
 *     - otherwise set trashing_mem, call try_to_free_pages() with
 *       PF_MEMALLOC set, and go to nopage only when nothing was freed and
 *       gfp_mask has neither __GFP_MED nor __GFP_HIGH.
 *
 * mm/vmscan.c
 *   Removes the low_on_memory extern and the block that set PG_referenced
 *   when page_map->count <= 2; kswapd_wait loses its explicit NULL
 *   initializer.  The kswapd loop now calls interruptible_sleep_on() first,
 *   then loops while nr_free_pages < freepages.high: after a successful
 *   do_try_to_free_pages(GFP_KSWAPD) it reschedules if need_resched is set
 *   and continues; after a failure it sleeps in TASK_INTERRUPTIBLE for
 *   schedule_timeout(10*HZ).  Both run_task_queue(&tq_disk) calls and the
 *   trailing interruptible_sleep_on_timeout() are removed.
 *
 * NOTE(review): within these hunks oom_kill_try is only ever incremented
 * and read; no reset is visible here -- confirm whether one exists elsewhere.
 * NOTE(review): in this copy the #include lines carry no header names and
 * the original line breaks appear to be collapsed; presumably it will not
 * apply as-is -- verify against the original patch before use.
 */
diff -urN 2.2.15pre12/arch/i386/mm/fault.c VM/arch/i386/mm/fault.c --- 2.2.15pre12/arch/i386/mm/fault.c Mon Jan 17 16:44:33 2000 +++ VM/arch/i386/mm/fault.c Fri Mar 3 03:50:11 2000 @@ -291,7 +291,8 @@ up(&mm->mmap_sem); if (error_code & 4) { - if (!((regs->eflags >> 12) & 3)) + if (tsk->oom_kill_try++ > 10 || + !((regs->eflags >> 12) & 3)) { printk("VM: killing process %s\n", tsk->comm); do_exit(SIGKILL); @@ -304,6 +305,11 @@ */ printk("VM: terminating process %s\n", tsk->comm); force_sig(SIGTERM, current); + if (tsk->oom_kill_try > 1) + { + tsk->policy |= SCHED_YIELD; + schedule(); + } return; } } diff -urN 2.2.15pre12/include/linux/mm.h VM/include/linux/mm.h --- 2.2.15pre12/include/linux/mm.h Sun Feb 20 18:47:36 2000 +++ VM/include/linux/mm.h Fri Mar 3 03:49:18 2000 @@ -275,7 +275,8 @@ #define free_page(addr) free_pages((addr),0) extern void FASTCALL(free_pages(unsigned long addr, unsigned long order)); -extern void FASTCALL(__free_page(struct page *)); +#define __free_page(page) __free_pages((page),0) +extern void FASTCALL(__free_pages(struct page *, unsigned long)); extern void show_free_areas(void); extern unsigned long put_dirty_page(struct task_struct * tsk,unsigned long page, @@ -334,7 +335,7 @@ #define __GFP_DMA 0x80 -#define GFP_BUFFER (__GFP_LOW | __GFP_WAIT) +#define GFP_BUFFER (__GFP_MED | __GFP_WAIT) #define GFP_ATOMIC (__GFP_HIGH) #define GFP_USER (__GFP_LOW | __GFP_WAIT | __GFP_IO) #define GFP_KERNEL (__GFP_MED | __GFP_WAIT | __GFP_IO) diff -urN 2.2.15pre12/include/linux/sched.h VM/include/linux/sched.h --- 2.2.15pre12/include/linux/sched.h Wed Feb 23 20:51:09 2000 +++ VM/include/linux/sched.h Fri Mar 3 03:53:36 2000 @@ -291,6 +291,7 @@ /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap; int swappable:1; + int trashing_mem:1; /* process credentials */ uid_t uid,euid,suid,fsuid; gid_t gid,egid,sgid,fsgid; @@ -328,6 +329,9 @@ /* Thread group 
tracking */ u32 parent_exec_id; u32 self_exec_id; + +/* oom handling */ + int oom_kill_try; }; /* @@ -378,7 +382,7 @@ /* utime */ {0,0,0,0},0, \ /* per CPU times */ {0, }, {0, }, \ /* flt */ 0,0,0,0,0,0, \ -/* swp */ 0, \ +/* swp */ 0,0, \ /* process credentials */ \ /* uid etc */ 0,0,0,0,0,0,0,0, \ /* suppl grps*/ 0, {0,}, \ @@ -395,6 +399,7 @@ /* mm */ &init_mm, \ /* signals */ SPIN_LOCK_UNLOCKED, &init_signals, {{0}}, {{0}}, NULL, &init_task.sigqueue, 0, 0, \ /* exec cts */ 0,0, \ +/* oom */ 0, \ } union task_union { diff -urN 2.2.15pre12/kernel/ksyms.c VM/kernel/ksyms.c --- 2.2.15pre12/kernel/ksyms.c Fri Mar 3 03:48:51 2000 +++ VM/kernel/ksyms.c Fri Mar 3 03:49:18 2000 @@ -93,7 +93,7 @@ /* internal kernel memory management */ EXPORT_SYMBOL(__get_free_pages); EXPORT_SYMBOL(free_pages); -EXPORT_SYMBOL(__free_page); +EXPORT_SYMBOL(__free_pages); EXPORT_SYMBOL(kmem_find_general_cachep); EXPORT_SYMBOL(kmem_cache_create); EXPORT_SYMBOL(kmem_cache_shrink); diff -urN 2.2.15pre12/mm/Makefile VM/mm/Makefile --- 2.2.15pre12/mm/Makefile Fri Mar 3 03:48:51 2000 +++ VM/mm/Makefile Fri Mar 3 03:48:58 2000 @@ -9,7 +9,7 @@ O_TARGET := mm.o O_OBJS := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ - vmalloc.o slab.o oom_kill.o \ + vmalloc.o slab.o \ swap.o vmscan.o page_io.o page_alloc.o swap_state.o swapfile.o include $(TOPDIR)/Rules.make diff -urN 2.2.15pre12/mm/filemap.c VM/mm/filemap.c --- 2.2.15pre12/mm/filemap.c Wed Jan 5 14:16:56 2000 +++ VM/mm/filemap.c Fri Mar 3 03:51:26 2000 @@ -164,16 +164,16 @@ clock = page - mem_map; } + /* We can't free pages unless there's just one user */ + if (atomic_read(&page->count) != 1) + continue; + referenced = test_and_clear_bit(PG_referenced, &page->flags); if (PageLocked(page)) continue; if ((gfp_mask & __GFP_DMA) && !PageDMA(page)) - continue; - - /* We can't free pages unless there's just one user */ - if (atomic_read(&page->count) != 1) continue; count--; diff -urN 2.2.15pre12/mm/oom_kill.c VM/mm/oom_kill.c --- 
2.2.15pre12/mm/oom_kill.c Fri Mar 3 03:48:51 2000 +++ VM/mm/oom_kill.c Thu Jan 1 01:00:00 1970 @@ -1,188 +0,0 @@ -/* - * linux/mm/oom_kill.c - * - * Copyright (C) 1998,2000 Rik van Riel - * Thanks go out to Claus Fischer for some serious inspiration and - * for goading me into coding this file... - * - * The routines in this file are used to kill a process when - * we're seriously out of memory. This gets called from kswapd() - * in linux/mm/vmscan.c when we really run out of memory. - * - * Since we won't call these routines often (on a well-configured - * machine) this file will double as a 'coding guide' and a signpost - * for newbie kernel hackers. It features several pointers to major - * kernel subsystems and hints as to where to find out what things do. - */ - -#include -#include -#include -#include -#include -#include - -/* #define DEBUG */ -#define min(a,b) (((a)<(b))?(a):(b)) - -/* - * A rough approximation to the sqrt() function. - */ -inline int int_sqrt(unsigned int x) -{ - unsigned int out = x; - while (x & ~(unsigned int)1) x >>=2, out >>=1; - if (x) out -= out >> 2; - return (out ? out : 1); -} - -/* - * Basically, points = size / (sqrt(CPU_used) * sqrt(sqrt(time_running))) - * with some bonusses/penalties. - * - * We try to chose our `guilty' task in such a way that we free - * up the maximum amount of memory and lose the minimum amount of - * done work. - * - * The definition of the task_struct, the structure describing the state - * of each process, can be found in include/linux/sched.h. For - * capability info, you should read include/linux/capability.h. - */ - -inline int badness(struct task_struct *p) -{ - int points = p->mm->total_vm; - points /= int_sqrt((p->times.tms_utime + p->times.tms_stime) >> (SHIFT_HZ + 3)); - points /= int_sqrt(int_sqrt((jiffies - p->start_time) >> (SHIFT_HZ + 10))); -/* - * Niced processes are probably less important; kernel/sched.c - * and include/linux/sched.h contain most info on scheduling. 
- */ - if (p->priority < DEF_PRIORITY) - points <<= 1; -/* - * p->(e)uid is the process User ID, ID 0 is root, the super user. - * The super user usually only runs (important) system services - * and properly checked programs which we don't want to kill. - */ - if (p->uid == 0 || p->euid == 0 || cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_ADMIN)) - points >>= 2; -/* - * We don't want to kill a process with direct hardware access. - * Not only could this mess up the hardware, but these processes - * are usually fairly important too. - */ - if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) - points >>= 1; -#ifdef DEBUG - printk(KERN_DEBUG "OOMkill: task %d (%s) got %d points\n", - p->pid, p->comm, points); -#endif - return points; -} - -/* - * Simple selection loop. We chose the process with the highest - * number of 'points'. We need the locks to make sure that the - * list of task structs doesn't change while we look the other way. - */ -inline struct task_struct * select_bad_process(void) -{ - int points = 0, maxpoints = 0; - struct task_struct *p = NULL; - struct task_struct *chosen = NULL; - - read_lock(&tasklist_lock); - for_each_task(p) - { - if (p->pid) - points = badness(p); - if (points > maxpoints) { - chosen = p; - maxpoints = points; - } - } - read_unlock(&tasklist_lock); - return chosen; -} - -/* - * We kill the 'best' process and print a message to userspace. - * The only things to be careful about are: - * - don't SIGKILL a process with direct hardware access. - * - are we killing ourselves? - * - when we kill someone else, can we sleep and get out of the way? 
- */ -void oom_kill(unsigned long gfp_mask) -{ - - struct task_struct *p = select_bad_process(); - - if (p == NULL) - return; - - if (p == current) { - printk(KERN_ERR "Out of Memory: Killed process %d (%s).", - p->pid, p->comm); - } else { - printk(KERN_ERR "Out of Memory: Killed process %d (%s), " - "saved process %d (%s).", - p->pid, p->comm, current->pid, current->comm); - } - - /* This process has hardware access, be more careful */ - if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) { - force_sig(SIGTERM, p); - } else { - force_sig(SIGKILL, p); - } - - /* Get out of the way so that p can die */ - if (p != current && (gfp_mask & __GFP_WAIT) && current->state == TASK_RUNNING) { - p->counter = 2 * DEF_PRIORITY; - current->policy |= SCHED_YIELD; - schedule(); - } - return; -} - -/* - * We are called when __get_free_pages() thinks the system may - * be out of memory. If we really are out of memory, we can do - * nothing except freeing up memory by killing a process... - */ - -int out_of_memory(unsigned long gfp_mask) -{ - int count = page_cluster; - int loop = 0; - int freed = 0; - -again: - if (gfp_mask & __GFP_WAIT) { - /* Try to free up some memory */ - current->flags |= PF_MEMALLOC; - do { - freed += try_to_free_pages(gfp_mask); - run_task_queue(&tq_disk); - if (freed && nr_free_pages > freepages.min) { - current->flags &= ~PF_MEMALLOC; - return 0; - } - } while (--count); - current->flags &= ~PF_MEMALLOC; - } - - /* Darn, we failed. 
Now we have to kill something */ - if (!loop) - oom_kill(gfp_mask); - - if (nr_free_pages > freepages.min) - return 0; - if (!loop) { - loop = 1; - goto again; - } - /* Still out of memory, let the caller deal with it */ - return 1; -} diff -urN 2.2.15pre12/mm/page_alloc.c VM/mm/page_alloc.c --- 2.2.15pre12/mm/page_alloc.c Fri Mar 3 03:48:51 2000 +++ VM/mm/page_alloc.c Fri Mar 3 03:53:36 2000 @@ -20,9 +20,6 @@ int nr_swap_pages = 0; int nr_free_pages = 0; -int low_on_memory = 0; -extern struct wait_queue * kswapd_wait; -extern int out_of_memory(unsigned long); /* * Free area management @@ -126,7 +123,7 @@ spin_unlock_irqrestore(&page_alloc_lock, flags); } -static inline void __free_pages(struct page *page, unsigned long order) +void __free_pages(struct page *page, unsigned long order) { if (!PageReserved(page) && atomic_dec_and_test(&page->count)) { if (PageSwapCache(page)) @@ -137,11 +134,6 @@ } } -void __free_page(struct page *page) -{ - __free_pages(page, 0); -} - void free_pages(unsigned long addr, unsigned long order) { unsigned long map_nr = MAP_NR(addr); @@ -211,50 +203,30 @@ * further thought. 
*/ if (!(current->flags & PF_MEMALLOC)) { -#ifdef SLEEP_MEMORY_DEBUGGING - if (current->state != TASK_RUNNING && (gfp_mask & __GFP_WAIT)) { - printk("gfp called by non-running (%ld) task from %p!\n", - current->state, __builtin_return_address(0)); - /* if we're not running, we can't sleep */ - gfp_mask &= ~__GFP_WAIT; - } -#endif + int freed; + extern struct wait_queue * kswapd_wait; - if (low_on_memory) { - int freed; - current->flags |= PF_MEMALLOC; - freed = try_to_free_pages(gfp_mask); - current->flags &= ~PF_MEMALLOC; - if (time_after(jiffies, low_on_memory + 60 * HZ)) - out_of_memory(gfp_mask); - if (freed && nr_free_pages > freepages.low) - low_on_memory = 0; + if (nr_free_pages >= freepages.high) + { + /* share RO cachelines in fast path */ + if (current->trashing_mem) + current->trashing_mem = 0; + goto ok_to_allocate; } - - if (nr_free_pages <= freepages.low) { - wake_up_interruptible(&kswapd_wait); - if ((gfp_mask & __GFP_WAIT) && current->state == TASK_RUNNING) { - schedule(); - /* kswapd couldn't save us */ - if (nr_free_pages <= freepages.low) - low_on_memory = jiffies; - } + else + { + if (nr_free_pages < freepages.low) + wake_up_interruptible(&kswapd_wait); + if (nr_free_pages > freepages.min && !current->trashing_mem) + goto ok_to_allocate; } - if (nr_free_pages > freepages.min) - goto ok_to_allocate; - - /* - * out_of_memory() should usually fix the situation. - * If it does, we can continue like nothing happened. 
- */ - if (!out_of_memory(gfp_mask)) - goto ok_to_allocate; - - if ((gfp_mask & __GFP_MED) && nr_free_pages > freepages.min / 2) - goto ok_to_allocate; + current->trashing_mem = 1; + current->flags |= PF_MEMALLOC; + freed = try_to_free_pages(gfp_mask); + current->flags &= ~PF_MEMALLOC; - if (!(gfp_mask & __GFP_HIGH)) + if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH))) goto nopage; } ok_to_allocate: diff -urN 2.2.15pre12/mm/vmscan.c VM/mm/vmscan.c --- 2.2.15pre12/mm/vmscan.c Fri Mar 3 03:48:51 2000 +++ VM/mm/vmscan.c Fri Mar 3 03:54:39 2000 @@ -19,7 +19,6 @@ #include #include -extern int low_on_memory; /* * The swap-out functions return 1 if they successfully @@ -65,14 +64,6 @@ return 0; /* - * By setting this bit shrink_mmap() will do - * second-chance page replacement, only do this - * when we are the only (non-pagecache) user. - */ - if (atomic_read(&page_map->count) <= 2) - set_bit(PG_referenced, &page_map->flags); - - /* * Is the page already in the swap cache? If so, then * we can just drop our reference to it without doing * any IO - it's already up-to-date on disk. @@ -446,7 +437,7 @@ printk ("Starting kswapd v%.*s\n", i, s); } -struct wait_queue * kswapd_wait = NULL; +struct wait_queue * kswapd_wait; /* * The background pageout daemon, started as a kernel thread @@ -494,24 +485,18 @@ * the processes needing more memory will wake us * up on a more timely basis. */ + interruptible_sleep_on(&kswapd_wait); while (nr_free_pages < freepages.high) { - if (!do_try_to_free_pages(GFP_KSWAPD)) { - /* out of memory? 
we can't do much */ - low_on_memory = jiffies; - if (nr_free_pages < freepages.min) { - run_task_queue(&tq_disk); - tsk->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ); - } else { - break; - } + if (do_try_to_free_pages(GFP_KSWAPD)) + { + if (tsk->need_resched) + schedule(); + continue; } - if (tsk->need_resched) - schedule(); + tsk->state = TASK_INTERRUPTIBLE; + schedule_timeout(10*HZ); } - run_task_queue(&tq_disk); - interruptible_sleep_on_timeout(&kswapd_wait, HZ); } }