diff -ur 2.3.12/fs/buffer.c 2.3.12-lru/fs/buffer.c
--- 2.3.12/fs/buffer.c	Sun Aug 1 18:11:17 1999
+++ 2.3.12-lru/fs/buffer.c	Wed Aug 4 00:40:08 1999
@@ -1252,7 +1252,7 @@
 	if (!PageLocked(page))
 		BUG();
 	if (!page->buffers)
-		return 0;
+		return 1;
 
 	head = page->buffers;
 	bh = head;
@@ -1293,10 +1293,13 @@
 	 */
 	if (!offset) {
 		if (!try_to_free_buffers(page))
+		{
 			atomic_add(PAGE_CACHE_SIZE, &buffermem);
+			return 0;
+		}
 	}
 
-	return 0;
+	return 1;
 }
 
 static void create_empty_buffers(struct page *page, struct inode *inode, unsigned long blocksize)
@@ -1905,6 +1908,7 @@
 static int grow_buffers(int size)
 {
 	unsigned long page;
+	struct page * page_map;
 	struct buffer_head *bh, *tmp;
 	struct buffer_head * insert_point;
 	int isize;
@@ -1947,7 +1951,9 @@
 	free_list[isize].list = bh;
 	spin_unlock(&free_list[isize].lock);
 
-	mem_map[MAP_NR(page)].buffers = bh;
+	page_map = mem_map + MAP_NR(page);
+	page_map->buffers = bh;
+	lru_cache_add(page_map, 0);
 	atomic_add(PAGE_SIZE, &buffermem);
 	return 1;
 }
diff -ur 2.3.12/fs/dcache.c 2.3.12-lru/fs/dcache.c
--- 2.3.12/fs/dcache.c	Tue Jul 13 02:01:39 1999
+++ 2.3.12-lru/fs/dcache.c	Tue Aug 3 18:28:27 1999
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
@@ -473,9 +474,11 @@
 {
 	if (gfp_mask & __GFP_IO) {
 		int count = 0;
+		lock_kernel();
 		if (priority)
 			count = dentry_stat.nr_unused / priority;
 		prune_dcache(count);
+		unlock_kernel();
 	}
 }
diff -ur 2.3.12/include/linux/mm.h 2.3.12-lru/include/linux/mm.h
--- 2.3.12/include/linux/mm.h	Sun Aug 1 18:11:20 1999
+++ 2.3.12-lru/include/linux/mm.h	Tue Aug 3 18:15:24 1999
@@ -125,6 +125,7 @@
 	struct page *next_hash;
 	atomic_t count;
 	unsigned long flags;	/* atomic flags, some possibly updated asynchronously */
+	struct list_head lru;
 	wait_queue_head_t wait;
 	struct page **pprev_hash;
 	struct buffer_head * buffers;
diff -ur 2.3.12/include/linux/swap.h 2.3.12-lru/include/linux/swap.h
--- 2.3.12/include/linux/swap.h	Tue Jul 20 18:47:12 1999
+++ 2.3.12-lru/include/linux/swap.h	Wed Aug 4 00:48:21 1999
@@ -64,6 +64,8 @@
 extern int nr_swap_pages;
 extern int nr_free_pages;
+extern int nr_lru_pages;
+extern struct list_head lru_cache, lru_swap_cache;
 extern atomic_t nr_async_pages;
 extern struct inode swapper_inode;
 extern atomic_t page_cache_size;
@@ -160,6 +162,27 @@
 	count--;
 	return count > 1;
 }
+
+extern spinlock_t pagemap_lru_lock;
+
+/*
+ * Helper macros for lru_pages handling.
+ */
+#define lru_cache_add(page, swap_cache)					\
+do {									\
+	spin_lock(&pagemap_lru_lock);					\
+	list_add(&(page)->lru, !(swap_cache) ? &lru_cache:&lru_swap_cache); \
+	nr_lru_pages++;							\
+	spin_unlock(&pagemap_lru_lock);					\
+} while (0)
+
+#define lru_cache_del(page)						\
+do {									\
+	spin_lock(&pagemap_lru_lock);					\
+	list_del(&(page)->lru);						\
+	nr_lru_pages--;							\
+	spin_unlock(&pagemap_lru_lock);					\
+} while (0)
 
 #endif /* __KERNEL__*/
diff -ur 2.3.12/ipc/shm.c 2.3.12-lru/ipc/shm.c
--- 2.3.12/ipc/shm.c	Thu Jul 22 01:07:28 1999
+++ 2.3.12-lru/ipc/shm.c	Tue Aug 3 16:16:16 1999
@@ -719,10 +719,12 @@
 	int loop = 0;
 	int counter;
 	struct page * page_map;
+	int ret = 0;
 
+	lock_kernel();
 	counter = shm_rss >> prio;
 	if (!counter || !(swap_nr = get_swap_page()))
-		return 0;
+		goto out_unlock;
 
 check_id:
 	shp = shm_segs[swap_id];
@@ -755,7 +757,7 @@
 	if (--counter < 0) { /* failed */
 failed:
 		swap_free (swap_nr);
-		return 0;
+		goto out_unlock;
 	}
 	if (page_count(mem_map + MAP_NR(pte_page(page))) != 1)
 		goto check_table;
@@ -768,7 +770,10 @@
 	swap_successes++;
 	shm_swp++;
 	shm_rss--;
-	return 1;
+	ret = 1;
+out_unlock:
+	unlock_kernel();
+	return ret;
 }
 
 /*
diff -ur 2.3.12/mm/filemap.c 2.3.12-lru/mm/filemap.c
--- 2.3.12/mm/filemap.c	Thu Jul 22 01:07:28 1999
+++ 2.3.12-lru/mm/filemap.c	Wed Aug 4 00:55:41 1999
@@ -33,6 +33,8 @@
  *
  * finished 'unifying' the page and buffer cache and SMP-threaded the
  * page-cache, 21.05.1999, Ingo Molnar
+ *
+ * SMP-threaded pagemap-LRU 1999, Andrea Arcangeli
  */
 
 atomic_t page_cache_size = ATOMIC_INIT(0);
@@ -40,6 +42,11 @@
 struct page **page_hash_table;
 
 spinlock_t pagecache_lock = SPIN_LOCK_UNLOCKED;
+/*
+ * NOTE: to avoid deadlocking you must never acquire the pagecache_lock with
+ * the pagemap_lru_lock held.
+ */
+spinlock_t pagemap_lru_lock = SPIN_LOCK_UNLOCKED;
 
 
 void __add_page_to_hash_queue(struct page * page, struct page **p)
@@ -117,6 +124,7 @@
 	}
 	if (page_count(page) != 2)
 		printk("hm, busy page invalidated? (not necesserily a bug)\n");
+	lru_cache_del(page);
 
 	remove_page_from_inode_queue(page);
 	remove_page_from_hash_queue(page);
@@ -152,7 +160,8 @@
 	lock_page(page);
 
 	if (inode->i_op->flushpage)
-		inode->i_op->flushpage(inode, page, 0);
+		if (inode->i_op->flushpage(inode, page, 0))
+			lru_cache_del(page);
 
 	/*
 	 * We remove the page from the page cache
@@ -214,91 +223,62 @@
 
 extern atomic_t too_many_dirty_buffers;
 
-int shrink_mmap(int priority, int gfp_mask)
+static inline int shrink_mmap_lru(struct list_head * lru, int * count,
+				  int gfp_mask)
 {
-	static unsigned long clock = 0;
-	unsigned long limit = num_physpages << 1;
+	int ret = 0;
+	LIST_HEAD(young);
+	LIST_HEAD(old);
+	LIST_HEAD(forget);
+	struct list_head * page_lru, * dispose;
 	struct page * page;
-	int count, users;
-
-	count = limit >> priority;
-
-	page = mem_map + clock;
-	do {
-		int referenced;
-
-		/* This works even in the presence of PageSkip because
-		 * the first two entries at the beginning of a hole will
-		 * be marked, not just the first.
-		 */
-		page++;
-		clock++;
-		if (clock >= max_mapnr) {
-			clock = 0;
-			page = mem_map;
-		}
-		if (PageSkip(page)) {
-			/* next_hash is overloaded for PageSkip */
-			page = page->next_hash;
-			clock = page - mem_map;
+	while (*count > 0 && (page_lru = lru->prev) != lru)
+	{
+		page = list_entry(page_lru, struct page, lru);
+		list_del(page_lru);
+
+		if (test_and_clear_bit(PG_referenced, &page->flags))
+		{
+			/* Roll the page at the top of the lru list,
+			 * we could also be more aggressive putting
+			 * the page in the young-dispose-list, so
+			 * avoiding to free young pages in each pass.
+			 */
+			list_add(page_lru, lru);
+			continue;
 		}
-
-		referenced = test_and_clear_bit(PG_referenced, &page->flags);
+		spin_unlock(&pagemap_lru_lock);
 
+		dispose = &old;
 		if ((gfp_mask & __GFP_DMA) && !PageDMA(page))
-			continue;
+			goto dispose_continue;
 
-		count--;
+		(*count)--;
 
-		/*
-		 * Some common cases that we just short-circuit without
-		 * getting the locks - we need to re-check this once we
-		 * have the lock, but that's fine.
-		 */
-		users = page_count(page);
-		if (!users)
-			continue;
-		if (!page->buffers) {
-			if (!page->inode)
-				continue;
-			if (users > 1)
-				continue;
-		}
+		dispose = &young;
+		/* avoid unscalable SMP locking */
+		if (!page->buffers && page_count(page) > 1)
+			goto dispose_continue;
 
-		/*
-		 * ok, now the page looks interesting. Re-check things
-		 * and keep the lock.
-		 */
 		spin_lock(&pagecache_lock);
-		if (!page->inode && !page->buffers) {
-			spin_unlock(&pagecache_lock);
-			continue;
-		}
-		if (!page_count(page)) {
-			spin_unlock(&pagecache_lock);
-			BUG();
-			continue;
-		}
-		get_page(page);
-		if (TryLockPage(page)) {
+		if (TryLockPage(page))
+		{
 			spin_unlock(&pagecache_lock);
-			goto put_continue;
+			goto dispose_continue;
 		}
 
-		/*
-		 * we keep pagecache_lock locked and unlock it in
-		 * each branch, so that the page->inode case doesnt
-		 * have to re-grab it. Here comes the 'real' logic
-		 * to free memory:
-		 */
+		/* avoid freeing the page while it's locked */
+		get_page(page);
 
 		/* Is it a buffer page? */
 		if (page->buffers) {
-			int mem = page->inode ? 0 : PAGE_CACHE_SIZE;
 			spin_unlock(&pagecache_lock);
 			if (!try_to_free_buffers(page))
 				goto unlock_continue;
-			atomic_sub(mem, &buffermem);
+			/* page was locked, inode can't go away under us */
+			if (!page->inode)
+				atomic_sub(PAGE_CACHE_SIZE, &buffermem);
 			spin_lock(&pagecache_lock);
 		}
@@ -307,7 +287,7 @@
 		 * (count == 2 because we added one ourselves above).
 		 */
 		if (page_count(page) != 2)
-			goto spin_unlock_continue;
+			goto cache_unlock_continue;
 
 		/*
 		 * Is it a page swap page? If so, we want to
@@ -316,35 +296,66 @@
 		 */
 		if (PageSwapCache(page)) {
 			spin_unlock(&pagecache_lock);
-			if (referenced && swap_count(page->offset) != 2)
-				goto unlock_continue;
 			__delete_from_swap_cache(page);
-			page_cache_release(page);
 			goto made_progress;
 		}
 
 		/* is it a page-cache page? */
-		if (!referenced && page->inode && !pgcache_under_min()) {
+		if (page->inode && !pgcache_under_min()) {
 			remove_page_from_inode_queue(page);
 			remove_page_from_hash_queue(page);
 			page->inode = NULL;
 			spin_unlock(&pagecache_lock);
-
-			page_cache_release(page);
 			goto made_progress;
 		}
-spin_unlock_continue:
+
+		dispose = &forget;
+		printk(KERN_ERR "shrink_mmap: unknown LRU page!\n");
+
+cache_unlock_continue:
 		spin_unlock(&pagecache_lock);
 unlock_continue:
 		UnlockPage(page);
-put_continue:
 		put_page(page);
-	} while (count > 0);
-	return 0;
+dispose_continue:
+		/* no need of the spinlock to play with the
+		   local dispose lists */
+		list_add(page_lru, dispose);
+		spin_lock(&pagemap_lru_lock);
+	}
+	goto out;
+
 made_progress:
+	page_cache_release(page);
 	UnlockPage(page);
 	put_page(page);
-	return 1;
+	ret = 1;
+	spin_lock(&pagemap_lru_lock);
+	/* nr_lru_pages needs the spinlock */
+	nr_lru_pages--;
+
+out:
+	list_splice(&young, lru);
+	list_splice(&old, lru->prev);
+
+	return ret;
+}
+
+int shrink_mmap(int priority, int gfp_mask)
+{
+	int ret = 0, count, i;
+	struct list_head * lru[2] = { &lru_swap_cache, &lru_cache, };
+
+	count = nr_lru_pages / (priority+1);
+
+	spin_lock(&pagemap_lru_lock);
+
+	for (i=0; count > 0 && !ret && i<2; i++)
+		ret = shrink_mmap_lru(lru[i], &count, gfp_mask);
+
+	spin_unlock(&pagemap_lru_lock);
+
+	return ret;
 }
 
 static inline struct page * __find_page_nolock(struct inode * inode, unsigned long offset, struct page *page)
@@ -461,13 +472,14 @@
 {
 	unsigned long flags;
 
-	flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error));
-	page->flags = flags | ((1 << PG_locked) | (1 << PG_referenced));
+	flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_referenced));
+	page->flags = flags | (1 << PG_locked);
 	page->owner = current;	/* REMOVEME */
 	get_page(page);
 	page->offset = offset;
 	add_page_to_inode_queue(inode, page);
 	__add_page_to_hash_queue(page, hash);
+	lru_cache_add(page, PageSwapCache(page));
 }
 
 void add_to_page_cache(struct page * page, struct inode * inode, unsigned long offset)
diff -ur 2.3.12/mm/page_alloc.c 2.3.12-lru/mm/page_alloc.c
--- 2.3.12/mm/page_alloc.c	Tue Jul 13 02:02:40 1999
+++ 2.3.12-lru/mm/page_alloc.c	Mon Aug 2 17:21:47 1999
@@ -20,6 +20,9 @@
 
 int nr_swap_pages = 0;
 int nr_free_pages = 0;
+int nr_lru_pages;
+LIST_HEAD(lru_cache);
+LIST_HEAD(lru_swap_cache);
 
 /*
  * Free area management
@@ -127,7 +130,6 @@
 		if (PageLocked(page))
 			PAGE_BUG(page);
 
-		page->flags &= ~(1 << PG_referenced);
 		free_pages_ok(page - mem_map, 0);
 		return 1;
 	}
@@ -145,7 +147,6 @@
 			PAGE_BUG(map);
 		if (PageLocked(map))
 			PAGE_BUG(map);
-		map->flags &= ~(1 << PG_referenced);
 		free_pages_ok(map_nr, order);
 		return 1;
 	}
@@ -269,8 +270,9 @@
 	unsigned long total = 0;
 
 	printk("Free pages: %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10));
-	printk("Free: %d (%d %d %d)\n",
+	printk("Free: %d, lru_cache: %d (%d %d %d)\n",
 		nr_free_pages,
+		nr_lru_pages,
 		freepages.min,
 		freepages.low,
 		freepages.high);
diff -ur 2.3.12/mm/swap_state.c 2.3.12-lru/mm/swap_state.c
--- 2.3.12/mm/swap_state.c	Tue Jul 13 02:02:10 1999
+++ 2.3.12-lru/mm/swap_state.c	Wed Aug 4 01:53:38 1999
@@ -214,8 +214,6 @@
 			page_address(page), page_count(page));
 #endif
 	PageClearSwapCache(page);
-	if (inode->i_op->flushpage)
-		inode->i_op->flushpage(inode, page, 0);
 	remove_inode_page(page);
 }
 
@@ -247,6 +245,10 @@
 {
 	lock_page(page);
 
+	if (swapper_inode.i_op->flushpage)
+		if (swapper_inode.i_op->flushpage(&swapper_inode, page, 0))
+			lru_cache_del(page);
+
 	__delete_from_swap_cache(page);
 
 	UnlockPage(page);
@@ -267,9 +269,7 @@
 	 */
 	lock_page(page);
 	if (PageSwapCache(page) && !is_page_shared(page)) {
-		long entry = page->offset;
-		remove_from_swap_cache(page);
-		swap_free(entry);
+		__delete_from_swap_cache(page);
 		page_cache_release(page);
 	}
 	UnlockPage(page);
diff -ur 2.3.12/mm/swapfile.c 2.3.12-lru/mm/swapfile.c
--- 2.3.12/mm/swapfile.c	Thu Jul 22 01:07:28 1999
+++ 2.3.12-lru/mm/swapfile.c	Wed Aug 4 01:03:46 1999
@@ -110,6 +110,7 @@
 	struct swap_info_struct * p;
 	unsigned long offset, type;
 
+	lock_kernel();
 	if (!entry)
 		goto out;
 
@@ -141,6 +142,7 @@
 		entry, p->swap_map[offset]);
 #endif
 out:
+	unlock_kernel();
 	return;
 
 bad_nofile:
diff -ur 2.3.12/mm/vmscan.c 2.3.12-lru/mm/vmscan.c
--- 2.3.12/mm/vmscan.c	Thu Jul 22 01:07:28 1999
+++ 2.3.12-lru/mm/vmscan.c	Tue Aug 3 18:27:54 1999
@@ -155,7 +155,7 @@
 	 */
 	entry = get_swap_page();
 	if (!entry)
-		goto out_failed; /* No swap space left */
+		goto out_failed_unlock; /* No swap space left */
 
 	vma->vm_mm->rss--;
 	tsk->nswap++;
@@ -323,7 +323,9 @@
 {
 	struct task_struct * p, * pbest;
 	int counter, assign, max_cnt;
+	int ret = 0;
 
+	lock_kernel();
 	/*
 	 * We make one or two passes through the task list, indexed by
 	 * assign = {0, 1}:
@@ -373,11 +375,14 @@
 			goto out;
 		}
 
+		ret = 1;
 		if (swap_out_process(pbest, gfp_mask))
-			return 1;
+			goto out;
+		ret = 0;
 	}
 out:
-	return 0;
+	unlock_kernel();
+	return ret;
 }
 
 /*
@@ -394,8 +399,6 @@
 	int priority;
 	int count = SWAP_CLUSTER_MAX;
 
-	lock_kernel();
-
 	/* Always trim SLAB caches when memory gets low. */
 	kmem_cache_reap(gfp_mask);
 
@@ -423,7 +426,6 @@
 		shrink_dcache_memory(priority, gfp_mask);
 	} while (--priority >= 0);
 done:
-	unlock_kernel();
 	return priority >= 0;
 }
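
As a footnote for readers who want to experiment with the idea outside the kernel: below is a minimal user-space model of the second-chance LRU scan that shrink_mmap_lru() implements above. Everything in it (model_page, model_lru_add, model_shrink, the pthread mutex standing in for pagemap_lru_lock) is invented for illustration and is not part of the patch; the count argument plays the role of shrink_mmap()'s priority-scaled budget, which is what keeps a pass from rotating referenced pages forever.

#include <stddef.h>
#include <stdio.h>
#include <pthread.h>

/* minimal stand-ins for the kernel's list_head primitives */
struct list_head {
        struct list_head *next, *prev;
};

static void list_init(struct list_head *h) { h->next = h->prev = h; }

static void list_add(struct list_head *new, struct list_head *head)
{
        new->next = head->next;
        new->prev = head;
        head->next->prev = new;
        head->next = new;
}

static void list_del(struct list_head *entry)
{
        entry->prev->next = entry->next;
        entry->next->prev = entry->prev;
}

struct model_page {
        struct list_head lru;
        int referenced;                 /* stands in for PG_referenced */
        int id;
};

static struct list_head lru_cache;
static int nr_lru_pages;
static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;

/* counterpart of lru_cache_add(): new pages enter at the head */
static void model_lru_add(struct model_page *page)
{
        pthread_mutex_lock(&lru_lock);
        list_add(&page->lru, &lru_cache);
        nr_lru_pages++;
        pthread_mutex_unlock(&lru_lock);
}

/*
 * Counterpart of one shrink_mmap_lru() pass: scan from the tail
 * (lru_cache.prev), give referenced pages a second chance by rotating
 * them back to the head, and reclaim the first unreferenced page found.
 * The count budget bounds the scan, like (*count)-- in the patch.
 */
static struct model_page *model_shrink(int count)
{
        struct list_head *page_lru;
        struct model_page *victim = NULL;

        pthread_mutex_lock(&lru_lock);
        while (count-- > 0 && (page_lru = lru_cache.prev) != &lru_cache) {
                struct model_page *page = (struct model_page *)
                        ((char *)page_lru - offsetof(struct model_page, lru));
                list_del(page_lru);
                if (page->referenced) {
                        page->referenced = 0;   /* second chance */
                        list_add(page_lru, &lru_cache);
                        continue;
                }
                nr_lru_pages--;                 /* page leaves the LRU */
                victim = page;
                break;
        }
        pthread_mutex_unlock(&lru_lock);
        return victim;
}

int main(void)
{
        struct model_page pages[3] = {{.id = 0}, {.id = 1}, {.id = 2}};
        int i;

        list_init(&lru_cache);
        for (i = 0; i < 3; i++)
                model_lru_add(&pages[i]);
        pages[0].referenced = 1;        /* the oldest page was recently used */

        /* page 0 gets rotated to the head; page 1 is the oldest cold page */
        struct model_page *victim = model_shrink(nr_lru_pages);
        printf("reclaimed page %d, %d pages left on the LRU\n",
               victim ? victim->id : -1, nr_lru_pages);
        return 0;
}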
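The NOTE added above pagemap_lru_lock in mm/filemap.c is worth spelling out, because the patch relies on a strict lock order: lru_cache_add() runs with pagecache_lock already held (__add_to_page_cache() is called under it), so pagemap_lru_lock nests inside pagecache_lock, and shrink_mmap_lru() must therefore drop the LRU lock before it can take the pagecache lock. The sketch below shows that discipline with invented names (add_path, shrink_path) and pthread mutexes standing in for the two spinlocks; it is a model of the call paths as I read them, not code from the patch.

#include <pthread.h>

static pthread_mutex_t pagecache_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t pagemap_lru_lock = PTHREAD_MUTEX_INITIALIZER;

/* OK: mirrors lru_cache_add() being reached from __add_to_page_cache()
 * with pagecache_lock already held - the LRU lock is the inner lock. */
static void add_path(void)
{
        pthread_mutex_lock(&pagecache_lock);
        pthread_mutex_lock(&pagemap_lru_lock);
        /* ... link the page into the hash, inode queue and LRU ... */
        pthread_mutex_unlock(&pagemap_lru_lock);
        pthread_mutex_unlock(&pagecache_lock);
}

/* OK: mirrors shrink_mmap_lru(), which detaches the victim and then
 * DROPS the LRU lock before going for the pagecache lock; taking
 * pagecache_lock while still holding pagemap_lru_lock would invert
 * the order against add_path() - exactly what the NOTE forbids. */
static void shrink_path(void)
{
        pthread_mutex_lock(&pagemap_lru_lock);
        /* ... pick a victim off the tail, list_del() it ... */
        pthread_mutex_unlock(&pagemap_lru_lock);

        pthread_mutex_lock(&pagecache_lock);
        /* ... try to free its buffers / drop it from the page cache ... */
        pthread_mutex_unlock(&pagecache_lock);

        pthread_mutex_lock(&pagemap_lru_lock);
        /* ... splice survivors back, fix up nr_lru_pages ... */
        pthread_mutex_unlock(&pagemap_lru_lock);
}

int main(void)
{
        add_path();
        shrink_path();
        return 0;
}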