From: Jens Axboe

DESC
dm plug buglet
EDESC
From: Jens Axboe

At SUSE I reproduced a problem with dm and IDE disks on a 4-way, where the
IDE request_fn would suddenly find the queue plugged and give up on doing
I/O.  The problem is that dm now sets QUEUE_FLAG_PLUGGED without holding
the target queue's lock.

I think the best fix is simply to never set the PLUGGED bit outside of the
queue lock, and to let __generic_unplug_device() always call down into the
request_fn() even if the queue wasn't plugged.  This should be a very rare
occurrence.

DESC
per-backing-dev unplugging: fix BIO_RW_SYNC handling
EDESC

BIO_RW_SYNC is a bit shift, not a bit mask.

DESC
per-backing dev unplugging oops fix #42
EDESC
From: Chris Mason

Hmpf, one more.  If one process does a wait_on_buffer() while another does
a discard_buffer(), bh->b_bdev might be NULL by the time __wait_on_buffer()
uses it.  Someone hit this with reiserfs, but it should be possible to
trigger anywhere.

DESC
fix md for per-address_space unplugging
EDESC
From: Jens Axboe

DESC
more backing_dev unplug functions
EDESC

shmem.c and rd.c have standalone backing_dev_info's.

DESC
plugged bit
EDESC
From: Jens Axboe

Following some consideration, I think it's better to simply always invoke
the request_fn when someone unplugs a queue, even if the device wasn't
plugged.  This solves the problem of md and dm setting the plugged bit
unconditionally _outside_ of the queue lock, which confuses drivers that
check this bit for sanity in their request_fn.

---

 25-akpm/drivers/block/ll_rw_blk.c    |  108 ++++++++++++-----------------------
 25-akpm/drivers/block/loop.c         |   15 ++++
 25-akpm/drivers/block/rd.c           |    1 
 25-akpm/drivers/block/umem.c         |    3 
 25-akpm/drivers/md/dm-crypt.c        |    2 
 25-akpm/drivers/md/dm-table.c        |   16 +++++
 25-akpm/drivers/md/dm.c              |   23 ++++++-
 25-akpm/drivers/md/dm.h              |    1 
 25-akpm/drivers/md/md.c              |   32 +++++++++-
 25-akpm/drivers/md/raid1.c           |    3 
 25-akpm/drivers/md/raid5.c           |    4 -
 25-akpm/drivers/md/raid6main.c       |    3 
 25-akpm/drivers/mtd/devices/blkmtd.c |    6 -
 25-akpm/fs/buffer.c                  |   12 ++-
 25-akpm/fs/direct-io.c               |    4 -
 25-akpm/fs/jfs/jfs_logmgr.c          |    6 -
 25-akpm/fs/ntfs/compress.c           |    3 
 25-akpm/fs/ufs/truncate.c            |    3 
 25-akpm/fs/xfs/linux/xfs_buf.c       |   24 ++-----
 25-akpm/include/linux/backing-dev.h  |    3 
 25-akpm/include/linux/bio.h          |    3 
 25-akpm/include/linux/blkdev.h       |   23 +++++--
 25-akpm/include/linux/fs.h           |    2 
 25-akpm/include/linux/raid/md.h      |    1 
 25-akpm/include/linux/raid/md_k.h    |   26 --------
 25-akpm/include/linux/swap.h         |    2 
 25-akpm/kernel/power/disk.c          |    1 
 25-akpm/kernel/power/pmdisk.c        |    3 
 25-akpm/kernel/power/swsusp.c        |    5 -
 25-akpm/mm/filemap.c                 |    4 -
 25-akpm/mm/mempool.c                 |    2 
 25-akpm/mm/readahead.c               |    8 +-
 25-akpm/mm/shmem.c                   |    1 
 25-akpm/mm/swap_state.c              |    1 
 25-akpm/mm/swapfile.c                |   65 ++++++++++++++++++++-
 35 files changed, 259 insertions(+), 160 deletions(-)

diff -puN drivers/block/ll_rw_blk.c~per-backing_dev-unplugging drivers/block/ll_rw_blk.c
--- 25/drivers/block/ll_rw_blk.c~per-backing_dev-unplugging	2004-03-18 18:54:11.337673696 -0800
+++ 25-akpm/drivers/block/ll_rw_blk.c	2004-03-18 18:55:50.591584808 -0800
@@ -42,12 +42,6 @@ static void blk_unplug_timeout(unsigned
  */
 static kmem_cache_t *request_cachep;
 
-/*
- * plug management
- */
-static LIST_HEAD(blk_plug_list);
-static spinlock_t blk_plug_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
-
 static wait_queue_head_t congestion_wqh[2] = {
 	__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
 	__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
@@ -237,9 +231,13 @@ void blk_queue_make_request(request_queu
 	blk_queue_dma_alignment(q, 511);
 	q->unplug_thresh = 4;		/* hmm */
+#if 0
 	q->unplug_delay = (3 * HZ) / 1000;	/* 3 milliseconds */
 	if (q->unplug_delay == 0)
 		q->unplug_delay = 1;
+#else
+	q->unplug_delay = HZ;
+#endif
 
 	INIT_WORK(&q->unplug_work, blk_unplug_work, q);
 
@@ -251,8 +249,6 @@ void blk_queue_make_request(request_queu
 	 */
 	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 
-	INIT_LIST_HEAD(&q->plug_list);
-
 	blk_queue_activity_fn(q, NULL, NULL);
 }
 
@@ -1104,13 +1100,11 @@ void blk_plug_device(request_queue_t *q)
 	 * don't plug a stopped queue, it must be paired with blk_start_queue()
 	 * which will restart the queueing
 	 */
-	if (!blk_queue_plugged(q)
-	    && !test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) {
-		spin_lock(&blk_plug_lock);
-		list_add_tail(&q->plug_list, &blk_plug_list);
+	if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
+		return;
+
+	if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
 		mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
-		spin_unlock(&blk_plug_lock);
-	}
 }
 
 EXPORT_SYMBOL(blk_plug_device);
@@ -1122,15 +1116,12 @@ EXPORT_SYMBOL(blk_plug_device);
 int blk_remove_plug(request_queue_t *q)
 {
 	WARN_ON(!irqs_disabled());
-	if (blk_queue_plugged(q)) {
-		spin_lock(&blk_plug_lock);
-		list_del_init(&q->plug_list);
-		del_timer(&q->unplug_timer);
-		spin_unlock(&blk_plug_lock);
-		return 1;
-	}
-
-	return 0;
+	if (!test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
+		return 0;
+
+	del_timer(&q->unplug_timer);
+	return 1;
 }
 
 EXPORT_SYMBOL(blk_remove_plug);
@@ -1143,8 +1134,11 @@ static inline void __generic_unplug_devi
 	if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))
 		return;
 
-	if (!blk_remove_plug(q))
-		return;
+	/*
+	 * always call down, since we can race now with setting the plugged
+	 * bit outside of the queue lock
+	 */
+	blk_remove_plug(q);
 
 	/*
 	 * was plugged, fire request_fn if queue has stuff to do
@@ -1161,14 +1155,11 @@ static inline void __generic_unplug_devi
  *   Linux uses plugging to build bigger requests queues before letting
  *   the device have at them. If a queue is plugged, the I/O scheduler
  *   is still adding and merging requests on the queue. Once the queue
- *   gets unplugged (either by manually calling this function, or by
- *   calling blk_run_queues()), the request_fn defined for the
- *   queue is invoked and transfers started.
+ *   gets unplugged, the request_fn defined for the queue is invoked and
+ *   transfers started.
 **/
-void generic_unplug_device(void *data)
+void generic_unplug_device(request_queue_t *q)
 {
-	request_queue_t *q = data;
-
 	spin_lock_irq(q->queue_lock);
 	__generic_unplug_device(q);
 	spin_unlock_irq(q->queue_lock);
@@ -1176,9 +1167,23 @@ void generic_unplug_device(void *data)
 
 EXPORT_SYMBOL(generic_unplug_device);
 
+static inline void blk_backing_dev_unplug(struct backing_dev_info *bdi)
+{
+	request_queue_t *q = bdi->unplug_io_data;
+
+	/*
+	 * devices don't necessarily have an ->unplug_fn defined
+	 */
+	if (q->unplug_fn)
+		q->unplug_fn(q);
+}
+
+EXPORT_SYMBOL(blk_backing_dev_unplug);
+
 static void blk_unplug_work(void *data)
 {
 	request_queue_t *q = data;
+
 	q->unplug_fn(q);
 }
 
@@ -1256,42 +1261,6 @@ void blk_run_queue(struct request_queue
 EXPORT_SYMBOL(blk_run_queue);
 
 /**
- * blk_run_queues - fire all plugged queues
- *
- * Description:
- *   Start I/O on all plugged queues known to the block layer. Queues that
- *   are currently stopped are ignored. This is equivalent to the older
- *   tq_disk task queue run.
- **/
-#define blk_plug_entry(entry) list_entry((entry), request_queue_t, plug_list)
-void blk_run_queues(void)
-{
-	LIST_HEAD(local_plug_list);
-
-	spin_lock_irq(&blk_plug_lock);
-
-	/*
-	 * this will happen fairly often
-	 */
-	if (list_empty(&blk_plug_list))
-		goto out;
-
-	list_splice_init(&blk_plug_list, &local_plug_list);
-
-	while (!list_empty(&local_plug_list)) {
-		request_queue_t *q = blk_plug_entry(local_plug_list.next);
-
-		spin_unlock_irq(&blk_plug_lock);
-		q->unplug_fn(q);
-		spin_lock_irq(&blk_plug_lock);
-	}
-out:
-	spin_unlock_irq(&blk_plug_lock);
-}
-
-EXPORT_SYMBOL(blk_run_queues);
-
-/**
  * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
  * @q: the request queue to be released
  *
@@ -1396,6 +1365,10 @@ request_queue_t *blk_alloc_queue(int gfp
 	memset(q, 0, sizeof(*q));
 	init_timer(&q->unplug_timer);
 	atomic_set(&q->refcnt, 1);
+
+	q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug;
+	q->backing_dev_info.unplug_io_data = q;
+
 	return q;
 }
 
@@ -2056,7 +2029,6 @@ long blk_congestion_wait(int rw, long ti
 	DEFINE_WAIT(wait);
 	wait_queue_head_t *wqh = &congestion_wqh[rw];
 
-	blk_run_queues();
 	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
 	ret = io_schedule_timeout(timeout);
 	finish_wait(wqh, &wait);
@@ -2321,7 +2293,7 @@ out:
 	if (blk_queue_plugged(q)) {
 		int nr_queued = q->rq.count[READ] + q->rq.count[WRITE];
 
-		if (nr_queued == q->unplug_thresh)
+		if (nr_queued == q->unplug_thresh || bio_sync(bio))
 			__generic_unplug_device(q);
 	}
 	spin_unlock_irq(q->queue_lock);
diff -puN drivers/block/loop.c~per-backing_dev-unplugging drivers/block/loop.c
--- 25/drivers/block/loop.c~per-backing_dev-unplugging	2004-03-18 18:54:11.339673392 -0800
+++ 25-akpm/drivers/block/loop.c	2004-03-18 18:54:11.382666856 -0800
@@ -434,6 +434,17 @@ inactive:
 	goto out;
 }
 
+/*
+ * kick off io on the underlying address space
+ */
+static void loop_unplug(request_queue_t *q)
+{
+	struct loop_device *lo = q->queuedata;
+
+	clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
+	blk_run_address_space(lo->lo_backing_file->f_mapping);
+}
+
 struct switch_request {
 	struct file *file;
 	struct completion wait;
@@ -614,7 +625,6 @@ static int loop_set_fd(struct loop_devic
 {
 	struct file	*file;
 	struct inode	*inode;
-	struct block_device *lo_device = NULL;
 	struct address_space *mapping;
 	unsigned lo_blocksize;
 	int		lo_flags = 0;
@@ -671,7 +681,7 @@ static int loop_set_fd(struct loop_devic
 	set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
 
 	lo->lo_blocksize = lo_blocksize;
-	lo->lo_device = lo_device;
+	lo->lo_device = bdev;
 	lo->lo_flags = lo_flags;
 	lo->lo_backing_file = file;
 	lo->transfer = NULL;
@@ -689,6 +699,7 @@ static int loop_set_fd(struct loop_devic
 	 */
 	blk_queue_make_request(lo->lo_queue, loop_make_request);
 	lo->lo_queue->queuedata = lo;
+	lo->lo_queue->unplug_fn = loop_unplug;
 
 	set_capacity(disks[lo->lo_number], size);
 
diff -puN drivers/block/umem.c~per-backing_dev-unplugging drivers/block/umem.c
--- 25/drivers/block/umem.c~per-backing_dev-unplugging	2004-03-18 18:54:11.341673088 -0800
+++ 25-akpm/drivers/block/umem.c	2004-03-18 18:54:11.383666704 -0800
@@ -368,9 +368,8 @@ static inline void reset_page(struct mm_
 	page->biotail = & page->bio;
 }
 
-static void mm_unplug_device(void *data)
+static void mm_unplug_device(request_queue_t *q)
 {
-	request_queue_t *q = data;
 	struct cardinfo *card = q->queuedata;
 	unsigned long flags;
 
diff -puN drivers/md/dm.c~per-backing_dev-unplugging drivers/md/dm.c
--- 25/drivers/md/dm.c~per-backing_dev-unplugging	2004-03-18 18:54:11.343672784 -0800
+++ 25-akpm/drivers/md/dm.c	2004-03-18 18:55:50.592584656 -0800
@@ -575,6 +575,17 @@ static int dm_request(request_queue_t *q
 	return 0;
 }
 
+static void dm_unplug_all(request_queue_t *q)
+{
+	struct mapped_device *md = q->queuedata;
+	struct dm_table *map = dm_get_table(md);
+
+	if (map) {
+		dm_table_unplug_all(map);
+		dm_table_put(map);
+	}
+}
+
 static int dm_any_congested(void *congested_data, int bdi_bits)
 {
 	int r;
@@ -672,6 +683,7 @@ static struct mapped_device *alloc_dev(u
 	md->queue->backing_dev_info.congested_fn = dm_any_congested;
 	md->queue->backing_dev_info.congested_data = md;
 	blk_queue_make_request(md->queue, dm_request);
+	md->queue->unplug_fn = dm_unplug_all;
 
 	md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
 				     mempool_free_slab, _io_cache);
@@ -896,11 +908,17 @@ int dm_suspend(struct mapped_device *md)
 	add_wait_queue(&md->wait, &wait);
 	up_write(&md->lock);
 
+	/* unplug */
+	map = dm_get_table(md);
+	if (map) {
+		dm_table_unplug_all(map);
+		dm_table_put(map);
+	}
+
 	/*
 	 * Then we wait for the already mapped ios to
 	 * complete.
 	 */
-	blk_run_queues();
 	while (1) {
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -945,10 +963,9 @@ int dm_resume(struct mapped_device *md)
 	def = bio_list_get(&md->deferred);
 	__flush_deferred_io(md, def);
 	up_write(&md->lock);
+	dm_table_unplug_all(md->map);
 	dm_table_put(map);
-	blk_run_queues();
-
 	return 0;
 }
 
diff -puN drivers/md/dm-crypt.c~per-backing_dev-unplugging drivers/md/dm-crypt.c
--- 25/drivers/md/dm-crypt.c~per-backing_dev-unplugging	2004-03-18 18:54:11.345672480 -0800
+++ 25-akpm/drivers/md/dm-crypt.c	2004-03-18 18:54:11.385666400 -0800
@@ -668,7 +668,7 @@ static int crypt_map(struct dm_target *t
 
 		/* out of memory -> run queues */
 		if (remaining)
-			blk_run_queues();
+			blk_congestion_wait(bio_data_dir(clone), HZ/100);
 	}
 
 	/* drop reference, clones could have returned before we reach this */
diff -puN drivers/md/dm.h~per-backing_dev-unplugging drivers/md/dm.h
--- 25/drivers/md/dm.h~per-backing_dev-unplugging	2004-03-18 18:54:11.346672328 -0800
+++ 25-akpm/drivers/md/dm.h	2004-03-18 18:54:11.386666248 -0800
@@ -116,6 +116,7 @@ int dm_table_get_mode(struct dm_table *t
 void dm_table_suspend_targets(struct dm_table *t);
 void dm_table_resume_targets(struct dm_table *t);
 int dm_table_any_congested(struct dm_table *t, int bdi_bits);
+void dm_table_unplug_all(struct dm_table *t);
 
 /*-----------------------------------------------------------------
  * A registry of target types.
diff -puN drivers/md/dm-table.c~per-backing_dev-unplugging drivers/md/dm-table.c
--- 25/drivers/md/dm-table.c~per-backing_dev-unplugging	2004-03-18 18:54:11.347672176 -0800
+++ 25-akpm/drivers/md/dm-table.c	2004-03-18 18:54:14.692163736 -0800
@@ -885,8 +885,24 @@ int dm_table_any_congested(struct dm_tab
 	return r;
 }
 
+void dm_table_unplug_all(struct dm_table *t)
+{
+	struct list_head *d, *devices = dm_table_get_devices(t);
+
+	for (d = devices->next; d != devices; d = d->next) {
+		struct dm_dev *dd = list_entry(d, struct dm_dev, list);
+		request_queue_t *q = bdev_get_queue(dd->bdev);
+
+		if (q->unplug_fn)
+			q->unplug_fn(q);
+	}
+}
+
 EXPORT_SYMBOL(dm_vcalloc);
 EXPORT_SYMBOL(dm_get_device);
 EXPORT_SYMBOL(dm_put_device);
 EXPORT_SYMBOL(dm_table_event);
 EXPORT_SYMBOL(dm_table_get_mode);
+EXPORT_SYMBOL(dm_table_put);
+EXPORT_SYMBOL(dm_table_get);
+EXPORT_SYMBOL(dm_table_unplug_all);
diff -puN drivers/md/md.c~per-backing_dev-unplugging drivers/md/md.c
--- 25/drivers/md/md.c~per-backing_dev-unplugging	2004-03-18 18:54:11.349671872 -0800
+++ 25-akpm/drivers/md/md.c	2004-03-18 18:55:50.595584200 -0800
@@ -160,6 +160,30 @@ static int md_fail_request (request_queu
 	return 0;
 }
 
+void md_unplug_mddev(mddev_t *mddev)
+{
+	struct list_head *tmp;
+	mdk_rdev_t *rdev;
+
+	/*
+	 * this list iteration is done without any locking in md?!
+	 */
+	ITERATE_RDEV(mddev, rdev, tmp) {
+		request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
+
+		if (r_queue->unplug_fn)
+			r_queue->unplug_fn(r_queue);
+	}
+}
+EXPORT_SYMBOL(md_unplug_mddev);
+
+static void md_unplug_all(request_queue_t *q)
+{
+	mddev_t *mddev = q->queuedata;
+
+	md_unplug_mddev(mddev);
+}
+
 static inline mddev_t *mddev_get(mddev_t *mddev)
 {
 	atomic_inc(&mddev->active);
@@ -335,6 +359,8 @@ static int sync_page_io(struct block_dev
 	struct bio_vec vec;
 	struct completion event;
 
+	rw |= (1 << BIO_RW_SYNC);
+
 	bio_init(&bio);
 	bio.bi_io_vec = &vec;
 	vec.bv_page = page;
@@ -349,7 +375,6 @@ static int sync_page_io(struct block_dev
 	bio.bi_private = &event;
 	bio.bi_end_io = bi_complete;
 	submit_bio(rw, &bio);
-	blk_run_queues();
 	wait_for_completion(&event);
 
 	return test_bit(BIO_UPTODATE, &bio.bi_flags);
@@ -1644,6 +1669,7 @@ static int do_md_run(mddev_t * mddev)
 	 */
 	mddev->queue->queuedata = mddev;
 	mddev->queue->make_request_fn = mddev->pers->make_request;
+	mddev->queue->unplug_fn = md_unplug_all;
 
 	mddev->changed = 1;
 	return 0;
@@ -2718,7 +2744,7 @@ int md_thread(void * arg)
 			run = thread->run;
 			if (run) {
 				run(thread->mddev);
-				blk_run_queues();
+				md_unplug_mddev(thread->mddev);
 			}
 			if (signal_pending(current))
 				flush_signals(current);
@@ -3286,7 +3312,7 @@ static void md_do_sync(mddev_t *mddev)
 		    test_bit(MD_RECOVERY_ERR, &mddev->recovery))
 			break;
 
-		blk_run_queues();
+		md_unplug_mddev(mddev);
 	repeat:
 		if (jiffies >= mark[last_mark] + SYNC_MARK_STEP )
 
diff -puN drivers/md/raid1.c~per-backing_dev-unplugging drivers/md/raid1.c
--- 25/drivers/md/raid1.c~per-backing_dev-unplugging	2004-03-18 18:54:11.350671720 -0800
+++ 25-akpm/drivers/md/raid1.c	2004-03-18 18:55:02.643873968 -0800
@@ -451,6 +451,7 @@ rb_out:
 
 static void device_barrier(conf_t *conf, sector_t sect)
 {
+	md_unplug_mddev(conf->mddev);
 	spin_lock_irq(&conf->resync_lock);
 	wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume), conf->resync_lock);
 
@@ -478,6 +479,7 @@ static int make_request(request_queue_t
 	 * thread has put up a bar for new requests.
 	 * Continue immediately if no resync is active currently.
 	 */
+	md_unplug_mddev(conf->mddev);
 	spin_lock_irq(&conf->resync_lock);
 	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock);
 	conf->nr_pending++;
@@ -644,6 +646,7 @@ static void print_conf(conf_t *conf)
 
 static void close_sync(conf_t *conf)
 {
+	md_unplug_mddev(conf->mddev);
 	spin_lock_irq(&conf->resync_lock);
 	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock);
 	spin_unlock_irq(&conf->resync_lock);
diff -puN drivers/md/raid5.c~per-backing_dev-unplugging drivers/md/raid5.c
--- 25/drivers/md/raid5.c~per-backing_dev-unplugging	2004-03-18 18:54:11.352671416 -0800
+++ 25-akpm/drivers/md/raid5.c	2004-03-18 18:55:02.644873816 -0800
@@ -249,6 +249,7 @@ static struct stripe_head *get_active_st
 				break;
 			if (!sh) {
 				conf->inactive_blocked = 1;
+				md_unplug_mddev(conf->mddev);
 				wait_event_lock_irq(conf->wait_for_stripe,
 					!list_empty(&conf->inactive_list) &&
 					(atomic_read(&conf->active_stripes) < (NR_STRIPES *3/4)
@@ -1292,9 +1293,8 @@ static inline void raid5_activate_delaye
 		}
 	}
 }
-static void raid5_unplug_device(void *data)
+static void raid5_unplug_device(request_queue_t *q)
 {
-	request_queue_t *q = data;
 	mddev_t *mddev = q->queuedata;
 	raid5_conf_t *conf = mddev_to_conf(mddev);
 	unsigned long flags;
diff -puN drivers/md/raid6main.c~per-backing_dev-unplugging drivers/md/raid6main.c
--- 25/drivers/md/raid6main.c~per-backing_dev-unplugging	2004-03-18 18:54:11.353671264 -0800
+++ 25-akpm/drivers/md/raid6main.c	2004-03-18 18:54:11.395664880 -0800
@@ -1454,9 +1454,8 @@ static inline void raid6_activate_delaye
 		}
 	}
 }
-static void raid6_unplug_device(void *data)
+static void raid6_unplug_device(request_queue_t *q)
 {
-	request_queue_t *q = data;
 	mddev_t *mddev = q->queuedata;
 	raid6_conf_t *conf = mddev_to_conf(mddev);
 	unsigned long flags;
diff -puN drivers/mtd/devices/blkmtd.c~per-backing_dev-unplugging drivers/mtd/devices/blkmtd.c
--- 25/drivers/mtd/devices/blkmtd.c~per-backing_dev-unplugging	2004-03-18 18:54:11.355670960 -0800
+++ 25-akpm/drivers/mtd/devices/blkmtd.c	2004-03-18 18:54:11.396664728 -0800
@@ -147,8 +147,7 @@ static int blkmtd_readpage(struct blkmtd
 		bio->bi_private = &event;
 		bio->bi_end_io = bi_read_complete;
 		if(bio_add_page(bio, page, PAGE_SIZE, 0) == PAGE_SIZE) {
-			submit_bio(READ, bio);
-			blk_run_queues();
+			submit_bio(READ_SYNC, bio);
 			wait_for_completion(&event);
 			err = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : -EIO;
 			bio_put(bio);
@@ -179,8 +178,7 @@ static int blkmtd_write_out(struct bio *
 	init_completion(&event);
 	bio->bi_private = &event;
 	bio->bi_end_io = bi_write_complete;
-	submit_bio(WRITE, bio);
-	blk_run_queues();
+	submit_bio(WRITE_SYNC, bio);
 	wait_for_completion(&event);
 	DEBUG(3, "submit_bio completed, bi_vcnt = %d\n", bio->bi_vcnt);
 	err = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : -EIO;
diff -puN fs/buffer.c~per-backing_dev-unplugging fs/buffer.c
--- 25/fs/buffer.c~per-backing_dev-unplugging	2004-03-18 18:54:11.357670656 -0800
+++ 25-akpm/fs/buffer.c	2004-03-18 18:54:39.173442016 -0800
@@ -132,7 +132,11 @@ void __wait_on_buffer(struct buffer_head
 	do {
 		prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
 		if (buffer_locked(bh)) {
-			blk_run_queues();
+			struct block_device *bd;
+			smp_mb();
+			bd = bh->b_bdev;
+			if (bd)
+				blk_run_address_space(bd->bd_inode->i_mapping);
 			io_schedule();
 		}
 	} while (buffer_locked(bh));
@@ -491,7 +495,6 @@ static void free_more_memory(void)
 	pg_data_t *pgdat;
 
 	wakeup_bdflush(1024);
-	blk_run_queues();
 	yield();
 
 	for_each_pgdat(pgdat) {
@@ -2929,7 +2932,10 @@ EXPORT_SYMBOL(try_to_free_buffers);
 
 int block_sync_page(struct page *page)
 {
-	blk_run_queues();
+	struct address_space *mapping;
+	smp_mb();
+	mapping = page->mapping;
+	blk_run_address_space(mapping);
 	return 0;
 }
 
diff -puN fs/direct-io.c~per-backing_dev-unplugging fs/direct-io.c
--- 25/fs/direct-io.c~per-backing_dev-unplugging	2004-03-18 18:54:11.358670504 -0800
+++ 25-akpm/fs/direct-io.c	2004-03-18 18:54:11.399664272 -0800
@@ -329,7 +329,7 @@ static struct bio *dio_await_one(struct
 		if (dio->bio_list == NULL) {
 			dio->waiter = current;
 			spin_unlock_irqrestore(&dio->bio_list_lock, flags);
-			blk_run_queues();
+			blk_run_address_space(dio->inode->i_mapping);
 			io_schedule();
 			spin_lock_irqsave(&dio->bio_list_lock, flags);
 			dio->waiter = NULL;
@@ -960,7 +960,7 @@ direct_io_worker(int rw, struct kiocb *i
 		if (ret == 0)
 			ret = dio->result;	/* Bytes written */
 		finished_one_bio(dio);		/* This can free the dio */
-		blk_run_queues();
+		blk_run_address_space(inode->i_mapping);
 	} else {
 		finished_one_bio(dio);
 		ret2 = dio_await_completion(dio);
diff -puN fs/jfs/jfs_logmgr.c~per-backing_dev-unplugging fs/jfs/jfs_logmgr.c
--- 25/fs/jfs/jfs_logmgr.c~per-backing_dev-unplugging	2004-03-18 18:54:11.360670200 -0800
+++ 25-akpm/fs/jfs/jfs_logmgr.c	2004-03-18 18:54:11.401663968 -0800
@@ -1972,8 +1972,7 @@ static int lbmRead(struct jfs_log * log,
 	bio->bi_end_io = lbmIODone;
 	bio->bi_private = bp;
-	submit_bio(READ, bio);
-	blk_run_queues();
+	submit_bio(READ_SYNC, bio);
 
 	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
 
@@ -2117,9 +2116,8 @@ static void lbmStartIO(struct lbuf * bp)
 
 	/* check if journaling to disk has been disabled */
 	if (!log->no_integrity) {
-		submit_bio(WRITE, bio);
+		submit_bio(WRITE_SYNC, bio);
 		INCREMENT(lmStat.submitted);
-		blk_run_queues();
 	} else {
 		bio->bi_size = 0;
 
diff -puN fs/ntfs/compress.c~per-backing_dev-unplugging fs/ntfs/compress.c
--- 25/fs/ntfs/compress.c~per-backing_dev-unplugging	2004-03-18 18:54:11.361670048 -0800
+++ 25-akpm/fs/ntfs/compress.c	2004-03-18 18:54:11.402663816 -0800
@@ -23,6 +23,7 @@
 #include <linux/fs.h>
 #include <linux/buffer_head.h>
+#include <linux/blkdev.h>
 
 #include "ntfs.h"
 
@@ -668,7 +669,7 @@ lock_retry_remap:
 				"uptodate! Unplugging the disk queue "
 				"and rescheduling.");
 		get_bh(tbh);
-		blk_run_queues();
+		blk_run_address_space(mapping);
 		schedule();
 		put_bh(tbh);
 		if (unlikely(!buffer_uptodate(tbh)))
diff -puN fs/ufs/truncate.c~per-backing_dev-unplugging fs/ufs/truncate.c
--- 25/fs/ufs/truncate.c~per-backing_dev-unplugging	2004-03-18 18:54:11.362669896 -0800
+++ 25-akpm/fs/ufs/truncate.c	2004-03-18 18:54:11.402663816 -0800
@@ -38,6 +38,7 @@
 #include <linux/string.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
+#include <linux/blkdev.h>
 #include <linux/sched.h>
 
 #include "swab.h"
 
@@ -456,7 +457,7 @@ void ufs_truncate (struct inode * inode)
 			break;
 		if (IS_SYNC(inode) && (inode->i_state & I_DIRTY))
 			ufs_sync_inode (inode);
-		blk_run_queues();
+		blk_run_address_space(inode->i_mapping);
 		yield();
 	}
 	offset = inode->i_size & uspi->s_fshift;
diff -puN fs/xfs/linux/xfs_buf.c~per-backing_dev-unplugging fs/xfs/linux/xfs_buf.c
--- 25/fs/xfs/linux/xfs_buf.c~per-backing_dev-unplugging	2004-03-18 18:54:11.364669592 -0800
+++ 25-akpm/fs/xfs/linux/xfs_buf.c	2004-03-18 18:54:11.404663512 -0800
@@ -1013,7 +1013,7 @@ pagebuf_lock(
 {
 	PB_TRACE(pb, "lock", 0);
 	if (atomic_read(&pb->pb_io_remaining))
-		blk_run_queues();
+		blk_run_address_space(pb->pb_target->pbr_mapping);
 	down(&pb->pb_sema);
 	PB_SET_OWNER(pb);
 	PB_TRACE(pb, "locked", 0);
@@ -1109,7 +1109,7 @@ _pagebuf_wait_unpin(
 		if (atomic_read(&pb->pb_pin_count) == 0)
 			break;
 		if (atomic_read(&pb->pb_io_remaining))
-			blk_run_queues();
+			blk_run_address_space(pb->pb_target->pbr_mapping);
 		schedule();
 	}
 	remove_wait_queue(&pb->pb_waiters, &wait);
@@ -1407,7 +1407,7 @@ submit_io:
 		if (pb->pb_flags & PBF_RUN_QUEUES) {
 			pb->pb_flags &= ~PBF_RUN_QUEUES;
 			if (atomic_read(&pb->pb_io_remaining) > 1)
-				blk_run_queues();
+				blk_run_address_space(pb->pb_target->pbr_mapping);
 		}
 	}
 
@@ -1471,7 +1471,7 @@ pagebuf_iowait(
 {
 	PB_TRACE(pb, "iowait", 0);
 	if (atomic_read(&pb->pb_io_remaining))
-		blk_run_queues();
+		blk_run_address_space(pb->pb_target->pbr_mapping);
 	down(&pb->pb_iodonesema);
 	PB_TRACE(pb, "iowaited", (long)pb->pb_error);
 	return pb->pb_error;
@@ -1617,7 +1617,6 @@ STATIC int
 pagebuf_daemon(
 	void			*data)
 {
-	int			count;
 	page_buf_t		*pb;
 	struct list_head	*curr, *next, tmp;
 
@@ -1640,7 +1639,6 @@ pagebuf_daemon(
 
 		spin_lock(&pbd_delwrite_lock);
 
-		count = 0;
 		list_for_each_safe(curr, next, &pbd_delwrite_queue) {
 			pb = list_entry(curr, page_buf_t, pb_list);
 
@@ -1657,7 +1655,6 @@ pagebuf_daemon(
 				pb->pb_flags &= ~PBF_DELWRI;
 				pb->pb_flags |= PBF_WRITE;
 				list_move(&pb->pb_list, &tmp);
-				count++;
 			}
 		}
 
@@ -1667,12 +1664,11 @@ pagebuf_daemon(
 			list_del_init(&pb->pb_list);
 
 			pagebuf_iostrategy(pb);
+			blk_run_address_space(pb->pb_target->pbr_mapping);
 		}
 
 		if (as_list_len > 0)
 			purge_addresses();
-		if (count)
-			blk_run_queues();
 
 		force_flush = 0;
 	} while (pagebuf_daemon_active);
@@ -1689,7 +1685,6 @@ pagebuf_delwri_flush(
 	page_buf_t		*pb;
 	struct list_head	*curr, *next, tmp;
 	int			pincount = 0;
-	int			flush_cnt = 0;
 
 	pagebuf_runall_queues(pagebuf_dataio_workqueue);
 	pagebuf_runall_queues(pagebuf_logio_workqueue);
@@ -1733,14 +1728,8 @@ pagebuf_delwri_flush(
 
 		pagebuf_lock(pb);
 		pagebuf_iostrategy(pb);
-		if (++flush_cnt > 32) {
-			blk_run_queues();
-			flush_cnt = 0;
-		}
 	}
 
-	blk_run_queues();
-
 	while (!list_empty(&tmp)) {
 		pb = list_entry(tmp.next, page_buf_t, pb_list);
 
@@ -1751,6 +1740,9 @@ pagebuf_delwri_flush(
 		pagebuf_rele(pb);
 	}
 
+	if (flags & PBDF_WAIT)
+		blk_run_address_space(target->pbr_mapping);
+
 	if (pinptr)
 		*pinptr = pincount;
 }
diff -puN include/linux/backing-dev.h~per-backing_dev-unplugging include/linux/backing-dev.h
--- 25/include/linux/backing-dev.h~per-backing_dev-unplugging	2004-03-18 18:54:11.365669440 -0800
+++ 25-akpm/include/linux/backing-dev.h	2004-03-18 18:55:38.998347248 -0800
@@ -28,9 +28,12 @@ struct backing_dev_info {
 	int memory_backed;	/* Cannot clean pages with writepage */
 	congested_fn *congested_fn; /* Function pointer if device is md/dm */
 	void *congested_data;	/* Pointer to aux data for congested func */
+	void (*unplug_io_fn)(struct backing_dev_info *);
+	void *unplug_io_data;
 };
 
 extern struct backing_dev_info default_backing_dev_info;
+void default_unplug_io_fn(struct backing_dev_info *bdi);
 
 int writeback_acquire(struct backing_dev_info *bdi);
 int writeback_in_progress(struct backing_dev_info *bdi);
diff -puN include/linux/bio.h~per-backing_dev-unplugging include/linux/bio.h
--- 25/include/linux/bio.h~per-backing_dev-unplugging	2004-03-18 18:54:11.366669288 -0800
+++ 25-akpm/include/linux/bio.h	2004-03-18 18:54:23.414837688 -0800
@@ -119,11 +119,13 @@ struct bio {
  * bit 1 -- rw-ahead when set
  * bit 2 -- barrier
  * bit 3 -- fail fast, don't want low level driver retries
+ * bit 4 -- synchronous I/O hint: the block layer will unplug immediately
  */
 #define BIO_RW		0
 #define BIO_RW_AHEAD	1
 #define BIO_RW_BARRIER	2
 #define BIO_RW_FAILFAST	3
+#define BIO_RW_SYNC	4
 
 /*
  * various member access, note that bio_data should of course not be used
@@ -138,6 +140,7 @@ struct bio {
 #define bio_cur_sectors(bio)	(bio_iovec(bio)->bv_len >> 9)
 #define bio_data(bio)		(page_address(bio_page((bio))) + bio_offset((bio)))
 #define bio_barrier(bio)	((bio)->bi_rw & (1 << BIO_RW_BARRIER))
+#define bio_sync(bio)		((bio)->bi_rw & (1 << BIO_RW_SYNC))
 
 /*
  * will die
diff -puN include/linux/blkdev.h~per-backing_dev-unplugging include/linux/blkdev.h
--- 25/include/linux/blkdev.h~per-backing_dev-unplugging	2004-03-18 18:54:11.367669136 -0800
+++ 25-akpm/include/linux/blkdev.h	2004-03-18 18:54:30.956691152 -0800
@@ -243,7 +243,7 @@ typedef int (merge_requests_fn) (request
 typedef void (request_fn_proc) (request_queue_t *q);
 typedef int (make_request_fn) (request_queue_t *q, struct bio *bio);
 typedef int (prep_rq_fn) (request_queue_t *, struct request *);
-typedef void (unplug_fn) (void *q);
+typedef void (unplug_fn) (request_queue_t *);
 
 struct bio_vec;
 typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *);
@@ -315,8 +315,6 @@ struct request_queue
 	unsigned long		bounce_pfn;
 	int			bounce_gfp;
 
-	struct list_head	plug_list;
-
 	/*
 	 * various queue flags, see QUEUE_* below
 	 */
@@ -370,8 +368,9 @@ struct request_queue
 #define QUEUE_FLAG_WRITEFULL	4	/* read queue has been filled */
 #define QUEUE_FLAG_DEAD		5	/* queue being torn down */
 #define QUEUE_FLAG_REENTER	6	/* Re-entrancy avoidance */
+#define QUEUE_FLAG_PLUGGED	7	/* queue is plugged */
 
-#define blk_queue_plugged(q)	!list_empty(&(q)->plug_list)
+#define blk_queue_plugged(q)	test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
@@ -515,7 +514,7 @@ extern int scsi_cmd_ioctl(struct gendisk
 extern void blk_start_queue(request_queue_t *q);
 extern void blk_stop_queue(request_queue_t *q);
 extern void __blk_stop_queue(request_queue_t *q);
-extern void blk_run_queue(request_queue_t *q);
+extern void blk_run_queue(request_queue_t *);
 extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
 extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int);
 extern int blk_rq_unmap_user(struct request *, void __user *, unsigned int);
@@ -526,6 +525,18 @@ static inline request_queue_t *bdev_get_
 	return bdev->bd_disk->queue;
 }
 
+static inline void blk_run_backing_dev(struct backing_dev_info *bdi)
+{
+	if (bdi && bdi->unplug_io_fn)
+		bdi->unplug_io_fn(bdi);
+}
+
+static inline void blk_run_address_space(struct address_space *mapping)
+{
+	if (mapping)
+		blk_run_backing_dev(mapping->backing_dev_info);
+}
+
 /*
  * end_request() and friends. Must be called with the request queue spinlock
  * acquired. All functions called within end_request() _must_be_ atomic.
@@ -572,7 +583,7 @@ extern struct backing_dev_info *blk_get_
 extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
 extern void blk_dump_rq_flags(struct request *, char *);
-extern void generic_unplug_device(void *);
+extern void generic_unplug_device(request_queue_t *);
 extern long nr_blockdev_pages(void);
 
 int blk_get_queue(request_queue_t *);
diff -puN include/linux/fs.h~per-backing_dev-unplugging include/linux/fs.h
--- 25/include/linux/fs.h~per-backing_dev-unplugging	2004-03-18 18:54:11.369668832 -0800
+++ 25-akpm/include/linux/fs.h	2004-03-18 18:54:23.415837536 -0800
@@ -83,6 +83,8 @@ extern int leases_enable, dir_notify_ena
 #define WRITE 1
 #define READA 2		/* read-ahead - don't block if no resources */
 #define SPECIAL 4	/* For non-blockdevice requests in request queue */
+#define READ_SYNC	(READ | (1 << BIO_RW_SYNC))
+#define WRITE_SYNC	(WRITE | (1 << BIO_RW_SYNC))
 
 #define SEL_IN		1
 #define SEL_OUT		2
diff -puN include/linux/raid/md_k.h~per-backing_dev-unplugging include/linux/raid/md_k.h
--- 25/include/linux/raid/md_k.h~per-backing_dev-unplugging	2004-03-18 18:54:11.370668680 -0800
+++ 25-akpm/include/linux/raid/md_k.h	2004-03-18 18:54:11.408662904 -0800
@@ -326,7 +326,6 @@ do {							\
 		if (condition)				\
 			break;				\
 		spin_unlock_irq(&lock);			\
-		blk_run_queues();			\
 		schedule();				\
 		spin_lock_irq(&lock);			\
 	}						\
@@ -341,30 +340,5 @@ do {							\
 		__wait_event_lock_irq(wq, condition, lock);	\
 } while (0)
 
-
-#define __wait_disk_event(wq, condition)			\
-do {								\
-	wait_queue_t __wait;					\
-	init_waitqueue_entry(&__wait, current);			\
-								\
-	add_wait_queue(&wq, &__wait);				\
-	for (;;) {						\
-		set_current_state(TASK_UNINTERRUPTIBLE);	\
-		if (condition)					\
-			break;					\
-		blk_run_queues();				\
-		schedule();					\
-	}							\
-	current->state = TASK_RUNNING;				\
-	remove_wait_queue(&wq, &__wait);			\
-} while (0)
-
-#define wait_disk_event(wq, condition)				\
-do {								\
-	if (condition)						\
-		break;						\
-	__wait_disk_event(wq, condition);			\
-} while (0)
-
 #endif
 
diff -puN kernel/power/disk.c~per-backing_dev-unplugging kernel/power/disk.c
--- 25/kernel/power/disk.c~per-backing_dev-unplugging	2004-03-18 18:54:11.371668528 -0800
+++ 25-akpm/kernel/power/disk.c	2004-03-18 18:54:11.408662904 -0800
@@ -84,7 +84,6 @@ static void free_some_memory(void)
 	while (shrink_all_memory(10000))
 		printk(".");
 	printk("|\n");
-	blk_run_queues();
 }
 
diff -puN kernel/power/pmdisk.c~per-backing_dev-unplugging kernel/power/pmdisk.c
--- 25/kernel/power/pmdisk.c~per-backing_dev-unplugging	2004-03-18 18:54:11.373668224 -0800
+++ 25-akpm/kernel/power/pmdisk.c	2004-03-18 18:54:23.416837384 -0800
@@ -859,7 +859,6 @@ static int end_io(struct bio * bio, unsi
 
 static void wait_io(void)
 {
-	blk_run_queues();
 	while(atomic_read(&io_done))
 		io_schedule();
 }
@@ -898,7 +897,7 @@ static int submit(int rw, pgoff_t page_o
 	if (rw == WRITE)
 		bio_set_pages_dirty(bio);
 	start_io();
-	submit_bio(rw,bio);
+	submit_bio(rw | (1 << BIO_RW_SYNC), bio);
 	wait_io();
 Done:
 	bio_put(bio);
diff -puN kernel/power/swsusp.c~per-backing_dev-unplugging kernel/power/swsusp.c
--- 25/kernel/power/swsusp.c~per-backing_dev-unplugging	2004-03-18 18:54:11.374668072 -0800
+++ 25-akpm/kernel/power/swsusp.c	2004-03-18 18:54:11.410662600 -0800
@@ -707,11 +707,6 @@ int software_suspend(void)
 
 	free_some_memory();
 
-	/* No need to invalidate any vfsmnt list --
-	 * they will be valid after resume, anyway.
-	 */
-	blk_run_queues();
-
 	/* Save state of all device drivers, and stop them. */
 	if ((res = device_suspend(4))==0)
 		/* If stopping device drivers worked, we proceed basically into
diff -puN mm/mempool.c~per-backing_dev-unplugging mm/mempool.c
--- 25/mm/mempool.c~per-backing_dev-unplugging	2004-03-18 18:54:11.375667920 -0800
+++ 25-akpm/mm/mempool.c	2004-03-18 18:54:11.411662448 -0800
@@ -234,8 +234,6 @@ repeat_alloc:
 	if (!(gfp_mask & __GFP_WAIT))
 		return NULL;
 
-	blk_run_queues();
-
 	prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
 	mb();
 	if (!pool->curr_nr)
diff -puN mm/readahead.c~per-backing_dev-unplugging mm/readahead.c
--- 25/mm/readahead.c~per-backing_dev-unplugging	2004-03-18 18:54:11.377667616 -0800
+++ 25-akpm/mm/readahead.c	2004-03-18 18:55:38.997347400 -0800
@@ -15,11 +15,16 @@
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
 
+void default_unplug_io_fn(struct backing_dev_info *bdi)
+{
+}
+EXPORT_SYMBOL(default_unplug_io_fn);
+
 struct backing_dev_info default_backing_dev_info = {
 	.ra_pages	= (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE,
 	.state		= 0,
+	.unplug_io_fn	= default_unplug_io_fn,
 };
-
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
 /*
@@ -32,7 +37,6 @@ file_ra_state_init(struct file_ra_state
 	ra->ra_pages = mapping->backing_dev_info->ra_pages;
 	ra->average = ra->ra_pages / 2;
 }
-
 EXPORT_SYMBOL(file_ra_state_init);
 
 /*
diff -puN mm/filemap.c~per-backing_dev-unplugging mm/filemap.c
--- 25/mm/filemap.c~per-backing_dev-unplugging	2004-03-18 18:54:30.951691912 -0800
+++ 25-akpm/mm/filemap.c	2004-03-18 18:54:30.957691000 -0800
@@ -118,8 +118,10 @@ void remove_from_page_cache(struct page
 
 static inline int sync_page(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping;
 
+	smp_mb();
+	mapping = page->mapping;
 	if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
 		return mapping->a_ops->sync_page(page);
 	return 0;
diff -puN include/linux/raid/md.h~per-backing_dev-unplugging include/linux/raid/md.h
--- 25/include/linux/raid/md.h~per-backing_dev-unplugging	2004-03-18 18:55:02.637874880 -0800
+++ 25-akpm/include/linux/raid/md.h	2004-03-18 18:55:02.645873664 -0800
@@ -76,6 +76,7 @@ extern void md_handle_safemode(mddev_t *
 extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
 extern void md_sync_acct(mdk_rdev_t *rdev, unsigned long nr_sectors);
 extern void md_error (mddev_t *mddev, mdk_rdev_t *rdev);
+extern void md_unplug_mddev(mddev_t *mddev);
 extern void md_print_devices (void);
 
diff -puN include/linux/swap.h~per-backing_dev-unplugging include/linux/swap.h
--- 25/include/linux/swap.h~per-backing_dev-unplugging	2004-03-18 18:55:23.105763288 -0800
+++ 25-akpm/include/linux/swap.h	2004-03-18 18:55:23.158755232 -0800
@@ -232,6 +232,8 @@ extern sector_t map_swap_page(struct swa
 extern struct swap_info_struct *get_swap_info_struct(unsigned);
 extern int can_share_swap_page(struct page *);
 extern int remove_exclusive_swap_page(struct page *);
+struct backing_dev_info;
+extern void swap_unplug_io_fn(struct backing_dev_info *);
 
 extern struct swap_list_t swap_list;
 extern spinlock_t swaplock;
diff -puN mm/swapfile.c~per-backing_dev-unplugging mm/swapfile.c
--- 25/mm/swapfile.c~per-backing_dev-unplugging	2004-03-18 18:55:23.129759640 -0800
+++ 25-akpm/mm/swapfile.c	2004-03-18 18:55:23.158755232 -0800
@@ -23,6 +23,7 @@
 #include <linux/pagemap.h>
 #include <linux/namei.h>
 #include <linux/shm.h>
+#include <linux/blkdev.h>
 #include <linux/writeback.h>
 #include <linux/proc_fs.h>
 
@@ -44,8 +45,64 @@ struct swap_list_t swap_list = {-1, -1};
 
 struct swap_info_struct swap_info[MAX_SWAPFILES];
 
+/*
+ * Array of backing blockdevs, for swap_unplug_fn.  We need this because the
+ * bdev->unplug_fn can sleep and we cannot hold swap_list_lock while calling
+ * the unplug_fn.  And swap_list_lock cannot be turned into a semaphore.
+ */
+static DECLARE_MUTEX(swap_bdevs_sem);
+static struct block_device *swap_bdevs[MAX_SWAPFILES];
+
 #define SWAPFILE_CLUSTER 256
 
+/*
+ * Caller holds swap_bdevs_sem
+ */
+static void install_swap_bdev(struct block_device *bdev)
+{
+	int i;
+
+	for (i = 0; i < MAX_SWAPFILES; i++) {
+		if (swap_bdevs[i] == NULL) {
+			swap_bdevs[i] = bdev;
+			return;
+		}
+	}
+	BUG();
+}
+
+static void remove_swap_bdev(struct block_device *bdev)
+{
+	int i;
+
+	for (i = 0; i < MAX_SWAPFILES; i++) {
+		if (swap_bdevs[i] == bdev) {
+			memcpy(&swap_bdevs[i], &swap_bdevs[i + 1],
+				(MAX_SWAPFILES - i - 1) * sizeof(*swap_bdevs));
+			swap_bdevs[MAX_SWAPFILES - 1] = NULL;
+			return;
+		}
+	}
+	BUG();
+}
+
+void swap_unplug_io_fn(struct backing_dev_info *unused_bdi)
+{
+	int i;
+
+	down(&swap_bdevs_sem);
+	for (i = 0; i < MAX_SWAPFILES; i++) {
+		struct block_device *bdev = swap_bdevs[i];
+		struct backing_dev_info *bdi;
+
+		if (bdev == NULL)
+			break;
+		bdi = bdev->bd_inode->i_mapping->backing_dev_info;
+		(*bdi->unplug_io_fn)(bdi);
+	}
+	up(&swap_bdevs_sem);
+}
+
 static inline int scan_swap_map(struct swap_info_struct *si)
 {
 	unsigned long offset;
@@ -1088,6 +1145,7 @@ asmlinkage long sys_swapoff(const char _
 		swap_list_unlock();
 		goto out_dput;
 	}
+	down(&swap_bdevs_sem);
 	swap_list_lock();
 	swap_device_lock(p);
 	swap_file = p->swap_file;
@@ -1099,6 +1157,8 @@ asmlinkage long sys_swapoff(const char _
 	destroy_swap_extents(p);
 	swap_device_unlock(p);
 	swap_list_unlock();
+	remove_swap_bdev(p->bdev);
+	up(&swap_bdevs_sem);
 	vfree(swap_map);
 	if (S_ISBLK(mapping->host->i_mode)) {
 		struct block_device *bdev = I_BDEV(mapping->host);
@@ -1414,6 +1474,7 @@ asmlinkage long sys_swapon(const char __
 	if (error)
 		goto bad_swap;
 
+	down(&swap_bdevs_sem);
 	swap_list_lock();
 	swap_device_lock(p);
 	p->flags = SWP_ACTIVE;
@@ -1439,6 +1500,8 @@ asmlinkage long sys_swapon(const char __
 	}
 	swap_device_unlock(p);
 	swap_list_unlock();
+	install_swap_bdev(p->bdev);
+	up(&swap_bdevs_sem);
 	error = 0;
 	goto out;
 bad_swap:
@@ -1458,7 +1521,7 @@ bad_swap_2:
 	destroy_swap_extents(p);
 	if (swap_map)
 		vfree(swap_map);
-	if (swap_file && !IS_ERR(swap_file))
+	if (swap_file)
 		filp_close(swap_file, NULL);
 out:
 	if (page && !IS_ERR(page)) {
diff -puN mm/swap_state.c~per-backing_dev-unplugging mm/swap_state.c
--- 25/mm/swap_state.c~per-backing_dev-unplugging	2004-03-18 18:55:23.153755992 -0800
+++ 25-akpm/mm/swap_state.c	2004-03-18 18:55:23.159755080 -0800
@@ -19,6 +19,7 @@
 static struct backing_dev_info swap_backing_dev_info = {
 	.ra_pages	= 0,	/* No readahead */
 	.memory_backed	= 1,	/* Does not contribute to dirty memory */
+	.unplug_io_fn	= swap_unplug_io_fn,
 };
 
 extern struct address_space_operations swap_aops;
 
diff -puN drivers/block/rd.c~per-backing_dev-unplugging drivers/block/rd.c
--- 25/drivers/block/rd.c~per-backing_dev-unplugging	2004-03-18 18:55:38.921358952 -0800
+++ 25-akpm/drivers/block/rd.c	2004-03-18 18:55:38.998347248 -0800
@@ -271,6 +271,7 @@ static int rd_ioctl(struct inode *inode,
 static struct backing_dev_info rd_backing_dev_info = {
 	.ra_pages	= 0,	/* No readahead */
 	.memory_backed	= 1,	/* Does not contribute to dirty memory */
+	.unplug_io_fn	= default_unplug_io_fn,
 };
 
 static int rd_open(struct inode *inode, struct file *filp)
diff -puN mm/shmem.c~per-backing_dev-unplugging mm/shmem.c
--- 25/mm/shmem.c~per-backing_dev-unplugging	2004-03-18 18:55:38.993348008 -0800
+++ 25-akpm/mm/shmem.c	2004-03-18 18:55:38.996347552 -0800
@@ -133,6 +133,7 @@ static struct vm_operations_struct shmem
 static struct backing_dev_info shmem_backing_dev_info = {
 	.ra_pages	= 0,	/* No readahead */
 	.memory_backed	= 1,	/* Does not contribute to dirty memory */
+	.unplug_io_fn	= default_unplug_io_fn,
 };
 
 LIST_HEAD(shmem_inodes);
_
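
For reference, below is a minimal sketch (not part of the patch) of how a driver
and a waiter interact with the new scheme.  The "exdrv" names are hypothetical;
the unplug_fn signature, backing_dev_info.unplug_io_fn wiring,
blk_run_address_space() and READ_SYNC/BIO_RW_SYNC are the interfaces added by
the patch above.

	/*
	 * Illustrative sketch only -- "exdrv" is a made-up stacked driver
	 * (in the style of loop/dm) showing both sides of the new scheme:
	 * providing an unplug_fn, and kicking one specific backing device
	 * instead of calling the now-removed blk_run_queues().
	 */
	#include <linux/blkdev.h>
	#include <linux/backing-dev.h>
	#include <linux/bio.h>
	#include <linux/fs.h>
	#include <linux/completion.h>
	#include <linux/errno.h>

	struct exdrv_device {
		struct block_device *backing_bdev;	/* device we stack upon */
	};

	/*
	 * New-style unplug callback: it receives the request_queue_t directly
	 * (the old typedef was void (unplug_fn)(void *q)).  A stacked driver
	 * clears its own PLUGGED bit and punts to the device underneath,
	 * just like loop_unplug()/dm_unplug_all() above.
	 */
	static void exdrv_unplug(request_queue_t *q)
	{
		struct exdrv_device *ex = q->queuedata;

		clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
		blk_run_address_space(ex->backing_bdev->bd_inode->i_mapping);
	}

	static void exdrv_init_queue(request_queue_t *q, struct exdrv_device *ex)
	{
		q->queuedata = ex;
		q->unplug_fn = exdrv_unplug;
		/*
		 * Nothing else is needed: blk_alloc_queue() already points the
		 * queue's backing_dev_info at blk_backing_dev_unplug(), which
		 * just calls q->unplug_fn(q).  A waiter doing
		 * blk_run_address_space() on a mapping backed by this queue
		 * therefore reaches exdrv_unplug() -- and only this device --
		 * rather than unplugging every queue in the system.
		 */
	}

	/* 2.6-era bi_end_io convention: return 1 until the whole bio is done */
	static int exdrv_bi_complete(struct bio *bio, unsigned int bytes_done,
				     int error)
	{
		if (bio->bi_size)
			return 1;
		complete((struct completion *)bio->bi_private);
		return 0;
	}

	/*
	 * Synchronous one-off I/O no longer needs an explicit unplug at all:
	 * READ_SYNC sets BIO_RW_SYNC, so __make_request() sees bio_sync(bio)
	 * and unplugs the queue immediately (compare sync_page_io() and the
	 * blkmtd/jfs conversions above).
	 */
	static int exdrv_read_sync(struct bio *bio)
	{
		struct completion event;

		init_completion(&event);
		bio->bi_private = &event;
		bio->bi_end_io = exdrv_bi_complete;
		submit_bio(READ_SYNC, bio);  /* was: submit_bio(READ, bio); blk_run_queues(); */
		wait_for_completion(&event);
		return test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : -EIO;
	}

This is only a sketch of the calling conventions; a real driver would also
handle queue allocation, bio construction and error paths.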