diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/block/DAC960.c linux/drivers/block/DAC960.c --- /opt/kernel/linux-2.5.1-pre2/drivers/block/DAC960.c Wed Nov 28 21:56:29 2001 +++ linux/drivers/block/DAC960.c Wed Nov 28 08:52:26 2001 @@ -1946,7 +1946,7 @@ Initialize the I/O Request Queue. */ RequestQueue = BLK_DEFAULT_QUEUE(MajorNumber); - blk_init_queue(RequestQueue, DAC960_RequestFunction, "dac960"); + blk_init_queue(RequestQueue, DAC960_RequestFunction); blk_queue_headactive(RequestQueue, 0); RequestQueue->queuedata = Controller; RequestQueue->max_segments = Controller->DriverScatterGatherLimit; diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/block/blkpg.c linux/drivers/block/blkpg.c --- /opt/kernel/linux-2.5.1-pre2/drivers/block/blkpg.c Wed Nov 28 21:56:29 2001 +++ linux/drivers/block/blkpg.c Wed Nov 28 09:08:40 2001 @@ -285,10 +285,6 @@ case BLKELVSET: return -ENOTTY; - case BLKHASHPROF: - case BLKHASHCLEAR: - return bio_ioctl(dev, cmd, arg); - case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */ intval = BLOCK_SIZE; diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/block/cciss.c linux/drivers/block/cciss.c --- /opt/kernel/linux-2.5.1-pre2/drivers/block/cciss.c Wed Nov 28 21:56:29 2001 +++ linux/drivers/block/cciss.c Wed Nov 28 08:51:50 2001 @@ -1866,7 +1866,7 @@ q = BLK_DEFAULT_QUEUE(MAJOR_NR + i); q->queuedata = hba[i]; - blk_init_queue(q, do_cciss_request, hba[i]->devname); + blk_init_queue(q, do_cciss_request); blk_queue_headactive(q, 0); blk_queue_bounce_limit(q, hba[i]->pdev->dma_mask); q->max_segments = MAXSGENTRIES; diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/block/cpqarray.c linux/drivers/block/cpqarray.c --- /opt/kernel/linux-2.5.1-pre2/drivers/block/cpqarray.c Wed Nov 28 21:56:29 2001 +++ linux/drivers/block/cpqarray.c Wed Nov 28 08:51:57 2001 @@ -467,7 +467,7 @@ q = BLK_DEFAULT_QUEUE(MAJOR_NR + i); q->queuedata = hba[i]; - blk_init_queue(q, do_ida_request, hba[i]->devname); + blk_init_queue(q, do_ida_request); blk_queue_headactive(q, 0); blk_queue_bounce_limit(q, hba[i]->pci_dev->dma_mask); q->max_segments = SG_MAX; diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/block/elevator.c linux/drivers/block/elevator.c --- /opt/kernel/linux-2.5.1-pre2/drivers/block/elevator.c Wed Nov 28 21:56:29 2001 +++ linux/drivers/block/elevator.c Wed Nov 28 09:03:26 2001 @@ -20,7 +20,6 @@ * * Jens: * - Rework again to work with bio instead of buffer_heads - * - added merge by hash-lookup * - loose bi_dev comparisons, partition handling is right now * - completely modularize elevator setup and teardown * @@ -106,102 +105,54 @@ return 0; } -/* - * find a struct request that has a bio linked that we can merge with - */ -inline struct request *bio_get_hash_rq(kdev_t dev, sector_t sector, int vc) -{ - struct bio *bio = bio_hash_find(dev, sector, vc); - struct request *rq = NULL; - - /* - * bio is pinned until we bio_put it - */ - if (bio) { - rq = bio->bi_hash_desc; - - BUG_ON(!rq); - - bio_put(bio); - } - - return rq; -} - int elevator_linus_merge(request_queue_t *q, struct request **req, struct list_head *head, struct bio *bio) { unsigned int count = bio_sectors(bio); - struct elv_linus_data *edat = q->elevator.elevator_data; - unsigned int vc = q->hash_valid_counter; - struct list_head *entry; + struct list_head *entry = &q->queue_head; + int ret = ELEVATOR_NO_MERGE; struct request *__rq; - /* - * first try a back merge, then front, then give up and scan. 
this - * will of course fail for different size bios on the same queue, - * however that isn't really an issue - */ - if (likely(edat->flags & ELV_LINUS_BACK_MERGE)) { - __rq = bio_get_hash_rq(bio->bi_dev, bio->bi_sector - count, vc); - if (__rq) { - if (!elv_rq_merge_ok(q, __rq, bio)) - goto front; - - /* - * looks ok to merge - */ - if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { - *req = __rq; - return ELEVATOR_BACK_MERGE; - } - } - } - -front: - if (likely(edat->flags & ELV_LINUS_FRONT_MERGE)) { - __rq = bio_get_hash_rq(bio->bi_dev, bio->bi_sector + count, vc); - if (__rq) { - if (!elv_rq_merge_ok(q, __rq, bio)) - goto scan; - - /* - * looks ok to merge - */ - if (__rq->sector - count == bio->bi_sector) { - *req = __rq; - return ELEVATOR_FRONT_MERGE; - } - } - } - - /* - * no merge possible, scan for insertion - */ -scan: entry = &q->queue_head; while ((entry = entry->prev) != head) { __rq = list_entry_rq(entry); prefetch(list_entry_rq(entry->prev)); + /* + * simply "aging" of requests in queue + */ + if (__rq->elevator_sequence-- <= 0) + break; + if (unlikely(__rq->waiting || __rq->special)) continue; if (unlikely(!__rq->inactive)) break; if (!*req && bio_rq_in_between(bio, __rq, &q->queue_head)) *req = __rq; + if (!elv_rq_merge_ok(q, __rq, bio)) + continue; + + if (__rq->elevator_sequence < count) + break; /* - * simple "aging" of requests in queue + * we can merge and sequence is ok, check if it's possible */ - if (__rq->elevator_sequence-- <= 0) + if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { + ret = ELEVATOR_BACK_MERGE; + *req = __rq; break; - else if (__rq->elevator_sequence < count) + } else if (__rq->sector - count == bio->bi_sector) { + ret = ELEVATOR_FRONT_MERGE; + __rq->elevator_sequence -= count; + *req = __rq; break; + } } - return ELEVATOR_NO_MERGE; + return ret; } void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int count) @@ -231,10 +182,6 @@ void elv_add_request_fn(request_queue_t *q, struct request *rq, struct list_head *insert_here) { - /* - * insert into queue pending list, merge hash, and possible latency - * list - */ list_add(&rq->queuelist, insert_here); } @@ -248,78 +195,60 @@ int elv_linus_init(request_queue_t *q, elevator_t *e) { - struct elv_linus_data *edata; - - edata = kmalloc(sizeof(struct elv_linus_data), GFP_ATOMIC); - if (!edata) - return -ENOMEM; - - /* - * default to doing both front and back merges - */ - edata->flags = ELV_LINUS_BACK_MERGE | ELV_LINUS_FRONT_MERGE; - e->elevator_data = edata; return 0; } void elv_linus_exit(request_queue_t *q, elevator_t *e) { - kfree(e->elevator_data); } /* * See if we can find a request that this buffer can be coalesced with. 
*/ int elevator_noop_merge(request_queue_t *q, struct request **req, - struct list_head * head, struct bio *bio) + struct list_head *head, struct bio *bio) { + unsigned int count = bio_sectors(bio); + struct list_head *entry = &q->queue_head; struct request *__rq; - int count, ret; - unsigned int vc; - count = bio_sectors(bio); - ret = ELEVATOR_NO_MERGE; - vc = q->hash_valid_counter; + entry = &q->queue_head; + while ((entry = entry->prev) != head) { + __rq = list_entry_rq(entry); + + prefetch(list_entry_rq(entry->prev)); - __rq = bio_get_hash_rq(bio->bi_dev, bio->bi_sector - count, vc); - if (__rq) { + if (unlikely(__rq->waiting || __rq->special)) + continue; + if (unlikely(!__rq->inactive)) + break; if (!elv_rq_merge_ok(q, __rq, bio)) - goto front; + continue; + /* + * we can merge and sequence is ok, check if it's possible + */ if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { - ret = ELEVATOR_BACK_MERGE; *req = __rq; - goto out; - } - } - -front: - __rq = bio_get_hash_rq(bio->bi_dev, bio->bi_sector + count, vc); - if (__rq) { - if (!elv_rq_merge_ok(q, __rq, bio)) - goto out; - - if (__rq->sector - count == bio->bi_sector) { - ret = ELEVATOR_FRONT_MERGE; + return ELEVATOR_BACK_MERGE; + } else if (__rq->sector - count == bio->bi_sector) { *req = __rq; - goto out; + return ELEVATOR_FRONT_MERGE; } } -out: - return ret; + return ELEVATOR_NO_MERGE; } void elevator_noop_merge_cleanup(request_queue_t *q, struct request *req, int count) {} void elevator_noop_merge_req(struct request *req, struct request *next) {} -int elevator_init(request_queue_t *q, elevator_t *e, elevator_t type,char *name) +int elevator_init(request_queue_t *q, elevator_t *e, elevator_t type) { *e = type; INIT_LIST_HEAD(&q->queue_head); - strncpy(e->queue_name, name, 15); if (e->elevator_init_fn) return e->elevator_init_fn(q, e); diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/block/floppy.c linux/drivers/block/floppy.c --- /opt/kernel/linux-2.5.1-pre2/drivers/block/floppy.c Wed Nov 28 21:56:29 2001 +++ linux/drivers/block/floppy.c Wed Nov 28 08:52:06 2001 @@ -4170,7 +4170,7 @@ blk_size[MAJOR_NR] = floppy_sizes; blksize_size[MAJOR_NR] = floppy_blocksizes; - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST, "floppy"); + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); reschedule_timeout(MAXTIMEOUT, "floppy init", MAXTIMEOUT); config_types(); diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c --- /opt/kernel/linux-2.5.1-pre2/drivers/block/ll_rw_blk.c Wed Nov 28 21:56:29 2001 +++ linux/drivers/block/ll_rw_blk.c Wed Nov 28 21:02:21 2001 @@ -243,6 +243,16 @@ q->hardsect_size = size; } +/** + * blk_queue_segment_boundary - set boundary rules for segment merging + * @q: the request queue for the device + * @mask: the memory boundary mask + **/ +void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask) +{ + q->seg_boundary_mask = mask; +} + /* * can we merge the two segments, or do we need to start a new one? 
*/ @@ -256,10 +266,10 @@ return 0; /* - * bio and nxt are contigous, if they don't span a 4GB mem boundary - * return ok + * bio and nxt are contigous in memory, check if the queue allows + * these two to be merged into one */ - if (BIO_PHYS_4G(bio, nxt)) + if (BIO_SEG_BOUNDARY(q, bio, nxt)) return 1; return 0; @@ -274,11 +284,12 @@ unsigned long long lastend; struct bio_vec *bvec; struct bio *bio; - int nsegs, i; + int nsegs, i, cluster; nsegs = 0; bio = rq->bio; lastend = ~0ULL; + cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); /* * for each bio in rq @@ -290,22 +301,22 @@ bio_for_each_segment(bvec, bio, i) { int nbytes = bvec->bv_len; - BIO_BUG_ON(i > bio->bi_io_vec->bvl_cnt); + BIO_BUG_ON(i > bio->bi_vcnt); + + if (!cluster) + goto new_segment; if (bvec_to_phys(bvec) == lastend) { - if (sg[nsegs - 1].length + nbytes > q->max_segment_size) { - printk("blk_rq_map_sg: %d segment size exceeded\n", q->max_segment_size); + if (sg[nsegs - 1].length + nbytes > q->max_segment_size) goto new_segment; - } /* - * make sure to not map a 4GB boundary into - * same sg entry + * make sure to not map a segment across a + * boundary that the queue doesn't want */ - if (!__BIO_PHYS_4G(lastend, lastend + nbytes)) { - printk("blk_rq_map_sg: 4GB cross\n"); + if (!__BIO_SEG_BOUNDARY(lastend, lastend + nbytes, q->seg_boundary_mask)) lastend = ~0ULL; - } else + else lastend += nbytes; sg[nsegs - 1].length += nbytes; @@ -549,14 +560,14 @@ * blk_init_queue() must be paired with a blk_cleanup_queue() call * when the block device is deactivated (such as at module unload). **/ -int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, char *name) +int blk_init_queue(request_queue_t *q, request_fn_proc *rfn) { int ret; if (blk_init_free_list(q)) return -ENOMEM; - if ((ret = elevator_init(q, &q->elevator, ELEVATOR_LINUS, name))) { + if ((ret = elevator_init(q, &q->elevator, ELEVATOR_LINUS))) { blk_cleanup_queue(q); return ret; } @@ -568,13 +579,15 @@ q->plug_tq.sync = 0; q->plug_tq.routine = &generic_unplug_device; q->plug_tq.data = q; - q->queue_flags = 0; - + q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); + /* * by default assume old behaviour and bounce for any highmem page */ blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); + blk_queue_segment_boundary(q, 0xffffffff); + blk_queue_make_request(q, __make_request); blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); return 0; @@ -709,17 +722,6 @@ req->q = NULL; /* - * should only happen on freereq logic in __make_request, in which - * case we don't want to prune these entries from the hash - */ -#if 1 - if (req->bio) - bio_hash_remove(req->bio); - if (req->biotail) - bio_hash_remove(req->biotail); -#endif - - /* * Request may not have originated from ll_rw_blk. 
if not, * assume it has free buffers and check waiters */ @@ -756,11 +758,6 @@ if (q->merge_requests_fn(q, req, next)) { q->elevator.elevator_merge_req_fn(req, next); - bio_hash_remove(req->biotail); - - /* - * will handle dangling hash too - */ blkdev_dequeue_request(next); req->biotail->bi_next = next->bio; @@ -768,8 +765,6 @@ next->bio = next->biotail = NULL; - bio_hash_add_unique(req->biotail, req, q->hash_valid_counter); - req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; blkdev_release_request(next); @@ -857,10 +852,8 @@ * the back of the queue and invalidate the entire existing merge hash * for this device */ - if (barrier && !freereq) { + if (barrier && !freereq) latency = 0; - bio_hash_invalidate(q, bio->bi_dev); - } insert_here = head->prev; if (blk_queue_empty(q) || barrier) { @@ -887,8 +880,6 @@ break; elevator->elevator_merge_cleanup_fn(q, req, nr_sectors); - bio_hash_remove(req->biotail); - req->biotail->bi_next = bio; req->biotail = bio; req->nr_sectors = req->hard_nr_sectors += nr_sectors; @@ -903,8 +894,6 @@ break; elevator->elevator_merge_cleanup_fn(q, req, nr_sectors); - bio_hash_remove(req->bio); - bio->bi_next = req->bio; req->bio = bio; /* @@ -973,7 +962,7 @@ req->hard_sector = req->sector = sector; req->hard_nr_sectors = req->nr_sectors = nr_sectors; req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors; - req->nr_segments = bio->bi_io_vec->bvl_cnt; + req->nr_segments = bio->bi_vcnt; req->nr_hw_segments = req->nr_segments; req->buffer = bio_data(bio); /* see ->buffer comment above */ req->waiting = NULL; @@ -987,7 +976,6 @@ } spin_unlock_irq(&q->queue_lock); - bio_hash_add_unique(bio, req, q->hash_valid_counter); return 0; end_io: @@ -1035,13 +1023,13 @@ * * The caller of generic_make_request must make sure that bi_io_vec * are set to describe the memory buffer, and that bi_dev and bi_sector are - & set to describe the device address, and the + * set to describe the device address, and the * bi_end_io and optionally bi_private are set to describe how * completion notification should be signaled. * * generic_make_request and the drivers it calls may use bi_next if this * bio happens to be merged with someone else, and may change bi_dev and - * bi_rsector for remaps as it sees fit. So the values of these fields + * bi_sector for remaps as it sees fit. So the values of these fields * should NOT be depended on after the call to generic_make_request. * * */ @@ -1121,11 +1109,6 @@ BIO_BUG_ON(nr_sectors != (bh->b_size >> 9)); - /* - * I/O is complete -- remove from hash, end buffer_head, put bio - */ - bio_hash_remove(bio); - bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags)); bio_put(bio); @@ -1194,13 +1177,13 @@ bio->bi_private = bh; bio->bi_end_io = end_bio_bh_io_sync; - bio->bi_io_vec->bvl_vec[0].bv_page = bh->b_page; - bio->bi_io_vec->bvl_vec[0].bv_len = bh->b_size; - bio->bi_io_vec->bvl_vec[0].bv_offset = bh_offset(bh); - - bio->bi_io_vec->bvl_cnt = 1; - bio->bi_io_vec->bvl_idx = 0; - bio->bi_io_vec->bvl_size = bh->b_size; + bio->bi_io_vec[0].bv_page = bh->b_page; + bio->bi_io_vec[0].bv_len = bh->b_size; + bio->bi_io_vec[0].bv_offset = bh_offset(bh); + + bio->bi_vcnt = 1; + bio->bi_idx = 0; + bio->bi_size = bh->b_size; return submit_bio(rw, bio); } @@ -1317,9 +1300,9 @@ /** * end_that_request_first - end I/O on one buffer. 
- * &q: queue that finished request * @req: the request being processed * @uptodate: 0 for I/O error + * @nr_sectors: number of sectors to end I/O on * * Description: * Ends I/O on the first buffer attached to @req, and sets it up @@ -1354,7 +1337,6 @@ bio->bi_next = nxt; if ((bio = req->bio) != NULL) { - bio_hash_add_unique(bio,req,req->q->hash_valid_counter); req->hard_sector += nsect; req->hard_nr_sectors -= nsect; req->sector = req->hard_sector; diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/block/loop.c linux/drivers/block/loop.c --- /opt/kernel/linux-2.5.1-pre2/drivers/block/loop.c Wed Nov 28 21:56:07 2001 +++ linux/drivers/block/loop.c Wed Nov 28 21:09:16 2001 @@ -330,7 +330,10 @@ * check bi_end_io, may just be a remapped bio */ if (bio && bio->bi_end_io == loop_end_io_transfer) { - __free_page(bio_page(bio)); + int i; + for (i = 0; i < bio->bi_vcnt; i++) + __free_page(bio->bi_io_vec[i].bv_page); + bio_put(bio); } } @@ -398,7 +401,6 @@ static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh) { - struct page *page; struct bio *bio; /* @@ -409,29 +411,7 @@ goto out_bh; } - bio = bio_alloc(GFP_NOIO, 1); - - /* - * easy way out, although it does waste some memory for < PAGE_SIZE - * blocks... if highmem bounce buffering can get away with it, - * so can we :-) - */ - do { - page = alloc_page(GFP_NOIO); - if (page) - break; - - run_task_queue(&tq_disk); - schedule_timeout(HZ); - } while (1); - - bio->bi_io_vec->bvl_vec[0].bv_page = page; - bio->bi_io_vec->bvl_vec[0].bv_len = bio_size(rbh); - bio->bi_io_vec->bvl_vec[0].bv_offset = bio_offset(rbh); - - bio->bi_io_vec->bvl_cnt = 1; - bio->bi_io_vec->bvl_idx = 1; - bio->bi_io_vec->bvl_size = bio_size(rbh); + bio = bio_copy(rbh, GFP_NOIO, rbh->bi_rw & WRITE); bio->bi_end_io = loop_end_io_transfer; bio->bi_private = rbh; diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/block/nbd.c linux/drivers/block/nbd.c --- /opt/kernel/linux-2.5.1-pre2/drivers/block/nbd.c Wed Nov 28 21:56:29 2001 +++ linux/drivers/block/nbd.c Wed Nov 28 08:52:14 2001 @@ -501,7 +501,7 @@ #endif blksize_size[MAJOR_NR] = nbd_blksizes; blk_size[MAJOR_NR] = nbd_sizes; - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request, "nbd"); + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request); blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); for (i = 0; i < MAX_NBD; i++) { nbd_dev[i].refcnt = 0; diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/ide/ide-probe.c linux/drivers/ide/ide-probe.c --- /opt/kernel/linux-2.5.1-pre2/drivers/ide/ide-probe.c Wed Nov 28 21:56:29 2001 +++ linux/drivers/ide/ide-probe.c Wed Nov 28 14:29:39 2001 @@ -597,7 +597,8 @@ int max_sectors; q->queuedata = HWGROUP(drive); - blk_init_queue(q, do_ide_request, drive->name); + blk_init_queue(q, do_ide_request); + blk_queue_segment_boundary(q, 0xffff); /* IDE can do up to 128K per request, pdc4030 needs smaller limit */ #ifdef CONFIG_BLK_DEV_PDC4030 diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/ide/ide.c linux/drivers/ide/ide.c --- /opt/kernel/linux-2.5.1-pre2/drivers/ide/ide.c Wed Nov 28 21:56:29 2001 +++ linux/drivers/ide/ide.c Wed Nov 28 09:09:18 2001 @@ -2834,8 +2834,6 @@ case BLKELVSET: case BLKBSZGET: case BLKBSZSET: - case BLKHASHPROF: - case BLKHASHCLEAR: return blk_ioctl(inode->i_rdev, cmd, arg); case HDIO_GET_BUSSTATE: diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/message/i2o/i2o_block.c linux/drivers/message/i2o/i2o_block.c --- 
/opt/kernel/linux-2.5.1-pre2/drivers/message/i2o/i2o_block.c Wed Nov 28 21:56:29 2001 +++ linux/drivers/message/i2o/i2o_block.c Wed Nov 28 09:16:20 2001 @@ -410,7 +410,7 @@ * unlocked. */ - while (end_that_request_first(req, !req->errors)) + while (end_that_request_first(req, !req->errors, req->hard_cur_sectors)) ; /* @@ -459,12 +459,6 @@ struct i2ob_device *dev = &i2ob_dev[(unit&0xF0)]; /* - * Pull the lock over ready - */ - - spin_lock_prefetch(&io_request_lock); - - /* * FAILed message */ if(m[0] & (1<<13)) @@ -1405,7 +1399,6 @@ */ static int i2ob_init_iop(unsigned int unit) { - char name[16]; int i; i2ob_queues[unit] = (struct i2ob_iop_queue*) @@ -1429,8 +1422,7 @@ i2ob_queues[unit]->i2ob_qhead = &i2ob_queues[unit]->request_queue[0]; atomic_set(&i2ob_queues[unit]->queue_depth, 0); - sprintf(name, "i2o%d", unit); - blk_init_queue(&i2ob_queues[unit]->req_queue, i2ob_request, name); + blk_init_queue(&i2ob_queues[unit]->req_queue, i2ob_request); blk_queue_headactive(&i2ob_queues[unit]->req_queue, 0); i2ob_queues[unit]->req_queue.queuedata = &i2ob_queues[unit]; @@ -1829,7 +1821,7 @@ blk_size[MAJOR_NR] = i2ob_sizes; blk_dev[MAJOR_NR].queue = i2ob_get_queue; - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request, "i2o"); + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request); blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); for (i = 0; i < MAX_I2OB << 4; i++) { diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/message/i2o/i2o_scsi.c linux/drivers/message/i2o/i2o_scsi.c --- /opt/kernel/linux-2.5.1-pre2/drivers/message/i2o/i2o_scsi.c Wed Nov 28 21:56:29 2001 +++ linux/drivers/message/i2o/i2o_scsi.c Wed Nov 28 09:17:43 2001 @@ -151,11 +151,10 @@ static void i2o_scsi_reply(struct i2o_handler *h, struct i2o_controller *c, struct i2o_message *msg) { Scsi_Cmnd *current_command; + spinlock_t *lock; u32 *m = (u32 *)msg; u8 as,ds,st; - spin_lock_prefetch(&io_request_lock); - if(m[0] & (1<<13)) { printk("IOP fail.\n"); @@ -190,12 +189,13 @@ { /* Create a scsi error for this */ current_command = (Scsi_Cmnd *)m[3]; + lock = ¤t_command->host->host_lock; printk("Aborted %ld\n", current_command->serial_number); - spin_lock_irq(&io_request_lock); + spin_lock_irq(lock); current_command->result = DID_ERROR << 16; current_command->scsi_done(current_command); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(lock); /* Now flush the message by making it a NOP */ m[0]&=0x00FFFFFF; @@ -284,9 +284,10 @@ * It worked maybe ? 
*/ current_command->result = DID_OK << 16 | ds; - spin_lock(&io_request_lock); + lock = ¤t_command->host->host_lock; + spin_lock(lock); current_command->scsi_done(current_command); - spin_unlock(&io_request_lock); + spin_unlock(lock); return; } diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/scsi/ide-scsi.c linux/drivers/scsi/ide-scsi.c --- /opt/kernel/linux-2.5.1-pre2/drivers/scsi/ide-scsi.c Wed Nov 28 21:56:07 2001 +++ linux/drivers/scsi/ide-scsi.c Wed Nov 28 21:51:54 2001 @@ -661,11 +661,12 @@ if ((first_bh = bhp = bh = bio_alloc(GFP_ATOMIC, 1)) == NULL) goto abort; - memset (bh, 0, sizeof (struct bio)); + bio_init(bh); while (--count) { if ((bh = bio_alloc(GFP_ATOMIC, 1)) == NULL) goto abort; - memset (bh, 0, sizeof (struct bio)); + bio_init(bh); + bh->bi_vcnt = 1; bhp->bi_next = bh; bhp = bh; bh->bi_next = NULL; @@ -707,10 +708,16 @@ printk ("ide-scsi: %s: building DMA table, %d segments, %dkB total\n", drive->name, segments, pc->request_transfer >> 10); #endif /* IDESCSI_DEBUG_LOG */ while (segments--) { - bh->bi_io_vec.bv_page = sg->page; - bh->bi_io_vec.bv_len = sg->length; - bh->bi_io_vec.bv_offset = sg->offset; + bh->bi_io_vec[0].bv_page = sg->page; + bh->bi_io_vec[0].bv_len = sg->length; + bh->bi_io_vec[0].bv_offset = sg->offset; + bh->bi_size = sg->length; bh = bh->bi_next; + /* + * just until scsi_merge is fixed up... + */ + BUG_ON(PageHighMem(sg->page)); + sg->address = page_address(sg->page) + sg->offset; sg++; } } else { @@ -719,9 +726,10 @@ #if IDESCSI_DEBUG_LOG printk ("ide-scsi: %s: building DMA table for a single buffer (%dkB)\n", drive->name, pc->request_transfer >> 10); #endif /* IDESCSI_DEBUG_LOG */ - bh->bi_io_vec.bv_page = virt_to_page(pc->scsi_cmd->request_buffer); - bh->bi_io_vec.bv_len = pc->request_transfer; - bh->bi_io_vec.bv_offset = (unsigned long) pc->scsi_cmd->request_buffer & ~PAGE_MASK; + bh->bi_io_vec[0].bv_page = virt_to_page(pc->scsi_cmd->request_buffer); + bh->bi_io_vec[0].bv_len = pc->request_transfer; + bh->bi_io_vec[0].bv_offset = (unsigned long) pc->scsi_cmd->request_buffer & ~PAGE_MASK; + bh->bi_size = pc->request_transfer; } return first_bh; } diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/scsi/scsi.c linux/drivers/scsi/scsi.c --- /opt/kernel/linux-2.5.1-pre2/drivers/scsi/scsi.c Wed Nov 28 21:56:29 2001 +++ linux/drivers/scsi/scsi.c Wed Nov 28 12:23:27 2001 @@ -188,12 +188,9 @@ */ void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) { - char name[16]; - request_queue_t *q = &SDpnt->request_queue; - sprintf(name, "scsi%d%d%d", SDpnt->id, SDpnt->lun, SDpnt->channel); - blk_init_queue(q, scsi_request_fn, name); + blk_init_queue(q, scsi_request_fn); blk_queue_headactive(q, 0); q->queuedata = (void *) SDpnt; #ifdef DMA_CHUNK_SIZE @@ -202,6 +199,9 @@ blk_queue_max_segments(q, SHpnt->sg_tablesize); #endif blk_queue_max_sectors(q, SHpnt->max_sectors); + + if (!SHpnt->use_clustering) + clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); } #ifdef MODULE diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/scsi/scsi_merge.c linux/drivers/scsi/scsi_merge.c --- /opt/kernel/linux-2.5.1-pre2/drivers/scsi/scsi_merge.c Wed Nov 28 21:56:07 2001 +++ linux/drivers/scsi/scsi_merge.c Wed Nov 28 21:05:27 2001 @@ -107,15 +107,6 @@ } /* - * FIXME(eric) - the original disk code disabled clustering for MOD - * devices. I have no idea why we thought this was a good idea - my - * guess is that it was an attempt to limit the size of requests to MOD - * devices. 
- */ -#define CLUSTERABLE_DEVICE(SH,SD) (SH->use_clustering && \ - SD->type != TYPE_MOD) - -/* * This entire source file deals with the new queueing code. */ @@ -126,7 +117,6 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * use_clustering - 1 if this host wishes to use clustering * dma_host - 1 if this host has ISA DMA issues (bus doesn't * expose all of the address lines, so that DMA cannot * be done from an arbitrary address). @@ -141,13 +131,14 @@ * Notes: This is only used for diagnostic purposes. */ __inline static int __count_segments(struct request *req, - int use_clustering, int dma_host, int * remainder) { int ret = 1; int reqsize = 0; - struct bio *bio, *bionext; + int i; + struct bio *bio; + struct bio_vec *bvec; if (remainder) reqsize = *remainder; @@ -161,54 +152,16 @@ ret++; #endif - for (bio = req->bio, bionext = bio->bi_next; - bionext != NULL; - bio = bionext, bionext = bio->bi_next) { - if (use_clustering) { - /* - * See if we can do this without creating another - * scatter-gather segment. In the event that this is a - * DMA capable host, make sure that a segment doesn't span - * the DMA threshold boundary. - */ - if (dma_host && bio_to_phys(bionext) - 1 == ISA_DMA_THRESHOLD) { - ret++; - reqsize = bio_size(bionext); - } else if (BIO_CONTIG(bio, bionext)) { - /* - * This one is OK. Let it go. - */ -#ifdef DMA_SEGMENT_SIZE_LIMITED - /* Note scsi_malloc is only able to hand out - * chunks of memory in sizes of PAGE_SIZE or - * less. Thus we need to keep track of - * the size of the piece that we have - * seen so far, and if we have hit - * the limit of PAGE_SIZE, then we are - * kind of screwed and we need to start - * another segment. - */ - if(dma_host && bio_to_phys(bionext) - 1 >= ISA_DMA_THRESHOLD - && reqsize + bio_size(bionext) > PAGE_SIZE ) - { - ret++; - reqsize = bio_size(bionext); - continue; - } -#endif - reqsize += bio_size(bionext); - continue; - } - ret++; - reqsize = bio_size(bionext); - } else { + rq_for_each_bio(bio, req) { + bio_for_each_segment(bvec, bio, i) ret++; - reqsize = bio_size(bionext); - } + + reqsize += bio_size(bio); } - if( remainder != NULL ) { + + if (remainder) *remainder = reqsize; - } + return ret; } @@ -244,9 +197,7 @@ SHpnt = SCpnt->host; SDpnt = SCpnt->device; - req->nr_segments = __count_segments(req, - CLUSTERABLE_DEVICE(SHpnt, SDpnt), - SHpnt->unchecked_isa_dma, NULL); + req->nr_segments = __count_segments(req, SHpnt->unchecked_isa_dma,NULL); } #define MERGEABLE_BUFFERS(X,Y) \ @@ -317,7 +268,6 @@ * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. * bio - Block which we may wish to merge into request - * use_clustering - 1 if this host wishes to use clustering * dma_host - 1 if this host has ISA DMA issues (bus doesn't * expose all of the address lines, so that DMA cannot * be done from an arbitrary address). @@ -335,9 +285,9 @@ * * This function is not designed to be directly called. Instead * it should be referenced from other functions where the - * use_clustering and dma_host parameters should be integer - * constants. The compiler should thus be able to properly - * optimize the code, eliminating stuff that is irrelevant. + * dma_host parameter should be an integer constant. The + * compiler should thus be able to properly optimize the code, + * eliminating stuff that is irrelevant. 
* It is more maintainable to do this way with a single function * than to have 4 separate functions all doing roughly the * same thing. @@ -345,45 +295,15 @@ __inline static int __scsi_back_merge_fn(request_queue_t * q, struct request *req, struct bio *bio, - int use_clustering, int dma_host) { - unsigned int count; - unsigned int segment_size = 0; Scsi_Device *SDpnt = q->queuedata; if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) return 0; - else if (!BIO_PHYS_4G(req->biotail, bio)) + else if (!BIO_SEG_BOUNDARY(q, req->biotail, bio)) return 0; - if (use_clustering) { - /* - * See if we can do this without creating another - * scatter-gather segment. In the event that this is a - * DMA capable host, make sure that a segment doesn't span - * the DMA threshold boundary. - */ - if (dma_host && bio_to_phys(req->biotail) - 1 == ISA_DMA_THRESHOLD) { - goto new_end_segment; - } - if (BIO_CONTIG(req->biotail, bio)) { -#ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host && bio_to_phys(bio) - 1 >= ISA_DMA_THRESHOLD ) { - segment_size = 0; - count = __count_segments(req, use_clustering, dma_host, &segment_size); - if( segment_size + bio_size(bio) > PAGE_SIZE ) { - goto new_end_segment; - } - } -#endif - /* - * This one is OK. Let it go. - */ - return 1; - } - } - new_end_segment: #ifdef DMA_CHUNK_SIZE if (MERGEABLE_BUFFERS(req->biotail, bio)) return scsi_new_mergeable(q, req, SDpnt->host); @@ -394,45 +314,15 @@ __inline static int __scsi_front_merge_fn(request_queue_t * q, struct request *req, struct bio *bio, - int use_clustering, int dma_host) { - unsigned int count; - unsigned int segment_size = 0; Scsi_Device *SDpnt = q->queuedata; if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) return 0; - else if (!BIO_PHYS_4G(bio, req->bio)) + else if (!BIO_SEG_BOUNDARY(q, bio, req->bio)) return 0; - if (use_clustering) { - /* - * See if we can do this without creating another - * scatter-gather segment. In the event that this is a - * DMA capable host, make sure that a segment doesn't span - * the DMA threshold boundary. - */ - if (dma_host && bio_to_phys(bio) - 1 == ISA_DMA_THRESHOLD) { - goto new_start_segment; - } - if (BIO_CONTIG(bio, req->bio)) { -#ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host && bio_to_phys(bio) - 1 >= ISA_DMA_THRESHOLD ) { - segment_size = bio_size(bio); - count = __count_segments(req, use_clustering, dma_host, &segment_size); - if( count != req->nr_segments ) { - goto new_start_segment; - } - } -#endif - /* - * This one is OK. Let it go. - */ - return 1; - } - } - new_start_segment: #ifdef DMA_CHUNK_SIZE if (MERGEABLE_BUFFERS(bio, req->bio)) return scsi_new_mergeable(q, req, SDpnt->host); @@ -457,7 +347,7 @@ * Notes: Optimized for different cases depending upon whether * ISA DMA is in use and whether clustering should be used. */ -#define MERGEFCT(_FUNCTION, _BACK_FRONT, _CLUSTER, _DMA) \ +#define MERGEFCT(_FUNCTION, _BACK_FRONT, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ struct bio *bio) \ @@ -466,21 +356,15 @@ ret = __scsi_ ## _BACK_FRONT ## _merge_fn(q, \ req, \ bio, \ - _CLUSTER, \ _DMA); \ return ret; \ } -/* Version with use_clustering 0 and dma_host 1 is not necessary, - * since the only use of dma_host above is protected by use_clustering. 
- */ -MERGEFCT(scsi_back_merge_fn_, back, 0, 0) -MERGEFCT(scsi_back_merge_fn_c, back, 1, 0) -MERGEFCT(scsi_back_merge_fn_dc, back, 1, 1) - -MERGEFCT(scsi_front_merge_fn_, front, 0, 0) -MERGEFCT(scsi_front_merge_fn_c, front, 1, 0) -MERGEFCT(scsi_front_merge_fn_dc, front, 1, 1) +MERGEFCT(scsi_back_merge_fn_, back, 0) +MERGEFCT(scsi_back_merge_fn_d, back, 1) + +MERGEFCT(scsi_front_merge_fn_, front, 0) +MERGEFCT(scsi_front_merge_fn_d, front, 1) /* * Function: __scsi_merge_requests_fn() @@ -490,7 +374,6 @@ * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. * next - 2nd request that we might want to combine with req - * use_clustering - 1 if this host wishes to use clustering * dma_host - 1 if this host has ISA DMA issues (bus doesn't * expose all of the address lines, so that DMA cannot * be done from an arbitrary address). @@ -505,20 +388,10 @@ * function is called from ll_rw_blk before it attempts to merge * a new block into a request to make sure that the request will * not become too large. - * - * This function is not designed to be directly called. Instead - * it should be referenced from other functions where the - * use_clustering and dma_host parameters should be integer - * constants. The compiler should thus be able to properly - * optimize the code, eliminating stuff that is irrelevant. - * It is more maintainable to do this way with a single function - * than to have 4 separate functions all doing roughly the - * same thing. */ __inline static int __scsi_merge_requests_fn(request_queue_t * q, struct request *req, struct request *next, - int use_clustering, int dma_host) { Scsi_Device *SDpnt; @@ -530,7 +403,7 @@ */ if (req->special || next->special) return 0; - else if (!BIO_PHYS_4G(req->biotail, next->bio)) + else if (!BIO_SEG_BOUNDARY(q, req->biotail, next->bio)) return 0; SDpnt = (Scsi_Device *) q->queuedata; @@ -559,51 +432,6 @@ if ((req->nr_sectors + next->nr_sectors) > SHpnt->max_sectors) return 0; - /* - * The main question is whether the two segments at the boundaries - * would be considered one or two. - */ - if (use_clustering) { - /* - * See if we can do this without creating another - * scatter-gather segment. In the event that this is a - * DMA capable host, make sure that a segment doesn't span - * the DMA threshold boundary. - */ - if (dma_host && bio_to_phys(req->biotail) - 1 == ISA_DMA_THRESHOLD) { - goto dont_combine; - } -#ifdef DMA_SEGMENT_SIZE_LIMITED - /* - * We currently can only allocate scatter-gather bounce - * buffers in chunks of PAGE_SIZE or less. - */ - if (dma_host - && BIO_CONTIG(req->biotail, next->bio) - && bio_to_phys(req->biotail) - 1 >= ISA_DMA_THRESHOLD ) - { - int segment_size = 0; - int count = 0; - - count = __count_segments(req, use_clustering, dma_host, &segment_size); - count += __count_segments(next, use_clustering, dma_host, &segment_size); - if( count != req->nr_segments + next->nr_segments ) { - goto dont_combine; - } - } -#endif - if (BIO_CONTIG(req->biotail, next->bio)) { - /* - * This one is OK. Let it go. - */ - req->nr_segments += next->nr_segments - 1; -#ifdef DMA_CHUNK_SIZE - req->nr_hw_segments += next->nr_hw_segments - 1; -#endif - return 1; - } - } - dont_combine: #ifdef DMA_CHUNK_SIZE if (req->nr_segments + next->nr_segments > q->max_segments) return 0; @@ -657,22 +485,18 @@ * Notes: Optimized for different cases depending upon whether * ISA DMA is in use and whether clustering should be used. 
*/ -#define MERGEREQFCT(_FUNCTION, _CLUSTER, _DMA) \ +#define MERGEREQFCT(_FUNCTION, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ struct request * next) \ { \ int ret; \ - ret = __scsi_merge_requests_fn(q, req, next, _CLUSTER, _DMA); \ + ret = __scsi_merge_requests_fn(q, req, next, _DMA); \ return ret; \ } -/* Version with use_clustering 0 and dma_host 1 is not necessary, - * since the only use of dma_host above is protected by use_clustering. - */ -MERGEREQFCT(scsi_merge_requests_fn_, 0, 0) -MERGEREQFCT(scsi_merge_requests_fn_c, 1, 0) -MERGEREQFCT(scsi_merge_requests_fn_dc, 1, 1) +MERGEREQFCT(scsi_merge_requests_fn_, 0) +MERGEREQFCT(scsi_merge_requests_fn_d, 1) /* * Function: __init_io() * @@ -680,7 +504,6 @@ * * Arguments: SCpnt - Command descriptor we wish to initialize * sg_count_valid - 1 if the sg count in the req is valid. - * use_clustering - 1 if this host wishes to use clustering * dma_host - 1 if this host has ISA DMA issues (bus doesn't * expose all of the address lines, so that DMA cannot * be done from an arbitrary address). @@ -708,7 +531,6 @@ */ __inline static int __init_io(Scsi_Cmnd * SCpnt, int sg_count_valid, - int use_clustering, int dma_host) { struct bio * bio; @@ -722,18 +544,13 @@ int this_count; void ** bbpnt; - /* - * now working right now - */ - BUG_ON(dma_host); - req = &SCpnt->request; /* * First we need to know how many scatter gather segments are needed. */ if (!sg_count_valid) { - count = __count_segments(req, use_clustering, dma_host, NULL); + count = __count_segments(req, dma_host, NULL); } else { count = req->nr_segments; } @@ -975,10 +792,10 @@ return 1; } -#define INITIO(_FUNCTION, _VALID, _CLUSTER, _DMA) \ -static int _FUNCTION(Scsi_Cmnd * SCpnt) \ -{ \ - return __init_io(SCpnt, _VALID, _CLUSTER, _DMA); \ +#define INITIO(_FUNCTION, _VALID, _DMA) \ +static int _FUNCTION(Scsi_Cmnd * SCpnt) \ +{ \ + return __init_io(SCpnt, _VALID, _DMA); \ } /* @@ -987,10 +804,8 @@ * We always force "_VALID" to 1. Eventually clean this up * and get rid of the extra argument. */ -INITIO(scsi_init_io_v, 1, 0, 0) -INITIO(scsi_init_io_vd, 1, 0, 1) -INITIO(scsi_init_io_vc, 1, 1, 0) -INITIO(scsi_init_io_vdc, 1, 1, 1) +INITIO(scsi_init_io_v, 1, 0) +INITIO(scsi_init_io_vd, 1, 1) /* * Function: initialize_merge_fn() @@ -1022,26 +837,16 @@ * is simply easier to do it ourselves with our own functions * rather than rely upon the default behavior of ll_rw_blk. 
*/ - if (!CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma == 0) { + if (SHpnt->unchecked_isa_dma == 0) { q->back_merge_fn = scsi_back_merge_fn_; q->front_merge_fn = scsi_front_merge_fn_; q->merge_requests_fn = scsi_merge_requests_fn_; SDpnt->scsi_init_io_fn = scsi_init_io_v; - } else if (!CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma != 0) { - q->back_merge_fn = scsi_back_merge_fn_; - q->front_merge_fn = scsi_front_merge_fn_; - q->merge_requests_fn = scsi_merge_requests_fn_; + } else { + q->back_merge_fn = scsi_back_merge_fn_d; + q->front_merge_fn = scsi_front_merge_fn_d; + q->merge_requests_fn = scsi_merge_requests_fn_d; SDpnt->scsi_init_io_fn = scsi_init_io_vd; - } else if (CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma == 0) { - q->back_merge_fn = scsi_back_merge_fn_c; - q->front_merge_fn = scsi_front_merge_fn_c; - q->merge_requests_fn = scsi_merge_requests_fn_c; - SDpnt->scsi_init_io_fn = scsi_init_io_vc; - } else if (CLUSTERABLE_DEVICE(SHpnt, SDpnt) && SHpnt->unchecked_isa_dma != 0) { - q->back_merge_fn = scsi_back_merge_fn_dc; - q->front_merge_fn = scsi_front_merge_fn_dc; - q->merge_requests_fn = scsi_merge_requests_fn_dc; - SDpnt->scsi_init_io_fn = scsi_init_io_vdc; } /* diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/scsi/sd.c linux/drivers/scsi/sd.c --- /opt/kernel/linux-2.5.1-pre2/drivers/scsi/sd.c Wed Nov 28 21:56:29 2001 +++ linux/drivers/scsi/sd.c Wed Nov 28 09:09:26 2001 @@ -236,8 +236,6 @@ case BLKELVSET: case BLKBSZGET: case BLKBSZSET: - case BLKHASHPROF: - case BLKHASHCLEAR: return blk_ioctl(inode->i_rdev, cmd, arg); case BLKRRPART: /* Re-read partition tables */ diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/drivers/scsi/sr.c linux/drivers/scsi/sr.c --- /opt/kernel/linux-2.5.1-pre2/drivers/scsi/sr.c Wed Nov 28 09:13:59 2001 +++ linux/drivers/scsi/sr.c Wed Nov 28 15:33:25 2001 @@ -326,11 +326,14 @@ } if (old_sg) { memcpy(sg + i, old_sg, SCpnt->use_sg * sizeof(struct scatterlist)); - memcpy(bbpnt + i, old_bbpnt, SCpnt->use_sg * sizeof(void *)); + if (old_bbpnt) + memcpy(bbpnt + i, old_bbpnt, SCpnt->use_sg * sizeof(void *)); scsi_free(old_sg, (((SCpnt->use_sg * sizeof(struct scatterlist)) + (SCpnt->use_sg * sizeof(void *))) + 511) & ~511); } else { - sg[i].address = SCpnt->request_buffer; + sg[i].address = NULL; + sg[i].page = virt_to_page(SCpnt->request_buffer); + sg[i].offset = (unsigned long) SCpnt->request_buffer&~PAGE_MASK; sg[i].length = SCpnt->request_bufflen; } @@ -340,7 +343,9 @@ SCpnt->use_sg += i; if (bsize) { - sg[SCpnt->use_sg].address = back; + sg[SCpnt->use_sg].address = NULL; + sg[SCpnt->use_sg].page = virt_to_page(back); + sg[SCpnt->use_sg].offset = (unsigned long) back & ~PAGE_MASK; bbpnt[SCpnt->use_sg] = back; sg[SCpnt->use_sg].length = bsize; SCpnt->use_sg++; diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/fs/bio.c linux/fs/bio.c --- /opt/kernel/linux-2.5.1-pre2/fs/bio.c Wed Nov 28 21:56:29 2001 +++ linux/fs/bio.c Wed Nov 28 21:38:07 2001 @@ -40,9 +40,6 @@ static DECLARE_WAIT_QUEUE_HEAD(bio_pool_wait); static DECLARE_WAIT_QUEUE_HEAD(biovec_pool_wait); -struct bio_hash_bucket *bio_hash_table; -unsigned int bio_hash_bits, bio_hash_mask; - static unsigned int bio_pool_free; #define BIOVEC_NR_POOLS 6 @@ -63,269 +60,12 @@ #define BIO_MAX_PAGES (bvec_pool_sizes[BIOVEC_NR_POOLS - 1]) -#ifdef BIO_HASH_PROFILING -static struct bio_hash_stats bio_stats; -#endif - -/* - * optimized for 2^BIO_HASH_SCALE kB block size - */ -#define BIO_HASH_SCALE 3 -#define BIO_HASH_BLOCK(sector) 
((sector) >> BIO_HASH_SCALE) - -/* - * pending further testing, grabbed from fs/buffer.c hash so far... - */ -#define __bio_hash(dev,block) \ - (((((dev)<<(bio_hash_bits - 6)) ^ ((dev)<<(bio_hash_bits - 9))) ^ \ - (((block)<<(bio_hash_bits - 6)) ^ ((block) >> 13) ^ \ - ((block) << (bio_hash_bits - 12)))) & bio_hash_mask) - -#define bio_hash(dev, sector) &((bio_hash_table + __bio_hash(dev, BIO_HASH_BLOCK((sector))))->hash) - -#define bio_hash_bucket(dev, sector) (bio_hash_table + __bio_hash(dev, BIO_HASH_BLOCK((sector)))) - -#define __BIO_HASH_RWLOCK(dev, sector) \ - &((bio_hash_table + __bio_hash((dev), BIO_HASH_BLOCK((sector))))->lock) -#define BIO_HASH_RWLOCK(bio) \ - __BIO_HASH_RWLOCK((bio)->bi_dev, (bio)->bi_sector) - /* * TODO: change this to use slab reservation scheme once that infrastructure * is in place... */ #define BIO_POOL_SIZE (256) -void __init bio_hash_init(unsigned long mempages) -{ - unsigned long htable_size, order; - int i; - - /* - * need to experiment on size of hash - */ - mempages >>= 2; - - htable_size = mempages * sizeof(struct bio_hash_bucket *); - for (order = 0; (PAGE_SIZE << order) < htable_size; order++) - ; - - do { - unsigned long tmp = (PAGE_SIZE << order) / sizeof(struct bio_hash_bucket); - - bio_hash_bits = 0; - while ((tmp >>= 1UL) != 0UL) - bio_hash_bits++; - - bio_hash_table = (struct bio_hash_bucket *) __get_free_pages(GFP_ATOMIC, order); - } while (bio_hash_table == NULL && --order > 0); - - if (!bio_hash_table) - panic("Failed to allocate page hash table\n"); - - printk("Bio-cache hash table entries: %ld (order: %ld, %ld bytes)\n", - BIO_HASH_SIZE, order, (PAGE_SIZE << order)); - - for (i = 0; i < BIO_HASH_SIZE; i++) { - struct bio_hash_bucket *hb = &bio_hash_table[i]; - - rwlock_init(&hb->lock); - hb->hash = NULL; - } - - bio_hash_mask = BIO_HASH_SIZE - 1; -} - -inline void __bio_hash_remove(struct bio *bio) -{ - bio_hash_t *entry = &bio->bi_hash; - bio_hash_t **pprev = entry->pprev_hash; - - if (pprev) { - bio_hash_t *nxt = entry->next_hash; - - if (nxt) - nxt->pprev_hash = pprev; - - *pprev = nxt; -#if 1 - entry->next_hash = NULL; -#endif - entry->pprev_hash = NULL; - entry->valid_counter = 0; - bio->bi_hash_desc = NULL; -#ifdef BIO_HASH_PROFILING - atomic_dec(&bio_stats.nr_entries); -#endif - } -} - -inline void bio_hash_remove(struct bio *bio) -{ - rwlock_t *hash_lock = BIO_HASH_RWLOCK(bio); - unsigned long flags; - - write_lock_irqsave(hash_lock, flags); - __bio_hash_remove(bio); - write_unlock_irqrestore(hash_lock, flags); -} - -inline void __bio_hash_add(struct bio *bio, bio_hash_t **hash, - void *hash_desc, unsigned int vc) -{ - bio_hash_t *entry = &bio->bi_hash; - bio_hash_t *nxt = *hash; - - BUG_ON(entry->pprev_hash); - - *hash = entry; - entry->next_hash = nxt; - entry->pprev_hash = hash; - entry->valid_counter = vc; - - if (nxt) - nxt->pprev_hash = &entry->next_hash; - - bio->bi_hash_desc = hash_desc; - -#ifdef BIO_HASH_PROFILING - atomic_inc(&bio_stats.nr_inserts); - atomic_inc(&bio_stats.nr_entries); - { - int entries = atomic_read(&bio_stats.nr_entries); - if (entries > atomic_read(&bio_stats.max_entries)) - atomic_set(&bio_stats.max_entries, entries); - } -#endif -} - -inline void bio_hash_add(struct bio *bio, void *hash_desc, unsigned int vc) -{ - struct bio_hash_bucket *hb =bio_hash_bucket(bio->bi_dev,bio->bi_sector); - unsigned long flags; - - write_lock_irqsave(&hb->lock, flags); - __bio_hash_add(bio, &hb->hash, hash_desc, vc); - write_unlock_irqrestore(&hb->lock, flags); -} - -inline struct bio *__bio_hash_find(kdev_t dev, 
sector_t sector, - bio_hash_t **hash, unsigned int vc) -{ - bio_hash_t *next = *hash, *entry; - struct bio *bio; - int nr = 0; - -#ifdef BIO_HASH_PROFILING - atomic_inc(&bio_stats.nr_lookups); -#endif - while ((entry = next)) { - next = entry->next_hash; - prefetch(next); - bio = bio_hash_entry(entry); - - if (entry->valid_counter == vc) { - if (bio->bi_sector == sector && bio->bi_dev == dev) { -#ifdef BIO_HASH_PROFILING - if (nr > atomic_read(&bio_stats.max_bucket_size)) - atomic_set(&bio_stats.max_bucket_size, nr); - if (nr <= MAX_PROFILE_BUCKETS) - atomic_inc(&bio_stats.bucket_size[nr]); - atomic_inc(&bio_stats.nr_hits); -#endif - bio_get(bio); - return bio; - } - } - nr++; - } - - return NULL; -} - -inline struct bio *bio_hash_find(kdev_t dev, sector_t sector, unsigned int vc) -{ - struct bio_hash_bucket *hb = bio_hash_bucket(dev, sector); - unsigned long flags; - struct bio *bio; - - read_lock_irqsave(&hb->lock, flags); - bio = __bio_hash_find(dev, sector, &hb->hash, vc); - read_unlock_irqrestore(&hb->lock, flags); - - return bio; -} - -inline int __bio_hash_add_unique(struct bio *bio, bio_hash_t **hash, - void *hash_desc, unsigned int vc) -{ - struct bio *alias = __bio_hash_find(bio->bi_dev, bio->bi_sector, hash, vc); - - if (!alias) { - __bio_hash_add(bio, hash, hash_desc, vc); - return 0; - } - - /* - * release reference to alias - */ - bio_put(alias); - return 1; -} - -inline int bio_hash_add_unique(struct bio *bio, void *hash_desc, unsigned int vc) -{ - struct bio_hash_bucket *hb =bio_hash_bucket(bio->bi_dev,bio->bi_sector); - unsigned long flags; - int ret = 1; - - if (!bio->bi_hash.pprev_hash) { - write_lock_irqsave(&hb->lock, flags); - ret = __bio_hash_add_unique(bio, &hb->hash, hash_desc, vc); - write_unlock_irqrestore(&hb->lock, flags); - } - - return ret; -} - -/* - * increment validity counter on barrier inserts. if it wraps, we must - * prune all existing entries for this device to be completely safe - * - * q->queue_lock must be held by caller - */ -void bio_hash_invalidate(request_queue_t *q, kdev_t dev) -{ - bio_hash_t *hash; - struct bio *bio; - int i; - - if (++q->hash_valid_counter) - return; - - /* - * it wrapped... - */ - for (i = 0; i < (1 << bio_hash_bits); i++) { - struct bio_hash_bucket *hb = &bio_hash_table[i]; - unsigned long flags; - - write_lock_irqsave(&hb->lock, flags); - while ((hash = hb->hash) != NULL) { - bio = bio_hash_entry(hash); - if (bio->bi_dev != dev) - __bio_hash_remove(bio); - } - write_unlock_irqrestore(&hb->lock, flags); - } - - /* - * entries pruned, reset validity counter - */ - q->hash_valid_counter = 1; -} - - /* * if need be, add bio_pool_get_irq() to match... */ @@ -384,38 +124,37 @@ #define BIO_CAN_WAIT(gfp_mask) \ (((gfp_mask) & (__GFP_WAIT | __GFP_IO)) == (__GFP_WAIT | __GFP_IO)) -static inline struct bio_vec_list *bvec_alloc(int gfp_mask, int nr) +static inline struct bio_vec *bvec_alloc(int gfp_mask, int nr, int *idx) { - struct bio_vec_list *bvl = NULL; + struct bio_vec *bvl = NULL; struct biovec_pool *bp; - int idx; /* * see comment near bvec_pool_sizes define! */ switch (nr) { case 1: - idx = 0; + *idx = 0; break; case 2 ... 4: - idx = 1; + *idx = 1; break; case 5 ... 16: - idx = 2; + *idx = 2; break; case 17 ... 64: - idx = 3; + *idx = 3; break; case 65 ... 128: - idx = 4; + *idx = 4; break; case 129 ... 
256: - idx = 5; + *idx = 5; break; default: return NULL; } - bp = &bvec_list[idx]; + bp = &bvec_list[*idx]; /* * ok, so idx now points to the slab we want to allocate from @@ -444,15 +183,9 @@ __set_current_state(TASK_RUNNING); } - /* - * we use bvl_max as index into bvec_pool_sizes, non-slab originated - * bvecs may use it for something else if they use their own - * destructor - */ if (bvl) { out_gotit: memset(bvl, 0, bp->bp_size); - bvl->bvl_max = idx; } return bvl; @@ -463,9 +196,9 @@ */ void bio_destructor(struct bio *bio) { - struct biovec_pool *bp = &bvec_list[bio->bi_io_vec->bvl_max]; + struct biovec_pool *bp = &bvec_list[bio->bi_max]; - BUG_ON(bio->bi_io_vec->bvl_max >= BIOVEC_NR_POOLS); + BUG_ON(bio->bi_max >= BIOVEC_NR_POOLS); /* * cloned bio doesn't own the veclist @@ -476,6 +209,15 @@ bio_pool_put(bio); } +inline void bio_init(struct bio *bio) +{ + bio->bi_next = NULL; + atomic_set(&bio->bi_cnt, 1); + bio->bi_flags = 0; + bio->bi_rw = 0; + bio->bi_end_io = NULL; +} + static inline struct bio *__bio_alloc(int gfp_mask, bio_destructor_t *dest) { struct bio *bio; @@ -514,14 +256,8 @@ if (bio) { gotit: - bio->bi_next = NULL; - bio->bi_hash.pprev_hash = NULL; - atomic_set(&bio->bi_cnt, 1); + bio_init(bio); bio->bi_io_vec = NULL; - bio->bi_flags = 0; - bio->bi_rw = 0; - bio->bi_end_io = NULL; - bio->bi_hash_desc = NULL; bio->bi_destructor = dest; } @@ -543,12 +279,12 @@ struct bio *bio_alloc(int gfp_mask, int nr_iovecs) { struct bio *bio = __bio_alloc(gfp_mask, bio_destructor); - struct bio_vec_list *bvl = NULL; + struct bio_vec *bvl = NULL; if (unlikely(!bio)) return NULL; - if (!nr_iovecs || (bvl = bvec_alloc(gfp_mask, nr_iovecs))) { + if (!nr_iovecs || (bvl = bvec_alloc(gfp_mask,nr_iovecs,&bio->bi_max))) { bio->bi_io_vec = bvl; return bio; } @@ -562,8 +298,6 @@ */ static inline void bio_free(struct bio *bio) { - BUG_ON(bio_is_hashed(bio)); - bio->bi_destructor(bio); } @@ -609,6 +343,11 @@ b->bi_dev = bio->bi_dev; b->bi_flags |= 1 << BIO_CLONED; b->bi_rw = bio->bi_rw; + + b->bi_vcnt = bio->bi_vcnt; + b->bi_idx = bio->bi_idx; + b->bi_size = bio->bi_size; + b->bi_max = bio->bi_max; } return b; @@ -618,14 +357,15 @@ * bio_copy - create copy of a bio * @bio: bio to copy * @gfp_mask: allocation priority + * @copy: copy data to allocated bio * * Create a copy of a &bio. Caller will own the returned bio and * the actual data it points to. Reference count of returned * bio will be one. 
*/ -struct bio *bio_copy(struct bio *bio, int gfp_mask) +struct bio *bio_copy(struct bio *bio, int gfp_mask, int copy) { - struct bio *b = bio_alloc(gfp_mask, bio->bi_io_vec->bvl_cnt); + struct bio *b = bio_alloc(gfp_mask, bio->bi_vcnt); unsigned long flags = 0; /* gcc silly */ int i; @@ -636,33 +376,37 @@ * iterate iovec list and alloc pages + copy data */ bio_for_each_segment(bv, bio, i) { - struct bio_vec *bbv = &b->bi_io_vec->bvl_vec[i]; + struct bio_vec *bbv = &b->bi_io_vec[i]; char *vfrom, *vto; bbv->bv_page = alloc_page(gfp_mask); if (bbv->bv_page == NULL) goto oom; + if (!copy) + goto fill_in; + if (gfp_mask & __GFP_WAIT) { vfrom = kmap(bv->bv_page); - vto = kmap(bv->bv_page); + vto = kmap(bbv->bv_page); } else { __save_flags(flags); __cli(); vfrom = kmap_atomic(bv->bv_page, KM_BIO_IRQ); - vto = kmap_atomic(bv->bv_page, KM_BIO_IRQ); + vto = kmap_atomic(bbv->bv_page, KM_BIO_IRQ); } - memcpy(vto + bv->bv_offset, vfrom + bv->bv_offset, bv->bv_len); + memcpy(vto + bbv->bv_offset, vfrom + bv->bv_offset, bv->bv_len); if (gfp_mask & __GFP_WAIT) { - kunmap(vto); - kunmap(vfrom); + kunmap(bbv->bv_page); + kunmap(bv->bv_page); } else { kunmap_atomic(vto, KM_BIO_IRQ); kunmap_atomic(vfrom, KM_BIO_IRQ); __restore_flags(flags); } +fill_in: bbv->bv_len = bv->bv_len; bbv->bv_offset = bv->bv_offset; } @@ -671,15 +415,15 @@ b->bi_dev = bio->bi_dev; b->bi_rw = bio->bi_rw; - b->bi_io_vec->bvl_cnt = bio->bi_io_vec->bvl_cnt; - b->bi_io_vec->bvl_size = bio->bi_io_vec->bvl_size; + b->bi_vcnt = bio->bi_vcnt; + b->bi_size = bio->bi_size; } return b; oom: while (i >= 0) { - __free_page(b->bi_io_vec->bvl_vec[i].bv_page); + __free_page(b->bi_io_vec[i].bv_page); i--; } @@ -712,23 +456,20 @@ static int bio_end_io_kio(struct bio *bio, int nr_sectors) { struct kiobuf *kio = (struct kiobuf *) bio->bi_private; - struct bio_vec_list *bv = bio->bi_io_vec; int uptodate, done; - BUG_ON(!bv); - done = 0; uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); do { - int sectors = bv->bvl_vec[bv->bvl_idx].bv_len >> 9; + int sectors = bio->bi_io_vec[bio->bi_idx].bv_len >> 9; nr_sectors -= sectors; - bv->bvl_idx++; + bio->bi_idx++; done = !end_kio_request(kio, uptodate); - if (bv->bvl_idx == bv->bvl_cnt) + if (bio->bi_idx == bio->bi_vcnt) done = 1; } while (!done && nr_sectors > 0); @@ -737,7 +478,6 @@ * all done */ if (done) { - bio_hash_remove(bio); bio_put(bio); return 0; } @@ -844,12 +584,12 @@ bio->bi_sector = sector; bio->bi_dev = dev; - bio->bi_io_vec->bvl_idx = 0; + bio->bi_idx = 0; bio->bi_flags |= 1 << BIO_PREBUILT; bio->bi_end_io = bio_end_io_kio; bio->bi_private = kio; - bvec = &bio->bi_io_vec->bvl_vec[0]; + bvec = bio->bi_io_vec; for (i = 0; i < nr_pages; i++, bvec++, map_i++) { int nbytes = PAGE_SIZE - offset; @@ -858,11 +598,11 @@ BUG_ON(kio->maplist[map_i] == NULL); - if (bio->bi_io_vec->bvl_size + nbytes > max_bytes) + if (bio->bi_size + nbytes > max_bytes) goto queue_io; - bio->bi_io_vec->bvl_cnt++; - bio->bi_io_vec->bvl_size += nbytes; + bio->bi_vcnt++; + bio->bi_size += nbytes; bvec->bv_page = kio->maplist[map_i]; bvec->bv_len = nbytes; @@ -931,7 +671,6 @@ struct biovec_pool *bp = &bvec_list[i]; size = bvec_pool_sizes[i] * sizeof(struct bio_vec); - size += sizeof(struct bio_vec_list); printk("biovec: init pool %d, %d entries, %d bytes\n", i, bvec_pool_sizes[i], size); @@ -962,29 +701,6 @@ biovec_init_pool(); -#ifdef BIO_HASH_PROFILING - memset(&bio_stats, 0, sizeof(bio_stats)); -#endif - - return 0; -} - -int bio_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg) -{ -#ifdef BIO_HASH_PROFILING - switch 
(cmd) { - case BLKHASHPROF: - if (copy_to_user((struct bio_hash_stats *) arg, &bio_stats, sizeof(bio_stats))) - return -EFAULT; - break; - case BLKHASHCLEAR: - memset(&bio_stats, 0, sizeof(bio_stats)); - break; - default: - return -ENOTTY; - } - -#endif return 0; } @@ -993,7 +709,7 @@ EXPORT_SYMBOL(bio_alloc); EXPORT_SYMBOL(bio_put); EXPORT_SYMBOL(ll_rw_kio); -EXPORT_SYMBOL(bio_hash_remove); -EXPORT_SYMBOL(bio_hash_add); -EXPORT_SYMBOL(bio_hash_add_unique); EXPORT_SYMBOL(bio_endio); +EXPORT_SYMBOL(bio_init); +EXPORT_SYMBOL(bio_copy); +EXPORT_SYMBOL(bio_clone); diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/fs/minix/inode.c linux/fs/minix/inode.c --- /opt/kernel/linux-2.5.1-pre2/fs/minix/inode.c Wed Nov 28 21:56:29 2001 +++ linux/fs/minix/inode.c Wed Nov 28 08:35:05 2001 @@ -292,7 +292,7 @@ return 0; } -static int minix_get_block(struct inode *inode, long block, +static int minix_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create) { if (INODE_VERSION(inode) == MINIX_V1) diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/include/linux/bio.h linux/include/linux/bio.h --- /opt/kernel/linux-2.5.1-pre2/include/linux/bio.h Wed Nov 28 21:56:29 2001 +++ linux/include/linux/bio.h Wed Nov 28 21:31:56 2001 @@ -30,26 +30,6 @@ #endif /* - * hash profiling stuff.. - */ -#define BIO_HASH_PROFILING - -#define BLKHASHPROF _IOR(0x12,108,sizeof(struct bio_hash_stats)) -#define BLKHASHCLEAR _IO(0x12,109) - -#define MAX_PROFILE_BUCKETS 64 - -struct bio_hash_stats { - atomic_t nr_lookups; - atomic_t nr_hits; - atomic_t nr_inserts; - atomic_t nr_entries; - atomic_t max_entries; - atomic_t max_bucket_size; - atomic_t bucket_size[MAX_PROFILE_BUCKETS + 1]; -}; - -/* * was unsigned short, but we might as well be ready for > 64kB I/O pages */ struct bio_vec { @@ -58,37 +38,6 @@ unsigned int bv_offset; }; -struct bio_vec_list { - unsigned int bvl_cnt; /* how may bio_vec's */ - unsigned int bvl_idx; /* current index into bvl_vec */ - unsigned int bvl_size; /* total size in bytes */ - unsigned int bvl_max; /* max bvl_vecs we can hold, used - as index into pool */ - struct bio_vec bvl_vec[0]; /* the iovec array */ -}; - -typedef struct bio_hash_s { - struct bio_hash_s *next_hash; - struct bio_hash_s **pprev_hash; - unsigned long valid_counter; -} bio_hash_t; - -struct bio_hash_bucket { - rwlock_t lock; - bio_hash_t *hash; -} __attribute__((__aligned__(16))); - -#define BIO_HASH_BITS (bio_hash_bits) -#define BIO_HASH_SIZE (1UL << BIO_HASH_BITS) - -/* - * shamelessly stolen from the list.h implementation - */ -#define hash_entry(ptr, type, member) \ - ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) -#define bio_hash_entry(ptr) \ - hash_entry((ptr), struct bio, bi_hash) - /* * main unit of I/O for the block layer and lower layers (ie drivers and * stacking drivers) @@ -96,26 +45,27 @@ struct bio { sector_t bi_sector; struct bio *bi_next; /* request queue link */ - bio_hash_t bi_hash; atomic_t bi_cnt; /* pin count */ kdev_t bi_dev; /* will be block device */ - struct bio_vec_list *bi_io_vec; unsigned long bi_flags; /* status, command, etc */ unsigned long bi_rw; /* bottom bits READ/WRITE, * top bits priority */ + + unsigned int bi_vcnt; /* how may bio_vec's */ + unsigned int bi_idx; /* current index into bvl_vec */ + unsigned int bi_size; /* total size in bytes */ + unsigned int bi_max; /* max bvl_vecs we can hold, + used as index into pool */ + + struct bio_vec *bi_io_vec; /* the actual vec list */ + int (*bi_end_io)(struct bio *bio, int 
diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/include/linux/bio.h linux/include/linux/bio.h
--- /opt/kernel/linux-2.5.1-pre2/include/linux/bio.h	Wed Nov 28 21:56:29 2001
+++ linux/include/linux/bio.h	Wed Nov 28 21:31:56 2001
@@ -30,26 +30,6 @@
 #endif
 
 /*
- * hash profiling stuff..
- */
-#define BIO_HASH_PROFILING
-
-#define BLKHASHPROF	_IOR(0x12,108,sizeof(struct bio_hash_stats))
-#define BLKHASHCLEAR	_IO(0x12,109)
-
-#define MAX_PROFILE_BUCKETS	64
-
-struct bio_hash_stats {
-	atomic_t nr_lookups;
-	atomic_t nr_hits;
-	atomic_t nr_inserts;
-	atomic_t nr_entries;
-	atomic_t max_entries;
-	atomic_t max_bucket_size;
-	atomic_t bucket_size[MAX_PROFILE_BUCKETS + 1];
-};
-
-/*
  * was unsigned short, but we might as well be ready for > 64kB I/O pages
  */
 struct bio_vec {
@@ -58,37 +38,6 @@
 	unsigned int	bv_offset;
 };
 
-struct bio_vec_list {
-	unsigned int	bvl_cnt;	/* how may bio_vec's */
-	unsigned int	bvl_idx;	/* current index into bvl_vec */
-	unsigned int	bvl_size;	/* total size in bytes */
-	unsigned int	bvl_max;	/* max bvl_vecs we can hold, used
-					   as index into pool */
-	struct bio_vec	bvl_vec[0];	/* the iovec array */
-};
-
-typedef struct bio_hash_s {
-	struct bio_hash_s *next_hash;
-	struct bio_hash_s **pprev_hash;
-	unsigned long valid_counter;
-} bio_hash_t;
-
-struct bio_hash_bucket {
-	rwlock_t lock;
-	bio_hash_t *hash;
-} __attribute__((__aligned__(16)));
-
-#define BIO_HASH_BITS	(bio_hash_bits)
-#define BIO_HASH_SIZE	(1UL << BIO_HASH_BITS)
-
-/*
- * shamelessly stolen from the list.h implementation
- */
-#define hash_entry(ptr, type, member) \
-	((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-#define bio_hash_entry(ptr) \
-	hash_entry((ptr), struct bio, bi_hash)
-
 /*
  * main unit of I/O for the block layer and lower layers (ie drivers and
  * stacking drivers)
@@ -96,26 +45,27 @@
 struct bio {
 	sector_t		bi_sector;
 	struct bio		*bi_next;	/* request queue link */
-	bio_hash_t		bi_hash;
 	atomic_t		bi_cnt;		/* pin count */
 	kdev_t			bi_dev;		/* will be block device */
-	struct bio_vec_list	*bi_io_vec;
 	unsigned long		bi_flags;	/* status, command, etc */
 	unsigned long		bi_rw;		/* bottom bits READ/WRITE,
 						 * top bits priority */
+
+	unsigned int		bi_vcnt;	/* how may bio_vec's */
+	unsigned int		bi_idx;		/* current index into bvl_vec */
+	unsigned int		bi_size;	/* total size in bytes */
+	unsigned int		bi_max;		/* max bvl_vecs we can hold,
+						   used as index into pool */
+
+	struct bio_vec		*bi_io_vec;	/* the actual vec list */
+
 	int (*bi_end_io)(struct bio *bio, int nr_sectors);
 	void			*bi_private;
-	void			*bi_hash_desc;	/* cookie for hash */
-	void (*bi_destructor)(struct bio *);	/* destructor */
 };
 
-#define BIO_SECTOR_BITS	9
-#define BIO_OFFSET_MASK	((1UL << (PAGE_CACHE_SHIFT - BIO_SECTOR_BITS)) - 1)
-#define BIO_PAGE_MASK	(PAGE_CACHE_SIZE - 1)
-
 /*
  * bio flags
  */
@@ -125,8 +75,6 @@
 #define BIO_PREBUILT	3	/* not merged big */
 #define BIO_CLONED	4	/* doesn't own data */
 
-#define bio_is_hashed(bio)	((bio)->bi_hash.pprev_hash)
-
 /*
  * bio bi_rw flags
 *
@@ -142,12 +90,13 @@
  * various member access, note that bio_data should of course not be used
  * on highmem page vectors
  */
-#define bio_iovec_idx(bio, idx)	(&((bio)->bi_io_vec->bvl_vec[(idx)]))
-#define bio_iovec(bio)		bio_iovec_idx((bio), (bio)->bi_io_vec->bvl_idx)
+#define bio_iovec_idx(bio, idx)	(&((bio)->bi_io_vec[(idx)]))
+#define bio_iovec(bio)		bio_iovec_idx((bio), (bio)->bi_idx)
 #define bio_page(bio)		bio_iovec((bio))->bv_page
-#define bio_size(bio)		((bio)->bi_io_vec->bvl_size)
+#define bio_size(bio)		((bio)->bi_size)
+#define __bio_offset(bio, idx)	bio_iovec_idx((bio), (idx))->bv_offset
 #define bio_offset(bio)		bio_iovec((bio))->bv_offset
-#define bio_sectors(bio)	(bio_size((bio)) >> BIO_SECTOR_BITS)
+#define bio_sectors(bio)	(bio_size((bio)) >> 9)
 #define bio_data(bio)		(page_address(bio_page((bio))) + bio_offset((bio)))
 #define bio_barrier(bio)	((bio)->bi_rw & (1 << BIO_BARRIER))
@@ -170,15 +119,17 @@
  * permanent PIO fall back, user is probably better off disabling highmem
  * I/O completely on that queue (see ide-dma for example)
  */
-#define bio_kmap(bio)		kmap(bio_page((bio))) + bio_offset((bio))
-#define bio_kunmap(bio)		kunmap(bio_page((bio)))
+#define __bio_kmap(bio, idx)	(kmap(bio_iovec_idx((bio), (idx))->bv_page) + bio_iovec_idx((bio), (idx))->bv_offset)
+#define bio_kmap(bio)		__bio_kmap((bio), (bio)->bi_idx)
+#define __bio_kunmap(bio, idx)	kunmap(bio_iovec_idx((bio), (idx))->bv_page)
+#define bio_kunmap(bio)		__bio_kunmap((bio), (bio)->bi_idx)
 
 #define BIO_CONTIG(bio, nxt) \
 	(bio_to_phys((bio)) + bio_size((bio)) == bio_to_phys((nxt)))
-#define __BIO_PHYS_4G(addr1, addr2) \
-	(((addr1) | 0xffffffff) == (((addr2) -1 ) | 0xffffffff))
-#define BIO_PHYS_4G(b1, b2) \
-	__BIO_PHYS_4G(bio_to_phys((b1)), bio_to_phys((b2)) + bio_size((b2)))
+#define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
+	(((addr1) | (mask)) == (((addr2) - 1) | (mask)))
+#define BIO_SEG_BOUNDARY(q, b1, b2) \
+	__BIO_SEG_BOUNDARY(bvec_to_phys(bio_iovec_idx((b1), (b1)->bi_vcnt - 1)), bio_to_phys((b2)) + bio_size((b2)), (q)->seg_boundary_mask)
 
 typedef int (bio_end_io_t) (struct bio *, int);
 typedef void (bio_destructor_t) (struct bio *);
@@ -186,8 +137,8 @@
 #define bio_io_error(bio) bio_endio((bio), 0, bio_sectors((bio)))
 
 #define bio_for_each_segment(bvl, bio, i)				\
-	for (bvl = bio_iovec((bio)), i = (bio)->bi_io_vec->bvl_idx;	\
-	     i < (bio)->bi_io_vec->bvl_cnt;				\
+	for (bvl = bio_iovec((bio)), i = (bio)->bi_idx;			\
+	     i < (bio)->bi_vcnt;					\
	     bvl++, i++)
 
 /*
@@ -209,21 +160,12 @@
 extern struct bio *bio_alloc(int, int);
 extern void bio_put(struct bio *);
 
-/*
- * the hash stuff is pretty closely tied to the request queue (needed for
- * locking etc anyway, and it's in no way an attempt at a generic hash)
- */
-struct request_queue;
-extern inline void bio_hash_remove(struct bio *);
-extern inline void bio_hash_add(struct bio *, void *, unsigned int);
-extern inline struct bio *bio_hash_find(kdev_t, sector_t, unsigned int);
-extern inline int bio_hash_add_unique(struct bio *, void *, unsigned int);
-extern void bio_hash_invalidate(struct request_queue *, kdev_t);
-
 extern int bio_endio(struct bio *, int, int);
 extern struct bio *bio_clone(struct bio *, int);
-extern struct bio *bio_copy(struct bio *, int);
+extern struct bio *bio_copy(struct bio *, int, int);
+
+extern inline void bio_init(struct bio *);
 
 extern int bio_ioctl(kdev_t, unsigned int, unsigned long);
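With the bio_vec_list container gone, a driver walks the vec array through the accessors above. A sketch (not in the patch; the function name is invented) of iterating a bio segment by segment, assuming the __bio_kmap()/__bio_kunmap() pair declared in the hunk above:

	#include <linux/bio.h>

	static unsigned int example_csum(struct bio *bio)
	{
		struct bio_vec *bvl;
		unsigned int csum = 0, j;
		int i;

		bio_for_each_segment(bvl, bio, i) {
			/* __bio_kmap() maps the page and adds bv_offset */
			unsigned char *data = __bio_kmap(bio, i);

			for (j = 0; j < bvl->bv_len; j++)
				csum += data[j];

			__bio_kunmap(bio, i);
		}

		return csum;
	}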
diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/include/linux/blk.h linux/include/linux/blk.h
--- /opt/kernel/linux-2.5.1-pre2/include/linux/blk.h	Wed Nov 28 21:56:29 2001
+++ linux/include/linux/blk.h	Wed Nov 28 21:32:42 2001
@@ -83,11 +83,6 @@
 
 static inline void blkdev_dequeue_request(struct request *req)
 {
-	if (req->bio)
-		bio_hash_remove(req->bio);
-	if (req->biotail)
-		bio_hash_remove(req->biotail);
-
 	list_del(&req->queuelist);
 }
 
diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/include/linux/blkdev.h linux/include/linux/blkdev.h
--- /opt/kernel/linux-2.5.1-pre2/include/linux/blkdev.h	Wed Nov 28 21:56:29 2001
+++ linux/include/linux/blkdev.h	Wed Nov 28 21:32:41 2001
@@ -127,9 +127,9 @@
 	unsigned short		hardsect_size;
 	unsigned int		max_segment_size;
 
-	wait_queue_head_t	queue_wait;
+	unsigned long		seg_boundary_mask;
 
-	unsigned int		hash_valid_counter;
+	wait_queue_head_t	queue_wait;
 };
 
 #define RQ_INACTIVE		(-1)
@@ -140,6 +140,7 @@
 
 #define QUEUE_FLAG_PLUGGED	0	/* queue is plugged */
 #define QUEUE_FLAG_NOSPLIT	1	/* can process bio over several goes */
+#define QUEUE_FLAG_CLUSTER	2	/* cluster several segments into 1 */
 
 #define blk_queue_plugged(q)	test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
 
@@ -166,11 +167,6 @@
 	if (rq) {
 		rq->inactive = 0;
 		wmb();
-
-		if (rq->bio)
-			bio_hash_remove(rq->bio);
-		if (rq->biotail)
-			bio_hash_remove(rq->biotail);
 	}
 
 	return rq;
@@ -187,7 +183,7 @@
 {
 	struct page *page = bio_page(*bio);
 
-	if (page - page->zone->zone_mem_map > q->bounce_pfn)
+	if ((page - page->zone->zone_mem_map) + (page->zone->zone_start_paddr >> PAGE_SHIFT) < q->bounce_pfn)
 		create_bounce(bio, q->bounce_gfp);
 }
@@ -235,7 +231,7 @@
 /*
  * Access functions for manipulating queue properties
 */
-extern int blk_init_queue(request_queue_t *, request_fn_proc *, char *);
+extern int blk_init_queue(request_queue_t *, request_fn_proc *);
 extern void blk_cleanup_queue(request_queue_t *);
 extern void blk_queue_make_request(request_queue_t *, make_request_fn *);
 extern void blk_queue_bounce_limit(request_queue_t *, unsigned long long);
@@ -243,6 +239,7 @@
 extern void blk_queue_max_segments(request_queue_t *q, unsigned short);
 extern void blk_queue_max_segment_size(request_queue_t *q, unsigned int);
 extern void blk_queue_hardsect_size(request_queue_t *q, unsigned short);
+extern void blk_queue_segment_boundary(request_queue_t *q, unsigned long);
 extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
 extern void generic_unplug_device(void *);
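With the name argument gone from blk_init_queue() and the new segment boundary property, a driver init path would now look roughly like the sketch below. This is not from the patch; MY_MAJOR and my_request_fn are placeholders, and the 4GB boundary mask simply mirrors the behaviour of the old BIO_PHYS_4G check:

	#include <linux/blkdev.h>

	static void my_request_fn(request_queue_t *q);

	static void example_setup_queue(void)
	{
		request_queue_t *q = BLK_DEFAULT_QUEUE(MY_MAJOR);

		blk_init_queue(q, my_request_fn);	/* queue name argument is gone */
		blk_queue_headactive(q, 0);
		blk_queue_max_segments(q, 128);
		blk_queue_segment_boundary(q, 0xffffffff);	/* segments must not cross 4GB */
	}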
diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/include/linux/elevator.h linux/include/linux/elevator.h
--- /opt/kernel/linux-2.5.1-pre2/include/linux/elevator.h	Wed Nov 28 21:56:29 2001
+++ linux/include/linux/elevator.h	Wed Nov 28 08:51:34 2001
@@ -33,13 +33,6 @@
 
 	elevator_init_fn *elevator_init_fn;
 	elevator_exit_fn *elevator_exit_fn;
-
-	/*
-	 * per-elevator private data
-	 */
-	void *elevator_data;
-
-	char queue_name[16];
 };
 
 int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct bio *);
@@ -66,7 +59,7 @@
 #define BLKELVGET   _IOR(0x12,106,sizeof(blkelv_ioctl_arg_t))
 #define BLKELVSET   _IOW(0x12,107,sizeof(blkelv_ioctl_arg_t))
 
-extern int elevator_init(request_queue_t *, elevator_t *, elevator_t, char *);
+extern int elevator_init(request_queue_t *, elevator_t *, elevator_t);
 extern void elevator_exit(request_queue_t *, elevator_t *);
 
 /*
diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/include/linux/fs.h linux/include/linux/fs.h
--- /opt/kernel/linux-2.5.1-pre2/include/linux/fs.h	Wed Nov 28 21:56:29 2001
+++ linux/include/linux/fs.h	Wed Nov 28 21:32:30 2001
@@ -205,7 +205,6 @@
 extern void update_atime (struct inode *);
 #define UPDATE_ATIME(inode) update_atime (inode)
 
-extern void bio_hash_init(unsigned long);
 extern void buffer_init(unsigned long);
 extern void inode_init(unsigned long);
 extern void mnt_init(unsigned long);
diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/init/main.c linux/init/main.c
--- /opt/kernel/linux-2.5.1-pre2/init/main.c	Wed Nov 28 21:56:29 2001
+++ linux/init/main.c	Wed Nov 28 09:08:12 2001
@@ -600,7 +600,6 @@
 	vfs_caches_init(mempages);
 	buffer_init(mempages);
 	page_cache_init(mempages);
-	bio_hash_init(mempages);
 #if defined(CONFIG_ARCH_S390)
 	ccwcache_init();
 #endif
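Correspondingly, wiring up the default elevator no longer takes a queue name. A sketch (not from the patch), assuming the existing ELEVATOR_LINUS descriptor from elevator.h:

	#include <linux/blkdev.h>
	#include <linux/elevator.h>

	static int example_setup_elevator(request_queue_t *q)
	{
		/* third argument is the elevator type descriptor, passed by value */
		return elevator_init(q, &q->elevator, ELEVATOR_LINUS);
	}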
diff -ur -X /home/axboe/exclude /opt/kernel/linux-2.5.1-pre2/mm/highmem.c linux/mm/highmem.c
--- /opt/kernel/linux-2.5.1-pre2/mm/highmem.c	Wed Nov 28 21:56:29 2001
+++ linux/mm/highmem.c	Wed Nov 28 21:34:36 2001
@@ -264,16 +264,17 @@
 
 __initcall(init_emergency_pool);
 
-static inline void bounce_end_io (struct bio *bio, int nr_sectors)
+static inline int bounce_end_io (struct bio *bio, int nr_sectors)
 {
 	struct bio *bio_orig = bio->bi_private;
 	struct page *page = bio_page(bio);
 	unsigned long flags;
+	int ret;
 
 	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
-		set_bit(BIO_UPTODATE, bio_orig->bi_flags);
+		set_bit(BIO_UPTODATE, &bio_orig->bi_flags);
 
-	bio_orig->bi_end_io(bio_orig, nr_sectors);
+	ret = bio_orig->bi_end_io(bio_orig, nr_sectors);
 
 	spin_lock_irqsave(&emergency_lock, flags);
 	if (nr_emergency_pages >= POOL_SIZE) {
@@ -289,23 +290,23 @@
 		spin_unlock_irqrestore(&emergency_lock, flags);
 	}
 
-	bio_hash_remove(bio);
 	bio_put(bio);
+	return ret;
 }
 
-static void bounce_end_io_write (struct bio *bio, int nr_sectors)
+static int bounce_end_io_write(struct bio *bio, int nr_sectors)
 {
-	bounce_end_io(bio, nr_sectors);
+	return bounce_end_io(bio, nr_sectors);
 }
 
-static void bounce_end_io_read (struct bio *bio, int nr_sectors)
+static int bounce_end_io_read (struct bio *bio, int nr_sectors)
 {
 	struct bio *bio_orig = bio->bi_private;
 
 	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
 		copy_to_high_bio_irq(bio_orig, bio);
 
-	bounce_end_io(bio, nr_sectors);
+	return bounce_end_io(bio, nr_sectors);
 }
 
 struct page *alloc_bounce_page(int gfp_mask)
@@ -350,31 +351,42 @@
 {
 	struct page *page;
 	struct bio *bio;
+	int i, rw = bio_data_dir(*bio_orig);
 
-	bio = bio_alloc(GFP_NOHIGHIO, 1);
+	BUG_ON((*bio_orig)->bi_idx);
 
-	/*
-	 * wasteful for 1kB fs, but machines with lots of ram are less likely
-	 * to have 1kB fs for anything that needs to go fast. so all things
-	 * considered, it should be ok.
-	 */
-	page = alloc_bounce_page(gfp_mask);
+	bio = bio_alloc(GFP_NOHIGHIO, (*bio_orig)->bi_vcnt);
 
 	bio->bi_dev = (*bio_orig)->bi_dev;
 	bio->bi_sector = (*bio_orig)->bi_sector;
 	bio->bi_rw = (*bio_orig)->bi_rw;
 
-	bio->bi_io_vec->bvl_vec[0].bv_page = page;
-	bio->bi_io_vec->bvl_vec[0].bv_len = bio_size(*bio_orig);
-	bio->bi_io_vec->bvl_vec[0].bv_offset = 0;
+	bio->bi_vcnt = (*bio_orig)->bi_vcnt;
+	bio->bi_idx = 0;
+	bio->bi_size = (*bio_orig)->bi_size;
 
-	bio->bi_private = *bio_orig;
-
-	if (bio_rw(bio) == WRITE) {
+	if (rw & WRITE)
 		bio->bi_end_io = bounce_end_io_write;
-		copy_from_high_bio(bio, *bio_orig);
-	} else
+	else
 		bio->bi_end_io = bounce_end_io_read;
 
+	for (i = 0; i < bio->bi_vcnt; i++) {
+		char *vto, *vfrom;
+
+		page = alloc_bounce_page(gfp_mask);
+
+		bio->bi_io_vec[i].bv_page = page;
+		bio->bi_io_vec[i].bv_len = (*bio_orig)->bi_io_vec[i].bv_len;
+		bio->bi_io_vec[i].bv_offset = 0;
+
+		if (rw & WRITE) {
+			vto = page_address(page);
+			vfrom = __bio_kmap(*bio_orig, i);
+			memcpy(vto, vfrom, bio->bi_io_vec[i].bv_len);
+			__bio_kunmap(*bio_orig, i);
+		}
+	}
+
+	bio->bi_private = *bio_orig;
 	*bio_orig = bio;
 }
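Finally, since bounce_end_io() now propagates the return value of the original completion, a bi_end_io hook is expected to return an int, in the same style as bio_end_io_kio() above. A minimal sketch (not from the patch; the name is invented):

	#include <linux/bio.h>

	static int example_end_io(struct bio *bio, int nr_sectors)
	{
		/* a real driver would complete nr_sectors of its own request here,
		 * checking BIO_UPTODATE in bio->bi_flags for success or failure */
		bio_put(bio);
		return 0;
	}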