===== drivers/block/cciss.c 1.63 vs edited ===== --- 1.63/drivers/block/cciss.c Fri Oct 18 12:55:58 2002 +++ edited/drivers/block/cciss.c Sun Oct 27 15:35:37 2002 @@ -1681,6 +1681,28 @@ end_that_request_last(cmd->rq); } +struct cciss_map_state { + CommandList_struct *command; + int data_dir; +}; + +static void cciss_map_sg(request_queue_t *q, struct scatterlist *sg, int nseg, + void *cookie) +{ + struct cciss_map_state *s = cookie; + CommandList_struct *c = s->command; + ctlr_info_t *h= q->queuedata; + u64bit temp64; + + c->SG[nseg].Len = sg->length; + temp64.val = (__u64) pci_map_page(h->pdev, sg->page, sg->offset, + sg->length, s->data_dir); + + c->SG[nseg].Addr.lower = temp64.val32.lower; + c->SG[nseg].Addr.upper = temp64.val32.upper; + c->SG[nseg].Ext = 0; // we are not chaining +} + /* * Get a request and submit it to the controller. */ @@ -1690,10 +1712,8 @@ CommandList_struct *c; int start_blk, seg; struct request *creq; - u64bit temp64; - struct scatterlist tmp_sg[MAXSGENTRIES]; + struct cciss_map_state map_state; drive_info_struct *drv; - int i, dir; if (blk_queue_plugged(q)) goto startio; @@ -1735,24 +1755,16 @@ (int) creq->nr_sectors); #endif /* CCISS_DEBUG */ - seg = blk_rq_map_sg(q, creq, tmp_sg); - /* get the DMA records for the setup */ if (c->Request.Type.Direction == XFER_READ) - dir = PCI_DMA_FROMDEVICE; + map_state.data_dir = PCI_DMA_FROMDEVICE; else - dir = PCI_DMA_TODEVICE; + map_state.data_dir = PCI_DMA_TODEVICE; + + map_state.command = c; + + seg = blk_rq_map_consume(q, creq, cciss_map_sg, &map_state); - for (i=0; iSG[i].Len = tmp_sg[i].length; - temp64.val = (__u64) pci_map_page(h->pdev, tmp_sg[i].page, - tmp_sg[i].offset, tmp_sg[i].length, - dir); - c->SG[i].Addr.lower = temp64.val32.lower; - c->SG[i].Addr.upper = temp64.val32.upper; - c->SG[i].Ext = 0; // we are not chaining - } /* track how many SG entries we are using */ if( seg > h->maxSG) h->maxSG = seg; ===== drivers/block/elevator.c 1.29 vs edited ===== --- 1.29/drivers/block/elevator.c Fri Oct 4 15:58:57 2002 +++ edited/drivers/block/elevator.c Sun Oct 27 22:22:31 2002 @@ -272,13 +272,27 @@ e->elevator_merge_req_fn(q, rq, next); } -/* - * add_request and next_request are required to be supported, naturally - */ -void __elv_add_request(request_queue_t *q, struct request *rq, - struct list_head *insert_here) +void __elv_add_request(request_queue_t *q, struct request *rq, int at_end, + int plug) { - q->elevator.elevator_add_req_fn(q, rq, insert_here); + struct list_head *insert = &q->queue_head; + + if (at_end) + insert = insert->prev; + if (plug) + blk_plug_device(q); + + q->elevator.elevator_add_req_fn(q, rq, insert); +} + +void elv_add_request(request_queue_t *q, struct request *rq, int at_end, + int plug) +{ + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + __elv_add_request(q, rq, at_end, plug); + spin_unlock_irqrestore(q->queue_lock, flags); } static inline struct request *__elv_next_request(request_queue_t *q) @@ -289,8 +303,14 @@ struct request *elv_next_request(request_queue_t *q) { struct request *rq; + int ret; while ((rq = __elv_next_request(q))) { + /* + * just mark as started even if we don't start it, a request + * that has been delayed should not be passed by new incoming + * requests + */ rq->flags |= REQ_STARTED; if (&rq->queuelist == q->last_merge) @@ -299,20 +319,22 @@ if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn) break; - /* - * all ok, break and return it - */ - if (!q->prep_rq_fn(q, rq)) + ret = q->prep_rq_fn(q, rq); + if (ret == BLKPREP_OK) { break; - - /* - * 
prep said no-go, kill it - */ - blkdev_dequeue_request(rq); - if (end_that_request_first(rq, 0, rq->nr_sectors)) - BUG(); - - end_that_request_last(rq); + } else if (ret == BLKPREP_DEFER) { + rq = NULL; + break; + } else if (ret == BLKPREP_KILL) { + blkdev_dequeue_request(rq); + rq->flags |= REQ_QUIET; + while (end_that_request_first(rq, 0, rq->nr_sectors)) + ; + end_that_request_last(rq); + } else { + printk("%s: bad return=%d\n", __FUNCTION__, ret); + break; + } } return rq; @@ -322,6 +344,16 @@ { elevator_t *e = &q->elevator; + /* + * the main clearing point for q->last_merge is on retrieval of + * request by driver (it calls elv_next_request()), but it _can_ + * also happen here if a request is added to the queue but later + * deleted without ever being given to driver (merged with another + * request). + */ + if (&rq->queuelist == q->last_merge) + q->last_merge = NULL; + if (e->elevator_remove_req_fn) e->elevator_remove_req_fn(q, rq); } @@ -357,6 +389,7 @@ EXPORT_SYMBOL(elevator_noop); +EXPORT_SYMBOL(elv_add_request); EXPORT_SYMBOL(__elv_add_request); EXPORT_SYMBOL(elv_next_request); EXPORT_SYMBOL(elv_remove_request); ===== drivers/block/ll_rw_blk.c 1.123 vs edited ===== --- 1.123/drivers/block/ll_rw_blk.c Fri Oct 18 19:41:37 2002 +++ edited/drivers/block/ll_rw_blk.c Mon Oct 28 11:43:27 2002 @@ -242,6 +242,7 @@ q->backing_dev_info.state = 0; blk_queue_max_sectors(q, MAX_SECTORS); blk_queue_hardsect_size(q, 512); + blk_queue_dma_alignment(q, 511); /* * by default assume old behaviour and bounce for any highmem page @@ -408,6 +409,21 @@ q->seg_boundary_mask = mask; } +/** + * blk_queue_dma_alignment - set dma length and memory alignment + * @q: the request queue for the device + * @dma_mask: alignment mask + * + * Description: + * Set required memory and length aligment for direct dma transactions. + * This is used when buiding direct io requests for the queue. 
+ * + **/ +void blk_queue_dma_alignment(request_queue_t *q, int mask) +{ + q->dma_alignment = mask; +} + void blk_queue_assign_lock(request_queue_t *q, spinlock_t *lock) { spin_lock_init(lock); @@ -549,7 +565,7 @@ return; } - list_del(&rq->queuelist); + list_del_init(&rq->queuelist); rq->flags &= ~REQ_QUEUED; rq->tag = -1; @@ -633,13 +649,13 @@ if (rq->tag == -1) { printk("bad tag found on list\n"); - list_del(&rq->queuelist); + list_del_init(&rq->queuelist); rq->flags &= ~REQ_QUEUED; } else blk_queue_end_tag(q, rq); rq->flags &= ~REQ_STARTED; - elv_add_request(q, rq, 0); + __elv_add_request(q, rq, 0, 0); } } @@ -655,14 +671,19 @@ "REQ_PC", "REQ_BLOCK_PC", "REQ_SENSE", + "REQ_FAILED", + "REQ_QUIET", "REQ_SPECIAL" + "REQ_DRIVE_CMD", + "REQ_DRIVE_TASK", + "REQ_DRIVE_TASKFILE", }; void blk_dump_rq_flags(struct request *rq, char *msg) { int bit; - printk("%s: dev %02x:%02x: ", msg, major(rq->rq_dev), minor(rq->rq_dev)); + printk("%s: dev %02x:%02x: flags = ", msg, major(rq->rq_dev), minor(rq->rq_dev)); bit = 0; do { if (rq->flags & (1 << bit)) @@ -670,10 +691,17 @@ bit++; } while (bit < __REQ_NR_BITS); - printk("sector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, + printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector, rq->nr_sectors, rq->current_nr_sectors); - printk("bio %p, biotail %p\n", rq->bio, rq->biotail); + printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len); + + if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) { + printk("cdb: "); + for (bit = 0; bit < sizeof(rq->cmd); bit++) + printk("%02x ", rq->cmd[bit]); + printk("\n"); + } } void blk_recount_segments(request_queue_t *q, struct bio *bio) @@ -765,61 +793,123 @@ return 0; } -/* - * map a request to scatterlist, return number of sg entries setup. 
Caller - * must make sure sg can hold rq->nr_phys_segments entries - */ -int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg) +static int __blk_rq_map(request_queue_t *q, struct request *rq, + consume_sg_fn *consume_fn, void *cookie) { struct bio_vec *bvec, *bvprv; + struct scatterlist sg; struct bio *bio; - int nsegs, i, cluster; - - nsegs = 0; - cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); + int nsegs, i; + const int cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER); + const int max_seg = q->max_segment_size; /* * for each bio in rq */ - bvprv = NULL; + nsegs = 0; rq_for_each_bio(bio, rq) { /* * for each segment in bio */ + bvprv = NULL; bio_for_each_segment(bvec, bio, i) { - int nbytes = bvec->bv_len; - - if (bvprv && cluster) { - if (sg[nsegs - 1].length + nbytes > q->max_segment_size) - goto new_segment; - - if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) - goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) - goto new_segment; - - sg[nsegs - 1].length += nbytes; - } else { -new_segment: - memset(&sg[nsegs],0,sizeof(struct scatterlist)); - sg[nsegs].page = bvec->bv_page; - sg[nsegs].length = nbytes; - sg[nsegs].offset = bvec->bv_offset; + /* + * if length does not exceed max segment size + * and does not straddle a physical memory + * boundary and does not start a new physical + * segment, cluster on to previous one + */ + if ((bvprv && cluster) + && (sg.length + bvec->bv_len <= max_seg) + && BIOVEC_PHYS_MERGEABLE(bvprv, bvec) + && BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) { + sg.length += bvec->bv_len; + continue; + } + /* + * could/must not cluster, start a new segment making + * sure to consume the previous one first + */ + if (bvprv) { + consume_fn(q, &sg, nsegs, cookie); nsegs++; } + + sg.page = bvec->bv_page; + sg.offset = bvec->bv_offset; + sg.length = bvec->bv_len; bvprv = bvec; + } /* segments in bio */ + + /* + * consume leftover segment, if any + */ + if (bvprv) { + consume_fn(q, &sg, nsegs, cookie); + nsegs++; + } + } /* bios in rq */ return nsegs; } +static void blk_consume_sg(request_queue_t *q, struct scatterlist *sg, + int segment, void *cookie) +{ + struct scatterlist *sglist = cookie; + + sglist[segment].page = sg->page; + sglist[segment].offset = sg->offset; + sglist[segment].length = sg->length; +} + +/** + * blk_rq_map_sg - map a request to a scatterlist + * @q: The &request_queue_t the request belongs to + * @rq: The request + * @sg: The scatterlist to map to + * + * Description: + * + * Map a request to a scatterlist, returning the number of sg entries + * setup. Caller must make sure that @sg can hold ->nr_phys_segment + * worth of entries. + **/ +int blk_rq_map_sg(request_queue_t *q, struct request *rq,struct scatterlist *sg) +{ + return __blk_rq_map(q, rq, blk_consume_sg, sg); +} + +/** + * blk_rq_map_consume - map a request, calling a consume function for each entry + * @q: The &request_queue_t the request belongs to + * @rq: The request + * @consume_fn: Function to call for each sg mapping + * @cookie: Per-mapping cookie + * + * Description: + * + * Like blk_rq_map_sg(), but call the @consume_fn for each sg entry + * prepared. Some drivers find this more handy if their internal + * scatterlist is different from the Linux scatterlist since they then + * don't have to allocate (on stack or dynamically) a large dummy scatterlist + * just for the intermediate request mapping. Drivers can use @cookie to + * separate various mappings from each other, typically by passing the actual + * hardware command here. 
+ **/ +int blk_rq_map_consume(request_queue_t *q, struct request *rq, + consume_sg_fn *consume_fn, void *cookie) +{ + return __blk_rq_map(q, rq, consume_fn, cookie); +} + /* * the standard queue merge functions, can be overridden with device * specific ones if so desired */ - static inline int ll_new_mergeable(request_queue_t *q, struct request *req, struct bio *bio) @@ -1104,7 +1194,7 @@ while (!list_empty(head)) { rq = list_entry(head->next, struct request, queuelist); - list_del(&rq->queuelist); + list_del_init(&rq->queuelist); kmem_cache_free(request_cachep, rq); i++; } @@ -1264,13 +1354,20 @@ if (!list_empty(&rl->free)) { rq = blkdev_free_rq(&rl->free); - list_del(&rq->queuelist); + list_del_init(&rq->queuelist); + rq->ref_count = 1; rl->count--; if (rl->count < queue_congestion_on_threshold()) set_queue_congested(q, rw); rq->flags = 0; rq->rq_status = RQ_ACTIVE; + rq->errors = 0; rq->special = NULL; + rq->buffer = NULL; + rq->data = NULL; + rq->sense = NULL; + rq->waiting = NULL; + rq->bio = rq->biotail = NULL; rq->q = q; rq->rl = rl; } @@ -1386,7 +1483,7 @@ if (blk_rq_tagged(rq)) blk_queue_end_tag(q, rq); - _elv_add_request(q, rq, !at_head, 0); + __elv_add_request(q, rq, !at_head, 0); q->request_fn(q); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -1466,26 +1563,22 @@ * elevator indicated where it wants this request to be * inserted at elevator_merge time */ - __elv_add_request(q, req, insert_here); + __elv_add_request_pos(q, req, insert_here); } -/* - * Must be called with queue lock held and interrupts disabled - */ -void blk_put_request(struct request *req) +void __blk_put_request(request_queue_t *q, struct request *req) { struct request_list *rl = req->rl; - request_queue_t *q = req->q; + + if (unlikely(--req->ref_count)) + return; + if (unlikely(!q)) + return; req->rq_status = RQ_INACTIVE; req->q = NULL; req->rl = NULL; - if (q) { - if (q->last_merge == &req->queuelist) - q->last_merge = NULL; - } - /* * Request may not have originated from ll_rw_blk. 
if not, * it didn't come out of our reserved rq pools @@ -1493,6 +1586,8 @@ if (rl) { int rw = 0; + BUG_ON(!list_empty(&req->queuelist)); + list_add(&req->queuelist, &rl->free); if (rl == &q->rq[WRITE]) @@ -1510,6 +1605,23 @@ } } +void blk_put_request(struct request *req) +{ + request_queue_t *q = req->q; + + /* + * if req->q isn't set, this request didnt originate from the + * block layer, so it's safe to just disregard it + */ + if (q) { + unsigned long flags; + + spin_lock_irqsave(q->queue_lock, flags); + __blk_put_request(q, req); + spin_unlock_irqrestore(q->queue_lock, flags); + } +} + /** * blk_congestion_wait - wait for a queue to become uncongested * @rw: READ or WRITE @@ -1568,7 +1680,7 @@ elv_merge_requests(q, req, next); blkdev_dequeue_request(next); - blk_put_request(next); + __blk_put_request(q, next); } } @@ -1761,7 +1873,7 @@ add_request(q, req, insert_here); out: if (freereq) - blk_put_request(freereq); + __blk_put_request(q, freereq); spin_unlock_irq(q->queue_lock); return 0; @@ -1891,7 +2003,6 @@ { int count = bio_sectors(bio); - BUG_ON(!bio->bi_end_io); BIO_BUG_ON(!bio->bi_size); BIO_BUG_ON(!bio->bi_io_vec); bio->bi_rw = rw; @@ -1908,6 +2019,9 @@ struct bio *bio; int nr_phys_segs, nr_hw_segs; + if (!rq->bio) + return; + rq->buffer = bio_data(rq->bio); nr_phys_segs = nr_hw_segs = 0; @@ -1925,7 +2039,7 @@ inline void blk_recalc_rq_sectors(struct request *rq, int nsect) { - if (rq->bio) { + if (blk_fs_request(rq)) { rq->hard_sector += nsect; rq->nr_sectors = rq->hard_nr_sectors -= nsect; rq->sector = rq->hard_sector; @@ -1944,27 +2058,19 @@ } } -/** - * end_that_request_first - end I/O on one buffer. - * @req: the request being processed - * @uptodate: 0 for I/O error - * @nr_sectors: number of sectors to end I/O on - * - * Description: - * Ends I/O on a number of sectors attached to @req, and sets it up - * for the next range of segments (if any) in the cluster. 
- * - * Return: - * 0 - we are done with this request, call end_that_request_last() - * 1 - still buffers pending for this request - **/ - -int end_that_request_first(struct request *req, int uptodate, int nr_sectors) +static int __end_that_request_first(struct request *req, int uptodate, + int nr_bytes) { - int total_nsect = 0, error = 0; + int total_bytes, bio_nbytes, error = 0, next_idx = 0; struct bio *bio; - req->errors = 0; + /* + * for a REQ_BLOCK_PC request, we want to carry any eventual + * sense key with us all the way through + */ + if (!blk_pc_request(req)) + req->errors = 0; + if (!uptodate) { error = -EIO; if (!(req->flags & REQ_QUIET)) @@ -1973,56 +2079,56 @@ (unsigned long long)req->sector); } + total_bytes = bio_nbytes = 0; while ((bio = req->bio)) { - int new_bio = 0, nsect; + int nbytes; + + if (nr_bytes >= bio->bi_size) { + req->bio = bio->bi_next; + nbytes = bio->bi_size; + bio_endio(bio, nbytes, error); + next_idx = 0; + bio_nbytes = 0; + } else { + int idx = bio->bi_idx + next_idx; - if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { - printk("%s: bio idx %d >= vcnt %d\n", __FUNCTION__, + if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { + blk_dump_rq_flags(req, "__end_that"); + printk("%s: bio idx %d >= vcnt %d\n", + __FUNCTION__, bio->bi_idx, bio->bi_vcnt); - break; - } + break; + } - BIO_BUG_ON(bio_iovec(bio)->bv_len > bio->bi_size); + nbytes = bio_iovec_idx(bio, idx)->bv_len; + BIO_BUG_ON(nbytes > bio->bi_size); - /* - * not a complete bvec done - */ - nsect = bio_iovec(bio)->bv_len >> 9; - if (unlikely(nsect > nr_sectors)) { - int partial = nr_sectors << 9; - - bio_iovec(bio)->bv_offset += partial; - bio_iovec(bio)->bv_len -= partial; - bio_endio(bio, partial, error); - total_nsect += nr_sectors; - break; - } + /* + * not a complete bvec done + */ + if (unlikely(nbytes > nr_bytes)) { + bio_iovec(bio)->bv_offset += nr_bytes; + bio_iovec(bio)->bv_len -= nr_bytes; + bio_nbytes += nr_bytes; + total_bytes += nr_bytes; + break; + } - /* - * we are ending the last part of the bio, advance req pointer - */ - if ((nsect << 9) >= bio->bi_size) { - req->bio = bio->bi_next; - new_bio = 1; + /* + * advance to the next vector + */ + next_idx++; + bio_nbytes += nbytes; } - bio_endio(bio, nsect << 9, error); - - total_nsect += nsect; - nr_sectors -= nsect; - - /* - * if we didn't advance the req->bio pointer, advance bi_idx - * to indicate we are now on the next bio_vec - */ - if (!new_bio) - bio->bi_idx++; + total_bytes += nbytes; + nr_bytes -= nbytes; if ((bio = req->bio)) { /* * end more in this run, or just return 'not-done' */ - if (unlikely(nr_sectors <= 0)) + if (unlikely(nr_bytes <= 0)) break; } } @@ -2036,17 +2142,64 @@ /* * if the request wasn't completed, update state */ - blk_recalc_rq_sectors(req, total_nsect); + if (bio_nbytes) { + bio_endio(bio, bio_nbytes, error); + req->bio->bi_idx += next_idx; + } + + blk_recalc_rq_sectors(req, total_bytes >> 9); blk_recalc_rq_segments(req); return 1; } +/** + * end_that_request_first - end I/O on a request + * @req: the request being processed + * @uptodate: 0 for I/O error + * @nr_sectors: number of sectors to end I/O on + * + * Description: + * Ends I/O on a number of sectors attached to @req, and sets it up + * for the next range of segments (if any) in the cluster. 
+ * + * Return: + * 0 - we are done with this request, call end_that_request_last() + * 1 - still buffers pending for this request + **/ +int end_that_request_first(struct request *req, int uptodate, int nr_sectors) +{ + return __end_that_request_first(req, uptodate, nr_sectors << 9); +} + +/** + * end_that_request_chunk - end I/O on a request + * @req: the request being processed + * @uptodate: 0 for I/O error + * @nr_bytes: number of bytes to complete + * + * Description: + * Ends I/O on a number of bytes attached to @req, and sets it up + * for the next range of segments (if any). Like end_that_request_first(), + * but deals with bytes instead of sectors. + * + * Return: + * 0 - we are done with this request, call end_that_request_last() + * 1 - still buffers pending for this request + **/ +int end_that_request_chunk(struct request *req, int uptodate, int nr_bytes) +{ + return __end_that_request_first(req, uptodate, nr_bytes); +} + +/* + * queue lock must be held + */ void end_that_request_last(struct request *req) { if (req->waiting) complete(req->waiting); - blk_put_request(req); + __blk_put_request(req->q, req); } int __init blk_dev_init(void) @@ -2092,6 +2245,7 @@ }; EXPORT_SYMBOL(end_that_request_first); +EXPORT_SYMBOL(end_that_request_chunk); EXPORT_SYMBOL(end_that_request_last); EXPORT_SYMBOL(blk_init_queue); EXPORT_SYMBOL(bdev_get_queue); @@ -2112,7 +2266,9 @@ EXPORT_SYMBOL(blk_queue_max_segment_size); EXPORT_SYMBOL(blk_queue_hardsect_size); EXPORT_SYMBOL(blk_queue_segment_boundary); +EXPORT_SYMBOL(blk_queue_dma_alignment); EXPORT_SYMBOL(blk_rq_map_sg); +EXPORT_SYMBOL(blk_rq_map_consume); EXPORT_SYMBOL(blk_nohighio); EXPORT_SYMBOL(blk_dump_rq_flags); EXPORT_SYMBOL(submit_bio); ===== drivers/block/scsi_ioctl.c 1.12 vs edited ===== --- 1.12/drivers/block/scsi_ioctl.c Tue Oct 15 20:50:18 2002 +++ edited/drivers/block/scsi_ioctl.c Mon Oct 28 13:41:48 2002 @@ -29,25 +29,36 @@ #include #include #include +#include +#include "../scsi/scsi.h" #include +#include #include -int blk_do_rq(request_queue_t *q, struct request *rq) +#define BLK_DEFAULT_TIMEOUT (60 * HZ) + +int blk_do_rq(request_queue_t *q, struct block_device *bdev, struct request *rq) { DECLARE_COMPLETION(wait); int err = 0; + rq->rq_dev = to_kdev_t(bdev->bd_dev); + rq->rq_disk = bdev->bd_disk; + + /* + * we need an extra reference to the request, so we can look at + * it after io completion + */ + rq->ref_count++; + rq->flags |= REQ_NOMERGE; rq->waiting = &wait; - elv_add_request(q, rq, 1); + elv_add_request(q, rq, 1, 1); generic_unplug_device(q); wait_for_completion(&wait); - /* - * for now, never retry anything - */ if (rq->errors) err = -EIO; @@ -74,42 +85,52 @@ static int sg_get_timeout(request_queue_t *q) { - return HZ; + return q->sg_timeout; } static int sg_set_timeout(request_queue_t *q, int *p) { - int timeout; - int error = get_user(timeout, p); - return error; -} + int timeout, err = get_user(timeout, p); + + if (!err) + q->sg_timeout = timeout; -static int reserved_size = 0; + return err; +} static int sg_get_reserved_size(request_queue_t *q, int *p) { - return put_user(reserved_size, p); + return put_user(q->sg_reserved_size, p); } static int sg_set_reserved_size(request_queue_t *q, int *p) { - int size; - int error = get_user(size, p); - if (!error) - reserved_size = size; - return error; + int size, err = get_user(size, p); + + if (!err) + q->sg_reserved_size = size; + + return err; } +/* + * will always return that we are ATAPI even for a real SCSI drive, I'm not + * so sure this is worth doing anything about 
(why would you care??) + */ static int sg_emulated_host(request_queue_t *q, int *p) { return put_user(1, p); } -static int sg_io(request_queue_t *q, struct sg_io_hdr *uptr) +static int sg_io(request_queue_t *q, struct block_device *bdev, + struct sg_io_hdr *uptr) { - int err; + unsigned long uaddr, start_time; + int err, reading, writing, nr_sectors; struct sg_io_hdr hdr; struct request *rq; + struct bio *bio; + char sense[24]; void *buffer; if (!access_ok(VERIFY_WRITE, uptr, sizeof(*uptr))) @@ -117,47 +138,262 @@ if (copy_from_user(&hdr, uptr, sizeof(*uptr))) return -EFAULT; - if ( hdr.cmd_len > sizeof(rq->cmd) ) + if (hdr.interface_id != 'S') + return -EINVAL; + if (hdr.cmd_len > sizeof(rq->cmd)) + return -EINVAL; + if (!access_ok(VERIFY_READ, hdr.cmdp, hdr.cmd_len)) + return -EFAULT; + + if (hdr.dxfer_len > 65536) return -EINVAL; + /* + * we'll do that later + */ + if (hdr.iovec_count) + return -EOPNOTSUPP; + + nr_sectors = 0; + reading = writing = 0; buffer = NULL; + bio = NULL; if (hdr.dxfer_len) { unsigned int bytes = (hdr.dxfer_len + 511) & ~511; switch (hdr.dxfer_direction) { default: return -EINVAL; + case SG_DXFER_TO_FROM_DEV: + reading = 1; + /* fall through */ case SG_DXFER_TO_DEV: + writing = 1; + break; case SG_DXFER_FROM_DEV: - case SG_DXFER_TO_FROM_DEV: + reading = 1; break; } - buffer = kmalloc(bytes, GFP_USER); - if (!buffer) - return -ENOMEM; - if (hdr.dxfer_direction == SG_DXFER_TO_DEV || - hdr.dxfer_direction == SG_DXFER_TO_FROM_DEV) - copy_from_user(buffer, hdr.dxferp, hdr.dxfer_len); + + uaddr = (unsigned long) hdr.dxferp; + if (writing && !access_ok(VERIFY_WRITE, uaddr, bytes)) + return -EFAULT; + else if (reading && !access_ok(VERIFY_READ, uaddr, bytes)) + return -EFAULT; + + /* + * first try to map it into a bio. reading from device will + * be a write to vm. + */ + bio = bio_map_user(bdev, uaddr, hdr.dxfer_len, reading); + if (bio) { + if (writing) + bio->bi_rw |= (1 << BIO_RW); + + nr_sectors = (bio->bi_size + 511) >> 9; + + if (bio->bi_size < hdr.dxfer_len) { + bio_endio(bio, bio->bi_size, 0); + bio_unmap_user(bio, 0); + bio = NULL; + } + } + + /* + * if bio setup failed, fall back to slow approach + */ + if (!bio) { + buffer = kmalloc(bytes, q->bounce_gfp | GFP_USER); + if (!buffer) + return -ENOMEM; + + nr_sectors = bytes >> 9; + if (writing) + copy_from_user(buffer,hdr.dxferp,hdr.dxfer_len); + else + memset(buffer, 0, hdr.dxfer_len); + } } rq = blk_get_request(q, WRITE, __GFP_WAIT); - rq->timeout = 60*HZ; - rq->data = buffer; - rq->data_len = hdr.dxfer_len; - rq->flags = REQ_BLOCK_PC; - memset(rq->cmd, 0, sizeof(rq->cmd)); + + /* + * fill in request structure + */ copy_from_user(rq->cmd, hdr.cmdp, hdr.cmd_len); - err = blk_do_rq(q, rq); + if (sizeof(rq->cmd) != hdr.cmd_len) + memset(rq->cmd + hdr.cmd_len, 0, sizeof(rq->cmd) - hdr.cmd_len); + + memset(sense, 0, sizeof(sense)); + rq->sense = sense; + rq->sense_len = 0; + + rq->flags |= REQ_BLOCK_PC; + if (writing) + rq->flags |= REQ_RW; + + rq->hard_nr_sectors = rq->nr_sectors = nr_sectors; + rq->hard_cur_sectors = rq->current_nr_sectors = nr_sectors; + + if (bio) { + /* + * subtle -- if bio_map_user() ended up bouncing a bio, it + * would normally disappear when its bi_end_io is run. 
+ * however, we need it for the unmap, so grab an extra + * reference to it + */ + bio_get(bio); + + rq->nr_phys_segments = bio_phys_segments(q, bio); + rq->nr_hw_segments = bio_hw_segments(q, bio); + rq->current_nr_sectors = bio_cur_sectors(bio); + rq->hard_cur_sectors = rq->current_nr_sectors; + rq->buffer = bio_data(bio); + } + + rq->data_len = hdr.dxfer_len; + rq->data = buffer; + + rq->timeout = hdr.timeout; + if (!rq->timeout) + rq->timeout = q->sg_timeout; + if (!rq->timeout) + rq->timeout = BLK_DEFAULT_TIMEOUT; + + rq->bio = rq->biotail = bio; + + start_time = jiffies; + + /* + * return -EIO if we didn't transfer all data, caller can look at + * residual count to find out how much did succeed + */ + err = blk_do_rq(q, bdev, rq); + if (rq->data_len > 0) + err = -EIO; + + if (bio) { + bio_unmap_user(bio, reading); + bio_put(bio); + } + + hdr.status = rq->errors; + hdr.resid = rq->data_len; + hdr.duration = (jiffies - start_time) * (1000 / HZ); + + if (rq->sense_len && hdr.sbp) { + if (!copy_to_user(hdr.sbp,rq->sense, rq->sense_len)) + hdr.sb_len_wr = rq->sense_len; + } blk_put_request(rq); copy_to_user(uptr, &hdr, sizeof(*uptr)); + if (buffer) { - if (hdr.dxfer_direction == SG_DXFER_FROM_DEV || - hdr.dxfer_direction == SG_DXFER_TO_FROM_DEV) + if (reading) copy_to_user(hdr.dxferp, buffer, hdr.dxfer_len); + kfree(buffer); } + + return err; +} + +#define FORMAT_UNIT_TIMEOUT (2 * 60 * 60 * HZ) +#define START_STOP_TIMEOUT (60 * HZ) +#define MOVE_MEDIUM_TIMEOUT (5 * 60 * HZ) +#define READ_ELEMENT_STATUS_TIMEOUT (5 * 60 * HZ) +#define READ_DEFECT_DATA_TIMEOUT (60 * HZ ) + +static int sg_scsi_ioctl(request_queue_t *q, struct block_device *bdev, + Scsi_Ioctl_Command *sic) +{ + struct request *rq; + int err, in_len, out_len, bytes, opcode, cmdlen; + char *buffer = NULL, sense[24]; + + /* + * get in an out lengths, verify they don't exceed a page worth of data + */ + if (get_user(in_len, &sic->inlen)) + return -EFAULT; + if (get_user(out_len, &sic->outlen)) + return -EFAULT; + if (in_len > PAGE_SIZE || out_len > PAGE_SIZE) + return -EINVAL; + if (get_user(opcode, sic->data)) + return -EFAULT; + + printk("%s: old crap, in=%d,out=%d,op=%x\n", __FUNCTION__, in_len, out_len, opcode); + + bytes = max(in_len, out_len); + if (bytes) { + buffer = kmalloc(bytes, q->bounce_gfp | GFP_USER); + if (!buffer) + return -ENOMEM; + + memset(buffer, 0, bytes); + } + + rq = blk_get_request(q, WRITE, __GFP_WAIT); + + cmdlen = COMMAND_SIZE(opcode); + + /* + * get command and data to send to device, if any + */ + err = -EFAULT; + if (copy_from_user(rq->cmd, sic->data, cmdlen)) + goto error; + + if (copy_from_user(buffer, sic->data + cmdlen, in_len)) + goto error; + + switch (opcode) { + case FORMAT_UNIT: + rq->timeout = FORMAT_UNIT_TIMEOUT; + break; + case START_STOP: + rq->timeout = START_STOP_TIMEOUT; + break; + case MOVE_MEDIUM: + rq->timeout = MOVE_MEDIUM_TIMEOUT; + break; + case READ_ELEMENT_STATUS: + rq->timeout = READ_ELEMENT_STATUS_TIMEOUT; + break; + case READ_DEFECT_DATA: + rq->timeout = READ_DEFECT_DATA_TIMEOUT; + break; + default: + rq->timeout = BLK_DEFAULT_TIMEOUT; + break; + } + + memset(sense, 0, sizeof(sense)); + rq->sense = sense; + rq->sense_len = 0; + + rq->data = buffer; + rq->data_len = bytes; + rq->flags |= REQ_BLOCK_PC; + if (in_len) + rq->flags |= REQ_RW; + + err = blk_do_rq(q, bdev, rq); + if (err) { + if (rq->sense_len) + if (copy_to_user(sic->data, rq->sense, rq->sense_len)) + err = -EFAULT; + } else { + if (copy_to_user(sic->data, buffer, out_len)) + err = -EFAULT; + } + +error: + 
kfree(buffer); + blk_put_request(rq); return err; } @@ -172,6 +408,9 @@ return -ENXIO; switch (cmd) { + /* + * new sgv3 interface + */ case SG_GET_VERSION_NUM: return sg_get_version((int *) arg); case SCSI_IOCTL_GET_IDLUN: @@ -189,7 +428,25 @@ case SG_EMULATED_HOST: return sg_emulated_host(q, (int *) arg); case SG_IO: - return sg_io(q, (struct sg_io_hdr *) arg); + err = bd_claim(bdev, current); + if (err) + break; + err = sg_io(q, bdev, (struct sg_io_hdr *) arg); + bd_release(bdev); + break; + /* + * old junk scsi send command ioctl + */ + case SCSI_IOCTL_SEND_COMMAND: + if (!arg) + return -EINVAL; + + err = bd_claim(bdev, current); + if (err) + break; + err = sg_scsi_ioctl(q, bdev, (Scsi_Ioctl_Command *)arg); + bd_release(bdev); + break; case CDROMCLOSETRAY: close = 1; case CDROMEJECT: @@ -197,11 +454,11 @@ rq->flags = REQ_BLOCK_PC; rq->data = NULL; rq->data_len = 0; - rq->timeout = 60*HZ; + rq->timeout = BLK_DEFAULT_TIMEOUT; memset(rq->cmd, 0, sizeof(rq->cmd)); rq->cmd[0] = GPCMD_START_STOP_UNIT; rq->cmd[4] = 0x02 + (close != 0); - err = blk_do_rq(q, rq); + err = blk_do_rq(q, bdev, rq); blk_put_request(rq); break; default: ===== drivers/block/umem.c 1.25 vs edited ===== --- 1.25/drivers/block/umem.c Fri Oct 18 12:56:00 2002 +++ edited/drivers/block/umem.c Wed Oct 23 21:32:14 2002 @@ -548,12 +548,7 @@ return_bio = bio->bi_next; bio->bi_next = NULL; - /* should use bio_endio(), however already cleared - * BIO_UPTODATE. so set bio->bi_size = 0 manually to indicate - * completely done - */ - bio->bi_size = 0; - bio->bi_end_io(bio, bytes, 0); + bio_endio(bio, bio->bi_size, 0); } } ===== drivers/ide/ide-cd.c 1.27 vs edited ===== --- 1.27/drivers/ide/ide-cd.c Fri Oct 18 20:02:55 2002 +++ edited/drivers/ide/ide-cd.c Sun Oct 27 20:47:00 2002 @@ -608,7 +608,7 @@ if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL) return ide_stopped; /* retry only "normal" I/O: */ - if ((rq->flags & REQ_DRIVE_CMD) || (rq->flags & REQ_DRIVE_TASK)) { + if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) { rq->errors = 1; ide_end_drive_cmd(drive, stat, err); return ide_stopped; @@ -638,12 +638,16 @@ static void cdrom_end_request (ide_drive_t *drive, int uptodate) { struct request *rq = HWGROUP(drive)->rq; + int nsectors = rq->hard_cur_sectors; if ((rq->flags & REQ_SENSE) && uptodate) { - /* For REQ_SENSE, "rq->buffer" points to the original failed request */ - struct request *failed = (struct request *) rq->buffer; + /* + * For REQ_SENSE, "rq->buffer" points to the original failed + * request + */ + struct request *failed = (struct request *) rq->buffer; struct cdrom_info *info = drive->driver_data; - void * sense = &info->sense_data; + void *sense = &info->sense_data; if (failed && failed->sense) sense = failed->sense; @@ -651,28 +655,27 @@ cdrom_analyze_sense_data(drive, failed, sense); } - if (blk_fs_request(rq) && !rq->current_nr_sectors) + if (!rq->current_nr_sectors && blk_fs_request(rq)) uptodate = 1; + if (!nsectors) + nsectors = 1; - ide_end_request(drive, uptodate, rq->hard_cur_sectors); + ide_end_request(drive, uptodate, nsectors); } -/* Handle differences between SCSI and ATAPI packet commands */ -static int pre_transform_command(struct request *); -static void post_transform_command(struct request *); - /* Returns 0 if the request should be continued. Returns 1 if the request was ended. 
*/ -static int cdrom_decode_status (ide_startstop_t *startstop, ide_drive_t *drive, - int good_stat, int *stat_ret) +static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) { struct request *rq = HWGROUP(drive)->rq; int stat, err, sense_key; /* Check for errors. */ - *stat_ret = stat = HWIF(drive)->INB(IDE_STATUS_REG); + stat = HWIF(drive)->INB(IDE_STATUS_REG); + if (stat_ret) + *stat_ret = stat; - if (OK_STAT (stat, good_stat, BAD_R_STAT)) + if (OK_STAT(stat, good_stat, BAD_R_STAT)) return 0; /* Get the IDE error register. */ @@ -681,7 +684,6 @@ if (rq == NULL) { printk("%s: missing rq in cdrom_decode_status\n", drive->name); - *startstop = ide_stopped; return 1; } @@ -692,16 +694,19 @@ rq->flags |= REQ_FAILED; cdrom_end_request(drive, 0); - *startstop = DRIVER(drive)->error(drive, "request sense failure", stat); + DRIVER(drive)->error(drive, "request sense failure", stat); return 1; } else if (rq->flags & (REQ_PC | REQ_BLOCK_PC)) { /* All other functions, except for READ. */ struct completion *wait = NULL; - /* Fix up any SCSI command differences.. */ - if (rq->flags & REQ_BLOCK_PC) - post_transform_command(rq); + /* + * if we have an error, pass back CHECK_CONDITION as the + * scsi status byte + */ + if ((rq->flags & REQ_BLOCK_PC) && !rq->errors) + rq->errors = CHECK_CONDITION; /* Check for tray open. */ if (sense_key == NOT_READY) { @@ -763,7 +768,7 @@ } else if ((err & ~ABRT_ERR) != 0) { /* Go to the default handler for other errors. */ - *startstop = DRIVER(drive)->error(drive, "cdrom_decode_status", stat); + DRIVER(drive)->error(drive, "cdrom_decode_status",stat); return 1; } else if (sense_key == MEDIUM_ERROR) { /* No point in re-trying a zillion times on a bad @@ -779,11 +784,12 @@ queue a request sense command. */ if ((stat & ERR_STAT) != 0) cdrom_queue_request_sense(drive, NULL, NULL, NULL); - } else - blk_dump_rq_flags(rq, "ide-cd bad flags"); + } else { + blk_dump_rq_flags(rq, "ide-cd: bad rq"); + cdrom_end_request(drive, 0); + } /* Retry, or handle the next request. */ - *startstop = ide_stopped; return 1; } @@ -848,7 +854,7 @@ HWIF(drive)->OUTB(xferlen >> 8 , IDE_BCOUNTH_REG); if (IDE_CONTROL_REG) HWIF(drive)->OUTB(drive->ctl, IDE_CONTROL_REG); - + if (CDROM_CONFIG_FLAGS (drive)->drq_interrupt) { if (HWGROUP(drive)->handler != NULL) BUG(); @@ -876,20 +882,16 @@ struct request *rq, ide_handler_t *handler) { - unsigned char *cmd_buf = rq->cmd; - int cmd_len = sizeof(rq->cmd); - unsigned int timeout = rq->timeout; struct cdrom_info *info = drive->driver_data; ide_startstop_t startstop; if (CDROM_CONFIG_FLAGS(drive)->drq_interrupt) { /* Here we should have been called after receiving an interrupt from the device. DRQ should how be set. */ - int stat_dum; /* Check for errors. */ - if (cdrom_decode_status(&startstop, drive, DRQ_STAT, &stat_dum)) - return startstop; + if (cdrom_decode_status(drive, DRQ_STAT, NULL)) + return ide_stopped; } else { /* Otherwise, we must wait for DRQ to get set. */ if (ide_wait_stat(&startstop, drive, DRQ_STAT, @@ -901,10 +903,10 @@ BUG(); /* Arm the interrupt handler. */ - ide_set_handler(drive, handler, timeout, cdrom_timer_expiry); + ide_set_handler(drive, handler, rq->timeout, cdrom_timer_expiry); /* Send the command to the device. 
*/ - HWIF(drive)->atapi_output_bytes(drive, cmd_buf, cmd_len); + HWIF(drive)->atapi_output_bytes(drive, rq->cmd, sizeof(rq->cmd)); /* Start the DMA if need be */ if (info->dma) @@ -970,14 +972,12 @@ static inline int cdrom_read_check_ireason (ide_drive_t *drive, int len, int ireason) { - ireason &= 3; - if (ireason == 2) return 0; - - if (ireason == 0) { + if (ireason == 2) + return 0; + else if (ireason == 0) { /* Whoops... The drive is expecting to receive data from us! */ - printk ("%s: cdrom_read_intr: " - "Drive wants to transfer data the wrong way!\n", - drive->name); + printk("%s: read_intr: Drive wants to transfer data the " + "wrong way!\n", drive->name); /* Throw some data at the drive so it doesn't hang and quit this request. */ @@ -994,8 +994,8 @@ return 0; } else { /* Drive wants a command packet, or invalid ireason... */ - printk ("%s: cdrom_read_intr: bad interrupt reason %d\n", - drive->name, ireason); + printk("%s: read_intr: bad interrupt reason %x\n", drive->name, + ireason); } cdrom_end_request(drive, 0); @@ -1012,20 +1012,21 @@ struct cdrom_info *info = drive->driver_data; u8 lowcyl = 0, highcyl = 0; int dma = info->dma, dma_error = 0; - ide_startstop_t startstop; struct request *rq = HWGROUP(drive)->rq; - /* Check for errors. */ + /* + * handle dma case + */ if (dma) { info->dma = 0; if ((dma_error = HWIF(drive)->ide_dma_end(drive))) HWIF(drive)->ide_dma_off(drive); } - if (cdrom_decode_status (&startstop, drive, 0, &stat)) - return startstop; - + if (cdrom_decode_status(drive, 0, &stat)) + return ide_stopped; + if (dma) { if (!dma_error) { ide_end_request(drive, 1, rq->nr_sectors); @@ -1035,7 +1036,7 @@ } /* Read the interrupt reason and the transfer length. */ - ireason = HWIF(drive)->INB(IDE_IREASON_REG); + ireason = HWIF(drive)->INB(IDE_IREASON_REG) & 0x3; lowcyl = HWIF(drive)->INB(IDE_BCOUNTL_REG); highcyl = HWIF(drive)->INB(IDE_BCOUNTH_REG); @@ -1080,7 +1081,7 @@ /* First, figure out if we need to bit-bucket any of the leading sectors. */ - nskip = MIN((int)(rq->current_nr_sectors - bio_sectors(rq->bio)), sectors_to_transfer); + nskip = MIN((int)(rq->current_nr_sectors - bio_cur_sectors(rq->bio)), sectors_to_transfer); while (nskip > 0) { /* We need to throw away a sector. */ @@ -1180,7 +1181,7 @@ represent the number of sectors to skip at the start of a transfer will fail. I think that this will never happen, but let's be paranoid and check. */ - if (rq->current_nr_sectors < bio_sectors(rq->bio) && + if (rq->current_nr_sectors < bio_cur_sectors(rq->bio) && (rq->sector % SECTORS_PER_FRAME) != 0) { printk("%s: cdrom_read_from_buffer: buffer botch (%ld)\n", drive->name, (long)rq->sector); @@ -1218,7 +1219,7 @@ nskip = (sector % SECTORS_PER_FRAME); if (nskip > 0) { /* Sanity check... 
*/ - if (rq->current_nr_sectors != bio_sectors(rq->bio) && + if (rq->current_nr_sectors != bio_cur_sectors(rq->bio) && (rq->sector % CD_FRAMESIZE != 0)) { printk ("%s: cdrom_start_read_continuation: buffer botch (%u)\n", drive->name, rq->current_nr_sectors); @@ -1257,10 +1258,9 @@ struct cdrom_info *info = drive->driver_data; int stat; static int retry = 10; - ide_startstop_t startstop; - if (cdrom_decode_status (&startstop, drive, 0, &stat)) - return startstop; + if (cdrom_decode_status(drive, 0, &stat)) + return ide_stopped; CDROM_CONFIG_FLAGS(drive)->seeking = 1; if (retry && time_after(jiffies, info->start_seek + IDECD_SEEK_TIMER)) { @@ -1318,7 +1318,7 @@ rq->nr_sectors += n; rq->sector -= n; } - rq->hard_cur_sectors = rq->current_nr_sectors = bio_sectors(rq->bio); + rq->hard_cur_sectors = rq->current_nr_sectors = bio_cur_sectors(rq->bio); rq->hard_nr_sectors = rq->nr_sectors; rq->hard_sector = rq->sector; rq->q->prep_rq_fn(rq->q, rq); @@ -1353,6 +1353,7 @@ info->dma = 0; info->cmd = READ; + /* Start sending the read request to the drive. */ return cdrom_start_packet_command(drive, 32768, cdrom_start_read_continuation); } @@ -1361,21 +1362,17 @@ * Execute all other packet commands. */ -/* Forward declarations. */ -static int cdrom_lockdoor(ide_drive_t *drive, int lockflag, - struct request_sense *sense); - /* Interrupt routine for packet command completion. */ static ide_startstop_t cdrom_pc_intr (ide_drive_t *drive) { - int ireason, len, stat, thislen; + int ireason, len, thislen; struct request *rq = HWGROUP(drive)->rq; - ide_startstop_t startstop; u8 lowcyl = 0, highcyl = 0; + int stat; /* Check for errors. */ - if (cdrom_decode_status(&startstop, drive, 0, &stat)) - return startstop; + if (cdrom_decode_status(drive, 0, &stat)) + return ide_stopped; /* Read the interrupt reason and the transfer length. */ ireason = HWIF(drive)->INB(IDE_IREASON_REG); @@ -1420,8 +1417,10 @@ /* The drive wants to be written to. */ if ((ireason & 3) == 0) { - if (!rq->data) + if (!rq->data) { + blk_dump_rq_flags(rq, "cdrom_pc_intr, write"); goto confused; + } /* Transfer the data. */ HWIF(drive)->atapi_output_bytes(drive, rq->data, thislen); @@ -1440,8 +1439,10 @@ /* Same drill for reading. */ else if ((ireason & 3) == 2) { - if (!rq->data) + if (!rq->data) { + blk_dump_rq_flags(rq, "cdrom_pc_intr, write"); goto confused; + } /* Transfer the data. */ HWIF(drive)->atapi_input_bytes(drive, rq->data, thislen); @@ -1456,6 +1457,8 @@ /* Keep count of how much data we've moved. */ rq->data += thislen; rq->data_len -= thislen; + if (rq->cmd[0] == GPCMD_REQUEST_SENSE) + rq->sense_len++; } else { confused: printk ("%s: cdrom_pc_intr: The drive " @@ -1472,7 +1475,6 @@ return ide_started; } - static ide_startstop_t cdrom_do_pc_continuation (ide_drive_t *drive) { struct request *rq = HWGROUP(drive)->rq; @@ -1570,39 +1572,202 @@ { /* Two notes about IDE interrupt reason here - 0 means that * the drive wants to receive data from us, 2 means that - * the drive is expecting data from us. + * the drive is expecting to transfer data to us. */ - ireason &= 3; - - if (ireason == 2) { + if (ireason == 0) + return 0; + else if (ireason == 2) { /* Whoops... The drive wants to send data. */ - printk("%s: cdrom_write_intr: wrong transfer direction!\n", - drive->name); + printk("%s: write_intr: wrong transfer direction!\n", + drive->name); - /* Throw some data at the drive so it doesn't hang - and quit this request. 
*/ while (len > 0) { int dum = 0; - HWIF(drive)->atapi_output_bytes(drive, &dum, sizeof(dum)); + HWIF(drive)->atapi_input_bytes(drive, &dum, sizeof(dum)); len -= sizeof(dum); } } else { /* Drive wants a command packet, or invalid ireason... */ - printk("%s: cdrom_write_intr: bad interrupt reason %d\n", - drive->name, ireason); + printk("%s: write_intr: bad interrupt reason %x\n", + drive->name, ireason); } cdrom_end_request(drive, 0); return 1; } +static void post_transform_command(struct request *req) +{ + char *ibuf = req->buffer; + u8 *c = req->cmd; + + if (!blk_pc_request(req)) + return; + + /* + * set ansi-revision and response data as atapi + */ + if (c[0] == GPCMD_INQUIRY) { + ibuf[2] |= 2; + ibuf[3] = (ibuf[3] & 0xf0) | 2; + } +} + +typedef void (xfer_func_t)(ide_drive_t *, void *, u32); + +/* + * best way to deal with dma that is not sector aligned right now... note + * that in this path we are not using ->data or ->buffer at all. this irs + * can replace cdrom_pc_intr, cdrom_read_intr, and cdrom_write_intr in the + * future. + */ +static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) +{ + struct cdrom_info *info = drive->driver_data; + struct request *rq = HWGROUP(drive)->rq; + int dma_error, dma, stat, ireason, len, thislen; + u8 lowcyl, highcyl; + xfer_func_t *xferfunc; + unsigned long flags; + + /* Check for errors. */ + dma_error = 0; + dma = info->dma; + if (dma) { + info->dma = 0; + if ((dma_error = HWIF(drive)->ide_dma_end(drive))) { + printk("ide-cd: dma error\n"); + HWIF(drive)->ide_dma_off(drive); + } + } + + if (cdrom_decode_status(drive, 0, &stat)) { + end_that_request_chunk(rq, 1, rq->data_len); + return ide_stopped; + } + + /* + * using dma, transfer is complete now + */ + if (dma) { + if (dma_error) + return DRIVER(drive)->error(drive, "dma error", stat); + + end_that_request_chunk(rq, 1, rq->data_len); + rq->data_len = 0; + goto end_request; + } + + /* + * ok we fall to pio :/ + */ + ireason = HWIF(drive)->INB(IDE_IREASON_REG) & 0x3; + lowcyl = HWIF(drive)->INB(IDE_BCOUNTL_REG); + highcyl = HWIF(drive)->INB(IDE_BCOUNTH_REG); + + len = lowcyl + (256 * highcyl); + thislen = rq->data_len; + if (thislen > len) + thislen = len; + + /* + * If DRQ is clear, the command has completed. + */ + if ((stat & DRQ_STAT) == 0) { + if (rq->data_len) + printk("%s: %u residual after xfer\n", __FUNCTION__, rq->data_len); + goto end_request; + } + + /* + * check which way to transfer data + */ + if (rq_data_dir(rq) == WRITE) { + /* + * write to drive + */ + if (cdrom_write_check_ireason(drive, len, ireason)) + return ide_stopped; + + xferfunc = HWIF(drive)->atapi_output_bytes; + } else { + /* + * read from drive + */ + if (cdrom_read_check_ireason(drive, len, ireason)) + return ide_stopped; + + xferfunc = HWIF(drive)->atapi_input_bytes; + } + + /* + * transfer data + */ + while (thislen > 0) { + int blen = blen = rq->data_len; + char *ptr = rq->data; + + /* + * bio backed? 
+ */ + if (rq->bio) { + ptr = bio_data(rq->bio); + blen = bio_iovec(rq->bio)->bv_len; + } + + if (blen > thislen) + blen = thislen; + + xferfunc(drive, ptr, blen); + + thislen -= blen; + len -= blen; + rq->data_len -= blen; + + if (rq->bio) + end_that_request_chunk(rq, 1, blen); + else + rq->data += blen; + } + + /* + * pad, if necessary + */ + if (len) { + printk("%s: padding %u bytes\n", drive->name, len); + + while (len) { + int pad = 0; + + xferfunc(drive, &pad, sizeof(pad)); + len -= sizeof(pad); + } + } + + if (HWGROUP(drive)->handler != NULL) + BUG(); + + ide_set_handler(drive, cdrom_newpc_intr, rq->timeout, NULL); + return ide_started; + +end_request: + if (!rq->data_len) + post_transform_command(rq); + + spin_lock_irqsave(&ide_lock, flags); + blkdev_dequeue_request(rq); + end_that_request_last(rq); + HWGROUP(drive)->rq = NULL; + spin_unlock_irqrestore(&ide_lock, flags); + return ide_stopped; +} + static ide_startstop_t cdrom_write_intr(ide_drive_t *drive) { int stat, ireason, len, sectors_to_transfer, uptodate; struct cdrom_info *info = drive->driver_data; int dma_error = 0, dma = info->dma; u8 lowcyl = 0, highcyl = 0; - ide_startstop_t startstop; struct request *rq = HWGROUP(drive)->rq; @@ -1615,11 +1780,11 @@ } } - if (cdrom_decode_status(&startstop, drive, 0, &stat)) { + if (cdrom_decode_status(drive, 0, &stat)) { printk("ide-cd: write_intr decode_status bad\n"); - return startstop; + return ide_stopped; } - + /* * using dma, transfer is complete now */ @@ -1654,9 +1819,8 @@ } /* Check that the drive is expecting to do the same thing we are. */ - if (ireason & 3) - if (cdrom_write_check_ireason(drive, len, ireason)) - return ide_stopped; + if (cdrom_write_check_ireason(drive, len, ireason)) + return ide_stopped; sectors_to_transfer = len / SECTOR_SIZE; @@ -1704,10 +1868,6 @@ static ide_startstop_t cdrom_start_write_cont(ide_drive_t *drive) { struct request *rq = HWGROUP(drive)->rq; - unsigned nframes, frame; - - nframes = rq->nr_sectors >> 2; - frame = rq->sector >> 2; #if 0 /* the immediate bit */ rq->cmd[1] = 1 << 3; @@ -1748,56 +1908,40 @@ return cdrom_start_packet_command(drive, 32768, cdrom_start_write_cont); } -/* - * Most of the SCSI commands are supported directly by ATAPI devices. - * This transform handles the few exceptions. - */ -static int pre_transform_command(struct request *req) +static ide_startstop_t cdrom_do_newpc_cont(ide_drive_t *drive) { - u8 *c = req->cmd; - /* Transform 6-byte read/write commands to the 10-byte version. */ - if (c[0] == READ_6 || c[0] == WRITE_6) { - c[8] = c[4]; - c[5] = c[3]; - c[4] = c[2]; - c[3] = c[1] & 0x1f; - c[2] = 0; - c[1] &= 0xe0; - c[0] += (READ_10 - READ_6); - return 0; - } + struct request *rq = HWGROUP(drive)->rq; - /* These also need fixup, not done yet */ - if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) - return -EINVAL; + if (!rq->timeout) + rq->timeout = WAIT_CMD; - return 0; -} - -static void post_transform_command(struct request *req) -{ + return cdrom_transfer_packet_command(drive, rq, cdrom_newpc_intr); } static ide_startstop_t cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) { - ide_startstop_t startstop; - struct cdrom_info *info; - - if (pre_transform_command(rq) < 0) { - cdrom_end_request(drive, 0); - return ide_stopped; - } + struct cdrom_info *info = drive->driver_data; rq->flags |= REQ_QUIET; - info = drive->driver_data; info->dma = 0; info->cmd = 0; - /* Start sending the command to the drive. 
*/ - startstop = cdrom_start_packet_command(drive, rq->data_len, cdrom_do_pc_continuation); + /* + * sg request + */ + if (rq->bio) { + if (rq->data_len & 3) { + printk("%s: block pc not aligned, len=%d\n", drive->name, rq->data_len); + cdrom_end_request(drive, 0); + return ide_stopped; + } + info->dma = drive->using_dma; + info->cmd = rq_data_dir(rq); + } - return startstop; + /* Start sending the command to the drive. */ + return cdrom_start_packet_command(drive, rq->data_len, cdrom_do_newpc_cont); } /**************************************************************************** @@ -2314,7 +2458,7 @@ req.sense = cgc->sense; cgc->stat = cdrom_queue_packet_command(drive, &req); if (!cgc->stat) - cgc->buflen -= req.data_len; + cgc->buflen = req.data_len; return cgc->stat; } @@ -2451,7 +2595,7 @@ int ret; cdrom_prepare_request(&req); - req.flags = REQ_SPECIAL; + req.flags = REQ_SPECIAL | REQ_QUIET; ret = ide_do_drive_cmd(drive, &req, ide_wait); /* @@ -2828,15 +2972,12 @@ /* * standard prep_rq_fn that builds 10 byte cmds */ -static int ll_10byte_cmd_build(request_queue_t *q, struct request *rq) +static int ide_cdrom_prep_fs(request_queue_t *q, struct request *rq) { int hard_sect = queue_hardsect_size(q); long block = (long)rq->hard_sector / (hard_sect >> 9); unsigned long blocks = rq->hard_nr_sectors / (hard_sect >> 9); - if (!(rq->flags & REQ_CMD)) - return 0; - BUG_ON(sizeof(rq->hard_sector) > 4 && (rq->hard_sector >> 32)); if (rq->hard_nr_sectors != rq->nr_sectors) { @@ -2863,6 +3004,50 @@ */ rq->cmd[7] = (blocks >> 8) & 0xff; rq->cmd[8] = blocks & 0xff; + return BLKPREP_OK; +} + +/* + * Most of the SCSI commands are supported directly by ATAPI devices. + * This transform handles the few exceptions. + */ +static int ide_cdrom_prep_pc(struct request *rq) +{ + u8 *c = rq->cmd; + + /* + * Transform 6-byte read/write commands to the 10-byte version + */ + if (c[0] == READ_6 || c[0] == WRITE_6) { + c[8] = c[4]; + c[5] = c[3]; + c[4] = c[2]; + c[3] = c[1] & 0x1f; + c[2] = 0; + c[1] &= 0xe0; + c[0] += (READ_10 - READ_6); + return BLKPREP_OK; + } + + /* + * it's silly to pretend we understand 6-byte sense commands, just + * reject with ILLEGAL_REQUEST and the caller should take the + * appropriate action + */ + if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) { + rq->errors = ILLEGAL_REQUEST; + return BLKPREP_KILL; + } + + return BLKPREP_OK; +} + +static int ide_cdrom_prep_fn(request_queue_t *q, struct request *rq) +{ + if (rq->flags & REQ_CMD) + return ide_cdrom_prep_fs(q, rq); + else if (rq->flags & REQ_BLOCK_PC) + return ide_cdrom_prep_pc(rq); return 0; } @@ -2880,7 +3065,8 @@ set_device_ro(mk_kdev(drive->disk->major, drive->disk->first_minor), 1); blk_queue_hardsect_size(&drive->queue, CD_FRAMESIZE); - blk_queue_prep_rq(&drive->queue, ll_10byte_cmd_build); + blk_queue_prep_rq(&drive->queue, ide_cdrom_prep_fn); + blk_queue_dma_alignment(&drive->queue, 3); drive->special.all = 0; drive->ready_stat = 0; ===== drivers/ide/ide-disk.c 1.27 vs edited ===== --- 1.27/drivers/ide/ide-disk.c Tue Oct 15 22:54:07 2002 +++ edited/drivers/ide/ide-disk.c Fri Oct 18 20:42:33 2002 @@ -1610,56 +1610,6 @@ #endif } -static int idedisk_suspend(struct device *dev, u32 state, u32 level) -{ - ide_drive_t *drive = dev->driver_data; - - printk("Suspending device %p\n", dev->driver_data); - - /* I hope that every freeze operation from the upper levels have - * already been done... 
- */ - - if (level != SUSPEND_SAVE_STATE) - return 0; - BUG_ON(in_interrupt()); - - printk("Waiting for commands to finish\n"); - - /* wait until all commands are finished */ - /* FIXME: waiting for spinlocks should be done instead. */ - if (!(HWGROUP(drive))) - printk("No hwgroup?\n"); - while (HWGROUP(drive)->handler) - yield(); - - /* set the drive to standby */ - printk(KERN_INFO "suspending: %s ", drive->name); - if (drive->driver) { - if (drive->driver->standby) - drive->driver->standby(drive); - } - drive->blocked = 1; - - while (HWGROUP(drive)->handler) - yield(); - - return 0; -} - -static int idedisk_resume(struct device *dev, u32 level) -{ - ide_drive_t *drive = dev->driver_data; - - if (level != RESUME_RESTORE_STATE) - return 0; - if (!drive->blocked) - panic("ide: Resume but not suspended?\n"); - - drive->blocked = 0; - return 0; -} - /* This is just a hook for the overall driver tree. */ ===== drivers/ide/ide-floppy.c 1.19 vs edited ===== --- 1.19/drivers/ide/ide-floppy.c Tue Oct 15 22:54:07 2002 +++ edited/drivers/ide/ide-floppy.c Fri Oct 25 14:12:36 2002 @@ -1238,6 +1238,21 @@ set_bit(PC_DMA_RECOMMENDED, &pc->flags); } +static int +idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy, idefloppy_pc_t *pc, struct request *rq) +{ + /* + * just support eject for now, it would not be hard to make the + * REQ_BLOCK_PC support fully-featured + */ + if (rq->cmd[0] != IDEFLOPPY_START_STOP_CMD) + return 1; + + idefloppy_init_pc(pc); + memcpy(pc->c, rq->cmd, sizeof(pc->c)); + return 0; +} + /* * idefloppy_do_request is our request handling function. */ @@ -1280,6 +1295,12 @@ idefloppy_create_rw_cmd(floppy, pc, rq, block); } else if (rq->flags & REQ_SPECIAL) { pc = (idefloppy_pc_t *) rq->buffer; + } else if (rq->flags & REQ_BLOCK_PC) { + pc = idefloppy_next_pc_storage(drive); + if (idefloppy_blockpc_cmd(floppy, pc, rq)) { + idefloppy_do_end_request(drive, 0, 0); + return ide_stopped; + } } else { blk_dump_rq_flags(rq, "ide-floppy: unsupported command in queue"); ===== drivers/ide/ide.c 1.33 vs edited ===== --- 1.33/drivers/ide/ide.c Fri Oct 18 21:44:11 2002 +++ edited/drivers/ide/ide.c Wed Oct 23 13:02:19 2002 @@ -878,13 +878,12 @@ { ide_startstop_t startstop; unsigned long block; - ide_hwif_t *hwif = HWIF(drive); BUG_ON(!(rq->flags & REQ_STARTED)); #ifdef DEBUG printk("%s: start_request: current=0x%08lx\n", - hwif->name, (unsigned long) rq); + HWIF(drive)->name, (unsigned long) rq); #endif /* bail early if we've exceeded max_failures */ @@ -910,7 +909,7 @@ block = 1; /* redirect MBR access to EZ-Drive partn table */ #if (DISK_RECOVERY_TIME > 0) - while ((read_timer() - hwif->last_time) < DISK_RECOVERY_TIME); + while ((read_timer() - HWIF(drive)->last_time) < DISK_RECOVERY_TIME); #endif SELECT_DRIVE(drive); @@ -1128,9 +1127,15 @@ break; } + /* + * we know that the queue isn't empty, but this can happen + * if the q->prep_rq_fn() decides to kill a request + */ rq = elv_next_request(&drive->queue); - if (!rq) + if (!rq) { + hwgroup->busy = !!ata_pending_commands(drive); break; + } if (!rq->bio && ata_pending_commands(drive)) break; @@ -1481,6 +1486,7 @@ void ide_init_drive_cmd (struct request *rq) { memset(rq, 0, sizeof(*rq)); + INIT_LIST_HEAD(&rq->queuelist); rq->flags = REQ_DRIVE_CMD; } @@ -1515,10 +1521,8 @@ { unsigned long flags; ide_hwgroup_t *hwgroup = HWGROUP(drive); - unsigned int major = HWIF(drive)->major; - request_queue_t *q = &drive->queue; - struct list_head *queue_head = &q->queue_head; DECLARE_COMPLETION(wait); + int insert_end = 1, err; #ifdef CONFIG_BLK_DEV_PDC4030 if 
(HWIF(drive)->chipset == ide_pdc4030 && rq->buffer != NULL) @@ -1540,29 +1544,35 @@ } rq->rq_disk = drive->disk; - if (action == ide_wait) + + /* + * we need to hold an extra reference to request for safe inspection + * after completion + */ + if (action == ide_wait) { + rq->ref_count++; rq->waiting = &wait; + } + spin_lock_irqsave(&ide_lock, flags); - if (blk_queue_empty(q) || action == ide_preempt) { - if (action == ide_preempt) - hwgroup->rq = NULL; - } else { - if (action == ide_wait || action == ide_end) { - queue_head = queue_head->prev; - } else - queue_head = queue_head->next; + if (action == ide_preempt) { + hwgroup->rq = NULL; + insert_end = 0; } - q->elevator.elevator_add_req_fn(q, rq, queue_head); + __elv_add_request(&drive->queue, rq, insert_end, 0); ide_do_request(hwgroup, 0); spin_unlock_irqrestore(&ide_lock, flags); + + err = 0; if (action == ide_wait) { - /* wait for it to be serviced */ wait_for_completion(&wait); - /* return -EIO if errors */ - return rq->errors ? -EIO : 0; + if (rq->errors) + err = -EIO; + + blk_put_request(rq); } - return 0; + return err; } EXPORT_SYMBOL(ide_do_drive_cmd); @@ -3369,7 +3379,7 @@ list_del_init(&drive->list); ata_attach(drive); } - driver->gen_driver.name = driver->name; + driver->gen_driver.name = (char *) driver->name; driver->gen_driver.bus = &ide_bus_type; driver->gen_driver.remove = ide_drive_remove; return driver_register(&driver->gen_driver); ===== drivers/md/linear.c 1.20 vs edited ===== --- 1.20/drivers/md/linear.c Wed Oct 16 06:49:22 2002 +++ edited/drivers/md/linear.c Tue Oct 22 18:32:00 2002 @@ -52,19 +52,21 @@ * @bio: the buffer head that's been built up so far * @biovec: the request that could be merged to it. * - * Return 1 if the merge is not permitted (because the - * result would cross a device boundary), 0 otherwise. + * Return amount of bytes we can take at this offset */ static int linear_mergeable_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; - dev_info_t *dev0, *dev1; + dev_info_t *dev0; + int maxsectors, bio_sectors = (bio->bi_size + biovec->bv_len) >> 9; dev0 = which_dev(mddev, bio->bi_sector); - dev1 = which_dev(mddev, bio->bi_sector + - ((bio->bi_size + biovec->bv_len - 1) >> 9)); + maxsectors = (dev0->size << 1) - (bio->bi_sector - (dev0->offset<<1)); - return dev0 != dev1; + if (bio_sectors <= maxsectors) + return biovec->bv_len; + + return (maxsectors << 9) - bio->bi_size; } static int linear_run (mddev_t *mddev) ===== drivers/md/raid0.c 1.18 vs edited ===== --- 1.18/drivers/md/raid0.c Tue Oct 15 12:03:07 2002 +++ edited/drivers/md/raid0.c Mon Oct 21 09:22:30 2002 @@ -168,8 +168,7 @@ * @bio: the buffer head that's been built up so far * @biovec: the request that could be merged to it. * - * Return 1 if the merge is not permitted (because the - * result would cross a chunk boundary), 0 otherwise. 
===== drivers/md/linear.c 1.20 vs edited =====
--- 1.20/drivers/md/linear.c	Wed Oct 16 06:49:22 2002
+++ edited/drivers/md/linear.c	Tue Oct 22 18:32:00 2002
@@ -52,19 +52,21 @@
  *	@bio: the buffer head that's been built up so far
  *	@biovec: the request that could be merged to it.
  *
- *	Return 1 if the merge is not permitted (because the
- *	result would cross a device boundary), 0 otherwise.
+ *	Return amount of bytes we can take at this offset
  */
 static int linear_mergeable_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *biovec)
 {
 	mddev_t *mddev = q->queuedata;
-	dev_info_t *dev0, *dev1;
+	dev_info_t *dev0;
+	int maxsectors, bio_sectors = (bio->bi_size + biovec->bv_len) >> 9;
 
 	dev0 = which_dev(mddev, bio->bi_sector);
-	dev1 = which_dev(mddev, bio->bi_sector +
-			 ((bio->bi_size + biovec->bv_len - 1) >> 9));
+	maxsectors = (dev0->size << 1) - (bio->bi_sector - (dev0->offset<<1));
 
-	return dev0 != dev1;
+	if (bio_sectors <= maxsectors)
+		return biovec->bv_len;
+
+	return (maxsectors << 9) - bio->bi_size;
 }
 
 static int linear_run (mddev_t *mddev)
===== drivers/md/raid0.c 1.18 vs edited =====
--- 1.18/drivers/md/raid0.c	Tue Oct 15 12:03:07 2002
+++ edited/drivers/md/raid0.c	Mon Oct 21 09:22:30 2002
@@ -168,8 +168,7 @@
  *	@bio: the buffer head that's been built up so far
  *	@biovec: the request that could be merged to it.
  *
- *	Return 1 if the merge is not permitted (because the
- *	result would cross a chunk boundary), 0 otherwise.
+ *	Return amount of bytes we can accept at this offset
  */
 static int raid0_mergeable_bvec(request_queue_t *q, struct bio *bio, struct bio_vec *biovec)
 {
@@ -182,7 +181,7 @@
 	block = bio->bi_sector >> 1;
 	bio_sz = (bio->bi_size + biovec->bv_len) >> 10;
 
-	return chunk_size < ((block & (chunk_size - 1)) + bio_sz);
+	return (chunk_size - ((block & (chunk_size - 1)) + bio_sz)) << 10;
 }
 
 static int raid0_run (mddev_t *mddev)
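Both md personalities above switch to the new ->merge_bvec_fn convention: instead of a boolean "would cross a boundary" verdict, the hook now reports how many bytes it can accept at the proposed offset (0 meaning nothing fits, biovec->bv_len meaning the whole vec fits). A minimal sketch, assuming a made-up device that cannot merge across 64-sector windows:

/* illustrative only -- the 64-sector window is invented for the example */
static int my_mergeable_bvec(request_queue_t *q, struct bio *bio,
			     struct bio_vec *biovec)
{
	/* bytes still available in the window that starts at bi_sector */
	int room = ((64 - (bio->bi_sector & 63)) << 9) - bio->bi_size;

	if (room <= 0)
		return 0;			/* nothing fits here */
	if (room >= biovec->bv_len)
		return biovec->bv_len;		/* the whole vec fits */

	return room;				/* partial fit, in bytes */
}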
===== drivers/scsi/scsi_lib.c 1.35 vs edited =====
--- 1.35/drivers/scsi/scsi_lib.c	Fri Oct 18 21:19:51 2002
+++ edited/drivers/scsi/scsi_lib.c	Fri Oct 25 20:38:22 2002
@@ -240,7 +240,7 @@
 	SCpnt->request->special = (void *) SCpnt;
 	if(blk_rq_tagged(SCpnt->request))
 		blk_queue_end_tag(q, SCpnt->request);
-	_elv_add_request(q, SCpnt->request, 0, 0);
+	__elv_add_request(q, SCpnt->request, 0, 0);
 }
 
 /*
@@ -514,6 +514,12 @@
 		}
 	}
 
+	if (blk_pc_request(req)) {
+		req->errors = result & 0xff;
+		if (!result)
+			req->data_len -= SCpnt->bufflen;
+	}
+
 	/*
 	 * Zero these out.  They now point to freed memory, and it is
 	 * dangerous to hang onto the pointers.
@@ -527,7 +533,7 @@
 	 * Next deal with any sectors which we were able to correctly
 	 * handle.
 	 */
-	if (good_sectors > 0) {
+	if (good_sectors >= 0) {
 		SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d sectors done.\n", req->nr_sectors, good_sectors));
 		SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n ", SCpnt->use_sg));
@@ -951,7 +957,7 @@
 			SCpnt->request->flags |= REQ_SPECIAL;
 			if(blk_rq_tagged(SCpnt->request))
 				blk_queue_end_tag(q, SCpnt->request);
-			_elv_add_request(q, SCpnt->request, 0, 0);
+			__elv_add_request(q, SCpnt->request, 0, 0);
 			break;
 		}
===== drivers/scsi/scsi_merge.c 1.24 vs edited =====
--- 1.24/drivers/scsi/scsi_merge.c	Fri Oct 18 21:19:51 2002
+++ edited/drivers/scsi/scsi_merge.c	Tue Oct 22 13:31:00 2002
@@ -62,15 +62,9 @@
 	int	   count, gfp_mask;
 
 	/*
-	 * non-sg block request. FIXME: check bouncing for isa hosts!
+	 * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
 	 */
 	if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
-		/*
-		 * FIXME: isa bouncing
-		 */
-		if (SCpnt->host->unchecked_isa_dma)
-			goto fail;
-
 		SCpnt->request_bufflen = req->data_len;
 		SCpnt->request_buffer = req->data;
 		req->buffer = req->data;
@@ -100,6 +94,8 @@
 
 	SCpnt->request_buffer = (char *) sgpnt;
 	SCpnt->request_bufflen = req->nr_sectors << 9;
+	if (blk_pc_request(req))
+		SCpnt->request_bufflen = req->data_len;
 	req->buffer = NULL;
 
 	/*
@@ -123,7 +119,6 @@
 	/*
 	 * kill it. there should be no leftover blocks in this request
 	 */
-fail:
 	SCpnt = scsi_end_request(SCpnt, 0, req->nr_sectors);
 	BUG_ON(SCpnt);
 	return 0;
===== drivers/scsi/sd.c 1.73 vs edited =====
--- 1.73/drivers/scsi/sd.c	Fri Oct 18 22:32:54 2002
+++ edited/drivers/scsi/sd.c	Sat Oct 19 16:52:55 2002
@@ -308,6 +308,8 @@
 		if (rq->timeout)
 			timeout = rq->timeout;
 
+		SCpnt->transfersize = rq->data_len;
+		SCpnt->underflow = rq->data_len;
 		goto queue;
 	}
 
@@ -431,10 +433,10 @@
 	 * host adapter, it's safe to assume that we can at least transfer
 	 * this many bytes between each connect / disconnect.
 	 */
-queue:
 	SCpnt->transfersize = sdp->sector_size;
 	SCpnt->underflow = this_count << 9;
 
+queue:
 	SCpnt->allowed = MAX_RETRIES;
 	SCpnt->timeout_per_command = timeout;
===== drivers/scsi/sr.c 1.56 vs edited =====
--- 1.56/drivers/scsi/sr.c	Sat Oct 19 00:03:16 2002
+++ edited/drivers/scsi/sr.c	Sat Oct 19 16:53:32 2002
@@ -287,6 +287,8 @@
 		if (rq->timeout)
 			timeout = rq->timeout;
 
+		SCpnt->transfersize = rq->data_len;
+		SCpnt->underflow = rq->data_len;
 		goto queue;
 	}
 
@@ -360,10 +362,10 @@
 	 * host adapter, it's safe to assume that we can at least transfer
 	 * this many bytes between each connect / disconnect.
 	 */
-queue:
 	SCpnt->transfersize = cd->device->sector_size;
 	SCpnt->underflow = this_count << 9;
 
+queue:
 	SCpnt->allowed = MAX_RETRIES;
 	SCpnt->timeout_per_command = timeout;
===== drivers/scsi/sr_ioctl.c 1.21 vs edited =====
--- 1.21/drivers/scsi/sr_ioctl.c	Thu Oct 17 19:52:39 2002
+++ edited/drivers/scsi/sr_ioctl.c	Thu Oct 24 12:40:52 2002
@@ -160,13 +160,11 @@
 		if (!cgc->quiet)
 			printk(KERN_ERR "%s: CDROM (ioctl) reports ILLEGAL "
			       "REQUEST.\n", cd->cdi.name);
+		err = -EIO;
 		if (SRpnt->sr_sense_buffer[12] == 0x20 &&
-		    SRpnt->sr_sense_buffer[13] == 0x00) {
+		    SRpnt->sr_sense_buffer[13] == 0x00)
 			/* sense: Invalid command operation code */
 			err = -EDRIVE_CANT_DO_THIS;
-		} else {
-			err = -EINVAL;
-		}
 #ifdef DEBUG
 		print_command(cgc->cmd);
 		print_req_sense("sr", SRpnt);
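The scsi_lib.c hunk above gives packet-command submitters something to look at on completion: the low byte of the SCSI result is stored in rq->errors, and on success rq->data_len is decremented by the completed transfer length, leaving the residual. A sketch of how a caller might interpret a finished REQ_BLOCK_PC request; the helper name is made up:

/* illustrative only: examine a completed REQ_BLOCK_PC request */
static int my_pc_done(struct request *rq)
{
	if (rq->errors)
		return -EIO;		/* non-zero status byte from the target */

	/* rq->data_len now holds the bytes that were not transferred */
	return rq->data_len;
}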
===== fs/bio.c 1.31 vs edited =====
--- 1.31/fs/bio.c	Sat Oct 19 01:14:39 2002
+++ edited/fs/bio.c	Mon Oct 28 16:11:33 2002
@@ -122,6 +122,7 @@
 	bio->bi_max_vecs = 0;
 	bio->bi_end_io = NULL;
 	atomic_set(&bio->bi_cnt, 1);
+	bio->bi_private = NULL;
 }
 
 /**
@@ -354,7 +355,7 @@
 	request_queue_t *q = bdev_get_queue(bdev);
 	int nr_pages;
 
-	nr_pages = q->max_sectors >> (PAGE_SHIFT - 9);
+	nr_pages = ((q->max_sectors << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	if (nr_pages > q->max_phys_segments)
 		nr_pages = q->max_phys_segments;
 	if (nr_pages > q->max_hw_segments)
@@ -385,13 +386,13 @@
 	 * cloned bio must not modify vec list
 	 */
 	if (unlikely(bio_flagged(bio, BIO_CLONED)))
-		return 1;
+		return 0;
 
 	if (bio->bi_vcnt >= bio->bi_max_vecs)
-		return 1;
+		return 0;
 
 	if (((bio->bi_size + len) >> 9) > q->max_sectors)
-		return 1;
+		return 0;
 
 	/*
 	 * we might loose a segment or two here, but rather that than
@@ -404,7 +405,7 @@
 
 	if (fail_segments) {
 		if (retried_segments)
-			return 1;
+			return 0;
 
 		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
 		retried_segments = 1;
@@ -425,18 +426,154 @@
 	 * depending on offset), it can specify a merge_bvec_fn in the
 	 * queue to get further control
 	 */
-	if (q->merge_bvec_fn && q->merge_bvec_fn(q, bio, bvec)) {
-		bvec->bv_page = NULL;
-		bvec->bv_len = 0;
-		bvec->bv_offset = 0;
-		return 1;
+	if (q->merge_bvec_fn) {
+		/*
+		 * merge_bvec_fn() returns number of bytes it can accept
+		 * at this offset
+		 */
+		if (q->merge_bvec_fn(q, bio, bvec) < len) {
+			bvec->bv_page = NULL;
+			bvec->bv_len = 0;
+			bvec->bv_offset = 0;
+			return 0;
+		}
 	}
 
 	bio->bi_vcnt++;
 	bio->bi_phys_segments++;
 	bio->bi_hw_segments++;
 	bio->bi_size += len;
-	return 0;
+	return len;
+}
+
+/**
+ *	bio_map_user	-	map user address into bio
+ *	@bdev: destination block device
+ *	@uaddr: start of user address
+ *	@len: length in bytes
+ *	@write_to_vm: bool indicating writing to pages or not
+ *
+ *	Map the user space address into a bio suitable for io to a block
+ *	device. Caller should check the size of the returned bio, we might
+ *	not have mapped the entire range specified.
+ */
+struct bio *bio_map_user(struct block_device *bdev, unsigned long uaddr,
+			 unsigned int len, int write_to_vm)
+{
+	unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	unsigned long start = uaddr >> PAGE_SHIFT;
+	const int nr_pages = end - start;
+	request_queue_t *q = bdev_get_queue(bdev);
+	int ret, offset, i;
+	struct page **pages;
+	struct bio *bio;
+
+	/*
+	 * transfer and buffer must be aligned to at least hardsector
+	 * size for now, in the future we can relax this restriction
+	 */
+	if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
+		return NULL;
+
+	bio = bio_alloc(GFP_KERNEL, nr_pages);
+	if (!bio)
+		return NULL;
+
+	pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out;
+
+	down_read(&current->mm->mmap_sem);
+	ret = get_user_pages(current, current->mm, uaddr, nr_pages,
+			     write_to_vm, 0, pages, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	if (ret < nr_pages)
+		goto out;
+
+	bio->bi_bdev = bdev;
+
+	offset = uaddr & ~PAGE_MASK;
+	for (i = 0; i < nr_pages; i++) {
+		unsigned int bytes = PAGE_SIZE - offset;
+
+		if (len <= 0)
+			break;
+
+		if (bytes > len)
+			bytes = len;
+
+		/*
+		 * sorry...
+		 */
+		if (bio_add_page(bio, pages[i], bytes, offset) < bytes)
+			break;
+
+		if (write_to_vm)
+			flush_dcache_page(pages[i]);
+
+		len -= bytes;
+		offset = 0;
+	}
+
+	/*
+	 * release the pages we didn't map into the bio, if any
+	 */
+	while (i < nr_pages)
+		page_cache_release(pages[i++]);
+
+	kfree(pages);
+
+	/*
+	 * check if the mapped pages need bouncing for an isa host.
+	 */
+	blk_queue_bounce(q, &bio);
+	return bio;
+out:
+	kfree(pages);
+	bio_put(bio);
+	return NULL;
+}
+
+/**
+ *	bio_unmap_user	-	unmap a bio
+ *	@bio: the bio being unmapped
+ *	@write_to_vm: bool indicating whether pages were written to
+ *
+ *	Unmap a bio previously mapped by bio_map_user(). The @write_to_vm
+ *	must be the same as passed into bio_map_user(). Must be called with
+ *	a process context.
+ */
+void bio_unmap_user(struct bio *bio, int write_to_vm)
+{
+	struct bio_vec *bvec;
+	int i;
+
+	/*
+	 * find original bio if it was bounced
+	 */
+	if (bio->bi_private) {
+		/*
+		 * someone stole our bio, must not happen
+		 */
+		BUG_ON(!bio_flagged(bio, BIO_BOUNCED));
+
+		bio = bio->bi_private;
+	}
+
+	/*
+	 * make sure we dirty pages we wrote to
+	 */
+	__bio_for_each_segment(bvec, bio, i, 0) {
+		if (write_to_vm)
+			set_page_dirty(bvec->bv_page);
+		else
+			flush_dcache_page(bvec->bv_page);
+
+		page_cache_release(bvec->bv_page);
+	}
+
+	bio_put(bio);
 }
 
 /**
@@ -446,14 +583,15 @@
 *	@error:		error, if any
 *
 *	Description:
- *	bio_endio() will end I/O @bytes_done number of bytes. This may be just
- *	a partial part of the bio, or it may be the whole bio. bio_endio() is
- *	the preferred way to end I/O on a bio, it takes care of decrementing
+ *	bio_endio() will end I/O on @bytes_done number of bytes. This may be
+ *	just a partial part of the bio, or it may be the whole bio. bio_endio()
+ *	is the preferred way to end I/O on a bio, it takes care of decrementing
 *	bi_size and clearing BIO_UPTODATE on error. @error is 0 on success, and
 *	and one of the established -Exxxx (-EIO, for instance) error values in
- *	case something went wrong.
+ *	case something went wrong. Noone should call bi_end_io() directly on
+ *	a bio unless they own it and thus know that it has an end_io function.
 **/
-int bio_endio(struct bio *bio, unsigned int bytes_done, int error)
+void bio_endio(struct bio *bio, unsigned int bytes_done, int error)
 {
 	if (error)
 		clear_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -465,7 +603,9 @@
 	}
 
 	bio->bi_size -= bytes_done;
-	return bio->bi_end_io(bio, bytes_done, error);
+
+	if (bio->bi_end_io)
+		bio->bi_end_io(bio, bytes_done, error);
 }
 
 static void __init biovec_init_pools(void)
@@ -537,7 +677,7 @@
 	return 0;
 }
 
-module_init(init_bio);
+subsys_initcall(init_bio);
 
 EXPORT_SYMBOL(bio_alloc);
 EXPORT_SYMBOL(bio_put);
@@ -550,3 +690,5 @@
 EXPORT_SYMBOL(bio_hw_segments);
 EXPORT_SYMBOL(bio_add_page);
 EXPORT_SYMBOL(bio_get_nr_vecs);
+EXPORT_SYMBOL(bio_map_user);
+EXPORT_SYMBOL(bio_unmap_user);
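Taken together, bio_map_user()/bio_unmap_user() let a caller do I/O straight to a user buffer without a kernel-side copy. A rough usage sketch, not from the patch; submission and completion handling are elided and the function name is illustrative:

/* illustrative only: map a user buffer, do I/O, unmap */
static int my_user_io(struct block_device *bdev, unsigned long uaddr,
		      unsigned int len, int reading)
{
	struct bio *bio;

	bio = bio_map_user(bdev, uaddr, len, reading);
	if (!bio)
		return -ENOMEM;		/* misaligned buffer, or out of memory */

	/*
	 * bio_map_user() may map less than asked for -- the caller is
	 * expected to check bi_size and trim the transfer accordingly
	 */
	if (bio->bi_size != len)
		len = bio->bi_size;

	/* ... set bi_sector, submit the bio and wait for completion ... */

	bio_unmap_user(bio, reading);
	return len;
}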
===== fs/direct-io.c 1.14 vs edited =====
--- 1.14/fs/direct-io.c	Sun Oct 13 00:45:44 2002
+++ edited/fs/direct-io.c	Mon Oct 28 09:17:09 2002
@@ -417,12 +417,12 @@
 	/* Take a ref against the page each time it is placed into a BIO */
 	page_cache_get(page);
 
-	if (bio_add_page(dio->bio, page, bv_len, bv_offset)) {
+	if (bio_add_page(dio->bio, page, bv_len, bv_offset) < bv_len) {
 		dio_bio_submit(dio);
 		ret = dio_new_bio(dio, blkno);
 		if (ret == 0) {
 			ret = bio_add_page(dio->bio, page, bv_len, bv_offset);
-			BUG_ON(ret != 0);
+			BUG_ON(ret < bv_len);
 		} else {
 			/* The page didn't make it into a BIO */
 			page_cache_release(page);
===== fs/mpage.c 1.25 vs edited =====
--- 1.25/fs/mpage.c	Wed Oct 16 02:30:10 2002
+++ edited/fs/mpage.c	Mon Oct 28 11:49:20 2002
@@ -176,6 +176,7 @@
 	unsigned first_hole = blocks_per_page;
 	struct block_device *bdev = NULL;
 	struct buffer_head bh;
+	int length;
 
 	if (page_has_buffers(page))
 		goto confused;
@@ -233,7 +234,8 @@
 		goto confused;
 	}
 
-	if (bio_add_page(bio, page, first_hole << blkbits, 0)) {
+	length = first_hole << blkbits;
+	if (bio_add_page(bio, page, length, 0) < length) {
 		bio = mpage_bio_submit(READ, bio);
 		goto alloc_new;
 	}
@@ -334,6 +336,7 @@
 	int boundary = 0;
 	sector_t boundary_block = 0;
 	struct block_device *boundary_bdev = NULL;
+	int length;
 
 	if (page_has_buffers(page)) {
 		struct buffer_head *head = page_buffers(page);
@@ -467,7 +470,8 @@
 		try_to_free_buffers(page);
 	}
 
-	if (bio_add_page(bio, page, first_unmapped << blkbits, 0)) {
+	length = first_unmapped << blkbits;
+	if (bio_add_page(bio, page, length, 0) < length) {
 		bio = mpage_bio_submit(WRITE, bio);
 		goto alloc_new;
 	}
@@ -591,6 +595,10 @@
 				test_clear_page_dirty(page)) {
 			if (writepage) {
 				ret = (*writepage)(page);
+				if (ret == -EAGAIN) {
+					__set_page_dirty_nobuffers(page);
+					ret = 0;
+				}
 			} else {
 				bio = mpage_writepage(bio, page, get_block,
 						&last_block_in_bio, &ret);
@@ -600,10 +608,6 @@
 			if (!pagevec_add(&pvec, page))
 				pagevec_deactivate_inactive(&pvec);
 			page = NULL;
-		}
-		if (ret == -EAGAIN && page) {
-			__set_page_dirty_nobuffers(page);
-			ret = 0;
 		}
 		if (ret || (--(wbc->nr_to_write) <= 0))
 			done = 1;
===== fs/xfs/pagebuf/page_buf.c 1.15 vs edited =====
--- 1.15/fs/xfs/pagebuf/page_buf.c	Mon Oct 14 22:54:12 2002
+++ edited/fs/xfs/pagebuf/page_buf.c	Fri Oct 18 20:28:57 2002
@@ -1448,7 +1448,7 @@
 		if (nbytes > size)
 			nbytes = size;
 
-		if (bio_add_page(bio, pb->pb_pages[map_i], nbytes, offset))
+		if (bio_add_page(bio, pb->pb_pages[map_i], nbytes, offset) < nbytes)
			break;
 
 		offset = 0;
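All of the filesystem callers above are converted to the same pattern, since bio_add_page() now returns the number of bytes it actually accepted rather than an error flag. A condensed sketch of that pattern; the helper name is illustrative:

/* illustrative only: add a page, or submit the full bio and start over */
static struct bio *my_add_or_submit(struct bio *bio, struct page *page,
				    unsigned int length, int rw)
{
	/* anything short of 'length' means the page did not fit */
	if (bio_add_page(bio, page, length, 0) < length) {
		submit_bio(rw, bio);
		return NULL;	/* caller allocates a fresh bio and retries */
	}

	return bio;
}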
===== include/asm-i386/ide.h 1.9 vs edited =====
--- 1.9/include/asm-i386/ide.h	Wed Sep 11 09:06:00 2002
+++ edited/include/asm-i386/ide.h	Wed Oct 23 08:27:58 2002
@@ -70,6 +70,7 @@
 	int index;
 
 	for(index = 0; index < MAX_HWIFS; index++) {
+		memset(&hw, 0, sizeof hw);
 		ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL);
 		hw.irq = ide_default_irq(ide_default_io_base(index));
 		ide_register_hw(&hw, NULL);
===== include/linux/bio.h 1.22 vs edited =====
--- 1.22/include/linux/bio.h	Tue Oct  8 13:27:47 2002
+++ edited/include/linux/bio.h	Thu Oct 24 08:43:13 2002
@@ -101,6 +101,7 @@
 #define BIO_EOF		2	/* out-out-bounds error */
 #define BIO_SEG_VALID	3	/* nr_hw_seg valid */
 #define BIO_CLONED	4	/* doesn't own data */
+#define BIO_BOUNCED	5	/* bio is a bounce bio */
 #define bio_flagged(bio, flag)	((bio)->bi_flags & (1 << (flag)))
 
 /*
@@ -131,6 +132,7 @@
 #define bio_page(bio)		bio_iovec((bio))->bv_page
 #define bio_offset(bio)		bio_iovec((bio))->bv_offset
 #define bio_sectors(bio)	((bio)->bi_size >> 9)
+#define bio_cur_sectors(bio)	(bio_iovec(bio)->bv_len >> 9)
 #define bio_data(bio)		(page_address(bio_page((bio))) + bio_offset((bio)))
 #define bio_barrier(bio)	((bio)->bi_rw & (1 << BIO_RW_BARRIER))
 
@@ -201,7 +203,7 @@
 extern struct bio *bio_alloc(int, int);
 extern void bio_put(struct bio *);
 
-extern int bio_endio(struct bio *, unsigned int, int);
+extern void bio_endio(struct bio *, unsigned int, int);
 struct request_queue;
 extern inline int bio_phys_segments(struct request_queue *, struct bio *);
 extern inline int bio_hw_segments(struct request_queue *, struct bio *);
@@ -214,6 +216,9 @@
 extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
 extern int bio_get_nr_vecs(struct block_device *);
+extern struct bio *bio_map_user(struct block_device *, unsigned long,
+				unsigned int, int);
+extern void bio_unmap_user(struct bio *, int);
 
 #ifdef CONFIG_HIGHMEM
 /*
===== include/linux/blk.h 1.27 vs edited =====
--- 1.27/include/linux/blk.h	Wed Jun 19 03:06:24 2002
+++ edited/include/linux/blk.h	Wed Oct 23 12:39:34 2002
@@ -39,32 +39,19 @@
 */
 extern int end_that_request_first(struct request *, int, int);
+extern int end_that_request_chunk(struct request *, int, int);
 extern void end_that_request_last(struct request *);
 struct request *elv_next_request(request_queue_t *q);
 
 static inline void blkdev_dequeue_request(struct request *req)
 {
-	list_del(&req->queuelist);
+	BUG_ON(list_empty(&req->queuelist));
+
+	list_del_init(&req->queuelist);
 
 	if (req->q)
 		elv_remove_request(req->q, req);
 }
-
-#define _elv_add_request_core(q, rq, where, plug)			\
-	do {								\
-		if ((plug))						\
-			blk_plug_device((q));				\
-		(q)->elevator.elevator_add_req_fn((q), (rq), (where));	\
-	} while (0)
-
-#define _elv_add_request(q, rq, back, p) do {				\
-	if ((back))							\
-		_elv_add_request_core((q), (rq), (q)->queue_head.prev, (p)); \
-	else								\
-		_elv_add_request_core((q), (rq), &(q)->queue_head, (p)); \
-} while (0)
-
-#define elv_add_request(q, rq, back) _elv_add_request((q), (rq), (back), 1)
 
 #if defined(MAJOR_NR) || defined(IDE_DRIVER)
 
 #if (MAJOR_NR != SCSI_TAPE_MAJOR) && (MAJOR_NR != OSST_MAJOR)
===== include/linux/blkdev.h 1.76 vs edited =====
--- 1.76/include/linux/blkdev.h	Fri Oct 18 19:50:43 2002
+++ edited/include/linux/blkdev.h	Sun Oct 27 22:06:19 2002
@@ -26,6 +26,8 @@
 	struct list_head queuelist; /* looking for ->queue? you must _not_
				     * access it directly, use
				     * blkdev_dequeue_request! */
+	int ref_count;
+
 	void *elevator_private;
 
 	unsigned char cmd[16];
@@ -64,7 +66,10 @@
 
 	/* For packet commands */
 	unsigned int data_len;
-	void *data, *sense;
+	void *data;
+
+	unsigned int sense_len;
+	void *sense;
 
 	unsigned int timeout;
 	struct completion *waiting;
@@ -150,12 +155,6 @@
 	int max_depth;
 };
 
-/*
- * Default nr free requests per queue, ll_rw_blk will scale it down
- * according to available RAM at init time
- */
-#define QUEUE_NR_REQUESTS	8192
-
 struct request_queue
 {
 	/*
@@ -216,9 +215,17 @@
 
 	unsigned long		seg_boundary_mask;
 
+	unsigned int		dma_alignment;
+
 	wait_queue_head_t	queue_wait;
 
 	struct blk_queue_tag	*queue_tags;
+
+	/*
+	 * sg stuff
+	 */
+	unsigned int		sg_timeout;
+	unsigned int		sg_reserved_size;
 };
 
 #define RQ_INACTIVE		(-1)
@@ -254,6 +261,13 @@
 */
 #define blk_queue_headactive(q, head_active)
 
+/*
+ * q->prep_rq_fn return values
+ */
+#define BLKPREP_OK		0	/* serve it */
+#define BLKPREP_KILL		1	/* fatal error, kill */
+#define BLKPREP_DEFER		2	/* leave on queue */
+
 extern unsigned long blk_max_low_pfn, blk_max_pfn;
 
 /*
@@ -268,7 +282,7 @@
 #define BLK_BOUNCE_ISA		(ISA_DMA_THRESHOLD)
 
 extern int init_emergency_isa_pool(void);
-void blk_queue_bounce(request_queue_t *q, struct bio **bio);
+inline void blk_queue_bounce(request_queue_t *q, struct bio **bio);
 
 #define rq_for_each_bio(bio, rq)	\
	if ((rq->bio))			\
@@ -339,9 +353,14 @@
 extern void blk_queue_assign_lock(request_queue_t *, spinlock_t *);
 extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn);
 extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *);
+extern void blk_queue_dma_alignment(request_queue_t *, int);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
 extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
+
+typedef void (consume_sg_fn) (request_queue_t *q, struct scatterlist *, int, void *);
+extern int blk_rq_map_consume(request_queue_t *, struct request *, consume_sg_fn *, void *);
+
 extern void blk_dump_rq_flags(struct request *, char *);
 extern void generic_unplug_device(void *);
 extern long nr_blockdev_pages(void);
@@ -383,6 +402,21 @@
 static inline int bdev_hardsect_size(struct block_device *bdev)
 {
 	return queue_hardsect_size(bdev_get_queue(bdev));
+}
+
+static inline int queue_dma_alignment(request_queue_t *q)
+{
+	int retval = 511;
+
+	if (q && q->dma_alignment)
+		retval = q->dma_alignment;
+
+	return retval;
+}
+
+static inline int bdev_dma_aligment(struct block_device *bdev)
+{
+	return queue_dma_alignment(bdev_get_queue(bdev));
 }
 
 #define blk_finished_io(nsects)	do { } while (0)
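The BLKPREP_* codes above pair with the elv_next_request() changes in elevator.c: OK hands the request to the driver, DEFER leaves it at the head of the queue for a later retry, and KILL completes it with an error. A sketch of a ->prep_rq_fn built on them; the my_* helpers and command structure are hypothetical:

/* illustrative only: a queue prep function using the new return codes */
static int my_prep_rq_fn(request_queue_t *q, struct request *rq)
{
	struct my_cmd *cmd = my_alloc_cmd(q);	/* hypothetical allocation */

	if (!cmd)
		return BLKPREP_DEFER;	/* out of resources, retry later */

	if (my_build_cmd(cmd, rq)) {	/* hypothetical translation step */
		my_free_cmd(cmd);
		return BLKPREP_KILL;	/* cannot be serviced, error it out */
	}

	rq->special = cmd;
	rq->flags |= REQ_DONTPREP;	/* don't prepare this request twice */
	return BLKPREP_OK;
}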
===== include/linux/elevator.h 1.16 vs edited =====
--- 1.16/include/linux/elevator.h	Fri Oct  4 15:58:56 2002
+++ edited/include/linux/elevator.h	Sat Oct 19 11:02:13 2002
@@ -40,8 +40,8 @@
 /*
 * block elevator interface
 */
-extern void __elv_add_request(request_queue_t *, struct request *,
-			      struct list_head *);
+extern void elv_add_request(request_queue_t *, struct request *, int, int);
+extern void __elv_add_request(request_queue_t *, struct request *, int, int);
 extern int elv_merge(request_queue_t *, struct list_head **, struct bio *);
 extern void elv_merge_requests(request_queue_t *, struct request *,
			       struct request *);
@@ -49,6 +49,9 @@
 extern void elv_remove_request(request_queue_t *, struct request *);
 extern int elv_queue_empty(request_queue_t *);
 extern inline struct list_head *elv_get_sort_head(request_queue_t *, struct request *);
+
+#define __elv_add_request_pos(q, rq, pos)		\
+	(q)->elevator.elevator_add_req_fn((q), (rq), (pos))
 
 /*
 * noop I/O scheduler. always merges, always inserts new request at tail
===== mm/highmem.c 1.36 vs edited =====
--- 1.36/mm/highmem.c	Mon Oct  7 07:50:36 2002
+++ edited/mm/highmem.c	Fri Oct 25 21:42:41 2002
@@ -366,34 +366,13 @@
 	return 0;
 }
 
-void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
+void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig, int bio_gfp,
+			mempool_t *pool)
 {
 	struct page *page;
 	struct bio *bio = NULL;
-	int i, rw = bio_data_dir(*bio_orig), bio_gfp;
+	int i, rw = bio_data_dir(*bio_orig);
 	struct bio_vec *to, *from;
-	mempool_t *pool;
-	unsigned long pfn = q->bounce_pfn;
-	int gfp = q->bounce_gfp;
-
-	BUG_ON((*bio_orig)->bi_idx);
-
-	/*
-	 * for non-isa bounce case, just check if the bounce pfn is equal
-	 * to or bigger than the highest pfn in the system -- in that case,
-	 * don't waste time iterating over bio segments
-	 */
-	if (!(gfp & GFP_DMA)) {
-		if (pfn >= blk_max_pfn)
-			return;
-
-		bio_gfp = GFP_NOHIGHIO;
-		pool = page_pool;
-	} else {
-		BUG_ON(!isa_page_pool);
-		bio_gfp = GFP_NOIO;
-		pool = isa_page_pool;
-	}
 
 	bio_for_each_segment(from, *bio_orig, i) {
 		page = from->bv_page;
@@ -401,7 +380,7 @@
 		/*
 		 * is destination page below bounce pfn?
 		 */
-		if ((page - page_zone(page)->zone_mem_map) + (page_zone(page)->zone_start_pfn) < pfn)
+		if ((page - page_zone(page)->zone_mem_map) + (page_zone(page)->zone_start_pfn) < q->bounce_pfn)
 			continue;
 
 		/*
@@ -412,11 +391,11 @@
 
 		to = bio->bi_io_vec + i;
 
-		to->bv_page = mempool_alloc(pool, gfp);
+		to->bv_page = mempool_alloc(pool, q->bounce_gfp);
 		to->bv_len = from->bv_len;
 		to->bv_offset = from->bv_offset;
 
-		if (rw & WRITE) {
+		if (rw == WRITE) {
 			char *vto, *vfrom;
 
 			vto = page_address(to->bv_page) + to->bv_offset;
@@ -437,15 +416,16 @@
	 * pages
	 */
 	bio_for_each_segment(from, *bio_orig, i) {
-		to = &bio->bi_io_vec[i];
+		to = bio_iovec_idx(bio, i);
 		if (!to->bv_page) {
 			to->bv_page = from->bv_page;
 			to->bv_len = from->bv_len;
-			to->bv_offset = to->bv_offset;
+			to->bv_offset = from->bv_offset;
 		}
 	}
 
 	bio->bi_bdev = (*bio_orig)->bi_bdev;
+	bio->bi_flags |= (1 << BIO_BOUNCED);
 	bio->bi_sector = (*bio_orig)->bi_sector;
 	bio->bi_rw = (*bio_orig)->bi_rw;
 
@@ -454,19 +434,48 @@
 	bio->bi_size = (*bio_orig)->bi_size;
 
 	if (pool == page_pool) {
-		if (rw & WRITE)
-			bio->bi_end_io = bounce_end_io_write;
-		else
+		bio->bi_end_io = bounce_end_io_write;
+		if (rw == READ)
 			bio->bi_end_io = bounce_end_io_read;
 	} else {
-		if (rw & WRITE)
-			bio->bi_end_io = bounce_end_io_write_isa;
-		else
+		bio->bi_end_io = bounce_end_io_write_isa;
+		if (rw == READ)
 			bio->bi_end_io = bounce_end_io_read_isa;
 	}
 
 	bio->bi_private = *bio_orig;
 	*bio_orig = bio;
+}
+
+inline void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
+{
+	mempool_t *pool;
+	int bio_gfp;
+
+	BUG_ON((*bio_orig)->bi_idx);
+
+	/*
+	 * for non-isa bounce case, just check if the bounce pfn is equal
+	 * to or bigger than the highest pfn in the system -- in that case,
+	 * don't waste time iterating over bio segments
+	 */
+	if (!(q->bounce_gfp & GFP_DMA)) {
+		if (q->bounce_pfn >= blk_max_pfn)
+			return;
+
+		bio_gfp = GFP_NOHIGHIO;
+		pool = page_pool;
+	} else {
+		BUG_ON(!isa_page_pool);
+
+		bio_gfp = GFP_NOIO;
+		pool = isa_page_pool;
+	}
+
+	/*
+	 * slow path
+	 */
+	__blk_queue_bounce(q, bio_orig, bio_gfp, pool);
 }
 
 #if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_HIGHMEM)
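Finally, since bio_map_user() rejects buffers that violate the queue's dma alignment (queue_dma_alignment() defaults to 511), a caller that wants to fall back to a copying path can perform the same test up front. A minimal sketch; the helper name is made up:

/* illustrative only: does this user buffer satisfy the queue's alignment? */
static int my_buffer_ok(struct block_device *bdev, unsigned long uaddr,
			unsigned int len)
{
	request_queue_t *q = bdev_get_queue(bdev);

	return ((uaddr | len) & queue_dma_alignment(q)) == 0;
}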