diff -uNr -Xdontdiff ../master/linux-2.4.18-pre4/drivers/block/genhd.c linux/drivers/block/genhd.c
--- ../master/linux-2.4.18-pre4/drivers/block/genhd.c	Thu Oct 25 19:05:40 2001
+++ linux/drivers/block/genhd.c	Mon Jan 21 21:41:24 2002
@@ -128,20 +135,40 @@
 get_partition_list(char *page, char **start, off_t offset, int count)
 {
 	struct gendisk *gp;
+	struct hd_struct *hd;
 	char buf[64];
 	int len, n;
 
-	len = sprintf(page, "major minor #blocks name\n\n");
+	len = sprintf(page, "major minor #blocks name "
+			    "rio rmerge rsect ruse wio wmerge "
+			    "wsect wuse running use aveq\n\n");
+
+
 	read_lock(&gendisk_lock);
 	for (gp = gendisk_head; gp; gp = gp->next) {
 		for (n = 0; n < (gp->nr_real << gp->minor_shift); n++) {
 			if (gp->part[n].nr_sects == 0)
 				continue;
 
-			len += snprintf(page + len, 63,
-					"%4d %4d %10d %s\n",
+			hd = &gp->part[n]; disk_round_stats(hd);
+			len += sprintf(page + len,
+					"%4d %4d %10d %s "
+					"%u %u %u %u %u %u %u %u %u %u %u\n",
 					gp->major, n, gp->sizes[n],
-					disk_name(gp, n, buf));
+					disk_name(gp, n, buf),
+					hd->rd_ios, hd->rd_merges,
+					hd->rd_sectors,
+/* jiffies -> milliseconds; divide first so the multiply cannot wrap */
+#define MSEC(x) ((x) / HZ * 1000 + (x) % HZ * 1000 / HZ)
+					MSEC(hd->rd_ticks),
+					hd->wr_ios, hd->wr_merges,
+					hd->wr_sectors,
+					MSEC(hd->wr_ticks),
+					hd->ios_in_flight,
+					MSEC(hd->io_ticks),
+					MSEC(hd->aveq));
+#undef MSEC
+
 			if (len < offset)
 				offset -= len, len = 0;
 			else if (len >= offset + count)
diff -uNr -Xdontdiff ../master/linux-2.4.18-pre4/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c
--- ../master/linux-2.4.18-pre4/drivers/block/ll_rw_blk.c	Mon Jan 21 21:24:41 2002
+++ linux/drivers/block/ll_rw_blk.c	Mon Jan 21 21:31:49 2002
@@ -518,6 +518,142 @@
 		printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
 }
 
+/* Return up to two hd_structs on which to do IO accounting for a given
+ * request.  On a partitioned device, we want to account both against
+ * the partition and against the whole disk.  *hd1 receives the
+ * whole-disk entry and *hd2 the partition entry; each is left NULL when
+ * it does not apply (no gendisk or partition array for the device, or
+ * the request already targets the whole disk).  */
+static void locate_hd_struct(struct request *req,
+			     struct hd_struct **hd1,
+			     struct hd_struct **hd2)
+{
+	struct gendisk *gd;
+
+	*hd1 = NULL;
+	*hd2 = NULL;
+
+	gd = get_gendisk(req->rq_dev);
+	if (gd && gd->part) {
+		/* Mask out the partition bits: account for the entire disk */
+		int devnr = MINOR(req->rq_dev) >> gd->minor_shift;
+		int whole_minor = devnr << gd->minor_shift;
+		*hd1 = &gd->part[whole_minor];
+		if (whole_minor != MINOR(req->rq_dev))
+			*hd2 = &gd->part[MINOR(req->rq_dev)];
+	}
+}
+
+/* Round off the performance stats on an hd_struct.  The average IO
+ * queue length and utilisation statistics are maintained by observing
+ * the current state of the queue length and the amount of time it has
+ * been in this state for.  Normally, that accounting is done on IO
+ * completion, but that can result in more than a second's worth of IO
+ * being accounted for within any one second, leading to >100%
+ * utilisation.  To deal with that, we do a round-off before returning
+ * the results when reading /proc/partitions, accounting immediately for
+ * all queue usage up to the current jiffies and restarting the counters
+ * again.  */
+void disk_round_stats(struct hd_struct *hd)
+{
+	/* Sample jiffies once so both intervals end on the same tick. */
+	unsigned long now = jiffies;
+
+	hd->aveq += (hd->ios_in_flight * (now - hd->last_queue_change));
+	hd->last_queue_change = now;
+
+	if (hd->ios_in_flight)
+		hd->io_ticks += (now - hd->last_idle_time);
+	hd->last_idle_time = now;
+}
+
+
+/* One request fewer is outstanding: bring the time-weighted counters up
+ * to date at the old depth, then drop the in-flight count.  */
+static inline void down_ios(struct hd_struct *hd)
+{
+	disk_round_stats(hd);
+	--hd->ios_in_flight;
+}
+
+/* One request more is outstanding: bring the time-weighted counters up
+ * to date at the old depth, then raise the in-flight count.  */
+static inline void up_ios(struct hd_struct *hd)
+{
+	disk_round_stats(hd);
+	++hd->ios_in_flight;
+}
+
+/* Account newly submitted sectors against hd.  A merge into an existing
+ * request bumps the merge counter for the request's direction; only a
+ * brand-new request adds to the in-flight count.  */
+static void account_io_start(struct hd_struct *hd, struct request *req,
+			     int merge, int sectors)
+{
+	switch (req->cmd) {
+	case READ:
+		if (merge)
+			hd->rd_merges++;
+		hd->rd_sectors += sectors;
+		break;
+	case WRITE:
+		if (merge)
+			hd->wr_merges++;
+		hd->wr_sectors += sectors;
+		break;
+	default:
+		break;
+	}
+	if (!merge)
+		up_ios(hd);
+}
+
+/* Account the completion of req against hd: add its lifetime in jiffies
+ * (measured from req->start_time) to the tick total for its direction,
+ * count one completed IO and drop the in-flight count.  */
+static void account_io_end(struct hd_struct *hd, struct request *req)
+{
+	unsigned long duration = jiffies - req->start_time;
+	switch (req->cmd) {
+	case READ:
+		hd->rd_ticks += duration;
+		hd->rd_ios++;
+		break;
+	case WRITE:
+		hd->wr_ticks += duration;
+		hd->wr_ios++;
+		break;
+	default:
+		break;
+	}
+	down_ios(hd);
+}
+
+/* Record new IO on req against the whole disk and, where there is one,
+ * the partition.  merge is non-zero when the sectors were merged into
+ * an already-queued request.  */
+void req_new_io(struct request *req, int merge, int sectors)
+{
+	struct hd_struct *hd1, *hd2;
+	locate_hd_struct(req, &hd1, &hd2);
+	if (hd1)
+		account_io_start(hd1, req, merge, sectors);
+	if (hd2)
+		account_io_start(hd2, req, merge, sectors);
+}
+
+/* Record completion of req against the whole disk and, where there is
+ * one, the partition.  */
+void req_finished_io(struct request *req)
+{
+	struct hd_struct *hd1, *hd2;
+	locate_hd_struct(req, &hd1, &hd2);
+	if (hd1)
+		account_io_end(hd1, req);
+	if (hd2)
+		account_io_end(hd2, req);
+}
+
 /*
  * add-request adds a request to the linked list.
  * io_request_lock is held and interrupts disabled, as we muck with the
@@ -574,6 +710,7 @@
 				int max_segments)
 {
 	struct request *next;
+	struct hd_struct *hd1, *hd2;
 
 	next = blkdev_next_request(req);
 	if (req->sector + req->nr_sectors != next->sector)
@@ -597,6 +734,15 @@
 	req->bhtail = next->bhtail;
 	req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
 	list_del(&next->queue);
+
+	/* One last thing: we have removed a request, so we now have one
+	   less expected IO to complete for accounting purposes. */
+
+	locate_hd_struct(req, &hd1, &hd2);
+	if (hd1)
+		down_ios(hd1);
+	if (hd2)
+		down_ios(hd2);
 	blkdev_release_request(next);
 }
 
@@ -704,6 +850,7 @@
 			req->nr_sectors = req->hard_nr_sectors += count;
 			blk_started_io(count);
 			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+			req_new_io(req, 1, count);
 			attempt_back_merge(q, req, max_sectors, max_segments);
 			goto out;
 
@@ -721,6 +868,7 @@
 			req->nr_sectors = req->hard_nr_sectors += count;
 			blk_started_io(count);
 			drive_stat_acct(req->rq_dev, req->cmd, count, 0);
+			req_new_io(req, 1, count);
 			attempt_front_merge(q, head, req, max_sectors, max_segments);
 			goto out;
 
@@ -774,6 +922,8 @@
 	req->bh = bh;
 	req->bhtail = bh;
 	req->rq_dev = bh->b_rdev;
+	req->start_time = jiffies;
+	req_new_io(req, 0, count);
 	blk_started_io(count);
 	add_request(q, req, insert_here);
 out:
@@ -1086,6 +1236,7 @@
 {
 	if (req->waiting != NULL)
 		complete(req->waiting);
+	req_finished_io(req);
 	blkdev_release_request(req);
 }
 
@@ -1244,4 +1395,5 @@
 EXPORT_SYMBOL(blk_queue_make_request);
 EXPORT_SYMBOL(generic_make_request);
 EXPORT_SYMBOL(blkdev_release_request);
+EXPORT_SYMBOL(req_finished_io);
 EXPORT_SYMBOL(generic_unplug_device);
diff -uNr -Xdontdiff ../master/linux-2.4.18-pre4/drivers/scsi/scsi_lib.c linux/drivers/scsi/scsi_lib.c
--- ../master/linux-2.4.18-pre4/drivers/scsi/scsi_lib.c	Thu Oct 25 19:05:46 2001
+++ linux/drivers/scsi/scsi_lib.c	Mon Jan 21 21:26:39 2002
@@ -426,6 +426,7 @@
 		if (req->waiting != NULL) {
 			complete(req->waiting);
 		}
+		req_finished_io(req);
 		add_blkdev_randomness(MAJOR(req->rq_dev));
 
 		SDpnt = SCpnt->device;
diff -uNr -Xdontdiff ../master/linux-2.4.18-pre4/include/linux/blkdev.h linux/include/linux/blkdev.h
--- ../master/linux-2.4.18-pre4/include/linux/blkdev.h	Mon Nov 26 14:20:28 2001
+++ linux/include/linux/blkdev.h	Mon Jan 21 21:26:39 2002
@@ -30,6 +30,7 @@
 	kdev_t rq_dev;
 	int cmd;		/* READ or WRITE */
 	int errors;
+	unsigned long start_time;
 	unsigned long sector;
 	unsigned long nr_sectors;
 	unsigned long hard_sector, hard_nr_sectors;
diff -uNr -Xdontdiff ../master/linux-2.4.18-pre4/include/linux/genhd.h linux/include/linux/genhd.h
--- ../master/linux-2.4.18-pre4/include/linux/genhd.h	Thu Oct 25 19:05:48 2001
+++ linux/include/linux/genhd.h	Mon Jan 21 21:29:23 2002
@@ -62,6 +62,22 @@
 	unsigned long nr_sects;
 	devfs_handle_t de;              /* primary (master) devfs entry */
 	int number;                     /* stupid old code wastes space */
+
+	/* Performance stats: */
+	unsigned int ios_in_flight;	/* requests currently outstanding */
+	unsigned int io_ticks;		/* jiffies spent with IO in flight */
+	unsigned int last_idle_time;	/* jiffies at last io_ticks update */
+	unsigned int last_queue_change;	/* jiffies at last aveq update */
+	unsigned int aveq;		/* sum of in-flight count * jiffies */
+
+	unsigned int rd_ios;		/* read requests completed */
+	unsigned int rd_merges;		/* reads merged into a request */
+	unsigned int rd_ticks;		/* summed lifetime of reads, jiffies */
+	unsigned int rd_sectors;	/* sectors submitted for reading */
+	unsigned int wr_ios;		/* write requests completed */
+	unsigned int wr_merges;		/* writes merged into a request */
+	unsigned int wr_ticks;		/* summed lifetime of writes, jiffies */
+	unsigned int wr_sectors;	/* sectors submitted for writing */
 };
 
 #define GENHD_FL_REMOVABLE  1
@@ -241,6 +257,19 @@
 
 char *disk_name (struct gendisk *hd, int minor, char *buf);
 
+/*
+ * disk_round_stats is used to round off the IO statistics for a disk
+ * for a complete clock tick.
+ */
+void disk_round_stats(struct hd_struct *hd);
+
+/*
+ * Account for the completion of an IO request (used by drivers which
+ * bypass the normal end_request processing)
+ */
+struct request;
+void req_finished_io(struct request *);
+
 extern void devfs_register_partitions (struct gendisk *dev, int minor,
 				       int unregister);
 