Binary files ref/ID and elevator/ID differ
diff -urN ref/drivers/block/ll_rw_blk.c elevator/drivers/block/ll_rw_blk.c
--- ref/drivers/block/ll_rw_blk.c	Mon Jan 31 21:28:16 2000
+++ elevator/drivers/block/ll_rw_blk.c	Tue Feb  1 19:18:04 2000
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 1991, 1992 Linus Torvalds
  * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
+ * Elevator latency, (C) 2000 Andrea Arcangeli SuSE
  */
 
 /*
@@ -299,6 +300,129 @@
 		printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
 }
 
+static inline int seek_to_not_starving_chunk(struct request ** req, int * lat)
+{
+	struct request * tmp = *req;
+	int found = 0, pos = 0;
+	int last_pos = 0;
+
+	while (tmp)
+	{
+		if (tmp->elevator_latency <= 0)
+		{
+			*req = tmp;
+			found = 1;
+			last_pos = pos;
+		}
+		tmp = tmp->next;
+		pos++;
+	}
+	*lat -= last_pos;
+
+	return found;
+}
+
+#define CASE_COALESCE_BUT_FIRST_REQUEST_MAYBE_BUSY	\
+	case IDE0_MAJOR:	/* same as HD_MAJOR */	\
+	case IDE1_MAJOR:				\
+	case FLOPPY_MAJOR:				\
+	case IDE2_MAJOR:				\
+	case IDE3_MAJOR:				\
+	case IDE4_MAJOR:				\
+	case IDE5_MAJOR:				\
+	case ACSI_MAJOR:				\
+	case MFM_ACORN_MAJOR:				\
+	case MDISK_MAJOR:				\
+	case DASD_MAJOR:
+#define CASE_COALESCE_ALSO_FIRST_REQUEST	\
+	case SCSI_DISK0_MAJOR:			\
+	case SCSI_DISK1_MAJOR:			\
+	case SCSI_DISK2_MAJOR:			\
+	case SCSI_DISK3_MAJOR:			\
+	case SCSI_DISK4_MAJOR:			\
+	case SCSI_DISK5_MAJOR:			\
+	case SCSI_DISK6_MAJOR:			\
+	case SCSI_DISK7_MAJOR:			\
+	case SCSI_CDROM_MAJOR:			\
+	case DAC960_MAJOR+0:			\
+	case DAC960_MAJOR+1:			\
+	case DAC960_MAJOR+2:			\
+	case DAC960_MAJOR+3:			\
+	case DAC960_MAJOR+4:			\
+	case DAC960_MAJOR+5:			\
+	case DAC960_MAJOR+6:			\
+	case DAC960_MAJOR+7:			\
+	case COMPAQ_SMART2_MAJOR+0:		\
+	case COMPAQ_SMART2_MAJOR+1:		\
+	case COMPAQ_SMART2_MAJOR+2:		\
+	case COMPAQ_SMART2_MAJOR+3:		\
+	case COMPAQ_SMART2_MAJOR+4:		\
+	case COMPAQ_SMART2_MAJOR+5:		\
+	case COMPAQ_SMART2_MAJOR+6:		\
+	case COMPAQ_SMART2_MAJOR+7:
+
+#define elevator_starve_rest_of_queue(req)			\
+do {								\
+	for ((req) = (req)->next; (req); (req) = (req)->next)	\
+		(req)->elevator_latency--;			\
+} while (0)
+
+static inline void elevator_queue(struct request * req,
+				  struct request * tmp,
+				  int latency,
+				  struct request ** current_request,
+				  int major)
+{
+	struct request * head;
+	int starving;
+
+	starving = seek_to_not_starving_chunk(&tmp, &latency);
+	head = tmp;
+
+	for ( ; tmp->next ; tmp = tmp->next) {
+		const int after_current = IN_ORDER(tmp,req);
+		const int before_next = IN_ORDER(req,tmp->next);
+
+		if (!IN_ORDER(tmp,tmp->next)) {
+			if (after_current || before_next)
+				break;
+		} else {
+			if (after_current && before_next)
+				break;
+		}
+
+		if (--latency <= 0)
+		{
+			tmp = head;
+
+			if (starving)
+				break;
+
+			switch (major)
+			{
+				CASE_COALESCE_BUT_FIRST_REQUEST_MAYBE_BUSY
+					if (head == blk_dev[major].current_request)
+			default:
+						goto link;
+				CASE_COALESCE_ALSO_FIRST_REQUEST
+			}
+
+			req->next = tmp;
+			*current_request = req;
+			goto after_link;
+		}
+	}
+
+ link:
+	req->next = tmp->next;
+	tmp->next = req;
+
+ after_link:
+	req->elevator_latency = latency;
+
+	elevator_starve_rest_of_queue(req);
+}
+
 /*
  * add-request adds a request to the linked list.
  * It disables interrupts (aquires the request spinlock) so that it can muck
@@ -317,6 +441,7 @@
 	short disk_index;
 	unsigned long flags;
 	int queue_new_request = 0;
+	int latency;
 
 	switch (major) {
 		case DAC960_MAJOR+0:
@@ -341,7 +466,10 @@
 			break;
 	}
 
-	req->next = NULL;
+	if (req->cmd != READ)
+		latency = dev->elevator.write_latency;
+	else
+		latency = dev->elevator.read_latency;
 
 	/*
 	 * We use the goto to reduce locking complexity
@@ -352,28 +480,17 @@
 	if (req->bh)
 		mark_buffer_clean(req->bh);
 	if (!(tmp = *current_request)) {
+		req->next = NULL;
+		req->elevator_latency = latency;
 		*current_request = req;
 		if (dev->current_request != &dev->plug)
 			queue_new_request = 1;
 		goto out;
 	}
-	for ( ; tmp->next ; tmp = tmp->next) {
-		const int after_current = IN_ORDER(tmp,req);
-		const int before_next = IN_ORDER(req,tmp->next);
-
-		if (!IN_ORDER(tmp,tmp->next)) {
-			if (after_current || before_next)
-				break;
-		} else {
-			if (after_current && before_next)
-				break;
-		}
-	}
-	req->next = tmp->next;
-	tmp->next = req;
+	elevator_queue(req, tmp, latency, current_request, major);
 
 /* for SCSI devices, call request_fn unconditionally */
-	if (scsi_blk_major(major) ||
+	if ((0 && scsi_blk_major(major)) ||
 	    (major >= DAC960_MAJOR+0 && major <= DAC960_MAJOR+7) ||
 	    (major >= COMPAQ_SMART2_MAJOR+0 &&
 	     major <= COMPAQ_SMART2_MAJOR+7))
@@ -407,6 +524,8 @@
 	total_segments--;
 	if (total_segments > max_segments)
 		return;
+	if (next->elevator_latency < req->elevator_latency)
+		req->elevator_latency = next->elevator_latency;
 	req->bhtail->b_reqnext = next->bh;
 	req->bhtail = next->bhtail;
 	req->nr_sectors += next->nr_sectors;
@@ -416,12 +535,29 @@
 	wake_up (&wait_for_request);
 }
 
+#define read_pendings(req)			\
+({						\
+	int __ret = 0;				\
+	struct request * tmp = (req);		\
+	while (tmp)				\
+	{					\
+		if (tmp->cmd == READ)		\
+		{				\
+			__ret = 1;		\
+			break;			\
+		}				\
+		tmp = tmp->next;		\
+	}					\
+	__ret;					\
+})
+
 void make_request(int major, int rw, struct buffer_head * bh)
 {
 	unsigned int sector, count;
 	struct request * req;
 	int rw_ahead, max_req, max_sectors, max_segments;
 	unsigned long flags;
+	int latency;
 
 	count = bh->b_size >> 9;
 	sector = bh->b_rsector;
@@ -498,6 +634,11 @@
 	max_sectors = get_max_sectors(bh->b_rdev);
 	max_segments = get_max_segments(bh->b_rdev);
 
+	if (rw != READ)
+		latency = blk_dev[major].elevator.write_latency;
+	else
+		latency = blk_dev[major].elevator.read_latency;
+
 	/*
 	 * Now we acquire the request spinlock, we have to be mega careful
 	 * not to schedule or do something nonatomic
@@ -512,18 +653,9 @@
 #endif
 			 major != DDV_MAJOR && major != NBD_MAJOR)
 			plug_device(blk_dev + major); /* is atomic */
-	} else switch (major) {
-	     case IDE0_MAJOR:	/* same as HD_MAJOR */
-	     case IDE1_MAJOR:
-	     case FLOPPY_MAJOR:
-	     case IDE2_MAJOR:
-	     case IDE3_MAJOR:
-	     case IDE4_MAJOR:
-	     case IDE5_MAJOR:
-	     case ACSI_MAJOR:
-	     case MFM_ACORN_MAJOR:
-	     case MDISK_MAJOR:
-	     case DASD_MAJOR:
+	} else if (rw == READ || !read_pendings(req)) switch (major) {
+	     struct request * prev;
+	     CASE_COALESCE_BUT_FIRST_REQUEST_MAYBE_BUSY
 		/*
 		 * The scsi disk and cdrom drivers completely remove the request
 		 * from the queue when they start processing an entry.  For this
@@ -534,38 +666,24 @@
 		 * entry may be busy being processed and we thus can't change it.
 		 */
 		if (req == blk_dev[major].current_request)
+		{
 			req = req->next;
+			latency--;
+		}
 		if (!req)
 			break;
 		/* fall through */
+	     CASE_COALESCE_ALSO_FIRST_REQUEST
 
-	     case SCSI_DISK0_MAJOR:
-	     case SCSI_DISK1_MAJOR:
-	     case SCSI_DISK2_MAJOR:
-	     case SCSI_DISK3_MAJOR:
-	     case SCSI_DISK4_MAJOR:
-	     case SCSI_DISK5_MAJOR:
-	     case SCSI_DISK6_MAJOR:
-	     case SCSI_DISK7_MAJOR:
-	     case SCSI_CDROM_MAJOR:
-	     case DAC960_MAJOR+0:
-	     case DAC960_MAJOR+1:
-	     case DAC960_MAJOR+2:
-	     case DAC960_MAJOR+3:
-	     case DAC960_MAJOR+4:
-	     case DAC960_MAJOR+5:
-	     case DAC960_MAJOR+6:
-	     case DAC960_MAJOR+7:
-	     case COMPAQ_SMART2_MAJOR+0:
-	     case COMPAQ_SMART2_MAJOR+1:
-	     case COMPAQ_SMART2_MAJOR+2:
-	     case COMPAQ_SMART2_MAJOR+3:
-	     case COMPAQ_SMART2_MAJOR+4:
-	     case COMPAQ_SMART2_MAJOR+5:
-	     case COMPAQ_SMART2_MAJOR+6:
-	     case COMPAQ_SMART2_MAJOR+7:
-
-		do {
+		if (seek_to_not_starving_chunk(&req, &latency))
+		{
+			req = req->next;
+			latency--;
+		}
+		for (prev = NULL;
+		     req && latency >= 0;
+		     prev = req, req = req->next, latency--)
+		{
 			if (req->sem)
 				continue;
 			if (req->cmd != rw)
@@ -582,11 +700,18 @@
 					req->nr_segments++;
 				else continue;
 			}
+			if (!latency)
+				continue;
 			req->bhtail->b_reqnext = bh;
 			req->bhtail = bh;
 			req->nr_sectors += count;
 			/* Can we now merge this req with the next? */
 			attempt_merge(req, max_sectors, max_segments);
+
+			/* latency stuff */
+			if (--latency < req->elevator_latency)
+				req->elevator_latency = latency;
+			elevator_starve_rest_of_queue(req);
 		/* or to the beginning? */
 		} else if (req->sector - count == sector) {
 			if (bh->b_data + bh->b_size
@@ -601,6 +726,15 @@
 			req->current_nr_sectors = count;
 			req->sector = sector;
 			req->nr_sectors += count;
+			/* Can we now merge this req with the prev? */
+			if (prev)
+				attempt_merge(prev, max_sectors,
+					      max_segments);
+
+			/* latency stuff */
+			if (latency < --req->elevator_latency)
+				req->elevator_latency = latency;
+			elevator_starve_rest_of_queue(req);
 		} else
 			continue;
 
@@ -608,7 +742,7 @@
 
 			spin_unlock_irqrestore(&io_request_lock,flags);
 			return;
-		} while ((req = req->next) != NULL);
+		}
 	}
 
 /* find an unused request. */
@@ -825,6 +959,8 @@
 		dev->plug_tq.sync = 0;
 		dev->plug_tq.routine = &unplug_device;
 		dev->plug_tq.data = dev;
+		dev->elevator.read_latency = ELEVATOR_READ_LATENCY;
+		dev->elevator.write_latency = ELEVATOR_WRITE_LATENCY;
 	}
 
 	req = all_requests + NR_REQUEST;
diff -urN ref/include/linux/blkdev.h elevator/include/linux/blkdev.h
--- ref/include/linux/blkdev.h	Mon Jan 31 21:28:16 2000
+++ elevator/include/linux/blkdev.h	Tue Feb  1 19:20:12 2000
@@ -32,10 +32,16 @@
 	struct buffer_head * bh;
 	struct buffer_head * bhtail;
 	struct request * next;
+	int elevator_latency;
 };
 
 typedef void (request_fn_proc) (void);
 typedef struct request ** (queue_proc) (kdev_t dev);
+typedef struct elevator_s
+{
+	int read_latency;
+	int write_latency;
+} elevator_t;
 
 struct blk_dev_struct {
 	request_fn_proc		*request_fn;
@@ -47,6 +53,7 @@
 	struct request		*current_request;
 	struct request		plug;
 	struct tq_struct	plug_tq;
+	elevator_t		elevator;
 };
 
 struct sec_size {
@@ -90,5 +97,8 @@
 #define MAX_READAHEAD	PageAlignSize(4096*31)
 #define MIN_READAHEAD	PageAlignSize(4096*3)
 #endif
+
+#define ELEVATOR_READ_LATENCY	(NR_REQUEST>>2)
+#define ELEVATOR_WRITE_LATENCY	(NR_REQUEST<<2)
 
 #endif
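The hunks above are the whole patch. For readers who want to play with the idea outside the kernel, here is a minimal user-space sketch of the bookkeeping the patch introduces: every queued request carries an elevator_latency budget, a new request may only be inserted ahead of requests whose budget is still positive (and only as far as its own scan budget allows), and every request it leaves behind loses one unit, so nothing can be bypassed forever. The names below (toy_request, toy_insert) are made up for illustration and are not the kernel's struct request or elevator_queue(); like the IDE-style majors handled in the patch, the sketch never reorders ahead of the list head, which may already be busy.

/* toy_elevator.c - illustrative sketch only, not part of the patch.
 * Build and run:  cc -Wall -o toy_elevator toy_elevator.c && ./toy_elevator
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_request {
	unsigned long sector;
	int elevator_latency;	/* how many more times this request may be passed */
	struct toy_request *next;
};

/* Insert 'req' in roughly ascending sector order, but never bypass the
 * head nor any request whose latency budget is exhausted; every request
 * left behind the new one loses one unit of budget. */
static void toy_insert(struct toy_request **head, struct toy_request *req,
		       int latency)
{
	struct toy_request *tmp = *head;

	if (!tmp) {
		req->next = NULL;
		*head = req;
	} else {
		while (tmp->next &&
		       tmp->next->sector < req->sector &&	/* elevator order */
		       tmp->next->elevator_latency > 0 &&	/* may still be passed */
		       --latency > 0)				/* our own scan budget */
			tmp = tmp->next;
		req->next = tmp->next;
		tmp->next = req;
	}
	req->elevator_latency = latency;

	/* every request now queued behind the new one was passed once more */
	for (tmp = req->next; tmp; tmp = tmp->next)
		tmp->elevator_latency--;
}

int main(void)
{
	static const unsigned long sectors[] = { 100, 20, 60, 40, 80 };
	struct toy_request *head = NULL, *r, *next;
	unsigned int i;

	for (i = 0; i < sizeof(sectors) / sizeof(sectors[0]); i++) {
		r = calloc(1, sizeof(*r));
		if (!r)
			return 1;
		r->sector = sectors[i];
		toy_insert(&head, r, 3);	/* tiny budget to make the limit visible */
	}

	for (r = head; r; r = next) {
		next = r->next;
		printf("sector %3lu  latency %d\n", r->sector, r->elevator_latency);
		free(r);
	}
	return 0;
}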