diff -urN 2.3.42/drivers/block/ll_rw_blk.c elevator/drivers/block/ll_rw_blk.c --- 2.3.42/drivers/block/ll_rw_blk.c Sun Jan 30 15:43:37 2000 +++ elevator/drivers/block/ll_rw_blk.c Sat Feb 5 21:57:31 2000 @@ -3,6 +3,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1994, Karl Keyte: Added support for disk statistics + * Elevator latency, (C) 2000 Andrea Arcangeli SuSE */ /* @@ -140,6 +141,18 @@ return &blk_dev[major].request_queue; } +static inline int get_request_latency(elevator_t * elevator, int rw) +{ + int latency; + + if (rw != READ) + latency = elevator->write_latency; + else + latency = elevator->read_latency; + + return latency; +} + void blk_cleanup_queue(request_queue_t * q) { memset(q, 0, sizeof(*q)); @@ -201,6 +214,9 @@ */ q->use_plug = 1; q->head_active = 1; + + q->elevator.read_latency = ELEVATOR_READ_LATENCY; + q->elevator.write_latency = ELEVATOR_WRITE_LATENCY; } /* @@ -374,6 +390,91 @@ printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n"); } +static inline int seek_to_not_starving_chunk(struct request ** req, int * lat) +{ + struct request * tmp = *req; + int found = 0, pos = 0; + int last_pos = 0, __lat = *lat; + + do { + if (tmp->elevator_latency <= 0) + { + *req = tmp; + found = 1; + last_pos = pos; + if (last_pos >= __lat) + break; + } + pos++; + } while ((tmp = tmp->next)); + *lat -= last_pos; + + return found; +} + +#define elevator_starve_rest_of_queue(req) \ +do { \ + for ((req) = (req)->next; (req); (req) = (req)->next) \ + (req)->elevator_latency--; \ +} while (0) + +static inline void elevator_queue(struct request * req, + struct request * tmp, + int latency, + request_queue_t * q, + int major) +{ + struct request * head; + int starving; + + starving = seek_to_not_starving_chunk(&tmp, &latency); + head = tmp; + + for (;;) + { + if (latency-- <= 0) + { + tmp = head; + + if (starving) + break; + + if (q->head_active && !q->plugged) + break; + + req->next = tmp; + q->current_request = req; + goto after_link; + } + + if (!tmp->next) + break; + + { + const int after_current = IN_ORDER(tmp,req); + const int before_next = IN_ORDER(req,tmp->next); + + if (!IN_ORDER(tmp,tmp->next)) { + if (after_current || before_next) + break; + } else { + if (after_current && before_next) + break; + } + } + + tmp = tmp->next; + } + + req->next = tmp->next; + tmp->next = req; + + after_link: + req->elevator_latency = latency; + + elevator_starve_rest_of_queue(req); +} + /* * add-request adds a request to the linked list. * It disables interrupts (aquires the request spinlock) so that it can muck @@ -389,9 +490,11 @@ int major = MAJOR(req->rq_dev); struct request * tmp; unsigned long flags; + int latency; drive_stat_acct(req, req->nr_sectors, 1); - req->next = NULL; + + latency = get_request_latency(&q->elevator, req->cmd); /* * We use the goto to reduce locking complexity @@ -399,23 +502,12 @@ spin_lock_irqsave(&io_request_lock,flags); if (!(tmp = q->current_request)) { + req->next = NULL; + req->elevator_latency = latency; q->current_request = req; goto out; } - for ( ; tmp->next ; tmp = tmp->next) { - const int after_current = IN_ORDER(tmp,req); - const int before_next = IN_ORDER(req,tmp->next); - - if (!IN_ORDER(tmp,tmp->next)) { - if (after_current || before_next) - break; - } else { - if (after_current && before_next) - break; - } - } - req->next = tmp->next; - tmp->next = req; + elevator_queue(req, tmp, latency, q, major); /* * FIXME(eric) I don't understand why there is a need for this @@ -463,6 +555,8 @@ if(!(q->merge_requests_fn)(q, req, next)) return; + if (next->elevator_latency < req->elevator_latency) + req->elevator_latency = next->elevator_latency; req->bhtail->b_reqnext = next->bh; req->bhtail = next->bhtail; req->nr_sectors += next->nr_sectors; @@ -471,15 +565,32 @@ wake_up (&wait_for_request); } +#define read_pendings(req) \ +({ \ + int __ret = 0; \ + struct request * tmp = (req); \ + while (tmp) \ + { \ + if (tmp->cmd == READ) \ + { \ + __ret = 1; \ + break; \ + } \ + tmp = tmp->next; \ + } \ + __ret; \ +}) + static void __make_request(request_queue_t * q, int major, int rw, struct buffer_head * bh) { unsigned int sector, count; - struct request * req; + struct request * req, * prev; int rw_ahead, max_req, max_sectors; unsigned long flags; + int latency, starving; count = bh->b_size >> 9; sector = bh->b_rsector; @@ -571,6 +682,8 @@ */ max_sectors = get_max_sectors(bh->b_rdev); + latency = get_request_latency(&q->elevator, rw); + /* * Now we acquire the request spinlock, we have to be mega careful * not to schedule or do something nonatomic @@ -586,6 +699,10 @@ goto get_rq; } + /* avoid write-bombs to not hurt iteractiveness of reads */ + if (rw != READ && read_pendings(req)) + goto get_rq; + if (q->head_active && !q->plugged) { /* * The scsi disk and cdrom drivers completely remove the request @@ -599,8 +716,11 @@ */ if ((req = req->next) == NULL) goto get_rq; + latency--; } + starving = seek_to_not_starving_chunk(&req, &latency); + prev = NULL; do { if (req->sem) continue; @@ -626,14 +746,23 @@ */ if(!(q->merge_fn)(q, req, bh)) continue; + if (latency-1 <= 0) + break; req->bhtail->b_reqnext = bh; req->bhtail = bh; req->nr_sectors += count; drive_stat_acct(req, count, 0); /* Can we now merge this req with the next? */ attempt_merge(q, req, max_sectors); + + /* latency stuff */ + if (--latency < req->elevator_latency) + req->elevator_latency = latency; + elevator_starve_rest_of_queue(req); /* or to the beginning? */ } else if (req->sector - count == sector) { + if (!prev && starving) + continue; /* * The merge_fn is a more advanced way * of accomplishing the same task. Instead @@ -655,13 +784,21 @@ req->sector = sector; req->nr_sectors += count; drive_stat_acct(req, count, 0); + /* Can we now merge this req with the prev? */ + if (prev) + attempt_merge(q, prev, max_sectors); + + /* latency stuff */ + if (latency < --req->elevator_latency) + req->elevator_latency = latency; + elevator_starve_rest_of_queue(req); } else continue; spin_unlock_irqrestore(&io_request_lock,flags); return; - } while ((req = req->next) != NULL); + } while (prev = req, (req = req->next) != NULL && --latency >= 0); /* find an unused request. */ get_rq: @@ -688,7 +825,6 @@ req->sem = NULL; req->bh = bh; req->bhtail = bh; - req->next = NULL; add_request(q, req); return; @@ -869,7 +1005,6 @@ req = all_requests + NR_REQUEST; while (--req >= all_requests) { req->rq_status = RQ_INACTIVE; - req->next = NULL; } memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, sizeof(max_readahead)); diff -urN 2.3.42/include/linux/blkdev.h elevator/include/linux/blkdev.h --- 2.3.42/include/linux/blkdev.h Fri Feb 4 20:23:49 2000 +++ elevator/include/linux/blkdev.h Sat Feb 5 21:57:31 2000 @@ -34,6 +34,7 @@ struct buffer_head * bh; struct buffer_head * bhtail; struct request * next; + int elevator_latency; }; typedef struct request_queue request_queue_t; @@ -46,6 +47,12 @@ typedef void (request_fn_proc) (request_queue_t *); typedef request_queue_t * (queue_proc) (kdev_t dev); +typedef struct elevator_s +{ + int read_latency; + int write_latency; +} elevator_t; + struct request_queue { struct request * current_request; @@ -78,6 +85,8 @@ * this queue or not. */ char use_plug; + + elevator_t elevator; }; struct blk_dev_struct { @@ -144,5 +153,8 @@ /* read-ahead in pages.. */ #define MAX_READAHEAD 31 #define MIN_READAHEAD 3 + +#define ELEVATOR_READ_LATENCY (NR_REQUEST>>2) +#define ELEVATOR_WRITE_LATENCY (NR_REQUEST<<2) #endif