diff -urN 2.3.42aa3/drivers/block/ll_rw_blk.c elevator/drivers/block/ll_rw_blk.c
--- 2.3.42aa3/drivers/block/ll_rw_blk.c	Wed Feb  9 03:26:25 2000
+++ elevator/drivers/block/ll_rw_blk.c	Tue Feb  8 20:35:55 2000
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 1991, 1992 Linus Torvalds
  * Copyright (C) 1994, Karl Keyte: Added support for disk statistics
+ * Elevator latency, (C) 2000 Andrea Arcangeli SuSE
  */

 /*
@@ -148,6 +149,18 @@
 	return &blk_dev[major].request_queue;
 }

+static inline int get_request_latency(elevator_t * elevator, int rw)
+{
+	int latency;
+
+	if (rw != READ)
+		latency = elevator->write_latency;
+	else
+		latency = elevator->read_latency;
+
+	return latency;
+}
+
 void blk_cleanup_queue(request_queue_t * q)
 {
 	memset(q, 0, sizeof(*q));
@@ -209,6 +222,10 @@
 	 */
 	q->use_plug = 1;
 	q->head_active = 1;
+
+	q->elevator.read_latency = ELEVATOR_READ_LATENCY;
+	q->elevator.write_latency = ELEVATOR_WRITE_LATENCY;
+	q->elevator.max_bomb_segments = ELEVATOR_MAX_BOMB_SEGMENTS;
 }

 /*
@@ -382,6 +399,89 @@
 	printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
 }

+static inline int seek_to_not_starving_chunk(struct request ** req, int * lat)
+{
+	struct request * tmp = *req;
+	int found = 0, pos = 0;
+	int last_pos = 0, __lat = *lat;
+
+	do {
+		if (tmp->elevator_latency <= 0)
+		{
+			*req = tmp;
+			found = 1;
+			last_pos = pos;
+			if (last_pos >= __lat)
+				break;
+		}
+		pos += tmp->nr_segments;
+	} while ((tmp = tmp->next));
+	*lat -= last_pos;
+
+	return found;
+}
+
+#define elevator_starve_rest_of_queue(req)			\
+do {								\
+	struct request * tmp = (req);				\
+	for ((tmp) = (tmp)->next; (tmp); (tmp) = (tmp)->next)	\
+		(tmp)->elevator_latency--;			\
+} while (0)
+
+static inline void elevator_queue(struct request * req,
+				  struct request * tmp,
+				  int latency,
+				  request_queue_t * q)
+{
+	struct request * head;
+	int starving;
+
+	starving = seek_to_not_starving_chunk(&tmp, &latency);
+	head = tmp;
+
+	for (;; tmp = tmp->next)
+	{
+		if ((latency -= tmp->nr_segments) <= 0)
+		{
+			tmp = head;
+
+			if (starving)
+				break;
+
+			if (q->head_active && !q->plugged)
+				break;
+
+			req->next = tmp;
+			q->current_request = req;
+			goto after_link;
+		}
+
+		if (!tmp->next)
+			break;
+
+		{
+			const int after_current = IN_ORDER(tmp,req);
+			const int before_next = IN_ORDER(req,tmp->next);
+
+			if (!IN_ORDER(tmp,tmp->next)) {
+				if (after_current || before_next)
+					break;
+			} else {
+				if (after_current && before_next)
+					break;
+			}
+		}
+	}
+
+	req->next = tmp->next;
+	tmp->next = req;
+
+ after_link:
+	req->elevator_latency = latency;
+
+	elevator_starve_rest_of_queue(req);
+}
+
 /*
  * add-request adds a request to the linked list.
  * It disables interrupts (acquires the request spinlock) so that it can muck
@@ -397,9 +497,11 @@
 	int major = MAJOR(req->rq_dev);
 	struct request * tmp;
 	unsigned long flags;
+	int latency;

 	drive_stat_acct(req, req->nr_sectors, 1);
-	req->next = NULL;
+
+	latency = get_request_latency(&q->elevator, req->cmd);

 	/*
 	 * We use the goto to reduce locking complexity
@@ -407,23 +509,12 @@
 	spin_lock_irqsave(&io_request_lock,flags);

 	if (!(tmp = q->current_request)) {
+		req->next = NULL;
+		req->elevator_latency = latency;
 		q->current_request = req;
 		goto out;
 	}
-	for ( ; tmp->next ; tmp = tmp->next) {
-		const int after_current = IN_ORDER(tmp,req);
-		const int before_next = IN_ORDER(req,tmp->next);
-
-		if (!IN_ORDER(tmp,tmp->next)) {
-			if (after_current || before_next)
-				break;
-		} else {
-			if (after_current && before_next)
-				break;
-		}
-	}
-	req->next = tmp->next;
-	tmp->next = req;
+	elevator_queue(req, tmp, latency, q);

 	/*
 	 * FIXME(eric) I don't understand why there is a need for this
@@ -451,7 +542,8 @@
  */
 static inline void attempt_merge (request_queue_t * q,
 				  struct request *req,
-				  int max_sectors)
+				  int max_sectors,
+				  int max_bomb_segments)
 {
 	struct request *next = req->next;

@@ -461,6 +553,8 @@
 		return;
 	if (next->sem || req->cmd != next->cmd || req->rq_dev != next->rq_dev || req->nr_sectors + next->nr_sectors > max_sectors)
 		return;
+	if (req->nr_segments + next->nr_segments > max_bomb_segments)
+		return;

 	/*
	 * If we are not allowed to merge these requests, then
@@ -471,6 +565,8 @@
 	if(!(q->merge_requests_fn)(q, req, next))
 		return;

+	if (next->elevator_latency < req->elevator_latency)
+		req->elevator_latency = next->elevator_latency;
 	req->bhtail->b_reqnext = next->bh;
 	req->bhtail = next->bhtail;
 	req->nr_sectors += next->nr_sectors;
@@ -479,15 +575,31 @@
 	wake_up (&wait_for_request);
 }

+#define read_pendings(req)		\
+({					\
+	int __ret = 0;			\
+	struct request * tmp = (req);	\
+	do {				\
+		if (tmp->cmd == READ)	\
+		{			\
+			__ret = 1;	\
+			break;		\
+		}			\
+		tmp = tmp->next;	\
+	} while (tmp);			\
+	__ret;				\
+})
+
 static void __make_request(request_queue_t * q, int major, int rw,
 			   struct buffer_head * bh)
 {
 	unsigned int sector, count;
-	struct request * req;
-	int rw_ahead, max_req, max_sectors;
+	struct request * req, * prev;
+	int rw_ahead, max_req, max_sectors, max_bomb_segments = INT_MAX;
 	unsigned long flags;
+	int latency, starving;

 	count = bh->b_size >> 9;
 	sector = bh->b_rsector;
@@ -575,6 +687,8 @@
 	 */
 	max_sectors = get_max_sectors(bh->b_rdev);

+	latency = get_request_latency(&q->elevator, rw);
+
 	/*
 	 * Now we acquire the request spinlock, we have to be mega careful
 	 * not to schedule or do something nonatomic
@@ -590,6 +704,10 @@
 		goto get_rq;
 	}

+	/* avoid write-bombs to not hurt the interactivity of reads */
+	if (rw != READ && read_pendings(req))
+		max_bomb_segments = q->elevator.max_bomb_segments;
+
 	if (q->head_active && !q->plugged) {
 		/*
 		 * The scsi disk and cdrom drivers completely remove the request
@@ -603,8 +721,11 @@
 		 */
 		if ((req = req->next) == NULL)
 			goto get_rq;
+		latency -= req->nr_segments;
 	}

+	starving = seek_to_not_starving_chunk(&req, &latency);
+	prev = NULL;
 	do {
 		if (req->sem)
 			continue;
@@ -614,8 +735,12 @@
 			continue;
 		if (req->rq_dev != bh->b_rdev)
 			continue;
+		if (req->nr_segments > max_bomb_segments)
+			continue;
 		/* Can we add it to the end of this request? */
 		if (req->sector + req->nr_sectors == sector) {
+			if (latency - req->nr_segments < 0)
+				break;
 			/*
 			 * The merge_fn is a more advanced way
 			 * of accomplishing the same task.  Instead
@@ -634,10 +759,18 @@
 			req->bhtail = bh;
 			req->nr_sectors += count;
 			drive_stat_acct(req, count, 0);
+
+			/* latency stuff */
+			if ((latency -= req->nr_segments) < req->elevator_latency)
+				req->elevator_latency = latency;
+			elevator_starve_rest_of_queue(req);
+
 			/* Can we now merge this req with the next? */
-			attempt_merge(q, req, max_sectors);
+			attempt_merge(q, req, max_sectors, max_bomb_segments);
 		/* or to the beginning? */
 		} else if (req->sector - count == sector) {
+			if (!prev && starving)
+				continue;
 			/*
 			 * The merge_fn is a more advanced way
 			 * of accomplishing the same task.  Instead
@@ -659,13 +792,22 @@
 			req->sector = sector;
 			req->nr_sectors += count;
 			drive_stat_acct(req, count, 0);
+
+			/* latency stuff */
+			if (latency < --req->elevator_latency)
+				req->elevator_latency = latency;
+			elevator_starve_rest_of_queue(req);
+
+			if (prev)
+				attempt_merge(q, prev, max_sectors, max_bomb_segments);
 		} else
 			continue;

 		spin_unlock_irqrestore(&io_request_lock,flags);
 		return;

-	} while ((req = req->next) != NULL);
+	} while (prev = req,
+		 (latency -= req->nr_segments) >= 0 && (req = req->next) != NULL);

 	/* find an unused request. */
 get_rq:
@@ -692,7 +834,6 @@
 	req->sem = NULL;
 	req->bh = bh;
 	req->bhtail = bh;
-	req->next = NULL;
 	add_request(q, req);
 	return;

@@ -900,7 +1041,6 @@
 	req = all_requests + NR_REQUEST;
 	while (--req >= all_requests) {
 		req->rq_status = RQ_INACTIVE;
-		req->next = NULL;
 	}
 	memset(ro_bits,0,sizeof(ro_bits));
 	memset(max_readahead, 0, sizeof(max_readahead));
diff -urN 2.3.42aa3/include/linux/blkdev.h elevator/include/linux/blkdev.h
--- 2.3.42aa3/include/linux/blkdev.h	Wed Feb  9 03:26:25 2000
+++ elevator/include/linux/blkdev.h	Wed Feb  9 03:16:18 2000
@@ -34,6 +34,7 @@
 	struct buffer_head * bh;
 	struct buffer_head * bhtail;
 	struct request * next;
+	int elevator_latency;
 };

 typedef struct request_queue request_queue_t;
@@ -46,6 +47,13 @@
 typedef void (request_fn_proc) (request_queue_t *);
 typedef request_queue_t * (queue_proc) (kdev_t dev);

+typedef struct elevator_s
+{
+	int read_latency;
+	int write_latency;
+	int max_bomb_segments;
+} elevator_t;
+
 struct request_queue
 {
 	struct request * current_request;
@@ -78,6 +86,8 @@
 	 * this queue or not.
 	 */
 	char use_plug;
+
+	elevator_t elevator;
 };

 struct blk_dev_struct {
@@ -144,5 +154,9 @@
 /* read-ahead in pages.. */
 #define MAX_READAHEAD	31
 #define MIN_READAHEAD	3
+
+#define ELEVATOR_READ_LATENCY		(NR_REQUEST>>1)
+#define ELEVATOR_WRITE_LATENCY		(NR_REQUEST<<5)
+#define ELEVATOR_MAX_BOMB_SEGMENTS	4

 #endif
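
A note on how the bound works, for readers of the patch: every queued request carries elevator_latency, a budget of how much traffic may still overtake it. Elevator ordering applies only while that budget lasts; once it is exhausted the request acts as a barrier that newcomers must queue behind. The userspace sketch below demonstrates just that core rule under simplified assumptions (every bypass costs one unit, no merging, no plugging, no active head); the names sim_request, starving and elevator_insert are invented for the demo and do not appear in the patch, which also charges the newcomer nr_segments for each request already ahead of it.

#include <stdio.h>
#include <stdlib.h>

struct sim_request {
	int sector;		/* sort key for elevator ordering */
	int elevator_latency;	/* how many times we may still be bypassed */
	struct sim_request *next;
};

static int starving(const struct sim_request *rq)
{
	return rq->elevator_latency <= 0;
}

static void elevator_insert(struct sim_request **head, struct sim_request *nrq)
{
	struct sim_request **link = head, **p;
	struct sim_request *tmp;

	/* A request whose budget is exhausted is a barrier: like
	 * seek_to_not_starving_chunk(), skip past the last one. */
	for (p = head; *p; p = &(*p)->next)
		if (starving(*p))
			link = &(*p)->next;

	/* Ordinary elevator (ascending sector) ordering after that. */
	while (*link && (*link)->sector <= nrq->sector)
		link = &(*link)->next;

	nrq->next = *link;
	*link = nrq;

	/* Every request now behind the newcomer has been bypassed once,
	 * mirroring elevator_starve_rest_of_queue(). */
	for (tmp = nrq->next; tmp; tmp = tmp->next)
		tmp->elevator_latency--;
}

int main(void)
{
	struct sim_request *queue, *rq;
	int i;

	/* One request near the end of the disk... */
	rq = malloc(sizeof(*rq));
	rq->sector = 1000000;
	rq->elevator_latency = 4;	/* small budget for demonstration */
	rq->next = NULL;
	queue = rq;

	/* ...then a flood of requests for low sectors.  Without the
	 * budget they would all sort in front of it forever. */
	for (i = 1; i <= 8; i++) {
		rq = malloc(sizeof(*rq));
		rq->sector = i * 100;
		rq->elevator_latency = 4;
		elevator_insert(&queue, rq);
	}

	for (rq = queue; rq; rq = rq->next)
		printf("sector %7d  budget %d\n",
		       rq->sector, rq->elevator_latency);
	return 0;
}

Run it and the sector-1000000 request is overtaken exactly four times, then becomes a barrier: the later low-sector requests (500 onwards) line up behind it, so worst-case service latency is bounded by the initial budget instead of by the arrival pattern.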
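
The write-bomb cap can be sketched the same way: max_bomb_segments only bites while a read is actually waiting in the queue, which is exactly when an overgrown write could delay an interactive reader. Another self-contained sketch; may_grow is a hypothetical helper that condenses the checks __make_request() and attempt_merge() perform inline, and read_pendings() is re-implemented here as a plain function.

#include <stdio.h>
#include <limits.h>

enum cmd { READ, WRITE };

struct rq {
	enum cmd cmd;
	int nr_segments;
	struct rq *next;
};

/* Is any read still waiting in the queue?  Mirrors read_pendings(). */
static int read_pendings(const struct rq *rq)
{
	for (; rq; rq = rq->next)
		if (rq->cmd == READ)
			return 1;
	return 0;
}

/*
 * May request "a" grow by "extra" more segments?  Writes are capped at
 * max_bomb_segments only while reads are pending; otherwise the cap is
 * effectively infinite, as with max_bomb_segments = INT_MAX above.
 */
static int may_grow(const struct rq *queue, const struct rq *a,
		    int extra, int max_bomb_segments)
{
	int cap = INT_MAX;

	if (a->cmd != READ && read_pendings(queue))
		cap = max_bomb_segments;
	return a->nr_segments + extra <= cap;
}

int main(void)
{
	struct rq rd = { READ,  1, NULL };
	struct rq wr = { WRITE, 3, &rd };	/* queue: write -> read */

	printf("write may grow with read pending:  %d\n",
	       may_grow(&wr, &wr, 2, 4));	/* 3 + 2 > 4, so 0 */
	wr.next = NULL;				/* the reader completed */
	printf("write may grow, no reads waiting: %d\n",
	       may_grow(&wr, &wr, 2, 4));	/* no cap, so 1 */
	return 0;
}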
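
As for the numbers: with the blkdev.h defaults above, a write tolerates 64 times as much bypassing traffic as a read, since (NR_REQUEST<<5)/(NR_REQUEST>>1) = 64 whatever NR_REQUEST is configured to, and ELEVATOR_MAX_BOMB_SEGMENTS = 4 keeps any single write that is blocking readers down to four segments. Nothing in this diff exposes the three elevator_t fields at run time, so tuning them means rebuilding with different constants.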