diff -urN 2.2.14/drivers/block/ll_rw_blk.c elevator/drivers/block/ll_rw_blk.c --- 2.2.14/drivers/block/ll_rw_blk.c Fri Jan 7 18:19:10 2000 +++ elevator/drivers/block/ll_rw_blk.c Sun Feb 20 18:44:10 2000 @@ -3,6 +3,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1994, Karl Keyte: Added support for disk statistics + * Elevator latency, (C) 2000 Andrea Arcangeli SuSE */ /* @@ -142,6 +143,18 @@ return &blk_dev[major].current_request; } +static inline int get_request_latency(elevator_t * elevator, int rw) +{ + int latency; + + if (rw != READ) + latency = elevator->write_latency; + else + latency = elevator->read_latency; + + return latency; +} + /* * remove the plug and let it rip.. */ @@ -291,6 +304,139 @@ printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n"); } +static inline int seek_to_not_starving_chunk(struct request ** req, int * lat) +{ + struct request * tmp = *req; + int found = 0, pos = 0; + int last_pos = 0, __lat = *lat; + + do { + if (tmp->elevator_latency <= 0) + { + *req = tmp; + found = 1; + last_pos = pos; + if (last_pos >= __lat) + break; + } + pos += tmp->nr_segments; + } while ((tmp = tmp->next)); + *lat -= last_pos; + + return found; +} + +#define CASE_COALESCE_BUT_FIRST_REQUEST_MAYBE_BUSY \ + case IDE0_MAJOR: /* same as HD_MAJOR */ \ + case IDE1_MAJOR: \ + case FLOPPY_MAJOR: \ + case IDE2_MAJOR: \ + case IDE3_MAJOR: \ + case IDE4_MAJOR: \ + case IDE5_MAJOR: \ + case ACSI_MAJOR: \ + case MFM_ACORN_MAJOR: \ + case MDISK_MAJOR: \ + case DASD_MAJOR: +#define CASE_COALESCE_ALSO_FIRST_REQUEST \ + case SCSI_DISK0_MAJOR: \ + case SCSI_DISK1_MAJOR: \ + case SCSI_DISK2_MAJOR: \ + case SCSI_DISK3_MAJOR: \ + case SCSI_DISK4_MAJOR: \ + case SCSI_DISK5_MAJOR: \ + case SCSI_DISK6_MAJOR: \ + case SCSI_DISK7_MAJOR: \ + case SCSI_CDROM_MAJOR: \ + case DAC960_MAJOR+0: \ + case DAC960_MAJOR+1: \ + case DAC960_MAJOR+2: \ + case DAC960_MAJOR+3: \ + case DAC960_MAJOR+4: \ + case DAC960_MAJOR+5: \ + case DAC960_MAJOR+6: \ + case DAC960_MAJOR+7: \ + case 
COMPAQ_SMART2_MAJOR+0: \ + case COMPAQ_SMART2_MAJOR+1: \ + case COMPAQ_SMART2_MAJOR+2: \ + case COMPAQ_SMART2_MAJOR+3: \ + case COMPAQ_SMART2_MAJOR+4: \ + case COMPAQ_SMART2_MAJOR+5: \ + case COMPAQ_SMART2_MAJOR+6: \ + case COMPAQ_SMART2_MAJOR+7: + +#define elevator_starve_rest_of_queue(req) \ +do { \ + struct request * tmp = (req); \ + for ((tmp) = (tmp)->next; (tmp); (tmp) = (tmp)->next) \ + (tmp)->elevator_latency--; \ +} while (0) + +static inline void elevator_queue(struct request * req, + struct request * tmp, + int latency, + struct blk_dev_struct * dev, + struct request ** queue_head) +{ + struct request * __tmp; + int starving, __latency; + + starving = seek_to_not_starving_chunk(&tmp, &latency); + __tmp = tmp; + __latency = latency; + + for (;; tmp = tmp->next) + { + if ((latency -= tmp->nr_segments) <= 0) + { + tmp = __tmp; + latency = __latency - tmp->nr_segments; + + if (starving) + break; + + switch (MAJOR(req->rq_dev)) + { + CASE_COALESCE_BUT_FIRST_REQUEST_MAYBE_BUSY + if (tmp == dev->current_request) + default: + goto link; + CASE_COALESCE_ALSO_FIRST_REQUEST + } + + latency += tmp->nr_segments; + req->next = tmp; + *queue_head = req; + goto after_link; + } + + if (!tmp->next) + break; + + { + const int after_current = IN_ORDER(tmp,req); + const int before_next = IN_ORDER(req,tmp->next); + + if (!IN_ORDER(tmp,tmp->next)) { + if (after_current || before_next) + break; + } else { + if (after_current && before_next) + break; + } + } + } + + link: + req->next = tmp->next; + tmp->next = req; + + after_link: + req->elevator_latency = latency; + + elevator_starve_rest_of_queue(req); +} + /* * add-request adds a request to the linked list. 
* It disables interrupts (aquires the request spinlock) so that it can muck @@ -309,6 +455,7 @@ short disk_index; unsigned long flags; int queue_new_request = 0; + int latency; switch (major) { case DAC960_MAJOR+0: @@ -333,7 +480,7 @@ break; } - req->next = NULL; + latency = get_request_latency(&dev->elevator, req->cmd); /* * We use the goto to reduce locking complexity @@ -344,28 +491,17 @@ if (req->bh) mark_buffer_clean(req->bh); if (!(tmp = *current_request)) { + req->next = NULL; + req->elevator_latency = latency; *current_request = req; if (dev->current_request != &dev->plug) queue_new_request = 1; goto out; } - for ( ; tmp->next ; tmp = tmp->next) { - const int after_current = IN_ORDER(tmp,req); - const int before_next = IN_ORDER(req,tmp->next); - - if (!IN_ORDER(tmp,tmp->next)) { - if (after_current || before_next) - break; - } else { - if (after_current && before_next) - break; - } - } - req->next = tmp->next; - tmp->next = req; + elevator_queue(req, tmp, latency, dev, current_request); /* for SCSI devices, call request_fn unconditionally */ - if (scsi_blk_major(major) || + if ((0 && scsi_blk_major(major)) || (major >= DAC960_MAJOR+0 && major <= DAC960_MAJOR+7) || (major >= COMPAQ_SMART2_MAJOR+0 && major <= COMPAQ_SMART2_MAJOR+7)) @@ -399,6 +535,8 @@ total_segments--; if (total_segments > max_segments) return; + if (next->elevator_latency < req->elevator_latency) + req->elevator_latency = next->elevator_latency; req->bhtail->b_reqnext = next->bh; req->bhtail = next->bhtail; req->nr_sectors += next->nr_sectors; @@ -408,12 +546,28 @@ wake_up (&wait_for_request); } +#define read_pendings(req) \ +({ \ + int __ret = 0; \ + struct request * tmp = (req); \ + do { \ + if (tmp->cmd == READ) \ + { \ + __ret = 1; \ + break; \ + } \ + tmp = tmp->next; \ + } while (tmp); \ + __ret; \ +}) + void make_request(int major, int rw, struct buffer_head * bh) { unsigned int sector, count; - struct request * req; + struct request * req, * prev; int rw_ahead, max_req, max_sectors, 
max_segments; unsigned long flags; + int latency, starving; count = bh->b_size >> 9; sector = bh->b_rsector; @@ -490,6 +644,8 @@ max_sectors = get_max_sectors(bh->b_rdev); max_segments = get_max_segments(bh->b_rdev); + latency = get_request_latency(&blk_dev[major].elevator, rw); + /* * Now we acquire the request spinlock, we have to be mega careful * not to schedule or do something nonatomic @@ -502,17 +658,7 @@ major != DDV_MAJOR && major != NBD_MAJOR) plug_device(blk_dev + major); /* is atomic */ } else switch (major) { - case IDE0_MAJOR: /* same as HD_MAJOR */ - case IDE1_MAJOR: - case FLOPPY_MAJOR: - case IDE2_MAJOR: - case IDE3_MAJOR: - case IDE4_MAJOR: - case IDE5_MAJOR: - case ACSI_MAJOR: - case MFM_ACORN_MAJOR: - case MDISK_MAJOR: - case DASD_MAJOR: + CASE_COALESCE_BUT_FIRST_REQUEST_MAYBE_BUSY /* * The scsi disk and cdrom drivers completely remove the request * from the queue when they start processing an entry. For this @@ -523,37 +669,20 @@ * entry may be busy being processed and we thus can't change it. 
 */ if (req == blk_dev[major].current_request) - req = req->next; - if (!req) - break; + { + if (!(req = req->next)) + break; + latency -= req->nr_segments; + } /* fall through */ + CASE_COALESCE_ALSO_FIRST_REQUEST - case SCSI_DISK0_MAJOR: - case SCSI_DISK1_MAJOR: - case SCSI_DISK2_MAJOR: - case SCSI_DISK3_MAJOR: - case SCSI_DISK4_MAJOR: - case SCSI_DISK5_MAJOR: - case SCSI_DISK6_MAJOR: - case SCSI_DISK7_MAJOR: - case SCSI_CDROM_MAJOR: - case DAC960_MAJOR+0: - case DAC960_MAJOR+1: - case DAC960_MAJOR+2: - case DAC960_MAJOR+3: - case DAC960_MAJOR+4: - case DAC960_MAJOR+5: - case DAC960_MAJOR+6: - case DAC960_MAJOR+7: - case COMPAQ_SMART2_MAJOR+0: - case COMPAQ_SMART2_MAJOR+1: - case COMPAQ_SMART2_MAJOR+2: - case COMPAQ_SMART2_MAJOR+3: - case COMPAQ_SMART2_MAJOR+4: - case COMPAQ_SMART2_MAJOR+5: - case COMPAQ_SMART2_MAJOR+6: - case COMPAQ_SMART2_MAJOR+7: + /* avoid write-bombs to not hurt interactiveness of reads */ + if (rw != READ && read_pendings(req)) + max_segments = blk_dev[major].elevator.max_bomb_segments; + starving = seek_to_not_starving_chunk(&req, &latency); + prev = NULL; do { if (req->sem) continue; @@ -565,6 +694,8 @@ continue; /* Can we add it to the end of this request? */ if (req->sector + req->nr_sectors == sector) { + if (latency - req->nr_segments < 0) + break; if (req->bhtail->b_data + req->bhtail->b_size != bh->b_data) { if (req->nr_segments < max_segments) @@ -574,10 +705,18 @@ req->bhtail->b_reqnext = bh; req->bhtail = bh; req->nr_sectors += count; + + /* latency stuff */ + if ((latency -= req->nr_segments) < req->elevator_latency) + req->elevator_latency = latency; + elevator_starve_rest_of_queue(req); + /* Can we now merge this req with the next? */ attempt_merge(req, max_sectors, max_segments); /* or to the beginning? 
*/ } else if (req->sector - count == sector) { + if (!prev && starving) + continue; if (bh->b_data + bh->b_size != req->bh->b_data) { if (req->nr_segments < max_segments) @@ -590,6 +729,14 @@ req->current_nr_sectors = count; req->sector = sector; req->nr_sectors += count; + + /* latency stuff */ + if (latency < --req->elevator_latency) + req->elevator_latency = latency; + elevator_starve_rest_of_queue(req); + + if (prev) + attempt_merge(prev, max_sectors, max_segments); } else continue; @@ -597,7 +744,8 @@ spin_unlock_irqrestore(&io_request_lock,flags); return; - } while ((req = req->next) != NULL); + } while (prev = req, + (latency -= req->nr_segments) >= 0 && (req = req->next) != NULL); } /* find an unused request. */ @@ -623,7 +771,6 @@ req->sem = NULL; req->bh = bh; req->bhtail = bh; - req->next = NULL; add_request(major+blk_dev,req); return; @@ -792,12 +939,14 @@ dev->plug_tq.sync = 0; dev->plug_tq.routine = &unplug_device; dev->plug_tq.data = dev; + dev->elevator.read_latency = ELEVATOR_READ_LATENCY; + dev->elevator.write_latency = ELEVATOR_WRITE_LATENCY; + dev->elevator.max_bomb_segments = ELEVATOR_MAX_BOMB_SEGMENTS; } req = all_requests + NR_REQUEST; while (--req >= all_requests) { req->rq_status = RQ_INACTIVE; - req->next = NULL; } memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, sizeof(max_readahead)); diff -urN 2.2.14/include/linux/blkdev.h elevator/include/linux/blkdev.h --- 2.2.14/include/linux/blkdev.h Thu Feb 17 00:45:42 2000 +++ elevator/include/linux/blkdev.h Sun Feb 20 18:47:36 2000 @@ -32,11 +32,19 @@ struct buffer_head * bh; struct buffer_head * bhtail; struct request * next; + int elevator_latency; }; typedef void (request_fn_proc) (void); typedef struct request ** (queue_proc) (kdev_t dev); +typedef struct elevator_s +{ + int read_latency; + int write_latency; + int max_bomb_segments; +} elevator_t; + struct blk_dev_struct { request_fn_proc *request_fn; /* @@ -47,6 +55,8 @@ struct request *current_request; struct request plug; 
struct tq_struct plug_tq; + + elevator_t elevator; }; struct sec_size { @@ -90,5 +100,9 @@ #define MAX_READAHEAD PageAlignSize(4096*31) #define MIN_READAHEAD PageAlignSize(4096*3) #endif + +#define ELEVATOR_READ_LATENCY (NR_REQUEST>>1) +#define ELEVATOR_WRITE_LATENCY (NR_REQUEST<<5) +#define ELEVATOR_MAX_BOMB_SEGMENTS 4 #endif