diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/Documentation/Configure.help linux/Documentation/Configure.help --- /opt/kernel/linux-2.4.20-pre1/Documentation/Configure.help 2002-08-06 16:57:18.000000000 +0200 +++ linux/Documentation/Configure.help 2002-08-06 16:53:26.000000000 +0200 @@ -373,6 +373,12 @@ Select this if you have a 32-bit processor and more than 4 gigabytes of physical RAM. +HIGHMEM I/O support +CONFIG_HIGHIO + If you want to be able to do I/O to high memory pages, say Y. + Otherwise low memory pages are used as bounce buffers causing a + degrade in performance. + Normal floppy disk support CONFIG_BLK_DEV_FD If you want to use the floppy disk drive(s) of your PC under Linux, diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/Documentation/ide.txt linux/Documentation/ide.txt --- /opt/kernel/linux-2.4.20-pre1/Documentation/ide.txt 2001-09-07 18:28:38.000000000 +0200 +++ linux/Documentation/ide.txt 2002-08-06 08:55:50.000000000 +0200 @@ -306,6 +306,9 @@ "idex=serialize" : do not overlap operations on idex and ide(x^1) "idex=reset" : reset interface after probe "idex=dma" : automatically configure/use DMA if possible. + "idex=nohighio" : don't use i/o to high memory addresses on this + interface. i/o to memory locations higher + than ~860MiB will be bounced. The following are valid ONLY on ide0, and the defaults for the base,ctl ports must not be altered. diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/arch/i386/config.in linux/arch/i386/config.in --- /opt/kernel/linux-2.4.20-pre1/arch/i386/config.in 2002-08-06 16:57:18.000000000 +0200 +++ linux/arch/i386/config.in 2002-08-06 16:53:26.000000000 +0200 @@ -176,14 +176,19 @@ "off CONFIG_NOHIGHMEM \ 4GB CONFIG_HIGHMEM4G \ 64GB CONFIG_HIGHMEM64G" off -if [ "$CONFIG_HIGHMEM4G" = "y" ]; then +if [ "$CONFIG_HIGHMEM4G" = "y" -o "$CONFIG_HIGHMEM64G" = "y" ]; then define_bool CONFIG_HIGHMEM y +else + define_bool CONFIG_HIGHMEM n fi if [ "$CONFIG_HIGHMEM64G" = "y" ]; then - define_bool CONFIG_HIGHMEM y define_bool CONFIG_X86_PAE y fi +if [ "$CONFIG_HIGHMEM" = "y" ]; then + bool 'HIGHMEM I/O support' CONFIG_HIGHIO +fi + bool 'Math emulation' CONFIG_MATH_EMULATION bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR bool 'Symmetric multi-processing support' CONFIG_SMP diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/arch/i386/kernel/setup.c linux/arch/i386/kernel/setup.c --- /opt/kernel/linux-2.4.20-pre1/arch/i386/kernel/setup.c 2002-08-06 16:57:18.000000000 +0200 +++ linux/arch/i386/kernel/setup.c 2002-08-06 16:53:26.000000000 +0200 @@ -175,6 +175,8 @@ static int disable_x86_fxsr __initdata = 0; static int disable_x86_ht __initdata = 0; +extern int blk_nohighio; + int enable_acpi_smp_table; #if defined(CONFIG_AGP) || defined(CONFIG_AGP_MODULE) @@ -871,13 +873,17 @@ void __init setup_arch(char **cmdline_p) { unsigned long bootmap_size, low_mem_size; - unsigned long start_pfn, max_pfn, max_low_pfn; + unsigned long start_pfn, max_low_pfn; int i; #ifdef CONFIG_VISWS visws_get_board_type_and_rev(); #endif +#ifndef CONFIG_HIGHIO + blk_nohighio = 1; +#endif + ROOT_DEV = to_kdev_t(ORIG_ROOT_DEV); drive_info = DRIVE_INFO; screen_info = SCREEN_INFO; @@ -1206,6 +1225,14 @@ __setup("notsc", tsc_setup); #endif +static int __init highio_setup(char *str) +{ + printk("i386: disabling HIGHMEM block I/O\n"); + blk_nohighio = 1; + return 1; +} +__setup("nohighio", highio_setup); + static int __init get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; diff -urN -X 
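
The "degrade in performance" the help text above refers to is the copy that bouncing implies. A minimal sketch of the write-side cost, loosely modelled on create_bounce() in mm/highmem.c (names are made up, error paths and the emergency pools the real code falls back on are omitted; reads pay the same copy in the completion handler instead):

/* illustration only -- the real implementation is create_bounce() */
static struct page *bounce_copy_for_write(struct buffer_head *bh_orig)
{
	struct page *lowmem_page = alloc_page(GFP_NOIO);	/* never highmem */
	char *vfrom;

	if (!lowmem_page)
		return NULL;

	/* a WRITE has to carry its data down into the bounce page */
	vfrom = kmap(bh_orig->b_page);
	memcpy(page_address(lowmem_page), vfrom + bh_offset(bh_orig),
	       bh_orig->b_size);
	kunmap(bh_orig->b_page);

	return lowmem_page;	/* the actual I/O is then done to this page */
}
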
/home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/block/cciss.c linux/drivers/block/cciss.c --- /opt/kernel/linux-2.4.20-pre1/drivers/block/cciss.c 2002-08-03 02:39:43.000000000 +0200 +++ linux/drivers/block/cciss.c 2002-08-06 08:50:25.000000000 +0200 @@ -1166,20 +1166,22 @@ static inline void complete_command( CommandList_struct *cmd, int timeout) { int status = 1; - int i; + int i, ddir; u64bit temp64; if (timeout) status = 0; /* unmap the DMA mapping for all the scatter gather elements */ + if (cmd->Request.Type.Direction == XFER_READ) + ddir = PCI_DMA_FROMDEVICE; + else + ddir = PCI_DMA_TODEVICE; for(i=0; iHeader.SGList; i++) { temp64.val32.lower = cmd->SG[i].Addr.lower; temp64.val32.upper = cmd->SG[i].Addr.upper; - pci_unmap_single(hba[cmd->ctlr]->pdev, - temp64.val, cmd->SG[i].Len, - (cmd->Request.Type.Direction == XFER_READ) ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + pci_unmap_page(hba[cmd->ctlr]->pdev, + temp64.val, cmd->SG[i].Len, ddir); } if(cmd->err_info->CommandStatus != 0) @@ -1287,7 +1289,7 @@ static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, struct buffer_head *bh, int max_segments) { - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) + if (blk_seg_merge_ok(rq->bhtail, bh)) return 1; return cpq_new_segment(q, rq, max_segments); } @@ -1295,7 +1297,7 @@ static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, struct buffer_head *bh, int max_segments) { - if (bh->b_data + bh->b_size == rq->bh->b_data) + if (blk_seg_merge_ok(bh, rq->bh)) return 1; return cpq_new_segment(q, rq, max_segments); } @@ -1305,7 +1307,7 @@ { int total_segments = rq->nr_segments + nxt->nr_segments; - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) + if (blk_seg_merge_ok(rq->bhtail, nxt->bh)) total_segments--; if (total_segments > MAXSGENTRIES) @@ -1326,18 +1328,18 @@ ctlr_info_t *h= q->queuedata; CommandList_struct *c; int log_unit, start_blk, seg; - char *lastdataend; + unsigned long long lastdataend; struct buffer_head *bh; struct list_head *queue_head = &q->queue_head; struct request *creq; u64bit temp64; - struct my_sg tmp_sg[MAXSGENTRIES]; - int i; + struct scatterlist tmp_sg[MAXSGENTRIES]; + int i, ddir; if (q->plugged) goto startio; -queue_next: +next: if (list_empty(queue_head)) goto startio; @@ -1363,8 +1365,8 @@ spin_unlock_irq(&io_request_lock); c->cmd_type = CMD_RWREQ; - bh = creq->bh; c->rq = creq; + bh = creq->bh; /* fill in the request */ log_unit = MINOR(creq->rq_dev) >> NWD_SHIFT; @@ -1386,34 +1388,36 @@ printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",(int) creq->sector, (int) creq->nr_sectors); #endif /* CCISS_DEBUG */ - seg = 0; - lastdataend = NULL; + seg = 0; + lastdataend = ~0ULL; while(bh) { - if (bh->b_data == lastdataend) + if (bh_phys(bh) == lastdataend) { // tack it on to the last segment - tmp_sg[seg-1].len +=bh->b_size; + tmp_sg[seg-1].length +=bh->b_size; lastdataend += bh->b_size; } else { if (seg == MAXSGENTRIES) BUG(); - tmp_sg[seg].len = bh->b_size; - tmp_sg[seg].start_addr = bh->b_data; - lastdataend = bh->b_data + bh->b_size; + tmp_sg[seg].page = bh->b_page; + tmp_sg[seg].length = bh->b_size; + tmp_sg[seg].offset = bh_offset(bh); + lastdataend = bh_phys(bh) + bh->b_size; seg++; } bh = bh->b_reqnext; } + /* get the DMA records for the setup */ - for (i=0; iSG[i].Len = tmp_sg[i].len; - temp64.val = (__u64) pci_map_single( h->pdev, - tmp_sg[i].start_addr, - tmp_sg[i].len, - (c->Request.Type.Direction == XFER_READ) ? 
- PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + if (c->Request.Type.Direction == XFER_READ) + ddir = PCI_DMA_FROMDEVICE; + else + ddir = PCI_DMA_TODEVICE; + for (i=0; iSG[i].Len = tmp_sg[i].length; + temp64.val = pci_map_page(h->pdev, tmp_sg[i].page, + tmp_sg[i].offset, tmp_sg[i].length, ddir); c->SG[i].Addr.lower = temp64.val32.lower; c->SG[i].Addr.upper = temp64.val32.upper; c->SG[i].Ext = 0; // we are not chaining @@ -1423,7 +1427,7 @@ h->maxSG = seg; #ifdef CCISS_DEBUG - printk(KERN_DEBUG "cciss: Submitting %d sectors in %d segments\n", creq->nr_sectors, seg); + printk(KERN_DEBUG "cciss: Submitting %d sectors in %d segments\n", sect, seg); #endif /* CCISS_DEBUG */ c->Header.SGList = c->Header.SGTotal = seg; @@ -1444,7 +1448,8 @@ if(h->Qdepth > h->maxQsinceinit) h->maxQsinceinit = h->Qdepth; - goto queue_next; + goto next; + startio: start_io(h); } @@ -1969,7 +1974,18 @@ sprintf(hba[i]->devname, "cciss%d", i); hba[i]->ctlr = i; hba[i]->pdev = pdev; - + + /* configure PCI DMA stuff */ + if (!pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff)) + printk("cciss: using DAC cycles\n"); + else if (!pci_set_dma_mask(pdev, (u64) 0xffffffff)) + printk("cciss: not using DAC cycles\n"); + else { + printk("cciss: no suitable DMA available\n"); + free_hba(i); + return -ENODEV; + } + if( register_blkdev(MAJOR_NR+i, hba[i]->devname, &cciss_fops)) { printk(KERN_ERR "cciss: Unable to get major number " @@ -2043,9 +2059,10 @@ cciss_procinit(i); q = BLK_DEFAULT_QUEUE(MAJOR_NR + i); - q->queuedata = hba[i]; - blk_init_queue(q, do_cciss_request); - blk_queue_headactive(q, 0); + q->queuedata = hba[i]; + blk_init_queue(q, do_cciss_request); + blk_queue_bounce_limit(q, hba[i]->pdev->dma_mask); + blk_queue_headactive(q, 0); /* fill in the other Kernel structs */ blksize_size[MAJOR_NR+i] = hba[i]->blocksizes; diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/block/cciss.h linux/drivers/block/cciss.h --- /opt/kernel/linux-2.4.20-pre1/drivers/block/cciss.h 2002-08-03 02:39:43.000000000 +0200 +++ linux/drivers/block/cciss.h 2002-08-06 09:41:24.000000000 +0200 @@ -15,11 +15,6 @@ #define MAJOR_NR COMPAQ_CISS_MAJOR -struct my_sg { - int len; - char *start_addr; -}; - struct ctlr_info; typedef struct ctlr_info ctlr_info_t; diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/block/cpqarray.c linux/drivers/block/cpqarray.c --- /opt/kernel/linux-2.4.20-pre1/drivers/block/cpqarray.c 2002-08-03 02:39:43.000000000 +0200 +++ linux/drivers/block/cpqarray.c 2002-08-06 08:49:52.000000000 +0200 @@ -443,7 +443,7 @@ static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, struct buffer_head *bh, int max_segments) { - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) + if (blk_seg_merge_ok(rq->bhtail, bh)) return 1; return cpq_new_segment(q, rq, max_segments); } @@ -451,7 +451,7 @@ static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, struct buffer_head *bh, int max_segments) { - if (bh->b_data + bh->b_size == rq->bh->b_data) + if (blk_seg_merge_ok(bh, rq->bh)) return 1; return cpq_new_segment(q, rq, max_segments); } @@ -461,7 +461,7 @@ { int total_segments = rq->nr_segments + nxt->nr_segments; - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) + if (blk_seg_merge_ok(rq->bhtail, nxt->bh)) total_segments--; if (total_segments > SG_MAX) @@ -566,6 +566,7 @@ q = BLK_DEFAULT_QUEUE(MAJOR_NR + i); q->queuedata = hba[i]; blk_init_queue(q, do_ida_request); + blk_queue_bounce_limit(q, hba[i]->pci_dev->dma_mask); blk_queue_headactive(q, 0); 
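
The cciss and cpqarray conversions above follow one pattern; a condensed sketch of the driver-side sequence, with placeholder "mydev_*" names and error handling trimmed, looks like this:

static void mydev_request(request_queue_t *q);	/* the driver's request_fn */

static int mydev_setup_queue(struct pci_dev *pdev, request_queue_t *q)
{
	/* tell the PCI layer what the controller can address ... */
	if (pci_set_dma_mask(pdev, (u64) 0xffffffff))
		return -ENODEV;			/* not even 32-bit capable */

	blk_init_queue(q, mydev_request);

	/* ... and have the block layer bounce only pages above that */
	blk_queue_bounce_limit(q, pdev->dma_mask);
	return 0;
}

/* mapping a built-up scatterlist element: page/offset, never ->address */
static dma_addr_t mydev_map_seg(struct pci_dev *pdev, struct scatterlist *sg,
				int ddir)
{
	return pci_map_page(pdev, sg->page, sg->offset, sg->length, ddir);
}
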
blksize_size[MAJOR_NR+i] = hba[i]->blocksizes; hardsect_size[MAJOR_NR+i] = hba[i]->hardsizes; @@ -966,17 +967,17 @@ { ctlr_info_t *h = q->queuedata; cmdlist_t *c; - char *lastdataend; + unsigned long lastdataend; struct list_head * queue_head = &q->queue_head; struct buffer_head *bh; struct request *creq; - struct my_sg tmp_sg[SG_MAX]; + struct scatterlist tmp_sg[SG_MAX]; int i, seg; if (q->plugged) goto startio; -queue_next: +next: if (list_empty(queue_head)) goto startio; @@ -1017,17 +1018,19 @@ printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors); ); - seg = 0; lastdataend = NULL; + seg = 0; + lastdataend = ~0UL; while(bh) { - if (bh->b_data == lastdataend) { - tmp_sg[seg-1].size += bh->b_size; + if (bh_phys(bh) == lastdataend) { + tmp_sg[seg-1].length += bh->b_size; lastdataend += bh->b_size; } else { if (seg == SG_MAX) BUG(); - tmp_sg[seg].size = bh->b_size; - tmp_sg[seg].start_addr = bh->b_data; - lastdataend = bh->b_data + bh->b_size; + tmp_sg[seg].page = bh->b_page; + tmp_sg[seg].length = bh->b_size; + tmp_sg[seg].offset = bh_offset(bh); + lastdataend = bh_phys(bh) + bh->b_size; seg++; } bh = bh->b_reqnext; @@ -1035,10 +1038,10 @@ /* Now do all the DMA Mappings */ for( i=0; i < seg; i++) { - c->req.sg[i].size = tmp_sg[i].size; - c->req.sg[i].addr = (__u32) pci_map_single( - h->pci_dev, tmp_sg[i].start_addr, - tmp_sg[i].size, + c->req.sg[i].size = tmp_sg[i].length; + c->req.sg[i].addr = (__u32) pci_map_page( + h->pci_dev, tmp_sg[i].page, tmp_sg[i].offset, + tmp_sg[i].length, (creq->cmd == READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); } @@ -1056,7 +1059,7 @@ if (h->Qdepth > h->maxQsinceinit) h->maxQsinceinit = h->Qdepth; - goto queue_next; + goto next; startio: start_io(h); @@ -1132,17 +1135,14 @@ /* unmap the DMA mapping for all the scatter gather elements */ for(i=0; ireq.hdr.sg_cnt; i++) { - pci_unmap_single(hba[cmd->ctlr]->pci_dev, + pci_unmap_page(hba[cmd->ctlr]->pci_dev, cmd->req.sg[i].addr, cmd->req.sg[i].size, (cmd->req.hdr.cmd == IDA_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); } complete_buffers(cmd->rq->bh, ok); - DBGPX(printk("Done with %p\n", cmd->rq);); end_that_request_last(cmd->rq); - - } /* diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/block/cpqarray.h linux/drivers/block/cpqarray.h --- /opt/kernel/linux-2.4.20-pre1/drivers/block/cpqarray.h 2002-08-03 02:39:43.000000000 +0200 +++ linux/drivers/block/cpqarray.h 2002-08-06 09:41:23.000000000 +0200 @@ -57,11 +57,6 @@ #ifdef __KERNEL__ -struct my_sg { - int size; - char *start_addr; -}; - struct ctlr_info; typedef struct ctlr_info ctlr_info_t; diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c --- /opt/kernel/linux-2.4.20-pre1/drivers/block/ll_rw_blk.c 2002-08-03 02:39:43.000000000 +0200 +++ linux/drivers/block/ll_rw_blk.c 2002-08-06 09:11:14.000000000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -117,6 +118,9 @@ */ int * max_sectors[MAX_BLKDEV]; +unsigned long blk_max_low_pfn, blk_max_pfn; +int blk_nohighio = 0; + static inline int get_max_sectors(kdev_t dev) { if (!max_sectors[MAJOR(dev)]) @@ -238,6 +242,54 @@ q->make_request_fn = mfn; } +/** + * blk_queue_bounce_limit - set bounce buffer limit for queue + * @q: the request queue for the device + * @dma_addr: bus address limit + * + * Description: + * Different hardware can have different requirements as to what pages + * it can do I/O directly to. 
A low level driver can call + * blk_queue_bounce_limit to have lower memory pages allocated as bounce + * buffers for doing I/O to pages residing above @page. By default + * the block layer sets this to the highest numbered "low" memory page. + **/ +void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) +{ + unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; + unsigned long mb = dma_addr >> 20; + static request_queue_t *old_q; + + /* + * keep this for debugging for now... + */ + if (dma_addr != BLK_BOUNCE_HIGH && q != old_q) { + old_q = q; + printk("blk: queue %p, ", q); + if (dma_addr == BLK_BOUNCE_ANY) + printk("no I/O memory limit\n"); + else + printk("I/O limit %luMb (mask 0x%Lx)\n", mb, (u64) dma_addr); + } + + q->bounce_pfn = bounce_pfn; +} + + +/* + * can we merge the two segments, or do we need to start a new one? + */ +inline int blk_seg_merge_ok(struct buffer_head *bh, struct buffer_head *nxt) +{ + /* + * if bh and nxt are contigous and don't cross a 4g boundary, it's ok + */ + if (BH_CONTIG(bh, nxt) && BH_PHYS_4G(bh, nxt)) + return 1; + + return 0; +} + static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) { if (req->nr_segments < max_segments) { @@ -250,16 +302,18 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req, struct buffer_head *bh, int max_segments) { - if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) + if (blk_seg_merge_ok(req->bhtail, bh)) return 1; + return ll_new_segment(q, req, max_segments); } static int ll_front_merge_fn(request_queue_t *q, struct request *req, struct buffer_head *bh, int max_segments) { - if (bh->b_data + bh->b_size == req->bh->b_data) + if (blk_seg_merge_ok(bh, req->bh)) return 1; + return ll_new_segment(q, req, max_segments); } @@ -268,9 +322,9 @@ { int total_segments = req->nr_segments + next->nr_segments; - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + if (blk_seg_merge_ok(req->bhtail, next->bh)) total_segments--; - + if (total_segments > max_segments) return 0; @@ -444,6 +498,8 @@ */ q->plug_device_fn = generic_plug_device; q->head_active = 1; + + blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); } #define blkdev_free_rq(list) list_entry((list)->next, struct request, queue); @@ -540,7 +596,7 @@ if (q->rq[rw].count == 0) schedule(); spin_lock_irq(&io_request_lock); - rq = get_request(q,rw); + rq = get_request(q, rw); spin_unlock_irq(&io_request_lock); } while (rq == NULL); remove_wait_queue(&q->wait_for_requests[rw], &wait); @@ -866,9 +922,7 @@ * driver. Create a bounce buffer if the buffer data points into * high memory - keep the original buffer otherwise. */ -#if CONFIG_HIGHMEM - bh = create_bounce(rw, bh); -#endif + bh = blk_queue_bounce(q, rw, bh); /* look for a free request. */ /* @@ -918,8 +972,13 @@ elevator->elevator_merge_cleanup_fn(q, req, count); bh->b_reqnext = req->bh; req->bh = bh; + /* + * may not be valid, but queues not having bounce + * enabled for highmem pages must not look at + * ->buffer anyway + */ req->buffer = bh->b_data; - req->current_nr_sectors = count; + req->current_nr_sectors = req->hard_cur_sectors = count; req->sector = req->hard_sector = sector; req->nr_sectors = req->hard_nr_sectors += count; blk_started_io(count); @@ -978,7 +1037,7 @@ req->errors = 0; req->hard_sector = req->sector = sector; req->hard_nr_sectors = req->nr_sectors = count; - req->current_nr_sectors = count; + req->current_nr_sectors = req->hard_cur_sectors = count; req->nr_segments = 1; /* Always 1 for a new request. 
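
A worked example of how the argument to blk_queue_bounce_limit() turns into the queue's bounce_pfn, assuming i386 with PAGE_SHIFT == 12:

static void bounce_limit_examples(request_queue_t *q)
{
	blk_queue_bounce_limit(q, 0xffffffffULL);
	/* bounce_pfn = 0xffffffff >> 12 = 0xfffff: pages below 4GB are
	 * handed to the driver as-is, anything above gets bounced */

	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
	/* the blk_init_queue() default: bounce_pfn = blk_max_low_pfn, so
	 * every highmem page is first copied into a low-memory bounce
	 * buffer -- the pre-patch behaviour for all queues */
}
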
*/ req->nr_hw_segments = 1; /* Always 1 for a new request. */ req->buffer = bh->b_data; @@ -1286,6 +1345,7 @@ req->nr_sectors = req->hard_nr_sectors; req->current_nr_sectors = bh->b_size >> 9; + req->hard_cur_sectors = req->current_nr_sectors; if (req->nr_sectors < req->current_nr_sectors) { req->nr_sectors = req->current_nr_sectors; printk("end_request: buffer-list destroyed\n"); @@ -1324,6 +1384,9 @@ memset(max_readahead, 0, sizeof(max_readahead)); memset(max_sectors, 0, sizeof(max_sectors)); + blk_max_low_pfn = max_low_pfn - 1; + blk_max_pfn = max_pfn - 1; + #ifdef CONFIG_AMIGA_Z2RAM z2_init(); #endif @@ -1441,3 +1504,8 @@ EXPORT_SYMBOL(blkdev_release_request); EXPORT_SYMBOL(req_finished_io); EXPORT_SYMBOL(generic_unplug_device); +EXPORT_SYMBOL(blk_queue_bounce_limit); +EXPORT_SYMBOL(blk_max_low_pfn); +EXPORT_SYMBOL(blk_max_pfn); +EXPORT_SYMBOL(blk_seg_merge_ok); +EXPORT_SYMBOL(blk_nohighio); diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/block/loop.c linux/drivers/block/loop.c --- /opt/kernel/linux-2.4.20-pre1/drivers/block/loop.c 2002-08-03 02:39:43.000000000 +0200 +++ linux/drivers/block/loop.c 2002-06-18 12:57:37.000000000 +0200 @@ -483,9 +483,7 @@ goto err; } -#if CONFIG_HIGHMEM - rbh = create_bounce(rw, rbh); -#endif + rbh = blk_queue_bounce(q, rw, rbh); /* * file backed, queue for loop_thread to handle diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/ide/ide-disk.c linux/drivers/ide/ide-disk.c --- /opt/kernel/linux-2.4.20-pre1/drivers/ide/ide-disk.c 2002-08-03 02:39:44.000000000 +0200 +++ linux/drivers/ide/ide-disk.c 2002-07-03 11:40:23.000000000 +0200 @@ -29,6 +29,7 @@ * Version 1.10 request queue changes, Ultra DMA 100 * Version 1.11 added 48-bit lba * Version 1.12 adding taskfile io access method + * Highmem I/O support, Jens Axboe */ #define IDEDISK_VERSION "1.12" @@ -158,7 +159,9 @@ byte stat; int i; unsigned int msect, nsect; + unsigned long flags; struct request *rq; + char *to; /* new way for dealing with premature shared PCI interrupts */ if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) { @@ -169,8 +172,8 @@ ide_set_handler(drive, &read_intr, WAIT_CMD, NULL); return ide_started; } + msect = drive->mult_count; - read_next: rq = HWGROUP(drive)->rq; if (msect) { @@ -179,14 +182,15 @@ msect -= nsect; } else nsect = 1; - idedisk_input_data(drive, rq->buffer, nsect * SECTOR_WORDS); + to = ide_map_buffer(rq, &flags); + idedisk_input_data(drive, to, nsect * SECTOR_WORDS); #ifdef DEBUG printk("%s: read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n", drive->name, rq->sector, rq->sector+nsect-1, (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect); #endif + ide_unmap_buffer(to, &flags); rq->sector += nsect; - rq->buffer += nsect<<9; rq->errors = 0; i = (rq->nr_sectors -= nsect); if (((long)(rq->current_nr_sectors -= nsect)) <= 0) @@ -220,14 +224,16 @@ #endif if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) { rq->sector++; - rq->buffer += 512; rq->errors = 0; i = --rq->nr_sectors; --rq->current_nr_sectors; if (((long)rq->current_nr_sectors) <= 0) ide_end_request(1, hwgroup); if (i > 0) { - idedisk_output_data (drive, rq->buffer, SECTOR_WORDS); + unsigned long flags; + char *to = ide_map_buffer(rq, &flags); + idedisk_output_data (drive, to, SECTOR_WORDS); + ide_unmap_buffer(to, &flags); ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); return ide_started; } @@ -257,14 +263,14 @@ do { char *buffer; int nsect = rq->current_nr_sectors; - + unsigned long flags; + if (nsect > mcount) nsect = mcount; mcount 
-= nsect; - buffer = rq->buffer; + buffer = ide_map_buffer(rq, &flags); rq->sector += nsect; - rq->buffer += nsect << 9; rq->nr_sectors -= nsect; rq->current_nr_sectors -= nsect; @@ -278,7 +284,7 @@ } else { rq->bh = bh; rq->current_nr_sectors = bh->b_size >> 9; - rq->buffer = bh->b_data; + rq->hard_cur_sectors = rq->current_nr_sectors; } } @@ -287,6 +293,7 @@ * re-entering us on the last transfer. */ idedisk_output_data(drive, buffer, nsect<<7); + ide_unmap_buffer(buffer, &flags); } while (mcount); return 0; @@ -695,8 +702,11 @@ return ide_stopped; } } else { + unsigned long flags; + char *buffer = ide_map_buffer(rq, &flags); ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); - idedisk_output_data(drive, rq->buffer, SECTOR_WORDS); + idedisk_output_data(drive, buffer, SECTOR_WORDS); + ide_unmap_buffer(buffer, &flags); } return ide_started; } diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/ide/ide-dma.c linux/drivers/ide/ide-dma.c --- /opt/kernel/linux-2.4.20-pre1/drivers/ide/ide-dma.c 2002-08-03 02:39:44.000000000 +0200 +++ linux/drivers/ide/ide-dma.c 2002-08-06 10:26:16.000000000 +0200 @@ -252,33 +252,53 @@ { struct buffer_head *bh; struct scatterlist *sg = hwif->sg_table; + unsigned long lastdataend = ~0UL; int nents = 0; if (hwif->sg_dma_active) BUG(); - + if (rq->cmd == READ) hwif->sg_dma_direction = PCI_DMA_FROMDEVICE; else hwif->sg_dma_direction = PCI_DMA_TODEVICE; + bh = rq->bh; do { - unsigned char *virt_addr = bh->b_data; - unsigned int size = bh->b_size; + struct scatterlist *sge; + /* + * continue segment from before? + */ + if (bh_phys(bh) == lastdataend) { + sg[nents - 1].length += bh->b_size; + lastdataend += bh->b_size; + continue; + } + + /* + * start new segment + */ if (nents >= PRD_ENTRIES) return 0; - while ((bh = bh->b_reqnext) != NULL) { - if ((virt_addr + size) != (unsigned char *) bh->b_data) - break; - size += bh->b_size; + sge = &sg[nents]; + memset(sge, 0, sizeof(*sge)); + + if (bh->b_page) { + sge->page = bh->b_page; + sge->offset = bh_offset(bh); + } else { + if (((unsigned long) bh->b_data) < PAGE_SIZE) + BUG(); + + sge->address = bh->b_data; } - memset(&sg[nents], 0, sizeof(*sg)); - sg[nents].address = virt_addr; - sg[nents].length = size; + + sge->length = bh->b_size; + lastdataend = bh_phys(bh) + bh->b_size; nents++; - } while (bh != NULL); + } while ((bh = bh->b_reqnext) != NULL); return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction); } @@ -340,7 +360,7 @@ return 0; sg = HWIF(drive)->sg_table; - while (i && sg_dma_len(sg)) { + while (i) { u32 cur_addr; u32 cur_len; @@ -354,36 +374,35 @@ */ while (cur_len) { - if (count++ >= PRD_ENTRIES) { - printk("%s: DMA table too small\n", drive->name); - goto use_pio_instead; - } else { - u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); + u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); + + if (count++ >= PRD_ENTRIES) + BUG(); + + if (bcount > cur_len) + bcount = cur_len; + *table++ = cpu_to_le32(cur_addr); + xcount = bcount & 0xffff; + if (is_trm290_chipset) + xcount = ((xcount >> 2) - 1) << 16; + if (xcount == 0x0000) { + /* + * Most chipsets correctly interpret a length + * of 0x0000 as 64KB, but at least one + * (e.g. CS5530) misinterprets it as zero (!). + * So here we break the 64KB entry into two + * 32KB entries instead. 
+ */ + if (count++ >= PRD_ENTRIES) + goto use_pio_instead; - if (bcount > cur_len) - bcount = cur_len; - *table++ = cpu_to_le32(cur_addr); - xcount = bcount & 0xffff; - if (is_trm290_chipset) - xcount = ((xcount >> 2) - 1) << 16; - if (xcount == 0x0000) { - /* - * Most chipsets correctly interpret a length of 0x0000 as 64KB, - * but at least one (e.g. CS5530) misinterprets it as zero (!). - * So here we break the 64KB entry into two 32KB entries instead. - */ - if (count++ >= PRD_ENTRIES) { - printk("%s: DMA table too small\n", drive->name); - goto use_pio_instead; - } - *table++ = cpu_to_le32(0x8000); - *table++ = cpu_to_le32(cur_addr + 0x8000); - xcount = 0x8000; - } - *table++ = cpu_to_le32(xcount); - cur_addr += bcount; - cur_len -= bcount; + *table++ = cpu_to_le32(0x8000); + *table++ = cpu_to_le32(cur_addr + 0x8000); + xcount = 0x8000; } + *table++ = cpu_to_le32(xcount); + cur_addr += bcount; + cur_len -= bcount; } sg++; @@ -584,6 +603,23 @@ } #endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */ +static inline void ide_toggle_bounce(ide_drive_t *drive, int on) +{ + dma64_addr_t addr = BLK_BOUNCE_HIGH; + + if (HWIF(drive)->no_highio || HWIF(drive)->pci_dev == NULL) + return; + + if (on && drive->media == ide_disk) { + if (!PCI_DMA_BUS_IS_PHYS) + addr = BLK_BOUNCE_ANY; + else + addr = HWIF(drive)->pci_dev->dma_mask; + } + + blk_queue_bounce_limit(&drive->queue, addr); +} + /* * ide_dmaproc() initiates/aborts DMA read/write operations on a drive. * @@ -606,18 +642,20 @@ ide_hwif_t *hwif = HWIF(drive); unsigned long dma_base = hwif->dma_base; byte unit = (drive->select.b.unit & 0x01); - unsigned int count, reading = 0; + unsigned int count, reading = 0, set_high = 1; byte dma_stat; switch (func) { case ide_dma_off: printk("%s: DMA disabled\n", drive->name); case ide_dma_off_quietly: + set_high = 0; outb(inb(dma_base+2) & ~(1<<(5+unit)), dma_base+2); case ide_dma_on: drive->using_dma = (func == ide_dma_on); if (drive->using_dma) outb(inb(dma_base+2)|(1<<(5+unit)), dma_base+2); + ide_toggle_bounce(drive, set_high); return 0; case ide_dma_check: return config_drive_for_dma (drive); @@ -759,8 +797,8 @@ request_region(dma_base, num_ports, hwif->name); hwif->dma_base = dma_base; hwif->dmatable_cpu = pci_alloc_consistent(hwif->pci_dev, - PRD_ENTRIES * PRD_BYTES, - &hwif->dmatable_dma); + PRD_ENTRIES * PRD_BYTES, + &hwif->dmatable_dma); if (hwif->dmatable_cpu == NULL) goto dma_alloc_failure; diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/ide/ide.c linux/drivers/ide/ide.c --- /opt/kernel/linux-2.4.20-pre1/drivers/ide/ide.c 2002-08-03 02:39:44.000000000 +0200 +++ linux/drivers/ide/ide.c 2002-06-20 08:29:43.000000000 +0200 @@ -3418,7 +3418,7 @@ const char *ide_words[] = { "noprobe", "serialize", "autotune", "noautotune", "reset", "dma", "ata66", "minus8", "minus9", "minus10", - "four", "qd65xx", "ht6560b", "cmd640_vlb", "dtc2278", "umc8672", "ali14xx", "dc4030", NULL }; + "four", "qd65xx", "ht6560b", "cmd640_vlb", "dtc2278", "umc8672", "ali14xx", "dc4030", "nohighio", NULL }; hw = s[3] - '0'; hwif = &ide_hwifs[hw]; i = match_parm(&s[4], ide_words, vals, 3); @@ -3437,6 +3437,10 @@ } switch (i) { + case -19: /* nohighio */ + hwif->no_highio = 1; + printk("%s: disabled high i/o capability\n", hwif->name); + goto done; #ifdef CONFIG_BLK_DEV_PDC4030 case -18: /* "dc4030" */ { diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/scsi/aic7xxx/aic7xxx_host.h linux/drivers/scsi/aic7xxx/aic7xxx_host.h --- 
/opt/kernel/linux-2.4.20-pre1/drivers/scsi/aic7xxx/aic7xxx_host.h 2002-08-03 02:39:44.000000000 +0200 +++ linux/drivers/scsi/aic7xxx/aic7xxx_host.h 2002-06-18 12:57:37.000000000 +0200 @@ -89,7 +89,8 @@ present: 0, /* number of 7xxx's present */\ unchecked_isa_dma: 0, /* no memory DMA restrictions*/\ use_clustering: ENABLE_CLUSTERING, \ - use_new_eh_code: 1 \ + use_new_eh_code: 1, \ + highmem_io: 1, \ } #endif /* _AIC7XXX_HOST_H_ */ diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/scsi/aic7xxx_old/aic7xxx.h linux/drivers/scsi/aic7xxx_old/aic7xxx.h --- /opt/kernel/linux-2.4.20-pre1/drivers/scsi/aic7xxx_old/aic7xxx.h 2001-03-04 23:30:18.000000000 +0100 +++ linux/drivers/scsi/aic7xxx_old/aic7xxx.h 2002-06-18 12:57:37.000000000 +0200 @@ -55,7 +55,8 @@ present: 0, /* number of 7xxx's present */\ unchecked_isa_dma: 0, /* no memory DMA restrictions */\ use_clustering: ENABLE_CLUSTERING, \ - use_new_eh_code: 0 \ + use_new_eh_code: 0, \ + highmem_io: 1 \ } extern int aic7xxx_queue(Scsi_Cmnd *, void (*)(Scsi_Cmnd *)); diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/scsi/hosts.c linux/drivers/scsi/hosts.c --- /opt/kernel/linux-2.4.20-pre1/drivers/scsi/hosts.c 2002-02-25 20:38:04.000000000 +0100 +++ linux/drivers/scsi/hosts.c 2002-06-18 12:57:37.000000000 +0200 @@ -129,7 +129,7 @@ * once we are 100% sure that we want to use this host adapter - it is a * pain to reverse this, so we try to avoid it */ - +extern int blk_nohighio; struct Scsi_Host * scsi_register(Scsi_Host_Template * tpnt, int j){ struct Scsi_Host * retval, *shpnt, *o_shp; Scsi_Host_Name *shn, *shn2; @@ -235,6 +235,8 @@ retval->cmd_per_lun = tpnt->cmd_per_lun; retval->unchecked_isa_dma = tpnt->unchecked_isa_dma; retval->use_clustering = tpnt->use_clustering; + if (!blk_nohighio) + retval->highmem_io = tpnt->highmem_io; retval->select_queue_depths = tpnt->select_queue_depths; retval->max_sectors = tpnt->max_sectors; diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/scsi/hosts.h linux/drivers/scsi/hosts.h --- /opt/kernel/linux-2.4.20-pre1/drivers/scsi/hosts.h 2002-02-25 20:38:04.000000000 +0100 +++ linux/drivers/scsi/hosts.h 2002-08-06 16:57:59.000000000 +0200 @@ -291,6 +291,8 @@ */ unsigned emulated:1; + unsigned highmem_io:1; + /* * Name of proc directory */ @@ -390,6 +392,8 @@ unsigned in_recovery:1; unsigned unchecked_isa_dma:1; unsigned use_clustering:1; + unsigned highmem_io:1; + /* * True if this host was loaded as a loadable module */ diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/scsi/ide-scsi.c linux/drivers/scsi/ide-scsi.c --- /opt/kernel/linux-2.4.20-pre1/drivers/scsi/ide-scsi.c 2002-08-03 02:39:44.000000000 +0200 +++ linux/drivers/scsi/ide-scsi.c 2002-06-18 12:57:37.000000000 +0200 @@ -739,7 +739,7 @@ int segments = pc->scsi_cmd->use_sg; struct scatterlist *sg = pc->scsi_cmd->request_buffer; - if (!drive->using_dma || !pc->request_transfer || pc->request_transfer % 1024) + if (!drive->using_dma || !pc->request_transfer || pc->request_transfer & 1023) return NULL; if (idescsi_set_direction(pc)) return NULL; @@ -750,12 +750,22 @@ printk ("ide-scsi: %s: building DMA table, %d segments, %dkB total\n", drive->name, segments, pc->request_transfer >> 10); #endif /* IDESCSI_DEBUG_LOG */ while (segments--) { - bh->b_data = sg->address; + if (sg->address) { + bh->b_page = virt_to_page(sg->address); + bh->b_data = (char *) ((unsigned long) sg->address & ~PAGE_MASK); + } else if (sg->page) { + bh->b_page = sg->page; + bh->b_data 
= (char *) sg->offset; + } + bh->b_size = sg->length; bh = bh->b_reqnext; sg++; } } else { + /* + * non-sg requests are guarenteed not to reside in highmem /jens + */ if ((first_bh = bh = idescsi_kmalloc_bh (1)) == NULL) return NULL; #if IDESCSI_DEBUG_LOG diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/scsi/megaraid.h linux/drivers/scsi/megaraid.h --- /opt/kernel/linux-2.4.20-pre1/drivers/scsi/megaraid.h 2002-08-06 16:57:21.000000000 +0200 +++ linux/drivers/scsi/megaraid.h 2002-08-06 16:53:30.000000000 +0200 @@ -223,7 +223,8 @@ cmd_per_lun: MAX_CMD_PER_LUN, /* SCSI Commands per LUN */\ present: 0, /* Present */\ unchecked_isa_dma: 0, /* Default Unchecked ISA DMA */\ - use_clustering: ENABLE_CLUSTERING /* Enable Clustering */\ + use_clustering: ENABLE_CLUSTERING, /* Enable Clustering */\ + highmem_io: 1, /* enable HIGHMEM I/O */ \ } #endif diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/scsi/qlogicfc.h linux/drivers/scsi/qlogicfc.h --- /opt/kernel/linux-2.4.20-pre1/drivers/scsi/qlogicfc.h 2001-10-21 19:36:54.000000000 +0200 +++ linux/drivers/scsi/qlogicfc.h 2002-06-18 12:57:37.000000000 +0200 @@ -95,7 +95,8 @@ cmd_per_lun: QLOGICFC_CMD_PER_LUN, \ present: 0, \ unchecked_isa_dma: 0, \ - use_clustering: ENABLE_CLUSTERING \ + use_clustering: ENABLE_CLUSTERING, \ + highmem_io: 1 \ } #endif /* _QLOGICFC_H */ diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/scsi/scsi.c linux/drivers/scsi/scsi.c --- /opt/kernel/linux-2.4.20-pre1/drivers/scsi/scsi.c 2002-08-03 02:39:44.000000000 +0200 +++ linux/drivers/scsi/scsi.c 2002-06-18 12:57:37.000000000 +0200 @@ -191,10 +191,13 @@ * handler in the list - ultimately they call scsi_request_fn * to do the dirty deed. */ -void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) { - blk_init_queue(&SDpnt->request_queue, scsi_request_fn); - blk_queue_headactive(&SDpnt->request_queue, 0); - SDpnt->request_queue.queuedata = (void *) SDpnt; +void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) +{ + request_queue_t *q = &SDpnt->request_queue; + + blk_init_queue(q, scsi_request_fn); + blk_queue_headactive(q, 0); + q->queuedata = (void *) SDpnt; } #ifdef MODULE diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/scsi/scsi.h linux/drivers/scsi/scsi.h --- /opt/kernel/linux-2.4.20-pre1/drivers/scsi/scsi.h 2002-08-03 02:39:44.000000000 +0200 +++ linux/drivers/scsi/scsi.h 2002-08-06 16:57:59.000000000 +0200 @@ -386,15 +386,6 @@ #define ASKED_FOR_SENSE 0x20 #define SYNC_RESET 0x40 -#if defined(__mc68000__) || defined(CONFIG_APUS) -#include -#define CONTIGUOUS_BUFFERS(X,Y) \ - (virt_to_phys((X)->b_data+(X)->b_size-1)+1==virt_to_phys((Y)->b_data)) -#else -#define CONTIGUOUS_BUFFERS(X,Y) ((X->b_data+X->b_size) == Y->b_data) -#endif - - /* * This is the crap from the old error handling code. We have it in a special * place so that we can more easily delete it later on. 
@@ -633,7 +624,7 @@ struct scatterlist *buffer; /* which buffer */ int buffers_residual; /* how many buffers left */ - dma_addr_t dma_handle; + dma_addr_t dma_handle; volatile int Status; volatile int Message; diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/scsi/scsi_lib.c linux/drivers/scsi/scsi_lib.c --- /opt/kernel/linux-2.4.20-pre1/drivers/scsi/scsi_lib.c 2002-08-06 16:57:21.000000000 +0200 +++ linux/drivers/scsi/scsi_lib.c 2002-08-06 09:35:33.000000000 +0200 @@ -360,9 +360,10 @@ int requeue, int frequeue) { + request_queue_t *q = &SCpnt->device->request_queue; struct request *req; struct buffer_head *bh; - Scsi_Device * SDpnt; + unsigned long flags; int nsect; ASSERT_LOCK(&io_request_lock, 0); @@ -388,6 +389,7 @@ req->nr_sectors -= nsect; req->current_nr_sectors = bh->b_size >> 9; + req->hard_cur_sectors = req->current_nr_sectors; if (req->nr_sectors < req->current_nr_sectors) { req->nr_sectors = req->current_nr_sectors; printk("scsi_end_request: buffer-list destroyed\n"); @@ -401,37 +403,29 @@ * to queue the remainder of them. */ if (req->bh) { - request_queue_t *q; - - if( !requeue ) - { - return SCpnt; - } - - q = &SCpnt->device->request_queue; - - req->buffer = bh->b_data; /* * Bleah. Leftovers again. Stick the leftovers in * the front of the queue, and goose the queue again. */ - scsi_queue_next_request(q, SCpnt); + if (requeue) + scsi_queue_next_request(q, SCpnt); + return SCpnt; } + /* * This request is done. If there is someone blocked waiting for this * request, wake them up. Typically used to wake up processes trying * to swap a page into memory. */ - if (req->waiting != NULL) { + if (req->waiting) complete(req->waiting); - } - spin_lock_irq(&io_request_lock); + + spin_lock_irqsave(&io_request_lock, flags); req_finished_io(req); - spin_unlock_irq(&io_request_lock); - add_blkdev_randomness(MAJOR(req->rq_dev)); + spin_unlock_irqrestore(&io_request_lock, flags); - SDpnt = SCpnt->device; + add_blkdev_randomness(MAJOR(req->rq_dev)); /* * This will goose the queue request function at the end, so we don't @@ -439,12 +433,9 @@ */ __scsi_release_command(SCpnt); - if( frequeue ) { - request_queue_t *q; + if (frequeue) + scsi_queue_next_request(q, NULL); - q = &SDpnt->request_queue; - scsi_queue_next_request(q, NULL); - } return NULL; } @@ -551,6 +542,7 @@ int result = SCpnt->result; int this_count = SCpnt->bufflen >> 9; request_queue_t *q = &SCpnt->device->request_queue; + struct request *req = &SCpnt->request; /* * We must do one of several things here: @@ -583,7 +575,7 @@ if (bbpnt) { for (i = 0; i < SCpnt->use_sg; i++) { if (bbpnt[i]) { - if (SCpnt->request.cmd == READ) { + if (req->cmd == READ) { memcpy(bbpnt[i], sgpnt[i].address, sgpnt[i].length); @@ -594,11 +586,11 @@ } scsi_free(SCpnt->buffer, SCpnt->sglist_len); } else { - if (SCpnt->buffer != SCpnt->request.buffer) { - if (SCpnt->request.cmd == READ) { - memcpy(SCpnt->request.buffer, SCpnt->buffer, - SCpnt->bufflen); - } + if (SCpnt->buffer != req->buffer) { + if (PageHighMem(req->bh->b_page)) + BUG(); + if (req->cmd == READ) + memcpy(req->buffer, SCpnt->buffer, SCpnt->bufflen); scsi_free(SCpnt->buffer, SCpnt->bufflen); } } @@ -622,7 +614,7 @@ good_sectors)); SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n ", SCpnt->use_sg)); - SCpnt->request.errors = 0; + req->errors = 0; /* * If multiple sectors are requested in one buffer, then * they will have been finished off by the first command. @@ -1051,17 +1043,25 @@ * get those allocated here. 
*/ if (!SDpnt->scsi_init_io_fn(SCpnt)) { - SCpnt = __scsi_end_request(SCpnt, 0, - SCpnt->request.nr_sectors, 0, 0); - if( SCpnt != NULL ) - { - panic("Should not have leftover blocks\n"); - } + /* + * probably we ran out of sgtable memory, or + * __init_io() wanted to revert to a single + * segment request. this would require bouncing + * on highmem i/o, so mark the device as + * starved and continue later instead + */ spin_lock_irq(&io_request_lock); SHpnt->host_busy--; SDpnt->device_busy--; - continue; + if (SDpnt->device_busy == 0) { + SDpnt->starved = 1; + SHpnt->some_device_starved = 1; + } + SCpnt->request.special = SCpnt; + list_add(&SCpnt->request.queue, &q->queue_head); + break; } + /* * Initialize the actual SCSI command for this request. */ diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/scsi/scsi_merge.c linux/drivers/scsi/scsi_merge.c --- /opt/kernel/linux-2.4.20-pre1/drivers/scsi/scsi_merge.c 2002-02-25 20:38:04.000000000 +0100 +++ linux/drivers/scsi/scsi_merge.c 2002-08-06 10:23:09.000000000 +0200 @@ -6,6 +6,7 @@ * Based upon conversations with large numbers * of people at Linux Expo. * Support for dynamic DMA mapping: Jakub Jelinek (jakub@redhat.com). + * Support for highmem I/O: Jens Axboe */ /* @@ -48,7 +49,6 @@ #include #include - #define __KERNEL_SYSCALLS__ #include @@ -95,7 +95,7 @@ printk("Segment 0x%p, blocks %d, addr 0x%lx\n", bh, bh->b_size >> 9, - virt_to_phys(bh->b_data - 1)); + bh_phys(bh) - 1); } panic("Ththththaats all folks. Too dangerous to continue.\n"); } @@ -216,11 +216,10 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(bhnext->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bh_phys(bhnext) - 1 == ISA_DMA_THRESHOLD) { ret++; reqsize = bhnext->b_size; - } else if (CONTIGUOUS_BUFFERS(bh, bhnext)) { + } else if (blk_seg_merge_ok(bh, bhnext)) { /* * This one is OK. Let it go. */ @@ -234,8 +233,7 @@ * kind of screwed and we need to start * another segment. */ - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD + if( dma_host && bh_phys(bh) - 1 >= ISA_DMA_THRESHOLD && reqsize + bhnext->b_size > PAGE_SIZE ) { ret++; @@ -297,7 +295,7 @@ } #define MERGEABLE_BUFFERS(X,Y) \ -(((((long)(X)->b_data+(X)->b_size)|((long)(Y)->b_data)) & \ +(((((long)bh_phys((X))+(X)->b_size)|((long)bh_phys((Y)))) & \ (DMA_CHUNK_SIZE - 1)) == 0) #ifdef DMA_CHUNK_SIZE @@ -413,6 +411,9 @@ if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) return 0; + if (!BH_PHYS_4G(req->bhtail, bh)) + return 0; + if (use_clustering) { /* * See if we can do this without creating another @@ -420,14 +421,11 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. 
*/ - if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bh_phys(req->bhtail) - 1 == ISA_DMA_THRESHOLD) goto new_end_segment; - } - if (CONTIGUOUS_BUFFERS(req->bhtail, bh)) { + if (BH_CONTIG(req->bhtail, bh)) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { + if (dma_host && bh_phys(bh) - 1 >= ISA_DMA_THRESHOLD) { segment_size = 0; count = __count_segments(req, use_clustering, dma_host, &segment_size); if( segment_size + bh->b_size > PAGE_SIZE ) { @@ -472,6 +470,9 @@ if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) return 0; + if (!BH_PHYS_4G(bh, req->bh)) + return 0; + if (use_clustering) { /* * See if we can do this without creating another @@ -479,14 +480,12 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(bh->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bh_phys(bh) - 1 == ISA_DMA_THRESHOLD) { goto new_start_segment; } - if (CONTIGUOUS_BUFFERS(bh, req->bh)) { + if (BH_CONTIG(bh, req->bh)) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { + if (dma_host && bh_phys(bh) - 1 >= ISA_DMA_THRESHOLD) { segment_size = bh->b_size; count = __count_segments(req, use_clustering, dma_host, &segment_size); if( count != req->nr_segments ) { @@ -634,6 +633,9 @@ if ((req->nr_sectors + next->nr_sectors) > SHpnt->max_sectors) return 0; + if (!BH_PHYS_4G(req->bhtail, next->bh)) + return 0; + /* * The main question is whether the two segments at the boundaries * would be considered one or two. @@ -645,18 +647,15 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bh_phys(req->bhtail) - 1 == ISA_DMA_THRESHOLD) goto dont_combine; - } #ifdef DMA_SEGMENT_SIZE_LIMITED /* * We currently can only allocate scatter-gather bounce * buffers in chunks of PAGE_SIZE or less. */ - if (dma_host - && CONTIGUOUS_BUFFERS(req->bhtail, next->bh) - && virt_to_phys(req->bhtail->b_data) - 1 >= ISA_DMA_THRESHOLD ) + if (dma_host && BH_CONTIG(req->bhtail, next->bh) + && bh_phys(req->bhtail) - 1 >= ISA_DMA_THRESHOLD) { int segment_size = 0; int count = 0; @@ -668,7 +667,7 @@ } } #endif - if (CONTIGUOUS_BUFFERS(req->bhtail, next->bh)) { + if (BH_CONTIG(req->bhtail, next->bh)) { /* * This one is OK. Let it go. */ @@ -796,37 +795,13 @@ char * buff; int count; int i; - struct request * req; + struct request * req = &SCpnt->request; int sectors; struct scatterlist * sgpnt; int this_count; void ** bbpnt; /* - * FIXME(eric) - don't inline this - it doesn't depend on the - * integer flags. Come to think of it, I don't think this is even - * needed any more. Need to play with it and see if we hit the - * panic. If not, then don't bother. - */ - if (!SCpnt->request.bh) { - /* - * Case of page request (i.e. raw device), or unlinked buffer - * Typically used for swapping, but this isn't how we do - * swapping any more. - */ - panic("I believe this is dead code. If we hit this, I was wrong"); -#if 0 - SCpnt->request_bufflen = SCpnt->request.nr_sectors << 9; - SCpnt->request_buffer = SCpnt->request.buffer; - SCpnt->use_sg = 0; - /* - * FIXME(eric) - need to handle DMA here. - */ -#endif - return 1; - } - req = &SCpnt->request; - /* * First we need to know how many scatter gather segments are needed. 
*/ if (!sg_count_valid) { @@ -841,21 +816,27 @@ * buffer. */ if (dma_host && scsi_dma_free_sectors <= 10) { - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; goto single_segment; } /* - * Don't bother with scatter-gather if there is only one segment. - */ - if (count == 1) { - this_count = SCpnt->request.nr_sectors; + * we really want to use sg even for a single segment request, + * however some people just cannot be bothered to write decent + * driver code so we can't risk to break somebody making the + * assumption that sg requests will always contain at least 2 + * segments. if the driver is 32-bit dma safe, then use sg for + * 1 entry anyways. if not, don't rely on the driver handling this + * case. + */ + if (count == 1 && !SCpnt->host->highmem_io) { + this_count = req->current_nr_sectors; goto single_segment; } - SCpnt->use_sg = count; - /* - * Allocate the actual scatter-gather table itself. + /* + * for sane drivers, use sg even for 1 entry request */ + SCpnt->use_sg = count; SCpnt->sglist_len = (SCpnt->use_sg * sizeof(struct scatterlist)); /* If we could potentially require ISA bounce buffers, allocate @@ -875,15 +856,25 @@ * Now fill the scatter-gather table. */ if (!sgpnt) { +#if 0 /* * If we cannot allocate the scatter-gather table, then * simply write the first buffer all by itself. */ printk("Warning - running *really* short on DMA buffers\n"); - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; goto single_segment; +#else + /* + * it's probably better to simply always back off a little, + * and let some memory be returned to dma pool instead of + * always falling back to (slow) single segments + */ + return 0; +#endif } - /* + + /* * Next, walk the list, and fill in the addresses and sizes of * each segment. */ @@ -900,13 +891,11 @@ SCpnt->bounce_buffers = bbpnt; - for (count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { + for (count = 0, bh = req->bh; bh; bh = bh->b_reqnext) { if (use_clustering && bhprev != NULL) { - if (dma_host && - virt_to_phys(bhprev->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bh_phys(bhprev) - 1 == ISA_DMA_THRESHOLD) { /* Nothing - fall through */ - } else if (CONTIGUOUS_BUFFERS(bhprev, bh)) { + } else if (blk_seg_merge_ok(bhprev, bh)) { /* * This one is OK. Let it go. 
Note that we * do not have the ability to allocate @@ -915,7 +904,7 @@ */ if( dma_host ) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( virt_to_phys(bh->b_data) - 1 < ISA_DMA_THRESHOLD + if (bh_phys(bh) - 1 < ISA_DMA_THRESHOLD || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) { sgpnt[count - 1].length += bh->b_size; bhprev = bh; @@ -934,13 +923,25 @@ } } } - count++; - sgpnt[count - 1].address = bh->b_data; - sgpnt[count - 1].page = NULL; - sgpnt[count - 1].length += bh->b_size; - if (!dma_host) { - SCpnt->request_bufflen += bh->b_size; + + if (SCpnt->host->highmem_io) { + sgpnt[count].page = bh->b_page; + sgpnt[count].offset = bh_offset(bh); + sgpnt[count].address = NULL; + } else { + if (PageHighMem(bh->b_page)) + BUG(); + + sgpnt[count].page = NULL; + sgpnt[count].address = bh->b_data; } + + sgpnt[count].length = bh->b_size; + + if (!dma_host) + SCpnt->request_bufflen += bh->b_size; + + count++; bhprev = bh; } @@ -963,6 +964,10 @@ for (i = 0; i < count; i++) { sectors = (sgpnt[i].length >> 9); SCpnt->request_bufflen += sgpnt[i].length; + /* + * only done for dma_host, in which case .page is not + * set since it's guarenteed to be a low memory page + */ if (virt_to_phys(sgpnt[i].address) + sgpnt[i].length - 1 > ISA_DMA_THRESHOLD) { if( scsi_dma_free_sectors - sectors <= 10 ) { @@ -998,7 +1003,7 @@ } break; } - if (SCpnt->request.cmd == WRITE) { + if (req->cmd == WRITE) { memcpy(sgpnt[i].address, bbpnt[i], sgpnt[i].length); } @@ -1043,8 +1048,7 @@ * single-block requests if we had hundreds of free sectors. */ if( scsi_dma_free_sectors > 30 ) { - for (this_count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { + for (this_count = 0, bh = req->bh; bh; bh = bh->b_reqnext) { if( scsi_dma_free_sectors - this_count < 30 || this_count == sectors ) { @@ -1057,21 +1061,32 @@ /* * Yow! Take the absolute minimum here. */ - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; } /* * Now drop through into the single-segment case. */ - single_segment: +single_segment: + /* + * for highmem cases, we have to revert to bouncing for single + * segments. rather just give up now and let the device starvation + * path reinitiate this i/o later + */ + if (SCpnt->host->highmem_io) + return 0; + /* * Come here if for any reason we choose to do this as a single * segment. Possibly the entire request, or possibly a small * chunk of the entire request. */ - bh = SCpnt->request.bh; - buff = SCpnt->request.buffer; + bh = req->bh; + buff = req->buffer = bh->b_data; + + if (PageHighMem(bh->b_page)) + BUG(); if (dma_host) { /* @@ -1079,21 +1094,21 @@ * back and allocate a really small one - enough to satisfy * the first buffer. 
*/ - if (virt_to_phys(SCpnt->request.bh->b_data) - + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) { + if (bh_phys(bh) + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) { buff = (char *) scsi_malloc(this_count << 9); if (!buff) { printk("Warning - running low on DMA memory\n"); - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; buff = (char *) scsi_malloc(this_count << 9); if (!buff) { dma_exhausted(SCpnt, 0); } } - if (SCpnt->request.cmd == WRITE) - memcpy(buff, (char *) SCpnt->request.buffer, this_count << 9); + if (req->cmd == WRITE) + memcpy(buff, (char *) req->buffer, this_count << 9); } } + SCpnt->request_bufflen = this_count << 9; SCpnt->request_buffer = buff; SCpnt->use_sg = 0; @@ -1132,21 +1147,11 @@ */ void initialize_merge_fn(Scsi_Device * SDpnt) { - request_queue_t *q; - struct Scsi_Host *SHpnt; - SHpnt = SDpnt->host; - - q = &SDpnt->request_queue; + struct Scsi_Host *SHpnt = SDpnt->host; + request_queue_t *q = &SDpnt->request_queue; + dma64_addr_t bounce_limit; /* - * If the host has already selected a merge manager, then don't - * pick a new one. - */ -#if 0 - if (q->back_merge_fn && q->front_merge_fn) - return; -#endif - /* * If this host has an unlimited tablesize, then don't bother with a * merge manager. The whole point of the operation is to make sure * that requests don't grow too large, and this host isn't picky. @@ -1178,4 +1183,20 @@ q->merge_requests_fn = scsi_merge_requests_fn_dc; SDpnt->scsi_init_io_fn = scsi_init_io_vdc; } + + /* + * now enable highmem I/O, if appropriate + */ + bounce_limit = BLK_BOUNCE_HIGH; + if (SHpnt->highmem_io && (SDpnt->type == TYPE_DISK)) { + if (!PCI_DMA_BUS_IS_PHYS) + /* Platforms with virtual-DMA translation + * hardware have no practical limit. + */ + bounce_limit = BLK_BOUNCE_ANY; + else + bounce_limit = SHpnt->pci_dev->dma_mask; + } + + blk_queue_bounce_limit(q, bounce_limit); } diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/scsi/sym53c8xx.h linux/drivers/scsi/sym53c8xx.h --- /opt/kernel/linux-2.4.20-pre1/drivers/scsi/sym53c8xx.h 2001-12-21 18:41:55.000000000 +0100 +++ linux/drivers/scsi/sym53c8xx.h 2002-06-18 12:57:37.000000000 +0200 @@ -97,7 +97,8 @@ sg_tablesize: SCSI_NCR_SG_TABLESIZE, \ cmd_per_lun: SCSI_NCR_CMD_PER_LUN, \ max_sectors: MAX_SEGMENTS*8, \ - use_clustering: DISABLE_CLUSTERING} + use_clustering: DISABLE_CLUSTERING, \ + highmem_io: 1} #else diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/scsi/sym53c8xx_2/sym53c8xx.h linux/drivers/scsi/sym53c8xx_2/sym53c8xx.h --- /opt/kernel/linux-2.4.20-pre1/drivers/scsi/sym53c8xx_2/sym53c8xx.h 2001-12-21 18:41:55.000000000 +0100 +++ linux/drivers/scsi/sym53c8xx_2/sym53c8xx.h 2002-06-18 12:57:37.000000000 +0200 @@ -119,7 +119,8 @@ this_id: 7, \ sg_tablesize: 0, \ cmd_per_lun: 0, \ - use_clustering: DISABLE_CLUSTERING} + use_clustering: DISABLE_CLUSTERING, \ + highmem_io: 1} #endif /* defined(HOSTS_C) || defined(MODULE) */ diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/drivers/scsi/sym53c8xx_2/sym_glue.c linux/drivers/scsi/sym53c8xx_2/sym_glue.c --- /opt/kernel/linux-2.4.20-pre1/drivers/scsi/sym53c8xx_2/sym_glue.c 2001-12-21 18:41:55.000000000 +0100 +++ linux/drivers/scsi/sym53c8xx_2/sym_glue.c 2002-06-18 12:57:37.000000000 +0200 @@ -2140,6 +2140,7 @@ instance->max_cmd_len = 16; #endif instance->select_queue_depths = sym53c8xx_select_queue_depths; + instance->highmem_io = 1; SYM_UNLOCK_HCB(np, flags); diff -urN -X /home/axboe/cdrom/exclude 
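
Taken together, what a SCSI low-level driver promises when it sets highmem_io:1 in its host template is that it never dereferences scatterlist virtual addresses. A hedged sketch of the obligation, with placeholder names and no real HBA programming:

static void mylld_map_sglist(struct pci_dev *pdev, Scsi_Cmnd *SCpnt, int ddir)
{
	struct scatterlist *sg = (struct scatterlist *) SCpnt->request_buffer;
	int i;

	for (i = 0; i < SCpnt->use_sg; i++) {
		dma_addr_t addr = pci_map_page(pdev, sg[i].page, sg[i].offset,
					       sg[i].length, ddir);
		/* program addr / sg[i].length into the HBA's SG descriptor;
		 * sg[i].address must not be touched, it may be NULL here */
		(void) addr;
	}
}
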
/opt/kernel/linux-2.4.20-pre1/fs/buffer.c linux/fs/buffer.c --- /opt/kernel/linux-2.4.20-pre1/fs/buffer.c 2002-08-03 02:39:45.000000000 +0200 +++ linux/fs/buffer.c 2002-08-06 08:19:34.000000000 +0200 @@ -1211,16 +1211,14 @@ void set_bh_page (struct buffer_head *bh, struct page *page, unsigned long offset) { - bh->b_page = page; if (offset >= PAGE_SIZE) BUG(); - if (PageHighMem(page)) - /* - * This catches illegal uses and preserves the offset: - */ - bh->b_data = (char *)(0 + offset); - else - bh->b_data = page_address(page) + offset; + + /* + * page_address will return NULL anyways for highmem pages + */ + bh->b_data = page_address(page) + offset; + bh->b_page = page; } EXPORT_SYMBOL(set_bh_page); diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/include/asm-alpha/io.h linux/include/asm-alpha/io.h --- /opt/kernel/linux-2.4.20-pre1/include/asm-alpha/io.h 2001-11-09 22:45:35.000000000 +0100 +++ linux/include/asm-alpha/io.h 2002-06-18 12:57:37.000000000 +0200 @@ -60,6 +60,8 @@ return (void *) (address + IDENT_ADDR); } +#define page_to_phys(page) PAGE_TO_PA(page) + /* * Change addresses as seen by the kernel (virtual) to addresses as * seen by a device (bus), and vice versa. diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/include/asm-i386/kmap_types.h linux/include/asm-i386/kmap_types.h --- /opt/kernel/linux-2.4.20-pre1/include/asm-i386/kmap_types.h 2001-09-17 22:16:30.000000000 +0200 +++ linux/include/asm-i386/kmap_types.h 2002-06-18 12:57:37.000000000 +0200 @@ -7,6 +7,7 @@ KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, + KM_BH_IRQ, KM_TYPE_NR }; diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/include/asm-i386/scatterlist.h linux/include/asm-i386/scatterlist.h --- /opt/kernel/linux-2.4.20-pre1/include/asm-i386/scatterlist.h 2001-10-13 00:35:54.000000000 +0200 +++ linux/include/asm-i386/scatterlist.h 2002-06-18 12:57:37.000000000 +0200 @@ -1,6 +1,24 @@ #ifndef _I386_SCATTERLIST_H #define _I386_SCATTERLIST_H +/* + * Drivers must set either ->address or (preferred) ->page and ->offset + * to indicate where data must be transferred to/from. + * + * Using ->page is recommended since it handles highmem data as well as + * low mem. ->address is restricted to data which has a virtual mapping, and + * it will go away in the future. Updating to ->page can be automated very + * easily -- something like + * + * sg->address = some_ptr; + * + * can be rewritten as + * + * sg->page = virt_to_page(some_ptr); + * sg->offset = (unsigned long) some_ptr & ~PAGE_MASK; + * + * and that's it. There's no excuse for not highmem enabling YOUR driver. 
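
The conversion described in this comment can be wrapped in a small helper; the name below is hypothetical, not an API this patch adds:

static inline void sg_set_virt(struct scatterlist *sg, void *ptr,
			       unsigned int len)
{
	sg->page    = virt_to_page(ptr);
	sg->offset  = (unsigned long) ptr & ~PAGE_MASK;
	sg->address = NULL;		/* deprecated, on its way out */
	sg->length  = len;
}
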
/jens + */ struct scatterlist { char * address; /* Location data is to be transferred to, NULL for * highmem page */ diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/include/asm-ppc/kmap_types.h linux/include/asm-ppc/kmap_types.h --- /opt/kernel/linux-2.4.20-pre1/include/asm-ppc/kmap_types.h 2001-09-17 22:16:30.000000000 +0200 +++ linux/include/asm-ppc/kmap_types.h 2002-06-18 12:57:37.000000000 +0200 @@ -11,6 +11,7 @@ KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, + KM_BH_IRQ, KM_TYPE_NR }; diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/include/asm-sparc/kmap_types.h linux/include/asm-sparc/kmap_types.h --- /opt/kernel/linux-2.4.20-pre1/include/asm-sparc/kmap_types.h 2001-09-17 22:16:30.000000000 +0200 +++ linux/include/asm-sparc/kmap_types.h 2002-06-18 12:57:37.000000000 +0200 @@ -7,6 +7,7 @@ KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, + KM_BH_IRQ, KM_TYPE_NR }; diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/include/linux/blkdev.h linux/include/linux/blkdev.h --- /opt/kernel/linux-2.4.20-pre1/include/linux/blkdev.h 2002-08-03 02:39:45.000000000 +0200 +++ linux/include/linux/blkdev.h 2002-08-06 16:56:39.000000000 +0200 @@ -7,6 +7,8 @@ #include #include +#include + struct request_queue; typedef struct request_queue request_queue_t; struct elevator_s; @@ -36,7 +38,7 @@ unsigned long hard_sector, hard_nr_sectors; unsigned int nr_segments; unsigned int nr_hw_segments; - unsigned long current_nr_sectors; + unsigned long current_nr_sectors, hard_cur_sectors; void * special; char * buffer; struct completion * waiting; @@ -123,6 +125,8 @@ */ char head_active; + unsigned long bounce_pfn; + /* * Is meant to protect the queue in the future instead of * io_request_lock @@ -135,6 +139,38 @@ wait_queue_head_t wait_for_requests[2]; }; +extern unsigned long blk_max_low_pfn, blk_max_pfn; + +#define BLK_BOUNCE_HIGH (blk_max_low_pfn << PAGE_SHIFT) +#define BLK_BOUNCE_ANY (blk_max_pfn << PAGE_SHIFT) + +extern void blk_queue_bounce_limit(request_queue_t *, u64); + +#ifdef CONFIG_HIGHMEM +extern struct buffer_head *create_bounce(int, struct buffer_head *); +extern inline struct buffer_head *blk_queue_bounce(request_queue_t *q, int rw, + struct buffer_head *bh) +{ + struct page *page = bh->b_page; + +#ifndef CONFIG_DISCONTIGMEM + if (page - mem_map <= q->bounce_pfn) +#else + if ((page - page_zone(page)->zone_mem_map) + (page_zone(page)->zone_start_paddr >> PAGE_SHIFT) <= q->bounce_pfn) +#endif + return bh; + + return create_bounce(rw, bh); +} +#else +#define blk_queue_bounce(q, rw, bh) (bh) +#endif + +#define bh_phys(bh) (page_to_phys((bh)->b_page) + bh_offset((bh))) + +#define BH_CONTIG(b1, b2) (bh_phys((b1)) + (b1)->b_size == bh_phys((b2))) +#define BH_PHYS_4G(b1, b2) ((bh_phys((b1)) | 0xffffffff) == ((bh_phys((b2)) + (b2)->b_size - 1) | 0xffffffff)) + struct blk_dev_struct { /* * queue_proc has to be atomic @@ -174,6 +210,7 @@ extern void blk_queue_headactive(request_queue_t *, int); extern void blk_queue_make_request(request_queue_t *, make_request_fn *); extern void generic_unplug_device(void *); +extern inline int blk_seg_merge_ok(struct buffer_head *, struct buffer_head *); extern int * blk_size[MAX_BLKDEV]; diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/include/linux/bootmem.h linux/include/linux/bootmem.h --- /opt/kernel/linux-2.4.20-pre1/include/linux/bootmem.h 2001-11-22 20:47:23.000000000 +0100 +++ linux/include/linux/bootmem.h 2002-08-06 16:57:11.000000000 +0200 @@ -16,6 +16,7 @@ extern unsigned long max_low_pfn; extern 
unsigned long min_low_pfn; +extern unsigned long max_pfn; /* * node_bootmem_map is a map pointer - the bits represent all physical diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/include/linux/highmem.h linux/include/linux/highmem.h --- /opt/kernel/linux-2.4.20-pre1/include/linux/highmem.h 2002-08-03 02:39:45.000000000 +0200 +++ linux/include/linux/highmem.h 2002-08-06 16:56:13.000000000 +0200 @@ -13,8 +13,7 @@ /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); -extern struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig); - +extern struct buffer_head *create_bounce(int rw, struct buffer_head * bh_orig); static inline char *bh_kmap(struct buffer_head *bh) { @@ -26,6 +25,42 @@ kunmap(bh->b_page); } +/* + * remember to add offset! and never ever reenable interrupts between a + * bh_kmap_irq and bh_kunmap_irq!! + */ +static inline char *bh_kmap_irq(struct buffer_head *bh, unsigned long *flags) +{ + unsigned long addr; + + __save_flags(*flags); + + /* + * could be low + */ + if (!PageHighMem(bh->b_page)) + return bh->b_data; + + /* + * it's a highmem page + */ + __cli(); + addr = (unsigned long) kmap_atomic(bh->b_page, KM_BH_IRQ); + + if (addr & ~PAGE_MASK) + BUG(); + + return (char *) addr + bh_offset(bh); +} + +static inline void bh_kunmap_irq(char *buffer, unsigned long *flags) +{ + unsigned long ptr = (unsigned long) buffer & PAGE_MASK; + + kunmap_atomic((void *) ptr, KM_BH_IRQ); + __restore_flags(*flags); +} + #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } @@ -37,8 +72,10 @@ #define kmap_atomic(page,idx) kmap(page) #define kunmap_atomic(page,idx) kunmap(page) -#define bh_kmap(bh) ((bh)->b_data) -#define bh_kunmap(bh) do { } while (0) +#define bh_kmap(bh) ((bh)->b_data) +#define bh_kunmap(bh) do { } while (0) +#define bh_kmap_irq(bh, flags) ((bh)->b_data) +#define bh_kunmap_irq(bh, flags) do { *(flags) = 0; } while (0) #endif /* CONFIG_HIGHMEM */ diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/include/linux/ide.h linux/include/linux/ide.h --- /opt/kernel/linux-2.4.20-pre1/include/linux/ide.h 2002-08-03 02:39:45.000000000 +0200 +++ linux/include/linux/ide.h 2002-08-06 16:56:43.000000000 +0200 @@ -552,6 +552,7 @@ unsigned reset : 1; /* reset after probe */ unsigned autodma : 1; /* automatically try to enable DMA at boot */ unsigned udma_four : 1; /* 1=ATA-66 capable, 0=default */ + unsigned no_highio : 1; /* don't trust pci dma mask, bounce */ byte channel; /* for dual-port chips: 0=primary, 1=secondary */ #ifdef CONFIG_BLK_DEV_IDEPCI struct pci_dev *pci_dev; /* for pci chipsets */ @@ -874,6 +875,21 @@ } ide_action_t; /* + * temporarily mapping a (possible) highmem bio + */ +#define ide_rq_offset(rq) (((rq)->hard_cur_sectors - (rq)->current_nr_sectors) << 9) + +extern inline void *ide_map_buffer(struct request *rq, unsigned long *flags) +{ + return bh_kmap_irq(rq->bh, flags) + ide_rq_offset(rq); +} + +extern inline void ide_unmap_buffer(char *buffer, unsigned long *flags) +{ + bh_kunmap_irq(buffer, flags); +} + +/* * This function issues a special IDE device request * onto the request queue. 
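
A note on ide_rq_offset() above: hard_cur_sectors (added to struct request earlier in this patch) remembers how many sectors the current bh spanned when it became the head of the request, while current_nr_sectors counts down as transfers complete, so their difference is how far into the current buffer the next PIO transfer must go. That is what lets rq->buffer be ignored for highmem pages. A minimal sketch of the resulting PIO pattern, mirroring the ide-disk.c changes:

static void pio_transfer_one_sector(ide_drive_t *drive, struct request *rq)
{
	unsigned long flags;
	char *to = ide_map_buffer(rq, &flags);	/* kmap_atomic for highmem */

	idedisk_input_data(drive, to, SECTOR_WORDS);
	ide_unmap_buffer(to, &flags);		/* restores interrupt state */
}
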
* diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/kernel/ksyms.c linux/kernel/ksyms.c --- /opt/kernel/linux-2.4.20-pre1/kernel/ksyms.c 2002-08-03 02:39:46.000000000 +0200 +++ linux/kernel/ksyms.c 2002-08-06 08:19:34.000000000 +0200 @@ -122,6 +122,8 @@ EXPORT_SYMBOL(kunmap_high); EXPORT_SYMBOL(highmem_start_page); EXPORT_SYMBOL(create_bounce); +EXPORT_SYMBOL(kmap_prot); +EXPORT_SYMBOL(kmap_pte); #endif /* filesystem internal functions */ diff -urN -X /home/axboe/cdrom/exclude /opt/kernel/linux-2.4.20-pre1/mm/bootmem.c linux/mm/bootmem.c --- /opt/kernel/linux-2.4.20-pre1/mm/bootmem.c 2002-08-03 02:39:46.000000000 +0200 +++ linux/mm/bootmem.c 2002-06-18 12:57:37.000000000 +0200 @@ -26,6 +26,7 @@ */ unsigned long max_low_pfn; unsigned long min_low_pfn; +unsigned long max_pfn; /* return the number of _pages_ that will be allocated for the boot bitmap */ unsigned long __init bootmem_bootmap_pages (unsigned long pages)