diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/Documentation/DMA-mapping.txt linux/Documentation/DMA-mapping.txt
--- /opt/kernel/linux-2.4.9/Documentation/DMA-mapping.txt	Fri Aug 24 13:46:12 2001
+++ linux/Documentation/DMA-mapping.txt	Fri Aug 24 13:49:22 2001
@@ -6,14 +6,15 @@
 		 Jakub Jelinek
 
 Most of the 64bit platforms have special hardware that translates bus
-addresses (DMA addresses) to physical addresses similarly to how page
-tables and/or TLB translate virtual addresses to physical addresses.
-This is needed so that e.g. PCI devices can access with a Single Address
-Cycle (32bit DMA address) any page in the 64bit physical address space.
-Previously in Linux those 64bit platforms had to set artificial limits on
-the maximum RAM size in the system, so that the virt_to_bus() static scheme
-works (the DMA address translation tables were simply filled on bootup
-to map each bus address to the physical page __pa(bus_to_virt())).
+addresses (DMA addresses) to physical addresses. This is similar to
+how page tables and/or a TLB translate virtual addresses to physical
+addresses on a CPU. This is needed so that e.g. PCI devices can
+access with a Single Address Cycle (32bit DMA address) any page in the
+64bit physical address space. Previously in Linux those 64bit
+platforms had to set artificial limits on the maximum RAM size in the
+system, so that the virt_to_bus() static scheme works (the DMA address
+translation tables were simply filled on bootup to map each bus
+address to the physical page __pa(bus_to_virt())).
 
 So that Linux can use the dynamic DMA mapping, it needs some help from the
 drivers, namely it has to take into account that DMA addresses should be
@@ -28,9 +29,11 @@
 
 #include <linux/pci.h>
 
-is in your driver. This file will obtain for you the definition of
-the dma_addr_t type which should be used everywhere you hold a DMA
-(bus) address returned from the DMA mapping functions.
+is in your driver. This file will obtain for you the definition of the
+dma_addr_t (which can hold only SAC addresses) and dma64_addr_t (which
+can hold both SAC and DAC addresses) types which should be used
+everywhere you hold a DMA (bus) address returned from the DMA mapping
+functions.
 
 			What memory is DMA'able?
 
@@ -237,6 +240,12 @@
 driver needs regions sized smaller than a page, you may prefer using
 the pci_pool interface, described below.
 
+The consistent DMA mapping interfaces, for non-NULL dev, will always
+return a DMA address which is SAC (Single Address Cycle) addressable.
+Even if the device indicates (via PCI dma mask) that it may address
+the upper 32-bits and thus perform DAC cycles, consistent allocation
+will still only return 32-bit PCI addresses for DMA.
+
 It returns two values: the virtual address which you can use to access
 it from the CPU and dma_handle which you pass to the card.
 
@@ -492,6 +501,258 @@
 supports dynamic DMA mapping in hardware) in your driver structures and/or
 in the card registers.
 
+		  64-bit DMA and DAC cycle support
+
+In order to understand how to use the 64-bit portion of Linux's DMA
+support interfaces, some background is necessary.
+
+PCI system implementations (from Linux's perspective) generally fall
+into 2 categories:
+
+1) The PCI address space equals the physical memory address space.
+   Only the lower 4GB of physical memory may be addressed by
+   single-address cycles on PCI. All upper memory can only be
+   accessed via PCI dual-address cycles.
+
+   The x86 and current ia64 systems are examples.
+
+2) All of physical memory can be accessed from PCI single-address
+   cycles via a remapping mechanism. 32-bit PCI addresses within
+   a certain range are "translated" into a full physical memory
+   address. These "translations" are set up by pci_map_{single,sg}()
+   and torn down by pci_unmap_{single,sg}().
+
+   These platforms also provide a way to get at all of physical memory
+   using dual-address cycles. The "translation" is not used in these
+   cases, but often performance is lower when using this scheme. This
+   issue is central to certain aspects of Linux's 64-bit DMA APIs.
+
+   Sparc64, Alpha, and ppc64 are examples of such systems.
+
+   The author's belief is that any sane 64-bit platform will do
+   something along these lines until 64-bit PCI is so ubiquitous that
+   32-bit PCI cards need not be considered anymore (i.e. a few years
+   from now at best).
+
+   Therefore, the author also believes that future ia64 systems are
+   likely to have PCI 32-bit "translation" mechanisms in hardware.
+
+On platforms in category #1 the situation is pretty straightforward.
+
+The category #2 platforms present an issue because they provide two
+mechanisms to get at physical memory. Which is best?
+
+There are two situations where you want to always get DAC (Dual
+Address Cycle) addresses from the 64-bit PCI dma APIs:
+
+1) Your device can hold onto an enormous number of concurrent
+   DMA transactions at once. This is bad for "translation" schemes
+   because the translations are a finite resource.
+
+   A great and often mentioned example is compute cluster cards.
+   These devices can require DMA mappings to several gigabytes of
+   main memory at once. In such a case, DAC addresses are the only
+   way to go about doing this.
+
+2) Your device can _only_ generate DAC cycles.
+
+These are device attributes, and we provide a way for the driver
+to tell the kernel about them:
+
+	pci_change_dma_flag(struct pci_dev *dev,
+			    unsigned int on,
+			    unsigned int off);
+
+This turns off and on various DMA attribute flags for a device. Here
+are the first two attributes, corresponding to situations #1 and #2
+(respectively) above:
+
+	PCI_DMA_FLAG_HUGE_MAPS
+
+	Setting this bit informs the kernel that your device may need
+	to hold onto an enormous number of DMA mappings at once, and
+	thus DAC addressing should be used for everything.
+
+	Do not set this bit unless you absolutely _need_ it. This can
+	degrade performance of the DMA transfers on some platforms.
+	If you do not set this bit, the platform specific PCI support
+	layer will decide if you receive SAC (Single Address Cycle) or
+	DAC addresses.
+
+and
+
+	PCI_DMA_FLAG_DAC_ONLY
+
+	Setting this bit tells the kernel that your device generates
+	only DAC addressing cycles.
+
+Next, we have an interface so the driver can see if DAC operation
+should or can be used:
+
+	int pci_dac_cycles_ok(struct pci_dev *pdev);
+
+Returns a boolean indicating whether the driver should use DAC
+addressing or not. If this returns false, then the driver MUST
+reconfigure the attributes of pci_dev and try again with some
+SAC configuration.
+
+So let us show how the driver for a device supporting DAC might begin
+probing:
+
+	#define MYDEV_PCI_ADDRESS_BITS64	((u64)0xffffffffffffffff)
+	#define MYDEV_PCI_ADDRESS_BITS32	((u64)0x00000000ffffffff)
+
+	int using_dac;
+
+	/* Try turning on DAC support in the PCI layer first,
+	 * if this fails we simply fall back to SAC operation.
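+	 *
+	 * (pci_set_dma_mask() advertises the 64-bit mask to the PCI
+	 *  layer; whether DAC addressing is actually used is decided
+	 *  by the pci_dac_cycles_ok() check below.)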
+	 */
+	pci_set_dma_mask(mp->pdev, MYDEV_PCI_ADDRESS_BITS64);
+
+	/* We are just a normal device and do not hold onto huge
+	 * numbers of mappings at once. We do support SAC addressing.
+	 *
+	 * This call is superfluous since all device DMA flags default
+	 * to off.
+	 */
+	pci_change_dma_flag(mp->pdev, 0, PCI_DMA_FLAG_HUGE_MAPS);
+
+	if (!pci_dac_cycles_ok(mp->pdev)) {
+		if (!pci_dma_supported(mp->pdev, MYDEV_PCI_ADDRESS_BITS32))
+			return -ENODEV;
+		using_dac = 0;
+	} else {
+		using_dac = 1;
+	}
+
+	if (using_dac)
+		writel(PCI_MODE_DAC, mp->regs + PCI_MODE_REG);
+	else
+		writel(0, mp->regs + PCI_MODE_REG);
+
+Note that pci_dac_cycles_ok can fail not only because DAC addressing
+is not possible. It may also fail because using DAC addressing is
+undesirable. For example, this would happen if performance of DMA
+transfers would be faster using SAC addressing.
+
+If your device is PCI_DMA_FLAG_DAC_ONLY and pci_dac_cycles_ok fails
+then you cannot use DMA with this device. Usually this will cause
+the device probe to fail.
+
+The rest of the driver work is relatively easy. There is a 64-bit
+equivalent of each DMA mapping routine mentioned in the earlier
+sections of this document.
+
+If you are using the pci64_*() routines below, and have enabled DAC
+via the mask and flag bits, but you absolutely must use a SAC address
+in a certain situation: simply use the non-64bit mapping interfaces
+above.
+
+NOTE: The generic block/networking layers and the platform PCI support
+code work together to ensure that you never receive a DMA
+address/length pair that would cross a 4GB boundary. If your device
+has this limitation (cannot DMA across a 4GB boundary) you need not
+worry at all as you will never see such a thing.
+
+Let us check them out one by one:
+
+	void *pci64_alloc_consistent(struct pci_dev *hwdev, size_t size,
+				     dma64_addr_t *dma_handle);
+	void pci64_free_consistent(struct pci_dev *hwdev, size_t size,
+				   void *vaddr, dma64_addr_t dma_handle);
+
+These two routines act the same as pci_{alloc,free}_consistent()
+except that they can return DAC addresses. Next:
+
+	void *pci64_pool_alloc(struct pci_pool *pool, int flags,
+			       dma64_addr_t *handle);
+	void pci64_pool_free(struct pci_pool *pool, void *vaddr,
+			     dma64_addr_t addr);
+
+Same as pci_pool_{alloc,free}() except that
+pci64_{alloc,free}_consistent() is used for the pool backing store.
+You create and destroy pci pools the same way as in the 32-bit case,
+with calls to pci_pool_create and pci_pool_destroy. HOWEVER, you may
+only use all pci64_pool_{alloc,free}() calls or all
+pci_pool_{alloc,free}() calls for a particular pool. That is, within
+the same PCI pool, 32-bit and 64-bit allocations may not be mixed.
+
+Next:
+
+dma64_addr_t pci64_map_page(struct pci_dev *hwdev,
+			    struct page *page, unsigned long offset,
+			    size_t size, int direction);
+void pci64_unmap_page(struct pci_dev *hwdev, dma64_addr_t dma_addr,
+		      size_t size, int direction);
+
+Same as pci_{map,unmap}_page(), except that they can return DAC
+addresses. Note that there are no pci64_{map,unmap}_single() routines,
+because on HIGHMEM systems the page+offset pair is the only way
+to express each and every physical memory address in the machine.
+Next:
+
+int pci64_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+		 int nents, int direction);
+void pci64_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+		    int nhwents, int direction);
+
+Same as pci_{map,unmap}_sg(), except that they can return DAC
+addresses.
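+
+As a rough illustration (the MYDEV_MAX_SG and MYDEV_SG_* register
+macros below are invented for this example, in the same spirit as the
+mp->regs usage in the probing sketch above), a driver that has
+enabled DAC addressing might feed a mapped scatterlist to its
+hardware like so:
+
+	struct scatterlist sg[MYDEV_MAX_SG];
+	int i, nents, count;
+
+	/* ... fill in sg[0 .. nents-1] from the request ... */
+
+	count = pci64_map_sg(mp->pdev, sg, nents, PCI_DMA_TODEVICE);
+	if (count == 0)
+		return -ENOMEM;	/* could not map the request */
+
+	for (i = 0; i < count; i++) {
+		dma64_addr_t addr = sg[i].dma_address;
+
+		/* The upper 32 bits are simply zero when the
+		 * platform handed back a SAC address.
+		 */
+		writel((u32) addr, mp->regs + MYDEV_SG_ADDR_LO(i));
+		writel((u32) (addr >> 32), mp->regs + MYDEV_SG_ADDR_HI(i));
+		writel(sg[i].dma_length, mp->regs + MYDEV_SG_LEN(i));
+	}
+
+	/* ... start the transfer; once it has completed ... */
+
+	pci64_unmap_sg(mp->pdev, sg, nents, PCI_DMA_TODEVICE);
+
+Note that only the first 'count' entries are walked after mapping,
+while the unmap call is passed the same 'nents' originally given to
+pci64_map_sg().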
+Next:
+
+void pci64_dma_sync_single(struct pci_dev *hwdev, dma64_addr_t dma_handle,
+			   size_t size, int direction);
+void pci64_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+		       int nelems, int direction);
+
+Same as pci_dma_sync_{single,sg}(), except that these routines know
+how to deal with DAC addresses.
+
+That's it. Note that the above routines can and will return SAC
+addresses unless you've told them otherwise.
+
+It is believed that the above allows the driver author to deal with
+any reasonable combination of device and platform. At the same time
+it allows drivers for SAC-only devices to be coded more efficiently
+by only using the 32-bit dma_addr_t type in the interfaces.
+
+There is an important assumption built into how a platform may
+implement all of these interfaces, PLEASE READ THIS CAREFULLY:
+
+  The kernel assumes that, for a device supporting DAC and SAC
+  addressing, it will behave in a certain way. Specifically,
+  it assumes that if the device is given a DMA address with the top
+  32-bits cleared to zero, it will _ALWAYS_ use a SAC cycle.
+
+  Nearly all devices work this way, for performance reasons, since it
+  requires one less PCI cycle for a DMA transaction. However, as always,
+  it is possible that there are a few broken devices out there which can
+  only operate in an "all DAC" or "all SAC" mode. So, we've provided
+  a way in which even devices like these can be handled correctly (this
+  is described later on).
+
+  Why does the kernel make this assumption? The reason is that many
+  platforms in category #2 above interpret addresses seen in SAC cycles
+  differently from those seen in DAC cycles. Specifically, only SAC cycle
+  addresses can go through the "translation" mechanism. Thus, things
+  would not work if the kernel gave you a SAC address and your device
+  used a DAC cycle to access it.
+
+  If your device does not behave this way, you have two choices
+  of how to deal with this:
+
+  a) Tell the kernel that your device is PCI_DMA_FLAG_DAC_ONLY.
+     This selection is UNDESIRABLE, because performance may
+     suffer on some platforms when you do this.
+
+  b) The more desirable solution: configure the device to only
+     generate SAC addresses.
+
+If your device driver uses the 64-bit APIs and is not working properly
+on some platforms, the reason may be that the device violates the
+above assumptions built into the Linux kernel.
+
+			Closing
+
 This document, and the API itself, would not be in it's current form
 without the feedback and suggestions from numerous individuals. We
 would like to specifically mention, in no particular order, the
@@ -503,3 +764,5 @@
 	Grant Grundler
 	Jay Estabrook
 	Thomas Sailer
+	Andrea Arcangeli
+	Jens Axboe
diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/arch/alpha/kernel/pci_iommu.c linux/arch/alpha/kernel/pci_iommu.c
--- /opt/kernel/linux-2.4.9/arch/alpha/kernel/pci_iommu.c	Fri Aug 24 13:46:12 2001
+++ linux/arch/alpha/kernel/pci_iommu.c	Thu Aug 23 09:28:51 2001
@@ -636,7 +636,7 @@
    supported properly.
*/ int -pci_dma_supported(struct pci_dev *pdev, dma_addr_t mask) +pci_dma_supported(struct pci_dev *pdev, u64 mask) { struct pci_controller *hose; struct pci_iommu_arena *arena; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/arch/ia64/sn/io/pci_dma.c linux/arch/ia64/sn/io/pci_dma.c --- /opt/kernel/linux-2.4.9/arch/ia64/sn/io/pci_dma.c Fri Aug 24 13:46:12 2001 +++ linux/arch/ia64/sn/io/pci_dma.c Thu Aug 23 09:28:51 2001 @@ -182,7 +182,7 @@ } /* - * On sn1 we use the alt_address entry of the scatterlist to store + * On sn1 we use the orig_address entry of the scatterlist to store * the physical address corresponding to the given virtual address */ int diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/arch/parisc/kernel/ccio-dma.c linux/arch/parisc/kernel/ccio-dma.c --- /opt/kernel/linux-2.4.9/arch/parisc/kernel/ccio-dma.c Fri Aug 24 13:46:12 2001 +++ linux/arch/parisc/kernel/ccio-dma.c Thu Aug 23 09:28:51 2001 @@ -638,7 +638,7 @@ } -static int ccio_dma_supported( struct pci_dev *dev, dma_addr_t mask) +static int ccio_dma_supported( struct pci_dev *dev, u64 mask) { if (dev == NULL) { printk(MODULE_NAME ": EISA/ISA/et al not supported\n"); diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/arch/parisc/kernel/ccio-rm-dma.c linux/arch/parisc/kernel/ccio-rm-dma.c --- /opt/kernel/linux-2.4.9/arch/parisc/kernel/ccio-rm-dma.c Fri Aug 24 13:46:12 2001 +++ linux/arch/parisc/kernel/ccio-rm-dma.c Thu Aug 23 09:28:51 2001 @@ -93,7 +93,7 @@ } -static int ccio_dma_supported( struct pci_dev *dev, dma_addr_t mask) +static int ccio_dma_supported( struct pci_dev *dev, u64 mask) { if (dev == NULL) { printk(MODULE_NAME ": EISA/ISA/et al not supported\n"); diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/arch/parisc/kernel/pci-dma.c linux/arch/parisc/kernel/pci-dma.c --- /opt/kernel/linux-2.4.9/arch/parisc/kernel/pci-dma.c Fri Aug 24 13:46:12 2001 +++ linux/arch/parisc/kernel/pci-dma.c Thu Aug 23 09:28:51 2001 @@ -77,7 +77,7 @@ static inline void dump_resmap(void) {;} #endif -static int pa11_dma_supported( struct pci_dev *dev, dma_addr_t mask) +static int pa11_dma_supported( struct pci_dev *dev, u64 mask) { return 1; } diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/arch/parisc/kernel/sba_iommu.c linux/arch/parisc/kernel/sba_iommu.c --- /opt/kernel/linux-2.4.9/arch/parisc/kernel/sba_iommu.c Fri Aug 24 13:46:12 2001 +++ linux/arch/parisc/kernel/sba_iommu.c Thu Aug 23 09:28:51 2001 @@ -779,7 +779,7 @@ } static int -sba_dma_supported( struct pci_dev *dev, dma_addr_t mask) +sba_dma_supported( struct pci_dev *dev, u64 mask) { if (dev == NULL) { printk(MODULE_NAME ": EISA/ISA/et al not supported\n"); diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/arch/sparc64/kernel/iommu_common.c linux/arch/sparc64/kernel/iommu_common.c --- /opt/kernel/linux-2.4.9/arch/sparc64/kernel/iommu_common.c Fri Aug 24 13:46:12 2001 +++ linux/arch/sparc64/kernel/iommu_common.c Thu Aug 23 09:28:51 2001 @@ -12,7 +12,7 @@ */ #ifdef VERIFY_SG -int verify_lengths(struct scatterlist *sg, int nents, int npages) +static int verify_lengths(struct scatterlist *sg, int nents, int npages) { int sg_len, dma_len; int i, pgcount; @@ -22,8 +22,8 @@ sg_len += sg[i].length; dma_len = 0; - for (i = 0; i < nents && sg[i].dvma_length; i++) - dma_len += sg[i].dvma_length; + for (i = 0; i < nents && sg[i].dma_length; i++) + dma_len += sg[i].dma_length; if (sg_len != dma_len) { printk("verify_lengths: Error, different, sg[%d] dma[%d]\n", @@ -32,13 +32,13 
@@ } pgcount = 0; - for (i = 0; i < nents && sg[i].dvma_length; i++) { + for (i = 0; i < nents && sg[i].dma_length; i++) { unsigned long start, end; - start = sg[i].dvma_address; + start = sg[i].dma_address; start = start & PAGE_MASK; - end = sg[i].dvma_address + sg[i].dvma_length; + end = sg[i].dma_address + sg[i].dma_length; end = (end + (PAGE_SIZE - 1)) & PAGE_MASK; pgcount += ((end - start) >> PAGE_SHIFT); @@ -55,15 +55,16 @@ return 0; } -int verify_one_map(struct scatterlist *dma_sg, struct scatterlist **__sg, int nents, iopte_t **__iopte) +static int verify_one_map(struct scatterlist *dma_sg, struct scatterlist **__sg, int nents, iopte_t **__iopte) { struct scatterlist *sg = *__sg; iopte_t *iopte = *__iopte; - u32 dlen = dma_sg->dvma_length; - u32 daddr = dma_sg->dvma_address; + u32 dlen = dma_sg->dma_length; + u32 daddr; unsigned int sglen; unsigned long sgaddr; + daddr = dma_sg->dma_address; sglen = sg->length; sgaddr = (unsigned long) sg->address; while (dlen > 0) { @@ -136,7 +137,7 @@ return nents; } -int verify_maps(struct scatterlist *sg, int nents, iopte_t *iopte) +static int verify_maps(struct scatterlist *sg, int nents, iopte_t *iopte) { struct scatterlist *dma_sg = sg; struct scatterlist *orig_dma_sg = dma_sg; @@ -147,7 +148,7 @@ if (nents <= 0) break; dma_sg++; - if (dma_sg->dvma_length == 0) + if (dma_sg->dma_length == 0) break; } @@ -174,14 +175,15 @@ verify_maps(sg, nents, iopte) < 0) { int i; - printk("verify_sglist: Crap, messed up mappings, dumping, iodma at %08x.\n", - (u32) (sg->dvma_address & PAGE_MASK)); + printk("verify_sglist: Crap, messed up mappings, dumping, iodma at "); + printk("%016lx.\n", sg->dma_address & PAGE_MASK); + for (i = 0; i < nents; i++) { printk("sg(%d): address(%p) length(%x) " - "dma_address[%08x] dma_length[%08x]\n", + "dma_address[%016lx] dma_length[%08x]\n", i, sg[i].address, sg[i].length, - sg[i].dvma_address, sg[i].dvma_length); + sg[i].dma_address, sg[i].dma_length); } } @@ -189,30 +191,23 @@ } #endif -/* Two addresses are "virtually contiguous" if and only if: - * 1) They are equal, or... - * 2) They are both on a page boundry - */ -#define VCONTIG(__X, __Y) (((__X) == (__Y)) || \ - (((__X) | (__Y)) << (64UL - PAGE_SHIFT)) == 0UL) - unsigned long prepare_sg(struct scatterlist *sg, int nents) { struct scatterlist *dma_sg = sg; unsigned long prev; - u32 dent_addr, dent_len; + u64 dent_addr, dent_len; prev = (unsigned long) sg->address; prev += (unsigned long) (dent_len = sg->length); - dent_addr = (u32) ((unsigned long)sg->address & (PAGE_SIZE - 1UL)); + dent_addr = (u64) ((unsigned long)sg->address & (PAGE_SIZE - 1UL)); while (--nents) { unsigned long addr; sg++; addr = (unsigned long) sg->address; if (! 
VCONTIG(prev, addr)) { - dma_sg->dvma_address = dent_addr; - dma_sg->dvma_length = dent_len; + dma_sg->dma_address = dent_addr; + dma_sg->dma_length = dent_len; dma_sg++; dent_addr = ((dent_addr + @@ -225,8 +220,8 @@ dent_len += sg->length; prev = addr + sg->length; } - dma_sg->dvma_address = dent_addr; - dma_sg->dvma_length = dent_len; + dma_sg->dma_address = dent_addr; + dma_sg->dma_length = dent_len; return ((unsigned long) dent_addr + (unsigned long) dent_len + diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/arch/sparc64/kernel/iommu_common.h linux/arch/sparc64/kernel/iommu_common.h --- /opt/kernel/linux-2.4.9/arch/sparc64/kernel/iommu_common.h Fri Aug 24 13:46:12 2001 +++ linux/arch/sparc64/kernel/iommu_common.h Thu Aug 23 09:28:51 2001 @@ -18,10 +18,7 @@ #undef VERIFY_SG #ifdef VERIFY_SG -int verify_lengths(struct scatterlist *sg, int nents, int npages); -int verify_one_map(struct scatterlist *dma_sg, struct scatterlist **__sg, int nents, iopte_t **__iopte); -int verify_maps(struct scatterlist *sg, int nents, iopte_t *iopte); -void verify_sglist(struct scatterlist *sg, int nents, iopte_t *iopte, int npages); +extern void verify_sglist(struct scatterlist *sg, int nents, iopte_t *iopte, int npages); #endif /* Two addresses are "virtually contiguous" if and only if: @@ -31,4 +28,4 @@ #define VCONTIG(__X, __Y) (((__X) == (__Y)) || \ (((__X) | (__Y)) << (64UL - PAGE_SHIFT)) == 0UL) -unsigned long prepare_sg(struct scatterlist *sg, int nents); +extern unsigned long prepare_sg(struct scatterlist *sg, int nents); diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/arch/sparc64/kernel/pci_iommu.c linux/arch/sparc64/kernel/pci_iommu.c --- /opt/kernel/linux-2.4.9/arch/sparc64/kernel/pci_iommu.c Fri Aug 24 13:46:12 2001 +++ linux/arch/sparc64/kernel/pci_iommu.c Thu Aug 23 09:28:51 2001 @@ -237,6 +237,36 @@ return ret; } +void *pci64_alloc_consistent(struct pci_dev *pdev, size_t size, dma64_addr_t *dma_addrp) +{ + if (!(pdev->dma_flags & __PCI_DMA_FLAG_MUST_DAC)) { + dma_addr_t tmp; + void *ret; + + ret = pci_alloc_consistent(pdev, size, &tmp); + if (ret != NULL) + *dma_addrp = (dma64_addr_t) tmp; + + return ret; + } else { + unsigned long order, first_page; + + size = PAGE_ALIGN(size); + order = get_order(size); + if (order >= 10) + return NULL; + + first_page = __get_free_pages(GFP_ATOMIC, order); + if (first_page == 0UL) + return NULL; + + memset((char *)first_page, 0, PAGE_SIZE << order); + *dma_addrp = PCI64_ADDR_BASE + __pa(first_page); + + return (void *) first_page; + } +} + /* Free and unmap a consistent DMA translation. */ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_t dvma) { @@ -299,6 +329,16 @@ free_pages((unsigned long)cpu, order); } +void pci64_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma64_addr_t dvma) +{ + if (!(pdev->dma_flags & __PCI_DMA_FLAG_MUST_DAC)) { + pci_free_consistent(pdev, size, cpu, + (dma_addr_t) dvma); + } + + /* Else, nothing to do. */ +} + /* Map a single buffer at PTR of SZ bytes for PCI DMA * in streaming mode. */ @@ -356,6 +396,20 @@ return 0; } +dma64_addr_t pci64_map_page(struct pci_dev *pdev, + struct page *page, unsigned long offset, + size_t sz, int direction) +{ + if (!(pdev->dma_flags & __PCI_DMA_FLAG_MUST_DAC)) { + return (dma64_addr_t) + pci_map_single(pdev, + page_address(page) + offset, + sz, direction); + } + + return PCI64_ADDR_BASE + (__pa(page_address(page)) + offset); +} + /* Unmap a single streaming mode DMA translation. 
*/ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int direction) { @@ -378,7 +432,8 @@ ((bus_addr - iommu->page_table_map_base) >> PAGE_SHIFT); #ifdef DEBUG_PCI_IOMMU if (iopte_val(*base) == IOPTE_INVALID) - printk("pci_unmap_single called on non-mapped region %08x,%08x from %016lx\n", bus_addr, sz, __builtin_return_address(0)); + printk("pci_unmap_single called on non-mapped region %08x,%08x from %016lx\n", + bus_addr, sz, __builtin_return_address(0)); #endif bus_addr &= PAGE_MASK; @@ -423,18 +478,39 @@ spin_unlock_irqrestore(&iommu->lock, flags); } -static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, unsigned long iopte_protection) +void pci64_unmap_page(struct pci_dev *pdev, dma64_addr_t bus_addr, + size_t sz, int direction) +{ + if (!(pdev->dma_flags & __PCI_DMA_FLAG_MUST_DAC)) { + if ((bus_addr >> 32) != (dma64_addr_t) 0) + BUG(); + + return pci_unmap_single(pdev, (dma_addr_t) bus_addr, + sz, direction); + } + + /* If doing real DAC, there is nothing to do. */ +} + +#define SG_ENT_PHYS_ADDRESS(SG) \ + ((SG)->address ? \ + __pa((SG)->address) : \ + (__pa(page_address((SG)->page)) + (SG)->offset)) + +static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, + int nused, int nelems, unsigned long iopte_protection) { struct scatterlist *dma_sg = sg; + struct scatterlist *sg_end = sg + nelems; int i; for (i = 0; i < nused; i++) { unsigned long pteval = ~0UL; - u32 dma_npages; + u64 dma_npages; - dma_npages = ((dma_sg->dvma_address & (PAGE_SIZE - 1UL)) + - dma_sg->dvma_length + - ((u32)(PAGE_SIZE - 1UL))) >> PAGE_SHIFT; + dma_npages = ((dma_sg->dma_address & (PAGE_SIZE - 1UL)) + + dma_sg->dma_length + + ((PAGE_SIZE - 1UL))) >> PAGE_SHIFT; do { unsigned long offset; signed int len; @@ -447,7 +523,7 @@ for (;;) { unsigned long tmp; - tmp = (unsigned long) __pa(sg->address); + tmp = SG_ENT_PHYS_ADDRESS(sg); len = sg->length; if (((tmp ^ pteval) >> PAGE_SHIFT) != 0UL) { pteval = tmp & PAGE_MASK; @@ -479,10 +555,11 @@ * adjusting pteval along the way. Stop when we * detect a page crossing event. */ - while ((pteval << (64 - PAGE_SHIFT)) != 0UL && - pteval == __pa(sg->address) && + while (sg < sg_end && + (pteval << (64 - PAGE_SHIFT)) != 0UL && + (pteval == SG_ENT_PHYS_ADDRESS(sg)) && ((pteval ^ - (__pa(sg->address) + sg->length - 1UL)) >> PAGE_SHIFT) == 0UL) { + (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> PAGE_SHIFT) == 0UL) { pteval += sg->length; sg++; } @@ -505,14 +582,19 @@ struct pci_strbuf *strbuf; unsigned long flags, ctx, npages, iopte_protection; iopte_t *base; - u32 dma_base; + u64 dma_base; struct scatterlist *sgtmp; int used; /* Fast path single entry scatterlists. */ if (nelems == 1) { - sglist->dvma_address = pci_map_single(pdev, sglist->address, sglist->length, direction); - sglist->dvma_length = sglist->length; + sglist->dma_address = (dma64_addr_t) + pci_map_single(pdev, + (sglist->address ? 
+ sglist->address : + (page_address(sglist->page) + sglist->offset)), + sglist->length, direction); + sglist->dma_length = sglist->length; return 1; } @@ -540,8 +622,8 @@ used = nelems; sgtmp = sglist; - while (used && sgtmp->dvma_length) { - sgtmp->dvma_address += dma_base; + while (used && sgtmp->dma_length) { + sgtmp->dma_address += dma_base; sgtmp++; used--; } @@ -559,7 +641,7 @@ iopte_protection = IOPTE_CONSISTENT(ctx); if (direction != PCI_DMA_TODEVICE) iopte_protection |= IOPTE_WRITE; - fill_sg (base, sglist, used, iopte_protection); + fill_sg (base, sglist, used, nelems, iopte_protection); #ifdef VERIFY_SG verify_sglist(sglist, nelems, base, npages); #endif @@ -574,6 +656,23 @@ return 0; } +int pci64_map_sg(struct pci_dev *pdev, struct scatterlist *sg, + int nelems, int direction) +{ + if ((pdev->dma_flags & __PCI_DMA_FLAG_MUST_DAC) != 0) { + int i; + + for (i = 0; i < nelems; i++) { + sg[i].dma_address = + PCI64_ADDR_BASE + SG_ENT_PHYS_ADDRESS(&sg[i]); + sg[i].dma_length = sg[i].length; + } + return nelems; + } + + return pci_map_sg(pdev, sg, nelems, direction); +} + /* Unmap a set of streaming mode DMA translations. */ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int direction) { @@ -582,7 +681,7 @@ struct pci_strbuf *strbuf; iopte_t *base; unsigned long flags, ctx, i, npages; - u32 bus_addr; + u64 bus_addr; if (direction == PCI_DMA_NONE) BUG(); @@ -591,20 +690,21 @@ iommu = pcp->pbm->iommu; strbuf = &pcp->pbm->stc; - bus_addr = sglist->dvma_address & PAGE_MASK; + bus_addr = sglist->dma_address & PAGE_MASK; for (i = 1; i < nelems; i++) - if (sglist[i].dvma_length == 0) + if (sglist[i].dma_length == 0) break; i--; - npages = (PAGE_ALIGN(sglist[i].dvma_address + sglist[i].dvma_length) - bus_addr) >> PAGE_SHIFT; + npages = (PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> PAGE_SHIFT; base = iommu->page_table + ((bus_addr - iommu->page_table_map_base) >> PAGE_SHIFT); #ifdef DEBUG_PCI_IOMMU if (iopte_val(*base) == IOPTE_INVALID) - printk("pci_unmap_sg called on non-mapped region %08x,%d from %016lx\n", sglist->dvma_address, nelems, __builtin_return_address(0)); + printk("pci_unmap_sg called on non-mapped region %016lx,%d from %016lx\n", + sglist->dma_address, nelems, __builtin_return_address(0)); #endif spin_lock_irqsave(&iommu->lock, flags); @@ -616,7 +716,7 @@ /* Step 1: Kick data out of streaming buffers if necessary. */ if (strbuf->strbuf_enabled) { - u32 vaddr = bus_addr; + u32 vaddr = (u32) bus_addr; PCI_STC_FLUSHFLAG_INIT(strbuf); if (strbuf->strbuf_ctxflush && @@ -648,6 +748,15 @@ spin_unlock_irqrestore(&iommu->lock, flags); } +void pci64_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, + int nelems, int direction) +{ + if (!(pdev->dma_flags & __PCI_DMA_FLAG_MUST_DAC)) + return pci_unmap_sg(pdev, sglist, nelems, direction); + + /* If doing real DAC, there is nothing to do. */ +} + /* Make physical memory consistent for a single * streaming mode DMA translation after a transfer. */ @@ -709,6 +818,20 @@ spin_unlock_irqrestore(&iommu->lock, flags); } +void pci64_dma_sync_single(struct pci_dev *pdev, dma64_addr_t bus_addr, + size_t sz, int direction) +{ + if (!(pdev->dma_flags & __PCI_DMA_FLAG_MUST_DAC)) { + if ((bus_addr >> 32) != (dma64_addr_t) 0) + BUG(); + + return pci_dma_sync_single(pdev, (dma_addr_t) bus_addr, + sz, direction); + } + + /* If doing real DAC, there is nothing to do. */ +} + /* Make physical memory consistent for a set of streaming * mode DMA translations after a transfer. 
*/ @@ -735,7 +858,7 @@ iopte_t *iopte; iopte = iommu->page_table + - ((sglist[0].dvma_address - iommu->page_table_map_base) >> PAGE_SHIFT); + ((sglist[0].dma_address - iommu->page_table_map_base) >> PAGE_SHIFT); ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL; } @@ -752,15 +875,15 @@ } while (((long)pci_iommu_read(matchreg)) < 0L); } else { unsigned long i, npages; - u32 bus_addr; + u64 bus_addr; - bus_addr = sglist[0].dvma_address & PAGE_MASK; + bus_addr = sglist[0].dma_address & PAGE_MASK; for(i = 1; i < nelems; i++) - if (!sglist[i].dvma_length) + if (!sglist[i].dma_length) break; i--; - npages = (PAGE_ALIGN(sglist[i].dvma_address + sglist[i].dvma_length) - bus_addr) >> PAGE_SHIFT; + npages = (PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) - bus_addr) >> PAGE_SHIFT; for (i = 0; i < npages; i++, bus_addr += PAGE_SIZE) pci_iommu_write(strbuf->strbuf_pflush, bus_addr); } @@ -774,10 +897,19 @@ spin_unlock_irqrestore(&iommu->lock, flags); } -int pci_dma_supported(struct pci_dev *pdev, dma_addr_t device_mask) +void pci64_dma_sync_sg(struct pci_dev *pdev, struct scatterlist *sglist, + int nelems, int direction) +{ + if (!(pdev->dma_flags & __PCI_DMA_FLAG_MUST_DAC)) + return pci_dma_sync_sg(pdev, sglist, nelems, direction); + + /* If doing real DAC, there is nothing to do. */ +} + +int pci_dma_supported(struct pci_dev *pdev, u64 device_mask) { struct pcidev_cookie *pcp = pdev->sysdata; - u32 dma_addr_mask; + u64 dma_addr_mask; if (pdev == NULL) { dma_addr_mask = 0xffffffff; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/arch/sparc64/kernel/sbus.c linux/arch/sparc64/kernel/sbus.c --- /opt/kernel/linux-2.4.9/arch/sparc64/kernel/sbus.c Fri Aug 24 13:46:12 2001 +++ linux/arch/sparc64/kernel/sbus.c Thu Aug 23 09:28:51 2001 @@ -376,18 +376,24 @@ spin_unlock_irqrestore(&iommu->lock, flags); } -static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, unsigned long iopte_bits) +#define SG_ENT_PHYS_ADDRESS(SG) \ + ((SG)->address ? \ + __pa((SG)->address) : \ + (__pa(page_address((SG)->page)) + (SG)->offset)) + +static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg, int nused, int nelems, unsigned long iopte_bits) { struct scatterlist *dma_sg = sg; + struct scatterlist *sg_end = sg + nelems; int i; for (i = 0; i < nused; i++) { unsigned long pteval = ~0UL; - u32 dma_npages; + u64 dma_npages; - dma_npages = ((dma_sg->dvma_address & (PAGE_SIZE - 1UL)) + - dma_sg->dvma_length + - ((u32)(PAGE_SIZE - 1UL))) >> PAGE_SHIFT; + dma_npages = ((dma_sg->dma_address & (PAGE_SIZE - 1UL)) + + dma_sg->dma_length + + ((PAGE_SIZE - 1UL))) >> PAGE_SHIFT; do { unsigned long offset; signed int len; @@ -400,7 +406,7 @@ for (;;) { unsigned long tmp; - tmp = (unsigned long) __pa(sg->address); + tmp = (unsigned long) SG_ENT_PHYS_ADDRESS(sg); len = sg->length; if (((tmp ^ pteval) >> PAGE_SHIFT) != 0UL) { pteval = tmp & PAGE_MASK; @@ -432,10 +438,11 @@ * adjusting pteval along the way. Stop when we * detect a page crossing event. */ - while ((pteval << (64 - PAGE_SHIFT)) != 0UL && - pteval == __pa(sg->address) && + while (sg < sg_end && + (pteval << (64 - PAGE_SHIFT)) != 0UL && + (pteval == SG_ENT_PHYS_ADDRESS(sg)) && ((pteval ^ - (__pa(sg->address) + sg->length - 1UL)) >> PAGE_SHIFT) == 0UL) { + (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> PAGE_SHIFT) == 0UL) { pteval += sg->length; sg++; } @@ -461,8 +468,13 @@ /* Fast path single entry scatterlists. 
*/ if (nents == 1) { - sg->dvma_address = sbus_map_single(sdev, sg->address, sg->length, dir); - sg->dvma_length = sg->length; + sg->dma_address = (dma64_addr_t) + sbus_map_single(sdev, + (sg->address ? + sg->address : + (page_address(sg->page) + sg->offset)), + sg->length, dir); + sg->dma_length = sg->length; return 1; } @@ -478,8 +490,8 @@ sgtmp = sg; used = nents; - while (used && sgtmp->dvma_length) { - sgtmp->dvma_address += dma_base; + while (used && sgtmp->dma_length) { + sgtmp->dma_address += dma_base; sgtmp++; used--; } @@ -489,7 +501,7 @@ if (dir != SBUS_DMA_TODEVICE) iopte_bits |= IOPTE_WRITE; - fill_sg(iopte, sg, used, iopte_bits); + fill_sg(iopte, sg, used, nents, iopte_bits); #ifdef VERIFY_SG verify_sglist(sg, nents, iopte, npages); #endif @@ -507,22 +519,22 @@ { unsigned long size, flags; struct sbus_iommu *iommu; - u32 dvma_base; + u64 dvma_base; int i; /* Fast path single entry scatterlists. */ if (nents == 1) { - sbus_unmap_single(sdev, sg->dvma_address, sg->dvma_length, direction); + sbus_unmap_single(sdev, sg->dma_address, sg->dma_length, direction); return; } - dvma_base = sg[0].dvma_address & PAGE_MASK; + dvma_base = sg[0].dma_address & PAGE_MASK; for (i = 0; i < nents; i++) { - if (sg[i].dvma_length == 0) + if (sg[i].dma_length == 0) break; } i--; - size = PAGE_ALIGN(sg[i].dvma_address + sg[i].dvma_length) - dvma_base; + size = PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - dvma_base; iommu = sdev->bus->iommu; spin_lock_irqsave(&iommu->lock, flags); @@ -547,16 +559,16 @@ { struct sbus_iommu *iommu = sdev->bus->iommu; unsigned long flags, size; - u32 base; + u64 base; int i; - base = sg[0].dvma_address & PAGE_MASK; + base = sg[0].dma_address & PAGE_MASK; for (i = 0; i < nents; i++) { - if (sg[i].dvma_length == 0) + if (sg[i].dma_length == 0) break; } i--; - size = PAGE_ALIGN(sg[i].dvma_address + sg[i].dvma_length) - base; + size = PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - base; spin_lock_irqsave(&iommu->lock, flags); strbuf_flush(iommu, base, size >> PAGE_SHIFT); diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/arch/sparc64/kernel/sparc64_ksyms.c linux/arch/sparc64/kernel/sparc64_ksyms.c --- /opt/kernel/linux-2.4.9/arch/sparc64/kernel/sparc64_ksyms.c Fri Aug 24 13:46:12 2001 +++ linux/arch/sparc64/kernel/sparc64_ksyms.c Thu Aug 23 09:28:51 2001 @@ -215,12 +215,20 @@ EXPORT_SYMBOL(insl); EXPORT_SYMBOL(pci_alloc_consistent); EXPORT_SYMBOL(pci_free_consistent); +EXPORT_SYMBOL(pci64_alloc_consistent); +EXPORT_SYMBOL(pci64_free_consistent); EXPORT_SYMBOL(pci_map_single); EXPORT_SYMBOL(pci_unmap_single); +EXPORT_SYMBOL(pci64_map_page); +EXPORT_SYMBOL(pci64_unmap_page); EXPORT_SYMBOL(pci_map_sg); EXPORT_SYMBOL(pci_unmap_sg); +EXPORT_SYMBOL(pci64_map_sg); +EXPORT_SYMBOL(pci64_unmap_sg); EXPORT_SYMBOL(pci_dma_sync_single); +EXPORT_SYMBOL(pci64_dma_sync_single); EXPORT_SYMBOL(pci_dma_sync_sg); +EXPORT_SYMBOL(pci64_dma_sync_sg); EXPORT_SYMBOL(pci_dma_supported); #endif diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/block/cciss.c linux/drivers/block/cciss.c --- /opt/kernel/linux-2.4.9/drivers/block/cciss.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/block/cciss.c Mon Aug 27 12:07:24 2001 @@ -605,7 +605,7 @@ { buff = kmalloc(iocommand.buf_size, GFP_KERNEL); if( buff == NULL) - return -EFAULT; + return -ENOMEM; } if (iocommand.Request.Type.Direction == XFER_WRITE) { @@ -674,7 +674,7 @@ { kfree(buff); cmd_free(h, c, 0); - return( -EFAULT); + return( -EFAULT); } if (iocommand.Request.Type.Direction == XFER_READ) @@ -1119,20 
+1119,22 @@ static inline void complete_command( CommandList_struct *cmd, int timeout) { int status = 1; - int i; + int i, ddir; u64bit temp64; if (timeout) status = 0; /* unmap the DMA mapping for all the scatter gather elements */ + if (cmd->Request.Type.Direction == XFER_READ) + ddir = PCI_DMA_FROMDEVICE; + else + ddir = PCI_DMA_TODEVICE; for(i=0; iHeader.SGList; i++) { temp64.val32.lower = cmd->SG[i].Addr.lower; temp64.val32.upper = cmd->SG[i].Addr.upper; - pci_unmap_single(hba[cmd->ctlr]->pdev, - temp64.val, cmd->SG[i].Len, - (cmd->Request.Type.Direction == XFER_READ) ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + pci64_unmap_page(hba[cmd->ctlr]->pdev, + temp64.val, cmd->SG[i].Len, ddir); } if(cmd->err_info->CommandStatus != 0) @@ -1225,7 +1227,7 @@ static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, struct buffer_head *bh, int max_segments) { - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) + if (BH_CONTIG(rq->bhtail, bh)) return 1; return cpq_new_segment(q, rq, max_segments); } @@ -1233,7 +1235,7 @@ static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, struct buffer_head *bh, int max_segments) { - if (bh->b_data + bh->b_size == rq->bh->b_data) + if (BH_CONTIG(bh, rq->bh)) return 1; return cpq_new_segment(q, rq, max_segments); } @@ -1243,7 +1245,7 @@ { int total_segments = rq->nr_segments + nxt->nr_segments; - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) + if (BH_CONTIG(rq->bhtail, nxt->bh)) total_segments--; if (total_segments > MAXSGENTRIES) @@ -1264,21 +1266,20 @@ ctlr_info_t *h= q->queuedata; CommandList_struct *c; int log_unit, start_blk, seg, sect; - char *lastdataend; + unsigned long long lastdataend; struct buffer_head *bh; struct list_head *queue_head = &q->queue_head; struct request *creq; u64bit temp64; - struct my_sg tmp_sg[MAXSGENTRIES]; - int i; + struct scatterlist tmp_sg[MAXSGENTRIES]; + int i, ddir; - // Loop till the queue is empty if or it is plugged - while (1) - { - if (q->plugged || list_empty(queue_head)) { - start_io(h); - return; - } + if (q->plugged) + goto startio; + +next: + if (list_empty(queue_head)) + goto startio; creq = blkdev_entry_next_request(queue_head); if (creq->nr_segments > MAXSGENTRIES) @@ -1290,15 +1291,12 @@ h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); complete_buffers(creq->bh, 0); - start_io(h); - return; + goto startio; } if (( c = cmd_alloc(h, 1)) == NULL) - { - start_io(h); - return; - } + goto startio; + c->cmd_type = CMD_RWREQ; bh = c->bh = creq->bh; @@ -1322,36 +1320,37 @@ printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",(int) creq->sector, (int) creq->nr_sectors); #endif /* CCISS_DEBUG */ - seg = 0; - lastdataend = NULL; - sect = 0; + seg = sect = 0; + lastdataend = ~0ULL; while(bh) { sect += bh->b_size/512; - if (bh->b_data == lastdataend) + if (bh_phys(bh) == lastdataend) { // tack it on to the last segment - tmp_sg[seg-1].len +=bh->b_size; + tmp_sg[seg-1].length +=bh->b_size; lastdataend += bh->b_size; } else { if (seg == MAXSGENTRIES) BUG(); - tmp_sg[seg].len = bh->b_size; - tmp_sg[seg].start_addr = bh->b_data; - lastdataend = bh->b_data + bh->b_size; + tmp_sg[seg].page = bh->b_page; + tmp_sg[seg].length = bh->b_size; + tmp_sg[seg].offset = bh_offset(bh); + lastdataend = bh_phys(bh) + bh->b_size; seg++; } bh = bh->b_reqnext; } + /* get the DMA records for the setup */ - for (i=0; iSG[i].Len = tmp_sg[i].len; - temp64.val = (__u64) pci_map_single( h->pdev, - tmp_sg[i].start_addr, - tmp_sg[i].len, - (c->Request.Type.Direction == XFER_READ) ? 
- PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + if (c->Request.Type.Direction == XFER_READ) + ddir = PCI_DMA_FROMDEVICE; + else + ddir = PCI_DMA_TODEVICE; + for (i=0; iSG[i].Len = tmp_sg[i].length; + temp64.val = pci64_map_page(h->pdev, tmp_sg[i].page, + tmp_sg[i].length, tmp_sg[i].offset, ddir); c->SG[i].Addr.lower = temp64.val32.lower; c->SG[i].Addr.upper = temp64.val32.upper; c->SG[i].Ext = 0; // we are not chaining @@ -1375,10 +1374,8 @@ c->Request.CDB[8]= sect & 0xff; c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0; - blkdev_dequeue_request(creq); - /* * ehh, we can't really end the request here since it's not * even started yet. for now it shouldn't hurt though @@ -1392,7 +1389,11 @@ h->Qdepth++; if(h->Qdepth > h->maxQsinceinit) h->maxQsinceinit = h->Qdepth; - } // while loop + + goto next; + +startio: + start_io(h); } static void do_cciss_intr(int irq, void *dev_id, struct pt_regs *regs) @@ -1870,7 +1871,18 @@ sprintf(hba[i]->devname, "cciss%d", i); hba[i]->ctlr = i; hba[i]->pdev = pdev; - + + /* configure PCI DMA stuff */ + pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff); + if (!pci_dac_cycles_ok(pdev)) { + if (!pci_dma_supported(pdev, (u64) 0xffffffff)) { + free_hba(i); + return -ENODEV; + } + printk("cciss: not using DAC cycles\n"); + } else + printk("cciss: using DAC cycles\n"); + if( register_blkdev(MAJOR_NR+i, hba[i]->devname, &cciss_fops)) { printk(KERN_ERR "cciss: Unable to get major number " @@ -1938,9 +1950,10 @@ cciss_procinit(i); q = BLK_DEFAULT_QUEUE(MAJOR_NR + i); - q->queuedata = hba[i]; - blk_init_queue(q, do_cciss_request); - blk_queue_headactive(q, 0); + q->queuedata = hba[i]; + blk_init_queue(q, do_cciss_request); + blk_queue_bounce_limit(q, hba[i]->pdev->dma_mask); + blk_queue_headactive(q, 0); /* fill in the other Kernel structs */ blksize_size[MAJOR_NR+i] = hba[i]->blocksizes; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/block/cciss.h linux/drivers/block/cciss.h --- /opt/kernel/linux-2.4.9/drivers/block/cciss.h Fri Aug 24 13:46:12 2001 +++ linux/drivers/block/cciss.h Fri Aug 24 09:15:54 2001 @@ -15,11 +15,6 @@ #define MAJOR_NR COMPAQ_CISS_MAJOR -struct my_sg { - int len; - char *start_addr; -}; - struct ctlr_info; typedef struct ctlr_info ctlr_info_t; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/block/cpqarray.c linux/drivers/block/cpqarray.c --- /opt/kernel/linux-2.4.9/drivers/block/cpqarray.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/block/cpqarray.c Mon Aug 27 12:08:00 2001 @@ -367,7 +367,7 @@ static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, struct buffer_head *bh, int max_segments) { - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) + if (BH_CONTIG(rq->bhtail, bh)) return 1; return cpq_new_segment(q, rq, max_segments); } @@ -375,7 +375,7 @@ static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, struct buffer_head *bh, int max_segments) { - if (bh->b_data + bh->b_size == rq->bh->b_data) + if (BH_CONTIG(bh, rq->bh)) return 1; return cpq_new_segment(q, rq, max_segments); } @@ -385,7 +385,7 @@ { int total_segments = rq->nr_segments + nxt->nr_segments; - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) + if (BH_CONTIG(rq->bhtail, nxt->bh)) total_segments--; if (total_segments > SG_MAX) @@ -532,6 +532,7 @@ q = BLK_DEFAULT_QUEUE(MAJOR_NR + i); q->queuedata = hba[i]; blk_init_queue(q, do_ida_request); + blk_queue_bounce_limit(q, hba[i]->pci_dev->dma_mask); blk_queue_headactive(q, 0); blksize_size[MAJOR_NR+i] = ida_blocksizes + 
(i*256); hardsect_size[MAJOR_NR+i] = ida_hardsizes + (i*256); @@ -923,20 +924,19 @@ ctlr_info_t *h = q->queuedata; cmdlist_t *c; int seg, sect; - char *lastdataend; + unsigned long lastdataend; struct list_head * queue_head = &q->queue_head; struct buffer_head *bh; struct request *creq; - struct my_sg tmp_sg[SG_MAX]; + struct scatterlist tmp_sg[SG_MAX]; int i; -// Loop till the queue is empty if or it is plugged - while (1) -{ - if (q->plugged || list_empty(queue_head)) { - start_io(h); - return; - } + if (q->plugged) + goto startio; + +next: + if (list_empty(queue_head)) + goto startio; creq = blkdev_entry_next_request(queue_head); if (creq->nr_segments > SG_MAX) @@ -948,15 +948,11 @@ h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); complete_buffers(creq->bh, 0); - start_io(h); - return; + goto startio; } if ((c = cmd_alloc(h,1)) == NULL) - { - start_io(h); - return; - } + goto startio; bh = creq->bh; @@ -973,19 +969,20 @@ printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors); ); - seg = 0; lastdataend = NULL; - sect = 0; + seg = sect = 0; + lastdataend = ~0UL; while(bh) { sect += bh->b_size/512; - if (bh->b_data == lastdataend) { - tmp_sg[seg-1].size += bh->b_size; + if (bh_phys(bh) == lastdataend) { + tmp_sg[seg-1].length += bh->b_size; lastdataend += bh->b_size; } else { if (seg == SG_MAX) BUG(); - tmp_sg[seg].size = bh->b_size; - tmp_sg[seg].start_addr = bh->b_data; - lastdataend = bh->b_data + bh->b_size; + tmp_sg[seg].page = bh->b_page; + tmp_sg[seg].length = bh->b_size; + tmp_sg[seg].offset = bh_offset(bh); + lastdataend = bh_phys(bh) + bh->b_size; seg++; } bh = bh->b_reqnext; @@ -993,10 +990,10 @@ /* Now do all the DMA Mappings */ for( i=0; i < seg; i++) { - c->req.sg[i].size = tmp_sg[i].size; - c->req.sg[i].addr = (__u32) pci_map_single( - h->pci_dev, tmp_sg[i].start_addr, - tmp_sg[i].size, + c->req.sg[i].size = tmp_sg[i].length; + c->req.sg[i].addr = (__u32) pci_map_page( + h->pci_dev, tmp_sg[i].page, tmp_sg[i].length, + tmp_sg[i].offset, (creq->cmd == READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); } @@ -1030,7 +1027,11 @@ h->Qdepth++; if (h->Qdepth > h->maxQsinceinit) h->maxQsinceinit = h->Qdepth; - } // while loop + + goto next; + +startio: + start_io(h); } /* @@ -1103,7 +1104,7 @@ /* unmap the DMA mapping for all the scatter gather elements */ for(i=0; ireq.hdr.sg_cnt; i++) { - pci_unmap_single(hba[cmd->ctlr]->pci_dev, + pci_unmap_page(hba[cmd->ctlr]->pci_dev, cmd->req.sg[i].addr, cmd->req.sg[i].size, (cmd->req.hdr.cmd == IDA_READ) ? 
PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); } diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/block/cpqarray.h linux/drivers/block/cpqarray.h --- /opt/kernel/linux-2.4.9/drivers/block/cpqarray.h Fri Aug 24 13:46:12 2001 +++ linux/drivers/block/cpqarray.h Fri Aug 24 09:26:24 2001 @@ -56,11 +56,6 @@ #ifdef __KERNEL__ -struct my_sg { - int size; - char *start_addr; -}; - struct ctlr_info; typedef struct ctlr_info ctlr_info_t; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/block/elevator.c linux/drivers/block/elevator.c --- /opt/kernel/linux-2.4.9/drivers/block/elevator.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/block/elevator.c Thu Aug 23 09:28:51 2001 @@ -110,7 +110,6 @@ break; } else if (__rq->sector - count == bh->b_rsector) { ret = ELEVATOR_FRONT_MERGE; - __rq->elevator_sequence -= count; *req = __rq; break; } diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c --- /opt/kernel/linux-2.4.9/drivers/block/ll_rw_blk.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/block/ll_rw_blk.c Fri Aug 24 09:14:35 2001 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -124,6 +125,8 @@ */ static int queue_nr_requests, batch_requests; +unsigned long blk_max_low_pfn; + static inline int get_max_sectors(kdev_t dev) { if (!max_sectors[MAJOR(dev)]) @@ -131,7 +134,7 @@ return max_sectors[MAJOR(dev)][MINOR(dev)]; } -inline request_queue_t *__blk_get_queue(kdev_t dev) +inline request_queue_t *blk_get_queue(kdev_t dev) { struct blk_dev_struct *bdev = blk_dev + MAJOR(dev); @@ -141,22 +144,6 @@ return &blk_dev[MAJOR(dev)].request_queue; } -/* - * NOTE: the device-specific queue() functions - * have to be atomic! - */ -request_queue_t *blk_get_queue(kdev_t dev) -{ - request_queue_t *ret; - unsigned long flags; - - spin_lock_irqsave(&io_request_lock,flags); - ret = __blk_get_queue(dev); - spin_unlock_irqrestore(&io_request_lock,flags); - - return ret; -} - static int __blk_cleanup_queue(struct list_head *head) { struct request *rq; @@ -261,6 +248,57 @@ q->make_request_fn = mfn; } +/** + * blk_queue_bounce_limit - set bounce buffer limit for queue + * @q: the request queue for the device + * @dma_addr: bus address limit + * + * Description: + * Different hardware can have different requirements as to what pages + * it can do I/O directly to. A low level driver can call + * blk_queue_bounce_limit to have lower memory pages allocated as bounce + * buffers for doing I/O to pages residing above @page. By default + * the block layer sets this to the highest numbered "low" memory page. + **/ +void blk_queue_bounce_limit(request_queue_t *q, dma64_addr_t dma_addr) +{ + unsigned long mb = dma_addr >> 20; + struct page *bounce_page = mem_map + (dma_addr >> PAGE_SHIFT); + + /* + * just make sure that no pages are considered above this one... + */ + if (dma_addr == BLK_BOUNCE_ANY) + bounce_page = (struct page *) BLK_BOUNCE_ANY; + + /* + * keep this for debugging for now... + */ + printk("blk: queue %p, ", q); + if (dma_addr == BLK_BOUNCE_ANY) + printk("no I/O memory limit\n"); + else if (dma_addr != BLK_BOUNCE_HIGH) + printk("I/O limit %luMb (mask %Lx)\n", mb, (u64) dma_addr); + + q->bounce_limit = bounce_page; +} + +/* + * can we merge the two segments, or do we need to start a new one? 
+ */ +inline int blk_seg_merge_ok(request_queue_t *q, struct buffer_head *bh, + struct buffer_head *nxt) +{ + if (!BH_CONTIG(bh, nxt)) + return 0; + + if ((bh_phys(bh) | 0xffffffff) == + ((bh_phys(nxt) + nxt->b_size - 1) | 0xffffffff)) + return 1; + + return 0; +} + static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) { if (req->nr_segments < max_segments) { @@ -273,16 +311,18 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req, struct buffer_head *bh, int max_segments) { - if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) + if (blk_seg_merge_ok(q, req->bhtail, bh)) return 1; + return ll_new_segment(q, req, max_segments); } static int ll_front_merge_fn(request_queue_t *q, struct request *req, struct buffer_head *bh, int max_segments) { - if (bh->b_data + bh->b_size == req->bh->b_data) + if (blk_seg_merge_ok(q, bh, req->bh)) return 1; + return ll_new_segment(q, req, max_segments); } @@ -291,7 +331,7 @@ { int total_segments = req->nr_segments + next->nr_segments; - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + if (blk_seg_merge_ok(q, req->bhtail, next->bh)) total_segments--; if (total_segments > max_segments) @@ -430,6 +470,8 @@ */ q->plug_device_fn = generic_plug_device; q->head_active = 1; + + blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); } #define blkdev_free_rq(list) list_entry((list)->next, struct request, table); @@ -696,9 +738,7 @@ * driver. Create a bounce buffer if the buffer data points into * high memory - keep the original buffer otherwise. */ -#if CONFIG_HIGHMEM - bh = create_bounce(rw, bh); -#endif + bh = blk_queue_bounce(q, rw, bh); /* look for a free request. */ /* @@ -743,8 +783,13 @@ elevator->elevator_merge_cleanup_fn(q, req, count); bh->b_reqnext = req->bh; req->bh = bh; + /* + * may not be valid, but queues not having bounce + * enabled for highmem pages must not look at + * ->buffer anyway + */ req->buffer = bh->b_data; - req->current_nr_sectors = count; + req->current_nr_sectors = req->hard_cur_sectors = count; req->sector = req->hard_sector = sector; req->nr_sectors = req->hard_nr_sectors += count; blk_started_io(count); @@ -794,7 +839,7 @@ req->errors = 0; req->hard_sector = req->sector = sector; req->hard_nr_sectors = req->nr_sectors = count; - req->current_nr_sectors = count; + req->current_nr_sectors = req->hard_cur_sectors = count; req->nr_segments = 1; /* Always 1 for a new request. */ req->nr_hw_segments = 1; /* Always 1 for a new request. 
*/ req->buffer = bh->b_data; @@ -1104,6 +1149,7 @@ req->nr_sectors = req->hard_nr_sectors; req->current_nr_sectors = bh->b_size >> 9; + req->hard_cur_sectors = req->current_nr_sectors; if (req->nr_sectors < req->current_nr_sectors) { req->nr_sectors = req->current_nr_sectors; printk("end_request: buffer-list destroyed\n"); @@ -1152,7 +1198,7 @@ */ queue_nr_requests = 64; if (total_ram > MB(32)) - queue_nr_requests = 128; + queue_nr_requests = 256; /* * Batch frees according to queue length @@ -1160,6 +1206,8 @@ batch_requests = queue_nr_requests >> 3; printk("block: %d slots per queue, batch=%d\n", queue_nr_requests, batch_requests); + blk_max_low_pfn = max_low_pfn; + #ifdef CONFIG_AMIGA_Z2RAM z2_init(); #endif @@ -1272,10 +1320,12 @@ EXPORT_SYMBOL(end_that_request_last); EXPORT_SYMBOL(blk_init_queue); EXPORT_SYMBOL(blk_get_queue); -EXPORT_SYMBOL(__blk_get_queue); EXPORT_SYMBOL(blk_cleanup_queue); EXPORT_SYMBOL(blk_queue_headactive); EXPORT_SYMBOL(blk_queue_make_request); EXPORT_SYMBOL(generic_make_request); EXPORT_SYMBOL(blkdev_release_request); EXPORT_SYMBOL(generic_unplug_device); +EXPORT_SYMBOL(blk_queue_bounce_limit); +EXPORT_SYMBOL(blk_max_low_pfn); +EXPORT_SYMBOL(blk_seg_merge_ok); diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/block/loop.c linux/drivers/block/loop.c --- /opt/kernel/linux-2.4.9/drivers/block/loop.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/block/loop.c Thu Aug 23 09:28:51 2001 @@ -453,9 +453,7 @@ goto err; } -#if CONFIG_HIGHMEM - rbh = create_bounce(rw, rbh); -#endif + rbh = blk_queue_bounce(q, rw, rbh); /* * file backed, queue for loop_thread to handle diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/ide/hpt34x.c linux/drivers/ide/hpt34x.c --- /opt/kernel/linux-2.4.9/drivers/ide/hpt34x.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/ide/hpt34x.c Thu Aug 23 09:28:51 2001 @@ -425,6 +425,7 @@ hwif->autodma = 0; hwif->dmaproc = &hpt34x_dmaproc; + hwif->highmem = 1; } else { hwif->drives[0].autotune = 1; hwif->drives[1].autotune = 1; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/ide/hpt366.c linux/drivers/ide/hpt366.c --- /opt/kernel/linux-2.4.9/drivers/ide/hpt366.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/ide/hpt366.c Thu Aug 23 09:28:51 2001 @@ -730,6 +730,7 @@ hwif->autodma = 1; else hwif->autodma = 0; + hwif->highmem = 1; } else { hwif->autodma = 0; hwif->drives[0].autotune = 1; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/ide/ide-disk.c linux/drivers/ide/ide-disk.c --- /opt/kernel/linux-2.4.9/drivers/ide/ide-disk.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/ide/ide-disk.c Thu Aug 23 09:28:51 2001 @@ -27,9 +27,10 @@ * Version 1.09 added increment of rq->sector in ide_multwrite * added UDMA 3/4 reporting * Version 1.10 request queue changes, Ultra DMA 100 + * Version 1.11 Highmem I/O support, Jens Axboe */ -#define IDEDISK_VERSION "1.10" +#define IDEDISK_VERSION "1.11" #undef REALLY_SLOW_IO /* most systems can safely undef this */ @@ -139,7 +140,9 @@ byte stat; int i; unsigned int msect, nsect; + unsigned long flags; struct request *rq; + char *to; /* new way for dealing with premature shared PCI interrupts */ if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) { @@ -150,8 +153,8 @@ ide_set_handler(drive, &read_intr, WAIT_CMD, NULL); return ide_started; } + msect = drive->mult_count; - read_next: rq = HWGROUP(drive)->rq; if (msect) { @@ -160,14 +163,15 @@ msect -= nsect; } else nsect = 1; - idedisk_input_data(drive, rq->buffer, nsect * SECTOR_WORDS); 
+ to = ide_map_buffer(rq, &flags); + idedisk_input_data(drive, to, nsect * SECTOR_WORDS); #ifdef DEBUG printk("%s: read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n", drive->name, rq->sector, rq->sector+nsect-1, (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect); #endif + ide_unmap_buffer(to, &flags); rq->sector += nsect; - rq->buffer += nsect<<9; rq->errors = 0; i = (rq->nr_sectors -= nsect); if (((long)(rq->current_nr_sectors -= nsect)) <= 0) @@ -201,14 +205,16 @@ #endif if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) { rq->sector++; - rq->buffer += 512; rq->errors = 0; i = --rq->nr_sectors; --rq->current_nr_sectors; if (((long)rq->current_nr_sectors) <= 0) ide_end_request(1, hwgroup); if (i > 0) { - idedisk_output_data (drive, rq->buffer, SECTOR_WORDS); + unsigned long flags; + char *to = ide_map_buffer(rq, &flags); + idedisk_output_data (drive, to, SECTOR_WORDS); + ide_unmap_buffer(to, &flags); ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); return ide_started; } @@ -238,14 +244,14 @@ do { char *buffer; int nsect = rq->current_nr_sectors; - + unsigned long flags; + if (nsect > mcount) nsect = mcount; mcount -= nsect; - buffer = rq->buffer; + buffer = ide_map_buffer(rq, &flags); rq->sector += nsect; - rq->buffer += nsect << 9; rq->nr_sectors -= nsect; rq->current_nr_sectors -= nsect; @@ -259,7 +265,7 @@ } else { rq->bh = bh; rq->current_nr_sectors = bh->b_size >> 9; - rq->buffer = bh->b_data; + rq->hard_cur_sectors = rq->current_nr_sectors; } } @@ -268,6 +274,7 @@ * re-entering us on the last transfer. */ idedisk_output_data(drive, buffer, nsect<<7); + ide_unmap_buffer(buffer, &flags); } while (mcount); return 0; @@ -452,8 +459,11 @@ return ide_stopped; } } else { + unsigned long flags; + char *buffer = ide_map_buffer(rq, &flags); ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); - idedisk_output_data(drive, rq->buffer, SECTOR_WORDS); + idedisk_output_data(drive, buffer, SECTOR_WORDS); + ide_unmap_buffer(buffer, &flags); } return ide_started; } diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/ide/ide-dma.c linux/drivers/ide/ide-dma.c --- /opt/kernel/linux-2.4.9/drivers/ide/ide-dma.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/ide/ide-dma.c Fri Aug 24 09:02:26 2001 @@ -231,33 +231,42 @@ { struct buffer_head *bh; struct scatterlist *sg = hwif->sg_table; + unsigned long lastdataend = ~0UL; int nents = 0; if (hwif->sg_dma_active) BUG(); - + if (rq->cmd == READ) hwif->sg_dma_direction = PCI_DMA_FROMDEVICE; else hwif->sg_dma_direction = PCI_DMA_TODEVICE; + bh = rq->bh; + lastdataend = 0; do { - unsigned char *virt_addr = bh->b_data; - unsigned int size = bh->b_size; - - if (nents >= PRD_ENTRIES) - return 0; + /* + * continue segment from before? 
+ */ + if (bh_phys(bh) == lastdataend) { + sg[nents - 1].length += bh->b_size; + lastdataend += bh->b_size; + } else { + struct scatterlist *sge; + /* + * start new segment + */ + if (nents >= PRD_ENTRIES) + return 0; - while ((bh = bh->b_reqnext) != NULL) { - if ((virt_addr + size) != (unsigned char *) bh->b_data) - break; - size += bh->b_size; + sge = &sg[nents]; + sge->page = bh->b_page; + sge->length = bh->b_size; + sge->offset = bh_offset(bh); + lastdataend = bh_phys(bh) + bh->b_size; + nents++; } - memset(&sg[nents], 0, sizeof(*sg)); - sg[nents].address = virt_addr; - sg[nents].length = size; - nents++; - } while (bh != NULL); + } while ((bh = bh->b_reqnext) != NULL); return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction); } @@ -285,7 +294,7 @@ return 0; sg = HWIF(drive)->sg_table; - while (i && sg_dma_len(sg)) { + while (i) { u32 cur_addr; u32 cur_len; @@ -299,36 +308,35 @@ */ while (cur_len) { - if (count++ >= PRD_ENTRIES) { - printk("%s: DMA table too small\n", drive->name); - goto use_pio_instead; - } else { - u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); - - if (bcount > cur_len) - bcount = cur_len; - *table++ = cpu_to_le32(cur_addr); - xcount = bcount & 0xffff; - if (is_trm290_chipset) - xcount = ((xcount >> 2) - 1) << 16; - if (xcount == 0x0000) { - /* - * Most chipsets correctly interpret a length of 0x0000 as 64KB, - * but at least one (e.g. CS5530) misinterprets it as zero (!). - * So here we break the 64KB entry into two 32KB entries instead. - */ - if (count++ >= PRD_ENTRIES) { - printk("%s: DMA table too small\n", drive->name); - goto use_pio_instead; - } - *table++ = cpu_to_le32(0x8000); - *table++ = cpu_to_le32(cur_addr + 0x8000); - xcount = 0x8000; - } - *table++ = cpu_to_le32(xcount); - cur_addr += bcount; - cur_len -= bcount; + u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); + + if (count++ >= PRD_ENTRIES) + BUG(); + + if (bcount > cur_len) + bcount = cur_len; + *table++ = cpu_to_le32(cur_addr); + xcount = bcount & 0xffff; + if (is_trm290_chipset) + xcount = ((xcount >> 2) - 1) << 16; + if (xcount == 0x0000) { + /* + * Most chipsets correctly interpret a length + * of 0x0000 as 64KB, but at least one + * (e.g. CS5530) misinterprets it as zero (!). + * So here we break the 64KB entry into two + * 32KB entries instead. + */ + if (count++ >= PRD_ENTRIES) + goto use_pio_instead; + + *table++ = cpu_to_le32(0x8000); + *table++ = cpu_to_le32(cur_addr + 0x8000); + xcount = 0x8000; } + *table++ = cpu_to_le32(xcount); + cur_addr += bcount; + cur_len -= bcount; } sg++; @@ -512,6 +520,18 @@ } #endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */ +static inline void ide_toggle_bounce(ide_drive_t *drive, int on) +{ + dma64_addr_t addr = BLK_BOUNCE_HIGH; + + if (!PCI_DMA_BUS_IS_PHYS) + addr = BLK_BOUNCE_ANY; + if (on && drive->media == ide_disk && HWIF(drive)->highmem) + addr = HWIF(drive)->pci_dev->dma_mask; + + blk_queue_bounce_limit(&drive->queue, addr); +} + /* * ide_dmaproc() initiates/aborts DMA read/write operations on a drive. 
* @@ -534,18 +554,20 @@ ide_hwif_t *hwif = HWIF(drive); unsigned long dma_base = hwif->dma_base; byte unit = (drive->select.b.unit & 0x01); - unsigned int count, reading = 0; + unsigned int count, reading = 0, set_high = 1; byte dma_stat; switch (func) { case ide_dma_off: printk("%s: DMA disabled\n", drive->name); + set_high = 0; case ide_dma_off_quietly: outb(inb(dma_base+2) & ~(1<<(5+unit)), dma_base+2); case ide_dma_on: drive->using_dma = (func == ide_dma_on); if (drive->using_dma) outb(inb(dma_base+2)|(1<<(5+unit)), dma_base+2); + ide_toggle_bounce(drive, set_high); return 0; case ide_dma_check: return config_drive_for_dma (drive); @@ -676,8 +698,8 @@ request_region(dma_base, num_ports, hwif->name); hwif->dma_base = dma_base; hwif->dmatable_cpu = pci_alloc_consistent(hwif->pci_dev, - PRD_ENTRIES * PRD_BYTES, - &hwif->dmatable_dma); + PRD_ENTRIES * PRD_BYTES, + &hwif->dmatable_dma); if (hwif->dmatable_cpu == NULL) goto dma_alloc_failure; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/ide/pdc202xx.c linux/drivers/ide/pdc202xx.c --- /opt/kernel/linux-2.4.9/drivers/ide/pdc202xx.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/ide/pdc202xx.c Thu Aug 23 09:28:51 2001 @@ -891,6 +891,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { hwif->dmaproc = &pdc202xx_dmaproc; + hwif->highmem = 1; if (!noautodma) hwif->autodma = 1; } else { diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/ide/piix.c linux/drivers/ide/piix.c --- /opt/kernel/linux-2.4.9/drivers/ide/piix.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/ide/piix.c Thu Aug 23 09:28:51 2001 @@ -521,6 +521,7 @@ if (!hwif->dma_base) return; + hwif->highmem = 1; #ifndef CONFIG_BLK_DEV_IDEDMA hwif->autodma = 0; #else /* CONFIG_BLK_DEV_IDEDMA */ diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/net/acenic.c linux/drivers/net/acenic.c --- /opt/kernel/linux-2.4.9/drivers/net/acenic.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/net/acenic.c Thu Aug 23 09:28:51 2001 @@ -161,10 +161,6 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES #endif -#if (BITS_PER_LONG == 64) -#define ACE_64BIT_PTR 1 -#endif - #ifndef SET_MODULE_OWNER #define SET_MODULE_OWNER(dev) {do{} while(0);} #define ACE_MOD_INC_USE_COUNT MOD_INC_USE_COUNT @@ -186,10 +182,10 @@ #endif #if (LINUX_VERSION_CODE < 0x02032a) -typedef u32 dma_addr_t; +typedef u32 dma64_addr_t; -static inline void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, - dma_addr_t *dma_handle) +static inline void *pci64_alloc_consistent(struct pci_dev *hwdev, size_t size, + dma64_addr_t *dma_handle) { void *virt_ptr; @@ -199,9 +195,15 @@ *dma_handle = virt_to_bus(virt_ptr); return virt_ptr; } -#define pci_free_consistent(cookie, size, ptr, dma_ptr) kfree(ptr) -#define pci_map_single(cookie, address, size, dir) virt_to_bus(address) -#define pci_unmap_single(cookie, address, size, dir) + +#define pci64_free_consistent(cookie, size, ptr, dma_ptr) kfree(ptr) +#define pci64_map_page(cookie, page, off, size, dir) \ + virt_to_bus(page_address(page)+(off)) +#define pci64_unmap_page(cookie, address, size, dir) +#define pci_set_dma_mask(dev, mask) do { } while (0) +#define pci_dac_cycles_ok(dev) (0) +#define pci_dma_supported(dev, mask) \ + (((u64)(mask) & 0xffffffff00000000) == 0 ? 
1 : 0) #endif #if (LINUX_VERSION_CODE < 0x02032b) @@ -259,11 +261,6 @@ #define ace_if_down(dev) {do{} while(0);} #endif -#ifndef pci_set_dma_mask -#define pci_set_dma_mask(dev, mask) dev->dma_mask = mask; -#endif - - #if (LINUX_VERSION_CODE >= 0x02031b) #define NEW_NETINIT #define ACE_PROBE_ARG void @@ -585,7 +582,7 @@ dev->irq = pdev->irq; dev->open = &ace_open; dev->hard_start_xmit = &ace_start_xmit; - dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_HIGHDMA; + dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; if (1) { static void ace_watchdog(struct net_device *dev); dev->tx_timeout = &ace_watchdog; @@ -727,6 +724,8 @@ kfree(dev); continue; } + if (ap->pci_using_dac) + dev->features |= NETIF_F_HIGHDMA; boards_found++; } @@ -793,14 +792,12 @@ struct sk_buff *skb = ap->skb->rx_std_skbuff[i].skb; if (skb) { -#ifndef DUMMY_PCI_UNMAP - dma_addr_t mapping; + dma64_addr_t mapping; mapping = ap->skb->rx_std_skbuff[i].mapping; - pci_unmap_single(ap->pdev, mapping, + pci64_unmap_page(ap->pdev, mapping, ACE_STD_BUFSIZE - (2 + 16), PCI_DMA_FROMDEVICE); -#endif ap->rx_std_ring[i].size = 0; ap->skb->rx_std_skbuff[i].skb = NULL; @@ -812,14 +809,13 @@ struct sk_buff *skb = ap->skb->rx_mini_skbuff[i].skb; if (skb) { -#ifndef DUMMY_PCI_UNMAP - dma_addr_t mapping; + dma64_addr_t mapping; mapping = ap->skb->rx_mini_skbuff[i].mapping; - pci_unmap_single(ap->pdev, mapping, + pci64_unmap_page(ap->pdev, mapping, ACE_MINI_BUFSIZE - (2 + 16), PCI_DMA_FROMDEVICE); -#endif + ap->rx_mini_ring[i].size = 0; ap->skb->rx_mini_skbuff[i].skb = NULL; dev_kfree_skb(skb); @@ -829,14 +825,12 @@ for (i = 0; i < RX_JUMBO_RING_ENTRIES; i++) { struct sk_buff *skb = ap->skb->rx_jumbo_skbuff[i].skb; if (skb) { -#ifndef DUMMY_PCI_UNMAP - dma_addr_t mapping; + dma64_addr_t mapping; mapping = ap->skb->rx_jumbo_skbuff[i].mapping; - pci_unmap_single(ap->pdev, mapping, + pci64_unmap_page(ap->pdev, mapping, ACE_JUMBO_BUFSIZE - (2 + 16), PCI_DMA_FROMDEVICE); -#endif ap->rx_jumbo_ring[i].size = 0; ap->skb->rx_jumbo_skbuff[i].skb = NULL; @@ -896,9 +890,9 @@ RX_JUMBO_RING_ENTRIES + RX_MINI_RING_ENTRIES + RX_RETURN_RING_ENTRIES)); - pci_free_consistent(ap->pdev, size, - ap->rx_std_ring, - ap->rx_ring_base_dma); + pci64_free_consistent(ap->pdev, size, + ap->rx_std_ring, + ap->rx_ring_base_dma); ap->rx_std_ring = NULL; ap->rx_jumbo_ring = NULL; ap->rx_mini_ring = NULL; @@ -906,24 +900,24 @@ } if (ap->evt_ring != NULL) { size = (sizeof(struct event) * EVT_RING_ENTRIES); - pci_free_consistent(ap->pdev, size, - ap->evt_ring, - ap->evt_ring_dma); + pci64_free_consistent(ap->pdev, size, + ap->evt_ring, + ap->evt_ring_dma); ap->evt_ring = NULL; } if (ap->evt_prd != NULL) { - pci_free_consistent(ap->pdev, sizeof(u32), - (void *)ap->evt_prd, ap->evt_prd_dma); + pci64_free_consistent(ap->pdev, sizeof(u32), + (void *)ap->evt_prd, ap->evt_prd_dma); ap->evt_prd = NULL; } if (ap->rx_ret_prd != NULL) { - pci_free_consistent(ap->pdev, sizeof(u32), - (void *)ap->rx_ret_prd, ap->rx_ret_prd_dma); + pci64_free_consistent(ap->pdev, sizeof(u32), + (void *)ap->rx_ret_prd, ap->rx_ret_prd_dma); ap->rx_ret_prd = NULL; } if (ap->tx_csm != NULL) { - pci_free_consistent(ap->pdev, sizeof(u32), - (void *)ap->tx_csm, ap->tx_csm_dma); + pci64_free_consistent(ap->pdev, sizeof(u32), + (void *)ap->tx_csm, ap->tx_csm_dma); ap->tx_csm = NULL; } } @@ -940,8 +934,8 @@ RX_MINI_RING_ENTRIES + RX_RETURN_RING_ENTRIES)); - ap->rx_std_ring = pci_alloc_consistent(ap->pdev, size, - &ap->rx_ring_base_dma); + ap->rx_std_ring = pci64_alloc_consistent(ap->pdev, size, + 
&ap->rx_ring_base_dma); if (ap->rx_std_ring == NULL) goto fail; @@ -951,30 +945,30 @@ size = (sizeof(struct event) * EVT_RING_ENTRIES); - ap->evt_ring = pci_alloc_consistent(ap->pdev, size, &ap->evt_ring_dma); + ap->evt_ring = pci64_alloc_consistent(ap->pdev, size, &ap->evt_ring_dma); if (ap->evt_ring == NULL) goto fail; size = (sizeof(struct tx_desc) * TX_RING_ENTRIES); - ap->tx_ring = pci_alloc_consistent(ap->pdev, size, &ap->tx_ring_dma); + ap->tx_ring = pci64_alloc_consistent(ap->pdev, size, &ap->tx_ring_dma); if (ap->tx_ring == NULL) goto fail; - ap->evt_prd = pci_alloc_consistent(ap->pdev, sizeof(u32), - &ap->evt_prd_dma); + ap->evt_prd = pci64_alloc_consistent(ap->pdev, sizeof(u32), + &ap->evt_prd_dma); if (ap->evt_prd == NULL) goto fail; - ap->rx_ret_prd = pci_alloc_consistent(ap->pdev, sizeof(u32), - &ap->rx_ret_prd_dma); + ap->rx_ret_prd = pci64_alloc_consistent(ap->pdev, sizeof(u32), + &ap->rx_ret_prd_dma); if (ap->rx_ret_prd == NULL) goto fail; - ap->tx_csm = pci_alloc_consistent(ap->pdev, sizeof(u32), - &ap->tx_csm_dma); + ap->tx_csm = pci64_alloc_consistent(ap->pdev, sizeof(u32), + &ap->tx_csm_dma); if (ap->tx_csm == NULL) goto fail; @@ -1000,8 +994,8 @@ ace_free_descriptors(dev); if (ap->info) - pci_free_consistent(ap->pdev, sizeof(struct ace_info), - ap->info, ap->info_dma); + pci64_free_consistent(ap->pdev, sizeof(struct ace_info), + ap->info, ap->info_dma); if (ap->skb) kfree(ap->skb); if (ap->trace_buf) @@ -1195,12 +1189,6 @@ ap->pci_latency); /* - * Make sure to enable the 64 bit DMA mask if we're in a 64bit slot - */ - if (!(pci_state & PCI_32BIT)) - pci_set_dma_mask(ap->pdev, (dma_addr_t)~0ULL); - - /* * Set the max DMA transfer size. Seems that for most systems * the performance is better when no MAX parameter is * set. However for systems enabling PCI write and invalidate, @@ -1294,12 +1282,26 @@ #endif /* + * Configure DMA attributes. + */ + pci_set_dma_mask(ap->pdev, (u64) 0xffffffffffffffff); + if (!pci_dac_cycles_ok(ap->pdev)) { + if (!pci_dma_supported(ap->pdev, (u64) 0xffffffff)) { + ecode = -ENODEV; + goto init_error; + } + ap->pci_using_dac = 0; + } else { + ap->pci_using_dac = 1; + } + + /* * Initialize the generic info block and the command+event rings * and the control blocks for the transmit and receive rings * as they need to be setup once and for all. */ - if (!(info = pci_alloc_consistent(ap->pdev, sizeof(struct ace_info), - &ap->info_dma))) { + if (!(info = pci64_alloc_consistent(ap->pdev, sizeof(struct ace_info), + &ap->info_dma))) { ecode = -EAGAIN; goto init_error; } @@ -1340,12 +1342,8 @@ ace_load_firmware(dev); ap->fw_running = 0; - tmp_ptr = (unsigned long) ap->info_dma; -#ifdef ACE_64BIT_PTR + tmp_ptr = (u64) ap->info_dma; writel(tmp_ptr >> 32, ®s->InfoPtrHi); -#else - writel(0, ®s->InfoPtrHi); -#endif writel(tmp_ptr & 0xffffffff, ®s->InfoPtrLo); memset(ap->evt_ring, 0, EVT_RING_ENTRIES * sizeof(struct event)); @@ -1370,7 +1368,7 @@ tmp_ptr = ap->info_dma; tmp_ptr += (unsigned long) &(((struct ace_info *)0)->s.stats); - set_aceaddr(&info->stats2_ptr, (dma_addr_t) tmp_ptr); + set_aceaddr(&info->stats2_ptr, (dma64_addr_t) tmp_ptr); set_aceaddr(&info->rx_std_ctrl.rngptr, ap->rx_ring_base_dma); info->rx_std_ctrl.max_len = ACE_STD_MTU + ETH_HLEN + 4; @@ -1769,7 +1767,7 @@ for (i = 0; i < nr_bufs; i++) { struct sk_buff *skb; struct rx_desc *rd; - dma_addr_t mapping; + dma64_addr_t mapping; skb = alloc_skb(ACE_STD_BUFSIZE, GFP_ATOMIC); if (!skb) @@ -1779,13 +1777,14 @@ * Make sure IP header starts on a fresh cache line. 
*/ skb_reserve(skb, 2 + 16); - mapping = pci_map_single(ap->pdev, skb->data, + mapping = pci64_map_page(ap->pdev, + virt_to_page(skb->data), + ((unsigned long) skb->data & + ~PAGE_MASK), ACE_STD_BUFSIZE - (2 + 16), PCI_DMA_FROMDEVICE); ap->skb->rx_std_skbuff[idx].skb = skb; -#ifndef DUMMY_PCI_UNMAP ap->skb->rx_std_skbuff[idx].mapping = mapping; -#endif rd = &ap->rx_std_ring[idx]; set_aceaddr(&rd->addr, mapping); @@ -1833,7 +1832,7 @@ for (i = 0; i < nr_bufs; i++) { struct sk_buff *skb; struct rx_desc *rd; - dma_addr_t mapping; + dma64_addr_t mapping; skb = alloc_skb(ACE_MINI_BUFSIZE, GFP_ATOMIC); if (!skb) @@ -1843,13 +1842,14 @@ * Make sure the IP header ends up on a fresh cache line */ skb_reserve(skb, 2 + 16); - mapping = pci_map_single(ap->pdev, skb->data, + mapping = pci64_map_page(ap->pdev, + virt_to_page(skb->data), + ((unsigned long) skb->data & + ~PAGE_MASK), ACE_MINI_BUFSIZE - (2 + 16), PCI_DMA_FROMDEVICE); ap->skb->rx_mini_skbuff[idx].skb = skb; -#ifndef DUMMY_PCI_UNMAP ap->skb->rx_mini_skbuff[idx].mapping = mapping; -#endif rd = &ap->rx_mini_ring[idx]; set_aceaddr(&rd->addr, mapping); @@ -1894,7 +1894,7 @@ for (i = 0; i < nr_bufs; i++) { struct sk_buff *skb; struct rx_desc *rd; - dma_addr_t mapping; + dma64_addr_t mapping; skb = alloc_skb(ACE_JUMBO_BUFSIZE, GFP_ATOMIC); if (!skb) @@ -1904,13 +1904,14 @@ * Make sure the IP header ends up on a fresh cache line */ skb_reserve(skb, 2 + 16); - mapping = pci_map_single(ap->pdev, skb->data, + mapping = pci64_map_page(ap->pdev, + virt_to_page(skb->data), + ((unsigned long) skb->data & + ~PAGE_MASK), ACE_JUMBO_BUFSIZE - (2 + 16), PCI_DMA_FROMDEVICE); ap->skb->rx_jumbo_skbuff[idx].skb = skb; -#ifndef DUMMY_PCI_UNMAP ap->skb->rx_jumbo_skbuff[idx].mapping = mapping; -#endif rd = &ap->rx_jumbo_ring[idx]; set_aceaddr(&rd->addr, mapping); @@ -2113,10 +2114,8 @@ skb = rip->skb; rip->skb = NULL; -#ifndef DUMMY_PCI_UNMAP - pci_unmap_single(ap->pdev, rip->mapping, mapsize, + pci64_unmap_page(ap->pdev, rip->mapping, mapsize, PCI_DMA_FROMDEVICE); -#endif skb_put(skb, retdesc->size); #if 0 /* unncessary */ @@ -2180,22 +2179,19 @@ do { struct sk_buff *skb; -#ifndef DUMMY_PCI_UNMAP - dma_addr_t mapping; -#endif + dma64_addr_t mapping; struct tx_ring_info *info; info = ap->skb->tx_skbuff + idx; skb = info->skb; -#ifndef DUMMY_PCI_UNMAP mapping = info->mapping; if (mapping) { - pci_unmap_single(ap->pdev, mapping, info->maplen, + pci64_unmap_page(ap->pdev, mapping, info->maplen, PCI_DMA_TODEVICE); info->mapping = 0; } -#endif + if (skb) { ap->stats.tx_packets++; ap->stats.tx_bytes += skb->len; @@ -2472,23 +2468,19 @@ for (i = 0; i < TX_RING_ENTRIES; i++) { struct sk_buff *skb; -#ifndef DUMMY_PCI_UNMAP - dma_addr_t mapping; -#endif + dma64_addr_t mapping; struct tx_ring_info *info; info = ap->skb->tx_skbuff + i; skb = info->skb; -#ifndef DUMMY_PCI_UNMAP mapping = info->mapping; if (mapping) { memset(ap->tx_ring+i, 0, sizeof(struct tx_desc)); - pci_unmap_single(ap->pdev, mapping, info->maplen, + pci64_unmap_page(ap->pdev, mapping, info->maplen, PCI_DMA_TODEVICE); info->mapping = 0; } -#endif if (skb) { dev_kfree_skb(skb); info->skb = NULL; @@ -2508,79 +2500,35 @@ return 0; } - -/* - * Following below should be (in more clean form!) in arch/ARCH/kernel/pci_*. - * For now, let it stay here. - */ -#if defined(CONFIG_HIGHMEM) && MAX_SKB_FRAGS -#ifndef DUMMY_PCI_UNMAP -#error Sorry, cannot DMA from high memory on this architecture. 
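(Illustrative aside, not part of the patch: stripped of driver detail, the probe-time recipe acenic adopts above, and which the sk98lin, sungem and qlogicfc conversions below repeat, is sketched here as one function. mydev_setup_dma, struct mydev_priv and its using_dac field are hypothetical names:)

	static int mydev_setup_dma(struct pci_dev *pdev, struct net_device *dev,
				   struct mydev_priv *mp)
	{
		/* ask for the full 64-bit mask first ... */
		pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff);

		if (pci_dac_cycles_ok(pdev)) {
			/* platform and device can use dual-address cycles */
			mp->using_dac = 1;
		} else {
			/* ... otherwise fall back to 32-bit SAC addressing */
			if (!pci_dma_supported(pdev, (u64) 0xffffffff))
				return -ENODEV;	/* no usable DMA configuration */
			mp->using_dac = 0;
		}

		/* only advertise highmem capability when DAC is really usable */
		if (mp->using_dac)
			dev->features |= NETIF_F_HIGHDMA;

		return 0;
	}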
-#endif - -#if defined(CONFIG_X86) -#define DMAADDR_OFFSET 0 -typedef unsigned long long dmaaddr_high_t; -#elif defined(CONFIG_PPC) -#define DMAADDR_OFFSET PCI_DRAM_OFFSET -typedef unsigned long dmaaddr_high_t; -#endif - - -static inline dmaaddr_high_t -pci_map_single_high(struct pci_dev *hwdev, struct page *page, - int offset, size_t size, int dir) -{ - dmaaddr_high_t phys; - - phys = (page-mem_map) * (dmaaddr_high_t) PAGE_SIZE + offset; - - return (phys + DMAADDR_OFFSET); -} - -#else - -typedef unsigned long dmaaddr_high_t; - -static inline dmaaddr_high_t -pci_map_single_high(struct pci_dev *hwdev, struct page *page, - int offset, size_t size, int dir) -{ - return pci_map_single(hwdev, page_address(page) + offset, size, dir); -} - -#endif - - -static inline dmaaddr_high_t +static inline dma64_addr_t ace_map_tx_skb(struct ace_private *ap, struct sk_buff *skb, struct sk_buff *tail, u32 idx) { unsigned long addr; struct tx_ring_info *info; - addr = pci_map_single(ap->pdev, skb->data, skb->len, PCI_DMA_TODEVICE); + addr = pci64_map_page(ap->pdev, + virt_to_page(skb->data), + ((unsigned long) skb->data & + ~PAGE_MASK), + skb->len, PCI_DMA_TODEVICE); info = ap->skb->tx_skbuff + idx; info->skb = tail; -#ifndef DUMMY_PCI_UNMAP info->mapping = addr; info->maplen = skb->len; -#endif return addr; } static inline void -ace_load_tx_bd(struct tx_desc *desc, dmaaddr_high_t addr, u32 flagsize) +ace_load_tx_bd(struct tx_desc *desc, dma64_addr_t addr, u32 flagsize) { #if !USE_TX_COAL_NOW flagsize &= ~BD_FLG_COAL_NOW; #endif -#ifdef ACE_64BIT_PTR desc->addr.addrhi = addr >> 32; -#endif desc->addr.addrlo = addr; desc->flagsize = flagsize; } @@ -2642,16 +2590,16 @@ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct tx_ring_info *info; - dmaaddr_high_t phys; + dma64_addr_t phys; len += frag->size; info = ap->skb->tx_skbuff + idx; desc = ap->tx_ring + idx; - phys = pci_map_single_high(ap->pdev, frag->page, - frag->page_offset, - frag->size, - PCI_DMA_TODEVICE); + phys = pci64_map_page(ap->pdev, frag->page, + frag->page_offset, + frag->size, + PCI_DMA_TODEVICE); flagsize = (frag->size << 16); if (skb->ip_summed == CHECKSUM_HW) @@ -2671,10 +2619,8 @@ } else { info->skb = NULL; } -#ifndef DUMMY_PCI_UNMAP info->mapping = phys; info->maplen = frag->size; -#endif ace_load_tx_bd(desc, phys, flagsize); } } diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/net/acenic.h linux/drivers/net/acenic.h --- /opt/kernel/linux-2.4.9/drivers/net/acenic.h Fri Aug 24 13:46:12 2001 +++ linux/drivers/net/acenic.h Thu Aug 23 09:28:51 2001 @@ -582,16 +582,9 @@ aceaddr stats2_ptr; }; -#if defined(CONFIG_X86) || defined(CONFIG_PPC) -/* Intel has null pci_unmap_single, no reasons to remember mapping. */ -#define DUMMY_PCI_UNMAP -#endif - struct ring_info { struct sk_buff *skb; -#ifndef DUMMY_PCI_UNMAP - dma_addr_t mapping; -#endif + dma64_addr_t mapping; }; /* Funny... 
As soon as we add maplen on alpha, it starts to work @@ -600,10 +593,8 @@ */ struct tx_ring_info { struct sk_buff *skb; -#ifndef DUMMY_PCI_UNMAP - dma_addr_t mapping; + dma64_addr_t mapping; int maplen; -#endif }; /* @@ -635,7 +626,7 @@ struct ace_info *info; struct ace_regs *regs; /* register base */ struct ace_skb *skb; - dma_addr_t info_dma; /* 32/64 bit */ + dma64_addr_t info_dma; /* 32/64 bit */ int version, link; int promisc, mcast_all; @@ -672,10 +663,10 @@ volatile u32 *evt_prd, *rx_ret_prd, *tx_csm; - dma_addr_t tx_ring_dma; /* 32/64 bit */ - dma_addr_t rx_ring_base_dma; - dma_addr_t evt_ring_dma; - dma_addr_t evt_prd_dma, rx_ret_prd_dma, tx_csm_dma; + dma64_addr_t tx_ring_dma; /* 32/64 bit */ + dma64_addr_t rx_ring_base_dma; + dma64_addr_t evt_ring_dma; + dma64_addr_t evt_prd_dma, rx_ret_prd_dma, tx_csm_dma; unsigned char *trace_buf; struct pci_dev *pdev; @@ -691,6 +682,7 @@ u32 last_tx, last_std_rx, last_mini_rx; #endif struct net_device_stats stats; + int pci_using_dac; }; @@ -710,16 +702,11 @@ #endif -static inline void set_aceaddr(aceaddr *aa, dma_addr_t addr) +static inline void set_aceaddr(aceaddr *aa, dma64_addr_t addr) { unsigned long baddr = (unsigned long) addr; -#ifdef ACE_64BIT_PTR aa->addrlo = baddr & 0xffffffff; aa->addrhi = baddr >> 32; -#else - /* Don't bother setting zero every time */ - aa->addrlo = baddr; -#endif mb(); } @@ -729,11 +716,7 @@ { unsigned long addr; mb(); -#ifdef ACE_64BIT_PTR addr = (u64)aa->addrhi << 32 | aa->addrlo; -#else - addr = aa->addrlo; -#endif return (void *)addr; } #endif diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/net/sk98lin/h/skdrv2nd.h linux/drivers/net/sk98lin/h/skdrv2nd.h --- /opt/kernel/linux-2.4.9/drivers/net/sk98lin/h/skdrv2nd.h Fri Aug 24 13:46:12 2001 +++ linux/drivers/net/sk98lin/h/skdrv2nd.h Thu Aug 23 09:28:51 2001 @@ -472,7 +472,7 @@ caddr_t pDescrMem; /* Pointer to the descriptor area */ - dma_addr_t pDescrMemDMA; /* PCI DMA address of area */ + dma64_addr_t pDescrMemDMA; /* PCI DMA address of area */ /* the port structures with descriptor rings */ TX_PORT TxPort[SK_MAX_MACS][2]; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/net/sk98lin/skge.c linux/drivers/net/sk98lin/skge.c --- /opt/kernel/linux-2.4.9/drivers/net/sk98lin/skge.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/net/sk98lin/skge.c Thu Aug 23 09:28:51 2001 @@ -443,6 +443,13 @@ if (pci_enable_device(pdev)) continue; + /* Configure DMA attributes. */ + pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff); + if (!pci_dac_cycles_ok(pdev)) { + if (!pci_dma_supported(pdev, (u64) 0xffffffff)) + continue; + } + if ((dev = init_etherdev(dev, sizeof(DEV_NET))) == 0) { printk(KERN_ERR "Unable to allocate etherdev " "structure!\n"); @@ -949,15 +956,15 @@ AllocLength = (RX_RING_SIZE + TX_RING_SIZE) * pAC->GIni.GIMacsFound + RX_RING_SIZE + 8; #endif - pDescrMem = pci_alloc_consistent(&pAC->PciDev, AllocLength, - &pAC->pDescrMemDMA); + pDescrMem = pci64_alloc_consistent(&pAC->PciDev, AllocLength, + &pAC->pDescrMemDMA); if (pDescrMem == NULL) { return (SK_FALSE); } pAC->pDescrMem = pDescrMem; /* Descriptors need 8 byte alignment, and this is ensured - * by pci_alloc_consistent. + * by pci64_alloc_consistent. 
*/ BusAddr = (unsigned long) pAC->pDescrMemDMA; for (i=0; iGIni.GIMacsFound; i++) { @@ -1007,8 +1014,8 @@ AllocLength = (RX_RING_SIZE + TX_RING_SIZE) * pAC->GIni.GIMacsFound + RX_RING_SIZE + 8; #endif - pci_free_consistent(&pAC->PciDev, AllocLength, - pAC->pDescrMem, pAC->pDescrMemDMA); + pci64_free_consistent(&pAC->PciDev, AllocLength, + pAC->pDescrMem, pAC->pDescrMemDMA); pAC->pDescrMem = NULL; } /* BoardFreeMem */ @@ -1769,8 +1776,10 @@ #endif /* set up descriptor and CONTROL dword */ - PhysAddr = (SK_U64) pci_map_single(&pAC->PciDev, - pMessage->data, + PhysAddr = (SK_U64) pci64_map_page(&pAC->PciDev, + virt_to_page(pMessage->data), + ((unsigned long) pMessage->data & + ~PAGE_MASK), pMessage->len, PCI_DMA_TODEVICE); pTxd->VDataLow = (SK_U32) (PhysAddr & 0xffffffff); @@ -1864,7 +1873,7 @@ /* release the DMA mapping */ PhysAddr = ((SK_U64) pTxd->VDataHigh) << (SK_U64) 32; PhysAddr |= (SK_U64) pTxd->VDataLow; - pci_unmap_single(&pAC->PciDev, PhysAddr, + pci64_unmap_page(&pAC->PciDev, PhysAddr, pTxd->pMBuf->len, PCI_DMA_TODEVICE); @@ -1945,8 +1954,10 @@ pRxPort->pRxdRingTail = pRxd->pNextRxd; pRxPort->RxdRingFree--; Length = pAC->RxBufSize; - PhysAddr = (SK_U64) pci_map_single(&pAC->PciDev, - pMsgBlock->data, + PhysAddr = (SK_U64) pci64_map_page(&pAC->PciDev, + virt_to_page(pMsgBlock->data), + ((unsigned long) pMsgBlock->data & + ~PAGE_MASK), pAC->RxBufSize - 2, PCI_DMA_FROMDEVICE); pRxd->VDataLow = (SK_U32) (PhysAddr & 0xffffffff); @@ -2091,10 +2102,10 @@ Control, FrameStat)); PhysAddr = ((SK_U64) pRxd->VDataHigh) << (SK_U64)32; PhysAddr |= (SK_U64) pRxd->VDataLow; - pci_dma_sync_single(&pAC->PciDev, - (dma_addr_t) PhysAddr, - FrameLength, - PCI_DMA_FROMDEVICE); + pci64_dma_sync_single(&pAC->PciDev, + (dma64_addr_t) PhysAddr, + FrameLength, + PCI_DMA_FROMDEVICE); ReQueueRxBuffer(pAC, pRxPort, pMsg, pRxd->VDataHigh, pRxd->VDataLow); @@ -2115,10 +2126,10 @@ /* use new skb and copy data */ skb_reserve(pNewMsg, 2); skb_put(pNewMsg, FrameLength); - pci_dma_sync_single(&pAC->PciDev, - (dma_addr_t) PhysAddr, - FrameLength, - PCI_DMA_FROMDEVICE); + pci64_dma_sync_single(&pAC->PciDev, + (dma64_addr_t) PhysAddr, + FrameLength, + PCI_DMA_FROMDEVICE); eth_copy_and_sum(pNewMsg, pMsg->data, FrameLength, 0); ReQueueRxBuffer(pAC, pRxPort, pMsg, @@ -2136,7 +2147,7 @@ PhysAddr |= (SK_U64) pRxd->VDataLow; /* release the DMA mapping */ - pci_unmap_single(&pAC->PciDev, + pci64_unmap_page(&pAC->PciDev, PhysAddr, pAC->RxBufSize - 2, PCI_DMA_FROMDEVICE); @@ -2261,7 +2272,7 @@ /* release the DMA mapping */ PhysAddr = ((SK_U64) pRxd->VDataHigh) << (SK_U64)32; PhysAddr |= (SK_U64) pRxd->VDataLow; - pci_unmap_single(&pAC->PciDev, + pci64_unmap_page(&pAC->PciDev, PhysAddr, pAC->RxBufSize - 2, PCI_DMA_FROMDEVICE); @@ -2341,7 +2352,7 @@ if (pRxd->pMBuf != NULL) { PhysAddr = ((SK_U64) pRxd->VDataHigh) << (SK_U64)32; PhysAddr |= (SK_U64) pRxd->VDataLow; - pci_unmap_single(&pAC->PciDev, + pci64_unmap_page(&pAC->PciDev, PhysAddr, pAC->RxBufSize - 2, PCI_DMA_FROMDEVICE); diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/net/sungem.c linux/drivers/net/sungem.c --- /opt/kernel/linux-2.4.9/drivers/net/sungem.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/net/sungem.c Thu Aug 23 09:28:51 2001 @@ -416,7 +416,8 @@ while (entry != limit) { struct sk_buff *skb; struct gem_txd *txd; - u32 dma_addr, dma_len; + dma64_addr_t dma_addr; + u32 dma_len; int frag; skb = gp->tx_skbs[entry]; @@ -442,10 +443,10 @@ for (frag = 0; frag <= skb_shinfo(skb)->nr_frags; frag++) { txd = &gp->init_block->txd[entry]; - dma_addr = 
(u32) le64_to_cpu(txd->buffer); + dma_addr = le64_to_cpu(txd->buffer); dma_len = le64_to_cpu(txd->control_word) & TXDCTRL_BUFSZ; - pci_unmap_single(gp->pdev, dma_addr, dma_len, PCI_DMA_TODEVICE); + pci64_unmap_page(gp->pdev, dma_addr, dma_len, PCI_DMA_TODEVICE); entry = NEXT_TX(entry); } @@ -496,7 +497,7 @@ struct gem_rxd *rxd = &gp->init_block->rxd[entry]; struct sk_buff *skb; u64 status = cpu_to_le64(rxd->status_word); - u32 dma_addr; + dma64_addr_t dma_addr; int len; if ((status & RXDCTRL_OWN) != 0) @@ -518,7 +519,7 @@ goto next; } - dma_addr = (u32) cpu_to_le64(rxd->buffer); + dma_addr = cpu_to_le64(rxd->buffer); if (len > RX_COPY_THRESHOLD) { struct sk_buff *new_skb; @@ -527,13 +528,16 @@ drops++; goto drop_it; } - pci_unmap_single(gp->pdev, dma_addr, - RX_BUF_ALLOC_SIZE(gp), PCI_DMA_FROMDEVICE); + pci64_unmap_page(gp->pdev, dma_addr, + RX_BUF_ALLOC_SIZE(gp), + PCI_DMA_FROMDEVICE); gp->rx_skbs[entry] = new_skb; new_skb->dev = gp->dev; skb_put(new_skb, (ETH_FRAME_LEN + RX_OFFSET)); - rxd->buffer = cpu_to_le64(pci_map_single(gp->pdev, - new_skb->data, + rxd->buffer = cpu_to_le64(pci64_map_page(gp->pdev, + virt_to_page(new_skb->data), + ((unsigned long) new_skb->data & + ~PAGE_MASK), RX_BUF_ALLOC_SIZE(gp), PCI_DMA_FROMDEVICE)); skb_reserve(new_skb, RX_OFFSET); @@ -551,7 +555,7 @@ copy_skb->dev = gp->dev; skb_reserve(copy_skb, 2); skb_put(copy_skb, len); - pci_dma_sync_single(gp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE); + pci64_dma_sync_single(gp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE); memcpy(copy_skb->data, skb->data, len); /* We'll reuse the original ring buffer. */ @@ -659,36 +663,44 @@ if (skb_shinfo(skb)->nr_frags == 0) { struct gem_txd *txd = &gp->init_block->txd[entry]; - u32 mapping, len; + dma64_addr_t mapping; + u32 len; len = skb->len; - mapping = pci_map_single(gp->pdev, skb->data, len, PCI_DMA_TODEVICE); + mapping = pci64_map_page(gp->pdev, + virt_to_page(skb->data), + ((unsigned long) skb->data & + ~PAGE_MASK), + len, PCI_DMA_TODEVICE); ctrl |= TXDCTRL_SOF | TXDCTRL_EOF | len; txd->buffer = cpu_to_le64(mapping); txd->control_word = cpu_to_le64(ctrl); entry = NEXT_TX(entry); } else { struct gem_txd *txd; - u32 first_len, first_mapping; + u32 first_len; + dma64_addr_t first_mapping; int frag, first_entry = entry; /* We must give this initial chunk to the device last. * Otherwise we could race with the device. 
*/ first_len = skb->len - skb->data_len; - first_mapping = pci_map_single(gp->pdev, skb->data, + first_mapping = pci64_map_page(gp->pdev, virt_to_page(skb->data), + ((unsigned long) skb->data & ~PAGE_MASK), first_len, PCI_DMA_TODEVICE); entry = NEXT_TX(entry); for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) { skb_frag_t *this_frag = &skb_shinfo(skb)->frags[frag]; - u32 len, mapping; + u32 len; + dma64_addr_t mapping; u64 this_ctrl; len = this_frag->size; - mapping = pci_map_single(gp->pdev, - ((void *) page_address(this_frag->page) + - this_frag->page_offset), + mapping = pci64_map_page(gp->pdev, + this_frag->page, + this_frag->page_offset, len, PCI_DMA_TODEVICE); this_ctrl = ctrl; if (frag == skb_shinfo(skb)->nr_frags - 1) @@ -946,17 +958,16 @@ struct gem_init_block *gb = gp->init_block; struct sk_buff *skb; int i; - u32 dma_addr; + dma64_addr_t dma_addr; for (i = 0; i < RX_RING_SIZE; i++) { struct gem_rxd *rxd; rxd = &gb->rxd[i]; if (gp->rx_skbs[i] != NULL) { - skb = gp->rx_skbs[i]; - dma_addr = (u32) le64_to_cpu(rxd->buffer); - pci_unmap_single(gp->pdev, dma_addr, + dma_addr = le64_to_cpu(rxd->buffer); + pci64_unmap_page(gp->pdev, dma_addr, RX_BUF_ALLOC_SIZE(gp), PCI_DMA_FROMDEVICE); dev_kfree_skb_any(skb); @@ -976,8 +987,8 @@ for (frag = 0; frag <= skb_shinfo(skb)->nr_frags; frag++) { txd = &gb->txd[i]; - dma_addr = (u32) le64_to_cpu(txd->buffer); - pci_unmap_single(gp->pdev, dma_addr, + dma_addr = le64_to_cpu(txd->buffer); + pci64_unmap_page(gp->pdev, dma_addr, le64_to_cpu(txd->control_word) & TXDCTRL_BUFSZ, PCI_DMA_TODEVICE); @@ -994,7 +1005,7 @@ struct gem_init_block *gb = gp->init_block; struct net_device *dev = gp->dev; int i, gfp_flags = GFP_KERNEL; - u32 dma_addr; + dma64_addr_t dma_addr; if (from_irq) gfp_flags = GFP_ATOMIC; @@ -1017,7 +1028,10 @@ gp->rx_skbs[i] = skb; skb->dev = dev; skb_put(skb, (ETH_FRAME_LEN + RX_OFFSET)); - dma_addr = pci_map_single(gp->pdev, skb->data, + dma_addr = pci64_map_page(gp->pdev, + virt_to_page(skb->data), + ((unsigned long) skb->data & + ~PAGE_MASK), RX_BUF_ALLOC_SIZE(gp), PCI_DMA_FROMDEVICE); rxd->buffer = cpu_to_le64(dma_addr); @@ -1135,13 +1149,15 @@ static void gem_init_dma(struct gem *gp) { + u64 desc_dma = (u64) gp->gblock_dvma; u32 val; val = (TXDMA_CFG_BASE | (0x7ff << 10) | TXDMA_CFG_PMODE); writel(val, gp->regs + TXDMA_CFG); - writel(0, gp->regs + TXDMA_DBHI); - writel(gp->gblock_dvma, gp->regs + TXDMA_DBLOW); + writel(desc_dma >> 32, gp->regs + TXDMA_DBHI); + writel(desc_dma & 0xffffffff, gp->regs + TXDMA_DBLOW); + desc_dma += (TX_RING_SIZE * sizeof(struct gem_txd)); writel(0, gp->regs + TXDMA_KICK); @@ -1149,10 +1165,8 @@ ((14 / 2) << 13) | RXDMA_CFG_FTHRESH_512); writel(val, gp->regs + RXDMA_CFG); - writel(0, gp->regs + RXDMA_DBHI); - writel((gp->gblock_dvma + - (TX_RING_SIZE * sizeof(struct gem_txd))), - gp->regs + RXDMA_DBLOW); + writel(desc_dma >> 32, gp->regs + RXDMA_DBHI); + writel(desc_dma & 0xffffffff, gp->regs + RXDMA_DBLOW); writel(RX_RING_SIZE - 4, gp->regs + RXDMA_KICK); @@ -1560,8 +1574,10 @@ } { - u32 cfg = readl(gp->regs + GREG_BIFCFG); + u32 cfg; + /* XXX Why do I do this? -DaveM XXX */ + cfg = readl(gp->regs + GREG_BIFCFG); cfg |= GREG_BIFCFG_B64DIS; writel(cfg, gp->regs + GREG_BIFCFG); @@ -1619,7 +1635,7 @@ unsigned long gemreg_base, gemreg_len; struct net_device *dev; struct gem *gp; - int i, err; + int i, err, pci_using_dac; if (gem_version_printed++ == 0) printk(KERN_INFO "%s", version); @@ -1632,6 +1648,29 @@ } pci_set_master(pdev); + /* Configure DMA attributes. 
*/ + + /* All of the GEM documentation states that 64-bit DMA addressing + * is fully supported and should work just fine. However the + * front end for RIO based GEMs is different and only supports + * 32-bit addressing. + * + * For now we assume the various PPC GEMs are 32-bit only as well. + */ + pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff); + if (pdev->vendor != PCI_VENDOR_ID_SUN || + pdev->device != PCI_DEVICE_ID_SUN_GEM || + !pci_dac_cycles_ok(pdev)) { + if (!pci_dma_supported(pdev, (u64) 0xffffffff)) { + printk(KERN_ERR PFX "No usable DMA configuration, " + "aborting.\n"); + return -ENODEV; + } + pci_using_dac = 0; + } else { + pci_using_dac = 1; + } + gemreg_base = pci_resource_start(pdev, 0); gemreg_len = pci_resource_len(pdev, 0); @@ -1676,8 +1715,8 @@ * PAGE_SIZE aligned. */ gp->init_block = (struct gem_init_block *) - pci_alloc_consistent(pdev, sizeof(struct gem_init_block), - &gp->gblock_dvma); + pci64_alloc_consistent(pdev, sizeof(struct gem_init_block), + &gp->gblock_dvma); if (!gp->init_block) { printk(KERN_ERR PFX "Cannot allocate init block, " "aborting.\n"); @@ -1715,6 +1754,8 @@ /* GEM can do it all... */ dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; + if (pci_using_dac) + dev->features |= NETIF_F_HIGHDMA; return 0; @@ -1741,10 +1782,10 @@ unregister_netdev(dev); - pci_free_consistent(pdev, - sizeof(struct gem_init_block), - gp->init_block, - gp->gblock_dvma); + pci64_free_consistent(pdev, + sizeof(struct gem_init_block), + gp->init_block, + gp->gblock_dvma); iounmap((void *) gp->regs); release_mem_region(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/net/sungem.h linux/drivers/net/sungem.h --- /opt/kernel/linux-2.4.9/drivers/net/sungem.h Fri Aug 24 13:46:12 2001 +++ linux/drivers/net/sungem.h Thu Aug 23 09:28:51 2001 @@ -949,7 +949,7 @@ int timer_ticks; enum link_state lstate; - dma_addr_t gblock_dvma; + dma64_addr_t gblock_dvma; struct pci_dev *pdev; struct net_device *dev; }; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/pci/pci.c linux/drivers/pci/pci.c --- /opt/kernel/linux-2.4.9/drivers/pci/pci.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/pci/pci.c Thu Aug 23 09:28:51 2001 @@ -832,7 +832,7 @@ } int -pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask) +pci_set_dma_mask(struct pci_dev *dev, u64 mask) { if(! 
pci_dma_supported(dev, mask)) return -EIO; @@ -842,6 +842,12 @@ return 0; } +void +pci_change_dma_flag(struct pci_dev *dev, unsigned int on, unsigned int off) +{ + dev->dma_flags |= on; + dev->dma_flags &= ~off; +} /* * Translate the low bits of the PCI base @@ -1576,7 +1582,8 @@ struct pci_page { /* cacheable header for 'allocation' bytes */ struct list_head page_list; void *vaddr; - dma_addr_t dma; + int is_pci64; + dma64_addr_t dma; unsigned long bitmap [0]; }; @@ -1663,7 +1670,7 @@ static struct pci_page * -pool_alloc_page (struct pci_pool *pool, int mem_flags) +pool_alloc_page (struct pci_pool *pool, int mem_flags, int use_pci64) { struct pci_page *page; int mapsize; @@ -1675,8 +1682,19 @@ page = (struct pci_page *) kmalloc (mapsize + sizeof *page, mem_flags); if (!page) return 0; - page->vaddr = pci_alloc_consistent (pool->dev, - pool->allocation, &page->dma); + page->is_pci64 = use_pci64; + if (use_pci64) { + page->vaddr = pci64_alloc_consistent (pool->dev, + pool->allocation, + &page->dma); + } else { + dma_addr_t tmp; + + page->vaddr = pci_alloc_consistent (pool->dev, + pool->allocation, + &tmp); + page->dma = (dma64_addr_t) tmp; + } if (page->vaddr) { memset (page->bitmap, 0xff, mapsize); // bit set == free if (pool->flags & SLAB_POISON) @@ -1704,11 +1722,16 @@ static void pool_free_page (struct pci_pool *pool, struct pci_page *page) { - dma_addr_t dma = page->dma; + dma64_addr_t dma = page->dma; if (pool->flags & SLAB_POISON) memset (page->vaddr, POOL_POISON_BYTE, pool->allocation); - pci_free_consistent (pool->dev, pool->allocation, page->vaddr, dma); + if (page->is_pci64) + pci64_free_consistent(pool->dev, pool->allocation, + page->vaddr, dma); + else + pci_free_consistent(pool->dev, pool->allocation, + page->vaddr, (dma_addr_t) dma); list_del (&page->page_list); kfree (page); } @@ -1753,7 +1776,7 @@ /** - * pci_pool_alloc - get a block of consistent memory + * pci{,64}_pool_alloc - get a block of consistent memory * @pool: pci pool that will produce the block * @mem_flags: SLAB_KERNEL or SLAB_ATOMIC * @handle: pointer to dma address of block @@ -1762,8 +1785,8 @@ * and reports its dma address through the handle. * If such a memory block can't be allocated, null is returned. 
*/ -void * -pci_pool_alloc (struct pci_pool *pool, int mem_flags, dma_addr_t *handle) +static void * +__pci_pool_alloc (struct pci_pool *pool, int mem_flags, dma64_addr_t *handle, int use_64bit) { unsigned long flags; struct list_head *entry; @@ -1792,7 +1815,7 @@ } } } - if (!(page = pool_alloc_page (pool, mem_flags))) { + if (!(page = pool_alloc_page (pool, mem_flags, use_64bit))) { if (mem_flags == SLAB_KERNEL) { DECLARE_WAITQUEUE (wait, current); @@ -1820,9 +1843,27 @@ return retval; } +void * +pci_pool_alloc (struct pci_pool *pool, int mem_flags, dma_addr_t *handle) +{ + dma64_addr_t dma_addr; + void *ret; + + ret = __pci_pool_alloc(pool, mem_flags, &dma_addr, 0); + if (ret != NULL) + *handle = (dma_addr_t) dma_addr; + + return ret; +} + +void * +pci64_pool_alloc (struct pci_pool *pool, int mem_flags, dma64_addr_t *handle) +{ + return __pci_pool_alloc(pool, mem_flags, handle, 1); +} static struct pci_page * -pool_find_page (struct pci_pool *pool, dma_addr_t dma) +pool_find_page (struct pci_pool *pool, dma64_addr_t dma) { unsigned long flags; struct list_head *entry; @@ -1844,7 +1885,7 @@ /** - * pci_pool_free - put block back into pci pool + * pci{,64}_pool_free - put block back into pci pool * @pool: the pci pool holding the block * @vaddr: virtual address of block * @dma: dma address of block @@ -1853,7 +1894,7 @@ * unless it is first re-allocated. */ void -pci_pool_free (struct pci_pool *pool, void *vaddr, dma_addr_t dma) +pci64_pool_free (struct pci_pool *pool, void *vaddr, dma64_addr_t dma) { struct pci_page *page; unsigned long flags; @@ -1862,14 +1903,14 @@ if ((page = pool_find_page (pool, dma)) == 0) { printk (KERN_ERR "pci_pool_free %s/%s, %p/%x (bad dma)\n", pool->dev ? pool->dev->slot_name : NULL, - pool->name, vaddr, dma); + pool->name, vaddr, (int) (dma & 0xffffffff)); return; } #ifdef CONFIG_PCIPOOL_DEBUG if (((dma - page->dma) + (void *)page->vaddr) != vaddr) { printk (KERN_ERR "pci_pool_free %s/%s, %p (bad vaddr)/%x\n", pool->dev ? pool->dev->slot_name : NULL, - pool->name, vaddr, dma); + pool->name, vaddr, (int) (dma & 0xffffffff)); return; } #endif @@ -1902,6 +1943,10 @@ spin_unlock_irqrestore (&pool->lock, flags); } +void pci_pool_free(struct pci_pool *pool, void *vaddr, dma_addr_t dma) +{ + pci64_pool_free(pool, vaddr, (dma64_addr_t) dma); +} void __init pci_init(void) { @@ -1954,6 +1999,7 @@ EXPORT_SYMBOL(pci_find_subsys); EXPORT_SYMBOL(pci_set_master); EXPORT_SYMBOL(pci_set_dma_mask); +EXPORT_SYMBOL(pci_change_dma_flag); EXPORT_SYMBOL(pci_assign_resource); EXPORT_SYMBOL(pci_register_driver); EXPORT_SYMBOL(pci_unregister_driver); @@ -1995,4 +2041,6 @@ EXPORT_SYMBOL (pci_pool_destroy); EXPORT_SYMBOL (pci_pool_alloc); EXPORT_SYMBOL (pci_pool_free); +EXPORT_SYMBOL (pci64_pool_alloc); +EXPORT_SYMBOL (pci64_pool_free); diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/aha1542.c linux/drivers/scsi/aha1542.c --- /opt/kernel/linux-2.4.9/drivers/scsi/aha1542.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/scsi/aha1542.c Thu Aug 23 09:28:51 2001 @@ -67,12 +67,10 @@ int nseg, int badseg) { - printk(KERN_CRIT "sgpnt[%d:%d] addr %p/0x%lx alt %p/0x%lx length %d\n", + printk(KERN_CRIT "sgpnt[%d:%d] addr %p/0x%lx length %d\n", badseg, nseg, sgpnt[badseg].address, SCSI_PA(sgpnt[badseg].address), - sgpnt[badseg].alt_address, - sgpnt[badseg].alt_address ? 
SCSI_PA(sgpnt[badseg].alt_address) : 0, sgpnt[badseg].length); /* @@ -716,7 +714,7 @@ unsigned char *ptr; printk(KERN_CRIT "Bad segment list supplied to aha1542.c (%d, %d)\n", SCpnt->use_sg, i); for (i = 0; i < SCpnt->use_sg; i++) { - printk(KERN_CRIT "%d: %x %x %d\n", i, (unsigned int) sgpnt[i].address, (unsigned int) sgpnt[i].alt_address, + printk(KERN_CRIT "%d: %p %d\n", i, sgpnt[i].address, sgpnt[i].length); }; printk(KERN_CRIT "cptr %x: ", (unsigned int) cptr); diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/aic7xxx/aic7xxx_linux_host.h linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h --- /opt/kernel/linux-2.4.9/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Fri Aug 24 13:46:12 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Thu Aug 23 09:28:51 2001 @@ -81,7 +81,8 @@ present: 0, /* number of 7xxx's present */\ unchecked_isa_dma: 0, /* no memory DMA restrictions */\ use_clustering: ENABLE_CLUSTERING, \ - use_new_eh_code: 1 \ + use_new_eh_code: 1, \ + can_dma_32: 1 \ } #endif /* _AIC7XXX_LINUX_HOST_H_ */ diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/hosts.c linux/drivers/scsi/hosts.c --- /opt/kernel/linux-2.4.9/drivers/scsi/hosts.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/scsi/hosts.c Thu Aug 23 09:28:51 2001 @@ -235,6 +235,7 @@ retval->cmd_per_lun = tpnt->cmd_per_lun; retval->unchecked_isa_dma = tpnt->unchecked_isa_dma; retval->use_clustering = tpnt->use_clustering; + retval->can_dma_32 = tpnt->can_dma_32; retval->select_queue_depths = tpnt->select_queue_depths; retval->max_sectors = tpnt->max_sectors; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/hosts.h linux/drivers/scsi/hosts.h --- /opt/kernel/linux-2.4.9/drivers/scsi/hosts.h Fri Aug 24 13:46:12 2001 +++ linux/drivers/scsi/hosts.h Thu Aug 23 09:33:10 2001 @@ -291,6 +291,8 @@ */ unsigned emulated:1; + unsigned can_dma_32:1; + /* * Name of proc directory */ @@ -390,6 +392,7 @@ unsigned in_recovery:1; unsigned unchecked_isa_dma:1; unsigned use_clustering:1; + unsigned can_dma_32:1; /* * True if this host was loaded as a loadable module */ diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/ips.c linux/drivers/scsi/ips.c --- /opt/kernel/linux-2.4.9/drivers/scsi/ips.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/scsi/ips.c Fri Aug 24 09:31:17 2001 @@ -3546,7 +3546,7 @@ Scsi_Cmnd *p; Scsi_Cmnd *q; ips_copp_wait_item_t *item; - int ret; + int ret, sg_entries = 0; int intr_status; unsigned long cpu_flags; unsigned long cpu_flags2; @@ -3743,6 +3743,8 @@ int i; sg = SC->request_buffer; + scb->org_sg_list = sg; + sg_entries = pci_map_sg(ha->pcidev, sg, SC->use_sg, ips_command_direction[scb->scsi_cmd->cmnd[0]]); if (SC->use_sg == 1) { if (sg[0].length > ha->max_xfer) { @@ -3752,12 +3754,12 @@ scb->data_len = sg[0].length; scb->dcdb.transfer_length = scb->data_len; - scb->data_busaddr = VIRT_TO_BUS(sg[0].address); + scb->data_busaddr = sg_dma_address(&sg[0]); scb->sg_len = 0; } else { /* Check for the first Element being bigger than MAX_XFER */ if (sg[0].length > ha->max_xfer) { - scb->sg_list[0].address = VIRT_TO_BUS(sg[0].address); + scb->sg_list[0].address = sg_dma_address(&sg[0]); scb->sg_list[0].length = ha->max_xfer; scb->data_len = ha->max_xfer; scb->breakup = 0; @@ -3765,8 +3767,8 @@ scb->sg_len = 1; } else { - for (i = 0; i < SC->use_sg; i++) { - scb->sg_list[i].address = VIRT_TO_BUS(sg[i].address); + for (i = 0; i < sg_entries; i++) { + scb->sg_list[i].address = sg_dma_address(&sg[i]); 
scb->sg_list[i].length = sg[i].length; if (scb->data_len + sg[i].length > ha->max_xfer) { @@ -3781,7 +3783,7 @@ } if (!scb->breakup) - scb->sg_len = SC->use_sg; + scb->sg_len = sg_entries; else scb->sg_len = scb->breakup; } @@ -4441,11 +4443,11 @@ if (sg[0].length - (bk_save * ha->max_xfer)) { /* Further breakup required */ scb->data_len = ha->max_xfer; - scb->data_busaddr = VIRT_TO_BUS(sg[0].address + (bk_save * ha->max_xfer)); + scb->data_busaddr = sg_dma_address(&sg[0] + (bk_save * ha->max_xfer)); scb->breakup = bk_save + 1; } else { scb->data_len = sg[0].length - (bk_save * ha->max_xfer); - scb->data_busaddr = VIRT_TO_BUS(sg[0].address + (bk_save * ha->max_xfer)); + scb->data_busaddr = sg_dma_address(&sg[0] + (bk_save * ha->max_xfer)); } scb->dcdb.transfer_length = scb->data_len; @@ -4462,7 +4464,7 @@ /* pointed to by bk_save */ if (scb->sg_break) { scb->sg_len = 1; - scb->sg_list[0].address = VIRT_TO_BUS(sg[bk_save].address+ha->max_xfer*scb->sg_break); + scb->sg_list[0].address = sg_dma_address(&sg[bk_save] + ha->max_xfer*scb->sg_break); if (ha->max_xfer > sg[bk_save].length-ha->max_xfer * scb->sg_break) scb->sg_list[0].length = sg[bk_save].length-ha->max_xfer * scb->sg_break; else @@ -4480,7 +4482,7 @@ } else { /* ( sg_break == 0 ), so this is our first look at a new sg piece */ if (sg[bk_save].length > ha->max_xfer) { - scb->sg_list[0].address = VIRT_TO_BUS(sg[bk_save].address); + scb->sg_list[0].address = sg_dma_address(&sg[bk_save]); scb->sg_list[0].length = ha->max_xfer; scb->breakup = bk_save; scb->sg_break = 1; @@ -4493,7 +4495,7 @@ scb->sg_break = 0; /* We're only doing full units here */ for (i = bk_save; i < scb->scsi_cmd->use_sg; i++) { - scb->sg_list[i - bk_save].address = VIRT_TO_BUS(sg[i].address); + scb->sg_list[i - bk_save].address = sg_dma_address(&sg[i]); scb->sg_list[i - bk_save].length = sg[i].length; if (scb->data_len + sg[i].length > ha->max_xfer) { scb->breakup = i; /* sneaky, if not more work, than breakup is 0 */ @@ -4560,6 +4562,7 @@ break; } /* end case */ + pci_unmap_sg(ha->pcidev, scb->org_sg_list, scb->sg_len, ips_command_direction[scb->scsi_cmd->cmnd[0]]); return ; } #ifndef NO_IPS_CMDLINE diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/ips.h linux/drivers/scsi/ips.h --- /opt/kernel/linux-2.4.9/drivers/scsi/ips.h Fri Aug 24 13:46:12 2001 +++ linux/drivers/scsi/ips.h Thu Aug 23 09:28:51 2001 @@ -419,7 +419,8 @@ present : 0, \ unchecked_isa_dma : 0, \ use_clustering : ENABLE_CLUSTERING, \ - use_new_eh_code : 1 \ + use_new_eh_code : 1, \ + can_dma_32 : 1 \ } #endif @@ -1026,6 +1027,7 @@ u32 flags; u32 op_code; IPS_SG_LIST *sg_list; + struct scatterlist *org_sg_list; Scsi_Cmnd *scsi_cmd; struct ips_scb *q_next; ips_scb_callback callback; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/megaraid.h linux/drivers/scsi/megaraid.h --- /opt/kernel/linux-2.4.9/drivers/scsi/megaraid.h Fri Aug 24 13:46:12 2001 +++ linux/drivers/scsi/megaraid.h Thu Aug 23 09:28:51 2001 @@ -220,7 +220,8 @@ cmd_per_lun: MAX_CMD_PER_LUN, /* SCSI Commands per LUN */\ present: 0, /* Present */\ unchecked_isa_dma: 0, /* Default Unchecked ISA DMA */\ - use_clustering: ENABLE_CLUSTERING /* Enable Clustering */\ + use_clustering: ENABLE_CLUSTERING, /* Enable Clustering */\ + can_dma_32: 1 \ } #endif diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/osst.c linux/drivers/scsi/osst.c --- /opt/kernel/linux-2.4.9/drivers/scsi/osst.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/scsi/osst.c Thu Aug 23 
09:28:51 2001 @@ -4933,7 +4933,6 @@ tb->sg[0].address = (unsigned char *)__get_free_pages(priority, order); if (tb->sg[0].address != NULL) { - tb->sg[0].alt_address = NULL; tb->sg[0].length = b_size; break; } @@ -4969,7 +4968,6 @@ tb = NULL; break; } - tb->sg[segs].alt_address = NULL; tb->sg[segs].length = b_size; got += b_size; segs++; @@ -5043,7 +5041,6 @@ normalize_buffer(STbuffer); return FALSE; } - STbuffer->sg[segs].alt_address = NULL; STbuffer->sg[segs].length = b_size; STbuffer->sg_segs += 1; got += b_size; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/qlogicfc.c linux/drivers/scsi/qlogicfc.c --- /opt/kernel/linux-2.4.9/drivers/scsi/qlogicfc.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/scsi/qlogicfc.c Thu Aug 23 09:28:51 2001 @@ -21,6 +21,9 @@ * * Big endian support and dynamic DMA mapping added * by Jakub Jelinek . + * + * Conversion to final pci64 DMA interfaces + * by David S. Miller . */ /* @@ -63,31 +66,10 @@ #include "sd.h" #include "hosts.h" -#if 1 -/* Once pci64_ DMA mapping interface is in, kill this. */ -typedef dma_addr_t dma64_addr_t; -#define pci64_alloc_consistent(d,s,p) pci_alloc_consistent((d),(s),(p)) -#define pci64_free_consistent(d,s,c,a) pci_free_consistent((d),(s),(c),(a)) -#define pci64_map_single(d,c,s,dir) pci_map_single((d),(c),(s),(dir)) -#define pci64_map_sg(d,s,n,dir) pci_map_sg((d),(s),(n),(dir)) -#define pci64_unmap_single(d,a,s,dir) pci_unmap_single((d),(a),(s),(dir)) -#define pci64_unmap_sg(d,s,n,dir) pci_unmap_sg((d),(s),(n),(dir)) -#if BITS_PER_LONG > 32 #define pci64_dma_hi32(a) ((u32) (0xffffffff & (((u64)(a))>>32))) #define pci64_dma_lo32(a) ((u32) (0xffffffff & (((u64)(a))))) -#else -#define pci64_dma_hi32(a) 0 -#define pci64_dma_lo32(a) (a) -#endif /* BITS_PER_LONG */ -#define pci64_dma_build(hi,lo) (lo) -#define sg_dma64_address(s) sg_dma_address(s) -#define sg_dma64_len(s) sg_dma_len(s) -#if BITS_PER_LONG > 32 -#define PCI64_DMA_BITS 64 -#else -#define PCI64_DMA_BITS 32 -#endif /* BITS_PER_LONG */ -#endif +#define pci64_dma_build(hi,lo) \ + ((dma64_addr_t)(((u64)(lo))|(((u64)(hi))<<32))) #include "qlogicfc.h" @@ -245,13 +227,8 @@ }; /* entry header type commands */ -#if PCI64_DMA_BITS > 32 #define ENTRY_COMMAND 0x19 #define ENTRY_CONTINUATION 0x0a -#else -#define ENTRY_COMMAND 0x11 -#define ENTRY_CONTINUATION 0x02 -#endif #define ENTRY_STATUS 0x03 #define ENTRY_MARKER 0x04 @@ -262,23 +239,12 @@ #define EFLAG_BAD_HEADER 4 #define EFLAG_BAD_PAYLOAD 8 -#if PCI64_DMA_BITS > 32 - struct dataseg { u_int d_base; u_int d_base_hi; u_int d_count; }; -#else - -struct dataseg { - u_int d_base; - u_int d_count; -}; - -#endif - struct Command_Entry { struct Entry_header hdr; u_int handle; @@ -303,18 +269,10 @@ #define CFLAG_READ 0x20 #define CFLAG_WRITE 0x40 -#if PCI64_DMA_BITS > 32 struct Continuation_Entry { struct Entry_header hdr; struct dataseg dataseg[DATASEGS_PER_CONT]; }; -#else -struct Continuation_Entry { - struct Entry_header hdr; - u32 rsvd; - struct dataseg dataseg[DATASEGS_PER_CONT]; -}; -#endif struct Marker_Entry { struct Entry_header hdr; @@ -746,7 +704,7 @@ tmpt->proc_name = "isp2x00"; if (pci_present() == 0) { - printk("qlogicfc : PCI not present\n"); + printk(KERN_INFO "qlogicfc : PCI not present\n"); return 0; } @@ -756,6 +714,13 @@ if (pci_enable_device(pdev)) continue; + /* Try to configure DMA attributes. 
*/ + pci_set_dma_mask(pdev, (u64)0xffffffffffffffff); + if (!pci_dac_cycles_ok(pdev)) { + if (!pci_dma_supported(pdev, (u64) 0xffffffff)) + continue; + } + host = scsi_register(tmpt, sizeof(struct isp2x00_hostdata)); if (!host) { printk("qlogicfc%d : could not register host.\n", hosts); @@ -1284,10 +1249,8 @@ for (i = 0; i < n; i++) { ds[i].d_base = cpu_to_le32(pci64_dma_lo32(sg_dma64_address(sg))); -#if PCI64_DMA_BITS > 32 ds[i].d_base_hi = cpu_to_le32(pci64_dma_hi32(sg_dma64_address(sg))); -#endif - ds[i].d_count = cpu_to_le32(sg_dma64_len(sg)); + ds[i].d_count = cpu_to_le32(sg_dma_len(sg)); ++sg; } sg_count -= DATASEGS_PER_COMMAND; @@ -1310,30 +1273,29 @@ n = DATASEGS_PER_CONT; for (i = 0; i < n; ++i) { ds[i].d_base = cpu_to_le32(pci64_dma_lo32(sg_dma64_address(sg))); -#if PCI64_DMA_BITS > 32 ds[i].d_base_hi = cpu_to_le32(pci64_dma_hi32(sg_dma64_address(sg))); -#endif - ds[i].d_count = cpu_to_le32(sg_dma64_len(sg)); + ds[i].d_count = cpu_to_le32(sg_dma_len(sg)); ++sg; } sg_count -= n; } } else if (Cmnd->request_bufflen && Cmnd->sc_data_direction != PCI_DMA_NONE) { - dma64_addr_t busaddr = pci64_map_single(hostdata->pci_dev, Cmnd->request_buffer, Cmnd->request_bufflen, - scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); + struct page *page = virt_to_page(Cmnd->request_buffer); + unsigned long offset = ((unsigned long)Cmnd->request_buffer & + ~PAGE_MASK); + dma64_addr_t busaddr = pci64_map_page(hostdata->pci_dev, + page, offset, + Cmnd->request_bufflen, + scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); + Cmnd->SCp.dma_handle = busaddr; - *(dma64_addr_t *)&Cmnd->SCp = busaddr; cmd->dataseg[0].d_base = cpu_to_le32(pci64_dma_lo32(busaddr)); -#if PCI64_DMA_BITS > 32 cmd->dataseg[0].d_base_hi = cpu_to_le32(pci64_dma_hi32(busaddr)); -#endif cmd->dataseg[0].d_count = cpu_to_le32(Cmnd->request_bufflen); cmd->segment_cnt = cpu_to_le16(1); } else { cmd->dataseg[0].d_base = 0; -#if PCI64_DMA_BITS > 32 cmd->dataseg[0].d_base_hi = 0; -#endif cmd->segment_cnt = cpu_to_le16(1); /* Shouldn't this be 0? 
*/ } @@ -1431,11 +1393,12 @@ Cmnd->use_sg, scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); else if (Cmnd->request_bufflen && - Cmnd->sc_data_direction != PCI_DMA_NONE) - pci64_unmap_single(hostdata->pci_dev, - *(dma64_addr_t *)&Cmnd->SCp, - Cmnd->request_bufflen, - scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); + Cmnd->sc_data_direction != PCI_DMA_NONE) { + pci64_unmap_page(hostdata->pci_dev, + Cmnd->SCp.dma_handle, + Cmnd->request_bufflen, + scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); + } hostdata->handle_ptrs[i]->result = DID_SOFT_ERROR << 16; @@ -1538,10 +1501,10 @@ scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); else if (Cmnd->request_bufflen && Cmnd->sc_data_direction != PCI_DMA_NONE) - pci64_unmap_single(hostdata->pci_dev, - *(dma64_addr_t *)&Cmnd->SCp, - Cmnd->request_bufflen, - scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); + pci64_unmap_page(hostdata->pci_dev, + Cmnd->SCp.dma_handle, + Cmnd->request_bufflen, + scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); Cmnd->result = 0x0; (*Cmnd->scsi_done) (Cmnd); } else @@ -1591,9 +1554,10 @@ (struct scatterlist *)Cmnd->buffer, Cmnd->use_sg, scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); else if (Cmnd->request_bufflen && Cmnd->sc_data_direction != PCI_DMA_NONE) - pci64_unmap_single(hostdata->pci_dev, *(dma64_addr_t *)&Cmnd->SCp, - Cmnd->request_bufflen, - scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); + pci64_unmap_page(hostdata->pci_dev, + Cmnd->SCp.dma_handle, + Cmnd->request_bufflen, + scsi_to_pci_dma_dir(Cmnd->sc_data_direction)); /* * if any of the following are true we do not @@ -1964,9 +1928,15 @@ hostdata->wwn |= (u64) (cpu_to_le16(hostdata->control_block.node_name[3]) & 0x00ff) << 8; hostdata->wwn |= (u64) (cpu_to_le16(hostdata->control_block.node_name[3]) & 0xff00) >> 8; - /* FIXME: If the DMA transfer goes one way only, this should use PCI_DMA_TODEVICE and below as well. */ - busaddr = pci64_map_single(hostdata->pci_dev, &hostdata->control_block, sizeof(hostdata->control_block), - PCI_DMA_BIDIRECTIONAL); + /* FIXME: If the DMA transfer goes one way only, this should use + * PCI_DMA_TODEVICE and below as well. 
+ */ + busaddr = pci64_map_page(hostdata->pci_dev, + virt_to_page(&hostdata->control_block), + ((unsigned long) &hostdata->control_block & + ~PAGE_MASK), + sizeof(hostdata->control_block), + PCI_DMA_BIDIRECTIONAL); param[0] = MBOX_INIT_FIRMWARE; param[2] = (u_short) (pci64_dma_lo32(busaddr) >> 16); @@ -1978,21 +1948,24 @@ isp2x00_mbox_command(host, param); if (param[0] != MBOX_COMMAND_COMPLETE) { printk("qlogicfc%d.c: Ouch 0x%04x\n", hostdata->host_id, param[0]); - pci64_unmap_single(hostdata->pci_dev, busaddr, sizeof(hostdata->control_block), - PCI_DMA_BIDIRECTIONAL); + pci64_unmap_page(hostdata->pci_dev, busaddr, + sizeof(hostdata->control_block), + PCI_DMA_BIDIRECTIONAL); return 1; } param[0] = MBOX_GET_FIRMWARE_STATE; isp2x00_mbox_command(host, param); if (param[0] != MBOX_COMMAND_COMPLETE) { printk("qlogicfc%d.c: 0x%04x\n", hostdata->host_id, param[0]); - pci64_unmap_single(hostdata->pci_dev, busaddr, sizeof(hostdata->control_block), - PCI_DMA_BIDIRECTIONAL); + pci64_unmap_page(hostdata->pci_dev, busaddr, + sizeof(hostdata->control_block), + PCI_DMA_BIDIRECTIONAL); return 1; } - pci64_unmap_single(hostdata->pci_dev, busaddr, sizeof(hostdata->control_block), - PCI_DMA_BIDIRECTIONAL); + pci64_unmap_page(hostdata->pci_dev, busaddr, + sizeof(hostdata->control_block), + PCI_DMA_BIDIRECTIONAL); LEAVE("isp2x00_reset_hardware"); return 0; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/qlogicfc.h linux/drivers/scsi/qlogicfc.h --- /opt/kernel/linux-2.4.9/drivers/scsi/qlogicfc.h Fri Aug 24 13:46:12 2001 +++ linux/drivers/scsi/qlogicfc.h Thu Aug 23 09:28:51 2001 @@ -100,7 +100,8 @@ cmd_per_lun: QLOGICFC_CMD_PER_LUN, \ present: 0, \ unchecked_isa_dma: 0, \ - use_clustering: ENABLE_CLUSTERING \ + use_clustering: ENABLE_CLUSTERING, \ + can_dma_32: 1 \ } #endif /* _QLOGICFC_H */ diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/scsi.c linux/drivers/scsi/scsi.c --- /opt/kernel/linux-2.4.9/drivers/scsi/scsi.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/scsi/scsi.c Thu Aug 23 09:28:51 2001 @@ -178,10 +178,13 @@ * handler in the list - ultimately they call scsi_request_fn * to do the dirty deed. */ -void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) { - blk_init_queue(&SDpnt->request_queue, scsi_request_fn); - blk_queue_headactive(&SDpnt->request_queue, 0); - SDpnt->request_queue.queuedata = (void *) SDpnt; +void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) +{ + request_queue_t *q = &SDpnt->request_queue; + + blk_init_queue(q, scsi_request_fn); + blk_queue_headactive(q, 0); + q->queuedata = (void *) SDpnt; } #ifdef MODULE diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/scsi.h linux/drivers/scsi/scsi.h --- /opt/kernel/linux-2.4.9/drivers/scsi/scsi.h Fri Aug 24 13:46:12 2001 +++ linux/drivers/scsi/scsi.h Fri Aug 24 09:21:13 2001 @@ -386,15 +386,6 @@ #define ASKED_FOR_SENSE 0x20 #define SYNC_RESET 0x40 -#if defined(__mc68000__) || defined(CONFIG_APUS) -#include -#define CONTIGUOUS_BUFFERS(X,Y) \ - (virt_to_phys((X)->b_data+(X)->b_size-1)+1==virt_to_phys((Y)->b_data)) -#else -#define CONTIGUOUS_BUFFERS(X,Y) ((X->b_data+X->b_size) == Y->b_data) -#endif - - /* * This is the crap from the old error handling code. We have it in a special * place so that we can more easily delete it later on. 
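(Illustrative aside, not part of the patch: the new can_dma_32 template bit seen in the aic7xxx, ips, megaraid and qlogicfc headers is all a low-level SCSI driver has to set; hosts.c copies it into the Scsi_Host at registration so the midlayer knows the HBA can address the full 32-bit DMA range. A minimal hypothetical template, using the same old-style initializer syntax those drivers use:)

	static Scsi_Host_Template mydrv_template = {
		name:			"mydrv",	/* hypothetical driver */
		detect:			mydrv_detect,
		release:		mydrv_release,
		queuecommand:		mydrv_queuecommand,
		this_id:		-1,
		cmd_per_lun:		1,
		use_clustering:		ENABLE_CLUSTERING,
		use_new_eh_code:	1,
		can_dma_32:		1,	/* can DMA anywhere below 4GB */
	};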
@@ -633,6 +624,8 @@ struct scatterlist *buffer; /* which buffer */ int buffers_residual; /* how many buffers left */ + dma64_addr_t dma_handle; + volatile int Status; volatile int Message; volatile int have_data_in; @@ -745,7 +738,8 @@ unsigned request_bufflen; /* Actual request size */ struct timer_list eh_timeout; /* Used to time out the command. */ - void *request_buffer; /* Actual requested buffer */ + void *request_buffer; /* Actual requested buffer */ + void **bounce_buffers; /* Array of bounce buffers when using scatter-gather */ /* These elements define the operation we ultimately want to perform */ unsigned char data_cmnd[MAX_COMMAND_SIZE]; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/scsi_debug.c linux/drivers/scsi/scsi_debug.c --- /opt/kernel/linux-2.4.9/drivers/scsi/scsi_debug.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/scsi/scsi_debug.c Thu Aug 23 09:28:51 2001 @@ -154,10 +154,7 @@ if (SCpnt->use_sg) { sgpnt = (struct scatterlist *) SCpnt->buffer; for (i = 0; i < SCpnt->use_sg; i++) { - lpnt = (int *) sgpnt[i].alt_address; - printk(":%p %p %d\n", sgpnt[i].alt_address, sgpnt[i].address, sgpnt[i].length); - if (lpnt) - printk(" (Alt %x) ", lpnt[15]); + printk(":%p %d\n", sgpnt[i].address, sgpnt[i].length); }; } else { printk("nosg: %p %p %d\n", SCpnt->request.buffer, SCpnt->buffer, @@ -175,12 +172,6 @@ printk("\n"); if (flag == 0) return; - lpnt = (unsigned int *) sgpnt[0].alt_address; - for (i = 0; i < sizeof(Scsi_Cmnd) / 4 + 1; i++) { - if ((i & 7) == 0) - printk("\n"); - printk("%x ", *lpnt++); - }; #if 0 printk("\n"); lpnt = (unsigned int *) sgpnt[0].address; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/scsi_lib.c linux/drivers/scsi/scsi_lib.c --- /opt/kernel/linux-2.4.9/drivers/scsi/scsi_lib.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/scsi/scsi_lib.c Thu Aug 23 09:28:51 2001 @@ -388,6 +388,7 @@ req->nr_sectors -= nsect; req->current_nr_sectors = bh->b_size >> 9; + req->hard_cur_sectors = req->current_nr_sectors; if (req->nr_sectors < req->current_nr_sectors) { req->nr_sectors = req->current_nr_sectors; printk("scsi_end_request: buffer-list destroyed\n"); @@ -410,7 +411,6 @@ q = &SCpnt->device->request_queue; - req->buffer = bh->b_data; /* * Bleah. Leftovers again. Stick the leftovers in * the front of the queue, and goose the queue again. 
@@ -489,6 +489,8 @@ */ static void scsi_release_buffers(Scsi_Cmnd * SCpnt) { + struct request *req = &SCpnt->request; + ASSERT_LOCK(&io_request_lock, 0); /* @@ -496,20 +498,22 @@ */ if (SCpnt->use_sg) { struct scatterlist *sgpnt; + void **bbpnt; int i; sgpnt = (struct scatterlist *) SCpnt->request_buffer; + bbpnt = SCpnt->bounce_buffers; - for (i = 0; i < SCpnt->use_sg; i++) { - if (sgpnt[i].alt_address) { - scsi_free(sgpnt[i].address, sgpnt[i].length); + if (bbpnt) { + for (i = 0; i < SCpnt->use_sg; i++) { + if (bbpnt[i]) + scsi_free(sgpnt[i].address, sgpnt[i].length); } } scsi_free(SCpnt->request_buffer, SCpnt->sglist_len); } else { - if (SCpnt->request_buffer != SCpnt->request.buffer) { - scsi_free(SCpnt->request_buffer, SCpnt->request_bufflen); - } + if (SCpnt->request_buffer != req->buffer) + scsi_free(SCpnt->request_buffer,SCpnt->request_bufflen); } /* @@ -545,6 +549,7 @@ int result = SCpnt->result; int this_count = SCpnt->bufflen >> 9; request_queue_t *q = &SCpnt->device->request_queue; + struct request *req = &SCpnt->request; /* * We must do one of several things here: @@ -568,26 +573,32 @@ */ if (SCpnt->use_sg) { struct scatterlist *sgpnt; + void **bbpnt; int i; sgpnt = (struct scatterlist *) SCpnt->buffer; + bbpnt = SCpnt->bounce_buffers; - for (i = 0; i < SCpnt->use_sg; i++) { - if (sgpnt[i].alt_address) { - if (SCpnt->request.cmd == READ) { - memcpy(sgpnt[i].alt_address, - sgpnt[i].address, - sgpnt[i].length); + if (bbpnt) { + for (i = 0; i < SCpnt->use_sg; i++) { + if (bbpnt[i]) { + if (req->cmd == READ) { + memcpy(bbpnt[i], + sgpnt[i].address, + sgpnt[i].length); + } + scsi_free(sgpnt[i].address, sgpnt[i].length); } - scsi_free(sgpnt[i].address, sgpnt[i].length); } } scsi_free(SCpnt->buffer, SCpnt->sglist_len); } else { - if (SCpnt->buffer != SCpnt->request.buffer) { - if (SCpnt->request.cmd == READ) { - memcpy(SCpnt->request.buffer, SCpnt->buffer, - SCpnt->bufflen); + if (SCpnt->buffer != req->buffer) { + if (req->cmd == READ) { + unsigned long flags; + char *to = bh_kmap_irq(req->bh, &flags); + memcpy(to, SCpnt->buffer, SCpnt->bufflen); + bh_kunmap_irq(to, &flags); } scsi_free(SCpnt->buffer, SCpnt->bufflen); } diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/scsi_merge.c linux/drivers/scsi/scsi_merge.c --- /opt/kernel/linux-2.4.9/drivers/scsi/scsi_merge.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/scsi/scsi_merge.c Fri Aug 24 09:24:40 2001 @@ -6,6 +6,7 @@ * Based upon conversations with large numbers * of people at Linux Expo. * Support for dynamic DMA mapping: Jakub Jelinek (jakub@redhat.com). + * Support for highmem I/O: Jens Axboe */ /* @@ -48,7 +49,6 @@ #include #include - #define __KERNEL_SYSCALLS__ #include @@ -95,7 +95,7 @@ printk("Segment 0x%p, blocks %d, addr 0x%lx\n", bh, bh->b_size >> 9, - virt_to_phys(bh->b_data - 1)); + bh_phys(bh) - 1); } panic("Ththththaats all folks. Too dangerous to continue.\n"); } @@ -120,9 +120,11 @@ { int jj; struct scatterlist *sgpnt; + void **bbpnt; int consumed = 0; sgpnt = (struct scatterlist *) SCpnt->request_buffer; + bbpnt = SCpnt->bounce_buffers; /* * Now print out a bunch of stats. First, start with the request @@ -136,15 +138,13 @@ */ for(jj=0; jj < SCpnt->use_sg; jj++) { - printk("[%d]\tlen:%d\taddr:%p\talt:%p\n", + printk("[%d]\tlen:%d\taddr:%p\tbounce:%p\n", jj, sgpnt[jj].length, sgpnt[jj].address, - sgpnt[jj].alt_address); - if( sgpnt[jj].alt_address != NULL ) - { - consumed = (sgpnt[jj].length >> 9); - } + (bbpnt ? 
bbpnt[jj] : NULL)); + if (bbpnt && bbpnt[jj]) + consumed += sgpnt[jj].length; } printk("Total %d sectors consumed\n", consumed); panic("DMA pool exhausted"); @@ -223,11 +223,10 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(bhnext->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bh_phys(bhnext) - 1 == ISA_DMA_THRESHOLD) { ret++; reqsize = bhnext->b_size; - } else if (CONTIGUOUS_BUFFERS(bh, bhnext)) { + } else if (BH_CONTIG(bh, bhnext)) { /* * This one is OK. Let it go. */ @@ -241,8 +240,7 @@ * kind of screwed and we need to start * another segment. */ - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD + if( dma_host && bh_phys(bh) - 1 >= ISA_DMA_THRESHOLD && reqsize + bhnext->b_size > PAGE_SIZE ) { ret++; @@ -304,7 +302,7 @@ } #define MERGEABLE_BUFFERS(X,Y) \ -(((((long)(X)->b_data+(X)->b_size)|((long)(Y)->b_data)) & \ +(((((long)bh_phys((X))+(X)->b_size)|((long)bh_phys((Y)))) & \ (DMA_CHUNK_SIZE - 1)) == 0) #ifdef DMA_CHUNK_SIZE @@ -419,6 +417,8 @@ if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) return 0; + else if (!blk_seg_merge_ok(q, req->bhtail, bh)) + return 0; if (use_clustering) { /* @@ -427,14 +427,11 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bh_phys(req->bhtail) - 1 == ISA_DMA_THRESHOLD) goto new_end_segment; - } - if (CONTIGUOUS_BUFFERS(req->bhtail, bh)) { + if (BH_CONTIG(req->bhtail, bh)) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { + if (dma_host && bh_phys(bh) - 1 >= ISA_DMA_THRESHOLD) { segment_size = 0; count = __count_segments(req, use_clustering, dma_host, &segment_size); if( segment_size + bh->b_size > PAGE_SIZE ) { @@ -478,6 +475,8 @@ if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) return 0; + else if (!blk_seg_merge_ok(q, bh, req->bh)) + return 0; if (use_clustering) { /* @@ -486,14 +485,12 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(bh->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bh_phys(bh) - 1 == ISA_DMA_THRESHOLD) { goto new_start_segment; } - if (CONTIGUOUS_BUFFERS(bh, req->bh)) { + if (BH_CONTIG(bh, req->bh)) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { + if (dma_host && bh_phys(bh) - 1 >= ISA_DMA_THRESHOLD) { segment_size = bh->b_size; count = __count_segments(req, use_clustering, dma_host, &segment_size); if( count != req->nr_segments ) { @@ -613,6 +610,9 @@ SDpnt = (Scsi_Device *) q->queuedata; SHpnt = SDpnt->host; + if (!blk_seg_merge_ok(q, req->bhtail, next->bh)) + return 0; + #ifdef DMA_CHUNK_SIZE if (max_segments > 64) max_segments = 64; @@ -652,18 +652,15 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bh_phys(req->bhtail) - 1 == ISA_DMA_THRESHOLD) goto dont_combine; - } #ifdef DMA_SEGMENT_SIZE_LIMITED /* * We currently can only allocate scatter-gather bounce * buffers in chunks of PAGE_SIZE or less. 
*/ - if (dma_host - && CONTIGUOUS_BUFFERS(req->bhtail, next->bh) - && virt_to_phys(req->bhtail->b_data) - 1 >= ISA_DMA_THRESHOLD ) + if (dma_host && BH_CONTIG(req->bhtail, next->bh) + && bh_phys(req->bhtail) - 1 >= ISA_DMA_THRESHOLD ) { int segment_size = 0; int count = 0; @@ -675,7 +672,7 @@ } } #endif - if (CONTIGUOUS_BUFFERS(req->bhtail, next->bh)) { + if (BH_CONTIG(req->bhtail, next->bh)) { /* * This one is OK. Let it go. */ @@ -807,30 +804,8 @@ int sectors; struct scatterlist * sgpnt; int this_count; + void ** bbpnt; - /* - * FIXME(eric) - don't inline this - it doesn't depend on the - * integer flags. Come to think of it, I don't think this is even - * needed any more. Need to play with it and see if we hit the - * panic. If not, then don't bother. - */ - if (!SCpnt->request.bh) { - /* - * Case of page request (i.e. raw device), or unlinked buffer - * Typically used for swapping, but this isn't how we do - * swapping any more. - */ - panic("I believe this is dead code. If we hit this, I was wrong"); -#if 0 - SCpnt->request_bufflen = SCpnt->request.nr_sectors << 9; - SCpnt->request_buffer = SCpnt->request.buffer; - SCpnt->use_sg = 0; - /* - * FIXME(eric) - need to handle DMA here. - */ -#endif - return 1; - } req = &SCpnt->request; /* * First we need to know how many scatter gather segments are needed. @@ -847,24 +822,28 @@ * buffer. */ if (dma_host && scsi_dma_free_sectors <= 10) { - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; goto single_segment; } - /* - * Don't bother with scatter-gather if there is only one segment. + + /* + * Allocate the actual scatter-gather table itself. We will use + * sg even for a single segment request, since it's the easiest way + * to support highmem. */ - if (count == 1) { - this_count = SCpnt->request.nr_sectors; - goto single_segment; - } SCpnt->use_sg = count; + SCpnt->sglist_len = (SCpnt->use_sg * sizeof(struct scatterlist)); - /* - * Allocate the actual scatter-gather table itself. - * scsi_malloc can only allocate in chunks of 512 bytes + /* If we could potentially require ISA bounce buffers, allocate + * space for this array here. */ - SCpnt->sglist_len = (SCpnt->use_sg - * sizeof(struct scatterlist) + 511) & ~511; + if (dma_host) + SCpnt->sglist_len += (SCpnt->use_sg * sizeof(void *)); + + /* scsi_malloc can only allocate in chunks of 512 bytes so + * round it up. + */ + SCpnt->sglist_len = (SCpnt->sglist_len + 511) & ~511; sgpnt = (struct scatterlist *) scsi_malloc(SCpnt->sglist_len); @@ -877,7 +856,7 @@ * simply write the first buffer all by itself. */ printk("Warning - running *really* short on DMA buffers\n"); - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; goto single_segment; } /* @@ -889,13 +868,19 @@ SCpnt->request_bufflen = 0; bhprev = NULL; - for (count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { + if (dma_host) + bbpnt = (void **) ((char *)sgpnt + + (SCpnt->use_sg * sizeof(struct scatterlist))); + else + bbpnt = NULL; + + SCpnt->bounce_buffers = bbpnt; + + for (count = 0, bh = req->bh; bh; bh = bh->b_reqnext) { if (use_clustering && bhprev != NULL) { - if (dma_host && - virt_to_phys(bhprev->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bh_phys(bhprev) - 1 == ISA_DMA_THRESHOLD) { /* Nothing - fall through */ - } else if (CONTIGUOUS_BUFFERS(bhprev, bh)) { + } else if (BH_CONTIG(bhprev, bh)) { /* * This one is OK. Let it go. 
Note that we * do not have the ability to allocate @@ -904,7 +889,7 @@ */ if( dma_host ) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( virt_to_phys(bh->b_data) - 1 < ISA_DMA_THRESHOLD + if (bh_phys(bh) - 1 < ISA_DMA_THRESHOLD || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) { sgpnt[count - 1].length += bh->b_size; bhprev = bh; @@ -923,12 +908,15 @@ } } } - count++; - sgpnt[count - 1].address = bh->b_data; - sgpnt[count - 1].length += bh->b_size; - if (!dma_host) { + + sgpnt[count].page = bh->b_page; + sgpnt[count].offset = bh_offset(bh); + sgpnt[count].length = bh->b_size; + + if (!dma_host) SCpnt->request_bufflen += bh->b_size; - } + + count++; bhprev = bh; } @@ -951,12 +939,16 @@ for (i = 0; i < count; i++) { sectors = (sgpnt[i].length >> 9); SCpnt->request_bufflen += sgpnt[i].length; + /* + * only done for dma_host, in which case .page is not + * set since it's guarenteed to be a low memory page + */ if (virt_to_phys(sgpnt[i].address) + sgpnt[i].length - 1 > ISA_DMA_THRESHOLD) { if( scsi_dma_free_sectors - sectors <= 10 ) { /* * If this would nearly drain the DMA - * pool, mpty, then let's stop here. + * pool empty, then let's stop here. * Don't make this request any larger. * This is kind of a safety valve that * we use - we could get screwed later @@ -970,7 +962,7 @@ break; } - sgpnt[i].alt_address = sgpnt[i].address; + bbpnt[i] = sgpnt[i].address; sgpnt[i].address = (char *) scsi_malloc(sgpnt[i].length); /* @@ -986,8 +978,8 @@ } break; } - if (SCpnt->request.cmd == WRITE) { - memcpy(sgpnt[i].address, sgpnt[i].alt_address, + if (req->cmd == WRITE) { + memcpy(sgpnt[i].address, bbpnt[i], sgpnt[i].length); } } @@ -1031,8 +1023,7 @@ * single-block requests if we had hundreds of free sectors. */ if( scsi_dma_free_sectors > 30 ) { - for (this_count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { + for (this_count = 0, bh = req->bh; bh; bh = bh->b_reqnext) { if( scsi_dma_free_sectors - this_count < 30 || this_count == sectors ) { @@ -1045,7 +1036,7 @@ /* * Yow! Take the absolute minimum here. */ - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; } /* @@ -1058,28 +1049,31 @@ * segment. Possibly the entire request, or possibly a small * chunk of the entire request. */ - bh = SCpnt->request.bh; - buff = SCpnt->request.buffer; + bh = req->bh; + buff = req->buffer = bh->b_data; - if (dma_host) { + if (dma_host || PageHighMem(bh->b_page)) { /* * Allocate a DMA bounce buffer. If the allocation fails, fall * back and allocate a really small one - enough to satisfy * the first buffer. 
*/ - if (virt_to_phys(SCpnt->request.bh->b_data) - + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) { + if (bh_phys(bh) + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) { buff = (char *) scsi_malloc(this_count << 9); if (!buff) { printk("Warning - running low on DMA memory\n"); - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; buff = (char *) scsi_malloc(this_count << 9); if (!buff) { dma_exhausted(SCpnt, 0); } } - if (SCpnt->request.cmd == WRITE) - memcpy(buff, (char *) SCpnt->request.buffer, this_count << 9); + if (req->cmd == WRITE) { + unsigned long flags; + char *buf = bh_kmap_irq(bh, &flags); + memcpy(buff, buf, this_count << 9); + bh_kunmap_irq(buf, &flags); + } } } SCpnt->request_bufflen = this_count << 9; @@ -1120,20 +1114,10 @@ */ void initialize_merge_fn(Scsi_Device * SDpnt) { - request_queue_t *q; - struct Scsi_Host *SHpnt; - SHpnt = SDpnt->host; + struct Scsi_Host *SHpnt = SDpnt->host; + request_queue_t *q = &SDpnt->request_queue; + dma64_addr_t bounce_limit; - q = &SDpnt->request_queue; - - /* - * If the host has already selected a merge manager, then don't - * pick a new one. - */ -#if 0 - if (q->back_merge_fn && q->front_merge_fn) - return; -#endif /* * If this host has an unlimited tablesize, then don't bother with a * merge manager. The whole point of the operation is to make sure @@ -1166,4 +1150,19 @@ q->merge_requests_fn = scsi_merge_requests_fn_dc; SDpnt->scsi_init_io_fn = scsi_init_io_vdc; } + + /* + * now enable highmem I/O, if appropriate + */ + if (!PCI_DMA_BUS_IS_PHYS) + /* Platforms with virtual-DMA translation + * hardware have no practical limit. + */ + bounce_limit = BLK_BOUNCE_ANY; + else if (SHpnt->can_dma_32 && (SDpnt->type == TYPE_DISK)) + bounce_limit = SHpnt->pci_dev->dma_mask; + else + bounce_limit = BLK_BOUNCE_HIGH; + + blk_queue_bounce_limit(q, bounce_limit); } diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/sr.c linux/drivers/scsi/sr.c --- /opt/kernel/linux-2.4.9/drivers/scsi/sr.c Fri Aug 24 13:46:12 2001 +++ linux/drivers/scsi/sr.c Thu Aug 23 09:28:51 2001 @@ -265,6 +265,7 @@ struct scatterlist *sg, *old_sg = NULL; int i, fsize, bsize, sg_ent, sg_count; char *front, *back; + void **bbpnt, **old_bbpnt = NULL; back = front = NULL; sg_ent = SCpnt->use_sg; @@ -292,17 +293,25 @@ * extend or allocate new scatter-gather table */ sg_count = SCpnt->use_sg; - if (sg_count) + if (sg_count) { old_sg = (struct scatterlist *) SCpnt->request_buffer; - else { + old_bbpnt = SCpnt->bounce_buffers; + } else { sg_count = 1; sg_ent++; } - i = ((sg_ent * sizeof(struct scatterlist)) + 511) & ~511; + /* Get space for scatterlist and bounce buffer array. 
*/ + i = sg_ent * sizeof(struct scatterlist); + i += sg_ent * sizeof(void *); + i = (i + 511) & ~511; + if ((sg = scsi_malloc(i)) == NULL) goto no_mem; + bbpnt = (void **) + ((char *)sg + (sg_ent * sizeof(struct scatterlist))); + /* * no more failing memory allocs possible, we can safely assign * SCpnt values now @@ -313,13 +322,15 @@ i = 0; if (fsize) { - sg[0].address = sg[0].alt_address = front; + sg[0].address = bbpnt[0] = front; sg[0].length = fsize; i++; } if (old_sg) { memcpy(sg + i, old_sg, SCpnt->use_sg * sizeof(struct scatterlist)); - scsi_free(old_sg, ((SCpnt->use_sg * sizeof(struct scatterlist)) + 511) & ~511); + memcpy(bbpnt + i, old_bbpnt, SCpnt->use_sg * sizeof(void *)); + scsi_free(old_sg, (((SCpnt->use_sg * sizeof(struct scatterlist)) + + (SCpnt->use_sg * sizeof(void *))) + 511) & ~511); } else { sg[i].address = SCpnt->request_buffer; sg[i].length = SCpnt->request_bufflen; @@ -327,11 +338,12 @@ SCpnt->request_bufflen += (fsize + bsize); SCpnt->request_buffer = sg; + SCpnt->bounce_buffers = bbpnt; SCpnt->use_sg += i; if (bsize) { sg[SCpnt->use_sg].address = back; - sg[SCpnt->use_sg].alt_address = back; + bbpnt[SCpnt->use_sg] = back; sg[SCpnt->use_sg].length = bsize; SCpnt->use_sg++; } diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/st.c linux/drivers/scsi/st.c --- /opt/kernel/linux-2.4.9/drivers/scsi/st.c Fri Aug 24 13:46:13 2001 +++ linux/drivers/scsi/st.c Thu Aug 23 09:28:51 2001 @@ -3222,7 +3222,6 @@ tb->sg[0].address = (unsigned char *) __get_free_pages(priority, order); if (tb->sg[0].address != NULL) { - tb->sg[0].alt_address = NULL; tb->sg[0].length = b_size; break; } @@ -3258,7 +3257,6 @@ tb = NULL; break; } - tb->sg[segs].alt_address = NULL; tb->sg[segs].length = b_size; got += b_size; segs++; @@ -3332,7 +3330,6 @@ normalize_buffer(STbuffer); return FALSE; } - STbuffer->sg[segs].alt_address = NULL; STbuffer->sg[segs].length = b_size; STbuffer->sg_segs += 1; got += b_size; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/sym53c8xx.c linux/drivers/scsi/sym53c8xx.c --- /opt/kernel/linux-2.4.9/drivers/scsi/sym53c8xx.c Fri Aug 24 13:46:13 2001 +++ linux/drivers/scsi/sym53c8xx.c Fri Aug 24 13:49:31 2001 @@ -989,8 +989,8 @@ if (vbp) { dma_addr_t daddr; vp = (m_addr_t) pci_alloc_consistent(mp->bush, - PAGE_SIZE<vaddr = vp; @@ -1140,37 +1140,40 @@ /* Linux version with pci bus iommu kernel interface */ /* To keep track of the dma mapping (sg/single) that has been set */ -#define __data_mapped SCp.phase -#define __data_mapping SCp.have_data_in +#define __data_mapped(cmd) (cmd)->SCp.phase +#define __data_mapping(cmd) (cmd)->SCp.dma_handle static void __unmap_scsi_data(pcidev_t pdev, Scsi_Cmnd *cmd) { int dma_dir = scsi_to_pci_dma_dir(cmd->sc_data_direction); - switch(cmd->__data_mapped) { + switch(__data_mapped(cmd)) { case 2: - pci_unmap_sg(pdev, cmd->buffer, cmd->use_sg, dma_dir); + pci64_unmap_sg(pdev, cmd->buffer, cmd->use_sg, dma_dir); break; case 1: - pci_unmap_single(pdev, cmd->__data_mapping, + pci64_unmap_page(pdev, __data_mapping(cmd), cmd->request_bufflen, dma_dir); break; } - cmd->__data_mapped = 0; + __data_mapped(cmd) = 0; } -static u_long __map_scsi_single_data(pcidev_t pdev, Scsi_Cmnd *cmd) +static dma64_addr_t __map_scsi_single_data(pcidev_t pdev, Scsi_Cmnd *cmd) { - dma_addr_t mapping; + dma64_addr_t mapping; int dma_dir = scsi_to_pci_dma_dir(cmd->sc_data_direction); if (cmd->request_bufflen == 0) return 0; - mapping = pci_map_single(pdev, cmd->request_buffer, + mapping = pci64_map_page(pdev, + 
virt_to_page(cmd->request_buffer), + ((unsigned long)cmd->request_buffer & + ~PAGE_MASK), cmd->request_bufflen, dma_dir); - cmd->__data_mapped = 1; - cmd->__data_mapping = mapping; + __data_mapped(cmd) = 1; + __data_mapping(cmd) = mapping; return mapping; } @@ -1183,9 +1186,9 @@ if (cmd->use_sg == 0) return 0; - use_sg = pci_map_sg(pdev, cmd->buffer, cmd->use_sg, dma_dir); - cmd->__data_mapped = 2; - cmd->__data_mapping = use_sg; + use_sg = pci64_map_sg(pdev, cmd->buffer, cmd->use_sg, dma_dir); + __data_mapped(cmd) = 2; + __data_mapping(cmd) = use_sg; return use_sg; } @@ -1194,18 +1197,18 @@ { int dma_dir = scsi_to_pci_dma_dir(cmd->sc_data_direction); - switch(cmd->__data_mapped) { + switch(__data_mapped(cmd)) { case 2: - pci_dma_sync_sg(pdev, cmd->buffer, cmd->use_sg, dma_dir); + pci64_dma_sync_sg(pdev, cmd->buffer, cmd->use_sg, dma_dir); break; case 1: - pci_dma_sync_single(pdev, cmd->__data_mapping, - cmd->request_bufflen, dma_dir); + pci64_dma_sync_single(pdev, __data_mapping(cmd), + cmd->request_bufflen, dma_dir); break; } } -#define scsi_sg_dma_address(sc) sg_dma_address(sc) +#define scsi_sg_dma_address(sc) sg_dma64_address(sc) #define scsi_sg_dma_len(sc) sg_dma_len(sc) #endif /* SCSI_NCR_DYNAMIC_DMA_MAPPING */ @@ -5031,12 +5034,12 @@ /* ** 64 bit (53C895A or 53C896) ? */ - if (np->features & FE_DAC) -#ifdef SCSI_NCR_USE_64BIT_DAC - np->rv_ccntl1 |= (XTIMOD | EXTIBMV); -#else - np->rv_ccntl1 |= (DDAC); -#endif + if (np->features & FE_DAC) { + if (pci_dac_cycles_ok(np->pdev)) + np->rv_ccntl1 |= (XTIMOD | EXTIBMV); + else + np->rv_ccntl1 |= (DDAC); + } /* ** Phase mismatch handled by SCRIPTS (53C895A, 53C896 or C1010) ? @@ -12070,15 +12073,9 @@ ** code will get more complex later). */ -#ifdef SCSI_NCR_USE_64BIT_DAC #define SCATTER_ONE(data, badd, len) \ (data)->addr = cpu_to_scr(badd); \ (data)->size = cpu_to_scr((((badd) >> 8) & 0xff000000) + len); -#else -#define SCATTER_ONE(data, badd, len) \ - (data)->addr = cpu_to_scr(badd); \ - (data)->size = cpu_to_scr(len); -#endif #define CROSS_16MB(p, n) (((((u_long) p) + n - 1) ^ ((u_long) p)) & ~0xffffff) @@ -12090,7 +12087,7 @@ cp->data_len = cmd->request_bufflen; if (cmd->request_bufflen) { - u_long baddr = map_scsi_single_data(np, cmd); + dma64_addr_t baddr = map_scsi_single_data(np, cmd); SCATTER_ONE(data, baddr, cmd->request_bufflen); if (CROSS_16MB(baddr, cmd->request_bufflen)) { @@ -12141,7 +12138,7 @@ data = &cp->phys.data[MAX_SCATTER - use_sg]; for (segn = 0; segn < use_sg; segn++) { - u_long baddr = scsi_sg_dma_address(&scatter[segn]); + dma64_addr_t baddr = scsi_sg_dma_address(&scatter[segn]); unsigned int len = scsi_sg_dma_len(&scatter[segn]); SCATTER_ONE(&data[segn], @@ -12180,7 +12177,7 @@ data = &cp->phys.data[MAX_SCATTER - use_sg]; for (segment = 0; segment < use_sg; segment++) { - u_long baddr = scsi_sg_dma_address(&scatter[segment]); + dma64_addr_t baddr = scsi_sg_dma_address(&scatter[segment]); unsigned int len = scsi_sg_dma_len(&scatter[segment]); SCATTER_ONE(&data[segment], @@ -13100,14 +13097,6 @@ (int) (PciDeviceFn(pdev) & 0xf8) >> 3, (int) (PciDeviceFn(pdev) & 7)); -#ifdef SCSI_NCR_DYNAMIC_DMA_MAPPING - if (pci_set_dma_mask(pdev, (dma_addr_t) (0xffffffffUL))) { - printk(KERN_WARNING NAME53C8XX - "32 BIT PCI BUS DMA ADDRESSING NOT SUPPORTED\n"); - return -1; - } -#endif - /* ** Read info from the PCI config space. ** pci_read_config_xxx() functions are assumed to be used for @@ -13175,6 +13164,29 @@ break; } +#ifdef SCSI_NCR_DYNAMIC_DMA_MAPPING + /* Configure DMA attributes. 
For DAC capable boards, we can encode + ** 32+8 bits for SCSI DMA data addresses with the extra bits used + ** in the size field. We use normal 32-bit PCI addresses for + ** descriptors. + */ + if (chip->features & FE_DAC) { + pci_set_dma_mask(pdev, 0xffffffffff); + if (!pci_dac_cycles_ok(pdev)) { + if (!pci_dma_supported(pdev, 0xffffffff)) { + printk(KERN_WARNING NAME53C8XX + "64 BIT AND 32 BIT PCI BUS DMA ADDRESSING " + "NOT SUPPORTED\n"); + return -1; + } + } + } else if (!pci_dma_supported(pdev, 0xffffffff)) { + printk(KERN_WARNING NAME53C8XX + "32 BIT PCI BUS DMA ADDRESSING NOT SUPPORTED\n"); + return -1; + } +#endif + /* ** Ignore Symbios chips controlled by SISL RAID controller. ** This controller sets value 0x52414944 at RAM end - 16. @@ -13611,8 +13623,8 @@ cmd->SCp.ptr = NULL; cmd->SCp.buffer = NULL; #ifdef SCSI_NCR_DYNAMIC_DMA_MAPPING - cmd->__data_mapped = 0; - cmd->__data_mapping = 0; + __data_mapped(cmd) = 0; + __data_mapping(cmd) = 0; #endif NCR_LOCK_NCB(np, flags); diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/sym53c8xx.h linux/drivers/scsi/sym53c8xx.h --- /opt/kernel/linux-2.4.9/drivers/scsi/sym53c8xx.h Fri Aug 24 13:46:13 2001 +++ linux/drivers/scsi/sym53c8xx.h Thu Aug 23 09:33:52 2001 @@ -96,7 +96,8 @@ this_id: 7, \ sg_tablesize: SCSI_NCR_SG_TABLESIZE, \ cmd_per_lun: SCSI_NCR_CMD_PER_LUN, \ - use_clustering: DISABLE_CLUSTERING} + use_clustering: DISABLE_CLUSTERING, \ + can_dma_32: 1} #else diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/sym53c8xx_comm.h linux/drivers/scsi/sym53c8xx_comm.h --- /opt/kernel/linux-2.4.9/drivers/scsi/sym53c8xx_comm.h Fri Aug 24 13:46:13 2001 +++ linux/drivers/scsi/sym53c8xx_comm.h Thu Aug 23 09:28:51 2001 @@ -2186,7 +2186,7 @@ (int) (PciDeviceFn(pdev) & 7)); #ifdef SCSI_NCR_DYNAMIC_DMA_MAPPING - if (!pci_dma_supported(pdev, (dma_addr_t) (0xffffffffUL))) { + if (!pci_dma_supported(pdev, 0xffffffff)) { printk(KERN_WARNING NAME53C8XX "32 BIT PCI BUS DMA ADDRESSING NOT SUPPORTED\n"); return -1; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/drivers/scsi/sym53c8xx_defs.h linux/drivers/scsi/sym53c8xx_defs.h --- /opt/kernel/linux-2.4.9/drivers/scsi/sym53c8xx_defs.h Fri Aug 24 13:46:13 2001 +++ linux/drivers/scsi/sym53c8xx_defs.h Thu Aug 23 09:33:37 2001 @@ -184,32 +184,11 @@ #endif /* - * Should we enable DAC cycles on Sparc64 platform? - * Until further investigation we do not enable it - * at the moment. - * We may want to enable it for __ia64__ (untested) - */ -#if defined(__ia64__) -# if !defined(SCSI_NCR_USE_64BIT_DAC) -# define SCSI_NCR_USE_64BIT_DAC -# endif -#else -# undef SCSI_NCR_USE_64BIT_DAC -#endif - -/* * Immediate arbitration */ #if defined(CONFIG_SCSI_NCR53C8XX_IARB) #define SCSI_NCR_IARB_SUPPORT #endif - -/* - * Should we enable DAC cycles on sparc64 platforms? - * Until further investigation we do not enable it - * anywhere at the moment. - */ -#undef SCSI_NCR_USE_64BIT_DAC /* * Sync transfer frequency at startup. 
diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/fs/buffer.c linux/fs/buffer.c --- /opt/kernel/linux-2.4.9/fs/buffer.c Fri Aug 24 13:46:13 2001 +++ linux/fs/buffer.c Thu Aug 23 09:28:51 2001 @@ -1331,13 +1331,11 @@ bh->b_page = page; if (offset >= PAGE_SIZE) BUG(); - if (PageHighMem(page)) - /* - * This catches illegal uses and preserves the offset: - */ - bh->b_data = (char *)(0 + offset); - else - bh->b_data = page_address(page) + offset; + /* + * ->virtual is NULL on highmem pages, so we can catch the + * offset even though using page_address on it + */ + bh->b_data = page_address(page) + offset; } /* diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-alpha/pci.h linux/include/asm-alpha/pci.h --- /opt/kernel/linux-2.4.9/include/asm-alpha/pci.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-alpha/pci.h Thu Aug 23 09:28:51 2001 @@ -144,7 +144,7 @@ only drive the low 24-bits during PCI bus mastering, then you would pass 0x00ffffff as the mask to this function. */ -extern int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask); +extern int pci_dma_supported(struct pci_dev *hwdev, u64 mask); /* Return the index of the PCI controller for device PDEV. */ extern int pci_controller_num(struct pci_dev *pdev); diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-alpha/scatterlist.h linux/include/asm-alpha/scatterlist.h --- /opt/kernel/linux-2.4.9/include/asm-alpha/scatterlist.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-alpha/scatterlist.h Thu Aug 23 09:28:51 2001 @@ -5,8 +5,6 @@ struct scatterlist { char *address; /* Source/target vaddr. */ - char *alt_address; /* Location of actual if address is a - dma indirect buffer, else NULL. */ dma_addr_t dma_address; unsigned int length; unsigned int dma_length; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-arm/pci.h linux/include/asm-arm/pci.h --- /opt/kernel/linux-2.4.9/include/asm-arm/pci.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-arm/pci.h Thu Aug 23 09:28:51 2001 @@ -152,7 +152,7 @@ * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. 
*/ -static inline int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) +static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) { return 1; } diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-arm/scatterlist.h linux/include/asm-arm/scatterlist.h --- /opt/kernel/linux-2.4.9/include/asm-arm/scatterlist.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-arm/scatterlist.h Thu Aug 23 09:28:51 2001 @@ -5,7 +5,6 @@ struct scatterlist { char *address; /* virtual address */ - char *alt_address; /* indirect dma address, or NULL */ dma_addr_t dma_address; /* dma address */ unsigned int length; /* length */ }; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-i386/kmap_types.h linux/include/asm-i386/kmap_types.h --- /opt/kernel/linux-2.4.9/include/asm-i386/kmap_types.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-i386/kmap_types.h Thu Aug 23 09:28:51 2001 @@ -6,6 +6,7 @@ KM_BOUNCE_WRITE, KM_SKB_DATA, KM_SKB_DATA_SOFTIRQ, + KM_BH_IRQ, KM_TYPE_NR }; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-i386/pci.h linux/include/asm-i386/pci.h --- /opt/kernel/linux-2.4.9/include/asm-i386/pci.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-i386/pci.h Thu Aug 23 09:31:00 2001 @@ -34,6 +34,15 @@ struct pci_dev; +/* The PCI address space does equal the physical memory + * address space. The networking and block device layers use + * this boolean for bounce buffer decisions. + */ +#define PCI_DMA_BUS_IS_PHYS (1) + +/* This is always fine. */ +#define pci_dac_cycles_ok(pci_dev) (1) + /* Allocate and map kernel buffer using consistent mode DMA for a device. * hwdev should be valid struct pci_dev pointer for PCI devices, * NULL for PCI-like buses (ISA, EISA). @@ -55,6 +64,22 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle); +static __inline__ void *pci64_alloc_consistent(struct pci_dev *hwdev, size_t size, + dma64_addr_t *dma_handle) +{ + dma_addr_t tmp; + void *ret; + + ret = pci_alloc_consistent(hwdev, size, &tmp); + if (ret != NULL) + *dma_handle = (dma64_addr_t) tmp; + + return ret; +} + +#define pci64_free_consistent(pdev,sz,addr,dma) \ + pci_free_consistent(pdev,sz,addr,(dma_addr_t)dma) + /* Map a single buffer of the indicated size for DMA in streaming mode. * The 32-bit bus address to use is returned. * @@ -84,6 +109,46 @@ /* Nothing to do */ } +/* + * pci_{map,unmap}_single_page maps a kernel page to a dma_addr_t. 
identical + * to pci_map_single, but takes a struct page instead of a virtual address + */ +static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page, + unsigned long offset, size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + + return (page - mem_map) * PAGE_SIZE + offset; +} + +static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address, + size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + +/* 64-bit variants */ +static inline dma64_addr_t pci64_map_page(struct pci_dev *hwdev, struct page *page, + unsigned long offset, size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + + return (((dma64_addr_t) (page - mem_map)) * + ((dma64_addr_t) PAGE_SIZE)) + (dma64_addr_t) offset; +} + +static inline void pci64_unmap_page(struct pci_dev *hwdev, dma64_addr_t dma_address, + size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + /* Map a set of buffers described by scatterlist in streaming * mode for DMA. This is the scather-gather version of the * above pci_map_single interface. Here the scatter gather list @@ -102,8 +167,26 @@ static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) { + int i; + if (direction == PCI_DMA_NONE) BUG(); + + /* + * temporary 2.4 hack + */ + for (i = 0; i < nents; i++ ) { + if (sg[i].address && sg[i].page) + BUG(); + else if (!sg[i].address && !sg[i].page) + BUG(); + + if (sg[i].address) + sg[i].dma_address = virt_to_bus(sg[i].address); + else + sg[i].dma_address = page_to_bus(sg[i].page) + sg[i].offset; + } + return nents; } @@ -119,6 +202,9 @@ /* Nothing to do */ } +#define pci64_map_sg pci_map_sg +#define pci64_unmap_sg pci_unmap_sg + /* Make physical memory consistent for a single * streaming mode DMA translation after a transfer. * @@ -152,12 +238,15 @@ /* Nothing to do */ } +#define pci64_dma_sync_single pci_dma_sync_single +#define pci64_dma_sync_sg pci_dma_sync_sg + /* Return whether the given PCI device DMA address mask can * be supported properly. For example, if your device can * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. */ -static inline int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) +static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) { /* * we fall back to GFP_DMA when the mask isn't all 1s, @@ -173,10 +262,10 @@ /* These macros should be used after a pci_map_sg call has been done * to get bus addresses of each of the SG entries and their lengths. * You should only work with the number of sg entries pci_map_sg - * returns, or alternatively stop on the first sg_dma_len(sg) which - * is 0. + * returns. */ -#define sg_dma_address(sg) (virt_to_bus((sg)->address)) +#define sg_dma_address(sg) ((dma_addr_t) ((sg)->dma_address)) +#define sg_dma64_address(sg) ((sg)->dma_address) #define sg_dma_len(sg) ((sg)->length) /* Return the index of the PCI controller for device. 
*/ diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-i386/scatterlist.h linux/include/asm-i386/scatterlist.h --- /opt/kernel/linux-2.4.9/include/asm-i386/scatterlist.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-i386/scatterlist.h Thu Aug 23 09:28:51 2001 @@ -1,10 +1,32 @@ #ifndef _I386_SCATTERLIST_H #define _I386_SCATTERLIST_H -struct scatterlist { - char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ +/* + * Drivers must set either ->address or (preferred) ->page and ->offset + * to indicate where data must be transferred to/from. + * + * Using ->page is recommended since it handles highmem data as well as + * low mem. ->address is restricted to data which has a virtual mapping, and + * it will go away in the future. Updating to ->page can be automated very + * easily -- something like + * + * sg->address = some_ptr; + * + * can be rewritten as + * + * sg->page = virt_to_page(some_ptr); + * sg->offset = (unsigned long) some_ptr & ~PAGE_MASK; + * + * and that's it. There's no excuse for not highmem enabling YOUR driver. /jens + */ +struct scatterlist +{ + char *address; + + struct page *page; + unsigned int offset; + + dma64_addr_t dma_address; unsigned int length; }; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-i386/types.h linux/include/asm-i386/types.h --- /opt/kernel/linux-2.4.9/include/asm-i386/types.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-i386/types.h Thu Aug 23 09:30:13 2001 @@ -27,6 +27,8 @@ */ #ifdef __KERNEL__ +#include + typedef signed char s8; typedef unsigned char u8; @@ -44,6 +46,11 @@ /* Dma addresses are 32-bits wide. */ typedef u32 dma_addr_t; +#ifdef CONFIG_HIGHMEM +typedef u64 dma64_addr_t; +#else +typedef u32 dma64_addr_t; +#endif #endif /* __KERNEL__ */ diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-ia64/pci.h linux/include/asm-ia64/pci.h --- /opt/kernel/linux-2.4.9/include/asm-ia64/pci.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-ia64/pci.h Thu Aug 23 09:28:51 2001 @@ -52,7 +52,7 @@ * you would pass 0x00ffffff as the mask to this function. */ static inline int -pci_dma_supported (struct pci_dev *hwdev, dma_addr_t mask) +pci_dma_supported (struct pci_dev *hwdev, u64 mask) { return 1; } diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-ia64/scatterlist.h linux/include/asm-ia64/scatterlist.h --- /opt/kernel/linux-2.4.9/include/asm-ia64/scatterlist.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-ia64/scatterlist.h Thu Aug 23 09:28:51 2001 @@ -8,11 +8,6 @@ struct scatterlist { char *address; /* location data is to be transferred to */ - /* - * Location of actual buffer if ADDRESS points to a DMA - * indirection buffer, NULL otherwise: - */ - char *alt_address; char *orig_address; /* Save away the original buffer address (used by pci-dma.c) */ unsigned int length; /* buffer length */ }; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-m68k/scatterlist.h linux/include/asm-m68k/scatterlist.h --- /opt/kernel/linux-2.4.9/include/asm-m68k/scatterlist.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-m68k/scatterlist.h Thu Aug 23 09:28:51 2001 @@ -3,8 +3,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. 
NULL otherwise */ unsigned int length; unsigned long dvma_address; }; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-mips/pci.h linux/include/asm-mips/pci.h --- /opt/kernel/linux-2.4.9/include/asm-mips/pci.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-mips/pci.h Thu Aug 23 09:28:51 2001 @@ -206,7 +206,7 @@ * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. */ -extern inline int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) +extern inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) { /* * we fall back to GFP_DMA when the mask isn't all 1s, diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-mips/scatterlist.h linux/include/asm-mips/scatterlist.h --- /opt/kernel/linux-2.4.9/include/asm-mips/scatterlist.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-mips/scatterlist.h Thu Aug 23 09:28:51 2001 @@ -3,8 +3,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ unsigned int length; __u32 dvma_address; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-mips64/pci.h linux/include/asm-mips64/pci.h --- /opt/kernel/linux-2.4.9/include/asm-mips64/pci.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-mips64/pci.h Thu Aug 23 09:28:51 2001 @@ -195,7 +195,7 @@ #endif } -extern inline int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) +extern inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) { /* * we fall back to GFP_DMA when the mask isn't all 1s, diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-mips64/scatterlist.h linux/include/asm-mips64/scatterlist.h --- /opt/kernel/linux-2.4.9/include/asm-mips64/scatterlist.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-mips64/scatterlist.h Thu Aug 23 09:28:51 2001 @@ -3,8 +3,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ unsigned int length; __u32 dvma_address; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-parisc/pci.h linux/include/asm-parisc/pci.h --- /opt/kernel/linux-2.4.9/include/asm-parisc/pci.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-parisc/pci.h Thu Aug 23 09:28:51 2001 @@ -113,7 +113,7 @@ ** See Documentation/DMA-mapping.txt */ struct pci_dma_ops { - int (*dma_supported)(struct pci_dev *dev, dma_addr_t mask); + int (*dma_supported)(struct pci_dev *dev, u64 mask); void *(*alloc_consistent)(struct pci_dev *dev, size_t size, dma_addr_t *iova); void (*free_consistent)(struct pci_dev *dev, size_t size, void *vaddr, dma_addr_t iova); dma_addr_t (*map_single)(struct pci_dev *dev, void *addr, size_t size, int direction); diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-parisc/scatterlist.h linux/include/asm-parisc/scatterlist.h --- /opt/kernel/linux-2.4.9/include/asm-parisc/scatterlist.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-parisc/scatterlist.h Thu Aug 23 09:28:51 2001 @@ -3,8 +3,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ unsigned int length; /* an IOVA can be 64-bits on some PA-Risc platforms. 
*/ diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-ppc/pci.h linux/include/asm-ppc/pci.h --- /opt/kernel/linux-2.4.9/include/asm-ppc/pci.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-ppc/pci.h Thu Aug 23 09:28:51 2001 @@ -108,7 +108,7 @@ * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. */ -extern inline int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) +extern inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) { return 1; } diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-ppc/scatterlist.h linux/include/asm-ppc/scatterlist.h --- /opt/kernel/linux-2.4.9/include/asm-ppc/scatterlist.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-ppc/scatterlist.h Thu Aug 23 09:28:51 2001 @@ -9,8 +9,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ unsigned int length; }; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-s390/scatterlist.h linux/include/asm-s390/scatterlist.h --- /opt/kernel/linux-2.4.9/include/asm-s390/scatterlist.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-s390/scatterlist.h Thu Aug 23 09:28:51 2001 @@ -3,8 +3,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ unsigned int length; }; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-s390x/scatterlist.h linux/include/asm-s390x/scatterlist.h --- /opt/kernel/linux-2.4.9/include/asm-s390x/scatterlist.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-s390x/scatterlist.h Thu Aug 23 09:28:51 2001 @@ -3,8 +3,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ unsigned int length; }; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-sh/pci.h linux/include/asm-sh/pci.h --- /opt/kernel/linux-2.4.9/include/asm-sh/pci.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-sh/pci.h Thu Aug 23 09:28:51 2001 @@ -167,7 +167,7 @@ * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. */ -extern inline int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) +extern inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) { return 1; } diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-sh/scatterlist.h linux/include/asm-sh/scatterlist.h --- /opt/kernel/linux-2.4.9/include/asm-sh/scatterlist.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-sh/scatterlist.h Thu Aug 23 09:28:51 2001 @@ -3,8 +3,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ unsigned int length; }; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-sparc/pci.h linux/include/asm-sparc/pci.h --- /opt/kernel/linux-2.4.9/include/asm-sparc/pci.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-sparc/pci.h Thu Aug 23 09:28:51 2001 @@ -108,7 +108,7 @@ * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. 
*/ -extern inline int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) +extern inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) { return 1; } diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-sparc/scatterlist.h linux/include/asm-sparc/scatterlist.h --- /opt/kernel/linux-2.4.9/include/asm-sparc/scatterlist.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-sparc/scatterlist.h Thu Aug 23 09:28:51 2001 @@ -6,8 +6,6 @@ struct scatterlist { char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ unsigned int length; __u32 dvma_address; /* A place to hang host-specific addresses at. */ diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-sparc64/pci.h linux/include/asm-sparc64/pci.h --- /opt/kernel/linux-2.4.9/include/asm-sparc64/pci.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-sparc64/pci.h Thu Aug 23 09:28:51 2001 @@ -28,6 +28,31 @@ /* Dynamic DMA mapping stuff. */ +/* PCI 64-bit addressing works for all slots on all controller + * types on sparc64. However, it requires that the device + * can drive enough of the 64 bits. + */ +#define PCI64_REQUIRED_MASK (~(dma64_addr_t)0) +#define PCI64_ADDR_BASE 0xfffc000000000000 + +/* The PCI address space does not equal the physical memory + * address space. The networking and block device layers use + * this boolean for bounce buffer decisions. + */ +#define PCI_DMA_BUS_IS_PHYS (0) + +/* Internal shorthand. */ +#define __PCI_DMA_FLAG_MUST_DAC (PCI_DMA_FLAG_HUGE_MAPS|\ + PCI_DMA_FLAG_DAC_ONLY) + +/* We want the driver to backoff to SAC addresses unless the + * usage of DAC addressing is absolutely required. + */ +#define pci_dac_cycles_ok(pci_dev) \ + (((pci_dev)->dma_flags & (__PCI_DMA_FLAG_MUST_DAC)) ? \ + (((pci_dev)->dma_mask & PCI64_REQUIRED_MASK) == PCI64_REQUIRED_MASK) : \ + 0) + #include struct pci_dev; @@ -36,6 +61,7 @@ * hwdev should be valid struct pci_dev pointer for PCI devices. */ extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle); +extern void *pci64_alloc_consistent(struct pci_dev *hwdev, size_t size, dma64_addr_t *dma_handle); /* Free and unmap a consistent DMA buffer. * cpu_addr is what was returned from pci_alloc_consistent, @@ -46,6 +72,8 @@ * past this call are illegal. */ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle); +extern void pci64_free_consistent(struct pci_dev *hwdev, size_t size, + void *vaddr, dma64_addr_t dma_handle); /* Map a single buffer of the indicated size for DMA in streaming mode. * The 32-bit bus address to use is returned. @@ -64,6 +92,20 @@ */ extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction); +/* No highmem on sparc64, plus we have an IOMMU, so mapping pages is easy. */ +#define pci_map_page(dev, page, off, size, dir) \ + pci_map_single(dev, (page_address(page) + (off)), size, dir) +#define pci_unmap_page(dev,addr,sz,dir) pci_unmap_single(dev,addr,sz,dir) + +/* The 64-bit cases might have to do something interesting if + * PCI_DMA_FLAG_HUGE_MAPS is set in hwdev->dma_flags. 
+ */ +extern dma64_addr_t pci64_map_page(struct pci_dev *hwdev, + struct page *page, unsigned long offset, + size_t size, int direction); +extern void pci64_unmap_page(struct pci_dev *hwdev, dma64_addr_t dma_addr, + size_t size, int direction); + /* Map a set of buffers described by scatterlist in streaming * mode for DMA. This is the scather-gather version of the * above pci_map_single interface. Here the scatter gather list @@ -79,13 +121,19 @@ * Device ownership issues as mentioned above for pci_map_single are * the same here. */ -extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction); +extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nents, int direction); +extern int pci64_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nents, int direction); /* Unmap a set of streaming mode DMA translations. * Again, cpu read rules concerning calls here are the same as for * pci_unmap_single() above. */ -extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nhwents, int direction); +extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nhwents, int direction); +extern void pci64_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, + int nhwents, int direction); /* Make physical memory consistent for a single * streaming mode DMA translation after a transfer. @@ -96,7 +144,10 @@ * next point you give the PCI dma address back to the card, the * device again owns the buffer. */ -extern void pci_dma_sync_single(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction); +extern void pci_dma_sync_single(struct pci_dev *hwdev, dma_addr_t dma_handle, + size_t size, int direction); +extern void pci64_dma_sync_single(struct pci_dev *hwdev, dma64_addr_t dma_handle, + size_t size, int direction); /* Make physical memory consistent for a set of streaming * mode DMA translations after a transfer. @@ -105,13 +156,14 @@ * same rules and usage. */ extern void pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction); +extern void pci64_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction); /* Return whether the given PCI device DMA address mask can * be supported properly. For example, if your device can * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. */ -extern int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask); +extern int pci_dma_supported(struct pci_dev *hwdev, u64 mask); /* Return the index of the PCI controller for device PDEV. */ diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-sparc64/scatterlist.h linux/include/asm-sparc64/scatterlist.h --- /opt/kernel/linux-2.4.9/include/asm-sparc64/scatterlist.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-sparc64/scatterlist.h Thu Aug 23 09:28:51 2001 @@ -5,17 +5,24 @@ #include struct scatterlist { - char * address; /* Location data is to be transferred to */ - char * alt_address; /* Location of actual if address is a - * dma indirect buffer. NULL otherwise */ - unsigned int length; + /* This will disappear in 2.5.x */ + char *address; - __u32 dvma_address; /* A place to hang host-specific addresses at. */ - __u32 dvma_length; + /* These two are only valid if ADDRESS member of this + * struct is NULL. 
+ */ + struct page *page; + unsigned int offset; + + unsigned int length; + + dma64_addr_t dma_address; + __u32 dma_length; }; -#define sg_dma_address(sg) ((sg)->dvma_address) -#define sg_dma_len(sg) ((sg)->dvma_length) +#define sg_dma_address(sg) ((dma_addr_t) ((sg)->dma_address)) +#define sg_dma64_address(sg) ((sg)->dma_address) +#define sg_dma_len(sg) ((sg)->dma_length) #define ISA_DMA_THRESHOLD (~0UL) diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/asm-sparc64/types.h linux/include/asm-sparc64/types.h --- /opt/kernel/linux-2.4.9/include/asm-sparc64/types.h Fri Aug 24 13:46:13 2001 +++ linux/include/asm-sparc64/types.h Thu Aug 23 09:28:51 2001 @@ -45,9 +45,10 @@ #define BITS_PER_LONG 64 -/* Dma addresses are 32-bits wide for now. */ +/* Dma addresses come in 32-bit and 64-bit flavours. */ typedef u32 dma_addr_t; +typedef u64 dma64_addr_t; #endif /* __KERNEL__ */ diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/linux/blkdev.h linux/include/linux/blkdev.h --- /opt/kernel/linux-2.4.9/include/linux/blkdev.h Fri Aug 24 13:46:13 2001 +++ linux/include/linux/blkdev.h Fri Aug 24 09:07:33 2001 @@ -7,6 +7,8 @@ #include #include +#include + struct request_queue; typedef struct request_queue request_queue_t; struct elevator_s; @@ -36,7 +38,7 @@ unsigned long hard_sector, hard_nr_sectors; unsigned int nr_segments; unsigned int nr_hw_segments; - unsigned long current_nr_sectors; + unsigned long current_nr_sectors, hard_cur_sectors; void * special; char * buffer; struct completion * waiting; @@ -110,6 +112,8 @@ */ char head_active; + struct page *bounce_limit; + /* * Is meant to protect the queue in the future instead of * io_request_lock @@ -122,6 +126,31 @@ wait_queue_head_t wait_for_request; }; +extern unsigned long blk_max_low_pfn; + +#define BLK_BOUNCE_HIGH (blk_max_low_pfn * PAGE_SIZE) +#define BLK_BOUNCE_ANY (~(unsigned long long) 0) + +extern void blk_queue_bounce_limit(request_queue_t *, dma64_addr_t); + +#ifdef CONFIG_HIGHMEM +extern struct buffer_head *create_bounce(int, struct buffer_head *); +extern inline struct buffer_head *blk_queue_bounce(request_queue_t *q, int rw, + struct buffer_head *bh) +{ + if (bh->b_page <= q->bounce_limit) + return bh; + + return create_bounce(rw, bh); +} +#else +#define blk_queue_bounce(q, rw, bh) (bh) +#endif + +#define bh_phys(bh) (page_to_phys((bh)->b_page) + bh_offset((bh))) + +#define BH_CONTIG(b1, b2) (bh_phys((b1)) + (b1)->b_size == bh_phys((b2))) + struct blk_dev_struct { /* * queue_proc has to be atomic @@ -149,8 +178,7 @@ extern void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size); extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size); extern void generic_make_request(int rw, struct buffer_head * bh); -extern request_queue_t *blk_get_queue(kdev_t dev); -extern inline request_queue_t *__blk_get_queue(kdev_t dev); +extern inline request_queue_t *blk_get_queue(kdev_t dev); extern void blkdev_release_request(struct request *); /* @@ -161,6 +189,8 @@ extern void blk_queue_headactive(request_queue_t *, int); extern void blk_queue_make_request(request_queue_t *, make_request_fn *); extern void generic_unplug_device(void *); +extern inline int blk_seg_merge_ok(request_queue_t *, struct buffer_head *, + struct buffer_head *); extern int * blk_size[MAX_BLKDEV]; diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/linux/highmem.h linux/include/linux/highmem.h --- 
/opt/kernel/linux-2.4.9/include/linux/highmem.h Fri Aug 24 13:46:13 2001 +++ linux/include/linux/highmem.h Thu Aug 23 09:30:14 2001 @@ -13,8 +13,7 @@ /* declarations for linux/mm/highmem.c */ FASTCALL(unsigned int nr_free_highpages(void)); -extern struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig); - +extern struct buffer_head *create_bounce(int rw, struct buffer_head * bh_orig); static inline char *bh_kmap(struct buffer_head *bh) { @@ -26,6 +25,42 @@ kunmap(bh->b_page); } +/* + * remember to add offset! and never ever reenable interrupts between a + * bh_kmap_irq and bh_kunmap_irq!! + */ +static inline char *bh_kmap_irq(struct buffer_head *bh, unsigned long *flags) +{ + unsigned long addr; + + __save_flags(*flags); + + /* + * could be low + */ + if (!PageHighMem(bh->b_page)) + return bh->b_data; + + /* + * it's a highmem page + */ + __cli(); + addr = (unsigned long) kmap_atomic(bh->b_page, KM_BH_IRQ); + + if (addr & ~PAGE_MASK) + BUG(); + + return (char *) addr + bh_offset(bh); +} + +static inline void bh_kunmap_irq(char *buffer, unsigned long *flags) +{ + unsigned long ptr = (unsigned long) buffer & PAGE_MASK; + + kunmap_atomic((void *) ptr, KM_BH_IRQ); + __restore_flags(*flags); +} + #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } @@ -37,8 +72,10 @@ #define kmap_atomic(page,idx) kmap(page) #define kunmap_atomic(page,idx) kunmap(page) -#define bh_kmap(bh) ((bh)->b_data) -#define bh_kunmap(bh) do { } while (0) +#define bh_kmap(bh) ((bh)->b_data) +#define bh_kunmap(bh) do { } while (0) +#define bh_kmap_irq(bh, flags) ((bh)->b_data) +#define bh_kunmap_irq(bh, flags) do { } while (0) #endif /* CONFIG_HIGHMEM */ diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/linux/ide.h linux/include/linux/ide.h --- /opt/kernel/linux-2.4.9/include/linux/ide.h Fri Aug 24 13:46:13 2001 +++ linux/include/linux/ide.h Fri Aug 24 09:22:55 2001 @@ -507,6 +507,7 @@ unsigned reset : 1; /* reset after probe */ unsigned autodma : 1; /* automatically try to enable DMA at boot */ unsigned udma_four : 1; /* 1=ATA-66 capable, 0=default */ + unsigned highmem : 1; /* can do full 32-bit dma */ byte channel; /* for dual-port chips: 0=primary, 1=secondary */ #ifdef CONFIG_BLK_DEV_IDEPCI struct pci_dev *pci_dev; /* for pci chipsets */ @@ -812,6 +813,21 @@ ide_preempt, /* insert rq in front of current request */ ide_end /* insert rq at end of list, but don't wait for it */ } ide_action_t; + +/* + * temporarily mapping a (possible) highmem bio + */ +#define ide_rq_offset(rq) (((rq)->hard_cur_sectors - (rq)->current_nr_sectors) << 9) + +extern inline void *ide_map_buffer(struct request *rq, unsigned long *flags) +{ + return bh_kmap_irq(rq->bh, flags) + ide_rq_offset(rq); +} + +extern inline void ide_unmap_buffer(char *buffer, unsigned long *flags) +{ + bh_kunmap_irq(buffer, flags); +} /* * This function issues a special IDE device request diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/include/linux/pci.h linux/include/linux/pci.h --- /opt/kernel/linux-2.4.9/include/linux/pci.h Fri Aug 24 13:46:13 2001 +++ linux/include/linux/pci.h Thu Aug 23 09:31:12 2001 @@ -314,6 +314,17 @@ #define PCI_DMA_FROMDEVICE 2 #define PCI_DMA_NONE 3 +/* These are the boolean attributes stored in pci_dev->dma_flags. */ + +/* Device may hold an enormous number of mappings at once? */ +#define PCI_DMA_FLAG_HUGE_MAPS 0x00000001 + +/* The device only supports Dual Address Cycles. 
*/ +#define PCI_DMA_FLAG_DAC_ONLY 0x00000002 + +/* Reserved for arch-specific flags. */ +#define PCI_DMA_FLAG_ARCHMASK 0xf0000000 + #define DEVICE_COUNT_COMPATIBLE 4 #define DEVICE_COUNT_IRQ 2 #define DEVICE_COUNT_DMA 2 @@ -353,11 +364,12 @@ struct pci_driver *driver; /* which driver has allocated this device */ void *driver_data; /* data private to the driver */ - dma_addr_t dma_mask; /* Mask of the bits of bus address this + u64 dma_mask; /* Mask of the bits of bus address this device implements. Normally this is 0xffffffff. You only need to change this if your device has broken DMA or supports 64-bit transfers. */ + unsigned int dma_flags; /* See PCI_DMA_FLAG_* above */ u32 current_state; /* Current operating state. In ACPI-speak, this is D0-D3, D0 being fully functional, @@ -559,7 +571,8 @@ int pci_enable_device(struct pci_dev *dev); void pci_disable_device(struct pci_dev *dev); void pci_set_master(struct pci_dev *dev); -int pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask); +int pci_set_dma_mask(struct pci_dev *dev, u64 mask); +void pci_change_dma_flag(struct pci_dev *dev, unsigned int on, unsigned int off); int pci_assign_resource(struct pci_dev *dev, int i); /* Power management related routines */ @@ -597,6 +610,9 @@ void *pci_pool_alloc (struct pci_pool *pool, int flags, dma_addr_t *handle); void pci_pool_free (struct pci_pool *pool, void *vaddr, dma_addr_t addr); +void *pci64_pool_alloc (struct pci_pool *pool, int flags, dma64_addr_t *handle); +void pci64_pool_free (struct pci_pool *pool, void *vaddr, dma64_addr_t addr); + #endif /* CONFIG_PCI */ /* Include architecture-dependent settings and functions */ @@ -641,7 +657,8 @@ static inline int pci_enable_device(struct pci_dev *dev) { return -EIO; } static inline void pci_disable_device(struct pci_dev *dev) { } static inline int pci_module_init(struct pci_driver *drv) { return -ENODEV; } -static inline int pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask) { return -EIO; } +static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) { return -EIO; } +static inline void pci_change_dma_flag(struct pci_dev *dev, unsigned int on, unsigned int off) { } static inline int pci_assign_resource(struct pci_dev *dev, int i) { return -EBUSY;} static inline int pci_register_driver(struct pci_driver *drv) { return 0;} static inline void pci_unregister_driver(struct pci_driver *drv) { } diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/kernel/ksyms.c linux/kernel/ksyms.c --- /opt/kernel/linux-2.4.9/kernel/ksyms.c Fri Aug 24 13:46:13 2001 +++ linux/kernel/ksyms.c Thu Aug 23 09:28:51 2001 @@ -121,6 +121,8 @@ EXPORT_SYMBOL(kunmap_high); EXPORT_SYMBOL(highmem_start_page); EXPORT_SYMBOL(create_bounce); +EXPORT_SYMBOL(kmap_prot); +EXPORT_SYMBOL(kmap_pte); #endif /* filesystem internal functions */ diff -ur --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.9/mm/highmem.c linux/mm/highmem.c --- /opt/kernel/linux-2.4.9/mm/highmem.c Fri Aug 24 13:46:13 2001 +++ linux/mm/highmem.c Thu Aug 23 09:28:51 2001 @@ -222,15 +222,11 @@ { struct page *p_to; char *vto; - unsigned long flags; p_to = to->b_page; - __save_flags(flags); - __cli(); vto = kmap_atomic(p_to, KM_BOUNCE_READ); memcpy(vto + bh_offset(to), from->b_data, to->b_size); kunmap_atomic(vto, KM_BOUNCE_READ); - __restore_flags(flags); } static inline void bounce_end_io (struct buffer_head *bh, int uptodate)