diff -u --recursive --new-file v2.5.0/linux/CREDITS linux/CREDITS --- v2.5.0/linux/CREDITS Sun Nov 11 10:09:32 2001 +++ linux/CREDITS Tue Nov 27 09:23:27 2001 @@ -140,9 +140,11 @@ D: VIA MVP-3/TX Pro III chipset IDE N: Jens Axboe -E: axboe@image.dk -D: Linux CD-ROM maintainer -D: jiffies wrap fixes + schedule timeouts depending on HZ == 100 +E: axboe@suse.de +D: Linux CD-ROM maintainer, DVD support +D: elevator + block layer rewrites +D: highmem I/O support +D: misc hacking on IDE, SCSI, block drivers, etc S: Peter Bangs Vej 258, 2TH S: 2500 Valby S: Denmark diff -u --recursive --new-file v2.5.0/linux/Documentation/Configure.help linux/Documentation/Configure.help --- v2.5.0/linux/Documentation/Configure.help Thu Nov 22 10:52:44 2001 +++ linux/Documentation/Configure.help Tue Nov 27 09:23:27 2001 @@ -6644,17 +6644,6 @@ . The module will be called sg.o. If unsure, say N. -Enable extra checks in SCSI queueing code -CONFIG_SCSI_DEBUG_QUEUES - This option turns on a lot of additional consistency checking for - the new queueing code. This will adversely affect performance, but - it is likely that bugs will be caught sooner if this is turned on. - This will typically cause the kernel to panic if an error is - detected, but it would have probably crashed if the panic weren't - there. Comments/questions/problems to linux-scsi mailing list - please. See for more - up-to-date information. - Probe all LUNs on each SCSI device CONFIG_SCSI_MULTI_LUN If you have a SCSI device that supports more than one LUN (Logical diff -u --recursive --new-file v2.5.0/linux/Documentation/DocBook/Makefile linux/Documentation/DocBook/Makefile --- v2.5.0/linux/Documentation/DocBook/Makefile Fri Nov 2 17:13:53 2001 +++ linux/Documentation/DocBook/Makefile Tue Nov 27 09:23:27 2001 @@ -108,6 +108,7 @@ $(TOPDIR)/drivers/video/modedb.c \ $(TOPDIR)/fs/devfs/base.c \ $(TOPDIR)/fs/locks.c \ + $(TOPDIR)/fs/bio.c \ $(TOPDIR)/include/asm-i386/bitops.h \ $(TOPDIR)/kernel/pm.c \ $(TOPDIR)/kernel/ksyms.c \ diff -u --recursive --new-file v2.5.0/linux/Documentation/filesystems/devfs/ChangeLog linux/Documentation/filesystems/devfs/ChangeLog --- v2.5.0/linux/Documentation/filesystems/devfs/ChangeLog Sat Nov 3 10:06:38 2001 +++ linux/Documentation/filesystems/devfs/ChangeLog Sat Nov 24 13:06:43 2001 @@ -1778,3 +1778,19 @@ Thanks to Kari Hurtta - Avoid deadlock in by using temporary buffer +=============================================================================== +Changes for patch v197 + +- First release of new locking code for devfs core (v1.0) + +- Fixed bug in drivers/cdrom/cdrom.c +=============================================================================== +Changes for patch v198 + +- Discard temporary buffer, now use "%s" for dentry names + +- Don't generate path in : use fake entry instead + +- Use "existing" directory in <_devfs_make_parent_for_leaf> + +- Use slab cache rather than fixed buffer for devfsd events diff -u --recursive --new-file v2.5.0/linux/Documentation/filesystems/devfs/README linux/Documentation/filesystems/devfs/README --- v2.5.0/linux/Documentation/filesystems/devfs/README Wed Oct 10 23:23:24 2001 +++ linux/Documentation/filesystems/devfs/README Sat Nov 24 13:06:43 2001 @@ -3,7 +3,7 @@ Linux Devfs (Device File System) FAQ Richard Gooch -29-SEP-2001 +9-NOV-2001 ----------------------------------------------------------------------------- @@ -11,7 +11,9 @@ http://www.atnf.csiro.au/~rgooch/linux/docs/devfs.html and looks much better than the text version distributed with the -kernel sources. 
+kernel sources. A mirror site is available at: + +http://www.ras.ucalgary.ca/~rgooch/linux/docs/devfs.html There is also an optional daemon that may be used with devfs. You can find out more about it at: diff -u --recursive --new-file v2.5.0/linux/Documentation/usb/philips.txt linux/Documentation/usb/philips.txt --- v2.5.0/linux/Documentation/usb/philips.txt Wed Oct 17 14:34:06 2001 +++ linux/Documentation/usb/philips.txt Mon Nov 26 17:09:10 2001 @@ -1,5 +1,5 @@ This file contains some additional information for the Philips webcams. -E-mail: webcam@smcc.demon.nl Last updated: 2001-07-27 +E-mail: webcam@smcc.demon.nl Last updated: 2001-09-24 The main webpage for the Philips driver is http://www.smcc.demon.nl/webcam/. It contains a lot of extra information, a FAQ, and the binary plugin @@ -13,11 +13,9 @@ the latter, since it makes troubleshooting a lot easier. The built-in microphone is supported through the USB Audio class. -(Taken from install.html) - When you load the module you can set some default settings for the -camera; some programs depend on a particular image-size or -format. The -options are: +camera; some programs depend on a particular image-size or -format and +don't know how to set it properly in the driver. The options are: size Can be one of 'sqcif', 'qsif', 'qcif', 'sif', 'cif' or @@ -99,6 +97,57 @@ This parameter works only with the ToUCam range of cameras (730, 740, 750). For other cameras this command is silently ignored, and the LED cannot be controlled. + +dev_hint + A long standing problem with USB devices is their dynamic nature: you + never know what device a camera gets assigned; it depends on module load + order, the hub configuration, the order in which devices are plugged in, + and the phase of the moon (i.e. it can be random). With this option you + can give the driver a hint as to what video device node (/dev/videoX) it + should use with a specific camera. This is also handy if you have two + cameras of the same model. + + A camera is specified by its type (the number from the camera model, + like PCA645, PCVC750VC, etc) and optionally the serial number (visible + in /proc/bus/usb/devices). A hint consists of a string with the following + format: + + [type[.serialnumber]:]node + + The square brackets mean that both the type and the serialnumber are + optional, but a serialnumber cannot be specified without a type (which + would be rather pointless). The serialnumber is separated from the type + by a '.'; the node number by a ':'. + + This somewhat cryptic syntax is best explained by a few examples: + + dev_hint=3,5 The first detected cam gets assigned + /dev/video3, the second /dev/video5. Any + other cameras will get the first free + available slot (see below). + + dev_hint=645:1,680=2 The PCA645 camera will get /dev/video1, + and a PCVC680 /dev/video2. + + dev_hint=645.0123:3,645.4567:0 The PCA645 camera with serialnumber + 0123 goes to /dev/video3, the same + camera model with the 4567 serial + gets /dev/video0. + + dev_hint=750:1,4,5,6 The PCVC750 camera will get /dev/video1, the + next 3 Philips cams will use /dev/video4 + through /dev/video6. + + Some points worth knowing: + - Serialnumbers are case sensitive and must be written full, including + leading zeroes (it's treated as a string). + - If a device node is already occupied, registration will fail and + the webcam is not available. + - You can have up to 64 video devices; be sure to make enough device + nodes in /dev if you want to spread the numbers (this does not apply + to devfs). 
After /dev/video9 comes /dev/video10 (not /dev/videoA). + - If a camera does not match any dev_hint, it will simply get assigned + the first available device node, just as it used to be. trace In order to better detect problems, it is now possible to turn on a diff -u --recursive --new-file v2.5.0/linux/MAINTAINERS linux/MAINTAINERS --- v2.5.0/linux/MAINTAINERS Fri Nov 16 10:03:24 2001 +++ linux/MAINTAINERS Sun Nov 25 09:44:09 2001 @@ -1660,8 +1660,8 @@ W: http://misc.nu/hugh/keyspan/ USB SUBSYSTEM -P: Johannes Erdfelt -M: johannes@erdfelt.com +P: Greg Kroah-Hartman +M: greg@kroah.com L: linux-usb-users@lists.sourceforge.net L: linux-usb-devel@lists.sourceforge.net W: http://www.linux-usb.org diff -u --recursive --new-file v2.5.0/linux/Makefile linux/Makefile --- v2.5.0/linux/Makefile Thu Nov 22 22:23:44 2001 +++ linux/Makefile Tue Nov 27 09:23:27 2001 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 5 -SUBLEVEL = 0 -EXTRAVERSION = +SUBLEVEL = 1 +EXTRAVERSION =-pre2 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) diff -u --recursive --new-file v2.5.0/linux/README linux/README --- v2.5.0/linux/README Fri Oct 5 12:10:00 2001 +++ linux/README Sat Nov 24 11:29:19 2001 @@ -1,9 +1,12 @@ - Linux kernel release 2.4.xx + Linux kernel release 2.5.xx -These are the release notes for Linux version 2.4. Read them carefully, +These are the release notes for Linux version 2.5. Read them carefully, as they tell you what this is all about, explain how to install the kernel, and what to do if something goes wrong. +NOTE! As with all odd-numbered releases, 2.5.x is a development kernel. +For stable kernels, see the 2.4.x maintained by Marcelo Tosatti. + WHAT IS LINUX? Linux is a Unix clone written from scratch by Linus Torvalds with @@ -52,7 +55,7 @@ directory where you have permissions (eg. your home directory) and unpack it: - gzip -cd linux-2.4.XX.tar.gz | tar xvf - + gzip -cd linux-2.5.XX.tar.gz | tar xvf - Replace "XX" with the version number of the latest kernel. @@ -61,7 +64,7 @@ files. They should match the library, and not get messed up by whatever the kernel-du-jour happens to be. - - You can also upgrade between 2.4.xx releases by patching. Patches are + - You can also upgrade between 2.5.xx releases by patching. Patches are distributed in the traditional gzip and the new bzip2 format. To install by patching, get all the newer patch files, enter the directory in which you unpacked the kernel source and execute: @@ -96,7 +99,7 @@ SOFTWARE REQUIREMENTS - Compiling and running the 2.4.xx kernels requires up-to-date + Compiling and running the 2.5.xx kernels requires up-to-date versions of various software packages. Consult ./Documentation/Changes for the minimum version numbers required and how to get updates for these packages. Beware that using diff -u --recursive --new-file v2.5.0/linux/arch/alpha/kernel/alpha_ksyms.c linux/arch/alpha/kernel/alpha_ksyms.c --- v2.5.0/linux/arch/alpha/kernel/alpha_ksyms.c Tue Nov 20 15:49:31 2001 +++ linux/arch/alpha/kernel/alpha_ksyms.c Sun Nov 25 09:45:10 2001 @@ -109,6 +109,7 @@ EXPORT_SYMBOL(strrchr); EXPORT_SYMBOL(memcmp); EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memscan); EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(__memsetw); diff -u --recursive --new-file v2.5.0/linux/arch/i386/defconfig linux/arch/i386/defconfig --- v2.5.0/linux/arch/i386/defconfig Mon Nov 12 11:59:03 2001 +++ linux/arch/i386/defconfig Tue Nov 27 09:25:27 2001 @@ -286,7 +286,6 @@ # # Some SCSI devices (e.g. 
CD jukebox) support multiple LUNs # -CONFIG_SCSI_DEBUG_QUEUES=y CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_LOGGING is not set @@ -431,6 +430,7 @@ # CONFIG_SUNDANCE is not set # CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set +# CONFIG_VIA_RHINE_MMIO is not set # CONFIG_WINBOND_840 is not set # CONFIG_NET_POCKET is not set diff -u --recursive --new-file v2.5.0/linux/arch/i386/kernel/setup.c linux/arch/i386/kernel/setup.c --- v2.5.0/linux/arch/i386/kernel/setup.c Mon Nov 19 15:16:13 2001 +++ linux/arch/i386/kernel/setup.c Tue Nov 27 09:23:27 2001 @@ -157,6 +157,7 @@ extern void mcheck_init(struct cpuinfo_x86 *c); extern int root_mountflags; extern char _text, _etext, _edata, _end; +extern int blk_nohighio; static int disable_x86_serial_nr __initdata = 1; static int disable_x86_fxsr __initdata = 0; @@ -782,7 +783,7 @@ void __init setup_arch(char **cmdline_p) { unsigned long bootmap_size, low_mem_size; - unsigned long start_pfn, max_pfn, max_low_pfn; + unsigned long start_pfn, max_low_pfn; int i; #ifdef CONFIG_VISWS @@ -1067,6 +1068,14 @@ __setup("notsc", tsc_setup); #endif +static int __init highio_setup(char *str) +{ + printk("i386: disabling HIGHMEM block I/O\n"); + blk_nohighio = 1; + return 1; +} +__setup("nohighio", highio_setup); + static int __init get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; diff -u --recursive --new-file v2.5.0/linux/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c --- v2.5.0/linux/arch/i386/kernel/traps.c Sun Sep 30 12:26:08 2001 +++ linux/arch/i386/kernel/traps.c Sun Nov 25 09:48:47 2001 @@ -697,7 +697,7 @@ */ asmlinkage void math_state_restore(struct pt_regs regs) { - __asm__ __volatile__("clts"); /* Allow maths ops (or we recurse) */ + clts(); /* Allow maths ops (or we recurse) */ if (current->used_math) { restore_fpu(current); diff -u --recursive --new-file v2.5.0/linux/arch/i386/mm/init.c linux/arch/i386/mm/init.c --- v2.5.0/linux/arch/i386/mm/init.c Sun Nov 11 10:09:32 2001 +++ linux/arch/i386/mm/init.c Sun Nov 25 09:48:47 2001 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -333,7 +334,7 @@ { pagetable_init(); - __asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir))); + __asm__( "movl %0,%%cr3\n" ::"r"(__pa(swapper_pg_dir))); #if CONFIG_X86_PAE /* @@ -596,3 +597,17 @@ val->mem_unit = PAGE_SIZE; return; } + +#if defined(CONFIG_X86_PAE) +struct kmem_cache_s *pae_pgd_cachep; +void __init pgtable_cache_init(void) +{ + /* + * PAE pgds must be 16-byte aligned: + */ + pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0, + SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL); + if (!pae_pgd_cachep) + panic("init_pae(): Cannot alloc pae_pgd SLAB cache"); +} +#endif /* CONFIG_X86_PAE */ diff -u --recursive --new-file v2.5.0/linux/drivers/block/DAC960.c linux/drivers/block/DAC960.c --- v2.5.0/linux/drivers/block/DAC960.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/block/DAC960.c Tue Nov 27 09:23:27 2001 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -306,9 +307,9 @@ static void DAC960_WaitForCommand(DAC960_Controller_T *Controller) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&Controller->RequestQueue->queue_lock); __wait_event(Controller->CommandWaitQueue, Controller->FreeCommands); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&Controller->RequestQueue->queue_lock); } @@ -1922,76 +1923,6 @@ /* - DAC960_BackMergeFunction is the Back Merge Function for the DAC960 driver. 
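
The three merge callbacks deleted below (back merge, front merge, merge requests) become unnecessary in the bio-based block layer: a driver now declares its limits on the queue and the elevator enforces them when merging. A minimal sketch of the replacement pattern, using the same symbols this patch introduces for DAC960:

    request_queue_t *RequestQueue = BLK_DEFAULT_QUEUE(MajorNumber);

    blk_init_queue(RequestQueue, DAC960_RequestFunction, "dac960");
    blk_queue_headactive(RequestQueue, 0);
    RequestQueue->queuedata = Controller;
    /* declarative limits; the elevator stops merging at these bounds */
    RequestQueue->max_segments = Controller->DriverScatterGatherLimit;
    RequestQueue->max_sectors = Controller->MaxBlocksPerCommand;
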
-*/ - -static int DAC960_BackMergeFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - BufferHeader_T *BufferHeader, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - if (Request->bhtail->b_data + Request->bhtail->b_size == BufferHeader->b_data) - return true; - if (Request->nr_segments < MaxSegments && - Request->nr_segments < Controller->DriverScatterGatherLimit) - { - Request->nr_segments++; - return true; - } - return false; -} - - -/* - DAC960_FrontMergeFunction is the Front Merge Function for the DAC960 driver. -*/ - -static int DAC960_FrontMergeFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - BufferHeader_T *BufferHeader, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - if (BufferHeader->b_data + BufferHeader->b_size == Request->bh->b_data) - return true; - if (Request->nr_segments < MaxSegments && - Request->nr_segments < Controller->DriverScatterGatherLimit) - { - Request->nr_segments++; - return true; - } - return false; -} - - -/* - DAC960_MergeRequestsFunction is the Merge Requests Function for the - DAC960 driver. -*/ - -static int DAC960_MergeRequestsFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - IO_Request_T *NextRequest, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - int TotalSegments = Request->nr_segments + NextRequest->nr_segments; - if (Request->bhtail->b_data + Request->bhtail->b_size - == NextRequest->bh->b_data) - TotalSegments--; - if (TotalSegments > MaxSegments || - TotalSegments > Controller->DriverScatterGatherLimit) - return false; - Request->nr_segments = TotalSegments; - return true; -} - - -/* DAC960_RegisterBlockDevice registers the Block Device structures associated with Controller. */ @@ -2015,15 +1946,15 @@ Initialize the I/O Request Queue. */ RequestQueue = BLK_DEFAULT_QUEUE(MajorNumber); - blk_init_queue(RequestQueue, DAC960_RequestFunction); + blk_init_queue(RequestQueue, DAC960_RequestFunction, "dac960"); blk_queue_headactive(RequestQueue, 0); - RequestQueue->back_merge_fn = DAC960_BackMergeFunction; - RequestQueue->front_merge_fn = DAC960_FrontMergeFunction; - RequestQueue->merge_requests_fn = DAC960_MergeRequestsFunction; RequestQueue->queuedata = Controller; + RequestQueue->max_segments = Controller->DriverScatterGatherLimit; + RequestQueue->max_sectors = Controller->MaxBlocksPerCommand; Controller->RequestQueue = RequestQueue; /* - Initialize the Max Sectors per Request array. + Initialize the Disk Partitions array, Partition Sizes array, Block Sizes + array, and Max Sectors per Request array. */ for (MinorNumber = 0; MinorNumber < DAC960_MinorCount; MinorNumber++) Controller->MaxSectorsPerRequest[MinorNumber] = @@ -2031,7 +1962,6 @@ Controller->GenericDiskInfo.part = Controller->DiskPartitions; Controller->GenericDiskInfo.sizes = Controller->PartitionSizes; blksize_size[MajorNumber] = Controller->BlockSizes; - max_sectors[MajorNumber] = Controller->MaxSectorsPerRequest; /* Initialize Read Ahead to 128 sectors. */ @@ -2080,9 +2010,7 @@ */ Controller->GenericDiskInfo.part = NULL; Controller->GenericDiskInfo.sizes = NULL; - blk_size[MajorNumber] = NULL; - blksize_size[MajorNumber] = NULL; - max_sectors[MajorNumber] = NULL; + blk_clear(MajorNumber); /* Remove the Generic Disk Information structure from the list. 
*/ @@ -2813,23 +2741,24 @@ CommandMailbox->Type5.ScatterGatherCount = Command->SegmentCount; while (BufferHeader != NULL) { - if (BufferHeader->b_data == LastDataEndPointer) + if (bio_data(BufferHeader) == LastDataEndPointer) { ScatterGatherList[SegmentNumber-1].SegmentByteCount += - BufferHeader->b_size; - LastDataEndPointer += BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer += bio_size(BufferHeader); } else { ScatterGatherList[SegmentNumber].SegmentDataPointer = - Virtual_to_Bus32(BufferHeader->b_data); + Virtual_to_Bus32(bio_data(BufferHeader)); ScatterGatherList[SegmentNumber].SegmentByteCount = - BufferHeader->b_size; - LastDataEndPointer = BufferHeader->b_data + BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer = bio_data(BufferHeader) + + bio_size(BufferHeader); if (SegmentNumber++ > Controller->DriverScatterGatherLimit) panic("DAC960: Scatter/Gather Segment Overflow\n"); } - BufferHeader = BufferHeader->b_reqnext; + BufferHeader = BufferHeader->bi_next; } if (SegmentNumber != Command->SegmentCount) panic("DAC960: SegmentNumber != SegmentCount\n"); @@ -2903,23 +2832,24 @@ .ScatterGatherSegments; while (BufferHeader != NULL) { - if (BufferHeader->b_data == LastDataEndPointer) + if (bio_data(BufferHeader) == LastDataEndPointer) { ScatterGatherList[SegmentNumber-1].SegmentByteCount += - BufferHeader->b_size; - LastDataEndPointer += BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer += bio_size(BufferHeader); } else { ScatterGatherList[SegmentNumber].SegmentDataPointer = - Virtual_to_Bus64(BufferHeader->b_data); + Virtual_to_Bus64(bio_data(BufferHeader)); ScatterGatherList[SegmentNumber].SegmentByteCount = - BufferHeader->b_size; - LastDataEndPointer = BufferHeader->b_data + BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer = bio_data(BufferHeader) + + bio_size(BufferHeader); if (SegmentNumber++ > Controller->DriverScatterGatherLimit) panic("DAC960: Scatter/Gather Segment Overflow\n"); } - BufferHeader = BufferHeader->b_reqnext; + BufferHeader = BufferHeader->bi_next; } if (SegmentNumber != Command->SegmentCount) panic("DAC960: SegmentNumber != SegmentCount\n"); @@ -2947,7 +2877,7 @@ while (true) { if (list_empty(RequestQueueHead)) return false; - Request = blkdev_entry_next_request(RequestQueueHead); + Request = elv_next_request(RequestQueue); Command = DAC960_AllocateCommand(Controller); if (Command != NULL) break; if (!WaitForCommand) return false; @@ -2958,12 +2888,10 @@ else Command->CommandType = DAC960_WriteCommand; Command->Completion = Request->waiting; Command->LogicalDriveNumber = DAC960_LogicalDriveNumber(Request->rq_dev); - Command->BlockNumber = - Request->sector - + Controller->GenericDiskInfo.part[MINOR(Request->rq_dev)].start_sect; + Command->BlockNumber = Request->sector; Command->BlockCount = Request->nr_sectors; Command->SegmentCount = Request->nr_segments; - Command->BufferHeader = Request->bh; + Command->BufferHeader = Request->bio; Command->RequestBuffer = Request->buffer; blkdev_dequeue_request(Request); blkdev_release_request(Request); @@ -3016,8 +2944,10 @@ static inline void DAC960_ProcessCompletedBuffer(BufferHeader_T *BufferHeader, boolean SuccessfulIO) { - blk_finished_io(BufferHeader->b_size >> 9); - BufferHeader->b_end_io(BufferHeader, SuccessfulIO); + if (SuccessfulIO) + set_bit(BIO_UPTODATE, &BufferHeader->bi_flags); + blk_finished_io(bio_sectors(BufferHeader)); + BufferHeader->bi_end_io(BufferHeader); } @@ -3071,13 +3001,13 @@ Controller, Controller->ControllerNumber, 
Command->LogicalDriveNumber, Command->BlockNumber, Command->BlockNumber + Command->BlockCount - 1); - if (DAC960_PartitionNumber(Command->BufferHeader->b_rdev) > 0) + if (DAC960_PartitionNumber(Command->BufferHeader->bi_dev) > 0) DAC960_Error(" /dev/rd/c%dd%dp%d: relative blocks %u..%u\n", Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, - DAC960_PartitionNumber(Command->BufferHeader->b_rdev), - Command->BufferHeader->b_rsector, - Command->BufferHeader->b_rsector + Command->BlockCount - 1); + DAC960_PartitionNumber(Command->BufferHeader->bi_dev), + Command->BufferHeader->bi_sector, + Command->BufferHeader->bi_sector + Command->BlockCount - 1); } @@ -3104,8 +3034,8 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, true); BufferHeader = NextBufferHeader; } @@ -3119,7 +3049,7 @@ else if ((CommandStatus == DAC960_V1_IrrecoverableDataError || CommandStatus == DAC960_V1_BadDataEncountered) && BufferHeader != NULL && - BufferHeader->b_reqnext != NULL) + BufferHeader->bi_next != NULL) { DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; @@ -3133,10 +3063,10 @@ Command->CommandType = DAC960_WriteRetryCommand; CommandMailbox->Type5.CommandOpcode = DAC960_V1_Write; } - Command->BlockCount = BufferHeader->b_size >> DAC960_BlockSizeBits; + Command->BlockCount = bio_size(BufferHeader) >> DAC960_BlockSizeBits; CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; CommandMailbox->Type5.BusAddress = - Virtual_to_Bus32(BufferHeader->b_data); + Virtual_to_Bus32(bio_data(BufferHeader)); DAC960_QueueCommand(Command); return; } @@ -3149,8 +3079,8 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, false); BufferHeader = NextBufferHeader; } @@ -3164,8 +3094,8 @@ else if (CommandType == DAC960_ReadRetryCommand || CommandType == DAC960_WriteRetryCommand) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; /* Perform completion processing for this single buffer. 
*/ @@ -3182,14 +3112,14 @@ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; Command->BlockNumber += - BufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(BufferHeader) >> DAC960_BlockSizeBits; Command->BlockCount = - NextBufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(NextBufferHeader) >> DAC960_BlockSizeBits; Command->BufferHeader = NextBufferHeader; CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; CommandMailbox->Type5.LogicalBlockAddress = Command->BlockNumber; CommandMailbox->Type5.BusAddress = - Virtual_to_Bus32(NextBufferHeader->b_data); + Virtual_to_Bus32(bio_data(NextBufferHeader)); DAC960_QueueCommand(Command); return; } @@ -3935,13 +3865,13 @@ Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, Command->BlockNumber, Command->BlockNumber + Command->BlockCount - 1); - if (DAC960_PartitionNumber(Command->BufferHeader->b_rdev) > 0) + if (DAC960_PartitionNumber(Command->BufferHeader->bi_dev) > 0) DAC960_Error(" /dev/rd/c%dd%dp%d: relative blocks %u..%u\n", Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, - DAC960_PartitionNumber(Command->BufferHeader->b_rdev), - Command->BufferHeader->b_rsector, - Command->BufferHeader->b_rsector + Command->BlockCount - 1); + DAC960_PartitionNumber(Command->BufferHeader->bi_dev), + Command->BufferHeader->bi_sector, + Command->BufferHeader->bi_sector + Command->BlockCount - 1); } @@ -4210,8 +4140,8 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, true); BufferHeader = NextBufferHeader; } @@ -4225,19 +4155,19 @@ else if (Command->V2.RequestSense.SenseKey == DAC960_SenseKey_MediumError && BufferHeader != NULL && - BufferHeader->b_reqnext != NULL) + BufferHeader->bi_next != NULL) { if (CommandType == DAC960_ReadCommand) Command->CommandType = DAC960_ReadRetryCommand; else Command->CommandType = DAC960_WriteRetryCommand; - Command->BlockCount = BufferHeader->b_size >> DAC960_BlockSizeBits; + Command->BlockCount = bio_size(BufferHeader) >> DAC960_BlockSizeBits; CommandMailbox->SCSI_10.CommandControlBits .AdditionalScatterGatherListMemory = false; CommandMailbox->SCSI_10.DataTransferSize = Command->BlockCount << DAC960_BlockSizeBits; CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0].SegmentDataPointer = - Virtual_to_Bus64(BufferHeader->b_data); + Virtual_to_Bus64(bio_data(BufferHeader)); CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0].SegmentByteCount = CommandMailbox->SCSI_10.DataTransferSize; @@ -4255,8 +4185,8 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, false); BufferHeader = NextBufferHeader; } @@ -4270,8 +4200,8 @@ else if (CommandType == DAC960_ReadRetryCommand || CommandType == DAC960_WriteRetryCommand) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; /* Perform completion processing for this single buffer. 
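
For reference, the completion idiom these hunks convert: a request used to carry a chain of buffer_heads linked through b_reqnext, each finished with b_end_io(); it now carries bios linked through bi_next, finished with bio_endio(). A side-by-side sketch (uptodate stands for the success flag):

    /* old: buffer_head chain */
    while (bh != NULL) {
            struct buffer_head *next = bh->b_reqnext;
            bh->b_reqnext = NULL;
            blk_finished_io(bh->b_size >> 9);
            bh->b_end_io(bh, uptodate);
            bh = next;
    }

    /* new: bio chain */
    while (bio != NULL) {
            struct bio *next = bio->bi_next;
            int nsecs = bio_sectors(bio);
            bio->bi_next = NULL;
            blk_finished_io(nsecs);
            bio_endio(bio, uptodate, nsecs);
            bio = next;
    }
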
*/ @@ -4286,16 +4216,16 @@ if (NextBufferHeader != NULL) { Command->BlockNumber += - BufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(BufferHeader) >> DAC960_BlockSizeBits; Command->BlockCount = - NextBufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(NextBufferHeader) >> DAC960_BlockSizeBits; Command->BufferHeader = NextBufferHeader; CommandMailbox->SCSI_10.DataTransferSize = Command->BlockCount << DAC960_BlockSizeBits; CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0] .SegmentDataPointer = - Virtual_to_Bus64(NextBufferHeader->b_data); + Virtual_to_Bus64(bio_data(NextBufferHeader)); CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0] .SegmentByteCount = @@ -5416,7 +5346,8 @@ int LogicalDriveNumber = DAC960_LogicalDriveNumber(Inode->i_rdev); DiskGeometry_T Geometry, *UserGeometry; DAC960_Controller_T *Controller; - int PartitionNumber; + int res; + if (File != NULL && (File->f_flags & O_NONBLOCK)) return DAC960_UserIOCTL(Inode, File, Request, Argument); if (ControllerNumber < 0 || ControllerNumber > DAC960_ControllerCount - 1) @@ -5465,61 +5396,27 @@ LogicalDeviceInfo->ConfigurableDeviceSize / (Geometry.heads * Geometry.sectors); } - Geometry.start = - Controller->GenericDiskInfo.part[MINOR(Inode->i_rdev)].start_sect; + Geometry.start = get_start_sect(Inode->i_rdev); return (copy_to_user(UserGeometry, &Geometry, sizeof(DiskGeometry_T)) ? -EFAULT : 0); case BLKGETSIZE: - /* Get Device Size. */ - if ((unsigned long *) Argument == NULL) return -EINVAL; - return put_user(Controller->GenericDiskInfo.part[MINOR(Inode->i_rdev)] - .nr_sects, - (unsigned long *) Argument); case BLKGETSIZE64: - if ((u64 *) Argument == NULL) return -EINVAL; - return put_user((u64) Controller->GenericDiskInfo - .part[MINOR(Inode->i_rdev)] - .nr_sects << 9, - (u64 *) Argument); case BLKRAGET: case BLKRASET: case BLKFLSBUF: case BLKBSZGET: case BLKBSZSET: return blk_ioctl(Inode->i_rdev, Request, Argument); + case BLKRRPART: /* Re-Read Partition Table. */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (Controller->LogicalDriveUsageCount[LogicalDriveNumber] > 1) return -EBUSY; - for (PartitionNumber = 0; - PartitionNumber < DAC960_MaxPartitions; - PartitionNumber++) - { - KernelDevice_T Device = DAC960_KernelDevice(ControllerNumber, - LogicalDriveNumber, - PartitionNumber); - int MinorNumber = DAC960_MinorNumber(LogicalDriveNumber, - PartitionNumber); - if (Controller->GenericDiskInfo.part[MinorNumber].nr_sects == 0) - continue; - /* - Flush all changes and invalidate buffered state. - */ - invalidate_device(Device, 1); - /* - Clear existing partition sizes. - */ - if (PartitionNumber > 0) - { - Controller->GenericDiskInfo.part[MinorNumber].start_sect = 0; - Controller->GenericDiskInfo.part[MinorNumber].nr_sects = 0; - } - /* - Reset the Block Size so that the partition table can be read. 
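
The loop being removed here (invalidate each minor, clear start_sect/nr_sects, reset the block size) is exactly what the new wipe_partitions() helper centralizes; re-reading a partition table then reduces to the two-call pattern the converted drivers use. A sketch, with nr_sectors standing in for the device capacity:

    res = wipe_partitions(dev);             /* flush, invalidate, clear minors */
    if (!res)
            grok_partitions(dev, nr_sectors);  /* rescan the partition table */
    return res;

Note that grok_partitions() now takes just (dev, size) rather than the old (gendisk, drive, max_p, size) signature.
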
- */ - set_blocksize(Device, BLOCK_SIZE); - } + res = wipe_partitions(Inode->i_rdev); + if (res) /* nothing */ + return res; + DAC960_RegisterDisk(Controller, LogicalDriveNumber); return 0; } @@ -5641,11 +5538,11 @@ while (Controller->V1.DirectCommandActive[DCDB.Channel] [DCDB.TargetID]) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&Controller->RequestQueue->queue_lock); __wait_event(Controller->CommandWaitQueue, !Controller->V1.DirectCommandActive [DCDB.Channel][DCDB.TargetID]); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&Controller->RequestQueue->queue_lock); } Controller->V1.DirectCommandActive[DCDB.Channel] [DCDB.TargetID] = true; diff -u --recursive --new-file v2.5.0/linux/drivers/block/DAC960.h linux/drivers/block/DAC960.h --- v2.5.0/linux/drivers/block/DAC960.h Wed Oct 17 14:46:29 2001 +++ linux/drivers/block/DAC960.h Tue Nov 27 09:23:27 2001 @@ -2191,7 +2191,7 @@ of the Linux Kernel and I/O Subsystem. */ -typedef struct buffer_head BufferHeader_T; +typedef struct bio BufferHeader_T; typedef struct file File_T; typedef struct block_device_operations BlockDeviceOperations_T; typedef struct completion Completion_T; @@ -2475,7 +2475,6 @@ DiskPartition_T DiskPartitions[DAC960_MinorCount]; int PartitionSizes[DAC960_MinorCount]; int BlockSizes[DAC960_MinorCount]; - int MaxSectorsPerRequest[DAC960_MinorCount]; unsigned char ProgressBuffer[DAC960_ProgressBufferSize]; unsigned char UserStatusBuffer[DAC960_UserMessageSize]; } @@ -2509,7 +2508,7 @@ void DAC960_AcquireControllerLock(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_lock_irqsave(&io_request_lock, *ProcessorFlags); + spin_lock_irqsave(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } @@ -2521,13 +2520,13 @@ void DAC960_ReleaseControllerLock(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_unlock_irqrestore(&io_request_lock, *ProcessorFlags); + spin_unlock_irqrestore(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } /* DAC960_AcquireControllerLockRF acquires exclusive access to Controller, - but is only called from the request function with the io_request_lock held. + but is only called from the request function with the queue lock held. */ static inline @@ -2539,7 +2538,7 @@ /* DAC960_ReleaseControllerLockRF releases exclusive access to Controller, - but is only called from the request function with the io_request_lock held. + but is only called from the request function with the queue lock held. 
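
With io_request_lock gone, a driver that must sleep from its request path drops its own queue lock around the wait, as DAC960_WaitForCommand and the DCDB wait loop in DAC960.c now do. The pattern (the condition shown is the free-command wait):

    spin_unlock_irq(&Controller->RequestQueue->queue_lock);
    __wait_event(Controller->CommandWaitQueue, Controller->FreeCommands);
    spin_lock_irq(&Controller->RequestQueue->queue_lock);
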
*/ static inline @@ -2558,7 +2557,7 @@ void DAC960_AcquireControllerLockIH(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_lock_irqsave(&io_request_lock, *ProcessorFlags); + spin_lock_irqsave(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } @@ -2571,7 +2570,7 @@ void DAC960_ReleaseControllerLockIH(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_unlock_irqrestore(&io_request_lock, *ProcessorFlags); + spin_unlock_irqrestore(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } diff -u --recursive --new-file v2.5.0/linux/drivers/block/Makefile linux/drivers/block/Makefile --- v2.5.0/linux/drivers/block/Makefile Sun Sep 9 12:00:55 2001 +++ linux/drivers/block/Makefile Tue Nov 27 09:23:27 2001 @@ -10,9 +10,9 @@ O_TARGET := block.o -export-objs := ll_rw_blk.o blkpg.o loop.o DAC960.o genhd.o +export-objs := elevator.o ll_rw_blk.o blkpg.o loop.o DAC960.o genhd.o -obj-y := ll_rw_blk.o blkpg.o genhd.o elevator.o +obj-y := elevator.o ll_rw_blk.o blkpg.o genhd.o obj-$(CONFIG_MAC_FLOPPY) += swim3.o obj-$(CONFIG_BLK_DEV_FD) += floppy.o diff -u --recursive --new-file v2.5.0/linux/drivers/block/acsi.c linux/drivers/block/acsi.c --- v2.5.0/linux/drivers/block/acsi.c Fri Nov 9 13:58:03 2001 +++ linux/drivers/block/acsi.c Tue Nov 27 09:23:27 2001 @@ -1011,7 +1011,6 @@ goto repeat; } - block += acsi_part[dev].start_sect; target = acsi_info[DEVICE_NR(dev)].target; lun = acsi_info[DEVICE_NR(dev)].lun; @@ -1123,7 +1122,7 @@ put_user( 64, &geo->heads ); put_user( 32, &geo->sectors ); put_user( acsi_info[dev].size >> 11, &geo->cylinders ); - put_user( acsi_part[MINOR(inode->i_rdev)].start_sect, &geo->start ); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; } @@ -1852,7 +1851,7 @@ { int device; struct gendisk * gdev; - int max_p, start, i; + int res; struct acsi_info_struct *aip; device = DEVICE_NR(MINOR(dev)); @@ -1867,16 +1866,7 @@ DEVICE_BUSY = 1; sti(); - max_p = gdev->max_p; - start = device << gdev->minor_shift; - - for( i = max_p - 1; i >= 0 ; i-- ) { - if (gdev->part[start + i].nr_sects != 0) { - invalidate_device(MKDEV(MAJOR_NR, start + i), 1); - gdev->part[start + i].nr_sects = 0; - } - gdev->part[start+i].start_sect = 0; - }; + res = wipe_partitions(dev); stdma_lock( NULL, NULL ); @@ -1891,12 +1881,13 @@ ENABLE_IRQ(); stdma_release(); - - grok_partitions(gdev, device, (aip->type==HARDDISK)?1<<4:1, aip->size); + + if (!res) + grok_partitions(dev, aip->size); DEVICE_BUSY = 0; wake_up(&busy_wait); - return 0; + return res; } diff -u --recursive --new-file v2.5.0/linux/drivers/block/amiflop.c linux/drivers/block/amiflop.c --- v2.5.0/linux/drivers/block/amiflop.c Thu Oct 25 13:58:34 2001 +++ linux/drivers/block/amiflop.c Tue Nov 27 09:23:27 2001 @@ -1895,10 +1895,9 @@ free_irq(IRQ_AMIGA_DSKBLK, NULL); custom.dmacon = DMAF_DISK; /* disable DMA */ amiga_chip_free(raw_buf); - blk_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); release_mem_region(CUSTOM_PHYSADDR+0x20, 8); unregister_blkdev(MAJOR_NR, "fd"); + blk_clear(MAJOR_NR); } #endif diff -u --recursive --new-file v2.5.0/linux/drivers/block/blkpg.c linux/drivers/block/blkpg.c --- v2.5.0/linux/drivers/block/blkpg.c Sun Nov 11 10:20:21 2001 +++ linux/drivers/block/blkpg.c Tue Nov 27 09:23:27 2001 @@ -63,7 +63,8 @@ * or has the same number as an existing one * 0: all OK. 
*/ -int add_partition(kdev_t dev, struct blkpg_partition *p) { +int add_partition(kdev_t dev, struct blkpg_partition *p) +{ struct gendisk *g; long long ppstart, pplength; long pstart, plength; @@ -123,7 +124,8 @@ * * Note that the dev argument refers to the entire disk, not the partition. */ -int del_partition(kdev_t dev, struct blkpg_partition *p) { +int del_partition(kdev_t dev, struct blkpg_partition *p) +{ struct gendisk *g; kdev_t devp; int drive, first_minor, minor; @@ -195,9 +197,10 @@ int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg) { + request_queue_t *q; struct gendisk *g; u64 ullval = 0; - int intval; + int intval, *iptr; if (!dev) return -EINVAL; @@ -226,8 +229,26 @@ return -EINVAL; return put_user(read_ahead[MAJOR(dev)], (long *) arg); + case BLKFRASET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (!(iptr = max_readahead[MAJOR(dev)])) + return -EINVAL; + iptr[MINOR(dev)] = arg; + return 0; + + case BLKFRAGET: + if (!(iptr = max_readahead[MAJOR(dev)])) + return -EINVAL; + return put_user(iptr[MINOR(dev)], (long *) arg); + + case BLKSECTGET: + if ((q = blk_get_queue(dev))) + return put_user(q->max_sectors, (unsigned short *)arg); + return -EINVAL; + case BLKFLSBUF: - if(!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) return -EACCES; fsync_dev(dev); invalidate_buffers(dev); @@ -246,8 +267,7 @@ if (cmd == BLKGETSIZE) return put_user((unsigned long)ullval, (unsigned long *)arg); - else - return put_user(ullval, (u64 *)arg); + return put_user(ullval, (u64 *)arg); #if 0 case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) @@ -258,34 +278,38 @@ case BLKPG: return blkpg_ioctl(dev, (struct blkpg_ioctl_arg *) arg); + /* + * deprecated, use the /proc/iosched interface instead + */ case BLKELVGET: - return blkelvget_ioctl(&blk_get_queue(dev)->elevator, - (blkelv_ioctl_arg_t *) arg); case BLKELVSET: - return blkelvset_ioctl(&blk_get_queue(dev)->elevator, - (blkelv_ioctl_arg_t *) arg); + return -ENOTTY; + + case BLKHASHPROF: + case BLKHASHCLEAR: + return bio_ioctl(dev, cmd, arg); case BLKBSZGET: /* get the logical block size (cf. 
BLKSSZGET) */ intval = BLOCK_SIZE; if (blksize_size[MAJOR(dev)]) intval = blksize_size[MAJOR(dev)][MINOR(dev)]; - return put_user (intval, (int *) arg); + return put_user(intval, (int *) arg); case BLKBSZSET: /* set the logical block size */ - if (!capable (CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (!dev || !arg) + if (!arg) return -EINVAL; - if (get_user (intval, (int *) arg)) + if (get_user(intval, (int *) arg)) return -EFAULT; if (intval > PAGE_SIZE || intval < 512 || (intval & (intval - 1))) return -EINVAL; - if (is_mounted (dev) || is_swap_partition (dev)) + if (is_mounted(dev) || is_swap_partition(dev)) return -EBUSY; - set_blocksize (dev, intval); + set_blocksize(dev, intval); return 0; default: diff -u --recursive --new-file v2.5.0/linux/drivers/block/cciss.c linux/drivers/block/cciss.c --- v2.5.0/linux/drivers/block/cciss.c Fri Nov 9 14:28:46 2001 +++ linux/drivers/block/cciss.c Tue Nov 27 09:23:27 2001 @@ -84,7 +84,7 @@ #define MAX_CONFIG_WAIT 1000 #define READ_AHEAD 128 -#define NR_CMDS 128 /* #commands that can be outstanding */ +#define NR_CMDS 384 /* #commands that can be outstanding */ #define MAX_CTLR 8 #define CCISS_DMA_MASK 0xFFFFFFFF /* 32 bit DMA */ @@ -147,7 +147,6 @@ " IRQ: %d\n" " Logical drives: %d\n" " Current Q depth: %d\n" - " Current # commands on controller %d\n" " Max Q depth since init: %d\n" " Max # commands on controller since init: %d\n" " Max SG entries since init: %d\n\n", @@ -158,8 +157,7 @@ (unsigned long)h->vaddr, (unsigned int)h->intr, h->num_luns, - h->Qdepth, h->commands_outstanding, - h->maxQsinceinit, h->max_outstanding, h->maxSG); + h->Qdepth, h->maxQsinceinit, h->max_outstanding, h->maxSG); pos += size; len += size; for(i=0; inum_luns; i++) { @@ -237,7 +235,7 @@ i = find_first_zero_bit(h->cmd_pool_bits, NR_CMDS); if (i == NR_CMDS) return NULL; - } while(test_and_set_bit(i%32, h->cmd_pool_bits+(i/32)) != 0); + } while(test_and_set_bit(i & 31, h->cmd_pool_bits+(i/32)) != 0); #ifdef CCISS_DEBUG printk(KERN_DEBUG "cciss: using command buffer %d\n", i); #endif @@ -308,13 +306,10 @@ /* for each partition */ for(j=0; jblocksizes[(i<hardsizes[ (i<block_size; - } hba[ctlr]->gendisk.nr_real++; + (BLK_DEFAULT_QUEUE(MAJOR_NR + ctlr))->hardsect_size = drv->block_size; } } /* @@ -377,8 +372,6 @@ { int ctlr = MAJOR(inode->i_rdev) - MAJOR_NR; int dsk = MINOR(inode->i_rdev) >> NWD_SHIFT; - int diskinfo[4]; - struct hd_geometry *geo = (struct hd_geometry *)arg; #ifdef CCISS_DEBUG printk(KERN_DEBUG "cciss_ioctl: Called with cmd=%x %lx\n", cmd, arg); @@ -386,6 +379,10 @@ switch(cmd) { case HDIO_GETGEO: + { + struct hd_geometry *geo = (struct hd_geometry *)arg; + int diskinfo[4]; + if (hba[ctlr]->drv[dsk].cylinders) { diskinfo[0] = hba[ctlr]->drv[dsk].heads; diskinfo[1] = hba[ctlr]->drv[dsk].sectors; @@ -393,20 +390,18 @@ } else { diskinfo[0] = 0xff; diskinfo[1] = 0x3f; - diskinfo[2] = hba[ctlr]->drv[dsk].nr_blocks / (0xff*0x3f); } + diskinfo[2] = hba[ctlr]->drv[dsk].nr_blocks / (0xff*0x3f); + } put_user(diskinfo[0], &geo->heads); put_user(diskinfo[1], &geo->sectors); put_user(diskinfo[2], &geo->cylinders); - put_user(hba[ctlr]->hd[MINOR(inode->i_rdev)].start_sect, &geo->start); - return 0; - case BLKGETSIZE: - put_user(hba[ctlr]->hd[MINOR(inode->i_rdev)].nr_sects, (unsigned long *)arg); - return 0; - case BLKGETSIZE64: - put_user((u64)hba[ctlr]->hd[MINOR(inode->i_rdev)].nr_sects << 9, (u64*)arg); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; + } case BLKRRPART: return revalidate_logvol(inode->i_rdev, 1); + case 
BLKGETSIZE: + case BLKGETSIZE64: case BLKFLSBUF: case BLKBSZSET: case BLKBSZGET: @@ -415,9 +410,7 @@ case BLKRASET: case BLKRAGET: case BLKPG: - case BLKELVGET: - case BLKELVSET: - return( blk_ioctl(inode->i_rdev, cmd, arg)); + return blk_ioctl(inode->i_rdev, cmd, arg); case CCISS_GETPCIINFO: { cciss_pci_info_struct pciinfo; @@ -459,16 +452,7 @@ // printk("cciss_ioctl: delay and count cannot be 0\n"); return( -EINVAL); } - spin_lock_irqsave(&io_request_lock, flags); - /* Can only safely update if no commands outstanding */ - if (c->commands_outstanding > 0 ) - { -// printk("cciss_ioctl: cannot change coalasing " -// "%d commands outstanding on controller\n", -// c->commands_outstanding); - spin_unlock_irqrestore(&io_request_lock, flags); - return(-EINVAL); - } + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); /* Update the field, and then ring the doorbell */ writel( intinfo.delay, &(c->cfgtable->HostWrite.CoalIntDelay)); @@ -484,7 +468,7 @@ /* delay and try again */ udelay(1000); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); if (i >= MAX_CONFIG_WAIT) return( -EFAULT); return(0); @@ -515,7 +499,7 @@ if (copy_from_user(NodeName, (void *) arg, sizeof( NodeName_type))) return -EFAULT; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); /* Update the field, and then ring the doorbell */ for(i=0;i<16;i++) @@ -531,7 +515,7 @@ /* delay and try again */ udelay(1000); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); if (i >= MAX_CONFIG_WAIT) return( -EFAULT); return(0); @@ -658,11 +642,11 @@ c->SG[0].Ext = 0; // we are not chaining } /* Put the request on the tail of the request queue */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); addQ(&h->reqQ, c); h->Qdepth++; start_io(h); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); /* Wait for completion */ while(c->cmd_type != CMD_IOCTL_DONE) @@ -710,42 +694,32 @@ int ctlr, target; struct gendisk *gdev; unsigned long flags; - int max_p; - int start; - int i; + int res; target = MINOR(dev) >> NWD_SHIFT; ctlr = MAJOR(dev) - MAJOR_NR; gdev = &(hba[ctlr]->gendisk); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); if (hba[ctlr]->drv[target].usage_count > maxusage) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); printk(KERN_WARNING "cciss: Device busy for " "revalidation (usage=%d)\n", hba[ctlr]->drv[target].usage_count); return -EBUSY; } hba[ctlr]->drv[target].usage_count++; - spin_unlock_irqrestore(&io_request_lock, flags); - - max_p = gdev->max_p; - start = target << gdev->minor_shift; + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); - for(i=max_p-1; i>=0; i--) { - int minor = start+i; - invalidate_device(MKDEV(MAJOR_NR + ctlr, minor), 1); - gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; + res = wipe_partitions(dev); + if (res) + goto leave; - /* reset the blocksize so we can read the partition table */ - blksize_size[MAJOR_NR+ctlr][minor] = 1024; - } /* setup partitions per disk */ - grok_partitions(gdev, target, MAX_PART, - hba[ctlr]->drv[target].nr_blocks); + grok_partitions(dev, hba[ctlr]->drv[target].nr_blocks); +leave: hba[ctlr]->drv[target].usage_count--; - return 0; + return res; } static int frevalidate_logvol(kdev_t dev) @@ -776,15 +750,15 @@ if (MINOR(dev) != 0) return -ENXIO; - 
spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); if (hba[ctlr]->usage_count > 1) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); printk(KERN_WARNING "cciss: Device busy for volume" " revalidation (usage=%d)\n", hba[ctlr]->usage_count); return -EBUSY; } - spin_unlock_irqrestore(&io_request_lock, flags); hba[ctlr]->usage_count++; + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); /* * Set the partition and block size structures for all volumes @@ -793,7 +767,6 @@ memset(hba[ctlr]->hd, 0, sizeof(struct hd_struct) * 256); memset(hba[ctlr]->sizes, 0, sizeof(int) * 256); memset(hba[ctlr]->blocksizes, 0, sizeof(int) * 256); - memset(hba[ctlr]->hardsizes, 0, sizeof(int) * 256); memset(hba[ctlr]->drv, 0, sizeof(drive_info_struct) * CISS_MAX_LUN); hba[ctlr]->gendisk.nr_real = 0; @@ -1089,11 +1062,11 @@ while(( c = h->reqQ) != NULL ) { /* can't do anything if fifo is full */ - if ((h->access.fifo_full(h))) - { - printk(KERN_WARNING "cciss: fifo full \n"); - return; + if ((h->access.fifo_full(h))) { + printk(KERN_WARNING "cciss: fifo full\n"); + break; } + /* Get the frist entry from the Request Q */ removeQ(&(h->reqQ), c); h->Qdepth--; @@ -1106,18 +1079,18 @@ } } -static inline void complete_buffers( struct buffer_head *bh, int status) +static inline void complete_buffers(struct bio *bio, int status) { - struct buffer_head *xbh; - - while(bh) - { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, status); - bh = xbh; + while (bio) { + int nsecs = bio_sectors(bio); + + struct bio *xbh = bio->bi_next; + bio->bi_next = NULL; + blk_finished_io(nsecs); + bio_endio(bio, status, nsecs); + bio = xbh; } + } /* checks the status of the job and calls complete buffers to mark all * buffers for the completed job. @@ -1135,7 +1108,7 @@ { temp64.val32.lower = cmd->SG[i].Addr.lower; temp64.val32.upper = cmd->SG[i].Addr.upper; - pci_unmap_single(hba[cmd->ctlr]->pdev, + pci_unmap_page(hba[cmd->ctlr]->pdev, temp64.val, cmd->SG[i].Len, (cmd->Request.Type.Direction == XFER_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); @@ -1214,83 +1187,38 @@ status=0; } } - complete_buffers(cmd->rq->bh, status); + + complete_buffers(cmd->rq->bio, status); #ifdef CCISS_DEBUG printk("Done with %p\n", cmd->rq); #endif /* CCISS_DEBUG */ - end_that_request_last(cmd->rq); -} - - -static inline int cpq_new_segment(request_queue_t *q, struct request *rq, - int max_segments) -{ - if (rq->nr_segments < MAXSGENTRIES) { - rq->nr_segments++; - return 1; - } - return 0; -} -static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (bh->b_data + bh->b_size == rq->bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, - struct request *nxt, int max_segments) -{ - int total_segments = rq->nr_segments + nxt->nr_segments; - - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) - total_segments--; - - if (total_segments > MAXSGENTRIES) - return 0; - - rq->nr_segments = total_segments; - return 1; + end_that_request_last(cmd->rq); } /* * Get a request and submit it to the controller. 
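
The rewritten request function below follows the new block-layer skeleton: test the queue's own plug state, pull requests with elv_next_request(), and drop the per-queue lock (not the global io_request_lock) while building the command. In outline (a sketch; the elided command setup stands in for the driver's real work):

    static void do_cciss_request(request_queue_t *q)
    {
            ctlr_info_t *h = q->queuedata;
            struct request *creq;

            if (blk_queue_plugged(q))
                    goto startio;
    queue:
            if (list_empty(&q->queue_head))
                    goto startio;
            creq = elv_next_request(q);
            blkdev_dequeue_request(creq);

            spin_unlock_irq(&q->queue_lock);
            /* ... allocate, fill in and DMA-map the command ... */
            spin_lock_irq(&q->queue_lock);
            goto queue;
    startio:
            start_io(h);
    }
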
- * Currently we do one request at a time. Ideally we would like to send - * everything to the controller on the first call, but there is a danger - * of holding the io_request_lock for to long. */ static void do_cciss_request(request_queue_t *q) { ctlr_info_t *h= q->queuedata; CommandList_struct *c; int log_unit, start_blk, seg; - char *lastdataend; - struct buffer_head *bh; struct list_head *queue_head = &q->queue_head; struct request *creq; u64bit temp64; - struct my_sg tmp_sg[MAXSGENTRIES]; - int i; + struct scatterlist tmp_sg[MAXSGENTRIES]; + int i, dir; - if (q->plugged) + if (blk_queue_plugged(q)) goto startio; -queue_next: +queue: if (list_empty(queue_head)) goto startio; - creq = blkdev_entry_next_request(queue_head); + creq = elv_next_request(q); if (creq->nr_segments > MAXSGENTRIES) BUG(); @@ -1299,7 +1227,7 @@ printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); - complete_buffers(creq->bh, 0); + complete_buffers(creq->bio, 0); end_that_request_last(creq); goto startio; } @@ -1309,10 +1237,9 @@ blkdev_dequeue_request(creq); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); - c->cmd_type = CMD_RWREQ; - bh = creq->bh; + c->cmd_type = CMD_RWREQ; c->rq = creq; /* fill in the request */ @@ -1328,41 +1255,26 @@ (creq->cmd == READ) ? XFER_READ: XFER_WRITE; c->Request.Timeout = 0; // Don't time out c->Request.CDB[0] = (creq->cmd == READ) ? CCISS_READ : CCISS_WRITE; - start_blk = hba[h->ctlr]->hd[MINOR(creq->rq_dev)].start_sect + creq->sector; + start_blk = creq->sector; #ifdef CCISS_DEBUG - if (bh == NULL) - panic("cciss: bh== NULL?"); printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",(int) creq->sector, (int) creq->nr_sectors); #endif /* CCISS_DEBUG */ - seg = 0; - lastdataend = NULL; - while(bh) - { - if (bh->b_data == lastdataend) - { // tack it on to the last segment - tmp_sg[seg-1].len +=bh->b_size; - lastdataend += bh->b_size; - } else - { - if (seg == MAXSGENTRIES) - BUG(); - tmp_sg[seg].len = bh->b_size; - tmp_sg[seg].start_addr = bh->b_data; - lastdataend = bh->b_data + bh->b_size; - seg++; - } - bh = bh->b_reqnext; - } + + seg = blk_rq_map_sg(q, creq, tmp_sg); + /* get the DMA records for the setup */ + if (c->Request.Type.Direction == XFER_READ) + dir = PCI_DMA_FROMDEVICE; + else + dir = PCI_DMA_TODEVICE; + for (i=0; iSG[i].Len = tmp_sg[i].len; - temp64.val = (__u64) pci_map_single( h->pdev, - tmp_sg[i].start_addr, - tmp_sg[i].len, - (c->Request.Type.Direction == XFER_READ) ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + c->SG[i].Len = tmp_sg[i].length; + temp64.val = (__u64) pci_map_page(h->pdev, tmp_sg[i].page, + tmp_sg[i].offset, tmp_sg[i].length, + dir); c->SG[i].Addr.lower = temp64.val32.lower; c->SG[i].Addr.upper = temp64.val32.upper; c->SG[i].Ext = 0; // we are not chaining @@ -1386,14 +1298,14 @@ c->Request.CDB[8]= creq->nr_sectors & 0xff; c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0; - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); addQ(&(h->reqQ),c); h->Qdepth++; if(h->Qdepth > h->maxQsinceinit) h->maxQsinceinit = h->Qdepth; - goto queue_next; + goto queue; startio: start_io(h); } @@ -1414,7 +1326,7 @@ * If there are completed commands in the completion queue, * we had better do something about it. 
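
The scatter/gather conversion above is the core of the highmem I/O work: instead of walking buffer_heads and coalescing on virtual addresses, the driver asks the block layer for a page-based scatterlist and maps each entry with pci_map_page(), so data pages never need a low-memory mapping. The cciss version, slightly condensed:

    struct scatterlist tmp_sg[MAXSGENTRIES];
    int i, seg, dir;

    seg = blk_rq_map_sg(q, creq, tmp_sg);   /* block layer coalesces segments */
    dir = (creq->cmd == READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE;

    for (i = 0; i < seg; i++) {
            c->SG[i].Len = tmp_sg[i].length;
            temp64.val = (__u64) pci_map_page(h->pdev, tmp_sg[i].page,
                                              tmp_sg[i].offset,
                                              tmp_sg[i].length, dir);
            c->SG[i].Addr.lower = temp64.val32.lower;
            c->SG[i].Addr.upper = temp64.val32.upper;
            c->SG[i].Ext = 0;               /* no chaining */
    }
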
*/ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); while( h->access.intr_pending(h)) { while((a = h->access.command_completed(h)) != FIFO_EMPTY) @@ -1447,11 +1359,12 @@ } } } + /* * See if we can queue up some more IO */ do_cciss_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); } /* * We cannot read the structure directly, for portablity we must use @@ -1873,7 +1786,18 @@ sprintf(hba[i]->devname, "cciss%d", i); hba[i]->ctlr = i; hba[i]->pdev = pdev; - + + /* configure PCI DMA stuff */ + if (!pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff)) + printk("cciss: using DAC cycles\n"); + else if (!pci_set_dma_mask(pdev, 0xffffffff)) + printk("cciss: not using DAC cycles\n"); + else { + printk("cciss: no suitable DMA available\n"); + free_hba(i); + return -ENODEV; + } + if( register_blkdev(MAJOR_NR+i, hba[i]->devname, &cciss_fops)) { printk(KERN_ERR "cciss: Unable to get major number " @@ -1942,20 +1866,16 @@ q = BLK_DEFAULT_QUEUE(MAJOR_NR + i); q->queuedata = hba[i]; - blk_init_queue(q, do_cciss_request); + blk_init_queue(q, do_cciss_request, hba[i]->devname); blk_queue_headactive(q, 0); + blk_queue_bounce_limit(q, hba[i]->pdev->dma_mask); + q->max_segments = MAXSGENTRIES; + blk_queue_max_sectors(q, 512); /* fill in the other Kernel structs */ blksize_size[MAJOR_NR+i] = hba[i]->blocksizes; - hardsect_size[MAJOR_NR+i] = hba[i]->hardsizes; read_ahead[MAJOR_NR+i] = READ_AHEAD; - /* Set the pointers to queue functions */ - q->back_merge_fn = cpq_back_merge_fn; - q->front_merge_fn = cpq_front_merge_fn; - q->merge_requests_fn = cpq_merge_requests_fn; - - /* Fill in the gendisk data */ hba[i]->gendisk.major = MAJOR_NR + i; hba[i]->gendisk.major_name = "cciss"; @@ -2004,12 +1924,11 @@ unregister_blkdev(MAJOR_NR+i, hba[i]->devname); remove_proc_entry(hba[i]->devname, proc_cciss); - /* remove it from the disk list */ del_gendisk(&(hba[i]->gendisk)); - pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(CommandList_struct), - hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle); + pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(CommandList_struct), + hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle); pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof( ErrorInfo_struct), hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle); kfree(hba[i]->cmd_pool_bits); @@ -2017,32 +1936,31 @@ } static struct pci_driver cciss_pci_driver = { - name: "cciss", - probe: cciss_init_one, - remove: cciss_remove_one, - id_table: cciss_pci_device_id, /* id_table */ + name: "cciss", + probe: cciss_init_one, + remove: cciss_remove_one, + id_table: cciss_pci_device_id, /* id_table */ }; /* -* This is it. Register the PCI driver information for the cards we control -* the OS will call our registered routines when it finds one of our cards. -*/ + * This is it. Register the PCI driver information for the cards we control + * the OS will call our registered routines when it finds one of our cards. 
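
The probe-time additions above fit together: the driver negotiates the widest DMA mask the platform will grant, then hands that same mask to blk_queue_bounce_limit() so the block layer only bounces pages the device genuinely cannot address. Condensed from cciss_init_one:

    /* prefer 64-bit (DAC) addressing, fall back to 32-bit */
    if (!pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff))
            printk("cciss: using DAC cycles\n");
    else if (!pci_set_dma_mask(pdev, 0xffffffff))
            printk("cciss: not using DAC cycles\n");
    else
            return -ENODEV;                 /* no usable DMA */

    q = BLK_DEFAULT_QUEUE(MAJOR_NR + i);
    blk_init_queue(q, do_cciss_request, hba[i]->devname);
    blk_queue_bounce_limit(q, hba[i]->pdev->dma_mask);
    blk_queue_max_sectors(q, 512);
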
+ */ int __init cciss_init(void) { - printk(KERN_INFO DRIVER_NAME "\n"); + /* Register for out PCI devices */ if (pci_register_driver(&cciss_pci_driver) > 0 ) return 0; else return -ENODEV; - } +} EXPORT_NO_SYMBOLS; static int __init init_cciss_module(void) { - return ( cciss_init()); } diff -u --recursive --new-file v2.5.0/linux/drivers/block/cciss.h linux/drivers/block/cciss.h --- v2.5.0/linux/drivers/block/cciss.h Tue May 22 10:23:16 2001 +++ linux/drivers/block/cciss.h Tue Nov 27 09:23:27 2001 @@ -15,11 +15,6 @@ #define MAJOR_NR COMPAQ_CISS_MAJOR -struct my_sg { - int len; - char *start_addr; -}; - struct ctlr_info; typedef struct ctlr_info ctlr_info_t; @@ -85,9 +80,8 @@ struct gendisk gendisk; // indexed by minor numbers struct hd_struct hd[256]; - int sizes[256]; + int sizes[256]; int blocksizes[256]; - int hardsizes[256]; }; /* Defining the diffent access_menthods */ @@ -247,5 +241,8 @@ char *product_name; struct access_method *access; }; + +#define CCISS_LOCK(i) (&((BLK_DEFAULT_QUEUE(MAJOR_NR + i))->queue_lock)) + #endif /* CCISS_H */ diff -u --recursive --new-file v2.5.0/linux/drivers/block/cciss_cmd.h linux/drivers/block/cciss_cmd.h --- v2.5.0/linux/drivers/block/cciss_cmd.h Fri Nov 2 17:45:42 2001 +++ linux/drivers/block/cciss_cmd.h Tue Nov 27 09:23:27 2001 @@ -7,7 +7,7 @@ //general boundary defintions #define SENSEINFOBYTES 32//note that this value may vary between host implementations -#define MAXSGENTRIES 31 +#define MAXSGENTRIES 32 #define MAXREPLYQS 256 //Command Status value diff -u --recursive --new-file v2.5.0/linux/drivers/block/cpqarray.c linux/drivers/block/cpqarray.c --- v2.5.0/linux/drivers/block/cpqarray.c Fri Nov 9 14:28:46 2001 +++ linux/drivers/block/cpqarray.c Tue Nov 27 09:23:27 2001 @@ -100,7 +100,6 @@ static struct hd_struct * ida; static int * ida_sizes; static int * ida_blocksizes; -static int * ida_hardsizes; static struct gendisk ida_gendisk[MAX_CTLR]; static struct proc_dir_entry *proc_array; @@ -145,7 +144,7 @@ static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c); static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c); -static inline void complete_buffers(struct buffer_head *bh, int ok); +static inline void complete_buffers(struct bio *bio, int ok); static inline void complete_command(cmdlist_t *cmd, int timeout); static void do_ida_intr(int irq, void *dev_id, struct pt_regs * regs); @@ -176,12 +175,11 @@ ida_sizes[(ctlr<nr_blks; - for(j=0; j<16; j++) { + for(j=0; j<16; j++) ida_blocksizes[(ctlr<blk_size; - } + + (BLK_DEFAULT_QUEUE(MAJOR_NR + ctlr))->hardsect_size = drv->blk_size; ida_gendisk[ctlr].nr_real++; } @@ -341,52 +339,10 @@ remove_proc_entry("cpqarray", proc_root_driver); kfree(ida); kfree(ida_sizes); - kfree(ida_hardsizes); kfree(ida_blocksizes); } #endif /* MODULE */ -static inline int cpq_new_segment(request_queue_t *q, struct request *rq, - int max_segments) -{ - if (rq->nr_segments < SG_MAX) { - rq->nr_segments++; - return 1; - } - return 0; -} - -static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (bh->b_data + bh->b_size == rq->bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, - struct request *nxt, int max_segments) -{ 
- int total_segments = rq->nr_segments + nxt->nr_segments; - - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) - total_segments--; - - if (total_segments > SG_MAX) - return 0; - - rq->nr_segments = total_segments; - return 1; -} - /* * This is it. Find all the controllers and register them. I really hate * stealing all these major device numbers. @@ -433,20 +389,9 @@ return(num_cntlrs_reg); } - ida_hardsizes = kmalloc(sizeof(int)*nr_ctlr*NWD*16, GFP_KERNEL); - if(ida_hardsizes==NULL) - { - kfree(ida); - kfree(ida_sizes); - kfree(ida_blocksizes); - printk( KERN_ERR "cpqarray: out of memory"); - return(num_cntlrs_reg); - } - memset(ida, 0, sizeof(struct hd_struct)*nr_ctlr*NWD*16); memset(ida_sizes, 0, sizeof(int)*nr_ctlr*NWD*16); memset(ida_blocksizes, 0, sizeof(int)*nr_ctlr*NWD*16); - memset(ida_hardsizes, 0, sizeof(int)*nr_ctlr*NWD*16); memset(ida_gendisk, 0, sizeof(struct gendisk)*MAX_CTLR); /* @@ -504,7 +449,6 @@ { kfree(ida); kfree(ida_sizes); - kfree(ida_hardsizes); kfree(ida_blocksizes); } return(num_cntlrs_reg); @@ -523,16 +467,13 @@ q = BLK_DEFAULT_QUEUE(MAJOR_NR + i); q->queuedata = hba[i]; - blk_init_queue(q, do_ida_request); + blk_init_queue(q, do_ida_request, hba[i]->devname); blk_queue_headactive(q, 0); + blk_queue_bounce_limit(q, hba[i]->pci_dev->dma_mask); + q->max_segments = SG_MAX; blksize_size[MAJOR_NR+i] = ida_blocksizes + (i*256); - hardsect_size[MAJOR_NR+i] = ida_hardsizes + (i*256); read_ahead[MAJOR_NR+i] = READ_AHEAD; - q->back_merge_fn = cpq_back_merge_fn; - q->front_merge_fn = cpq_front_merge_fn; - q->merge_requests_fn = cpq_merge_requests_fn; - ida_gendisk[i].major = MAJOR_NR + i; ida_gendisk[i].major_name = "ida"; ida_gendisk[i].minor_shift = NWD_SHIFT; @@ -911,21 +852,19 @@ { ctlr_info_t *h = q->queuedata; cmdlist_t *c; - char *lastdataend; struct list_head * queue_head = &q->queue_head; - struct buffer_head *bh; struct request *creq; - struct my_sg tmp_sg[SG_MAX]; - int i, seg; + struct scatterlist tmp_sg[SG_MAX]; + int i, dir, seg; - if (q->plugged) + if (blk_queue_plugged(q)) goto startio; queue_next: if (list_empty(queue_head)) goto startio; - creq = blkdev_entry_next_request(queue_head); + creq = elv_next_request(q); if (creq->nr_segments > SG_MAX) BUG(); @@ -934,7 +873,7 @@ printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); - complete_buffers(creq->bh, 0); + complete_buffers(creq->bio, 0); end_that_request_last(creq); goto startio; } @@ -944,55 +883,40 @@ blkdev_dequeue_request(creq); - spin_unlock_irq(&io_request_lock); - - bh = creq->bh; + spin_unlock_irq(&q->queue_lock); c->ctlr = h->ctlr; c->hdr.unit = MINOR(creq->rq_dev) >> NWD_SHIFT; c->hdr.size = sizeof(rblk_t) >> 2; c->size += sizeof(rblk_t); - c->req.hdr.blk = ida[(h->ctlr<<CTLR_SHIFT) + MINOR(creq->rq_dev)].start_sect + creq->sector; + c->req.hdr.blk = creq->sector; c->rq = creq; DBGPX( - if (bh == NULL) - panic("bh == NULL?"); - printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors); ); - seg = 0; lastdataend = NULL; - while(bh) { - if (bh->b_data == lastdataend) { - tmp_sg[seg-1].size += bh->b_size; - lastdataend += bh->b_size; - } else { - if (seg == SG_MAX) - BUG(); - tmp_sg[seg].size = bh->b_size; - tmp_sg[seg].start_addr = bh->b_data; - lastdataend = bh->b_data + bh->b_size; - seg++; - } - bh = bh->b_reqnext; - } + seg = blk_rq_map_sg(q, creq, tmp_sg); + /* Now do all the DMA Mappings */ + if (creq->cmd == READ) + dir = PCI_DMA_FROMDEVICE; + else + dir = PCI_DMA_TODEVICE; for( i=0; i < seg; i++) { - c->req.sg[i].size =
tmp_sg[i].size; - c->req.sg[i].addr = (__u32) pci_map_single( - h->pci_dev, tmp_sg[i].start_addr, - tmp_sg[i].size, - (creq->cmd == READ) ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + c->req.sg[i].size = tmp_sg[i].length; + c->req.sg[i].addr = (__u32) pci_map_page(h->pci_dev, + tmp_sg[i].page, + tmp_sg[i].offset, + tmp_sg[i].length, dir); } -DBGPX( printk("Submitting %d sectors in %d segments\n", sect, seg); ); +DBGPX( printk("Submitting %d sectors in %d segments\n", creq->nr_sectors, seg); ); c->req.hdr.sg_cnt = seg; c->req.hdr.blk_cnt = creq->nr_sectors; c->req.hdr.cmd = (creq->cmd == READ) ? IDA_READ : IDA_WRITE; c->type = CMD_RWREQ; - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); /* Put the request on the tail of the request queue */ addQ(&h->reqQ, c); @@ -1033,17 +957,19 @@ } } -static inline void complete_buffers(struct buffer_head *bh, int ok) +static inline void complete_buffers(struct bio *bio, int ok) { - struct buffer_head *xbh; - while(bh) { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; + struct bio *xbh; + while(bio) { + int nsecs = bio_sectors(bio); + + xbh = bio->bi_next; + bio->bi_next = NULL; - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, ok); + blk_finished_io(nsecs); + bio_endio(bio, ok, nsecs); - bh = xbh; + bio = xbh; } } /* @@ -1052,7 +978,7 @@ static inline void complete_command(cmdlist_t *cmd, int timeout) { int ok=1; - int i; + int i, ddir; if (cmd->req.hdr.rcode & RCODE_NONFATAL && (hba[cmd->ctlr]->misc_tflags & MISC_NONFATAL_WARN) == 0) { @@ -1074,19 +1000,18 @@ } if (timeout) ok = 0; /* unmap the DMA mapping for all the scatter gather elements */ + if (cmd->req.hdr.cmd == IDA_READ) + ddir = PCI_DMA_FROMDEVICE; + else + ddir = PCI_DMA_TODEVICE; for(i=0; i<cmd->req.hdr.sg_cnt; i++) - { - pci_unmap_single(hba[cmd->ctlr]->pci_dev, - cmd->req.sg[i].addr, cmd->req.sg[i].size, - (cmd->req.hdr.cmd == IDA_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); - } + pci_unmap_page(hba[cmd->ctlr]->pci_dev, cmd->req.sg[i].addr, + cmd->req.sg[i].size, ddir); - complete_buffers(cmd->rq->bh, ok); + complete_buffers(cmd->rq->bio, ok); - DBGPX(printk("Done with %p\n", cmd->rq);); + DBGPX(printk("Done with %p\n", cmd->rq);); end_that_request_last(cmd->rq); - - } /* @@ -1111,7 +1036,7 @@ * If there are completed commands in the completion queue, * we had better do something about it.
*/ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(h->ctlr), flags); if (istat & FIFO_NOT_EMPTY) { while((a = h->access.command_completed(h))) { a1 = a; a &= ~3; @@ -1155,7 +1080,7 @@ * See if we can queue up some more IO */ do_ida_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(h->ctlr), flags); } /* @@ -1201,14 +1126,10 @@ put_user(diskinfo[0], &geo->heads); put_user(diskinfo[1], &geo->sectors); put_user(diskinfo[2], &geo->cylinders); - put_user(ida[(ctlr<<CTLR_SHIFT) + MINOR(inode->i_rdev)].start_sect, &geo->start); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; case IDAGETDRVINFO: return copy_to_user(&io->c.drv,&hba[ctlr]->drv[dsk],sizeof(drv_info_t)); - case BLKGETSIZE: - return put_user(ida[(ctlr<<CTLR_SHIFT) + MINOR(inode->i_rdev)].nr_sects, (unsigned long *)arg); - case BLKGETSIZE64: - return put_user((u64)(ida[(ctlr<<CTLR_SHIFT) + MINOR(inode->i_rdev)].nr_sects) << 9, (u64*)arg); case BLKRRPART: return revalidate_logvol(inode->i_rdev, 1); case IDAPASSTHRU: @@ -1244,6 +1165,8 @@ return(0); } + case BLKGETSIZE: + case BLKGETSIZE64: case BLKFLSBUF: case BLKBSZSET: case BLKBSZGET: @@ -1251,8 +1174,6 @@ case BLKROGET: case BLKRASET: case BLKRAGET: - case BLKELVGET: - case BLKELVSET: case BLKPG: return blk_ioctl(inode->i_rdev, cmd, arg); @@ -1352,11 +1273,11 @@ } /* Put the request on the tail of the request queue */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); addQ(&h->reqQ, c); h->Qdepth++; start_io(h); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); /* Wait for completion */ while(c->type != CMD_IOCTL_DONE) @@ -1570,15 +1491,15 @@ if (MINOR(dev) != 0) return -ENXIO; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); if (hba[ctlr]->usage_count > 1) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); printk(KERN_WARNING "cpqarray: Device busy for volume" " revalidation (usage=%d)\n", hba[ctlr]->usage_count); return -EBUSY; } - spin_unlock_irqrestore(&io_request_lock, flags); hba[ctlr]->usage_count++; + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); /* * Set the partition and block size structures for all volumes @@ -1587,7 +1508,6 @@ memset(ida+(ctlr*256), 0, sizeof(struct hd_struct)*NWD*16); memset(ida_sizes+(ctlr*256), 0, sizeof(int)*NWD*16); memset(ida_blocksizes+(ctlr*256), 0, sizeof(int)*NWD*16); - memset(ida_hardsizes+(ctlr*256), 0, sizeof(int)*NWD*16); memset(hba[ctlr]->drv, 0, sizeof(drv_info_t)*NWD); ida_gendisk[ctlr].nr_real = 0; @@ -1615,17 +1535,15 @@ int ctlr, target; struct gendisk *gdev; unsigned long flags; - int max_p; - int start; - int i; + int res; target = DEVICE_NR(dev); ctlr = MAJOR(dev) - MAJOR_NR; gdev = &ida_gendisk[ctlr]; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); if (hba[ctlr]->drv[target].usage_count > maxusage) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); printk(KERN_WARNING "cpqarray: Device busy for " "revalidation (usage=%d)\n", hba[ctlr]->drv[target].usage_count); @@ -1633,25 +1551,14 @@ } hba[ctlr]->drv[target].usage_count++; - spin_unlock_irqrestore(&io_request_lock, flags); - - max_p = gdev->max_p; - start = target << gdev->minor_shift; + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); - for(i=max_p-1; i>=0; i--) { - int minor = start+i; - invalidate_device(MKDEV(MAJOR_NR + ctlr, minor), 1); -
gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; + res = wipe_partitions(dev); + if (!res) + grok_partitions(dev, hba[ctlr]->drv[target].nr_blks); - /* reset the blocksize so we can read the partition table */ - blksize_size[MAJOR_NR+ctlr][minor] = 1024; - } - - /* 16 minors per disk... */ - grok_partitions(gdev, target, 16, hba[ctlr]->drv[target].nr_blks); hba[ctlr]->drv[target].usage_count--; - return 0; + return res; } diff -u --recursive --new-file v2.5.0/linux/drivers/block/cpqarray.h linux/drivers/block/cpqarray.h --- v2.5.0/linux/drivers/block/cpqarray.h Tue May 22 10:23:16 2001 +++ linux/drivers/block/cpqarray.h Tue Nov 27 09:23:27 2001 @@ -56,11 +56,6 @@ #ifdef __KERNEL__ -struct my_sg { - int size; - char *start_addr; -}; - struct ctlr_info; typedef struct ctlr_info ctlr_info_t; @@ -121,6 +116,9 @@ struct timer_list timer; unsigned int misc_tflags; }; + +#define IDA_LOCK(i) (&((BLK_DEFAULT_QUEUE(MAJOR_NR + i))->queue_lock)) + #endif #endif /* CPQARRAY_H */ diff -u --recursive --new-file v2.5.0/linux/drivers/block/elevator.c linux/drivers/block/elevator.c --- v2.5.0/linux/drivers/block/elevator.c Thu Jul 19 20:59:41 2001 +++ linux/drivers/block/elevator.c Tue Nov 27 09:23:27 2001 @@ -18,48 +18,65 @@ * Removed tests for max-bomb-segments, which was breaking elvtune * when run without -bN * + * Jens: + * - Rework again to work with bio instead of buffer_heads + * - added merge by hash-lookup + * - loose bi_dev comparisons, partition handling is right now + * - completely modularize elevator setup and teardown + * */ - +#include <linux/kernel.h> #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/elevator.h> +#include <linux/bio.h> #include <linux/blk.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/prefetch.h> + #include <asm/uaccess.h> /* - * This is a bit tricky. It's given that bh and rq are for the same + * This is a bit tricky. It's given that bio and rq are for the same * device, but the next request might of course not be. Run through * the tests below to check if we want to insert here if we can't merge - * bh into an existing request + * bio into an existing request */ -inline int bh_rq_in_between(struct buffer_head *bh, struct request *rq, - struct list_head *head) +inline int bio_rq_in_between(struct bio *bio, struct request *rq, + struct list_head *head) { struct list_head *next; struct request *next_rq; - next = rq->queue.next; + /* + * if .next is a valid request + */ + next = rq->queuelist.next; if (next == head) return 0; + next_rq = list_entry(next, struct request, queuelist); + + BUG_ON(!next_rq->inactive); + /* - * if the device is different (usually on a different partition), - * just check if bh is after rq + * if the device is different (not a normal case) just check if + * bio is after rq */ - next_rq = blkdev_entry_to_request(next); if (next_rq->rq_dev != rq->rq_dev) - return bh->b_rsector > rq->sector; + return bio->bi_sector > rq->sector; /* - * ok, rq, next_rq and bh are on the same device. if bh is in between + * ok, rq, next_rq and bio are on the same device.
if bio is in between * the two, this is the sweet spot */ - if (bh->b_rsector < next_rq->sector && bh->b_rsector > rq->sector) + if (bio->bi_sector < next_rq->sector && bio->bi_sector > rq->sector) return 1; /* - * next_rq is ordered wrt rq, but bh is not in between the two + * next_rq is ordered wrt rq, but bio is not in between the two */ if (next_rq->sector > rq->sector) return 0; @@ -68,66 +85,139 @@ * next_rq and rq not ordered, if we happen to be either before * next_rq or after rq insert here anyway */ - if (bh->b_rsector > rq->sector || bh->b_rsector < next_rq->sector) + if (bio->bi_sector > rq->sector || bio->bi_sector < next_rq->sector) return 1; return 0; } +/* + * can we safely merge with this request? + */ +inline int elv_rq_merge_ok(request_queue_t *q, struct request *rq, + struct bio *bio) +{ + if (bio_data_dir(bio) == rq->cmd) { + if (rq->rq_dev == bio->bi_dev && !rq->waiting + && !rq->special && rq->inactive && rq->q == q) + return 1; + } + + return 0; +} + +/* + * find a struct request that has a bio linked that we can merge with + */ +inline struct request *bio_get_hash_rq(kdev_t dev, sector_t sector, int vc) +{ + struct bio *bio = bio_hash_find(dev, sector, vc); + struct request *rq = NULL; + + /* + * bio is pinned until we bio_put it + */ + if (bio) { + rq = bio->bi_hash_desc; + + BUG_ON(!rq); + + bio_put(bio); + } + + return rq; +} int elevator_linus_merge(request_queue_t *q, struct request **req, - struct list_head * head, - struct buffer_head *bh, int rw, - int max_sectors) + struct list_head *head, struct bio *bio) { - struct list_head *entry = &q->queue_head; - unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE; + unsigned int count = bio_sectors(bio); + struct elv_linus_data *edat = q->elevator.elevator_data; + unsigned int vc = q->hash_valid_counter; + struct list_head *entry; + struct request *__rq; + /* + * first try a back merge, then front, then give up and scan. 
this + * will of course fail for different size bios on the same queue, + * however that isn't really an issue + */ + if (likely(edat->flags & ELV_LINUS_BACK_MERGE)) { + __rq = bio_get_hash_rq(bio->bi_dev, bio->bi_sector - count, vc); + if (__rq) { + if (!elv_rq_merge_ok(q, __rq, bio)) + goto front; + + /* + * looks ok to merge + */ + if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { + *req = __rq; + return ELEVATOR_BACK_MERGE; + } + } + } + +front: + if (likely(edat->flags & ELV_LINUS_FRONT_MERGE)) { + __rq = bio_get_hash_rq(bio->bi_dev, bio->bi_sector + count, vc); + if (__rq) { + if (!elv_rq_merge_ok(q, __rq, bio)) + goto scan; + + /* + * looks ok to merge + */ + if (__rq->sector - count == bio->bi_sector) { + *req = __rq; + return ELEVATOR_FRONT_MERGE; + } + } + } + + /* + * no merge possible, scan for insertion + */ +scan: + entry = &q->queue_head; while ((entry = entry->prev) != head) { - struct request *__rq = blkdev_entry_to_request(entry); + __rq = list_entry_rq(entry); - /* - * simply "aging" of requests in queue - */ - if (__rq->elevator_sequence-- <= 0) - break; + prefetch(list_entry_rq(entry->prev)); - if (__rq->waiting) - continue; - if (__rq->rq_dev != bh->b_rdev) - continue; - if (!*req && bh_rq_in_between(bh, __rq, &q->queue_head)) - *req = __rq; - if (__rq->cmd != rw) + if (unlikely(__rq->waiting || __rq->special)) continue; - if (__rq->nr_sectors + count > max_sectors) - continue; - if (__rq->elevator_sequence < count) + if (unlikely(!__rq->inactive)) break; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { - ret = ELEVATOR_BACK_MERGE; + if (!*req && bio_rq_in_between(bio, __rq, &q->queue_head)) *req = __rq; + + /* + * simple "aging" of requests in queue + */ + if (__rq->elevator_sequence-- <= 0) break; - } else if (__rq->sector - count == bh->b_rsector) { - ret = ELEVATOR_FRONT_MERGE; - __rq->elevator_sequence -= count; - *req = __rq; + else if (__rq->elevator_sequence < count) break; - } } - return ret; + return ELEVATOR_NO_MERGE; } void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int count) { - struct list_head *entry = &req->queue, *head = &q->queue_head; + struct list_head *entry; + + BUG_ON(req->q != q); /* * second pass scan of requests that got passed over, if any */ - while ((entry = entry->next) != head) { - struct request *tmp = blkdev_entry_to_request(entry); + entry = &req->queuelist; + while ((entry = entry->next) != &q->queue_head) { + struct request *tmp; + prefetch(list_entry_rq(entry->next)); + tmp = list_entry_rq(entry); tmp->elevator_sequence -= count; } } @@ -138,85 +228,114 @@ req->elevator_sequence = next->elevator_sequence; } +void elv_add_request_fn(request_queue_t *q, struct request *rq, + struct list_head *insert_here) +{ + /* + * insert into queue pending list, merge hash, and possible latency + * list + */ + list_add(&rq->queuelist, insert_here); +} + +struct request *elv_next_request_fn(request_queue_t *q) +{ + if (!blk_queue_empty(q)) + return list_entry(q->queue_head.next, struct request, queuelist); + + return NULL; +} + +int elv_linus_init(request_queue_t *q, elevator_t *e) +{ + struct elv_linus_data *edata; + + edata = kmalloc(sizeof(struct elv_linus_data), GFP_ATOMIC); + if (!edata) + return -ENOMEM; + + /* + * default to doing both front and back merges + */ + edata->flags = ELV_LINUS_BACK_MERGE | ELV_LINUS_FRONT_MERGE; + e->elevator_data = edata; + return 0; +} + +void elv_linus_exit(request_queue_t *q, elevator_t *e) +{ + kfree(e->elevator_data); +} + /* * See if we can find a request that 
this buffer can be coalesced with. */ int elevator_noop_merge(request_queue_t *q, struct request **req, - struct list_head * head, - struct buffer_head *bh, int rw, - int max_sectors) + struct list_head * head, struct bio *bio) { - struct list_head *entry; - unsigned int count = bh->b_size >> 9; + struct request *__rq; + int count, ret; + unsigned int vc; + + count = bio_sectors(bio); + ret = ELEVATOR_NO_MERGE; + vc = q->hash_valid_counter; + + __rq = bio_get_hash_rq(bio->bi_dev, bio->bi_sector - count, vc); + if (__rq) { + if (!elv_rq_merge_ok(q, __rq, bio)) + goto front; - if (list_empty(&q->queue_head)) - return ELEVATOR_NO_MERGE; + if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { + ret = ELEVATOR_BACK_MERGE; + *req = __rq; + goto out; + } + } - entry = &q->queue_head; - while ((entry = entry->prev) != head) { - struct request *__rq = blkdev_entry_to_request(entry); +front: + __rq = bio_get_hash_rq(bio->bi_dev, bio->bi_sector + count, vc); + if (__rq) { + if (!elv_rq_merge_ok(q, __rq, bio)) + goto out; - if (__rq->cmd != rw) - continue; - if (__rq->rq_dev != bh->b_rdev) - continue; - if (__rq->nr_sectors + count > max_sectors) - continue; - if (__rq->waiting) - continue; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { - *req = __rq; - return ELEVATOR_BACK_MERGE; - } else if (__rq->sector - count == bh->b_rsector) { + if (__rq->sector - count == bio->bi_sector) { + ret = ELEVATOR_FRONT_MERGE; *req = __rq; - return ELEVATOR_FRONT_MERGE; + goto out; } } - *req = blkdev_entry_to_request(q->queue_head.prev); - return ELEVATOR_NO_MERGE; +out: + return ret; } void elevator_noop_merge_cleanup(request_queue_t *q, struct request *req, int count) {} void elevator_noop_merge_req(struct request *req, struct request *next) {} -int blkelvget_ioctl(elevator_t * elevator, blkelv_ioctl_arg_t * arg) +int elevator_init(request_queue_t *q, elevator_t *e, elevator_t type,char *name) { - blkelv_ioctl_arg_t output; + *e = type; - output.queue_ID = elevator->queue_ID; - output.read_latency = elevator->read_latency; - output.write_latency = elevator->write_latency; - output.max_bomb_segments = 0; + INIT_LIST_HEAD(&q->queue_head); + strncpy(e->queue_name, name, 15); - if (copy_to_user(arg, &output, sizeof(blkelv_ioctl_arg_t))) - return -EFAULT; + if (e->elevator_init_fn) + return e->elevator_init_fn(q, e); return 0; } -int blkelvset_ioctl(elevator_t * elevator, const blkelv_ioctl_arg_t * arg) +void elevator_exit(request_queue_t *q, elevator_t *e) { - blkelv_ioctl_arg_t input; - - if (copy_from_user(&input, arg, sizeof(blkelv_ioctl_arg_t))) - return -EFAULT; - - if (input.read_latency < 0) - return -EINVAL; - if (input.write_latency < 0) - return -EINVAL; - - elevator->read_latency = input.read_latency; - elevator->write_latency = input.write_latency; - return 0; + if (e->elevator_exit_fn) + e->elevator_exit_fn(q, e); } -void elevator_init(elevator_t * elevator, elevator_t type) +int elevator_global_init(void) { - static unsigned int queue_ID; - - *elevator = type; - elevator->queue_ID = queue_ID++; + return 0; } + +module_init(elevator_global_init); diff -u --recursive --new-file v2.5.0/linux/drivers/block/floppy.c linux/drivers/block/floppy.c --- v2.5.0/linux/drivers/block/floppy.c Thu Oct 25 13:58:34 2001 +++ linux/drivers/block/floppy.c Tue Nov 27 09:23:27 2001 @@ -576,7 +576,7 @@ static struct floppy_struct *_floppy = floppy_type; static unsigned char current_drive; static long current_count_sectors; -static unsigned char sector_t; /* sector in track */ +static unsigned char fsector_t; /* 
sector in track */ static unsigned char in_sector_offset; /* offset within physical sector, * expressed in units of 512 bytes */ @@ -2276,8 +2276,8 @@ * logical buffer */ static void request_done(int uptodate) { - int block; unsigned long flags; + int block; probing = 0; reschedule_timeout(MAXTIMEOUT, "request done %d", uptodate); @@ -2296,7 +2296,7 @@ DRS->maxtrack = 1; /* unlock chained buffers */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&QUEUE->queue_lock, flags); while (current_count_sectors && !QUEUE_EMPTY && current_count_sectors >= CURRENT->current_nr_sectors){ current_count_sectors -= CURRENT->current_nr_sectors; @@ -2304,7 +2304,7 @@ CURRENT->sector += CURRENT->current_nr_sectors; end_request(1); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&QUEUE->queue_lock, flags); if (current_count_sectors && !QUEUE_EMPTY){ /* "unlock" last subsector */ @@ -2329,9 +2329,9 @@ DRWE->last_error_sector = CURRENT->sector; DRWE->last_error_generation = DRS->generation; } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&QUEUE->queue_lock, flags); end_request(0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&QUEUE->queue_lock, flags); } } @@ -2377,7 +2377,7 @@ printk("rt=%d t=%d\n", R_TRACK, TRACK); printk("heads=%d eoc=%d\n", heads, eoc); printk("spt=%d st=%d ss=%d\n", SECT_PER_TRACK, - sector_t, ssize); + fsector_t, ssize); printk("in_sector_offset=%d\n", in_sector_offset); } #endif @@ -2424,7 +2424,7 @@ } else if (CT(COMMAND) == FD_READ){ buffer_track = raw_cmd->track; buffer_drive = current_drive; - INFBOUND(buffer_max, nr_sectors + sector_t); + INFBOUND(buffer_max, nr_sectors + fsector_t); } cont->redo(); } @@ -2432,19 +2432,19 @@ /* Compute maximal contiguous buffer size. 
*/ static int buffer_chain_size(void) { - struct buffer_head *bh; + struct bio *bio; int size; char *base; base = CURRENT->buffer; size = CURRENT->current_nr_sectors << 9; - bh = CURRENT->bh; + bio = CURRENT->bio; - if (bh){ - bh = bh->b_reqnext; - while (bh && bh->b_data == base + size){ - size += bh->b_size; - bh = bh->b_reqnext; + if (bio){ + bio = bio->bi_next; + while (bio && bio_data(bio) == base + size){ + size += bio_size(bio); + bio = bio->bi_next; } } return size >> 9; @@ -2453,13 +2453,13 @@ /* Compute the maximal transfer size */ static int transfer_size(int ssize, int max_sector, int max_size) { - SUPBOUND(max_sector, sector_t + max_size); + SUPBOUND(max_sector, fsector_t + max_size); /* alignment */ max_sector -= (max_sector % _floppy->sect) % ssize; /* transfer size, beginning not aligned */ - current_count_sectors = max_sector - sector_t ; + current_count_sectors = max_sector - fsector_t ; return max_sector; } @@ -2470,7 +2470,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) { int remaining; /* number of transferred 512-byte sectors */ - struct buffer_head *bh; + struct bio *bio; char *buffer, *dma_buffer; int size; @@ -2479,8 +2479,8 @@ CURRENT->nr_sectors); if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE && - buffer_max > sector_t + CURRENT->nr_sectors) - current_count_sectors = minimum(buffer_max - sector_t, + buffer_max > fsector_t + CURRENT->nr_sectors) + current_count_sectors = minimum(buffer_max - fsector_t, CURRENT->nr_sectors); remaining = current_count_sectors << 9; @@ -2491,7 +2491,7 @@ printk("current_count_sectors=%ld\n", current_count_sectors); printk("remaining=%d\n", remaining >> 9); printk("CURRENT->nr_sectors=%ld\n",CURRENT->nr_sectors); - printk("CURRENT->current_nr_sectors=%ld\n", + printk("CURRENT->current_nr_sectors=%u\n", CURRENT->current_nr_sectors); printk("max_sector=%d\n", max_sector); printk("ssize=%d\n", ssize); @@ -2500,9 +2500,9 @@ buffer_max = maximum(max_sector, buffer_max); - dma_buffer = floppy_track_buffer + ((sector_t - buffer_min) << 9); + dma_buffer = floppy_track_buffer + ((fsector_t - buffer_min) << 9); - bh = CURRENT->bh; + bio = CURRENT->bio; size = CURRENT->current_nr_sectors << 9; buffer = CURRENT->buffer; @@ -2514,8 +2514,8 @@ dma_buffer < floppy_track_buffer){ DPRINT("buffer overrun in copy buffer %d\n", (int) ((floppy_track_buffer - dma_buffer) >>9)); - printk("sector_t=%d buffer_min=%d\n", - sector_t, buffer_min); + printk("fsector_t=%d buffer_min=%d\n", + fsector_t, buffer_min); printk("current_count_sectors=%ld\n", current_count_sectors); if (CT(COMMAND) == FD_READ) @@ -2536,15 +2536,15 @@ break; dma_buffer += size; - bh = bh->b_reqnext; + bio = bio->bi_next; #ifdef FLOPPY_SANITY_CHECK - if (!bh){ + if (!bio){ DPRINT("bh=null in copy buffer after copy\n"); break; } #endif - size = bh->b_size; - buffer = bh->b_data; + size = bio_size(bio); + buffer = bio_data(bio); } #ifdef FLOPPY_SANITY_CHECK if (remaining){ @@ -2636,7 +2636,7 @@ max_sector = _floppy->sect * _floppy->head; TRACK = CURRENT->sector / max_sector; - sector_t = CURRENT->sector % max_sector; + fsector_t = CURRENT->sector % max_sector; if (_floppy->track && TRACK >= _floppy->track) { if (CURRENT->current_nr_sectors & 1) { current_count_sectors = 1; @@ -2644,17 +2644,17 @@ } else return 0; } - HEAD = sector_t / _floppy->sect; + HEAD = fsector_t / _floppy->sect; if (((_floppy->stretch & FD_SWAPSIDES) || TESTF(FD_NEED_TWADDLE)) && - sector_t < _floppy->sect) + fsector_t < _floppy->sect) max_sector = _floppy->sect; /* 2M disks have 
phantom sectors on the first track */ if ((_floppy->rate & FD_2M) && (!TRACK) && (!HEAD)){ max_sector = 2 * _floppy->sect / 3; - if (sector_t >= max_sector){ - current_count_sectors = minimum(_floppy->sect - sector_t, + if (fsector_t >= max_sector){ + current_count_sectors = minimum(_floppy->sect - fsector_t, CURRENT->nr_sectors); return 1; } @@ -2676,7 +2676,7 @@ GAP = _floppy->gap; CODE2SIZE; SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE; - SECTOR = ((sector_t % _floppy->sect) << 2 >> SIZECODE) + 1; + SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) + 1; /* tracksize describes the size which can be filled up with sectors * of size ssize. @@ -2684,11 +2684,11 @@ tracksize = _floppy->sect - _floppy->sect % ssize; if (tracksize < _floppy->sect){ SECT_PER_TRACK ++; - if (tracksize <= sector_t % _floppy->sect) + if (tracksize <= fsector_t % _floppy->sect) SECTOR--; /* if we are beyond tracksize, fill up using smaller sectors */ - while (tracksize <= sector_t % _floppy->sect){ + while (tracksize <= fsector_t % _floppy->sect){ while(tracksize + ssize > _floppy->sect){ SIZECODE--; ssize >>= 1; @@ -2704,12 +2704,12 @@ max_sector = _floppy->sect; } - in_sector_offset = (sector_t % _floppy->sect) % ssize; - aligned_sector_t = sector_t - in_sector_offset; + in_sector_offset = (fsector_t % _floppy->sect) % ssize; + aligned_sector_t = fsector_t - in_sector_offset; max_size = CURRENT->nr_sectors; if ((raw_cmd->track == buffer_track) && (current_drive == buffer_drive) && - (sector_t >= buffer_min) && (sector_t < buffer_max)) { + (fsector_t >= buffer_min) && (fsector_t < buffer_max)) { /* data already in track buffer */ if (CT(COMMAND) == FD_READ) { copy_buffer(1, max_sector, buffer_max); @@ -2717,8 +2717,8 @@ } } else if (in_sector_offset || CURRENT->nr_sectors < ssize){ if (CT(COMMAND) == FD_WRITE){ - if (sector_t + CURRENT->nr_sectors > ssize && - sector_t + CURRENT->nr_sectors < ssize + ssize) + if (fsector_t + CURRENT->nr_sectors > ssize && + fsector_t + CURRENT->nr_sectors < ssize + ssize) max_size = ssize + ssize; else max_size = ssize; @@ -2731,7 +2731,7 @@ int direct, indirect; indirect= transfer_size(ssize,max_sector,max_buffer_sectors*2) - - sector_t; + fsector_t; /* * Do NOT use minimum() here---MAX_DMA_ADDRESS is 64 bits wide @@ -2746,7 +2746,7 @@ if (CROSS_64KB(CURRENT->buffer, max_size << 9)) max_size = (K_64 - ((unsigned long)CURRENT->buffer) % K_64)>>9; - direct = transfer_size(ssize,max_sector,max_size) - sector_t; + direct = transfer_size(ssize,max_sector,max_size) - fsector_t; /* * We try to read tracks, but if we get too many errors, we * go back to reading just one sector at a time. 
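The floppy hunks above rename the driver-local sector_t variable to fsector_t, since sector_t now names the block layer's sector type; the track/head arithmetic itself is unchanged. As a rough illustration of that arithmetic (not code from the patch), here is a minimal userspace sketch; the 18 sectors/track, 2 heads geometry is an assumed 1.44M layout standing in for _floppy->sect and _floppy->head:

	/* illustration only: the TRACK/HEAD/fsector_t split done in make_raw_rw_request() */
	#include <stdio.h>

	int main(void)
	{
		unsigned long sector = 1234;		/* hypothetical linear 512-byte sector */
		unsigned int sect = 18, heads = 2;	/* assumed _floppy->sect, _floppy->head */
		unsigned int max_sector = sect * heads;	/* sectors per cylinder */

		unsigned int track = sector / max_sector;	/* TRACK */
		unsigned int fsector_t = sector % max_sector;	/* sector within the track pair */
		unsigned int head = fsector_t / sect;		/* HEAD */

		printf("track=%u head=%u fsector_t=%u\n", track, head, fsector_t);
		return 0;
	}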
@@ -2765,8 +2765,8 @@ raw_cmd->length = current_count_sectors << 9; if (raw_cmd->length == 0){ DPRINT("zero dma transfer attempted from make_raw_request\n"); - DPRINT("indirect=%d direct=%d sector_t=%d", - indirect, direct, sector_t); + DPRINT("indirect=%d direct=%d fsector_t=%d", + indirect, direct, fsector_t); return 0; } /* check_dma_crossing(raw_cmd->kernel_data, @@ -2784,12 +2784,12 @@ /* claim buffer track if needed */ if (buffer_track != raw_cmd->track || /* bad track */ buffer_drive !=current_drive || /* bad drive */ - sector_t > buffer_max || - sector_t < buffer_min || + fsector_t > buffer_max || + fsector_t < buffer_min || ((CT(COMMAND) == FD_READ || (!in_sector_offset && CURRENT->nr_sectors >= ssize))&& max_sector > 2 * max_buffer_sectors + buffer_min && - max_size + sector_t > 2 * max_buffer_sectors + buffer_min) + max_size + fsector_t > 2 * max_buffer_sectors + buffer_min) /* not enough space */){ buffer_track = -1; buffer_drive = current_drive; @@ -2836,7 +2836,7 @@ floppy_track_buffer) >> 9), current_count_sectors); printk("st=%d ast=%d mse=%d msi=%d\n", - sector_t, aligned_sector_t, max_sector, max_size); + fsector_t, aligned_sector_t, max_sector, max_size); printk("ssize=%x SIZECODE=%d\n", ssize, SIZECODE); printk("command=%x SECTOR=%d HEAD=%d, TRACK=%d\n", COMMAND, SECTOR, HEAD, TRACK); @@ -2854,8 +2854,8 @@ raw_cmd->kernel_data + raw_cmd->length > floppy_track_buffer + (max_buffer_sectors << 10)){ DPRINT("buffer overrun in schedule dma\n"); - printk("sector_t=%d buffer_min=%d current_count=%ld\n", - sector_t, buffer_min, + printk("fsector_t=%d buffer_min=%d current_count=%ld\n", + fsector_t, buffer_min, raw_cmd->length >> 9); printk("current_count_sectors=%ld\n", current_count_sectors); @@ -2908,8 +2908,6 @@ } if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) panic(DEVICE_NAME ": request list destroyed"); - if (CURRENT->bh && !buffer_locked(CURRENT->bh)) - panic(DEVICE_NAME ": block not locked"); device = CURRENT->rq_dev; set_fdc(DRIVE(device)); @@ -4172,7 +4170,7 @@ blk_size[MAJOR_NR] = floppy_sizes; blksize_size[MAJOR_NR] = floppy_blocksizes; - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST, "floppy"); reschedule_timeout(MAXTIMEOUT, "floppy init", MAXTIMEOUT); config_types(); diff -u --recursive --new-file v2.5.0/linux/drivers/block/genhd.c linux/drivers/block/genhd.c --- v2.5.0/linux/drivers/block/genhd.c Wed Oct 17 14:46:29 2001 +++ linux/drivers/block/genhd.c Tue Nov 27 09:23:27 2001 @@ -28,14 +28,8 @@ /* * Global kernel list of partitioning information. - * - * XXX: you should _never_ access this directly. - * the only reason this is exported is source compatiblity. 
*/ -/*static*/ struct gendisk *gendisk_head; - -EXPORT_SYMBOL(gendisk_head); - +static struct gendisk *gendisk_head; /** * add_gendisk - add partitioning information to kernel list @@ -122,6 +116,30 @@ EXPORT_SYMBOL(get_gendisk); + +unsigned long +get_start_sect(kdev_t dev) +{ + struct gendisk *gp; + + gp = get_gendisk(dev); + if (gp) + return gp->part[MINOR(dev)].start_sect; + return 0; +} + +EXPORT_SYMBOL(get_start_sect); + +unsigned long +get_nr_sects(kdev_t dev) +{ + struct gendisk *gp; + + gp = get_gendisk(dev); + if (gp) + return gp->part[MINOR(dev)].nr_sects; + return 0; +} #ifdef CONFIG_PROC_FS int diff -u --recursive --new-file v2.5.0/linux/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c --- v2.5.0/linux/drivers/block/ll_rw_blk.c Mon Oct 29 12:11:17 2001 +++ linux/drivers/block/ll_rw_blk.c Tue Nov 27 09:37:52 2001 @@ -6,6 +6,7 @@ * Elevator latency, (C) 2000 Andrea Arcangeli SuSE * Queue request tables / lock, selectable elevator, Jens Axboe * kernel-doc documentation started by NeilBrown - July2000 + * bio rewrite, highmem i/o, etc, Jens Axboe - may 2001 */ /* @@ -22,7 +23,9 @@ #include <linux/string.h> #include <linux/config.h> #include <linux/locks.h> +#include <linux/bio.h> #include <linux/swap.h> +#include <linux/highmem.h> #include <linux/init.h> #include <linux/smp_lock.h> @@ -50,27 +53,13 @@ */ DECLARE_TASK_QUEUE(tq_disk); -/* - * Protect the request list against multiple users.. - * - * With this spinlock the Linux block IO subsystem is 100% SMP threaded - * from the IRQ event side, and almost 100% SMP threaded from the syscall - * side (we still have protect against block device array operations, and - * the do_request() side is casually still unsafe. The kernel lock protects - * this part currently.). - * - * there is a fair chance that things will work just OK if these functions - * are called with no global kernel lock held ... - */ -spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED; - /* This specifies how many sectors to read ahead on the disk. */ int read_ahead[MAX_BLKDEV]; /* blk_dev_struct is: - * *request_fn - * *current_request + * request_queue + * *queue */ struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */ @@ -94,42 +83,29 @@ int * blksize_size[MAX_BLKDEV]; /* - * hardsect_size contains the size of the hardware sector of a device. - * - * hardsect_size[MAJOR][MINOR] - * - * if (!hardsect_size[MAJOR]) - * then 512 bytes is assumed. - * else - * sector_size is hardsect_size[MAJOR][MINOR] - * This is currently set by some scsi devices and read by the msdos fs driver. - * Other uses may appear later. - */ -int * hardsect_size[MAX_BLKDEV]; - -/* * The following tunes the read-ahead algorithm in mm/filemap.c */ int * max_readahead[MAX_BLKDEV]; /* - * Max number of sectors per request - */ -int * max_sectors[MAX_BLKDEV]; - -/* * How many reqeusts do we allocate per queue, * and how many do we "batch" on freeing them? */ -static int queue_nr_requests, batch_requests; - -static inline int get_max_sectors(kdev_t dev) -{ - if (!max_sectors[MAJOR(dev)]) - return MAX_SECTORS; - return max_sectors[MAJOR(dev)][MINOR(dev)]; -} +int queue_nr_requests, batch_requests; +unsigned long blk_max_low_pfn, blk_max_pfn; +int blk_nohighio = 0; +/** + * blk_get_queue: - return the queue that matches the given device + * @dev: device + * + * Description: + * Given a specific device, return the queue that will hold I/O + * for it. This is either a &struct blk_dev_struct lookup and a + * call to the ->queue() function defined, or the default queue + * stored in the same location.
+ * + **/ inline request_queue_t *blk_get_queue(kdev_t dev) { struct blk_dev_struct *bdev = blk_dev + MAJOR(dev); @@ -140,113 +116,227 @@ return &blk_dev[MAJOR(dev)].request_queue; } -static int __blk_cleanup_queue(struct request_list *list) +/** + * blk_queue_make_request - define an alternate make_request function for a device + * @q: the request queue for the device to be affected + * @mfn: the alternate make_request function + * + * Description: + * The normal way for &struct bios to be passed to a device + * driver is for them to be collected into requests on a request + * queue, and then to allow the device driver to select requests + * off that queue when it is ready. This works well for many block + * devices. However some block devices (typically virtual devices + * such as md or lvm) do not benefit from the processing on the + * request queue, and are served best by having the requests passed + * directly to them. This can be achieved by providing a function + * to blk_queue_make_request(). + * + * Caveat: + * The driver that does this *must* be able to deal appropriately + * with buffers in "highmemory". This can be accomplished by either calling + * bio_kmap() to get a temporary kernel mapping, or by calling + * blk_queue_bounce() to create a buffer in normal memory. + **/ +void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) { - struct list_head *head = &list->free; - struct request *rq; - int i = 0; - - while (!list_empty(head)) { - rq = list_entry(head->next, struct request, queue); - list_del(&rq->queue); - kmem_cache_free(request_cachep, rq); - i++; - }; - - if (i != list->count) - printk("request list leak!\n"); + /* + * set defaults + */ + q->max_segments = MAX_SEGMENTS; + q->make_request_fn = mfn; + blk_queue_max_sectors(q, MAX_SECTORS); + blk_queue_hardsect_size(q, 512); - list->count = 0; - return i; + init_waitqueue_head(&q->queue_wait); } /** - * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed - * @q: the request queue to be released + * blk_queue_bounce_limit - set bounce buffer limit for queue + * @q: the request queue for the device + * @dma_addr: bus address limit * * Description: - * blk_cleanup_queue is the pair to blk_init_queue(). It should - * be called when a request queue is being released; typically - * when a block device is being de-registered. Currently, its - * primary task it to free all the &struct request structures that - * were allocated to the queue. - * Caveat: - * Hopefully the low level driver will have finished any - * outstanding requests first... + * Different hardware can have different requirements as to what pages + * it can do I/O directly to. A low level driver can call + * blk_queue_bounce_limit to have lower memory pages allocated as bounce + * buffers for doing I/O to pages residing above @dma_addr. By default + * the block layer sets this to the highest numbered "low" memory page. **/ -void blk_cleanup_queue(request_queue_t * q) +void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) { - int count = queue_nr_requests; + unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; + unsigned long mb = dma_addr >> 20; + static request_queue_t *last_q; - count -= __blk_cleanup_queue(&q->rq[READ]); - count -= __blk_cleanup_queue(&q->rq[WRITE]); - - if (count) - printk("blk_cleanup_queue: leaked requests (%d)\n", count); + /* + * keep this for debugging for now...
+ */ + if (dma_addr != BLK_BOUNCE_HIGH && q != last_q) { + printk("blk: queue %p, ", q); + if (dma_addr == BLK_BOUNCE_ANY) + printk("no I/O memory limit\n"); + else + printk("I/O limit %luMb (mask 0x%Lx)\n", mb, (u64) dma_addr); + } - memset(q, 0, sizeof(*q)); + q->bounce_pfn = bounce_pfn; + last_q = q; } + /** - * blk_queue_headactive - indicate whether head of request queue may be active - * @q: The queue which this applies to. - * @active: A flag indication where the head of the queue is active. + * blk_queue_max_sectors - set max sectors for a request for this queue + * @q: the request queue for the device + * @max_sectors: max sectors in the usual 512b unit * * Description: - * The driver for a block device may choose to leave the currently active - * request on the request queue, removing it only when it has completed. - * The queue handling routines assume this by default for safety reasons - * and will not involve the head of the request queue in any merging or - * reordering of requests when the queue is unplugged (and thus may be - * working on this particular request). - * - * If a driver removes requests from the queue before processing them, then - * it may indicate that it does so, there by allowing the head of the queue - * to be involved in merging and reordering. This is done be calling - * blk_queue_headactive() with an @active flag of %0. - * - * If a driver processes several requests at once, it must remove them (or - * at least all but one of them) from the request queue. + * Enables a low level driver to set an upper limit on the size of + * received requests. + **/ +void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors) +{ + q->max_sectors = max_sectors; +} + +/** + * blk_queue_max_segments - set max segments for a request for this queue + * @q: the request queue for the device + * @max_segments: max number of segments * - * When a queue is plugged the head will be assumed to be inactive. + * Description: + * Enables a low level driver to set an upper limit on the number of + * data segments in a request **/ - -void blk_queue_headactive(request_queue_t * q, int active) +void blk_queue_max_segments(request_queue_t *q, unsigned short max_segments) { - q->head_active = active; + q->max_segments = max_segments; } /** - * blk_queue_make_request - define an alternate make_request function for a device - * @q: the request queue for the device to be affected - * @mfn: the alternate make_request function + * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg + * @q: the request queue for the device + * @max_size: max size of segment in bytes * * Description: - * The normal way for &struct buffer_heads to be passed to a device - * driver is for them to be collected into requests on a request - * queue, and then to allow the device driver to select requests - * off that queue when it is ready. This works well for many block - * devices. However some block devices (typically virtual devices - * such as md or lvm) do not benefit from the processing on the - * request queue, and are served best by having the requests passed - * directly to them. This can be achieved by providing a function - * to blk_queue_make_request(). 
+ * Enables a low level driver to set an upper limit on the size of a + * coalesced segment + **/ +void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size) +{ + q->max_segment_size = max_size; +} + +/** + * blk_queue_hardsect_size - set hardware sector size for the queue + * @q: the request queue for the device + * @size: the hardware sector size, in bytes + * - * Caveat: - * The driver that does this *must* be able to deal appropriately - * with buffers in "highmemory", either by calling bh_kmap() to get - * a kernel mapping, to by calling create_bounce() to create a - * buffer in normal memory. + * Description: + * This should typically be set to the lowest possible sector size + * that the hardware can operate on (possible without reverting to + * even internal read-modify-write operations). Usually the default + * of 512 covers most hardware. **/ +void blk_queue_hardsect_size(request_queue_t *q, unsigned short size) +{ + q->hardsect_size = size; } -void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) +/* + * can we merge the two segments, or do we need to start a new one? + */ +inline int blk_same_segment(request_queue_t *q, struct bio *bio, + struct bio *nxt) { - q->make_request_fn = mfn; + /* + * not contiguous, just forget it + */ + if (!BIO_CONTIG(bio, nxt)) + return 0; + + /* + * bio and nxt are contiguous, if they don't span a 4GB mem boundary + * return ok + */ + if (BIO_PHYS_4G(bio, nxt)) + return 1; + + return 0; } -static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) +/* + * map a request to scatterlist, return number of sg entries setup. Caller + * must make sure sg can hold rq->nr_segments entries + */ +int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg) { - if (req->nr_segments < max_segments) { + unsigned long long lastend; + struct bio_vec *bvec; + struct bio *bio; + int nsegs, i; + + nsegs = 0; + bio = rq->bio; + lastend = ~0ULL; + + /* + * for each bio in rq + */ + rq_for_each_bio(bio, rq) { + /* + * for each segment in bio + */ + bio_for_each_segment(bvec, bio, i) { + int nbytes = bvec->bv_len; + + BIO_BUG_ON(i > bio->bi_io_vec->bvl_cnt); + + if (bvec_to_phys(bvec) == lastend) { + if (sg[nsegs - 1].length + nbytes > q->max_segment_size) { + printk("blk_rq_map_sg: %d segment size exceeded\n", q->max_segment_size); + goto new_segment; + } + + /* + * make sure to not map a 4GB boundary into + * same sg entry + */ + if (!__BIO_PHYS_4G(lastend, lastend + nbytes)) { + printk("blk_rq_map_sg: 4GB cross\n"); + lastend = ~0ULL; + } else + lastend += nbytes; + + sg[nsegs - 1].length += nbytes; + } else { +new_segment: + if (nsegs >= q->max_segments) { + printk("map: %d >= %d\n", nsegs, q->max_segments); + BUG(); + } + + sg[nsegs].address = NULL; + sg[nsegs].page = bvec->bv_page; + sg[nsegs].length = nbytes; + sg[nsegs].offset = bvec->bv_offset; + + lastend = bvec_to_phys(bvec) + nbytes; + nsegs++; + } + } /* segments in bio */ + } /* bios in rq */ + + return nsegs; +} + +/* + * the standard queue merge functions, can be overridden with device + * specific ones if so desired + */ +static inline int ll_new_segment(request_queue_t *q, struct request *req) +{ + if (req->nr_segments < q->max_segments) { req->nr_segments++; return 1; } @@ -254,30 +344,36 @@ } static int ll_back_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) + struct bio *bio) { - if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) + if (req->nr_sectors +
bio_sectors(bio) > q->max_sectors) + return 0; + if (blk_same_segment(q, req->biotail, bio)) return 1; - return ll_new_segment(q, req, max_segments); + + return ll_new_segment(q, req); } static int ll_front_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) + struct bio *bio) { - if (bh->b_data + bh->b_size == req->bh->b_data) + if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) + return 0; + if (blk_same_segment(q, bio, req->bio)) return 1; - return ll_new_segment(q, req, max_segments); + + return ll_new_segment(q, req); } static int ll_merge_requests_fn(request_queue_t *q, struct request *req, - struct request *next, int max_segments) + struct request *next) { int total_segments = req->nr_segments + next->nr_segments; - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + if (blk_same_segment(q, req->biotail, next->bio)) total_segments--; - if (total_segments > max_segments) + if (total_segments > q->max_segments) return 0; req->nr_segments = total_segments; @@ -292,16 +388,16 @@ * This is called with interrupts off and no requests on the queue. * (and with the request spinlock acquired) */ -static void generic_plug_device(request_queue_t *q, kdev_t dev) +static void blk_plug_device(request_queue_t *q) { /* - * no need to replug device + * common case */ - if (!list_empty(&q->queue_head) || q->plugged) + if (!elv_queue_empty(q)) return; - q->plugged = 1; - queue_task(&q->plug_tq, &tq_disk); + if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) + queue_task(&q->plug_tq, &tq_disk); } /* @@ -309,24 +405,83 @@ */ static inline void __generic_unplug_device(request_queue_t *q) { - if (q->plugged) { - q->plugged = 0; - if (!list_empty(&q->queue_head)) + if (test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) + if (!elv_queue_empty(q)) q->request_fn(q); - } } +/** + * generic_unplug_device - fire a request queue + * @q: The &request_queue_t in question + * + * Description: + * Linux uses plugging to build bigger request queues before letting + * the device have at them. If a queue is plugged, the I/O scheduler + * is still adding and merging requests on the queue. Once the queue + * gets unplugged (either by manually calling this function, or by + * running the tq_disk task queue), the request_fn defined for the + * queue is invoked and transfers started. + **/ void generic_unplug_device(void *data) { request_queue_t *q = (request_queue_t *) data; unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); __generic_unplug_device(q); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } -static void blk_init_free_list(request_queue_t *q) +static int __blk_cleanup_queue(struct request_list *list) +{ + struct list_head *head = &list->free; + struct request *rq; + int i = 0; + + while (!list_empty(head)) { + rq = list_entry(head->next, struct request, queuelist); + list_del(&rq->queuelist); + kmem_cache_free(request_cachep, rq); + i++; + } + + if (i != list->count) + printk("request list leak!\n"); + + list->count = 0; + return i; +} + +/** + * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed + * @q: the request queue to be released + * + * Description: + * blk_cleanup_queue is the pair to blk_init_queue(). It should + * be called when a request queue is being released; typically + * when a block device is being de-registered.
Currently, its + * primary task is to free all the &struct request structures that + * were allocated to the queue. + * Caveat: + * Hopefully the low level driver will have finished any + * outstanding requests first... + **/ +void blk_cleanup_queue(request_queue_t * q) +{ + int count = queue_nr_requests; + + count -= __blk_cleanup_queue(&q->rq[READ]); + count -= __blk_cleanup_queue(&q->rq[WRITE]); + + if (count) + printk("blk_cleanup_queue: leaked requests (%d)\n", count); + + elevator_exit(q, &q->elevator); + + memset(q, 0, sizeof(*q)); +} + +static int blk_init_free_list(request_queue_t *q) { struct request *rq; int i; @@ -341,22 +496,30 @@ */ for (i = 0; i < queue_nr_requests; i++) { rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL); - if (rq == NULL) { - /* We'll get a `leaked requests' message from blk_cleanup_queue */ - printk(KERN_EMERG "blk_init_free_list: error allocating requests\n"); - break; - } + if (!rq) + goto nomem; + memset(rq, 0, sizeof(struct request)); rq->rq_status = RQ_INACTIVE; - list_add(&rq->queue, &q->rq[i&1].free); - q->rq[i&1].count++; + if (i < queue_nr_requests >> 1) { + list_add(&rq->queuelist, &q->rq[READ].free); + q->rq[READ].count++; + } else { + list_add(&rq->queuelist, &q->rq[WRITE].free); + q->rq[WRITE].count++; + } } - init_waitqueue_head(&q->wait_for_request); + init_waitqueue_head(&q->rq[READ].wait); + init_waitqueue_head(&q->rq[WRITE].wait); spin_lock_init(&q->queue_lock); + return 0; +nomem: + blk_cleanup_queue(q); + return 1; } -static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh); +static int __make_request(request_queue_t *, struct bio *); /** * blk_init_queue - prepare a request queue for use with a block device @@ -379,45 +542,47 @@ * requests on the queue, it is responsible for arranging that the requests * get dealt with eventually. * - * A global spin lock $io_request_lock must be held while manipulating the - * requests on the request queue. - * - * The request on the head of the queue is by default assumed to be - * potentially active, and it is not considered for re-ordering or merging - * whenever the given queue is unplugged. This behaviour can be changed with - * blk_queue_headactive(). + * The queue spin lock must be held while manipulating the requests on the + * request queue. * * Note: * blk_init_queue() must be paired with a blk_cleanup_queue() call * when the block device is deactivated (such as at module unload). **/ -void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) +int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, char *name) { - INIT_LIST_HEAD(&q->queue_head); - elevator_init(&q->elevator, ELEVATOR_LINUS); - blk_init_free_list(q); + int ret; + + if (blk_init_free_list(q)) + return -ENOMEM; + + if ((ret = elevator_init(q, &q->elevator, ELEVATOR_LINUS, name))) { + blk_cleanup_queue(q); + return ret; + } + q->request_fn = rfn; q->back_merge_fn = ll_back_merge_fn; q->front_merge_fn = ll_front_merge_fn; q->merge_requests_fn = ll_merge_requests_fn; - q->make_request_fn = __make_request; q->plug_tq.sync = 0; q->plug_tq.routine = &generic_unplug_device; q->plug_tq.data = q; - q->plugged = 0; + q->queue_flags = 0; + /* - * These booleans describe the queue properties. We set the - * default (and most common) values here. Other drivers can - * use the appropriate functions to alter the queue properties. - * as appropriate.
+ * by default assume old behaviour and bounce for any highmem page */ - q->plug_device_fn = generic_plug_device; - q->head_active = 1; + blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); + + blk_queue_make_request(q, __make_request); + blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); + return 0; } -#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue); +#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) /* - * Get a free request. io_request_lock must be held and interrupts + * Get a free request. queue lock must be held and interrupts * disabled on the way in. */ static inline struct request *get_request(request_queue_t *q, int rw) @@ -427,8 +592,9 @@ if (!list_empty(&rl->free)) { rq = blkdev_free_rq(&rl->free); - list_del(&rq->queue); + list_del(&rq->queuelist); rl->count--; + rq->inactive = 1; rq->rq_status = RQ_ACTIVE; rq->special = NULL; rq->q = q; @@ -440,38 +606,28 @@ /* * No available requests for this queue, unplug the device. */ -static struct request *__get_request_wait(request_queue_t *q, int rw) +static struct request *get_request_wait(request_queue_t *q, int rw) { - register struct request *rq; DECLARE_WAITQUEUE(wait, current); + struct request *rq; + + spin_lock_prefetch(&q->queue_lock); generic_unplug_device(q); - add_wait_queue(&q->wait_for_request, &wait); + add_wait_queue(&q->rq[rw].wait, &wait); do { set_current_state(TASK_UNINTERRUPTIBLE); if (q->rq[rw].count < batch_requests) schedule(); - spin_lock_irq(&io_request_lock); - rq = get_request(q,rw); - spin_unlock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); + rq = get_request(q, rw); + spin_unlock_irq(&q->queue_lock); } while (rq == NULL); - remove_wait_queue(&q->wait_for_request, &wait); + remove_wait_queue(&q->rq[rw].wait, &wait); current->state = TASK_RUNNING; return rq; } -static inline struct request *get_request_wait(request_queue_t *q, int rw) -{ - register struct request *rq; - - spin_lock_irq(&io_request_lock); - rq = get_request(q, rw); - spin_unlock_irq(&io_request_lock); - if (rq) - return rq; - return __get_request_wait(q, rw); -} - /* RO fail safe mechanism */ static long ro_bits[MAX_BLKDEV][8]; @@ -497,8 +653,7 @@ else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31)); } -inline void drive_stat_acct (kdev_t dev, int rw, - unsigned long nr_sectors, int new_io) +void drive_stat_acct (kdev_t dev, int rw, unsigned long nr_sectors, int new_io) { unsigned int major = MAJOR(dev); unsigned int index; @@ -520,33 +675,32 @@ /* * add-request adds a request to the linked list. - * io_request_lock is held and interrupts disabled, as we muck with the + * queue lock is held and interrupts disabled, as we muck with the * request queue list. - * - * By this point, req->cmd is always either READ/WRITE, never READA, - * which is important for drive_stat_acct() above. 
*/ static inline void add_request(request_queue_t * q, struct request * req, struct list_head *insert_here) { drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); - if (!q->plugged && q->head_active && insert_here == &q->queue_head) { - spin_unlock_irq(&io_request_lock); - BUG(); + { + struct request *__rq = __elv_next_request(q); + + if (__rq && !__rq->inactive && insert_here == &q->queue_head) + BUG(); } /* * elevator indicated where it wants this request to be * inserted at elevator_merge time */ - list_add(&req->queue, insert_here); + q->elevator.elevator_add_req_fn(q, req, insert_here); } /* - * Must be called with io_request_lock held and interrupts disabled + * Must be called with queue lock held and interrupts disabled */ -inline void blkdev_release_request(struct request *req) +void blkdev_release_request(struct request *req) { request_queue_t *q = req->q; int rw = req->cmd; @@ -555,169 +709,216 @@ req->q = NULL; /* + * should only happen on freereq logic in __make_request, in which + * case we don't want to prune these entries from the hash + */ +#if 1 + if (req->bio) + bio_hash_remove(req->bio); + if (req->biotail) + bio_hash_remove(req->biotail); +#endif + + /* * Request may not have originated from ll_rw_blk. if not, * assume it has free buffers and check waiters */ if (q) { - list_add(&req->queue, &q->rq[rw].free); - if (++q->rq[rw].count >= batch_requests && waitqueue_active(&q->wait_for_request)) - wake_up(&q->wait_for_request); + list_add(&req->queuelist, &q->rq[rw].free); + if (++q->rq[rw].count >= batch_requests + && waitqueue_active(&q->rq[rw].wait)) + wake_up(&q->rq[rw].wait); } } /* * Has to be called with the request spinlock acquired */ -static void attempt_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) -{ - struct request *next; - - next = blkdev_next_request(req); +static void attempt_merge(request_queue_t *q, struct request *req) +{ + struct request *next = blkdev_next_request(req); + if (req->sector + req->nr_sectors != next->sector) return; + if (req->cmd != next->cmd || req->rq_dev != next->rq_dev - || req->nr_sectors + next->nr_sectors > max_sectors - || next->waiting) + || req->nr_sectors + next->nr_sectors > q->max_sectors + || next->waiting || next->special || !next->inactive) return; + /* * If we are not allowed to merge these requests, then * return. If we are allowed to merge, then the count * will have been updated to the appropriate number, * and we shouldn't do it here too. 
*/ - if (!q->merge_requests_fn(q, req, next, max_segments)) - return; + if (q->merge_requests_fn(q, req, next)) { + q->elevator.elevator_merge_req_fn(req, next); + + bio_hash_remove(req->biotail); + + /* + * will handle dangling hash too + */ + blkdev_dequeue_request(next); + + req->biotail->bi_next = next->bio; + req->biotail = next->biotail; + + next->bio = next->biotail = NULL; - q->elevator.elevator_merge_req_fn(req, next); - req->bhtail->b_reqnext = next->bh; - req->bhtail = next->bhtail; - req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; - list_del(&next->queue); - blkdev_release_request(next); + bio_hash_add_unique(req->biotail, req, q->hash_valid_counter); + + req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; + + blkdev_release_request(next); + } } -static inline void attempt_back_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) +static inline void attempt_back_merge(request_queue_t *q, struct request *rq) { - if (&req->queue == q->queue_head.prev) - return; - attempt_merge(q, req, max_sectors, max_segments); + if (&rq->queuelist != q->queue_head.prev) + attempt_merge(q, rq); } -static inline void attempt_front_merge(request_queue_t * q, - struct list_head * head, - struct request *req, - int max_sectors, - int max_segments) +static inline void attempt_front_merge(request_queue_t *q, + struct list_head *head, + struct request *rq) { - struct list_head * prev; + struct list_head *prev = rq->queuelist.prev; - prev = req->queue.prev; - if (head == prev) - return; - attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments); + if (prev != head) + attempt_merge(q, blkdev_entry_to_request(prev)); } -static int __make_request(request_queue_t * q, int rw, - struct buffer_head * bh) +static inline void __blk_attempt_remerge(request_queue_t *q, struct request *rq) { - unsigned int sector, count; - int max_segments = MAX_SEGMENTS; - struct request * req, *freereq = NULL; - int rw_ahead, max_sectors, el_ret; - struct list_head *head, *insert_here; - int latency; - elevator_t *elevator = &q->elevator; + if (rq->queuelist.next != &q->queue_head) + attempt_merge(q, rq); +} - count = bh->b_size >> 9; - sector = bh->b_rsector; +/** + * blk_attempt_remerge - attempt to remerge active head with next request + * @q: The &request_queue_t belonging to the device + * @rq: The head request (usually) + * + * Description: + * For head-active devices, the queue can easily be unplugged so quickly + * that proper merging is not done on the front request. This may hurt + * performance greatly for some devices. The block layer cannot safely + * do merging on that first request for these queues, but the driver can + * call this function and make it happen any way. Only the driver knows + * when it is safe to do so. + **/ +void blk_attempt_remerge(request_queue_t *q, struct request *rq) +{ + unsigned long flags; - rw_ahead = 0; /* normal case; gets changed below for READA */ - switch (rw) { - case READA: - rw_ahead = 1; - rw = READ; /* drop into READ */ - case READ: - case WRITE: - latency = elevator_request_latency(elevator, rw); - break; - default: - BUG(); - goto end_io; - } + spin_lock_irqsave(&q->queue_lock, flags); + __blk_attempt_remerge(q, rq); + spin_unlock_irqrestore(&q->queue_lock, flags); +} - /* We'd better have a real physical mapping! - Check this bit only if the buffer was dirty and just locked - down by us so at this point flushpage will block and - won't clear the mapped bit under us. 
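
The kernel-doc for blk_attempt_remerge() above deliberately leaves the "when" to the driver. A minimal sketch of a call site, assuming a driver that still has the head request on the queue and has not yet told the controller about it; everything prefixed example_ is illustrative:

	static void example_issue(request_queue_t *q, struct request *rq)
	{
		/*
		 * 'rq' is still queued and the hardware has not seen
		 * it, so a late-arriving contiguous request may still
		 * be folded into it. blk_attempt_remerge() takes
		 * q->queue_lock itself, so it must be called without
		 * the lock held (__blk_attempt_remerge is the variant
		 * for callers that already hold it).
		 */
		blk_attempt_remerge(q, rq);

		example_start_io(rq);		/* driver specific */
	}
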
*/ - if (!buffer_mapped(bh)) - BUG(); +static int __make_request(request_queue_t *q, struct bio *bio) +{ + struct request *req, *freereq = NULL; + int el_ret, latency = 0, rw, nr_sectors, cur_nr_sectors, barrier; + struct list_head *head, *insert_here; + elevator_t *elevator = &q->elevator; + sector_t sector; - /* - * Temporary solution - in 2.5 this will be done by the lowlevel - * driver. Create a bounce buffer if the buffer data points into - * high memory - keep the original buffer otherwise. - */ -#if CONFIG_HIGHMEM - bh = create_bounce(rw, bh); -#endif + sector = bio->bi_sector; + nr_sectors = bio_sectors(bio); + cur_nr_sectors = bio_iovec(bio)->bv_len >> 9; + rw = bio_data_dir(bio); -/* look for a free request. */ /* - * Try to coalesce the new request with old requests + * low level driver can indicate that it wants pages above a + * certain limit bounced to low memory (ie for highmem, or even + * ISA dma in theory) */ - max_sectors = get_max_sectors(bh->b_rdev); + blk_queue_bounce(q, &bio); + + spin_lock_prefetch(&q->queue_lock); + + latency = elevator_request_latency(elevator, rw); + + barrier = test_bit(BIO_BARRIER, &bio->bi_flags); again: req = NULL; head = &q->queue_head; + + spin_lock_irq(&q->queue_lock); + /* - * Now we acquire the request spinlock, we have to be mega careful - * not to schedule or do something nonatomic + * barrier write must not be passed - so insert with 0 latency at + * the back of the queue and invalidate the entire existing merge hash + * for this device */ - spin_lock_irq(&io_request_lock); + if (barrier && !freereq) { + latency = 0; + bio_hash_invalidate(q, bio->bi_dev); + } insert_here = head->prev; - if (list_empty(head)) { - q->plug_device_fn(q, bh->b_rdev); /* is atomic */ + if (blk_queue_empty(q) || barrier) { + blk_plug_device(q); goto get_rq; - } else if (q->head_active && !q->plugged) +#if 0 + } else if (test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { head = head->next; +#else + } else if ((req = __elv_next_request(q))) { + if (!req->inactive) + head = head->next; - el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors); - switch (el_ret) { + req = NULL; +#endif + } + el_ret = elevator->elevator_merge_fn(q, &req, head, bio); + switch (el_ret) { case ELEVATOR_BACK_MERGE: - if (!q->back_merge_fn(q, req, bh, max_segments)) + if (&req->queuelist == head && !req->inactive) + BUG(); + if (!q->back_merge_fn(q, req, bio)) break; - elevator->elevator_merge_cleanup_fn(q, req, count); - req->bhtail->b_reqnext = bh; - req->bhtail = bh; - req->nr_sectors = req->hard_nr_sectors += count; - blk_started_io(count); - drive_stat_acct(req->rq_dev, req->cmd, count, 0); - attempt_back_merge(q, req, max_sectors, max_segments); + elevator->elevator_merge_cleanup_fn(q, req, nr_sectors); + + bio_hash_remove(req->biotail); + + req->biotail->bi_next = bio; + req->biotail = bio; + req->nr_sectors = req->hard_nr_sectors += nr_sectors; + drive_stat_acct(req->rq_dev, req->cmd, nr_sectors, 0); + attempt_back_merge(q, req); goto out; case ELEVATOR_FRONT_MERGE: - if (!q->front_merge_fn(q, req, bh, max_segments)) + if (&req->queuelist == head && !req->inactive) + BUG(); + if (!q->front_merge_fn(q, req, bio)) break; - elevator->elevator_merge_cleanup_fn(q, req, count); - bh->b_reqnext = req->bh; - req->bh = bh; - req->buffer = bh->b_data; - req->current_nr_sectors = count; + elevator->elevator_merge_cleanup_fn(q, req, nr_sectors); + + bio_hash_remove(req->bio); + + bio->bi_next = req->bio; + req->bio = bio; + /* + * may not be valid. 
if the low level driver said + * it didn't need a bounce buffer then it better + * not touch req->buffer either... + */ + req->buffer = bio_data(bio); + req->current_nr_sectors = cur_nr_sectors; + req->hard_cur_sectors = cur_nr_sectors; req->sector = req->hard_sector = sector; - req->nr_sectors = req->hard_nr_sectors += count; - blk_started_io(count); - drive_stat_acct(req->rq_dev, req->cmd, count, 0); - attempt_front_merge(q, head, req, max_sectors, max_segments); + req->nr_sectors = req->hard_nr_sectors += nr_sectors; + drive_stat_acct(req->rq_dev, req->cmd, nr_sectors, 0); + attempt_front_merge(q, head, req); goto out; /* @@ -730,14 +931,14 @@ * of the queue */ if (req) - insert_here = &req->queue; + insert_here = &req->queuelist; break; default: printk("elevator returned crap (%d)\n", el_ret); BUG(); } - + /* * Grab a free request from the freelist - if that is empty, check * if we are doing read ahead and abort instead of blocking for @@ -748,107 +949,132 @@ req = freereq; freereq = NULL; } else if ((req = get_request(q, rw)) == NULL) { - spin_unlock_irq(&io_request_lock); - if (rw_ahead) + + spin_unlock_irq(&q->queue_lock); + + /* + * READA bit set + */ + if (bio->bi_rw & RWA_MASK) { + set_bit(BIO_RW_BLOCK, &bio->bi_flags); goto end_io; + } - freereq = __get_request_wait(q, rw); + freereq = get_request_wait(q, rw); goto again; } -/* fill up the request-info, and add it to the queue */ + /* + * fill up the request-info, and add it to the queue + */ req->elevator_sequence = latency; req->cmd = rw; req->errors = 0; req->hard_sector = req->sector = sector; - req->hard_nr_sectors = req->nr_sectors = count; - req->current_nr_sectors = count; - req->nr_segments = 1; /* Always 1 for a new request. */ - req->nr_hw_segments = 1; /* Always 1 for a new request. */ - req->buffer = bh->b_data; + req->hard_nr_sectors = req->nr_sectors = nr_sectors; + req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors; + req->nr_segments = bio->bi_io_vec->bvl_cnt; + req->nr_hw_segments = req->nr_segments; + req->buffer = bio_data(bio); /* see ->buffer comment above */ req->waiting = NULL; - req->bh = bh; - req->bhtail = bh; - req->rq_dev = bh->b_rdev; - blk_started_io(count); + req->bio = req->biotail = bio; + req->rq_dev = bio->bi_dev; add_request(q, req, insert_here); out: - if (freereq) + if (freereq) { + freereq->bio = freereq->biotail = NULL; blkdev_release_request(freereq); - spin_unlock_irq(&io_request_lock); + } + + spin_unlock_irq(&q->queue_lock); + bio_hash_add_unique(bio, req, q->hash_valid_counter); return 0; + end_io: - bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); + bio->bi_end_io(bio, nr_sectors); return 0; } + +/* + * If bio->bi_dev is a partition, remap the location + */ +static inline void blk_partition_remap(struct bio *bio) +{ + int major, minor, drive, minor0; + struct gendisk *g; + kdev_t dev0; + + major = MAJOR(bio->bi_dev); + if ((g = get_gendisk(bio->bi_dev))) { + minor = MINOR(bio->bi_dev); + drive = (minor >> g->minor_shift); + minor0 = (drive << g->minor_shift); /* whole disk device */ + /* that is, minor0 = (minor & ~((1<minor_shift)-1)); */ + dev0 = MKDEV(major, minor0); + if (dev0 != bio->bi_dev) { + bio->bi_dev = dev0; + bio->bi_sector += g->part[minor].start_sect; + } + /* lots of checks are possible */ + } +} + /** - * generic_make_request: hand a buffer head to it's device driver for I/O - * @rw: READ, WRITE, or READA - what sort of I/O is desired. - * @bh: The buffer head describing the location in memory and on the device. 
+ * generic_make_request: hand a buffer to its device driver for I/O
+ * @bio: The bio describing the location in memory and on the device.
 *
 * generic_make_request() is used to make I/O requests of block
- * devices. It is passed a &struct buffer_head and a &rw value. The
- * %READ and %WRITE options are (hopefully) obvious in meaning. The
- * %READA value means that a read is required, but that the driver is
- * free to fail the request if, for example, it cannot get needed
- * resources immediately.
+ * devices. It is passed a &struct bio, which describes the I/O that needs
+ * to be done.
 *
 * generic_make_request() does not return any status. The
 * success/failure status of the request, along with notification of
- * completion, is delivered asynchronously through the bh->b_end_io
+ * completion, is delivered asynchronously through the bio->bi_end_io
 * function described (one day) else where.
 *
- * The caller of generic_make_request must make sure that b_page,
- * b_addr, b_size are set to describe the memory buffer, that b_rdev
- * and b_rsector are set to describe the device address, and the
- * b_end_io and optionally b_private are set to describe how
- * completion notification should be signaled. BH_Mapped should also
- * be set (to confirm that b_dev and b_blocknr are valid).
- *
- * generic_make_request and the drivers it calls may use b_reqnext,
- * and may change b_rdev and b_rsector. So the values of these fields
+ * The caller of generic_make_request must make sure that bi_io_vec
+ * is set to describe the memory buffer, that bi_dev and bi_sector are
+ * set to describe the device address, and that the
+ * bi_end_io and optionally bi_private are set to describe how
+ * completion notification should be signaled.
+ *
+ * generic_make_request and the drivers it calls may use bi_next if this
+ * bio happens to be merged with someone else, and may change bi_dev and
+ * bi_sector for remaps as it sees fit. So the values of these fields
 * should NOT be depended on after the call to generic_make_request.
- * Because of this, the caller should record the device address
- * information in b_dev and b_blocknr.
 *
- * Apart from those fields mentioned above, no other fields, and in
- * particular, no other flags, are changed by generic_make_request or
- * any lower level drivers.
 *
 */
-void generic_make_request (int rw, struct buffer_head * bh)
+void generic_make_request(struct bio *bio)
 {
-	int major = MAJOR(bh->b_rdev);
-	int minorsize = 0;
+	int major = MAJOR(bio->bi_dev);
+	int minor = MINOR(bio->bi_dev);
 	request_queue_t *q;
+	sector_t minorsize = 0;
+	int nr_sectors = bio_sectors(bio);
 
-	if (!bh->b_end_io)
-		BUG();
-
-	/* Test device size, when known. */
+	/* Test device or partition size, when known. */
 	if (blk_size[major])
-		minorsize = blk_size[major][MINOR(bh->b_rdev)];
+		minorsize = blk_size[major][minor];
 
 	if (minorsize) {
 		unsigned long maxsector = (minorsize << 1) + 1;
-		unsigned long sector = bh->b_rsector;
-		unsigned int count = bh->b_size >> 9;
+		unsigned long sector = bio->bi_sector;
 
-		if (maxsector < count || maxsector - count < sector) {
-			/* Yecch */
-			bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped);
-
-			/* This may well happen - the kernel calls bread()
-			   without checking the size of the device, e.g.,
-			   when mounting a device. 
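
A unit note on the test above: blk_size[][] is kept in 1024-byte blocks while bi_sector counts 512-byte sectors, hence the "<< 1" in the limit; the extra "+ 1" appears to allow for the final sector of an odd-sized device, which a kilobyte-granular blk_size[] cannot report. As a standalone sketch (names are illustrative):

	static int beyond_end(unsigned long device_kb,	/* blk_size[][] entry */
			      unsigned long sector,	/* first 512b sector */
			      unsigned int nr_sectors)	/* transfer length */
	{
		unsigned long maxsector = (device_kb << 1) + 1;

		/*
		 * Two comparisons instead of "sector + nr_sectors >
		 * maxsector", so that a huge 'sector' cannot overflow
		 * the sum and slip past the check.
		 */
		return maxsector < nr_sectors ||
		       maxsector - nr_sectors < sector;
	}

For example, a 4 MB device reports blk_size 4096, i.e. sectors 0..8191: an 8-sector transfer starting at sector 8186 is rejected (8186 + 8 > 8193), while one starting at 8185 still passes, covering the possibly unreported sector 8192.
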
*/ - printk(KERN_INFO - "attempt to access beyond end of device\n"); - printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", - kdevname(bh->b_rdev), rw, - (sector + count)>>1, minorsize); - - /* Yecch again */ - bh->b_end_io(bh, 0); - return; + if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { + if (blk_size[major][minor]) { + + /* This may well happen - the kernel calls + * bread() without checking the size of the + * device, e.g., when mounting a device. */ + printk(KERN_INFO + "attempt to access beyond end of device\n"); + printk(KERN_INFO "%s: rw=%ld, want=%ld, limit=%Lu\n", + kdevname(bio->bi_dev), bio->bi_rw, + (sector + nr_sectors)>>1, + (u64) blk_size[major][minor]); + } + set_bit(BIO_EOF, &bio->bi_flags); + goto end_io; } } @@ -856,63 +1082,127 @@ * Resolve the mapping until finished. (drivers are * still free to implement/resolve their own stacking * by explicitly returning 0) - */ - /* NOTE: we don't repeat the blk_size check for each new device. + * + * NOTE: we don't repeat the blk_size check for each new device. * Stacking drivers are expected to know what they are doing. */ do { - q = blk_get_queue(bh->b_rdev); + q = blk_get_queue(bio->bi_dev); if (!q) { printk(KERN_ERR - "generic_make_request: Trying to access " - "nonexistent block-device %s (%ld)\n", - kdevname(bh->b_rdev), bh->b_rsector); - buffer_IO_error(bh); + "generic_make_request: Trying to access nonexistent block-device %s (%Lu)\n", + kdevname(bio->bi_dev), (u64) bio->bi_sector); +end_io: + bio->bi_end_io(bio, nr_sectors); break; } - } while (q->make_request_fn(q, rw, bh)); + + /* + * uh oh, need to split this bio... not implemented yet + */ + if (bio_sectors(bio) > q->max_sectors) + BUG(); + + /* + * If this device has partitions, remap block n + * of partition p to block n+start(p) of the disk. + */ + blk_partition_remap(bio); + + } while (q->make_request_fn(q, bio)); } +/* + * our default bio end_io callback handler for a buffer_head mapping. + */ +static int end_bio_bh_io_sync(struct bio *bio, int nr_sectors) +{ + struct buffer_head *bh = bio->bi_private; + + BIO_BUG_ON(nr_sectors != (bh->b_size >> 9)); + + /* + * I/O is complete -- remove from hash, end buffer_head, put bio + */ + bio_hash_remove(bio); + + bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags)); + bio_put(bio); + + return 0; +} /** - * submit_bh: submit a buffer_head to the block device later for I/O + * submit_bio: submit a bio to the block device layer for I/O * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) - * @bh: The &struct buffer_head which describes the I/O + * @bio: The &struct bio which describes the I/O * - * submit_bh() is very similar in purpose to generic_make_request(), and - * uses that function to do most of the work. + * submit_bio() is very similar in purpose to generic_make_request(), and + * uses that function to do most of the work. Both are fairly rough + * interfaces, @bio must be presetup and ready for I/O. * - * The extra functionality provided by submit_bh is to determine - * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev. - * This is is appropriate for IO requests that come from the buffer - * cache and page cache which (currently) always use aligned blocks. */ -void submit_bh(int rw, struct buffer_head * bh) +int submit_bio(int rw, struct bio *bio) +{ + int count = bio_sectors(bio); + + /* + * do some validity checks... 
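
The resolve loop above gives make_request_fn a simple contract: __make_request() queues the bio and returns 0, while a stacking driver may remap the bio in place and return nonzero so that generic_make_request() looks up the queue of the new target device (handing the bio on itself and returning 0 is the other option the comment mentions). A minimal linear remapper under that contract; struct stack_dev and stack_find() are hypothetical:

	#include <linux/bio.h>
	#include <linux/blkdev.h>

	struct stack_dev {			/* hypothetical */
		kdev_t		backing_dev;
		sector_t	start_sect;
	};

	static int stack_make_request(request_queue_t *q, struct bio *bio)
	{
		struct stack_dev *sd = stack_find(bio->bi_dev);	/* hypothetical */

		/* linear remap onto the backing device */
		bio->bi_dev = sd->backing_dev;
		bio->bi_sector += sd->start_sect;

		/* nonzero: let generic_make_request resolve the new queue */
		return 1;
	}
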
+ */ + BUG_ON(!bio->bi_end_io); + + BIO_BUG_ON(bio_offset(bio) > PAGE_SIZE); + BIO_BUG_ON(!bio_size(bio)); + BIO_BUG_ON(!bio->bi_io_vec); + + bio->bi_rw = rw; + + if (rw & WRITE) + kstat.pgpgout += count; + else + kstat.pgpgin += count; + + generic_make_request(bio); + return 1; +} + +/** + * submit_bh: submit a buffer_head to the block device layer for I/O + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) + * @bh: The &struct buffer_head which describes the I/O + * + **/ +int submit_bh(int rw, struct buffer_head * bh) { - int count = bh->b_size >> 9; + struct bio *bio; - if (!test_bit(BH_Lock, &bh->b_state)) - BUG(); + BUG_ON(!test_bit(BH_Lock, &bh->b_state)); + BUG_ON(!buffer_mapped(bh)); + BUG_ON(!bh->b_end_io); set_bit(BH_Req, &bh->b_state); /* - * First step, 'identity mapping' - RAID or LVM might - * further remap this. + * from here on down, it's all bio -- do the initial mapping, + * submit_bio -> generic_make_request may further map this bio around */ - bh->b_rdev = bh->b_dev; - bh->b_rsector = bh->b_blocknr * count; + bio = bio_alloc(GFP_NOIO, 1); - generic_make_request(rw, bh); + bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_next = NULL; + bio->bi_dev = bh->b_dev; + bio->bi_private = bh; + bio->bi_end_io = end_bio_bh_io_sync; + + bio->bi_io_vec->bvl_vec[0].bv_page = bh->b_page; + bio->bi_io_vec->bvl_vec[0].bv_len = bh->b_size; + bio->bi_io_vec->bvl_vec[0].bv_offset = bh_offset(bh); + + bio->bi_io_vec->bvl_cnt = 1; + bio->bi_io_vec->bvl_idx = 0; + bio->bi_io_vec->bvl_size = bh->b_size; - switch (rw) { - case WRITE: - kstat.pgpgout += count; - break; - default: - kstat.pgpgin += count; - break; - } + return submit_bio(rw, bio); } /** @@ -944,8 +1234,9 @@ * * Caveat: * All of the buffers must be for the same device, and must also be - * of the current approved size for the device. */ - + * a multiple of the current approved size for the device. + * + **/ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) { unsigned int major; @@ -963,7 +1254,7 @@ /* Verify requested block sizes. */ for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (bh->b_size % correct_size) { + if (bh->b_size & (correct_size - 1)) { printk(KERN_NOTICE "ll_rw_block: device %s: " "only %d-char blocks implemented (%u)\n", kdevname(bhs[0]->b_dev), @@ -1024,12 +1315,11 @@ extern int stram_device_init (void); #endif - /** * end_that_request_first - end I/O on one buffer. + * &q: queue that finished request * @req: the request being processed * @uptodate: 0 for I/O error - * @name: the name printed for an I/O error * * Description: * Ends I/O on the first buffer attached to @req, and sets it up @@ -1038,40 +1328,52 @@ * Return: * 0 - we are done with this request, call end_that_request_last() * 1 - still buffers pending for this request - * - * Caveat: - * Drivers implementing their own end_request handling must call - * blk_finished_io() appropriately. 
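
From a filesystem's point of view the submission interface above is unchanged: submit_bh() still takes a locked, mapped buffer_head, and completion still arrives through b_end_io; the bio is built and torn down underneath. The classic synchronous read idiom therefore still works as-is (minimal sketch, error handling trimmed):

	#include <linux/fs.h>
	#include <linux/locks.h>

	static int read_one_block(struct buffer_head *bh)
	{
		if (!buffer_uptodate(bh)) {
			/* locks the buffer and calls submit_bh(READ, bh) */
			ll_rw_block(READ, 1, &bh);
			/* sleeps until end_bio_bh_io_sync ran b_end_io */
			wait_on_buffer(bh);
		}
		return buffer_uptodate(bh) ? 0 : -EIO;
	}
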
**/ -int end_that_request_first (struct request *req, int uptodate, char *name) +int end_that_request_first(struct request *req, int uptodate, int nr_sectors) { - struct buffer_head * bh; + struct bio *bio, *nxt; int nsect; req->errors = 0; if (!uptodate) - printk("end_request: I/O error, dev %s (%s), sector %lu\n", - kdevname(req->rq_dev), name, req->sector); + printk("end_request: I/O error, dev %s, sector %lu\n", + kdevname(req->rq_dev), req->sector); + + if ((bio = req->bio) != NULL) { +next_chunk: + nsect = bio_iovec(bio)->bv_len >> 9; + + nr_sectors -= nsect; + + nxt = bio->bi_next; + bio->bi_next = NULL; + if (!bio_endio(bio, uptodate, nsect)) + req->bio = nxt; + else + bio->bi_next = nxt; - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; - blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { + if ((bio = req->bio) != NULL) { + bio_hash_add_unique(bio,req,req->q->hash_valid_counter); req->hard_sector += nsect; req->hard_nr_sectors -= nsect; req->sector = req->hard_sector; req->nr_sectors = req->hard_nr_sectors; - req->current_nr_sectors = bh->b_size >> 9; + req->current_nr_sectors = bio_iovec(bio)->bv_len >> 9; + req->hard_cur_sectors = req->current_nr_sectors; if (req->nr_sectors < req->current_nr_sectors) { - req->nr_sectors = req->current_nr_sectors; printk("end_request: buffer-list destroyed\n"); + req->nr_sectors = req->current_nr_sectors; } - req->buffer = bh->b_data; + + req->buffer = bio_data(bio); + /* + * end more in this run, or just return 'not-done' + */ + if (nr_sectors > 0) + goto next_chunk; + return 1; } } @@ -1080,7 +1382,7 @@ void end_that_request_last(struct request *req) { - if (req->waiting != NULL) + if (req->waiting) complete(req->waiting); blkdev_release_request(req); @@ -1105,7 +1407,6 @@ memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, sizeof(max_readahead)); - memset(max_sectors, 0, sizeof(max_sectors)); total_ram = nr_free_pages() << (PAGE_SHIFT - 10); @@ -1115,129 +1416,46 @@ */ queue_nr_requests = 64; if (total_ram > MB(32)) - queue_nr_requests = 128; + queue_nr_requests = 256; /* * Batch frees according to queue length */ - batch_requests = queue_nr_requests/4; + if ((batch_requests = queue_nr_requests / 4) > 32) + batch_requests = 32; printk("block: %d slots per queue, batch=%d\n", queue_nr_requests, batch_requests); -#ifdef CONFIG_AMIGA_Z2RAM - z2_init(); -#endif -#ifdef CONFIG_STRAM_SWAP - stram_device_init(); -#endif -#ifdef CONFIG_BLK_DEV_RAM - rd_init(); -#endif -#ifdef CONFIG_ISP16_CDI - isp16_init(); -#endif + blk_max_low_pfn = max_low_pfn; + blk_max_pfn = max_pfn; + #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE) ide_init(); /* this MUST precede hd_init */ #endif #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD) hd_init(); #endif -#ifdef CONFIG_BLK_DEV_PS2 - ps2esdi_init(); -#endif -#ifdef CONFIG_BLK_DEV_XD - xd_init(); -#endif -#ifdef CONFIG_BLK_DEV_MFM - mfm_init(); -#endif -#ifdef CONFIG_PARIDE - { extern void paride_init(void); paride_init(); }; -#endif -#ifdef CONFIG_MAC_FLOPPY - swim3_init(); -#endif -#ifdef CONFIG_BLK_DEV_SWIM_IOP - swimiop_init(); -#endif -#ifdef CONFIG_AMIGA_FLOPPY - amiga_floppy_init(); -#endif -#ifdef CONFIG_ATARI_FLOPPY - atari_floppy_init(); -#endif -#ifdef CONFIG_BLK_DEV_FD - floppy_init(); -#else #if defined(__i386__) /* Do we even need this? 
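
end_that_request_first() now takes the number of sectors to complete and walks the bio chain itself, so a driver's completion path reduces to the pattern below, run with the queue lock held (roughly what the end_request() helper in blk.h boils down to; the wrapper itself is illustrative):

	static void example_end_request(struct request *req, int uptodate,
					int nr_sectors)
	{
		/* returns 1 while bios remain attached to the request */
		if (end_that_request_first(req, uptodate, nr_sectors))
			return;

		/* everything completed: unlink and free the request */
		blkdev_dequeue_request(req);
		end_that_request_last(req);
	}
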
*/ outb_p(0xc, 0x3f2); #endif -#endif -#ifdef CONFIG_CDU31A - cdu31a_init(); -#endif -#ifdef CONFIG_ATARI_ACSI - acsi_init(); -#endif -#ifdef CONFIG_MCD - mcd_init(); -#endif -#ifdef CONFIG_MCDX - mcdx_init(); -#endif -#ifdef CONFIG_SBPCD - sbpcd_init(); -#endif -#ifdef CONFIG_AZTCD - aztcd_init(); -#endif -#ifdef CONFIG_CDU535 - sony535_init(); -#endif -#ifdef CONFIG_GSCD - gscd_init(); -#endif -#ifdef CONFIG_CM206 - cm206_init(); -#endif -#ifdef CONFIG_OPTCD - optcd_init(); -#endif -#ifdef CONFIG_SJCD - sjcd_init(); -#endif -#ifdef CONFIG_APBLOCK - ap_init(); -#endif -#ifdef CONFIG_DDV - ddv_init(); -#endif -#ifdef CONFIG_MDISK - mdisk_init(); -#endif -#ifdef CONFIG_DASD - dasd_init(); -#endif -#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK) - tapeblock_init(); -#endif -#ifdef CONFIG_BLK_DEV_XPRAM - xpram_init(); -#endif -#ifdef CONFIG_SUN_JSFLASH - jsfd_init(); -#endif return 0; }; -EXPORT_SYMBOL(io_request_lock); EXPORT_SYMBOL(end_that_request_first); EXPORT_SYMBOL(end_that_request_last); EXPORT_SYMBOL(blk_init_queue); EXPORT_SYMBOL(blk_get_queue); EXPORT_SYMBOL(blk_cleanup_queue); -EXPORT_SYMBOL(blk_queue_headactive); EXPORT_SYMBOL(blk_queue_make_request); +EXPORT_SYMBOL(blk_queue_bounce_limit); EXPORT_SYMBOL(generic_make_request); EXPORT_SYMBOL(blkdev_release_request); EXPORT_SYMBOL(generic_unplug_device); +EXPORT_SYMBOL(blk_attempt_remerge); +EXPORT_SYMBOL(blk_max_low_pfn); +EXPORT_SYMBOL(blk_queue_max_sectors); +EXPORT_SYMBOL(blk_queue_max_segments); +EXPORT_SYMBOL(blk_queue_max_segment_size); +EXPORT_SYMBOL(blk_queue_hardsect_size); +EXPORT_SYMBOL(blk_rq_map_sg); +EXPORT_SYMBOL(blk_nohighio); diff -u --recursive --new-file v2.5.0/linux/drivers/block/loop.c linux/drivers/block/loop.c --- v2.5.0/linux/drivers/block/loop.c Mon Nov 19 14:48:02 2001 +++ linux/drivers/block/loop.c Tue Nov 27 09:23:27 2001 @@ -168,8 +168,7 @@ lo->lo_device); } -static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +static int lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ struct address_space *mapping = file->f_dentry->d_inode->i_mapping; @@ -183,8 +182,8 @@ down(&mapping->host->i_sem); index = pos >> PAGE_CACHE_SHIFT; offset = pos & (PAGE_CACHE_SIZE - 1); - len = bh->b_size; - data = bh->b_data; + len = bio_size(bio); + data = bio_data(bio); while (len > 0) { int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize; int transfer_result; @@ -263,18 +262,17 @@ return size; } -static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +static int lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { struct lo_read_data cookie; read_descriptor_t desc; struct file *file; cookie.lo = lo; - cookie.data = bh->b_data; + cookie.data = bio_data(bio); cookie.bsize = bsize; desc.written = 0; - desc.count = bh->b_size; + desc.count = bio_size(bio); desc.buf = (char*)&cookie; desc.error = 0; spin_lock_irq(&lo->lo_lock); @@ -310,46 +308,46 @@ return IV; } -static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw) +static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) { loff_t pos; int ret; - pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset; + pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; - if (rw == WRITE) - ret = lo_send(lo, bh, loop_get_bs(lo), pos); + if (bio_rw(bio) == WRITE) + ret = lo_send(lo, bio, loop_get_bs(lo), pos); else - ret = lo_receive(lo, bh, 
loop_get_bs(lo), pos); + ret = lo_receive(lo, bio, loop_get_bs(lo), pos); return ret; } -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate); -static void loop_put_buffer(struct buffer_head *bh) +static int loop_end_io_transfer(struct bio *, int); +static void loop_put_buffer(struct bio *bio) { /* - * check b_end_io, may just be a remapped bh and not an allocated one + * check bi_end_io, may just be a remapped bio */ - if (bh && bh->b_end_io == loop_end_io_transfer) { - __free_page(bh->b_page); - kmem_cache_free(bh_cachep, bh); + if (bio && bio->bi_end_io == loop_end_io_transfer) { + __free_page(bio_page(bio)); + bio_put(bio); } } /* - * Add buffer_head to back of pending list + * Add bio to back of pending list */ -static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh) +static void loop_add_bio(struct loop_device *lo, struct bio *bio) { unsigned long flags; spin_lock_irqsave(&lo->lo_lock, flags); - if (lo->lo_bhtail) { - lo->lo_bhtail->b_reqnext = bh; - lo->lo_bhtail = bh; + if (lo->lo_biotail) { + lo->lo_biotail->bi_next = bio; + lo->lo_biotail = bio; } else - lo->lo_bh = lo->lo_bhtail = bh; + lo->lo_bio = lo->lo_biotail = bio; spin_unlock_irqrestore(&lo->lo_lock, flags); up(&lo->lo_bh_mutex); @@ -358,70 +356,60 @@ /* * Grab first pending buffer */ -static struct buffer_head *loop_get_bh(struct loop_device *lo) +static struct bio *loop_get_bio(struct loop_device *lo) { - struct buffer_head *bh; + struct bio *bio; spin_lock_irq(&lo->lo_lock); - if ((bh = lo->lo_bh)) { - if (bh == lo->lo_bhtail) - lo->lo_bhtail = NULL; - lo->lo_bh = bh->b_reqnext; - bh->b_reqnext = NULL; + if ((bio = lo->lo_bio)) { + if (bio == lo->lo_biotail) + lo->lo_biotail = NULL; + lo->lo_bio = bio->bi_next; + bio->bi_next = NULL; } spin_unlock_irq(&lo->lo_lock); - return bh; + return bio; } /* - * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE - * and lo->transfer stuff has already been done. if not, it was a READ - * so queue it for the loop thread and let it do the transfer out of - * b_end_io context (we don't want to do decrypt of a page with irqs + * if this was a WRITE lo->transfer stuff has already been done. 
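
loop_add_bio()/loop_get_bio() above implement a tiny work queue: an irq-safe spinlock protects a singly linked list threaded through bi_next, and lo_bh_mutex is used as a counting semaphore, one up() per queued bio, which loop_thread pairs with one down per bio handled. The producer side, condensed into a single equivalent form:

	static void loop_queue_bio(struct loop_device *lo, struct bio *bio)
	{
		unsigned long flags;

		/* may be called from bi_end_io, i.e. irq context */
		spin_lock_irqsave(&lo->lo_lock, flags);
		bio->bi_next = NULL;
		if (lo->lo_biotail)
			lo->lo_biotail->bi_next = bio;
		else
			lo->lo_bio = bio;
		lo->lo_biotail = bio;
		spin_unlock_irqrestore(&lo->lo_lock, flags);

		up(&lo->lo_bh_mutex);	/* one wakeup per queued bio */
	}
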
for READs, + * queue it for the loop thread and let it do the transfer out of + * bi_end_io context (we don't want to do decrypt of a page with irqs * disabled) */ -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate) +static int loop_end_io_transfer(struct bio *bio, int nr_sectors) { - struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)]; + struct loop_device *lo = &loop_dev[MINOR(bio->bi_dev)]; + int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) { - struct buffer_head *rbh = bh->b_private; + if (!uptodate || bio_rw(bio) == WRITE) { + struct bio *rbh = bio->bi_private; - rbh->b_end_io(rbh, uptodate); + bio_endio(rbh, uptodate, nr_sectors); if (atomic_dec_and_test(&lo->lo_pending)) up(&lo->lo_bh_mutex); - loop_put_buffer(bh); + loop_put_buffer(bio); } else - loop_add_bh(lo, bh); + loop_add_bio(lo, bio); + + return 0; } -static struct buffer_head *loop_get_buffer(struct loop_device *lo, - struct buffer_head *rbh) +static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh) { - struct buffer_head *bh; + struct page *page; + struct bio *bio; /* * for xfer_funcs that can operate on the same bh, do that */ if (lo->lo_flags & LO_FLAGS_BH_REMAP) { - bh = rbh; + bio = rbh; goto out_bh; } - do { - bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO); - if (bh) - break; - - run_task_queue(&tq_disk); - schedule_timeout(HZ); - } while (1); - memset(bh, 0, sizeof(*bh)); - - bh->b_size = rbh->b_size; - bh->b_dev = rbh->b_rdev; - bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock); + bio = bio_alloc(GFP_NOIO, 1); /* * easy way out, although it does waste some memory for < PAGE_SIZE @@ -429,41 +417,46 @@ * so can we :-) */ do { - bh->b_page = alloc_page(GFP_NOIO); - if (bh->b_page) + page = alloc_page(GFP_NOIO); + if (page) break; run_task_queue(&tq_disk); schedule_timeout(HZ); } while (1); - bh->b_data = page_address(bh->b_page); - bh->b_end_io = loop_end_io_transfer; - bh->b_private = rbh; - init_waitqueue_head(&bh->b_wait); + bio->bi_io_vec->bvl_vec[0].bv_page = page; + bio->bi_io_vec->bvl_vec[0].bv_len = bio_size(rbh); + bio->bi_io_vec->bvl_vec[0].bv_offset = bio_offset(rbh); + + bio->bi_io_vec->bvl_cnt = 1; + bio->bi_io_vec->bvl_idx = 1; + bio->bi_io_vec->bvl_size = bio_size(rbh); + + bio->bi_end_io = loop_end_io_transfer; + bio->bi_private = rbh; out_bh: - bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9); + bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9); + bio->bi_rw = rbh->bi_rw; spin_lock_irq(&lo->lo_lock); - bh->b_rdev = lo->lo_device; + bio->bi_dev = lo->lo_device; spin_unlock_irq(&lo->lo_lock); - return bh; + return bio; } -static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh) +static int loop_make_request(request_queue_t *q, struct bio *rbh) { - struct buffer_head *bh = NULL; + struct bio *bh = NULL; struct loop_device *lo; unsigned long IV; + int rw = bio_rw(rbh); - if (!buffer_locked(rbh)) - BUG(); - - if (MINOR(rbh->b_rdev) >= max_loop) + if (MINOR(rbh->bi_dev) >= max_loop) goto out; - lo = &loop_dev[MINOR(rbh->b_rdev)]; + lo = &loop_dev[MINOR(rbh->bi_dev)]; spin_lock_irq(&lo->lo_lock); if (lo->lo_state != Lo_bound) goto inactive; @@ -476,25 +469,17 @@ } else if (rw == READA) { rw = READ; } else if (rw != READ) { - printk(KERN_ERR "loop: unknown command (%d)\n", rw); + printk(KERN_ERR "loop: unknown command (%x)\n", rw); goto err; } -#if CONFIG_HIGHMEM - rbh = create_bounce(rw, rbh); -#endif + blk_queue_bounce(q, &rbh); /* * file backed, queue for 
loop_thread to handle */ if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - /* - * rbh locked at this point, noone else should clear - * the dirty flag - */ - if (rw == WRITE) - set_bit(BH_Dirty, &rbh->b_state); - loop_add_bh(lo, rbh); + loop_add_bio(lo, rbh); return 0; } @@ -502,15 +487,14 @@ * piggy old buffer on original, and submit for I/O */ bh = loop_get_buffer(lo, rbh); - IV = loop_get_iv(lo, rbh->b_rsector); + IV = loop_get_iv(lo, rbh->bi_sector); if (rw == WRITE) { - set_bit(BH_Dirty, &bh->b_state); - if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data, - bh->b_size, IV)) + if (lo_do_transfer(lo, WRITE, bio_data(bh), bio_data(rbh), + bio_size(bh), IV)) goto err; } - generic_make_request(rw, bh); + generic_make_request(bh); return 0; err: @@ -518,14 +502,14 @@ up(&lo->lo_bh_mutex); loop_put_buffer(bh); out: - buffer_IO_error(rbh); + bio_io_error(rbh); return 0; inactive: spin_unlock_irq(&lo->lo_lock); goto out; } -static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh) +static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) { int ret; @@ -533,19 +517,17 @@ * For block backed loop, we know this is a READ */ if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state); - - ret = do_bh_filebacked(lo, bh, rw); - bh->b_end_io(bh, !ret); + ret = do_bio_filebacked(lo, bio); + bio_endio(bio, !ret, bio_sectors(bio)); } else { - struct buffer_head *rbh = bh->b_private; - unsigned long IV = loop_get_iv(lo, rbh->b_rsector); + struct bio *rbh = bio->bi_private; + unsigned long IV = loop_get_iv(lo, rbh->bi_sector); - ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data, - bh->b_size, IV); + ret = lo_do_transfer(lo, READ, bio_data(bio), bio_data(rbh), + bio_size(bio), IV); - rbh->b_end_io(rbh, !ret); - loop_put_buffer(bh); + bio_endio(rbh, !ret, bio_sectors(bio)); + loop_put_buffer(bio); } } @@ -558,7 +540,7 @@ static int loop_thread(void *data) { struct loop_device *lo = data; - struct buffer_head *bh; + struct bio *bio; daemonize(); exit_files(current); @@ -592,12 +574,12 @@ if (!atomic_read(&lo->lo_pending)) break; - bh = loop_get_bh(lo); - if (!bh) { - printk("loop: missing bh\n"); + bio = loop_get_bio(lo); + if (!bio) { + printk("loop: missing bio\n"); continue; } - loop_handle_bh(lo, bh); + loop_handle_bio(lo, bio); /* * upped both for pending work and tear-down, lo_pending @@ -683,7 +665,7 @@ set_blocksize(dev, bs); - lo->lo_bh = lo->lo_bhtail = NULL; + lo->lo_bio = lo->lo_biotail = NULL; kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); down(&lo->lo_sem); @@ -873,7 +855,7 @@ err = -ENXIO; break; } - err = put_user((unsigned long)loop_sizes[lo->lo_number] << 1, (unsigned long *) arg); + err = put_user((unsigned long) loop_sizes[lo->lo_number] << 1, (unsigned long *) arg); break; case BLKGETSIZE64: if (lo->lo_state != Lo_bound) { @@ -1019,11 +1001,11 @@ loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); if (!loop_sizes) - goto out_sizes; + goto out_mem; loop_blksizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); if (!loop_blksizes) - goto out_blksizes; + goto out_mem; blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request); @@ -1047,9 +1029,8 @@ printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop); return 0; -out_sizes: +out_mem: kfree(loop_dev); -out_blksizes: kfree(loop_sizes); printk(KERN_ERR "loop: ran out of memory\n"); return -ENOMEM; diff -u --recursive --new-file v2.5.0/linux/drivers/block/nbd.c linux/drivers/block/nbd.c --- v2.5.0/linux/drivers/block/nbd.c 
Fri Oct 26 15:39:02 2001 +++ linux/drivers/block/nbd.c Tue Nov 27 09:23:27 2001 @@ -165,14 +165,14 @@ FAIL("Sendmsg failed for control."); if (req->cmd == WRITE) { - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; DEBUG("data, "); do { - result = nbd_xmit(1, sock, bh->b_data, bh->b_size, bh->b_reqnext == NULL ? 0 : MSG_MORE); + result = nbd_xmit(1, sock, bio_data(bio), bio_size(bio), bio->bi_next == NULL ? 0 : MSG_MORE); if (result <= 0) FAIL("Send data failed."); - bh = bh->b_reqnext; - } while(bh); + bio = bio->bi_next; + } while(bio); } return; @@ -205,14 +205,14 @@ if (ntohl(reply.error)) FAIL("Other side returned error."); if (req->cmd == READ) { - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; DEBUG("data, "); do { - result = nbd_xmit(0, lo->sock, bh->b_data, bh->b_size, MSG_WAITALL); + result = nbd_xmit(0, lo->sock, bio_data(bio), bio_size(bio), MSG_WAITALL); if (result <= 0) HARDFAIL("Recv data failed."); - bh = bh->b_reqnext; - } while(bh); + bio = bio->bi_next; + } while(bio); } DEBUG("done.\n"); return req; @@ -250,7 +250,7 @@ goto out; } #endif - list_del(&req->queue); + blkdev_dequeue_request(req); up (&lo->queue_lock); nbd_end_request(req); @@ -285,7 +285,7 @@ } #endif req->errors++; - list_del(&req->queue); + blkdev_dequeue_request(req); up(&lo->queue_lock); nbd_end_request(req); @@ -333,22 +333,22 @@ #endif req->errors = 0; blkdev_dequeue_request(req); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); down (&lo->queue_lock); - list_add(&req->queue, &lo->queue_head); + list_add(&req->queuelist, &lo->queue_head); nbd_send_req(lo->sock, req); /* Why does this block? */ up (&lo->queue_lock); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); continue; error_out: req->errors++; blkdev_dequeue_request(req); - spin_unlock(&io_request_lock); + spin_unlock(&q->queue_lock); nbd_end_request(req); - spin_lock(&io_request_lock); + spin_lock(&q->queue_lock); } return; } @@ -501,7 +501,7 @@ #endif blksize_size[MAJOR_NR] = nbd_blksizes; blk_size[MAJOR_NR] = nbd_sizes; - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request); + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request, "nbd"); blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); for (i = 0; i < MAX_NBD; i++) { nbd_dev[i].refcnt = 0; diff -u --recursive --new-file v2.5.0/linux/drivers/block/paride/pd.c linux/drivers/block/paride/pd.c --- v2.5.0/linux/drivers/block/paride/pd.c Fri Nov 9 13:58:03 2001 +++ linux/drivers/block/paride/pd.c Tue Nov 27 09:23:27 2001 @@ -287,7 +287,6 @@ static struct hd_struct pd_hd[PD_DEVS]; static int pd_sizes[PD_DEVS]; static int pd_blocksizes[PD_DEVS]; -static int pd_maxsectors[PD_DEVS]; #define PD_NAMELEN 8 @@ -330,7 +329,6 @@ static int pd_cmd; /* current command READ/WRITE */ static int pd_unit; /* unit of current request */ static int pd_dev; /* minor of current request */ -static int pd_poffs; /* partition offset of current minor */ static char * pd_buf; /* buffer for request in progress */ static DECLARE_WAIT_QUEUE_HEAD(pd_wait_open); @@ -397,6 +395,7 @@ } q = BLK_DEFAULT_QUEUE(MAJOR_NR); blk_init_queue(q, DEVICE_REQUEST); + blk_queue_max_sectors(q, cluster); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read ahead */ pd_gendisk.major = major; @@ -406,9 +405,6 @@ for(i=0;ii_rdev)) return -EINVAL; - dev = MINOR(inode->i_rdev); + if (!inode || !inode->i_rdev) + return -EINVAL; unit = DEVICE_NR(inode->i_rdev); - if (dev >= PD_DEVS) return -EINVAL; - if (!PD.present) return -ENODEV; + if (!PD.present) + return 
-ENODEV; - switch (cmd) { + switch (cmd) { case CDROMEJECT: if (PD.access == 1) pd_eject(unit); return 0; - case HDIO_GETGEO: - if (!geo) return -EINVAL; - err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); - if (err) return err; + case HDIO_GETGEO: + if (!geo) return -EINVAL; + err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); + if (err) return err; if (PD.alt_geom) { - put_user(PD.capacity/(PD_LOG_HEADS*PD_LOG_SECTS), + put_user(PD.capacity/(PD_LOG_HEADS*PD_LOG_SECTS), (short *) &geo->cylinders); - put_user(PD_LOG_HEADS, (char *) &geo->heads); - put_user(PD_LOG_SECTS, (char *) &geo->sectors); + put_user(PD_LOG_HEADS, (char *) &geo->heads); + put_user(PD_LOG_SECTS, (char *) &geo->sectors); } else { - put_user(PD.cylinders, (short *) &geo->cylinders); - put_user(PD.heads, (char *) &geo->heads); - put_user(PD.sectors, (char *) &geo->sectors); + put_user(PD.cylinders, (short *) &geo->cylinders); + put_user(PD.heads, (char *) &geo->heads); + put_user(PD.sectors, (char *) &geo->sectors); } - put_user(pd_hd[dev].start_sect,(long *)&geo->start); - return 0; - case BLKRRPART: + put_user(get_start_sect(inode->i_rdev), (long *)&geo->start); + return 0; + case BLKRRPART: if (!capable(CAP_SYS_ADMIN)) return -EACCES; - return pd_revalidate(inode->i_rdev); + return pd_revalidate(inode->i_rdev); case BLKGETSIZE: case BLKGETSIZE64: case BLKROSET: @@ -488,9 +484,9 @@ case BLKFLSBUF: case BLKPG: return blk_ioctl(inode->i_rdev, cmd, arg); - default: - return -EINVAL; - } + default: + return -EINVAL; + } } static int pd_release (struct inode *inode, struct file *file) @@ -526,36 +522,32 @@ } static int pd_revalidate(kdev_t dev) +{ + int unit, res; + long flags; -{ int p, unit, minor; - long flags; - - unit = DEVICE_NR(dev); - if ((unit >= PD_UNITS) || (!PD.present)) return -ENODEV; - - save_flags(flags); - cli(); - if (PD.access > 1) { - restore_flags(flags); - return -EBUSY; - } - pd_valid = 0; - restore_flags(flags); + unit = DEVICE_NR(dev); + if ((unit >= PD_UNITS) || !PD.present) + return -ENODEV; - for (p=(PD_PARTNS-1);p>=0;p--) { - minor = p + unit*PD_PARTNS; - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - pd_hd[minor].start_sect = 0; - pd_hd[minor].nr_sects = 0; - } + save_flags(flags); + cli(); + if (PD.access > 1) { + restore_flags(flags); + return -EBUSY; + } + pd_valid = 0; + restore_flags(flags); - if (pd_identify(unit)) - grok_partitions(&pd_gendisk,unit,1<cmd; - pd_poffs = pd_hd[pd_dev].start_sect; - pd_block += pd_poffs; pd_buf = CURRENT->buffer; pd_retries = 0; @@ -902,7 +890,7 @@ (CURRENT->cmd != pd_cmd) || (MINOR(CURRENT->rq_dev) != pd_dev) || (CURRENT->rq_status == RQ_INACTIVE) || - (CURRENT->sector+pd_poffs != pd_block)) + (CURRENT->sector != pd_block)) printk("%s: OUCH: request list changed unexpectedly\n", PD.name); diff -u --recursive --new-file v2.5.0/linux/drivers/block/paride/pf.c linux/drivers/block/paride/pf.c --- v2.5.0/linux/drivers/block/paride/pf.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/block/paride/pf.c Tue Nov 27 09:23:27 2001 @@ -458,7 +458,7 @@ if (PF.access == 1) { pf_eject(unit); return 0; - } + } case HDIO_GETGEO: if (!geo) return -EINVAL; err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); diff -u --recursive --new-file v2.5.0/linux/drivers/block/ps2esdi.c linux/drivers/block/ps2esdi.c --- v2.5.0/linux/drivers/block/ps2esdi.c Fri Nov 9 14:01:21 2001 +++ linux/drivers/block/ps2esdi.c Tue Nov 27 09:23:27 2001 @@ -66,6 +66,7 @@ #define TYPE_0_CMD_BLK_LENGTH 2 #define TYPE_1_CMD_BLK_LENGTH 4 +#define PS2ESDI_LOCK (&((BLK_DEFAULT_QUEUE(MAJOR_NR))->queue_lock)) static 
void reset_ctrl(void); @@ -118,7 +119,6 @@ static char ps2esdi_valid[MAX_HD]; static int ps2esdi_sizes[MAX_HD << 6]; static int ps2esdi_blocksizes[MAX_HD << 6]; -static int ps2esdi_maxsect[MAX_HD << 6]; static int ps2esdi_drives; static struct hd_struct ps2esdi[MAX_HD << 6]; static u_short io_base; @@ -221,8 +221,7 @@ } void -cleanup_module(void) -{ +cleanup_module(void) { if(ps2esdi_slot) { mca_mark_as_unused(ps2esdi_slot); mca_set_adapter_procfn(ps2esdi_slot, NULL, NULL); @@ -231,8 +230,9 @@ free_dma(dma_arb_level); free_irq(PS2ESDI_IRQ, NULL); devfs_unregister_blkdev(MAJOR_NR, "ed"); - del_gendisk(&ps2esdi_gendisk); blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); + del_gendisk(&ps2esdi_gendisk); + blk_clear(MAJOR_NR); } #endif /* MODULE */ @@ -415,16 +415,13 @@ ps2esdi_gendisk.nr_real = ps2esdi_drives; - /* 128 was old default, maybe maxsect=255 is ok too? - Paul G. */ - for (i = 0; i < (MAX_HD << 6); i++) { - ps2esdi_maxsect[i] = 128; + for (i = 0; i < (MAX_HD << 6); i++) ps2esdi_blocksizes[i] = 1024; - } request_dma(dma_arb_level, "ed"); request_region(io_base, 4, "ed"); blksize_size[MAJOR_NR] = ps2esdi_blocksizes; - max_sectors[MAJOR_NR] = ps2esdi_maxsect; + blk_queue_max_sectors(BLK_DEFAULT_QUEUE(MAJOR_NR), 128); for (i = 0; i < ps2esdi_drives; i++) { register_disk(&ps2esdi_gendisk,MKDEV(MAJOR_NR,i<<6),1<<6, @@ -495,13 +492,9 @@ CURRENT->current_nr_sectors); #endif - - block = CURRENT->sector + ps2esdi[MINOR(CURRENT->rq_dev)].start_sect; - -#if 0 - printk("%s: blocknumber : %d\n", DEVICE_NAME, block); -#endif + block = CURRENT->sector; count = CURRENT->current_nr_sectors; + switch (CURRENT->cmd) { case READ: ps2esdi_readwrite(READ, CURRENT_DEV, block, count); @@ -958,10 +951,10 @@ break; } if(ending != -1) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(PS2ESDI_LOCK, flags); end_request(ending); do_ps2esdi_request(BLK_DEFAULT_QUEUE(MAJOR_NR)); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(PS2ESDI_LOCK, flags); } } /* handle interrupts */ @@ -1100,10 +1093,10 @@ put_user(ps2esdi_info[dev].head, (char *) &geometry->heads); put_user(ps2esdi_info[dev].sect, (char *) &geometry->sectors); put_user(ps2esdi_info[dev].cyl, (short *) &geometry->cylinders); - put_user(ps2esdi[MINOR(inode->i_rdev)].start_sect, + put_user(get_start_sect(inode->i_rdev), (long *) &geometry->start); - return (0); + return 0; } break; @@ -1132,8 +1125,7 @@ static int ps2esdi_reread_partitions(kdev_t dev) { int target = DEVICE_NR(dev); - int start = target << ps2esdi_gendisk.minor_shift; - int partition; + int res; cli(); ps2esdi_valid[target] = (access_count[target] != 1); @@ -1141,21 +1133,16 @@ if (ps2esdi_valid[target]) return (-EBUSY); - for (partition = ps2esdi_gendisk.max_p - 1; - partition >= 0; partition--) { - int minor = (start | partition); - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - ps2esdi_gendisk.part[minor].start_sect = 0; - ps2esdi_gendisk.part[minor].nr_sects = 0; - } - - grok_partitions(&ps2esdi_gendisk, target, 1<<6, - ps2esdi_info[target].head * ps2esdi_info[target].cyl * ps2esdi_info[target].sect); - + res = wipe_partitions(dev); + if (res == 0) + grok_partitions(dev, ps2esdi_info[target].head + * ps2esdi_info[target].cyl + * ps2esdi_info[target].sect); + ps2esdi_valid[target] = 1; wake_up(&ps2esdi_wait_open); - return (0); + return (res); } static void ps2esdi_reset_timer(unsigned long unused) diff -u --recursive --new-file v2.5.0/linux/drivers/block/rd.c linux/drivers/block/rd.c --- v2.5.0/linux/drivers/block/rd.c Fri Nov 9 14:15:00 2001 
+++ linux/drivers/block/rd.c Tue Nov 27 09:23:27 2001 @@ -98,7 +98,7 @@ static unsigned long rd_length[NUM_RAMDISKS]; /* Size of RAM disks in bytes */ static int rd_hardsec[NUM_RAMDISKS]; /* Size of real blocks in bytes */ static int rd_blocksizes[NUM_RAMDISKS]; /* Size of 1024 byte blocks :) */ -static int rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */ +static int rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */ static devfs_handle_t devfs_handle; static struct block_device *rd_bdev[NUM_RAMDISKS];/* Protected device data */ @@ -227,19 +227,18 @@ commit_write: ramdisk_commit_write, }; -static int rd_blkdev_pagecache_IO(int rw, struct buffer_head * sbh, int minor) +static int rd_blkdev_pagecache_IO(int rw, struct bio *sbh, int minor) { struct address_space * mapping; unsigned long index; int offset, size, err; err = -EIO; - err = 0; mapping = rd_bdev[minor]->bd_inode->i_mapping; - index = sbh->b_rsector >> (PAGE_CACHE_SHIFT - 9); - offset = (sbh->b_rsector << 9) & ~PAGE_CACHE_MASK; - size = sbh->b_size; + index = sbh->bi_sector >> (PAGE_CACHE_SHIFT - 9); + offset = (sbh->bi_sector << 9) & ~PAGE_CACHE_MASK; + size = bio_size(sbh); do { int count; @@ -276,18 +275,18 @@ if (rw == READ) { src = kmap(page); src += offset; - dst = bh_kmap(sbh); + dst = bio_kmap(sbh); } else { dst = kmap(page); dst += offset; - src = bh_kmap(sbh); + src = bio_kmap(sbh); } offset = 0; memcpy(dst, src, count); kunmap(page); - bh_kunmap(sbh); + bio_kunmap(sbh); if (rw == READ) { flush_dcache_page(page); @@ -311,19 +310,19 @@ * 19-JAN-1998 Richard Gooch Added devfs support * */ -static int rd_make_request(request_queue_t * q, int rw, struct buffer_head *sbh) +static int rd_make_request(request_queue_t * q, struct bio *sbh) { unsigned int minor; unsigned long offset, len; + int rw = sbh->bi_rw; - minor = MINOR(sbh->b_rdev); + minor = MINOR(sbh->bi_dev); if (minor >= NUM_RAMDISKS) goto fail; - - offset = sbh->b_rsector << 9; - len = sbh->b_size; + offset = sbh->bi_sector << 9; + len = bio_size(sbh); if ((offset + len) > rd_length[minor]) goto fail; @@ -338,10 +337,11 @@ if (rd_blkdev_pagecache_IO(rw, sbh, minor)) goto fail; - sbh->b_end_io(sbh,1); + set_bit(BIO_UPTODATE, &sbh->bi_flags); + sbh->bi_end_io(sbh, len >> 9); return 0; fail: - sbh->b_end_io(sbh,0); + bio_io_error(sbh); return 0; } @@ -477,9 +477,7 @@ devfs_unregister (devfs_handle); unregister_blkdev( MAJOR_NR, "ramdisk" ); - hardsect_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; + blk_clear(MAJOR_NR); } #endif @@ -524,7 +522,6 @@ register_disk(NULL, MKDEV(MAJOR_NR,INITRD_MINOR), 1, &rd_bd_op, rd_size<<1); #endif - hardsect_size[MAJOR_NR] = rd_hardsec; /* Size of the RAM disk blocks */ blksize_size[MAJOR_NR] = rd_blocksizes; /* Avoid set_blocksize() check */ blk_size[MAJOR_NR] = rd_kbsize; /* Size of the RAM disk in kB */ diff -u --recursive --new-file v2.5.0/linux/drivers/block/xd.c linux/drivers/block/xd.c --- v2.5.0/linux/drivers/block/xd.c Fri Nov 9 14:01:21 2001 +++ linux/drivers/block/xd.c Tue Nov 27 09:23:27 2001 @@ -257,7 +257,6 @@ } xd_gendisk.nr_real = xd_drives; - } /* xd_open: open a device */ @@ -292,7 +291,7 @@ if (CURRENT_DEV < xd_drives && CURRENT->sector + CURRENT->nr_sectors <= xd_struct[MINOR(CURRENT->rq_dev)].nr_sects) { - block = CURRENT->sector + xd_struct[MINOR(CURRENT->rq_dev)].start_sect; + block = CURRENT->sector; count = CURRENT->nr_sectors; switch (CURRENT->cmd) { @@ -329,7 +328,7 @@ g.heads = xd_info[dev].heads; g.sectors = xd_info[dev].sectors; g.cylinders = 
xd_info[dev].cylinders; - g.start = xd_struct[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user(geometry, &g, sizeof g) ? -EFAULT : 0; } case HDIO_SET_DMA: @@ -337,7 +336,8 @@ if (xdc_busy) return -EBUSY; nodma = !arg; if (nodma && xd_dma_buffer) { - xd_dma_mem_free((unsigned long)xd_dma_buffer, xd_maxsectors * 0x200); + xd_dma_mem_free((unsigned long)xd_dma_buffer, + xd_maxsectors * 0x200); xd_dma_buffer = 0; } return 0; @@ -378,11 +378,9 @@ static int xd_reread_partitions(kdev_t dev) { int target; - int start; - int partition; + int res; target = DEVICE_NR(dev); - start = target << xd_gendisk.minor_shift; cli(); xd_valid[target] = (xd_access[target] != 1); @@ -390,20 +388,16 @@ if (xd_valid[target]) return -EBUSY; - for (partition = xd_gendisk.max_p - 1; partition >= 0; partition--) { - int minor = (start | partition); - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - xd_gendisk.part[minor].start_sect = 0; - xd_gendisk.part[minor].nr_sects = 0; - }; - - grok_partitions(&xd_gendisk, target, 1<<6, - xd_info[target].heads * xd_info[target].cylinders * xd_info[target].sectors); + res = wipe_partitions(dev); + if (!res) + grok_partitions(dev, xd_info[target].heads + * xd_info[target].cylinders + * xd_info[target].sectors); xd_valid[target] = 1; wake_up(&xd_wait_open); - return 0; + return res; } /* xd_readwrite: handle a read/write request */ @@ -1105,12 +1099,9 @@ static void xd_done (void) { - blksize_size[MAJOR_NR] = NULL; blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); - blk_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; - read_ahead[MAJOR_NR] = 0; del_gendisk(&xd_gendisk); + blk_clear(MAJOR_NR); release_region(xd_iobase,4); } diff -u --recursive --new-file v2.5.0/linux/drivers/cdrom/cdrom.c linux/drivers/cdrom/cdrom.c --- v2.5.0/linux/drivers/cdrom/cdrom.c Fri Nov 16 10:14:08 2001 +++ linux/drivers/cdrom/cdrom.c Tue Nov 27 09:23:27 2001 @@ -1987,7 +1987,7 @@ return -EINVAL; /* FIXME: we need upper bound checking, too!! */ - if (lba < 0 || ra.nframes <= 0) + if (lba < 0 || ra.nframes <= 0 || ra.nframes > 64) return -EINVAL; /* diff -u --recursive --new-file v2.5.0/linux/drivers/cdrom/cdu31a.c linux/drivers/cdrom/cdu31a.c --- v2.5.0/linux/drivers/cdrom/cdu31a.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/cdrom/cdu31a.c Tue Nov 27 09:23:27 2001 @@ -1583,7 +1583,10 @@ /* Make sure we have a valid TOC. */ sony_get_toc(); - spin_unlock_irq(&io_request_lock); + /* + * jens: driver has lots of races + */ + spin_unlock_irq(&q->queue_lock); /* Make sure the timer is cancelled. */ del_timer(&cdu31a_abort_timer); @@ -1730,7 +1733,7 @@ } end_do_cdu31a_request: - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); #if 0 /* After finished, cancel any pending operations. 
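
The CD-ROM conversions in these hunks are mechanical: the global io_request_lock becomes the device's own q->queue_lock, and a request function that drives slow, polled hardware drops that lock around the transfer exactly as it used to drop the global one. The common shape, as an illustrative skeleton (example_polled_transfer() is hypothetical):

	static void example_cd_request(request_queue_t *q)
	{
		/* entered with q->queue_lock held, interrupts disabled */
		while (!list_empty(&q->queue_head)) {
			struct request *req =
				blkdev_entry_to_request(q->queue_head.next);

			spin_unlock_irq(&q->queue_lock);

			example_polled_transfer(req);	/* slow, may sleep */

			spin_lock_irq(&q->queue_lock);
			end_request(1);		/* blk.h helper, uses CURRENT */
		}
	}
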
*/ abort_read(); diff -u --recursive --new-file v2.5.0/linux/drivers/cdrom/cm206.c linux/drivers/cdrom/cm206.c --- v2.5.0/linux/drivers/cdrom/cm206.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/cdrom/cm206.c Tue Nov 27 09:23:27 2001 @@ -866,7 +866,7 @@ end_request(0); continue; } - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); error = 0; for (i = 0; i < CURRENT->nr_sectors; i++) { int e1, e2; @@ -893,7 +893,7 @@ debug(("cm206_request: %d %d\n", e1, e2)); } } - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); end_request(!error); } } diff -u --recursive --new-file v2.5.0/linux/drivers/cdrom/sbpcd.c linux/drivers/cdrom/sbpcd.c --- v2.5.0/linux/drivers/cdrom/sbpcd.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/cdrom/sbpcd.c Tue Nov 27 09:23:27 2001 @@ -4930,7 +4930,7 @@ sbpcd_end_request(req, 0); if (req -> sector == -1) sbpcd_end_request(req, 0); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); down(&ioctl_read_sem); if (req->cmd != READ) @@ -4970,7 +4970,7 @@ xnr, req, req->sector, req->nr_sectors, jiffies); #endif up(&ioctl_read_sem); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 1); goto request_loop; } @@ -5011,7 +5011,7 @@ xnr, req, req->sector, req->nr_sectors, jiffies); #endif up(&ioctl_read_sem); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 1); goto request_loop; } @@ -5027,7 +5027,7 @@ #endif up(&ioctl_read_sem); sbp_sleep(0); /* wait a bit, try again */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 0); goto request_loop; } diff -u --recursive --new-file v2.5.0/linux/drivers/char/joystick/ns558.c linux/drivers/char/joystick/ns558.c --- v2.5.0/linux/drivers/char/joystick/ns558.c Wed Sep 12 15:34:06 2001 +++ linux/drivers/char/joystick/ns558.c Sun Nov 25 09:43:42 2001 @@ -153,11 +153,7 @@ return port; } -#if defined(CONFIG_ISAPNP) || (defined(CONFIG_ISAPNP_MODULE) && defined(MODULE)) -#define NSS558_ISAPNP -#endif - -#ifdef NSS558_ISAPNP +#ifdef __ISAPNP__ static struct isapnp_device_id pnp_devids[] = { { ISAPNP_ANY_ID, ISAPNP_ANY_ID, ISAPNP_VENDOR('@','P','@'), ISAPNP_DEVICE(0x0001), 0 }, @@ -229,7 +225,7 @@ int __init ns558_init(void) { int i = 0; -#ifdef NSS558_ISAPNP +#ifdef __ISAPNP__ struct isapnp_device_id *devid; struct pci_dev *dev = NULL; #endif @@ -245,7 +241,7 @@ * Probe for PnP ports. 
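
The joystick and serial changes here replace a per-driver guess at "is ISAPnP usable from this compilation unit" with the single __ISAPNP__ symbol, which is assumed to be provided by <linux/isapnp.h> with the built-in versus module logic handled in one place. Side by side:

	/* before: each driver re-derived the condition */
	#if defined(CONFIG_ISAPNP) || \
	    (defined(CONFIG_ISAPNP_MODULE) && defined(MODULE))
	#define NSS558_ISAPNP
	#endif

	#ifdef NSS558_ISAPNP
	/* ... PnP probe code ... */
	#endif

	/* after: one centrally defined symbol */
	#include <linux/isapnp.h>

	#ifdef __ISAPNP__
	/* ... PnP probe code ... */
	#endif
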
*/ -#ifdef NSS558_ISAPNP +#ifdef __ISAPNP__ for (devid = pnp_devids; devid->vendor; devid++) { while ((dev = isapnp_find_dev(NULL, devid->vendor, devid->function, dev))) { ns558 = ns558_pnp_probe(dev, ns558); @@ -264,7 +260,7 @@ gameport_unregister_port(&port->gameport); switch (port->type) { -#ifdef NSS558_ISAPNP +#ifdef __ISAPNP__ case NS558_PNP: if (port->dev->deactivate) port->dev->deactivate(port->dev); diff -u --recursive --new-file v2.5.0/linux/drivers/char/raw.c linux/drivers/char/raw.c --- v2.5.0/linux/drivers/char/raw.c Sat Sep 22 20:35:43 2001 +++ linux/drivers/char/raw.c Tue Nov 27 09:23:27 2001 @@ -126,10 +126,8 @@ if (is_mounted(rdev)) { if (blksize_size[MAJOR(rdev)]) sector_size = blksize_size[MAJOR(rdev)][MINOR(rdev)]; - } else { - if (hardsect_size[MAJOR(rdev)]) - sector_size = hardsect_size[MAJOR(rdev)][MINOR(rdev)]; - } + } else + sector_size = get_hardsect_size(rdev); set_blocksize(rdev, sector_size); raw_devices[minor].sector_size = sector_size; @@ -273,16 +271,14 @@ struct kiobuf * iobuf; int new_iobuf; int err = 0; - unsigned long blocknr, blocks; + unsigned long blocks; size_t transferred; int iosize; - int i; int minor; kdev_t dev; unsigned long limit; - int sector_size, sector_bits, sector_mask; - int max_sectors; + sector_t blocknr; /* * First, a few checks on device size limits @@ -307,7 +303,6 @@ sector_size = raw_devices[minor].sector_size; sector_bits = raw_devices[minor].sector_bits; sector_mask = sector_size- 1; - max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9); if (blk_size[MAJOR(dev)]) limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits; @@ -325,18 +320,10 @@ if ((*offp >> sector_bits) >= limit) goto out_free; - /* - * Split the IO into KIO_MAX_SECTORS chunks, mapping and - * unmapping the single kiobuf as we go to perform each chunk of - * IO. 
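
The raw device rewrite that follows leans on the new brw_kiovec() calling convention: the starting block is passed by reference (&blocknr below) instead of being expanded by hand into iobuf->blocks[], which is what made the KIO_MAX_SECTORS chunking described above necessary. One mapped-and-submitted chunk, condensed into a sketch; map_user_kiobuf() is the existing kiobuf API, the wrapper itself is illustrative:

	static int example_raw_chunk(int rw, kdev_t dev, struct kiobuf *iobuf,
				     char *buf, sector_t blocknr,
				     unsigned long blocks,
				     int sector_size, int sector_bits)
	{
		int err, iosize = blocks << sector_bits;

		err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
		if (err)
			return err;

		/* one call covers the whole run of blocks */
		err = brw_kiovec(rw, 1, &iobuf, dev, &blocknr, sector_size);

		if (rw == READ && err > 0)
			mark_dirty_kiobuf(iobuf, err);

		unmap_kiobuf(iobuf);
		return err;	/* bytes transferred, or negative error */
	}
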
- */ - transferred = 0; blocknr = *offp >> sector_bits; while (size > 0) { blocks = size >> sector_bits; - if (blocks > max_sectors) - blocks = max_sectors; if (blocks > limit - blocknr) blocks = limit - blocknr; if (!blocks) @@ -348,10 +335,7 @@ if (err) break; - for (i=0; i < blocks; i++) - iobuf->blocks[i] = blocknr++; - - err = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, sector_size); + err = brw_kiovec(rw, 1, &iobuf, dev, &blocknr, sector_size); if (rw == READ && err > 0) mark_dirty_kiobuf(iobuf, err); @@ -361,6 +345,8 @@ size -= err; buf += err; } + + blocknr += blocks; unmap_kiobuf(iobuf); diff -u --recursive --new-file v2.5.0/linux/drivers/char/serial.c linux/drivers/char/serial.c --- v2.5.0/linux/drivers/char/serial.c Fri Nov 9 14:12:55 2001 +++ linux/drivers/char/serial.c Sun Nov 25 09:43:42 2001 @@ -122,7 +122,7 @@ #define ENABLE_SERIAL_ACPI #endif -#if defined(CONFIG_ISAPNP)|| (defined(CONFIG_ISAPNP_MODULE) && defined(MODULE)) +#ifdef __ISAPNP__ #ifndef ENABLE_SERIAL_PNP #define ENABLE_SERIAL_PNP #endif diff -u --recursive --new-file v2.5.0/linux/drivers/ide/aec62xx.c linux/drivers/ide/aec62xx.c --- v2.5.0/linux/drivers/ide/aec62xx.c Tue Jun 20 07:52:36 2000 +++ linux/drivers/ide/aec62xx.c Tue Nov 27 09:23:27 2001 @@ -557,6 +557,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) hwif->dmaproc = &aec62xx_dmaproc; + hwif->highmem = 1; #else /* !CONFIG_BLK_DEV_IDEDMA */ hwif->drives[0].autotune = 1; hwif->drives[1].autotune = 1; diff -u --recursive --new-file v2.5.0/linux/drivers/ide/amd74xx.c linux/drivers/ide/amd74xx.c --- v2.5.0/linux/drivers/ide/amd74xx.c Mon Aug 13 14:56:19 2001 +++ linux/drivers/ide/amd74xx.c Tue Nov 27 09:23:27 2001 @@ -75,7 +75,8 @@ { unsigned int class_rev; - if (dev->device == PCI_DEVICE_ID_AMD_VIPER_7411) + if ((dev->device == PCI_DEVICE_ID_AMD_VIPER_7411) || + (dev->device == PCI_DEVICE_ID_AMD_VIPER_7441)) return 0; pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev); @@ -122,8 +123,8 @@ pci_read_config_byte(dev, 0x4c, &pio_timing); #ifdef DEBUG - printk("%s: UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x ", - drive->name, ultra_timing, dma_pio_timing, pio_timing); + printk("%s:%d: Speed 0x%02x UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x\n", + drive->name, drive->dn, speed, ultra_timing, dma_pio_timing, pio_timing); #endif ultra_timing &= ~0xC7; @@ -131,22 +132,19 @@ pio_timing &= ~(0x03 << drive->dn); #ifdef DEBUG - printk(":: UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x ", - ultra_timing, dma_pio_timing, pio_timing); + printk("%s: UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x\n", + drive->name, ultra_timing, dma_pio_timing, pio_timing); #endif switch(speed) { #ifdef CONFIG_BLK_DEV_IDEDMA + case XFER_UDMA_7: + case XFER_UDMA_6: + speed = XFER_UDMA_5; case XFER_UDMA_5: -#undef __CAN_MODE_5 -#ifdef __CAN_MODE_5 ultra_timing |= 0x46; dma_pio_timing |= 0x20; break; -#else - printk("%s: setting to mode 4, driver problems in mode 5.\n", drive->name); - speed = XFER_UDMA_4; -#endif /* __CAN_MODE_5 */ case XFER_UDMA_4: ultra_timing |= 0x45; dma_pio_timing |= 0x20; @@ -222,8 +220,8 @@ pci_write_config_byte(dev, 0x4c, pio_timing); #ifdef DEBUG - printk(":: UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x\n", - ultra_timing, dma_pio_timing, pio_timing); + printk("%s: UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x\n", + drive->name, ultra_timing, dma_pio_timing, pio_timing); #endif #ifdef CONFIG_BLK_DEV_IDEDMA @@ -303,11 +301,12 @@ struct pci_dev *dev = hwif->pci_dev; struct hd_driveid *id = drive->id; byte udma_66 = eighty_ninty_three(drive); - byte udma_100 = (dev->device==PCI_DEVICE_ID_AMD_VIPER_7411) ? 
1 : 0; + byte udma_100 = ((dev->device==PCI_DEVICE_ID_AMD_VIPER_7411)|| + (dev->device==PCI_DEVICE_ID_AMD_VIPER_7441)) ? 1 : 0; byte speed = 0x00; int rval; - if ((id->dma_ultra & 0x0020) && (udma_66)&& (udma_100)) { + if ((id->dma_ultra & 0x0020) && (udma_66) && (udma_100)) { speed = XFER_UDMA_5; } else if ((id->dma_ultra & 0x0010) && (udma_66)) { speed = XFER_UDMA_4; @@ -331,7 +330,7 @@ (void) amd74xx_tune_chipset(drive, speed); - rval = (int)( ((id->dma_ultra >> 11) & 3) ? ide_dma_on : + rval = (int)( ((id->dma_ultra >> 11) & 7) ? ide_dma_on : ((id->dma_ultra >> 8) & 7) ? ide_dma_on : ((id->dma_mword >> 8) & 7) ? ide_dma_on : ide_dma_off_quietly); @@ -352,7 +351,7 @@ } dma_func = ide_dma_off_quietly; if (id->field_valid & 4) { - if (id->dma_ultra & 0x002F) { + if (id->dma_ultra & 0x003F) { /* Force if Capable UltraDMA */ dma_func = config_chipset_for_dma(drive); if ((id->field_valid & 2) && diff -u --recursive --new-file v2.5.0/linux/drivers/ide/cmd64x.c linux/drivers/ide/cmd64x.c --- v2.5.0/linux/drivers/ide/cmd64x.c Thu Jul 27 16:40:57 2000 +++ linux/drivers/ide/cmd64x.c Tue Nov 27 09:23:27 2001 @@ -795,5 +795,7 @@ default: break; } + + hwif->highmem = 1; #endif /* CONFIG_BLK_DEV_IDEDMA */ } diff -u --recursive --new-file v2.5.0/linux/drivers/ide/cs5530.c linux/drivers/ide/cs5530.c --- v2.5.0/linux/drivers/ide/cs5530.c Tue Jan 2 16:58:45 2001 +++ linux/drivers/ide/cs5530.c Tue Nov 27 09:23:27 2001 @@ -352,9 +352,10 @@ unsigned int basereg, d0_timings; #ifdef CONFIG_BLK_DEV_IDEDMA - hwif->dmaproc = &cs5530_dmaproc; + hwif->dmaproc = &cs5530_dmaproc; + hwif->highmem = 1; #else - hwif->autodma = 0; + hwif->autodma = 0; #endif /* CONFIG_BLK_DEV_IDEDMA */ hwif->tuneproc = &cs5530_tuneproc; diff -u --recursive --new-file v2.5.0/linux/drivers/ide/cy82c693.c linux/drivers/ide/cy82c693.c --- v2.5.0/linux/drivers/ide/cy82c693.c Sat May 19 17:43:06 2001 +++ linux/drivers/ide/cy82c693.c Tue Nov 27 09:23:27 2001 @@ -441,6 +441,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { + hwif->highmem = 1; hwif->dmaproc = &cy82c693_dmaproc; if (!noautodma) hwif->autodma = 1; diff -u --recursive --new-file v2.5.0/linux/drivers/ide/hd.c linux/drivers/ide/hd.c --- v2.5.0/linux/drivers/ide/hd.c Mon Oct 15 13:27:42 2001 +++ linux/drivers/ide/hd.c Tue Nov 27 09:23:27 2001 @@ -107,7 +107,6 @@ static int hd_sizes[MAX_HD<<6]; static int hd_blocksizes[MAX_HD<<6]; static int hd_hardsectsizes[MAX_HD<<6]; -static int hd_maxsect[MAX_HD<<6]; static struct timer_list device_timer; @@ -560,19 +559,18 @@ dev = MINOR(CURRENT->rq_dev); block = CURRENT->sector; nsect = CURRENT->nr_sectors; - if (dev >= (NR_HD<<6) || block >= hd[dev].nr_sects || ((block+nsect) > hd[dev].nr_sects)) { -#ifdef DEBUG - if (dev >= (NR_HD<<6)) + if (dev >= (NR_HD<<6) || (dev & 0x3f) || + block >= hd[dev].nr_sects || ((block+nsect) > hd[dev].nr_sects)) { + if (dev >= (NR_HD<<6) || (dev & 0x3f)) printk("hd: bad minor number: device=%s\n", kdevname(CURRENT->rq_dev)); else printk("hd%c: bad access: block=%d, count=%d\n", (MINOR(CURRENT->rq_dev)>>6)+'a', block, nsect); -#endif end_request(0); goto repeat; } - block += hd[dev].start_sect; + dev >>= 6; if (special_op[dev]) { if (do_special_op(dev)) @@ -634,22 +632,17 @@ g.heads = hd_info[dev].head; g.sectors = hd_info[dev].sect; g.cylinders = hd_info[dev].cyl; - g.start = hd[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user(loc, &g, sizeof g) ? 
-EFAULT : 0; } - case BLKGETSIZE: /* Return device size */ - return put_user(hd[MINOR(inode->i_rdev)].nr_sects, - (unsigned long *) arg); - case BLKGETSIZE64: - return put_user((u64)hd[MINOR(inode->i_rdev)].nr_sects << 9, - (u64 *) arg); - case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; return revalidate_hddisk(inode->i_rdev, 1); + case BLKGETSIZE: + case BLKGETSIZE64: case BLKROSET: case BLKROGET: case BLKRASET: @@ -733,11 +726,9 @@ for(drive=0; drive < (MAX_HD << 6); drive++) { hd_blocksizes[drive] = 1024; hd_hardsectsizes[drive] = 512; - hd_maxsect[drive]=255; } blksize_size[MAJOR_NR] = hd_blocksizes; hardsect_size[MAJOR_NR] = hd_hardsectsizes; - max_sectors[MAJOR_NR] = hd_maxsect; #ifdef __i386__ if (!NR_HD) { @@ -840,6 +831,7 @@ return -1; } blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + blk_queue_max_sectors(BLK_DEFAULT_QUEUE(MAJOR_NR), 255); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ add_gendisk(&hd_gendisk); init_timer(&device_timer); @@ -868,9 +860,7 @@ { int target; struct gendisk * gdev; - int max_p; - int start; - int i; + int res; long flags; target = DEVICE_NR(dev); @@ -885,25 +875,20 @@ DEVICE_BUSY = 1; restore_flags(flags); - max_p = gdev->max_p; - start = target << gdev->minor_shift; - - for (i=max_p - 1; i >=0 ; i--) { - int minor = start + i; - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; - } + res = wipe_partitions(dev); + if (res) + goto leave; #ifdef MAYBE_REINIT MAYBE_REINIT; #endif - grok_partitions(gdev, target, 1<<6, CAPACITY); + grok_partitions(dev, CAPACITY); +leave: DEVICE_BUSY = 0; wake_up(&busy_wait); - return 0; + return res; } static int parse_hd_setup (char *line) { diff -u --recursive --new-file v2.5.0/linux/drivers/ide/hpt34x.c linux/drivers/ide/hpt34x.c --- v2.5.0/linux/drivers/ide/hpt34x.c Sat May 19 17:43:06 2001 +++ linux/drivers/ide/hpt34x.c Tue Nov 27 09:23:27 2001 @@ -425,6 +425,7 @@ hwif->autodma = 0; hwif->dmaproc = &hpt34x_dmaproc; + hwif->highmem = 1; } else { hwif->drives[0].autotune = 1; hwif->drives[1].autotune = 1; diff -u --recursive --new-file v2.5.0/linux/drivers/ide/hpt366.c linux/drivers/ide/hpt366.c --- v2.5.0/linux/drivers/ide/hpt366.c Tue Aug 14 20:01:07 2001 +++ linux/drivers/ide/hpt366.c Tue Nov 27 09:23:27 2001 @@ -730,6 +730,7 @@ hwif->autodma = 1; else hwif->autodma = 0; + hwif->highmem = 1; } else { hwif->autodma = 0; hwif->drives[0].autotune = 1; diff -u --recursive --new-file v2.5.0/linux/drivers/ide/ide-cd.c linux/drivers/ide/ide-cd.c --- v2.5.0/linux/drivers/ide/ide-cd.c Wed Oct 24 23:53:51 2001 +++ linux/drivers/ide/ide-cd.c Tue Nov 27 09:23:27 2001 @@ -891,7 +891,7 @@ int stat; int ireason, len, sectors_to_transfer, nskip; struct cdrom_info *info = drive->driver_data; - int i, dma = info->dma, dma_error = 0; + int dma = info->dma, dma_error = 0; ide_startstop_t startstop; struct request *rq = HWGROUP(drive)->rq; @@ -908,10 +908,7 @@ if (dma) { if (!dma_error) { - for (i = rq->nr_sectors; i > 0;) { - i -= rq->current_nr_sectors; - ide_end_request(1, HWGROUP(drive)); - } + __ide_end_request(HWGROUP(drive), 1, rq->nr_sectors); return ide_stopped; } else return ide_error (drive, "dma error", stat); @@ -926,7 +923,7 @@ /* If we're not done filling the current buffer, complain. Otherwise, complete the command normally. 
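Several hunks in ide-cd.c retire a finished DMA transfer with one call instead of looping ide_end_request() once per buffer. The shape of the replacement, as a sketch (the loop shown is the pattern being deleted):

    /* old: complete the request one current_nr_sectors chunk at a time */
    for (i = rq->nr_sectors; i > 0;) {
            i -= rq->current_nr_sectors;
            ide_end_request(1, HWGROUP(drive));
    }

    /* new: retire all nr_sectors in one pass */
    __ide_end_request(HWGROUP(drive), 1, rq->nr_sectors);

Besides being shorter, the helper takes and releases ide_lock once per request rather than once per buffer.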
*/ if (rq->current_nr_sectors > 0) { - printk ("%s: cdrom_read_intr: data underrun (%ld blocks)\n", + printk ("%s: cdrom_read_intr: data underrun (%u blocks)\n", drive->name, rq->current_nr_sectors); cdrom_end_request (0, drive); } else @@ -959,8 +956,7 @@ /* First, figure out if we need to bit-bucket any of the leading sectors. */ - nskip = MIN ((int)(rq->current_nr_sectors - (rq->bh->b_size >> SECTOR_BITS)), - sectors_to_transfer); + nskip = MIN(rq->current_nr_sectors - bio_sectors(rq->bio), sectors_to_transfer); while (nskip > 0) { /* We need to throw away a sector. */ @@ -1058,7 +1054,7 @@ represent the number of sectors to skip at the start of a transfer will fail. I think that this will never happen, but let's be paranoid and check. */ - if (rq->current_nr_sectors < (rq->bh->b_size >> SECTOR_BITS) && + if (rq->current_nr_sectors < bio_sectors(rq->bio) && (rq->sector % SECTORS_PER_FRAME) != 0) { printk ("%s: cdrom_read_from_buffer: buffer botch (%ld)\n", drive->name, rq->sector); @@ -1097,9 +1093,9 @@ nskip = (sector % SECTORS_PER_FRAME); if (nskip > 0) { /* Sanity check... */ - if (rq->current_nr_sectors != (rq->bh->b_size >> SECTOR_BITS) && + if (rq->current_nr_sectors != bio_sectors(rq->bio) && (rq->sector % CD_FRAMESIZE != 0)) { - printk ("%s: cdrom_start_read_continuation: buffer botch (%lu)\n", + printk ("%s: cdrom_start_read_continuation: buffer botch (%u)\n", drive->name, rq->current_nr_sectors); cdrom_end_request (0, drive); return ide_stopped; @@ -1192,66 +1188,17 @@ return cdrom_start_packet_command (drive, 0, cdrom_start_seek_continuation); } -static inline int cdrom_merge_requests(struct request *rq, struct request *nxt) -{ - int ret = 1; - - /* - * partitions not really working, but better check anyway... - */ - if (rq->cmd == nxt->cmd && rq->rq_dev == nxt->rq_dev) { - rq->nr_sectors += nxt->nr_sectors; - rq->hard_nr_sectors += nxt->nr_sectors; - rq->bhtail->b_reqnext = nxt->bh; - rq->bhtail = nxt->bhtail; - list_del(&nxt->queue); - blkdev_release_request(nxt); - ret = 0; - } - - return ret; -} - -/* - * the current request will always be the first one on the list - */ -static void cdrom_attempt_remerge(ide_drive_t *drive, struct request *rq) -{ - struct list_head *entry; - struct request *nxt; - unsigned long flags; - - spin_lock_irqsave(&io_request_lock, flags); - - while (1) { - entry = rq->queue.next; - if (entry == &drive->queue.queue_head) - break; - - nxt = blkdev_entry_to_request(entry); - if (rq->sector + rq->nr_sectors != nxt->sector) - break; - else if (rq->nr_sectors + nxt->nr_sectors > SECTORS_MAX) - break; - - if (cdrom_merge_requests(rq, nxt)) - break; - } - - spin_unlock_irqrestore(&io_request_lock, flags); -} - /* Fix up a possibly partially-processed request so that we can - start it over entirely, or even put it back on the request queue. 
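restore_request() below rewinds a partially-processed request to a bio boundary so it can be reissued from scratch. A user-space model of the arithmetic, for the case where the buffer pointer has advanced n whole sectors into the first segment (struct req and seg_start are illustrative names, not the kernel's):

    #define SECTOR_SIZE 512

    struct req {
            char *buffer;       /* current position in the segment */
            char *seg_start;    /* bio_data(rq->bio) in the driver */
            unsigned long sector, nr_sectors;
    };

    static void rewind(struct req *rq)
    {
            if (rq->buffer != rq->seg_start) {
                    int n = (rq->buffer - rq->seg_start) / SECTOR_SIZE;
                    rq->buffer = rq->seg_start;
                    rq->nr_sectors += n;    /* give the sectors back */
                    rq->sector -= n;        /* and step the start back */
            }
    }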
*/ + start it over entirely */ static void restore_request (struct request *rq) { - if (rq->buffer != rq->bh->b_data) { - int n = (rq->buffer - rq->bh->b_data) / SECTOR_SIZE; - rq->buffer = rq->bh->b_data; + if (rq->buffer != bio_data(rq->bio)) { + int n = (rq->buffer - (char *) bio_data(rq->bio)) / SECTOR_SIZE; + rq->buffer = bio_data(rq->bio); rq->nr_sectors += n; rq->sector -= n; } - rq->current_nr_sectors = rq->bh->b_size >> SECTOR_BITS; + rq->hard_cur_sectors = rq->current_nr_sectors = bio_sectors(rq->bio); rq->hard_nr_sectors = rq->nr_sectors; rq->hard_sector = rq->sector; } @@ -1281,7 +1228,7 @@ if (cdrom_read_from_buffer(drive)) return ide_stopped; - cdrom_attempt_remerge(drive, rq); + blk_attempt_remerge(&drive->queue, rq); /* Clear the local sector buffer. */ info->nsectors_buffered = 0; @@ -1532,7 +1479,7 @@ { int stat, ireason, len, sectors_to_transfer, uptodate; struct cdrom_info *info = drive->driver_data; - int i, dma_error = 0, dma = info->dma; + int dma_error = 0, dma = info->dma; ide_startstop_t startstop; struct request *rq = HWGROUP(drive)->rq; @@ -1559,10 +1506,7 @@ return ide_error(drive, "dma error", stat); rq = HWGROUP(drive)->rq; - for (i = rq->nr_sectors; i > 0;) { - i -= rq->current_nr_sectors; - ide_end_request(1, HWGROUP(drive)); - } + __ide_end_request(HWGROUP(drive), 1, rq->nr_sectors); return ide_stopped; } @@ -1577,7 +1521,7 @@ */ uptodate = 1; if (rq->current_nr_sectors > 0) { - printk("%s: write_intr: data underrun (%ld blocks)\n", + printk("%s: write_intr: data underrun (%u blocks)\n", drive->name, rq->current_nr_sectors); uptodate = 0; } @@ -1674,7 +1618,7 @@ * remerge requests, often the plugging will not have had time * to do this properly */ - cdrom_attempt_remerge(drive, rq); + blk_attempt_remerge(&drive->queue, rq); info->nsectors_buffered = 0; @@ -2202,7 +2146,9 @@ pc.quiet = cgc->quiet; pc.timeout = cgc->timeout; pc.sense = cgc->sense; - return cgc->stat = cdrom_queue_packet_command(drive, &pc); + cgc->stat = cdrom_queue_packet_command(drive, &pc); + cgc->buflen -= pc.buflen; + return cgc->stat; } static @@ -2711,7 +2657,6 @@ ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); ide_add_setting(drive, "dsc_overlap", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->dsc_overlap, NULL); } @@ -2875,7 +2820,7 @@ MOD_INC_USE_COUNT; if (info->buffer == NULL) info->buffer = (char *) kmalloc(SECTOR_BUFFER_SIZE, GFP_KERNEL); - if ((info->buffer == NULL) || (rc = cdrom_open(ip, fp))) { + if ((info->buffer == NULL) || (rc = cdrom_open(ip, fp))) { drive->usage--; MOD_DEC_USE_COUNT; } diff -u --recursive --new-file v2.5.0/linux/drivers/ide/ide-cd.h linux/drivers/ide/ide-cd.h --- v2.5.0/linux/drivers/ide/ide-cd.h Thu Nov 22 11:46:58 2001 +++ linux/drivers/ide/ide-cd.h Tue Nov 27 09:42:30 2001 @@ -435,7 +435,7 @@ byte curlba[3]; byte nslots; - __u8 short slot_tablelen; + __u16 short slot_tablelen; }; diff -u --recursive --new-file v2.5.0/linux/drivers/ide/ide-cs.c linux/drivers/ide/ide-cs.c --- v2.5.0/linux/drivers/ide/ide-cs.c Sun Sep 30 12:26:05 2001 +++ linux/drivers/ide/ide-cs.c Sun Nov 25 09:48:08 2001 @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -226,6 +227,16 @@ #define 
CFG_CHECK(fn, args...) \ if (CardServices(fn, args) != 0) goto next_entry +int idecs_register (int io_base, int ctl_base, int irq) +{ + hw_regs_t hw; + ide_init_hwif_ports(&hw, (ide_ioreg_t) io_base, (ide_ioreg_t) ctl_base, NULL); + hw.irq = irq; + hw.chipset = ide_pci; // this enables IRQ sharing w/ PCI irqs + return ide_register_hw(&hw, NULL); +} + + void ide_config(dev_link_t *link) { client_handle_t handle = link->handle; @@ -327,12 +338,16 @@ if (link->io.NumPorts2) release_region(link->io.BasePort2, link->io.NumPorts2); + /* disable drive interrupts during IDE probe */ + if(ctl_base) + outb(0x02, ctl_base); + /* retry registration in case device is still spinning up */ for (i = 0; i < 10; i++) { - hd = ide_register(io_base, ctl_base, link->irq.AssignedIRQ); + hd = idecs_register(io_base, ctl_base, link->irq.AssignedIRQ); if (hd >= 0) break; if (link->io.NumPorts1 == 0x20) { - hd = ide_register(io_base+0x10, ctl_base+0x10, + hd = idecs_register(io_base+0x10, ctl_base+0x10, link->irq.AssignedIRQ); if (hd >= 0) { io_base += 0x10; ctl_base += 0x10; diff -u --recursive --new-file v2.5.0/linux/drivers/ide/ide-disk.c linux/drivers/ide/ide-disk.c --- v2.5.0/linux/drivers/ide/ide-disk.c Tue Nov 20 21:35:28 2001 +++ linux/drivers/ide/ide-disk.c Tue Nov 27 09:23:27 2001 @@ -27,6 +27,7 @@ * Version 1.09 added increment of rq->sector in ide_multwrite * added UDMA 3/4 reporting * Version 1.10 request queue changes, Ultra DMA 100 + * Version 1.11 Highmem I/O support, Jens Axboe */ #define IDEDISK_VERSION "1.10" @@ -139,7 +140,9 @@ byte stat; int i; unsigned int msect, nsect; + unsigned long flags; struct request *rq; + char *to; /* new way for dealing with premature shared PCI interrupts */ if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) { @@ -150,8 +153,8 @@ ide_set_handler(drive, &read_intr, WAIT_CMD, NULL); return ide_started; } + msect = drive->mult_count; - read_next: rq = HWGROUP(drive)->rq; if (msect) { @@ -160,14 +163,15 @@ msect -= nsect; } else nsect = 1; - idedisk_input_data(drive, rq->buffer, nsect * SECTOR_WORDS); + to = ide_map_buffer(rq, &flags); + idedisk_input_data(drive, to, nsect * SECTOR_WORDS); #ifdef DEBUG printk("%s: read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n", drive->name, rq->sector, rq->sector+nsect-1, (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect); #endif + ide_unmap_buffer(to, &flags); rq->sector += nsect; - rq->buffer += nsect<<9; rq->errors = 0; i = (rq->nr_sectors -= nsect); if (((long)(rq->current_nr_sectors -= nsect)) <= 0) @@ -201,14 +205,16 @@ #endif if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) { rq->sector++; - rq->buffer += 512; rq->errors = 0; i = --rq->nr_sectors; --rq->current_nr_sectors; if (((long)rq->current_nr_sectors) <= 0) ide_end_request(1, hwgroup); if (i > 0) { - idedisk_output_data (drive, rq->buffer, SECTOR_WORDS); + unsigned long flags; + char *to = ide_map_buffer(rq, &flags); + idedisk_output_data (drive, to, SECTOR_WORDS); + ide_unmap_buffer(to, &flags); ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); return ide_started; } @@ -238,28 +244,28 @@ do { char *buffer; int nsect = rq->current_nr_sectors; - + unsigned long flags; + if (nsect > mcount) nsect = mcount; mcount -= nsect; - buffer = rq->buffer; + buffer = ide_map_buffer(rq, &flags); rq->sector += nsect; - rq->buffer += nsect << 9; rq->nr_sectors -= nsect; rq->current_nr_sectors -= nsect; /* Do we move to the next bh after this? 
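The recurring pattern in the ide-disk.c hunks that follow is the point of the highmem work: rq->buffer can no longer be dereferenced directly, because the data may live in a highmem page, so each PIO burst maps the current segment around the transfer. Sketch, assuming ide_map_buffer()/ide_unmap_buffer() wrap a kmap of the request's current bio segment:

    unsigned long flags;
    char *to = ide_map_buffer(rq, &flags);      /* kmap current segment */
    idedisk_input_data(drive, to, nsect * SECTOR_WORDS);
    ide_unmap_buffer(to, &flags);               /* kunmap right after the burst */

The mapping is held only across the programmed I/O itself, never parked across a wait for the next interrupt.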
*/ if (!rq->current_nr_sectors) { - struct buffer_head *bh = rq->bh->b_reqnext; + struct bio *bio = rq->bio->bi_next; /* end early early we ran out of requests */ - if (!bh) { + if (!bio) { mcount = 0; } else { - rq->bh = bh; - rq->current_nr_sectors = bh->b_size >> 9; - rq->buffer = bh->b_data; + rq->bio = bio; + rq->current_nr_sectors = bio_sectors(bio); + rq->hard_cur_sectors = rq->current_nr_sectors; } } @@ -268,6 +274,7 @@ * re-entering us on the last transfer. */ idedisk_output_data(drive, buffer, nsect<<7); + ide_unmap_buffer(buffer, &flags); } while (mcount); return 0; @@ -279,7 +286,6 @@ static ide_startstop_t multwrite_intr (ide_drive_t *drive) { byte stat; - int i; ide_hwgroup_t *hwgroup = HWGROUP(drive); struct request *rq = &hwgroup->wrq; @@ -302,10 +308,8 @@ */ if (!rq->nr_sectors) { /* all done? */ rq = hwgroup->rq; - for (i = rq->nr_sectors; i > 0;){ - i -= rq->current_nr_sectors; - ide_end_request(1, hwgroup); - } + + __ide_end_request(hwgroup, 1, rq->nr_sectors); return ide_stopped; } } @@ -367,6 +371,8 @@ */ static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) { + unsigned long flags; + if (IDE_CONTROL_REG) OUT_BYTE(drive->ctl,IDE_CONTROL_REG); OUT_BYTE(0x00, IDE_FEATURE_REG); @@ -444,16 +450,17 @@ hwgroup->wrq = *rq; /* scratchpad */ ide_set_handler (drive, &multwrite_intr, WAIT_CMD, NULL); if (ide_multwrite(drive, drive->mult_count)) { - unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return ide_stopped; } } else { + char *buffer = ide_map_buffer(rq, &flags); ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); - idedisk_output_data(drive, rq->buffer, SECTOR_WORDS); + idedisk_output_data(drive, buffer, SECTOR_WORDS); + ide_unmap_buffer(buffer, &flags); } return ide_started; } @@ -482,7 +489,8 @@ { if (drive->removable && !drive->usage) { invalidate_bdev(inode->i_bdev, 0); - if (drive->doorlocking && ide_wait_cmd(drive, WIN_DOORUNLOCK, 0, 0, 0, NULL)) + if (drive->doorlocking && + ide_wait_cmd(drive, WIN_DOORUNLOCK, 0, 0, 0, NULL)) drive->doorlocking = 0; } MOD_DEC_USE_COUNT; @@ -495,9 +503,7 @@ static void idedisk_revalidate (ide_drive_t *drive) { - grok_partitions(HWIF(drive)->gd, drive->select.b.unit, - 1<nowerr = arg; drive->bad_wstat = arg ? 
BAD_R_STAT : BAD_W_STAT; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&ide_lock); return 0; } @@ -691,7 +697,6 @@ ide_add_setting(drive, "nowerr", SETTING_RW, HDIO_GET_NOWERR, HDIO_SET_NOWERR, TYPE_BYTE, 0, 1, 1, 1, &drive->nowerr, set_nowerr); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, 4096, PAGE_SIZE, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); ide_add_setting(drive, "lun", SETTING_RW, -1, -1, TYPE_INT, 0, 7, 1, 1, &drive->lun, NULL); ide_add_setting(drive, "failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->failures, NULL); ide_add_setting(drive, "max_failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->max_failures, NULL); diff -u --recursive --new-file v2.5.0/linux/drivers/ide/ide-dma.c linux/drivers/ide/ide-dma.c --- v2.5.0/linux/drivers/ide/ide-dma.c Sun Sep 9 10:43:02 2001 +++ linux/drivers/ide/ide-dma.c Tue Nov 27 09:23:27 2001 @@ -203,30 +203,10 @@ #endif /* CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */ /* - * Our Physical Region Descriptor (PRD) table should be large enough - * to handle the biggest I/O request we are likely to see. Since requests - * can have no more than 256 sectors, and since the typical blocksize is - * two or more sectors, we could get by with a limit of 128 entries here for - * the usual worst case. Most requests seem to include some contiguous blocks, - * further reducing the number of table entries required. - * - * The driver reverts to PIO mode for individual requests that exceed - * this limit (possible with 512 byte blocksizes, eg. MSDOS f/s), so handling - * 100% of all crazy scenarios here is not necessary. - * - * As it turns out though, we must allocate a full 4KB page for this, - * so the two PRD tables (ide0 & ide1) will each get half of that, - * allowing each to have about 256 entries (8 bytes each) from this. 
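For reference while reading the table-building loop below, this is the 8-byte PRD entry format the removed comment describes, modeled in user space (little-endian conversion, cpu_to_le32() in the driver, is elided; 4096 stands in for PAGE_SIZE):

    #include <stdint.h>

    #define PRD_BYTES   8
    #define PRD_ENTRIES (4096 / (2 * PRD_BYTES))  /* half a page per channel */

    /* One PRD entry: a 32-bit physical address, a 16-bit byte count, and
     * bit 31 of the second word set on the final entry (end of table). */
    static void fill_prd(uint32_t *entry, uint32_t addr, uint32_t bytes,
                         int last)
    {
            entry[0] = addr;
            entry[1] = (bytes & 0xffff) | (last ? 0x80000000u : 0);
    }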
- */ -#define PRD_BYTES 8 -#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES)) - -/* * dma_intr() is the handler for disk read/write DMA interrupts */ ide_startstop_t ide_dma_intr (ide_drive_t *drive) { - int i; byte stat, dma_stat; dma_stat = HWIF(drive)->dmaproc(ide_dma_end, drive); @@ -234,11 +214,8 @@ if (OK_STAT(stat,DRIVE_READY,drive->bad_wstat|DRQ_STAT)) { if (!dma_stat) { struct request *rq = HWGROUP(drive)->rq; - rq = HWGROUP(drive)->rq; - for (i = rq->nr_sectors; i > 0;) { - i -= rq->current_nr_sectors; - ide_end_request(1, HWGROUP(drive)); - } + + __ide_end_request(HWGROUP(drive), 1, rq->nr_sectors); return ide_stopped; } printk("%s: dma_intr: bad DMA status (dma_stat=%x)\n", @@ -249,35 +226,18 @@ static int ide_build_sglist (ide_hwif_t *hwif, struct request *rq) { - struct buffer_head *bh; struct scatterlist *sg = hwif->sg_table; - int nents = 0; + int nents; + + nents = blk_rq_map_sg(rq->q, rq, hwif->sg_table); + + if (nents > rq->nr_segments) + printk("ide-dma: received %d segments, build %d\n", rq->nr_segments, nents); - if (hwif->sg_dma_active) - BUG(); - if (rq->cmd == READ) hwif->sg_dma_direction = PCI_DMA_FROMDEVICE; else hwif->sg_dma_direction = PCI_DMA_TODEVICE; - bh = rq->bh; - do { - unsigned char *virt_addr = bh->b_data; - unsigned int size = bh->b_size; - - if (nents >= PRD_ENTRIES) - return 0; - - while ((bh = bh->b_reqnext) != NULL) { - if ((virt_addr + size) != (unsigned char *) bh->b_data) - break; - size += bh->b_size; - } - memset(&sg[nents], 0, sizeof(*sg)); - sg[nents].address = virt_addr; - sg[nents].length = size; - nents++; - } while (bh != NULL); return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction); } @@ -289,9 +249,10 @@ */ int ide_build_dmatable (ide_drive_t *drive, ide_dma_action_t func) { - unsigned int *table = HWIF(drive)->dmatable_cpu; + ide_hwif_t *hwif = HWIF(drive); + unsigned int *table = hwif->dmatable_cpu; #ifdef CONFIG_BLK_DEV_TRM290 - unsigned int is_trm290_chipset = (HWIF(drive)->chipset == ide_trm290); + unsigned int is_trm290_chipset = (hwif->chipset == ide_trm290); #else const int is_trm290_chipset = 0; #endif @@ -299,13 +260,12 @@ int i; struct scatterlist *sg; - HWIF(drive)->sg_nents = i = ide_build_sglist(HWIF(drive), HWGROUP(drive)->rq); - + hwif->sg_nents = i = ide_build_sglist(hwif, HWGROUP(drive)->rq); if (!i) return 0; - sg = HWIF(drive)->sg_table; - while (i && sg_dma_len(sg)) { + sg = hwif->sg_table; + while (i) { u32 cur_addr; u32 cur_len; @@ -319,55 +279,53 @@ */ while (cur_len) { + u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); + if (count++ >= PRD_ENTRIES) { - printk("%s: DMA table too small\n", drive->name); - goto use_pio_instead; - } else { - u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); - - if (bcount > cur_len) - bcount = cur_len; - *table++ = cpu_to_le32(cur_addr); - xcount = bcount & 0xffff; - if (is_trm290_chipset) - xcount = ((xcount >> 2) - 1) << 16; - if (xcount == 0x0000) { - /* - * Most chipsets correctly interpret a length of 0x0000 as 64KB, - * but at least one (e.g. CS5530) misinterprets it as zero (!). - * So here we break the 64KB entry into two 32KB entries instead. 
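Worked through with numbers: a segment that runs all the way to a 64KB boundary produces bcount == 0x10000, whose low 16 bits are zero, which is exactly the value the CS5530 misreads. A tiny predicate capturing the test:

    #include <stdint.h>

    static int needs_split(uint32_t cur_addr)
    {
            uint32_t bcount = 0x10000 - (cur_addr & 0xffff); /* bytes to 64KB wall */
            uint32_t xcount = bcount & 0xffff;               /* 0x10000 -> 0x0000! */
            return xcount == 0;  /* true: emit two 0x8000-byte entries instead */
    }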
- */ - if (count++ >= PRD_ENTRIES) { - printk("%s: DMA table too small\n", drive->name); - goto use_pio_instead; - } - *table++ = cpu_to_le32(0x8000); - *table++ = cpu_to_le32(cur_addr + 0x8000); - xcount = 0x8000; + printk("ide-dma: req %p\n", HWGROUP(drive)->rq); + printk("count %d, sg_nents %d, cur_len %d, cur_addr %u\n", count, hwif->sg_nents, cur_len, cur_addr); + BUG(); + } + + if (bcount > cur_len) + bcount = cur_len; + *table++ = cpu_to_le32(cur_addr); + xcount = bcount & 0xffff; + if (is_trm290_chipset) + xcount = ((xcount >> 2) - 1) << 16; + if (xcount == 0x0000) { + /* + * Most chipsets correctly interpret a length of + * 0x0000 as 64KB, but at least one (e.g. CS5530) + * misinterprets it as zero (!). So here we break + * the 64KB entry into two 32KB entries instead. + */ + if (count++ >= PRD_ENTRIES) { + pci_unmap_sg(hwif->pci_dev, sg, + hwif->sg_nents, + hwif->sg_dma_direction); + return 0; } - *table++ = cpu_to_le32(xcount); - cur_addr += bcount; - cur_len -= bcount; + + *table++ = cpu_to_le32(0x8000); + *table++ = cpu_to_le32(cur_addr + 0x8000); + xcount = 0x8000; } + *table++ = cpu_to_le32(xcount); + cur_addr += bcount; + cur_len -= bcount; } sg++; i--; } - if (count) { - if (!is_trm290_chipset) - *--table |= cpu_to_le32(0x80000000); - return count; - } - printk("%s: empty DMA table?\n", drive->name); -use_pio_instead: - pci_unmap_sg(HWIF(drive)->pci_dev, - HWIF(drive)->sg_table, - HWIF(drive)->sg_nents, - HWIF(drive)->sg_dma_direction); - HWIF(drive)->sg_dma_active = 0; - return 0; /* revert to PIO for this request */ + if (!count) + printk("%s: empty DMA table?\n", drive->name); + else if (!is_trm290_chipset) + *--table |= cpu_to_le32(0x80000000); + + return count; } /* Teardown mappings after DMA has completed. */ @@ -378,7 +336,6 @@ int nents = HWIF(drive)->sg_nents; pci_unmap_sg(dev, sg, nents, HWIF(drive)->sg_dma_direction); - HWIF(drive)->sg_dma_active = 0; } /* @@ -532,6 +489,20 @@ } #endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */ +static void ide_toggle_bounce(ide_drive_t *drive, int on) +{ + dma64_addr_t addr = BLK_BOUNCE_HIGH; + + if (on && drive->media == ide_disk && HWIF(drive)->highmem) { + if (!PCI_DMA_BUS_IS_PHYS) + addr = BLK_BOUNCE_ANY; + else + addr = HWIF(drive)->pci_dev->dma_mask; + } + + blk_queue_bounce_limit(&drive->queue, addr); +} + /* * ide_dmaproc() initiates/aborts DMA read/write operations on a drive. 
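ide_toggle_bounce() above picks one of three bounce limits. Spelled out as a sketch, with the lowercase condition names as stand-ins for the real tests (drive->media == ide_disk, HWIF(drive)->highmem):

    dma64_addr_t addr = BLK_BOUNCE_HIGH;    /* default: bounce all highmem */

    if (dma_on && media_is_disk && hwif_handles_highmem) {
            if (!PCI_DMA_BUS_IS_PHYS)
                    addr = BLK_BOUNCE_ANY;  /* IOMMU: DMA reaches anywhere */
            else
                    addr = dev->dma_mask;   /* bounce above the device's reach */
    }
    blk_queue_bounce_limit(&drive->queue, addr);

So a non-DMA drive keeps the conservative BLK_BOUNCE_HIGH limit, while a DMA-capable disk on a capable bus avoids bounce buffers entirely.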
* @@ -550,19 +521,20 @@ */ int ide_dmaproc (ide_dma_action_t func, ide_drive_t *drive) { -// ide_hwgroup_t *hwgroup = HWGROUP(drive); - ide_hwif_t *hwif = HWIF(drive); - unsigned long dma_base = hwif->dma_base; - byte unit = (drive->select.b.unit & 0x01); - unsigned int count, reading = 0; + ide_hwif_t *hwif = HWIF(drive); + unsigned long dma_base = hwif->dma_base; + byte unit = (drive->select.b.unit & 0x01); + unsigned int count, reading = 0, set_high = 1; byte dma_stat; switch (func) { case ide_dma_off: printk("%s: DMA disabled\n", drive->name); + set_high = 0; case ide_dma_off_quietly: outb(inb(dma_base+2) & ~(1<<(5+unit)), dma_base+2); case ide_dma_on: + ide_toggle_bounce(drive, set_high); drive->using_dma = (func == ide_dma_on); if (drive->using_dma) outb(inb(dma_base+2)|(1<<(5+unit)), dma_base+2); diff -u --recursive --new-file v2.5.0/linux/drivers/ide/ide-floppy.c linux/drivers/ide/ide-floppy.c --- v2.5.0/linux/drivers/ide/ide-floppy.c Thu Oct 11 09:14:32 2001 +++ linux/drivers/ide/ide-floppy.c Tue Nov 27 09:23:27 2001 @@ -707,24 +707,24 @@ static void idefloppy_input_buffers (ide_drive_t *drive, idefloppy_pc_t *pc, unsigned int bcount) { struct request *rq = pc->rq; - struct buffer_head *bh = rq->bh; + struct bio *bio = rq->bio; int count; while (bcount) { - if (pc->b_count == bh->b_size) { + if (pc->b_count == bio_size(bio)) { rq->sector += rq->current_nr_sectors; rq->nr_sectors -= rq->current_nr_sectors; idefloppy_end_request (1, HWGROUP(drive)); - if ((bh = rq->bh) != NULL) + if ((bio = rq->bio) != NULL) pc->b_count = 0; } - if (bh == NULL) { - printk (KERN_ERR "%s: bh == NULL in idefloppy_input_buffers, bcount == %d\n", drive->name, bcount); + if (bio == NULL) { + printk (KERN_ERR "%s: bio == NULL in idefloppy_input_buffers, bcount == %d\n", drive->name, bcount); idefloppy_discard_data (drive, bcount); return; } - count = IDEFLOPPY_MIN (bh->b_size - pc->b_count, bcount); - atapi_input_bytes (drive, bh->b_data + pc->b_count, count); + count = IDEFLOPPY_MIN (bio_size(bio) - pc->b_count, bcount); + atapi_input_bytes (drive, bio_data(bio) + pc->b_count, count); bcount -= count; pc->b_count += count; } } @@ -732,7 +732,7 @@ static void idefloppy_output_buffers (ide_drive_t *drive, idefloppy_pc_t *pc, unsigned int bcount) { struct request *rq = pc->rq; - struct buffer_head *bh = rq->bh; + struct bio *bio = rq->bio; int count; while (bcount) { @@ -740,13 +740,13 @@ rq->sector += rq->current_nr_sectors; rq->nr_sectors -= rq->current_nr_sectors; idefloppy_end_request (1, HWGROUP(drive)); - if ((bh = rq->bh) != NULL) { - pc->b_data = bh->b_data; - pc->b_count = bh->b_size; + if ((bio = rq->bio) != NULL) { + pc->b_data = bio_data(bio); + pc->b_count = bio_size(bio); } } - if (bh == NULL) { - printk (KERN_ERR "%s: bh == NULL in idefloppy_output_buffers, bcount == %d\n", drive->name, bcount); + if (bio == NULL) { + printk (KERN_ERR "%s: bio == NULL in idefloppy_output_buffers, bcount == %d\n", drive->name, bcount); idefloppy_write_zeros (drive, bcount); return; } @@ -760,9 +760,9 @@ static void idefloppy_update_buffers (ide_drive_t *drive, idefloppy_pc_t *pc) { struct request *rq = pc->rq; - struct buffer_head *bh = rq->bh; + struct bio *bio = rq->bio; - while ((bh = rq->bh) != NULL) + while ((bio = rq->bio) != NULL) idefloppy_end_request (1, HWGROUP(drive)); } #endif /* CONFIG_BLK_DEV_IDEDMA */ @@ -1210,7 +1210,7 @@ pc->callback = &idefloppy_rw_callback; pc->rq = rq; pc->b_data = rq->buffer; - pc->b_count = rq->cmd == READ ? 0 : rq->bh->b_size; + pc->b_count = rq->cmd == READ ? 
0 : bio_size(rq->bio); if (rq->cmd == WRITE) set_bit (PC_WRITING, &pc->flags); pc->buffer = NULL; @@ -1778,9 +1778,7 @@ */ static void idefloppy_revalidate (ide_drive_t *drive) { - grok_partitions(HWIF(drive)->gd, drive->select.b.unit, - 1<bios_sect, NULL); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); } @@ -1930,8 +1927,7 @@ static void idefloppy_setup (ide_drive_t *drive, idefloppy_floppy_t *floppy) { struct idefloppy_id_gcw gcw; - int major = HWIF(drive)->major, i; - int minor = drive->select.b.unit << PARTN_BITS; + int i; *((unsigned short *) &gcw) = drive->id->config; drive->driver_data = floppy; @@ -1953,34 +1949,17 @@ */ if (strcmp(drive->id->model, "IOMEGA ZIP 100 ATAPI") == 0) - { - for (i = 0; i < 1 << PARTN_BITS; i++) - max_sectors[major][minor + i] = 64; - } - /* - * Guess what? The IOMEGA Clik! drive also needs the - * above fix. It makes nasty clicking noises without - * it, so please don't remove this. - */ - if (strcmp(drive->id->model, "IOMEGA Clik! 40 CZ ATAPI") == 0) - { - for (i = 0; i < 1 << PARTN_BITS; i++) - max_sectors[major][minor + i] = 64; - set_bit(IDEFLOPPY_CLIK_DRIVE, &floppy->flags); - } + blk_queue_max_sectors(&drive->queue, 64); /* * Guess what? The IOMEGA Clik! drive also needs the * above fix. It makes nasty clicking noises without * it, so please don't remove this. */ - if (strcmp(drive->id->model, "IOMEGA Clik! 40 CZ ATAPI") == 0) - { - for (i = 0; i < 1 << PARTN_BITS; i++) - max_sectors[major][minor + i] = 64; + if (strcmp(drive->id->model, "IOMEGA Clik! 
40 CZ ATAPI") == 0) { + blk_queue_max_sectors(&drive->queue, 64); set_bit(IDEFLOPPY_CLIK_DRIVE, &floppy->flags); } - (void) idefloppy_get_capacity (drive); idefloppy_add_settings(drive); diff -u --recursive --new-file v2.5.0/linux/drivers/ide/ide-pci.c linux/drivers/ide/ide-pci.c --- v2.5.0/linux/drivers/ide/ide-pci.c Thu Oct 25 13:53:47 2001 +++ linux/drivers/ide/ide-pci.c Tue Nov 27 09:23:27 2001 @@ -79,6 +79,7 @@ #define DEVID_AMD7401 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_COBRA_7401}) #define DEVID_AMD7409 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7409}) #define DEVID_AMD7411 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7411}) +#define DEVID_AMD7441 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7441}) #define DEVID_PDCADMA ((ide_pci_devid_t){PCI_VENDOR_ID_PDC, PCI_DEVICE_ID_PDC_1841}) #define DEVID_SLC90E66 ((ide_pci_devid_t){PCI_VENDOR_ID_EFAR, PCI_DEVICE_ID_EFAR_SLC90E66_1}) #define DEVID_OSB4 ((ide_pci_devid_t){PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4IDE}) @@ -437,6 +438,7 @@ {DEVID_AMD7401, "AMD7401", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, {DEVID_AMD7409, "AMD7409", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 }, {DEVID_AMD7411, "AMD7411", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 }, + {DEVID_AMD7441, "AMD7441", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 }, {DEVID_PDCADMA, "PDCADMA", PCI_PDCADMA, ATA66_PDCADMA, INIT_PDCADMA, DMA_PDCADMA, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 }, {DEVID_SLC90E66,"SLC90E66", PCI_SLC90E66, ATA66_SLC90E66, INIT_SLC90E66, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, {DEVID_OSB4, "ServerWorks OSB4", PCI_SVWKS, ATA66_SVWKS, INIT_SVWKS, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, diff -u --recursive --new-file v2.5.0/linux/drivers/ide/ide-probe.c linux/drivers/ide/ide-probe.c --- v2.5.0/linux/drivers/ide/ide-probe.c Thu Oct 11 09:14:32 2001 +++ linux/drivers/ide/ide-probe.c Tue Nov 27 09:23:27 2001 @@ -594,9 +594,21 @@ static void ide_init_queue(ide_drive_t *drive) { request_queue_t *q = &drive->queue; + int max_sectors; q->queuedata = HWGROUP(drive); - blk_init_queue(q, do_ide_request); + blk_init_queue(q, do_ide_request, drive->name); + + /* IDE can do up to 128K per request, pdc4030 needs smaller limit */ +#ifdef CONFIG_BLK_DEV_PDC4030 + max_sectors = 127; +#else + max_sectors = 255; +#endif + blk_queue_max_sectors(q, max_sectors); + + /* IDE DMA can do PRD_ENTRIES number of segments */ + q->max_segments = PRD_ENTRIES; } /* @@ -670,7 +682,7 @@ hwgroup->rq = NULL; hwgroup->handler = NULL; hwgroup->drive = NULL; - hwgroup->busy = 0; + hwgroup->flags = 0; init_timer(&hwgroup->timer); hwgroup->timer.function = &ide_timer_expiry; hwgroup->timer.data = (unsigned long) hwgroup; @@ -749,7 +761,7 @@ { struct gendisk *gd; unsigned int unit, units, minors; - int *bs, *max_sect, *max_ra; + int *bs, *max_ra; extern devfs_handle_t ide_devfs_handle; /* figure out maximum drive number on the interface */ @@ -762,23 +774,15 @@ gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL); gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL); bs = kmalloc (minors*sizeof(int), GFP_KERNEL); - max_sect = kmalloc (minors*sizeof(int), GFP_KERNEL); max_ra = kmalloc (minors*sizeof(int), GFP_KERNEL); memset(gd->part, 0, minors 
* sizeof(struct hd_struct)); /* cdroms and msdos f/s are examples of non-1024 blocksizes */ blksize_size[hwif->major] = bs; - max_sectors[hwif->major] = max_sect; max_readahead[hwif->major] = max_ra; for (unit = 0; unit < minors; ++unit) { *bs++ = BLOCK_SIZE; -#ifdef CONFIG_BLK_DEV_PDC4030 - *max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : 255); -#else - /* IDE can do up to 128K per request. */ - *max_sect++ = 255; -#endif *max_ra++ = MAX_READAHEAD; } @@ -870,13 +874,6 @@ read_ahead[hwif->major] = 8; /* (4kB) */ hwif->present = 1; /* success */ -#if (DEBUG_SPINLOCK > 0) -{ - static int done = 0; - if (!done++) - printk("io_request_lock is %p\n", &io_request_lock); /* FIXME */ -} -#endif return hwif->present; } diff -u --recursive --new-file v2.5.0/linux/drivers/ide/ide-proc.c linux/drivers/ide/ide-proc.c --- v2.5.0/linux/drivers/ide/ide-proc.c Fri Sep 7 09:28:38 2001 +++ linux/drivers/ide/ide-proc.c Tue Nov 27 09:23:27 2001 @@ -190,7 +190,7 @@ if (hwif->mate && hwif->mate->hwgroup) mategroup = (ide_hwgroup_t *)(hwif->mate->hwgroup); cli(); /* all CPUs; ensure all writes are done together */ - while (mygroup->busy || (mategroup && mategroup->busy)) { + while (test_bit(IDE_BUSY, &mygroup->flags) || (mategroup && test_bit(IDE_BUSY, &mategroup->flags))) { sti(); /* all CPUs */ if (0 < (signed long)(jiffies - timeout)) { printk("/proc/ide/%s/config: channel(s) busy, cannot write\n", hwif->name); diff -u --recursive --new-file v2.5.0/linux/drivers/ide/ide-tape.c linux/drivers/ide/ide-tape.c --- v2.5.0/linux/drivers/ide/ide-tape.c Mon Aug 13 14:56:19 2001 +++ linux/drivers/ide/ide-tape.c Tue Nov 27 09:23:27 2001 @@ -1887,8 +1887,7 @@ printk("ide-tape: %s: skipping over config parition..\n", tape->name); #endif tape->onstream_write_error = OS_PART_ERROR; - if (tape->waiting) - complete(tape->waiting); + complete(tape->waiting); } } remove_stage = 1; @@ -1904,8 +1903,7 @@ tape->nr_pending_stages++; tape->next_stage = tape->first_stage; rq->current_nr_sectors = rq->nr_sectors; - if (tape->waiting) - complete(tape->waiting); + complete(tape->waiting); } } } else if (rq->cmd == IDETAPE_READ_RQ) { diff -u --recursive --new-file v2.5.0/linux/drivers/ide/ide.c linux/drivers/ide/ide.c --- v2.5.0/linux/drivers/ide/ide.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/ide/ide.c Tue Nov 27 09:23:27 2001 @@ -113,6 +113,8 @@ * Version 6.31 Debug Share INTR's and request queue streaming * Native ATA-100 support * Prep for Cascades Project + * Version 6.32 4GB highmem support for DMA, and mapping of those for + * PIO transfer (Jens Axboe) * * Some additional driver compile-time options are in ./include/linux/ide.h * @@ -121,8 +123,8 @@ * */ -#define REVISION "Revision: 6.31" -#define VERSION "Id: ide.c 6.31 2000/06/09" +#define REVISION "Revision: 6.32" +#define VERSION "Id: ide.c 6.32 2001/05/24" #undef REALLY_SLOW_IO /* most systems can safely undef this */ @@ -171,6 +173,14 @@ static int system_bus_speed; /* holds what we think is VESA/PCI bus speed */ static int initializing; /* set while initializing built-in drivers */ +/* + * protects global structures etc, we want to split this into per-hwgroup + * instead. + * + * anti-deadlock ordering: ide_lock -> DRIVE_LOCK + */ +spinlock_t ide_lock = SPIN_LOCK_UNLOCKED; + #ifdef CONFIG_BLK_DEV_IDEPCI static int ide_scan_direction; /* THIS was formerly 2.2.x pci=reverse */ #endif /* CONFIG_BLK_DEV_IDEPCI */ @@ -180,7 +190,7 @@ * ide_lock is used by the Atari code to obtain access to the IDE interrupt, * which is shared between several drivers. 
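Note the rename just below: ide.c now exports a global spinlock_t named ide_lock for the IDE core, so the Atari-only interrupt token that used to be called ide_lock becomes ide_intr_lock to avoid the clash. The two declarations side by side, as the patch leaves them:

    /* core serialization; anti-deadlock order: ide_lock -> DRIVE_LOCK */
    spinlock_t ide_lock = SPIN_LOCK_UNLOCKED;

    #if defined(__mc68000__) || defined(CONFIG_APUS)
    static int ide_intr_lock;   /* Atari shared-IRQ token; an int, not a lock */
    #endif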
*/ -static int ide_lock; +static int ide_intr_lock; #endif /* __mc68000__ || CONFIG_APUS */ int noautodma = 0; @@ -542,18 +552,26 @@ return 1; /* drive ready: *might* be interrupting */ } -/* - * This is our end_request replacement function. - */ -void ide_end_request (byte uptodate, ide_hwgroup_t *hwgroup) +inline int __ide_end_request(ide_hwgroup_t *hwgroup, int uptodate, int nr_secs) { + ide_drive_t *drive = hwgroup->drive; struct request *rq; unsigned long flags; - ide_drive_t *drive = hwgroup->drive; + int ret = 1; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); rq = hwgroup->rq; + if (rq->inactive) + BUG(); + + /* + * small hack to eliminate locking from ide_end_request to grab + * the first segment number of sectors + */ + if (!nr_secs) + nr_secs = rq->hard_cur_sectors; + /* * decide whether to reenable DMA -- 3 is a random magic for now, * if we DMA timeout more than 3 times, just stay in PIO @@ -563,13 +581,26 @@ hwgroup->hwif->dmaproc(ide_dma_on, drive); } - if (!end_that_request_first(rq, uptodate, hwgroup->drive->name)) { + if (!end_that_request_first(rq, uptodate, nr_secs)) { add_blkdev_randomness(MAJOR(rq->rq_dev)); + spin_lock(DRIVE_LOCK(drive)); blkdev_dequeue_request(rq); hwgroup->rq = NULL; end_that_request_last(rq); + spin_unlock(DRIVE_LOCK(drive)); + ret = 0; } - spin_unlock_irqrestore(&io_request_lock, flags); + + spin_unlock_irqrestore(&ide_lock, flags); + return ret; +} + +/* + * This is our end_request replacement function. + */ +int ide_end_request (byte uptodate, ide_hwgroup_t *hwgroup) +{ + return __ide_end_request(hwgroup, uptodate, 0); } /* @@ -585,7 +616,7 @@ unsigned long flags; ide_hwgroup_t *hwgroup = HWGROUP(drive); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); if (hwgroup->handler != NULL) { printk("%s: ide_set_handler: handler not null; old=%p, new=%p\n", drive->name, hwgroup->handler, handler); @@ -594,7 +625,7 @@ hwgroup->expiry = expiry; hwgroup->timer.expires = jiffies + timeout; add_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); } /* @@ -844,9 +875,8 @@ unsigned long flags; struct request *rq; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); rq = HWGROUP(drive)->rq; - spin_unlock_irqrestore(&io_request_lock, flags); if (rq->cmd == IDE_DRIVE_CMD) { byte *args = (byte *) rq->buffer; @@ -869,11 +899,16 @@ args[6] = IN_BYTE(IDE_SELECT_REG); } } - spin_lock_irqsave(&io_request_lock, flags); + + spin_lock(DRIVE_LOCK(drive)); + if (rq->inactive) + BUG(); blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; end_that_request_last(rq); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock(DRIVE_LOCK(drive)); + + spin_unlock_irqrestore(&ide_lock, flags); } /* @@ -1192,17 +1227,20 @@ /* * start_request() initiates handling of a new I/O request */ -static ide_startstop_t start_request (ide_drive_t *drive) +static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) { ide_startstop_t startstop; - unsigned long block, blockend; - struct request *rq = blkdev_entry_next_request(&drive->queue.queue_head); + unsigned long block; unsigned int minor = MINOR(rq->rq_dev), unit = minor >> PARTN_BITS; ide_hwif_t *hwif = HWIF(drive); + if (rq->inactive) + BUG(); + #ifdef DEBUG printk("%s: start_request: current=0x%08lx\n", hwif->name, (unsigned long) rq); #endif + /* bail early if we've exceeded max_failures */ if (drive->max_failures && (drive->failures > 
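The hwgroup->busy flag described here is now bit IDE_BUSY in hwgroup->flags, manipulated with atomic bitops so the claim/release protocol no longer needs a spinlock around it. The protocol, distilled:

    if (test_and_set_bit(IDE_BUSY, &hwgroup->flags))
            return;                 /* another thread owns the hwgroup */

    /* ... start requests; the bit stays set across interrupts while
     *     a request is in flight ... */

    clear_bit(IDE_BUSY, &hwgroup->flags);   /* no more work: release */

test_and_set_bit() returns the old value, so exactly one contender proceeds.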
drive->max_failures)) { goto kill_rq; @@ -1219,16 +1257,11 @@ } #endif block = rq->sector; - blockend = block + rq->nr_sectors; + /* Strange disk manager remap */ if ((rq->cmd == READ || rq->cmd == WRITE) && (drive->media == ide_disk || drive->media == ide_floppy)) { - if ((blockend < block) || (blockend > drive->part[minor&PARTN_MASK].nr_sects)) { - printk("%s%c: bad access: block=%ld, count=%ld\n", drive->name, - (minor&PARTN_MASK)?'0'+(minor&PARTN_MASK):' ', block, rq->nr_sectors); - goto kill_rq; - } - block += drive->part[minor&PARTN_MASK].start_sect + drive->sect0; + block += drive->sect0; } /* Yecch - this will shift the entire interval, possibly killing some innocent following sector */ @@ -1240,7 +1273,8 @@ #endif SELECT_DRIVE(hwif, drive); - if (ide_wait_stat(&startstop, drive, drive->ready_stat, BUSY_STAT|DRQ_STAT, WAIT_READY)) { + if (ide_wait_stat(&startstop, drive, drive->ready_stat, + BUSY_STAT|DRQ_STAT, WAIT_READY)) { printk("%s: drive not ready for command\n", drive->name); return startstop; } @@ -1251,7 +1285,8 @@ if (drive->driver != NULL) { return (DRIVER(drive)->do_request(drive, rq, block)); } - printk("%s: media type %d not supported\n", drive->name, drive->media); + printk("%s: media type %d not supported\n", + drive->name, drive->media); goto kill_rq; } return do_special(drive); @@ -1267,13 +1302,15 @@ { ide_hwgroup_t *hwgroup = HWGROUP(drive); unsigned long flags; + struct request *rq; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + rq = hwgroup->rq; + spin_unlock_irqrestore(&ide_lock, flags); - return start_request(drive); + return start_request(drive, rq); } /* @@ -1305,7 +1342,7 @@ || (drive->sleep && (!best->sleep || 0 < (signed long)(best->sleep - drive->sleep))) || (!best->sleep && 0 < (signed long)(WAKEUP(best) - WAKEUP(drive)))) { - if( !drive->queue.plugged ) + if (!blk_queue_plugged(&drive->queue)) best = drive; } } @@ -1334,7 +1371,7 @@ /* * Issue a new request to a drive from hwgroup - * Caller must have already done spin_lock_irqsave(&io_request_lock, ..); + * Caller must have already done spin_lock_irqsave(DRIVE_LOCK(drive), ...) * * A hwgroup is a serialized group of IDE interfaces. Usually there is * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640) @@ -1346,39 +1383,34 @@ * possibly along with many other devices. This is especially common in * PCI-based systems with off-board IDE controller cards. * - * The IDE driver uses the single global io_request_lock spinlock to protect - * access to the request queues, and to protect the hwgroup->busy flag. + * The IDE driver uses the queue spinlock to protect access to the request + * queues. * * The first thread into the driver for a particular hwgroup sets the - * hwgroup->busy flag to indicate that this hwgroup is now active, + * hwgroup->flags IDE_BUSY flag to indicate that this hwgroup is now active, * and then initiates processing of the top request from the request queue. * * Other threads attempting entry notice the busy setting, and will simply - * queue their new requests and exit immediately. Note that hwgroup->busy - * remains set even when the driver is merely awaiting the next interrupt. + * queue their new requests and exit immediately. Note that hwgroup->flags + * remains busy even when the driver is merely awaiting the next interrupt. * Thus, the meaning is "this hwgroup is busy processing a request". 
* * When processing of a request completes, the completing thread or IRQ-handler * will start the next request from the queue. If no more work remains, - * the driver will clear the hwgroup->busy flag and exit. - * - * The io_request_lock (spinlock) is used to protect all access to the - * hwgroup->busy flag, but is otherwise not needed for most processing in - * the driver. This makes the driver much more friendlier to shared IRQs - * than previous designs, while remaining 100% (?) SMP safe and capable. + * the driver will clear the hwgroup->flags IDE_BUSY flag and exit. */ static void ide_do_request(ide_hwgroup_t *hwgroup, int masked_irq) { ide_drive_t *drive; ide_hwif_t *hwif; ide_startstop_t startstop; + struct request *rq; - ide_get_lock(&ide_lock, ide_intr, hwgroup); /* for atari only: POSSIBLY BROKEN HERE(?) */ + ide_get_lock(&ide_intr_lock, ide_intr, hwgroup);/* for atari only: POSSIBLY BROKEN HERE(?) */ __cli(); /* necessary paranoia: ensure IRQs are masked on local CPU */ - while (!hwgroup->busy) { - hwgroup->busy = 1; + while (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) { drive = choose_drive(hwgroup); if (drive == NULL) { unsigned long sleep = 0; @@ -1401,13 +1433,13 @@ if (timer_pending(&hwgroup->timer)) printk("ide_set_handler: timer already active\n"); #endif - hwgroup->sleeping = 1; /* so that ide_timer_expiry knows what to do */ + set_bit(IDE_SLEEP, &hwgroup->flags); mod_timer(&hwgroup->timer, sleep); - /* we purposely leave hwgroup->busy==1 while sleeping */ + /* we purposely leave hwgroup busy while sleeping */ } else { /* Ugly, but how can we sleep for the lock otherwise? perhaps from tq_disk? */ - ide_release_lock(&ide_lock); /* for atari only */ - hwgroup->busy = 0; + ide_release_lock(&ide_intr_lock);/* for atari only */ + clear_bit(IDE_BUSY, &hwgroup->flags); } return; /* no more work for this hwgroup (for now) */ } @@ -1421,9 +1453,16 @@ drive->sleep = 0; drive->service_start = jiffies; - if ( drive->queue.plugged ) /* paranoia */ - printk("%s: Huh? nuking plugged queue\n", drive->name); - hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head); + if (blk_queue_plugged(&drive->queue)) + BUG(); + + /* + * just continuing an interrupted request maybe + */ + spin_lock(DRIVE_LOCK(drive)); + rq = hwgroup->rq = elv_next_request(&drive->queue); + spin_unlock(DRIVE_LOCK(drive)); + /* * Some systems have trouble with IDE IRQs arriving while * the driver is still setting things up. So, here we disable @@ -1434,14 +1473,14 @@ */ if (masked_irq && hwif->irq != masked_irq) disable_irq_nosync(hwif->irq); - spin_unlock(&io_request_lock); + spin_unlock(&ide_lock); ide__sti(); /* allow other IRQs while we start this request */ - startstop = start_request(drive); - spin_lock_irq(&io_request_lock); + startstop = start_request(drive, rq); + spin_lock_irq(&ide_lock); if (masked_irq && hwif->irq != masked_irq) enable_irq(hwif->irq); if (startstop == ide_stopped) - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); } } @@ -1460,7 +1499,19 @@ */ void do_ide_request(request_queue_t *q) { + unsigned long flags; + + /* + * release queue lock, grab IDE global lock and restore when + * we leave... 
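That comment is the whole trick of the reworked do_ide_request(): the block layer calls a request_fn with q->queue_lock held, but the IDE core still serializes on the global ide_lock, so the queue lock is set aside for the duration. The sequence, isolated (lock order matters: ide_lock is taken before any per-drive queue lock, per the anti-deadlock note earlier in this file):

    spin_unlock(&q->queue_lock);            /* shed the block layer's lock */
    spin_lock_irqsave(&ide_lock, flags);
    ide_do_request(q->queuedata, 0);        /* may take DRIVE_LOCK inside */
    spin_unlock_irqrestore(&ide_lock, flags);
    spin_lock(&q->queue_lock);              /* restore the caller's invariant */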
+ */ + spin_unlock(&q->queue_lock); + + spin_lock_irqsave(&ide_lock, flags); ide_do_request(q->queuedata, 0); + spin_unlock_irqrestore(&ide_lock, flags); + + spin_lock(&q->queue_lock); } /* @@ -1501,9 +1552,14 @@ HWGROUP(drive)->rq = NULL; rq->errors = 0; - rq->sector = rq->bh->b_rsector; - rq->current_nr_sectors = rq->bh->b_size >> 9; - rq->buffer = rq->bh->b_data; + rq->sector = rq->bio->bi_sector; + rq->current_nr_sectors = bio_sectors(rq->bio); + + /* + * just to make sure... + */ + if (rq->bio) + rq->buffer = NULL; } /* @@ -1519,7 +1575,11 @@ unsigned long flags; unsigned long wait; - spin_lock_irqsave(&io_request_lock, flags); + /* + * a global lock protects timers etc -- shouldn't get contention + * worth mentioning + */ + spin_lock_irqsave(&ide_lock, flags); del_timer(&hwgroup->timer); if ((handler = hwgroup->handler) == NULL) { @@ -1529,10 +1589,8 @@ * or we were "sleeping" to give other devices a chance. * Either way, we don't really want to complain about anything. */ - if (hwgroup->sleeping) { - hwgroup->sleeping = 0; - hwgroup->busy = 0; - } + if (test_and_clear_bit(IDE_SLEEP, &hwgroup->flags)) + clear_bit(IDE_BUSY, &hwgroup->flags); } else { ide_drive_t *drive = hwgroup->drive; if (!drive) { @@ -1541,17 +1599,16 @@ } else { ide_hwif_t *hwif; ide_startstop_t startstop; - if (!hwgroup->busy) { - hwgroup->busy = 1; /* paranoia */ - printk("%s: ide_timer_expiry: hwgroup->busy was 0 ??\n", drive->name); - } + /* paranoia */ + if (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) + printk("%s: ide_timer_expiry: hwgroup was not busy??\n", drive->name); if ((expiry = hwgroup->expiry) != NULL) { /* continue */ if ((wait = expiry(drive)) != 0) { /* reset timer */ hwgroup->timer.expires = jiffies + wait; add_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } } @@ -1561,7 +1618,7 @@ * the handler() function, which means we need to globally * mask the specific IRQ: */ - spin_unlock(&io_request_lock); + spin_unlock(&ide_lock); hwif = HWIF(drive); #if DISABLE_IRQ_NOSYNC disable_irq_nosync(hwif->irq); @@ -1587,13 +1644,13 @@ set_recovery_timer(hwif); drive->service_time = jiffies - drive->service_start; enable_irq(hwif->irq); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&ide_lock); if (startstop == ide_stopped) - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); } } ide_do_request(hwgroup, 0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); } /* @@ -1656,13 +1713,11 @@ ide_handler_t *handler; ide_startstop_t startstop; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); hwif = hwgroup->hwif; - if (!ide_ack_intr(hwif)) { - spin_unlock_irqrestore(&io_request_lock, flags); - return; - } + if (!ide_ack_intr(hwif)) + goto out_lock; if ((handler = hwgroup->handler) == NULL || hwgroup->poll_timeout != 0) { /* @@ -1694,16 +1749,14 @@ (void) IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]); #endif /* CONFIG_BLK_DEV_IDEPCI */ } - spin_unlock_irqrestore(&io_request_lock, flags); - return; + goto out_lock; } drive = hwgroup->drive; if (!drive) { /* * This should NEVER happen, and there isn't much we could do about it here. */ - spin_unlock_irqrestore(&io_request_lock, flags); - return; + goto out_lock; } if (!drive_is_ready(drive)) { /* @@ -1712,21 +1765,19 @@ * the IRQ before their status register is up to date. Hopefully we have * enough advance overhead that the latter isn't a problem. 
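ide_intr() is reshaped around a single unlock site: every early-out that used to pair spin_unlock_irqrestore() with a return now jumps to out_lock instead. The resulting shape, reduced to its control flow:

    spin_lock_irqsave(&ide_lock, flags);
    if (!ide_ack_intr(hwif))
            goto out_lock;          /* not our interrupt */
    if (handler == NULL || !drive || !drive_is_ready(drive))
            goto out_lock;          /* spurious or premature */

    /* ... service the interrupt, dropping ide_lock around handler() ... */

    out_lock:
    spin_unlock_irqrestore(&ide_lock, flags);

One exit path means one place to keep correct the next time the locking changes.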
*/ - spin_unlock_irqrestore(&io_request_lock, flags); - return; - } - if (!hwgroup->busy) { - hwgroup->busy = 1; /* paranoia */ - printk("%s: ide_intr: hwgroup->busy was 0 ??\n", drive->name); + goto out_lock; } + /* paranoia */ + if (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) + printk("%s: ide_intr: hwgroup was not busy??\n", drive->name); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock(&io_request_lock); + spin_unlock(&ide_lock); if (drive->unmask) ide__sti(); /* local CPU only */ startstop = handler(drive); /* service this interrupt, may set handler for next interrupt */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&ide_lock); /* * Note that handler() may have set things up for another @@ -1739,13 +1790,15 @@ drive->service_time = jiffies - drive->service_start; if (startstop == ide_stopped) { if (hwgroup->handler == NULL) { /* paranoia */ - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); ide_do_request(hwgroup, hwif->irq); } else { printk("%s: ide_intr: huh? expected NULL handler on exit\n", drive->name); } } - spin_unlock_irqrestore(&io_request_lock, flags); + +out_lock: + spin_unlock_irqrestore(&ide_lock, flags); } /* @@ -1755,9 +1808,6 @@ ide_drive_t *get_info_ptr (kdev_t i_rdev) { int major = MAJOR(i_rdev); -#if 0 - int minor = MINOR(i_rdev) & PARTN_MASK; -#endif unsigned int h; for (h = 0; h < MAX_HWIFS; ++h) { @@ -1766,11 +1816,7 @@ unsigned unit = DEVICE_NR(i_rdev); if (unit < MAX_DRIVES) { ide_drive_t *drive = &hwif->drives[unit]; -#if 0 - if ((drive->present) && (drive->part[minor].nr_sects)) -#else if (drive->present) -#endif return drive; } break; @@ -1818,7 +1864,8 @@ unsigned long flags; ide_hwgroup_t *hwgroup = HWGROUP(drive); unsigned int major = HWIF(drive)->major; - struct list_head *queue_head = &drive->queue.queue_head; + request_queue_t *q = &drive->queue; + struct list_head *queue_head = &q->queue_head; DECLARE_COMPLETION(wait); #ifdef CONFIG_BLK_DEV_PDC4030 @@ -1830,8 +1877,9 @@ rq->rq_dev = MKDEV(major,(drive->select.b.unit)<waiting = &wait; - spin_lock_irqsave(&io_request_lock, flags); - if (list_empty(queue_head) || action == ide_preempt) { + spin_lock_irqsave(&ide_lock, flags); + spin_lock(DRIVE_LOCK(drive)); + if (blk_queue_empty(&drive->queue) || action == ide_preempt) { if (action == ide_preempt) hwgroup->rq = NULL; } else { @@ -1840,9 +1888,10 @@ } else queue_head = queue_head->next; } - list_add(&rq->queue, queue_head); + q->elevator.elevator_add_req_fn(q, rq, queue_head); + spin_unlock(DRIVE_LOCK(drive)); ide_do_request(hwgroup, 0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); if (action == ide_wait) { wait_for_completion(&wait); /* wait for it to be serviced */ return rq->errors ? -EIO : 0; /* return -EIO if errors */ @@ -1851,6 +1900,16 @@ } +/* Common for ide-floppy.c and ide-disk.c */ +void ide_revalidate_drive (ide_drive_t *drive) +{ + struct gendisk *g = HWIF(drive)->gd; + int minor = (drive->select.b.unit << g->minor_shift); + kdev_t dev = MKDEV(g->major, minor); + + grok_partitions(dev, current_capacity(drive)); +} + /* * This routine is called to flush all partitions and partition tables * for a changed disk, and then re-read the new partition table. 
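The revalidation path below collapses the old per-minor loop into two generic calls, with the driver-specific part routed through the shared helper added above (ide_revalidate_drive(), which ide-disk and ide-floppy now use as their revalidate hook). Distilled:

    res = wipe_partitions(i_rdev);      /* invalidate + clear every minor */
    if (!res && DRIVER(drive)->revalidate)
            DRIVER(drive)->revalidate(drive);   /* ends in grok_partitions() */

A non-zero return from wipe_partitions() aborts the revalidate and propagates the error, which the old open-coded loop had no way to report.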
@@ -1863,40 +1922,33 @@ { ide_drive_t *drive; ide_hwgroup_t *hwgroup; - unsigned int p, major, minor; - long flags; + unsigned long flags; + int res; if ((drive = get_info_ptr(i_rdev)) == NULL) return -ENODEV; - major = MAJOR(i_rdev); - minor = drive->select.b.unit << PARTN_BITS; hwgroup = HWGROUP(drive); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); if (drive->busy || (drive->usage > 1)) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return -EBUSY; - }; + } drive->busy = 1; MOD_INC_USE_COUNT; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); - for (p = 0; p < (1<<PARTN_BITS); ++p) { - if (drive->part[p].nr_sects > 0) { - kdev_t devp = MKDEV(major, minor+p); - invalidate_device(devp, 1); - set_blocksize(devp, 1024); - } - drive->part[p].start_sect = 0; - drive->part[p].nr_sects = 0; - }; + res = wipe_partitions(i_rdev); + if (res) + goto leave; if (DRIVER(drive)->revalidate) DRIVER(drive)->revalidate(drive); + leave: drive->busy = 0; wake_up(&drive->wqueue); MOD_DEC_USE_COUNT; - return 0; + return res; } static void revalidate_drives (void) @@ -2169,11 +2221,10 @@ */ unregister_blkdev(hwif->major, hwif->name); kfree(blksize_size[hwif->major]); - kfree(max_sectors[hwif->major]); kfree(max_readahead[hwif->major]); blk_dev[hwif->major].data = NULL; blk_dev[hwif->major].queue = NULL; - blksize_size[hwif->major] = NULL; + blk_clear(hwif->major); gd = hwif->gd; if (gd) { del_gendisk(gd); @@ -2293,6 +2344,7 @@ memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->hw.io_ports)); hwif->irq = hw->irq; hwif->noprobe = 0; + hwif->chipset = hw->chipset; if (!initializing) { ide_probe_module(); @@ -2403,7 +2455,7 @@ unsigned long flags; if ((setting->rw & SETTING_READ)) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); switch(setting->data_type) { case TYPE_BYTE: val = *((u8 *) setting->data); @@ -2416,7 +2468,7 @@ val = *((u32 *) setting->data); break; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); } return val; } @@ -2426,11 +2478,11 @@ ide_hwgroup_t *hwgroup = HWGROUP(drive); unsigned long timeout = jiffies + (3 * HZ); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&ide_lock); - while (hwgroup->busy) { + while (test_bit(IDE_BUSY, &hwgroup->flags)) { unsigned long lflags; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&ide_lock); __save_flags(lflags); /* local CPU only */ __sti(); /* local CPU only; needed for jiffies */ if (0 < (signed long)(jiffies - timeout)) { @@ -2439,7 +2491,7 @@ return -EBUSY; } __restore_flags(lflags); /* local CPU only */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&ide_lock); } return 0; } @@ -2480,7 +2532,7 @@ *p = val; break; } - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&ide_lock); return 0; } @@ -2633,6 +2685,7 @@ { struct hd_big_geometry *loc = (struct hd_big_geometry *) arg; if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL; + if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT; if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT; if (put_user(drive->bios_cyl, (unsigned int *) &loc->cylinders)) return -EFAULT; @@ -2653,11 +2706,6 @@ return 0; } - case BLKGETSIZE: /* Return device size */ - return put_user(drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects, (unsigned long *) arg); - case BLKGETSIZE64: - return put_user((u64)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects << 9,
(u64 *) arg); - case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; return ide_revalidate_disk(inode->i_rdev); @@ -2775,6 +2823,8 @@ } return 0; + case BLKGETSIZE: + case BLKGETSIZE64: case BLKROSET: case BLKROGET: case BLKFLSBUF: @@ -2784,6 +2834,8 @@ case BLKELVSET: case BLKBSZGET: case BLKBSZSET: + case BLKHASHPROF: + case BLKHASHCLEAR: return blk_ioctl(inode->i_rdev, cmd, arg); case HDIO_GET_BUSSTATE: @@ -3409,7 +3461,7 @@ #ifdef CONFIG_BLK_DEV_IDE #if defined(__mc68000__) || defined(CONFIG_APUS) if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) { - ide_get_lock(&ide_lock, NULL, NULL); /* for atari only */ + ide_get_lock(&ide_intr_lock, NULL, NULL);/* for atari only */ disable_irq(ide_hwifs[0].irq); /* disable_irq_nosync ?? */ // disable_irq_nosync(ide_hwifs[0].irq); } @@ -3420,7 +3472,7 @@ #if defined(__mc68000__) || defined(CONFIG_APUS) if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) { enable_irq(ide_hwifs[0].irq); - ide_release_lock(&ide_lock); /* for atari only */ + ide_release_lock(&ide_intr_lock);/* for atari only */ } #endif /* __mc68000__ || CONFIG_APUS */ #endif /* CONFIG_BLK_DEV_IDE */ @@ -3685,6 +3737,7 @@ EXPORT_SYMBOL(ide_do_drive_cmd); EXPORT_SYMBOL(ide_end_drive_cmd); EXPORT_SYMBOL(ide_end_request); +EXPORT_SYMBOL(__ide_end_request); EXPORT_SYMBOL(ide_revalidate_disk); EXPORT_SYMBOL(ide_cmd); EXPORT_SYMBOL(ide_wait_cmd); diff -u --recursive --new-file v2.5.0/linux/drivers/ide/pdc202xx.c linux/drivers/ide/pdc202xx.c --- v2.5.0/linux/drivers/ide/pdc202xx.c Wed Nov 14 11:44:03 2001 +++ linux/drivers/ide/pdc202xx.c Tue Nov 27 09:23:27 2001 @@ -893,6 +893,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { hwif->dmaproc = &pdc202xx_dmaproc; + hwif->highmem = 1; if (!noautodma) hwif->autodma = 1; } else { diff -u --recursive --new-file v2.5.0/linux/drivers/ide/piix.c linux/drivers/ide/piix.c --- v2.5.0/linux/drivers/ide/piix.c Thu Oct 25 13:53:47 2001 +++ linux/drivers/ide/piix.c Tue Nov 27 09:23:27 2001 @@ -523,6 +523,7 @@ if (!hwif->dma_base) return; + hwif->highmem = 1; #ifndef CONFIG_BLK_DEV_IDEDMA hwif->autodma = 0; #else /* CONFIG_BLK_DEV_IDEDMA */ diff -u --recursive --new-file v2.5.0/linux/drivers/ide/serverworks.c linux/drivers/ide/serverworks.c --- v2.5.0/linux/drivers/ide/serverworks.c Sun Sep 9 10:43:02 2001 +++ linux/drivers/ide/serverworks.c Tue Nov 27 09:23:27 2001 @@ -593,6 +593,7 @@ if (!noautodma) hwif->autodma = 1; hwif->dmaproc = &svwks_dmaproc; + hwif->highmem = 1; } else { hwif->autodma = 0; hwif->drives[0].autotune = 1; diff -u --recursive --new-file v2.5.0/linux/drivers/ide/sis5513.c linux/drivers/ide/sis5513.c --- v2.5.0/linux/drivers/ide/sis5513.c Fri Sep 7 09:28:38 2001 +++ linux/drivers/ide/sis5513.c Tue Nov 27 09:23:27 2001 @@ -671,6 +671,7 @@ case PCI_DEVICE_ID_SI_5591: if (!noautodma) hwif->autodma = 1; + hwif->highmem = 1; hwif->dmaproc = &sis5513_dmaproc; break; #endif /* CONFIG_BLK_DEV_IDEDMA */ diff -u --recursive --new-file v2.5.0/linux/drivers/ide/slc90e66.c linux/drivers/ide/slc90e66.c --- v2.5.0/linux/drivers/ide/slc90e66.c Sun Jul 15 16:22:23 2001 +++ linux/drivers/ide/slc90e66.c Tue Nov 27 09:23:27 2001 @@ -373,6 +373,7 @@ return; hwif->autodma = 0; + hwif->highmem = 1; #ifdef CONFIG_BLK_DEV_IDEDMA if (!noautodma) hwif->autodma = 1; diff -u --recursive --new-file v2.5.0/linux/drivers/ide/via82cxxx.c linux/drivers/ide/via82cxxx.c --- v2.5.0/linux/drivers/ide/via82cxxx.c Tue Sep 11 08:40:36 2001 +++ linux/drivers/ide/via82cxxx.c Tue Nov 27 09:23:27 2001 @@ -520,6 +520,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if 
(hwif->dma_base) { + hwif->highmem = 1; hwif->dmaproc = &via82cxxx_dmaproc; #ifdef CONFIG_IDEDMA_AUTO if (!noautodma) diff -u --recursive --new-file v2.5.0/linux/drivers/md/lvm-snap.c linux/drivers/md/lvm-snap.c --- v2.5.0/linux/drivers/md/lvm-snap.c Mon Nov 12 09:34:20 2001 +++ linux/drivers/md/lvm-snap.c Tue Nov 27 09:23:27 2001 @@ -351,7 +351,7 @@ blksize_snap = lvm_get_blksize(snap_phys_dev); max_blksize = max(blksize_org, blksize_snap); min_blksize = min(blksize_org, blksize_snap); - max_sectors = KIO_MAX_SECTORS * (min_blksize>>9); + max_sectors = LVM_MAX_SECTORS * (min_blksize>>9); if (chunk_size % (max_blksize>>9)) goto fail_blksize; @@ -363,20 +363,20 @@ iobuf->length = nr_sectors << 9; - if(!lvm_snapshot_prepare_blocks(iobuf->blocks, org_start, + if(!lvm_snapshot_prepare_blocks(lv_snap->blocks, org_start, nr_sectors, blksize_org)) goto fail_prepare; if (brw_kiovec(READ, 1, &iobuf, org_phys_dev, - iobuf->blocks, blksize_org) != (nr_sectors<<9)) + lv_snap->blocks, blksize_org) != (nr_sectors<<9)) goto fail_raw_read; - if(!lvm_snapshot_prepare_blocks(iobuf->blocks, snap_start, + if(!lvm_snapshot_prepare_blocks(lv_snap->blocks, snap_start, nr_sectors, blksize_snap)) goto fail_prepare; if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, - iobuf->blocks, blksize_snap) != (nr_sectors<<9)) + lv_snap->blocks, blksize_snap) !=(nr_sectors<<9)) goto fail_raw_write; } @@ -505,7 +505,7 @@ ret = alloc_kiovec(1, &lv_snap->lv_iobuf); if (ret) goto out; - max_sectors = KIO_MAX_SECTORS << (PAGE_SHIFT-9); + max_sectors = LVM_MAX_SECTORS << (PAGE_SHIFT-9); ret = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors); if (ret) goto out_free_kiovec; @@ -542,8 +542,6 @@ void lvm_snapshot_release(lv_t * lv) { - int nbhs = KIO_MAX_SECTORS; - if (lv->lv_block_exception) { vfree(lv->lv_block_exception); diff -u --recursive --new-file v2.5.0/linux/drivers/md/lvm.c linux/drivers/md/lvm.c --- v2.5.0/linux/drivers/md/lvm.c Mon Nov 19 09:56:04 2001 +++ linux/drivers/md/lvm.c Tue Nov 27 09:23:27 2001 @@ -236,7 +236,7 @@ /* * External function prototypes */ -static int lvm_make_request_fn(request_queue_t*, int, struct buffer_head*); +static int lvm_make_request_fn(request_queue_t*, struct bio *); static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong); static int lvm_blk_open(struct inode *, struct file *); @@ -262,7 +262,7 @@ #ifdef LVM_HD_NAME extern void (*lvm_hd_name_ptr) (char *, int); #endif -static int lvm_map(struct buffer_head *, int); +static int lvm_map(struct bio *); static int lvm_do_lock_lvm(void); static int lvm_do_le_remap(vg_t *, void *); @@ -291,9 +291,9 @@ static void __update_hardsectsize(lv_t *lv); -static void _queue_io(struct buffer_head *bh, int rw); -static struct buffer_head *_dequeue_io(void); -static void _flush_io(struct buffer_head *bh); +static void _queue_io(struct bio *bh, int rw); +static struct bio *_dequeue_io(void); +static void _flush_io(struct bio *bh); static int _open_pv(pv_t *pv); static void _close_pv(pv_t *pv); @@ -346,7 +346,7 @@ static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED; static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED; -static struct buffer_head *_pe_requests; +static struct bio *_pe_requests; static DECLARE_RWSEM(_pe_lock); @@ -369,7 +369,6 @@ /* gendisk structures */ static struct hd_struct lvm_hd_struct[MAX_LV]; static int lvm_blocksizes[MAX_LV]; -static int lvm_hardsectsizes[MAX_LV]; static int lvm_size[MAX_LV]; static struct gendisk lvm_gendisk = @@ -451,9 +450,7 @@ del_gendisk(&lvm_gendisk); - blk_size[MAJOR_NR] = NULL; - 
blksize_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; + blk_clear(MAJOR_NR); #ifdef LVM_HD_NAME /* reference from linux/drivers/block/genhd.c */ @@ -1037,25 +1034,25 @@ static int lvm_user_bmap(struct inode *inode, struct lv_bmap *user_result) { - struct buffer_head bh; + struct bio bio; unsigned long block; int err; if (get_user(block, &user_result->lv_block)) return -EFAULT; - memset(&bh,0,sizeof bh); - bh.b_blocknr = block; - bh.b_dev = bh.b_rdev = inode->i_rdev; - bh.b_size = lvm_get_blksize(bh.b_dev); - bh.b_rsector = block * (bh.b_size >> 9); - if ((err=lvm_map(&bh, READ)) < 0) { + memset(&bio,0,sizeof(bio)); + bio.bi_dev = inode->i_rdev; + bio.bi_io_vec.bv_len = lvm_get_blksize(bio.bi_dev); + bio.bi_sector = block * bio_sectors(&bio); + bio.bi_rw = READ; + if ((err=lvm_map(&bio)) < 0) { printk("lvm map failed: %d\n", err); return -EINVAL; } - return put_user(kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) || - put_user(bh.b_rsector/(bh.b_size>>9), &user_result->lv_block) ? + return put_user(kdev_t_to_nr(bio.bi_dev), &user_result->lv_dev) || + put_user(bio.bi_sector/bio_sectors(&bio), &user_result->lv_block) ? -EFAULT : 0; } @@ -1104,7 +1101,7 @@ (sector < (pe_lock_req.data.pv_offset + pe_size))); } -static inline int _defer_extent(struct buffer_head *bh, int rw, +static inline int _defer_extent(struct bio *bh, int rw, kdev_t pv, ulong sector, uint32_t pe_size) { if (pe_lock_req.lock == LOCK_PE) { @@ -1122,17 +1119,18 @@ return 0; } -static int lvm_map(struct buffer_head *bh, int rw) +static int lvm_map(struct bio *bh) { - int minor = MINOR(bh->b_rdev); + int minor = MINOR(bh->bi_dev); ulong index; ulong pe_start; - ulong size = bh->b_size >> 9; - ulong rsector_org = bh->b_rsector; + ulong size = bio_sectors(bh); + ulong rsector_org = bh->bi_sector; ulong rsector_map; kdev_t rdev_map; vg_t *vg_this = vg[VG_BLK(minor)]; lv_t *lv = vg_this->lv[LV_BLK(minor)]; + int rw = bio_data_dir(bh); down_read(&lv->lv_lock); @@ -1153,7 +1151,7 @@ P_MAP("%s - lvm_map minor: %d *rdev: %s *rsector: %lu size:%lu\n", lvm_name, minor, - kdevname(bh->b_rdev), + kdevname(bh->bi_dev), rsector_org, size); if (rsector_org + size > lv->lv_size) { @@ -1248,13 +1246,15 @@ } out: - bh->b_rdev = rdev_map; - bh->b_rsector = rsector_map; + if (test_bit(BIO_HASHED, &bh->bi_flags)) + BUG(); + bh->bi_dev = rdev_map; + bh->bi_sector = rsector_map; up_read(&lv->lv_lock); return 1; bad: - buffer_IO_error(bh); + bio_io_error(bh); up_read(&lv->lv_lock); return -1; } /* lvm_map() */ @@ -1287,10 +1287,9 @@ /* * make request function */ -static int lvm_make_request_fn(request_queue_t *q, - int rw, - struct buffer_head *bh) { - return (lvm_map(bh, rw) <= 0) ? 0 : 1; +static int lvm_make_request_fn(request_queue_t *q, struct bio *bio) +{ + return (lvm_map(bio) <= 0) ? 
0 : 1; } @@ -1331,7 +1330,7 @@ static int lvm_do_pe_lock_unlock(vg_t *vg_ptr, void *arg) { pe_lock_req_t new_lock; - struct buffer_head *bh; + struct bio *bh; uint p; if (vg_ptr == NULL) return -ENXIO; @@ -1820,8 +1819,6 @@ max_hardsectsize = hardsectsize; } } - - lvm_hardsectsizes[MINOR(lv->lv_dev)] = max_hardsectsize; } /* @@ -2665,7 +2662,6 @@ blk_size[MAJOR_NR] = lvm_size; blksize_size[MAJOR_NR] = lvm_blocksizes; - hardsect_size[MAJOR_NR] = lvm_hardsectsizes; return; } /* lvm_gen_init() */ @@ -2673,16 +2669,16 @@ /* Must have down_write(_pe_lock) when we enqueue buffers */ -static void _queue_io(struct buffer_head *bh, int rw) { - if (bh->b_reqnext) BUG(); - bh->b_reqnext = _pe_requests; +static void _queue_io(struct bio *bh, int rw) { + if (bh->bi_next) BUG(); + bh->bi_next = _pe_requests; _pe_requests = bh; } /* Must have down_write(_pe_lock) when we dequeue buffers */ -static struct buffer_head *_dequeue_io(void) +static struct bio *_dequeue_io(void) { - struct buffer_head *bh = _pe_requests; + struct bio *bh = _pe_requests; _pe_requests = NULL; return bh; } @@ -2697,13 +2693,14 @@ * If, for some reason, the same PE is locked again before all of these writes * have finished, then these buffers will just be re-queued (i.e. no danger). */ -static void _flush_io(struct buffer_head *bh) +static void _flush_io(struct bio *bh) { while (bh) { - struct buffer_head *next = bh->b_reqnext; - bh->b_reqnext = NULL; + struct bio *next = bh->bi_next; + bh->bi_next = NULL; /* resubmit this buffer head */ - generic_make_request(WRITE, bh); + bh->bi_rw = WRITE; /* needed? */ + generic_make_request(bh); bh = next; } } diff -u --recursive --new-file v2.5.0/linux/drivers/md/md.c linux/drivers/md/md.c --- v2.5.0/linux/drivers/md/md.c Thu Oct 25 13:58:34 2001 +++ linux/drivers/md/md.c Tue Nov 27 09:23:27 2001 @@ -105,7 +105,6 @@ */ struct hd_struct md_hd_struct[MAX_MD_DEVS]; static int md_blocksizes[MAX_MD_DEVS]; -static int md_hardsect_sizes[MAX_MD_DEVS]; static int md_maxreadahead[MAX_MD_DEVS]; static mdk_thread_t *md_recovery_thread; @@ -172,14 +171,14 @@ mddev_map[minor].data = NULL; } -static int md_make_request(request_queue_t *q, int rw, struct buffer_head * bh) +static int md_make_request (request_queue_t *q, struct bio *bio) { - mddev_t *mddev = kdev_to_mddev(bh->b_rdev); + mddev_t *mddev = kdev_to_mddev(bio->bi_dev); if (mddev && mddev->pers) - return mddev->pers->make_request(mddev, rw, bh); + return mddev->pers->make_request(mddev, bio_rw(bio), bio); else { - buffer_IO_error(bh); + bio_io_error(bio); return 0; } } @@ -1701,19 +1700,14 @@ * device. 
* Also find largest hardsector size */ - md_hardsect_sizes[mdidx(mddev)] = 512; ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->faulty) continue; invalidate_device(rdev->dev, 1); - if (get_hardsect_size(rdev->dev) - > md_hardsect_sizes[mdidx(mddev)]) - md_hardsect_sizes[mdidx(mddev)] = - get_hardsect_size(rdev->dev); - } - md_blocksizes[mdidx(mddev)] = 1024; - if (md_blocksizes[mdidx(mddev)] < md_hardsect_sizes[mdidx(mddev)]) - md_blocksizes[mdidx(mddev)] = md_hardsect_sizes[mdidx(mddev)]; + md_blocksizes[mdidx(mddev)] = 1024; + if (get_hardsect_size(rdev->dev) > md_blocksizes[mdidx(mddev)]) + md_blocksizes[mdidx(mddev)] = get_hardsect_size(rdev->dev); + } mddev->pers = pers[pnum]; err = mddev->pers->run(mddev); @@ -2769,7 +2763,7 @@ (short *) &loc->cylinders); if (err) goto abort_unlock; - err = md_put_user (md_hd_struct[minor].start_sect, + err = md_put_user (get_start_sect(dev), (long *) &loc->start); goto done_unlock; } @@ -3621,13 +3615,11 @@ for(i = 0; i < MAX_MD_DEVS; i++) { md_blocksizes[i] = 1024; md_size[i] = 0; - md_hardsect_sizes[i] = 512; md_maxreadahead[i] = MD_READAHEAD; } blksize_size[MAJOR_NR] = md_blocksizes; blk_size[MAJOR_NR] = md_size; max_readahead[MAJOR_NR] = md_maxreadahead; - hardsect_size[MAJOR_NR] = md_hardsect_sizes; dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t)); @@ -3670,7 +3662,8 @@ md_recovery_thread = md_register_thread(md_do_recovery, NULL, name); if (!md_recovery_thread) - printk(KERN_ALERT "md: bug: couldn't allocate md_recovery_thread\n"); + printk(KERN_ALERT + "md: bug: couldn't allocate md_recovery_thread\n"); md_register_reboot_notifier(&md_notifier); raid_table_header = register_sysctl_table(raid_root_table, 1); @@ -4008,15 +4001,10 @@ #endif del_gendisk(&md_gendisk); - blk_dev[MAJOR_NR].queue = NULL; - blksize_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; - max_readahead[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; - + blk_clear(MAJOR_NR); + free_device_names(); - } #endif diff -u --recursive --new-file v2.5.0/linux/drivers/message/i2o/i2o_block.c linux/drivers/message/i2o/i2o_block.c --- v2.5.0/linux/drivers/message/i2o/i2o_block.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/message/i2o/i2o_block.c Tue Nov 27 09:23:27 2001 @@ -114,15 +114,16 @@ #define I2O_BSA_DSC_VOLUME_CHANGED 0x000D #define I2O_BSA_DSC_TIMEOUT 0x000E +#define I2O_UNIT(dev) (i2ob_dev[MINOR((dev)) & 0xf0]) +#define I2O_LOCK(unit) (i2ob_dev[(unit)].req_queue->queue_lock) + /* * Some of these can be made smaller later */ static int i2ob_blksizes[MAX_I2OB<<4]; -static int i2ob_hardsizes[MAX_I2OB<<4]; static int i2ob_sizes[MAX_I2OB<<4]; static int i2ob_media_change_flag[MAX_I2OB]; -static u32 i2ob_max_sectors[MAX_I2OB<<4]; static int i2ob_context; @@ -252,9 +253,9 @@ unsigned long mptr; u64 offset; struct request *req = ireq->req; - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; int count = req->nr_sectors<<9; - char *last = NULL; + unsigned long last = ~0UL; unsigned short size = 0; // printk(KERN_INFO "i2ob_send called\n"); @@ -283,30 +284,30 @@ if(req->cmd == READ) { __raw_writel(I2O_CMD_BLOCK_READ<<24|HOST_TID<<12|tid, msg+4); - while(bh!=NULL) + while(bio) { - if(bh->b_data == last) { - size += bh->b_size; - last += bh->b_size; - if(bh->b_reqnext) + if (bio_to_phys(bio) == last) { + size += bio_size(bio); + last += bio_size(bio); + if(bio->bi_next) __raw_writel(0x14000000|(size), mptr-8); else __raw_writel(0xD4000000|(size), mptr-8); } else { - if(bh->b_reqnext) - __raw_writel(0x10000000|(bh->b_size), mptr); + if(bio->bi_next) + 
__raw_writel(0x10000000|bio_size(bio), mptr); else - __raw_writel(0xD0000000|(bh->b_size), mptr); - __raw_writel(virt_to_bus(bh->b_data), mptr+4); + __raw_writel(0xD0000000|bio_size(bio), mptr); + __raw_writel(bio_to_phys(bio), mptr+4); mptr += 8; - size = bh->b_size; - last = bh->b_data + size; + size = bio_size(bio); + last = bio_to_phys(bio) + bio_size(bio); } - count -= bh->b_size; - bh = bh->b_reqnext; + count -= bio_size(bio); + bio = bio->bi_next; } /* * Heuristic for now since the block layer doesnt give @@ -322,30 +323,30 @@ else if(req->cmd == WRITE) { __raw_writel(I2O_CMD_BLOCK_WRITE<<24|HOST_TID<<12|tid, msg+4); - while(bh!=NULL) + while(bio) { - if(bh->b_data == last) { - size += bh->b_size; - last += bh->b_size; - if(bh->b_reqnext) + if (bio_to_phys(bio) == last) { + size += bio_size(bio); + last += bio_size(bio); + if(bio->bi_next) __raw_writel(0x14000000|(size), mptr-8); else __raw_writel(0xD4000000|(size), mptr-8); } else { - if(bh->b_reqnext) - __raw_writel(0x14000000|(bh->b_size), mptr); + if(bio->bi_next) + __raw_writel(0x14000000|bio_size(bio), mptr); else - __raw_writel(0xD4000000|(bh->b_size), mptr); - __raw_writel(virt_to_bus(bh->b_data), mptr+4); + __raw_writel(0xD4000000|bio_size(bio), mptr); + __raw_writel(bio_to_phys(bio), mptr+4); mptr += 8; - size = bh->b_size; - last = bh->b_data + size; + size = bio_size(bio); + last = bio_to_phys(bio) + bio_size(bio); } - count -= bh->b_size; - bh = bh->b_reqnext; + count -= bio_size(bio); + bio = bio->bi_next; } if(c->battery) @@ -409,7 +410,8 @@ * unlocked. */ - while (end_that_request_first( req, !req->errors, "i2o block" )); + while (end_that_request_first(req, !req->errors)) + ; /* * It is now ok to complete the request. @@ -417,61 +419,6 @@ end_that_request_last( req ); } -/* - * Request merging functions - */ -static inline int i2ob_new_segment(request_queue_t *q, struct request *req, - int __max_segments) -{ - int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments; - - if (__max_segments < max_segments) - max_segments = __max_segments; - - if (req->nr_segments < max_segments) { - req->nr_segments++; - return 1; - } - return 0; -} - -static int i2ob_back_merge(request_queue_t *q, struct request *req, - struct buffer_head *bh, int __max_segments) -{ - if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) - return 1; - return i2ob_new_segment(q, req, __max_segments); -} - -static int i2ob_front_merge(request_queue_t *q, struct request *req, - struct buffer_head *bh, int __max_segments) -{ - if (bh->b_data + bh->b_size == req->bh->b_data) - return 1; - return i2ob_new_segment(q, req, __max_segments); -} - -static int i2ob_merge_requests(request_queue_t *q, - struct request *req, - struct request *next, - int __max_segments) -{ - int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments; - int total_segments = req->nr_segments + next->nr_segments; - - if (__max_segments < max_segments) - max_segments = __max_segments; - - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) - total_segments--; - - if (total_segments > max_segments) - return 0; - - req->nr_segments = total_segments; - return 1; -} - static int i2ob_flush(struct i2o_controller *c, struct i2ob_device *d, int unit) { unsigned long msg; @@ -535,10 +482,10 @@ ireq=&i2ob_queues[c->unit]->request_queue[m[3]]; ireq->req->errors++; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); - spin_unlock_irqrestore(&io_request_lock, 
flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* Now flush the message by making it a NOP */ m[0]&=0x00FFFFFF; @@ -559,12 +506,12 @@ if(msg->function == I2O_CMD_BLOCK_CFLUSH) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); dev->constipated=0; DEBUG(("unconstipated\n")); if(i2ob_backlog_request(c, dev)==0) i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -580,10 +527,10 @@ ireq=&i2ob_queues[c->unit]->request_queue[m[3]]; ireq->req->errors++; printk(KERN_WARNING "I2O Block: Data transfer to deleted device!\n"); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -629,7 +576,7 @@ */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); if(err==4) { /* @@ -674,7 +621,7 @@ */ i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* * and out @@ -682,7 +629,7 @@ return; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); printk(KERN_ERR "\n/dev/%s error: %s", dev->i2odev->dev_name, bsa_errors[m[4]&0XFFFF]); if(m[4]&0x00FF0000) @@ -697,8 +644,8 @@ * Dequeue the request. We use irqsave locks as one day we * may be running polled controllers from a BH... */ - - spin_lock_irqsave(&io_request_lock, flags); + + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); atomic_dec(&i2ob_queues[c->unit]->queue_depth); @@ -710,7 +657,7 @@ if(i2ob_backlog_request(c, dev)==0) i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); } /* @@ -789,8 +736,7 @@ for(i = unit; i <= unit+15; i++) { i2ob_sizes[i] = 0; - i2ob_hardsizes[i] = 0; - i2ob_max_sectors[i] = 0; + blk_queue_max_sectors(i2ob_dev[i].req_queue, 0); i2ob[i].nr_sects = 0; i2ob_gendisk.part[i].nr_sects = 0; } @@ -824,11 +770,11 @@ if(i2ob_query_device(&i2ob_dev[unit], 0x0004, 0, &size, 8) !=0 ) i2ob_query_device(&i2ob_dev[unit], 0x0000, 4, &size, 8); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(unit), flags); i2ob_sizes[unit] = (int)(size>>10); i2ob_gendisk.part[unit].nr_sects = size>>9; i2ob[unit].nr_sects = (int)(size>>9); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(unit), flags); break; } @@ -881,13 +827,14 @@ static void i2ob_timer_handler(unsigned long q) { + request_queue_t *req_queue = (request_queue_t *) q; unsigned long flags; /* * We cannot touch the request queue or the timer - * flag without holding the io_request_lock. + * flag without holding the queue_lock */ - spin_lock_irqsave(&io_request_lock,flags); + spin_lock_irqsave(&req_queue->queue_lock,flags); /* * Clear the timer started flag so that @@ -898,12 +845,12 @@ /* * Restart any requests. */ - i2ob_request((request_queue_t*)q); + i2ob_request(req_queue); /* * Free the lock. 
*/ - spin_unlock_irqrestore(&io_request_lock,flags); + spin_unlock_irqrestore(&req_queue->queue_lock,flags); } static int i2ob_backlog_request(struct i2o_controller *c, struct i2ob_device *dev) @@ -1132,34 +1079,23 @@ static int i2ob_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { - struct i2ob_device *dev; - int minor; - /* Anyone capable of this syscall can do *real bad* things */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!inode) + if (!inode || !inode->i_rdev) return -EINVAL; - minor = MINOR(inode->i_rdev); - if (minor >= (MAX_I2OB<<4)) - return -ENODEV; - dev = &i2ob_dev[minor]; switch (cmd) { - case BLKGETSIZE: - return put_user(i2ob[minor].nr_sects, (long *) arg); - case BLKGETSIZE64: - return put_user((u64)i2ob[minor].nr_sects << 9, (u64 *)arg); - case HDIO_GETGEO: { struct hd_geometry g; - int u=minor&0xF0; + int u = MINOR(inode->i_rdev) & 0xF0; i2o_block_biosparam(i2ob_sizes[u]<<1, &g.cylinders, &g.heads, &g.sectors); - g.start = i2ob[minor].start_sect; - return copy_to_user((void *)arg,&g, sizeof(g))?-EFAULT:0; + g.start = get_start_sect(inode->i_rdev); + return copy_to_user((void *)arg, &g, sizeof(g)) + ? -EFAULT : 0; } case BLKRRPART: @@ -1167,6 +1103,8 @@ return -EACCES; return do_i2ob_revalidate(inode->i_rdev,1); + case BLKGETSIZE: + case BLKGETSIZE64: case BLKFLSBUF: case BLKROSET: case BLKROGET: @@ -1354,8 +1292,6 @@ i2ob_query_device(dev, 0x0000, 5, &flags, 4); i2ob_query_device(dev, 0x0000, 6, &status, 4); i2ob_sizes[unit] = (int)(size>>10); - for(i=unit; i <= unit+15 ; i++) - i2ob_hardsizes[i] = blocksize; i2ob_gendisk.part[unit].nr_sects = size>>9; i2ob[unit].nr_sects = (int)(size>>9); @@ -1366,26 +1302,27 @@ /* * Max number of Scatter-Gather Elements */ - for(i=unit;i<=unit+15;i++) { - i2ob_max_sectors[i] = 256; - i2ob_dev[i].max_segments = (d->controller->status_block->inbound_frame_size - 8)/2; + request_queue_t *q = i2ob_dev[unit].req_queue; + + blk_queue_max_sectors(q, 256); + blk_queue_max_segments(q, (d->controller->status_block->inbound_frame_size - 8)/2); if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.queue_buggy == 2) - i2ob_dev[i].depth = 32; + i2ob_dev[unit].depth = 32; if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.queue_buggy == 1) { - i2ob_max_sectors[i] = 32; - i2ob_dev[i].max_segments = 8; + blk_queue_max_sectors(q, 32); + blk_queue_max_segments(q, 8); - i2ob_dev[i].depth = 4; + i2ob_dev[unit].depth = 4; } if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.short_req) { - i2ob_max_sectors[i] = 8; - i2ob_dev[i].max_segments = 8; + blk_queue_max_sectors(q, 8); + blk_queue_max_segments(q, 8); } }
i2ob_request, name); blk_queue_headactive(&i2ob_queues[unit]->req_queue, 0); - i2ob_queues[unit]->req_queue.back_merge_fn = i2ob_back_merge; - i2ob_queues[unit]->req_queue.front_merge_fn = i2ob_front_merge; - i2ob_queues[unit]->req_queue.merge_requests_fn = i2ob_merge_requests; i2ob_queues[unit]->req_queue.queuedata = &i2ob_queues[unit]; return 0; @@ -1506,11 +1442,11 @@ */ static request_queue_t* i2ob_get_queue(kdev_t dev) { - int unit = MINOR(dev)&0xF0; - - return i2ob_dev[unit].req_queue; + return I2O_UNIT(dev).req_queue; } + + /* * Probe the I2O subsytem for block class devices */ @@ -1708,7 +1644,7 @@ int i = 0; unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); /* * Need to do this...we somtimes get two events from the IRTOS @@ -1730,7 +1666,7 @@ if(unit >= MAX_I2OB<<4) { printk(KERN_ERR "i2ob_del_device called, but not in dev table!\n"); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -1743,12 +1679,11 @@ { i2ob_dev[i].i2odev = NULL; i2ob_sizes[i] = 0; - i2ob_hardsizes[i] = 0; - i2ob_max_sectors[i] = 0; + blk_queue_max_sectors(i2ob_dev[i].req_queue, 0); i2ob[i].nr_sects = 0; i2ob_gendisk.part[i].nr_sects = 0; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* * Decrease usage count for module @@ -1891,12 +1826,10 @@ */ blksize_size[MAJOR_NR] = i2ob_blksizes; - hardsect_size[MAJOR_NR] = i2ob_hardsizes; blk_size[MAJOR_NR] = i2ob_sizes; - max_sectors[MAJOR_NR] = i2ob_max_sectors; blk_dev[MAJOR_NR].queue = i2ob_get_queue; - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request); + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request, "i2o"); blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); for (i = 0; i < MAX_I2OB << 4; i++) { @@ -1909,7 +1842,6 @@ i2ob_dev[i].tail = NULL; i2ob_dev[i].depth = MAX_I2OB_DEPTH; i2ob_blksizes[i] = 1024; - i2ob_max_sectors[i] = 2; } /* @@ -1977,7 +1909,6 @@ MODULE_AUTHOR("Red Hat Software"); MODULE_DESCRIPTION("I2O Block Device OSM"); MODULE_LICENSE("GPL"); - void cleanup_module(void) { diff -u --recursive --new-file v2.5.0/linux/drivers/message/i2o/i2o_core.c linux/drivers/message/i2o/i2o_core.c --- v2.5.0/linux/drivers/message/i2o/i2o_core.c Mon Oct 22 08:39:56 2001 +++ linux/drivers/message/i2o/i2o_core.c Tue Nov 27 09:23:27 2001 @@ -125,6 +125,7 @@ * Function table to send to bus specific layers * See for explanation of this */ +#ifdef CONFIG_I2O_PCI_MODULE static struct i2o_core_func_table i2o_core_functions = { i2o_install_controller, @@ -135,7 +136,6 @@ i2o_delete_controller }; -#ifdef CONFIG_I2O_PCI_MODULE extern int i2o_pci_core_attach(struct i2o_core_func_table *); extern void i2o_pci_core_detach(void); #endif /* CONFIG_I2O_PCI_MODULE */ diff -u --recursive --new-file v2.5.0/linux/drivers/mtd/ftl.c linux/drivers/mtd/ftl.c --- v2.5.0/linux/drivers/mtd/ftl.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/mtd/ftl.c Tue Nov 27 09:23:27 2001 @@ -1166,7 +1166,7 @@ put_user(1, (char *)&geo->heads); put_user(8, (char *)&geo->sectors); put_user((sect>>3), (short *)&geo->cylinders); - put_user(ftl_hd[minor].start_sect, (u_long *)&geo->start); + put_user(get_start_sect(inode->i_rdev), (u_long *)&geo->start); break; case BLKGETSIZE: ret = put_user(ftl_hd[minor].nr_sects, (unsigned long *)arg); @@ -1206,42 +1206,27 @@ ======================================================================*/ -static int ftl_reread_partitions(int minor) +static int 
ftl_reread_partitions(kdev_t dev) { + int minor = MINOR(dev); partition_t *part = myparts[minor >> 4]; - int i, whole; + int res; DEBUG(0, "ftl_cs: ftl_reread_partition(%d)\n", minor); if ((atomic_read(&part->open) > 1)) { return -EBUSY; } - whole = minor & ~(MAX_PART-1); - i = MAX_PART - 1; - while (i-- > 0) { - if (ftl_hd[whole+i].nr_sects > 0) { - kdev_t rdev = MKDEV(FTL_MAJOR, whole+i); - - invalidate_device(rdev, 1); - } - ftl_hd[whole+i].start_sect = 0; - ftl_hd[whole+i].nr_sects = 0; - } + res = wipe_partitions(dev); + if (res) + goto leave; scan_header(part); - register_disk(&ftl_gendisk, whole >> PART_BITS, MAX_PART, + register_disk(&ftl_gendisk, minor >> PART_BITS, MAX_PART, &ftl_blk_fops, le32_to_cpu(part->header.FormattedSize)/SECTOR_SIZE); -#ifdef PCMCIA_DEBUG - for (i = 0; i < MAX_PART; i++) { - if (ftl_hd[whole+i].nr_sects > 0) - printk(KERN_INFO " %d: start %ld size %ld\n", i, - ftl_hd[whole+i].start_sect, - ftl_hd[whole+i].nr_sects); - } -#endif - return 0; +leave: + return res; } /*====================================================================== @@ -1431,7 +1416,7 @@ unregister_blkdev(FTL_MAJOR, "ftl"); blk_cleanup_queue(BLK_DEFAULT_QUEUE(FTL_MAJOR)); - blksize_size[FTL_MAJOR] = NULL; + blk_clear(FTL_MAJOR); del_gendisk(&ftl_gendisk); } diff -u --recursive --new-file v2.5.0/linux/drivers/mtd/mtdblock.c linux/drivers/mtd/mtdblock.c --- v2.5.0/linux/drivers/mtd/mtdblock.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/mtd/mtdblock.c Tue Nov 27 09:23:27 2001 @@ -29,7 +29,7 @@ #if LINUX_VERSION_CODE < 0x20300 #define QUEUE_PLUGGED (blk_dev[MAJOR_NR].plug_tq.sync) #else -#define QUEUE_PLUGGED (blk_dev[MAJOR_NR].request_queue.plugged) +#define QUEUE_PLUGGED (blk_queue_plugged(QUEUE)) #endif #ifdef CONFIG_DEVFS_FS @@ -402,7 +402,7 @@ /* * This is a special request_fn because it is executed in a process context - * to be able to sleep independently of the caller. The io_request_lock + * to be able to sleep independently of the caller. The queue_lock * is held upon entry and exit. * The head of our request queue is considered active so there is no need * to dequeue requests before we are done. */ @@ -416,7 +416,7 @@ for (;;) { INIT_REQUEST; req = CURRENT; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); mtdblk = mtdblks[MINOR(req->rq_dev)]; res = 0; @@ -458,7 +458,7 @@ } end_req: - spin_lock_irq(&io_request_lock); + spin_lock_irq(&QUEUE->queue_lock); end_request(res); } } @@ -490,16 +490,16 @@ while (!leaving) { add_wait_queue(&thr_wq, &wait); set_current_state(TASK_INTERRUPTIBLE); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&QUEUE->queue_lock); if (QUEUE_EMPTY || QUEUE_PLUGGED) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); schedule(); remove_wait_queue(&thr_wq, &wait); } else { remove_wait_queue(&thr_wq, &wait); set_current_state(TASK_RUNNING); handle_mtdblock_request(); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); } } diff -u --recursive --new-file v2.5.0/linux/drivers/mtd/nftlcore.c linux/drivers/mtd/nftlcore.c --- v2.5.0/linux/drivers/mtd/nftlcore.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/mtd/nftlcore.c Tue Nov 27 09:23:27 2001 @@ -59,11 +59,6 @@ /* .. for the Linux partition table handling.
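ftl_reread_partitions() above, nftl_ioctl() below and ide_revalidate_disk() earlier all collapse their hand-rolled invalidation loops into the same two calls. A sketch of that shared shape, assuming wipe_partitions() returns 0 on success the way the call sites treat it:

	/* generic revalidation: throw the old table away, read it back */
	static int revalidate_sketch(kdev_t dev, long nr_sects)
	{
		int res = wipe_partitions(dev);	/* invalidate + zero hd_struct */

		if (!res)
			grok_partitions(dev, nr_sects);	/* re-scan the table */
		return res;			/* e.g. -EBUSY if still open */
	}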
*/ struct hd_struct part_table[256]; -#if LINUX_VERSION_CODE < 0x20328 -static void dummy_init (struct gendisk *crap) -{} -#endif - static struct gendisk nftl_gendisk = { major: MAJOR_NR, major_name: "nftl", @@ -166,7 +161,8 @@ #if LINUX_VERSION_CODE < 0x20328 resetup_one_dev(&nftl_gendisk, firstfree); #else - grok_partitions(&nftl_gendisk, firstfree, 1<<NFTL_PARTN_BITS, nftl->nr_sects); + grok_partitions(MKDEV(MAJOR_NR,firstfree<<NFTL_PARTN_BITS), nftl->nr_sects); #endif } @@ -786,7 +782,7 @@ static int nftl_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg) { struct NFTLrecord *nftl; - int p; + int res; nftl = NFTLs[MINOR(inode->i_rdev) >> NFTL_PARTN_BITS]; @@ -799,16 +795,9 @@ g.heads = nftl->heads; g.sectors = nftl->sectors; g.cylinders = nftl->cylinders; - g.start = part_table[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user((void *)arg, &g, sizeof g) ? -EFAULT : 0; } - case BLKGETSIZE: /* Return device size */ - return put_user(part_table[MINOR(inode->i_rdev)].nr_sects, - (unsigned long *) arg); - case BLKGETSIZE64: - return put_user((u64)part_table[MINOR(inode->i_rdev)].nr_sects << 9, - (u64 *)arg); - case BLKFLSBUF: if (!capable(CAP_SYS_ADMIN)) return -EACCES; fsync_dev(inode->i_rdev); @@ -825,27 +814,17 @@ * or we won't be able to re-use the partitions, * if there was a change and we don't want to reboot */ - p = (1<<NFTL_PARTN_BITS); - while (p-- > 0) { - kdev_t devp = MKDEV(MAJOR(inode->i_dev), MINOR(inode->i_dev)+p); - if (part_table[p].nr_sects > 0) - invalidate_device (devp, 1); + res = wipe_partitions(inode->i_rdev); + if (!res) + grok_partitions(inode->i_rdev, nftl->nr_sects); - part_table[MINOR(inode->i_dev)+p].start_sect = 0; - part_table[MINOR(inode->i_dev)+p].nr_sects = 0; - } -#if LINUX_VERSION_CODE < 0x20328 - resetup_one_dev(&nftl_gendisk, MINOR(inode->i_rdev) >> NFTL_PARTN_BITS); -#else - grok_partitions(&nftl_gendisk, MINOR(inode->i_rdev) >> NFTL_PARTN_BITS, - 1<<NFTL_PARTN_BITS, nftl->nr_sects); -#endif - return 0; + return res; #if (LINUX_VERSION_CODE < 0x20303) RO_IOCTLS(inode->i_rdev, arg); /* ref. linux/blk.h */ #else + case BLKGETSIZE: + case BLKGETSIZE64: case BLKROSET: case BLKROGET: case BLKSSZGET: @@ -859,7 +838,7 @@ void nftl_request(RQFUNC_ARG) { - unsigned int dev, block, nsect; + unsigned int dev, unit, block, nsect; struct NFTLrecord *nftl; char *buffer; struct request *req; @@ -871,10 +850,11 @@ /* We can do this because the generic code knows not to touch the request at the head of the queue */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); DEBUG(MTD_DEBUG_LEVEL2, "NFTL_request\n"); - DEBUG(MTD_DEBUG_LEVEL3, "NFTL %s request, from sector 0x%04lx for 0x%04lx sectors\n", + DEBUG(MTD_DEBUG_LEVEL3, + "NFTL %s request, from sector 0x%04lx for 0x%04lx sectors\n", (req->cmd == READ) ?
"Read " : "Write", req->sector, req->current_nr_sectors); @@ -884,8 +864,8 @@ buffer = req->buffer; res = 1; /* succeed */ - if (dev >= MAX_NFTLS * (1<> NFTL_PARTN_BITS; + if (unit >= MAX_NFTLS || dev != (unit << NFTL_PARTN_BITS)) { printk("nftl: bad minor number: device = %s\n", kdevname(req->rq_dev)); res = 0; /* fail */ @@ -906,8 +886,6 @@ goto repeat; } - block += part_table[dev].start_sect; - if (req->cmd == READ) { DEBUG(MTD_DEBUG_LEVEL2, "NFTL read request of 0x%x sectors @ %x " "(req->nr_sectors == %lx)\n", nsect, block, req->nr_sectors); @@ -953,7 +931,7 @@ } repeat: DEBUG(MTD_DEBUG_LEVEL3, "end_request(%d)\n", res); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&QUEUE->queue_lock); end_request(res); } } diff -u --recursive --new-file v2.5.0/linux/drivers/net/3c509.c linux/drivers/net/3c509.c --- v2.5.0/linux/drivers/net/3c509.c Sun Sep 30 12:26:06 2001 +++ linux/drivers/net/3c509.c Sun Nov 25 09:43:42 2001 @@ -175,7 +175,7 @@ }; #endif /* CONFIG_MCA */ -#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE) +#ifdef __ISAPNP__ static struct isapnp_device_id el3_isapnp_adapters[] __initdata = { { ISAPNP_ANY_ID, ISAPNP_ANY_ID, ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5090), @@ -206,7 +206,7 @@ static u16 el3_isapnp_phys_addr[8][3]; -#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */ +#endif /* __ISAPNP__ */ static int nopnp; int __init el3_probe(struct net_device *dev) @@ -217,9 +217,9 @@ u16 phys_addr[3]; static int current_tag; int mca_slot = -1; -#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE) +#ifdef __ISAPNP__ static int pnp_cards; -#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */ +#endif /* __ISAPNP__ */ if (dev) SET_MODULE_OWNER(dev); @@ -323,7 +323,7 @@ } #endif /* CONFIG_MCA */ -#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE) +#ifdef __ISAPNP__ if (nopnp == 1) goto no_pnp; @@ -359,7 +359,7 @@ } } no_pnp: -#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */ +#endif /* __ISAPNP__ */ /* Select an open I/O location at 0x1*0 to do contention select. */ for ( ; id_port < 0x200; id_port += 0x10) { @@ -405,7 +405,7 @@ phys_addr[i] = htons(id_read_eeprom(i)); } -#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE) +#ifdef __ISAPNP__ if (nopnp == 0) { /* The ISA PnP 3c509 cards respond to the ID sequence. This check is needed in order not to register them twice. 
*/ @@ -425,7 +425,7 @@ } } } -#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */ +#endif /* __ISAPNP__ */ { unsigned int iobase = id_read_eeprom(8); @@ -1017,10 +1017,10 @@ MODULE_PARM_DESC(irq, "EtherLink III IRQ number(s) (assigned)"); MODULE_PARM_DESC(xcvr,"EtherLink III tranceiver(s) (0=internal, 1=external)"); MODULE_PARM_DESC(max_interrupt_work, "EtherLink III maximum events handled per interrupt"); -#ifdef CONFIG_ISAPNP +#ifdef __ISAPNP__ MODULE_PARM(nopnp, "i"); MODULE_PARM_DESC(nopnp, "EtherLink III disable ISA PnP support (0-1)"); -#endif /* CONFIG_ISAPNP */ +#endif /* __ISAPNP__ */ int init_module(void) diff -u --recursive --new-file v2.5.0/linux/drivers/net/3c515.c linux/drivers/net/3c515.c --- v2.5.0/linux/drivers/net/3c515.c Sun Sep 30 12:26:06 2001 +++ linux/drivers/net/3c515.c Sun Nov 25 09:43:42 2001 @@ -359,7 +359,7 @@ { "Default", 0, 0xFF, XCVR_10baseT, 10000}, }; -#ifdef CONFIG_ISAPNP +#ifdef __ISAPNP__ static struct isapnp_device_id corkscrew_isapnp_adapters[] = { { ISAPNP_ANY_ID, ISAPNP_ANY_ID, ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5051), @@ -372,7 +372,7 @@ static int corkscrew_isapnp_phys_addr[3]; static int nopnp; -#endif /* CONFIG_ISAPNP */ +#endif /* __ISAPNP__ */ static int corkscrew_scan(struct net_device *dev); static struct net_device *corkscrew_found_device(struct net_device *dev, @@ -450,12 +450,12 @@ { int cards_found = 0; static int ioaddr; -#ifdef CONFIG_ISAPNP +#ifdef __ISAPNP__ short i; static int pnp_cards; #endif -#ifdef CONFIG_ISAPNP +#ifdef __ISAPNP__ if(nopnp == 1) goto no_pnp; for(i=0; corkscrew_isapnp_adapters[i].vendor != 0; i++) { @@ -513,17 +513,17 @@ } } no_pnp: -#endif /* CONFIG_ISAPNP */ +#endif /* __ISAPNP__ */ /* Check all locations on the ISA bus -- evil! */ for (ioaddr = 0x100; ioaddr < 0x400; ioaddr += 0x20) { int irq; -#ifdef CONFIG_ISAPNP +#ifdef __ISAPNP__ /* Make sure this was not already picked up by isapnp */ if(ioaddr == corkscrew_isapnp_phys_addr[0]) continue; if(ioaddr == corkscrew_isapnp_phys_addr[1]) continue; if(ioaddr == corkscrew_isapnp_phys_addr[2]) continue; -#endif /* CONFIG_ISAPNP */ +#endif /* __ISAPNP__ */ if (check_region(ioaddr, CORKSCREW_TOTAL_SIZE)) continue; /* Check the resource configuration for a matching ioaddr. 
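The same mechanical substitution runs through 3c509.c, 3c515.c, smc-ultra.c and (below) i82365.c: every hand-written ISA PnP guard becomes a test of __ISAPNP__. Roughly, the centralized definition amounts to the condition the i82365 hunk below used to spell out locally -- a sketch of what the shared header provides, not a quotation of it:

	/* sketch of the guard now provided centrally (see <linux/isapnp.h>) */
	#if defined(CONFIG_ISAPNP) || (defined(CONFIG_ISAPNP_MODULE) && defined(MODULE))
	#define __ISAPNP__
	#endif

	#ifdef __ISAPNP__
	/* ISA PnP id tables and probe code are compiled in */
	#endif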
*/ diff -u --recursive --new-file v2.5.0/linux/drivers/net/8139too.c linux/drivers/net/8139too.c --- v2.5.0/linux/drivers/net/8139too.c Fri Nov 9 13:45:35 2001 +++ linux/drivers/net/8139too.c Sat Nov 24 11:26:37 2001 @@ -1270,6 +1270,7 @@ tp->full_duplex = tp->duplex_lock; tp->tx_flag = (TX_FIFO_THRESH << 11) & 0x003f0000; tp->twistie = 1; + tp->time_to_die = 0; rtl8139_init_ring (dev); rtl8139_hw_start (dev); diff -u --recursive --new-file v2.5.0/linux/drivers/net/hamradio/baycom_epp.c linux/drivers/net/hamradio/baycom_epp.c --- v2.5.0/linux/drivers/net/hamradio/baycom_epp.c Mon Sep 10 09:04:53 2001 +++ linux/drivers/net/hamradio/baycom_epp.c Sun Nov 25 09:48:47 2001 @@ -807,10 +807,11 @@ /* --------------------------------------------------------------------- */ #ifdef __i386__ +#include <asm/msr.h> #define GETTICK(x) \ ({ \ if (cpu_has_tsc) \ - __asm__ __volatile__("rdtsc" : "=a" (x) : : "dx");\ + rdtscl(x); \ }) #else /* __i386__ */ #define GETTICK(x) diff -u --recursive --new-file v2.5.0/linux/drivers/net/smc-ultra.c linux/drivers/net/smc-ultra.c --- v2.5.0/linux/drivers/net/smc-ultra.c Sun Sep 30 12:26:07 2001 +++ linux/drivers/net/smc-ultra.c Sun Nov 25 09:43:42 2001 @@ -80,7 +80,7 @@ int ultra_probe(struct net_device *dev); static int ultra_probe1(struct net_device *dev, int ioaddr); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ static int ultra_probe_isapnp(struct net_device *dev); #endif @@ -100,7 +100,7 @@ const unsigned char *buf, const int start_page); static int ultra_close_card(struct net_device *dev); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ static struct isapnp_device_id ultra_device_ids[] __initdata = { { ISAPNP_VENDOR('S','M','C'), ISAPNP_FUNCTION(0x8416), ISAPNP_VENDOR('S','M','C'), ISAPNP_FUNCTION(0x8416), @@ -140,7 +140,7 @@ else if (base_addr != 0) /* Don't probe at all. */ return -ENXIO; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ /* Look for any installed ISAPnP cards */ if (isapnp_present() && (ultra_probe_isapnp(dev) == 0)) return 0; @@ -279,7 +279,7 @@ return retval; } -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ static int __init ultra_probe_isapnp(struct net_device *dev) { int i; @@ -544,7 +544,7 @@ /* NB: ultra_close_card() does free_irq */ int ioaddr = dev->base_addr - ULTRA_NIC_OFFSET; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ struct pci_dev *idev = (struct pci_dev *)ei_status.priv; if (idev) idev->deactivate(idev); diff -u --recursive --new-file v2.5.0/linux/drivers/net/tlan.c linux/drivers/net/tlan.c --- v2.5.0/linux/drivers/net/tlan.c Sun Sep 30 12:26:08 2001 +++ linux/drivers/net/tlan.c Tue Nov 27 09:23:27 2001 @@ -2265,8 +2265,8 @@ printk("TLAN: Partner capability: "); for (i = 5; i <= 10; i++) if (partner & (1<base_addr, TLAN_LED_REG, TLAN_LED_LINK ); diff -u --recursive --new-file v2.5.0/linux/drivers/pci/setup-res.c linux/drivers/pci/setup-res.c --- v2.5.0/linux/drivers/pci/setup-res.c Thu Oct 4 18:47:08 2001 +++ linux/drivers/pci/setup-res.c Mon Nov 26 14:23:58 2001 @@ -219,9 +219,8 @@ cmd |= PCI_COMMAND_IO; } - /* ??? Always turn on bus mastering. If the device doesn't support - it, the bit will go into the bucket. */ - cmd |= PCI_COMMAND_MASTER; + /* Do not enable bus mastering. A device could corrupt + * system memory by DMAing before a driver is ready for it. */ /* Set the cache line and default latency (32).
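Since the generic PCI setup code no longer switches bus mastering on behind the driver's back, a DMA-capable driver must enable it itself once it is actually ready to handle DMA. A sketch of the expected driver-side sequence -- mydrv_init_one is a made-up name, while the two pci_* calls are the standard API:

	static int mydrv_init_one(struct pci_dev *pdev)
	{
		if (pci_enable_device(pdev))	/* BARs, power state */
			return -ENODEV;

		pci_set_master(pdev);		/* sets PCI_COMMAND_MASTER */
		return 0;			/* only now may the card DMA */
	}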
*/ pci_write_config_word(dev, PCI_CACHE_LINE_SIZE, diff -u --recursive --new-file v2.5.0/linux/drivers/pcmcia/i82365.c linux/drivers/pcmcia/i82365.c --- v2.5.0/linux/drivers/pcmcia/i82365.c Mon Nov 12 09:39:01 2001 +++ linux/drivers/pcmcia/i82365.c Sun Nov 25 09:43:42 2001 @@ -813,11 +813,7 @@ #ifdef CONFIG_ISA -#if defined(CONFIG_ISAPNP) || (defined(CONFIG_ISAPNP_MODULE) && defined(MODULE)) -#define I82365_ISAPNP -#endif - -#ifdef I82365_ISAPNP +#ifdef __ISAPNP__ static struct isapnp_device_id id_table[] __initdata = { { ISAPNP_ANY_ID, ISAPNP_ANY_ID, ISAPNP_VENDOR('P', 'N', 'P'), ISAPNP_FUNCTION(0x0e00), (unsigned long) "Intel 82365-Compatible" }, @@ -836,7 +832,7 @@ { int i, j, sock, k, ns, id; ioaddr_t port; -#ifdef I82365_ISAPNP +#ifdef __ISAPNP__ struct isapnp_device_id *devid; struct pci_dev *dev; @@ -1647,7 +1643,7 @@ i365_set(i, I365_CSCINT, 0); release_region(socket[i].ioaddr, 2); } -#if defined(CONFIG_ISA) && defined(I82365_ISAPNP) +#if defined(CONFIG_ISA) && defined(__ISAPNP__) if (i82365_pnpdev && i82365_pnpdev->deactivate) i82365_pnpdev->deactivate(i82365_pnpdev); #endif diff -u --recursive --new-file v2.5.0/linux/drivers/s390/block/dasd.c linux/drivers/s390/block/dasd.c --- v2.5.0/linux/drivers/s390/block/dasd.c Fri Nov 9 14:05:02 2001 +++ linux/drivers/s390/block/dasd.c Tue Nov 27 09:23:27 2001 @@ -730,13 +730,6 @@ goto out_hardsect_size; memset (hardsect_size[major], 0, (1 << MINORBITS) * sizeof (int)); - /* init max_sectors */ - max_sectors[major] = - (int *) kmalloc ((1 << MINORBITS) * sizeof (int), GFP_ATOMIC); - if (!max_sectors[major]) - goto out_max_sectors; - memset (max_sectors[major], 0, (1 << MINORBITS) * sizeof (int)); - /* finally do the gendisk stuff */ major_info->gendisk.part = kmalloc ((1 << MINORBITS) * sizeof (struct hd_struct), @@ -755,10 +748,6 @@ /* error handling - free the prior allocated memory */ out_gendisk: - kfree (max_sectors[major]); - max_sectors[major] = NULL; - - out_max_sectors: kfree (hardsect_size[major]); hardsect_size[major] = NULL; @@ -825,12 +814,8 @@ kfree (blk_size[major]); kfree (blksize_size[major]); kfree (hardsect_size[major]); - kfree (max_sectors[major]); - blk_size[major] = NULL; - blksize_size[major] = NULL; - hardsect_size[major] = NULL; - max_sectors[major] = NULL; + blk_clear(major); rc = devfs_unregister_blkdev (major, DASD_NAME); if (rc < 0) { @@ -1704,10 +1689,6 @@ dasd_end_request (req, 0); dasd_dequeue_request (queue,req); } else { - /* relocate request according to partition table */ - req->sector += - device->major_info->gendisk. - part[MINOR (req->rq_dev)].start_sect; cqr = device->discipline->build_cp_from_req (device, req); if (cqr == NULL) { @@ -1716,10 +1697,7 @@ "on request %p\n", device->devinfo.devno, req); - /* revert relocation of request */ - req->sector -= - device->major_info->gendisk. 
- part[MINOR (req->rq_dev)].start_sect; + break; /* terminate request queue loop */ } @@ -1769,10 +1747,10 @@ dasd_run_bh (dasd_device_t * device) { long flags; - spin_lock_irqsave (&io_request_lock, flags); + spin_lock_irqsave (&device->request_queue->queue_lock, flags); atomic_set (&device->bh_scheduled, 0); dasd_process_queues (device); - spin_unlock_irqrestore (&io_request_lock, flags); + spin_unlock_irqrestore (&device->request_queue->queue_lock, flags); } /* @@ -2468,14 +2446,12 @@ dasd_info.chanq_len = 0; if (device->request_queue->request_fn) { struct list_head *l; + request_queue_t *q = device->request_queue; ccw_req_t *cqr = device->queue.head; - spin_lock_irqsave (&io_request_lock, flags); - list_for_each (l, - &device->request_queue-> - queue_head) { + spin_lock_irqsave (&q->queue_lock, flags); + list_for_each (l, &q->queue_head) dasd_info.req_queue_len++; - } - spin_unlock_irqrestore (&io_request_lock, + spin_unlock_irqrestore (&q->queue_lock, flags); s390irq_spin_lock_irqsave (device->devinfo.irq, flags); @@ -2668,7 +2644,7 @@ /* SECTION: Management of device list */ int -dasd_fillgeo(int kdev,struct hd_geometry *geo) +dasd_fillgeo(kdev_t kdev,struct hd_geometry *geo) { dasd_device_t *device = dasd_device_from_kdev (kdev); @@ -2679,8 +2655,7 @@ return -EINVAL; device->discipline->fill_geometry (device, geo); - geo->start = device->major_info->gendisk.part[MINOR(kdev)].start_sect - >> device->sizes.s2b_shift;; + geo->start = get_start_sect(kdev); return 0; } @@ -3365,6 +3340,12 @@ int major = MAJOR(device->kdev); int minor = MINOR(device->kdev); + device->request_queue = kmalloc(sizeof(request_queue_t),GFP_KERNEL); + device->request_queue->queuedata = device; + blk_init_queue (device->request_queue, do_dasd_request); + blk_queue_headactive (device->request_queue, 0); + elevator_init (&(device->request_queue->elevator),ELEVATOR_NOOP); + for (i = 0; i < (1 << DASD_PARTN_BITS); i++) { if (i == 0) device->major_info->gendisk.sizes[minor] = @@ -3374,17 +3355,11 @@ device->major_info->gendisk.sizes[minor + i] = 0; hardsect_size[major][minor + i] = device->sizes.bp_block; blksize_size[major][minor + i] = device->sizes.bp_block; - max_sectors[major][minor + i] = - device->discipline->max_blocks << - device->sizes.s2b_shift; + blk_queue_max_sectors(device->request_queue, + device->discipline->max_blocks << device->sizes.s2b_shift); device->major_info->gendisk.part[minor+i].start_sect = 0; device->major_info->gendisk.part[minor+i].nr_sects = 0; } - device->request_queue = kmalloc(sizeof(request_queue_t),GFP_KERNEL); - device->request_queue->queuedata = device; - blk_init_queue (device->request_queue, do_dasd_request); - blk_queue_headactive (device->request_queue, 0); - elevator_init (&(device->request_queue->elevator),ELEVATOR_NOOP); return rc; } @@ -3411,7 +3386,6 @@ device->major_info->gendisk.sizes[minor + i] = 0; hardsect_size[major][minor + i] = 0; blksize_size[major][minor + i] = 0; - max_sectors[major][minor + i] = 0; } if (device->request_queue) { blk_cleanup_queue (device->request_queue); diff -u --recursive --new-file v2.5.0/linux/drivers/s390/block/xpram.c linux/drivers/s390/block/xpram.c --- v2.5.0/linux/drivers/s390/block/xpram.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/s390/block/xpram.c Tue Nov 27 09:23:27 2001 @@ -1213,8 +1213,7 @@ { int i; - /* first of all, flush it all and reset all the data structures */ - + /* first of all, flush it all and reset all the data structures */ for (i=0; irequest_queue; + + spin_lock_irqsave (&q->queue_lock, flags_ior);
s390irq_spin_lock_irqsave(ti->devinfo.irq,flags_390irq); atomic_set(&ti->bh_scheduled,0); tapeblock_exec_IO(ti); s390irq_spin_unlock_irqrestore(ti->devinfo.irq,flags_390irq); - spin_unlock_irqrestore (&io_request_lock, flags_ior); + spin_unlock_irqrestore (&q->queue_lock, flags_ior); } void diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/53c7,8xx.c linux/drivers/scsi/53c7,8xx.c --- v2.5.0/linux/drivers/scsi/53c7,8xx.c Thu Oct 25 13:53:48 2001 +++ linux/drivers/scsi/53c7,8xx.c Mon Nov 26 14:23:58 2001 @@ -1427,13 +1427,14 @@ return -1; } -#ifdef __powerpc__ if ( ! (command & PCI_COMMAND_MASTER)) { - printk("SCSI: PCI Master Bit has not been set. Setting...\n"); + printk(KERN_INFO "SCSI: PCI Master Bit has not been set. Setting...\n"); command |= PCI_COMMAND_MASTER|PCI_COMMAND_IO; pci_write_config_word(pdev, PCI_COMMAND, command); + } - if (io_port >= 0x10000000 && is_prep ) { +#ifdef __powerpc__ + if (io_port >= 0x10000000 && is_prep ) { /* Mapping on PowerPC can't handle this! */ unsigned long new_io_port; new_io_port = (io_port & 0x00FFFFFF) | 0x01000000; @@ -1441,7 +1442,6 @@ io_port = new_io_port; pci_write_config_dword(pdev, PCI_BASE_ADDRESS_0, io_port); pdev->base_address[0] = io_port; - } } #endif diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/Config.in linux/drivers/scsi/Config.in --- v2.5.0/linux/drivers/scsi/Config.in Fri Nov 9 15:22:54 2001 +++ linux/drivers/scsi/Config.in Tue Nov 27 09:23:27 2001 @@ -20,10 +20,6 @@ comment 'Some SCSI devices (e.g. CD jukebox) support multiple LUNs' -#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then - bool ' Enable extra checks in new queueing code' CONFIG_SCSI_DEBUG_QUEUES -#fi - bool ' Probe all LUNs on each SCSI device' CONFIG_SCSI_MULTI_LUN bool ' Verbose SCSI error reporting (kernel size +=12K)' CONFIG_SCSI_CONSTANTS diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/aic7xxx/aic7xxx_linux.c linux/drivers/scsi/aic7xxx/aic7xxx_linux.c --- v2.5.0/linux/drivers/scsi/aic7xxx/aic7xxx_linux.c Wed Nov 21 14:05:29 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_linux.c Tue Nov 27 09:23:27 2001 @@ -1123,9 +1123,9 @@ if (host == NULL) return (ENOMEM); - ahc_lock(ahc, &s); *((struct ahc_softc **)host->hostdata) = ahc; ahc->platform_data->host = host; + ahc_lock(ahc, &s); host->can_queue = AHC_MAX_QUEUE; host->cmd_per_lun = 2; host->sg_tablesize = AHC_NSEG; @@ -1272,7 +1272,9 @@ TAILQ_INIT(&ahc->platform_data->completeq); TAILQ_INIT(&ahc->platform_data->device_runq); ahc->platform_data->hw_dma_mask = 0xFFFFFFFF; - ahc_lockinit(ahc); + /* + * ahc_lockinit done by scsi_register, as we don't own that lock + */ ahc_done_lockinit(ahc); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) init_MUTEX_LOCKED(&ahc->platform_data->eh_sem); @@ -1530,22 +1532,17 @@ int ahc_linux_queue(Scsi_Cmnd * cmd, void (*scsi_done) (Scsi_Cmnd *)) { - struct ahc_softc *ahc; + struct ahc_softc *ahc = *(struct ahc_softc **)cmd->host->hostdata; struct ahc_linux_device *dev; - u_long flags; - - ahc = *(struct ahc_softc **)cmd->host->hostdata; /* * Save the callback on completion function. */ cmd->scsi_done = scsi_done; - ahc_lock(ahc, &flags); dev = ahc_linux_get_device(ahc, cmd->channel, cmd->target, cmd->lun, /*alloc*/TRUE); if (dev == NULL) { - ahc_unlock(ahc, &flags); printf("aic7xxx_linux_queue: Unable to allocate device!\n"); return (-ENOMEM); } @@ -1556,7 +1553,6 @@ dev->flags |= AHC_DEV_ON_RUN_LIST; ahc_linux_run_device_queues(ahc); } - ahc_unlock(ahc, &flags); return (0); } @@ -2408,12 +2404,10 @@ flag == SCB_ABORT ? 
"n ABORT" : " TARGET RESET"); /* - * It is a bug that the upper layer takes - * this lock just prior to calling us. + * we used to drop io_request_lock and lock ahc from here, but + * now that the global lock is gone the upper layer have already + * done what ahc_lock would do /jens */ - spin_unlock_irq(&io_request_lock); - - ahc_lock(ahc, &s); /* * First determine if we currently own this command. @@ -2661,7 +2655,7 @@ ahc_unlock(ahc, &s); if (acmd != NULL) ahc_linux_run_complete_queue(ahc, acmd); - spin_lock_irq(&io_request_lock); + ahc_lock(ahc, &s); return (retval); } @@ -2704,14 +2698,7 @@ u_long s; int found; - /* - * It is a bug that the upper layer takes - * this lock just prior to calling us. - */ - spin_unlock_irq(&io_request_lock); - ahc = *(struct ahc_softc **)cmd->host->hostdata; - ahc_lock(ahc, &s); found = ahc_reset_channel(ahc, cmd->channel + 'A', /*initiate reset*/TRUE); acmd = TAILQ_FIRST(&ahc->platform_data->completeq); @@ -2724,7 +2711,7 @@ if (acmd != NULL) ahc_linux_run_complete_queue(ahc, acmd); - spin_lock_irq(&io_request_lock); + ahc_lock(ahc, &s); return SUCCESS; } diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h --- v2.5.0/linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Thu Oct 25 13:53:49 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Tue Nov 27 09:23:27 2001 @@ -89,7 +89,8 @@ present: 0, /* number of 7xxx's present */\ unchecked_isa_dma: 0, /* no memory DMA restrictions */\ use_clustering: ENABLE_CLUSTERING, \ - use_new_eh_code: 1 \ + use_new_eh_code: 1, \ + highmem_io: 1 \ } #endif /* _AIC7XXX_LINUX_HOST_H_ */ diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/aic7xxx/aic7xxx_osm.h linux/drivers/scsi/aic7xxx/aic7xxx_osm.h --- v2.5.0/linux/drivers/scsi/aic7xxx/aic7xxx_osm.h Thu Oct 25 13:53:49 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_osm.h Tue Nov 27 09:23:27 2001 @@ -575,9 +575,6 @@ TAILQ_HEAD(, ahc_linux_device) device_runq; struct ahc_completeq completeq; -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,1,0) - spinlock_t spin_lock; -#endif u_int qfrozen; struct timer_list reset_timer; struct semaphore eh_sem; @@ -716,20 +713,20 @@ static __inline void ahc_lockinit(struct ahc_softc *ahc) { - spin_lock_init(&ahc->platform_data->spin_lock); + spin_lock_init(&ahc->platform_data->host->host_lock); } static __inline void ahc_lock(struct ahc_softc *ahc, unsigned long *flags) { *flags = 0; - spin_lock_irqsave(&ahc->platform_data->spin_lock, *flags); + spin_lock_irqsave(&ahc->platform_data->host->host_lock, *flags); } static __inline void ahc_unlock(struct ahc_softc *ahc, unsigned long *flags) { - spin_unlock_irqrestore(&ahc->platform_data->spin_lock, *flags); + spin_unlock_irqrestore(&ahc->platform_data->host->host_lock, *flags); } static __inline void @@ -741,14 +738,18 @@ static __inline void ahc_done_lock(struct ahc_softc *ahc, unsigned long *flags) { + struct Scsi_Host *host = ahc->platform_data->host; + *flags = 0; - spin_lock_irqsave(&io_request_lock, *flags); + spin_lock_irqsave(&host->host_lock, *flags); } static __inline void ahc_done_unlock(struct ahc_softc *ahc, unsigned long *flags) { - spin_unlock_irqrestore(&io_request_lock, *flags); + struct Scsi_Host *host = ahc->platform_data->host; + + spin_unlock_irqrestore(&host->host_lock, *flags); } #else /* LINUX_VERSION_CODE < KERNEL_VERSION(2,1,0) */ diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/aic7xxx_old.c linux/drivers/scsi/aic7xxx_old.c --- v2.5.0/linux/drivers/scsi/aic7xxx_old.c Wed Nov 21 
14:05:29 2001 +++ linux/drivers/scsi/aic7xxx_old.c Tue Nov 27 09:23:27 2001 @@ -4127,7 +4127,7 @@ unsigned long cpu_flags = 0; struct aic7xxx_scb *scb; - spin_lock_irqsave(&io_request_lock, cpu_flags); + spin_lock_irqsave(&p->host->host_lock, cpu_flags); p->dev_timer_active &= ~(0x01 << MAX_TARGETS); if ( (p->dev_timer_active & (0x01 << p->scsi_id)) && time_after_eq(jiffies, p->dev_expires[p->scsi_id]) ) @@ -4184,7 +4184,7 @@ } aic7xxx_run_waiting_queues(p); - spin_unlock_irqrestore(&io_request_lock, cpu_flags); + spin_unlock_irqrestore(&p->host->host_lock, cpu_flags); } /*+F************************************************************************* @@ -7011,7 +7011,7 @@ p = (struct aic7xxx_host *)dev_id; if(!p) return; - spin_lock_irqsave(&io_request_lock, cpu_flags); + spin_lock_irqsave(&p->host->host_lock, cpu_flags); p->flags |= AHC_IN_ISR; do { @@ -7020,7 +7020,7 @@ aic7xxx_done_cmds_complete(p); aic7xxx_run_waiting_queues(p); p->flags &= ~AHC_IN_ISR; - spin_unlock_irqrestore(&io_request_lock, cpu_flags); + spin_unlock_irqrestore(&p->host->host_lock, cpu_flags); } /*+F************************************************************************* @@ -11148,7 +11148,7 @@ disable_irq(p->irq); aic7xxx_print_card(p); aic7xxx_print_scratch_ram(p); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&p->host->host_lock); for(;;) barrier(); } diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/hosts.c linux/drivers/scsi/hosts.c --- v2.5.0/linux/drivers/scsi/hosts.c Thu Jul 5 11:28:17 2001 +++ linux/drivers/scsi/hosts.c Tue Nov 27 09:23:27 2001 @@ -129,7 +129,7 @@ * once we are 100% sure that we want to use this host adapter - it is a * pain to reverse this, so we try to avoid it */ - +extern int blk_nohighio; struct Scsi_Host * scsi_register(Scsi_Host_Template * tpnt, int j){ struct Scsi_Host * retval, *shpnt, *o_shp; Scsi_Host_Name *shn, *shn2; @@ -160,6 +160,7 @@ break; } } + spin_lock_init(&retval->host_lock); atomic_set(&retval->host_active,0); retval->host_busy = 0; retval->host_failed = 0; @@ -235,6 +236,8 @@ retval->cmd_per_lun = tpnt->cmd_per_lun; retval->unchecked_isa_dma = tpnt->unchecked_isa_dma; retval->use_clustering = tpnt->use_clustering; + if (!blk_nohighio) + retval->highmem_io = tpnt->highmem_io; retval->select_queue_depths = tpnt->select_queue_depths; retval->max_sectors = tpnt->max_sectors; diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/hosts.h linux/drivers/scsi/hosts.h --- v2.5.0/linux/drivers/scsi/hosts.h Thu Nov 22 11:49:15 2001 +++ linux/drivers/scsi/hosts.h Tue Nov 27 09:43:46 2001 @@ -291,6 +291,8 @@ */ unsigned emulated:1; + unsigned highmem_io:1; + /* * Name of proc directory */ @@ -317,6 +319,7 @@ struct Scsi_Host * next; Scsi_Device * host_queue; + spinlock_t host_lock; struct task_struct * ehandler; /* Error recovery thread. 
*/ struct semaphore * eh_wait; /* The error recovery thread waits on @@ -390,6 +393,7 @@ unsigned in_recovery:1; unsigned unchecked_isa_dma:1; unsigned use_clustering:1; + unsigned highmem_io:1; /* * True if this host was loaded as a loadable module */ diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/ide-scsi.c linux/drivers/scsi/ide-scsi.c --- v2.5.0/linux/drivers/scsi/ide-scsi.c Sun Sep 30 12:26:07 2001 +++ linux/drivers/scsi/ide-scsi.c Tue Nov 27 09:23:27 2001 @@ -235,13 +235,13 @@ kfree(atapi_buf); } -static inline void idescsi_free_bh (struct buffer_head *bh) +static inline void idescsi_free_bio (struct bio *bio) { - struct buffer_head *bhp; + struct bio *bhp; - while (bh) { - bhp = bh; - bh = bh->b_reqnext; + while (bio) { + bhp = bio; + bio = bio->bi_next; kfree (bhp); } } @@ -263,6 +263,7 @@ struct request *rq = hwgroup->rq; idescsi_pc_t *pc = (idescsi_pc_t *) rq->buffer; int log = test_bit(IDESCSI_LOG_CMD, &scsi->log); + struct Scsi_Host *host; u8 *scsi_buf; unsigned long flags; @@ -291,10 +292,11 @@ } else printk("\n"); } } - spin_lock_irqsave(&io_request_lock,flags); + host = pc->scsi_cmd->host; + spin_lock_irqsave(&host->host_lock, flags); pc->done(pc->scsi_cmd); - spin_unlock_irqrestore(&io_request_lock,flags); - idescsi_free_bh (rq->bh); + spin_unlock_irqrestore(&host->host_lock, flags); + idescsi_free_bio (rq->bio); kfree(pc); kfree(rq); scsi->pc = NULL; } @@ -427,7 +429,7 @@ pc->current_position=pc->buffer; bcount = IDE_MIN (pc->request_transfer, 63 * 1024); /* Request to transfer the entire buffer at once */ - if (drive->using_dma && rq->bh) + if (drive->using_dma && rq->special) dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive); SELECT_DRIVE(HWIF(drive), drive); @@ -653,25 +655,24 @@ return -EINVAL; } -static inline struct buffer_head *idescsi_kmalloc_bh (int count) +static inline struct bio *idescsi_kmalloc_bio (int count) { - struct buffer_head *bh, *bhp, *first_bh; + struct bio *bh, *bhp, *first_bh; - if ((first_bh = bhp = bh = kmalloc (sizeof(struct buffer_head), GFP_ATOMIC)) == NULL) + if ((first_bh = bhp = bh = bio_alloc(GFP_ATOMIC, 1)) == NULL) goto abort; - memset (bh, 0, sizeof (struct buffer_head)); - bh->b_reqnext = NULL; + memset (bh, 0, sizeof (struct bio)); while (--count) { - if ((bh = kmalloc (sizeof(struct buffer_head), GFP_ATOMIC)) == NULL) + if ((bh = bio_alloc(GFP_ATOMIC, 1)) == NULL) goto abort; - memset (bh, 0, sizeof (struct buffer_head)); - bhp->b_reqnext = bh; + memset (bh, 0, sizeof (struct bio)); + bhp->bi_next = bh; bhp = bh; - bh->b_reqnext = NULL; + bh->bi_next = NULL; } return first_bh; abort: - idescsi_free_bh (first_bh); + idescsi_free_bio (first_bh); return NULL; } @@ -689,9 +690,9 @@ } } -static inline struct buffer_head *idescsi_dma_bh (ide_drive_t *drive, idescsi_pc_t *pc) +static inline struct bio *idescsi_dma_bio(ide_drive_t *drive, idescsi_pc_t *pc) { - struct buffer_head *bh = NULL, *first_bh = NULL; + struct bio *bh = NULL, *first_bh = NULL; int segments = pc->scsi_cmd->use_sg; struct scatterlist *sg = pc->scsi_cmd->request_buffer; @@ -700,25 +701,27 @@ if (idescsi_set_direction(pc)) return NULL; if (segments) { - if ((first_bh = bh = idescsi_kmalloc_bh (segments)) == NULL) + if ((first_bh = bh = idescsi_kmalloc_bio (segments)) == NULL) return NULL; #if IDESCSI_DEBUG_LOG printk ("ide-scsi: %s: building DMA table, %d segments, %dkB total\n", drive->name, segments, pc->request_transfer >> 10); #endif /* IDESCSI_DEBUG_LOG */ while (segments--) { - bh->b_data = sg->address; - 
bh->b_size = sg->length; - bh = bh->b_reqnext; + bh->bi_io_vec.bv_page = sg->page; + bh->bi_io_vec.bv_len = sg->length; + bh->bi_io_vec.bv_offset = sg->offset; + bh = bh->bi_next; sg++; } } else { - if ((first_bh = bh = idescsi_kmalloc_bh (1)) == NULL) + if ((first_bh = bh = idescsi_kmalloc_bio (1)) == NULL) return NULL; #if IDESCSI_DEBUG_LOG printk ("ide-scsi: %s: building DMA table for a single buffer (%dkB)\n", drive->name, pc->request_transfer >> 10); #endif /* IDESCSI_DEBUG_LOG */ - bh->b_data = pc->scsi_cmd->request_buffer; - bh->b_size = pc->request_transfer; + bh->bi_io_vec.bv_page = virt_to_page(pc->scsi_cmd->request_buffer); + bh->bi_io_vec.bv_len = pc->request_transfer; + bh->bi_io_vec.bv_offset = (unsigned long) pc->scsi_cmd->request_buffer & ~PAGE_MASK; } return first_bh; } @@ -783,11 +786,11 @@ ide_init_drive_cmd (rq); rq->buffer = (char *) pc; - rq->bh = idescsi_dma_bh (drive, pc); + rq->bio = idescsi_dma_bio (drive, pc); rq->cmd = IDESCSI_PC_RQ; - spin_unlock(&io_request_lock); + spin_unlock(&cmd->host->host_lock); (void) ide_do_drive_cmd (drive, rq, ide_end); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&cmd->host->host_lock); return 0; abort: if (pc) kfree (pc); diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/megaraid.c linux/drivers/scsi/megaraid.c --- v2.5.0/linux/drivers/scsi/megaraid.c Thu Oct 25 13:53:51 2001 +++ linux/drivers/scsi/megaraid.c Tue Nov 27 09:23:27 2001 @@ -586,8 +586,10 @@ #define DRIVER_LOCK(p) #define DRIVER_UNLOCK(p) #define IO_LOCK_T unsigned long io_flags = 0 -#define IO_LOCK spin_lock_irqsave(&io_request_lock,io_flags); -#define IO_UNLOCK spin_unlock_irqrestore(&io_request_lock,io_flags); +#define IO_LOCK(host) spin_lock_irqsave(&(host)->host_lock,io_flags) +#define IO_UNLOCK(host) spin_unlock_irqrestore(&(host)->host_lock,io_flags) +#define IO_LOCK_IRQ(host) spin_lock_irq(&(host)->host_lock) +#define IO_UNLOCK_IRQ(host) spin_unlock_irq(&(host)->host_lock) #define queue_task_irq(a,b) queue_task(a,b) #define queue_task_irq_off(a,b) queue_task(a,b) @@ -612,8 +614,8 @@ #define DRIVER_LOCK(p) #define DRIVER_UNLOCK(p) #define IO_LOCK_T unsigned long io_flags = 0 -#define IO_LOCK spin_lock_irqsave(&io_request_lock,io_flags); -#define IO_UNLOCK spin_unlock_irqrestore(&io_request_lock,io_flags); +#define IO_LOCK(host) spin_lock_irqsave(&io_request_lock,io_flags); +#define IO_UNLOCK(host) spin_unlock_irqrestore(&io_request_lock,io_flags); #define pci_free_consistent(a,b,c,d) #define pci_unmap_single(a,b,c,d) @@ -2101,7 +2103,7 @@ for (idx = 0; idx < MAX_FIRMWARE_STATUS; idx++) completed[idx] = 0; - IO_LOCK; + IO_LOCK(megaCfg->host); megaCfg->nInterrupts++; qCnt = 0xff; @@ -2220,7 +2222,7 @@ megaCfg->flag &= ~IN_ISR; /* Loop through any pending requests */ mega_runpendq (megaCfg); - IO_UNLOCK; + IO_UNLOCK(megaCfg->host); } @@ -3032,9 +3034,7 @@ sizeof (mega_mailbox64), &(megaCfg->dma_handle64)); - mega_register_mailbox (megaCfg, - virt_to_bus ((void *) megaCfg-> - mailbox64ptr)); + mega_register_mailbox (megaCfg, megaCfg->dma_handle64); #else mega_register_mailbox (megaCfg, virt_to_bus ((void *) &megaCfg-> @@ -3800,7 +3800,7 @@ if (pScb->SCpnt->cmnd[0] == M_RD_IOCTL_CMD_NEW) { init_MUTEX_LOCKED (&pScb->ioctl_sem); - spin_unlock_irq (&io_request_lock); + IO_UNLOCK_IRQ(megaCfg->host); down (&pScb->ioctl_sem); user_area = (char *)*((u32*)&pScb->SCpnt->cmnd[4]); if (copy_to_user @@ -3809,7 +3809,7 @@ ("megaraid: Error copying ioctl return value to user buffer.\n"); pScb->SCpnt->result = (DID_ERROR << 16); } - spin_lock_irq (&io_request_lock); + 
IO_LOCK_IRQ(megaCfg->host); DRIVER_LOCK (megaCfg); kfree (pScb->buff_ptr); pScb->buff_ptr = NULL; @@ -4744,10 +4744,10 @@ init_MUTEX_LOCKED(&mimd_ioctl_sem); - IO_LOCK; + IO_LOCK(shpnt); megaraid_queue(scsicmd, megadev_ioctl_done); - IO_UNLOCK; + IO_UNLOCK(shpnt); down(&mimd_ioctl_sem); @@ -4893,10 +4893,10 @@ init_MUTEX_LOCKED (&mimd_ioctl_sem); - IO_LOCK; + IO_LOCK(shpnt); megaraid_queue (scsicmd, megadev_ioctl_done); - IO_UNLOCK; + IO_UNLOCK(shpnt); down (&mimd_ioctl_sem); if (!scsicmd->result && outlen) { diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/megaraid.h linux/drivers/scsi/megaraid.h --- v2.5.0/linux/drivers/scsi/megaraid.h Thu Oct 25 13:53:51 2001 +++ linux/drivers/scsi/megaraid.h Tue Nov 27 09:23:27 2001 @@ -223,7 +223,8 @@ cmd_per_lun: MAX_CMD_PER_LUN, /* SCSI Commands per LUN */\ present: 0, /* Present */\ unchecked_isa_dma: 0, /* Default Unchecked ISA DMA */\ - use_clustering: ENABLE_CLUSTERING /* Enable Clustering */\ + use_clustering: ENABLE_CLUSTERING, /* Enable Clustering */\ + highmem_io: 1, \ } #endif diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/qlogicfc.c linux/drivers/scsi/qlogicfc.c --- v2.5.0/linux/drivers/scsi/qlogicfc.c Thu Oct 25 13:53:51 2001 +++ linux/drivers/scsi/qlogicfc.c Mon Nov 26 14:23:58 2001 @@ -2042,6 +2042,7 @@ return 1; } + pci_set_master(pdev); if (!(command & PCI_COMMAND_MASTER)) { printk("qlogicfc%d : bus mastering is disabled\n", hostdata->host_id); return 1; diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/qlogicfc.h linux/drivers/scsi/qlogicfc.h --- v2.5.0/linux/drivers/scsi/qlogicfc.h Sun Oct 21 10:36:54 2001 +++ linux/drivers/scsi/qlogicfc.h Tue Nov 27 09:23:27 2001 @@ -95,7 +95,8 @@ cmd_per_lun: QLOGICFC_CMD_PER_LUN, \ present: 0, \ unchecked_isa_dma: 0, \ - use_clustering: ENABLE_CLUSTERING \ + use_clustering: ENABLE_CLUSTERING, \ + highmem_io: 1 \ } #endif /* _QLOGICFC_H */ diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/qlogicisp.c linux/drivers/scsi/qlogicisp.c --- v2.5.0/linux/drivers/scsi/qlogicisp.c Thu Oct 25 13:53:51 2001 +++ linux/drivers/scsi/qlogicisp.c Mon Nov 26 14:23:58 2001 @@ -1403,11 +1403,6 @@ command &= ~PCI_COMMAND_MEMORY; #endif - if (!(command & PCI_COMMAND_MASTER)) { - printk("qlogicisp : bus mastering is disabled\n"); - return 1; - } - sh->io_port = io_base; if (!request_region(sh->io_port, 0xff, "qlogicisp")) { @@ -1471,6 +1466,8 @@ printk("qlogicisp : can't allocate request queue\n"); goto out_unmap; } + + pci_set_master(pdev); LEAVE("isp1020_init"); diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/scsi.c linux/drivers/scsi/scsi.c --- v2.5.0/linux/drivers/scsi/scsi.c Fri Nov 9 14:05:06 2001 +++ linux/drivers/scsi/scsi.c Tue Nov 27 09:23:27 2001 @@ -186,10 +186,22 @@ * handler in the list - ultimately they call scsi_request_fn * to do the dirty deed. 
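 *
 * (The qlogicfc/qlogicisp hunks above share a second recipe: rather
 * than bailing out when firmware left PCI_COMMAND_MASTER clear, a
 * driver now asks the PCI layer to enable bus mastering itself. A
 * hedged probe-time sketch follows; qlx_probe() is a made-up name:)
 */

#include <linux/pci.h>

static int qlx_probe(struct pci_dev *pdev)
{
	if (pci_enable_device(pdev))
		return -ENODEV;
	/* sets PCI_COMMAND_MASTER (plus a sane latency timer) for DMA */
	pci_set_master(pdev);
	return 0;
}

/*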
*/ -void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) { - blk_init_queue(&SDpnt->request_queue, scsi_request_fn); - blk_queue_headactive(&SDpnt->request_queue, 0); - SDpnt->request_queue.queuedata = (void *) SDpnt; +void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) +{ + char name[16]; + + request_queue_t *q = &SDpnt->request_queue; + + sprintf(name, "scsi%d%d%d", SDpnt->id, SDpnt->lun, SDpnt->channel); + blk_init_queue(q, scsi_request_fn, name); + blk_queue_headactive(q, 0); + q->queuedata = (void *) SDpnt; +#ifdef DMA_CHUNK_SIZE + blk_queue_max_segments(q, 64); +#else + blk_queue_max_segments(q, SHpnt->sg_tablesize); +#endif + blk_queue_max_sectors(q, SHpnt->max_sectors); } #ifdef MODULE @@ -227,9 +239,7 @@ req = &SCpnt->request; req->rq_status = RQ_SCSI_DONE; /* Busy, but indicate request done */ - if (req->waiting != NULL) { - complete(req->waiting); - } + complete(req->waiting); } /* @@ -620,8 +630,6 @@ unsigned long flags = 0; unsigned long timeout; - ASSERT_LOCK(&io_request_lock, 0); - #if DEBUG unsigned long *ret = 0; #ifdef __mips__ @@ -633,6 +641,8 @@ host = SCpnt->host; + ASSERT_LOCK(&host->host_lock, 0); + /* Assign a unique nonzero serial_number. */ if (++serial_number == 0) serial_number = 1; @@ -692,9 +702,9 @@ * length exceeds what the host adapter can handle. */ if (CDB_SIZE(SCpnt) <= SCpnt->host->max_cmd_len) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); rtn = host->hostt->queuecommand(SCpnt, scsi_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); if (rtn != 0) { scsi_delete_timer(SCpnt); scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_HOST_BUSY); @@ -703,10 +713,11 @@ } else { SCSI_LOG_MLQUEUE(3, printk("queuecommand : command too long.\n")); SCpnt->result = (DID_ABORT << 16); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); scsi_done(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); rtn = 1; + } } else { /* @@ -714,15 +725,15 @@ * length exceeds what the host adapter can handle. 
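 *
 * (Stepping back to scsi_initialize_queue() earlier in this scsi.c
 * diff: the per-device queue now carries the limits that used to live
 * in global arrays such as max_sectors[]. The same setup for an
 * imaginary driver might look like the sketch below; my_request_fn and
 * the "my-queue" label are assumptions, not part of the patch:)
 */

#include <linux/blkdev.h>

static void my_request_fn(request_queue_t *q);	/* strategy routine */

static void my_setup_queue(request_queue_t *q, struct Scsi_Host *shost,
			   void *queuedata)
{
	blk_init_queue(q, my_request_fn, "my-queue");
	blk_queue_headactive(q, 0);
	q->queuedata = queuedata;
	/* limits the elevator consults when growing or merging requests */
	blk_queue_max_segments(q, shost->sg_tablesize);
	blk_queue_max_sectors(q, shost->max_sectors);
}

/*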
*/ if (CDB_SIZE(SCpnt) <= SCpnt->host->max_cmd_len) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); host->hostt->queuecommand(SCpnt, scsi_old_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } else { SCSI_LOG_MLQUEUE(3, printk("queuecommand : command too long.\n")); SCpnt->result = (DID_ABORT << 16); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); scsi_old_done(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); rtn = 1; } } @@ -730,11 +741,11 @@ int temp; SCSI_LOG_MLQUEUE(3, printk("command() : routine at %p\n", host->hostt->command)); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); temp = host->hostt->command(SCpnt); SCpnt->result = temp; #ifdef DEBUG_DELAY - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); clock = jiffies + 4 * HZ; while (time_before(jiffies, clock)) { barrier(); @@ -742,14 +753,14 @@ } printk("done(host = %d, result = %04x) : routine at %p\n", host->host_no, temp, host->hostt->command); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); #endif if (host->hostt->use_new_eh_code) { scsi_done(SCpnt); } else { scsi_old_done(SCpnt); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n")); return rtn; @@ -817,7 +828,7 @@ Scsi_Device * SDpnt = SRpnt->sr_device; struct Scsi_Host *host = SDpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCSI_LOG_MLQUEUE(4, { @@ -914,7 +925,7 @@ { struct Scsi_Host *host = SCpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCpnt->owner = SCSI_OWNER_MIDLEVEL; SRpnt->sr_command = SCpnt; @@ -1004,7 +1015,7 @@ { struct Scsi_Host *host = SCpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCpnt->pid = scsi_pid++; SCpnt->owner = SCSI_OWNER_MIDLEVEL; @@ -1355,11 +1366,11 @@ Scsi_Request * SRpnt; unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); - host = SCpnt->host; device = SCpnt->device; + ASSERT_LOCK(&host->host_lock, 0); + /* * We need to protect the decrement, as otherwise a race condition * would exist. Fiddling with SCpnt isn't a problem as the @@ -1367,10 +1378,10 @@ * one execution context, but the device and host structures are * shared. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); host->host_busy--; /* Indicate that we are free */ device->device_busy--; /* Decrement device usage counter. */ - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); /* * Clear the flags which say that the device/host is no longer @@ -1858,7 +1869,6 @@ Scsi_Device *SDpnt; struct Scsi_Device_Template *sdtpnt; const char *name; - unsigned long flags; int out_of_space = 0; if (tpnt->next || !tpnt->detect) @@ -1868,7 +1878,7 @@ /* If max_sectors isn't set, default to max */ if (!tpnt->max_sectors) - tpnt->max_sectors = MAX_SECTORS; + tpnt->max_sectors = 1024; pcount = next_scsi_host; @@ -1882,10 +1892,11 @@ using the new scsi code. NOTE: the detect routine could redefine the value tpnt->use_new_eh_code. 
(DB, 13 May 1998) */ + /* + * detect should do its own locking + */ if (tpnt->use_new_eh_code) { - spin_lock_irqsave(&io_request_lock, flags); tpnt->present = tpnt->detect(tpnt); - spin_unlock_irqrestore(&io_request_lock, flags); } else tpnt->present = tpnt->detect(tpnt); diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/scsi.h linux/drivers/scsi/scsi.h --- v2.5.0/linux/drivers/scsi/scsi.h Thu Nov 22 11:49:15 2001 +++ linux/drivers/scsi/scsi.h Tue Nov 27 09:43:46 2001 @@ -386,15 +386,6 @@ #define ASKED_FOR_SENSE 0x20 #define SYNC_RESET 0x40 -#if defined(__mc68000__) || defined(CONFIG_APUS) -#include <asm/virtconvert.h> -#define CONTIGUOUS_BUFFERS(X,Y) \ - (virt_to_phys((X)->b_data+(X)->b_size-1)+1==virt_to_phys((Y)->b_data)) -#else -#define CONTIGUOUS_BUFFERS(X,Y) ((X->b_data+X->b_size) == Y->b_data) -#endif - - /* * This is the crap from the old error handling code. We have it in a special * place so that we can more easily delete it later on. diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/scsi_error.c linux/drivers/scsi/scsi_error.c --- v2.5.0/linux/drivers/scsi/scsi_error.c Sun Sep 9 10:52:35 2001 +++ linux/drivers/scsi/scsi_error.c Tue Nov 27 09:23:27 2001 @@ -423,8 +423,6 @@ unsigned char scsi_result0[256], *scsi_result = NULL; int saved_result; - ASSERT_LOCK(&io_request_lock, 0); - memcpy((void *) SCpnt->cmnd, (void *) generic_sense, sizeof(generic_sense)); @@ -583,16 +581,14 @@ STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout) { unsigned long flags; - struct Scsi_Host *host; - - ASSERT_LOCK(&io_request_lock, 0); + struct Scsi_Host *host = SCpnt->host; - host = SCpnt->host; + ASSERT_LOCK(&host->host_lock, 0); - retry: +retry: /* - * We will use a queued command if possible, otherwise we will emulate the - * queuing and calling of completion function ourselves. + * We will use a queued command if possible, otherwise we will + * emulate the queuing and calling of completion function ourselves. */ SCpnt->owner = SCSI_OWNER_LOWLEVEL; @@ -609,9 +605,9 @@ SCpnt->host->eh_action = &sem; SCpnt->request.rq_status = RQ_SCSI_BUSY; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); host->hostt->queuecommand(SCpnt, scsi_eh_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); down(&sem); @@ -634,10 +630,10 @@ * abort a timed out command or not. Not sure how * we should treat them differently anyways. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); if (SCpnt->host->hostt->eh_abort_handler) SCpnt->host->hostt->eh_abort_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); SCpnt->request.rq_status = RQ_SCSI_DONE; SCpnt->owner = SCSI_OWNER_ERROR_HANDLER; @@ -650,13 +646,13 @@ int temp; /* - * We damn well had better never use this code. There is no timeout - * protection here, since we would end up waiting in the actual low - * level driver, we don't know how to wake it up. + * We damn well had better never use this code. There is no + * timeout protection here, since we would end up waiting in + * the actual low level driver, we don't know how to wake it up. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); temp = host->hostt->command(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); SCpnt->result = temp; /* Fall through to code below to examine status. 
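 *
 * (Condensed, the submission path above is: point host->eh_action at a
 * semaphore, queue the command under the per-host lock, and sleep until
 * scsi_eh_done() signals completion. A sketch with the old-EH and abort
 * branches stripped; treat it as an outline, not the full function:)
 */

static void scsi_eh_done(Scsi_Cmnd *SCpnt);	/* completion callback */

static void send_eh_cmnd_sketch(Scsi_Cmnd *SCpnt)
{
	struct Scsi_Host *host = SCpnt->host;
	unsigned long flags;
	DECLARE_MUTEX_LOCKED(sem);

	host->eh_action = &sem;
	SCpnt->request.rq_status = RQ_SCSI_BUSY;

	spin_lock_irqsave(&host->host_lock, flags);
	host->hostt->queuecommand(SCpnt, scsi_eh_done);
	spin_unlock_irqrestore(&host->host_lock, flags);

	down(&sem);		/* woken by scsi_eh_done() */
	host->eh_action = NULL;
}

/*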
*/ @@ -664,8 +660,8 @@ } /* - * Now examine the actual status codes to see whether the command actually - * did complete normally. + * Now examine the actual status codes to see whether the command + * actually did complete normally. */ if (SCpnt->eh_state == SUCCESS) { int ret = scsi_eh_completed_normally(SCpnt); @@ -776,9 +772,9 @@ SCpnt->owner = SCSI_OWNER_LOWLEVEL; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_abort_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); return rtn; } @@ -808,9 +804,9 @@ } SCpnt->owner = SCSI_OWNER_LOWLEVEL; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_device_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -841,9 +837,9 @@ return FAILED; } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_bus_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -887,9 +883,9 @@ if (SCpnt->host->hostt->eh_host_reset_handler == NULL) { return FAILED; } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_host_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -1230,7 +1226,7 @@ Scsi_Device *SDpnt; unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); /* * Next free up anything directly waiting upon the host. This will be @@ -1247,19 +1243,22 @@ * now that error recovery is done, we will need to ensure that these * requests are started. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { - request_queue_t *q; + request_queue_t *q = &SDpnt->request_queue; + if ((host->can_queue > 0 && (host->host_busy >= host->can_queue)) || (host->host_blocked) || (host->host_self_blocked) || (SDpnt->device_blocked)) { break; } - q = &SDpnt->request_queue; + + spin_lock(&q->queue_lock); q->request_fn(q); + spin_unlock(&q->queue_lock); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } /* @@ -1306,7 +1305,7 @@ Scsi_Cmnd *SCdone; int timed_out; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCdone = NULL; diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/scsi_lib.c linux/drivers/scsi/scsi_lib.c --- v2.5.0/linux/drivers/scsi/scsi_lib.c Fri Oct 12 15:35:54 2001 +++ linux/drivers/scsi/scsi_lib.c Tue Nov 27 09:23:27 2001 @@ -61,7 +61,7 @@ * data - private data * at_head - insert request at head or tail of queue * - * Lock status: Assumed that io_request_lock is not held upon entry. + * Lock status: Assumed that queue lock is not held upon entry. 
* * Returns: Nothing */ @@ -70,13 +70,15 @@ { unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); rq->cmd = SPECIAL; rq->special = data; rq->q = NULL; + rq->bio = rq->biotail = NULL; rq->nr_segments = 0; rq->elevator_sequence = 0; + rq->inactive = 0; /* * We have the option of inserting the head or the tail of the queue. @@ -84,15 +86,15 @@ * head of the queue for things like a QUEUE_FULL message from a * device, or a host that is unable to accept a particular command. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); if (at_head) - list_add(&rq->queue, &q->queue_head); + list_add(&rq->queuelist, &q->queue_head); else - list_add_tail(&rq->queue, &q->queue_head); + list_add_tail(&rq->queuelist, &q->queue_head); q->request_fn(q); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } @@ -167,8 +169,6 @@ */ int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt) { - ASSERT_LOCK(&io_request_lock, 0); - SCpnt->owner = SCSI_OWNER_MIDLEVEL; SCpnt->reset_chain = NULL; SCpnt->serial_number = 0; @@ -250,9 +250,9 @@ Scsi_Device *SDpnt; struct Scsi_Host *SHpnt; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); if (SCpnt != NULL) { /* @@ -262,7 +262,7 @@ * the bad sector. */ SCpnt->request.special = (void *) SCpnt; - list_add(&SCpnt->request.queue, &q->queue_head); + list_add(&SCpnt->request.queuelist, &q->queue_head); } /* @@ -280,14 +280,10 @@ * with special case code, then spin off separate versions and * use function pointers to pick the right one. */ - if (SDpnt->single_lun - && list_empty(&q->queue_head) - && SDpnt->device_busy == 0) { + if (SDpnt->single_lun && blk_queue_empty(q) && SDpnt->device_busy ==0) { request_queue_t *q; - for (SDpnt = SHpnt->host_queue; - SDpnt; - SDpnt = SDpnt->next) { + for (SDpnt = SHpnt->host_queue; SDpnt; SDpnt = SDpnt->next) { if (((SHpnt->can_queue > 0) && (SHpnt->host_busy >= SHpnt->can_queue)) || (SHpnt->host_blocked) @@ -295,6 +291,7 @@ || (SDpnt->device_blocked)) { break; } + q = &SDpnt->request_queue; q->request_fn(q); } @@ -328,7 +325,7 @@ SHpnt->some_device_starved = 0; } } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } /* @@ -360,57 +357,27 @@ int requeue, int frequeue) { + request_queue_t *q = &SCpnt->device->request_queue; struct request *req; - struct buffer_head *bh; - Scsi_Device * SDpnt; - int nsect; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); req = &SCpnt->request; - req->errors = 0; - if (!uptodate) { - printk(" I/O error: dev %s, sector %lu\n", - kdevname(req->rq_dev), req->sector); - } - do { - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; - blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - sectors -= nsect; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { - req->hard_sector += nsect; - req->hard_nr_sectors -= nsect; - req->sector += nsect; - req->nr_sectors -= nsect; - - req->current_nr_sectors = bh->b_size >> 9; - if (req->nr_sectors < req->current_nr_sectors) { - req->nr_sectors = req->current_nr_sectors; - printk("scsi_end_request: buffer-list destroyed\n"); - } - } + while (end_that_request_first(req, 1, sectors)) { + if (!req->bio) { + printk("scsi_end_request: missing bio\n"); + break; } - } while (sectors && bh); + } /* * If there are blocks left over at the end, set up the command * to 
queue the remainder of them. */ - if (req->bh) { - request_queue_t *q; - - if( !requeue ) - { + if (req->bio) { + if (!requeue) return SCpnt; - } - - q = &SCpnt->device->request_queue; - req->buffer = bh->b_data; /* * Bleah. Leftovers again. Stick the leftovers in * the front of the queue, and goose the queue again. @@ -418,17 +385,15 @@ scsi_queue_next_request(q, SCpnt); return SCpnt; } + /* * This request is done. If there is someone blocked waiting for this - * request, wake them up. Typically used to wake up processes trying - * to swap a page into memory. + * request, wake them up. */ - if (req->waiting != NULL) { + if (req->waiting) complete(req->waiting); - } - add_blkdev_randomness(MAJOR(req->rq_dev)); - SDpnt = SCpnt->device; + add_blkdev_randomness(MAJOR(req->rq_dev)); /* * This will goose the queue request function at the end, so we don't @@ -436,12 +401,9 @@ */ __scsi_release_command(SCpnt); - if( frequeue ) { - request_queue_t *q; + if (frequeue) + scsi_queue_next_request(q, NULL); - q = &SDpnt->request_queue; - scsi_queue_next_request(q, NULL); - } return NULL; } @@ -489,7 +451,9 @@ */ static void scsi_release_buffers(Scsi_Cmnd * SCpnt) { - ASSERT_LOCK(&io_request_lock, 0); + struct request *req = &SCpnt->request; + + ASSERT_LOCK(&SCpnt->device->request_queue.queue_lock, 0); /* * Free up any indirection buffers we allocated for DMA purposes. @@ -510,9 +474,8 @@ } scsi_free(SCpnt->request_buffer, SCpnt->sglist_len); } else { - if (SCpnt->request_buffer != SCpnt->request.buffer) { - scsi_free(SCpnt->request_buffer, SCpnt->request_bufflen); - } + if (SCpnt->request_buffer != req->buffer) + scsi_free(SCpnt->request_buffer,SCpnt->request_bufflen); } /* @@ -548,6 +511,7 @@ int result = SCpnt->result; int this_count = SCpnt->bufflen >> 9; request_queue_t *q = &SCpnt->device->request_queue; + struct request *req = &SCpnt->request; /* * We must do one of several things here: @@ -562,7 +526,7 @@ * would be used if we just wanted to retry, for example. * */ - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); /* * Free up any indirection buffers we allocated for DMA purposes. @@ -591,10 +555,13 @@ } scsi_free(SCpnt->buffer, SCpnt->sglist_len); } else { - if (SCpnt->buffer != SCpnt->request.buffer) { - if (SCpnt->request.cmd == READ) { - memcpy(SCpnt->request.buffer, SCpnt->buffer, - SCpnt->bufflen); + if (SCpnt->buffer != req->buffer) { + if (req->cmd == READ) { + unsigned long flags; + char *to = bio_kmap_irq(req->bio, &flags); + + memcpy(to, SCpnt->buffer, SCpnt->bufflen); + bio_kunmap_irq(to, &flags); } scsi_free(SCpnt->buffer, SCpnt->bufflen); } @@ -615,11 +582,10 @@ */ if (good_sectors > 0) { SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d sectors done.\n", - SCpnt->request.nr_sectors, - good_sectors)); + req->nr_sectors, good_sectors)); SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n ", SCpnt->use_sg)); - SCpnt->request.errors = 0; + req->errors = 0; /* * If multiple sectors are requested in one buffer, then * they will have been finished off by the first command. @@ -716,7 +682,7 @@ break; case NOT_READY: printk(KERN_INFO "Device %s not ready.\n", - kdevname(SCpnt->request.rq_dev)); + kdevname(req->rq_dev)); SCpnt = scsi_end_request(SCpnt, 0, this_count); return; break; @@ -760,7 +726,7 @@ * We sometimes get this cruft in the event that a medium error * isn't properly reported. */
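/*
 * (For reference, scsi_end_request() above now leans on a generic block
 * helper: end_that_request_first() retires the request bio by bio and
 * returns nonzero while bios are still outstanding. A stripped-down
 * completion without the requeue handling, sketched on that assumption:)
 */

static void end_request_sketch(struct request *req, int uptodate,
			       int nr_sectors)
{
	while (end_that_request_first(req, uptodate, nr_sectors))
		/* keep retiring bios until the request is drained */;

	if (req->waiting)
		complete(req->waiting);	/* wake a synchronous caller */
}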
*/ - SCpnt = scsi_end_request(SCpnt, 0, SCpnt->request.current_nr_sectors); + SCpnt = scsi_end_request(SCpnt, 0, req->current_nr_sectors); return; } } @@ -774,7 +740,7 @@ * Arguments: request - I/O request we are preparing to queue. * * Lock status: No locks assumed to be held, but as it happens the - * io_request_lock is held when this is called. + * q->queue_lock is held when this is called. * * Returns: Nothing * @@ -788,7 +754,7 @@ kdev_t dev = req->rq_dev; int major = MAJOR(dev); - ASSERT_LOCK(&io_request_lock, 1); + ASSERT_LOCK(&req->q->queue_lock, 1); for (spnt = scsi_devicelist; spnt; spnt = spnt->next) { /* @@ -846,7 +812,7 @@ struct Scsi_Host *SHpnt; struct Scsi_Device_Template *STpnt; - ASSERT_LOCK(&io_request_lock, 1); + ASSERT_LOCK(&q->queue_lock, 1); SDpnt = (Scsi_Device *) q->queuedata; if (!SDpnt) { @@ -864,10 +830,17 @@ * released the lock and grabbed it again, so each time * we need to check to see if the queue is plugged or not. */ - if (SHpnt->in_recovery || q->plugged) + if (SHpnt->in_recovery || blk_queue_plugged(q)) return; /* + * if we are at the max queue depth, don't attempt to queue + * more + */ + if (SHpnt->host_busy == SDpnt->queue_depth) + break; + + /* * If the device cannot accept another request, then quit. */ if (SDpnt->device_blocked) { @@ -913,9 +886,9 @@ */ SDpnt->was_reset = 0; if (SDpnt->removable && !in_interrupt()) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); scsi_ioctl(SDpnt, SCSI_IOCTL_DOORLOCK, 0); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); continue; } } @@ -924,14 +897,14 @@ * If we couldn't find a request that could be queued, then we * can also quit. */ - if (list_empty(&q->queue_head)) + if (blk_queue_empty(q)) break; /* - * Loop through all of the requests in this queue, and find - * one that is queueable. + * get next queueable request. cur_rq would be set if we + * previously had to abort for some reason */ - req = blkdev_entry_next_request(&q->queue_head); + req = elv_next_request(q); /* * Find the actual device driver associated with this command. @@ -951,9 +924,8 @@ if( SRpnt->sr_magic == SCSI_REQ_MAGIC ) { SCpnt = scsi_allocate_device(SRpnt->sr_device, FALSE, FALSE); - if( !SCpnt ) { + if (!SCpnt) break; - } scsi_init_cmd_from_req(SCpnt, SRpnt); } @@ -973,7 +945,7 @@ * scatter-gather segments here - the * normal case code assumes this to be * correct, as it would be a performance - * lose to always recount. Handling + * loss to always recount. Handling * errors is always unusual, of course. */ recount_segments(SCpnt); @@ -985,9 +957,8 @@ * while the queue is locked and then break out of the * loop. Otherwise loop around and try another request. */ - if (!SCpnt) { + if (!SCpnt) break; - } } /* @@ -1024,7 +995,7 @@ * another. */ req = NULL; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); if (SCpnt->request.cmd != SPECIAL) { /* @@ -1054,7 +1025,7 @@ { panic("Should not have leftover blocks\n"); } - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); SHpnt->host_busy--; SDpnt->device_busy--; continue; @@ -1070,7 +1041,7 @@ { panic("Should not have leftover blocks\n"); } - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); SHpnt->host_busy--; SDpnt->device_busy--; continue; @@ -1091,7 +1062,7 @@ * Now we need to grab the lock again. We are about to mess * with the request queue and try to find another command. 
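 */

/*
 * (The shape of the rewritten scsi_request_fn(), boiled down: the loop
 * runs with q->queue_lock held, takes the next request from the
 * elevator, and drops the lock only while the command is handed to the
 * low-level driver. Illustrative skeleton; issue_cmd() is a stand-in
 * for the command setup and ->queuecommand() path:)
 */

static void issue_cmd(void *queuedata, struct request *req);

static void request_fn_sketch(request_queue_t *q)
{
	struct request *req;

	while (!blk_queue_empty(q) && !blk_queue_plugged(q)) {
		req = elv_next_request(q);
		blkdev_dequeue_request(req);

		spin_unlock_irq(&q->queue_lock);
		issue_cmd(q->queuedata, req);	/* may take host_lock */
		spin_lock_irq(&q->queue_lock);
	}
}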
*/ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); } } diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/scsi_merge.c linux/drivers/scsi/scsi_merge.c --- v2.5.0/linux/drivers/scsi/scsi_merge.c Thu Oct 25 14:05:31 2001 +++ linux/drivers/scsi/scsi_merge.c Tue Nov 27 09:23:27 2001 @@ -6,6 +6,7 @@ * Based upon conversations with large numbers * of people at Linux Expo. * Support for dynamic DMA mapping: Jakub Jelinek (jakub@redhat.com). + * Support for highmem I/O: Jens Axboe */ /* @@ -71,51 +72,6 @@ */ #define DMA_SEGMENT_SIZE_LIMITED -#ifdef CONFIG_SCSI_DEBUG_QUEUES -/* - * Enable a bunch of additional consistency checking. Turn this off - * if you are benchmarking. - */ -static int dump_stats(struct request *req, - int use_clustering, - int dma_host, - int segments) -{ - struct buffer_head *bh; - - /* - * Dump the information that we have. We know we have an - * inconsistency. - */ - printk("nr_segments is %x\n", req->nr_segments); - printk("counted segments is %x\n", segments); - printk("Flags %d %d\n", use_clustering, dma_host); - for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext) - { - printk("Segment 0x%p, blocks %d, addr 0x%lx\n", - bh, - bh->b_size >> 9, - virt_to_phys(bh->b_data - 1)); - } - panic("Ththththaats all folks. Too dangerous to continue.\n"); -} - - -/* - * Simple sanity check that we will use for the first go around - * in order to ensure that we are doing the counting correctly. - * This can be removed for optimization. - */ -#define SANITY_CHECK(req, _CLUSTER, _DMA) \ - if( req->nr_segments != __count_segments(req, _CLUSTER, _DMA, NULL) ) \ - { \ - printk("Incorrect segment count at 0x%p", current_text_addr()); \ - dump_stats(req, _CLUSTER, _DMA, __count_segments(req, _CLUSTER, _DMA, NULL)); \ - } -#else -#define SANITY_CHECK(req, _CLUSTER, _DMA) -#endif - static void dma_exhausted(Scsi_Cmnd * SCpnt, int i) { int jj; @@ -191,31 +147,23 @@ { int ret = 1; int reqsize = 0; - struct buffer_head *bh; - struct buffer_head *bhnext; + struct bio *bio, *bionext; - if( remainder != NULL ) { + if (remainder) reqsize = *remainder; - } /* * Add in the size increment for the first buffer. */ - bh = req->bh; + bio = req->bio; #ifdef DMA_SEGMENT_SIZE_LIMITED - if( reqsize + bh->b_size > PAGE_SIZE ) { + if (reqsize + bio_size(bio) > PAGE_SIZE) ret++; - reqsize = bh->b_size; - } else { - reqsize += bh->b_size; - } -#else - reqsize += bh->b_size; #endif - for (bh = req->bh, bhnext = bh->b_reqnext; - bhnext != NULL; - bh = bhnext, bhnext = bh->b_reqnext) { + for (bio = req->bio, bionext = bio->bi_next; + bionext != NULL; + bio = bionext, bionext = bio->bi_next) { if (use_clustering) { /* * See if we can do this without creating another @@ -223,11 +171,10 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(bhnext->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_phys(bionext) - 1 == ISA_DMA_THRESHOLD) { ret++; - reqsize = bhnext->b_size; - } else if (CONTIGUOUS_BUFFERS(bh, bhnext)) { + reqsize = bio_size(bionext); + } else if (BIO_CONTIG(bio, bionext)) { /* * This one is OK. Let it go. */ @@ -241,23 +188,22 @@ * kind of screwed and we need to start * another segment. 
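 *
 * (The walk above generalizes to a simple rule: two neighbouring bios
 * share a scatter segment only when they are physically contiguous. A
 * bare version of the counter, using the BIO_CONTIG() helper this patch
 * introduces and assuming req->bio is non-NULL:)
 */

static int count_segments_sketch(struct request *req)
{
	struct bio *bio = req->bio, *next;
	int segments = 1;		/* the first bio opens a segment */

	while ((next = bio->bi_next) != NULL) {
		if (!BIO_CONTIG(bio, next))
			segments++;	/* discontiguity starts a new one */
		bio = next;
	}
	return segments;
}

/*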
*/ - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD - && reqsize + bhnext->b_size > PAGE_SIZE ) + if(dma_host && bio_to_phys(bionext) - 1 >= ISA_DMA_THRESHOLD + && reqsize + bio_size(bionext) > PAGE_SIZE ) { ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); continue; } #endif - reqsize += bhnext->b_size; + reqsize += bio_size(bionext); continue; } ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); } else { ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); } } if( remainder != NULL ) { @@ -304,14 +250,13 @@ } #define MERGEABLE_BUFFERS(X,Y) \ -(((((long)(X)->b_data+(X)->b_size)|((long)(Y)->b_data)) & \ +(((((long)bio_to_phys((X))+bio_size((X)))|((long)bio_to_phys((Y)))) & \ (DMA_CHUNK_SIZE - 1)) == 0) #ifdef DMA_CHUNK_SIZE static inline int scsi_new_mergeable(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { /* * pci_map_sg will be able to merge these two @@ -320,47 +265,47 @@ * scsi.c allocates for this purpose * min(64,sg_tablesize) entries. */ - if (req->nr_segments >= max_segments || - req->nr_segments >= SHpnt->sg_tablesize) + if (req->nr_segments >= q->max_segments) return 0; + req->nr_segments++; return 1; } static inline int scsi_new_segment(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { /* * pci_map_sg won't be able to map these two * into a single hardware sg entry, so we have to * check if things fit into sg_tablesize. */ - if (req->nr_hw_segments >= SHpnt->sg_tablesize || - req->nr_segments >= SHpnt->sg_tablesize) + if (req->nr_hw_segments >= q->max_segments) return 0; + else if (req->nr_segments >= q->max_segments) + return 0; + req->nr_hw_segments++; req->nr_segments++; return 1; } + #else + static inline int scsi_new_segment(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { - if (req->nr_segments < SHpnt->sg_tablesize && - req->nr_segments < max_segments) { - /* - * This will form the start of a new segment. Bump the - * counter. - */ - req->nr_segments++; - return 1; - } else { + if (req->nr_segments >= q->max_segments) return 0; - } + + /* + * This will form the start of a new segment. Bump the + * counter. + */ + req->nr_segments++; + return 1; } #endif @@ -371,7 +316,7 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * use_clustering - 1 if this host wishes to use clustering * dma_host - 1 if this host has ISA DMA issues (bus doesn't * expose all of the address lines, so that DMA cannot @@ -380,7 +325,7 @@ * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Some drivers have limited scatter-gather table sizes, and * thus they cannot queue an infinitely large command. 
This @@ -399,25 +344,17 @@ */ __inline static int __scsi_back_merge_fn(request_queue_t * q, struct request *req, - struct buffer_head *bh, - int max_segments, + struct bio *bio, int use_clustering, int dma_host) { unsigned int count; unsigned int segment_size = 0; - Scsi_Device *SDpnt; - struct Scsi_Host *SHpnt; - - SDpnt = (Scsi_Device *) q->queuedata; - SHpnt = SDpnt->host; + Scsi_Device *SDpnt = q->queuedata; -#ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; -#endif - - if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) + if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) + return 0; + else if (!BIO_PHYS_4G(req->biotail, bio)) return 0; if (use_clustering) { @@ -427,17 +364,15 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_phys(req->biotail) - 1 == ISA_DMA_THRESHOLD) { goto new_end_segment; } - if (CONTIGUOUS_BUFFERS(req->bhtail, bh)) { + if (BIO_CONTIG(req->biotail, bio)) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { + if( dma_host && bio_to_phys(bio) - 1 >= ISA_DMA_THRESHOLD ) { segment_size = 0; count = __count_segments(req, use_clustering, dma_host, &segment_size); - if( segment_size + bh->b_size > PAGE_SIZE ) { + if( segment_size + bio_size(bio) > PAGE_SIZE ) { goto new_end_segment; } } @@ -450,33 +385,25 @@ } new_end_segment: #ifdef DMA_CHUNK_SIZE - if (MERGEABLE_BUFFERS(req->bhtail, bh)) - return scsi_new_mergeable(q, req, SHpnt, max_segments); + if (MERGEABLE_BUFFERS(req->biotail, bio)) + return scsi_new_mergeable(q, req, SDpnt->host); #endif - return scsi_new_segment(q, req, SHpnt, max_segments); + return scsi_new_segment(q, req, SDpnt->host); } __inline static int __scsi_front_merge_fn(request_queue_t * q, struct request *req, - struct buffer_head *bh, - int max_segments, + struct bio *bio, int use_clustering, int dma_host) { unsigned int count; unsigned int segment_size = 0; - Scsi_Device *SDpnt; - struct Scsi_Host *SHpnt; - - SDpnt = (Scsi_Device *) q->queuedata; - SHpnt = SDpnt->host; - -#ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; -#endif + Scsi_Device *SDpnt = q->queuedata; - if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) + if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) + return 0; + else if (!BIO_PHYS_4G(bio, req->bio)) return 0; if (use_clustering) { @@ -486,15 +413,13 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. 
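 *
 * (Ignoring the ISA corner cases, the front- and back-merge tests above
 * reduce to three questions asked against the limits the queue was
 * given at init time. An illustrative back-merge predicate:)
 */

static int back_merge_ok_sketch(request_queue_t *q, struct request *req,
				struct bio *bio)
{
	/* would the merged request exceed the per-queue sector cap? */
	if (req->nr_sectors + bio_sectors(bio) > q->max_sectors)
		return 0;
	/* physically contiguous with the tail: no new segment needed */
	if (BIO_CONTIG(req->biotail, bio))
		return 1;
	/* otherwise the bio must fit as one more scatter segment */
	return req->nr_segments < q->max_segments;
}

/*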
*/ - if (dma_host && - virt_to_phys(bh->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_phys(bio) - 1 == ISA_DMA_THRESHOLD) { goto new_start_segment; } - if (CONTIGUOUS_BUFFERS(bh, req->bh)) { + if (BIO_CONTIG(bio, req->bio)) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { - segment_size = bh->b_size; + if( dma_host && bio_to_phys(bio) - 1 >= ISA_DMA_THRESHOLD ) { + segment_size = bio_size(bio); count = __count_segments(req, use_clustering, dma_host, &segment_size); if( count != req->nr_segments ) { goto new_start_segment; @@ -509,10 +434,10 @@ } new_start_segment: #ifdef DMA_CHUNK_SIZE - if (MERGEABLE_BUFFERS(bh, req->bh)) - return scsi_new_mergeable(q, req, SHpnt, max_segments); + if (MERGEABLE_BUFFERS(bio, req->bio)) + return scsi_new_mergeable(q, req, SDpnt->host); #endif - return scsi_new_segment(q, req, SHpnt, max_segments); + return scsi_new_segment(q, req, SDpnt->host); } /* @@ -522,12 +447,12 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Optimized for different cases depending upon whether * ISA DMA is in use and whether clustering should be used. @@ -535,15 +460,12 @@ #define MERGEFCT(_FUNCTION, _BACK_FRONT, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct buffer_head * bh, \ - int max_segments) \ + struct bio *bio) \ { \ int ret; \ - SANITY_CHECK(req, _CLUSTER, _DMA); \ ret = __scsi_ ## _BACK_FRONT ## _merge_fn(q, \ req, \ - bh, \ - max_segments, \ + bio, \ _CLUSTER, \ _DMA); \ return ret; \ @@ -576,7 +498,7 @@ * Returns: 1 if it is OK to merge the two requests. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Some drivers have limited scatter-gather table sizes, and * thus they cannot queue an infinitely large command. This @@ -596,7 +518,6 @@ __inline static int __scsi_merge_requests_fn(request_queue_t * q, struct request *req, struct request *next, - int max_segments, int use_clustering, int dma_host) { @@ -609,31 +530,28 @@ */ if (req->special || next->special) return 0; + else if (!BIO_PHYS_4G(req->biotail, next->bio)) + return 0; SDpnt = (Scsi_Device *) q->queuedata; SHpnt = SDpnt->host; #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; - /* If it would not fit into prepared memory space for sg chain, * then don't allow the merge. */ - if (req->nr_segments + next->nr_segments - 1 > max_segments || - req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments - 1 > q->max_segments) return 0; - } - if (req->nr_hw_segments + next->nr_hw_segments - 1 > SHpnt->sg_tablesize) { + + if (req->nr_hw_segments + next->nr_hw_segments - 1 > q->max_segments) return 0; - } #else /* * If the two requests together are too large (even assuming that we * can merge the boundary requests into one segment, then don't * allow the merge. 
*/ - if (req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments - 1 > q->max_segments) { return 0; } #endif @@ -652,8 +570,7 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_phys(req->biotail) - 1 == ISA_DMA_THRESHOLD) { goto dont_combine; } #ifdef DMA_SEGMENT_SIZE_LIMITED @@ -662,8 +579,8 @@ * buffers in chunks of PAGE_SIZE or less. */ if (dma_host - && CONTIGUOUS_BUFFERS(req->bhtail, next->bh) - && virt_to_phys(req->bhtail->b_data) - 1 >= ISA_DMA_THRESHOLD ) + && BIO_CONTIG(req->biotail, next->bio) + && bio_to_phys(req->biotail) - 1 >= ISA_DMA_THRESHOLD ) { int segment_size = 0; int count = 0; @@ -675,7 +592,7 @@ } } #endif - if (CONTIGUOUS_BUFFERS(req->bhtail, next->bh)) { + if (BIO_CONTIG(req->biotail, next->bio)) { /* * This one is OK. Let it go. */ @@ -688,17 +605,16 @@ } dont_combine: #ifdef DMA_CHUNK_SIZE - if (req->nr_segments + next->nr_segments > max_segments || - req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments > q->max_segments) return 0; - } + /* If dynamic DMA mapping can merge last segment in req with * first segment in next, then the check for hw segments was * done above already, so we can always merge. */ - if (MERGEABLE_BUFFERS (req->bhtail, next->bh)) { + if (MERGEABLE_BUFFERS(req->biotail, next->bio)) { req->nr_hw_segments += next->nr_hw_segments - 1; - } else if (req->nr_hw_segments + next->nr_hw_segments > SHpnt->sg_tablesize) { + } else if (req->nr_hw_segments + next->nr_hw_segments > q->max_segments) return 0; } else { req->nr_hw_segments += next->nr_hw_segments; @@ -711,8 +627,7 @@ * Make sure we can fix something that is the sum of the two. * A slightly stricter test than we had above. */ - if (req->nr_segments + next->nr_segments > max_segments || - req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments > q->max_segments) { return 0; } else { /* @@ -732,12 +647,12 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Optimized for different cases depending upon whether * ISA DMA is in use and whether clustering should be used. @@ -745,12 +660,10 @@ #define MERGEREQFCT(_FUNCTION, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct request * next, \ - int max_segments) \ + struct request * next) \ { \ int ret; \ - SANITY_CHECK(req, _CLUSTER, _DMA); \ - ret = __scsi_merge_requests_fn(q, req, next, max_segments, _CLUSTER, _DMA); \ + ret = __scsi_merge_requests_fn(q, req, next, _CLUSTER, _DMA); \ return ret; \ } @@ -798,8 +711,8 @@ int use_clustering, int dma_host) { - struct buffer_head * bh; - struct buffer_head * bhprev; + struct bio * bio; + struct bio * bioprev; char * buff; int count; int i; @@ -810,29 +723,12 @@ void ** bbpnt; /* - * FIXME(eric) - don't inline this - it doesn't depend on the - * integer flags. Come to think of it, I don't think this is even - * needed any more. Need to play with it and see if we hit the - * panic. 
If not, then don't bother. + * not working right now */ - if (!SCpnt->request.bh) { - /* - * Case of page request (i.e. raw device), or unlinked buffer - * Typically used for swapping, but this isn't how we do - * swapping any more. - */ - panic("I believe this is dead code. If we hit this, I was wrong"); -#if 0 - SCpnt->request_bufflen = SCpnt->request.nr_sectors << 9; - SCpnt->request_buffer = SCpnt->request.buffer; - SCpnt->use_sg = 0; - /* - * FIXME(eric) - need to handle DMA here. - */ -#endif - return 1; - } + BUG_ON(dma_host); + req = &SCpnt->request; + /* * First we need to know how many scatter gather segments are needed. */ @@ -848,16 +744,15 @@ * buffer. */ if (dma_host && scsi_dma_free_sectors <= 10) { - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; goto single_segment; } + /* - * Don't bother with scatter-gather if there is only one segment. + * we used to not use scatter-gather for single segment request, + * but now we do (it makes highmem I/O easier to support without + * kmapping pages) */ - if (count == 1) { - this_count = SCpnt->request.nr_sectors; - goto single_segment; - } SCpnt->use_sg = count; /* @@ -875,29 +770,27 @@ * round it up. */ SCpnt->sglist_len = (SCpnt->sglist_len + 511) & ~511; - + sgpnt = (struct scatterlist *) scsi_malloc(SCpnt->sglist_len); - /* - * Now fill the scatter-gather table. - */ if (!sgpnt) { + struct Scsi_Host *SHpnt = SCpnt->host; + /* * If we cannot allocate the scatter-gather table, then * simply write the first buffer all by itself. */ printk("Warning - running *really* short on DMA buffers\n"); - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; + printk("SCSI: depth is %d, # segs %d, # hw segs %d\n", SHpnt->host_busy, req->nr_segments, req->nr_hw_segments); goto single_segment; } - /* - * Next, walk the list, and fill in the addresses and sizes of - * each segment. - */ + memset(sgpnt, 0, SCpnt->sglist_len); SCpnt->request_buffer = (char *) sgpnt; SCpnt->request_bufflen = 0; - bhprev = NULL; + req->buffer = NULL; + bioprev = NULL; if (dma_host) bbpnt = (void **) ((char *)sgpnt + @@ -907,62 +800,30 @@ SCpnt->bounce_buffers = bbpnt; - for (count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { - if (use_clustering && bhprev != NULL) { - if (dma_host && - virt_to_phys(bhprev->b_data) - 1 == ISA_DMA_THRESHOLD) { - /* Nothing - fall through */ - } else if (CONTIGUOUS_BUFFERS(bhprev, bh)) { - /* - * This one is OK. Let it go. Note that we - * do not have the ability to allocate - * bounce buffer segments > PAGE_SIZE, so - * for now we limit the thing. - */ - if( dma_host ) { -#ifdef DMA_SEGMENT_SIZE_LIMITED - if( virt_to_phys(bh->b_data) - 1 < ISA_DMA_THRESHOLD - || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) { - sgpnt[count - 1].length += bh->b_size; - bhprev = bh; - continue; - } -#else - sgpnt[count - 1].length += bh->b_size; - bhprev = bh; - continue; -#endif - } else { - sgpnt[count - 1].length += bh->b_size; - SCpnt->request_bufflen += bh->b_size; - bhprev = bh; - continue; - } - } - } - count++; - sgpnt[count - 1].address = bh->b_data; - sgpnt[count - 1].page = NULL; - sgpnt[count - 1].length += bh->b_size; - if (!dma_host) { - SCpnt->request_bufflen += bh->b_size; - } - bhprev = bh; - } + /* + * Next, walk the list, and fill in the addresses and sizes of + * each segment. + */ + SCpnt->request_bufflen = req->nr_sectors << 9; + count = blk_rq_map_sg(req->q, req, SCpnt->request_buffer); /* * Verify that the count is correct. 
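 *
 * (The heart of the new __init_io() is the call above: blk_rq_map_sg()
 * walks the request's bio chain, fills the scatterlist, and returns the
 * number of entries actually used, which must never exceed the count
 * the merge functions maintained. The calling convention in isolation:)
 */

static int map_request_sketch(struct request *req, struct scatterlist *sg,
			      int max_segments)
{
	int count = blk_rq_map_sg(req->q, req, sg);

	BUG_ON(count > max_segments);	/* merge accounting went wrong */
	return count;
}

/*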
*/ - if (count != SCpnt->use_sg) { + if (count > SCpnt->use_sg) { printk("Incorrect number of segments after building list\n"); -#ifdef CONFIG_SCSI_DEBUG_QUEUES - dump_stats(req, use_clustering, dma_host, count); -#endif + printk("counted %d, received %d\n", count, SCpnt->use_sg); + printk("req nr_sec %lu, cur_nr_sec %u\n", req->nr_sectors, req->current_nr_sectors); + scsi_free(SCpnt->request_buffer, SCpnt->sglist_len); + this_count = req->current_nr_sectors; + goto single_segment; } - if (!dma_host) { + + SCpnt->use_sg = count; + + if (!dma_host) return 1; - } + /* * Now allocate bounce buffers, if needed. */ @@ -971,7 +832,7 @@ sectors = (sgpnt[i].length >> 9); SCpnt->request_bufflen += sgpnt[i].length; if (virt_to_phys(sgpnt[i].address) + sgpnt[i].length - 1 > - ISA_DMA_THRESHOLD) { + ISA_DMA_THRESHOLD) { if( scsi_dma_free_sectors - sectors <= 10 ) { /* * If this would nearly drain the DMA @@ -989,9 +850,12 @@ break; } - bbpnt[i] = sgpnt[i].address; - sgpnt[i].address = - (char *) scsi_malloc(sgpnt[i].length); + /* + * this is not a dma host, so it will never + * be a highmem page + */ + bbpnt[i] = page_address(sgpnt[i].page) +sgpnt[i].offset; + sgpnt[i].address = (char *)scsi_malloc(sgpnt[i].length); /* * If we cannot allocate memory for this DMA bounce * buffer, then queue just what we have done so far. @@ -1005,7 +869,7 @@ } break; } - if (SCpnt->request.cmd == WRITE) { + if (req->cmd == WRITE) { memcpy(sgpnt[i].address, bbpnt[i], sgpnt[i].length); } @@ -1050,21 +914,20 @@ * single-block requests if we had hundreds of free sectors. */ if( scsi_dma_free_sectors > 30 ) { - for (this_count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { + for (this_count = 0, bio = req->bio; bio; bio = bio->bi_next) { if( scsi_dma_free_sectors - this_count < 30 || this_count == sectors ) { break; } - this_count += bh->b_size >> 9; + this_count += bio_sectors(bio); } } else { /* * Yow! Take the absolute minimum here. */ - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; } /* @@ -1077,28 +940,33 @@ * segment. Possibly the entire request, or possibly a small * chunk of the entire request. */ - bh = SCpnt->request.bh; - buff = SCpnt->request.buffer; - if (dma_host) { + bio = req->bio; + buff = req->buffer = bio_data(bio); + + if (dma_host || PageHighMem(bio_page(bio))) { /* * Allocate a DMA bounce buffer. If the allocation fails, fall * back and allocate a really small one - enough to satisfy * the first buffer. 
*/ - if (virt_to_phys(SCpnt->request.bh->b_data) - + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) { + if (bio_to_phys(bio) + bio_size(bio) - 1 > ISA_DMA_THRESHOLD) { buff = (char *) scsi_malloc(this_count << 9); if (!buff) { printk("Warning - running low on DMA memory\n"); - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; buff = (char *) scsi_malloc(this_count << 9); if (!buff) { dma_exhausted(SCpnt, 0); + return 0; } } - if (SCpnt->request.cmd == WRITE) - memcpy(buff, (char *) SCpnt->request.buffer, this_count << 9); + if (req->cmd == WRITE) { + unsigned long flags; + char *buf = bio_kmap_irq(bio, &flags); + memcpy(buff, buf, this_count << 9); + bio_kunmap_irq(buf, &flags); + } } } SCpnt->request_bufflen = this_count << 9; @@ -1139,21 +1007,11 @@ */ void initialize_merge_fn(Scsi_Device * SDpnt) { - request_queue_t *q; - struct Scsi_Host *SHpnt; - SHpnt = SDpnt->host; - - q = &SDpnt->request_queue; + struct Scsi_Host *SHpnt = SDpnt->host; + request_queue_t *q = &SDpnt->request_queue; + dma64_addr_t bounce_limit; /* - * If the host has already selected a merge manager, then don't - * pick a new one. - */ -#if 0 - if (q->back_merge_fn && q->front_merge_fn) - return; -#endif - /* * If this host has an unlimited tablesize, then don't bother with a * merge manager. The whole point of the operation is to make sure * that requests don't grow too large, and this host isn't picky. @@ -1185,4 +1043,20 @@ q->merge_requests_fn = scsi_merge_requests_fn_dc; SDpnt->scsi_init_io_fn = scsi_init_io_vdc; } + + /* + * now enable highmem I/O, if appropriate + */ + bounce_limit = BLK_BOUNCE_HIGH; + if (SHpnt->highmem_io && (SDpnt->type == TYPE_DISK)) { + if (!PCI_DMA_BUS_IS_PHYS) + /* Platforms with virtual-DMA translation + * hardware have no practical limit. + */ + bounce_limit = BLK_BOUNCE_ANY; + else + bounce_limit = SHpnt->pci_dev->dma_mask; + } + + blk_queue_bounce_limit(q, bounce_limit); } diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/scsi_obsolete.c linux/drivers/scsi/scsi_obsolete.c --- v2.5.0/linux/drivers/scsi/scsi_obsolete.c Thu Jul 5 11:28:17 2001 +++ linux/drivers/scsi/scsi_obsolete.c Tue Nov 27 09:23:27 2001 @@ -145,9 +145,10 @@ void scsi_old_times_out(Scsi_Cmnd * SCpnt) { + struct Scsi_Host *host = SCpnt->host; unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); /* Set the serial_number_at_timeout to the current serial_number */ SCpnt->serial_number_at_timeout = SCpnt->serial_number; @@ -164,7 +165,7 @@ break; case IN_ABORT: printk("SCSI host %d abort (pid %ld) timed out - resetting\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); if (!scsi_reset(SCpnt, SCSI_RESET_ASYNCHRONOUS)) break; case IN_RESET: @@ -175,7 +176,7 @@ */ printk("SCSI host %d channel %d reset (pid %ld) timed out - " "trying harder\n", - SCpnt->host->host_no, SCpnt->channel, SCpnt->pid); + host->host_no, SCpnt->channel, SCpnt->pid); SCpnt->internal_timeout &= ~IN_RESET; SCpnt->internal_timeout |= IN_RESET2; scsi_reset(SCpnt, @@ -188,7 +189,7 @@ * Maybe the HBA itself crashed and this will shake it loose. 
*/ printk("SCSI host %d reset (pid %ld) timed out - trying to shake it loose\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); SCpnt->internal_timeout &= ~(IN_RESET | IN_RESET2); SCpnt->internal_timeout |= IN_RESET3; scsi_reset(SCpnt, @@ -197,19 +198,19 @@ default: printk("SCSI host %d reset (pid %ld) timed out again -\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); printk("probably an unrecoverable SCSI bus or device hang.\n"); break; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } /* * From what I can find in scsi_obsolete.c, this function is only called * by scsi_old_done and scsi_reset. Both of these functions run with the - * io_request_lock already held, so we need do nothing here about grabbing + * host_lock already held, so we need do nothing here about grabbing * any locks. */ static void scsi_request_sense(Scsi_Cmnd * SCpnt) @@ -217,7 +218,6 @@ SCpnt->flags |= WAS_SENSE | ASKED_FOR_SENSE; update_timeout(SCpnt, SENSE_TIMEOUT); - memcpy((void *) SCpnt->cmnd, (void *) generic_sense, sizeof(generic_sense)); memset((void *) SCpnt->sense_buffer, 0, @@ -238,9 +238,9 @@ * Ugly, ugly. The newer interfaces all assume that the lock * isn't held. Mustn't disappoint, or we deadlock the system. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&SCpnt->host->host_lock); scsi_dispatch_cmd(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&SCpnt->host->host_lock); } @@ -646,9 +646,9 @@ * assume that the lock isn't held. Mustn't * disappoint, or we deadlock the system. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); scsi_dispatch_cmd(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } break; default: @@ -674,7 +674,7 @@ * use, the upper code is run from a bottom half handler, so * it isn't an issue. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); SRpnt = SCpnt->sc_request; if( SRpnt != NULL ) { SRpnt->sr_result = SRpnt->sr_command->result; @@ -686,7 +686,7 @@ } SCpnt->done(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } #undef CMD_FINISHED #undef REDO @@ -725,10 +725,10 @@ return 0; } if (SCpnt->internal_timeout & IN_ABORT) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); while (SCpnt->internal_timeout & IN_ABORT) barrier(); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } else { SCpnt->internal_timeout |= IN_ABORT; oldto = update_timeout(SCpnt, ABORT_TIMEOUT); @@ -908,10 +908,10 @@ return 0; } if (SCpnt->internal_timeout & IN_RESET) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); while (SCpnt->internal_timeout & IN_RESET) barrier(); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } else { SCpnt->internal_timeout |= IN_RESET; update_timeout(SCpnt, RESET_TIMEOUT); diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/scsi_queue.c linux/drivers/scsi/scsi_queue.c --- v2.5.0/linux/drivers/scsi/scsi_queue.c Fri Feb 9 11:30:23 2001 +++ linux/drivers/scsi/scsi_queue.c Tue Nov 27 09:23:27 2001 @@ -80,6 +80,7 @@ { struct Scsi_Host *host; unsigned long flags; + request_queue_t *q = &cmd->device->request_queue; SCSI_LOG_MLQUEUE(1, printk("Inserting command %p into mlqueue\n", cmd)); @@ -137,10 +138,10 @@ * Decrement the counters, since these commands are no longer * active on the host/device. 
*/
- spin_lock_irqsave(&io_request_lock, flags);
+ spin_lock_irqsave(&q->queue_lock, flags);
cmd->host->host_busy--;
cmd->device->device_busy--;
- spin_unlock_irqrestore(&io_request_lock, flags);
+ spin_unlock_irqrestore(&q->queue_lock, flags);

/*
* Insert this command at the head of the queue for its device.
diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/sd.c linux/drivers/scsi/sd.c
--- v2.5.0/linux/drivers/scsi/sd.c Fri Nov 9 14:05:06 2001
+++ linux/drivers/scsi/sd.c Tue Nov 27 09:23:27 2001
@@ -61,10 +61,6 @@
#include 

-/*
- * static const char RCSid[] = "$Header:";
- */
-
#define SD_MAJOR(i) (!(i) ? SCSI_DISK0_MAJOR : SCSI_DISK1_MAJOR-1+(i))

#define SCSI_DISKS_PER_MAJOR 16
@@ -72,8 +68,7 @@
#define SD_MINOR_NUMBER(i) ((i) & 255)
#define MKDEV_SD_PARTITION(i) MKDEV(SD_MAJOR_NUMBER(i), (i) & 255)
#define MKDEV_SD(index) MKDEV_SD_PARTITION((index) << 4)
-#define N_USED_SCSI_DISKS (sd_template.dev_max + SCSI_DISKS_PER_MAJOR - 1)
-#define N_USED_SD_MAJORS (N_USED_SCSI_DISKS / SCSI_DISKS_PER_MAJOR)
+#define N_USED_SD_MAJORS (1 + ((sd_template.dev_max - 1) >> 4))

#define MAX_RETRIES 5

@@ -89,7 +84,6 @@
static Scsi_Disk *rscsi_disks;
static int *sd_sizes;
static int *sd_blocksizes;
-static int *sd_hardsizes; /* Hardware sector size */
static int *sd_max_sectors;

static int check_scsidisk_media_change(kdev_t);
@@ -97,7 +91,6 @@

static int sd_init_onedisk(int);

-
static int sd_init(void);
static void sd_finish(void);
static int sd_attach(Scsi_Device *);
@@ -124,7 +117,6 @@
init_command:sd_init_command,
};

-
static void rw_intr(Scsi_Cmnd * SCpnt);

#if defined(CONFIG_PPC)
@@ -191,11 +183,11 @@
&diskinfo[0]);
else scsicam_bios_param(&rscsi_disks[DEVICE_NR(dev)],
dev, &diskinfo[0]);
-
if (put_user(diskinfo[0], &loc->heads) ||
put_user(diskinfo[1], &loc->sectors) ||
put_user(diskinfo[2], &loc->cylinders) ||
- put_user(sd[SD_PARTITION(inode->i_rdev)].start_sect, &loc->start))
+ put_user((unsigned) get_start_sect(inode->i_rdev),
+ (unsigned long *) &loc->start))
return -EFAULT;
return 0;
}
@@ -226,7 +218,8 @@
if (put_user(diskinfo[0], &loc->heads) ||
put_user(diskinfo[1], &loc->sectors) ||
put_user(diskinfo[2], (unsigned int *) &loc->cylinders) ||
- put_user(sd[SD_PARTITION(inode->i_rdev)].start_sect, &loc->start))
+ put_user((unsigned)get_start_sect(inode->i_rdev),
+ (unsigned long *)&loc->start))
return -EFAULT;
return 0;
}
@@ -239,10 +232,12 @@
case BLKFLSBUF:
case BLKSSZGET:
case BLKPG:
- case BLKELVGET:
- case BLKELVSET:
+ case BLKELVGET:
+ case BLKELVSET:
case BLKBSZGET:
case BLKBSZSET:
+ case BLKHASHPROF:
+ case BLKHASHCLEAR:
return blk_ioctl(inode->i_rdev, cmd, arg);

case BLKRRPART: /* Re-read partition tables */
@@ -251,7 +246,8 @@
return revalidate_scsidisk(dev, 1);

default:
- return scsi_ioctl(rscsi_disks[DEVICE_NR(dev)].device , cmd, (void *) arg);
+ return scsi_ioctl(rscsi_disks[DEVICE_NR(dev)].device,
+ cmd, (void *) arg);
}
}
@@ -301,7 +297,7 @@
SCSI_LOG_HLQUEUE(1, printk("Doing sd request, dev = %d, block = %d\n", devm, block));

dpnt = &rscsi_disks[dev];
- if (devm >= (sd_template.dev_max << 4) ||
+ if (devm >= (sd_template.dev_max << 4) || (devm & 0xf) ||
!dpnt ||
!dpnt->device->online ||
block + SCpnt->request.nr_sectors > sd[devm].nr_sects) {
@@ -309,7 +305,7 @@
SCSI_LOG_HLQUEUE(2, printk("Retry with 0x%p\n", SCpnt));
return 0;
}
- block += sd[devm].start_sect;
+
if (dpnt->device->changed) {
/*
* quietly refuse to do anything to a changed disc until the changed
@@ -618,8 +614,8 @@
(SCpnt->sense_buffer[4] << 16) |
(SCpnt->sense_buffer[5] << 8) |
SCpnt->sense_buffer[6];
- if
(SCpnt->request.bh != NULL) - block_sectors = SCpnt->request.bh->b_size >> 9; + if (SCpnt->request.bio != NULL) + block_sectors = bio_sectors(SCpnt->request.bio); switch (SCpnt->device->sector_size) { case 1024: error_sector <<= 1; @@ -642,7 +638,7 @@ default: break; } - error_sector -= sd[SD_PARTITION(SCpnt->request.rq_dev)].start_sect; + error_sector &= ~(block_sectors - 1); good_sectors = error_sector - SCpnt->request.sector; if (good_sectors < 0 || good_sectors >= this_count) @@ -970,15 +966,11 @@ * So I have created this table. See ll_rw_blk.c * Jacques Gelinas (Jacques@solucorp.qc.ca) */ - int m; int hard_sector = sector_size; int sz = rscsi_disks[i].capacity * (hard_sector/256); /* There are 16 minors allocated for each major device */ - for (m = i << 4; m < ((i + 1) << 4); m++) { - sd_hardsizes[m] = hard_sector; - } - + blk_queue_hardsect_size(blk_get_queue(SD_MAJOR(i)), hard_sector); printk("SCSI device %s: " "%d %d-byte hdwr sectors (%d MB)\n", nbuff, rscsi_disks[i].capacity, @@ -1063,7 +1055,7 @@ static int sd_init() { - int i; + int i, maxparts; if (sd_template.dev_noticed == 0) return 0; @@ -1074,10 +1066,17 @@ if (sd_template.dev_max > N_SD_MAJORS * SCSI_DISKS_PER_MAJOR) sd_template.dev_max = N_SD_MAJORS * SCSI_DISKS_PER_MAJOR; + /* At most 16 partitions on each scsi disk. */ + maxparts = (sd_template.dev_max << 4); + if (maxparts == 0) + return 0; + if (!sd_registered) { for (i = 0; i < N_USED_SD_MAJORS; i++) { - if (devfs_register_blkdev(SD_MAJOR(i), "sd", &sd_fops)) { - printk("Unable to get major %d for SCSI disk\n", SD_MAJOR(i)); + if (devfs_register_blkdev(SD_MAJOR(i), "sd", + &sd_fops)) { + printk("Unable to get major %d for SCSI disk\n", + SD_MAJOR(i)); return 1; } } @@ -1087,32 +1086,26 @@ if (rscsi_disks) return 0; - rscsi_disks = kmalloc(sd_template.dev_max * sizeof(Scsi_Disk), GFP_ATOMIC); - if (!rscsi_disks) - goto cleanup_devfs; - memset(rscsi_disks, 0, sd_template.dev_max * sizeof(Scsi_Disk)); - - /* for every (necessary) major: */ - sd_sizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_sizes) - goto cleanup_disks; - memset(sd_sizes, 0, (sd_template.dev_max << 4) * sizeof(int)); - - sd_blocksizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_blocksizes) - goto cleanup_sizes; - - sd_hardsizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_hardsizes) - goto cleanup_blocksizes; - - sd_max_sectors = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_max_sectors) - goto cleanup_max_sectors; + /* allocate memory */ +#define init_mem_lth(x,n) x = kmalloc((n) * sizeof(*x), GFP_ATOMIC) +#define zero_mem_lth(x,n) memset(x, 0, (n) * sizeof(*x)) + + init_mem_lth(rscsi_disks, sd_template.dev_max); + init_mem_lth(sd_sizes, maxparts); + init_mem_lth(sd_blocksizes, maxparts); + init_mem_lth(sd, maxparts); + init_mem_lth(sd_gendisks, N_USED_SD_MAJORS); + init_mem_lth(sd_max_sectors, sd_template.dev_max << 4); + + if (!rscsi_disks || !sd_sizes || !sd_blocksizes || !sd || !sd_gendisks) + goto cleanup_mem; + + zero_mem_lth(rscsi_disks, sd_template.dev_max); + zero_mem_lth(sd_sizes, maxparts); + zero_mem_lth(sd, maxparts); - for (i = 0; i < sd_template.dev_max << 4; i++) { + for (i = 0; i < maxparts; i++) { sd_blocksizes[i] = 1024; - sd_hardsizes[i] = 512; /* * Allow lowlevel device drivers to generate 512k large scsi * commands if they know what they're doing and they ask for it @@ -1122,45 +1115,34 @@ } for (i = 0; i < N_USED_SD_MAJORS; i++) { - blksize_size[SD_MAJOR(i)] = 
sd_blocksizes + i * (SCSI_DISKS_PER_MAJOR << 4); - hardsect_size[SD_MAJOR(i)] = sd_hardsizes + i * (SCSI_DISKS_PER_MAJOR << 4); - max_sectors[SD_MAJOR(i)] = sd_max_sectors + i * (SCSI_DISKS_PER_MAJOR << 4); - } - /* - * FIXME: should unregister blksize_size, hardsect_size and max_sectors when - * the module is unloaded. - */ - sd = kmalloc((sd_template.dev_max << 4) * - sizeof(struct hd_struct), - GFP_ATOMIC); - if (!sd) - goto cleanup_sd; - memset(sd, 0, (sd_template.dev_max << 4) * sizeof(struct hd_struct)); - - if (N_USED_SD_MAJORS > 1) - sd_gendisks = kmalloc(N_USED_SD_MAJORS * sizeof(struct gendisk), GFP_ATOMIC); - if (!sd_gendisks) - goto cleanup_sd_gendisks; + request_queue_t *q = blk_get_queue(SD_MAJOR(i)); + int parts_per_major = (SCSI_DISKS_PER_MAJOR << 4); + + blksize_size[SD_MAJOR(i)] = + sd_blocksizes + i * parts_per_major; + blk_queue_hardsect_size(q, 512); + } + for (i = 0; i < N_USED_SD_MAJORS; i++) { + int N = SCSI_DISKS_PER_MAJOR; + sd_gendisks[i] = sd_gendisk; - sd_gendisks[i].de_arr = kmalloc (SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr, - GFP_ATOMIC); - if (!sd_gendisks[i].de_arr) - goto cleanup_gendisks_de_arr; - memset (sd_gendisks[i].de_arr, 0, - SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr); - sd_gendisks[i].flags = kmalloc (SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags, - GFP_ATOMIC); - if (!sd_gendisks[i].flags) - goto cleanup_gendisks_flags; - memset (sd_gendisks[i].flags, 0, - SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags); + + init_mem_lth(sd_gendisks[i].de_arr, N); + init_mem_lth(sd_gendisks[i].flags, N); + + if (!sd_gendisks[i].de_arr || !sd_gendisks[i].flags) + goto cleanup_gendisks; + + zero_mem_lth(sd_gendisks[i].de_arr, N); + zero_mem_lth(sd_gendisks[i].flags, N); + sd_gendisks[i].major = SD_MAJOR(i); sd_gendisks[i].major_name = "sd"; sd_gendisks[i].minor_shift = 4; sd_gendisks[i].max_p = 1 << 4; - sd_gendisks[i].part = sd + (i * SCSI_DISKS_PER_MAJOR << 4); - sd_gendisks[i].sizes = sd_sizes + (i * SCSI_DISKS_PER_MAJOR << 4); + sd_gendisks[i].part = sd + i * (N << 4); + sd_gendisks[i].sizes = sd_sizes + i * (N << 4); sd_gendisks[i].nr_real = 0; sd_gendisks[i].real_devices = (void *) (rscsi_disks + i * SCSI_DISKS_PER_MAJOR); @@ -1168,27 +1150,21 @@ return 0; -cleanup_gendisks_flags: - kfree(sd_gendisks[i].de_arr); -cleanup_gendisks_de_arr: - while (--i >= 0 ) { +#undef init_mem_lth +#undef zero_mem_lth + +cleanup_gendisks: + /* kfree can handle NULL, so no test is required here */ + for (i = 0; i < N_USED_SD_MAJORS; i++) { kfree(sd_gendisks[i].de_arr); kfree(sd_gendisks[i].flags); } +cleanup_mem: kfree(sd_gendisks); -cleanup_sd_gendisks: kfree(sd); -cleanup_sd: - kfree(sd_max_sectors); -cleanup_max_sectors: - kfree(sd_hardsizes); -cleanup_blocksizes: kfree(sd_blocksizes); -cleanup_sizes: kfree(sd_sizes); -cleanup_disks: kfree(rscsi_disks); -cleanup_devfs: for (i = 0; i < N_USED_SD_MAJORS; i++) { devfs_unregister_blkdev(SD_MAJOR(i), "sd"); } @@ -1203,7 +1179,7 @@ for (i = 0; i < N_USED_SD_MAJORS; i++) { blk_dev[SD_MAJOR(i)].queue = sd_find_queue; - add_gendisk(&sd_gendisks[i]); + add_gendisk(&(sd_gendisks[i])); } for (i = 0; i < sd_template.dev_max; ++i) @@ -1293,9 +1269,7 @@ int revalidate_scsidisk(kdev_t dev, int maxusage) { int target; - int max_p; - int start; - int i; + int res; target = DEVICE_NR(dev); @@ -1305,36 +1279,18 @@ } DEVICE_BUSY = 1; - max_p = sd_gendisks->max_p; - start = target << sd_gendisks->minor_shift; - - for (i = max_p - 1; i >= 0; i--) { - int index = start + i; - 
invalidate_device(MKDEV_SD_PARTITION(index), 1); - sd_gendisks->part[index].start_sect = 0; - sd_gendisks->part[index].nr_sects = 0; - /* - * Reset the blocksize for everything so that we can read - * the partition table. Technically we will determine the - * correct block size when we revalidate, but we do this just - * to make sure that everything remains consistent. - */ - sd_blocksizes[index] = 1024; - if (rscsi_disks[target].device->sector_size == 2048) - sd_blocksizes[index] = 2048; - else - sd_blocksizes[index] = 1024; - } + res = wipe_partitions(dev); + if (res) + goto leave; #ifdef MAYBE_REINIT MAYBE_REINIT; #endif - grok_partitions(&SD_GENDISK(target), target % SCSI_DISKS_PER_MAJOR, - 1<<4, CAPACITY); - + grok_partitions(dev, CAPACITY); +leave: DEVICE_BUSY = 0; - return 0; + return res; } static int fop_revalidate_scsidisk(kdev_t dev) @@ -1344,6 +1300,7 @@ static void sd_detach(Scsi_Device * SDp) { Scsi_Disk *dpnt; + kdev_t dev; int i, j; int max_p; int start; @@ -1351,18 +1308,13 @@ for (dpnt = rscsi_disks, i = 0; i < sd_template.dev_max; i++, dpnt++) if (dpnt->device == SDp) { - /* If we are disconnecting a disk driver, sync and invalidate - * everything */ max_p = sd_gendisk.max_p; start = i << sd_gendisk.minor_shift; + dev = MKDEV_SD_PARTITION(start); + wipe_partitions(dev); + for (j = max_p - 1; j >= 0; j--) + sd_sizes[start + j] = 0; - for (j = max_p - 1; j >= 0; j--) { - int index = start + j; - invalidate_device(MKDEV_SD_PARTITION(index), 1); - sd_gendisks->part[index].start_sect = 0; - sd_gendisks->part[index].nr_sects = 0; - sd_sizes[index] = 0; - } devfs_register_partitions (&SD_GENDISK (i), SD_MINOR_NUMBER (start), 1); /* unregister_disk() */ @@ -1375,7 +1327,6 @@ SD_GENDISK(i).nr_real--; return; } - return; } static int __init init_sd(void) @@ -1398,14 +1349,11 @@ kfree(rscsi_disks); kfree(sd_sizes); kfree(sd_blocksizes); - kfree(sd_hardsizes); kfree((char *) sd); } for (i = 0; i < N_USED_SD_MAJORS; i++) { - del_gendisk(&sd_gendisks[i]); - blk_size[SD_MAJOR(i)] = NULL; - hardsect_size[SD_MAJOR(i)] = NULL; - read_ahead[SD_MAJOR(i)] = 0; + del_gendisk(&(sd_gendisks[i])); + blk_clear(SD_MAJOR(i)); } sd_template.dev_max = 0; if (sd_gendisks != &sd_gendisk) diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/sr.c linux/drivers/scsi/sr.c --- v2.5.0/linux/drivers/scsi/sr.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/scsi/sr.c Tue Nov 27 09:23:27 2001 @@ -88,7 +88,6 @@ static int *sr_sizes; static int *sr_blocksizes; -static int *sr_hardsizes; static int sr_open(struct cdrom_device_info *, int); void get_sectorsize(int); @@ -218,8 +217,8 @@ (SCpnt->sense_buffer[4] << 16) | (SCpnt->sense_buffer[5] << 8) | SCpnt->sense_buffer[6]; - if (SCpnt->request.bh != NULL) - block_sectors = SCpnt->request.bh->b_size >> 9; + if (SCpnt->request.bio != NULL) + block_sectors = bio_sectors(SCpnt->request.bio); if (block_sectors < 4) block_sectors = 4; if (scsi_CDs[device_nr].device->sector_size == 2048) @@ -663,6 +662,7 @@ scsi_CDs[i].needs_sector_size = 0; sr_sizes[i] = scsi_CDs[i].capacity >> (BLOCK_SIZE_BITS - 9); }; + blk_queue_hardsect_size(blk_get_queue(MAJOR_NR), sector_size); scsi_free(buffer, 512); } @@ -811,21 +811,14 @@ if (!sr_blocksizes) goto cleanup_sizes; - sr_hardsizes = kmalloc(sr_template.dev_max * sizeof(int), GFP_ATOMIC); - if (!sr_hardsizes) - goto cleanup_blocksizes; /* * These are good guesses for the time being. 
*/ - for (i = 0; i < sr_template.dev_max; i++) { + for (i = 0; i < sr_template.dev_max; i++) sr_blocksizes[i] = 2048; - sr_hardsizes[i] = 2048; - } + blksize_size[MAJOR_NR] = sr_blocksizes; - hardsect_size[MAJOR_NR] = sr_hardsizes; return 0; -cleanup_blocksizes: - kfree(sr_blocksizes); cleanup_sizes: kfree(sr_sizes); cleanup_cds: @@ -897,7 +890,6 @@ else read_ahead[MAJOR_NR] = 4; /* 4 sector read-ahead */ - return; } static void sr_detach(Scsi_Device * SDp) @@ -905,17 +897,18 @@ Scsi_CD *cpnt; int i; - for (cpnt = scsi_CDs, i = 0; i < sr_template.dev_max; i++, cpnt++) + for (cpnt = scsi_CDs, i = 0; i < sr_template.dev_max; i++, cpnt++) { if (cpnt->device == SDp) { /* - * Since the cdrom is read-only, no need to sync the device. + * Since the cdrom is read-only, no need to sync + * the device. * We should be kind to our buffer cache, however. */ invalidate_device(MKDEV(MAJOR_NR, i), 0); /* - * Reset things back to a sane state so that one can re-load a new - * driver (perhaps the same one). + * Reset things back to a sane state so that one can + * re-load a new driver (perhaps the same one). */ unregister_cdrom(&(cpnt->cdi)); cpnt->device = NULL; @@ -926,7 +919,7 @@ sr_sizes[i] = 0; return; } - return; + } } static int __init init_sr(void) @@ -948,13 +941,9 @@ kfree(sr_blocksizes); sr_blocksizes = NULL; - kfree(sr_hardsizes); - sr_hardsizes = NULL; } - blksize_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; read_ahead[MAJOR_NR] = 0; + blk_clear(MAJOR_NR); sr_template.dev_max = 0; } diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/sym53c8xx.c linux/drivers/scsi/sym53c8xx.c --- v2.5.0/linux/drivers/scsi/sym53c8xx.c Wed Oct 17 14:16:39 2001 +++ linux/drivers/scsi/sym53c8xx.c Tue Nov 27 09:23:27 2001 @@ -642,10 +642,10 @@ #define NCR_LOCK_NCB(np, flags) spin_lock_irqsave(&np->smp_lock, flags) #define NCR_UNLOCK_NCB(np, flags) spin_unlock_irqrestore(&np->smp_lock, flags) -#define NCR_LOCK_SCSI_DONE(np, flags) \ - spin_lock_irqsave(&io_request_lock, flags) -#define NCR_UNLOCK_SCSI_DONE(np, flags) \ - spin_unlock_irqrestore(&io_request_lock, flags) +#define NCR_LOCK_SCSI_DONE(host, flags) \ + spin_lock_irqsave(&((host)->host_lock), flags) +#define NCR_UNLOCK_SCSI_DONE(host, flags) \ + spin_unlock_irqrestore(&((host)->host_lock), flags) #else @@ -656,8 +656,8 @@ #define NCR_LOCK_NCB(np, flags) do { save_flags(flags); cli(); } while (0) #define NCR_UNLOCK_NCB(np, flags) do { restore_flags(flags); } while (0) -#define NCR_LOCK_SCSI_DONE(np, flags) do {;} while (0) -#define NCR_UNLOCK_SCSI_DONE(np, flags) do {;} while (0) +#define NCR_LOCK_SCSI_DONE(host, flags) do {;} while (0) +#define NCR_UNLOCK_SCSI_DONE(host, flags) do {;} while (0) #endif @@ -13676,9 +13676,9 @@ if (DEBUG_FLAGS & DEBUG_TINY) printk ("]\n"); if (done_list) { - NCR_LOCK_SCSI_DONE(np, flags); + NCR_LOCK_SCSI_DONE(done_list->host, flags); ncr_flush_done_cmds(done_list); - NCR_UNLOCK_SCSI_DONE(np, flags); + NCR_UNLOCK_SCSI_DONE(done_list->host, flags); } } @@ -13699,9 +13699,9 @@ NCR_UNLOCK_NCB(np, flags); if (done_list) { - NCR_LOCK_SCSI_DONE(np, flags); + NCR_LOCK_SCSI_DONE(done_list->host, flags); ncr_flush_done_cmds(done_list); - NCR_UNLOCK_SCSI_DONE(np, flags); + NCR_UNLOCK_SCSI_DONE(done_list->host, flags); } } diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/sym53c8xx.h linux/drivers/scsi/sym53c8xx.h --- v2.5.0/linux/drivers/scsi/sym53c8xx.h Thu Nov 22 11:49:48 2001 +++ linux/drivers/scsi/sym53c8xx.h Tue Nov 27 09:44:13 2001 @@ -96,8 +96,9 @@ this_id: 7, \ sg_tablesize: 
SCSI_NCR_SG_TABLESIZE, \ cmd_per_lun: SCSI_NCR_CMD_PER_LUN, \ - max_sectors: MAX_SEGMENTS*8, \ - use_clustering: DISABLE_CLUSTERING} + max_sectors: MAX_SEGMENTS*8, \ + use_clustering: DISABLE_CLUSTERING, \ + highmem_io: 1} #else diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/sym53c8xx_2/sym53c8xx.h linux/drivers/scsi/sym53c8xx_2/sym53c8xx.h --- v2.5.0/linux/drivers/scsi/sym53c8xx_2/sym53c8xx.h Fri Nov 9 15:22:54 2001 +++ linux/drivers/scsi/sym53c8xx_2/sym53c8xx.h Tue Nov 27 09:23:27 2001 @@ -119,7 +119,8 @@ this_id: 7, \ sg_tablesize: 0, \ cmd_per_lun: 0, \ - use_clustering: DISABLE_CLUSTERING} + use_clustering: DISABLE_CLUSTERING, \ + highmem_io: 1} #endif /* defined(HOSTS_C) || defined(MODULE) */ diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/sym53c8xx_2/sym_glue.c linux/drivers/scsi/sym53c8xx_2/sym_glue.c --- v2.5.0/linux/drivers/scsi/sym53c8xx_2/sym_glue.c Fri Nov 9 15:22:54 2001 +++ linux/drivers/scsi/sym53c8xx_2/sym_glue.c Tue Nov 27 09:23:27 2001 @@ -138,18 +138,11 @@ #define SYM_LOCK_DRIVER(flags) spin_lock_irqsave(&sym53c8xx_lock, flags) #define SYM_UNLOCK_DRIVER(flags) spin_unlock_irqrestore(&sym53c8xx_lock,flags) -#define SYM_INIT_LOCK_HCB(np) spin_lock_init(&np->s.smp_lock); -#define SYM_LOCK_HCB(np, flags) spin_lock_irqsave(&np->s.smp_lock, flags) -#define SYM_UNLOCK_HCB(np, flags) spin_unlock_irqrestore(&np->s.smp_lock, flags) - -#define SYM_LOCK_SCSI(np, flags) \ - spin_lock_irqsave(&io_request_lock, flags) -#define SYM_UNLOCK_SCSI(np, flags) \ - spin_unlock_irqrestore(&io_request_lock, flags) - -/* Ugly, but will make things easier if this locking will ever disappear */ -#define SYM_LOCK_SCSI_NOSAVE(np) spin_lock_irq(&io_request_lock) -#define SYM_UNLOCK_SCSI_NORESTORE(np) spin_unlock_irq(&io_request_lock) +#define SYM_INIT_LOCK_HCB(np) spin_lock_init(&np->s.host->host_lock); +#define SYM_LOCK_HCB(np, flags) \ + spin_lock_irqsave(&np->s.host->host_lock, flags) +#define SYM_UNLOCK_HCB(np, flags) \ + spin_unlock_irqrestore(&np->s.host->host_lock, flags) /* * These simple macros limit expression involving @@ -966,14 +959,18 @@ { hcb_p np = SYM_SOFTC_PTR(cmd); ucmd_p ucp = SYM_UCMD_PTR(cmd); - u_long flags; int sts = 0; +#if 0 + u_long flags; +#endif cmd->scsi_done = done; cmd->host_scribble = NULL; memset(ucp, 0, sizeof(*ucp)); +#if 0 SYM_LOCK_HCB(np, flags); +#endif /* * Shorten our settle_time if needed for @@ -999,7 +996,9 @@ sym_insque_tail(&ucp->link_cmdq, &np->s.wait_cmdq); } out: +#if 0 SYM_UNLOCK_HCB(np, flags); +#endif return 0; } @@ -1010,21 +1009,21 @@ static void sym53c8xx_intr(int irq, void *dev_id, struct pt_regs * regs) { unsigned long flags; - unsigned long flags1; hcb_p np = (hcb_p) dev_id; if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("["); - SYM_LOCK_SCSI(np, flags1); SYM_LOCK_HCB(np, flags); sym_interrupt(np); + /* + * push queue walk-through to tasklet + */ if (!sym_que_empty(&np->s.wait_cmdq) && !np->s.settle_time_valid) sym_requeue_awaiting_cmds(np); SYM_UNLOCK_HCB(np, flags); - SYM_UNLOCK_SCSI(np, flags1); if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("]\n"); } @@ -1036,9 +1035,7 @@ { hcb_p np = (hcb_p) npref; unsigned long flags; - unsigned long flags1; - SYM_LOCK_SCSI(np, flags1); SYM_LOCK_HCB(np, flags); sym_timer(np); @@ -1047,7 +1044,6 @@ sym_requeue_awaiting_cmds(np); SYM_UNLOCK_HCB(np, flags); - SYM_UNLOCK_SCSI(np, flags1); } @@ -1209,9 +1205,7 @@ ep->timer.data = (u_long)cmd; ep->timed_out = 1; /* Be pessimistic for once :) */ add_timer(&ep->timer); - SYM_UNLOCK_SCSI_NORESTORE(np); down(&ep->sem); - SYM_LOCK_SCSI_NOSAVE(np); if 
(ep->timed_out) sts = -2; } @@ -1975,6 +1969,7 @@ goto attach_failed; #endif host_data->ncb = np; + np->s.host = instance; SYM_INIT_LOCK_HCB(np); @@ -2140,6 +2135,7 @@ instance->max_cmd_len = 16; #endif instance->select_queue_depths = sym53c8xx_select_queue_depths; + instance->highmem_io = 1; SYM_UNLOCK_HCB(np, flags); diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/sym53c8xx_2/sym_glue.h linux/drivers/scsi/sym53c8xx_2/sym_glue.h --- v2.5.0/linux/drivers/scsi/sym53c8xx_2/sym_glue.h Thu Nov 22 10:41:14 2001 +++ linux/drivers/scsi/sym53c8xx_2/sym_glue.h Tue Nov 27 09:23:27 2001 @@ -456,10 +456,10 @@ char chip_name[8]; struct pci_dev *device; + struct Scsi_Host *host; + u_char bus; /* PCI BUS number */ u_char device_fn; /* PCI BUS device and function */ - - spinlock_t smp_lock; /* Lock for SMP threading */ vm_offset_t mmio_va; /* MMIO kernel virtual address */ vm_offset_t ram_va; /* RAM kernel virtual address */ diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/sym53c8xx_2/sym_hipd.c linux/drivers/scsi/sym53c8xx_2/sym_hipd.c --- v2.5.0/linux/drivers/scsi/sym53c8xx_2/sym_hipd.c Fri Nov 9 15:22:54 2001 +++ linux/drivers/scsi/sym53c8xx_2/sym_hipd.c Tue Nov 27 09:23:27 2001 @@ -4689,8 +4689,8 @@ return; out_clrack: OUTL_DSP (SCRIPTA_BA (np, clrack)); - return; out_stuck: + ; } /* @@ -5223,9 +5223,8 @@ * And accept tagged commands now. */ lp->head.itlq_tbl_sa = cpu_to_scr(vtobus(lp->itlq_tbl)); - - return; fail: + ; } /* diff -u --recursive --new-file v2.5.0/linux/drivers/scsi/sym53c8xx_2/sym_nvram.c linux/drivers/scsi/sym53c8xx_2/sym_nvram.c --- v2.5.0/linux/drivers/scsi/sym53c8xx_2/sym_nvram.c Fri Nov 9 15:22:54 2001 +++ linux/drivers/scsi/sym53c8xx_2/sym_nvram.c Tue Nov 27 09:23:27 2001 @@ -505,10 +505,10 @@ return retv; } -#undef SET_BIT 0 -#undef CLR_BIT 1 -#undef SET_CLK 2 -#undef CLR_CLK 3 +#undef SET_BIT +#undef CLR_BIT +#undef SET_CLK +#undef CLR_CLK /* * Try reading Symbios NVRAM. 
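
[Editor's note, not part of the patch] The SCSI hunks above repeatedly swap the single global io_request_lock for a per-host lock. A minimal sketch of the pattern they converge on, assuming a 2.5-era struct Scsi_Host carrying the host_lock spinlock this patch introduces; the ISR name and body are hypothetical:

	static void example_hba_intr(int irq, void *dev_id, struct pt_regs *regs)
	{
		struct Scsi_Host *host = (struct Scsi_Host *) dev_id;
		unsigned long flags;

		/* 2.4 style was: spin_lock_irqsave(&io_request_lock, flags); */
		spin_lock_irqsave(&host->host_lock, flags);
		/* ... walk the done list and complete commands for this host only ... */
		spin_unlock_irqrestore(&host->host_lock, flags);
	}

With the lock per host, two adapters can complete commands concurrently instead of serializing the whole SCSI midlayer on one spinlock, which is the point of the sym53c8xx and scsi_obsolete.c conversions above.
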
diff -u --recursive --new-file v2.5.0/linux/drivers/sound/ad1816.c linux/drivers/sound/ad1816.c --- v2.5.0/linux/drivers/sound/ad1816.c Fri Nov 9 15:22:54 2001 +++ linux/drivers/sound/ad1816.c Sun Nov 25 09:43:42 2001 @@ -1258,7 +1258,7 @@ static int __initdata dma = -1; static int __initdata dma2 = -1; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ struct pci_dev *ad1816_dev = NULL; static int activated = 1; @@ -1280,7 +1280,7 @@ MODULE_PARM(ad1816_clockfreq,"i"); MODULE_PARM(options,"i"); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ static struct pci_dev *activate_dev(char *devname, char *resname, struct pci_dev *dev) { @@ -1407,7 +1407,7 @@ static int __init init_ad1816(void) { -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ if(isapnp && (ad1816_probe_isapnp(&cfg) < 0) ) { printk(KERN_NOTICE "ad1816: No ISAPnP cards found, trying standard ones...\n"); isapnp = 0; @@ -1447,7 +1447,7 @@ } nr_ad1816_devs=0; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ if(activated) if(ad1816_dev) ad1816_dev->deactivate(ad1816_dev); diff -u --recursive --new-file v2.5.0/linux/drivers/sound/ad1848.c linux/drivers/sound/ad1848.c --- v2.5.0/linux/drivers/sound/ad1848.c Sun Sep 30 12:26:08 2001 +++ linux/drivers/sound/ad1848.c Sun Nov 25 09:43:42 2001 @@ -162,7 +162,7 @@ ,{CAP_F_TIMER} /* MD_1845_SSCAPE */ }; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ static int isapnp = 1; static int isapnpjump = 0; static int reverse = 0; @@ -2830,7 +2830,7 @@ MODULE_PARM(deskpro_m, "i"); /* Special magic for Deskpro M box */ MODULE_PARM(soundpro, "i"); /* More special magic for SoundPro chips */ -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ MODULE_PARM(isapnp, "i"); MODULE_PARM(isapnpjump, "i"); MODULE_PARM(reverse, "i"); @@ -3000,7 +3000,7 @@ { printk(KERN_INFO "ad1848/cs4248 codec driver Copyright (C) by Hannu Savolainen 1993-1996\n"); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ if(isapnp && (ad1848_isapnp_probe(&cfg) < 0) ) { printk(KERN_NOTICE "ad1848: No ISAPnP cards found, trying standard ones...\n"); isapnp = 0; @@ -3035,7 +3035,7 @@ if(loaded) unload_ms_sound(&cfg); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ if(audio_activated) if(ad1848_dev) ad1848_dev->deactivate(ad1848_dev); diff -u --recursive --new-file v2.5.0/linux/drivers/sound/awe_wave.c linux/drivers/sound/awe_wave.c --- v2.5.0/linux/drivers/sound/awe_wave.c Sun Sep 30 12:26:08 2001 +++ linux/drivers/sound/awe_wave.c Sun Nov 25 09:43:42 2001 @@ -26,9 +26,7 @@ #include #include #include -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE #include -#endif #include "sound_config.h" @@ -205,7 +203,7 @@ int io = AWE_DEFAULT_BASE_ADDR; /* Emu8000 base address */ int memsize = AWE_DEFAULT_MEM_SIZE; /* memory size in Kbytes */ -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ static int isapnp = -1; #else static int isapnp = 0; @@ -4772,7 +4770,7 @@ return 1; } -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ static struct { unsigned short card_vendor, card_device; unsigned short vendor; @@ -4841,7 +4839,7 @@ { int base; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ if (isapnp) { if (awe_probe_isapnp(&io) < 0) { printk(KERN_ERR "AWE32: No ISAPnP cards found\n"); @@ -6132,7 +6130,7 @@ void __exit unload_awe(void) { 
_unload_awe(); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ if (isapnp) awe_deactivate_isapnp(); #endif /* isapnp */ diff -u --recursive --new-file v2.5.0/linux/drivers/sound/cmpci.c linux/drivers/sound/cmpci.c --- v2.5.0/linux/drivers/sound/cmpci.c Fri Nov 9 14:07:41 2001 +++ linux/drivers/sound/cmpci.c Sun Nov 25 10:17:47 2001 @@ -2496,7 +2496,6 @@ spin_unlock_irqrestore(&s->lock, flags); s->open_mode |= (file->f_mode << FMODE_MIDI_SHIFT) & (FMODE_MIDI_READ | FMODE_MIDI_WRITE); up(&s->open_sem); - MOD_INC_USE_COUNT; return 0; } @@ -2694,7 +2693,6 @@ outb(1, s->iosynth+3); /* enable OPL3 */ s->open_mode |= FMODE_DMFM; up(&s->open_sem); - MOD_INC_USE_COUNT; return 0; } diff -u --recursive --new-file v2.5.0/linux/drivers/sound/maestro3.c linux/drivers/sound/maestro3.c --- v2.5.0/linux/drivers/sound/maestro3.c Fri Nov 9 13:41:42 2001 +++ linux/drivers/sound/maestro3.c Sun Nov 25 10:17:47 2001 @@ -2036,7 +2036,6 @@ set_fmt(s, fmtm, fmts); s->open_mode |= file->f_mode & (FMODE_READ | FMODE_WRITE); - MOD_INC_USE_COUNT; up(&s->open_sem); spin_unlock_irqrestore(&s->lock, flags); return 0; @@ -2075,7 +2074,6 @@ up(&s->open_sem); wake_up(&s->open_wait); - MOD_DEC_USE_COUNT; return 0; } @@ -2142,14 +2140,12 @@ int minor = MINOR(inode->i_rdev); struct m3_card *card = devs; - MOD_INC_USE_COUNT; for (card = devs; card != NULL; card = card->next) { if((card->ac97 != NULL) && (card->ac97->dev_mixer == minor)) break; } if (!card) { - MOD_DEC_USE_COUNT; return -ENODEV; } @@ -2160,7 +2156,6 @@ static int m3_release_mixdev(struct inode *inode, struct file *file) { - MOD_DEC_USE_COUNT; return 0; } @@ -2173,6 +2168,7 @@ } static struct file_operations m3_mixer_fops = { + owner: THIS_MODULE, llseek: no_llseek, ioctl: m3_ioctl_mixdev, open: m3_open_mixdev, @@ -2546,6 +2542,7 @@ } static struct file_operations m3_audio_fops = { + owner: THIS_MODULE, llseek: &no_llseek, read: &m3_read, write: &m3_write, diff -u --recursive --new-file v2.5.0/linux/drivers/sound/opl3sa2.c linux/drivers/sound/opl3sa2.c --- v2.5.0/linux/drivers/sound/opl3sa2.c Thu Oct 11 09:43:30 2001 +++ linux/drivers/sound/opl3sa2.c Sun Nov 25 09:43:42 2001 @@ -99,7 +99,7 @@ #define CHIPSET_OPL3SA2 0 #define CHIPSET_OPL3SA3 1 -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ #define OPL3SA2_CARDS_MAX 4 #else #define OPL3SA2_CARDS_MAX 1 @@ -147,7 +147,7 @@ static int __initdata ymode = -1; static int __initdata loopback = -1; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ /* PnP specific parameters */ static int __initdata isapnp = 1; static int __initdata multiple = 1; @@ -191,7 +191,7 @@ MODULE_PARM(loopback, "i"); MODULE_PARM_DESC(loopback, "Set A/D input source. Useful for echo cancellation (0 = Mic Rch (default), 1 = Mono output loopback)"); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ MODULE_PARM(isapnp, "i"); MODULE_PARM_DESC(isapnp, "When set to 0, ISA PnP support will be disabled"); @@ -807,7 +807,7 @@ } -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ struct isapnp_device_id isapnp_opl3sa2_list[] __initdata = { { ISAPNP_ANY_ID, ISAPNP_ANY_ID, @@ -888,7 +888,7 @@ return 0; } -#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */ +#endif /* __ISAPNP__ */ /* End of component functions */ @@ -909,9 +909,9 @@ max = (multiple && isapnp) ? 
OPL3SA2_CARDS_MAX : 1; for(card = 0; card < max; card++, opl3sa2_cards_num++) { -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ /* - * Please remember that even with CONFIG_ISAPNP defined one + * Please remember that even with __ISAPNP__ defined one * should still be able to disable PNP support for this * single driver! */ @@ -1039,7 +1039,7 @@ unload_opl3sa2_mss(&cfg_mss[card]); unload_opl3sa2(&cfg[card], card); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ if(opl3sa2_activated[card] && opl3sa2_dev[card]) { opl3sa2_dev[card]->deactivate(opl3sa2_dev[card]); @@ -1058,7 +1058,7 @@ static int __init setup_opl3sa2(char *str) { /* io, irq, dma, dma2,... */ -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ int ints[11]; #else int ints[9]; @@ -1073,7 +1073,7 @@ mpu_io = ints[6]; ymode = ints[7]; loopback = ints[8]; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ isapnp = ints[9]; multiple = ints[10]; #endif diff -u --recursive --new-file v2.5.0/linux/drivers/sound/sb_card.c linux/drivers/sound/sb_card.c --- v2.5.0/linux/drivers/sound/sb_card.c Thu Oct 11 09:43:30 2001 +++ linux/drivers/sound/sb_card.c Sun Nov 25 09:43:42 2001 @@ -69,7 +69,7 @@ #include "sb_mixer.h" #include "sb.h" -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ #define SB_CARDS_MAX 5 #else #define SB_CARDS_MAX 1 @@ -196,7 +196,7 @@ *opl_dev[SB_CARDS_MAX] = {NULL}; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ static int isapnp = 1; static int isapnpjump = 0; static int multiple = 1; @@ -226,7 +226,7 @@ MODULE_PARM(esstype, "i"); MODULE_PARM(acer, "i"); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ MODULE_PARM(isapnp, "i"); MODULE_PARM(isapnpjump, "i"); MODULE_PARM(multiple, "i"); @@ -251,7 +251,7 @@ MODULE_PARM_DESC(esstype, "ESS chip type"); MODULE_PARM_DESC(acer, "Set this to detect cards in some ACER notebooks"); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ /* Please add new entries at the end of the table */ static struct { @@ -909,8 +909,8 @@ printk(KERN_INFO "Soundblaster audio driver Copyright (C) by Hannu Savolainen 1993-1996\n"); for(card = 0; card < max; card++, sb_cards_num++) { -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE - /* Please remember that even with CONFIG_ISAPNP defined one +#ifdef __ISAPNP__ + /* Please remember that even with __ISAPNP__ defined one * should still be able to disable PNP support for this * single driver! 
*/ if((!pnplegacy||card>0) && isapnp && (sb_isapnp_probe(&cfg[card], &cfg_mpu[card], card) < 0) ) { @@ -997,7 +997,7 @@ if (sbmpu[i]) unload_sbmpu(&cfg_mpu[i]); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ if(!audio_activated[i] && sb_dev[i]) sb_dev[i]->deactivate(sb_dev[i]); if(!mpu_activated[i] && mpu_dev[i]) diff -u --recursive --new-file v2.5.0/linux/drivers/sound/sound_core.c linux/drivers/sound/sound_core.c --- v2.5.0/linux/drivers/sound/sound_core.c Sun Sep 30 12:26:08 2001 +++ linux/drivers/sound/sound_core.c Sun Nov 25 10:17:47 2001 @@ -115,7 +115,6 @@ *list=s; - MOD_INC_USE_COUNT; return n; } @@ -133,7 +132,6 @@ *list=p->next; devfs_unregister (p->de); kfree(p); - MOD_DEC_USE_COUNT; return; } list=&(p->next); diff -u --recursive --new-file v2.5.0/linux/drivers/sound/ymfpci.c linux/drivers/sound/ymfpci.c --- v2.5.0/linux/drivers/sound/ymfpci.c Mon Nov 19 14:53:19 2001 +++ linux/drivers/sound/ymfpci.c Sun Nov 25 10:17:47 2001 @@ -1872,7 +1872,6 @@ #endif up(&unit->open_sem); - MOD_INC_USE_COUNT; return 0; out_nodma: @@ -1921,7 +1920,6 @@ up(&codec->open_sem); - MOD_DEC_USE_COUNT; return 0; } @@ -1949,7 +1947,6 @@ match: file->private_data = unit->ac97_codec[i]; - MOD_INC_USE_COUNT; return 0; } @@ -1963,11 +1960,11 @@ static int ymf_release_mixdev(struct inode *inode, struct file *file) { - MOD_DEC_USE_COUNT; return 0; } static /*const*/ struct file_operations ymf_fops = { + owner: THIS_MODULE, llseek: no_llseek, read: ymf_read, write: ymf_write, @@ -1979,6 +1976,7 @@ }; static /*const*/ struct file_operations ymf_mixer_fops = { + owner: THIS_MODULE, llseek: no_llseek, ioctl: ymf_ioctl_mixdev, open: ymf_open_mixdev, @@ -2043,13 +2041,6 @@ ymfpci_aclink_reset(unit->pci); ymfpci_codec_ready(unit, 0, 1); /* prints diag if not ready. */ - for (i = 0; i < NR_AC97; i++) { - codec = unit->ac97_codec[i]; - if (!codec) - continue; - ac97_restore_state(codec); - } - #ifdef CONFIG_SOUND_YMFPCI_LEGACY /* XXX At this time the legacy registers are probably deprogrammed. */ #endif @@ -2063,6 +2054,13 @@ if (unit->start_count) { ymfpci_writel(unit, YDSXGR_MODE, 3); unit->active_bank = ymfpci_readl(unit, YDSXGR_CTRLSELECT) & 1; + } + + for (i = 0; i < NR_AC97; i++) { + codec = unit->ac97_codec[i]; + if (!codec) + continue; + ac97_restore_state(codec); } unit->suspended = 0; diff -u --recursive --new-file v2.5.0/linux/drivers/usb/dc2xx.c linux/drivers/usb/dc2xx.c --- v2.5.0/linux/drivers/usb/dc2xx.c Fri Sep 14 14:04:07 2001 +++ linux/drivers/usb/dc2xx.c Mon Nov 26 17:09:10 2001 @@ -112,12 +112,15 @@ /* These have a different application level protocol which * is part of the Flashpoint "DigitaOS". That supports some * non-camera devices, and some non-Kodak cameras. + * Use this driver to get USB and "OpenDis" to talk. */ { USB_DEVICE(0x040a, 0x0100) }, // Kodak DC-220 { USB_DEVICE(0x040a, 0x0110) }, // Kodak DC-260 { USB_DEVICE(0x040a, 0x0111) }, // Kodak DC-265 { USB_DEVICE(0x040a, 0x0112) }, // Kodak DC-290 { USB_DEVICE(0xf003, 0x6002) }, // HP PhotoSmart C500 + { USB_DEVICE(0x03f0, 0x4102) }, // HP PhotoSmart C618 + { USB_DEVICE(0x0a17, 0x1001) }, // Pentax EI-200 /* Other USB devices may well work here too, so long as they * just stick to half duplex bulk packet exchanges. 
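
[Editor's note, not part of the patch] The sound hunks above drop the manual MOD_INC_USE_COUNT/MOD_DEC_USE_COUNT calls from open() and release() and set the owner field in struct file_operations instead. A minimal sketch of the new pattern, assuming the 2.4/2.5 owner-field mechanism; the my_* names are hypothetical:

	static int my_open(struct inode *inode, struct file *file)
	{
		/* no MOD_INC_USE_COUNT: the VFS already holds a module reference */
		return 0;
	}

	static int my_release(struct inode *inode, struct file *file)
	{
		/* no MOD_DEC_USE_COUNT: the VFS drops its reference after this returns */
		return 0;
	}

	static struct file_operations my_fops = {
		owner:   THIS_MODULE, /* pin the module while the file is open */
		open:    my_open,
		release: my_release,
	};

Letting the VFS hold the reference closes the window in which a module could be unloaded between the file being opened and the driver bumping the count by hand; that is why maestro3.c, ymfpci.c and sound_core.c are converted above.
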
That diff -u --recursive --new-file v2.5.0/linux/drivers/usb/pwc-ctrl.c linux/drivers/usb/pwc-ctrl.c --- v2.5.0/linux/drivers/usb/pwc-ctrl.c Wed Oct 17 14:34:06 2001 +++ linux/drivers/usb/pwc-ctrl.c Mon Nov 26 17:09:10 2001 @@ -782,7 +782,7 @@ { char buf; - if (pdev->type < 675 || pdev->release < 6) + if (pdev->type < 675 || (pdev->type < 730 && pdev->release < 6)) return 0; /* Not supported by Nala or Timon < release 6 */ if (power) diff -u --recursive --new-file v2.5.0/linux/drivers/usb/pwc-if.c linux/drivers/usb/pwc-if.c --- v2.5.0/linux/drivers/usb/pwc-if.c Wed Oct 17 14:34:06 2001 +++ linux/drivers/usb/pwc-if.c Mon Nov 26 17:09:10 2001 @@ -91,6 +91,8 @@ disconnect: usb_pwc_disconnect, /* disconnect() */ }; +#define MAX_DEV_HINTS 10 + static int default_size = PSZ_QCIF; static int default_fps = 10; static int default_palette = VIDEO_PALETTE_YUV420P; /* This format is understood by most tools */ @@ -99,13 +101,17 @@ int pwc_trace = TRACE_MODULE | TRACE_FLOW | TRACE_PWCX; static int power_save = 0; static int led_on = 1, led_off = 0; /* defaults to LED that is on while in use */ -int pwc_preferred_compression = 2; /* 0..3 = uncompressed..high */ + int pwc_preferred_compression = 2; /* 0..3 = uncompressed..high */ +static struct { + int type; + char serial_number[30]; + int device_node; + struct pwc_device *pdev; +} device_hint[MAX_DEV_HINTS]; static struct semaphore mem_lock; static void *mem_leak = NULL; /* For delayed kfree()s. See below */ -static int video_nr = -1; - /***/ static int pwc_video_open(struct video_device *vdev, int mode); @@ -647,7 +653,8 @@ errmsg = "Unknown"; switch(urb->status) { case -ENOSR: errmsg = "Buffer error (overrun)"; break; - case -EPIPE: errmsg = "Babble/stalled (bad cable?)"; break; + case -EPIPE: errmsg = "Stalled (device not responding)"; break; + case -EOVERFLOW: errmsg = "Babble (bad cable?)"; break; case -EPROTO: errmsg = "Bit-stuff error (bad cable?)"; break; case -EILSEQ: errmsg = "CRC/Timeout"; break; case -ETIMEDOUT: errmsg = "NAK (device does not respond)"; break; @@ -765,6 +772,11 @@ } /* .. flen < last_packet_size */ pdev->vlast_packet_size = flen; } /* ..status == 0 */ +#ifdef PWC_DEBUG + /* This is normally not interesting to the user, unless you are really debugging something */ + else + Trace(TRACE_FLOW, "Iso frame %d of USB has error %d\n", i, fst); +#endif } if (awake) wake_up_interruptible(&pdev->frameq); @@ -1140,7 +1152,7 @@ return -ERESTARTSYS; } schedule(); - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(TASK_INTERRUPTIBLE); } remove_wait_queue(&pdev->frameq, &wait); set_current_state(TASK_RUNNING); @@ -1595,7 +1607,9 @@ struct pwc_device *pdev = NULL; struct video_device *vdev; int vendor_id, product_id, type_id; - int i; + int i, hint; + int video_nr = -1; /* default: use next available device */ + char serial_number[30]; free_mem_leak(); @@ -1698,6 +1712,10 @@ } else return NULL; /* Not Philips, Askey, Logitech or Samsung, for sure. */ + memset(serial_number, 0, 30); + usb_string(udev, udev->descriptor.iSerialNumber, serial_number, 29); + Trace(TRACE_PROBE, "Device serial number is %s\n", serial_number); + if (udev->descriptor.bNumConfigurations > 1) Info("Warning: more than 1 configuration available.\n"); @@ -1734,6 +1752,21 @@ pdev->release = udev->descriptor.bcdDevice; Trace(TRACE_PROBE, "Release: %04x\n", pdev->release); + + /* Now search device_hint[] table for a match, so we can hint a node number. 
*/
+ for (hint = 0; hint < MAX_DEV_HINTS; hint++) {
+ if (((device_hint[hint].type == -1) || (device_hint[hint].type == pdev->type)) &&
+ (device_hint[hint].pdev == NULL)) {
+ /* so far, so good... try serial number */
+ if ((device_hint[hint].serial_number[0] == '*') || !strcmp(device_hint[hint].serial_number, serial_number)) {
+ /* match! */
+ video_nr = device_hint[hint].device_node;
+ Trace(TRACE_PROBE, "Found hint, will try to register as /dev/video%d\n", video_nr);
+ break;
+ }
+ }
+ }
+
i = video_register_device(vdev, VFL_TYPE_GRABBER, video_nr);
if (i < 0) {
Err("Failed to register as video device (%d).\n", i);
@@ -1743,6 +1776,9 @@
Trace(TRACE_PROBE, "Registered video struct at 0x%p.\n", vdev);
Info("Registered as /dev/video%d.\n", vdev->minor & 0x3F);
}
+ /* occupy slot */
+ if (hint < MAX_DEV_HINTS)
+ device_hint[hint].pdev = pdev;
#if 0
/* Shut down camera now (some people like the LED off) */
@@ -1762,6 +1798,7 @@
static void usb_pwc_disconnect(struct usb_device *udev, void *ptr)
{
struct pwc_device *pdev;
+ int hint;
lock_kernel();
free_mem_leak();
@@ -1815,12 +1852,31 @@
pdev->vdev = NULL;
}
}
+
+ /* search device_hint[] table if we occupy a slot, by any chance */
+ for (hint = 0; hint < MAX_DEV_HINTS; hint++)
+ if (device_hint[hint].pdev == pdev)
+ device_hint[hint].pdev = NULL;
+
pdev->udev = NULL;
unlock_kernel();
kfree(pdev);
}
+/* *grunt* We have to do atoi ourselves :-( */
+static int pwc_atoi(char *s)
+{
+ int k = 0;
+
+ while (*s != '\0' && *s >= '0' && *s <= '9') {
+ k = 10 * k + (*s - '0');
+ s++;
+ }
+ return k;
+}
+
/*
* Initialization code & module stuff
@@ -1833,8 +1889,8 @@
static int trace = -1;
static int compression = -1;
static int leds[2] = { -1, -1 };
+static char *dev_hint[10] = { };
-MODULE_PARM(video_nr, "i");
MODULE_PARM(size, "s");
MODULE_PARM_DESC(size, "Initial image size. One of sqcif, qsif, qcif, sif, cif, vga");
MODULE_PARM(fps, "i");
@@ -1851,13 +1907,16 @@
MODULE_PARM_DESC(compression, "Preferred compression quality. Range 0 (uncompressed) to 3 (high compression)");
MODULE_PARM(leds, "2i");
MODULE_PARM_DESC(leds, "LED on,off time in milliseconds");
+MODULE_PARM(dev_hint, "0-10s");
+MODULE_PARM_DESC(dev_hint, "Device node hints");
+
MODULE_DESCRIPTION("Philips USB webcam driver");
MODULE_AUTHOR("Nemosoft Unv. ");
MODULE_LICENSE("GPL");
static int __init usb_pwc_init(void)
{
- int s;
+ int i, sz;
char *sizenames[PSZ_MAX] = { "sqcif", "qsif", "qcif", "sif", "cif", "vga" };
Info("Philips PCA645/646 + PCVC675/680/690 + PCVC730/740/750 webcam module version " PWC_VERSION " loaded.\n");
@@ -1874,13 +1933,13 @@
if (size) {
/* string; try matching with array */
- for (s = 0; s < PSZ_MAX; s++) {
- if (!strcmp(sizenames[s], size)) { /* Found! */
- default_size = s;
+ for (sz = 0; sz < PSZ_MAX; sz++) {
+ if (!strcmp(sizenames[sz], size)) { /* Found! */
+ default_size = sz;
break;
}
}
- if (s == PSZ_MAX) {
+ if (sz == PSZ_MAX) {
Err("Size not recognized; try size=[sqcif | qsif | qcif | sif | cif | vga].\n");
return -EINVAL;
}
@@ -1920,6 +1979,74 @@
led_on = leds[0] / 100;
if (leds[1] >= 0)
led_off = leds[1] / 100;
+
+ /* Big device node whoopla. Basically, it allows you to assign a
+ device node (/dev/videoX) to a camera, based on its type
+ & serial number. The format is [type[.serialnumber]:]node.
+
+ Any camera that isn't matched by these rules gets the next
+ available free device node.
+ */
+ for (i = 0; i < MAX_DEV_HINTS; i++) {
+ char *s, *colon, *dot;
+
+ /* This loop also initializes the array */
+ device_hint[i].pdev = NULL;
+ s = dev_hint[i];
+ if (s != NULL && *s != '\0') {
+ device_hint[i].type = -1; /* wildcard */
+ strcpy(device_hint[i].serial_number, "*");
+
+ /* parse string: chop at ':' & '.' */
+ colon = dot = s;
+ while (*colon != '\0' && *colon != ':')
+ colon++;
+ while (*dot != '\0' && *dot != '.')
+ dot++;
+ /* A few sanity checks */
+ if (*dot != '\0' && dot > colon) {
+ Err("Malformed camera hint: the colon must be after the dot.\n");
+ return -EINVAL;
+ }
+
+ if (*colon == '\0') {
+ /* No colon */
+ if (*dot != '\0') {
+ Err("Malformed camera hint: no colon + device node given.\n");
+ return -EINVAL;
+ }
+ else {
+ /* No type or serial number specified, just a number. */
+ device_hint[i].device_node = pwc_atoi(s);
+ }
+ }
+ else {
+ /* There's a colon, so we have at least a type and a device node */
+ device_hint[i].type = pwc_atoi(s);
+ device_hint[i].device_node = pwc_atoi(colon + 1);
+ if (*dot != '\0') {
+ /* There's a serial number as well */
+ int k;
+
+ dot++;
+ k = 0;
+ while (*dot != ':' && k < 29) {
+ device_hint[i].serial_number[k++] = *dot;
+ dot++;
+ }
+ device_hint[i].serial_number[k] = '\0';
+ }
+ }
+#ifdef PWC_DEBUG
+ Debug("device_hint[%d]:\n", i);
+ Debug(" type : %d\n", device_hint[i].type);
+ Debug(" serial# : %s\n", device_hint[i].serial_number);
+ Debug(" node : %d\n", device_hint[i].device_node);
+#endif
+ }
+ else
+ device_hint[i].type = 0; /* not filled */
+ } /* ..for MAX_DEV_HINTS */
init_MUTEX(&mem_lock);
Trace(TRACE_PROBE, "Registering driver at address 0x%p.\n", &pwc_driver);
diff -u --recursive --new-file v2.5.0/linux/drivers/usb/pwc.h linux/drivers/usb/pwc.h
--- v2.5.0/linux/drivers/usb/pwc.h Wed Oct 17 14:34:06 2001
+++ linux/drivers/usb/pwc.h Mon Nov 26 17:09:10 2001
@@ -60,8 +60,8 @@
/* Version block */
#define PWC_MAJOR 8
-#define PWC_MINOR 3
-#define PWC_VERSION "8.3"
+#define PWC_MINOR 4
+#define PWC_VERSION "8.4"
#define PWC_NAME "pwc"
/* Turn certain features on/off */
diff -u --recursive --new-file v2.5.0/linux/drivers/usb/serial/belkin_sa.c linux/drivers/usb/serial/belkin_sa.c
--- v2.5.0/linux/drivers/usb/serial/belkin_sa.c Wed Oct 10 23:42:47 2001
+++ linux/drivers/usb/serial/belkin_sa.c Mon Nov 26 17:09:10 2001
@@ -140,7 +140,7 @@
MODULE_DEVICE_TABLE (usb, id_table_combined);
/* All of the device info needed for the Belkin dockstation serial converter */
-struct usb_serial_device_type belkin_dockstation_device = {
+static struct usb_serial_device_type belkin_dockstation_device = {
name: "Belkin F5U120-PC USB Serial Adapter",
id_table: belkin_dockstation_table, /* the Belkin F5U103 device */
needs_interrupt_in: MUST_HAVE, /* this device must have an
interrupt in endpoint */ @@ -204,7 +204,7 @@ }; /* this driver also works for the Peracom single port adapter */ -struct usb_serial_device_type peracom_device = { +static struct usb_serial_device_type peracom_device = { name: "Peracom single port USB Serial Adapter", id_table: peracom_table, /* the Peracom device */ needs_interrupt_in: MUST_HAVE, /* this device must have an interrupt in endpoint */ @@ -225,7 +225,7 @@ }; /* the GoHubs Go-COM232 device is the same as the Peracom single port adapter */ -struct usb_serial_device_type gocom232_device = { +static struct usb_serial_device_type gocom232_device = { name: "GO-COM232 USB Serial Converter", id_table: gocom232_table, /* the GO-COM232 device */ needs_interrupt_in: MUST_HAVE, /* this device must have an interrupt in endpoint */ diff -u --recursive --new-file v2.5.0/linux/drivers/usb/serial/cyberjack.c linux/drivers/usb/serial/cyberjack.c --- v2.5.0/linux/drivers/usb/serial/cyberjack.c Wed Oct 10 23:42:47 2001 +++ linux/drivers/usb/serial/cyberjack.c Mon Nov 26 17:09:10 2001 @@ -76,7 +76,7 @@ MODULE_DEVICE_TABLE (usb, id_table); -struct usb_serial_device_type cyberjack_device = { +static struct usb_serial_device_type cyberjack_device = { name: "Reiner SCT Cyberjack USB card reader", id_table: id_table, needs_interrupt_in: MUST_HAVE, diff -u --recursive --new-file v2.5.0/linux/drivers/usb/serial/empeg.c linux/drivers/usb/serial/empeg.c --- v2.5.0/linux/drivers/usb/serial/empeg.c Wed Oct 10 23:42:47 2001 +++ linux/drivers/usb/serial/empeg.c Mon Nov 26 17:09:10 2001 @@ -113,7 +113,7 @@ MODULE_DEVICE_TABLE (usb, id_table); -struct usb_serial_device_type empeg_device = { +static struct usb_serial_device_type empeg_device = { name: "Empeg", id_table: id_table, needs_interrupt_in: MUST_HAVE_NOT, /* must not have an interrupt in endpoint */ diff -u --recursive --new-file v2.5.0/linux/drivers/usb/serial/ftdi_sio.c linux/drivers/usb/serial/ftdi_sio.c --- v2.5.0/linux/drivers/usb/serial/ftdi_sio.c Tue Nov 13 09:19:41 2001 +++ linux/drivers/usb/serial/ftdi_sio.c Mon Nov 26 17:09:10 2001 @@ -173,7 +173,7 @@ /* Should rename most ftdi_sio's to ftdi_ now since there are two devices which share common code */ -struct usb_serial_device_type ftdi_sio_device = { +static struct usb_serial_device_type ftdi_sio_device = { name: "FTDI SIO", id_table: id_table_sio, needs_interrupt_in: MUST_HAVE_NOT, @@ -196,7 +196,7 @@ shutdown: ftdi_sio_shutdown, }; -struct usb_serial_device_type ftdi_8U232AM_device = { +static struct usb_serial_device_type ftdi_8U232AM_device = { name: "FTDI 8U232AM", id_table: id_table_8U232AM, needs_interrupt_in: DONT_CARE, @@ -660,7 +660,7 @@ } /* ftdi_sio_serial_read_bulk_callback */ -__u16 translate_baudrate_to_ftdi(unsigned int cflag, ftdi_type_t ftdi_type) +static __u16 translate_baudrate_to_ftdi(unsigned int cflag, ftdi_type_t ftdi_type) { /* translate_baudrate_to_ftdi */ __u16 urb_value = ftdi_sio_b9600; diff -u --recursive --new-file v2.5.0/linux/drivers/usb/serial/io_edgeport.c linux/drivers/usb/serial/io_edgeport.c --- v2.5.0/linux/drivers/usb/serial/io_edgeport.c Wed Nov 21 09:59:11 2001 +++ linux/drivers/usb/serial/io_edgeport.c Mon Nov 26 17:09:10 2001 @@ -318,11 +318,6 @@ }; -/* the info for all of the devices that this driver supports */ -int EdgeportDevices[] = EDGEPORT_DEVICE_IDS; -#define NUM_EDGEPORT_DEVICES (sizeof(EdgeportDevices) / sizeof(int)) - - /* Transmit Fifo * This Transmit queue is an extension of the edgeport Rx buffer. 
* The maximum amount of data buffered in both the edgeport @@ -495,17 +490,15 @@ // ************************************************************************ // ************************************************************************ -// These functions should be in firmware.c - /************************************************************************ * * - * update_edgeport_E2PROM() Compare current versions of * + * update_edgeport_E2PROM() Compare current versions of * * Boot ROM and Manufacture * * Descriptors with versions * * embedded in this driver * * * ************************************************************************/ -void update_edgeport_E2PROM (struct edgeport_serial *edge_serial) +static void update_edgeport_E2PROM (struct edgeport_serial *edge_serial) { __u32 BootCurVer; __u32 BootNewVer; diff -u --recursive --new-file v2.5.0/linux/drivers/usb/serial/keyspan.c linux/drivers/usb/serial/keyspan.c --- v2.5.0/linux/drivers/usb/serial/keyspan.c Wed Oct 10 23:42:47 2001 +++ linux/drivers/usb/serial/keyspan.c Mon Nov 26 17:09:10 2001 @@ -176,7 +176,7 @@ /* Functions used by new usb-serial code. */ -int keyspan_init (void) +static int __init keyspan_init (void) { usb_serial_register (&keyspan_usa18x_pre_device); usb_serial_register (&keyspan_usa19_pre_device); @@ -201,7 +201,7 @@ return 0; } -void keyspan_exit (void) +static void __exit keyspan_exit (void) { usb_serial_deregister (&keyspan_usa18x_pre_device); usb_serial_deregister (&keyspan_usa19_pre_device); @@ -1089,7 +1089,7 @@ return urb; } -struct callbacks { +static struct callbacks { void (*instat_callback)(urb_t *); void (*glocont_callback)(urb_t *); void (*indat_callback)(urb_t *); diff -u --recursive --new-file v2.5.0/linux/drivers/usb/serial/keyspan.h linux/drivers/usb/serial/keyspan.h --- v2.5.0/linux/drivers/usb/serial/keyspan.h Tue Oct 9 15:15:02 2001 +++ linux/drivers/usb/serial/keyspan.h Mon Nov 26 17:09:10 2001 @@ -448,7 +448,7 @@ }; /* Structs for the devices, pre and post renumeration. 
*/ -struct usb_serial_device_type keyspan_usa18x_pre_device = { +static struct usb_serial_device_type keyspan_usa18x_pre_device = { name: "Keyspan USA18X - (without firmware)", id_table: keyspan_usa18x_pre_ids, needs_interrupt_in: DONT_CARE, @@ -461,7 +461,7 @@ startup: keyspan_fake_startup }; -struct usb_serial_device_type keyspan_usa19_pre_device = { +static struct usb_serial_device_type keyspan_usa19_pre_device = { name: "Keyspan USA19 - (without firmware)", id_table: keyspan_usa19_pre_ids, needs_interrupt_in: DONT_CARE, @@ -475,7 +475,7 @@ }; -struct usb_serial_device_type keyspan_usa19w_pre_device = { +static struct usb_serial_device_type keyspan_usa19w_pre_device = { name: "Keyspan USA19W - (without firmware)", id_table: keyspan_usa19w_pre_ids, needs_interrupt_in: DONT_CARE, @@ -489,7 +489,7 @@ }; -struct usb_serial_device_type keyspan_usa28_pre_device = { +static struct usb_serial_device_type keyspan_usa28_pre_device = { name: "Keyspan USA28 - (without firmware)", id_table: keyspan_usa28_pre_ids, needs_interrupt_in: DONT_CARE, @@ -502,7 +502,7 @@ startup: keyspan_fake_startup }; -struct usb_serial_device_type keyspan_usa28x_pre_device = { +static struct usb_serial_device_type keyspan_usa28x_pre_device = { name: "Keyspan USA28X - (without firmware)", id_table: keyspan_usa28x_pre_ids, needs_interrupt_in: DONT_CARE, @@ -515,7 +515,7 @@ startup: keyspan_fake_startup }; -struct usb_serial_device_type keyspan_usa28xa_pre_device = { +static struct usb_serial_device_type keyspan_usa28xa_pre_device = { name: "Keyspan USA28XA - (without firmware)", id_table: keyspan_usa28xa_pre_ids, needs_interrupt_in: DONT_CARE, @@ -528,7 +528,7 @@ startup: keyspan_fake_startup }; -struct usb_serial_device_type keyspan_usa28xb_pre_device = { +static struct usb_serial_device_type keyspan_usa28xb_pre_device = { name: "Keyspan USA28XB - (without firmware)", id_table: keyspan_usa28xb_pre_ids, needs_interrupt_in: DONT_CARE, @@ -541,7 +541,7 @@ startup: keyspan_fake_startup }; -struct usb_serial_device_type keyspan_usa49w_pre_device = { +static struct usb_serial_device_type keyspan_usa49w_pre_device = { name: "Keyspan USA49W - (without firmware)", id_table: keyspan_usa49w_pre_ids, needs_interrupt_in: DONT_CARE, @@ -554,7 +554,7 @@ startup: keyspan_fake_startup }; -struct usb_serial_device_type keyspan_usa18x_device = { +static struct usb_serial_device_type keyspan_usa18x_device = { name: "Keyspan USA18X", id_table: keyspan_usa18x_ids, needs_interrupt_in: DONT_CARE, @@ -580,7 +580,7 @@ shutdown: keyspan_shutdown, }; -struct usb_serial_device_type keyspan_usa19_device = { +static struct usb_serial_device_type keyspan_usa19_device = { name: "Keyspan USA19", id_table: keyspan_usa19_ids, needs_interrupt_in: DONT_CARE, @@ -607,7 +607,7 @@ }; -struct usb_serial_device_type keyspan_usa19w_device = { +static struct usb_serial_device_type keyspan_usa19w_device = { name: "Keyspan USA19W", id_table: keyspan_usa19w_ids, needs_interrupt_in: DONT_CARE, @@ -634,7 +634,7 @@ }; -struct usb_serial_device_type keyspan_usa28_device = { +static struct usb_serial_device_type keyspan_usa28_device = { name: "Keyspan USA28", id_table: keyspan_usa28_ids, needs_interrupt_in: DONT_CARE, @@ -652,7 +652,7 @@ }; -struct usb_serial_device_type keyspan_usa28x_device = { +static struct usb_serial_device_type keyspan_usa28x_device = { name: "Keyspan USA28X/XB", id_table: keyspan_usa28x_ids, needs_interrupt_in: DONT_CARE, @@ -679,7 +679,7 @@ }; -struct usb_serial_device_type keyspan_usa28xa_device = { +static struct usb_serial_device_type 
keyspan_usa28xa_device = { name: "Keyspan USA28XA", id_table: keyspan_usa28xa_ids, needs_interrupt_in: DONT_CARE, @@ -706,7 +706,7 @@ }; -struct usb_serial_device_type keyspan_usa49w_device = { +static struct usb_serial_device_type keyspan_usa49w_device = { name: "Keyspan USA49W", id_table: keyspan_usa49w_ids, needs_interrupt_in: DONT_CARE, diff -u --recursive --new-file v2.5.0/linux/drivers/usb/serial/mct_u232.c linux/drivers/usb/serial/mct_u232.c --- v2.5.0/linux/drivers/usb/serial/mct_u232.c Mon Nov 12 09:53:56 2001 +++ linux/drivers/usb/serial/mct_u232.c Mon Nov 26 17:09:10 2001 @@ -154,7 +154,7 @@ MODULE_DEVICE_TABLE (usb, id_table_combined); -struct usb_serial_device_type mct_u232_device = { +static struct usb_serial_device_type mct_u232_device = { name: "Magic Control Technology USB-RS232", id_table: mct_u232_table, needs_interrupt_in: MUST_HAVE, /* 2 interrupt-in endpoints */ @@ -178,7 +178,7 @@ shutdown: mct_u232_shutdown, }; -struct usb_serial_device_type mct_u232_sitecom_device = { +static struct usb_serial_device_type mct_u232_sitecom_device = { name: "MCT/Sitecom USB-RS232", id_table: mct_u232_sitecom_table, needs_interrupt_in: MUST_HAVE, /* 2 interrupt-in endpoints */ @@ -202,7 +202,7 @@ shutdown: mct_u232_shutdown, }; -struct usb_serial_device_type mct_u232_du_h3sp_device = { +static struct usb_serial_device_type mct_u232_du_h3sp_device = { name: "MCT/D-Link DU-H3SP USB BAY", id_table: mct_u232_du_h3sp_table, needs_interrupt_in: MUST_HAVE, /* 2 interrupt-in endpoints */ diff -u --recursive --new-file v2.5.0/linux/drivers/usb/serial/omninet.c linux/drivers/usb/serial/omninet.c --- v2.5.0/linux/drivers/usb/serial/omninet.c Wed Oct 10 23:42:47 2001 +++ linux/drivers/usb/serial/omninet.c Mon Nov 26 17:09:10 2001 @@ -87,7 +87,7 @@ MODULE_DEVICE_TABLE (usb, id_table); -struct usb_serial_device_type zyxel_omninet_device = { +static struct usb_serial_device_type zyxel_omninet_device = { name: "ZyXEL - omni.net lcd plus usb", id_table: id_table, needs_interrupt_in: MUST_HAVE, diff -u --recursive --new-file v2.5.0/linux/drivers/usb/serial/usbserial.c linux/drivers/usb/serial/usbserial.c --- v2.5.0/linux/drivers/usb/serial/usbserial.c Wed Oct 10 23:42:47 2001 +++ linux/drivers/usb/serial/usbserial.c Mon Nov 26 17:09:10 2001 @@ -397,7 +397,7 @@ static struct usb_serial *serial_table[SERIAL_TTY_MINORS]; /* initially all NULL */ -LIST_HEAD(usb_serial_driver_list); +static LIST_HEAD(usb_serial_driver_list); static struct usb_serial *get_serial_by_minor (int minor) @@ -1433,7 +1433,7 @@ }; -int usb_serial_init(void) +static int __init usb_serial_init(void) { int i; int result; @@ -1473,7 +1473,7 @@ } -void usb_serial_exit(void) +static void __exit usb_serial_exit(void) { #ifdef CONFIG_USB_SERIAL_GENERIC diff -u --recursive --new-file v2.5.0/linux/drivers/usb/serial/visor.c linux/drivers/usb/serial/visor.c --- v2.5.0/linux/drivers/usb/serial/visor.c Mon Nov 12 09:53:56 2001 +++ linux/drivers/usb/serial/visor.c Mon Nov 26 17:09:10 2001 @@ -184,7 +184,7 @@ /* All of the device info needed for the Handspring Visor */ -struct usb_serial_device_type handspring_device = { +static struct usb_serial_device_type handspring_device = { name: "Handspring Visor", id_table: visor_id_table, needs_interrupt_in: MUST_HAVE_NOT, /* this device must not have an interrupt in endpoint */ @@ -210,7 +210,7 @@ }; /* device info for the Palm 4.0 devices */ -struct usb_serial_device_type palm_4_0_device = { +static struct usb_serial_device_type palm_4_0_device = { name: "Palm 4.0", id_table: palm_4_0_id_table, 
needs_interrupt_in: MUST_HAVE_NOT, /* this device must not have an interrupt in endpoint */ diff -u --recursive --new-file v2.5.0/linux/drivers/usb/serial/whiteheat.c linux/drivers/usb/serial/whiteheat.c --- v2.5.0/linux/drivers/usb/serial/whiteheat.c Fri Sep 14 14:04:07 2001 +++ linux/drivers/usb/serial/whiteheat.c Mon Nov 26 17:09:10 2001 @@ -131,7 +131,7 @@ static int whiteheat_startup (struct usb_serial *serial); static void whiteheat_shutdown (struct usb_serial *serial); -struct usb_serial_device_type whiteheat_fake_device = { +static struct usb_serial_device_type whiteheat_fake_device = { name: "Connect Tech - WhiteHEAT - (prerenumeration)", id_table: id_table_prerenumeration, needs_interrupt_in: DONT_CARE, /* don't have to have an interrupt in endpoint */ @@ -144,7 +144,7 @@ startup: whiteheat_startup }; -struct usb_serial_device_type whiteheat_device = { +static struct usb_serial_device_type whiteheat_device = { name: "Connect Tech - WhiteHEAT", id_table: id_table_std, needs_interrupt_in: DONT_CARE, /* don't have to have an interrupt in endpoint */ diff -u --recursive --new-file v2.5.0/linux/drivers/usb/usb.c linux/drivers/usb/usb.c --- v2.5.0/linux/drivers/usb/usb.c Wed Nov 21 09:59:11 2001 +++ linux/drivers/usb/usb.c Mon Nov 26 17:09:10 2001 @@ -7,7 +7,8 @@ * (C) Copyright Gregory P. Smith 1999 * (C) Copyright Deti Fliegl 1999 (new USB architecture) * (C) Copyright Randy Dunlap 2000 - * (C) Copyright David Brownell 2000 (kernel hotplug, usb_device_id) + * (C) Copyright David Brownell 2000-2001 (kernel hotplug, usb_device_id, + more docs, etc) * (C) Copyright Yggdrasil Computing, Inc. 2000 * (usb_device_id matching changes by Adam J. Richter) * @@ -193,6 +194,22 @@ up (&usb_bus_list_lock); } +/** + * usb_ifnum_to_if - get the interface object with a given interface number + * @dev: the device whose current configuration is considered + * @ifnum: the desired interface + * + * This walks the device descriptor for the currently active configuration + * and returns a pointer to the interface with that particular interface + * number, or null. + * + * Note that configuration descriptors are not required to assign interface + * numbers sequentially, so that it would be incorrect to assume that + * the first interface in that descriptor corresponds to interface zero. + * This routine helps device drivers avoid such mistakes. + * However, you should make sure that you do the right thing with any + * alternate settings available for this interface. + */ struct usb_interface *usb_ifnum_to_if(struct usb_device *dev, unsigned ifnum) { int i; @@ -204,6 +221,20 @@ return NULL; } +/** + * usb_epnum_to_ep_desc - get the endpoint object with a given endpoint number + * @dev: the device whose current configuration is considered + * @epnum: the desired endpoint + * + * This walks the device descriptor for the currently active configuration, + * and returns a pointer to the endpoint with that particular endpoint + * number, or null. + * + * Note that interface descriptors are not required to assign endpoint + * numbers sequentially, so that it would be incorrect to assume that + * the first endpoint in that descriptor corresponds to endpoint zero. + * This routine helps device drivers avoid such mistakes.
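+ * + * For illustration only (hypothetical driver snippet): + * + * struct usb_endpoint_descriptor *epd = usb_epnum_to_ep_desc(dev, 2); + * __u16 maxp = epd ? epd->wMaxPacketSize : 0; + * + * is safer than indexing some interface's endpoint array directly.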
+ */ struct usb_endpoint_descriptor *usb_epnum_to_ep_desc(struct usb_device *dev, unsigned epnum) { int i, j, k; @@ -356,7 +387,7 @@ } /** - * usb_alloc_bus - creates a new USB host controller structure + * usb_alloc_bus - creates a new USB host controller structure (usbcore-internal) * @op: pointer to a struct usb_operations that this bus structure should use * * Creates a USB host controller bus structure with the specified @@ -398,7 +429,7 @@ } /** - * usb_free_bus - frees the memory used by a bus structure + * usb_free_bus - frees the memory used by a bus structure (usbcore-internal) * @bus: pointer to the bus to free * * (For use only by USB Host Controller Drivers.) @@ -412,10 +443,12 @@ } /** - * usb_register_bus - registers the USB host controller with the usb core + * usb_register_bus - registers the USB host controller with the usb core (usbcore-internal) * @bus: pointer to the bus to register * * (For use only by USB Host Controller Drivers.) + * + * This call is synchronous, and may not be used in an interrupt context. */ void usb_register_bus(struct usb_bus *bus) { @@ -441,10 +474,12 @@ } /** - * usb_deregister_bus - deregisters the USB host controller + * usb_deregister_bus - deregisters the USB host controller (usbcore-internal) * @bus: pointer to the bus to deregister * * (For use only by USB Host Controller Drivers.) + * + * This call is synchronous, and may not be used in an interrupt context. */ void usb_deregister_bus(struct usb_bus *bus) { @@ -493,27 +528,49 @@ } -/* - * This is intended to be used by usb device drivers that need to - * claim more than one interface on a device at once when probing - * (audio and acm are good examples). No device driver should have - * to mess with the internal usb_interface or usb_device structure - * members. +/** + * usb_driver_claim_interface - bind a driver to an interface + * @driver: the driver to be bound + * @iface: the interface to which it will be bound + * @priv: driver data associated with that interface + * + * This is used by usb device drivers that need to claim more than one + * interface on a device when probing (audio and acm are current examples). + * No device driver should directly modify internal usb_interface or + * usb_device structure members. + * + * Few drivers should need to use this routine, since the most natural + * way to bind to an interface is to return the private data from + * the driver's probe() method. Any driver that does use this must + * first be sure that no other driver has claimed the interface, by + * checking with usb_interface_claimed(). */ void usb_driver_claim_interface(struct usb_driver *driver, struct usb_interface *iface, void* priv) { if (!iface || !driver) return; - dbg("%s driver claimed interface %p", driver->name, iface); + // FIXME change API to report an error in this case + if (iface->driver) + err ("%s driver booted %s off interface %p", + driver->name, iface->driver->name, iface); + else + dbg("%s driver claimed interface %p", driver->name, iface); iface->driver = driver; iface->private_data = priv; } /* usb_driver_claim_interface() */ -/* +/** + * usb_interface_claimed - returns true iff an interface is claimed + * @iface: the interface being checked + * * This should be used by drivers to check other interfaces to see if - * they are available or not. + * they are available or not. If another driver has claimed the interface, + * they may not claim it. Otherwise it's OK to claim it using + * usb_driver_claim_interface(). 
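+ * + * A sketch of the resulting check-then-claim sequence in a probe() + * routine (mydrv_driver and priv are the hypothetical caller's): + * + * if (usb_interface_claimed(iface)) + * return NULL; + * usb_driver_claim_interface(&mydrv_driver, iface, priv);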
+ * + * Returns true (nonzero) iff the interface is claimed, else false (zero). */ int usb_interface_claimed(struct usb_interface *iface) { @@ -523,8 +580,19 @@ return (iface->driver != NULL); } /* usb_interface_claimed() */ -/* - * This should be used by drivers to release their claimed interfaces +/** + * usb_driver_release_interface - unbind a driver from an interface + * @driver: the driver to be unbound + * @iface: the interface from which it will be unbound + * + * This should be used by drivers to release their claimed interfaces. + * It is normally called in their disconnect() methods, and only for + * drivers that bound to more than one interface in their probe(). + * + * When the USB subsystem disconnect()s a driver from some interface, + * it automatically invokes this method for that interface. That + * means that even drivers that used usb_driver_claim_interface() + * usually won't need to call this. */ void usb_driver_release_interface(struct usb_driver *driver, struct usb_interface *iface) { @@ -923,9 +991,15 @@ } } -/* - * Only HC's should call usb_alloc_dev and usb_free_dev directly - * Anybody may use usb_inc_dev_use or usb_dec_dev_use +/** + * usb_alloc_dev - allocate a usb device structure (usbcore-internal) + * @parent: hub to which device is connected + * @bus: bus used to access the device + * + * Only hub drivers (including virtual root hub drivers for host + * controllers) should ever call this. + * + * This call is synchronous, and may not be used in an interrupt context. */ struct usb_device *usb_alloc_dev(struct usb_device *parent, struct usb_bus *bus) { @@ -952,6 +1026,8 @@ return dev; } +// usbcore-internal ... +// but usb_dec_dev_use() is #defined to this, and that's public!! void usb_free_dev(struct usb_device *dev) { if (atomic_dec_and_test(&dev->refcnt)) { @@ -964,14 +1040,25 @@ } } +/** + * usb_inc_dev_use - record another reference to a device + * @dev: the device being referenced + * + * Each live reference to a device should be refcounted. + * + * Device drivers should normally record such references in their + * open() methods. + * Drivers should then release them, using usb_dec_dev_use(), in their + * close() methods. + */ void usb_inc_dev_use(struct usb_device *dev) { atomic_inc(&dev->refcnt); } -/* ------------------------------------------------------------------------------------- +/* ---------------------------------------------------------------------- * New USB Core Functions - * -------------------------------------------------------------------------------------*/ + * ----------------------------------------------------------------------*/ /** * usb_alloc_urb - creates a new urb for a USB driver to use @@ -1017,6 +1104,58 @@ kfree(urb); } /*-------------------------------------------------------------------*/ + +/** + * usb_submit_urb - asynchronously issue a transfer request for an endpoint + * @urb: pointer to the urb describing the request + * + * This submits a transfer request, and transfers control of the URB + * describing that request to the USB subsystem. Request completion will be + * indicated later, asynchronously, by calling the completion handler. + * This call may be issued in interrupt context. + * + * The caller must have correctly initialized the URB before submitting + * it. Macros such as FILL_BULK_URB() and FILL_CONTROL_URB() are + * available to ensure that most fields are correctly initialized, for + * the particular kind of transfer, although they will not initialize + * any transfer flags.
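+ * + * A bulk read could be prepared and submitted roughly like this (sketch + * only; buf, len, epnum and my_complete belong to the hypothetical + * caller, and error handling is elided): + * + * FILL_BULK_URB(urb, dev, usb_rcvbulkpipe(dev, epnum), buf, len, my_complete, context); + * if (usb_submit_urb(urb) != 0) + * err("submit failed, driver still owns the urb");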
+ * + * Successful submissions return 0; otherwise this routine returns a + * negative error number. + * + * Unreserved Bandwidth Transfers: + * + * Bulk or control requests complete only once. When the completion + * function is called, control of the URB is returned to the device + * driver which issued the request. The completion handler may then + * immediately free or reuse that URB. + * + * Bulk URBs will be queued if the USB_QUEUE_BULK transfer flag is set + * in the URB. This can be used to maximize bandwidth utilization by + * letting the USB controller start work on the next URB without any + * delay to report completion (scheduling and processing an interrupt) + * and then submit that next request. + * + * For control endpoints, the synchronous usb_control_msg() call is + * often used (in non-interrupt context) instead of this call. + * + * Reserved Bandwidth Transfers: + * + * Periodic URBs (interrupt or isochronous) are completed repeatedly, + * until the original request is aborted. When the completion callback + * indicates the URB has been unlinked (with a special status code), + * control of that URB returns to the device driver. Otherwise, the + * completion handler does not control the URB, and should not change + * any of its fields. + * + * Note that isochronous URBs should be submitted in a "ring" data + * structure (using urb->next) to ensure that they are resubmitted + * appropriately. + * + * If the USB subsystem can't reserve sufficient bandwidth to perform + * the periodic request, and bandwidth reservation is being done for + * this controller, submitting such a periodic request will fail. + */ int usb_submit_urb(urb_t *urb) { if (urb && urb->dev && urb->dev->bus && urb->dev->bus->op) @@ -1026,6 +1165,31 @@ } /*-------------------------------------------------------------------*/ + +/** + * usb_unlink_urb - abort/cancel a transfer request for an endpoint + * @urb: pointer to urb describing a previously submitted request + * + * This routine cancels an in-progress request. The request's + * completion handler will be called with a status code indicating + * that the request has been canceled, and that control of the URB + * has been returned to that device driver. This is the only way + * to stop an interrupt transfer, so long as the device is connected. + * + * When the USB_ASYNC_UNLINK transfer flag for the URB is clear, this + * request is synchronous. Success is indicated by returning zero, + * at which time the urb will have been unlinked, + * and the completion function will see status -ENOENT. Failure is + * indicated by any other return value. This mode may not be used + * when unlinking an urb from an interrupt context, such as a bottom + * half or a completion handler. + * + * When the USB_ASYNC_UNLINK transfer flag for the URB is set, this + * request is asynchronous. Success is indicated by returning -EINPROGRESS, + * at which time the urb will normally not have been unlinked, + * and the completion function will see status -ECONNRESET. Failure is + * indicated by any other return value.
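+ * + * Cancelling from a completion handler or other interrupt context must + * therefore use the asynchronous mode; a sketch: + * + * urb->transfer_flags |= USB_ASYNC_UNLINK; + * if (usb_unlink_urb(urb) != -EINPROGRESS) + * err("unlink failed"); + * + * after which the completion handler sees -ECONNRESET as described above.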
+ */ int usb_unlink_urb(urb_t *urb) { if (urb && urb->dev && urb->dev->bus && urb->dev->bus->op) @@ -1050,7 +1214,7 @@ } /*-------------------------------------------------------------------* - * COMPATIBILITY STUFF * + * SYNCHRONOUS CALLS * *-------------------------------------------------------------------*/ // Starts urb and waits for completion or timeout @@ -1145,7 +1309,7 @@ * This function sends a simple control message to a specified endpoint * and waits for the message to complete, or timeout. * - * If successful, it returns 0, othwise a negative error number. + * If successful, it returns 0, otherwise a negative error number. * * Don't use this function from within an interrupt context, like a * bottom half handler. If you need a asyncronous message, or need to send @@ -1188,9 +1352,9 @@ * This function sends a simple bulk message to a specified endpoint * and waits for the message to complete, or timeout. * - * If successful, it returns 0, othwise a negative error number. - * The number of actual bytes transferred will be plaed in the - * actual_timeout paramater. + * If successful, it returns 0, otherwise a negative error number. + * The number of actual bytes transferred will be stored in the + * actual_length parameter. * * Don't use this function from within an interrupt context, like a * bottom half handler. If you need a asyncronous message, or need to @@ -1214,16 +1378,19 @@ return usb_start_wait_urb(urb,timeout,actual_length); } -/* - * usb_get_current_frame_number() +/** + * usb_get_current_frame_number - return current bus frame number + * @dev: the device whose bus is being queried * - * returns the current frame number for the parent USB bus/controller - * of the given USB device. + * Returns the current frame number for the USB host controller + * used with the given USB device. This can be used when scheduling + * isochronous requests. */ -int usb_get_current_frame_number(struct usb_device *usb_dev) +int usb_get_current_frame_number(struct usb_device *dev) { - return usb_dev->bus->op->get_frame_number (usb_dev); + return dev->bus->op->get_frame_number (dev); } + /*-------------------------------------------------------------------*/ static int usb_parse_endpoint(struct usb_endpoint_descriptor *endpoint, unsigned char *buffer, int size) @@ -1556,6 +1723,7 @@ return size; } +// usbcore-internal: enumeration/hub only!! void usb_destroy_configuration(struct usb_device *dev) { int c, i, j, k; @@ -1685,8 +1853,16 @@ return -1; } -/* +/** + * usb_disconnect - disconnect a device (usbcore-internal) + * @pdev: pointer to device being disconnected + * * Something got disconnected. Get rid of it, and all of its children. + * + * Only hub drivers (including virtual root hub drivers for host + * controllers) should ever call this. + * + * This call is synchronous, and may not be used in an interrupt context. */ void usb_disconnect(struct usb_device **pdev) { @@ -1735,11 +1911,17 @@ usb_free_dev(dev); } -/* +/** + * usb_connect - connects a new device during enumeration (usbcore-internal) + * @dev: partially enumerated device + * * Connect a new USB device. This basically just initializes * the USB device information and sets up the topology - it's * up to the low-level driver to reset the port and actually * do the setup (the upper levels don't know how to do that). + * + * Only hub drivers (including virtual root hub drivers for host + * controllers) should ever call this. */ void usb_connect(struct usb_device *dev) { @@ -1747,6 +1929,9 @@ // FIXME needs locking for SMP!!
/* why? this is called only from the hub thread, * which hopefully doesn't run on multiple CPU's simultaneously 8-) + * ... it's also called from modprobe/rmmod/apmd threads as part + * of virtual root hub init/reinit. In the init case, the hub code + * won't have seen this, but not so for reinit ... */ dev->descriptor.bMaxPacketSize0 = 8; /* Start off at 8 bytes */ #ifndef DEVNUM_ROUND_ROBIN @@ -1777,12 +1962,35 @@ #endif #define SET_TIMEOUT 3 +// hub driver only!!! for enumeration int usb_set_address(struct usb_device *dev) { return usb_control_msg(dev, usb_snddefctrl(dev), USB_REQ_SET_ADDRESS, 0, dev->devnum, 0, NULL, 0, HZ * GET_TIMEOUT); } +/** + * usb_get_descriptor - issues a generic GET_DESCRIPTOR request + * @dev: the device whose descriptor is being retrieved + * @type: the descriptor type (USB_DT_*) + * @index: the number of the descriptor + * @buf: where to put the descriptor + * @size: how big is "buf"? + * + * Gets a USB descriptor. Convenience functions exist to simplify + * getting some types of descriptors. Use + * usb_get_device_descriptor() for USB_DT_DEVICE, + * and usb_get_string() or usb_string() for USB_DT_STRING. + * Configuration descriptors (USB_DT_CONFIG) are part of the device + * structure, at least for the current configuration. + * In addition to a number of USB-standard descriptors, some + * devices also use vendor-specific descriptors. + * + * This call is synchronous, and may not be used in an interrupt context. + * + * Returns zero on success, or else the status code returned by the + * underlying usb_control_msg() call. + */ int usb_get_descriptor(struct usb_device *dev, unsigned char type, unsigned char index, void *buf, int size) { int i = 5; @@ -1800,6 +2008,7 @@ return result; } +// FIXME Doesn't use USB_DT_CLASS ... but hid-core.c expects it this way int usb_get_class_descriptor(struct usb_device *dev, int ifnum, unsigned char type, unsigned char id, void *buf, int size) { @@ -1808,6 +2017,27 @@ (type << 8) + id, ifnum, buf, size, HZ * GET_TIMEOUT); } +/** + * usb_get_string - gets a string descriptor + * @dev: the device whose string descriptor is being retrieved + * @langid: code for language chosen (from string descriptor zero) + * @index: the number of the descriptor + * @buf: where to put the string + * @size: how big is "buf"? + * + * Retrieves a string, encoded using UTF-16LE (Unicode, 16 bits per character, + * in little-endian byte order). + * The usb_string() function will often be a convenient way to turn + * these strings into kernel-printable form. + * + * Strings may be referenced in device, configuration, interface, or other + * descriptors, and could also be used in vendor-specific ways. + * + * This call is synchronous, and may not be used in an interrupt context. + * + * Returns zero on success, or else the status code returned by the + * underlying usb_control_msg() call. + */ int usb_get_string(struct usb_device *dev, unsigned short langid, unsigned char index, void *buf, int size) { return usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), @@ -1815,6 +2045,24 @@ (USB_DT_STRING << 8) + index, langid, buf, size, HZ * GET_TIMEOUT); } +/** + * usb_get_device_descriptor - (re)reads the device descriptor + * @dev: the device whose device descriptor is being updated + * + * Updates the copy of the device descriptor stored in the device structure, + * which dedicates space for this purpose. 
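+ * For example, after downloading new firmware a driver might call + * usb_get_device_descriptor(dev) and then recheck dev->descriptor.bcdDevice + * to see which firmware revision is active (an illustration, not a rule).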
Note that several fields are + * converted to the host CPU's byte order: the USB version (bcdUSB), and + * vendor, product, and version fields (idVendor, idProduct, and bcdDevice). + * That lets device drivers compare against non-byteswapped constants. + * + * There's normally no need to use this call, although some devices + * will change their descriptors after events like updating firmware. + * + * This call is synchronous, and may not be used in an interrupt context. + * + * Returns zero on success, or else the status code returned by the + * underlying usb_control_msg() call. + */ int usb_get_device_descriptor(struct usb_device *dev) { int ret = usb_get_descriptor(dev, USB_DT_DEVICE, 0, &dev->descriptor, @@ -1828,12 +2076,34 @@ return ret; } +/** + * usb_get_status - issues a GET_STATUS call + * @dev: the device whose status is being checked + * @type: USB_RECIP_*; for device, interface, or endpoint + * @target: zero (for device), else interface or endpoint number + * @data: pointer to two bytes of bitmap data + * + * Returns device, interface, or endpoint status. Normally only of + * interest to see if the device is self powered, or has enabled the + * remote wakeup facility; or whether a bulk or interrupt endpoint + * is halted ("stalled"). + * + * Bits in these status bitmaps are set using the SET_FEATURE request, + * and cleared using the CLEAR_FEATURE request. The usb_clear_halt() + * function should be used to clear halt ("stall") status. + * + * This call is synchronous, and may not be used in an interrupt context. + * + * Returns zero on success, or else the status code returned by the + * underlying usb_control_msg() call. + */ int usb_get_status(struct usb_device *dev, int type, int target, void *data) { return usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), USB_REQ_GET_STATUS, USB_DIR_IN | type, 0, target, data, 2, HZ * GET_TIMEOUT); } +// FIXME hid-specific !! DOES NOT BELONG HERE int usb_get_protocol(struct usb_device *dev, int ifnum) { unsigned char type; @@ -1847,6 +2117,7 @@ return type; } +// FIXME hid-specific !! DOES NOT BELONG HERE int usb_set_protocol(struct usb_device *dev, int ifnum, int protocol) { return usb_control_msg(dev, usb_sndctrlpipe(dev, 0), @@ -1854,6 +2125,7 @@ protocol, ifnum, NULL, 0, HZ * SET_TIMEOUT); } +// FIXME hid-specific !! DOES NOT BELONG HERE int usb_set_idle(struct usb_device *dev, int ifnum, int duration, int report_id) { return usb_control_msg(dev, usb_sndctrlpipe(dev, 0), @@ -1861,6 +2133,7 @@ (duration << 8) | report_id, ifnum, NULL, 0, HZ * SET_TIMEOUT); } +// hub-only!! void usb_set_maxpacket(struct usb_device *dev) { int i, b; @@ -1890,9 +2163,26 @@ } } -/* - * endp: endpoint number in bits 0-3; - * direction flag in bit 7 (1 = IN, 0 = OUT) +/** + * usb_clear_halt - tells device to clear endpoint halt/stall condition + * @dev: device whose endpoint is halted + * @pipe: endpoint "pipe" being cleared + * + * This is used to clear halt conditions for bulk and interrupt endpoints, + * as reported by URB completion status. Endpoints that are halted are + * sometimes referred to as being "stalled". Such endpoints are unable + * to transmit or receive data until the halt status is cleared. Any URBs + * queued for such an endpoint should normally be unlinked before + * clearing the halt condition. + * + * Note that control and isochronous endpoints don't halt, although control + * endpoints report "protocol stall" (for unsupported requests) using the + * same status code used to report a true stall.
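+ * + * A plausible reaction to a bulk urb completing with the stall status + * (sketch; epnum and the resubmit policy belong to the driver): + * + * usb_unlink_urb(urb); + * if (usb_clear_halt(dev, usb_rcvbulkpipe(dev, epnum)) == 0) + * usb_submit_urb(urb);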
+ * + * This call is synchronous, and may not be used in an interrupt context. + * + * Returns zero on success, or else the status code returned by the + * underlying usb_control_msg() call. */ int usb_clear_halt(struct usb_device *dev, int pipe) { @@ -1941,6 +2231,33 @@ return 0; } +/** + * usb_set_interface - Makes a particular alternate setting be current + * @dev: the device whose interface is being updated + * @interface: the interface being updated + * @alternate: the setting being chosen. + * + * This is used to enable data transfers on interfaces that may not + * be enabled by default. Not all devices support such configurability. + * + * Within any given configuration, each interface may have several + * alternative settings. These are often used to control levels of + * bandwidth consumption. For example, the default setting for a high + * speed interrupt endpoint may not send more than about 4KBytes per + * microframe, and isochronous endpoints may never be part of an + * interface's default setting. To access such bandwidth, an alternate + * interface setting must be made current. + * + * Note that in the Linux USB subsystem, bandwidth associated with + * an endpoint in a given alternate setting is not reserved until an URB + * is submitted that needs that bandwidth. Some other operating systems + * allocate bandwidth early, when a configuration is chosen. + * + * This call is synchronous, and may not be used in an interrupt context. + * + * Returns zero on success, or else the status code returned by the + * underlying usb_control_msg() call. + */ int usb_set_interface(struct usb_device *dev, int interface, int alternate) { struct usb_interface *iface; @@ -1964,6 +2281,35 @@ return 0; } +/** + * usb_set_configuration - Makes a particular device setting be current + * @dev: the device whose configuration is being updated + * @configuration: the configuration being chosen. + * + * This is used to enable non-default device modes. Not all devices + * support this kind of configurability. By default, configuration + * zero is selected after enumeration; many devices only have a single + * configuration. + * + * USB devices may support one or more configurations, which affect + * power consumption and the functionality available. For example, + * the default configuration is limited to using 100mA of bus power, + * so that when certain device functionality requires more power, + * and the device is bus powered, that functionality will be in some + * non-default device configuration. Other device modes may also be + * reflected as configuration options, such as whether two ISDN + * channels are presented as independent 64Kb/s interfaces or as one + * bonded 128Kb/s interface. + * + * Note that USB has an additional level of device configurability, + * associated with interfaces. That configurability is accessed using + * usb_set_interface(). + * + * This call is synchronous, and may not be used in an interrupt context. + * + * Returns zero on success, or else the status code returned by the + * underlying usb_control_msg() call. + */ int usb_set_configuration(struct usb_device *dev, int configuration) { int i, ret; @@ -1992,6 +2338,7 @@ return 0; } +// FIXME hid-specific !! DOES NOT BELONG HERE int usb_get_report(struct usb_device *dev, int ifnum, unsigned char type, unsigned char id, void *buf, int size) { return usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), @@ -1999,6 +2346,7 @@ (type << 8) + id, ifnum, buf, size, HZ * GET_TIMEOUT); }
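+/* + * The configuration calls documented above might be combined by a driver + * roughly as follows (hypothetical values: configuration 1, interface 0, + * alternate setting 1): + * + * if (usb_set_configuration(dev, 1) == 0 && + * usb_set_interface(dev, 0, 1) == 0) + * ... the alternate setting's endpoints are now usable ... + */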
+// FIXME hid-specific !! DOES NOT BELONG HERE int usb_set_report(struct usb_device *dev, int ifnum, unsigned char type, unsigned char id, void *buf, int size) { return usb_control_msg(dev, usb_sndctrlpipe(dev, 0), @@ -2006,6 +2354,7 @@ (type << 8) + id, ifnum, buf, size, HZ); } +// hub driver only !! int usb_get_configuration(struct usb_device *dev) { int result; @@ -2106,9 +2455,28 @@ return result; } -/* - * usb_string: - * returns string length (> 0) or error (< 0) +/** + * usb_string - returns ISO 8859-1 version of a string descriptor + * @dev: the device whose string descriptor is being retrieved + * @index: the number of the descriptor + * @buf: where to put the string + * @size: how big is "buf"? + * + * This converts the UTF-16LE encoded strings returned by devices, from + * usb_get_string_descriptor(), to null-terminated ISO-8859-1 encoded ones + * that are more usable in most kernel contexts. Note that all characters + * in the chosen descriptor that can't be encoded using ISO-8859-1 + * are converted to the question mark ("?") character, and this function + * chooses strings in the first language supported by the device. + * + * The ASCII (or, redundantly, "US-ASCII") character set is the seven-bit + * subset of ISO 8859-1. ISO-8859-1 is the eight-bit subset of Unicode, + * and is appropriate for many uses of English and several other + * Western European languages. (But it doesn't include the "Euro" symbol.) + * + * This call is synchronous, and may not be used in an interrupt context. + * + * Returns length of the string (>= 0) or usb_control_msg status (< 0). */ int usb_string(struct usb_device *dev, int index, char *buf, size_t size) { @@ -2155,7 +2523,7 @@ if (idx >= size) break; if (tbuf[u+1]) /* high byte */ - buf[idx++] = '?'; /* non-ASCII character */ + buf[idx++] = '?'; /* non ISO-8859-1 character */ else buf[idx++] = tbuf[u]; } @@ -2173,6 +2541,11 @@ * get the ball rolling.. * * Returns 0 for success, != 0 for error. + * + * This call is synchronous, and may not be used in an interrupt context. + * + * Only hub drivers (including virtual root hub drivers for host + * controllers) should ever call this. */ int usb_new_device(struct usb_device *dev) { diff -u --recursive --new-file v2.5.0/linux/fs/Makefile linux/fs/Makefile --- v2.5.0/linux/fs/Makefile Mon Nov 12 09:34:16 2001 +++ linux/fs/Makefile Tue Nov 27 09:23:27 2001 @@ -7,12 +7,12 @@ O_TARGET := fs.o -export-objs := filesystems.o open.o dcache.o buffer.o +export-objs := filesystems.o open.o dcache.o buffer.o bio.o mod-subdirs := nls obj-y := open.o read_write.o devices.o file_table.o buffer.o \ - super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \ - fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ + bio.o super.o block_dev.o char_dev.o stat.o exec.o pipe.o \ + namei.o fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \ filesystems.o namespace.o seq_file.o diff -u --recursive --new-file v2.5.0/linux/fs/bio.c linux/fs/bio.c --- v2.5.0/linux/fs/bio.c Wed Dec 31 16:00:00 1969 +++ linux/fs/bio.c Tue Nov 27 09:23:27 2001 @@ -0,0 +1,999 @@ +/* + * Copyright (C) 2001 Jens Axboe + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +kmem_cache_t *bio_cachep; +static spinlock_t __cacheline_aligned bio_lock = SPIN_LOCK_UNLOCKED; +static struct bio *bio_pool; +static DECLARE_WAIT_QUEUE_HEAD(bio_pool_wait); +static DECLARE_WAIT_QUEUE_HEAD(biovec_pool_wait); + +struct bio_hash_bucket *bio_hash_table; +unsigned int bio_hash_bits, bio_hash_mask; + +static unsigned int bio_pool_free; + +#define BIOVEC_NR_POOLS 6 + +struct biovec_pool { + int bp_size; + kmem_cache_t *bp_cachep; + wait_queue_head_t bp_wait; +}; + +static struct biovec_pool bvec_list[BIOVEC_NR_POOLS]; + +/* + * if you change this list, also change bvec_alloc or things will + * break badly! + */ +static const int bvec_pool_sizes[BIOVEC_NR_POOLS] = { 1, 4, 16, 64, 128, 256 }; + +#define BIO_MAX_PAGES (bvec_pool_sizes[BIOVEC_NR_POOLS - 1]) + +#ifdef BIO_HASH_PROFILING +static struct bio_hash_stats bio_stats; +#endif + +/* + * optimized for 2^BIO_HASH_SCALE kB block size + */ +#define BIO_HASH_SCALE 3 +#define BIO_HASH_BLOCK(sector) ((sector) >> BIO_HASH_SCALE) + +/* + * pending further testing, grabbed from fs/buffer.c hash so far... + */ +#define __bio_hash(dev,block) \ + (((((dev)<<(bio_hash_bits - 6)) ^ ((dev)<<(bio_hash_bits - 9))) ^ \ + (((block)<<(bio_hash_bits - 6)) ^ ((block) >> 13) ^ \ + ((block) << (bio_hash_bits - 12)))) & bio_hash_mask) + +#define bio_hash(dev, sector) &((bio_hash_table + __bio_hash(dev, BIO_HASH_BLOCK((sector))))->hash) + +#define bio_hash_bucket(dev, sector) (bio_hash_table + __bio_hash(dev, BIO_HASH_BLOCK((sector)))) + +#define __BIO_HASH_RWLOCK(dev, sector) \ + &((bio_hash_table + __bio_hash((dev), BIO_HASH_BLOCK((sector))))->lock) +#define BIO_HASH_RWLOCK(bio) \ + __BIO_HASH_RWLOCK((bio)->bi_dev, (bio)->bi_sector) + +/* + * TODO: change this to use slab reservation scheme once that infrastructure + * is in place...
+ */ +#define BIO_POOL_SIZE (256) + +void __init bio_hash_init(unsigned long mempages) +{ + unsigned long htable_size, order; + int i; + + /* + * need to experiment on size of hash + */ + mempages >>= 2; + + htable_size = mempages * sizeof(struct bio_hash_bucket *); + for (order = 0; (PAGE_SIZE << order) < htable_size; order++) + ; + + do { + unsigned long tmp = (PAGE_SIZE << order) / sizeof(struct bio_hash_bucket); + + bio_hash_bits = 0; + while ((tmp >>= 1UL) != 0UL) + bio_hash_bits++; + + bio_hash_table = (struct bio_hash_bucket *) __get_free_pages(GFP_ATOMIC, order); + } while (bio_hash_table == NULL && --order > 0); + + if (!bio_hash_table) + panic("Failed to allocate bio hash table\n"); + + printk("Bio-cache hash table entries: %ld (order: %ld, %ld bytes)\n", + BIO_HASH_SIZE, order, (PAGE_SIZE << order)); + + for (i = 0; i < BIO_HASH_SIZE; i++) { + struct bio_hash_bucket *hb = &bio_hash_table[i]; + + rwlock_init(&hb->lock); + hb->hash = NULL; + } + + bio_hash_mask = BIO_HASH_SIZE - 1; +} + +inline void __bio_hash_remove(struct bio *bio) +{ + bio_hash_t *entry = &bio->bi_hash; + bio_hash_t **pprev = entry->pprev_hash; + + if (pprev) { + bio_hash_t *nxt = entry->next_hash; + + if (nxt) + nxt->pprev_hash = pprev; + + *pprev = nxt; +#if 1 + entry->next_hash = NULL; +#endif + entry->pprev_hash = NULL; + entry->valid_counter = 0; + bio->bi_hash_desc = NULL; +#ifdef BIO_HASH_PROFILING + atomic_dec(&bio_stats.nr_entries); +#endif + } +} + +inline void bio_hash_remove(struct bio *bio) +{ + rwlock_t *hash_lock = BIO_HASH_RWLOCK(bio); + unsigned long flags; + + write_lock_irqsave(hash_lock, flags); + __bio_hash_remove(bio); + write_unlock_irqrestore(hash_lock, flags); +} + +inline void __bio_hash_add(struct bio *bio, bio_hash_t **hash, + void *hash_desc, unsigned int vc) +{ + bio_hash_t *entry = &bio->bi_hash; + bio_hash_t *nxt = *hash; + + BUG_ON(entry->pprev_hash); + + *hash = entry; + entry->next_hash = nxt; + entry->pprev_hash = hash; + entry->valid_counter = vc; + + if (nxt) + nxt->pprev_hash = &entry->next_hash; + + bio->bi_hash_desc = hash_desc; + +#ifdef BIO_HASH_PROFILING + atomic_inc(&bio_stats.nr_inserts); + atomic_inc(&bio_stats.nr_entries); + { + int entries = atomic_read(&bio_stats.nr_entries); + if (entries > atomic_read(&bio_stats.max_entries)) + atomic_set(&bio_stats.max_entries, entries); + } +#endif +} + +inline void bio_hash_add(struct bio *bio, void *hash_desc, unsigned int vc) +{ + struct bio_hash_bucket *hb = bio_hash_bucket(bio->bi_dev, bio->bi_sector); + unsigned long flags; + + write_lock_irqsave(&hb->lock, flags); + __bio_hash_add(bio, &hb->hash, hash_desc, vc); + write_unlock_irqrestore(&hb->lock, flags); +} + +inline struct bio *__bio_hash_find(kdev_t dev, sector_t sector, + bio_hash_t **hash, unsigned int vc) +{ + bio_hash_t *next = *hash, *entry; + struct bio *bio; + int nr = 0; + +#ifdef BIO_HASH_PROFILING + atomic_inc(&bio_stats.nr_lookups); +#endif + while ((entry = next)) { + next = entry->next_hash; + prefetch(next); + bio = bio_hash_entry(entry); + + if (entry->valid_counter == vc) { + if (bio->bi_sector == sector && bio->bi_dev == dev) { +#ifdef BIO_HASH_PROFILING + if (nr > atomic_read(&bio_stats.max_bucket_size)) + atomic_set(&bio_stats.max_bucket_size, nr); + if (nr <= MAX_PROFILE_BUCKETS) + atomic_inc(&bio_stats.bucket_size[nr]); + atomic_inc(&bio_stats.nr_hits); +#endif + bio_get(bio); + return bio; + } + } + nr++; + } + + return NULL; +}
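+/* + * Sketch of the intended lookup-or-insert pattern for the helpers below + * (illustrative only; q is the request queue whose hash_valid_counter + * guards the entries): + * + * bio = bio_hash_find(dev, sector, q->hash_valid_counter); + * if (bio) { + * ... use the cached bio, then drop the lookup reference ... + * bio_put(bio); + * } else + * bio_hash_add_unique(new_bio, q, q->hash_valid_counter); + */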
+inline struct bio *bio_hash_find(kdev_t dev, sector_t sector, unsigned int vc) +{ + struct bio_hash_bucket *hb = bio_hash_bucket(dev, sector); + unsigned long flags; + struct bio *bio; + + read_lock_irqsave(&hb->lock, flags); + bio = __bio_hash_find(dev, sector, &hb->hash, vc); + read_unlock_irqrestore(&hb->lock, flags); + + return bio; +} + +inline int __bio_hash_add_unique(struct bio *bio, bio_hash_t **hash, + void *hash_desc, unsigned int vc) +{ + struct bio *alias = __bio_hash_find(bio->bi_dev, bio->bi_sector, hash, vc); + + if (!alias) { + __bio_hash_add(bio, hash, hash_desc, vc); + return 0; + } + + /* + * release reference to alias + */ + bio_put(alias); + return 1; +} + +inline int bio_hash_add_unique(struct bio *bio, void *hash_desc, unsigned int vc) +{ + struct bio_hash_bucket *hb = bio_hash_bucket(bio->bi_dev, bio->bi_sector); + unsigned long flags; + int ret = 1; + + if (!bio->bi_hash.pprev_hash) { + write_lock_irqsave(&hb->lock, flags); + ret = __bio_hash_add_unique(bio, &hb->hash, hash_desc, vc); + write_unlock_irqrestore(&hb->lock, flags); + } + + return ret; +} + +/* + * increment validity counter on barrier inserts. if it wraps, we must + * prune all existing entries for this device to be completely safe + * + * q->queue_lock must be held by caller + */ +void bio_hash_invalidate(request_queue_t *q, kdev_t dev) +{ + bio_hash_t *hash; + struct bio *bio; + int i; + + if (++q->hash_valid_counter) + return; + + /* + * it wrapped... walk each chain and prune the entries belonging to + * this device, so stale entries can't match a reused counter value + */ + for (i = 0; i < (1 << bio_hash_bits); i++) { + struct bio_hash_bucket *hb = &bio_hash_table[i]; + unsigned long flags; + + write_lock_irqsave(&hb->lock, flags); + hash = hb->hash; + while (hash != NULL) { + bio_hash_t *next = hash->next_hash; + + bio = bio_hash_entry(hash); + if (bio->bi_dev == dev) + __bio_hash_remove(bio); + hash = next; + } + write_unlock_irqrestore(&hb->lock, flags); + } + + /* + * entries pruned, reset validity counter + */ + q->hash_valid_counter = 1; +} + + +/* + * if need be, add bio_pool_get_irq() to match... + */ +static inline struct bio *__bio_pool_get(void) +{ + struct bio *bio; + + if ((bio = bio_pool)) { + BUG_ON(bio_pool_free <= 0); + bio_pool = bio->bi_next; + bio->bi_next = NULL; + bio_pool_free--; + } + + return bio; +} + +static inline struct bio *bio_pool_get(void) +{ + unsigned long flags; + struct bio *bio; + + spin_lock_irqsave(&bio_lock, flags); + bio = __bio_pool_get(); + BUG_ON(!bio && bio_pool_free); + spin_unlock_irqrestore(&bio_lock, flags); + + return bio; +} + +static inline void bio_pool_put(struct bio *bio) +{ + unsigned long flags; + int wake_pool = 0; + + spin_lock_irqsave(&bio_lock, flags); + + /* + * if the pool isn't full yet, put the bio back into it; otherwise + * free it back to the slab cache + */ + if (bio_pool_free < BIO_POOL_SIZE) { + bio->bi_next = bio_pool; + bio_pool = bio; + bio_pool_free++; + wake_pool = waitqueue_active(&bio_pool_wait); + spin_unlock_irqrestore(&bio_lock, flags); + + if (wake_pool) + wake_up_nr(&bio_pool_wait, 1); + } else { + spin_unlock_irqrestore(&bio_lock, flags); + kmem_cache_free(bio_cachep, bio); + } +} + +#define BIO_CAN_WAIT(gfp_mask) \ + (((gfp_mask) & (__GFP_WAIT | __GFP_IO)) == (__GFP_WAIT | __GFP_IO)) + +static inline struct bio_vec_list *bvec_alloc(int gfp_mask, int nr) +{ + struct bio_vec_list *bvl = NULL; + struct biovec_pool *bp; + int idx; + + /* + * see comment near bvec_pool_sizes define! + */ + switch (nr) { + case 1: + idx = 0; + break; + case 2 ... 4: + idx = 1; + break; + case 5 ... 16: + idx = 2; + break; + case 17 ... 64: + idx = 3; + break; + case 65 ... 128: + idx = 4; + break; + case 129 ...
256: + idx = 5; + break; + default: + return NULL; + } + bp = &bvec_list[idx]; + + /* + * ok, so idx now points to the slab we want to allocate from + */ + if ((bvl = kmem_cache_alloc(bp->bp_cachep, gfp_mask))) + goto out_gotit; + + /* + * we need slab reservations for this to be completely + * deadlock free... + */ + if (BIO_CAN_WAIT(gfp_mask)) { + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue_exclusive(&bp->bp_wait, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + bvl = kmem_cache_alloc(bp->bp_cachep, gfp_mask); + if (bvl) + break; /* fall through and remove ourselves from the wait queue */ + + run_task_queue(&tq_disk); + schedule(); + } + remove_wait_queue(&bp->bp_wait, &wait); + __set_current_state(TASK_RUNNING); + } + + /* + * we use bvl_max as index into bvec_pool_sizes, non-slab originated + * bvecs may use it for something else if they use their own + * destructor + */ + if (bvl) { +out_gotit: + memset(bvl, 0, bp->bp_size); + bvl->bvl_max = idx; + } + + return bvl; +} + +/* + * default destructor for a bio allocated with bio_alloc() + */ +void bio_destructor(struct bio *bio) +{ + struct biovec_pool *bp = &bvec_list[bio->bi_io_vec->bvl_max]; + + BUG_ON(bio->bi_io_vec->bvl_max >= BIOVEC_NR_POOLS); + + /* + * cloned bio doesn't own the veclist + */ + if (!(bio->bi_flags & (1 << BIO_CLONED))) + kmem_cache_free(bp->bp_cachep, bio->bi_io_vec); + + bio_pool_put(bio); +} + +static inline struct bio *__bio_alloc(int gfp_mask, bio_destructor_t *dest) +{ + struct bio *bio; + + /* + * first try our reserved pool + */ + if ((bio = bio_pool_get())) + goto gotit; + + /* + * no such luck, try slab alloc + */ + if ((bio = kmem_cache_alloc(bio_cachep, gfp_mask))) + goto gotit; + + /* + * hrmpf, not much luck. if we are allowed to wait, wait on + * bio_pool to be replenished + */ + if (BIO_CAN_WAIT(gfp_mask)) { + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue_exclusive(&bio_pool_wait, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if ((bio = bio_pool_get())) + break; + + run_task_queue(&tq_disk); + schedule(); + } + remove_wait_queue(&bio_pool_wait, &wait); + __set_current_state(TASK_RUNNING); + } + + if (bio) { +gotit: + bio->bi_next = NULL; + bio->bi_hash.pprev_hash = NULL; + atomic_set(&bio->bi_cnt, 1); + bio->bi_io_vec = NULL; + bio->bi_flags = 0; + bio->bi_rw = 0; + bio->bi_end_io = NULL; + bio->bi_hash_desc = NULL; + bio->bi_destructor = dest; + } + + return bio; +} + +/** + * bio_alloc - allocate a bio for I/O + * @gfp_mask: the GFP_ mask given to the slab allocator + * @nr_iovecs: number of iovecs to pre-allocate + * + * Description: + * bio_alloc will first try its own internal pool to satisfy the allocation + * and if that fails fall back to the bio slab cache. In the latter case, + * the @gfp_mask specifies the priority of the allocation. In particular, + * if %__GFP_WAIT is set then we will block on the internal pool waiting + * for a &struct bio to become free. + **/ +struct bio *bio_alloc(int gfp_mask, int nr_iovecs) +{ + struct bio *bio = __bio_alloc(gfp_mask, bio_destructor); + struct bio_vec_list *bvl = NULL; + + if (unlikely(!bio)) + return NULL; + + if (!nr_iovecs || (bvl = bvec_alloc(gfp_mask, nr_iovecs))) { + bio->bi_io_vec = bvl; + return bio; + } + + bio_pool_put(bio); + return NULL; +}
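+/* + * A minimal single-page use of the allocator above, sketched (page, + * sector, dev, rw and my_end_io belong to the hypothetical caller): + * + * struct bio *bio = bio_alloc(GFP_NOIO, 1); + * + * if (bio) { + * bio->bi_sector = sector; + * bio->bi_dev = dev; + * bio->bi_end_io = my_end_io; + * bio->bi_io_vec->bvl_vec[0].bv_page = page; + * bio->bi_io_vec->bvl_vec[0].bv_len = PAGE_SIZE; + * bio->bi_io_vec->bvl_vec[0].bv_offset = 0; + * bio->bi_io_vec->bvl_cnt = 1; + * bio->bi_io_vec->bvl_size = PAGE_SIZE; + * submit_bio(rw, bio); + * } + */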
+/* + * queue lock assumed held! + */ +static inline void bio_free(struct bio *bio) +{ + BUG_ON(bio_is_hashed(bio)); + + bio->bi_destructor(bio); +} + +/** + * bio_put - release a reference to a bio + * @bio: bio to release reference to + * + * Description: + * Put a reference to a &struct bio, either one you have gotten with + * bio_alloc or bio_get. The last put of a bio will free it. + **/ +void bio_put(struct bio *bio) +{ + BUG_ON(!atomic_read(&bio->bi_cnt)); + + /* + * last put frees it + */ + if (atomic_dec_and_test(&bio->bi_cnt)) { + BUG_ON(bio->bi_next); + + bio_free(bio); + } +} + +/** + * bio_clone - duplicate a bio + * @bio: bio to clone + * @gfp_mask: allocation priority + * + * Duplicate a &bio. Caller will own the returned bio, but not + * the actual data it points to. Reference count of returned + * bio will be one. + */ +struct bio *bio_clone(struct bio *bio, int gfp_mask) +{ + struct bio *b = bio_alloc(gfp_mask, 0); + + if (b) { + b->bi_io_vec = bio->bi_io_vec; + + b->bi_sector = bio->bi_sector; + b->bi_dev = bio->bi_dev; + b->bi_flags |= 1 << BIO_CLONED; + b->bi_rw = bio->bi_rw; + } + + return b; +} + +/** + * bio_copy - create copy of a bio + * @bio: bio to copy + * @gfp_mask: allocation priority + * + * Create a copy of a &bio. Caller will own the returned bio and + * the actual data it points to. Reference count of returned + * bio will be one. + */ +struct bio *bio_copy(struct bio *bio, int gfp_mask) +{ + struct bio *b = bio_alloc(gfp_mask, bio->bi_io_vec->bvl_cnt); + unsigned long flags = 0; /* gcc silly */ + int i; + + if (b) { + struct bio_vec *bv; + + /* + * iterate iovec list and alloc pages + copy data + */ + bio_for_each_segment(bv, bio, i) { + struct bio_vec *bbv = &b->bi_io_vec->bvl_vec[i]; + char *vfrom, *vto; + + bbv->bv_page = alloc_page(gfp_mask); + if (bbv->bv_page == NULL) + goto oom; + + /* map the source page and the new destination page */ + if (gfp_mask & __GFP_WAIT) { + vfrom = kmap(bv->bv_page); + vto = kmap(bbv->bv_page); + } else { + __save_flags(flags); + __cli(); + vfrom = kmap_atomic(bv->bv_page, KM_BIO_IRQ); + vto = kmap_atomic(bbv->bv_page, KM_BIO_IRQ); + } + + memcpy(vto + bv->bv_offset, vfrom + bv->bv_offset, bv->bv_len); + if (gfp_mask & __GFP_WAIT) { + kunmap(bbv->bv_page); + kunmap(bv->bv_page); + } else { + kunmap_atomic(vto, KM_BIO_IRQ); + kunmap_atomic(vfrom, KM_BIO_IRQ); + __restore_flags(flags); + } + + bbv->bv_len = bv->bv_len; + bbv->bv_offset = bv->bv_offset; + } + + b->bi_sector = bio->bi_sector; + b->bi_dev = bio->bi_dev; + b->bi_rw = bio->bi_rw; + + b->bi_io_vec->bvl_cnt = bio->bi_io_vec->bvl_cnt; + b->bi_io_vec->bvl_size = bio->bi_io_vec->bvl_size; + } + + return b; + +oom: + /* page i failed to allocate; free only the pages before it */ + while (--i >= 0) + __free_page(b->bi_io_vec->bvl_vec[i].bv_page); + + bio_pool_put(b); + return NULL; +}
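+/* + * The practical difference: a stacking driver that only redirects the + * same pages to another device can take the cheap route, while a driver + * needing a stable private snapshot of the data pays for the full copy. + * Illustration (hypothetical caller): + * + * struct bio *redirect = bio_clone(bio, GFP_NOIO); + * struct bio *snapshot = bio_copy(bio, GFP_NOIO); + * + * redirect shares bi_io_vec with the original (BIO_CLONED is set, so the + * destructor won't free the shared veclist); snapshot owns freshly + * allocated pages containing the copied data. + */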
+#ifdef BIO_PAGEIO +static int bio_end_io_page(struct bio *bio) +{ + struct page *page = bio_page(bio); + + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + SetPageError(page); + if (!PageError(page)) + SetPageUptodate(page); + + /* + * Run the hooks that have to be done when a page I/O has completed. + */ + if (PageTestandClearDecrAfter(page)) + atomic_dec(&nr_async_pages); + + UnlockPage(page); + bio_put(bio); + return 1; +} +#endif + +static int bio_end_io_kio(struct bio *bio, int nr_sectors) +{ + struct kiobuf *kio = (struct kiobuf *) bio->bi_private; + struct bio_vec_list *bv = bio->bi_io_vec; + int uptodate, done; + + BUG_ON(!bv); + + done = 0; + uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + do { + int sectors = bv->bvl_vec[bv->bvl_idx].bv_len >> 9; + + nr_sectors -= sectors; + + bv->bvl_idx++; + + done = !end_kio_request(kio, uptodate); + + if (bv->bvl_idx == bv->bvl_cnt) + done = 1; + + } while (!done && nr_sectors > 0); + + /* + * all done + */ + if (done) { + bio_hash_remove(bio); + bio_put(bio); + return 0; + } + + return 1; +} + +/* + * obviously doesn't work for stacking drivers, but ll_rw_blk will split + * bio for those + */ +int get_max_segments(kdev_t dev) +{ + int segments = MAX_SEGMENTS; + request_queue_t *q; + + if ((q = blk_get_queue(dev))) + segments = q->max_segments; + + return segments; +} + +int get_max_sectors(kdev_t dev) +{ + int sectors = MAX_SECTORS; + request_queue_t *q; + + if ((q = blk_get_queue(dev))) + sectors = q->max_sectors; + + return sectors; +} + +/** + * ll_rw_kio - submit a &struct kiobuf for I/O + * @rw: %READ or %WRITE + * @kio: the kiobuf to do I/O on + * @dev: target device + * @sector: start location on disk + * + * Description: + * ll_rw_kio will map the page list inside the &struct kiobuf to + * &struct bio and queue them for I/O. The kiobuf given must describe + * a contiguous range of data, and must be fully prepared for I/O. + **/ +void ll_rw_kio(int rw, struct kiobuf *kio, kdev_t dev, sector_t sector) +{ + int i, offset, size, err, map_i, total_nr_pages, nr_pages; + int max_bytes, max_segments; + struct bio_vec *bvec; + struct bio *bio; + + err = 0; + if ((rw & WRITE) && is_read_only(dev)) { + printk("ll_rw_kio: WRITE to ro device %s\n", kdevname(dev)); + err = -EPERM; + goto out; + } + + if (!kio->nr_pages) { + err = -EINVAL; + goto out; + } + + /* + * rudimentary max sectors/segments checks and setup. once we are + * sure that drivers can handle requests that cannot be completed in + * one go this will die + */ + max_bytes = get_max_sectors(dev) << 9; + max_segments = get_max_segments(dev); + if ((max_bytes >> PAGE_SHIFT) < (max_segments + 1)) + max_segments = (max_bytes >> PAGE_SHIFT) + 1; + + if (max_segments > BIO_MAX_PAGES) + max_segments = BIO_MAX_PAGES; + + /* + * maybe kio is bigger than the max we can easily map into a bio. + * if so, split it up in appropriately sized chunks.
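+ * + * e.g. a 256-page kiobuf sent to a queue that allows 64 segments goes + * out as four bios of 64 pages each, each chunk also being clipped to + * max_bytes worth of data.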
+	 */
+	total_nr_pages = kio->nr_pages;
+	offset = kio->offset & ~PAGE_MASK;
+	size = kio->length;
+
+	/*
+	 * set I/O count to number of pages for now
+	 */
+	atomic_set(&kio->io_count, total_nr_pages);
+
+	map_i = 0;
+
+next_chunk:
+	if ((nr_pages = total_nr_pages) > max_segments)
+		nr_pages = max_segments;
+
+	/*
+	 * allocate bio and do initial setup
+	 */
+	if ((bio = bio_alloc(GFP_NOIO, nr_pages)) == NULL) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	bio->bi_sector = sector;
+	bio->bi_dev = dev;
+	bio->bi_io_vec->bvl_idx = 0;
+	bio->bi_flags |= 1 << BIO_PREBUILT;
+	bio->bi_end_io = bio_end_io_kio;
+	bio->bi_private = kio;
+
+	bvec = &bio->bi_io_vec->bvl_vec[0];
+	for (i = 0; i < nr_pages; i++, bvec++, map_i++) {
+		int nbytes = PAGE_SIZE - offset;
+
+		if (nbytes > size)
+			nbytes = size;
+
+		BUG_ON(kio->maplist[map_i] == NULL);
+
+		if (bio->bi_io_vec->bvl_size + nbytes > max_bytes)
+			goto queue_io;
+
+		bio->bi_io_vec->bvl_cnt++;
+		bio->bi_io_vec->bvl_size += nbytes;
+
+		bvec->bv_page = kio->maplist[map_i];
+		bvec->bv_len = nbytes;
+		bvec->bv_offset = offset;
+
+		/*
+		 * kiobuf only has an offset into the first page
+		 */
+		offset = 0;
+
+		sector += nbytes >> 9;
+		size -= nbytes;
+		total_nr_pages--;
+	}
+
+queue_io:
+	submit_bio(rw, bio);
+
+	if (total_nr_pages)
+		goto next_chunk;
+
+	if (size) {
+		printk("ll_rw_kio: size %d left (kio %d)\n", size, kio->length);
+		BUG();
+	}
+
+out:
+	if (err)
+		kio->errno = err;
+}
+
+int bio_endio(struct bio *bio, int uptodate, int nr_sectors)
+{
+	if (uptodate)
+		set_bit(BIO_UPTODATE, &bio->bi_flags);
+	else
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+
+	return bio->bi_end_io(bio, nr_sectors);
+}
+
+static int __init bio_init_pool(void)
+{
+	struct bio *bio;
+	int i;
+
+	for (i = 0; i < BIO_POOL_SIZE; i++) {
+		bio = kmem_cache_alloc(bio_cachep, GFP_ATOMIC);
+		if (!bio)
+			panic("bio: cannot init bio pool\n");
+
+		bio_pool_put(bio);
+	}
+
+	return i;
+}
+
+static void __init biovec_init_pool(void)
+{
+	char name[16];
+	int i, size;
+
+	memset(&bvec_list, 0, sizeof(bvec_list));
+
+	for (i = 0; i < BIOVEC_NR_POOLS; i++) {
+		struct biovec_pool *bp = &bvec_list[i];
+
+		size = bvec_pool_sizes[i] * sizeof(struct bio_vec);
+		size += sizeof(struct bio_vec_list);
+
+		printk("biovec: init pool %d, %d entries, %d bytes\n", i,
+						bvec_pool_sizes[i], size);
+
+		snprintf(name, sizeof(name) - 1, "biovec-%d", bvec_pool_sizes[i]);
+		bp->bp_cachep = kmem_cache_create(name, size, 0,
+						SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+		if (!bp->bp_cachep)
+			panic("biovec: can't init slab pools\n");
+
+		bp->bp_size = size;
+		init_waitqueue_head(&bp->bp_wait);
+	}
+}
+
+static int __init init_bio(void)
+{
+	int nr;
+
+	bio_cachep = kmem_cache_create("bio", sizeof(struct bio), 0,
+					SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!bio_cachep)
+		panic("bio: can't create bio_cachep slab cache\n");
+
+	nr = bio_init_pool();
+	printk("BIO: pool of %d setup, %luKb (%d bytes/bio)\n", nr,
+		(unsigned long) (nr * sizeof(struct bio)) >> 10,
+		(int) sizeof(struct bio));
+
+	biovec_init_pool();
+
+#ifdef BIO_HASH_PROFILING
+	memset(&bio_stats, 0, sizeof(bio_stats));
+#endif
+
+	return 0;
+}
+
+int bio_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg)
+{
+#ifdef BIO_HASH_PROFILING
+	switch (cmd) {
+		case BLKHASHPROF:
+			if (copy_to_user((struct bio_hash_stats *) arg, &bio_stats, sizeof(bio_stats)))
+				return -EFAULT;
+			break;
+		case BLKHASHCLEAR:
+			memset(&bio_stats, 0, sizeof(bio_stats));
+			break;
+		default:
+			return -ENOTTY;
+	}
+
+#endif
+	return 0;
+}
+
+module_init(init_bio);
+
+EXPORT_SYMBOL(bio_alloc);
+EXPORT_SYMBOL(bio_put);
+EXPORT_SYMBOL(ll_rw_kio); +EXPORT_SYMBOL(bio_hash_remove); +EXPORT_SYMBOL(bio_hash_add); +EXPORT_SYMBOL(bio_hash_add_unique); +EXPORT_SYMBOL(bio_endio); diff -u --recursive --new-file v2.5.0/linux/fs/block_dev.c linux/fs/block_dev.c --- v2.5.0/linux/fs/block_dev.c Wed Nov 21 14:07:25 2001 +++ linux/fs/block_dev.c Tue Nov 27 09:23:27 2001 @@ -102,7 +102,7 @@ return 0; } -static int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh, int create) +static int blkdev_get_block(struct inode * inode, sector_t iblock, struct buffer_head * bh, int create) { if (iblock >= max_block(inode->i_rdev)) return -EIO; diff -u --recursive --new-file v2.5.0/linux/fs/buffer.c linux/fs/buffer.c --- v2.5.0/linux/fs/buffer.c Wed Nov 21 14:40:17 2001 +++ linux/fs/buffer.c Tue Nov 27 09:23:27 2001 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -548,7 +549,7 @@ spin_unlock(&lru_list_lock); } -struct buffer_head * get_hash_table(kdev_t dev, int block, int size) +struct buffer_head * get_hash_table(kdev_t dev, sector_t block, int size) { struct buffer_head *bh, **p = &hash(dev, block); @@ -1014,7 +1015,7 @@ * 14.02.92: changed it to sync dirty buffers a bit: better performance * when the filesystem starts to get full of dirty blocks (I hope). */ -struct buffer_head * getblk(kdev_t dev, int block, int size) +struct buffer_head * getblk(kdev_t dev, sector_t block, int size) { for (;;) { struct buffer_head * bh; @@ -1988,7 +1989,8 @@ goto done; } -int generic_block_bmap(struct address_space *mapping, long block, get_block_t *get_block) +sector_t generic_block_bmap(struct address_space *mapping, sector_t block, + get_block_t *get_block) { struct buffer_head tmp; struct inode *inode = mapping->host; @@ -2001,7 +2003,7 @@ int generic_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize, get_block_t * get_block) { int i, nr_blocks, retval; - unsigned long * blocks = iobuf->blocks; + sector_t *blocks = iobuf->blocks; nr_blocks = iobuf->length / blocksize; /* build the blocklist */ @@ -2012,7 +2014,7 @@ bh.b_dev = inode->i_dev; bh.b_size = blocksize; - retval = get_block(inode, blocknr, &bh, rw == READ ? 0 : 1); + retval = get_block(inode, blocknr, &bh, rw & 1); if (retval) goto out; @@ -2033,64 +2035,13 @@ blocks[i] = bh.b_blocknr; } - retval = brw_kiovec(rw, 1, &iobuf, inode->i_dev, iobuf->blocks, blocksize); + retval = brw_kiovec(rw, 1, &iobuf, inode->i_dev, blocks, blocksize); out: return retval; } /* - * IO completion routine for a buffer_head being used for kiobuf IO: we - * can't dispatch the kiobuf callback until io_count reaches 0. - */ - -static void end_buffer_io_kiobuf(struct buffer_head *bh, int uptodate) -{ - struct kiobuf *kiobuf; - - mark_buffer_uptodate(bh, uptodate); - - kiobuf = bh->b_private; - unlock_buffer(bh); - end_kio_request(kiobuf, uptodate); -} - -/* - * For brw_kiovec: submit a set of buffer_head temporary IOs and wait - * for them to complete. Clean up the buffer_heads afterwards. - */ - -static int wait_kio(int rw, int nr, struct buffer_head *bh[], int size) -{ - int iosize, err; - int i; - struct buffer_head *tmp; - - iosize = 0; - err = 0; - - for (i = nr; --i >= 0; ) { - iosize += size; - tmp = bh[i]; - if (buffer_locked(tmp)) { - wait_on_buffer(tmp); - } - - if (!buffer_uptodate(tmp)) { - /* We are traversing bh'es in reverse order so - clearing iosize on error calculates the - amount of IO before the first error. 
*/ - iosize = 0; - err = -EIO; - } - } - - if (iosize) - return iosize; - return err; -} - -/* * Start I/O on a physical range of kernel memory, defined by a vector * of kiobuf structs (much like a user-space iovec list). * @@ -2101,22 +2052,13 @@ * It is up to the caller to make sure that there are enough blocks * passed in to completely map the iobufs to disk. */ - -int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], - kdev_t dev, unsigned long b[], int size) +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], kdev_t dev, sector_t b[], + int size) { - int err; - int length; int transferred; int i; - int bufind; - int pageind; - int bhind; - int offset; - unsigned long blocknr; - struct kiobuf * iobuf = NULL; - struct page * map; - struct buffer_head *tmp, **bhs = NULL; + int err; + struct kiobuf * iobuf; if (!nr) return 0; @@ -2126,8 +2068,7 @@ */ for (i = 0; i < nr; i++) { iobuf = iovec[i]; - if ((iobuf->offset & (size-1)) || - (iobuf->length & (size-1))) + if ((iobuf->offset & (size-1)) || (iobuf->length & (size-1))) return -EINVAL; if (!iobuf->nr_pages) panic("brw_kiovec: iobuf not initialised"); @@ -2136,94 +2077,28 @@ /* * OK to walk down the iovec doing page IO on each page we find. */ - bufind = bhind = transferred = err = 0; for (i = 0; i < nr; i++) { iobuf = iovec[i]; - offset = iobuf->offset; - length = iobuf->length; iobuf->errno = 0; - if (!bhs) - bhs = iobuf->bh; - - for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { - map = iobuf->maplist[pageind]; - if (!map) { - err = -EFAULT; - goto finished; - } - - while (length > 0) { - blocknr = b[bufind++]; - if (blocknr == -1UL) { - if (rw == READ) { - /* there was an hole in the filesystem */ - memset(kmap(map) + offset, 0, size); - flush_dcache_page(map); - kunmap(map); - - transferred += size; - goto skip_block; - } else - BUG(); - } - tmp = bhs[bhind++]; - tmp->b_size = size; - set_bh_page(tmp, map, offset); - tmp->b_this_page = tmp; - - init_buffer(tmp, end_buffer_io_kiobuf, iobuf); - tmp->b_dev = dev; - tmp->b_blocknr = blocknr; - tmp->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req); - - if (rw == WRITE) { - set_bit(BH_Uptodate, &tmp->b_state); - clear_bit(BH_Dirty, &tmp->b_state); - } else - set_bit(BH_Uptodate, &tmp->b_state); - - atomic_inc(&iobuf->io_count); - submit_bh(rw, tmp); - /* - * Wait for IO if we have got too much - */ - if (bhind >= KIO_MAX_SECTORS) { - kiobuf_wait_for_io(iobuf); /* wake-one */ - err = wait_kio(rw, bhind, bhs, size); - if (err >= 0) - transferred += err; - else - goto finished; - bhind = 0; - } + ll_rw_kio(rw, iobuf, dev, b[i] * (size >> 9)); + } - skip_block: - length -= size; - offset += size; - - if (offset >= PAGE_SIZE) { - offset = 0; - break; - } - } /* End of block loop */ - } /* End of page loop */ - } /* End of iovec loop */ - - /* Is there any IO still left to submit? */ - if (bhind) { - kiobuf_wait_for_io(iobuf); /* wake-one */ - err = wait_kio(rw, bhind, bhs, size); - if (err >= 0) - transferred += err; - else - goto finished; - } - - finished: - if (transferred) - return transferred; - return err; + /* + * now they are all submitted, wait for completion + */ + transferred = 0; + err = 0; + for (i = 0; i < nr; i++) { + iobuf = iovec[i]; + kiobuf_wait_for_io(iobuf); + if (iobuf->errno && !err) + err = iobuf->errno; + if (!err) + transferred += iobuf->length; + } + + return err ? err : transferred; } /* @@ -2238,7 +2113,7 @@ * FIXME: we need a swapper_inode->get_block function to remove * some of the bmap kludges and interface ugliness here. 
*/ -int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size) +int brw_page(int rw, struct page *page, kdev_t dev, sector_t b[], int size) { struct buffer_head *head, *bh; @@ -2326,7 +2201,7 @@ struct buffer_head *bh; page = find_or_create_page(bdev->bd_inode->i_mapping, index, GFP_NOFS); - if (IS_ERR(page)) + if (!page) return NULL; if (!PageLocked(page)) @@ -2489,6 +2364,9 @@ int try_to_free_buffers(struct page * page, unsigned int gfp_mask) { struct buffer_head * tmp, * bh = page->buffers; + + BUG_ON(!PageLocked(page)); + BUG_ON(!bh); cleaned_buffers_try_again: spin_lock(&lru_list_lock); diff -u --recursive --new-file v2.5.0/linux/fs/devfs/base.c linux/fs/devfs/base.c --- v2.5.0/linux/fs/devfs/base.c Sat Nov 3 10:06:38 2001 +++ linux/fs/devfs/base.c Sat Nov 24 13:06:43 2001 @@ -545,21 +545,23 @@ 20010919 Richard Gooch Set inode->i_mapping->a_ops for block nodes in . v0.116 - 20010927 Richard Gooch - Went back to global rwsem for symlinks (refcount scheme no good) - v0.117 20011008 Richard Gooch Fixed overrun in by removing function (not needed). - v0.118 20011009 Richard Gooch Fixed buffer underrun in . - Moved down_read() from to - v0.119 20011029 Richard Gooch Fixed race in when setting event mask. - 20011103 Richard Gooch - Avoid deadlock in by using temporary buffer. - v0.120 + 20011114 Richard Gooch + First release of new locking code. + v1.0 + 20011117 Richard Gooch + Discard temporary buffer, now use "%s" for dentry names. + 20011118 Richard Gooch + Don't generate path in : use fake entry instead. + Use "existing" directory in <_devfs_make_parent_for_leaf>. + 20011122 Richard Gooch + Use slab cache rather than fixed buffer for devfsd events. + v1.1 */ #include #include @@ -592,7 +594,7 @@ #include #include -#define DEVFS_VERSION "0.120 (20011103)" +#define DEVFS_VERSION "1.1 (20011122)" #define DEVFS_NAME "devfs" @@ -605,27 +607,30 @@ # define FALSE 0 #endif -#define IS_HIDDEN(de) (( ((de)->hide && !is_devfsd_or_child(fs_info)) || !(de)->registered)) +#define MODE_DIR (S_IFDIR | S_IWUSR | S_IRUGO | S_IXUGO) -#define DEBUG_NONE 0x00000 -#define DEBUG_MODULE_LOAD 0x00001 -#define DEBUG_REGISTER 0x00002 -#define DEBUG_UNREGISTER 0x00004 -#define DEBUG_SET_FLAGS 0x00008 -#define DEBUG_S_PUT 0x00010 -#define DEBUG_I_LOOKUP 0x00020 -#define DEBUG_I_CREATE 0x00040 -#define DEBUG_I_GET 0x00080 -#define DEBUG_I_CHANGE 0x00100 -#define DEBUG_I_UNLINK 0x00200 -#define DEBUG_I_RLINK 0x00400 -#define DEBUG_I_FLINK 0x00800 -#define DEBUG_I_MKNOD 0x01000 -#define DEBUG_F_READDIR 0x02000 -#define DEBUG_D_DELETE 0x04000 -#define DEBUG_D_RELEASE 0x08000 -#define DEBUG_D_IPUT 0x10000 -#define DEBUG_ALL 0xfffff +#define IS_HIDDEN(de) ( (de)->hide && !is_devfsd_or_child(fs_info) ) + +#define DEBUG_NONE 0x0000000 +#define DEBUG_MODULE_LOAD 0x0000001 +#define DEBUG_REGISTER 0x0000002 +#define DEBUG_UNREGISTER 0x0000004 +#define DEBUG_FREE 0x0000008 +#define DEBUG_SET_FLAGS 0x0000010 +#define DEBUG_S_READ 0x0000100 /* Break */ +#define DEBUG_I_LOOKUP 0x0001000 /* Break */ +#define DEBUG_I_CREATE 0x0002000 +#define DEBUG_I_GET 0x0004000 +#define DEBUG_I_CHANGE 0x0008000 +#define DEBUG_I_UNLINK 0x0010000 +#define DEBUG_I_RLINK 0x0020000 +#define DEBUG_I_FLINK 0x0040000 +#define DEBUG_I_MKNOD 0x0080000 +#define DEBUG_F_READDIR 0x0100000 /* Break */ +#define DEBUG_D_DELETE 0x1000000 /* Break */ +#define DEBUG_D_RELEASE 0x2000000 +#define DEBUG_D_IPUT 0x4000000 +#define DEBUG_ALL 0xfffffff #define DEBUG_DISABLED DEBUG_NONE #define OPTION_NONE 0x00 @@ -638,9 +643,11 @@ struct directory_type { + 
rwlock_t lock; /* Lock for searching(R)/updating(W) */ struct devfs_entry *first; struct devfs_entry *last; - unsigned int num_removable; + unsigned short num_removable; /* Lock for writing but not reading */ + unsigned char no_more_additions:1; }; struct file_type @@ -656,8 +663,6 @@ struct fcb_type /* File, char, block type */ { - uid_t default_uid; - gid_t default_gid; void *ops; union { @@ -678,20 +683,13 @@ char *linkname; /* This is NULL-terminated */ }; -struct fifo_type -{ - uid_t uid; - gid_t gid; -}; - -struct devfs_inode /* This structure is for "persistent" inode storage */ +struct devfs_inode /* This structure is for "persistent" inode storage */ { + struct dentry *dentry; time_t atime; time_t mtime; time_t ctime; - unsigned int ino; /* Inode number as seen in the VFS */ - struct dentry *dentry; - umode_t mode; + unsigned int ino; /* Inode number as seen in the VFS */ uid_t uid; gid_t gid; }; @@ -699,12 +697,13 @@ struct devfs_entry { void *info; + atomic_t refcount; /* When this drops to zero, it's unused */ union { struct directory_type dir; struct fcb_type fcb; struct symlink_type symlink; - struct fifo_type fifo; + const char *name; /* Only used for (mode == 0) */ } u; struct devfs_entry *prev; /* Previous entry in the parent directory */ @@ -713,12 +712,11 @@ struct devfs_entry *slave; /* Another entry to unregister */ struct devfs_inode inode; umode_t mode; - unsigned short namelen; /* I think 64k+ filenames are a way off... */ - unsigned char registered:1; + unsigned short namelen; /* I think 64k+ filenames are a way off... */ unsigned char hide:1; - unsigned char no_persistence:1; - char name[1]; /* This is just a dummy: the allocated array is - bigger. This is NULL-terminated */ + unsigned char vfs_created:1; /* Whether created by driver or VFS */ + char name[1]; /* This is just a dummy: the allocated array + is bigger. 
This is NULL-terminated */ }; /* The root of the device tree */ @@ -726,35 +724,38 @@ struct devfsd_buf_entry { - void *data; - unsigned int type; + struct devfs_entry *de; /* The name is generated with this */ + unsigned short type; /* The type of event */ umode_t mode; uid_t uid; gid_t gid; + struct devfsd_buf_entry *next; }; -struct fs_info /* This structure is for the mounted devfs */ +struct fs_info /* This structure is for the mounted devfs */ { struct super_block *sb; - volatile struct devfsd_buf_entry *devfsd_buffer; - spinlock_t devfsd_buffer_lock; - volatile unsigned int devfsd_buf_in; - volatile unsigned int devfsd_buf_out; + spinlock_t devfsd_buffer_lock; /* Lock when inserting/deleting events */ + struct devfsd_buf_entry *devfsd_first_event; + struct devfsd_buf_entry *devfsd_last_event; volatile int devfsd_sleeping; volatile struct task_struct *devfsd_task; volatile struct file *devfsd_file; struct devfsd_notify_struct *devfsd_info; volatile unsigned long devfsd_event_mask; atomic_t devfsd_overrun_count; - wait_queue_head_t devfsd_wait_queue; - wait_queue_head_t revalidate_wait_queue; + wait_queue_head_t devfsd_wait_queue; /* Wake devfsd on input */ + wait_queue_head_t revalidate_wait_queue; /* Wake when devfsd sleeps */ }; static struct fs_info fs_info = {devfsd_buffer_lock: SPIN_LOCK_UNLOCKED}; -static const int devfsd_buf_size = PAGE_SIZE / sizeof(struct devfsd_buf_entry); +static kmem_cache_t *devfsd_buf_cache; #ifdef CONFIG_DEVFS_DEBUG static unsigned int devfs_debug_init __initdata = DEBUG_NONE; static unsigned int devfs_debug = DEBUG_NONE; +static spinlock_t stat_lock = SPIN_LOCK_UNLOCKED; +static unsigned int stat_num_entries; +static unsigned int stat_num_bytes; #endif #ifdef CONFIG_DEVFS_MOUNT @@ -763,19 +764,23 @@ static unsigned int boot_options = OPTION_NONE; #endif -static DECLARE_RWSEM (symlink_rwsem); - /* Forward function declarations */ -static struct devfs_entry *search_for_entry (struct devfs_entry *dir, - const char *name, - unsigned int namelen, int mkdir, - int mkfile, int *is_new, - int traverse_symlink); +static devfs_handle_t _devfs_walk_path (struct devfs_entry *dir, + const char *name, int namelen, + int traverse_symlink); static ssize_t devfsd_read (struct file *file, char *buf, size_t len, loff_t *ppos); static int devfsd_ioctl (struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); static int devfsd_close (struct inode *inode, struct file *file); +#ifdef CONFIG_DEVFS_DEBUG +static int stat_read (struct file *file, char *buf, size_t len, + loff_t *ppos); +static struct file_operations stat_fops = +{ + read: stat_read, +}; +#endif /* Devfs daemon file operations */ @@ -791,46 +796,95 @@ /** - * search_for_entry_in_dir - Search for a devfs entry inside another devfs entry. - * @parent: The parent devfs entry. - * @name: The name of the entry. + * devfs_get - Get a reference to a devfs entry. + * @de: The devfs entry. + */ + +static struct devfs_entry *devfs_get (struct devfs_entry *de) +{ + if (de) atomic_inc (&de->refcount); + return de; +} /* End Function devfs_get */ + +/** + * devfs_put - Put (release) a reference to a devfs entry. + * @de: The devfs entry. 
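+ *
+ * A minimal usage sketch (hypothetical caller; do_something() is not
+ * part of devfs):
+ *
+ *	struct devfs_entry *de = devfs_get (entry);
+ *
+ *	if (de) do_something (de);
+ *	devfs_put (de);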
+ */ + +static void devfs_put (struct devfs_entry *de) +{ + if (!de) return; + if ( !atomic_dec_and_test (&de->refcount) ) return; + if (de == root_entry) + OOPS ("%s: devfs_put(): root entry being freed\n", DEVFS_NAME); +#ifdef CONFIG_DEVFS_DEBUG + if (devfs_debug & DEBUG_FREE) + printk ("%s: devfs_put(%s): de: %p, parent: %p \"%s\"\n", + DEVFS_NAME, de->name, de, de->parent, + de->parent ? de->parent->name : "no parent"); +#endif + if ( S_ISLNK (de->mode) ) kfree (de->u.symlink.linkname); + if ( ( S_ISCHR (de->mode) || S_ISBLK (de->mode) ) && de->u.fcb.autogen ) + { + devfs_dealloc_devnum ( S_ISCHR (de->mode) ? DEVFS_SPECIAL_CHR : + DEVFS_SPECIAL_BLK, + MKDEV (de->u.fcb.u.device.major, + de->u.fcb.u.device.minor) ); + } +#ifdef CONFIG_DEVFS_DEBUG + spin_lock (&stat_lock); + --stat_num_entries; + stat_num_bytes -= sizeof *de + de->namelen; + if ( S_ISLNK (de->mode) ) stat_num_bytes -= de->u.symlink.length + 1; + spin_unlock (&stat_lock); +#endif + kfree (de); +} /* End Function devfs_put */ + +/** + * _devfs_search_dir - Search for a devfs entry in a directory. + * @dir: The directory to search. + * @name: The name of the entry to search for. * @namelen: The number of characters in @name. - * @traverse_symlink: If %TRUE then the entry is traversed if it is a symlink. * - * Search for a devfs entry inside another devfs entry and returns a pointer - * to the entry on success, else %NULL. + * Search for a devfs entry in a directory and returns a pointer to the entry + * on success, else %NULL. The directory must be locked already. + * An implicit devfs_get() is performed on the returned entry. */ -static struct devfs_entry *search_for_entry_in_dir (struct devfs_entry *parent, - const char *name, - unsigned int namelen, - int traverse_symlink) +static struct devfs_entry *_devfs_search_dir (struct devfs_entry *dir, + const char *name, + unsigned int namelen) { - struct devfs_entry *curr, *retval; + struct devfs_entry *curr; - if ( !S_ISDIR (parent->mode) ) + if ( !S_ISDIR (dir->mode) ) { - printk ("%s: entry is not a directory\n", DEVFS_NAME); + printk ("%s: search_dir(%s): not a directory\n", DEVFS_NAME,dir->name); return NULL; } - for (curr = parent->u.dir.first; curr != NULL; curr = curr->next) + for (curr = dir->u.dir.first; curr != NULL; curr = curr->next) { if (curr->namelen != namelen) continue; if (memcmp (curr->name, name, namelen) == 0) break; /* Not found: try the next one */ } - if (curr == NULL) return NULL; - if (!S_ISLNK (curr->mode) || !traverse_symlink) return curr; - /* Need to follow the link: this is a stack chomper */ - retval = curr->registered ? - search_for_entry (parent, curr->u.symlink.linkname, - curr->u.symlink.length, FALSE, FALSE, NULL, - TRUE) : NULL; - return retval; -} /* End Function search_for_entry_in_dir */ + return devfs_get (curr); +} /* End Function _devfs_search_dir */ + + +/** + * _devfs_alloc_entry - Allocate a devfs entry. + * @name: The name of the entry. + * @namelen: The number of characters in @name. + * + * Allocate a devfs entry and returns a pointer to the entry on success, else + * %NULL. 
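+ *
+ * A @namelen of 0 makes the function compute strlen(@name) itself.
+ * An illustrative (hypothetical) caller creating a directory entry:
+ *
+ *	de = _devfs_alloc_entry ("foo", 0, MODE_DIR);
+ *	if (de) err = _devfs_append_entry (parent, de, FALSE, NULL);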
+ */ -static struct devfs_entry *create_entry (struct devfs_entry *parent, - const char *name,unsigned int namelen) +static struct devfs_entry *_devfs_alloc_entry (const char *name, + unsigned int namelen, + umode_t mode) { struct devfs_entry *new; static unsigned long inode_counter = FIRST_INODE; @@ -839,168 +893,270 @@ if ( name && (namelen < 1) ) namelen = strlen (name); if ( ( new = kmalloc (sizeof *new + namelen, GFP_KERNEL) ) == NULL ) return NULL; - /* Magic: this will set the ctime to zero, thus subsequent lookups will - trigger the call to */ memset (new, 0, sizeof *new + namelen); + new->mode = mode; + if ( S_ISDIR (mode) ) rwlock_init (&new->u.dir.lock); + atomic_set (&new->refcount, 1); spin_lock (&counter_lock); new->inode.ino = inode_counter++; spin_unlock (&counter_lock); - new->parent = parent; if (name) memcpy (new->name, name, namelen); new->namelen = namelen; - if (parent == NULL) return new; - new->prev = parent->u.dir.last; - /* Insert into the parent directory's list of children */ - if (parent->u.dir.first == NULL) parent->u.dir.first = new; - else parent->u.dir.last->next = new; - parent->u.dir.last = new; +#ifdef CONFIG_DEVFS_DEBUG + spin_lock (&stat_lock); + ++stat_num_entries; + stat_num_bytes += sizeof *new + namelen; + spin_unlock (&stat_lock); +#endif return new; -} /* End Function create_entry */ +} /* End Function _devfs_alloc_entry */ + -static void update_devfs_inode_from_entry (struct devfs_entry *de) +/** + * _devfs_append_entry - Append a devfs entry to a directory's child list. + * @dir: The directory to add to. + * @de: The devfs entry to append. + * @removable: If TRUE, increment the count of removable devices for %dir. + * @old_de: If an existing entry exists, it will be written here. This may + * be %NULL. + * + * Append a devfs entry to a directory's list of children, checking first to + * see if an entry of the same name exists. The directory will be locked. + * The value 0 is returned on success, else a negative error code. + * On failure, an implicit devfs_put() is performed on %de. 
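+ * If @old_de is provided and an entry with the same name already
+ * exists, a reference to that entry is written to @old_de and the
+ * caller must devfs_put() it.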
+ */ + +static int _devfs_append_entry (devfs_handle_t dir, devfs_handle_t de, + int removable, devfs_handle_t *old_de) { - if (de == NULL) return; - if ( S_ISDIR (de->mode) ) - { - de->inode.mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; - de->inode.uid = 0; - de->inode.gid = 0; - } - else if ( S_ISLNK (de->mode) ) - { - de->inode.mode = S_IFLNK | S_IRUGO | S_IXUGO; - de->inode.uid = 0; - de->inode.gid = 0; - } - else if ( S_ISFIFO (de->mode) ) + int retval; + + if (old_de) *old_de = NULL; + if ( !S_ISDIR (dir->mode) ) { - de->inode.mode = de->mode; - de->inode.uid = de->u.fifo.uid; - de->inode.gid = de->u.fifo.gid; + printk ("%s: append_entry(%s): dir: \"%s\" is not a directory\n", + DEVFS_NAME, de->name, dir->name); + devfs_put (de); + return -ENOTDIR; } + write_lock (&dir->u.dir.lock); + if (dir->u.dir.no_more_additions) retval = -ENOENT; else { - if (de->u.fcb.auto_owner) - de->inode.mode = (de->mode & ~S_IALLUGO) | S_IRUGO | S_IWUGO; - else de->inode.mode = de->mode; - de->inode.uid = de->u.fcb.default_uid; - de->inode.gid = de->u.fcb.default_gid; + struct devfs_entry *old; + + old = _devfs_search_dir (dir, de->name, de->namelen); + if (old_de) *old_de = old; + else devfs_put (old); + if (old == NULL) + { + de->parent = dir; + de->prev = dir->u.dir.last; + /* Append to the directory's list of children */ + if (dir->u.dir.first == NULL) dir->u.dir.first = de; + else dir->u.dir.last->next = de; + dir->u.dir.last = de; + if (removable) ++dir->u.dir.num_removable; + retval = 0; + } + else retval = -EEXIST; } -} /* End Function update_devfs_inode_from_entry */ + write_unlock (&dir->u.dir.lock); + if (retval) devfs_put (de); + return retval; +} /* End Function _devfs_append_entry */ + /** - * get_root_entry - Get the root devfs entry. + * _devfs_get_root_entry - Get the root devfs entry. * * Returns the root devfs entry on success, else %NULL. 
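+ * Creation is guarded by a double check under a local spinlock, so
+ * racing first-time callers cannot install two different root entries.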
*/ -static struct devfs_entry *get_root_entry (void) +static struct devfs_entry *_devfs_get_root_entry (void) { kdev_t devnum; struct devfs_entry *new; + static spinlock_t root_lock = SPIN_LOCK_UNLOCKED; /* Always ensure the root is created */ - if (root_entry != NULL) return root_entry; - if ( ( root_entry = create_entry (NULL, NULL, 0) ) == NULL ) return NULL; - root_entry->mode = S_IFDIR; - /* Force an inode update, because lookup() is never done for the root */ - update_devfs_inode_from_entry (root_entry); - root_entry->registered = TRUE; + if (root_entry) return root_entry; + if ( ( new = _devfs_alloc_entry (NULL, 0,MODE_DIR) ) == NULL ) return NULL; + spin_lock (&root_lock); + if (root_entry) + { + spin_unlock (&root_lock); + devfs_put (new); + return (root_entry); + } + root_entry = new; + spin_unlock (&root_lock); /* And create the entry for ".devfsd" */ - if ( ( new = create_entry (root_entry, ".devfsd", 0) ) == NULL ) - return NULL; + if ( ( new = _devfs_alloc_entry (".devfsd", 0, S_IFCHR |S_IRUSR |S_IWUSR) ) + == NULL ) return NULL; devnum = devfs_alloc_devnum (DEVFS_SPECIAL_CHR); new->u.fcb.u.device.major = MAJOR (devnum); new->u.fcb.u.device.minor = MINOR (devnum); - new->mode = S_IFCHR | S_IRUSR | S_IWUSR; - new->u.fcb.default_uid = 0; - new->u.fcb.default_gid = 0; new->u.fcb.ops = &devfsd_fops; - new->registered = TRUE; + _devfs_append_entry (root_entry, new, FALSE, NULL); +#ifdef CONFIG_DEVFS_DEBUG + if ( ( new = _devfs_alloc_entry (".stat", 0, S_IFCHR | S_IRUGO | S_IWUGO) ) + == NULL ) return NULL; + devnum = devfs_alloc_devnum (DEVFS_SPECIAL_CHR); + new->u.fcb.u.device.major = MAJOR (devnum); + new->u.fcb.u.device.minor = MINOR (devnum); + new->u.fcb.ops = &stat_fops; + _devfs_append_entry (root_entry, new, FALSE, NULL); +#endif return root_entry; -} /* End Function get_root_entry */ +} /* End Function _devfs_get_root_entry */ /** - * search_for_entry - Search for an entry in the devfs tree. - * @dir: The parent directory to search from. If this is %NULL the root is used - * @name: The name of the entry. - * @namelen: The number of characters in @name. - * @mkdir: If %TRUE intermediate directories are created as needed. - * @mkfile: If %TRUE the file entry is created if it doesn't exist. - * @is_new: If the returned entry was newly made, %TRUE is written here. If - * this is %NULL nothing is written here. - * @traverse_symlink: If %TRUE then symbolic links are traversed. + * _devfs_descend - Descend down a tree using the next component name. + * @dir: The directory to search. + * @name: The component name to search for. + * @namelen: The length of %name. + * @next_pos: The position of the next '/' or '\0' is written here. * - * If the entry is created, then it will be in the unregistered state. - * Returns a pointer to the entry on success, else %NULL. + * Descend into a directory, searching for a component. This function forms + * the core of a tree-walking algorithm. The directory will be locked. + * The devfs entry corresponding to the component is returned. If there is + * no matching entry, %NULL is returned. + * An implicit devfs_get() is performed on the returned entry. 
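+ *
+ * For example, descending with the name "bus/usb" returns the entry
+ * for "bus" and writes 3 (the position of the '/') to @next_pos; the
+ * caller then skips the '/' and repeats with "usb".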
*/ -static struct devfs_entry *search_for_entry (struct devfs_entry *dir, - const char *name, - unsigned int namelen, int mkdir, - int mkfile, int *is_new, - int traverse_symlink) +static struct devfs_entry *_devfs_descend (struct devfs_entry *dir, + const char *name, int namelen, + int *next_pos) { - int len; - const char *subname, *stop, *ptr; + const char *stop, *ptr; struct devfs_entry *entry; - if (is_new) *is_new = FALSE; - if (dir == NULL) dir = get_root_entry (); - if (dir == NULL) return NULL; - /* Extract one filename component */ - subname = name; + if ( (namelen >= 3) && (strncmp (name, "../", 3) == 0) ) + { /* Special-case going to parent directory */ + *next_pos = 3; + return devfs_get (dir->parent); + } stop = name + namelen; - while (subname < stop) - { - /* Search for a possible '/' */ - for (ptr = subname; (ptr < stop) && (*ptr != '/'); ++ptr); - if (ptr >= stop) - { - /* Look for trailing component */ - len = stop - subname; - entry = search_for_entry_in_dir (dir, subname, len, - traverse_symlink); - if (entry != NULL) return entry; - if (!mkfile) return NULL; - entry = create_entry (dir, subname, len); - if (entry && is_new) *is_new = TRUE; - return entry; - } - /* Found '/': search for directory */ - if (strncmp (subname, "../", 3) == 0) - { - /* Going up */ - dir = dir->parent; - if (dir == NULL) return NULL; /* Cannot escape from devfs */ - subname += 3; - continue; + /* Search for a possible '/' */ + for (ptr = name; (ptr < stop) && (*ptr != '/'); ++ptr); + *next_pos = ptr - name; + read_lock (&dir->u.dir.lock); + entry = _devfs_search_dir (dir, name, *next_pos); + read_unlock (&dir->u.dir.lock); + return entry; +} /* End Function _devfs_descend */ + + +static devfs_handle_t _devfs_make_parent_for_leaf (struct devfs_entry *dir, + const char *name, + int namelen, int *leaf_pos) +{ + int next_pos = 0; + + if (dir == NULL) dir = _devfs_get_root_entry (); + if (dir == NULL) return NULL; + devfs_get (dir); + /* Search for possible trailing component and ignore it */ + for (--namelen; (namelen > 0) && (name[namelen] != '/'); --namelen); + *leaf_pos = (name[namelen] == '/') ? 
(namelen + 1) : 0; + for (; namelen > 0; name += next_pos, namelen -= next_pos) + { + struct devfs_entry *de, *old; + + if ( ( de = _devfs_descend (dir, name, namelen, &next_pos) ) == NULL ) + { + de = _devfs_alloc_entry (name, next_pos, MODE_DIR); + devfs_get (de); + if ( !de || _devfs_append_entry (dir, de, FALSE, &old) ) + { + devfs_put (de); + if ( !old || !S_ISDIR (old->mode) ) + { + devfs_put (old); + devfs_put (dir); + return NULL; + } + de = old; /* Use the existing directory */ + } } - len = ptr - subname; - entry = search_for_entry_in_dir (dir, subname, len, traverse_symlink); - if (!entry && !mkdir) return NULL; - if (entry == NULL) + if (de == dir->parent) { - /* Make it */ - if ( ( entry = create_entry (dir, subname, len) ) == NULL ) - return NULL; - entry->mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; - if (is_new) *is_new = TRUE; + devfs_put (dir); + devfs_put (de); + return NULL; } - if ( !S_ISDIR (entry->mode) ) + devfs_put (dir); + dir = de; + if (name[next_pos] == '/') ++next_pos; + } + return dir; +} /* End Function _devfs_make_parent_for_leaf */ + + +static devfs_handle_t _devfs_prepare_leaf (devfs_handle_t *dir, + const char *name, umode_t mode) +{ + int namelen, leaf_pos; + struct devfs_entry *de; + + namelen = strlen (name); + if ( ( *dir = _devfs_make_parent_for_leaf (*dir, name, namelen, + &leaf_pos) ) == NULL ) + { + printk ("%s: prepare_leaf(%s): could not create parent path\n", + DEVFS_NAME, name); + return NULL; + } + if ( ( de = _devfs_alloc_entry (name + leaf_pos, namelen - leaf_pos,mode) ) + == NULL ) + { + printk ("%s: prepare_leaf(%s): could not allocate entry\n", + DEVFS_NAME, name); + devfs_put (*dir); + return NULL; + } + return de; +} /* End Function _devfs_prepare_leaf */ + + +static devfs_handle_t _devfs_walk_path (struct devfs_entry *dir, + const char *name, int namelen, + int traverse_symlink) +{ + int next_pos = 0; + + if (dir == NULL) dir = _devfs_get_root_entry (); + if (dir == NULL) return NULL; + devfs_get (dir); + for (; namelen > 0; name += next_pos, namelen -= next_pos) + { + struct devfs_entry *de, *link; + + if ( ( de = _devfs_descend (dir, name, namelen, &next_pos) ) == NULL ) { - printk ("%s: existing non-directory entry\n", DEVFS_NAME); + devfs_put (dir); return NULL; } - /* Ensure an unregistered entry is re-registered and visible */ - entry->hide = FALSE; - entry->registered = TRUE; - subname = ptr + 1; - dir = entry; + if (S_ISLNK (de->mode) && traverse_symlink) + { /* Need to follow the link: this is a stack chomper */ + link = _devfs_walk_path (dir, de->u.symlink.linkname, + de->u.symlink.length, TRUE); + devfs_put (de); + if (!link) + { + devfs_put (dir); + return NULL; + } + de = link; + } + devfs_put (dir); + dir = de; + if (name[next_pos] == '/') ++next_pos; } - return NULL; -} /* End Function search_for_entry */ + return dir; +} /* End Function _devfs_walk_path */ /** @@ -1020,20 +1176,29 @@ { struct devfs_entry *entry, *de; + devfs_get (dir); if (dir == NULL) return NULL; if ( !S_ISDIR (dir->mode) ) { printk ("%s: find_by_dev(): not a directory\n", DEVFS_NAME); + devfs_put (dir); return NULL; } /* First search files in this directory */ + read_lock (&dir->u.dir.lock); for (entry = dir->u.dir.first; entry != NULL; entry = entry->next) { if ( !S_ISCHR (entry->mode) && !S_ISBLK (entry->mode) ) continue; if ( S_ISCHR (entry->mode) && (type != DEVFS_SPECIAL_CHR) ) continue; if ( S_ISBLK (entry->mode) && (type != DEVFS_SPECIAL_BLK) ) continue; if ( (entry->u.fcb.u.device.major == major) && - (entry->u.fcb.u.device.minor == minor) 
) return entry; + (entry->u.fcb.u.device.minor == minor) ) + { + devfs_get (entry); + read_unlock (&dir->u.dir.lock); + devfs_put (dir); + return entry; + } /* Not found: try the next one */ } /* Now recursively search the subdirectories: this is a stack chomper */ @@ -1041,8 +1206,15 @@ { if ( !S_ISDIR (entry->mode) ) continue; de = find_by_dev (entry, major, minor, type); - if (de) return de; + if (de) + { + read_unlock (&dir->u.dir.lock); + devfs_put (dir); + return de; + } } + read_unlock (&dir->u.dir.lock); + devfs_put (dir); return NULL; } /* End Function find_by_dev */ @@ -1063,7 +1235,6 @@ * %DEVFS_SPECIAL_CHR or %DEVFS_SPECIAL_BLK. * @traverse_symlink: If %TRUE then symbolic links are traversed. * - * FIXME: What the hell is @handle? - ch * Returns the devfs_entry pointer on success, else %NULL. */ @@ -1095,10 +1266,7 @@ ++name; --namelen; } - if (traverse_symlink) down_read (&symlink_rwsem); - entry = search_for_entry (dir, name, namelen, FALSE, FALSE, NULL, - traverse_symlink); - if (traverse_symlink) up_read (&symlink_rwsem); + entry = _devfs_walk_path (dir, name, namelen, traverse_symlink); if (entry != NULL) return entry; } /* Have to search by major and minor: slow */ @@ -1106,42 +1274,34 @@ return find_by_dev (root_entry, major, minor, type); } /* End Function find_entry */ -static struct devfs_entry *get_devfs_entry_from_vfs_inode (struct inode *inode, - int do_check) +static struct devfs_entry *get_devfs_entry_from_vfs_inode (struct inode *inode) { - struct devfs_entry *de; - if (inode == NULL) return NULL; - de = inode->u.generic_ip; - if (!de) printk (__FUNCTION__ "(): NULL de for inode %ld\n", inode->i_ino); - if (do_check && de && !de->registered) de = NULL; - return de; + return inode->u.generic_ip; } /* End Function get_devfs_entry_from_vfs_inode */ /** - * free_dentries - Free the dentries for a device entry and invalidate inodes. + * free_dentry - Free the dentry for a device entry and invalidate inode. * @de: The entry. + * + * This must only be called after the entry has been unhooked from it's + * parent directory. */ -static void free_dentries (struct devfs_entry *de) +static void free_dentry (struct devfs_entry *de) { - struct dentry *dentry; + struct dentry *dentry = de->inode.dentry; + if (!dentry) return; spin_lock (&dcache_lock); - dentry = de->inode.dentry; - if (dentry != NULL) - { - dget_locked (dentry); - de->inode.dentry = NULL; - spin_unlock (&dcache_lock); - /* Forcefully remove the inode */ - if (dentry->d_inode != NULL) dentry->d_inode->i_nlink = 0; - d_drop (dentry); - dput (dentry); - } - else spin_unlock (&dcache_lock); -} /* End Function free_dentries */ + dget_locked (dentry); + spin_unlock (&dcache_lock); + /* Forcefully remove the inode */ + if (dentry->d_inode != NULL) dentry->d_inode->i_nlink = 0; + d_drop (dentry); + dput (dentry); +} /* End Function free_dentry */ /** @@ -1172,7 +1332,7 @@ static inline int devfsd_queue_empty (struct fs_info *fs_info) { - return (fs_info->devfsd_buf_out == fs_info->devfsd_buf_in) ? TRUE : FALSE; + return (fs_info->devfsd_last_event) ? FALSE : TRUE; } /* End Function devfsd_queue_empty */ @@ -1201,8 +1361,9 @@ /** - * devfsd_notify_one - Notify a single devfsd daemon of a change. - * @data: Data to be passed. + * devfsd_notify_de - Notify the devfsd daemon of a change. + * @de: The devfs entry that has changed. This and all parent entries will + * have their reference counts incremented if the event was queued. * @type: The type of change. * @mode: The mode of the entry. * @uid: The user ID. 
@@ -1212,51 +1373,48 @@ * Returns %TRUE if an event was queued and devfsd woken up, else %FALSE. */ -static int devfsd_notify_one (void *data, unsigned int type, umode_t mode, - uid_t uid, gid_t gid, struct fs_info *fs_info) +static int devfsd_notify_de (struct devfs_entry *de, + unsigned short type, umode_t mode, + uid_t uid, gid_t gid, struct fs_info *fs_info) { - unsigned int next_pos; - unsigned long flags; struct devfsd_buf_entry *entry; + struct devfs_entry *curr; if ( !( fs_info->devfsd_event_mask & (1 << type) ) ) return (FALSE); - next_pos = fs_info->devfsd_buf_in + 1; - if (next_pos >= devfsd_buf_size) next_pos = 0; - if (next_pos == fs_info->devfsd_buf_out) + if ( ( entry = kmem_cache_alloc (devfsd_buf_cache, 0) ) == NULL ) { - /* Running up the arse of the reader: drop it */ atomic_inc (&fs_info->devfsd_overrun_count); return (FALSE); } - spin_lock_irqsave (&fs_info->devfsd_buffer_lock, flags); - next_pos = fs_info->devfsd_buf_in + 1; - if (next_pos >= devfsd_buf_size) next_pos = 0; - entry = (struct devfsd_buf_entry *) fs_info->devfsd_buffer + - fs_info->devfsd_buf_in; - entry->data = data; + for (curr = de; curr != NULL; curr = curr->parent) devfs_get (curr); + entry->de = de; entry->type = type; entry->mode = mode; entry->uid = uid; entry->gid = gid; - fs_info->devfsd_buf_in = next_pos; - spin_unlock_irqrestore (&fs_info->devfsd_buffer_lock, flags); + entry->next = NULL; + spin_lock (&fs_info->devfsd_buffer_lock); + if (!fs_info->devfsd_first_event) fs_info->devfsd_first_event = entry; + if (fs_info->devfsd_last_event) fs_info->devfsd_last_event->next = entry; + fs_info->devfsd_last_event = entry; + spin_unlock (&fs_info->devfsd_buffer_lock); wake_up_interruptible (&fs_info->devfsd_wait_queue); return (TRUE); -} /* End Function devfsd_notify_one */ +} /* End Function devfsd_notify_de */ /** - * devfsd_notify - Notify all devfsd daemons of a change. + * devfsd_notify - Notify the devfsd daemon of a change. * @de: The devfs entry that has changed. * @type: The type of change event. - * @wait: If TRUE, the functions waits for all daemons to finish processing + * @wait: If TRUE, the function waits for the daemon to finish processing * the event. */ -static void devfsd_notify (struct devfs_entry *de, unsigned int type, int wait) +static void devfsd_notify (struct devfs_entry *de,unsigned short type,int wait) { - if (devfsd_notify_one (de, type, de->mode, current->euid, - current->egid, &fs_info) && wait) + if (devfsd_notify_de (de, type, de->mode, current->euid, + current->egid, &fs_info) && wait) wait_for_devfsd_finished (&fs_info); } /* End Function devfsd_notify */ @@ -1287,7 +1445,7 @@ umode_t mode, void *ops, void *info) { char devtype = S_ISCHR (mode) ? DEVFS_SPECIAL_CHR : DEVFS_SPECIAL_BLK; - int is_new; + int err; kdev_t devnum = NODEV; struct devfs_entry *de; @@ -1332,146 +1490,127 @@ major = MAJOR (devnum); minor = MINOR (devnum); } - de = search_for_entry (dir, name, strlen (name), TRUE, TRUE, &is_new, - FALSE); - if (de == NULL) + if ( ( de = _devfs_prepare_leaf (&dir, name, mode) ) == NULL ) { - printk ("%s: devfs_register(): could not create entry: \"%s\"\n", + printk ("%s: devfs_register(%s): could not prepare leaf\n", DEVFS_NAME, name); if (devnum != NODEV) devfs_dealloc_devnum (devtype, devnum); return NULL; } -#ifdef CONFIG_DEVFS_DEBUG - if (devfs_debug & DEBUG_REGISTER) - printk ("%s: devfs_register(%s): de: %p %s\n", - DEVFS_NAME, name, de, is_new ? 
"new" : "existing"); -#endif - if (!is_new) - { - /* Existing entry */ - if ( !S_ISCHR (de->mode) && !S_ISBLK (de->mode) && - !S_ISREG (de->mode) ) - { - printk ("%s: devfs_register(): existing non-device/file entry: \"%s\"\n", - DEVFS_NAME, name); - if (devnum != NODEV) devfs_dealloc_devnum (devtype, devnum); - return NULL; - } - if (de->registered) - { - printk("%s: devfs_register(): device already registered: \"%s\"\n", - DEVFS_NAME, name); - if (devnum != NODEV) devfs_dealloc_devnum (devtype, devnum); - return NULL; - } - } - de->u.fcb.autogen = FALSE; if ( S_ISCHR (mode) || S_ISBLK (mode) ) { de->u.fcb.u.device.major = major; de->u.fcb.u.device.minor = minor; de->u.fcb.autogen = (devnum == NODEV) ? FALSE : TRUE; } - else if ( S_ISREG (mode) ) de->u.fcb.u.file.size = 0; - else + else if ( !S_ISREG (mode) ) { - printk ("%s: devfs_register(): illegal mode: %x\n", - DEVFS_NAME, mode); + printk ("%s: devfs_register(%s): illegal mode: %x\n", + DEVFS_NAME, name, mode); + devfs_put (de); + devfs_put (dir); return (NULL); } de->info = info; - de->mode = mode; if (flags & DEVFS_FL_CURRENT_OWNER) { - de->u.fcb.default_uid = current->uid; - de->u.fcb.default_gid = current->gid; + de->inode.uid = current->uid; + de->inode.gid = current->gid; } else { - de->u.fcb.default_uid = 0; - de->u.fcb.default_gid = 0; + de->inode.uid = 0; + de->inode.gid = 0; } de->u.fcb.ops = ops; de->u.fcb.auto_owner = (flags & DEVFS_FL_AUTO_OWNER) ? TRUE : FALSE; de->u.fcb.aopen_notify = (flags & DEVFS_FL_AOPEN_NOTIFY) ? TRUE : FALSE; - if (flags & DEVFS_FL_REMOVABLE) - { - de->u.fcb.removable = TRUE; - ++de->parent->u.dir.num_removable; - } - de->u.fcb.open = FALSE; de->hide = (flags & DEVFS_FL_HIDE) ? TRUE : FALSE; - de->no_persistence = (flags & DEVFS_FL_NO_PERSISTENCE) ? TRUE : FALSE; - de->registered = TRUE; + if (flags & DEVFS_FL_REMOVABLE) de->u.fcb.removable = TRUE; + if ( ( err = _devfs_append_entry (dir, de, de->u.fcb.removable, NULL) ) + != 0 ) + { + printk("%s: devfs_register(%s): could not append to parent, err: %d\n", + DEVFS_NAME, name, err); + devfs_put (dir); + if (devnum != NODEV) devfs_dealloc_devnum (devtype, devnum); + return NULL; + } +#ifdef CONFIG_DEVFS_DEBUG + if (devfs_debug & DEBUG_REGISTER) + printk ("%s: devfs_register(%s): de: %p dir: %p \"%s\" pp: %p\n", + DEVFS_NAME, name, de, dir, dir->name, dir->parent); +#endif devfsd_notify (de, DEVFSD_NOTIFY_REGISTERED, flags & DEVFS_FL_WAIT); + devfs_put (dir); return de; } /* End Function devfs_register */ /** - * unregister - Unregister a device entry. + * _devfs_unhook - Unhook a device entry from its parents list + * @de: The entry to unhook. + * + * Returns %TRUE if the entry was unhooked, else %FALSE if it was + * previously unhooked. + * The caller must have a write lock on the parent directory. + */ + +static int _devfs_unhook (struct devfs_entry *de) +{ + struct devfs_entry *parent; + + if ( !de || (de->prev == de) ) return FALSE; + parent = de->parent; + if (de->prev == NULL) parent->u.dir.first = de->next; + else de->prev->next = de->next; + if (de->next == NULL) parent->u.dir.last = de->prev; + else de->next->prev = de->prev; + de->prev = de; /* Indicate we're unhooked */ + de->next = NULL; /* Force early termination for */ + if ( ( S_ISREG (de->mode) || S_ISCHR (de->mode) || S_ISBLK (de->mode) ) && + de->u.fcb.removable ) + --parent->u.dir.num_removable; + return TRUE; +} /* End Function _devfs_unhook */ + + +/** + * unregister - Unregister a device entry from it's parent. + * @dir: The parent directory. * @de: The entry to unregister. 
+ * + * The caller must have a write lock on the parent directory, which is + * unlocked by this function. */ -static void unregister (struct devfs_entry *de) +static void unregister (struct devfs_entry *dir, struct devfs_entry *de) { - struct devfs_entry *child; + int unhooked = _devfs_unhook (de); - if ( (child = de->slave) != NULL ) - { - de->slave = NULL; /* Unhook first in case slave is parent directory */ - unregister (child); - } - if (de->registered) - { - devfsd_notify (de, DEVFSD_NOTIFY_UNREGISTERED, 0); - free_dentries (de); - } - de->info = NULL; - if ( S_ISCHR (de->mode) || S_ISBLK (de->mode) || S_ISREG (de->mode) ) - { - de->registered = FALSE; - de->u.fcb.ops = NULL; - if (!S_ISREG (de->mode) && de->u.fcb.autogen) - { - devfs_dealloc_devnum ( S_ISCHR (de->mode) ? DEVFS_SPECIAL_CHR : - DEVFS_SPECIAL_BLK, - MKDEV (de->u.fcb.u.device.major, - de->u.fcb.u.device.minor) ); - } - de->u.fcb.autogen = FALSE; - return; - } - if (S_ISLNK (de->mode) && de->registered) - { - de->registered = FALSE; - down_write (&symlink_rwsem); - if (de->u.symlink.linkname) kfree (de->u.symlink.linkname); - de->u.symlink.linkname = NULL; - up_write (&symlink_rwsem); - return; - } - if ( S_ISFIFO (de->mode) ) - { - de->registered = FALSE; - return; - } - if (!de->registered) return; - if ( !S_ISDIR (de->mode) ) - { - printk ("%s: unregister(): unsupported type\n", DEVFS_NAME); - return; - } - de->registered = FALSE; - /* Now recursively search the subdirectories: this is a stack chomper */ - for (child = de->u.dir.first; child != NULL; child = child->next) - { + write_unlock (&dir->u.dir.lock); + if (!unhooked) return; + devfs_get (dir); + devfs_unregister (de->slave); /* Let it handle the locking */ + devfsd_notify (de, DEVFSD_NOTIFY_UNREGISTERED, 0); + free_dentry (de); + devfs_put (dir); + if ( !S_ISDIR (de->mode) ) return; + while (TRUE) /* Recursively unregister: this is a stack chomper */ + { + struct devfs_entry *child; + + write_lock (&de->u.dir.lock); + de->u.dir.no_more_additions = TRUE; + child = de->u.dir.first; + unregister (de, child); + if (!child) break; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_UNREGISTER) printk ("%s: unregister(): child->name: \"%s\" child: %p\n", DEVFS_NAME, child->name, child); #endif - unregister (child); + devfs_put (child); } } /* End Function unregister */ @@ -1484,20 +1623,22 @@ void devfs_unregister (devfs_handle_t de) { - if (de == NULL) return; + if ( (de == NULL) || (de->parent == NULL) ) return; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_UNREGISTER) printk ("%s: devfs_unregister(): de->name: \"%s\" de: %p\n", DEVFS_NAME, de->name, de); #endif - unregister (de); + write_lock (&de->parent->u.dir.lock); + unregister (de->parent, de); + devfs_put (de); } /* End Function devfs_unregister */ static int devfs_do_symlink (devfs_handle_t dir, const char *name, unsigned int flags, const char *link, devfs_handle_t *handle, void *info) { - int is_new; + int err; unsigned int linklength; char *newlink; struct devfs_entry *de; @@ -1522,28 +1663,31 @@ return -ENOMEM; memcpy (newlink, link, linklength); newlink[linklength] = '\0'; - if ( ( de = search_for_entry (dir, name, strlen (name), TRUE, TRUE, - &is_new, FALSE) ) == NULL ) - { - kfree (newlink); - return -ENOMEM; - } - down_write (&symlink_rwsem); - if (de->registered) + if ( ( de = _devfs_prepare_leaf (&dir, name, S_IFLNK | S_IRUGO | S_IXUGO) ) + == NULL ) { - up_write (&symlink_rwsem); - kfree (newlink); - printk ("%s: devfs_do_symlink(%s): entry already exists\n", + printk ("%s: 
devfs_do_symlink(%s): could not prepare leaf\n", DEVFS_NAME, name); - return -EEXIST; + kfree (newlink); + return -ENOTDIR; } - de->mode = S_IFLNK | S_IRUGO | S_IXUGO; de->info = info; de->hide = (flags & DEVFS_FL_HIDE) ? TRUE : FALSE; de->u.symlink.linkname = newlink; de->u.symlink.length = linklength; - de->registered = TRUE; - up_write (&symlink_rwsem); + if ( ( err = _devfs_append_entry (dir, de, FALSE, NULL) ) != 0 ) + { + printk ("%s: devfs_do_symlink(%s): could not append to parent, err: %d\n", + DEVFS_NAME, name, err); + devfs_put (dir); + return err; + } + devfs_put (dir); +#ifdef CONFIG_DEVFS_DEBUG + spin_lock (&stat_lock); + stat_num_bytes += linklength + 1; + spin_unlock (&stat_lock); +#endif if (handle != NULL) *handle = de; return 0; } /* End Function devfs_do_symlink */ @@ -1593,7 +1737,7 @@ devfs_handle_t devfs_mk_dir (devfs_handle_t dir, const char *name, void *info) { - int is_new; + int err; struct devfs_entry *de; if (name == NULL) @@ -1601,36 +1745,26 @@ printk ("%s: devfs_mk_dir(): NULL name pointer\n", DEVFS_NAME); return NULL; } - de = search_for_entry (dir, name, strlen (name), TRUE, TRUE, &is_new, - FALSE); - if (de == NULL) + if ( ( de = _devfs_prepare_leaf (&dir, name, MODE_DIR) ) == NULL ) { - printk ("%s: devfs_mk_dir(): could not create entry: \"%s\"\n", + printk ("%s: devfs_mk_dir(%s): could not prepare leaf\n", DEVFS_NAME, name); return NULL; } - if (!S_ISDIR (de->mode) && de->registered) + de->info = info; + if ( ( err = _devfs_append_entry (dir, de, FALSE, NULL) ) != 0 ) { - printk ("%s: devfs_mk_dir(): existing non-directory entry: \"%s\"\n", - DEVFS_NAME, name); + printk ("%s: devfs_mk_dir(%s): could not append to dir: %p \"%s\", err: %d\n", + DEVFS_NAME, name, dir, dir->name, err); + devfs_put (dir); return NULL; } #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_REGISTER) - printk ("%s: devfs_mk_dir(%s): de: %p %s\n", - DEVFS_NAME, name, de, is_new ? "new" : "existing"); + printk ("%s: devfs_mk_dir(%s): de: %p dir: %p \"%s\"\n", + DEVFS_NAME, name, de, dir, dir->name); #endif - if (!S_ISDIR (de->mode) && !is_new) - { - /* Transmogrifying an old entry */ - de->u.dir.first = NULL; - de->u.dir.last = NULL; - } - de->mode = S_IFDIR | S_IRUGO | S_IXUGO; - de->info = info; - if (!de->registered) de->u.dir.num_removable = 0; - de->hide = FALSE; - de->registered = TRUE; + devfs_put (dir); return de; } /* End Function devfs_mk_dir */ @@ -1660,8 +1794,8 @@ if ( (name != NULL) && (name[0] == '\0') ) name = NULL; de = find_entry (dir, name, 0, major, minor, type, traverse_symlinks); - if (de == NULL) return NULL; - if (!de->registered) return NULL; + devfs_put (de); /* FIXME: in 2.5 consider dropping this and require a + call to devfs_put() */ return de; } /* End Function devfs_find_handle */ @@ -1679,7 +1813,6 @@ unsigned int fl = 0; if (de == NULL) return -EINVAL; - if (!de->registered) return -ENODEV; if (de->hide) fl |= DEVFS_FL_HIDE; if ( S_ISCHR (de->mode) || S_ISBLK (de->mode) || S_ISREG (de->mode) ) { @@ -1703,7 +1836,6 @@ int devfs_set_flags (devfs_handle_t de, unsigned int flags) { if (de == NULL) return -EINVAL; - if (!de->registered) return -ENODEV; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_SET_FLAGS) printk ("%s: devfs_set_flags(): de->name: \"%s\"\n", @@ -1714,16 +1846,6 @@ { de->u.fcb.auto_owner = (flags & DEVFS_FL_AUTO_OWNER) ? TRUE : FALSE; de->u.fcb.aopen_notify = (flags & DEVFS_FL_AOPEN_NOTIFY) ? 
TRUE:FALSE; - if ( de->u.fcb.removable && !(flags & DEVFS_FL_REMOVABLE) ) - { - de->u.fcb.removable = FALSE; - --de->parent->u.dir.num_removable; - } - else if ( !de->u.fcb.removable && (flags & DEVFS_FL_REMOVABLE) ) - { - de->u.fcb.removable = TRUE; - ++de->parent->u.dir.num_removable; - } } return 0; } /* End Function devfs_set_flags */ @@ -1742,7 +1864,6 @@ unsigned int *minor) { if (de == NULL) return -EINVAL; - if (!de->registered) return -ENODEV; if ( S_ISDIR (de->mode) ) return -EISDIR; if ( !S_ISCHR (de->mode) && !S_ISBLK (de->mode) ) return -EINVAL; if (major != NULL) *major = de->u.fcb.u.device.major; @@ -1762,7 +1883,7 @@ { if (!inode || !inode->i_sb) return NULL; if (inode->i_sb->s_magic != DEVFS_SUPER_MAGIC) return NULL; - return get_devfs_entry_from_vfs_inode (inode, TRUE); + return get_devfs_entry_from_vfs_inode (inode); } /* End Function devfs_get_handle_from_inode */ @@ -1780,19 +1901,20 @@ int devfs_generate_path (devfs_handle_t de, char *path, int buflen) { int pos; +#define NAMEOF(de) ( (de)->mode ? (de)->name : (de)->u.name ) if (de == NULL) return -EINVAL; if (de->namelen >= buflen) return -ENAMETOOLONG; /* Must be first */ path[buflen - 1] = '\0'; if (de->parent == NULL) return buflen - 1; /* Don't prepend root */ pos = buflen - de->namelen - 1; - memcpy (path + pos, de->name, de->namelen); + memcpy (path + pos, NAMEOF (de), de->namelen); for (de = de->parent; de->parent != NULL; de = de->parent) { if (pos - de->namelen - 1 < 0) return -ENAMETOOLONG; path[--pos] = '/'; pos -= de->namelen; - memcpy (path + pos, de->name, de->namelen); + memcpy (path + pos, NAMEOF (de), de->namelen); } return pos; } /* End Function devfs_generate_path */ @@ -1808,7 +1930,6 @@ void *devfs_get_ops (devfs_handle_t de) { if (de == NULL) return NULL; - if (!de->registered) return NULL; if ( S_ISCHR (de->mode) || S_ISBLK (de->mode) || S_ISREG (de->mode) ) return de->u.fcb.ops; return NULL; @@ -1826,7 +1947,6 @@ int devfs_set_file_size (devfs_handle_t de, unsigned long size) { if (de == NULL) return -EINVAL; - if (!de->registered) return -EINVAL; if ( !S_ISREG (de->mode) ) return -EINVAL; if (de->u.fcb.u.file.size == size) return 0; de->u.fcb.u.file.size = size; @@ -1846,7 +1966,6 @@ void *devfs_get_info (devfs_handle_t de) { if (de == NULL) return NULL; - if (!de->registered) return NULL; return de->info; } /* End Function devfs_get_info */ @@ -1861,7 +1980,6 @@ int devfs_set_info (devfs_handle_t de, void *info) { if (de == NULL) return -EINVAL; - if (!de->registered) return -EINVAL; de->info = info; return 0; } /* End Function devfs_set_info */ @@ -1876,7 +1994,6 @@ devfs_handle_t devfs_get_parent (devfs_handle_t de) { if (de == NULL) return NULL; - if (!de->registered) return NULL; return de->parent; } /* End Function devfs_get_parent */ @@ -1891,7 +2008,6 @@ devfs_handle_t devfs_get_first_child (devfs_handle_t de) { if (de == NULL) return NULL; - if (!de->registered) return NULL; if ( !S_ISDIR (de->mode) ) return NULL; return de->u.dir.first; } /* End Function devfs_get_first_child */ @@ -1907,7 +2023,6 @@ devfs_handle_t devfs_get_next_sibling (devfs_handle_t de) { if (de == NULL) return NULL; - if (!de->registered) return NULL; return de->next; } /* End Function devfs_get_next_sibling */ @@ -1961,7 +2076,6 @@ const char *devfs_get_name (devfs_handle_t de, unsigned int *namelen) { if (de == NULL) return NULL; - if (!de->registered) return NULL; if (namelen != NULL) *namelen = de->namelen; return de->name; } /* End Function devfs_get_name */ @@ -2057,8 +2171,10 @@ {"dmod", 
DEBUG_MODULE_LOAD, &devfs_debug_init}, {"dreg", DEBUG_REGISTER, &devfs_debug_init}, {"dunreg", DEBUG_UNREGISTER, &devfs_debug_init}, + {"dfree", DEBUG_FREE, &devfs_debug_init}, {"diget", DEBUG_I_GET, &devfs_debug_init}, {"dchange", DEBUG_SET_FLAGS, &devfs_debug_init}, + {"dsread", DEBUG_S_READ, &devfs_debug_init}, {"dichange", DEBUG_I_CHANGE, &devfs_debug_init}, {"dimknod", DEBUG_I_MKNOD, &devfs_debug_init}, {"dilookup", DEBUG_I_LOOKUP, &devfs_debug_init}, @@ -2129,34 +2245,31 @@ /** - * try_modload - Notify devfsd of an inode lookup. + * try_modload - Notify devfsd of an inode lookup by a non-devfsd process. * @parent: The parent devfs entry. * @fs_info: The filesystem info. * @name: The device name. * @namelen: The number of characters in @name. - * @buf: A working area that will be used. This must not go out of scope until - * devfsd is idle again. + * @buf: A working area that will be used. This must not go out of scope + * until devfsd is idle again. * * Returns 0 on success, else a negative error code. */ static int try_modload (struct devfs_entry *parent, struct fs_info *fs_info, const char *name, unsigned namelen, - char buf[STRING_LENGTH]) + struct devfs_entry *buf) { - int pos = STRING_LENGTH - namelen - 1; - if ( !( fs_info->devfsd_event_mask & (1 << DEVFSD_NOTIFY_LOOKUP) ) ) return -ENOENT; if ( is_devfsd_or_child (fs_info) ) return -ENOENT; - if (namelen >= STRING_LENGTH - 1) return -ENAMETOOLONG; - memcpy (buf + pos, name, namelen); - buf[STRING_LENGTH - 1] = '\0'; - if (parent->parent != NULL) pos = devfs_generate_path (parent, buf, pos); - if (pos < 0) return pos; - buf[STRING_LENGTH - namelen - 2] = '/'; - if ( !devfsd_notify_one (buf + pos, DEVFSD_NOTIFY_LOOKUP, 0, - current->euid, current->egid, fs_info) ) + memset (buf, 0, sizeof *buf); + atomic_set (&buf->refcount, 1); + buf->parent = parent; + buf->namelen = namelen; + buf->u.name = name; + if ( !devfsd_notify_de (buf, DEVFSD_NOTIFY_LOOKUP, 0, + current->euid, current->egid, fs_info) ) return -ENOENT; /* Possible success */ return 0; @@ -2206,7 +2319,6 @@ if (dir->u.dir.num_removable < 1) return; for (de = dir->u.dir.first; de != NULL; de = de->next) { - if (!de->registered) continue; if ( !S_ISBLK (de->mode) ) continue; if (!de->u.fcb.removable) continue; check_disc_changed (de); @@ -2229,7 +2341,6 @@ for (de = dir->u.dir.first; de != NULL; de = de->next) { - if (!de->registered) continue; if ( !S_ISBLK (de->mode) ) continue; if (!de->u.fcb.removable) continue; if (strcmp (de->name, "disc") == 0) return check_disc_changed (de); @@ -2258,7 +2369,7 @@ struct inode *inode = dentry->d_inode; struct fs_info *fs_info = inode->i_sb->u.generic_sbp; - de = get_devfs_entry_from_vfs_inode (inode, TRUE); + de = get_devfs_entry_from_vfs_inode (inode); if (de == NULL) return -ENODEV; retval = inode_change_ok (inode, iattr); if (retval != 0) return retval; @@ -2276,15 +2387,19 @@ #endif /* Inode is not on hash chains, thus must save permissions here rather than in a write_inode() method */ - de->inode.mode = inode->i_mode; - de->inode.uid = inode->i_uid; - de->inode.gid = inode->i_gid; + if ( ( !S_ISREG (inode->i_mode) && !S_ISCHR (inode->i_mode) && + !S_ISBLK (inode->i_mode) ) || !de->u.fcb.auto_owner ) + { + de->mode = inode->i_mode; + de->inode.uid = inode->i_uid; + de->inode.gid = inode->i_gid; + } de->inode.atime = inode->i_atime; de->inode.mtime = inode->i_mtime; de->inode.ctime = inode->i_ctime; if ( iattr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID) ) - devfsd_notify_one (de, DEVFSD_NOTIFY_CHANGE, inode->i_mode, - 
inode->i_uid, inode->i_gid, fs_info); + devfsd_notify_de (de, DEVFSD_NOTIFY_CHANGE, inode->i_mode, + inode->i_uid, inode->i_gid, fs_info); return 0; } /* End Function devfs_notify_change */ @@ -2299,11 +2414,10 @@ return 0; } /* End Function devfs_statfs */ -static void devfs_clear_inode(struct inode *inode) +static void devfs_clear_inode (struct inode *inode) { - if (S_ISBLK(inode->i_mode)) - bdput(inode->i_bdev); -} + if ( S_ISBLK (inode->i_mode) ) bdput (inode->i_bdev); +} /* End Function devfs_clear_inode */ static struct super_operations devfs_sops = { @@ -2319,32 +2433,37 @@ * @de: The devfs inode. * @dentry: The dentry to register with the devfs inode. * - * Returns the inode on success, else %NULL. + * Returns the inode on success, else %NULL. An implicit devfs_get() is + * performed if the inode is created. */ static struct inode *get_vfs_inode (struct super_block *sb, struct devfs_entry *de, struct dentry *dentry) { + int is_fcb = FALSE; struct inode *inode; - if (de->inode.dentry != NULL) - { - printk ("%s: get_vfs_inode(%u): old de->inode.dentry: %p \"%s\" new dentry: %p \"%s\"\n", - DEVFS_NAME, de->inode.ino, - de->inode.dentry, de->inode.dentry->d_name.name, - dentry, dentry->d_name.name); - printk (" old inode: %p\n", de->inode.dentry->d_inode); - return NULL; - } + if (de->prev == de) return NULL; /* Quick check to see if unhooked */ if ( ( inode = new_inode (sb) ) == NULL ) { printk ("%s: get_vfs_inode(%s): new_inode() failed, de: %p\n", DEVFS_NAME, de->name, de); return NULL; } - de->inode.dentry = dentry; - inode->u.generic_ip = de; + if (de->parent) + { + read_lock (&de->parent->u.dir.lock); + if (de->prev != de) de->inode.dentry = dentry; /* Not unhooked */ + read_unlock (&de->parent->u.dir.lock); + } + else de->inode.dentry = dentry; /* Root: no locking needed */ + if (de->inode.dentry != dentry) + { /* Must have been unhooked */ + iput (inode); + return NULL; + } + inode->u.generic_ip = devfs_get (de); inode->i_ino = de->inode.ino; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_I_GET) @@ -2356,37 +2475,45 @@ inode->i_op = &devfs_iops; inode->i_fop = &devfs_fops; inode->i_rdev = NODEV; - if ( S_ISCHR (de->inode.mode) ) + if ( S_ISCHR (de->mode) ) { inode->i_rdev = MKDEV (de->u.fcb.u.device.major, de->u.fcb.u.device.minor); - inode->i_cdev = cdget (kdev_t_to_nr(inode->i_rdev)); + inode->i_cdev = cdget ( kdev_t_to_nr (inode->i_rdev) ); + is_fcb = TRUE; } - else if ( S_ISBLK (de->inode.mode) ) + else if ( S_ISBLK (de->mode) ) { inode->i_rdev = MKDEV (de->u.fcb.u.device.major, de->u.fcb.u.device.minor); - if (bd_acquire(inode) == 0) + if (bd_acquire (inode) == 0) { if (!inode->i_bdev->bd_op && de->u.fcb.ops) inode->i_bdev->bd_op = de->u.fcb.ops; } else printk ("%s: get_vfs_inode(%d): no block device from bdget()\n", DEVFS_NAME, (int) inode->i_ino); + is_fcb = TRUE; } - else if ( S_ISFIFO (de->inode.mode) ) inode->i_fop = &def_fifo_fops; - else if ( S_ISREG (de->inode.mode) ) inode->i_size = de->u.fcb.u.file.size; - else if ( S_ISDIR (de->inode.mode) ) + else if ( S_ISFIFO (de->mode) ) inode->i_fop = &def_fifo_fops; + else if ( S_ISREG (de->mode) ) + { + inode->i_size = de->u.fcb.u.file.size; + is_fcb = TRUE; + } + else if ( S_ISDIR (de->mode) ) { inode->i_op = &devfs_dir_iops; inode->i_fop = &devfs_dir_fops; } - else if ( S_ISLNK (de->inode.mode) ) + else if ( S_ISLNK (de->mode) ) { inode->i_op = &devfs_symlink_iops; inode->i_size = de->u.symlink.length; } - inode->i_mode = de->inode.mode; + if (is_fcb && de->u.fcb.auto_owner) + inode->i_mode = (de->mode & S_IFMT) | 
S_IRUGO | S_IWUGO; + else inode->i_mode = de->mode; inode->i_uid = de->inode.uid; inode->i_gid = de->inode.gid; inode->i_atime = de->inode.atime; @@ -2409,11 +2536,11 @@ int err, count; int stored = 0; struct fs_info *fs_info; - struct devfs_entry *parent, *de; + struct devfs_entry *parent, *de, *next = NULL; struct inode *inode = file->f_dentry->d_inode; fs_info = inode->i_sb->u.generic_sbp; - parent = get_devfs_entry_from_vfs_inode (file->f_dentry->d_inode, TRUE); + parent = get_devfs_entry_from_vfs_inode (file->f_dentry->d_inode); if ( (long) file->f_pos < 0 ) return -EINVAL; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_F_READDIR) @@ -2441,19 +2568,32 @@ default: /* Skip entries */ count = file->f_pos - 2; - for (de = parent->u.dir.first; (de != NULL) && (count > 0); - de = de->next) + read_lock (&parent->u.dir.lock); + for (de = parent->u.dir.first; de && (count > 0); de = de->next) if ( !IS_HIDDEN (de) ) --count; + devfs_get (de); + read_unlock (&parent->u.dir.lock); /* Now add all remaining entries */ - for (; de != NULL; de = de->next) + while (de) { - if ( IS_HIDDEN (de) ) continue; - err = (*filldir) (dirent, de->name, de->namelen, - file->f_pos, de->inode.ino, de->mode >> 12); + if ( IS_HIDDEN (de) ) err = 0; + else + { + err = (*filldir) (dirent, de->name, de->namelen, + file->f_pos, de->inode.ino, de->mode >> 12); + if (err >= 0) + { + file->f_pos++; + ++stored; + } + } + read_lock (&parent->u.dir.lock); + next = devfs_get (de->next); + read_unlock (&parent->u.dir.lock); + devfs_put (de); + de = next; if (err == -EINVAL) break; if (err < 0) return err; - file->f_pos++; - ++stored; } break; } @@ -2467,14 +2607,9 @@ struct devfs_entry *de; struct fs_info *fs_info = inode->i_sb->u.generic_sbp; - lock_kernel (); - de = get_devfs_entry_from_vfs_inode (inode, TRUE); - err = -ENODEV; - if (de == NULL) - goto out; - err = 0; - if ( S_ISDIR (de->mode) ) - goto out; + de = get_devfs_entry_from_vfs_inode (inode); + if (de == NULL) return -ENODEV; + if ( S_ISDIR (de->mode) ) return 0; df = &de->u.fcb; file->private_data = de->info; if ( S_ISBLK (inode->i_mode) ) @@ -2482,7 +2617,7 @@ file->f_op = &def_blk_fops; if (df->ops) inode->i_bdev->bd_op = df->ops; } - else file->f_op = fops_get ( (struct file_operations*) df->ops ); + else file->f_op = fops_get ( (struct file_operations *) df->ops ); if (file->f_op) err = file->f_op->open ? 
(*file->f_op->open) (inode, file) : 0; else @@ -2491,39 +2626,33 @@ if ( S_ISCHR (inode->i_mode) ) err = chrdev_open (inode, file); else err = -ENODEV; } - if (err < 0) goto out; + if (err < 0) return err; /* Open was successful */ - err = 0; - if (df->open) goto out; + if (df->open) return 0; df->open = TRUE; /* This is the first open */ if (df->auto_owner) { - /* Change the ownership/protection */ - de->inode.mode = (de->inode.mode & ~S_IALLUGO) |(de->mode & S_IRWXUGO); - de->inode.uid = current->euid; - de->inode.gid = current->egid; - inode->i_mode = de->inode.mode; - inode->i_uid = de->inode.uid; - inode->i_gid = de->inode.gid; + /* Change the ownership/protection to what driver specified */ + inode->i_mode = de->mode; + inode->i_uid = current->euid; + inode->i_gid = current->egid; } if (df->aopen_notify) - devfsd_notify_one (de, DEVFSD_NOTIFY_ASYNC_OPEN, inode->i_mode, - current->euid, current->egid, fs_info); -out: - unlock_kernel (); - return err; + devfsd_notify_de (de, DEVFSD_NOTIFY_ASYNC_OPEN, inode->i_mode, + current->euid, current->egid, fs_info); + return 0; } /* End Function devfs_open */ static struct file_operations devfs_fops = { - open: devfs_open, + open: devfs_open, }; static struct file_operations devfs_dir_fops = { - read: generic_read_dir, + read: generic_read_dir, readdir: devfs_readdir, - open: devfs_open, + open: devfs_open, }; @@ -2556,16 +2685,18 @@ { struct devfs_entry *de; - lock_kernel (); - de = get_devfs_entry_from_vfs_inode (inode, FALSE); + de = get_devfs_entry_from_vfs_inode (inode); #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_D_IPUT) printk ("%s: d_iput(): dentry: %p inode: %p de: %p de->dentry: %p\n", DEVFS_NAME, dentry, inode, de, de->inode.dentry); #endif - if (de->inode.dentry == dentry) de->inode.dentry = NULL; - unlock_kernel (); + if ( de->inode.dentry && (de->inode.dentry != dentry) ) + OOPS ("%s: d_iput(%s): de: %p dentry: %p de->dentry: %p\n", + DEVFS_NAME, de->name, de, dentry, de->inode.dentry); + de->inode.dentry = NULL; iput (inode); + devfs_put (de); } /* End Function devfs_d_iput */ static int devfs_d_delete (struct dentry *dentry); @@ -2610,7 +2741,7 @@ return 1; } fs_info = inode->i_sb->u.generic_sbp; - de = get_devfs_entry_from_vfs_inode (inode, TRUE); + de = get_devfs_entry_from_vfs_inode (inode); #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_D_DELETE) printk ("%s: d_delete(): dentry: %p inode: %p devfs_entry: %p\n", @@ -2622,14 +2753,11 @@ if (!de->u.fcb.open) return 0; de->u.fcb.open = FALSE; if (de->u.fcb.aopen_notify) - devfsd_notify_one (de, DEVFSD_NOTIFY_CLOSE, inode->i_mode, - current->euid, current->egid, fs_info); + devfsd_notify_de (de, DEVFSD_NOTIFY_CLOSE, inode->i_mode, + current->euid, current->egid, fs_info); if (!de->u.fcb.auto_owner) return 0; /* Change the ownership/protection back */ - de->inode.mode = (de->inode.mode & ~S_IALLUGO) | S_IRUGO | S_IWUGO; - de->inode.uid = de->u.fcb.default_uid; - de->inode.gid = de->u.fcb.default_gid; - inode->i_mode = de->inode.mode; + inode->i_mode = (de->mode & S_IFMT) | S_IRUGO | S_IWUGO; inode->i_uid = de->inode.uid; inode->i_gid = de->inode.gid; return 0; @@ -2637,59 +2765,38 @@ static int devfs_d_revalidate_wait (struct dentry *dentry, int flags) { - devfs_handle_t de = dentry->d_fsdata; - struct inode *dir; - struct fs_info *fs_info; + struct inode *dir = dentry->d_parent->d_inode; + struct fs_info *fs_info = dir->i_sb->u.generic_sbp; - lock_kernel (); - dir = dentry->d_parent->d_inode; - fs_info = dir->i_sb->u.generic_sbp; - if (!de || de->registered) + if ( 
!dentry->d_inode && is_devfsd_or_child (fs_info) ) { - struct inode *inode; - -#ifdef CONFIG_DEVFS_DEBUG - char txt[STRING_LENGTH]; - - memset (txt, 0, STRING_LENGTH); - memcpy (txt, dentry->d_name.name, - (dentry->d_name.len >= STRING_LENGTH) ? - (STRING_LENGTH - 1) : dentry->d_name.len); - if (devfs_debug & DEBUG_I_LOOKUP) - printk ("%s: d_revalidate(): dentry: %p name: \"%s\" by: \"%s\"\n", - DEVFS_NAME, dentry, txt, current->comm); + devfs_handle_t de; + devfs_handle_t parent = get_devfs_entry_from_vfs_inode (dir); + struct inode *inode; + +#ifdef CONFIG_DEVFS_DEBUG + if (devfs_debug & DEBUG_I_LOOKUP) + printk ("%s: d_revalidate(%s): dentry: %p by: \"%s\"\n", + DEVFS_NAME, dentry->d_name.name, dentry, current->comm); +#endif + read_lock (&parent->u.dir.lock); + de = _devfs_search_dir (parent, dentry->d_name.name, + dentry->d_name.len); + read_unlock (&parent->u.dir.lock); + if (de == NULL) return 1; + /* Create an inode, now that the driver information is available */ + inode = get_vfs_inode (dir->i_sb, de, dentry); + devfs_put (de); + if (!inode) return 1; +#ifdef CONFIG_DEVFS_DEBUG + if (devfs_debug & DEBUG_I_LOOKUP) + printk ("%s: d_revalidate(): new VFS inode(%u): %p devfs_entry: %p\n", + DEVFS_NAME, de->inode.ino, inode, de); #endif - if (de == NULL) - { - devfs_handle_t parent; - - parent = get_devfs_entry_from_vfs_inode (dir, TRUE); - de = search_for_entry_in_dir (parent, dentry->d_name.name, - dentry->d_name.len, FALSE); - } - if (de == NULL) goto out; - /* Create an inode, now that the driver information is available - */ - if (de->no_persistence) update_devfs_inode_from_entry (de); - else if (de->inode.ctime == 0) update_devfs_inode_from_entry (de); - else de->inode.mode = - (de->mode & ~S_IALLUGO) | (de->inode.mode & S_IALLUGO); - if ( ( inode = get_vfs_inode (dir->i_sb, de, dentry) ) == NULL ) - goto out; -#ifdef CONFIG_DEVFS_DEBUG - if (devfs_debug & DEBUG_I_LOOKUP) - printk ("%s: d_revalidate(): new VFS inode(%u): %p devfs_entry: %p\n", - DEVFS_NAME, de->inode.ino, inode, de); -#endif - d_instantiate (dentry, inode); - goto out; - } + d_instantiate (dentry, inode); + return 1; } if ( wait_for_devfsd_finished (fs_info) ) dentry->d_op = &devfs_dops; -out: - unlock_kernel (); return 1; } /* End Function devfs_d_revalidate_wait */ @@ -2701,67 +2808,61 @@ struct fs_info *fs_info; struct devfs_entry *parent, *de; struct inode *inode; - char txt[STRING_LENGTH]; /* Set up the dentry operations before anything else, to ensure cleaning up on any error */ dentry->d_op = &devfs_dops; - memset (txt, 0, STRING_LENGTH); - memcpy (txt, dentry->d_name.name, - (dentry->d_name.len >= STRING_LENGTH) ? 
- (STRING_LENGTH - 1) : dentry->d_name.len); fs_info = dir->i_sb->u.generic_sbp; /* First try to get the devfs entry for this directory */ - parent = get_devfs_entry_from_vfs_inode (dir, TRUE); + parent = get_devfs_entry_from_vfs_inode (dir); #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_I_LOOKUP) printk ("%s: lookup(%s): dentry: %p parent: %p by: \"%s\"\n", - DEVFS_NAME, txt, dentry, parent, current->comm); + DEVFS_NAME, dentry->d_name.name, dentry, parent,current->comm); #endif if (parent == NULL) return ERR_PTR (-ENOENT); - /* Try to reclaim an existing devfs entry */ - de = search_for_entry_in_dir (parent, - dentry->d_name.name, dentry->d_name.len, - FALSE); - if ( ( (de == NULL) || !de->registered ) && - (parent->u.dir.num_removable > 0) && + read_lock (&parent->u.dir.lock); + de = _devfs_search_dir (parent, dentry->d_name.name, dentry->d_name.len); + read_unlock (&parent->u.dir.lock); + if ( (de == NULL) && (parent->u.dir.num_removable > 0) && get_removable_partition (parent, dentry->d_name.name, dentry->d_name.len) ) { - if (de == NULL) - de = search_for_entry_in_dir (parent, dentry->d_name.name, - dentry->d_name.len, FALSE); + read_lock (&parent->u.dir.lock); + de = _devfs_search_dir (parent, dentry->d_name.name, + dentry->d_name.len); + read_unlock (&parent->u.dir.lock); } - if ( (de == NULL) || !de->registered ) - { - /* Try with devfsd. For any kind of failure, leave a negative dentry + if (de == NULL) + { /* Try with devfsd. For any kind of failure, leave a negative dentry so someone else can deal with it (in the case where the sysadmin does a mknod()). It's important to do this before hashing the dentry, so that the devfsd queue is filled before revalidates can start */ + struct devfs_entry tmp; + if (try_modload (parent, fs_info, - dentry->d_name.name, dentry->d_name.len, txt) < 0) + dentry->d_name.name, dentry->d_name.len, &tmp) < 0) { d_add (dentry, NULL); return NULL; } /* devfsd claimed success */ dentry->d_op = &devfs_wait_dops; - dentry->d_fsdata = de; d_add (dentry, NULL); /* Open the floodgates */ /* Unlock directory semaphore, which will release any waiters. 
They will get the hashed dentry, and may be forced to wait for revalidation */ up (&dir->i_sem); - devfs_d_revalidate_wait (dentry, 0); /* I might have to wait too */ + devfs_d_revalidate_wait (dentry, 0); /* I might have to wait too */ down (&dir->i_sem); /* Grab it again because them's the rules */ /* If someone else has been so kind as to make the inode, we go home early */ if (dentry->d_inode) return NULL; - if (de && !de->registered) return NULL; - if (de == NULL) - de = search_for_entry_in_dir (parent, dentry->d_name.name, - dentry->d_name.len, FALSE); + read_lock (&parent->u.dir.lock); + de = _devfs_search_dir (parent, dentry->d_name.name, + dentry->d_name.len); + read_unlock (&parent->u.dir.lock); if (de == NULL) return NULL; /* OK, there's an entry now, but no VFS inode yet */ } @@ -2771,58 +2872,47 @@ d_add (dentry, NULL); /* Open the floodgates */ } /* Create an inode, now that the driver information is available */ - if (de->no_persistence) update_devfs_inode_from_entry (de); - else if (de->inode.ctime == 0) update_devfs_inode_from_entry (de); - else de->inode.mode = - (de->mode & ~S_IALLUGO) | (de->inode.mode & S_IALLUGO); - if ( ( inode = get_vfs_inode (dir->i_sb, de, dentry) ) == NULL ) - return ERR_PTR (-ENOMEM); + inode = get_vfs_inode (dir->i_sb, de, dentry); + devfs_put (de); + if (!inode) return ERR_PTR (-ENOMEM); #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_I_LOOKUP) printk ("%s: lookup(): new VFS inode(%u): %p devfs_entry: %p\n", DEVFS_NAME, de->inode.ino, inode, de); #endif d_instantiate (dentry, inode); - /* Unlock directory semaphore, which will release any waiters. They will - get the hashed dentry, and may be forced to wait for revalidation */ - up (&dir->i_sem); if (dentry->d_op == &devfs_wait_dops) - devfs_d_revalidate_wait (dentry, 0); /* I might have to wait too */ - down (&dir->i_sem); /* Grab it again because them's the rules */ + { /* Unlock directory semaphore, which will release any waiters. 
They + will get the hashed dentry, and may be forced to wait for + revalidation */ + up (&dir->i_sem); + devfs_d_revalidate_wait (dentry, 0); /* I might have to wait too */ + down (&dir->i_sem); /* Grab it again because them's the rules */ + } return NULL; } /* End Function devfs_lookup */ static int devfs_unlink (struct inode *dir, struct dentry *dentry) { + int unhooked; struct devfs_entry *de; struct inode *inode = dentry->d_inode; #ifdef CONFIG_DEVFS_DEBUG - char txt[STRING_LENGTH]; - if (devfs_debug & DEBUG_I_UNLINK) - { - memset (txt, 0, STRING_LENGTH); - memcpy (txt, dentry->d_name.name, dentry->d_name.len); - txt[STRING_LENGTH - 1] = '\0'; - printk ("%s: unlink(%s)\n", DEVFS_NAME, txt); - } + printk ("%s: unlink(%s)\n", DEVFS_NAME, dentry->d_name.name); #endif - - de = get_devfs_entry_from_vfs_inode (dentry->d_inode, TRUE); + de = get_devfs_entry_from_vfs_inode (inode); if (de == NULL) return -ENOENT; - devfsd_notify_one (de, DEVFSD_NOTIFY_DELETE, inode->i_mode, - inode->i_uid, inode->i_gid, dir->i_sb->u.generic_sbp); - de->registered = FALSE; - de->hide = TRUE; - if ( S_ISLNK (de->mode) ) - { - down_write (&symlink_rwsem); - if (de->u.symlink.linkname) kfree (de->u.symlink.linkname); - de->u.symlink.linkname = NULL; - up_write (&symlink_rwsem); - } - free_dentries (de); + if (!de->vfs_created) return -EPERM; + write_lock (&de->parent->u.dir.lock); + unhooked = _devfs_unhook (de); + write_unlock (&de->parent->u.dir.lock); + if (!unhooked) return -ENOENT; + devfsd_notify_de (de, DEVFSD_NOTIFY_DELETE, inode->i_mode, + inode->i_uid, inode->i_gid, dir->i_sb->u.generic_sbp); + free_dentry (de); + devfs_put (de); return 0; } /* End Function devfs_unlink */ @@ -2836,7 +2926,7 @@ fs_info = dir->i_sb->u.generic_sbp; /* First try to get the devfs entry for this directory */ - parent = get_devfs_entry_from_vfs_inode (dir, TRUE); + parent = get_devfs_entry_from_vfs_inode (dir); if (parent == NULL) return -ENOENT; err = devfs_do_symlink (parent, dentry->d_name.name, DEVFS_FL_NONE, symname, &de, NULL); @@ -2846,7 +2936,9 @@ DEVFS_NAME, err); #endif if (err < 0) return err; - de->inode.mode = de->mode; + de->vfs_created = TRUE; + de->inode.uid = current->euid; + de->inode.gid = current->egid; de->inode.atime = CURRENT_TIME; de->inode.mtime = CURRENT_TIME; de->inode.ctime = CURRENT_TIME; @@ -2857,50 +2949,33 @@ printk ("%s: symlink(): new VFS inode(%u): %p dentry: %p\n", DEVFS_NAME, de->inode.ino, inode, dentry); #endif - de->hide = FALSE; d_instantiate (dentry, inode); - devfsd_notify_one (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, - inode->i_uid, inode->i_gid, fs_info); + devfsd_notify_de (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, + inode->i_uid, inode->i_gid, fs_info); return 0; } /* End Function devfs_symlink */ static int devfs_mkdir (struct inode *dir, struct dentry *dentry, int mode) { - int is_new; + int err; struct fs_info *fs_info; struct devfs_entry *parent, *de; struct inode *inode; - mode = (mode & ~S_IFMT) | S_IFDIR; + mode = (mode & ~S_IFMT) | S_IFDIR; /* VFS doesn't pass S_IFMT part */ fs_info = dir->i_sb->u.generic_sbp; - /* First try to get the devfs entry for this directory */ - parent = get_devfs_entry_from_vfs_inode (dir, TRUE); + parent = get_devfs_entry_from_vfs_inode (dir); if (parent == NULL) return -ENOENT; - /* Try to reclaim an existing devfs entry, create if there isn't one */ - de = search_for_entry (parent, dentry->d_name.name, dentry->d_name.len, - FALSE, TRUE, &is_new, FALSE); - if (de == NULL) return -ENOMEM; - if (de->registered) - { - printk ("%s: mkdir(): existing 
entry\n", DEVFS_NAME); - return -EEXIST; - } - de->hide = FALSE; - if (!S_ISDIR (de->mode) && !is_new) - { - /* Transmogrifying an old entry */ - de->u.dir.first = NULL; - de->u.dir.last = NULL; - } - de->mode = mode; - de->u.dir.num_removable = 0; - de->inode.mode = mode; + de = _devfs_alloc_entry (dentry->d_name.name, dentry->d_name.len, mode); + if (!de) return -ENOMEM; + de->vfs_created = TRUE; + if ( ( err = _devfs_append_entry (parent, de, FALSE, NULL) ) != 0 ) + return err; de->inode.uid = current->euid; de->inode.gid = current->egid; de->inode.atime = CURRENT_TIME; de->inode.mtime = CURRENT_TIME; de->inode.ctime = CURRENT_TIME; - de->registered = TRUE; if ( ( inode = get_vfs_inode (dir->i_sb, de, dentry) ) == NULL ) return -ENOMEM; #ifdef CONFIG_DEVFS_DEBUG @@ -2909,100 +2984,73 @@ DEVFS_NAME, de->inode.ino, inode, dentry); #endif d_instantiate (dentry, inode); - devfsd_notify_one (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, - inode->i_uid, inode->i_gid, fs_info); + devfsd_notify_de (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, + inode->i_uid, inode->i_gid, fs_info); return 0; } /* End Function devfs_mkdir */ static int devfs_rmdir (struct inode *dir, struct dentry *dentry) { - int has_children = FALSE; + int err = 0; + struct devfs_entry *de; struct fs_info *fs_info; - struct devfs_entry *de, *child; struct inode *inode = dentry->d_inode; if (dir->i_sb->u.generic_sbp != inode->i_sb->u.generic_sbp) return -EINVAL; fs_info = dir->i_sb->u.generic_sbp; - de = get_devfs_entry_from_vfs_inode (inode, TRUE); + de = get_devfs_entry_from_vfs_inode (inode); if (de == NULL) return -ENOENT; if ( !S_ISDIR (de->mode) ) return -ENOTDIR; - for (child = de->u.dir.first; child != NULL; child = child->next) - { - if (child->registered) - { - has_children = TRUE; - break; - } - } - if (has_children) return -ENOTEMPTY; - devfsd_notify_one (de, DEVFSD_NOTIFY_DELETE, inode->i_mode, - inode->i_uid, inode->i_gid, fs_info); - de->hide = TRUE; - de->registered = FALSE; - free_dentries (de); + if (!de->vfs_created) return -EPERM; + /* First ensure the directory is empty and will stay thay way */ + write_lock (&de->u.dir.lock); + de->u.dir.no_more_additions = TRUE; + if (de->u.dir.first) err = -ENOTEMPTY; + write_unlock (&de->u.dir.lock); + if (err) return err; + /* Now unhook the directory from it's parent */ + write_lock (&de->parent->u.dir.lock); + if ( !_devfs_unhook (de) ) err = -ENOENT; + write_unlock (&de->parent->u.dir.lock); + if (err) return err; + devfsd_notify_de (de, DEVFSD_NOTIFY_DELETE, inode->i_mode, + inode->i_uid, inode->i_gid, fs_info); + free_dentry (de); + devfs_put (de); return 0; } /* End Function devfs_rmdir */ static int devfs_mknod (struct inode *dir, struct dentry *dentry, int mode, int rdev) { - int is_new; + int err; struct fs_info *fs_info; struct devfs_entry *parent, *de; struct inode *inode; #ifdef CONFIG_DEVFS_DEBUG - char txt[STRING_LENGTH]; - if (devfs_debug & DEBUG_I_MKNOD) - { - memset (txt, 0, STRING_LENGTH); - memcpy (txt, dentry->d_name.name, dentry->d_name.len); - txt[STRING_LENGTH - 1] = '\0'; printk ("%s: mknod(%s): mode: 0%o dev: %d\n", - DEVFS_NAME, txt, mode, rdev); - } + DEVFS_NAME, dentry->d_name.name, mode, rdev); #endif - fs_info = dir->i_sb->u.generic_sbp; - /* First try to get the devfs entry for this directory */ - parent = get_devfs_entry_from_vfs_inode (dir, TRUE); + parent = get_devfs_entry_from_vfs_inode (dir); if (parent == NULL) return -ENOENT; - /* Try to reclaim an existing devfs entry, create if there isn't one */ - de = search_for_entry (parent, 
dentry->d_name.name, dentry->d_name.len, - FALSE, TRUE, &is_new, FALSE); - if (de == NULL) return -ENOMEM; - if (de->registered) - { - printk ("%s: mknod(): existing entry\n", DEVFS_NAME); - return -EEXIST; - } - de->info = NULL; - de->mode = mode; + de = _devfs_alloc_entry (dentry->d_name.name, dentry->d_name.len, mode); + if (!de) return -ENOMEM; + de->vfs_created = TRUE; if ( S_ISBLK (mode) || S_ISCHR (mode) ) { de->u.fcb.u.device.major = MAJOR (rdev); de->u.fcb.u.device.minor = MINOR (rdev); - de->u.fcb.default_uid = current->euid; - de->u.fcb.default_gid = current->egid; - de->u.fcb.ops = NULL; - de->u.fcb.auto_owner = FALSE; - de->u.fcb.aopen_notify = FALSE; - de->u.fcb.open = FALSE; - } - else if ( S_ISFIFO (mode) ) - { - de->u.fifo.uid = current->euid; - de->u.fifo.gid = current->egid; } - de->hide = FALSE; - de->inode.mode = mode; + if ( ( err = _devfs_append_entry (parent, de, FALSE, NULL) ) != 0 ) + return err; de->inode.uid = current->euid; de->inode.gid = current->egid; de->inode.atime = CURRENT_TIME; de->inode.mtime = CURRENT_TIME; de->inode.ctime = CURRENT_TIME; - de->registered = TRUE; if ( ( inode = get_vfs_inode (dir->i_sb, de, dentry) ) == NULL ) return -ENOMEM; #ifdef CONFIG_DEVFS_DEBUG @@ -3011,8 +3059,8 @@ DEVFS_NAME, de->inode.ino, inode, dentry); #endif d_instantiate (dentry, inode); - devfsd_notify_one (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, - inode->i_uid, inode->i_gid, fs_info); + devfsd_notify_de (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, + inode->i_uid, inode->i_gid, fs_info); return 0; } /* End Function devfs_mknod */ @@ -3021,12 +3069,9 @@ int err; struct devfs_entry *de; - de = get_devfs_entry_from_vfs_inode (dentry->d_inode, TRUE); + de = get_devfs_entry_from_vfs_inode (dentry->d_inode); if (!de) return -ENODEV; - down_read (&symlink_rwsem); - err = de->registered ? 
vfs_readlink (dentry, buffer, buflen, - de->u.symlink.linkname) : -ENODEV; - up_read (&symlink_rwsem); + err = vfs_readlink (dentry, buffer, buflen, de->u.symlink.linkname); return err; } /* End Function devfs_readlink */ @@ -3034,25 +3079,10 @@ { int err; struct devfs_entry *de; - char *copy; - de = get_devfs_entry_from_vfs_inode (dentry->d_inode, TRUE); + de = get_devfs_entry_from_vfs_inode (dentry->d_inode); if (!de) return -ENODEV; - down_read (&symlink_rwsem); - if (!de->registered) - { - up_read (&symlink_rwsem); - return -ENODEV; - } - copy = kmalloc (de->u.symlink.length + 1, GFP_KERNEL); - if (copy) memcpy (copy, de->u.symlink.linkname, de->u.symlink.length + 1); - up_read (&symlink_rwsem); - if (copy) - { - err = vfs_follow_link (nd, copy); - kfree (copy); - } - else err = -ENOMEM; + err = vfs_follow_link (nd, de->u.symlink.linkname); return err; } /* End Function devfs_follow_link */ @@ -3084,7 +3114,7 @@ { struct inode *root_inode = NULL; - if (get_root_entry () == NULL) goto out_no_root; + if (_devfs_get_root_entry () == NULL) goto out_no_root; atomic_set (&fs_info.devfsd_overrun_count, 0); init_waitqueue_head (&fs_info.devfsd_wait_queue); init_waitqueue_head (&fs_info.revalidate_wait_queue); @@ -3099,7 +3129,7 @@ sb->s_root = d_alloc_root (root_inode); if (!sb->s_root) goto out_no_root; #ifdef CONFIG_DEVFS_DEBUG - if (devfs_debug & DEBUG_DISABLED) + if (devfs_debug & DEBUG_S_READ) printk ("%s: read super, made devfs ptr: %p\n", DEVFS_NAME, sb->u.generic_sbp); #endif @@ -3123,6 +3153,7 @@ int done = FALSE; int ival; loff_t pos, devname_offset, tlen, rpos; + devfs_handle_t de; struct devfsd_buf_entry *entry; struct fs_info *fs_info = file->f_dentry->d_inode->i_sb->u.generic_sbp; struct devfsd_notify_struct *info = fs_info->devfsd_info; @@ -3149,40 +3180,28 @@ current->state = TASK_RUNNING; return -EINTR; } - set_current_state(TASK_INTERRUPTIBLE); + set_current_state (TASK_INTERRUPTIBLE); } remove_wait_queue (&fs_info->devfsd_wait_queue, &wait); current->state = TASK_RUNNING; /* Now play with the data */ ival = atomic_read (&fs_info->devfsd_overrun_count); - if (ival > 0) atomic_sub (ival, &fs_info->devfsd_overrun_count); info->overrun_count = ival; - entry = (struct devfsd_buf_entry *) fs_info->devfsd_buffer + - fs_info->devfsd_buf_out; + entry = fs_info->devfsd_first_event; info->type = entry->type; info->mode = entry->mode; info->uid = entry->uid; info->gid = entry->gid; - if (entry->type == DEVFSD_NOTIFY_LOOKUP) + de = entry->de; + if ( S_ISCHR (de->mode) || S_ISBLK (de->mode) ) { - info->namelen = strlen (entry->data); - pos = 0; - memcpy (info->devname, entry->data, info->namelen + 1); - } - else - { - devfs_handle_t de = entry->data; - - if ( S_ISCHR (de->mode) || S_ISBLK (de->mode) || S_ISREG (de->mode) ) - { - info->major = de->u.fcb.u.device.major; - info->minor = de->u.fcb.u.device.minor; - } - pos = devfs_generate_path (de, info->devname, DEVFS_PATHLEN); - if (pos < 0) return pos; - info->namelen = DEVFS_PATHLEN - pos - 1; - if (info->mode == 0) info->mode = de->mode; + info->major = de->u.fcb.u.device.major; + info->minor = de->u.fcb.u.device.minor; } + pos = devfs_generate_path (de, info->devname, DEVFS_PATHLEN); + if (pos < 0) return pos; + info->namelen = DEVFS_PATHLEN - pos - 1; + if (info->mode == 0) info->mode = de->mode; devname_offset = info->devname - (char *) info; rpos = *ppos; if (rpos < devname_offset) @@ -3214,10 +3233,13 @@ tlen = rpos - *ppos; if (done) { - unsigned int next_pos = fs_info->devfsd_buf_out + 1; - - if (next_pos >= devfsd_buf_size) 
next_pos = 0; - fs_info->devfsd_buf_out = next_pos; + spin_lock (&fs_info->devfsd_buffer_lock); + fs_info->devfsd_first_event = entry->next; + if (entry->next == NULL) fs_info->devfsd_last_event = NULL; + spin_unlock (&fs_info->devfsd_buffer_lock); + for (; de != NULL; de = de->parent) devfs_put (de); + kmem_cache_free (devfsd_buf_cache, entry); + if (ival > 0) atomic_sub (ival, &fs_info->devfsd_overrun_count); *ppos = 0; } else *ppos = rpos; @@ -3253,15 +3275,13 @@ fs_info->devfsd_task = current; spin_unlock (&lock); fs_info->devfsd_file = file; - fs_info->devfsd_buffer = (void *) __get_free_page (GFP_KERNEL); fs_info->devfsd_info = kmalloc (sizeof *fs_info->devfsd_info, GFP_KERNEL); - if (!fs_info->devfsd_buffer || !fs_info->devfsd_info) + if (!fs_info->devfsd_info) { devfsd_close (inode, file); return -ENOMEM; } - fs_info->devfsd_buf_out = fs_info->devfsd_buf_in; } else if (fs_info->devfsd_task != current) return -EBUSY; fs_info->devfsd_event_mask = arg; /* Let the masses come forth */ @@ -3284,29 +3304,48 @@ static int devfsd_close (struct inode *inode, struct file *file) { - unsigned long flags; + struct devfsd_buf_entry *entry; struct fs_info *fs_info = inode->i_sb->u.generic_sbp; if (fs_info->devfsd_file != file) return 0; fs_info->devfsd_event_mask = 0; fs_info->devfsd_file = NULL; - spin_lock_irqsave (&fs_info->devfsd_buffer_lock, flags); - if (fs_info->devfsd_buffer) - { - free_page ( (unsigned long) fs_info->devfsd_buffer ); - fs_info->devfsd_buffer = NULL; - } + spin_lock (&fs_info->devfsd_buffer_lock); + entry = fs_info->devfsd_first_event; + fs_info->devfsd_first_event = NULL; + fs_info->devfsd_last_event = NULL; if (fs_info->devfsd_info) { kfree (fs_info->devfsd_info); fs_info->devfsd_info = NULL; } - spin_unlock_irqrestore (&fs_info->devfsd_buffer_lock, flags); + spin_unlock (&fs_info->devfsd_buffer_lock); fs_info->devfsd_task = NULL; wake_up (&fs_info->revalidate_wait_queue); + for (; entry; entry = entry->next) + kmem_cache_free (devfsd_buf_cache, entry); return 0; } /* End Function devfsd_close */ +#ifdef CONFIG_DEVFS_DEBUG +static ssize_t stat_read (struct file *file, char *buf, size_t len, + loff_t *ppos) +{ + ssize_t num; + char txt[80]; + + num = sprintf (txt, "Number of entries: %u number of bytes: %u\n", + stat_num_entries, stat_num_bytes) + 1; + /* Can't seek (pread) on this device */ + if (ppos != &file->f_pos) return -ESPIPE; + if (*ppos >= num) return 0; + if (*ppos + len > num) len = num - *ppos; + if ( copy_to_user (buf, txt + *ppos, len) ) return -EFAULT; + *ppos += len; + return len; +} /* End Function stat_read */ +#endif + static int __init init_devfs_fs (void) { @@ -3333,6 +3372,9 @@ { int err; + devfsd_buf_cache = kmem_cache_create ("devfsd_event", + sizeof (struct devfsd_buf_entry), + 0, 0, NULL, NULL); if ( !(boot_options & OPTION_MOUNT) ) return; err = do_mount ("none", "/dev", "devfs", 0, ""); if (err == 0) printk ("Mounted devfs on /dev\n"); diff -u --recursive --new-file v2.5.0/linux/fs/ext2/inode.c linux/fs/ext2/inode.c --- v2.5.0/linux/fs/ext2/inode.c Wed Nov 21 14:07:25 2001 +++ linux/fs/ext2/inode.c Tue Nov 27 09:23:27 2001 @@ -505,7 +505,7 @@ * reachable from inode. 
*/ -static int ext2_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create) +static int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { int err = -EIO; int offsets[4]; diff -u --recursive --new-file v2.5.0/linux/fs/ext3/inode.c linux/fs/ext3/inode.c --- v2.5.0/linux/fs/ext3/inode.c Fri Nov 9 14:25:04 2001 +++ linux/fs/ext3/inode.c Tue Nov 27 09:23:27 2001 @@ -719,7 +719,7 @@ */ static int ext3_get_block_handle(handle_t *handle, struct inode *inode, - long iblock, + sector_t iblock, struct buffer_head *bh_result, int create) { int err = -EIO; @@ -823,7 +823,7 @@ goto reread; } -static int ext3_get_block(struct inode *inode, long iblock, +static int ext3_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { handle_t *handle = 0; diff -u --recursive --new-file v2.5.0/linux/fs/inode.c linux/fs/inode.c --- v2.5.0/linux/fs/inode.c Thu Nov 22 10:38:31 2001 +++ linux/fs/inode.c Sat Nov 24 11:26:37 2001 @@ -1065,24 +1065,27 @@ if (inode->i_state != I_CLEAR) BUG(); } else { - if (!list_empty(&inode->i_hash) && sb && sb->s_root) { + if (!list_empty(&inode->i_hash)) { if (!(inode->i_state & (I_DIRTY|I_LOCK))) { list_del(&inode->i_list); list_add(&inode->i_list, &inode_unused); } inodes_stat.nr_unused++; spin_unlock(&inode_lock); - return; - } else { - list_del_init(&inode->i_list); + if (!sb || sb->s_flags & MS_ACTIVE) + return; + write_inode_now(inode, 1); + spin_lock(&inode_lock); + inodes_stat.nr_unused--; list_del_init(&inode->i_hash); - inode->i_state|=I_FREEING; - inodes_stat.nr_inodes--; - spin_unlock(&inode_lock); - if (inode->i_data.nrpages) - truncate_inode_pages(&inode->i_data, 0); - clear_inode(inode); } + list_del_init(&inode->i_list); + inode->i_state|=I_FREEING; + inodes_stat.nr_inodes--; + spin_unlock(&inode_lock); + if (inode->i_data.nrpages) + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); } destroy_inode(inode); } diff -u --recursive --new-file v2.5.0/linux/fs/iobuf.c linux/fs/iobuf.c --- v2.5.0/linux/fs/iobuf.c Fri Apr 27 14:23:25 2001 +++ linux/fs/iobuf.c Tue Nov 27 09:23:27 2001 @@ -8,70 +8,45 @@ #include #include -#include -void end_kio_request(struct kiobuf *kiobuf, int uptodate) +int end_kio_request(struct kiobuf *kiobuf, int uptodate) { + int ret = 1; + if ((!uptodate) && !kiobuf->errno) kiobuf->errno = -EIO; if (atomic_dec_and_test(&kiobuf->io_count)) { + ret = 0; if (kiobuf->end_io) kiobuf->end_io(kiobuf); wake_up(&kiobuf->wait_queue); } + + return ret; } static void kiobuf_init(struct kiobuf *iobuf) { memset(iobuf, 0, sizeof(*iobuf)); init_waitqueue_head(&iobuf->wait_queue); + atomic_set(&iobuf->io_count, 0); iobuf->array_len = KIO_STATIC_PAGES; iobuf->maplist = iobuf->map_array; } -int alloc_kiobuf_bhs(struct kiobuf * kiobuf) -{ - int i; - - for (i = 0; i < KIO_MAX_SECTORS; i++) - if (!(kiobuf->bh[i] = kmem_cache_alloc(bh_cachep, SLAB_KERNEL))) { - while (i--) { - kmem_cache_free(bh_cachep, kiobuf->bh[i]); - kiobuf->bh[i] = NULL; - } - return -ENOMEM; - } - return 0; -} - -void free_kiobuf_bhs(struct kiobuf * kiobuf) -{ - int i; - - for (i = 0; i < KIO_MAX_SECTORS; i++) { - kmem_cache_free(bh_cachep, kiobuf->bh[i]); - kiobuf->bh[i] = NULL; - } -} - int alloc_kiovec(int nr, struct kiobuf **bufp) { int i; struct kiobuf *iobuf; for (i = 0; i < nr; i++) { - iobuf = vmalloc(sizeof(struct kiobuf)); + iobuf = kmalloc(sizeof(struct kiobuf), GFP_KERNEL); if (!iobuf) { free_kiovec(i, bufp); return -ENOMEM; } kiobuf_init(iobuf); - if 
(alloc_kiobuf_bhs(iobuf)) { - vfree(iobuf); - free_kiovec(i, bufp); - return -ENOMEM; - } bufp[i] = iobuf; } @@ -89,8 +64,7 @@ unlock_kiovec(1, &iobuf); if (iobuf->array_len > KIO_STATIC_PAGES) kfree (iobuf->maplist); - free_kiobuf_bhs(iobuf); - vfree(bufp[i]); + kfree(bufp[i]); } } diff -u --recursive --new-file v2.5.0/linux/fs/isofs/inode.c linux/fs/isofs/inode.c --- v2.5.0/linux/fs/isofs/inode.c Thu Oct 25 13:53:53 2001 +++ linux/fs/isofs/inode.c Tue Nov 27 09:23:27 2001 @@ -888,7 +888,7 @@ * or getblk() if they are not. Returns the number of blocks inserted * (0 == error.) */ -int isofs_get_blocks(struct inode *inode, long iblock, +int isofs_get_blocks(struct inode *inode, sector_t iblock, struct buffer_head **bh_result, unsigned long nblocks) { unsigned long b_off; @@ -976,7 +976,7 @@ /* * Used by the standard interfaces. */ -static int isofs_get_block(struct inode *inode, long iblock, +static int isofs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { if ( create ) { diff -u --recursive --new-file v2.5.0/linux/fs/namespace.c linux/fs/namespace.c --- v2.5.0/linux/fs/namespace.c Sun Nov 11 11:23:14 2001 +++ linux/fs/namespace.c Sat Nov 24 13:17:18 2001 @@ -19,9 +19,6 @@ #include -#include -#include -#include #include struct vfsmount *do_kern_mount(char *type, int flags, char *name, void *data); @@ -198,50 +195,10 @@ seq_escape(m, s, " \t\n\\"); } -static void show_nfs_mount(struct seq_file *m, struct vfsmount *mnt) -{ - static struct proc_nfs_info { - int flag; - char *str; - char *nostr; - } nfs_info[] = { - { NFS_MOUNT_SOFT, ",soft", ",hard" }, - { NFS_MOUNT_INTR, ",intr", "" }, - { NFS_MOUNT_POSIX, ",posix", "" }, - { NFS_MOUNT_TCP, ",tcp", ",udp" }, - { NFS_MOUNT_NOCTO, ",nocto", "" }, - { NFS_MOUNT_NOAC, ",noac", "" }, - { NFS_MOUNT_NONLM, ",nolock", ",lock" }, - { NFS_MOUNT_BROKEN_SUID, ",broken_suid", "" }, - { 0, NULL, NULL } - }; - struct proc_nfs_info *nfs_infop; - struct nfs_server *nfss = &mnt->mnt_sb->u.nfs_sb.s_server; - - seq_printf(m, ",v%d", nfss->rpc_ops->version); - seq_printf(m, ",rsize=%d", nfss->rsize); - seq_printf(m, ",wsize=%d", nfss->wsize); - if (nfss->acregmin != 3*HZ) - seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); - if (nfss->acregmax != 60*HZ) - seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); - if (nfss->acdirmin != 30*HZ) - seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); - if (nfss->acdirmax != 60*HZ) - seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); - for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { - if (nfss->flags & nfs_infop->flag) - seq_puts(m, nfs_infop->str); - else - seq_puts(m, nfs_infop->nostr); - } - seq_puts(m, ",addr="); - mangle(m, nfss->hostname); -} - static int show_vfsmnt(struct seq_file *m, void *v) { struct vfsmount *mnt = v; + int err = 0; static struct proc_fs_info { int flag; char *str; @@ -281,10 +238,10 @@ if (mnt->mnt_flags & fs_infop->flag) seq_puts(m, fs_infop->str); } - if (strcmp("nfs", mnt->mnt_sb->s_type->name) == 0) - show_nfs_mount(m, mnt); + if (mnt->mnt_sb->s_op->show_options) + err = mnt->mnt_sb->s_op->show_options(m, mnt); seq_puts(m, " 0 0\n"); - return 0; + return err; } struct seq_operations mounts_op = { diff -u --recursive --new-file v2.5.0/linux/fs/nfs/inode.c linux/fs/nfs/inode.c --- v2.5.0/linux/fs/nfs/inode.c Fri Nov 9 14:28:15 2001 +++ linux/fs/nfs/inode.c Sat Nov 24 13:17:18 2001 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,7 @@ static void nfs_clear_inode(struct inode *); static void 
nfs_umount_begin(struct super_block *); static int nfs_statfs(struct super_block *, struct statfs *); +static int nfs_show_options(struct seq_file *, struct vfsmount *); static struct super_operations nfs_sops = { read_inode: nfs_read_inode, @@ -60,6 +62,7 @@ statfs: nfs_statfs, clear_inode: nfs_clear_inode, umount_begin: nfs_umount_begin, + show_options: nfs_show_options, }; /* @@ -551,6 +554,48 @@ out_err: printk("nfs_statfs: statfs error = %d\n", -error); buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; + return 0; +} + +static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + static struct proc_nfs_info { + int flag; + char *str; + char *nostr; + } nfs_info[] = { + { NFS_MOUNT_SOFT, ",soft", ",hard" }, + { NFS_MOUNT_INTR, ",intr", "" }, + { NFS_MOUNT_POSIX, ",posix", "" }, + { NFS_MOUNT_TCP, ",tcp", ",udp" }, + { NFS_MOUNT_NOCTO, ",nocto", "" }, + { NFS_MOUNT_NOAC, ",noac", "" }, + { NFS_MOUNT_NONLM, ",nolock", ",lock" }, + { NFS_MOUNT_BROKEN_SUID, ",broken_suid", "" }, + { 0, NULL, NULL } + }; + struct proc_nfs_info *nfs_infop; + struct nfs_server *nfss = &mnt->mnt_sb->u.nfs_sb.s_server; + + seq_printf(m, ",v%d", nfss->rpc_ops->version); + seq_printf(m, ",rsize=%d", nfss->rsize); + seq_printf(m, ",wsize=%d", nfss->wsize); + if (nfss->acregmin != 3*HZ) + seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); + if (nfss->acregmax != 60*HZ) + seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); + if (nfss->acdirmin != 30*HZ) + seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); + if (nfss->acdirmax != 60*HZ) + seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); + for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { + if (nfss->flags & nfs_infop->flag) + seq_puts(m, nfs_infop->str); + else + seq_puts(m, nfs_infop->nostr); + } + seq_puts(m, ",addr="); + seq_escape(m, nfss->hostname, " \t\n\\"); return 0; } diff -u --recursive --new-file v2.5.0/linux/fs/partitions/check.c linux/fs/partitions/check.c --- v2.5.0/linux/fs/partitions/check.c Thu Oct 11 17:25:10 2001 +++ linux/fs/partitions/check.c Tue Nov 27 09:23:27 2001 @@ -1,4 +1,6 @@ /* + * fs/partitions/check.c + * * Code extracted from drivers/block/genhd.c * Copyright (C) 1991-1998 Linus Torvalds * Re-organised Feb 1998 Russell King @@ -34,8 +36,6 @@ #include "ibm.h" #include "ultrix.h" -extern int *blk_size[]; - int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ static int (*check_part[])(struct gendisk *hd, struct block_device *bdev, unsigned long first_sect, int first_minor) = { @@ -369,38 +369,50 @@ { if (!gdev) return; - grok_partitions(gdev, MINOR(dev)>>gdev->minor_shift, minors, size); + grok_partitions(dev, size); } -void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size) +void grok_partitions(kdev_t dev, long size) { - int i; - int first_minor = drive << dev->minor_shift; - int end_minor = first_minor + dev->max_p; + int i, minors, first_minor, end_minor; + struct gendisk *g = get_gendisk(dev); + + if (!g) + return; + + minors = 1 << g->minor_shift; + first_minor = MINOR(dev); + if (first_minor & (minors-1)) { + printk("grok_partitions: bad device 0x%02x:%02x\n", + MAJOR(dev), first_minor); + first_minor &= ~(minors-1); + } + end_minor = first_minor + minors; + + if (!g->sizes) + blk_size[g->major] = NULL; - if(!dev->sizes) - blk_size[dev->major] = NULL; + g->part[first_minor].nr_sects = size; - dev->part[first_minor].nr_sects = size; /* No such device or no minors to use for partitions */ if (!size || minors == 1) return; - if (dev->sizes) { - 
dev->sizes[first_minor] = size >> (BLOCK_SIZE_BITS - 9); + if (g->sizes) { + g->sizes[first_minor] = size >> (BLOCK_SIZE_BITS - 9); for (i = first_minor + 1; i < end_minor; i++) - dev->sizes[i] = 0; + g->sizes[i] = 0; } - blk_size[dev->major] = dev->sizes; - check_partition(dev, MKDEV(dev->major, first_minor), 1 + first_minor); + blk_size[g->major] = g->sizes; + check_partition(g, MKDEV(g->major, first_minor), 1 + first_minor); /* * We need to set the sizes array before we will be able to access * any of the partitions on this device. */ - if (dev->sizes != NULL) { /* optional safeguard in ll_rw_blk.c */ + if (g->sizes != NULL) { /* optional safeguard in ll_rw_blk.c */ for (i = first_minor; i < end_minor; i++) - dev->sizes[i] = dev->part[i].nr_sects >> (BLOCK_SIZE_BITS - 9); + g->sizes[i] = g->part[i].nr_sects >> (BLOCK_SIZE_BITS - 9); } } @@ -425,4 +437,44 @@ } p->v = NULL; return NULL; +} + +int wipe_partitions(kdev_t dev) +{ + struct gendisk *g; + kdev_t devp; + int p, major, minor, minor0, max_p, res; + + g = get_gendisk(dev); + if (g == NULL) + return -EINVAL; + + max_p = 1 << g->minor_shift; + major = MAJOR(dev); + minor = MINOR(dev); + minor0 = minor & ~(max_p - 1); + if (minor0 != minor) /* for now only whole-disk reread */ + return -EINVAL; /* %%% later.. */ + + /* invalidate stuff */ + for (p = max_p - 1; p >= 0; p--) { + minor = minor0 + p; + devp = MKDEV(major,minor); +#if 0 /* %%% superfluous? */ + if (g->part[minor].nr_sects == 0) + continue; +#endif + res = invalidate_device(devp, 1); + if (res) + return res; + g->part[minor].start_sect = 0; + g->part[minor].nr_sects = 0; + } + + /* some places do blksize_size[major][minor] = 1024, + as preparation for reading partition table - superfluous */ + /* sd.c used to set blksize_size to 2048 in case + rscsi_disks[target].device->sector_size == 2048 */ + + return 0; } diff -u --recursive --new-file v2.5.0/linux/fs/partitions/check.h linux/fs/partitions/check.h --- v2.5.0/linux/fs/partitions/check.h Mon Oct 1 20:03:26 2001 +++ linux/fs/partitions/check.h Tue Nov 27 09:23:27 2001 @@ -1,5 +1,5 @@ /* - * add_partition adds a partitions details to the devices partition + * add_gd_partition adds a partitions details to the devices partition * description. */ void add_gd_partition(struct gendisk *hd, int minor, int start, int size); diff -u --recursive --new-file v2.5.0/linux/fs/readdir.c linux/fs/readdir.c --- v2.5.0/linux/fs/readdir.c Sun Aug 12 14:59:08 2001 +++ linux/fs/readdir.c Mon Nov 26 16:41:46 2001 @@ -79,6 +79,10 @@ while(1) { struct dentry *de = list_entry(list, struct dentry, d_child); + /* + * See comment on top of function on why we + * can just drop the lock here.. + */ if (!list_empty(&de->d_hash) && de->d_inode) { spin_unlock(&dcache_lock); if (filldir(dirent, de->d_name.name, de->d_name.len, filp->f_pos, de->d_inode->i_ino, DT_UNKNOWN) < 0) diff -u --recursive --new-file v2.5.0/linux/fs/reiserfs/inode.c linux/fs/reiserfs/inode.c --- v2.5.0/linux/fs/reiserfs/inode.c Tue Oct 30 15:11:34 2001 +++ linux/fs/reiserfs/inode.c Tue Nov 27 09:23:27 2001 @@ -390,7 +390,7 @@ // this is called to create file map. So, _get_block_create_0 will not // read direct item -int reiserfs_bmap (struct inode * inode, long block, +int reiserfs_bmap (struct inode * inode, sector_t block, struct buffer_head * bh_result, int create) { if (!file_capable (inode, block)) @@ -420,7 +420,7 @@ ** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, ** don't use this function. 
*/ -static int reiserfs_get_block_create_0 (struct inode * inode, long block, +static int reiserfs_get_block_create_0 (struct inode * inode, sector_t block, struct buffer_head * bh_result, int create) { return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ; } @@ -511,7 +511,7 @@ // determine which parts are derivative, if any, understanding that // there are only so many ways to code to a given interface. // -int reiserfs_get_block (struct inode * inode, long block, +int reiserfs_get_block (struct inode * inode, sector_t block, struct buffer_head * bh_result, int create) { int repeat, retval; @@ -1963,7 +1963,7 @@ // // this is exactly what 2.3.99-pre9's ext2_bmap is // -static int reiserfs_aop_bmap(struct address_space *as, long block) { +static int reiserfs_aop_bmap(struct address_space *as, sector_t block) { return generic_block_bmap(as, block, reiserfs_bmap) ; } diff -u --recursive --new-file v2.5.0/linux/fs/super.c linux/fs/super.c --- v2.5.0/linux/fs/super.c Wed Nov 21 14:05:29 2001 +++ linux/fs/super.c Sat Nov 24 11:26:37 2001 @@ -462,6 +462,7 @@ lock_super(s); if (!type->read_super(s, data, flags & MS_VERBOSE ? 1 : 0)) goto out_fail; + s->s_flags |= MS_ACTIVE; unlock_super(s); /* tell bdcache that we are going to keep this one */ if (bdev) @@ -614,6 +615,7 @@ lock_super(s); if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 1 : 0)) goto out_fail; + s->s_flags |= MS_ACTIVE; unlock_super(s); get_filesystem(fs_type); path_release(&nd); @@ -695,6 +697,7 @@ lock_super(s); if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 1 : 0)) goto out_fail; + s->s_flags |= MS_ACTIVE; unlock_super(s); get_filesystem(fs_type); return s; @@ -739,6 +742,7 @@ dput(root); fsync_super(sb); lock_super(sb); + sb->s_flags &= ~MS_ACTIVE; invalidate_inodes(sb); /* bad name - it should be evict_inodes() */ if (sop) { if (sop->write_super && sb->s_dirt) diff -u --recursive --new-file v2.5.0/linux/fs/udf/inode.c linux/fs/udf/inode.c --- v2.5.0/linux/fs/udf/inode.c Fri Oct 12 13:48:42 2001 +++ linux/fs/udf/inode.c Tue Nov 27 09:23:27 2001 @@ -61,7 +61,7 @@ static void udf_update_extents(struct inode *, long_ad [EXTENT_MERGE_SIZE], int, int, lb_addr, Uint32, struct buffer_head **); -static int udf_get_block(struct inode *, long, struct buffer_head *, int); +static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); /* * udf_put_inode @@ -314,7 +314,7 @@ return dbh; } -static int udf_get_block(struct inode *inode, long block, struct buffer_head *bh_result, int create) +static int udf_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create) { int err, new; struct buffer_head *bh; diff -u --recursive --new-file v2.5.0/linux/include/asm-alpha/io.h linux/include/asm-alpha/io.h --- v2.5.0/linux/include/asm-alpha/io.h Fri Nov 9 13:45:35 2001 +++ linux/include/asm-alpha/io.h Tue Nov 27 09:23:27 2001 @@ -60,6 +60,8 @@ return (void *) (address + IDENT_ADDR); } +#define page_to_phys(page) (((page) - (page)->zone->zone_mem_map) << PAGE_SHIFT) + /* * Change addresses as seen by the kernel (virtual) to addresses as * seen by a device (bus), and vice versa. 
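The page_to_phys() definitions this patch adds for alpha (above) and sparc64 (below) give the block layer a way to turn a struct page plus an offset into a physical address without requiring a kernel virtual mapping, which is what lets highmem pages be used directly as I/O buffers. The new include/linux/bio.h later in this patch builds bvec_to_phys() and the BIO_CONTIG() merge test on exactly this. A minimal sketch of the pattern, assuming only the definitions visible in this patch (seg_phys and segs_mergeable are illustrative names, not kernel symbols):

/* Physical address of a buffer segment; valid even for a highmem page
 * that has no kernel virtual address. */
static inline unsigned long seg_phys(struct page *page, unsigned int offset)
{
	return page_to_phys(page) + offset;
}

/* Two segments can be coalesced into one hardware segment when the first
 * ends physically exactly where the second begins -- the same test that
 * BIO_CONTIG() performs on whole bios. */
static inline int segs_mergeable(struct page *p1, unsigned int off1,
				 unsigned int len1,
				 struct page *p2, unsigned int off2)
{
	return seg_phys(p1, off1) + len1 == seg_phys(p2, off2);
}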
diff -u --recursive --new-file v2.5.0/linux/include/asm-i386/checksum.h linux/include/asm-i386/checksum.h --- v2.5.0/linux/include/asm-i386/checksum.h Thu Jul 26 13:41:22 2001 +++ linux/include/asm-i386/checksum.h Tue Nov 27 09:23:27 2001 @@ -69,25 +69,24 @@ unsigned int ihl) { unsigned int sum; - __asm__ __volatile__(" - movl (%1), %0 - subl $4, %2 - jbe 2f - addl 4(%1), %0 - adcl 8(%1), %0 - adcl 12(%1), %0 -1: adcl 16(%1), %0 - lea 4(%1), %1 - decl %2 - jne 1b - adcl $0, %0 - movl %0, %2 - shrl $16, %0 - addw %w2, %w0 - adcl $0, %0 - notl %0 -2: - " + __asm__ __volatile__( + "movl (%1), %0 ;\n" + "subl $4, %2 ;\n" + "jbe 2f ;\n" + "addl 4(%1), %0 ;\n" + "adcl 8(%1), %0 ;\n" + "adcl 12(%1), %0 ;\n" +"1: adcl 16(%1), %0 ;\n" + "lea 4(%1), %1 ;\n" + "decl %2 ;\n" + "jne 1b ;\n" + "adcl $0, %0 ;\n" + "movl %0, %2 ;\n" + "shrl $16, %0 ;\n" + "addw %w2, %w0 ;\n" + "adcl $0, %0 ;\n" + "notl %0 ;\n" +"2: ;\n" /* Since the input registers which are loaded with iph and ipl are modified, we must also specify them as outputs, or gcc will assume they contain their original values. */ @@ -102,10 +101,9 @@ static inline unsigned int csum_fold(unsigned int sum) { - __asm__(" - addl %1, %0 - adcl $0xffff, %0 - " + __asm__( + "addl %1, %0 ;\n" + "adcl $0xffff, %0 ;\n" : "=r" (sum) : "r" (sum << 16), "0" (sum & 0xffff0000) ); @@ -118,12 +116,11 @@ unsigned short proto, unsigned int sum) { - __asm__(" - addl %1, %0 - adcl %2, %0 - adcl %3, %0 - adcl $0, %0 - " + __asm__( + "addl %1, %0 ;\n" + "adcl %2, %0 ;\n" + "adcl %3, %0 ;\n" + "adcl $0, %0 ;\n" : "=r" (sum) : "g" (daddr), "g"(saddr), "g"((ntohs(len)<<16)+proto*256), "0"(sum)); return sum; @@ -158,19 +155,18 @@ unsigned short proto, unsigned int sum) { - __asm__(" - addl 0(%1), %0 - adcl 4(%1), %0 - adcl 8(%1), %0 - adcl 12(%1), %0 - adcl 0(%2), %0 - adcl 4(%2), %0 - adcl 8(%2), %0 - adcl 12(%2), %0 - adcl %3, %0 - adcl %4, %0 - adcl $0, %0 - " + __asm__( + "addl 0(%1), %0 ;\n" + "adcl 4(%1), %0 ;\n" + "adcl 8(%1), %0 ;\n" + "adcl 12(%1), %0 ;\n" + "adcl 0(%2), %0 ;\n" + "adcl 4(%2), %0 ;\n" + "adcl 8(%2), %0 ;\n" + "adcl 12(%2), %0 ;\n" + "adcl %3, %0 ;\n" + "adcl %4, %0 ;\n" + "adcl $0, %0 ;\n" : "=&r" (sum) : "r" (saddr), "r" (daddr), "r"(htonl(len)), "r"(htonl(proto)), "0"(sum)); diff -u --recursive --new-file v2.5.0/linux/include/asm-i386/floppy.h linux/include/asm-i386/floppy.h --- v2.5.0/linux/include/asm-i386/floppy.h Thu Nov 22 11:46:19 2001 +++ linux/include/asm-i386/floppy.h Tue Nov 27 09:40:20 2001 @@ -75,28 +75,28 @@ #ifndef NO_FLOPPY_ASSEMBLER __asm__ ( - "testl %1,%1 - je 3f -1: inb %w4,%b0 - andb $160,%b0 - cmpb $160,%b0 - jne 2f - incw %w4 - testl %3,%3 - jne 4f - inb %w4,%b0 - movb %0,(%2) - jmp 5f -4: movb (%2),%0 - outb %b0,%w4 -5: decw %w4 - outb %0,$0x80 - decl %1 - incl %2 - testl %1,%1 - jne 1b -3: inb %w4,%b0 -2: " + "testl %1,%1\n" + "je 3f\n" +"1: inb %w4,%b0\n" + "andb $160,%b0\n" + "cmpb $160,%b0\n" + "jne 2f\n" + "incw %w4\n" + "testl %3,%3\n" + "jne 4f\n" + "inb %w4,%b0\n" + "movb %0,(%2)\n" + "jmp 5f\n" +"4: movb (%2),%0\n" + "outb %b0,%w4\n" +"5: decw %w4\n" + "outb %0,$0x80\n" + "decl %1\n" + "incl %2\n" + "testl %1,%1\n" + "jne 1b\n" +"3: inb %w4,%b0\n" +"2: " : "=a" ((char) st), "=c" ((long) virtual_dma_count), "=S" ((long) virtual_dma_addr) diff -u --recursive --new-file v2.5.0/linux/include/asm-i386/kmap_types.h linux/include/asm-i386/kmap_types.h --- v2.5.0/linux/include/asm-i386/kmap_types.h Mon Sep 17 13:16:30 2001 +++ linux/include/asm-i386/kmap_types.h Tue Nov 27 09:23:27 2001 @@ -7,6 +7,7 @@ KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, + KM_BIO_IRQ, KM_TYPE_NR }; diff -u --recursive --new-file 
v2.5.0/linux/include/asm-i386/page.h linux/include/asm-i386/page.h --- v2.5.0/linux/include/asm-i386/page.h Thu Nov 22 11:46:18 2001 +++ linux/include/asm-i386/page.h Tue Nov 27 09:40:20 2001 @@ -101,6 +101,12 @@ BUG(); \ } while (0) +#define BUG_ON(condition) \ + do { \ + if (unlikely((int)(condition))) \ + BUG(); \ + } while (0) + /* Pure 2^n version of get_order */ static __inline__ int get_order(unsigned long size) { diff -u --recursive --new-file v2.5.0/linux/include/asm-i386/pgalloc.h linux/include/asm-i386/pgalloc.h --- v2.5.0/linux/include/asm-i386/pgalloc.h Thu Nov 22 11:46:19 2001 +++ linux/include/asm-i386/pgalloc.h Tue Nov 27 09:40:20 2001 @@ -18,15 +18,21 @@ * Allocate and free page tables. */ -#if CONFIG_X86_PAE +#if defined (CONFIG_X86_PAE) +/* + * We can't include <linux/slab.h> here, thus these uglinesses. + */ +struct kmem_cache_s; + +extern struct kmem_cache_s *pae_pgd_cachep; +extern void *kmem_cache_alloc(struct kmem_cache_s *, int); +extern void kmem_cache_free(struct kmem_cache_s *, void *); -extern void *kmalloc(size_t, int); -extern void kfree(const void *); -static __inline__ pgd_t *get_pgd_slow(void) +static inline pgd_t *get_pgd_slow(void) { int i; - pgd_t *pgd = kmalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + pgd_t *pgd = kmem_cache_alloc(pae_pgd_cachep, GFP_KERNEL); if (pgd) { for (i = 0; i < USER_PTRS_PER_PGD; i++) { @@ -36,32 +42,36 @@ clear_page(pmd); set_pgd(pgd + i, __pgd(1 + __pa(pmd))); } - memcpy(pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + memcpy(pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); } return pgd; out_oom: for (i--; i >= 0; i--) free_page((unsigned long)__va(pgd_val(pgd[i])-1)); - kfree(pgd); + kmem_cache_free(pae_pgd_cachep, pgd); return NULL; } #else -static __inline__ pgd_t *get_pgd_slow(void) +static inline pgd_t *get_pgd_slow(void) { pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); if (pgd) { memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); - memcpy(pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + memcpy(pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); } return pgd; } -#endif +#endif /* CONFIG_X86_PAE */ -static __inline__ pgd_t *get_pgd_fast(void) +static inline pgd_t *get_pgd_fast(void) { unsigned long *ret; @@ -74,21 +84,21 @@ return (pgd_t *)ret; } -static __inline__ void free_pgd_fast(pgd_t *pgd) +static inline void free_pgd_fast(pgd_t *pgd) { *(unsigned long *)pgd = (unsigned long) pgd_quicklist; pgd_quicklist = (unsigned long *) pgd; pgtable_cache_size++; } -static __inline__ void free_pgd_slow(pgd_t *pgd) +static inline void free_pgd_slow(pgd_t *pgd) { -#if CONFIG_X86_PAE +#if defined(CONFIG_X86_PAE) int i; for (i = 0; i < USER_PTRS_PER_PGD; i++) free_page((unsigned long)__va(pgd_val(pgd[i])-1)); - kfree(pgd); + kmem_cache_free(pae_pgd_cachep, pgd); #else free_page((unsigned long)pgd); #endif @@ -104,7 +114,8 @@ return pte; } -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address) +static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, + unsigned long address) { unsigned long *ret; @@ -116,7 +127,7 @@ return (pte_t *)ret; } -static __inline__ void pte_free_fast(pte_t *pte) +static inline void pte_free_fast(pte_t *pte) { *(unsigned long *)pte = (unsigned long) pte_quicklist; pte_quicklist = (unsigned long *) pte; @@ -128,14 
+139,9 @@ free_page((unsigned long)pte); } -#define pte_free(pte) pte_free_fast(pte) -#ifdef CONFIG_X86_PAE -#define pgd_alloc(mm) get_pgd_slow() +#define pte_free(pte) pte_free_slow(pte) #define pgd_free(pgd) free_pgd_slow(pgd) -#else #define pgd_alloc(mm) get_pgd_fast() -#define pgd_free(pgd) free_pgd_fast(pgd) -#endif /* * allocating and freeing a pmd is trivial: the 1-entry pmd is diff -u --recursive --new-file v2.5.0/linux/include/asm-m68k/machdep.h linux/include/asm-m68k/machdep.h --- v2.5.0/linux/include/asm-m68k/machdep.h Mon Nov 27 17:57:34 2000 +++ linux/include/asm-m68k/machdep.h Tue Nov 27 09:23:27 2001 @@ -5,7 +5,6 @@ struct kbd_repeat; struct mktime; struct hwclk_time; -struct gendisk; struct buffer_head; extern void (*mach_sched_init) (void (*handler)(int, void *, struct pt_regs *)); diff -u --recursive --new-file v2.5.0/linux/include/asm-ppc/kmap_types.h linux/include/asm-ppc/kmap_types.h --- v2.5.0/linux/include/asm-ppc/kmap_types.h Mon Sep 17 13:16:30 2001 +++ linux/include/asm-ppc/kmap_types.h Tue Nov 27 09:23:27 2001 @@ -11,6 +11,7 @@ KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, + KM_BIO_IRQ, KM_TYPE_NR }; diff -u --recursive --new-file v2.5.0/linux/include/asm-sparc/kmap_types.h linux/include/asm-sparc/kmap_types.h --- v2.5.0/linux/include/asm-sparc/kmap_types.h Mon Sep 17 13:16:30 2001 +++ linux/include/asm-sparc/kmap_types.h Tue Nov 27 09:23:27 2001 @@ -7,6 +7,7 @@ KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, + KM_BIO_IRQ, KM_TYPE_NR }; diff -u --recursive --new-file v2.5.0/linux/include/asm-sparc64/io.h linux/include/asm-sparc64/io.h --- v2.5.0/linux/include/asm-sparc64/io.h Tue Nov 13 09:16:05 2001 +++ linux/include/asm-sparc64/io.h Tue Nov 27 09:23:27 2001 @@ -18,6 +18,8 @@ extern unsigned long bus_to_virt_not_defined_use_pci_map(volatile void *addr); #define bus_to_virt bus_to_virt_not_defined_use_pci_map +#define page_to_phys(page) (((page) - mem_map) << PAGE_SHIFT) + /* Different PCI controllers we support have their PCI MEM space * mapped to an either 2GB (Psycho) or 4GB (Sabre) aligned area, * so need to chop off the top 33 or 32 bits. diff -u --recursive --new-file v2.5.0/linux/include/linux/bio.h linux/include/linux/bio.h --- v2.5.0/linux/include/linux/bio.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/bio.h Tue Nov 27 09:23:27 2001 @@ -0,0 +1,230 @@ +/* + * New 2.5 block I/O model + * + * Copyright (C) 2001 Jens Axboe + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef __LINUX_BIO_H +#define __LINUX_BIO_H + +#define BIO_DEBUG + +#ifdef BIO_DEBUG +#define BIO_BUG_ON BUG_ON +#else +#define BIO_BUG_ON +#endif + +/* + * hash profiling stuff.. 
+/* + * hash profiling stuff.. + */ +#define BIO_HASH_PROFILING + +#define BLKHASHPROF _IOR(0x12,108,sizeof(struct bio_hash_stats)) +#define BLKHASHCLEAR _IO(0x12,109) + +#define MAX_PROFILE_BUCKETS 64 + +struct bio_hash_stats { + atomic_t nr_lookups; + atomic_t nr_hits; + atomic_t nr_inserts; + atomic_t nr_entries; + atomic_t max_entries; + atomic_t max_bucket_size; + atomic_t bucket_size[MAX_PROFILE_BUCKETS + 1]; +}; + +/* + * was unsigned short, but we might as well be ready for > 64kB I/O pages + */ +struct bio_vec { + struct page *bv_page; + unsigned int bv_len; + unsigned int bv_offset; +}; + +struct bio_vec_list { + unsigned int bvl_cnt; /* how many bio_vec's */ + unsigned int bvl_idx; /* current index into bvl_vec */ + unsigned int bvl_size; /* total size in bytes */ + unsigned int bvl_max; /* max bvl_vecs we can hold, used + as index into pool */ + struct bio_vec bvl_vec[0]; /* the iovec array */ +}; + +typedef struct bio_hash_s { + struct bio_hash_s *next_hash; + struct bio_hash_s **pprev_hash; + unsigned long valid_counter; +} bio_hash_t; + +struct bio_hash_bucket { + rwlock_t lock; + bio_hash_t *hash; +} __attribute__((__aligned__(16))); + +#define BIO_HASH_BITS (bio_hash_bits) +#define BIO_HASH_SIZE (1UL << BIO_HASH_BITS) + +/* + * shamelessly stolen from the list.h implementation + */ +#define hash_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) +#define bio_hash_entry(ptr) \ + hash_entry((ptr), struct bio, bi_hash) + +/* + * main unit of I/O for the block layer and lower layers (ie drivers and + * stacking drivers) + */ +struct bio { + sector_t bi_sector; + struct bio *bi_next; /* request queue link */ + bio_hash_t bi_hash; + atomic_t bi_cnt; /* pin count */ + kdev_t bi_dev; /* will be block device */ + struct bio_vec_list *bi_io_vec; + unsigned long bi_flags; /* status, command, etc */ + unsigned long bi_rw; /* bottom bits READ/WRITE, + * top bits priority + */ + int (*bi_end_io)(struct bio *bio, int nr_sectors); + void *bi_private; + + void *bi_hash_desc; /* cookie for hash */ + + void (*bi_destructor)(struct bio *); /* destructor */ +}; + +#define BIO_SECTOR_BITS 9 +#define BIO_OFFSET_MASK ((1UL << (PAGE_CACHE_SHIFT - BIO_SECTOR_BITS)) - 1) +#define BIO_PAGE_MASK (PAGE_CACHE_SIZE - 1) + +/* + * bio flags + */ +#define BIO_UPTODATE 0 /* ok after I/O completion */ +#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ +#define BIO_EOF 2 /* out-of-bounds error */ +#define BIO_PREBUILT 3 /* not merged bio */ +#define BIO_CLONED 4 /* doesn't own data */ + +#define bio_is_hashed(bio) ((bio)->bi_hash.pprev_hash) + +/* + * bio bi_rw flags + * + * bit 0 -- read (not set) or write (set) + * bit 1 -- rw-ahead when set + * bit 2 -- barrier + */ +#define BIO_RW 0 +#define BIO_RW_AHEAD 1 +#define BIO_BARRIER 2 + +/* + * various member access, note that bio_data should of course not be used + * on highmem page vectors + */ +#define bio_iovec_idx(bio, idx) (&((bio)->bi_io_vec->bvl_vec[(idx)])) +#define bio_iovec(bio) bio_iovec_idx((bio), (bio)->bi_io_vec->bvl_idx) +#define bio_page(bio) bio_iovec((bio))->bv_page +#define bio_size(bio) ((bio)->bi_io_vec->bvl_size) +#define bio_offset(bio) bio_iovec((bio))->bv_offset +#define bio_sectors(bio) (bio_size((bio)) >> BIO_SECTOR_BITS) +#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio))) +#define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_BARRIER)) + +/* + * will die + */ +#define bio_to_phys(bio) (page_to_phys(bio_page((bio))) + bio_offset((bio))) +#define bvec_to_phys(bv)
(page_to_phys((bv)->bv_page) + (bv)->bv_offset) + +/* + * hack to avoid doing 64-bit calculations on 32-bit archs, instead use a + * pseudo-pfn check to do segment coalescing + */ +#define bio_sec_pfn(bio) \ + ((((bio_page(bio) - bio_page(bio)->zone->zone_mem_map) << PAGE_SHIFT) / bio_size(bio)) + (bio_offset(bio) >> 9)) + +/* + * queues that have highmem support enabled may still need to revert to + * PIO transfers occasionally and thus map high pages temporarily. For + * permanent PIO fall back, user is probably better off disabling highmem + * I/O completely on that queue (see ide-dma for example) + */ +#define bio_kmap(bio) kmap(bio_page((bio))) + bio_offset((bio)) +#define bio_kunmap(bio) kunmap(bio_page((bio))) + +#define BIO_CONTIG(bio, nxt) \ + (bio_to_phys((bio)) + bio_size((bio)) == bio_to_phys((nxt))) +#define __BIO_PHYS_4G(addr1, addr2) \ + (((addr1) | 0xffffffff) == (((addr2) -1 ) | 0xffffffff)) +#define BIO_PHYS_4G(b1, b2) \ + __BIO_PHYS_4G(bio_to_phys((b1)), bio_to_phys((b2)) + bio_size((b2))) + +typedef int (bio_end_io_t) (struct bio *, int); +typedef void (bio_destructor_t) (struct bio *); + +#define bio_io_error(bio) bio_endio((bio), 0, bio_sectors((bio))) + +#define bio_for_each_segment(bvl, bio, i) \ + for (bvl = bio_iovec((bio)), i = (bio)->bi_io_vec->bvl_idx; \ + i < (bio)->bi_io_vec->bvl_cnt; \ + bvl++, i++) + +/* + * get a reference to a bio, so it won't disappear. the intended use is + * something like: + * + * bio_get(bio); + * submit_bio(rw, bio); + * if (bio->bi_flags ...) + * do_something + * bio_put(bio); + * + * without the bio_get(), it could potentially complete I/O before submit_bio + * returns. and then bio would be freed memory when if (bio->bi_flags ...) + * runs + */ +#define bio_get(bio) atomic_inc(&(bio)->bi_cnt) + +extern struct bio *bio_alloc(int, int); +extern void bio_put(struct bio *); + +/* + * the hash stuff is pretty closely tied to the request queue (needed for + * locking etc anyway, and it's in no way an attempt at a generic hash) + */ +struct request_queue; + +extern inline void bio_hash_remove(struct bio *); +extern inline void bio_hash_add(struct bio *, void *, unsigned int); +extern inline struct bio *bio_hash_find(kdev_t, sector_t, unsigned int); +extern inline int bio_hash_add_unique(struct bio *, void *, unsigned int); +extern void bio_hash_invalidate(struct request_queue *, kdev_t); +extern int bio_endio(struct bio *, int, int); + +extern struct bio *bio_clone(struct bio *, int); +extern struct bio *bio_copy(struct bio *, int); + +extern int bio_ioctl(kdev_t, unsigned int, unsigned long); + +#endif /* __LINUX_BIO_H */ diff -u --recursive --new-file v2.5.0/linux/include/linux/blk.h linux/include/linux/blk.h --- v2.5.0/linux/include/linux/blk.h Thu Nov 22 11:48:07 2001 +++ linux/include/linux/blk.h Tue Nov 27 09:41:41 2001 @@ -5,13 +5,7 @@ #include #include #include - -/* - * Spinlock for protecting the request queue which - * is mucked around with in interrupts on potentially - * multiple CPU's.. - */ -extern spinlock_t io_request_lock; +#include /* * Initialization functions. @@ -87,13 +81,18 @@ * code duplication in drivers. 
*/ -static inline void blkdev_dequeue_request(struct request * req) +static inline void blkdev_dequeue_request(struct request *req) { - list_del(&req->queue); + if (req->bio) + bio_hash_remove(req->bio); + if (req->biotail) + bio_hash_remove(req->biotail); + + list_del(&req->queuelist); } -int end_that_request_first(struct request *req, int uptodate, char *name); -void end_that_request_last(struct request *req); +int end_that_request_first(struct request *, int uptodate, int nr_sectors); +void end_that_request_last(struct request *); #if defined(MAJOR_NR) || defined(IDE_DRIVER) @@ -338,12 +337,16 @@ #if !defined(IDE_DRIVER) #ifndef CURRENT -#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head) +#define CURRENT elv_next_request(&blk_dev[MAJOR_NR].request_queue) +#endif +#ifndef QUEUE +#define QUEUE (&blk_dev[MAJOR_NR].request_queue) #endif #ifndef QUEUE_EMPTY -#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head) +#define QUEUE_EMPTY blk_queue_empty(QUEUE) #endif + #ifndef DEVICE_NAME #define DEVICE_NAME "unknown" #endif @@ -367,16 +370,14 @@ #endif #define INIT_REQUEST \ - if (QUEUE_EMPTY) {\ + if (QUEUE_EMPTY) { \ CLEAR_INTR; \ - return; \ + return; \ } \ if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \ panic(DEVICE_NAME ": request list destroyed"); \ - if (CURRENT->bh) { \ - if (!buffer_locked(CURRENT->bh)) \ - panic(DEVICE_NAME ": block not locked"); \ - } + if (!CURRENT->bio) \ + panic(DEVICE_NAME ": no bio"); \ #endif /* !defined(IDE_DRIVER) */ @@ -385,10 +386,11 @@ #if ! SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR) -static inline void end_request(int uptodate) { +static inline void end_request(int uptodate) +{ struct request *req = CURRENT; - if (end_that_request_first(req, uptodate, DEVICE_NAME)) + if (end_that_request_first(req, uptodate, CURRENT->hard_cur_sectors)) return; #ifndef DEVICE_NO_RANDOM diff -u --recursive --new-file v2.5.0/linux/include/linux/blkdev.h linux/include/linux/blkdev.h --- v2.5.0/linux/include/linux/blkdev.h Thu Nov 22 11:47:08 2001 +++ linux/include/linux/blkdev.h Tue Nov 27 09:40:28 2001 @@ -6,60 +6,57 @@ #include #include #include +#include + +#include struct request_queue; typedef struct request_queue request_queue_t; struct elevator_s; typedef struct elevator_s elevator_t; -/* - * Ok, this is an expanded form so that we can use the same - * request for paging requests. - */ struct request { - struct list_head queue; + struct list_head queuelist; /* looking for ->queue? you must _not_ + * access it directly, use + * blkdev_dequeue_request! 
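+ *
+ * e.g. (a sketch; assumes the caller holds the queue
+ * lock while pulling requests off the list):
+ *
+ *	rq = elv_next_request(q);
+ *	blkdev_dequeue_request(rq);
+ *
+ * so the bio hash entries are dropped together with
+ * the request instead of going stale.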
*/ int elevator_sequence; - volatile int rq_status; /* should split this into a few status bits */ -#define RQ_INACTIVE (-1) -#define RQ_ACTIVE 1 -#define RQ_SCSI_BUSY 0xffff -#define RQ_SCSI_DONE 0xfffe -#define RQ_SCSI_DISCONNECTING 0xffe0 + int inactive; /* driver hasn't seen it yet */ + int rq_status; /* should split this into a few status bits */ kdev_t rq_dev; int cmd; /* READ or WRITE */ int errors; - unsigned long sector; + sector_t sector; unsigned long nr_sectors; unsigned long hard_sector, hard_nr_sectors; - unsigned int nr_segments; - unsigned int nr_hw_segments; - unsigned long current_nr_sectors; - void * special; - char * buffer; - struct completion * waiting; - struct buffer_head * bh; - struct buffer_head * bhtail; + unsigned short nr_segments; + unsigned short nr_hw_segments; + unsigned int current_nr_sectors; + unsigned int hard_cur_sectors; + void *special; + char *buffer; + struct completion *waiting; + struct bio *bio, *biotail; request_queue_t *q; }; #include -typedef int (merge_request_fn) (request_queue_t *q, - struct request *req, - struct buffer_head *bh, - int); -typedef int (merge_requests_fn) (request_queue_t *q, - struct request *req, - struct request *req2, - int); +typedef int (merge_request_fn) (request_queue_t *, struct request *, + struct bio *); +typedef int (merge_requests_fn) (request_queue_t *, struct request *, + struct request *); typedef void (request_fn_proc) (request_queue_t *q); typedef request_queue_t * (queue_proc) (kdev_t dev); -typedef int (make_request_fn) (request_queue_t *q, int rw, struct buffer_head *bh); -typedef void (plug_device_fn) (request_queue_t *q, kdev_t device); +typedef int (make_request_fn) (request_queue_t *q, struct bio *bio); typedef void (unplug_device_fn) (void *q); +enum blk_queue_state { + Queue_down, + Queue_up, +}; + /* * Default nr free requests per queue, ll_rw_blk will scale it down * according to available RAM at init time @@ -69,6 +66,7 @@ struct request_list { unsigned int count; struct list_head free; + wait_queue_head_t wait; }; struct request_queue @@ -89,7 +87,7 @@ merge_request_fn * front_merge_fn; merge_requests_fn * merge_requests_fn; make_request_fn * make_request_fn; - plug_device_fn * plug_device_fn; + /* * The queue owner gets to use this for whatever they like. * ll_rw_blk doesn't touch it. @@ -97,33 +95,111 @@ void * queuedata; /* - * This is used to remove the plug when tq_disk runs. + * queue needs bounce pages for pages above this limit */ - struct tq_struct plug_tq; + unsigned long bounce_pfn; /* - * Boolean that indicates whether this queue is plugged or not. + * for memory zoning (<= 4GB and > 4GB) */ - char plugged; + int bounce_gfp; /* - * Boolean that indicates whether current_request is active or - * not. + * This is used to remove the plug when tq_disk runs. 
*/ - char head_active; + struct tq_struct plug_tq; /* - * Is meant to protect the queue in the future instead of - * io_request_lock + * various queue flags, see QUEUE_* below + */ + unsigned long queue_flags; + + /* + * protects queue structures from reentrancy */ spinlock_t queue_lock; /* - * Tasks wait here for free request + * queue settings */ - wait_queue_head_t wait_for_request; + unsigned short max_sectors; + unsigned short max_segments; + unsigned short hardsect_size; + unsigned int max_segment_size; + + wait_queue_head_t queue_wait; + + unsigned int hash_valid_counter; }; +#define RQ_INACTIVE (-1) +#define RQ_ACTIVE 1 +#define RQ_SCSI_BUSY 0xffff +#define RQ_SCSI_DONE 0xfffe +#define RQ_SCSI_DISCONNECTING 0xffe0 + +#define QUEUE_FLAG_PLUGGED 0 /* queue is plugged */ +#define QUEUE_FLAG_NOSPLIT 1 /* can process bio over several goes */ + +#define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) + +#define blk_mark_plugged(q) set_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) + +#define blk_queue_empty(q) elv_queue_empty(q) + +#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) + +/* + * noop, requests are automagically marked as active/inactive by I/O + * scheduler -- see elv_next_request + */ +#define blk_queue_headactive(q, head_active) + +extern unsigned long blk_max_low_pfn, blk_max_pfn; + +#define __elv_next_request(q) (q)->elevator.elevator_next_req_fn((q)) + +extern inline struct request *elv_next_request(request_queue_t *q) +{ + struct request *rq = __elv_next_request(q); + + if (rq) { + rq->inactive = 0; + wmb(); + + if (rq->bio) + bio_hash_remove(rq->bio); + if (rq->biotail) + bio_hash_remove(rq->biotail); + } + + return rq; +} + +#define BLK_BOUNCE_HIGH (blk_max_low_pfn << PAGE_SHIFT) +#define BLK_BOUNCE_ANY (blk_max_pfn << PAGE_SHIFT) + +#ifdef CONFIG_HIGHMEM + +extern void create_bounce(struct bio **bio_orig, int gfp_mask); + +extern inline void blk_queue_bounce(request_queue_t *q, struct bio **bio) +{ + struct page *page = bio_page(*bio); + + if (page - page->zone->zone_mem_map > q->bounce_pfn) + create_bounce(bio, q->bounce_gfp); +} + +#else /* CONFIG_HIGHMEM */ + +#define blk_queue_bounce(q, bio) do { } while (0) + +#endif /* CONFIG_HIGHMEM */ + +#define rq_for_each_bio(bio, rq) \ + for (bio = (rq)->bio; bio; bio = bio->bi_next) + struct blk_dev_struct { /* * queue_proc has to be atomic @@ -148,68 +224,78 @@ extern struct sec_size * blk_sec[MAX_BLKDEV]; extern struct blk_dev_struct blk_dev[MAX_BLKDEV]; -extern void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size); +extern void grok_partitions(kdev_t dev, long size); +extern int wipe_partitions(kdev_t dev); extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size); -extern void generic_make_request(int rw, struct buffer_head * bh); +extern void generic_make_request(struct bio *bio); extern inline request_queue_t *blk_get_queue(kdev_t dev); extern void blkdev_release_request(struct request *); +extern void blk_attempt_remerge(request_queue_t *, struct request *); /* * Access functions for manipulating queue properties */ -extern void blk_init_queue(request_queue_t *, request_fn_proc *); +extern int blk_init_queue(request_queue_t *, request_fn_proc *, char *); extern void blk_cleanup_queue(request_queue_t *); -extern void blk_queue_headactive(request_queue_t *, int); extern void blk_queue_make_request(request_queue_t *, make_request_fn *); +extern void 
blk_queue_bounce_limit(request_queue_t *, unsigned long long); +extern void blk_queue_max_sectors(request_queue_t *q, unsigned short); +extern void blk_queue_max_segments(request_queue_t *q, unsigned short); +extern void blk_queue_max_segment_size(request_queue_t *q, unsigned int); +extern void blk_queue_hardsect_size(request_queue_t *q, unsigned short); +extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *); extern void generic_unplug_device(void *); extern int * blk_size[MAX_BLKDEV]; extern int * blksize_size[MAX_BLKDEV]; -extern int * hardsect_size[MAX_BLKDEV]; - extern int * max_readahead[MAX_BLKDEV]; -extern int * max_sectors[MAX_BLKDEV]; - -extern int * max_segments[MAX_BLKDEV]; - #define MAX_SEGMENTS 128 #define MAX_SECTORS 255 -#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK) +#define MAX_SEGMENT_SIZE 65536 /* read-ahead in pages.. */ #define MAX_READAHEAD 31 #define MIN_READAHEAD 3 -#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queue) +#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) #define blkdev_entry_next_request(entry) blkdev_entry_to_request((entry)->next) #define blkdev_entry_prev_request(entry) blkdev_entry_to_request((entry)->prev) -#define blkdev_next_request(req) blkdev_entry_to_request((req)->queue.next) -#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queue.prev) +#define blkdev_next_request(req) blkdev_entry_to_request((req)->queuelist.next) +#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queuelist.prev) extern void drive_stat_acct (kdev_t dev, int rw, unsigned long nr_sectors, int new_io); -static inline int get_hardsect_size(kdev_t dev) +extern inline void blk_clear(int major) { + blk_size[major] = NULL; +#if 0 + blk_size_in_bytes[major] = NULL; +#endif + blksize_size[major] = NULL; + max_readahead[major] = NULL; + read_ahead[major] = 0; +} + +extern inline int get_hardsect_size(kdev_t dev) +{ + request_queue_t *q = blk_get_queue(dev); int retval = 512; - int major = MAJOR(dev); - if (hardsect_size[major]) { - int minor = MINOR(dev); - if (hardsect_size[major][minor]) - retval = hardsect_size[major][minor]; - } + if (q && q->hardsect_size) + retval = q->hardsect_size; + return retval; } #define blk_finished_io(nsects) do { } while (0) #define blk_started_io(nsects) do { } while (0) -static inline unsigned int blksize_bits(unsigned int size) +extern inline unsigned int blksize_bits(unsigned int size) { unsigned int bits = 8; do { @@ -219,7 +305,7 @@ return bits; } -static inline unsigned int block_size(kdev_t dev) +extern inline unsigned int block_size(kdev_t dev) { int retval = BLOCK_SIZE; int major = MAJOR(dev); diff -u --recursive --new-file v2.5.0/linux/include/linux/bootmem.h linux/include/linux/bootmem.h --- v2.5.0/linux/include/linux/bootmem.h Thu Nov 22 11:47:23 2001 +++ linux/include/linux/bootmem.h Tue Nov 27 09:41:46 2001 @@ -18,6 +18,11 @@ extern unsigned long min_low_pfn; /* + * highest page + */ +extern unsigned long max_pfn; + +/* * node_bootmem_map is a map pointer - the bits represent all physical * memory pages (including holes) on the node. 
*/ diff -u --recursive --new-file v2.5.0/linux/include/linux/devfs_fs_kernel.h linux/include/linux/devfs_fs_kernel.h --- v2.5.0/linux/include/linux/devfs_fs_kernel.h Thu Nov 22 11:47:00 2001 +++ linux/include/linux/devfs_fs_kernel.h Tue Nov 27 09:40:20 2001 @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include diff -u --recursive --new-file v2.5.0/linux/include/linux/elevator.h linux/include/linux/elevator.h --- v2.5.0/linux/include/linux/elevator.h Thu Feb 15 16:58:34 2001 +++ linux/include/linux/elevator.h Tue Nov 27 09:23:27 2001 @@ -5,13 +5,20 @@ struct list_head *, struct list_head *, int); -typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *, - struct buffer_head *, int, int); +typedef int (elevator_merge_fn) (request_queue_t *, struct request **, + struct list_head *, struct bio *); typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int); typedef void (elevator_merge_req_fn) (struct request *, struct request *); +typedef struct request *(elevator_next_req_fn) (request_queue_t *); + +typedef void (elevator_add_req_fn) (request_queue_t *, struct request *, struct list_head *); + +typedef int (elevator_init_fn) (request_queue_t *, elevator_t *); +typedef void (elevator_exit_fn) (request_queue_t *, elevator_t *); + struct elevator_s { int read_latency; @@ -21,31 +28,46 @@ elevator_merge_cleanup_fn *elevator_merge_cleanup_fn; elevator_merge_req_fn *elevator_merge_req_fn; - unsigned int queue_ID; + elevator_next_req_fn *elevator_next_req_fn; + elevator_add_req_fn *elevator_add_req_fn; + + elevator_init_fn *elevator_init_fn; + elevator_exit_fn *elevator_exit_fn; + + /* + * per-elevator private data + */ + void *elevator_data; + + char queue_name[16]; }; -int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); +int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct bio *); void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int); void elevator_noop_merge_req(struct request *, struct request *); -int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); +int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct bio *); void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int); void elevator_linus_merge_req(struct request *, struct request *); +int elv_linus_init(request_queue_t *, elevator_t *); +void elv_linus_exit(request_queue_t *, elevator_t *); +struct request *elv_next_request_fn(request_queue_t *); +void elv_add_request_fn(request_queue_t *, struct request *,struct list_head *); +/* + * use the /proc/iosched interface, all the below is history -> + */ typedef struct blkelv_ioctl_arg_s { int queue_ID; int read_latency; int write_latency; int max_bomb_segments; } blkelv_ioctl_arg_t; - #define BLKELVGET _IOR(0x12,106,sizeof(blkelv_ioctl_arg_t)) #define BLKELVSET _IOW(0x12,107,sizeof(blkelv_ioctl_arg_t)) -extern int blkelvget_ioctl(elevator_t *, blkelv_ioctl_arg_t *); -extern int blkelvset_ioctl(elevator_t *, const blkelv_ioctl_arg_t *); - -extern void elevator_init(elevator_t *, elevator_t); +extern int elevator_init(request_queue_t *, elevator_t *, elevator_t, char *); +extern void elevator_exit(request_queue_t *, elevator_t *); /* * Return values from elevator merger @@ -81,6 +103,24 @@ return latency; } +/* + * will change once we move to a more complex data structure than a simple + * 
list for pending requests + */ +#define elv_queue_empty(q) list_empty(&(q)->queue_head) + +/* + * elevator private data + */ +struct elv_linus_data { + unsigned long flags; +}; + +#define ELV_DAT(e) ((struct elv_linus_data *)(e)->elevator_data) + +#define ELV_LINUS_BACK_MERGE 1 +#define ELV_LINUS_FRONT_MERGE 2 + #define ELEVATOR_NOOP \ ((elevator_t) { \ 0, /* read_latency */ \ @@ -89,6 +129,10 @@ elevator_noop_merge, /* elevator_merge_fn */ \ elevator_noop_merge_cleanup, /* elevator_merge_cleanup_fn */ \ elevator_noop_merge_req, /* elevator_merge_req_fn */ \ + elv_next_request_fn, \ + elv_add_request_fn, \ + elv_linus_init, \ + elv_linus_exit, \ }) #define ELEVATOR_LINUS \ @@ -99,6 +143,10 @@ elevator_linus_merge, /* elevator_merge_fn */ \ elevator_linus_merge_cleanup, /* elevator_merge_cleanup_fn */ \ elevator_linus_merge_req, /* elevator_merge_req_fn */ \ + elv_next_request_fn, \ + elv_add_request_fn, \ + elv_linus_init, \ + elv_linus_exit, \ }) #endif diff -u --recursive --new-file v2.5.0/linux/include/linux/fs.h linux/include/linux/fs.h --- v2.5.0/linux/include/linux/fs.h Thu Nov 22 11:46:19 2001 +++ linux/include/linux/fs.h Tue Nov 27 09:40:20 2001 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -74,6 +75,8 @@ #define FMODE_READ 1 #define FMODE_WRITE 2 +#define RW_MASK 1 +#define RWA_MASK 2 #define READ 0 #define WRITE 1 #define READA 2 /* read-ahead - don't block if no resources */ @@ -110,6 +113,7 @@ #define MS_BIND 4096 #define MS_REC 16384 #define MS_VERBOSE 32768 +#define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) /* @@ -201,6 +205,7 @@ extern void update_atime (struct inode *); #define UPDATE_ATIME(inode) update_atime (inode) +extern void bio_hash_init(unsigned long); extern void buffer_init(unsigned long); extern void inode_init(unsigned long); extern void mnt_init(unsigned long); @@ -237,28 +242,24 @@ struct buffer_head { /* First cache line: */ struct buffer_head *b_next; /* Hash queue list */ - unsigned long b_blocknr; /* block number */ + sector_t b_blocknr; /* block number */ unsigned short b_size; /* block size */ unsigned short b_list; /* List that this buffer appears */ kdev_t b_dev; /* device (B_FREE = free) */ atomic_t b_count; /* users using this block */ - kdev_t b_rdev; /* Real device */ unsigned long b_state; /* buffer state bitmap (see above) */ unsigned long b_flushtime; /* Time when (dirty) buffer should be written */ struct buffer_head *b_next_free;/* lru/free list linkage */ struct buffer_head *b_prev_free;/* doubly linked list of buffers */ struct buffer_head *b_this_page;/* circular list of buffers in one page */ - struct buffer_head *b_reqnext; /* request queue */ - struct buffer_head **b_pprev; /* doubly linked list of hash-queue */ char * b_data; /* pointer to data block */ struct page *b_page; /* the page this bh is mapped to */ void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */ void *b_private; /* reserved for b_end_io */ - unsigned long b_rsector; /* Real buffer location on disk */ wait_queue_head_t b_wait; struct inode * b_inode; @@ -853,6 +854,8 @@ int (*getattr) (struct dentry *, struct iattr *); }; +struct seq_file; + /* * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called * without the big kernel lock held in all filesystems. 
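The RW_MASK/RWA_MASK pair defined near the top of this fs.h hunk encodes the existing READ (0), WRITE (1) and READA (2) values in the two low bits of bio->bi_rw; that is all the bio_rw() and bio_data_dir() macros below decode. A worked example of the arithmetic (illustrative only):

	/*
	 * READ  = 0: bio_rw() == READ,  bio_data_dir() == READ
	 * WRITE = 1: bio_rw() == WRITE, bio_data_dir() == WRITE
	 * READA = 2: bio_rw() == READA, but bio_data_dir() == READ,
	 *            since only bit 0 (RW_MASK) encodes the direction
	 */
	int kind = bio->bi_rw & (RW_MASK | RWA_MASK);	/* == bio_rw(bio) */
	int dir  = bio->bi_rw & RW_MASK;		/* == bio_data_dir(bio) */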
@@ -904,6 +907,7 @@ */ struct dentry * (*fh_to_dentry)(struct super_block *sb, __u32 *fh, int len, int fhtype, int parent); int (*dentry_to_fh)(struct dentry *, __u32 *fh, int *lenp, int need_parent); + int (*show_options)(struct seq_file *, struct vfsmount *); }; /* Inode state bits.. */ @@ -1169,12 +1173,25 @@ static inline void buffer_IO_error(struct buffer_head * bh) { mark_buffer_clean(bh); + /* - * b_end_io has to clear the BH_Uptodate bitflag in the error case! + * b_end_io has to clear the BH_Uptodate bitflag in the read error + * case, however buffer contents are not necessarily bad if a + * write fails */ - bh->b_end_io(bh, 0); + bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); } +/* + * return READ, READA, or WRITE + */ +#define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) + +/* + * return data direction, READ or WRITE + */ +#define bio_data_dir(bio) ((bio)->bi_rw & 1) + extern void buffer_insert_inode_queue(struct buffer_head *, struct inode *); static inline void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) { @@ -1342,10 +1359,11 @@ extern void remove_inode_hash(struct inode *); extern struct file * get_empty_filp(void); extern void file_move(struct file *f, struct list_head *list); -extern struct buffer_head * get_hash_table(kdev_t, int, int); -extern struct buffer_head * getblk(kdev_t, int, int); +extern struct buffer_head * get_hash_table(kdev_t, sector_t, int); +extern struct buffer_head * getblk(kdev_t, sector_t, int); extern void ll_rw_block(int, int, struct buffer_head * bh[]); -extern void submit_bh(int, struct buffer_head *); +extern int submit_bh(int, struct buffer_head *); +extern int submit_bio(int, struct bio *); extern int is_read_only(kdev_t); extern void __brelse(struct buffer_head *); static inline void brelse(struct buffer_head *buf) @@ -1365,9 +1383,9 @@ extern void put_unused_buffer_head(struct buffer_head * bh); extern struct buffer_head * get_unused_buffer_head(int async); -extern int brw_page(int, struct page *, kdev_t, int [], int); +extern int brw_page(int, struct page *, kdev_t, sector_t [], int); -typedef int (get_block_t)(struct inode*,long,struct buffer_head*,int); +typedef int (get_block_t)(struct inode*,sector_t,struct buffer_head*,int); /* Generic buffer handling for block filesystems.. 
*/ extern int try_to_release_page(struct page * page, int gfp_mask); @@ -1383,7 +1401,7 @@ extern int block_commit_write(struct page *page, unsigned from, unsigned to); extern int block_sync_page(struct page *); -int generic_block_bmap(struct address_space *, long, get_block_t *); +sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int generic_commit_write(struct file *, struct page *, unsigned, unsigned); int block_truncate_page(struct address_space *, loff_t, get_block_t *); extern int generic_direct_IO(int, struct inode *, struct kiobuf *, unsigned long, int, get_block_t *); diff -u --recursive --new-file v2.5.0/linux/include/linux/genhd.h linux/include/linux/genhd.h --- v2.5.0/linux/include/linux/genhd.h Thu Nov 22 11:47:05 2001 +++ linux/include/linux/genhd.h Tue Nov 27 09:40:20 2001 @@ -86,11 +86,11 @@ }; /* drivers/block/genhd.c */ -extern struct gendisk *gendisk_head; - extern void add_gendisk(struct gendisk *gp); extern void del_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(kdev_t dev); +extern unsigned long get_start_sect(kdev_t dev); +extern unsigned long get_nr_sects(kdev_t dev); #endif /* __KERNEL__ */ @@ -244,35 +244,10 @@ extern void devfs_register_partitions (struct gendisk *dev, int minor, int unregister); - - -/* - * FIXME: this should use genhd->minor_shift, but that is slow to look up. - */ static inline unsigned int disk_index (kdev_t dev) { - int major = MAJOR(dev); - int minor = MINOR(dev); - unsigned int index; - - switch (major) { - case DAC960_MAJOR+0: - index = (minor & 0x00f8) >> 3; - break; - case SCSI_DISK0_MAJOR: - index = (minor & 0x00f0) >> 4; - break; - case IDE0_MAJOR: /* same as HD_MAJOR */ - case XT_DISK_MAJOR: - index = (minor & 0x0040) >> 6; - break; - case IDE1_MAJOR: - index = ((minor & 0x0040) >> 6) + 2; - break; - default: - return 0; - } - return index; + struct gendisk *g = get_gendisk(dev); + return g ? (MINOR(dev) >> g->minor_shift) : 0; } #endif diff -u --recursive --new-file v2.5.0/linux/include/linux/highmem.h linux/include/linux/highmem.h --- v2.5.0/linux/include/linux/highmem.h Thu Nov 22 11:46:23 2001 +++ linux/include/linux/highmem.h Tue Nov 27 09:40:25 2001 @@ -13,8 +13,7 @@ /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); -extern struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig); - +extern void create_bounce(struct bio **bio_orig, int gfp_mask); static inline char *bh_kmap(struct buffer_head *bh) { @@ -26,6 +25,42 @@ kunmap(bh->b_page); } +/* + * remember to add offset! and never ever reenable interrupts between a + * bio_kmap_irq and bio_kunmap_irq!! 
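+ *
+ * a usage sketch (hypothetical PIO path; any extra offset into the
+ * request, like ide_rq_offset() in ide.h, is the caller's business):
+ *
+ *	unsigned long flags;
+ *	char *buf = bio_kmap_irq(bio, &flags);
+ *
+ *	... move bytes to/from buf: no sleeping, no sti() in between ...
+ *
+ *	bio_kunmap_irq(buf, &flags);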
+ */ +static inline char *bio_kmap_irq(struct bio *bio, unsigned long *flags) +{ + unsigned long addr; + + __save_flags(*flags); + + /* + * could be low + */ + if (!PageHighMem(bio_page(bio))) + return bio_data(bio); + + /* + * it's a highmem page + */ + __cli(); + addr = (unsigned long) kmap_atomic(bio_page(bio), KM_BIO_IRQ); + + if (addr & ~PAGE_MASK) + BUG(); + + return (char *) addr + bio_offset(bio); +} + +static inline void bio_kunmap_irq(char *buffer, unsigned long *flags) +{ + unsigned long ptr = (unsigned long) buffer & PAGE_MASK; + + kunmap_atomic((void *) ptr, KM_BIO_IRQ); + __restore_flags(*flags); +} + #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } @@ -39,6 +74,9 @@ #define bh_kmap(bh) ((bh)->b_data) #define bh_kunmap(bh) do { } while (0) + +#define bio_kmap_irq(bio, flags) (bio_data(bio)) +#define bio_kunmap_irq(buf, flags) do { *(flags) = 0; } while (0) #endif /* CONFIG_HIGHMEM */ diff -u --recursive --new-file v2.5.0/linux/include/linux/ide.h linux/include/linux/ide.h --- v2.5.0/linux/include/linux/ide.h Thu Nov 22 11:48:07 2001 +++ linux/include/linux/ide.h Tue Nov 27 09:41:41 2001 @@ -149,6 +149,21 @@ #define DATA_READY (DRQ_STAT) /* + * Our Physical Region Descriptor (PRD) table should be large enough + * to handle the biggest I/O request we are likely to see. Since requests + * can have no more than 256 sectors, and since the typical blocksize is + * two or more sectors, we could get by with a limit of 128 entries here for + * the usual worst case. Most requests seem to include some contiguous blocks, + * further reducing the number of table entries required. + * + * As it turns out though, we must allocate a full 4KB page for this, + * so the two PRD tables (ide0 & ide1) will each get half of that, + * allowing each to have about 256 entries (8 bytes each) from this. + */ +#define PRD_BYTES 8 +#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES)) + +/* * Some more useful definitions */ #define IDE_MAJOR_NAME "hd" /* the same for all i/f; see also genhd.c */ @@ -223,6 +238,23 @@ #endif /* + * hwif_chipset_t is used to keep track of the specific hardware + * chipset used by each IDE interface, if known. + */ +typedef enum { ide_unknown, ide_generic, ide_pci, + ide_cmd640, ide_dtc2278, ide_ali14xx, + ide_qd65xx, ide_umc8672, ide_ht6560b, + ide_pdc4030, ide_rz1000, ide_trm290, + ide_cmd646, ide_cy82c693, ide_4drives, + ide_pmac, ide_etrax100 +} hwif_chipset_t; + +#define IDE_CHIPSET_PCI_MASK \ + ((1<<ide_pci)|(1<<ide_cmd646)|(1<<ide_ali14xx)) +#define IDE_CHIPSET_IS_PCI(c) ((IDE_CHIPSET_PCI_MASK >> (c)) & 1)
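+
+/*
+ * e.g. (sketch): a probe or dma setup path can now test
+ *
+ *	if (IDE_CHIPSET_IS_PCI(hwif->chipset))
+ *		...
+ *
+ * with one shift-and-mask instead of comparing the chipset against
+ * ide_pci, ide_cmd646 and ide_ali14xx one by one.
+ */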
+ + +/* * Structure to hold all information about the location of this port */ typedef struct hw_regs_s { @@ -231,6 +263,7 @@ int dma; /* our dma entry */ ide_ack_intr_t *ack_intr; /* acknowledge interrupt */ void *priv; /* interface specific data */ + hwif_chipset_t chipset; } hw_regs_t; /* @@ -440,22 +473,6 @@ */ typedef int (ide_busproc_t) (struct hwif_s *, int); -/* - * hwif_chipset_t is used to keep track of the specific hardware - * chipset used by each IDE interface, if known. - */ -typedef enum { ide_unknown, ide_generic, ide_pci, - ide_cmd640, ide_dtc2278, ide_ali14xx, - ide_qd65xx, ide_umc8672, ide_ht6560b, - ide_pdc4030, ide_rz1000, ide_trm290, - ide_cmd646, ide_cy82c693, ide_4drives, - ide_pmac, ide_etrax100 -} hwif_chipset_t; - -#define IDE_CHIPSET_PCI_MASK \ - ((1<<ide_pci)|(1<<ide_cmd646)|(1<<ide_ali14xx)) -#define IDE_CHIPSET_IS_PCI(c) ((IDE_CHIPSET_PCI_MASK >> (c)) & 1) - #ifdef CONFIG_BLK_DEV_IDEPCI typedef struct ide_pci_devid_s { unsigned short vid; @@ -488,7 +505,6 @@ struct scatterlist *sg_table; /* Scatter-gather list used to build the above */ int sg_nents; /* Current number of entries in it */ int sg_dma_direction; /* dma transfer direction */ - int sg_dma_active; /* is it in use */ struct hwif_s *mate; /* other hwif from same PCI chip */ unsigned long dma_base; /* base addr for dma ports */ unsigned dma_extra; /* extra addr for dma ports */ @@ -507,6 +523,7 @@ unsigned reset : 1; /* reset after probe */ unsigned autodma : 1; /* automatically try to enable DMA at boot */ unsigned udma_four : 1; /* 1=ATA-66 capable, 0=default */ + unsigned highmem : 1; /* can do full 32-bit dma */ byte channel; /* for dual-port chips: 0=primary, 1=secondary */ #ifdef CONFIG_BLK_DEV_IDEPCI struct pci_dev *pci_dev; /* for pci chipsets */ @@ -541,10 +558,12 @@ */ typedef int (ide_expiry_t)(ide_drive_t *); +#define IDE_BUSY 0 +#define IDE_SLEEP 1 + typedef struct hwgroup_s { ide_handler_t *handler;/* irq handler, if active */ - volatile int busy; /* BOOL: protects all fields below */ - int sleeping; /* BOOL: wake us up on timer expiry */ + unsigned long flags; /* BUSY, SLEEPING */ ide_drive_t *drive; /* current drive */ ide_hwif_t *hwif; /* ptr to current hwif in linked-list */ struct request *rq; /* current request */ @@ -711,7 +730,8 @@ #define LOCAL_END_REQUEST /* Don't generate end_request in blk.h */ #include -void ide_end_request(byte uptodate, ide_hwgroup_t *hwgroup); +inline int __ide_end_request(ide_hwgroup_t *, int, int); +int ide_end_request(byte uptodate, ide_hwgroup_t *hwgroup); /* * This is used for (nearly) all data transfers from/to the IDE interface @@ -787,6 +807,11 @@ unsigned long current_capacity (ide_drive_t *drive); /* + * Revalidate (read partition tables) + */ +void ide_revalidate_drive (ide_drive_t *drive); + +/* * Start a reset operation for an IDE interface. * The caller should return immediately after invoking this. */ @@ -814,6 +839,21 @@ } ide_action_t; /* + * temporarily mapping a (possible) highmem bio for PIO transfer + */ +#define ide_rq_offset(rq) (((rq)->hard_cur_sectors - (rq)->current_nr_sectors) << 9) + +extern inline void *ide_map_buffer(struct request *rq, unsigned long *flags) +{ + return bio_kmap_irq(rq->bio, flags) + ide_rq_offset(rq); +} + +extern inline void ide_unmap_buffer(char *buffer, unsigned long *flags) +{ + bio_kunmap_irq(buffer, flags); +} + +/* * This function issues a special IDE device request * onto the request queue. * @@ -960,5 +1000,8 @@ #endif void hwif_unregister (ide_hwif_t *hwif); + +#define DRIVE_LOCK(drive) (&(drive)->queue.queue_lock) +extern spinlock_t ide_lock; #endif /* _IDE_H */ diff -u --recursive --new-file v2.5.0/linux/include/linux/iobuf.h linux/include/linux/iobuf.h --- v2.5.0/linux/include/linux/iobuf.h Thu Nov 22 11:46:26 2001 +++ linux/include/linux/iobuf.h Tue Nov 27 09:40:28 2001 @@ -28,7 +28,7 @@ #define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1) #define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2) -/* The main kiobuf struct used for all our IO!
*/ +/* The main kiobuf struct */ struct kiobuf { @@ -48,8 +48,7 @@ /* Always embed enough struct pages for atomic IO */ struct page * map_array[KIO_STATIC_PAGES]; - struct buffer_head * bh[KIO_MAX_SECTORS]; - unsigned long blocks[KIO_MAX_SECTORS]; + sector_t blocks[KIO_MAX_SECTORS]; /* Dynamic state for IO completion: */ atomic_t io_count; /* IOs still in progress */ @@ -69,7 +68,7 @@ /* fs/iobuf.c */ -void end_kio_request(struct kiobuf *, int); +int end_kio_request(struct kiobuf *, int); void simple_wakeup_kiobuf(struct kiobuf *); int alloc_kiovec(int nr, struct kiobuf **); void free_kiovec(int nr, struct kiobuf **); @@ -81,6 +80,9 @@ /* fs/buffer.c */ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], - kdev_t dev, unsigned long b[], int size); + kdev_t dev, sector_t [], int size); + +/* fs/bio.c */ +void ll_rw_kio(int rw, struct kiobuf *kio, kdev_t dev, sector_t block); #endif /* __LINUX_IOBUF_H */ diff -u --recursive --new-file v2.5.0/linux/include/linux/iso_fs.h linux/include/linux/iso_fs.h --- v2.5.0/linux/include/linux/iso_fs.h Thu Nov 22 11:47:11 2001 +++ linux/include/linux/iso_fs.h Tue Nov 27 09:40:56 2001 @@ -220,7 +220,7 @@ extern struct dentry *isofs_lookup(struct inode *, struct dentry *); extern struct buffer_head *isofs_bread(struct inode *, unsigned int, unsigned int); -extern int isofs_get_blocks(struct inode *, long, struct buffer_head **, unsigned long); +extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long); extern struct inode_operations isofs_dir_inode_operations; extern struct file_operations isofs_dir_operations; diff -u --recursive --new-file v2.5.0/linux/include/linux/loop.h linux/include/linux/loop.h --- v2.5.0/linux/include/linux/loop.h Mon Sep 17 13:16:30 2001 +++ linux/include/linux/loop.h Tue Nov 27 09:23:27 2001 @@ -49,8 +49,8 @@ int old_gfp_mask; spinlock_t lo_lock; - struct buffer_head *lo_bh; - struct buffer_head *lo_bhtail; + struct bio *lo_bio; + struct bio *lo_biotail; int lo_state; struct semaphore lo_sem; struct semaphore lo_ctl_mutex; diff -u --recursive --new-file v2.5.0/linux/include/linux/lvm.h linux/include/linux/lvm.h --- v2.5.0/linux/include/linux/lvm.h Sun Nov 11 10:09:32 2001 +++ linux/include/linux/lvm.h Tue Nov 27 09:23:27 2001 @@ -468,6 +468,12 @@ } lv_bmap_t; /* + * fixme... + */ +#define LVM_MAX_ATOMIC_IO 512 +#define LVM_MAX_SECTORS (LVM_MAX_ATOMIC_IO * 2) + +/* * Structure Logical Volume (LV) Version 3 */ @@ -505,6 +511,7 @@ uint lv_snapshot_minor; #ifdef __KERNEL__ struct kiobuf *lv_iobuf; + sector_t blocks[LVM_MAX_SECTORS]; struct kiobuf *lv_COW_table_iobuf; struct rw_semaphore lv_lock; struct list_head *lv_snapshot_hash_table; diff -u --recursive --new-file v2.5.0/linux/include/linux/nbd.h linux/include/linux/nbd.h --- v2.5.0/linux/include/linux/nbd.h Mon Oct 15 19:29:05 2001 +++ linux/include/linux/nbd.h Tue Nov 27 09:23:27 2001 @@ -37,24 +37,25 @@ static void nbd_end_request(struct request *req) { - struct buffer_head *bh; + struct bio *bio; unsigned nsect; unsigned long flags; int uptodate = (req->errors == 0) ? 
1 : 0; + request_queue_t *q = req->q; #ifdef PARANOIA requests_out++; #endif - spin_lock_irqsave(&io_request_lock, flags); - while((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; + spin_lock_irqsave(&q->queue_lock, flags); + while((bio = req->bio) != NULL) { + nsect = bio_sectors(bio); blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, uptodate); + req->bio = bio->bi_next; + bio->bi_next = NULL; + bio_endio(bio, uptodate, nsect); } blkdev_release_request(req); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } #define MAX_NBD 128 diff -u --recursive --new-file v2.5.0/linux/include/linux/raid/md_k.h linux/include/linux/raid/md_k.h --- v2.5.0/linux/include/linux/raid/md_k.h Mon Nov 12 09:51:56 2001 +++ linux/include/linux/raid/md_k.h Tue Nov 27 09:23:27 2001 @@ -220,7 +220,7 @@ struct mdk_personality_s { char *name; - int (*make_request)(mddev_t *mddev, int rw, struct buffer_head * bh); + int (*make_request)(mddev_t *mddev, int rw, struct bio *bio); int (*run)(mddev_t *mddev); int (*stop)(mddev_t *mddev); int (*status)(char *page, mddev_t *mddev); diff -u --recursive --new-file v2.5.0/linux/include/linux/reiserfs_fs.h linux/include/linux/reiserfs_fs.h --- v2.5.0/linux/include/linux/reiserfs_fs.h Fri Nov 9 14:18:25 2001 +++ linux/include/linux/reiserfs_fs.h Tue Nov 27 09:23:27 2001 @@ -1856,7 +1856,7 @@ loff_t offset, int type, int length, int entry_count); /*void store_key (struct key * key); void forget_key (struct key * key);*/ -int reiserfs_get_block (struct inode * inode, long block, +int reiserfs_get_block (struct inode * inode, sector_t block, struct buffer_head * bh_result, int create); struct inode * reiserfs_iget (struct super_block * s, const struct cpu_key * key); diff -u --recursive --new-file v2.5.0/linux/include/linux/slab.h linux/include/linux/slab.h --- v2.5.0/linux/include/linux/slab.h Thu Nov 22 11:46:20 2001 +++ linux/include/linux/slab.h Tue Nov 27 09:40:22 2001 @@ -38,6 +38,7 @@ #define SLAB_NO_REAP 0x00001000UL /* never reap from the cache */ #define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */ #define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */ +#define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */ /* flags passed to a constructor func */ #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */ @@ -76,6 +77,7 @@ extern kmem_cache_t *bh_cachep; extern kmem_cache_t *fs_cachep; extern kmem_cache_t *sigact_cachep; +extern kmem_cache_t *bio_cachep; #endif /* __KERNEL__ */ diff -u --recursive --new-file v2.5.0/linux/include/linux/types.h linux/include/linux/types.h --- v2.5.0/linux/include/linux/types.h Thu Nov 22 11:46:18 2001 +++ linux/include/linux/types.h Tue Nov 27 09:40:20 2001 @@ -113,6 +113,17 @@ typedef __s64 int64_t; #endif +/* + * transition to 64-bit sector_t, possibly making it an option... 
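+ *
+ * back-of-the-envelope: an unsigned long sector_t on a 32-bit machine
+ * caps a block device at 2^32 sectors * 512 bytes = 2TB; defining
+ * BLK_64BIT_SECTOR lifts the limit to 2^64 sectors, at the price of
+ * 64-bit arithmetic on 32-bit hardware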
+ */ +#undef BLK_64BIT_SECTOR + +#ifdef BLK_64BIT_SECTOR +typedef u64 sector_t; +#else +typedef unsigned long sector_t; +#endif + #endif /* __KERNEL_STRICT_NAMES */ /* diff -u --recursive --new-file v2.5.0/linux/init/main.c linux/init/main.c --- v2.5.0/linux/init/main.c Fri Nov 9 14:15:00 2001 +++ linux/init/main.c Tue Nov 27 09:23:27 2001 @@ -591,6 +591,8 @@ #endif mem_init(); kmem_cache_sizes_init(); + pgtable_cache_init(); + mempages = num_physpages; fork_init(mempages); @@ -598,6 +600,7 @@ vfs_caches_init(mempages); buffer_init(mempages); page_cache_init(mempages); + bio_hash_init(mempages); #if defined(CONFIG_ARCH_S390) ccwcache_init(); #endif diff -u --recursive --new-file v2.5.0/linux/kernel/exec_domain.c linux/kernel/exec_domain.c --- v2.5.0/linux/kernel/exec_domain.c Sun Nov 11 10:20:21 2001 +++ linux/kernel/exec_domain.c Sat Nov 24 14:27:27 2001 @@ -102,7 +102,7 @@ } #endif - ep = NULL; + ep = &default_exec_domain; out: read_unlock(&exec_domains_lock); return (ep); @@ -162,8 +162,6 @@ struct exec_domain *ep, *oep; ep = lookup_exec_domain(personality); - if (ep == NULL) - return -EINVAL; if (ep == current->exec_domain) { current->personality = personality; return 0; diff -u --recursive --new-file v2.5.0/linux/kernel/ksyms.c linux/kernel/ksyms.c --- v2.5.0/linux/kernel/ksyms.c Wed Nov 21 14:07:25 2001 +++ linux/kernel/ksyms.c Tue Nov 27 09:23:27 2001 @@ -121,6 +121,8 @@ EXPORT_SYMBOL(kunmap_high); EXPORT_SYMBOL(highmem_start_page); EXPORT_SYMBOL(create_bounce); +EXPORT_SYMBOL(kmap_prot); +EXPORT_SYMBOL(kmap_pte); #endif /* filesystem internal functions */ @@ -290,7 +292,6 @@ /* block device driver support */ EXPORT_SYMBOL(blksize_size); -EXPORT_SYMBOL(hardsect_size); EXPORT_SYMBOL(blk_size); EXPORT_SYMBOL(blk_dev); EXPORT_SYMBOL(is_read_only); @@ -307,8 +308,8 @@ EXPORT_SYMBOL(tq_disk); EXPORT_SYMBOL(init_buffer); EXPORT_SYMBOL(refile_buffer); -EXPORT_SYMBOL(max_sectors); EXPORT_SYMBOL(max_readahead); +EXPORT_SYMBOL(wipe_partitions); /* tty routines */ EXPORT_SYMBOL(tty_hangup); diff -u --recursive --new-file v2.5.0/linux/mm/bootmem.c linux/mm/bootmem.c --- v2.5.0/linux/mm/bootmem.c Tue Sep 18 14:10:43 2001 +++ linux/mm/bootmem.c Tue Nov 27 09:23:27 2001 @@ -25,6 +25,7 @@ */ unsigned long max_low_pfn; unsigned long min_low_pfn; +unsigned long max_pfn; /* return the number of _pages_ that will be allocated for the boot bitmap */ unsigned long __init bootmem_bootmap_pages (unsigned long pages) diff -u --recursive --new-file v2.5.0/linux/mm/filemap.c linux/mm/filemap.c --- v2.5.0/linux/mm/filemap.c Wed Nov 21 14:07:25 2001 +++ linux/mm/filemap.c Sun Nov 25 09:55:10 2001 @@ -935,7 +935,6 @@ spin_unlock(&pagecache_lock); if (!page) { struct page *newpage = alloc_page(gfp_mask); - page = ERR_PTR(-ENOMEM); if (newpage) { spin_lock(&pagecache_lock); page = __find_lock_page_helper(mapping, index, *hash); diff -u --recursive --new-file v2.5.0/linux/mm/highmem.c linux/mm/highmem.c --- v2.5.0/linux/mm/highmem.c Mon Oct 22 15:01:57 2001 +++ linux/mm/highmem.c Tue Nov 27 09:23:27 2001 @@ -21,6 +21,9 @@ #include #include #include +#include + +#include /* * Virtual_count is not a pure "count". @@ -186,7 +189,7 @@ wake_up(&pkmap_map_wait); } -#define POOL_SIZE 32 +#define POOL_SIZE 64 /* * This lock gets no contention at all, normally. @@ -200,77 +203,41 @@ static LIST_HEAD(emergency_bhs); /* - * Simple bounce buffer support for highmem pages. - * This will be moved to the block layer in 2.5. + * Simple bounce buffer support for highmem pages. 
Depending on the + * queue gfp mask set, *to may or may not be a highmem page. kmap it + * always, it will do the Right Thing */ - -static inline void copy_from_high_bh (struct buffer_head *to, - struct buffer_head *from) +static inline void copy_from_high_bio(struct bio *to, struct bio *from) { - struct page *p_from; - char *vfrom; + unsigned char *vto, *vfrom; + + if (unlikely(in_interrupt())) + BUG(); + + vto = bio_kmap(to); + vfrom = bio_kmap(from); - p_from = from->b_page; + memcpy(vto, vfrom + bio_offset(from), bio_size(to)); - vfrom = kmap_atomic(p_from, KM_USER0); - memcpy(to->b_data, vfrom + bh_offset(from), to->b_size); - kunmap_atomic(vfrom, KM_USER0); + bio_kunmap(from); + bio_kunmap(to); } -static inline void copy_to_high_bh_irq (struct buffer_head *to, - struct buffer_head *from) +static inline void copy_to_high_bio_irq(struct bio *to, struct bio *from) { - struct page *p_to; - char *vto; + unsigned char *vto, *vfrom; unsigned long flags; - p_to = to->b_page; __save_flags(flags); __cli(); - vto = kmap_atomic(p_to, KM_BOUNCE_READ); - memcpy(vto + bh_offset(to), from->b_data, to->b_size); + vto = kmap_atomic(bio_page(to), KM_BOUNCE_READ); + vfrom = kmap_atomic(bio_page(from), KM_BOUNCE_READ); + memcpy(vto + bio_offset(to), vfrom, bio_size(to)); + kunmap_atomic(vfrom, KM_BOUNCE_READ); kunmap_atomic(vto, KM_BOUNCE_READ); __restore_flags(flags); } -static inline void bounce_end_io (struct buffer_head *bh, int uptodate) -{ - struct page *page; - struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private); - unsigned long flags; - - bh_orig->b_end_io(bh_orig, uptodate); - - page = bh->b_page; - - spin_lock_irqsave(&emergency_lock, flags); - if (nr_emergency_pages >= POOL_SIZE) - __free_page(page); - else { - /* - * We are abusing page->list to manage - * the highmem emergency pool: - */ - list_add(&page->list, &emergency_pages); - nr_emergency_pages++; - } - - if (nr_emergency_bhs >= POOL_SIZE) { -#ifdef HIGHMEM_DEBUG - /* Don't clobber the constructed slab cache */ - init_waitqueue_head(&bh->b_wait); -#endif - kmem_cache_free(bh_cachep, bh); - } else { - /* - * Ditto in the bh case, here we abuse b_inode_buffers: - */ - list_add(&bh->b_inode_buffers, &emergency_bhs); - nr_emergency_bhs++; - } - spin_unlock_irqrestore(&emergency_lock, flags); -} - static __init int init_emergency_pool(void) { struct sysinfo i; @@ -290,44 +257,63 @@ list_add(&page->list, &emergency_pages); nr_emergency_pages++; } - while (nr_emergency_bhs < POOL_SIZE) { - struct buffer_head * bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC); - if (!bh) { - printk("couldn't refill highmem emergency bhs"); - break; - } - list_add(&bh->b_inode_buffers, &emergency_bhs); - nr_emergency_bhs++; - } spin_unlock_irq(&emergency_lock); - printk("allocated %d pages and %d bhs reserved for the highmem bounces\n", - nr_emergency_pages, nr_emergency_bhs); - + printk("allocated %d pages reserved for the highmem bounces\n", nr_emergency_pages); return 0; } __initcall(init_emergency_pool); -static void bounce_end_io_write (struct buffer_head *bh, int uptodate) +static inline void bounce_end_io (struct bio *bio, int nr_sectors) +{ + struct bio *bio_orig = bio->bi_private; + struct page *page = bio_page(bio); + unsigned long flags; + + if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + set_bit(BIO_UPTODATE, bio_orig->bi_flags); + + bio_orig->bi_end_io(bio_orig, nr_sectors); + + spin_lock_irqsave(&emergency_lock, flags); + if (nr_emergency_pages >= POOL_SIZE) { + spin_unlock_irqrestore(&emergency_lock, flags); + __free_page(page); + } 
else { + /* + * We are abusing page->list to manage + * the highmem emergency pool: + */ + list_add(&page->list, &emergency_pages); + nr_emergency_pages++; + spin_unlock_irqrestore(&emergency_lock, flags); + } + + bio_hash_remove(bio); + bio_put(bio); +} + +static void bounce_end_io_write (struct bio *bio, int nr_sectors) { - bounce_end_io(bh, uptodate); + bounce_end_io(bio, nr_sectors); } -static void bounce_end_io_read (struct buffer_head *bh, int uptodate) +static void bounce_end_io_read (struct bio *bio, int nr_sectors) { - struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private); + struct bio *bio_orig = bio->bi_private; + + if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + copy_to_high_bio_irq(bio_orig, bio); - if (uptodate) - copy_to_high_bh_irq(bh_orig, bh); - bounce_end_io(bh, uptodate); + bounce_end_io(bio, nr_sectors); } -struct page *alloc_bounce_page (void) +struct page *alloc_bounce_page(int gfp_mask) { struct list_head *tmp; struct page *page; - page = alloc_page(GFP_NOHIGHIO); + page = alloc_page(gfp_mask); if (page) return page; /* @@ -360,91 +346,35 @@ goto repeat_alloc; } -struct buffer_head *alloc_bounce_bh (void) +void create_bounce(struct bio **bio_orig, int gfp_mask) { - struct list_head *tmp; - struct buffer_head *bh; + struct page *page; + struct bio *bio; - bh = kmem_cache_alloc(bh_cachep, SLAB_NOHIGHIO); - if (bh) - return bh; - /* - * No luck. First, kick the VM so it doesnt idle around while - * we are using up our emergency rations. - */ - wakeup_bdflush(); + bio = bio_alloc(GFP_NOHIGHIO, 1); -repeat_alloc: /* - * Try to allocate from the emergency pool. + * wasteful for 1kB fs, but machines with lots of ram are less likely + * to have 1kB fs for anything that needs to go fast. so all things + * considered, it should be ok. */ - tmp = &emergency_bhs; - spin_lock_irq(&emergency_lock); - if (!list_empty(tmp)) { - bh = list_entry(tmp->next, struct buffer_head, b_inode_buffers); - list_del(tmp->next); - nr_emergency_bhs--; - } - spin_unlock_irq(&emergency_lock); - if (bh) - return bh; + page = alloc_bounce_page(gfp_mask); - /* we need to wait I/O completion */ - run_task_queue(&tq_disk); + bio->bi_dev = (*bio_orig)->bi_dev; + bio->bi_sector = (*bio_orig)->bi_sector; + bio->bi_rw = (*bio_orig)->bi_rw; - current->policy |= SCHED_YIELD; - __set_current_state(TASK_RUNNING); - schedule(); - goto repeat_alloc; -} + bio->bi_io_vec->bvl_vec[0].bv_page = page; + bio->bi_io_vec->bvl_vec[0].bv_len = bio_size(*bio_orig); + bio->bi_io_vec->bvl_vec[0].bv_offset = 0; -struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig) -{ - struct page *page; - struct buffer_head *bh; + bio->bi_private = *bio_orig; - if (!PageHighMem(bh_orig->b_page)) - return bh_orig; - - bh = alloc_bounce_bh(); - /* - * This is wasteful for 1k buffers, but this is a stopgap measure - * and we are being ineffective anyway. This approach simplifies - * things immensly. On boxes with more than 4GB RAM this should - * not be an issue anyway. 
- */ - page = alloc_bounce_page(); - - set_bh_page(bh, page, 0); - - bh->b_next = NULL; - bh->b_blocknr = bh_orig->b_blocknr; - bh->b_size = bh_orig->b_size; - bh->b_list = -1; - bh->b_dev = bh_orig->b_dev; - bh->b_count = bh_orig->b_count; - bh->b_rdev = bh_orig->b_rdev; - bh->b_state = bh_orig->b_state; -#ifdef HIGHMEM_DEBUG - bh->b_flushtime = jiffies; - bh->b_next_free = NULL; - bh->b_prev_free = NULL; - /* bh->b_this_page */ - bh->b_reqnext = NULL; - bh->b_pprev = NULL; -#endif - /* bh->b_page */ - if (rw == WRITE) { - bh->b_end_io = bounce_end_io_write; - copy_from_high_bh(bh, bh_orig); + if (bio_rw(bio) == WRITE) { + bio->bi_end_io = bounce_end_io_write; + copy_from_high_bio(bio, *bio_orig); } else - bh->b_end_io = bounce_end_io_read; - bh->b_private = (void *)bh_orig; - bh->b_rsector = bh_orig->b_rsector; -#ifdef HIGHMEM_DEBUG - memset(&bh->b_wait, -1, sizeof(bh->b_wait)); -#endif + bio->bi_end_io = bounce_end_io_read; - return bh; + *bio_orig = bio; } - diff -u --recursive --new-file v2.5.0/linux/mm/page_io.c linux/mm/page_io.c --- v2.5.0/linux/mm/page_io.c Mon Nov 19 15:19:42 2001 +++ linux/mm/page_io.c Tue Nov 27 09:23:27 2001 @@ -36,7 +36,7 @@ static int rw_swap_page_base(int rw, swp_entry_t entry, struct page *page) { unsigned long offset; - int zones[PAGE_SIZE/512]; + sector_t zones[PAGE_SIZE/512]; int zones_used; kdev_t dev = 0; int block_size; diff -u --recursive --new-file v2.5.0/linux/mm/slab.c linux/mm/slab.c --- v2.5.0/linux/mm/slab.c Tue Sep 18 14:16:26 2001 +++ linux/mm/slab.c Tue Nov 27 09:31:22 2001 @@ -109,9 +109,11 @@ #if DEBUG # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ SLAB_POISON | SLAB_HWCACHE_ALIGN | \ - SLAB_NO_REAP | SLAB_CACHE_DMA) + SLAB_NO_REAP | SLAB_CACHE_DMA | \ + SLAB_MUST_HWCACHE_ALIGN) #else -# define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | SLAB_CACHE_DMA) +# define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \ + SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN) #endif /* @@ -649,7 +651,7 @@ flags &= ~SLAB_POISON; } #if FORCED_DEBUG - if (size < (PAGE_SIZE>>3)) + if ((size < (PAGE_SIZE>>3)) && !(flags & SLAB_MUST_HWCACHE_ALIGN)) /* * do not red zone large object, causes severe * fragmentation.
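The SLAB_MUST_HWCACHE_ALIGN flag threaded through this slab.c hunk exists for objects whose alignment is a correctness requirement rather than a tuning hint: a FORCED_DEBUG kernel would otherwise add red-zone words around small objects and silently break that alignment, so such caches are now exempted from red-zoning. A creation sketch using the bio_cachep declared in slab.h above (whether fs/bio.c passes exactly these flags is not shown in this patch; the parameters are illustrative):

	/* alignment-critical cache: debug builds must not pad it */
	bio_cachep = kmem_cache_create("bio", sizeof(struct bio), 0,
			SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN,
			NULL, NULL);
	if (!bio_cachep)
		panic("bio: can't create bio slab cache");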