## Automatically generated incremental diff ## From: linux-2.5.1-pre1 ## To: linux-2.5.1-pre2 ## Robot: $Id: make-incremental-diff,v 1.5 2001/12/01 08:32:36 hpa Exp $ diff -urN linux-2.5.1-pre1/CREDITS linux/CREDITS --- linux-2.5.1-pre1/CREDITS Sun Nov 11 10:09:32 2001 +++ linux/CREDITS Sat Dec 1 00:37:04 2001 @@ -140,9 +140,11 @@ D: VIA MVP-3/TX Pro III chipset IDE N: Jens Axboe -E: axboe@image.dk -D: Linux CD-ROM maintainer -D: jiffies wrap fixes + schedule timeouts depending on HZ == 100 +E: axboe@suse.de +D: Linux CD-ROM maintainer, DVD support +D: elevator + block layer rewrites +D: highmem I/O support +D: misc hacking on IDE, SCSI, block drivers, etc S: Peter Bangs Vej 258, 2TH S: 2500 Valby S: Denmark diff -urN linux-2.5.1-pre1/Documentation/Configure.help linux/Documentation/Configure.help --- linux-2.5.1-pre1/Documentation/Configure.help Thu Nov 22 10:52:44 2001 +++ linux/Documentation/Configure.help Sat Dec 1 00:37:05 2001 @@ -6644,17 +6644,6 @@ . The module will be called sg.o. If unsure, say N. -Enable extra checks in SCSI queueing code -CONFIG_SCSI_DEBUG_QUEUES - This option turns on a lot of additional consistency checking for - the new queueing code. This will adversely affect performance, but - it is likely that bugs will be caught sooner if this is turned on. - This will typically cause the kernel to panic if an error is - detected, but it would have probably crashed if the panic weren't - there. Comments/questions/problems to linux-scsi mailing list - please. See for more - up-to-date information. - Probe all LUNs on each SCSI device CONFIG_SCSI_MULTI_LUN If you have a SCSI device that supports more than one LUN (Logical diff -urN linux-2.5.1-pre1/Documentation/DocBook/Makefile linux/Documentation/DocBook/Makefile --- linux-2.5.1-pre1/Documentation/DocBook/Makefile Fri Nov 2 17:13:53 2001 +++ linux/Documentation/DocBook/Makefile Sat Dec 1 00:37:05 2001 @@ -108,6 +108,7 @@ $(TOPDIR)/drivers/video/modedb.c \ $(TOPDIR)/fs/devfs/base.c \ $(TOPDIR)/fs/locks.c \ + $(TOPDIR)/fs/bio.c \ $(TOPDIR)/include/asm-i386/bitops.h \ $(TOPDIR)/kernel/pm.c \ $(TOPDIR)/kernel/ksyms.c \ diff -urN linux-2.5.1-pre1/Documentation/filesystems/devfs/ChangeLog linux/Documentation/filesystems/devfs/ChangeLog --- linux-2.5.1-pre1/Documentation/filesystems/devfs/ChangeLog Sat Nov 3 10:06:38 2001 +++ linux/Documentation/filesystems/devfs/ChangeLog Sat Dec 1 00:37:05 2001 @@ -1778,3 +1778,19 @@ Thanks to Kari Hurtta - Avoid deadlock in by using temporary buffer +=============================================================================== +Changes for patch v197 + +- First release of new locking code for devfs core (v1.0) + +- Fixed bug in drivers/cdrom/cdrom.c +=============================================================================== +Changes for patch v198 + +- Discard temporary buffer, now use "%s" for dentry names + +- Don't generate path in : use fake entry instead + +- Use "existing" directory in <_devfs_make_parent_for_leaf> + +- Use slab cache rather than fixed buffer for devfsd events diff -urN linux-2.5.1-pre1/Documentation/filesystems/devfs/README linux/Documentation/filesystems/devfs/README --- linux-2.5.1-pre1/Documentation/filesystems/devfs/README Wed Oct 10 23:23:24 2001 +++ linux/Documentation/filesystems/devfs/README Sat Dec 1 00:37:05 2001 @@ -3,7 +3,7 @@ Linux Devfs (Device File System) FAQ Richard Gooch -29-SEP-2001 +9-NOV-2001 ----------------------------------------------------------------------------- @@ -11,7 +11,9 @@ http://www.atnf.csiro.au/~rgooch/linux/docs/devfs.html 
and looks much better than the text version distributed with the -kernel sources. +kernel sources. A mirror site is available at: + +http://www.ras.ucalgary.ca/~rgooch/linux/docs/devfs.html There is also an optional daemon that may be used with devfs. You can find out more about it at: diff -urN linux-2.5.1-pre1/Documentation/usb/philips.txt linux/Documentation/usb/philips.txt --- linux-2.5.1-pre1/Documentation/usb/philips.txt Wed Oct 17 14:34:06 2001 +++ linux/Documentation/usb/philips.txt Sat Dec 1 00:37:05 2001 @@ -1,5 +1,5 @@ This file contains some additional information for the Philips webcams. -E-mail: webcam@smcc.demon.nl Last updated: 2001-07-27 +E-mail: webcam@smcc.demon.nl Last updated: 2001-09-24 The main webpage for the Philips driver is http://www.smcc.demon.nl/webcam/. It contains a lot of extra information, a FAQ, and the binary plugin @@ -13,11 +13,9 @@ the latter, since it makes troubleshooting a lot easier. The built-in microphone is supported through the USB Audio class. -(Taken from install.html) - When you load the module you can set some default settings for the -camera; some programs depend on a particular image-size or -format. The -options are: +camera; some programs depend on a particular image-size or -format and +don't know how to set it properly in the driver. The options are: size Can be one of 'sqcif', 'qsif', 'qcif', 'sif', 'cif' or @@ -99,6 +97,57 @@ This parameter works only with the ToUCam range of cameras (730, 740, 750). For other cameras this command is silently ignored, and the LED cannot be controlled. + +dev_hint + A long standing problem with USB devices is their dynamic nature: you + never know what device a camera gets assigned; it depends on module load + order, the hub configuration, the order in which devices are plugged in, + and the phase of the moon (i.e. it can be random). With this option you + can give the driver a hint as to what video device node (/dev/videoX) it + should use with a specific camera. This is also handy if you have two + cameras of the same model. + + A camera is specified by its type (the number from the camera model, + like PCA645, PCVC750VC, etc) and optionally the serial number (visible + in /proc/bus/usb/devices). A hint consists of a string with the following + format: + + [type[.serialnumber]:]node + + The square brackets mean that both the type and the serialnumber are + optional, but a serialnumber cannot be specified without a type (which + would be rather pointless). The serialnumber is separated from the type + by a '.'; the node number by a ':'. + + This somewhat cryptic syntax is best explained by a few examples: + + dev_hint=3,5 The first detected cam gets assigned + /dev/video3, the second /dev/video5. Any + other cameras will get the first free + available slot (see below). + + dev_hint=645:1,680=2 The PCA645 camera will get /dev/video1, + and a PCVC680 /dev/video2. + + dev_hint=645.0123:3,645.4567:0 The PCA645 camera with serialnumber + 0123 goes to /dev/video3, the same + camera model with the 4567 serial + gets /dev/video0. + + dev_hint=750:1,4,5,6 The PCVC750 camera will get /dev/video1, the + next 3 Philips cams will use /dev/video4 + through /dev/video6. + + Some points worth knowing: + - Serialnumbers are case sensitive and must be written full, including + leading zeroes (it's treated as a string). + - If a device node is already occupied, registration will fail and + the webcam is not available. 
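/*
 * A user-space sketch of the dev_hint grammar documented above,
 * [type[.serialnumber]:]node, for a single hint only (the comma list
 * and the "750:1,4,5,6" shorthand are not handled). Hypothetical
 * helper, not the driver's actual parser.
 */
#include <stdio.h>

static int parse_hint(const char *hint, char *type, char *serial, int *node)
{
	type[0] = serial[0] = '\0';
	if (sscanf(hint, "%15[^.:].%15[^:]:%d", type, serial, node) == 3)
		return 1;			/* "645.0123:3" */
	if (sscanf(hint, "%15[^.:]:%d", type, node) == 2)
		return 1;			/* "645:1" */
	type[0] = '\0';
	return sscanf(hint, "%d", node) == 1;	/* bare "3" */
}

int main(void)
{
	char type[16], serial[16];
	int node;

	if (parse_hint("645.0123:3", type, serial, &node))
		printf("type=%s serial=%s node=%d\n", type, serial, node);
	return 0;
}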
+ - You can have up to 64 video devices; be sure to make enough device + nodes in /dev if you want to spread the numbers (this does not apply + to devfs). After /dev/video9 comes /dev/video10 (not /dev/videoA). + - If a camera does not match any dev_hint, it will simply get assigned + the first available device node, just as it used to be. trace In order to better detect problems, it is now possible to turn on a diff -urN linux-2.5.1-pre1/MAINTAINERS linux/MAINTAINERS --- linux-2.5.1-pre1/MAINTAINERS Fri Nov 16 10:03:24 2001 +++ linux/MAINTAINERS Sat Dec 1 00:37:05 2001 @@ -1660,8 +1660,8 @@ W: http://misc.nu/hugh/keyspan/ USB SUBSYSTEM -P: Johannes Erdfelt -M: johannes@erdfelt.com +P: Greg Kroah-Hartman +M: greg@kroah.com L: linux-usb-users@lists.sourceforge.net L: linux-usb-devel@lists.sourceforge.net W: http://www.linux-usb.org diff -urN linux-2.5.1-pre1/Makefile linux/Makefile --- linux-2.5.1-pre1/Makefile Sat Dec 1 00:36:00 2001 +++ linux/Makefile Sat Dec 1 00:37:05 2001 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 5 SUBLEVEL = 1 -EXTRAVERSION =-pre1 +EXTRAVERSION =-pre2 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) diff -urN linux-2.5.1-pre1/arch/alpha/kernel/alpha_ksyms.c linux/arch/alpha/kernel/alpha_ksyms.c --- linux-2.5.1-pre1/arch/alpha/kernel/alpha_ksyms.c Tue Nov 20 15:49:31 2001 +++ linux/arch/alpha/kernel/alpha_ksyms.c Sat Dec 1 00:37:05 2001 @@ -109,6 +109,7 @@ EXPORT_SYMBOL(strrchr); EXPORT_SYMBOL(memcmp); EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memscan); EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(__memset); EXPORT_SYMBOL(__memsetw); diff -urN linux-2.5.1-pre1/arch/i386/defconfig linux/arch/i386/defconfig --- linux-2.5.1-pre1/arch/i386/defconfig Mon Nov 12 11:59:03 2001 +++ linux/arch/i386/defconfig Sat Dec 1 00:37:05 2001 @@ -286,7 +286,6 @@ # # Some SCSI devices (e.g. 
CD jukebox) support multiple LUNs # -CONFIG_SCSI_DEBUG_QUEUES=y CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_LOGGING is not set @@ -431,6 +430,7 @@ # CONFIG_SUNDANCE is not set # CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set +# CONFIG_VIA_RHINE_MMIO is not set # CONFIG_WINBOND_840 is not set # CONFIG_NET_POCKET is not set diff -urN linux-2.5.1-pre1/arch/i386/kernel/setup.c linux/arch/i386/kernel/setup.c --- linux-2.5.1-pre1/arch/i386/kernel/setup.c Mon Nov 19 15:16:13 2001 +++ linux/arch/i386/kernel/setup.c Sat Dec 1 00:37:05 2001 @@ -157,6 +157,7 @@ extern void mcheck_init(struct cpuinfo_x86 *c); extern int root_mountflags; extern char _text, _etext, _edata, _end; +extern int blk_nohighio; static int disable_x86_serial_nr __initdata = 1; static int disable_x86_fxsr __initdata = 0; @@ -782,7 +783,7 @@ void __init setup_arch(char **cmdline_p) { unsigned long bootmap_size, low_mem_size; - unsigned long start_pfn, max_pfn, max_low_pfn; + unsigned long start_pfn, max_low_pfn; int i; #ifdef CONFIG_VISWS @@ -1067,6 +1068,14 @@ __setup("notsc", tsc_setup); #endif +static int __init highio_setup(char *str) +{ + printk("i386: disabling HIGHMEM block I/O\n"); + blk_nohighio = 1; + return 1; +} +__setup("nohighio", highio_setup); + static int __init get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; diff -urN linux-2.5.1-pre1/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c --- linux-2.5.1-pre1/arch/i386/kernel/traps.c Sun Sep 30 12:26:08 2001 +++ linux/arch/i386/kernel/traps.c Sat Dec 1 00:37:05 2001 @@ -697,7 +697,7 @@ */ asmlinkage void math_state_restore(struct pt_regs regs) { - __asm__ __volatile__("clts"); /* Allow maths ops (or we recurse) */ + clts(); /* Allow maths ops (or we recurse) */ if (current->used_math) { restore_fpu(current); diff -urN linux-2.5.1-pre1/arch/i386/mm/init.c linux/arch/i386/mm/init.c --- linux-2.5.1-pre1/arch/i386/mm/init.c Sun Nov 11 10:09:32 2001 +++ linux/arch/i386/mm/init.c Sat Dec 1 00:37:05 2001 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -333,7 +334,7 @@ { pagetable_init(); - __asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir))); + __asm__( "movl %0,%%cr3\n" ::"r"(__pa(swapper_pg_dir))); #if CONFIG_X86_PAE /* @@ -596,3 +597,17 @@ val->mem_unit = PAGE_SIZE; return; } + +#if defined(CONFIG_X86_PAE) +struct kmem_cache_s *pae_pgd_cachep; +void __init pgtable_cache_init(void) +{ + /* + * PAE pgds must be 16-byte aligned: + */ + pae_pgd_cachep = kmem_cache_create("pae_pgd", 32, 0, + SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, NULL, NULL); + if (!pae_pgd_cachep) + panic("init_pae(): Cannot alloc pae_pgd SLAB cache"); +} +#endif /* CONFIG_X86_PAE */ diff -urN linux-2.5.1-pre1/drivers/block/DAC960.c linux/drivers/block/DAC960.c --- linux-2.5.1-pre1/drivers/block/DAC960.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/block/DAC960.c Sat Dec 1 00:37:05 2001 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -306,9 +307,9 @@ static void DAC960_WaitForCommand(DAC960_Controller_T *Controller) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&Controller->RequestQueue->queue_lock); __wait_event(Controller->CommandWaitQueue, Controller->FreeCommands); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&Controller->RequestQueue->queue_lock); } @@ -1922,76 +1923,6 @@ /* - DAC960_BackMergeFunction is the Back Merge Function for the DAC960 driver. 
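/*
 * Sketch of the boot-parameter pattern the setup.c hunk above uses for
 * "nohighio": __setup() (from linux/init.h) registers an early
 * command-line hook, and returning 1 marks the option as consumed.
 * The "noexample" flag here is hypothetical.
 */
static int example_disabled;

static int __init noexample_setup(char *str)
{
	example_disabled = 1;
	return 1;
}
__setup("noexample", noexample_setup);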
-*/ - -static int DAC960_BackMergeFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - BufferHeader_T *BufferHeader, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - if (Request->bhtail->b_data + Request->bhtail->b_size == BufferHeader->b_data) - return true; - if (Request->nr_segments < MaxSegments && - Request->nr_segments < Controller->DriverScatterGatherLimit) - { - Request->nr_segments++; - return true; - } - return false; -} - - -/* - DAC960_FrontMergeFunction is the Front Merge Function for the DAC960 driver. -*/ - -static int DAC960_FrontMergeFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - BufferHeader_T *BufferHeader, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - if (BufferHeader->b_data + BufferHeader->b_size == Request->bh->b_data) - return true; - if (Request->nr_segments < MaxSegments && - Request->nr_segments < Controller->DriverScatterGatherLimit) - { - Request->nr_segments++; - return true; - } - return false; -} - - -/* - DAC960_MergeRequestsFunction is the Merge Requests Function for the - DAC960 driver. -*/ - -static int DAC960_MergeRequestsFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - IO_Request_T *NextRequest, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - int TotalSegments = Request->nr_segments + NextRequest->nr_segments; - if (Request->bhtail->b_data + Request->bhtail->b_size - == NextRequest->bh->b_data) - TotalSegments--; - if (TotalSegments > MaxSegments || - TotalSegments > Controller->DriverScatterGatherLimit) - return false; - Request->nr_segments = TotalSegments; - return true; -} - - -/* DAC960_RegisterBlockDevice registers the Block Device structures associated with Controller. */ @@ -2015,15 +1946,15 @@ Initialize the I/O Request Queue. */ RequestQueue = BLK_DEFAULT_QUEUE(MajorNumber); - blk_init_queue(RequestQueue, DAC960_RequestFunction); + blk_init_queue(RequestQueue, DAC960_RequestFunction, "dac960"); blk_queue_headactive(RequestQueue, 0); - RequestQueue->back_merge_fn = DAC960_BackMergeFunction; - RequestQueue->front_merge_fn = DAC960_FrontMergeFunction; - RequestQueue->merge_requests_fn = DAC960_MergeRequestsFunction; RequestQueue->queuedata = Controller; + RequestQueue->max_segments = Controller->DriverScatterGatherLimit; + RequestQueue->max_sectors = Controller->MaxBlocksPerCommand; Controller->RequestQueue = RequestQueue; /* - Initialize the Max Sectors per Request array. + Initialize the Disk Partitions array, Partition Sizes array, Block Sizes + array, and Max Sectors per Request array. */ for (MinorNumber = 0; MinorNumber < DAC960_MinorCount; MinorNumber++) Controller->MaxSectorsPerRequest[MinorNumber] = @@ -2031,7 +1962,6 @@ Controller->GenericDiskInfo.part = Controller->DiskPartitions; Controller->GenericDiskInfo.sizes = Controller->PartitionSizes; blksize_size[MajorNumber] = Controller->BlockSizes; - max_sectors[MajorNumber] = Controller->MaxSectorsPerRequest; /* Initialize Read Ahead to 128 sectors. */ @@ -2080,9 +2010,7 @@ */ Controller->GenericDiskInfo.part = NULL; Controller->GenericDiskInfo.sizes = NULL; - blk_size[MajorNumber] = NULL; - blksize_size[MajorNumber] = NULL; - max_sectors[MajorNumber] = NULL; + blk_clear(MajorNumber); /* Remove the Generic Disk Information structure from the list. 
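/*
 * What replaces the three merge callbacks deleted above: under the 2.5
 * block layer the driver only publishes its scatter/gather limits on
 * the queue and the generic code performs the merging. A condensed
 * sketch with illustrative names:
 */
blk_init_queue(q, example_request_fn, "example");
blk_queue_headactive(q, 0);
q->max_segments = sg_limit;	/* formerly policed in *_MergeFunction() */
q->max_sectors = max_blocks;	/* per-request transfer cap */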
*/ @@ -2813,23 +2741,24 @@ CommandMailbox->Type5.ScatterGatherCount = Command->SegmentCount; while (BufferHeader != NULL) { - if (BufferHeader->b_data == LastDataEndPointer) + if (bio_data(BufferHeader) == LastDataEndPointer) { ScatterGatherList[SegmentNumber-1].SegmentByteCount += - BufferHeader->b_size; - LastDataEndPointer += BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer += bio_size(BufferHeader); } else { ScatterGatherList[SegmentNumber].SegmentDataPointer = - Virtual_to_Bus32(BufferHeader->b_data); + Virtual_to_Bus32(bio_data(BufferHeader)); ScatterGatherList[SegmentNumber].SegmentByteCount = - BufferHeader->b_size; - LastDataEndPointer = BufferHeader->b_data + BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer = bio_data(BufferHeader) + + bio_size(BufferHeader); if (SegmentNumber++ > Controller->DriverScatterGatherLimit) panic("DAC960: Scatter/Gather Segment Overflow\n"); } - BufferHeader = BufferHeader->b_reqnext; + BufferHeader = BufferHeader->bi_next; } if (SegmentNumber != Command->SegmentCount) panic("DAC960: SegmentNumber != SegmentCount\n"); @@ -2903,23 +2832,24 @@ .ScatterGatherSegments; while (BufferHeader != NULL) { - if (BufferHeader->b_data == LastDataEndPointer) + if (bio_data(BufferHeader) == LastDataEndPointer) { ScatterGatherList[SegmentNumber-1].SegmentByteCount += - BufferHeader->b_size; - LastDataEndPointer += BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer += bio_size(BufferHeader); } else { ScatterGatherList[SegmentNumber].SegmentDataPointer = - Virtual_to_Bus64(BufferHeader->b_data); + Virtual_to_Bus64(bio_data(BufferHeader)); ScatterGatherList[SegmentNumber].SegmentByteCount = - BufferHeader->b_size; - LastDataEndPointer = BufferHeader->b_data + BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer = bio_data(BufferHeader) + + bio_size(BufferHeader); if (SegmentNumber++ > Controller->DriverScatterGatherLimit) panic("DAC960: Scatter/Gather Segment Overflow\n"); } - BufferHeader = BufferHeader->b_reqnext; + BufferHeader = BufferHeader->bi_next; } if (SegmentNumber != Command->SegmentCount) panic("DAC960: SegmentNumber != SegmentCount\n"); @@ -2947,7 +2877,7 @@ while (true) { if (list_empty(RequestQueueHead)) return false; - Request = blkdev_entry_next_request(RequestQueueHead); + Request = elv_next_request(RequestQueue); Command = DAC960_AllocateCommand(Controller); if (Command != NULL) break; if (!WaitForCommand) return false; @@ -2958,12 +2888,10 @@ else Command->CommandType = DAC960_WriteCommand; Command->Completion = Request->waiting; Command->LogicalDriveNumber = DAC960_LogicalDriveNumber(Request->rq_dev); - Command->BlockNumber = - Request->sector - + Controller->GenericDiskInfo.part[MINOR(Request->rq_dev)].start_sect; + Command->BlockNumber = Request->sector; Command->BlockCount = Request->nr_sectors; Command->SegmentCount = Request->nr_segments; - Command->BufferHeader = Request->bh; + Command->BufferHeader = Request->bio; Command->RequestBuffer = Request->buffer; blkdev_dequeue_request(Request); blkdev_release_request(Request); @@ -3016,8 +2944,10 @@ static inline void DAC960_ProcessCompletedBuffer(BufferHeader_T *BufferHeader, boolean SuccessfulIO) { - blk_finished_io(BufferHeader->b_size >> 9); - BufferHeader->b_end_io(BufferHeader, SuccessfulIO); + if (SuccessfulIO) + set_bit(BIO_UPTODATE, &BufferHeader->bi_flags); + blk_finished_io(bio_sectors(BufferHeader)); + BufferHeader->bi_end_io(BufferHeader); } @@ -3071,13 +3001,13 @@ Controller, Controller->ControllerNumber, 
Command->LogicalDriveNumber, Command->BlockNumber, Command->BlockNumber + Command->BlockCount - 1); - if (DAC960_PartitionNumber(Command->BufferHeader->b_rdev) > 0) + if (DAC960_PartitionNumber(Command->BufferHeader->bi_dev) > 0) DAC960_Error(" /dev/rd/c%dd%dp%d: relative blocks %u..%u\n", Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, - DAC960_PartitionNumber(Command->BufferHeader->b_rdev), - Command->BufferHeader->b_rsector, - Command->BufferHeader->b_rsector + Command->BlockCount - 1); + DAC960_PartitionNumber(Command->BufferHeader->bi_dev), + Command->BufferHeader->bi_sector, + Command->BufferHeader->bi_sector + Command->BlockCount - 1); } @@ -3104,8 +3034,8 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, true); BufferHeader = NextBufferHeader; } @@ -3119,7 +3049,7 @@ else if ((CommandStatus == DAC960_V1_IrrecoverableDataError || CommandStatus == DAC960_V1_BadDataEncountered) && BufferHeader != NULL && - BufferHeader->b_reqnext != NULL) + BufferHeader->bi_next != NULL) { DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; @@ -3133,10 +3063,10 @@ Command->CommandType = DAC960_WriteRetryCommand; CommandMailbox->Type5.CommandOpcode = DAC960_V1_Write; } - Command->BlockCount = BufferHeader->b_size >> DAC960_BlockSizeBits; + Command->BlockCount = bio_size(BufferHeader) >> DAC960_BlockSizeBits; CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; CommandMailbox->Type5.BusAddress = - Virtual_to_Bus32(BufferHeader->b_data); + Virtual_to_Bus32(bio_data(BufferHeader)); DAC960_QueueCommand(Command); return; } @@ -3149,8 +3079,8 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, false); BufferHeader = NextBufferHeader; } @@ -3164,8 +3094,8 @@ else if (CommandType == DAC960_ReadRetryCommand || CommandType == DAC960_WriteRetryCommand) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; /* Perform completion processing for this single buffer. 
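/*
 * The buffer_head -> bio conversion in these hunks repeats one walk
 * pattern: detach each bio from the chain, mark it up to date on
 * success, account the sectors, and call its completion. A
 * self-contained sketch of that pattern (2.5 bio API, hypothetical
 * helper name):
 */
static void complete_bio_chain(struct bio *bio, int uptodate)
{
	while (bio != NULL) {
		struct bio *next = bio->bi_next;

		bio->bi_next = NULL;
		if (uptodate)
			set_bit(BIO_UPTODATE, &bio->bi_flags);
		blk_finished_io(bio_sectors(bio));
		bio->bi_end_io(bio);
		bio = next;
	}
}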
*/ @@ -3182,14 +3112,14 @@ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; Command->BlockNumber += - BufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(BufferHeader) >> DAC960_BlockSizeBits; Command->BlockCount = - NextBufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(NextBufferHeader) >> DAC960_BlockSizeBits; Command->BufferHeader = NextBufferHeader; CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; CommandMailbox->Type5.LogicalBlockAddress = Command->BlockNumber; CommandMailbox->Type5.BusAddress = - Virtual_to_Bus32(NextBufferHeader->b_data); + Virtual_to_Bus32(bio_data(NextBufferHeader)); DAC960_QueueCommand(Command); return; } @@ -3935,13 +3865,13 @@ Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, Command->BlockNumber, Command->BlockNumber + Command->BlockCount - 1); - if (DAC960_PartitionNumber(Command->BufferHeader->b_rdev) > 0) + if (DAC960_PartitionNumber(Command->BufferHeader->bi_dev) > 0) DAC960_Error(" /dev/rd/c%dd%dp%d: relative blocks %u..%u\n", Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, - DAC960_PartitionNumber(Command->BufferHeader->b_rdev), - Command->BufferHeader->b_rsector, - Command->BufferHeader->b_rsector + Command->BlockCount - 1); + DAC960_PartitionNumber(Command->BufferHeader->bi_dev), + Command->BufferHeader->bi_sector, + Command->BufferHeader->bi_sector + Command->BlockCount - 1); } @@ -4210,8 +4140,8 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, true); BufferHeader = NextBufferHeader; } @@ -4225,19 +4155,19 @@ else if (Command->V2.RequestSense.SenseKey == DAC960_SenseKey_MediumError && BufferHeader != NULL && - BufferHeader->b_reqnext != NULL) + BufferHeader->bi_next != NULL) { if (CommandType == DAC960_ReadCommand) Command->CommandType = DAC960_ReadRetryCommand; else Command->CommandType = DAC960_WriteRetryCommand; - Command->BlockCount = BufferHeader->b_size >> DAC960_BlockSizeBits; + Command->BlockCount = bio_size(BufferHeader) >> DAC960_BlockSizeBits; CommandMailbox->SCSI_10.CommandControlBits .AdditionalScatterGatherListMemory = false; CommandMailbox->SCSI_10.DataTransferSize = Command->BlockCount << DAC960_BlockSizeBits; CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0].SegmentDataPointer = - Virtual_to_Bus64(BufferHeader->b_data); + Virtual_to_Bus64(bio_data(BufferHeader)); CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0].SegmentByteCount = CommandMailbox->SCSI_10.DataTransferSize; @@ -4255,8 +4185,8 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, false); BufferHeader = NextBufferHeader; } @@ -4270,8 +4200,8 @@ else if (CommandType == DAC960_ReadRetryCommand || CommandType == DAC960_WriteRetryCommand) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; /* Perform completion processing for this single buffer. 
*/ @@ -4286,16 +4216,16 @@ if (NextBufferHeader != NULL) { Command->BlockNumber += - BufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(BufferHeader) >> DAC960_BlockSizeBits; Command->BlockCount = - NextBufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(NextBufferHeader) >> DAC960_BlockSizeBits; Command->BufferHeader = NextBufferHeader; CommandMailbox->SCSI_10.DataTransferSize = Command->BlockCount << DAC960_BlockSizeBits; CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0] .SegmentDataPointer = - Virtual_to_Bus64(NextBufferHeader->b_data); + Virtual_to_Bus64(bio_data(NextBufferHeader)); CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0] .SegmentByteCount = @@ -5416,7 +5346,8 @@ int LogicalDriveNumber = DAC960_LogicalDriveNumber(Inode->i_rdev); DiskGeometry_T Geometry, *UserGeometry; DAC960_Controller_T *Controller; - int PartitionNumber; + int res; + if (File != NULL && (File->f_flags & O_NONBLOCK)) return DAC960_UserIOCTL(Inode, File, Request, Argument); if (ControllerNumber < 0 || ControllerNumber > DAC960_ControllerCount - 1) @@ -5465,61 +5396,27 @@ LogicalDeviceInfo->ConfigurableDeviceSize / (Geometry.heads * Geometry.sectors); } - Geometry.start = - Controller->GenericDiskInfo.part[MINOR(Inode->i_rdev)].start_sect; + Geometry.start = get_start_sect(Inode->i_rdev); return (copy_to_user(UserGeometry, &Geometry, sizeof(DiskGeometry_T)) ? -EFAULT : 0); case BLKGETSIZE: - /* Get Device Size. */ - if ((unsigned long *) Argument == NULL) return -EINVAL; - return put_user(Controller->GenericDiskInfo.part[MINOR(Inode->i_rdev)] - .nr_sects, - (unsigned long *) Argument); case BLKGETSIZE64: - if ((u64 *) Argument == NULL) return -EINVAL; - return put_user((u64) Controller->GenericDiskInfo - .part[MINOR(Inode->i_rdev)] - .nr_sects << 9, - (u64 *) Argument); case BLKRAGET: case BLKRASET: case BLKFLSBUF: case BLKBSZGET: case BLKBSZSET: return blk_ioctl(Inode->i_rdev, Request, Argument); + case BLKRRPART: /* Re-Read Partition Table. */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (Controller->LogicalDriveUsageCount[LogicalDriveNumber] > 1) return -EBUSY; - for (PartitionNumber = 0; - PartitionNumber < DAC960_MaxPartitions; - PartitionNumber++) - { - KernelDevice_T Device = DAC960_KernelDevice(ControllerNumber, - LogicalDriveNumber, - PartitionNumber); - int MinorNumber = DAC960_MinorNumber(LogicalDriveNumber, - PartitionNumber); - if (Controller->GenericDiskInfo.part[MinorNumber].nr_sects == 0) - continue; - /* - Flush all changes and invalidate buffered state. - */ - invalidate_device(Device, 1); - /* - Clear existing partition sizes. - */ - if (PartitionNumber > 0) - { - Controller->GenericDiskInfo.part[MinorNumber].start_sect = 0; - Controller->GenericDiskInfo.part[MinorNumber].nr_sects = 0; - } - /* - Reset the Block Size so that the partition table can be read. 
- */ - set_blocksize(Device, BLOCK_SIZE); - } + res = wipe_partitions(Inode->i_rdev); + if (res) /* nothing */ + return res; + DAC960_RegisterDisk(Controller, LogicalDriveNumber); return 0; } @@ -5641,11 +5538,11 @@ while (Controller->V1.DirectCommandActive[DCDB.Channel] [DCDB.TargetID]) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&Controller->RequestQueue->queue_lock); __wait_event(Controller->CommandWaitQueue, !Controller->V1.DirectCommandActive [DCDB.Channel][DCDB.TargetID]); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&Controller->RequestQueue->queue_lock); } Controller->V1.DirectCommandActive[DCDB.Channel] [DCDB.TargetID] = true; diff -urN linux-2.5.1-pre1/drivers/block/DAC960.h linux/drivers/block/DAC960.h --- linux-2.5.1-pre1/drivers/block/DAC960.h Wed Oct 17 14:46:29 2001 +++ linux/drivers/block/DAC960.h Sat Dec 1 00:37:05 2001 @@ -2191,7 +2191,7 @@ of the Linux Kernel and I/O Subsystem. */ -typedef struct buffer_head BufferHeader_T; +typedef struct bio BufferHeader_T; typedef struct file File_T; typedef struct block_device_operations BlockDeviceOperations_T; typedef struct completion Completion_T; @@ -2475,7 +2475,6 @@ DiskPartition_T DiskPartitions[DAC960_MinorCount]; int PartitionSizes[DAC960_MinorCount]; int BlockSizes[DAC960_MinorCount]; - int MaxSectorsPerRequest[DAC960_MinorCount]; unsigned char ProgressBuffer[DAC960_ProgressBufferSize]; unsigned char UserStatusBuffer[DAC960_UserMessageSize]; } @@ -2509,7 +2508,7 @@ void DAC960_AcquireControllerLock(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_lock_irqsave(&io_request_lock, *ProcessorFlags); + spin_lock_irqsave(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } @@ -2521,13 +2520,13 @@ void DAC960_ReleaseControllerLock(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_unlock_irqrestore(&io_request_lock, *ProcessorFlags); + spin_unlock_irqrestore(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } /* DAC960_AcquireControllerLockRF acquires exclusive access to Controller, - but is only called from the request function with the io_request_lock held. + but is only called from the request function with the queue lock held. */ static inline @@ -2539,7 +2538,7 @@ /* DAC960_ReleaseControllerLockRF releases exclusive access to Controller, - but is only called from the request function with the io_request_lock held. + but is only called from the request function with the queue lock held. 
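/*
 * The io_request_lock removal pattern used across this diff: each
 * driver now serializes on its own request queue's lock rather than
 * the global lock. cciss.h below wraps it in a macro; the same idea as
 * a generic sketch:
 */
#define EXAMPLE_LOCK(ctlr) \
	(&((BLK_DEFAULT_QUEUE(MAJOR_NR + (ctlr)))->queue_lock))

spin_lock_irqsave(EXAMPLE_LOCK(ctlr), flags);
/* ...queue manipulation that io_request_lock used to cover... */
spin_unlock_irqrestore(EXAMPLE_LOCK(ctlr), flags);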
*/ static inline @@ -2558,7 +2557,7 @@ void DAC960_AcquireControllerLockIH(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_lock_irqsave(&io_request_lock, *ProcessorFlags); + spin_lock_irqsave(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } @@ -2571,7 +2570,7 @@ void DAC960_ReleaseControllerLockIH(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_unlock_irqrestore(&io_request_lock, *ProcessorFlags); + spin_unlock_irqrestore(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } diff -urN linux-2.5.1-pre1/drivers/block/Makefile linux/drivers/block/Makefile --- linux-2.5.1-pre1/drivers/block/Makefile Sun Sep 9 12:00:55 2001 +++ linux/drivers/block/Makefile Sat Dec 1 00:37:05 2001 @@ -10,9 +10,9 @@ O_TARGET := block.o -export-objs := ll_rw_blk.o blkpg.o loop.o DAC960.o genhd.o +export-objs := elevator.o ll_rw_blk.o blkpg.o loop.o DAC960.o genhd.o -obj-y := ll_rw_blk.o blkpg.o genhd.o elevator.o +obj-y := elevator.o ll_rw_blk.o blkpg.o genhd.o obj-$(CONFIG_MAC_FLOPPY) += swim3.o obj-$(CONFIG_BLK_DEV_FD) += floppy.o diff -urN linux-2.5.1-pre1/drivers/block/acsi.c linux/drivers/block/acsi.c --- linux-2.5.1-pre1/drivers/block/acsi.c Fri Nov 9 13:58:03 2001 +++ linux/drivers/block/acsi.c Sat Dec 1 00:37:05 2001 @@ -1011,7 +1011,6 @@ goto repeat; } - block += acsi_part[dev].start_sect; target = acsi_info[DEVICE_NR(dev)].target; lun = acsi_info[DEVICE_NR(dev)].lun; @@ -1123,7 +1122,7 @@ put_user( 64, &geo->heads ); put_user( 32, &geo->sectors ); put_user( acsi_info[dev].size >> 11, &geo->cylinders ); - put_user( acsi_part[MINOR(inode->i_rdev)].start_sect, &geo->start ); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; } @@ -1852,7 +1851,7 @@ { int device; struct gendisk * gdev; - int max_p, start, i; + int res; struct acsi_info_struct *aip; device = DEVICE_NR(MINOR(dev)); @@ -1867,16 +1866,7 @@ DEVICE_BUSY = 1; sti(); - max_p = gdev->max_p; - start = device << gdev->minor_shift; - - for( i = max_p - 1; i >= 0 ; i-- ) { - if (gdev->part[start + i].nr_sects != 0) { - invalidate_device(MKDEV(MAJOR_NR, start + i), 1); - gdev->part[start + i].nr_sects = 0; - } - gdev->part[start+i].start_sect = 0; - }; + res = wipe_partitions(dev); stdma_lock( NULL, NULL ); @@ -1891,12 +1881,13 @@ ENABLE_IRQ(); stdma_release(); - - grok_partitions(gdev, device, (aip->type==HARDDISK)?1<<4:1, aip->size); + + if (!res) + grok_partitions(dev, aip->size); DEVICE_BUSY = 0; wake_up(&busy_wait); - return 0; + return res; } diff -urN linux-2.5.1-pre1/drivers/block/amiflop.c linux/drivers/block/amiflop.c --- linux-2.5.1-pre1/drivers/block/amiflop.c Thu Oct 25 13:58:34 2001 +++ linux/drivers/block/amiflop.c Sat Dec 1 00:37:05 2001 @@ -1895,10 +1895,9 @@ free_irq(IRQ_AMIGA_DSKBLK, NULL); custom.dmacon = DMAF_DISK; /* disable DMA */ amiga_chip_free(raw_buf); - blk_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); release_mem_region(CUSTOM_PHYSADDR+0x20, 8); unregister_blkdev(MAJOR_NR, "fd"); + blk_clear(MAJOR_NR); } #endif diff -urN linux-2.5.1-pre1/drivers/block/blkpg.c linux/drivers/block/blkpg.c --- linux-2.5.1-pre1/drivers/block/blkpg.c Sun Nov 11 10:20:21 2001 +++ linux/drivers/block/blkpg.c Sat Dec 1 00:37:05 2001 @@ -63,7 +63,8 @@ * or has the same number as an existing one * 0: all OK. 
*/ -int add_partition(kdev_t dev, struct blkpg_partition *p) { +int add_partition(kdev_t dev, struct blkpg_partition *p) +{ struct gendisk *g; long long ppstart, pplength; long pstart, plength; @@ -123,7 +124,8 @@ * * Note that the dev argument refers to the entire disk, not the partition. */ -int del_partition(kdev_t dev, struct blkpg_partition *p) { +int del_partition(kdev_t dev, struct blkpg_partition *p) +{ struct gendisk *g; kdev_t devp; int drive, first_minor, minor; @@ -195,9 +197,10 @@ int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg) { + request_queue_t *q; struct gendisk *g; u64 ullval = 0; - int intval; + int intval, *iptr; if (!dev) return -EINVAL; @@ -226,8 +229,26 @@ return -EINVAL; return put_user(read_ahead[MAJOR(dev)], (long *) arg); + case BLKFRASET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (!(iptr = max_readahead[MAJOR(dev)])) + return -EINVAL; + iptr[MINOR(dev)] = arg; + return 0; + + case BLKFRAGET: + if (!(iptr = max_readahead[MAJOR(dev)])) + return -EINVAL; + return put_user(iptr[MINOR(dev)], (long *) arg); + + case BLKSECTGET: + if ((q = blk_get_queue(dev))) + return put_user(q->max_sectors, (unsigned short *)arg); + return -EINVAL; + case BLKFLSBUF: - if(!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) return -EACCES; fsync_dev(dev); invalidate_buffers(dev); @@ -246,8 +267,7 @@ if (cmd == BLKGETSIZE) return put_user((unsigned long)ullval, (unsigned long *)arg); - else - return put_user(ullval, (u64 *)arg); + return put_user(ullval, (u64 *)arg); #if 0 case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) @@ -258,34 +278,38 @@ case BLKPG: return blkpg_ioctl(dev, (struct blkpg_ioctl_arg *) arg); + /* + * deprecated, use the /proc/iosched interface instead + */ case BLKELVGET: - return blkelvget_ioctl(&blk_get_queue(dev)->elevator, - (blkelv_ioctl_arg_t *) arg); case BLKELVSET: - return blkelvset_ioctl(&blk_get_queue(dev)->elevator, - (blkelv_ioctl_arg_t *) arg); + return -ENOTTY; + + case BLKHASHPROF: + case BLKHASHCLEAR: + return bio_ioctl(dev, cmd, arg); case BLKBSZGET: /* get the logical block size (cf. 
BLKSSZGET) */ intval = BLOCK_SIZE; if (blksize_size[MAJOR(dev)]) intval = blksize_size[MAJOR(dev)][MINOR(dev)]; - return put_user (intval, (int *) arg); + return put_user(intval, (int *) arg); case BLKBSZSET: /* set the logical block size */ - if (!capable (CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (!dev || !arg) + if (!arg) return -EINVAL; - if (get_user (intval, (int *) arg)) + if (get_user(intval, (int *) arg)) return -EFAULT; if (intval > PAGE_SIZE || intval < 512 || (intval & (intval - 1))) return -EINVAL; - if (is_mounted (dev) || is_swap_partition (dev)) + if (is_mounted(dev) || is_swap_partition(dev)) return -EBUSY; - set_blocksize (dev, intval); + set_blocksize(dev, intval); return 0; default: diff -urN linux-2.5.1-pre1/drivers/block/cciss.c linux/drivers/block/cciss.c --- linux-2.5.1-pre1/drivers/block/cciss.c Fri Nov 9 14:28:46 2001 +++ linux/drivers/block/cciss.c Sat Dec 1 00:37:05 2001 @@ -84,7 +84,7 @@ #define MAX_CONFIG_WAIT 1000 #define READ_AHEAD 128 -#define NR_CMDS 128 /* #commands that can be outstanding */ +#define NR_CMDS 384 /* #commands that can be outstanding */ #define MAX_CTLR 8 #define CCISS_DMA_MASK 0xFFFFFFFF /* 32 bit DMA */ @@ -147,7 +147,6 @@ " IRQ: %d\n" " Logical drives: %d\n" " Current Q depth: %d\n" - " Current # commands on controller %d\n" " Max Q depth since init: %d\n" " Max # commands on controller since init: %d\n" " Max SG entries since init: %d\n\n", @@ -158,8 +157,7 @@ (unsigned long)h->vaddr, (unsigned int)h->intr, h->num_luns, - h->Qdepth, h->commands_outstanding, - h->maxQsinceinit, h->max_outstanding, h->maxSG); + h->Qdepth, h->maxQsinceinit, h->max_outstanding, h->maxSG); pos += size; len += size; for(i=0; inum_luns; i++) { @@ -237,7 +235,7 @@ i = find_first_zero_bit(h->cmd_pool_bits, NR_CMDS); if (i == NR_CMDS) return NULL; - } while(test_and_set_bit(i%32, h->cmd_pool_bits+(i/32)) != 0); + } while(test_and_set_bit(i & 31, h->cmd_pool_bits+(i/32)) != 0); #ifdef CCISS_DEBUG printk(KERN_DEBUG "cciss: using command buffer %d\n", i); #endif @@ -308,13 +306,10 @@ /* for each partition */ for(j=0; jblocksizes[(i<hardsizes[ (i<block_size; - } hba[ctlr]->gendisk.nr_real++; + (BLK_DEFAULT_QUEUE(MAJOR_NR + ctlr))->hardsect_size = drv->block_size; } } /* @@ -377,8 +372,6 @@ { int ctlr = MAJOR(inode->i_rdev) - MAJOR_NR; int dsk = MINOR(inode->i_rdev) >> NWD_SHIFT; - int diskinfo[4]; - struct hd_geometry *geo = (struct hd_geometry *)arg; #ifdef CCISS_DEBUG printk(KERN_DEBUG "cciss_ioctl: Called with cmd=%x %lx\n", cmd, arg); @@ -386,6 +379,10 @@ switch(cmd) { case HDIO_GETGEO: + { + struct hd_geometry *geo = (struct hd_geometry *)arg; + int diskinfo[4]; + if (hba[ctlr]->drv[dsk].cylinders) { diskinfo[0] = hba[ctlr]->drv[dsk].heads; diskinfo[1] = hba[ctlr]->drv[dsk].sectors; @@ -393,20 +390,18 @@ } else { diskinfo[0] = 0xff; diskinfo[1] = 0x3f; - diskinfo[2] = hba[ctlr]->drv[dsk].nr_blocks / (0xff*0x3f); } + diskinfo[2] = hba[ctlr]->drv[dsk].nr_blocks / (0xff*0x3f); + } put_user(diskinfo[0], &geo->heads); put_user(diskinfo[1], &geo->sectors); put_user(diskinfo[2], &geo->cylinders); - put_user(hba[ctlr]->hd[MINOR(inode->i_rdev)].start_sect, &geo->start); - return 0; - case BLKGETSIZE: - put_user(hba[ctlr]->hd[MINOR(inode->i_rdev)].nr_sects, (unsigned long *)arg); - return 0; - case BLKGETSIZE64: - put_user((u64)hba[ctlr]->hd[MINOR(inode->i_rdev)].nr_sects << 9, (u64*)arg); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; + } case BLKRRPART: return revalidate_logvol(inode->i_rdev, 1); + case BLKGETSIZE: + case 
BLKGETSIZE64: case BLKFLSBUF: case BLKBSZSET: case BLKBSZGET: @@ -415,9 +410,7 @@ case BLKRASET: case BLKRAGET: case BLKPG: - case BLKELVGET: - case BLKELVSET: - return( blk_ioctl(inode->i_rdev, cmd, arg)); + return blk_ioctl(inode->i_rdev, cmd, arg); case CCISS_GETPCIINFO: { cciss_pci_info_struct pciinfo; @@ -459,16 +452,7 @@ // printk("cciss_ioctl: delay and count cannot be 0\n"); return( -EINVAL); } - spin_lock_irqsave(&io_request_lock, flags); - /* Can only safely update if no commands outstanding */ - if (c->commands_outstanding > 0 ) - { -// printk("cciss_ioctl: cannot change coalasing " -// "%d commands outstanding on controller\n", -// c->commands_outstanding); - spin_unlock_irqrestore(&io_request_lock, flags); - return(-EINVAL); - } + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); /* Update the field, and then ring the doorbell */ writel( intinfo.delay, &(c->cfgtable->HostWrite.CoalIntDelay)); @@ -484,7 +468,7 @@ /* delay and try again */ udelay(1000); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); if (i >= MAX_CONFIG_WAIT) return( -EFAULT); return(0); @@ -515,7 +499,7 @@ if (copy_from_user(NodeName, (void *) arg, sizeof( NodeName_type))) return -EFAULT; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); /* Update the field, and then ring the doorbell */ for(i=0;i<16;i++) @@ -531,7 +515,7 @@ /* delay and try again */ udelay(1000); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); if (i >= MAX_CONFIG_WAIT) return( -EFAULT); return(0); @@ -658,11 +642,11 @@ c->SG[0].Ext = 0; // we are not chaining } /* Put the request on the tail of the request queue */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); addQ(&h->reqQ, c); h->Qdepth++; start_io(h); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); /* Wait for completion */ while(c->cmd_type != CMD_IOCTL_DONE) @@ -710,42 +694,32 @@ int ctlr, target; struct gendisk *gdev; unsigned long flags; - int max_p; - int start; - int i; + int res; target = MINOR(dev) >> NWD_SHIFT; ctlr = MAJOR(dev) - MAJOR_NR; gdev = &(hba[ctlr]->gendisk); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); if (hba[ctlr]->drv[target].usage_count > maxusage) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); printk(KERN_WARNING "cciss: Device busy for " "revalidation (usage=%d)\n", hba[ctlr]->drv[target].usage_count); return -EBUSY; } hba[ctlr]->drv[target].usage_count++; - spin_unlock_irqrestore(&io_request_lock, flags); - - max_p = gdev->max_p; - start = target << gdev->minor_shift; + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); - for(i=max_p-1; i>=0; i--) { - int minor = start+i; - invalidate_device(MKDEV(MAJOR_NR + ctlr, minor), 1); - gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; + res = wipe_partitions(dev); + if (res) + goto leave; - /* reset the blocksize so we can read the partition table */ - blksize_size[MAJOR_NR+ctlr][minor] = 1024; - } /* setup partitions per disk */ - grok_partitions(gdev, target, MAX_PART, - hba[ctlr]->drv[target].nr_blocks); + grok_partitions(dev, hba[ctlr]->drv[target].nr_blocks); +leave: hba[ctlr]->drv[target].usage_count--; - return 0; + return res; } static int frevalidate_logvol(kdev_t dev) @@ -776,15 +750,15 @@ if (MINOR(dev) != 0) return -ENXIO; - 
spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); if (hba[ctlr]->usage_count > 1) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); printk(KERN_WARNING "cciss: Device busy for volume" " revalidation (usage=%d)\n", hba[ctlr]->usage_count); return -EBUSY; } - spin_unlock_irqrestore(&io_request_lock, flags); hba[ctlr]->usage_count++; + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); /* * Set the partition and block size structures for all volumes @@ -793,7 +767,6 @@ memset(hba[ctlr]->hd, 0, sizeof(struct hd_struct) * 256); memset(hba[ctlr]->sizes, 0, sizeof(int) * 256); memset(hba[ctlr]->blocksizes, 0, sizeof(int) * 256); - memset(hba[ctlr]->hardsizes, 0, sizeof(int) * 256); memset(hba[ctlr]->drv, 0, sizeof(drive_info_struct) * CISS_MAX_LUN); hba[ctlr]->gendisk.nr_real = 0; @@ -1089,11 +1062,11 @@ while(( c = h->reqQ) != NULL ) { /* can't do anything if fifo is full */ - if ((h->access.fifo_full(h))) - { - printk(KERN_WARNING "cciss: fifo full \n"); - return; + if ((h->access.fifo_full(h))) { + printk(KERN_WARNING "cciss: fifo full\n"); + break; } + /* Get the frist entry from the Request Q */ removeQ(&(h->reqQ), c); h->Qdepth--; @@ -1106,18 +1079,18 @@ } } -static inline void complete_buffers( struct buffer_head *bh, int status) +static inline void complete_buffers(struct bio *bio, int status) { - struct buffer_head *xbh; - - while(bh) - { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, status); - bh = xbh; + while (bio) { + int nsecs = bio_sectors(bio); + + struct bio *xbh = bio->bi_next; + bio->bi_next = NULL; + blk_finished_io(nsecs); + bio_endio(bio, status, nsecs); + bio = xbh; } + } /* checks the status of the job and calls complete buffers to mark all * buffers for the completed job. @@ -1135,7 +1108,7 @@ { temp64.val32.lower = cmd->SG[i].Addr.lower; temp64.val32.upper = cmd->SG[i].Addr.upper; - pci_unmap_single(hba[cmd->ctlr]->pdev, + pci_unmap_page(hba[cmd->ctlr]->pdev, temp64.val, cmd->SG[i].Len, (cmd->Request.Type.Direction == XFER_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); @@ -1214,83 +1187,38 @@ status=0; } } - complete_buffers(cmd->rq->bh, status); + + complete_buffers(cmd->rq->bio, status); #ifdef CCISS_DEBUG printk("Done with %p\n", cmd->rq); #endif /* CCISS_DEBUG */ - end_that_request_last(cmd->rq); -} - - -static inline int cpq_new_segment(request_queue_t *q, struct request *rq, - int max_segments) -{ - if (rq->nr_segments < MAXSGENTRIES) { - rq->nr_segments++; - return 1; - } - return 0; -} -static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (bh->b_data + bh->b_size == rq->bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, - struct request *nxt, int max_segments) -{ - int total_segments = rq->nr_segments + nxt->nr_segments; - - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) - total_segments--; - - if (total_segments > MAXSGENTRIES) - return 0; - - rq->nr_segments = total_segments; - return 1; + end_that_request_last(cmd->rq); } /* * Get a request and submit it to the controller. 
- * Currently we do one request at a time. Ideally we would like to send - * everything to the controller on the first call, but there is a danger - * of holding the io_request_lock for to long. */ static void do_cciss_request(request_queue_t *q) { ctlr_info_t *h= q->queuedata; CommandList_struct *c; int log_unit, start_blk, seg; - char *lastdataend; - struct buffer_head *bh; struct list_head *queue_head = &q->queue_head; struct request *creq; u64bit temp64; - struct my_sg tmp_sg[MAXSGENTRIES]; - int i; + struct scatterlist tmp_sg[MAXSGENTRIES]; + int i, dir; - if (q->plugged) + if (blk_queue_plugged(q)) goto startio; -queue_next: +queue: if (list_empty(queue_head)) goto startio; - creq = blkdev_entry_next_request(queue_head); + creq = elv_next_request(q); if (creq->nr_segments > MAXSGENTRIES) BUG(); @@ -1299,7 +1227,7 @@ printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); - complete_buffers(creq->bh, 0); + complete_buffers(creq->bio, 0); end_that_request_last(creq); goto startio; } @@ -1309,10 +1237,9 @@ blkdev_dequeue_request(creq); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); - c->cmd_type = CMD_RWREQ; - bh = creq->bh; + c->cmd_type = CMD_RWREQ; c->rq = creq; /* fill in the request */ @@ -1328,41 +1255,26 @@ (creq->cmd == READ) ? XFER_READ: XFER_WRITE; c->Request.Timeout = 0; // Don't time out c->Request.CDB[0] = (creq->cmd == READ) ? CCISS_READ : CCISS_WRITE; - start_blk = hba[h->ctlr]->hd[MINOR(creq->rq_dev)].start_sect + creq->sector; + start_blk = creq->sector; #ifdef CCISS_DEBUG - if (bh == NULL) - panic("cciss: bh== NULL?"); printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",(int) creq->sector, (int) creq->nr_sectors); #endif /* CCISS_DEBUG */ - seg = 0; - lastdataend = NULL; - while(bh) - { - if (bh->b_data == lastdataend) - { // tack it on to the last segment - tmp_sg[seg-1].len +=bh->b_size; - lastdataend += bh->b_size; - } else - { - if (seg == MAXSGENTRIES) - BUG(); - tmp_sg[seg].len = bh->b_size; - tmp_sg[seg].start_addr = bh->b_data; - lastdataend = bh->b_data + bh->b_size; - seg++; - } - bh = bh->b_reqnext; - } + + seg = blk_rq_map_sg(q, creq, tmp_sg); + /* get the DMA records for the setup */ + if (c->Request.Type.Direction == XFER_READ) + dir = PCI_DMA_FROMDEVICE; + else + dir = PCI_DMA_TODEVICE; + for (i=0; iSG[i].Len = tmp_sg[i].len; - temp64.val = (__u64) pci_map_single( h->pdev, - tmp_sg[i].start_addr, - tmp_sg[i].len, - (c->Request.Type.Direction == XFER_READ) ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + c->SG[i].Len = tmp_sg[i].length; + temp64.val = (__u64) pci_map_page(h->pdev, tmp_sg[i].page, + tmp_sg[i].offset, tmp_sg[i].length, + dir); c->SG[i].Addr.lower = temp64.val32.lower; c->SG[i].Addr.upper = temp64.val32.upper; c->SG[i].Ext = 0; // we are not chaining @@ -1386,14 +1298,14 @@ c->Request.CDB[8]= creq->nr_sectors & 0xff; c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0; - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); addQ(&(h->reqQ),c); h->Qdepth++; if(h->Qdepth > h->maxQsinceinit) h->maxQsinceinit = h->Qdepth; - goto queue_next; + goto queue; startio: start_io(h); } @@ -1414,7 +1326,7 @@ * If there are completed commands in the completion queue, * we had better do something about it. 
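/*
 * The request-mapping flow cciss and cpqarray adopt in this diff:
 * blk_rq_map_sg() folds the request's bio chain into a scatterlist,
 * replacing the hand-rolled b_data walk, and each element is then
 * DMA-mapped per page. Condensed, hedged sketch; EXAMPLE_SG_MAX and
 * fill_sg_entry() are illustrative.
 */
static void map_request(request_queue_t *q, struct request *creq,
			struct pci_dev *pdev)
{
	struct scatterlist sg[EXAMPLE_SG_MAX];
	int i, nseg, dir;

	nseg = blk_rq_map_sg(q, creq, sg);
	dir = (creq->cmd == READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE;
	for (i = 0; i < nseg; i++) {
		dma_addr_t addr = pci_map_page(pdev, sg[i].page,
					       sg[i].offset, sg[i].length,
					       dir);
		fill_sg_entry(i, addr, sg[i].length);	/* controller SG */
	}
}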
*/ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); while( h->access.intr_pending(h)) { while((a = h->access.command_completed(h)) != FIFO_EMPTY) @@ -1447,11 +1359,12 @@ } } } + /* * See if we can queue up some more IO */ do_cciss_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); } /* * We cannot read the structure directly, for portablity we must use @@ -1873,7 +1786,18 @@ sprintf(hba[i]->devname, "cciss%d", i); hba[i]->ctlr = i; hba[i]->pdev = pdev; - + + /* configure PCI DMA stuff */ + if (!pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff)) + printk("cciss: using DAC cycles\n"); + else if (!pci_set_dma_mask(pdev, 0xffffffff)) + printk("cciss: not using DAC cycles\n"); + else { + printk("cciss: no suitable DMA available\n"); + free_hba(i); + return -ENODEV; + } + if( register_blkdev(MAJOR_NR+i, hba[i]->devname, &cciss_fops)) { printk(KERN_ERR "cciss: Unable to get major number " @@ -1942,20 +1866,16 @@ q = BLK_DEFAULT_QUEUE(MAJOR_NR + i); q->queuedata = hba[i]; - blk_init_queue(q, do_cciss_request); + blk_init_queue(q, do_cciss_request, hba[i]->devname); blk_queue_headactive(q, 0); + blk_queue_bounce_limit(q, hba[i]->pdev->dma_mask); + q->max_segments = MAXSGENTRIES; + blk_queue_max_sectors(q, 512); /* fill in the other Kernel structs */ blksize_size[MAJOR_NR+i] = hba[i]->blocksizes; - hardsect_size[MAJOR_NR+i] = hba[i]->hardsizes; read_ahead[MAJOR_NR+i] = READ_AHEAD; - /* Set the pointers to queue functions */ - q->back_merge_fn = cpq_back_merge_fn; - q->front_merge_fn = cpq_front_merge_fn; - q->merge_requests_fn = cpq_merge_requests_fn; - - /* Fill in the gendisk data */ hba[i]->gendisk.major = MAJOR_NR + i; hba[i]->gendisk.major_name = "cciss"; @@ -2004,12 +1924,11 @@ unregister_blkdev(MAJOR_NR+i, hba[i]->devname); remove_proc_entry(hba[i]->devname, proc_cciss); - /* remove it from the disk list */ del_gendisk(&(hba[i]->gendisk)); - pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(CommandList_struct), - hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle); + pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(CommandList_struct), + hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle); pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof( ErrorInfo_struct), hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle); kfree(hba[i]->cmd_pool_bits); @@ -2017,32 +1936,31 @@ } static struct pci_driver cciss_pci_driver = { - name: "cciss", - probe: cciss_init_one, - remove: cciss_remove_one, - id_table: cciss_pci_device_id, /* id_table */ + name: "cciss", + probe: cciss_init_one, + remove: cciss_remove_one, + id_table: cciss_pci_device_id, /* id_table */ }; /* -* This is it. Register the PCI driver information for the cards we control -* the OS will call our registered routines when it finds one of our cards. -*/ + * This is it. Register the PCI driver information for the cards we control + * the OS will call our registered routines when it finds one of our cards. 
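/*
 * The DMA-mask negotiation added to cciss above, as a stand-alone
 * sketch: prefer 64-bit addressing (DAC cycles), fall back to 32-bit,
 * and hand the bounce-buffer code whichever mask stuck. Error handling
 * is simplified here.
 */
if (!pci_set_dma_mask(pdev, 0xffffffffffffffffULL))
	printk("example: using DAC cycles\n");
else if (!pci_set_dma_mask(pdev, 0xffffffffUL))
	printk("example: 32-bit DMA only\n");
else
	return -ENODEV;	/* no usable DMA mask */

blk_queue_bounce_limit(q, pdev->dma_mask);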
+ */ int __init cciss_init(void) { - printk(KERN_INFO DRIVER_NAME "\n"); + /* Register for out PCI devices */ if (pci_register_driver(&cciss_pci_driver) > 0 ) return 0; else return -ENODEV; - } +} EXPORT_NO_SYMBOLS; static int __init init_cciss_module(void) { - return ( cciss_init()); } diff -urN linux-2.5.1-pre1/drivers/block/cciss.h linux/drivers/block/cciss.h --- linux-2.5.1-pre1/drivers/block/cciss.h Tue May 22 10:23:16 2001 +++ linux/drivers/block/cciss.h Sat Dec 1 00:37:05 2001 @@ -15,11 +15,6 @@ #define MAJOR_NR COMPAQ_CISS_MAJOR -struct my_sg { - int len; - char *start_addr; -}; - struct ctlr_info; typedef struct ctlr_info ctlr_info_t; @@ -85,9 +80,8 @@ struct gendisk gendisk; // indexed by minor numbers struct hd_struct hd[256]; - int sizes[256]; + int sizes[256]; int blocksizes[256]; - int hardsizes[256]; }; /* Defining the diffent access_menthods */ @@ -247,5 +241,8 @@ char *product_name; struct access_method *access; }; + +#define CCISS_LOCK(i) (&((BLK_DEFAULT_QUEUE(MAJOR_NR + i))->queue_lock)) + #endif /* CCISS_H */ diff -urN linux-2.5.1-pre1/drivers/block/cciss_cmd.h linux/drivers/block/cciss_cmd.h --- linux-2.5.1-pre1/drivers/block/cciss_cmd.h Fri Nov 2 17:45:42 2001 +++ linux/drivers/block/cciss_cmd.h Sat Dec 1 00:37:05 2001 @@ -7,7 +7,7 @@ //general boundary defintions #define SENSEINFOBYTES 32//note that this value may vary between host implementations -#define MAXSGENTRIES 31 +#define MAXSGENTRIES 32 #define MAXREPLYQS 256 //Command Status value diff -urN linux-2.5.1-pre1/drivers/block/cpqarray.c linux/drivers/block/cpqarray.c --- linux-2.5.1-pre1/drivers/block/cpqarray.c Fri Nov 9 14:28:46 2001 +++ linux/drivers/block/cpqarray.c Sat Dec 1 00:37:05 2001 @@ -100,7 +100,6 @@ static struct hd_struct * ida; static int * ida_sizes; static int * ida_blocksizes; -static int * ida_hardsizes; static struct gendisk ida_gendisk[MAX_CTLR]; static struct proc_dir_entry *proc_array; @@ -145,7 +144,7 @@ static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c); static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c); -static inline void complete_buffers(struct buffer_head *bh, int ok); +static inline void complete_buffers(struct bio *bio, int ok); static inline void complete_command(cmdlist_t *cmd, int timeout); static void do_ida_intr(int irq, void *dev_id, struct pt_regs * regs); @@ -176,12 +175,11 @@ ida_sizes[(ctlr<nr_blks; - for(j=0; j<16; j++) { + for(j=0; j<16; j++) ida_blocksizes[(ctlr<blk_size; - } + + (BLK_DEFAULT_QUEUE(MAJOR_NR + ctlr))->hardsect_size = drv->blk_size; ida_gendisk[ctlr].nr_real++; } @@ -341,52 +339,10 @@ remove_proc_entry("cpqarray", proc_root_driver); kfree(ida); kfree(ida_sizes); - kfree(ida_hardsizes); kfree(ida_blocksizes); } #endif /* MODULE */ -static inline int cpq_new_segment(request_queue_t *q, struct request *rq, - int max_segments) -{ - if (rq->nr_segments < SG_MAX) { - rq->nr_segments++; - return 1; - } - return 0; -} - -static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (bh->b_data + bh->b_size == rq->bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, - struct request *nxt, int max_segments) -{ - int total_segments = rq->nr_segments + 
nxt->nr_segments; - - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) - total_segments--; - - if (total_segments > SG_MAX) - return 0; - - rq->nr_segments = total_segments; - return 1; -} - /* * This is it. Find all the controllers and register them. I really hate * stealing all these major device numbers. @@ -433,20 +389,9 @@ return(num_cntlrs_reg); } - ida_hardsizes = kmalloc(sizeof(int)*nr_ctlr*NWD*16, GFP_KERNEL); - if(ida_hardsizes==NULL) - { - kfree(ida); - kfree(ida_sizes); - kfree(ida_blocksizes); - printk( KERN_ERR "cpqarray: out of memory"); - return(num_cntlrs_reg); - } - memset(ida, 0, sizeof(struct hd_struct)*nr_ctlr*NWD*16); memset(ida_sizes, 0, sizeof(int)*nr_ctlr*NWD*16); memset(ida_blocksizes, 0, sizeof(int)*nr_ctlr*NWD*16); - memset(ida_hardsizes, 0, sizeof(int)*nr_ctlr*NWD*16); memset(ida_gendisk, 0, sizeof(struct gendisk)*MAX_CTLR); /* @@ -504,7 +449,6 @@ { kfree(ida); kfree(ida_sizes); - kfree(ida_hardsizes); kfree(ida_blocksizes); } return(num_cntlrs_reg); @@ -523,16 +467,13 @@ q = BLK_DEFAULT_QUEUE(MAJOR_NR + i); q->queuedata = hba[i]; - blk_init_queue(q, do_ida_request); + blk_init_queue(q, do_ida_request, hba[i]->devname); blk_queue_headactive(q, 0); + blk_queue_bounce_limit(q, hba[i]->pci_dev->dma_mask); + q->max_segments = SG_MAX; blksize_size[MAJOR_NR+i] = ida_blocksizes + (i*256); - hardsect_size[MAJOR_NR+i] = ida_hardsizes + (i*256); read_ahead[MAJOR_NR+i] = READ_AHEAD; - q->back_merge_fn = cpq_back_merge_fn; - q->front_merge_fn = cpq_front_merge_fn; - q->merge_requests_fn = cpq_merge_requests_fn; - ida_gendisk[i].major = MAJOR_NR + i; ida_gendisk[i].major_name = "ida"; ida_gendisk[i].minor_shift = NWD_SHIFT; @@ -911,21 +852,19 @@ { ctlr_info_t *h = q->queuedata; cmdlist_t *c; - char *lastdataend; struct list_head * queue_head = &q->queue_head; - struct buffer_head *bh; struct request *creq; - struct my_sg tmp_sg[SG_MAX]; - int i, seg; + struct scatterlist tmp_sg[SG_MAX]; + int i, dir, seg; - if (q->plugged) + if (blk_queue_plugged(q)) goto startio; queue_next: if (list_empty(queue_head)) goto startio; - creq = blkdev_entry_next_request(queue_head); + creq = elv_next_request(q); if (creq->nr_segments > SG_MAX) BUG(); @@ -934,7 +873,7 @@ printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); - complete_buffers(creq->bh, 0); + complete_buffers(creq->bio, 0); end_that_request_last(creq); goto startio; } @@ -944,55 +883,40 @@ blkdev_dequeue_request(creq); - spin_unlock_irq(&io_request_lock); - - bh = creq->bh; + spin_unlock_irq(&q->queue_lock); c->ctlr = h->ctlr; c->hdr.unit = MINOR(creq->rq_dev) >> NWD_SHIFT; c->hdr.size = sizeof(rblk_t) >> 2; c->size += sizeof(rblk_t); - c->req.hdr.blk = ida[(h->ctlr<rq_dev)].start_sect + creq->sector; + c->req.hdr.blk = creq->sector; c->rq = creq; DBGPX( - if (bh == NULL) - panic("bh == NULL?"); - printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors); ); - seg = 0; lastdataend = NULL; - while(bh) { - if (bh->b_data == lastdataend) { - tmp_sg[seg-1].size += bh->b_size; - lastdataend += bh->b_size; - } else { - if (seg == SG_MAX) - BUG(); - tmp_sg[seg].size = bh->b_size; - tmp_sg[seg].start_addr = bh->b_data; - lastdataend = bh->b_data + bh->b_size; - seg++; - } - bh = bh->b_reqnext; - } + seg = blk_rq_map_sg(q, creq, tmp_sg); + /* Now do all the DMA Mappings */ + if (creq->cmd == READ) + dir = PCI_DMA_FROMDEVICE; + else + dir = PCI_DMA_TODEVICE; for( i=0; i < seg; i++) { - c->req.sg[i].size = tmp_sg[i].size; - c->req.sg[i].addr = (__u32) 
pci_map_single( - h->pci_dev, tmp_sg[i].start_addr, - tmp_sg[i].size, - (creq->cmd == READ) ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + c->req.sg[i].size = tmp_sg[i].length; + c->req.sg[i].addr = (__u32) pci_map_page(h->pci_dev, + tmp_sg[i].page, + tmp_sg[i].offset, + tmp_sg[i].length, dir); } -DBGPX( printk("Submitting %d sectors in %d segments\n", sect, seg); ); +DBGPX( printk("Submitting %d sectors in %d segments\n", creq->nr_sectors, seg); ); c->req.hdr.sg_cnt = seg; c->req.hdr.blk_cnt = creq->nr_sectors; c->req.hdr.cmd = (creq->cmd == READ) ? IDA_READ : IDA_WRITE; c->type = CMD_RWREQ; - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); /* Put the request on the tail of the request queue */ addQ(&h->reqQ, c); @@ -1033,17 +957,19 @@ } } -static inline void complete_buffers(struct buffer_head *bh, int ok) +static inline void complete_buffers(struct bio *bio, int ok) { - struct buffer_head *xbh; - while(bh) { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; + struct bio *xbh; + while(bio) { + int nsecs = bio_sectors(bio); + + xbh = bio->bi_next; + bio->bi_next = NULL; - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, ok); + blk_finished_io(nsecs); + bio_endio(bio, ok, nsecs); - bh = xbh; + bio = xbh; } } /* @@ -1052,7 +978,7 @@ static inline void complete_command(cmdlist_t *cmd, int timeout) { int ok=1; - int i; + int i, ddir; if (cmd->req.hdr.rcode & RCODE_NONFATAL && (hba[cmd->ctlr]->misc_tflags & MISC_NONFATAL_WARN) == 0) { @@ -1074,19 +1000,18 @@ } if (timeout) ok = 0; /* unmap the DMA mapping for all the scatter gather elements */ + if (cmd->req.hdr.cmd == IDA_READ) + ddir = PCI_DMA_FROMDEVICE; + else + ddir = PCI_DMA_TODEVICE; for(i=0; i<cmd->req.hdr.sg_cnt; i++) - { - pci_unmap_single(hba[cmd->ctlr]->pci_dev, - cmd->req.sg[i].addr, cmd->req.sg[i].size, - (cmd->req.hdr.cmd == IDA_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); - } + pci_unmap_page(hba[cmd->ctlr]->pci_dev, cmd->req.sg[i].addr, + cmd->req.sg[i].size, ddir); - complete_buffers(cmd->rq->bh, ok); + complete_buffers(cmd->rq->bio, ok); - DBGPX(printk("Done with %p\n", cmd->rq);); + DBGPX(printk("Done with %p\n", cmd->rq);); end_that_request_last(cmd->rq); - - } /* @@ -1111,7 +1036,7 @@ * If there are completed commands in the completion queue, * we had better do something about it.
*/ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(h->ctlr), flags); if (istat & FIFO_NOT_EMPTY) { while((a = h->access.command_completed(h))) { a1 = a; a &= ~3; @@ -1155,7 +1080,7 @@ * See if we can queue up some more IO */ do_ida_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(h->ctlr), flags); } /* @@ -1201,14 +1126,10 @@ put_user(diskinfo[0], &geo->heads); put_user(diskinfo[1], &geo->sectors); put_user(diskinfo[2], &geo->cylinders); - put_user(ida[(ctlr<<CTLR_SHIFT) + MINOR(inode->i_rdev)].start_sect, &geo->start); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; case IDAGETDRVINFO: return copy_to_user(&io->c.drv,&hba[ctlr]->drv[dsk],sizeof(drv_info_t)); - case BLKGETSIZE: - return put_user(ida[(ctlr<<CTLR_SHIFT) + MINOR(inode->i_rdev)].nr_sects, (unsigned long *)arg); - case BLKGETSIZE64: - return put_user((u64)(ida[(ctlr<<CTLR_SHIFT) + MINOR(inode->i_rdev)].nr_sects) << 9, (u64*)arg); case BLKRRPART: return revalidate_logvol(inode->i_rdev, 1); case IDAPASSTHRU: @@ -1244,6 +1165,8 @@ return(0); } + case BLKGETSIZE: + case BLKGETSIZE64: case BLKFLSBUF: case BLKBSZSET: case BLKBSZGET: @@ -1251,8 +1174,6 @@ case BLKROGET: case BLKRASET: case BLKRAGET: - case BLKELVGET: - case BLKELVSET: case BLKPG: return blk_ioctl(inode->i_rdev, cmd, arg); @@ -1352,11 +1273,11 @@ } /* Put the request on the tail of the request queue */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); addQ(&h->reqQ, c); h->Qdepth++; start_io(h); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); /* Wait for completion */ while(c->type != CMD_IOCTL_DONE) @@ -1570,15 +1491,15 @@ if (MINOR(dev) != 0) return -ENXIO; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); if (hba[ctlr]->usage_count > 1) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); printk(KERN_WARNING "cpqarray: Device busy for volume" " revalidation (usage=%d)\n", hba[ctlr]->usage_count); return -EBUSY; } - spin_unlock_irqrestore(&io_request_lock, flags); hba[ctlr]->usage_count++; + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); /* * Set the partition and block size structures for all volumes @@ -1587,7 +1508,6 @@ memset(ida+(ctlr*256), 0, sizeof(struct hd_struct)*NWD*16); memset(ida_sizes+(ctlr*256), 0, sizeof(int)*NWD*16); memset(ida_blocksizes+(ctlr*256), 0, sizeof(int)*NWD*16); - memset(ida_hardsizes+(ctlr*256), 0, sizeof(int)*NWD*16); memset(hba[ctlr]->drv, 0, sizeof(drv_info_t)*NWD); ida_gendisk[ctlr].nr_real = 0; @@ -1615,17 +1535,15 @@ int ctlr, target; struct gendisk *gdev; unsigned long flags; - int max_p; - int start; - int i; + int res; target = DEVICE_NR(dev); ctlr = MAJOR(dev) - MAJOR_NR; gdev = &ida_gendisk[ctlr]; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); if (hba[ctlr]->drv[target].usage_count > maxusage) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); printk(KERN_WARNING "cpqarray: Device busy for " "revalidation (usage=%d)\n", hba[ctlr]->drv[target].usage_count); @@ -1633,25 +1551,14 @@ } hba[ctlr]->drv[target].usage_count++; - spin_unlock_irqrestore(&io_request_lock, flags); - - max_p = gdev->max_p; - start = target << gdev->minor_shift; + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); - for(i=max_p-1; i>=0; i--) { - int minor = start+i; - invalidate_device(MKDEV(MAJOR_NR + ctlr, minor), 1); -
gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; + res = wipe_partitions(dev); + if (!res) + grok_partitions(dev, hba[ctlr]->drv[target].nr_blks); - /* reset the blocksize so we can read the partition table */ - blksize_size[MAJOR_NR+ctlr][minor] = 1024; - } - - /* 16 minors per disk... */ - grok_partitions(gdev, target, 16, hba[ctlr]->drv[target].nr_blks); hba[ctlr]->drv[target].usage_count--; - return 0; + return res; } diff -urN linux-2.5.1-pre1/drivers/block/cpqarray.h linux/drivers/block/cpqarray.h --- linux-2.5.1-pre1/drivers/block/cpqarray.h Tue May 22 10:23:16 2001 +++ linux/drivers/block/cpqarray.h Sat Dec 1 00:37:05 2001 @@ -56,11 +56,6 @@ #ifdef __KERNEL__ -struct my_sg { - int size; - char *start_addr; -}; - struct ctlr_info; typedef struct ctlr_info ctlr_info_t; @@ -121,6 +116,9 @@ struct timer_list timer; unsigned int misc_tflags; }; + +#define IDA_LOCK(i) (&((BLK_DEFAULT_QUEUE(MAJOR_NR + i))->queue_lock)) + #endif #endif /* CPQARRAY_H */ diff -urN linux-2.5.1-pre1/drivers/block/elevator.c linux/drivers/block/elevator.c --- linux-2.5.1-pre1/drivers/block/elevator.c Thu Jul 19 20:59:41 2001 +++ linux/drivers/block/elevator.c Sat Dec 1 00:37:05 2001 @@ -18,48 +18,65 @@ * Removed tests for max-bomb-segments, which was breaking elvtune * when run without -bN * + * Jens: + * - Rework again to work with bio instead of buffer_heads + * - added merge by hash-lookup + * - loose bi_dev comparisons, partition handling is right now + * - completely modularize elevator setup and teardown + * */ - +#include #include #include #include #include +#include #include +#include +#include +#include + #include /* - * This is a bit tricky. It's given that bh and rq are for the same + * This is a bit tricky. It's given that bio and rq are for the same * device, but the next request might of course not be. Run through * the tests below to check if we want to insert here if we can't merge - * bh into an existing request + * bio into an existing request */ -inline int bh_rq_in_between(struct buffer_head *bh, struct request *rq, - struct list_head *head) +inline int bio_rq_in_between(struct bio *bio, struct request *rq, + struct list_head *head) { struct list_head *next; struct request *next_rq; - next = rq->queue.next; + /* + * if .next is a valid request + */ + next = rq->queuelist.next; if (next == head) return 0; + next_rq = list_entry(next, struct request, queuelist); + + BUG_ON(!next_rq->inactive); + /* - * if the device is different (usually on a different partition), - * just check if bh is after rq + * if the device is different (not a normal case) just check if + * bio is after rq */ - next_rq = blkdev_entry_to_request(next); if (next_rq->rq_dev != rq->rq_dev) - return bh->b_rsector > rq->sector; + return bio->bi_sector > rq->sector; /* - * ok, rq, next_rq and bh are on the same device. if bh is in between + * ok, rq, next_rq and bio are on the same device. 
if bio is in between * the two, this is the sweet spot */ - if (bh->b_rsector < next_rq->sector && bh->b_rsector > rq->sector) + if (bio->bi_sector < next_rq->sector && bio->bi_sector > rq->sector) return 1; /* - * next_rq is ordered wrt rq, but bh is not in between the two + * next_rq is ordered wrt rq, but bio is not in between the two */ if (next_rq->sector > rq->sector) return 0; @@ -68,66 +85,139 @@ * next_rq and rq not ordered, if we happen to be either before * next_rq or after rq insert here anyway */ - if (bh->b_rsector > rq->sector || bh->b_rsector < next_rq->sector) + if (bio->bi_sector > rq->sector || bio->bi_sector < next_rq->sector) return 1; return 0; } +/* + * can we safely merge with this request? + */ +inline int elv_rq_merge_ok(request_queue_t *q, struct request *rq, + struct bio *bio) +{ + if (bio_data_dir(bio) == rq->cmd) { + if (rq->rq_dev == bio->bi_dev && !rq->waiting + && !rq->special && rq->inactive && rq->q == q) + return 1; + } + + return 0; +} + +/* + * find a struct request that has a bio linked that we can merge with + */ +inline struct request *bio_get_hash_rq(kdev_t dev, sector_t sector, int vc) +{ + struct bio *bio = bio_hash_find(dev, sector, vc); + struct request *rq = NULL; + + /* + * bio is pinned until we bio_put it + */ + if (bio) { + rq = bio->bi_hash_desc; + + BUG_ON(!rq); + + bio_put(bio); + } + + return rq; +} int elevator_linus_merge(request_queue_t *q, struct request **req, - struct list_head * head, - struct buffer_head *bh, int rw, - int max_sectors) + struct list_head *head, struct bio *bio) { - struct list_head *entry = &q->queue_head; - unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE; + unsigned int count = bio_sectors(bio); + struct elv_linus_data *edat = q->elevator.elevator_data; + unsigned int vc = q->hash_valid_counter; + struct list_head *entry; + struct request *__rq; + /* + * first try a back merge, then front, then give up and scan. 
this + * will of course fail for different size bios on the same queue, + * however that isn't really an issue + */ + if (likely(edat->flags & ELV_LINUS_BACK_MERGE)) { + __rq = bio_get_hash_rq(bio->bi_dev, bio->bi_sector - count, vc); + if (__rq) { + if (!elv_rq_merge_ok(q, __rq, bio)) + goto front; + + /* + * looks ok to merge + */ + if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { + *req = __rq; + return ELEVATOR_BACK_MERGE; + } + } + } + +front: + if (likely(edat->flags & ELV_LINUS_FRONT_MERGE)) { + __rq = bio_get_hash_rq(bio->bi_dev, bio->bi_sector + count, vc); + if (__rq) { + if (!elv_rq_merge_ok(q, __rq, bio)) + goto scan; + + /* + * looks ok to merge + */ + if (__rq->sector - count == bio->bi_sector) { + *req = __rq; + return ELEVATOR_FRONT_MERGE; + } + } + } + + /* + * no merge possible, scan for insertion + */ +scan: + entry = &q->queue_head; while ((entry = entry->prev) != head) { - struct request *__rq = blkdev_entry_to_request(entry); + __rq = list_entry_rq(entry); - /* - * simply "aging" of requests in queue - */ - if (__rq->elevator_sequence-- <= 0) - break; + prefetch(list_entry_rq(entry->prev)); - if (__rq->waiting) - continue; - if (__rq->rq_dev != bh->b_rdev) - continue; - if (!*req && bh_rq_in_between(bh, __rq, &q->queue_head)) - *req = __rq; - if (__rq->cmd != rw) + if (unlikely(__rq->waiting || __rq->special)) continue; - if (__rq->nr_sectors + count > max_sectors) - continue; - if (__rq->elevator_sequence < count) + if (unlikely(!__rq->inactive)) break; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { - ret = ELEVATOR_BACK_MERGE; + if (!*req && bio_rq_in_between(bio, __rq, &q->queue_head)) *req = __rq; + + /* + * simple "aging" of requests in queue + */ + if (__rq->elevator_sequence-- <= 0) break; - } else if (__rq->sector - count == bh->b_rsector) { - ret = ELEVATOR_FRONT_MERGE; - __rq->elevator_sequence -= count; - *req = __rq; + else if (__rq->elevator_sequence < count) break; - } } - return ret; + return ELEVATOR_NO_MERGE; } void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int count) { - struct list_head *entry = &req->queue, *head = &q->queue_head; + struct list_head *entry; + + BUG_ON(req->q != q); /* * second pass scan of requests that got passed over, if any */ - while ((entry = entry->next) != head) { - struct request *tmp = blkdev_entry_to_request(entry); + entry = &req->queuelist; + while ((entry = entry->next) != &q->queue_head) { + struct request *tmp; + prefetch(list_entry_rq(entry->next)); + tmp = list_entry_rq(entry); tmp->elevator_sequence -= count; } } @@ -138,85 +228,114 @@ req->elevator_sequence = next->elevator_sequence; } +void elv_add_request_fn(request_queue_t *q, struct request *rq, + struct list_head *insert_here) +{ + /* + * insert into queue pending list, merge hash, and possible latency + * list + */ + list_add(&rq->queuelist, insert_here); +} + +struct request *elv_next_request_fn(request_queue_t *q) +{ + if (!blk_queue_empty(q)) + return list_entry(q->queue_head.next, struct request, queuelist); + + return NULL; +} + +int elv_linus_init(request_queue_t *q, elevator_t *e) +{ + struct elv_linus_data *edata; + + edata = kmalloc(sizeof(struct elv_linus_data), GFP_ATOMIC); + if (!edata) + return -ENOMEM; + + /* + * default to doing both front and back merges + */ + edata->flags = ELV_LINUS_BACK_MERGE | ELV_LINUS_FRONT_MERGE; + e->elevator_data = edata; + return 0; +} + +void elv_linus_exit(request_queue_t *q, elevator_t *e) +{ + kfree(e->elevator_data); +} + /* * See if we can find a request that 
this buffer can be coalesced with. */ int elevator_noop_merge(request_queue_t *q, struct request **req, - struct list_head * head, - struct buffer_head *bh, int rw, - int max_sectors) + struct list_head * head, struct bio *bio) { - struct list_head *entry; - unsigned int count = bh->b_size >> 9; + struct request *__rq; + int count, ret; + unsigned int vc; + + count = bio_sectors(bio); + ret = ELEVATOR_NO_MERGE; + vc = q->hash_valid_counter; + + __rq = bio_get_hash_rq(bio->bi_dev, bio->bi_sector - count, vc); + if (__rq) { + if (!elv_rq_merge_ok(q, __rq, bio)) + goto front; - if (list_empty(&q->queue_head)) - return ELEVATOR_NO_MERGE; + if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { + ret = ELEVATOR_BACK_MERGE; + *req = __rq; + goto out; + } + } - entry = &q->queue_head; - while ((entry = entry->prev) != head) { - struct request *__rq = blkdev_entry_to_request(entry); +front: + __rq = bio_get_hash_rq(bio->bi_dev, bio->bi_sector + count, vc); + if (__rq) { + if (!elv_rq_merge_ok(q, __rq, bio)) + goto out; - if (__rq->cmd != rw) - continue; - if (__rq->rq_dev != bh->b_rdev) - continue; - if (__rq->nr_sectors + count > max_sectors) - continue; - if (__rq->waiting) - continue; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { - *req = __rq; - return ELEVATOR_BACK_MERGE; - } else if (__rq->sector - count == bh->b_rsector) { + if (__rq->sector - count == bio->bi_sector) { + ret = ELEVATOR_FRONT_MERGE; *req = __rq; - return ELEVATOR_FRONT_MERGE; + goto out; } } - *req = blkdev_entry_to_request(q->queue_head.prev); - return ELEVATOR_NO_MERGE; +out: + return ret; } void elevator_noop_merge_cleanup(request_queue_t *q, struct request *req, int count) {} void elevator_noop_merge_req(struct request *req, struct request *next) {} -int blkelvget_ioctl(elevator_t * elevator, blkelv_ioctl_arg_t * arg) +int elevator_init(request_queue_t *q, elevator_t *e, elevator_t type,char *name) { - blkelv_ioctl_arg_t output; + *e = type; - output.queue_ID = elevator->queue_ID; - output.read_latency = elevator->read_latency; - output.write_latency = elevator->write_latency; - output.max_bomb_segments = 0; + INIT_LIST_HEAD(&q->queue_head); + strncpy(e->queue_name, name, 15); - if (copy_to_user(arg, &output, sizeof(blkelv_ioctl_arg_t))) - return -EFAULT; + if (e->elevator_init_fn) + return e->elevator_init_fn(q, e); return 0; } -int blkelvset_ioctl(elevator_t * elevator, const blkelv_ioctl_arg_t * arg) +void elevator_exit(request_queue_t *q, elevator_t *e) { - blkelv_ioctl_arg_t input; - - if (copy_from_user(&input, arg, sizeof(blkelv_ioctl_arg_t))) - return -EFAULT; - - if (input.read_latency < 0) - return -EINVAL; - if (input.write_latency < 0) - return -EINVAL; - - elevator->read_latency = input.read_latency; - elevator->write_latency = input.write_latency; - return 0; + if (e->elevator_exit_fn) + e->elevator_exit_fn(q, e); } -void elevator_init(elevator_t * elevator, elevator_t type) +int elevator_global_init(void) { - static unsigned int queue_ID; - - *elevator = type; - elevator->queue_ID = queue_ID++; + return 0; } + +module_init(elevator_global_init); diff -urN linux-2.5.1-pre1/drivers/block/floppy.c linux/drivers/block/floppy.c --- linux-2.5.1-pre1/drivers/block/floppy.c Thu Oct 25 13:58:34 2001 +++ linux/drivers/block/floppy.c Sat Dec 1 00:37:05 2001 @@ -576,7 +576,7 @@ static struct floppy_struct *_floppy = floppy_type; static unsigned char current_drive; static long current_count_sectors; -static unsigned char sector_t; /* sector in track */ +static unsigned char fsector_t; /* sector in track 
*/ static unsigned char in_sector_offset; /* offset within physical sector, * expressed in units of 512 bytes */ @@ -2276,8 +2276,8 @@ * logical buffer */ static void request_done(int uptodate) { - int block; unsigned long flags; + int block; probing = 0; reschedule_timeout(MAXTIMEOUT, "request done %d", uptodate); @@ -2296,7 +2296,7 @@ DRS->maxtrack = 1; /* unlock chained buffers */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&QUEUE->queue_lock, flags); while (current_count_sectors && !QUEUE_EMPTY && current_count_sectors >= CURRENT->current_nr_sectors){ current_count_sectors -= CURRENT->current_nr_sectors; @@ -2304,7 +2304,7 @@ CURRENT->sector += CURRENT->current_nr_sectors; end_request(1); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&QUEUE->queue_lock, flags); if (current_count_sectors && !QUEUE_EMPTY){ /* "unlock" last subsector */ @@ -2329,9 +2329,9 @@ DRWE->last_error_sector = CURRENT->sector; DRWE->last_error_generation = DRS->generation; } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&QUEUE->queue_lock, flags); end_request(0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&QUEUE->queue_lock, flags); } } @@ -2377,7 +2377,7 @@ printk("rt=%d t=%d\n", R_TRACK, TRACK); printk("heads=%d eoc=%d\n", heads, eoc); printk("spt=%d st=%d ss=%d\n", SECT_PER_TRACK, - sector_t, ssize); + fsector_t, ssize); printk("in_sector_offset=%d\n", in_sector_offset); } #endif @@ -2424,7 +2424,7 @@ } else if (CT(COMMAND) == FD_READ){ buffer_track = raw_cmd->track; buffer_drive = current_drive; - INFBOUND(buffer_max, nr_sectors + sector_t); + INFBOUND(buffer_max, nr_sectors + fsector_t); } cont->redo(); } @@ -2432,19 +2432,19 @@ /* Compute maximal contiguous buffer size. 
*/ static int buffer_chain_size(void) { - struct buffer_head *bh; + struct bio *bio; int size; char *base; base = CURRENT->buffer; size = CURRENT->current_nr_sectors << 9; - bh = CURRENT->bh; + bio = CURRENT->bio; - if (bh){ - bh = bh->b_reqnext; - while (bh && bh->b_data == base + size){ - size += bh->b_size; - bh = bh->b_reqnext; + if (bio){ + bio = bio->bi_next; + while (bio && bio_data(bio) == base + size){ + size += bio_size(bio); + bio = bio->bi_next; } } return size >> 9; @@ -2453,13 +2453,13 @@ /* Compute the maximal transfer size */ static int transfer_size(int ssize, int max_sector, int max_size) { - SUPBOUND(max_sector, sector_t + max_size); + SUPBOUND(max_sector, fsector_t + max_size); /* alignment */ max_sector -= (max_sector % _floppy->sect) % ssize; /* transfer size, beginning not aligned */ - current_count_sectors = max_sector - sector_t ; + current_count_sectors = max_sector - fsector_t ; return max_sector; } @@ -2470,7 +2470,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) { int remaining; /* number of transferred 512-byte sectors */ - struct buffer_head *bh; + struct bio *bio; char *buffer, *dma_buffer; int size; @@ -2479,8 +2479,8 @@ CURRENT->nr_sectors); if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE && - buffer_max > sector_t + CURRENT->nr_sectors) - current_count_sectors = minimum(buffer_max - sector_t, + buffer_max > fsector_t + CURRENT->nr_sectors) + current_count_sectors = minimum(buffer_max - fsector_t, CURRENT->nr_sectors); remaining = current_count_sectors << 9; @@ -2491,7 +2491,7 @@ printk("current_count_sectors=%ld\n", current_count_sectors); printk("remaining=%d\n", remaining >> 9); printk("CURRENT->nr_sectors=%ld\n",CURRENT->nr_sectors); - printk("CURRENT->current_nr_sectors=%ld\n", + printk("CURRENT->current_nr_sectors=%u\n", CURRENT->current_nr_sectors); printk("max_sector=%d\n", max_sector); printk("ssize=%d\n", ssize); @@ -2500,9 +2500,9 @@ buffer_max = maximum(max_sector, buffer_max); - dma_buffer = floppy_track_buffer + ((sector_t - buffer_min) << 9); + dma_buffer = floppy_track_buffer + ((fsector_t - buffer_min) << 9); - bh = CURRENT->bh; + bio = CURRENT->bio; size = CURRENT->current_nr_sectors << 9; buffer = CURRENT->buffer; @@ -2514,8 +2514,8 @@ dma_buffer < floppy_track_buffer){ DPRINT("buffer overrun in copy buffer %d\n", (int) ((floppy_track_buffer - dma_buffer) >>9)); - printk("sector_t=%d buffer_min=%d\n", - sector_t, buffer_min); + printk("fsector_t=%d buffer_min=%d\n", + fsector_t, buffer_min); printk("current_count_sectors=%ld\n", current_count_sectors); if (CT(COMMAND) == FD_READ) @@ -2536,15 +2536,15 @@ break; dma_buffer += size; - bh = bh->b_reqnext; + bio = bio->bi_next; #ifdef FLOPPY_SANITY_CHECK - if (!bh){ + if (!bio){ DPRINT("bh=null in copy buffer after copy\n"); break; } #endif - size = bh->b_size; - buffer = bh->b_data; + size = bio_size(bio); + buffer = bio_data(bio); } #ifdef FLOPPY_SANITY_CHECK if (remaining){ @@ -2636,7 +2636,7 @@ max_sector = _floppy->sect * _floppy->head; TRACK = CURRENT->sector / max_sector; - sector_t = CURRENT->sector % max_sector; + fsector_t = CURRENT->sector % max_sector; if (_floppy->track && TRACK >= _floppy->track) { if (CURRENT->current_nr_sectors & 1) { current_count_sectors = 1; @@ -2644,17 +2644,17 @@ } else return 0; } - HEAD = sector_t / _floppy->sect; + HEAD = fsector_t / _floppy->sect; if (((_floppy->stretch & FD_SWAPSIDES) || TESTF(FD_NEED_TWADDLE)) && - sector_t < _floppy->sect) + fsector_t < _floppy->sect) max_sector = _floppy->sect; /* 2M disks have 
phantom sectors on the first track */ if ((_floppy->rate & FD_2M) && (!TRACK) && (!HEAD)){ max_sector = 2 * _floppy->sect / 3; - if (sector_t >= max_sector){ - current_count_sectors = minimum(_floppy->sect - sector_t, + if (fsector_t >= max_sector){ + current_count_sectors = minimum(_floppy->sect - fsector_t, CURRENT->nr_sectors); return 1; } @@ -2676,7 +2676,7 @@ GAP = _floppy->gap; CODE2SIZE; SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE; - SECTOR = ((sector_t % _floppy->sect) << 2 >> SIZECODE) + 1; + SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) + 1; /* tracksize describes the size which can be filled up with sectors * of size ssize. @@ -2684,11 +2684,11 @@ tracksize = _floppy->sect - _floppy->sect % ssize; if (tracksize < _floppy->sect){ SECT_PER_TRACK ++; - if (tracksize <= sector_t % _floppy->sect) + if (tracksize <= fsector_t % _floppy->sect) SECTOR--; /* if we are beyond tracksize, fill up using smaller sectors */ - while (tracksize <= sector_t % _floppy->sect){ + while (tracksize <= fsector_t % _floppy->sect){ while(tracksize + ssize > _floppy->sect){ SIZECODE--; ssize >>= 1; @@ -2704,12 +2704,12 @@ max_sector = _floppy->sect; } - in_sector_offset = (sector_t % _floppy->sect) % ssize; - aligned_sector_t = sector_t - in_sector_offset; + in_sector_offset = (fsector_t % _floppy->sect) % ssize; + aligned_sector_t = fsector_t - in_sector_offset; max_size = CURRENT->nr_sectors; if ((raw_cmd->track == buffer_track) && (current_drive == buffer_drive) && - (sector_t >= buffer_min) && (sector_t < buffer_max)) { + (fsector_t >= buffer_min) && (fsector_t < buffer_max)) { /* data already in track buffer */ if (CT(COMMAND) == FD_READ) { copy_buffer(1, max_sector, buffer_max); @@ -2717,8 +2717,8 @@ } } else if (in_sector_offset || CURRENT->nr_sectors < ssize){ if (CT(COMMAND) == FD_WRITE){ - if (sector_t + CURRENT->nr_sectors > ssize && - sector_t + CURRENT->nr_sectors < ssize + ssize) + if (fsector_t + CURRENT->nr_sectors > ssize && + fsector_t + CURRENT->nr_sectors < ssize + ssize) max_size = ssize + ssize; else max_size = ssize; @@ -2731,7 +2731,7 @@ int direct, indirect; indirect= transfer_size(ssize,max_sector,max_buffer_sectors*2) - - sector_t; + fsector_t; /* * Do NOT use minimum() here---MAX_DMA_ADDRESS is 64 bits wide @@ -2746,7 +2746,7 @@ if (CROSS_64KB(CURRENT->buffer, max_size << 9)) max_size = (K_64 - ((unsigned long)CURRENT->buffer) % K_64)>>9; - direct = transfer_size(ssize,max_sector,max_size) - sector_t; + direct = transfer_size(ssize,max_sector,max_size) - fsector_t; /* * We try to read tracks, but if we get too many errors, we * go back to reading just one sector at a time. 
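The buffer_chain_size() and copy_buffer() hunks above apply the same mechanical substitution used throughout this patch: a request's data now arrives as a chain of struct bio linked through bi_next, and bio_data()/bio_size() replace the old b_data/b_size fields of struct buffer_head. A minimal sketch of the new chain walk, using only the accessors visible in this patch (the helper name chain_bytes is hypothetical):

	/*
	 * Sketch only, not part of the patch: compute how many bytes of a
	 * request are physically contiguous with its first buffer -- the
	 * bio-based equivalent of the old buffer_head walk.
	 */
	static int chain_bytes(struct request *rq)
	{
		struct bio *bio = rq->bio;
		char *base = rq->buffer;
		int size = rq->current_nr_sectors << 9;

		if (bio) {
			bio = bio->bi_next;
			/* extend only while the data stays contiguous */
			while (bio && bio_data(bio) == base + size) {
				size += bio_size(bio);
				bio = bio->bi_next;
			}
		}
		return size;
	}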
@@ -2765,8 +2765,8 @@ raw_cmd->length = current_count_sectors << 9; if (raw_cmd->length == 0){ DPRINT("zero dma transfer attempted from make_raw_request\n"); - DPRINT("indirect=%d direct=%d sector_t=%d", - indirect, direct, sector_t); + DPRINT("indirect=%d direct=%d fsector_t=%d", + indirect, direct, fsector_t); return 0; } /* check_dma_crossing(raw_cmd->kernel_data, @@ -2784,12 +2784,12 @@ /* claim buffer track if needed */ if (buffer_track != raw_cmd->track || /* bad track */ buffer_drive !=current_drive || /* bad drive */ - sector_t > buffer_max || - sector_t < buffer_min || + fsector_t > buffer_max || + fsector_t < buffer_min || ((CT(COMMAND) == FD_READ || (!in_sector_offset && CURRENT->nr_sectors >= ssize))&& max_sector > 2 * max_buffer_sectors + buffer_min && - max_size + sector_t > 2 * max_buffer_sectors + buffer_min) + max_size + fsector_t > 2 * max_buffer_sectors + buffer_min) /* not enough space */){ buffer_track = -1; buffer_drive = current_drive; @@ -2836,7 +2836,7 @@ floppy_track_buffer) >> 9), current_count_sectors); printk("st=%d ast=%d mse=%d msi=%d\n", - sector_t, aligned_sector_t, max_sector, max_size); + fsector_t, aligned_sector_t, max_sector, max_size); printk("ssize=%x SIZECODE=%d\n", ssize, SIZECODE); printk("command=%x SECTOR=%d HEAD=%d, TRACK=%d\n", COMMAND, SECTOR, HEAD, TRACK); @@ -2854,8 +2854,8 @@ raw_cmd->kernel_data + raw_cmd->length > floppy_track_buffer + (max_buffer_sectors << 10)){ DPRINT("buffer overrun in schedule dma\n"); - printk("sector_t=%d buffer_min=%d current_count=%ld\n", - sector_t, buffer_min, + printk("fsector_t=%d buffer_min=%d current_count=%ld\n", + fsector_t, buffer_min, raw_cmd->length >> 9); printk("current_count_sectors=%ld\n", current_count_sectors); @@ -2908,8 +2908,6 @@ } if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) panic(DEVICE_NAME ": request list destroyed"); - if (CURRENT->bh && !buffer_locked(CURRENT->bh)) - panic(DEVICE_NAME ": block not locked"); device = CURRENT->rq_dev; set_fdc(DRIVE(device)); @@ -4172,7 +4170,7 @@ blk_size[MAJOR_NR] = floppy_sizes; blksize_size[MAJOR_NR] = floppy_blocksizes; - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST, "floppy"); reschedule_timeout(MAXTIMEOUT, "floppy init", MAXTIMEOUT); config_types(); diff -urN linux-2.5.1-pre1/drivers/block/genhd.c linux/drivers/block/genhd.c --- linux-2.5.1-pre1/drivers/block/genhd.c Wed Oct 17 14:46:29 2001 +++ linux/drivers/block/genhd.c Sat Dec 1 00:37:05 2001 @@ -28,14 +28,8 @@ /* * Global kernel list of partitioning information. - * - * XXX: you should _never_ access this directly. - * the only reason this is exported is source compatiblity. 
*/ -/*static*/ struct gendisk *gendisk_head; - -EXPORT_SYMBOL(gendisk_head); - +static struct gendisk *gendisk_head; /** * add_gendisk - add partitioning information to kernel list @@ -122,6 +116,30 @@ EXPORT_SYMBOL(get_gendisk); + +unsigned long +get_start_sect(kdev_t dev) +{ + struct gendisk *gp; + + gp = get_gendisk(dev); + if (gp) + return gp->part[MINOR(dev)].start_sect; + return 0; +} + +EXPORT_SYMBOL(get_start_sect); + +unsigned long +get_nr_sects(kdev_t dev) +{ + struct gendisk *gp; + + gp = get_gendisk(dev); + if (gp) + return gp->part[MINOR(dev)].nr_sects; + return 0; +} #ifdef CONFIG_PROC_FS int diff -urN linux-2.5.1-pre1/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c --- linux-2.5.1-pre1/drivers/block/ll_rw_blk.c Mon Oct 29 12:11:17 2001 +++ linux/drivers/block/ll_rw_blk.c Sat Dec 1 00:37:05 2001 @@ -6,6 +6,7 @@ * Elevator latency, (C) 2000 Andrea Arcangeli SuSE * Queue request tables / lock, selectable elevator, Jens Axboe * kernel-doc documentation started by NeilBrown - July2000 + * bio rewrite, highmem i/o, etc, Jens Axboe - may 2001 */ /* @@ -22,7 +23,9 @@ #include #include #include +#include #include +#include #include #include @@ -50,27 +53,13 @@ */ DECLARE_TASK_QUEUE(tq_disk); -/* - * Protect the request list against multiple users.. - * - * With this spinlock the Linux block IO subsystem is 100% SMP threaded - * from the IRQ event side, and almost 100% SMP threaded from the syscall - * side (we still have protect against block device array operations, and - * the do_request() side is casually still unsafe. The kernel lock protects - * this part currently.). - * - * there is a fair chance that things will work just OK if these functions - * are called with no global kernel lock held ... - */ -spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED; - /* This specifies how many sectors to read ahead on the disk. */ int read_ahead[MAX_BLKDEV]; /* blk_dev_struct is: - * *request_fn - * *current_request + * request_queue + * *queue */ struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */ @@ -94,42 +83,29 @@ int * blksize_size[MAX_BLKDEV]; /* - * hardsect_size contains the size of the hardware sector of a device. - * - * hardsect_size[MAJOR][MINOR] - * - * if (!hardsect_size[MAJOR]) - * then 512 bytes is assumed. - * else - * sector_size is hardsect_size[MAJOR][MINOR] - * This is currently set by some scsi devices and read by the msdos fs driver. - * Other uses may appear later. - */ -int * hardsect_size[MAX_BLKDEV]; - -/* * The following tunes the read-ahead algorithm in mm/filemap.c */ int * max_readahead[MAX_BLKDEV]; /* - * Max number of sectors per request - */ -int * max_sectors[MAX_BLKDEV]; - -/* * How many reqeusts do we allocate per queue, * and how many do we "batch" on freeing them? */ -static int queue_nr_requests, batch_requests; - -static inline int get_max_sectors(kdev_t dev) -{ - if (!max_sectors[MAJOR(dev)]) - return MAX_SECTORS; - return max_sectors[MAJOR(dev)][MINOR(dev)]; -} +int queue_nr_requests, batch_requests; +unsigned long blk_max_low_pfn, blk_max_pfn; +int blk_nohighio = 0; +/** + * blk_get_queue: - return the queue that matches the given device + * @dev: device + * + * Description: + * Given a specific device, return the queue that will hold I/O + * for it. This is either a &struct blk_dev_struct lookup and a + * call to the ->queue() function defined, or the default queue + * stored in the same location. 
+ * + **/ inline request_queue_t *blk_get_queue(kdev_t dev) { struct blk_dev_struct *bdev = blk_dev + MAJOR(dev); @@ -140,113 +116,227 @@ return &blk_dev[MAJOR(dev)].request_queue; } -static int __blk_cleanup_queue(struct request_list *list) +/** + * blk_queue_make_request - define an alternate make_request function for a device + * @q: the request queue for the device to be affected + * @mfn: the alternate make_request function + * + * Description: + * The normal way for &struct bios to be passed to a device + * driver is for them to be collected into requests on a request + * queue, and then to allow the device driver to select requests + * off that queue when it is ready. This works well for many block + * devices. However some block devices (typically virtual devices + * such as md or lvm) do not benefit from the processing on the + * request queue, and are served best by having the requests passed + * directly to them. This can be achieved by providing a function + * to blk_queue_make_request(). + * + * Caveat: + * The driver that does this *must* be able to deal appropriately + * with buffers in "highmemory". This can be accomplished by either calling + * bio_kmap() to get a temporary kernel mapping, or by calling + * blk_queue_bounce() to create a buffer in normal memory. + **/ +void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) { - struct list_head *head = &list->free; - struct request *rq; - int i = 0; - - while (!list_empty(head)) { - rq = list_entry(head->next, struct request, queue); - list_del(&rq->queue); - kmem_cache_free(request_cachep, rq); - i++; - }; - - if (i != list->count) - printk("request list leak!\n"); + /* + * set defaults + */ + q->max_segments = MAX_SEGMENTS; + q->make_request_fn = mfn; + blk_queue_max_sectors(q, MAX_SECTORS); + blk_queue_hardsect_size(q, 512); - list->count = 0; - return i; + init_waitqueue_head(&q->queue_wait); } /** - * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed - * @q: the request queue to be released + * blk_queue_bounce_limit - set bounce buffer limit for queue + * @q: the request queue for the device + * @dma_addr: bus address limit * * Description: - * blk_cleanup_queue is the pair to blk_init_queue(). It should - * be called when a request queue is being released; typically - * when a block device is being de-registered. Currently, its - * primary task it to free all the &struct request structures that - * were allocated to the queue. - * Caveat: - * Hopefully the low level driver will have finished any - * outstanding requests first... + * Different hardware can have different requirements as to what pages + * it can do I/O directly to. A low level driver can call + * blk_queue_bounce_limit to have lower memory pages allocated as bounce + * buffers for doing I/O to pages residing above @page. By default + * the block layer sets this to the highest numbered "low" memory page. **/ -void blk_cleanup_queue(request_queue_t * q) +void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) { - int count = queue_nr_requests; + unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; + unsigned long mb = dma_addr >> 20; + static request_queue_t *last_q; - count -= __blk_cleanup_queue(&q->rq[READ]); - count -= __blk_cleanup_queue(&q->rq[WRITE]); - - if (count) - printk("blk_cleanup_queue: leaked requests (%d)\n", count); + /* + * keep this for debugging for now... 
+ */ + if (dma_addr != BLK_BOUNCE_HIGH && q != last_q) { + printk("blk: queue %p, ", q); + if (dma_addr == BLK_BOUNCE_ANY) + printk("no I/O memory limit\n"); + else + printk("I/O limit %luMb (mask 0x%Lx)\n", mb, (u64) dma_addr); + } - memset(q, 0, sizeof(*q)); + q->bounce_pfn = bounce_pfn; + last_q = q; } + /** - * blk_queue_headactive - indicate whether head of request queue may be active - * @q: The queue which this applies to. - * @active: A flag indication where the head of the queue is active. + * blk_queue_max_sectors - set max sectors for a request for this queue + * @q: the request queue for the device + * @max_sectors: max sectors in the usual 512b unit * * Description: - * The driver for a block device may choose to leave the currently active - * request on the request queue, removing it only when it has completed. - * The queue handling routines assume this by default for safety reasons - * and will not involve the head of the request queue in any merging or - * reordering of requests when the queue is unplugged (and thus may be - * working on this particular request). - * - * If a driver removes requests from the queue before processing them, then - * it may indicate that it does so, there by allowing the head of the queue - * to be involved in merging and reordering. This is done be calling - * blk_queue_headactive() with an @active flag of %0. - * - * If a driver processes several requests at once, it must remove them (or - * at least all but one of them) from the request queue. + * Enables a low level driver to set an upper limit on the size of + * received requests. + **/ +void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors) +{ + q->max_sectors = max_sectors; +} + +/** + * blk_queue_max_segments - set max segments for a request for this queue + * @q: the request queue for the device + * @max_segments: max number of segments * - * When a queue is plugged the head will be assumed to be inactive. + * Description: + * Enables a low level driver to set an upper limit on the number of + * data segments in a request **/ - -void blk_queue_headactive(request_queue_t * q, int active) +void blk_queue_max_segments(request_queue_t *q, unsigned short max_segments) { - q->head_active = active; + q->max_segments = max_segments; } /** - * blk_queue_make_request - define an alternate make_request function for a device - * @q: the request queue for the device to be affected - * @mfn: the alternate make_request function + * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg + * @q: the request queue for the device + * @max_size: max size of segment in bytes * * Description: - * The normal way for &struct buffer_heads to be passed to a device - * driver is for them to be collected into requests on a request - * queue, and then to allow the device driver to select requests - * off that queue when it is ready. This works well for many block - * devices. However some block devices (typically virtual devices - * such as md or lvm) do not benefit from the processing on the - * request queue, and are served best by having the requests passed - * directly to them. This can be achieved by providing a function - * to blk_queue_make_request(). 
+ * Enables a low level driver to set an upper limit on the size of a + * coalesced segment + **/ +void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size) +{ + q->max_segment_size = max_size; +} + +/** + * blk_queue_hardsect_size - set hardware sector size for the queue + * @q: the request queue for the device + * @size: the hardware sector size, in bytes * - * Caveat: - * The driver that does this *must* be able to deal appropriately - * with buffers in "highmemory", either by calling bh_kmap() to get - * a kernel mapping, to by calling create_bounce() to create a - * buffer in normal memory. + * Description: + * This should typically be set to the lowest possible sector size + * that the hardware can operate on (possible without reverting to + * even internal read-modify-write operations). Usually the default + * of 512 covers most hardware. **/ +void blk_queue_hardsect_size(request_queue_t *q, unsigned short size) +{ + q->hardsect_size = size; +} -void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) +/* + * can we merge the two segments, or do we need to start a new one? + */ +inline int blk_same_segment(request_queue_t *q, struct bio *bio, + struct bio *nxt) { - q->make_request_fn = mfn; + /* + * not contigous, just forget it + */ + if (!BIO_CONTIG(bio, nxt)) + return 0; + + /* + * bio and nxt are contigous, if they don't span a 4GB mem boundary + * return ok + */ + if (BIO_PHYS_4G(bio, nxt)) + return 1; + + return 0; } -static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) +/* + * map a request to scatterlist, return number of sg entries setup. Caller + * must make sure sg can hold rq->nr_segments entries + */ +int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg) { - if (req->nr_segments < max_segments) { + unsigned long long lastend; + struct bio_vec *bvec; + struct bio *bio; + int nsegs, i; + + nsegs = 0; + bio = rq->bio; + lastend = ~0ULL; + + /* + * for each bio in rq + */ + rq_for_each_bio(bio, rq) { + /* + * for each segment in bio + */ + bio_for_each_segment(bvec, bio, i) { + int nbytes = bvec->bv_len; + + BIO_BUG_ON(i > bio->bi_io_vec->bvl_cnt); + + if (bvec_to_phys(bvec) == lastend) { + if (sg[nsegs - 1].length + nbytes > q->max_segment_size) { + printk("blk_rq_map_sg: %d segment size exceeded\n", q->max_segment_size); + goto new_segment; + } + + /* + * make sure to not map a 4GB boundary into + * same sg entry + */ + if (!__BIO_PHYS_4G(lastend, lastend + nbytes)) { + printk("blk_rq_map_sg: 4GB cross\n"); + lastend = ~0ULL; + } else + lastend += nbytes; + + sg[nsegs - 1].length += nbytes; + } else { +new_segment: + if (nsegs >= q->max_segments) { + printk("map: %d >= %d\n", nsegs, q->max_segments); + BUG(); + } + + sg[nsegs].address = NULL; + sg[nsegs].page = bvec->bv_page; + sg[nsegs].length = nbytes; + sg[nsegs].offset = bvec->bv_offset; + + lastend = bvec_to_phys(bvec) + nbytes; + nsegs++; + } + } /* segments in bio */ + } /* bios in rq */ + + return nsegs; +} + +/* + * the standard queue merge functions, can be overridden with device + * specific ones if so desired + */ +static inline int ll_new_segment(request_queue_t *q, struct request *req) +{ + if (req->nr_segments < q->max_segments) { req->nr_segments++; return 1; } @@ -254,30 +344,36 @@ } static int ll_back_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) + struct bio *bio) { - if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) + if (req->nr_sectors + 
bio_sectors(bio) > q->max_sectors) + return 0; + if (blk_same_segment(q, req->biotail, bio)) return 1; - return ll_new_segment(q, req, max_segments); + + return ll_new_segment(q, req); } static int ll_front_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) + struct bio *bio) { - if (bh->b_data + bh->b_size == req->bh->b_data) + if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) + return 0; + if (blk_same_segment(q, bio, req->bio)) return 1; - return ll_new_segment(q, req, max_segments); + + return ll_new_segment(q, req); } static int ll_merge_requests_fn(request_queue_t *q, struct request *req, - struct request *next, int max_segments) + struct request *next) { int total_segments = req->nr_segments + next->nr_segments; - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + if (blk_same_segment(q, req->biotail, next->bio)) total_segments--; - if (total_segments > max_segments) + if (total_segments > q->max_segments) return 0; req->nr_segments = total_segments; @@ -292,16 +388,16 @@ * This is called with interrupts off and no requests on the queue. * (and with the request spinlock acquired) */ -static void generic_plug_device(request_queue_t *q, kdev_t dev) +static void blk_plug_device(request_queue_t *q) { /* - * no need to replug device + * common case */ - if (!list_empty(&q->queue_head) || q->plugged) + if (!elv_queue_empty(q)) return; - q->plugged = 1; - queue_task(&q->plug_tq, &tq_disk); + if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) + queue_task(&q->plug_tq, &tq_disk); } /* @@ -309,24 +405,83 @@ */ static inline void __generic_unplug_device(request_queue_t *q) { - if (q->plugged) { - q->plugged = 0; - if (!list_empty(&q->queue_head)) + if (test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) + if (!elv_queue_empty(q)) q->request_fn(q); - } } +/** + * generic_unplug_device - fire a request queue + * @q: The &request_queue_t in question + * + * Description: + * Linux uses plugging to build bigger requests queues before letting + * the device have at them. If a queue is plugged, the I/O scheduler + * is still adding and merging requests on the queue. Once the queue + * gets unplugged (either by manually calling this function, or by + * running the tq_disk task queue), the request_fn defined for the + * queue is invoked and transfers started. + **/ void generic_unplug_device(void *data) { request_queue_t *q = (request_queue_t *) data; unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); __generic_unplug_device(q); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } -static void blk_init_free_list(request_queue_t *q) +static int __blk_cleanup_queue(struct request_list *list) +{ + struct list_head *head = &list->free; + struct request *rq; + int i = 0; + + while (!list_empty(head)) { + rq = list_entry(head->next, struct request, queuelist); + list_del(&rq->queuelist); + kmem_cache_free(request_cachep, rq); + i++; + } + + if (i != list->count) + printk("request list leak!\n"); + + list->count = 0; + return i; +} + +/** + * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed + * @q: the request queue to be released + * + * Description: + * blk_cleanup_queue is the pair to blk_init_queue(). It should + * be called when a request queue is being released; typically + * when a block device is being de-registered. 
Currently, its + * primary task it to free all the &struct request structures that + * were allocated to the queue. + * Caveat: + * Hopefully the low level driver will have finished any + * outstanding requests first... + **/ +void blk_cleanup_queue(request_queue_t * q) +{ + int count = queue_nr_requests; + + count -= __blk_cleanup_queue(&q->rq[READ]); + count -= __blk_cleanup_queue(&q->rq[WRITE]); + + if (count) + printk("blk_cleanup_queue: leaked requests (%d)\n", count); + + elevator_exit(q, &q->elevator); + + memset(q, 0, sizeof(*q)); +} + +static int blk_init_free_list(request_queue_t *q) { struct request *rq; int i; @@ -341,22 +496,30 @@ */ for (i = 0; i < queue_nr_requests; i++) { rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL); - if (rq == NULL) { - /* We'll get a `leaked requests' message from blk_cleanup_queue */ - printk(KERN_EMERG "blk_init_free_list: error allocating requests\n"); - break; - } + if (!rq) + goto nomem; + memset(rq, 0, sizeof(struct request)); rq->rq_status = RQ_INACTIVE; - list_add(&rq->queue, &q->rq[i&1].free); - q->rq[i&1].count++; + if (i < queue_nr_requests >> 1) { + list_add(&rq->queuelist, &q->rq[READ].free); + q->rq[READ].count++; + } else { + list_add(&rq->queuelist, &q->rq[WRITE].free); + q->rq[WRITE].count++; + } } - init_waitqueue_head(&q->wait_for_request); + init_waitqueue_head(&q->rq[READ].wait); + init_waitqueue_head(&q->rq[WRITE].wait); spin_lock_init(&q->queue_lock); + return 0; +nomem: + blk_cleanup_queue(q); + return 1; } -static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh); +static int __make_request(request_queue_t *, struct bio *); /** * blk_init_queue - prepare a request queue for use with a block device @@ -379,45 +542,47 @@ * requests on the queue, it is responsible for arranging that the requests * get dealt with eventually. * - * A global spin lock $io_request_lock must be held while manipulating the - * requests on the request queue. - * - * The request on the head of the queue is by default assumed to be - * potentially active, and it is not considered for re-ordering or merging - * whenever the given queue is unplugged. This behaviour can be changed with - * blk_queue_headactive(). + * The queue spin lock must be held while manipulating the requests on the + * request queue. * * Note: * blk_init_queue() must be paired with a blk_cleanup_queue() call * when the block device is deactivated (such as at module unload). **/ -void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) +int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, char *name) { - INIT_LIST_HEAD(&q->queue_head); - elevator_init(&q->elevator, ELEVATOR_LINUS); - blk_init_free_list(q); + int ret; + + if (blk_init_free_list(q)) + return -ENOMEM; + + if ((ret = elevator_init(q, &q->elevator, ELEVATOR_LINUS, name))) { + blk_cleanup_queue(q); + return ret; + } + q->request_fn = rfn; q->back_merge_fn = ll_back_merge_fn; q->front_merge_fn = ll_front_merge_fn; q->merge_requests_fn = ll_merge_requests_fn; - q->make_request_fn = __make_request; q->plug_tq.sync = 0; q->plug_tq.routine = &generic_unplug_device; q->plug_tq.data = q; - q->plugged = 0; + q->queue_flags = 0; + /* - * These booleans describe the queue properties. We set the - * default (and most common) values here. Other drivers can - * use the appropriate functions to alter the queue properties. - * as appropriate. 
+ * by default assume old behaviour and bounce for any highmem page */ - q->plug_device_fn = generic_plug_device; - q->head_active = 1; + blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); + + blk_queue_make_request(q, __make_request); + blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); + return 0; } -#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue); +#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) /* - * Get a free request. io_request_lock must be held and interrupts + * Get a free request. queue lock must be held and interrupts * disabled on the way in. */ static inline struct request *get_request(request_queue_t *q, int rw) @@ -427,8 +592,9 @@ if (!list_empty(&rl->free)) { rq = blkdev_free_rq(&rl->free); - list_del(&rq->queue); + list_del(&rq->queuelist); rl->count--; + rq->inactive = 1; rq->rq_status = RQ_ACTIVE; rq->special = NULL; rq->q = q; @@ -440,38 +606,28 @@ /* * No available requests for this queue, unplug the device. */ -static struct request *__get_request_wait(request_queue_t *q, int rw) +static struct request *get_request_wait(request_queue_t *q, int rw) { - register struct request *rq; DECLARE_WAITQUEUE(wait, current); + struct request *rq; + + spin_lock_prefetch(&q->queue_lock); generic_unplug_device(q); - add_wait_queue(&q->wait_for_request, &wait); + add_wait_queue(&q->rq[rw].wait, &wait); do { set_current_state(TASK_UNINTERRUPTIBLE); if (q->rq[rw].count < batch_requests) schedule(); - spin_lock_irq(&io_request_lock); - rq = get_request(q,rw); - spin_unlock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); + rq = get_request(q, rw); + spin_unlock_irq(&q->queue_lock); } while (rq == NULL); - remove_wait_queue(&q->wait_for_request, &wait); + remove_wait_queue(&q->rq[rw].wait, &wait); current->state = TASK_RUNNING; return rq; } -static inline struct request *get_request_wait(request_queue_t *q, int rw) -{ - register struct request *rq; - - spin_lock_irq(&io_request_lock); - rq = get_request(q, rw); - spin_unlock_irq(&io_request_lock); - if (rq) - return rq; - return __get_request_wait(q, rw); -} - /* RO fail safe mechanism */ static long ro_bits[MAX_BLKDEV][8]; @@ -497,8 +653,7 @@ else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31)); } -inline void drive_stat_acct (kdev_t dev, int rw, - unsigned long nr_sectors, int new_io) +void drive_stat_acct (kdev_t dev, int rw, unsigned long nr_sectors, int new_io) { unsigned int major = MAJOR(dev); unsigned int index; @@ -520,33 +675,32 @@ /* * add-request adds a request to the linked list. - * io_request_lock is held and interrupts disabled, as we muck with the + * queue lock is held and interrupts disabled, as we muck with the * request queue list. - * - * By this point, req->cmd is always either READ/WRITE, never READA, - * which is important for drive_stat_acct() above. 
*/ static inline void add_request(request_queue_t * q, struct request * req, struct list_head *insert_here) { drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); - if (!q->plugged && q->head_active && insert_here == &q->queue_head) { - spin_unlock_irq(&io_request_lock); - BUG(); + { + struct request *__rq = __elv_next_request(q); + + if (__rq && !__rq->inactive && insert_here == &q->queue_head) + BUG(); } /* * elevator indicated where it wants this request to be * inserted at elevator_merge time */ - list_add(&req->queue, insert_here); + q->elevator.elevator_add_req_fn(q, req, insert_here); } /* - * Must be called with io_request_lock held and interrupts disabled + * Must be called with queue lock held and interrupts disabled */ -inline void blkdev_release_request(struct request *req) +void blkdev_release_request(struct request *req) { request_queue_t *q = req->q; int rw = req->cmd; @@ -555,169 +709,216 @@ req->q = NULL; /* + * should only happen on freereq logic in __make_request, in which + * case we don't want to prune these entries from the hash + */ +#if 1 + if (req->bio) + bio_hash_remove(req->bio); + if (req->biotail) + bio_hash_remove(req->biotail); +#endif + + /* * Request may not have originated from ll_rw_blk. if not, * assume it has free buffers and check waiters */ if (q) { - list_add(&req->queue, &q->rq[rw].free); - if (++q->rq[rw].count >= batch_requests && waitqueue_active(&q->wait_for_request)) - wake_up(&q->wait_for_request); + list_add(&req->queuelist, &q->rq[rw].free); + if (++q->rq[rw].count >= batch_requests + && waitqueue_active(&q->rq[rw].wait)) + wake_up(&q->rq[rw].wait); } } /* * Has to be called with the request spinlock acquired */ -static void attempt_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) -{ - struct request *next; - - next = blkdev_next_request(req); +static void attempt_merge(request_queue_t *q, struct request *req) +{ + struct request *next = blkdev_next_request(req); + if (req->sector + req->nr_sectors != next->sector) return; + if (req->cmd != next->cmd || req->rq_dev != next->rq_dev - || req->nr_sectors + next->nr_sectors > max_sectors - || next->waiting) + || req->nr_sectors + next->nr_sectors > q->max_sectors + || next->waiting || next->special || !next->inactive) return; + /* * If we are not allowed to merge these requests, then * return. If we are allowed to merge, then the count * will have been updated to the appropriate number, * and we shouldn't do it here too. 
*/ - if (!q->merge_requests_fn(q, req, next, max_segments)) - return; + if (q->merge_requests_fn(q, req, next)) { + q->elevator.elevator_merge_req_fn(req, next); + + bio_hash_remove(req->biotail); + + /* + * will handle dangling hash too + */ + blkdev_dequeue_request(next); + + req->biotail->bi_next = next->bio; + req->biotail = next->biotail; + + next->bio = next->biotail = NULL; - q->elevator.elevator_merge_req_fn(req, next); - req->bhtail->b_reqnext = next->bh; - req->bhtail = next->bhtail; - req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; - list_del(&next->queue); - blkdev_release_request(next); + bio_hash_add_unique(req->biotail, req, q->hash_valid_counter); + + req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; + + blkdev_release_request(next); + } } -static inline void attempt_back_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) +static inline void attempt_back_merge(request_queue_t *q, struct request *rq) { - if (&req->queue == q->queue_head.prev) - return; - attempt_merge(q, req, max_sectors, max_segments); + if (&rq->queuelist != q->queue_head.prev) + attempt_merge(q, rq); } -static inline void attempt_front_merge(request_queue_t * q, - struct list_head * head, - struct request *req, - int max_sectors, - int max_segments) +static inline void attempt_front_merge(request_queue_t *q, + struct list_head *head, + struct request *rq) { - struct list_head * prev; + struct list_head *prev = rq->queuelist.prev; - prev = req->queue.prev; - if (head == prev) - return; - attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments); + if (prev != head) + attempt_merge(q, blkdev_entry_to_request(prev)); } -static int __make_request(request_queue_t * q, int rw, - struct buffer_head * bh) +static inline void __blk_attempt_remerge(request_queue_t *q, struct request *rq) { - unsigned int sector, count; - int max_segments = MAX_SEGMENTS; - struct request * req, *freereq = NULL; - int rw_ahead, max_sectors, el_ret; - struct list_head *head, *insert_here; - int latency; - elevator_t *elevator = &q->elevator; + if (rq->queuelist.next != &q->queue_head) + attempt_merge(q, rq); +} - count = bh->b_size >> 9; - sector = bh->b_rsector; +/** + * blk_attempt_remerge - attempt to remerge active head with next request + * @q: The &request_queue_t belonging to the device + * @rq: The head request (usually) + * + * Description: + * For head-active devices, the queue can easily be unplugged so quickly + * that proper merging is not done on the front request. This may hurt + * performance greatly for some devices. The block layer cannot safely + * do merging on that first request for these queues, but the driver can + * call this function and make it happen any way. Only the driver knows + * when it is safe to do so. + **/ +void blk_attempt_remerge(request_queue_t *q, struct request *rq) +{ + unsigned long flags; - rw_ahead = 0; /* normal case; gets changed below for READA */ - switch (rw) { - case READA: - rw_ahead = 1; - rw = READ; /* drop into READ */ - case READ: - case WRITE: - latency = elevator_request_latency(elevator, rw); - break; - default: - BUG(); - goto end_io; - } + spin_lock_irqsave(&q->queue_lock, flags); + __blk_attempt_remerge(q, rq); + spin_unlock_irqrestore(&q->queue_lock, flags); +} - /* We'd better have a real physical mapping! - Check this bit only if the buffer was dirty and just locked - down by us so at this point flushpage will block and - won't clear the mapped bit under us. 
*/ - if (!buffer_mapped(bh)) - BUG(); +static int __make_request(request_queue_t *q, struct bio *bio) +{ + struct request *req, *freereq = NULL; + int el_ret, latency = 0, rw, nr_sectors, cur_nr_sectors, barrier; + struct list_head *head, *insert_here; + elevator_t *elevator = &q->elevator; + sector_t sector; - /* - * Temporary solution - in 2.5 this will be done by the lowlevel - * driver. Create a bounce buffer if the buffer data points into - * high memory - keep the original buffer otherwise. - */ -#if CONFIG_HIGHMEM - bh = create_bounce(rw, bh); -#endif + sector = bio->bi_sector; + nr_sectors = bio_sectors(bio); + cur_nr_sectors = bio_iovec(bio)->bv_len >> 9; + rw = bio_data_dir(bio); -/* look for a free request. */ /* - * Try to coalesce the new request with old requests + * low level driver can indicate that it wants pages above a + * certain limit bounced to low memory (ie for highmem, or even + * ISA dma in theory) */ - max_sectors = get_max_sectors(bh->b_rdev); + blk_queue_bounce(q, &bio); + + spin_lock_prefetch(&q->queue_lock); + + latency = elevator_request_latency(elevator, rw); + + barrier = test_bit(BIO_BARRIER, &bio->bi_flags); again: req = NULL; head = &q->queue_head; + + spin_lock_irq(&q->queue_lock); + /* - * Now we acquire the request spinlock, we have to be mega careful - * not to schedule or do something nonatomic + * barrier write must not be passed - so insert with 0 latency at + * the back of the queue and invalidate the entire existing merge hash + * for this device */ - spin_lock_irq(&io_request_lock); + if (barrier && !freereq) { + latency = 0; + bio_hash_invalidate(q, bio->bi_dev); + } insert_here = head->prev; - if (list_empty(head)) { - q->plug_device_fn(q, bh->b_rdev); /* is atomic */ + if (blk_queue_empty(q) || barrier) { + blk_plug_device(q); goto get_rq; - } else if (q->head_active && !q->plugged) +#if 0 + } else if (test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { head = head->next; +#else + } else if ((req = __elv_next_request(q))) { + if (!req->inactive) + head = head->next; - el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors); - switch (el_ret) { + req = NULL; +#endif + } + el_ret = elevator->elevator_merge_fn(q, &req, head, bio); + switch (el_ret) { case ELEVATOR_BACK_MERGE: - if (!q->back_merge_fn(q, req, bh, max_segments)) + if (&req->queuelist == head && !req->inactive) + BUG(); + if (!q->back_merge_fn(q, req, bio)) break; - elevator->elevator_merge_cleanup_fn(q, req, count); - req->bhtail->b_reqnext = bh; - req->bhtail = bh; - req->nr_sectors = req->hard_nr_sectors += count; - blk_started_io(count); - drive_stat_acct(req->rq_dev, req->cmd, count, 0); - attempt_back_merge(q, req, max_sectors, max_segments); + elevator->elevator_merge_cleanup_fn(q, req, nr_sectors); + + bio_hash_remove(req->biotail); + + req->biotail->bi_next = bio; + req->biotail = bio; + req->nr_sectors = req->hard_nr_sectors += nr_sectors; + drive_stat_acct(req->rq_dev, req->cmd, nr_sectors, 0); + attempt_back_merge(q, req); goto out; case ELEVATOR_FRONT_MERGE: - if (!q->front_merge_fn(q, req, bh, max_segments)) + if (&req->queuelist == head && !req->inactive) + BUG(); + if (!q->front_merge_fn(q, req, bio)) break; - elevator->elevator_merge_cleanup_fn(q, req, count); - bh->b_reqnext = req->bh; - req->bh = bh; - req->buffer = bh->b_data; - req->current_nr_sectors = count; + elevator->elevator_merge_cleanup_fn(q, req, nr_sectors); + + bio_hash_remove(req->bio); + + bio->bi_next = req->bio; + req->bio = bio; + /* + * may not be valid. 
if the low level driver said + * it didn't need a bounce buffer then it better + * not touch req->buffer either... + */ + req->buffer = bio_data(bio); + req->current_nr_sectors = cur_nr_sectors; + req->hard_cur_sectors = cur_nr_sectors; req->sector = req->hard_sector = sector; - req->nr_sectors = req->hard_nr_sectors += count; - blk_started_io(count); - drive_stat_acct(req->rq_dev, req->cmd, count, 0); - attempt_front_merge(q, head, req, max_sectors, max_segments); + req->nr_sectors = req->hard_nr_sectors += nr_sectors; + drive_stat_acct(req->rq_dev, req->cmd, nr_sectors, 0); + attempt_front_merge(q, head, req); goto out; /* @@ -730,14 +931,14 @@ * of the queue */ if (req) - insert_here = &req->queue; + insert_here = &req->queuelist; break; default: printk("elevator returned crap (%d)\n", el_ret); BUG(); } - + /* * Grab a free request from the freelist - if that is empty, check * if we are doing read ahead and abort instead of blocking for @@ -748,107 +949,132 @@ req = freereq; freereq = NULL; } else if ((req = get_request(q, rw)) == NULL) { - spin_unlock_irq(&io_request_lock); - if (rw_ahead) + + spin_unlock_irq(&q->queue_lock); + + /* + * READA bit set + */ + if (bio->bi_rw & RWA_MASK) { + set_bit(BIO_RW_BLOCK, &bio->bi_flags); goto end_io; + } - freereq = __get_request_wait(q, rw); + freereq = get_request_wait(q, rw); goto again; } -/* fill up the request-info, and add it to the queue */ + /* + * fill up the request-info, and add it to the queue + */ req->elevator_sequence = latency; req->cmd = rw; req->errors = 0; req->hard_sector = req->sector = sector; - req->hard_nr_sectors = req->nr_sectors = count; - req->current_nr_sectors = count; - req->nr_segments = 1; /* Always 1 for a new request. */ - req->nr_hw_segments = 1; /* Always 1 for a new request. */ - req->buffer = bh->b_data; + req->hard_nr_sectors = req->nr_sectors = nr_sectors; + req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors; + req->nr_segments = bio->bi_io_vec->bvl_cnt; + req->nr_hw_segments = req->nr_segments; + req->buffer = bio_data(bio); /* see ->buffer comment above */ req->waiting = NULL; - req->bh = bh; - req->bhtail = bh; - req->rq_dev = bh->b_rdev; - blk_started_io(count); + req->bio = req->biotail = bio; + req->rq_dev = bio->bi_dev; add_request(q, req, insert_here); out: - if (freereq) + if (freereq) { + freereq->bio = freereq->biotail = NULL; blkdev_release_request(freereq); - spin_unlock_irq(&io_request_lock); + } + + spin_unlock_irq(&q->queue_lock); + bio_hash_add_unique(bio, req, q->hash_valid_counter); return 0; + end_io: - bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); + bio->bi_end_io(bio, nr_sectors); return 0; } + +/* + * If bio->bi_dev is a partition, remap the location + */ +static inline void blk_partition_remap(struct bio *bio) +{ + int major, minor, drive, minor0; + struct gendisk *g; + kdev_t dev0; + + major = MAJOR(bio->bi_dev); + if ((g = get_gendisk(bio->bi_dev))) { + minor = MINOR(bio->bi_dev); + drive = (minor >> g->minor_shift); + minor0 = (drive << g->minor_shift); /* whole disk device */ + /* that is, minor0 = (minor & ~((1<<g->minor_shift)-1)); */ + dev0 = MKDEV(major, minor0); + if (dev0 != bio->bi_dev) { + bio->bi_dev = dev0; + bio->bi_sector += g->part[minor].start_sect; + } + /* lots of checks are possible */ + } +} + /** - * generic_make_request: hand a buffer head to it's device driver for I/O - * @rw: READ, WRITE, or READA - what sort of I/O is desired. - * @bh: The buffer head describing the location in memory and on the device.
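blk_partition_remap() above is pure minor-number arithmetic; a stand-alone worked example, assuming minor_shift = 6 (64 minors per disk, as the IDE-style gendisks use):

	/* illustration only: remapping partition minor 67 */
	static void remap_example(void)
	{
		int minor_shift = 6;			/* 64 minors per disk  */
		int minor  = 67;			/* disk 1, partition 3 */
		int drive  = minor >> minor_shift;	/* = 1                 */
		int minor0 = drive << minor_shift;	/* = 64, whole disk    */

		/*
		 * equivalently: minor0 = minor & ~((1 << minor_shift) - 1);
		 * the bio is retargeted at MKDEV(major, minor0) and its
		 * bi_sector advanced by part[minor].start_sect
		 */
		(void) minor0;
	}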
+ * generic_make_request: hand a buffer to its device driver for I/O + * @bio: The bio describing the location in memory and on the device. * * generic_make_request() is used to make I/O requests of block - * devices. It is passed a &struct buffer_head and a &rw value. The - * %READ and %WRITE options are (hopefully) obvious in meaning. The - * %READA value means that a read is required, but that the driver is - * free to fail the request if, for example, it cannot get needed - * resources immediately. + * devices. It is passed a &struct bio, which describes the I/O that needs + * to be done. * * generic_make_request() does not return any status. The * success/failure status of the request, along with notification of - * completion, is delivered asynchronously through the bh->b_end_io + * completion, is delivered asynchronously through the bio->bi_end_io * function described (one day) elsewhere. * - * The caller of generic_make_request must make sure that b_page, - * b_addr, b_size are set to describe the memory buffer, that b_rdev - * and b_rsector are set to describe the device address, and the - * b_end_io and optionally b_private are set to describe how - * completion notification should be signaled. BH_Mapped should also - * be set (to confirm that b_dev and b_blocknr are valid). - * - * generic_make_request and the drivers it calls may use b_reqnext, - * and may change b_rdev and b_rsector. So the values of these fields + * The caller of generic_make_request must make sure that bi_io_vec + * are set to describe the memory buffer, and that bi_dev and bi_sector are + * set to describe the device address, and the + * bi_end_io and optionally bi_private are set to describe how + * completion notification should be signaled. + * + * generic_make_request and the drivers it calls may use bi_next if this + * bio happens to be merged with someone else, and may change bi_dev and + * bi_sector for remaps as it sees fit. So the values of these fields * should NOT be depended on after the call to generic_make_request. - * Because of this, the caller should record the device address - * information in b_dev and b_blocknr. * - * Apart from those fields mentioned above, no other fields, and in - * particular, no other flags, are changed by generic_make_request or - * any lower level drivers. * */ -void generic_make_request (int rw, struct buffer_head * bh) +void generic_make_request(struct bio *bio) { - int major = MAJOR(bh->b_rdev); - int minorsize = 0; + int major = MAJOR(bio->bi_dev); + int minor = MINOR(bio->bi_dev); request_queue_t *q; + sector_t minorsize = 0; + int nr_sectors = bio_sectors(bio); - if (!bh->b_end_io) - BUG(); - - /* Test device size, when known. */ + /* Test device or partition size, when known. */ if (blk_size[major]) - minorsize = blk_size[major][MINOR(bh->b_rdev)]; + minorsize = blk_size[major][minor]; if (minorsize) { unsigned long maxsector = (minorsize << 1) + 1; - unsigned long sector = bh->b_rsector; - unsigned int count = bh->b_size >> 9; + unsigned long sector = bio->bi_sector; - if (maxsector < count || maxsector - count < sector) { - /* Yecch */ - bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped); - - /* This may well happen - the kernel calls bread() - without checking the size of the device, e.g., - when mounting a device.
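The bounds test above mixes two units: blk_size[] records capacity in 1 KB blocks while bi_sector counts 512-byte sectors, hence the << 1. The same overflow-safe comparison in isolation — range_ok is a hypothetical name:

	/* capacity_kb in 1K units; sector and nr_sectors in 512-byte units */
	static int range_ok(unsigned long capacity_kb, unsigned long sector,
			    unsigned int nr_sectors)
	{
		unsigned long maxsector = (capacity_kb << 1) + 1;

		/* subtraction instead of sector + nr_sectors, which could wrap */
		return maxsector >= nr_sectors && maxsector - nr_sectors >= sector;
	}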
*/ - printk(KERN_INFO - "attempt to access beyond end of device\n"); - printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", - kdevname(bh->b_rdev), rw, - (sector + count)>>1, minorsize); - - /* Yecch again */ - bh->b_end_io(bh, 0); - return; + if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { + if (blk_size[major][minor]) { + + /* This may well happen - the kernel calls + * bread() without checking the size of the + * device, e.g., when mounting a device. */ + printk(KERN_INFO + "attempt to access beyond end of device\n"); + printk(KERN_INFO "%s: rw=%ld, want=%ld, limit=%Lu\n", + kdevname(bio->bi_dev), bio->bi_rw, + (sector + nr_sectors)>>1, + (u64) blk_size[major][minor]); + } + set_bit(BIO_EOF, &bio->bi_flags); + goto end_io; } } @@ -856,63 +1082,127 @@ * Resolve the mapping until finished. (drivers are * still free to implement/resolve their own stacking * by explicitly returning 0) - */ - /* NOTE: we don't repeat the blk_size check for each new device. + * + * NOTE: we don't repeat the blk_size check for each new device. * Stacking drivers are expected to know what they are doing. */ do { - q = blk_get_queue(bh->b_rdev); + q = blk_get_queue(bio->bi_dev); if (!q) { printk(KERN_ERR - "generic_make_request: Trying to access " - "nonexistent block-device %s (%ld)\n", - kdevname(bh->b_rdev), bh->b_rsector); - buffer_IO_error(bh); + "generic_make_request: Trying to access nonexistent block-device %s (%Lu)\n", + kdevname(bio->bi_dev), (u64) bio->bi_sector); +end_io: + bio->bi_end_io(bio, nr_sectors); break; } - } while (q->make_request_fn(q, rw, bh)); + + /* + * uh oh, need to split this bio... not implemented yet + */ + if (bio_sectors(bio) > q->max_sectors) + BUG(); + + /* + * If this device has partitions, remap block n + * of partition p to block n+start(p) of the disk. + */ + blk_partition_remap(bio); + + } while (q->make_request_fn(q, bio)); } +/* + * our default bio end_io callback handler for a buffer_head mapping. + */ +static int end_bio_bh_io_sync(struct bio *bio, int nr_sectors) +{ + struct buffer_head *bh = bio->bi_private; + + BIO_BUG_ON(nr_sectors != (bh->b_size >> 9)); + + /* + * I/O is complete -- remove from hash, end buffer_head, put bio + */ + bio_hash_remove(bio); + + bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags)); + bio_put(bio); + + return 0; +} /** - * submit_bh: submit a buffer_head to the block device later for I/O + * submit_bio: submit a bio to the block device layer for I/O * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) - * @bh: The &struct buffer_head which describes the I/O + * @bio: The &struct bio which describes the I/O * - * submit_bh() is very similar in purpose to generic_make_request(), and - * uses that function to do most of the work. + * submit_bio() is very similar in purpose to generic_make_request(), and + * uses that function to do most of the work. Both are fairly rough + * interfaces, @bio must be presetup and ready for I/O. * - * The extra functionality provided by submit_bh is to determine - * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev. - * This is is appropriate for IO requests that come from the buffer - * cache and page cache which (currently) always use aligned blocks. */ -void submit_bh(int rw, struct buffer_head * bh) +int submit_bio(int rw, struct bio *bio) +{ + int count = bio_sectors(bio); + + /* + * do some validity checks... 
+ */ + BUG_ON(!bio->bi_end_io); + + BIO_BUG_ON(bio_offset(bio) > PAGE_SIZE); + BIO_BUG_ON(!bio_size(bio)); + BIO_BUG_ON(!bio->bi_io_vec); + + bio->bi_rw = rw; + + if (rw & WRITE) + kstat.pgpgout += count; + else + kstat.pgpgin += count; + + generic_make_request(bio); + return 1; +} + +/** + * submit_bh: submit a buffer_head to the block device layer for I/O + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) + * @bh: The &struct buffer_head which describes the I/O + * + **/ +int submit_bh(int rw, struct buffer_head * bh) { - int count = bh->b_size >> 9; + struct bio *bio; - if (!test_bit(BH_Lock, &bh->b_state)) - BUG(); + BUG_ON(!test_bit(BH_Lock, &bh->b_state)); + BUG_ON(!buffer_mapped(bh)); + BUG_ON(!bh->b_end_io); set_bit(BH_Req, &bh->b_state); /* - * First step, 'identity mapping' - RAID or LVM might - * further remap this. + * from here on down, it's all bio -- do the initial mapping, + * submit_bio -> generic_make_request may further map this bio around */ - bh->b_rdev = bh->b_dev; - bh->b_rsector = bh->b_blocknr * count; + bio = bio_alloc(GFP_NOIO, 1); - generic_make_request(rw, bh); + bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_next = NULL; + bio->bi_dev = bh->b_dev; + bio->bi_private = bh; + bio->bi_end_io = end_bio_bh_io_sync; + + bio->bi_io_vec->bvl_vec[0].bv_page = bh->b_page; + bio->bi_io_vec->bvl_vec[0].bv_len = bh->b_size; + bio->bi_io_vec->bvl_vec[0].bv_offset = bh_offset(bh); + + bio->bi_io_vec->bvl_cnt = 1; + bio->bi_io_vec->bvl_idx = 0; + bio->bi_io_vec->bvl_size = bh->b_size; - switch (rw) { - case WRITE: - kstat.pgpgout += count; - break; - default: - kstat.pgpgin += count; - break; - } + return submit_bio(rw, bio); } /** @@ -944,8 +1234,9 @@ * * Caveat: * All of the buffers must be for the same device, and must also be - * of the current approved size for the device. */ - + * sized as a multiple of the current approved block size for the device. + * + **/ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) { unsigned int major; @@ -963,7 +1254,7 @@ /* Verify requested block sizes. */ for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (bh->b_size % correct_size) { + if (bh->b_size & (correct_size - 1)) { printk(KERN_NOTICE "ll_rw_block: device %s: " "only %d-char blocks implemented (%u)\n", kdevname(bhs[0]->b_dev), @@ -1024,12 +1315,11 @@ extern int stram_device_init (void); #endif - /** * end_that_request_first - end I/O on one buffer. + * @nr_sectors: number of sectors to end I/O on * @req: the request being processed * @uptodate: 0 for I/O error - * @name: the name printed for an I/O error * * Description: * Ends I/O on the first buffer attached to @req, and sets it up @@ -1038,40 +1328,52 @@ * Return: * 0 - we are done with this request, call end_that_request_last() * 1 - still buffers pending for this request - * - * Caveat: - * Drivers implementing their own end_request handling must call - * blk_finished_io() appropriately.
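One subtlety in the ll_rw_block() hunk above: the modulus became a mask, which is equivalent only because block sizes are powers of two. A two-line illustration:

	/* for power-of-two n: x % n == x & (n - 1) */
	unsigned int n = 4096;
	unsigned int ok  = 8192 & (n - 1);	/* == 0: a multiple, accepted */
	unsigned int bad = 6144 & (n - 1);	/* == 2048: rejected          */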
**/ -int end_that_request_first (struct request *req, int uptodate, char *name) +int end_that_request_first(struct request *req, int uptodate, int nr_sectors) { - struct buffer_head * bh; + struct bio *bio, *nxt; int nsect; req->errors = 0; if (!uptodate) - printk("end_request: I/O error, dev %s (%s), sector %lu\n", - kdevname(req->rq_dev), name, req->sector); + printk("end_request: I/O error, dev %s, sector %lu\n", + kdevname(req->rq_dev), req->sector); + + if ((bio = req->bio) != NULL) { +next_chunk: + nsect = bio_iovec(bio)->bv_len >> 9; + + nr_sectors -= nsect; + + nxt = bio->bi_next; + bio->bi_next = NULL; + if (!bio_endio(bio, uptodate, nsect)) + req->bio = nxt; + else + bio->bi_next = nxt; - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; - blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { + if ((bio = req->bio) != NULL) { + bio_hash_add_unique(bio,req,req->q->hash_valid_counter); req->hard_sector += nsect; req->hard_nr_sectors -= nsect; req->sector = req->hard_sector; req->nr_sectors = req->hard_nr_sectors; - req->current_nr_sectors = bh->b_size >> 9; + req->current_nr_sectors = bio_iovec(bio)->bv_len >> 9; + req->hard_cur_sectors = req->current_nr_sectors; if (req->nr_sectors < req->current_nr_sectors) { - req->nr_sectors = req->current_nr_sectors; printk("end_request: buffer-list destroyed\n"); + req->nr_sectors = req->current_nr_sectors; } - req->buffer = bh->b_data; + + req->buffer = bio_data(bio); + /* + * end more in this run, or just return 'not-done' + */ + if (nr_sectors > 0) + goto next_chunk; + return 1; } } @@ -1080,7 +1382,7 @@ void end_that_request_last(struct request *req) { - if (req->waiting != NULL) + if (req->waiting) complete(req->waiting); blkdev_release_request(req); @@ -1105,7 +1407,6 @@ memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, sizeof(max_readahead)); - memset(max_sectors, 0, sizeof(max_sectors)); total_ram = nr_free_pages() << (PAGE_SHIFT - 10); @@ -1115,129 +1416,46 @@ */ queue_nr_requests = 64; if (total_ram > MB(32)) - queue_nr_requests = 128; + queue_nr_requests = 256; /* * Batch frees according to queue length */ - batch_requests = queue_nr_requests/4; + if ((batch_requests = queue_nr_requests / 4) > 32) + batch_requests = 32; printk("block: %d slots per queue, batch=%d\n", queue_nr_requests, batch_requests); -#ifdef CONFIG_AMIGA_Z2RAM - z2_init(); -#endif -#ifdef CONFIG_STRAM_SWAP - stram_device_init(); -#endif -#ifdef CONFIG_BLK_DEV_RAM - rd_init(); -#endif -#ifdef CONFIG_ISP16_CDI - isp16_init(); -#endif + blk_max_low_pfn = max_low_pfn; + blk_max_pfn = max_pfn; + #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE) ide_init(); /* this MUST precede hd_init */ #endif #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD) hd_init(); #endif -#ifdef CONFIG_BLK_DEV_PS2 - ps2esdi_init(); -#endif -#ifdef CONFIG_BLK_DEV_XD - xd_init(); -#endif -#ifdef CONFIG_BLK_DEV_MFM - mfm_init(); -#endif -#ifdef CONFIG_PARIDE - { extern void paride_init(void); paride_init(); }; -#endif -#ifdef CONFIG_MAC_FLOPPY - swim3_init(); -#endif -#ifdef CONFIG_BLK_DEV_SWIM_IOP - swimiop_init(); -#endif -#ifdef CONFIG_AMIGA_FLOPPY - amiga_floppy_init(); -#endif -#ifdef CONFIG_ATARI_FLOPPY - atari_floppy_init(); -#endif -#ifdef CONFIG_BLK_DEV_FD - floppy_init(); -#else #if defined(__i386__) /* Do we even need this? 
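With the nr_sectors parameter, end_that_request_first() walks and completes the attached bios itself, so a driver retires any number of sectors in one call. A hedged sketch of the resulting convention — drv_complete is hypothetical:

	/* illustration only: typical completion path under the new API */
	static void drv_complete(struct request *rq, int uptodate, int nr_sectors)
	{
		/* returns 1 while bios remain attached to the request */
		if (end_that_request_first(rq, uptodate, nr_sectors))
			return;			/* more I/O still pending */

		blkdev_dequeue_request(rq);
		end_that_request_last(rq);	/* complete waiter, free rq */
	}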
*/ outb_p(0xc, 0x3f2); #endif -#endif -#ifdef CONFIG_CDU31A - cdu31a_init(); -#endif -#ifdef CONFIG_ATARI_ACSI - acsi_init(); -#endif -#ifdef CONFIG_MCD - mcd_init(); -#endif -#ifdef CONFIG_MCDX - mcdx_init(); -#endif -#ifdef CONFIG_SBPCD - sbpcd_init(); -#endif -#ifdef CONFIG_AZTCD - aztcd_init(); -#endif -#ifdef CONFIG_CDU535 - sony535_init(); -#endif -#ifdef CONFIG_GSCD - gscd_init(); -#endif -#ifdef CONFIG_CM206 - cm206_init(); -#endif -#ifdef CONFIG_OPTCD - optcd_init(); -#endif -#ifdef CONFIG_SJCD - sjcd_init(); -#endif -#ifdef CONFIG_APBLOCK - ap_init(); -#endif -#ifdef CONFIG_DDV - ddv_init(); -#endif -#ifdef CONFIG_MDISK - mdisk_init(); -#endif -#ifdef CONFIG_DASD - dasd_init(); -#endif -#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK) - tapeblock_init(); -#endif -#ifdef CONFIG_BLK_DEV_XPRAM - xpram_init(); -#endif -#ifdef CONFIG_SUN_JSFLASH - jsfd_init(); -#endif return 0; }; -EXPORT_SYMBOL(io_request_lock); EXPORT_SYMBOL(end_that_request_first); EXPORT_SYMBOL(end_that_request_last); EXPORT_SYMBOL(blk_init_queue); EXPORT_SYMBOL(blk_get_queue); EXPORT_SYMBOL(blk_cleanup_queue); -EXPORT_SYMBOL(blk_queue_headactive); EXPORT_SYMBOL(blk_queue_make_request); +EXPORT_SYMBOL(blk_queue_bounce_limit); EXPORT_SYMBOL(generic_make_request); EXPORT_SYMBOL(blkdev_release_request); EXPORT_SYMBOL(generic_unplug_device); +EXPORT_SYMBOL(blk_attempt_remerge); +EXPORT_SYMBOL(blk_max_low_pfn); +EXPORT_SYMBOL(blk_queue_max_sectors); +EXPORT_SYMBOL(blk_queue_max_segments); +EXPORT_SYMBOL(blk_queue_max_segment_size); +EXPORT_SYMBOL(blk_queue_hardsect_size); +EXPORT_SYMBOL(blk_rq_map_sg); +EXPORT_SYMBOL(blk_nohighio); diff -urN linux-2.5.1-pre1/drivers/block/loop.c linux/drivers/block/loop.c --- linux-2.5.1-pre1/drivers/block/loop.c Mon Nov 19 14:48:02 2001 +++ linux/drivers/block/loop.c Sat Dec 1 00:37:05 2001 @@ -168,8 +168,7 @@ lo->lo_device); } -static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +static int lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ struct address_space *mapping = file->f_dentry->d_inode->i_mapping; @@ -183,8 +182,8 @@ down(&mapping->host->i_sem); index = pos >> PAGE_CACHE_SHIFT; offset = pos & (PAGE_CACHE_SIZE - 1); - len = bh->b_size; - data = bh->b_data; + len = bio_size(bio); + data = bio_data(bio); while (len > 0) { int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize; int transfer_result; @@ -263,18 +262,17 @@ return size; } -static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +static int lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { struct lo_read_data cookie; read_descriptor_t desc; struct file *file; cookie.lo = lo; - cookie.data = bh->b_data; + cookie.data = bio_data(bio); cookie.bsize = bsize; desc.written = 0; - desc.count = bh->b_size; + desc.count = bio_size(bio); desc.buf = (char*)&cookie; desc.error = 0; spin_lock_irq(&lo->lo_lock); @@ -310,46 +308,46 @@ return IV; } -static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw) +static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) { loff_t pos; int ret; - pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset; + pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; - if (rw == WRITE) - ret = lo_send(lo, bh, loop_get_bs(lo), pos); + if (bio_rw(bio) == WRITE) + ret = lo_send(lo, bio, loop_get_bs(lo), pos); else - ret = lo_receive(lo, bh, loop_get_bs(lo), 
pos); + ret = lo_receive(lo, bio, loop_get_bs(lo), pos); return ret; } -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate); -static void loop_put_buffer(struct buffer_head *bh) +static int loop_end_io_transfer(struct bio *, int); +static void loop_put_buffer(struct bio *bio) { /* - * check b_end_io, may just be a remapped bh and not an allocated one + * check bi_end_io, may just be a remapped bio */ - if (bh && bh->b_end_io == loop_end_io_transfer) { - __free_page(bh->b_page); - kmem_cache_free(bh_cachep, bh); + if (bio && bio->bi_end_io == loop_end_io_transfer) { + __free_page(bio_page(bio)); + bio_put(bio); } } /* - * Add buffer_head to back of pending list + * Add bio to back of pending list */ -static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh) +static void loop_add_bio(struct loop_device *lo, struct bio *bio) { unsigned long flags; spin_lock_irqsave(&lo->lo_lock, flags); - if (lo->lo_bhtail) { - lo->lo_bhtail->b_reqnext = bh; - lo->lo_bhtail = bh; + if (lo->lo_biotail) { + lo->lo_biotail->bi_next = bio; + lo->lo_biotail = bio; } else - lo->lo_bh = lo->lo_bhtail = bh; + lo->lo_bio = lo->lo_biotail = bio; spin_unlock_irqrestore(&lo->lo_lock, flags); up(&lo->lo_bh_mutex); @@ -358,70 +356,60 @@ /* * Grab first pending buffer */ -static struct buffer_head *loop_get_bh(struct loop_device *lo) +static struct bio *loop_get_bio(struct loop_device *lo) { - struct buffer_head *bh; + struct bio *bio; spin_lock_irq(&lo->lo_lock); - if ((bh = lo->lo_bh)) { - if (bh == lo->lo_bhtail) - lo->lo_bhtail = NULL; - lo->lo_bh = bh->b_reqnext; - bh->b_reqnext = NULL; + if ((bio = lo->lo_bio)) { + if (bio == lo->lo_biotail) + lo->lo_biotail = NULL; + lo->lo_bio = bio->bi_next; + bio->bi_next = NULL; } spin_unlock_irq(&lo->lo_lock); - return bh; + return bio; } /* - * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE - * and lo->transfer stuff has already been done. if not, it was a READ - * so queue it for the loop thread and let it do the transfer out of - * b_end_io context (we don't want to do decrypt of a page with irqs + * if this was a WRITE lo->transfer stuff has already been done. 
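lo_bio and lo_biotail form a plain singly linked FIFO threaded through bi_next: loop_add_bio() appends at the tail, loop_get_bio() pops the head, lo_lock guards both ends, and each up(&lo->lo_bh_mutex) signals one unit of queued work. The list manipulation in isolation — bio_fifo and its helpers are hypothetical, with locking left to the caller as in the driver:

	struct bio_fifo {
		struct bio *head;		/* lo_bio     */
		struct bio *tail;		/* lo_biotail */
	};

	static void fifo_push(struct bio_fifo *f, struct bio *bio)
	{
		bio->bi_next = NULL;		/* defensive terminator */
		if (f->tail)
			f->tail->bi_next = bio;
		else
			f->head = bio;
		f->tail = bio;
	}

	static struct bio *fifo_pop(struct bio_fifo *f)
	{
		struct bio *bio = f->head;

		if (bio) {
			if (bio == f->tail)
				f->tail = NULL;
			f->head = bio->bi_next;
			bio->bi_next = NULL;
		}
		return bio;
	}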
for READs, + * queue it for the loop thread and let it do the transfer out of + * bi_end_io context (we don't want to do decrypt of a page with irqs * disabled) */ -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate) +static int loop_end_io_transfer(struct bio *bio, int nr_sectors) { - struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)]; + struct loop_device *lo = &loop_dev[MINOR(bio->bi_dev)]; + int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) { - struct buffer_head *rbh = bh->b_private; + if (!uptodate || bio_rw(bio) == WRITE) { + struct bio *rbh = bio->bi_private; - rbh->b_end_io(rbh, uptodate); + bio_endio(rbh, uptodate, nr_sectors); if (atomic_dec_and_test(&lo->lo_pending)) up(&lo->lo_bh_mutex); - loop_put_buffer(bh); + loop_put_buffer(bio); } else - loop_add_bh(lo, bh); + loop_add_bio(lo, bio); + + return 0; } -static struct buffer_head *loop_get_buffer(struct loop_device *lo, - struct buffer_head *rbh) +static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh) { - struct buffer_head *bh; + struct page *page; + struct bio *bio; /* * for xfer_funcs that can operate on the same bh, do that */ if (lo->lo_flags & LO_FLAGS_BH_REMAP) { - bh = rbh; + bio = rbh; goto out_bh; } - do { - bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO); - if (bh) - break; - - run_task_queue(&tq_disk); - schedule_timeout(HZ); - } while (1); - memset(bh, 0, sizeof(*bh)); - - bh->b_size = rbh->b_size; - bh->b_dev = rbh->b_rdev; - bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock); + bio = bio_alloc(GFP_NOIO, 1); /* * easy way out, although it does waste some memory for < PAGE_SIZE @@ -429,41 +417,46 @@ * so can we :-) */ do { - bh->b_page = alloc_page(GFP_NOIO); - if (bh->b_page) + page = alloc_page(GFP_NOIO); + if (page) break; run_task_queue(&tq_disk); schedule_timeout(HZ); } while (1); - bh->b_data = page_address(bh->b_page); - bh->b_end_io = loop_end_io_transfer; - bh->b_private = rbh; - init_waitqueue_head(&bh->b_wait); + bio->bi_io_vec->bvl_vec[0].bv_page = page; + bio->bi_io_vec->bvl_vec[0].bv_len = bio_size(rbh); + bio->bi_io_vec->bvl_vec[0].bv_offset = bio_offset(rbh); + + bio->bi_io_vec->bvl_cnt = 1; + bio->bi_io_vec->bvl_idx = 1; + bio->bi_io_vec->bvl_size = bio_size(rbh); + + bio->bi_end_io = loop_end_io_transfer; + bio->bi_private = rbh; out_bh: - bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9); + bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9); + bio->bi_rw = rbh->bi_rw; spin_lock_irq(&lo->lo_lock); - bh->b_rdev = lo->lo_device; + bio->bi_dev = lo->lo_device; spin_unlock_irq(&lo->lo_lock); - return bh; + return bio; } -static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh) +static int loop_make_request(request_queue_t *q, struct bio *rbh) { - struct buffer_head *bh = NULL; + struct bio *bh = NULL; struct loop_device *lo; unsigned long IV; + int rw = bio_rw(rbh); - if (!buffer_locked(rbh)) - BUG(); - - if (MINOR(rbh->b_rdev) >= max_loop) + if (MINOR(rbh->bi_dev) >= max_loop) goto out; - lo = &loop_dev[MINOR(rbh->b_rdev)]; + lo = &loop_dev[MINOR(rbh->bi_dev)]; spin_lock_irq(&lo->lo_lock); if (lo->lo_state != Lo_bound) goto inactive; @@ -476,25 +469,17 @@ } else if (rw == READA) { rw = READ; } else if (rw != READ) { - printk(KERN_ERR "loop: unknown command (%d)\n", rw); + printk(KERN_ERR "loop: unknown command (%x)\n", rw); goto err; } -#if CONFIG_HIGHMEM - rbh = create_bounce(rw, rbh); -#endif + blk_queue_bounce(q, &rbh); /* * file backed, queue for 
loop_thread to handle */ if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - /* - * rbh locked at this point, noone else should clear - * the dirty flag - */ - if (rw == WRITE) - set_bit(BH_Dirty, &rbh->b_state); - loop_add_bh(lo, rbh); + loop_add_bio(lo, rbh); return 0; } @@ -502,15 +487,14 @@ * piggy old buffer on original, and submit for I/O */ bh = loop_get_buffer(lo, rbh); - IV = loop_get_iv(lo, rbh->b_rsector); + IV = loop_get_iv(lo, rbh->bi_sector); if (rw == WRITE) { - set_bit(BH_Dirty, &bh->b_state); - if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data, - bh->b_size, IV)) + if (lo_do_transfer(lo, WRITE, bio_data(bh), bio_data(rbh), + bio_size(bh), IV)) goto err; } - generic_make_request(rw, bh); + generic_make_request(bh); return 0; err: @@ -518,14 +502,14 @@ up(&lo->lo_bh_mutex); loop_put_buffer(bh); out: - buffer_IO_error(rbh); + bio_io_error(rbh); return 0; inactive: spin_unlock_irq(&lo->lo_lock); goto out; } -static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh) +static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) { int ret; @@ -533,19 +517,17 @@ * For block backed loop, we know this is a READ */ if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state); - - ret = do_bh_filebacked(lo, bh, rw); - bh->b_end_io(bh, !ret); + ret = do_bio_filebacked(lo, bio); + bio_endio(bio, !ret, bio_sectors(bio)); } else { - struct buffer_head *rbh = bh->b_private; - unsigned long IV = loop_get_iv(lo, rbh->b_rsector); + struct bio *rbh = bio->bi_private; + unsigned long IV = loop_get_iv(lo, rbh->bi_sector); - ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data, - bh->b_size, IV); + ret = lo_do_transfer(lo, READ, bio_data(bio), bio_data(rbh), + bio_size(bio), IV); - rbh->b_end_io(rbh, !ret); - loop_put_buffer(bh); + bio_endio(rbh, !ret, bio_sectors(bio)); + loop_put_buffer(bio); } } @@ -558,7 +540,7 @@ static int loop_thread(void *data) { struct loop_device *lo = data; - struct buffer_head *bh; + struct bio *bio; daemonize(); exit_files(current); @@ -592,12 +574,12 @@ if (!atomic_read(&lo->lo_pending)) break; - bh = loop_get_bh(lo); - if (!bh) { - printk("loop: missing bh\n"); + bio = loop_get_bio(lo); + if (!bio) { + printk("loop: missing bio\n"); continue; } - loop_handle_bh(lo, bh); + loop_handle_bio(lo, bio); /* * upped both for pending work and tear-down, lo_pending @@ -683,7 +665,7 @@ set_blocksize(dev, bs); - lo->lo_bh = lo->lo_bhtail = NULL; + lo->lo_bio = lo->lo_biotail = NULL; kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); down(&lo->lo_sem); @@ -873,7 +855,7 @@ err = -ENXIO; break; } - err = put_user((unsigned long)loop_sizes[lo->lo_number] << 1, (unsigned long *) arg); + err = put_user((unsigned long) loop_sizes[lo->lo_number] << 1, (unsigned long *) arg); break; case BLKGETSIZE64: if (lo->lo_state != Lo_bound) { @@ -1019,11 +1001,11 @@ loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); if (!loop_sizes) - goto out_sizes; + goto out_mem; loop_blksizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); if (!loop_blksizes) - goto out_blksizes; + goto out_mem; blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request); @@ -1047,9 +1029,8 @@ printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop); return 0; -out_sizes: +out_mem: kfree(loop_dev); -out_blksizes: kfree(loop_sizes); printk(KERN_ERR "loop: ran out of memory\n"); return -ENOMEM; diff -urN linux-2.5.1-pre1/drivers/block/nbd.c linux/drivers/block/nbd.c --- linux-2.5.1-pre1/drivers/block/nbd.c Fri Oct 26 
15:39:02 2001 +++ linux/drivers/block/nbd.c Sat Dec 1 00:37:05 2001 @@ -165,14 +165,14 @@ FAIL("Sendmsg failed for control."); if (req->cmd == WRITE) { - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; DEBUG("data, "); do { - result = nbd_xmit(1, sock, bh->b_data, bh->b_size, bh->b_reqnext == NULL ? 0 : MSG_MORE); + result = nbd_xmit(1, sock, bio_data(bio), bio_size(bio), bio->bi_next == NULL ? 0 : MSG_MORE); if (result <= 0) FAIL("Send data failed."); - bh = bh->b_reqnext; - } while(bh); + bio = bio->bi_next; + } while(bio); } return; @@ -205,14 +205,14 @@ if (ntohl(reply.error)) FAIL("Other side returned error."); if (req->cmd == READ) { - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; DEBUG("data, "); do { - result = nbd_xmit(0, lo->sock, bh->b_data, bh->b_size, MSG_WAITALL); + result = nbd_xmit(0, lo->sock, bio_data(bio), bio_size(bio), MSG_WAITALL); if (result <= 0) HARDFAIL("Recv data failed."); - bh = bh->b_reqnext; - } while(bh); + bio = bio->bi_next; + } while(bio); } DEBUG("done.\n"); return req; @@ -250,7 +250,7 @@ goto out; } #endif - list_del(&req->queue); + blkdev_dequeue_request(req); up (&lo->queue_lock); nbd_end_request(req); @@ -285,7 +285,7 @@ } #endif req->errors++; - list_del(&req->queue); + blkdev_dequeue_request(req); up(&lo->queue_lock); nbd_end_request(req); @@ -333,22 +333,22 @@ #endif req->errors = 0; blkdev_dequeue_request(req); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); down (&lo->queue_lock); - list_add(&req->queue, &lo->queue_head); + list_add(&req->queuelist, &lo->queue_head); nbd_send_req(lo->sock, req); /* Why does this block? */ up (&lo->queue_lock); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); continue; error_out: req->errors++; blkdev_dequeue_request(req); - spin_unlock(&io_request_lock); + spin_unlock(&q->queue_lock); nbd_end_request(req); - spin_lock(&io_request_lock); + spin_lock(&q->queue_lock); } return; } @@ -501,7 +501,7 @@ #endif blksize_size[MAJOR_NR] = nbd_blksizes; blk_size[MAJOR_NR] = nbd_sizes; - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request); + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request, "nbd"); blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); for (i = 0; i < MAX_NBD; i++) { nbd_dev[i].refcnt = 0; diff -urN linux-2.5.1-pre1/drivers/block/paride/pd.c linux/drivers/block/paride/pd.c --- linux-2.5.1-pre1/drivers/block/paride/pd.c Fri Nov 9 13:58:03 2001 +++ linux/drivers/block/paride/pd.c Sat Dec 1 00:37:05 2001 @@ -287,7 +287,6 @@ static struct hd_struct pd_hd[PD_DEVS]; static int pd_sizes[PD_DEVS]; static int pd_blocksizes[PD_DEVS]; -static int pd_maxsectors[PD_DEVS]; #define PD_NAMELEN 8 @@ -330,7 +329,6 @@ static int pd_cmd; /* current command READ/WRITE */ static int pd_unit; /* unit of current request */ static int pd_dev; /* minor of current request */ -static int pd_poffs; /* partition offset of current minor */ static char * pd_buf; /* buffer for request in progress */ static DECLARE_WAIT_QUEUE_HEAD(pd_wait_open); @@ -397,6 +395,7 @@ } q = BLK_DEFAULT_QUEUE(MAJOR_NR); blk_init_queue(q, DEVICE_REQUEST); + blk_queue_max_sectors(q, cluster); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read ahead */ pd_gendisk.major = major; @@ -406,9 +405,6 @@ for(i=0;ii_rdev)) return -EINVAL; - dev = MINOR(inode->i_rdev); + if (!inode || !inode->i_rdev) + return -EINVAL; unit = DEVICE_NR(inode->i_rdev); - if (dev >= PD_DEVS) return -EINVAL; - if (!PD.present) return -ENODEV; + if (!PD.present) + return -ENODEV; - switch (cmd) { + 
switch (cmd) { case CDROMEJECT: if (PD.access == 1) pd_eject(unit); return 0; - case HDIO_GETGEO: - if (!geo) return -EINVAL; - err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); - if (err) return err; + case HDIO_GETGEO: + if (!geo) return -EINVAL; + err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); + if (err) return err; if (PD.alt_geom) { - put_user(PD.capacity/(PD_LOG_HEADS*PD_LOG_SECTS), + put_user(PD.capacity/(PD_LOG_HEADS*PD_LOG_SECTS), (short *) &geo->cylinders); - put_user(PD_LOG_HEADS, (char *) &geo->heads); - put_user(PD_LOG_SECTS, (char *) &geo->sectors); + put_user(PD_LOG_HEADS, (char *) &geo->heads); + put_user(PD_LOG_SECTS, (char *) &geo->sectors); } else { - put_user(PD.cylinders, (short *) &geo->cylinders); - put_user(PD.heads, (char *) &geo->heads); - put_user(PD.sectors, (char *) &geo->sectors); + put_user(PD.cylinders, (short *) &geo->cylinders); + put_user(PD.heads, (char *) &geo->heads); + put_user(PD.sectors, (char *) &geo->sectors); } - put_user(pd_hd[dev].start_sect,(long *)&geo->start); - return 0; - case BLKRRPART: + put_user(get_start_sect(inode->i_rdev), (long *)&geo->start); + return 0; + case BLKRRPART: if (!capable(CAP_SYS_ADMIN)) return -EACCES; - return pd_revalidate(inode->i_rdev); + return pd_revalidate(inode->i_rdev); case BLKGETSIZE: case BLKGETSIZE64: case BLKROSET: @@ -488,9 +484,9 @@ case BLKFLSBUF: case BLKPG: return blk_ioctl(inode->i_rdev, cmd, arg); - default: - return -EINVAL; - } + default: + return -EINVAL; + } } static int pd_release (struct inode *inode, struct file *file) @@ -526,36 +522,32 @@ } static int pd_revalidate(kdev_t dev) +{ + int unit, res; + long flags; -{ int p, unit, minor; - long flags; - - unit = DEVICE_NR(dev); - if ((unit >= PD_UNITS) || (!PD.present)) return -ENODEV; - - save_flags(flags); - cli(); - if (PD.access > 1) { - restore_flags(flags); - return -EBUSY; - } - pd_valid = 0; - restore_flags(flags); + unit = DEVICE_NR(dev); + if ((unit >= PD_UNITS) || !PD.present) + return -ENODEV; - for (p=(PD_PARTNS-1);p>=0;p--) { - minor = p + unit*PD_PARTNS; - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - pd_hd[minor].start_sect = 0; - pd_hd[minor].nr_sects = 0; - } + save_flags(flags); + cli(); + if (PD.access > 1) { + restore_flags(flags); + return -EBUSY; + } + pd_valid = 0; + restore_flags(flags); - if (pd_identify(unit)) - grok_partitions(&pd_gendisk,unit,1<cmd; - pd_poffs = pd_hd[pd_dev].start_sect; - pd_block += pd_poffs; pd_buf = CURRENT->buffer; pd_retries = 0; @@ -902,7 +890,7 @@ (CURRENT->cmd != pd_cmd) || (MINOR(CURRENT->rq_dev) != pd_dev) || (CURRENT->rq_status == RQ_INACTIVE) || - (CURRENT->sector+pd_poffs != pd_block)) + (CURRENT->sector != pd_block)) printk("%s: OUCH: request list changed unexpectedly\n", PD.name); diff -urN linux-2.5.1-pre1/drivers/block/paride/pf.c linux/drivers/block/paride/pf.c --- linux-2.5.1-pre1/drivers/block/paride/pf.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/block/paride/pf.c Sat Dec 1 00:37:05 2001 @@ -458,7 +458,7 @@ if (PF.access == 1) { pf_eject(unit); return 0; - } + } case HDIO_GETGEO: if (!geo) return -EINVAL; err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); diff -urN linux-2.5.1-pre1/drivers/block/ps2esdi.c linux/drivers/block/ps2esdi.c --- linux-2.5.1-pre1/drivers/block/ps2esdi.c Fri Nov 9 14:01:21 2001 +++ linux/drivers/block/ps2esdi.c Sat Dec 1 00:37:05 2001 @@ -66,6 +66,7 @@ #define TYPE_0_CMD_BLK_LENGTH 2 #define TYPE_1_CMD_BLK_LENGTH 4 +#define PS2ESDI_LOCK (&((BLK_DEFAULT_QUEUE(MAJOR_NR))->queue_lock)) static void reset_ctrl(void); @@ -118,7 +119,6 @@ static char 
ps2esdi_valid[MAX_HD]; static int ps2esdi_sizes[MAX_HD << 6]; static int ps2esdi_blocksizes[MAX_HD << 6]; -static int ps2esdi_maxsect[MAX_HD << 6]; static int ps2esdi_drives; static struct hd_struct ps2esdi[MAX_HD << 6]; static u_short io_base; @@ -221,8 +221,7 @@ } void -cleanup_module(void) -{ +cleanup_module(void) { if(ps2esdi_slot) { mca_mark_as_unused(ps2esdi_slot); mca_set_adapter_procfn(ps2esdi_slot, NULL, NULL); @@ -231,8 +230,9 @@ free_dma(dma_arb_level); free_irq(PS2ESDI_IRQ, NULL); devfs_unregister_blkdev(MAJOR_NR, "ed"); - del_gendisk(&ps2esdi_gendisk); blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); + del_gendisk(&ps2esdi_gendisk); + blk_clear(MAJOR_NR); } #endif /* MODULE */ @@ -415,16 +415,13 @@ ps2esdi_gendisk.nr_real = ps2esdi_drives; - /* 128 was old default, maybe maxsect=255 is ok too? - Paul G. */ - for (i = 0; i < (MAX_HD << 6); i++) { - ps2esdi_maxsect[i] = 128; + for (i = 0; i < (MAX_HD << 6); i++) ps2esdi_blocksizes[i] = 1024; - } request_dma(dma_arb_level, "ed"); request_region(io_base, 4, "ed"); blksize_size[MAJOR_NR] = ps2esdi_blocksizes; - max_sectors[MAJOR_NR] = ps2esdi_maxsect; + blk_queue_max_sectors(BLK_DEFAULT_QUEUE(MAJOR_NR), 128); for (i = 0; i < ps2esdi_drives; i++) { register_disk(&ps2esdi_gendisk,MKDEV(MAJOR_NR,i<<6),1<<6, @@ -495,13 +492,9 @@ CURRENT->current_nr_sectors); #endif - - block = CURRENT->sector + ps2esdi[MINOR(CURRENT->rq_dev)].start_sect; - -#if 0 - printk("%s: blocknumber : %d\n", DEVICE_NAME, block); -#endif + block = CURRENT->sector; count = CURRENT->current_nr_sectors; + switch (CURRENT->cmd) { case READ: ps2esdi_readwrite(READ, CURRENT_DEV, block, count); @@ -958,10 +951,10 @@ break; } if(ending != -1) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(PS2ESDI_LOCK, flags); end_request(ending); do_ps2esdi_request(BLK_DEFAULT_QUEUE(MAJOR_NR)); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(PS2ESDI_LOCK, flags); } } /* handle interrupts */ @@ -1100,10 +1093,10 @@ put_user(ps2esdi_info[dev].head, (char *) &geometry->heads); put_user(ps2esdi_info[dev].sect, (char *) &geometry->sectors); put_user(ps2esdi_info[dev].cyl, (short *) &geometry->cylinders); - put_user(ps2esdi[MINOR(inode->i_rdev)].start_sect, + put_user(get_start_sect(inode->i_rdev), (long *) &geometry->start); - return (0); + return 0; } break; @@ -1132,8 +1125,7 @@ static int ps2esdi_reread_partitions(kdev_t dev) { int target = DEVICE_NR(dev); - int start = target << ps2esdi_gendisk.minor_shift; - int partition; + int res; cli(); ps2esdi_valid[target] = (access_count[target] != 1); @@ -1141,21 +1133,16 @@ if (ps2esdi_valid[target]) return (-EBUSY); - for (partition = ps2esdi_gendisk.max_p - 1; - partition >= 0; partition--) { - int minor = (start | partition); - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - ps2esdi_gendisk.part[minor].start_sect = 0; - ps2esdi_gendisk.part[minor].nr_sects = 0; - } - - grok_partitions(&ps2esdi_gendisk, target, 1<<6, - ps2esdi_info[target].head * ps2esdi_info[target].cyl * ps2esdi_info[target].sect); - + res = wipe_partitions(dev); + if (res == 0) + grok_partitions(dev, ps2esdi_info[target].head + * ps2esdi_info[target].cyl + * ps2esdi_info[target].sect); + ps2esdi_valid[target] = 1; wake_up(&ps2esdi_wait_open); - return (0); + return (res); } static void ps2esdi_reset_timer(unsigned long unused) diff -urN linux-2.5.1-pre1/drivers/block/rd.c linux/drivers/block/rd.c --- linux-2.5.1-pre1/drivers/block/rd.c Fri Nov 9 14:15:00 2001 +++ linux/drivers/block/rd.c Sat Dec 1 00:37:05 2001 @@ -98,7 +98,7 
@@ static unsigned long rd_length[NUM_RAMDISKS]; /* Size of RAM disks in bytes */ static int rd_hardsec[NUM_RAMDISKS]; /* Size of real blocks in bytes */ static int rd_blocksizes[NUM_RAMDISKS]; /* Size of 1024 byte blocks :) */ -static int rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */ +static int rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */ static devfs_handle_t devfs_handle; static struct block_device *rd_bdev[NUM_RAMDISKS];/* Protected device data */ @@ -227,19 +227,18 @@ commit_write: ramdisk_commit_write, }; -static int rd_blkdev_pagecache_IO(int rw, struct buffer_head * sbh, int minor) +static int rd_blkdev_pagecache_IO(int rw, struct bio *sbh, int minor) { struct address_space * mapping; unsigned long index; int offset, size, err; err = -EIO; - err = 0; mapping = rd_bdev[minor]->bd_inode->i_mapping; - index = sbh->b_rsector >> (PAGE_CACHE_SHIFT - 9); - offset = (sbh->b_rsector << 9) & ~PAGE_CACHE_MASK; - size = sbh->b_size; + index = sbh->bi_sector >> (PAGE_CACHE_SHIFT - 9); + offset = (sbh->bi_sector << 9) & ~PAGE_CACHE_MASK; + size = bio_size(sbh); do { int count; @@ -276,18 +275,18 @@ if (rw == READ) { src = kmap(page); src += offset; - dst = bh_kmap(sbh); + dst = bio_kmap(sbh); } else { dst = kmap(page); dst += offset; - src = bh_kmap(sbh); + src = bio_kmap(sbh); } offset = 0; memcpy(dst, src, count); kunmap(page); - bh_kunmap(sbh); + bio_kunmap(sbh); if (rw == READ) { flush_dcache_page(page); @@ -311,19 +310,19 @@ * 19-JAN-1998 Richard Gooch Added devfs support * */ -static int rd_make_request(request_queue_t * q, int rw, struct buffer_head *sbh) +static int rd_make_request(request_queue_t * q, struct bio *sbh) { unsigned int minor; unsigned long offset, len; + int rw = sbh->bi_rw; - minor = MINOR(sbh->b_rdev); + minor = MINOR(sbh->bi_dev); if (minor >= NUM_RAMDISKS) goto fail; - - offset = sbh->b_rsector << 9; - len = sbh->b_size; + offset = sbh->bi_sector << 9; + len = bio_size(sbh); if ((offset + len) > rd_length[minor]) goto fail; @@ -338,10 +337,11 @@ if (rd_blkdev_pagecache_IO(rw, sbh, minor)) goto fail; - sbh->b_end_io(sbh,1); + set_bit(BIO_UPTODATE, &sbh->bi_flags); + sbh->bi_end_io(sbh, len >> 9); return 0; fail: - sbh->b_end_io(sbh,0); + bio_io_error(sbh); return 0; } @@ -477,9 +477,7 @@ devfs_unregister (devfs_handle); unregister_blkdev( MAJOR_NR, "ramdisk" ); - hardsect_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; + blk_clear(MAJOR_NR); } #endif @@ -524,7 +522,6 @@ register_disk(NULL, MKDEV(MAJOR_NR,INITRD_MINOR), 1, &rd_bd_op, rd_size<<1); #endif - hardsect_size[MAJOR_NR] = rd_hardsec; /* Size of the RAM disk blocks */ blksize_size[MAJOR_NR] = rd_blocksizes; /* Avoid set_blocksize() check */ blk_size[MAJOR_NR] = rd_kbsize; /* Size of the RAM disk in kB */ diff -urN linux-2.5.1-pre1/drivers/block/xd.c linux/drivers/block/xd.c --- linux-2.5.1-pre1/drivers/block/xd.c Fri Nov 9 14:01:21 2001 +++ linux/drivers/block/xd.c Sat Dec 1 00:37:05 2001 @@ -257,7 +257,6 @@ } xd_gendisk.nr_real = xd_drives; - } /* xd_open: open a device */ @@ -292,7 +291,7 @@ if (CURRENT_DEV < xd_drives && CURRENT->sector + CURRENT->nr_sectors <= xd_struct[MINOR(CURRENT->rq_dev)].nr_sects) { - block = CURRENT->sector + xd_struct[MINOR(CURRENT->rq_dev)].start_sect; + block = CURRENT->sector; count = CURRENT->nr_sectors; switch (CURRENT->cmd) { @@ -329,7 +328,7 @@ g.heads = xd_info[dev].heads; g.sectors = xd_info[dev].sectors; g.cylinders = xd_info[dev].cylinders; - g.start = xd_struct[MINOR(inode->i_rdev)].start_sect; + g.start = 
get_start_sect(inode->i_rdev); return copy_to_user(geometry, &g, sizeof g) ? -EFAULT : 0; } case HDIO_SET_DMA: @@ -337,7 +336,8 @@ if (xdc_busy) return -EBUSY; nodma = !arg; if (nodma && xd_dma_buffer) { - xd_dma_mem_free((unsigned long)xd_dma_buffer, xd_maxsectors * 0x200); + xd_dma_mem_free((unsigned long)xd_dma_buffer, + xd_maxsectors * 0x200); xd_dma_buffer = 0; } return 0; @@ -378,11 +378,9 @@ static int xd_reread_partitions(kdev_t dev) { int target; - int start; - int partition; + int res; target = DEVICE_NR(dev); - start = target << xd_gendisk.minor_shift; cli(); xd_valid[target] = (xd_access[target] != 1); @@ -390,20 +388,16 @@ if (xd_valid[target]) return -EBUSY; - for (partition = xd_gendisk.max_p - 1; partition >= 0; partition--) { - int minor = (start | partition); - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - xd_gendisk.part[minor].start_sect = 0; - xd_gendisk.part[minor].nr_sects = 0; - }; - - grok_partitions(&xd_gendisk, target, 1<<6, - xd_info[target].heads * xd_info[target].cylinders * xd_info[target].sectors); + res = wipe_partitions(dev); + if (!res) + grok_partitions(dev, xd_info[target].heads + * xd_info[target].cylinders + * xd_info[target].sectors); xd_valid[target] = 1; wake_up(&xd_wait_open); - return 0; + return res; } /* xd_readwrite: handle a read/write request */ @@ -1105,12 +1099,9 @@ static void xd_done (void) { - blksize_size[MAJOR_NR] = NULL; blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); - blk_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; - read_ahead[MAJOR_NR] = 0; del_gendisk(&xd_gendisk); + blk_clear(MAJOR_NR); release_region(xd_iobase,4); } diff -urN linux-2.5.1-pre1/drivers/cdrom/cdrom.c linux/drivers/cdrom/cdrom.c --- linux-2.5.1-pre1/drivers/cdrom/cdrom.c Fri Nov 16 10:14:08 2001 +++ linux/drivers/cdrom/cdrom.c Sat Dec 1 00:37:05 2001 @@ -1987,7 +1987,7 @@ return -EINVAL; /* FIXME: we need upper bound checking, too!! */ - if (lba < 0 || ra.nframes <= 0) + if (lba < 0 || ra.nframes <= 0 || ra.nframes > 64) return -EINVAL; /* diff -urN linux-2.5.1-pre1/drivers/cdrom/cdu31a.c linux/drivers/cdrom/cdu31a.c --- linux-2.5.1-pre1/drivers/cdrom/cdu31a.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/cdrom/cdu31a.c Sat Dec 1 00:37:05 2001 @@ -1583,7 +1583,10 @@ /* Make sure we have a valid TOC. */ sony_get_toc(); - spin_unlock_irq(&io_request_lock); + /* + * jens: driver has lots of races + */ + spin_unlock_irq(&q->queue_lock); /* Make sure the timer is cancelled. */ del_timer(&cdu31a_abort_timer); @@ -1730,7 +1733,7 @@ } end_do_cdu31a_request: - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); #if 0 /* After finished, cancel any pending operations. 
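The CD-ROM hunks here and below all make the same mechanical change: request functions are now entered with their own q->queue_lock held rather than the global io_request_lock, so any stretch that sleeps must drop and retake the per-queue lock. The pattern once, in isolation — drv_request is hypothetical; QUEUE_EMPTY and end_request() follow the usual drivers/block/blk.h conventions:

	/* illustration of the io_request_lock -> queue_lock conversion */
	static void drv_request(request_queue_t *q)
	{
		while (!QUEUE_EMPTY) {			/* entered with lock held */
			spin_unlock_irq(&q->queue_lock);/* was io_request_lock   */

			/* ... slow PIO transfer, semaphores, sleeping ... */

			spin_lock_irq(&q->queue_lock);
			end_request(1);			/* still wants the lock  */
		}
	}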
*/ abort_read(); diff -urN linux-2.5.1-pre1/drivers/cdrom/cm206.c linux/drivers/cdrom/cm206.c --- linux-2.5.1-pre1/drivers/cdrom/cm206.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/cdrom/cm206.c Sat Dec 1 00:37:05 2001 @@ -866,7 +866,7 @@ end_request(0); continue; } - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); error = 0; for (i = 0; i < CURRENT->nr_sectors; i++) { int e1, e2; @@ -893,7 +893,7 @@ debug(("cm206_request: %d %d\n", e1, e2)); } } - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); end_request(!error); } } diff -urN linux-2.5.1-pre1/drivers/cdrom/sbpcd.c linux/drivers/cdrom/sbpcd.c --- linux-2.5.1-pre1/drivers/cdrom/sbpcd.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/cdrom/sbpcd.c Sat Dec 1 00:37:05 2001 @@ -4930,7 +4930,7 @@ sbpcd_end_request(req, 0); if (req -> sector == -1) sbpcd_end_request(req, 0); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); down(&ioctl_read_sem); if (req->cmd != READ) @@ -4970,7 +4970,7 @@ xnr, req, req->sector, req->nr_sectors, jiffies); #endif up(&ioctl_read_sem); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 1); goto request_loop; } @@ -5011,7 +5011,7 @@ xnr, req, req->sector, req->nr_sectors, jiffies); #endif up(&ioctl_read_sem); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 1); goto request_loop; } @@ -5027,7 +5027,7 @@ #endif up(&ioctl_read_sem); sbp_sleep(0); /* wait a bit, try again */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 0); goto request_loop; } diff -urN linux-2.5.1-pre1/drivers/char/joystick/ns558.c linux/drivers/char/joystick/ns558.c --- linux-2.5.1-pre1/drivers/char/joystick/ns558.c Wed Sep 12 15:34:06 2001 +++ linux/drivers/char/joystick/ns558.c Sat Dec 1 00:37:05 2001 @@ -153,11 +153,7 @@ return port; } -#if defined(CONFIG_ISAPNP) || (defined(CONFIG_ISAPNP_MODULE) && defined(MODULE)) -#define NSS558_ISAPNP -#endif - -#ifdef NSS558_ISAPNP +#ifdef __ISAPNP__ static struct isapnp_device_id pnp_devids[] = { { ISAPNP_ANY_ID, ISAPNP_ANY_ID, ISAPNP_VENDOR('@','P','@'), ISAPNP_DEVICE(0x0001), 0 }, @@ -229,7 +225,7 @@ int __init ns558_init(void) { int i = 0; -#ifdef NSS558_ISAPNP +#ifdef __ISAPNP__ struct isapnp_device_id *devid; struct pci_dev *dev = NULL; #endif @@ -245,7 +241,7 @@ * Probe for PnP ports. 
*/ -#ifdef NSS558_ISAPNP +#ifdef __ISAPNP__ for (devid = pnp_devids; devid->vendor; devid++) { while ((dev = isapnp_find_dev(NULL, devid->vendor, devid->function, dev))) { ns558 = ns558_pnp_probe(dev, ns558); @@ -264,7 +260,7 @@ gameport_unregister_port(&port->gameport); switch (port->type) { -#ifdef NSS558_ISAPNP +#ifdef __ISAPNP__ case NS558_PNP: if (port->dev->deactivate) port->dev->deactivate(port->dev); diff -urN linux-2.5.1-pre1/drivers/char/raw.c linux/drivers/char/raw.c --- linux-2.5.1-pre1/drivers/char/raw.c Sat Sep 22 20:35:43 2001 +++ linux/drivers/char/raw.c Sat Dec 1 00:37:05 2001 @@ -126,10 +126,8 @@ if (is_mounted(rdev)) { if (blksize_size[MAJOR(rdev)]) sector_size = blksize_size[MAJOR(rdev)][MINOR(rdev)]; - } else { - if (hardsect_size[MAJOR(rdev)]) - sector_size = hardsect_size[MAJOR(rdev)][MINOR(rdev)]; - } + } else + sector_size = get_hardsect_size(rdev); set_blocksize(rdev, sector_size); raw_devices[minor].sector_size = sector_size; @@ -273,16 +271,14 @@ struct kiobuf * iobuf; int new_iobuf; int err = 0; - unsigned long blocknr, blocks; + unsigned long blocks; size_t transferred; int iosize; - int i; int minor; kdev_t dev; unsigned long limit; - int sector_size, sector_bits, sector_mask; - int max_sectors; + sector_t blocknr; /* * First, a few checks on device size limits @@ -307,7 +303,6 @@ sector_size = raw_devices[minor].sector_size; sector_bits = raw_devices[minor].sector_bits; sector_mask = sector_size- 1; - max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9); if (blk_size[MAJOR(dev)]) limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits; @@ -325,18 +320,10 @@ if ((*offp >> sector_bits) >= limit) goto out_free; - /* - * Split the IO into KIO_MAX_SECTORS chunks, mapping and - * unmapping the single kiobuf as we go to perform each chunk of - * IO. 
- */ - transferred = 0; blocknr = *offp >> sector_bits; while (size > 0) { blocks = size >> sector_bits; - if (blocks > max_sectors) - blocks = max_sectors; if (blocks > limit - blocknr) blocks = limit - blocknr; if (!blocks) @@ -348,10 +335,7 @@ if (err) break; - for (i=0; i < blocks; i++) - iobuf->blocks[i] = blocknr++; - - err = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, sector_size); + err = brw_kiovec(rw, 1, &iobuf, dev, &blocknr, sector_size); if (rw == READ && err > 0) mark_dirty_kiobuf(iobuf, err); @@ -361,6 +345,8 @@ size -= err; buf += err; } + + blocknr += blocks; unmap_kiobuf(iobuf); diff -urN linux-2.5.1-pre1/drivers/char/serial.c linux/drivers/char/serial.c --- linux-2.5.1-pre1/drivers/char/serial.c Fri Nov 9 14:12:55 2001 +++ linux/drivers/char/serial.c Sat Dec 1 00:37:05 2001 @@ -122,7 +122,7 @@ #define ENABLE_SERIAL_ACPI #endif -#if defined(CONFIG_ISAPNP)|| (defined(CONFIG_ISAPNP_MODULE) && defined(MODULE)) +#ifdef __ISAPNP__ #ifndef ENABLE_SERIAL_PNP #define ENABLE_SERIAL_PNP #endif diff -urN linux-2.5.1-pre1/drivers/ide/aec62xx.c linux/drivers/ide/aec62xx.c --- linux-2.5.1-pre1/drivers/ide/aec62xx.c Tue Jun 20 07:52:36 2000 +++ linux/drivers/ide/aec62xx.c Sat Dec 1 00:37:05 2001 @@ -557,6 +557,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) hwif->dmaproc = &aec62xx_dmaproc; + hwif->highmem = 1; #else /* !CONFIG_BLK_DEV_IDEDMA */ hwif->drives[0].autotune = 1; hwif->drives[1].autotune = 1; diff -urN linux-2.5.1-pre1/drivers/ide/amd74xx.c linux/drivers/ide/amd74xx.c --- linux-2.5.1-pre1/drivers/ide/amd74xx.c Mon Aug 13 14:56:19 2001 +++ linux/drivers/ide/amd74xx.c Sat Dec 1 00:37:05 2001 @@ -75,7 +75,8 @@ { unsigned int class_rev; - if (dev->device == PCI_DEVICE_ID_AMD_VIPER_7411) + if ((dev->device == PCI_DEVICE_ID_AMD_VIPER_7411) || + (dev->device == PCI_DEVICE_ID_AMD_VIPER_7441)) return 0; pci_read_config_dword(dev, PCI_CLASS_REVISION, &class_rev); @@ -122,8 +123,8 @@ pci_read_config_byte(dev, 0x4c, &pio_timing); #ifdef DEBUG - printk("%s: UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x ", - drive->name, ultra_timing, dma_pio_timing, pio_timing); + printk("%s:%d: Speed 0x%02x UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x\n", + drive->name, drive->dn, speed, ultra_timing, dma_pio_timing, pio_timing); #endif ultra_timing &= ~0xC7; @@ -131,22 +132,19 @@ pio_timing &= ~(0x03 << drive->dn); #ifdef DEBUG - printk(":: UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x ", - ultra_timing, dma_pio_timing, pio_timing); + printk("%s: UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x\n", + drive->name, ultra_timing, dma_pio_timing, pio_timing); #endif switch(speed) { #ifdef CONFIG_BLK_DEV_IDEDMA + case XFER_UDMA_7: + case XFER_UDMA_6: + speed = XFER_UDMA_5; case XFER_UDMA_5: -#undef __CAN_MODE_5 -#ifdef __CAN_MODE_5 ultra_timing |= 0x46; dma_pio_timing |= 0x20; break; -#else - printk("%s: setting to mode 4, driver problems in mode 5.\n", drive->name); - speed = XFER_UDMA_4; -#endif /* __CAN_MODE_5 */ case XFER_UDMA_4: ultra_timing |= 0x45; dma_pio_timing |= 0x20; @@ -222,8 +220,8 @@ pci_write_config_byte(dev, 0x4c, pio_timing); #ifdef DEBUG - printk(":: UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x\n", - ultra_timing, dma_pio_timing, pio_timing); + printk("%s: UDMA 0x%02x DMAPIO 0x%02x PIO 0x%02x\n", + drive->name, ultra_timing, dma_pio_timing, pio_timing); #endif #ifdef CONFIG_BLK_DEV_IDEDMA @@ -303,11 +301,12 @@ struct pci_dev *dev = hwif->pci_dev; struct hd_driveid *id = drive->id; byte udma_66 = eighty_ninty_three(drive); - byte udma_100 = (dev->device==PCI_DEVICE_ID_AMD_VIPER_7411) ? 
1 : 0; + byte udma_100 = ((dev->device==PCI_DEVICE_ID_AMD_VIPER_7411)|| + (dev->device==PCI_DEVICE_ID_AMD_VIPER_7441)) ? 1 : 0; byte speed = 0x00; int rval; - if ((id->dma_ultra & 0x0020) && (udma_66)&& (udma_100)) { + if ((id->dma_ultra & 0x0020) && (udma_66) && (udma_100)) { speed = XFER_UDMA_5; } else if ((id->dma_ultra & 0x0010) && (udma_66)) { speed = XFER_UDMA_4; @@ -331,7 +330,7 @@ (void) amd74xx_tune_chipset(drive, speed); - rval = (int)( ((id->dma_ultra >> 11) & 3) ? ide_dma_on : + rval = (int)( ((id->dma_ultra >> 11) & 7) ? ide_dma_on : ((id->dma_ultra >> 8) & 7) ? ide_dma_on : ((id->dma_mword >> 8) & 7) ? ide_dma_on : ide_dma_off_quietly); @@ -352,7 +351,7 @@ } dma_func = ide_dma_off_quietly; if (id->field_valid & 4) { - if (id->dma_ultra & 0x002F) { + if (id->dma_ultra & 0x003F) { /* Force if Capable UltraDMA */ dma_func = config_chipset_for_dma(drive); if ((id->field_valid & 2) && diff -urN linux-2.5.1-pre1/drivers/ide/cmd64x.c linux/drivers/ide/cmd64x.c --- linux-2.5.1-pre1/drivers/ide/cmd64x.c Thu Jul 27 16:40:57 2000 +++ linux/drivers/ide/cmd64x.c Sat Dec 1 00:37:05 2001 @@ -795,5 +795,7 @@ default: break; } + + hwif->highmem = 1; #endif /* CONFIG_BLK_DEV_IDEDMA */ } diff -urN linux-2.5.1-pre1/drivers/ide/cs5530.c linux/drivers/ide/cs5530.c --- linux-2.5.1-pre1/drivers/ide/cs5530.c Tue Jan 2 16:58:45 2001 +++ linux/drivers/ide/cs5530.c Sat Dec 1 00:37:05 2001 @@ -352,9 +352,10 @@ unsigned int basereg, d0_timings; #ifdef CONFIG_BLK_DEV_IDEDMA - hwif->dmaproc = &cs5530_dmaproc; + hwif->dmaproc = &cs5530_dmaproc; + hwif->highmem = 1; #else - hwif->autodma = 0; + hwif->autodma = 0; #endif /* CONFIG_BLK_DEV_IDEDMA */ hwif->tuneproc = &cs5530_tuneproc; diff -urN linux-2.5.1-pre1/drivers/ide/cy82c693.c linux/drivers/ide/cy82c693.c --- linux-2.5.1-pre1/drivers/ide/cy82c693.c Sat May 19 17:43:06 2001 +++ linux/drivers/ide/cy82c693.c Sat Dec 1 00:37:05 2001 @@ -441,6 +441,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { + hwif->highmem = 1; hwif->dmaproc = &cy82c693_dmaproc; if (!noautodma) hwif->autodma = 1; diff -urN linux-2.5.1-pre1/drivers/ide/hd.c linux/drivers/ide/hd.c --- linux-2.5.1-pre1/drivers/ide/hd.c Mon Oct 15 13:27:42 2001 +++ linux/drivers/ide/hd.c Sat Dec 1 00:37:05 2001 @@ -107,7 +107,6 @@ static int hd_sizes[MAX_HD<<6]; static int hd_blocksizes[MAX_HD<<6]; static int hd_hardsectsizes[MAX_HD<<6]; -static int hd_maxsect[MAX_HD<<6]; static struct timer_list device_timer; @@ -560,19 +559,18 @@ dev = MINOR(CURRENT->rq_dev); block = CURRENT->sector; nsect = CURRENT->nr_sectors; - if (dev >= (NR_HD<<6) || block >= hd[dev].nr_sects || ((block+nsect) > hd[dev].nr_sects)) { -#ifdef DEBUG - if (dev >= (NR_HD<<6)) + if (dev >= (NR_HD<<6) || (dev & 0x3f) || + block >= hd[dev].nr_sects || ((block+nsect) > hd[dev].nr_sects)) { + if (dev >= (NR_HD<<6) || (dev & 0x3f)) printk("hd: bad minor number: device=%s\n", kdevname(CURRENT->rq_dev)); else printk("hd%c: bad access: block=%d, count=%d\n", (MINOR(CURRENT->rq_dev)>>6)+'a', block, nsect); -#endif end_request(0); goto repeat; } - block += hd[dev].start_sect; + dev >>= 6; if (special_op[dev]) { if (do_special_op(dev)) @@ -634,22 +632,17 @@ g.heads = hd_info[dev].head; g.sectors = hd_info[dev].sect; g.cylinders = hd_info[dev].cyl; - g.start = hd[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user(loc, &g, sizeof g) ? 
-EFAULT : 0; } - case BLKGETSIZE: /* Return device size */ - return put_user(hd[MINOR(inode->i_rdev)].nr_sects, - (unsigned long *) arg); - case BLKGETSIZE64: - return put_user((u64)hd[MINOR(inode->i_rdev)].nr_sects << 9, - (u64 *) arg); - case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; return revalidate_hddisk(inode->i_rdev, 1); + case BLKGETSIZE: + case BLKGETSIZE64: case BLKROSET: case BLKROGET: case BLKRASET: @@ -733,11 +726,9 @@ for(drive=0; drive < (MAX_HD << 6); drive++) { hd_blocksizes[drive] = 1024; hd_hardsectsizes[drive] = 512; - hd_maxsect[drive]=255; } blksize_size[MAJOR_NR] = hd_blocksizes; hardsect_size[MAJOR_NR] = hd_hardsectsizes; - max_sectors[MAJOR_NR] = hd_maxsect; #ifdef __i386__ if (!NR_HD) { @@ -840,6 +831,7 @@ return -1; } blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + blk_queue_max_sectors(BLK_DEFAULT_QUEUE(MAJOR_NR), 255); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ add_gendisk(&hd_gendisk); init_timer(&device_timer); @@ -868,9 +860,7 @@ { int target; struct gendisk * gdev; - int max_p; - int start; - int i; + int res; long flags; target = DEVICE_NR(dev); @@ -885,25 +875,20 @@ DEVICE_BUSY = 1; restore_flags(flags); - max_p = gdev->max_p; - start = target << gdev->minor_shift; - - for (i=max_p - 1; i >=0 ; i--) { - int minor = start + i; - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; - } + res = wipe_partitions(dev); + if (res) + goto leave; #ifdef MAYBE_REINIT MAYBE_REINIT; #endif - grok_partitions(gdev, target, 1<<6, CAPACITY); + grok_partitions(dev, CAPACITY); +leave: DEVICE_BUSY = 0; wake_up(&busy_wait); - return 0; + return res; } static int parse_hd_setup (char *line) { diff -urN linux-2.5.1-pre1/drivers/ide/hpt34x.c linux/drivers/ide/hpt34x.c --- linux-2.5.1-pre1/drivers/ide/hpt34x.c Sat May 19 17:43:06 2001 +++ linux/drivers/ide/hpt34x.c Sat Dec 1 00:37:05 2001 @@ -425,6 +425,7 @@ hwif->autodma = 0; hwif->dmaproc = &hpt34x_dmaproc; + hwif->highmem = 1; } else { hwif->drives[0].autotune = 1; hwif->drives[1].autotune = 1; diff -urN linux-2.5.1-pre1/drivers/ide/hpt366.c linux/drivers/ide/hpt366.c --- linux-2.5.1-pre1/drivers/ide/hpt366.c Tue Aug 14 20:01:07 2001 +++ linux/drivers/ide/hpt366.c Sat Dec 1 00:37:05 2001 @@ -730,6 +730,7 @@ hwif->autodma = 1; else hwif->autodma = 0; + hwif->highmem = 1; } else { hwif->autodma = 0; hwif->drives[0].autotune = 1; diff -urN linux-2.5.1-pre1/drivers/ide/ide-cd.c linux/drivers/ide/ide-cd.c --- linux-2.5.1-pre1/drivers/ide/ide-cd.c Wed Oct 24 23:53:51 2001 +++ linux/drivers/ide/ide-cd.c Sat Dec 1 00:37:05 2001 @@ -891,7 +891,7 @@ int stat; int ireason, len, sectors_to_transfer, nskip; struct cdrom_info *info = drive->driver_data; - int i, dma = info->dma, dma_error = 0; + int dma = info->dma, dma_error = 0; ide_startstop_t startstop; struct request *rq = HWGROUP(drive)->rq; @@ -908,10 +908,7 @@ if (dma) { if (!dma_error) { - for (i = rq->nr_sectors; i > 0;) { - i -= rq->current_nr_sectors; - ide_end_request(1, HWGROUP(drive)); - } + __ide_end_request(HWGROUP(drive), 1, rq->nr_sectors); return ide_stopped; } else return ide_error (drive, "dma error", stat); @@ -926,7 +923,7 @@ /* If we're not done filling the current buffer, complain. Otherwise, complete the command normally. 
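A pattern worth noting in the cdrom_read_intr() hunk above: the old code completed a finished request by calling ide_end_request() once per segment in a countdown loop, and the same loop is removed again in the write path and in ide-dma.c below. The new __ide_end_request() takes an explicit sector count and completes everything in one call (its definition appears in the ide.c hunks further down). A minimal sketch of the equivalence, using only the two calls shown in this patch:

    /* Old idiom, removed throughout this patch: finish the request
     * one current_nr_sectors-sized piece at a time. */
    static void end_rq_looped(ide_hwgroup_t *hwgroup)
    {
        struct request *rq = hwgroup->rq;
        long i;

        for (i = rq->nr_sectors; i > 0;) {
            i -= rq->current_nr_sectors;
            ide_end_request(1, hwgroup);
        }
    }

    /* New idiom: one call, one lock round-trip, same effect. */
    static void end_rq_once(ide_hwgroup_t *hwgroup)
    {
        __ide_end_request(hwgroup, 1, hwgroup->rq->nr_sectors);
    }
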
*/ if (rq->current_nr_sectors > 0) { - printk ("%s: cdrom_read_intr: data underrun (%ld blocks)\n", + printk ("%s: cdrom_read_intr: data underrun (%u blocks)\n", drive->name, rq->current_nr_sectors); cdrom_end_request (0, drive); } else @@ -959,8 +956,7 @@ /* First, figure out if we need to bit-bucket any of the leading sectors. */ - nskip = MIN ((int)(rq->current_nr_sectors - (rq->bh->b_size >> SECTOR_BITS)), - sectors_to_transfer); + nskip = MIN(rq->current_nr_sectors - bio_sectors(rq->bio), sectors_to_transfer); while (nskip > 0) { /* We need to throw away a sector. */ @@ -1058,7 +1054,7 @@ represent the number of sectors to skip at the start of a transfer will fail. I think that this will never happen, but let's be paranoid and check. */ - if (rq->current_nr_sectors < (rq->bh->b_size >> SECTOR_BITS) && + if (rq->current_nr_sectors < bio_sectors(rq->bio) && (rq->sector % SECTORS_PER_FRAME) != 0) { printk ("%s: cdrom_read_from_buffer: buffer botch (%ld)\n", drive->name, rq->sector); @@ -1097,9 +1093,9 @@ nskip = (sector % SECTORS_PER_FRAME); if (nskip > 0) { /* Sanity check... */ - if (rq->current_nr_sectors != (rq->bh->b_size >> SECTOR_BITS) && + if (rq->current_nr_sectors != bio_sectors(rq->bio) && (rq->sector % CD_FRAMESIZE != 0)) { - printk ("%s: cdrom_start_read_continuation: buffer botch (%lu)\n", + printk ("%s: cdrom_start_read_continuation: buffer botch (%u)\n", drive->name, rq->current_nr_sectors); cdrom_end_request (0, drive); return ide_stopped; @@ -1192,66 +1188,17 @@ return cdrom_start_packet_command (drive, 0, cdrom_start_seek_continuation); } -static inline int cdrom_merge_requests(struct request *rq, struct request *nxt) -{ - int ret = 1; - - /* - * partitions not really working, but better check anyway... - */ - if (rq->cmd == nxt->cmd && rq->rq_dev == nxt->rq_dev) { - rq->nr_sectors += nxt->nr_sectors; - rq->hard_nr_sectors += nxt->nr_sectors; - rq->bhtail->b_reqnext = nxt->bh; - rq->bhtail = nxt->bhtail; - list_del(&nxt->queue); - blkdev_release_request(nxt); - ret = 0; - } - - return ret; -} - -/* - * the current request will always be the first one on the list - */ -static void cdrom_attempt_remerge(ide_drive_t *drive, struct request *rq) -{ - struct list_head *entry; - struct request *nxt; - unsigned long flags; - - spin_lock_irqsave(&io_request_lock, flags); - - while (1) { - entry = rq->queue.next; - if (entry == &drive->queue.queue_head) - break; - - nxt = blkdev_entry_to_request(entry); - if (rq->sector + rq->nr_sectors != nxt->sector) - break; - else if (rq->nr_sectors + nxt->nr_sectors > SECTORS_MAX) - break; - - if (cdrom_merge_requests(rq, nxt)) - break; - } - - spin_unlock_irqrestore(&io_request_lock, flags); -} - /* Fix up a possibly partially-processed request so that we can - start it over entirely, or even put it back on the request queue. 
*/ + start it over entirely */ static void restore_request (struct request *rq) { - if (rq->buffer != rq->bh->b_data) { - int n = (rq->buffer - rq->bh->b_data) / SECTOR_SIZE; - rq->buffer = rq->bh->b_data; + if (rq->buffer != bio_data(rq->bio)) { + int n = (rq->buffer - (char *) bio_data(rq->bio)) / SECTOR_SIZE; + rq->buffer = bio_data(rq->bio); rq->nr_sectors += n; rq->sector -= n; } - rq->current_nr_sectors = rq->bh->b_size >> SECTOR_BITS; + rq->hard_cur_sectors = rq->current_nr_sectors = bio_sectors(rq->bio); rq->hard_nr_sectors = rq->nr_sectors; rq->hard_sector = rq->sector; } @@ -1281,7 +1228,7 @@ if (cdrom_read_from_buffer(drive)) return ide_stopped; - cdrom_attempt_remerge(drive, rq); + blk_attempt_remerge(&drive->queue, rq); /* Clear the local sector buffer. */ info->nsectors_buffered = 0; @@ -1532,7 +1479,7 @@ { int stat, ireason, len, sectors_to_transfer, uptodate; struct cdrom_info *info = drive->driver_data; - int i, dma_error = 0, dma = info->dma; + int dma_error = 0, dma = info->dma; ide_startstop_t startstop; struct request *rq = HWGROUP(drive)->rq; @@ -1559,10 +1506,7 @@ return ide_error(drive, "dma error", stat); rq = HWGROUP(drive)->rq; - for (i = rq->nr_sectors; i > 0;) { - i -= rq->current_nr_sectors; - ide_end_request(1, HWGROUP(drive)); - } + __ide_end_request(HWGROUP(drive), 1, rq->nr_sectors); return ide_stopped; } @@ -1577,7 +1521,7 @@ */ uptodate = 1; if (rq->current_nr_sectors > 0) { - printk("%s: write_intr: data underrun (%ld blocks)\n", + printk("%s: write_intr: data underrun (%u blocks)\n", drive->name, rq->current_nr_sectors); uptodate = 0; } @@ -1674,7 +1618,7 @@ * remerge requests, often the plugging will not have had time * to do this properly */ - cdrom_attempt_remerge(drive, rq); + blk_attempt_remerge(&drive->queue, rq); info->nsectors_buffered = 0; @@ -2202,7 +2146,9 @@ pc.quiet = cgc->quiet; pc.timeout = cgc->timeout; pc.sense = cgc->sense; - return cgc->stat = cdrom_queue_packet_command(drive, &pc); + cgc->stat = cdrom_queue_packet_command(drive, &pc); + cgc->buflen -= pc.buflen; + return cgc->stat; } static @@ -2711,7 +2657,6 @@ ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); ide_add_setting(drive, "dsc_overlap", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->dsc_overlap, NULL); } @@ -2875,7 +2820,7 @@ MOD_INC_USE_COUNT; if (info->buffer == NULL) info->buffer = (char *) kmalloc(SECTOR_BUFFER_SIZE, GFP_KERNEL); - if ((info->buffer == NULL) || (rc = cdrom_open(ip, fp))) { + if ((info->buffer == NULL) || (rc = cdrom_open(ip, fp))) { drive->usage--; MOD_DEC_USE_COUNT; } diff -urN linux-2.5.1-pre1/drivers/ide/ide-cd.h linux/drivers/ide/ide-cd.h --- linux-2.5.1-pre1/drivers/ide/ide-cd.h Thu Nov 22 11:46:58 2001 +++ linux/drivers/ide/ide-cd.h Sat Dec 1 00:37:05 2001 @@ -435,7 +435,7 @@ byte curlba[3]; byte nslots; - __u8 short slot_tablelen; + __u16 short slot_tablelen; }; diff -urN linux-2.5.1-pre1/drivers/ide/ide-cs.c linux/drivers/ide/ide-cs.c --- linux-2.5.1-pre1/drivers/ide/ide-cs.c Sun Sep 30 12:26:05 2001 +++ linux/drivers/ide/ide-cs.c Sat Dec 1 00:37:05 2001 @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -226,6 +227,16 @@ #define CFG_CHECK(fn, args...) 
\ if (CardServices(fn, args) != 0) goto next_entry +int idecs_register (int io_base, int ctl_base, int irq) +{ + hw_regs_t hw; + ide_init_hwif_ports(&hw, (ide_ioreg_t) io_base, (ide_ioreg_t) ctl_base, NULL); + hw.irq = irq; + hw.chipset = ide_pci; // this enables IRQ sharing w/ PCI irqs + return ide_register_hw(&hw, NULL); +} + + void ide_config(dev_link_t *link) { client_handle_t handle = link->handle; @@ -327,12 +338,16 @@ if (link->io.NumPorts2) release_region(link->io.BasePort2, link->io.NumPorts2); + /* disable drive interrupts during IDE probe */ + if(ctl_base) + outb(0x02, ctl_base); + /* retry registration in case device is still spinning up */ for (i = 0; i < 10; i++) { - hd = ide_register(io_base, ctl_base, link->irq.AssignedIRQ); + hd = idecs_register(io_base, ctl_base, link->irq.AssignedIRQ); if (hd >= 0) break; if (link->io.NumPorts1 == 0x20) { - hd = ide_register(io_base+0x10, ctl_base+0x10, + hd = idecs_register(io_base+0x10, ctl_base+0x10, link->irq.AssignedIRQ); if (hd >= 0) { io_base += 0x10; ctl_base += 0x10; diff -urN linux-2.5.1-pre1/drivers/ide/ide-disk.c linux/drivers/ide/ide-disk.c --- linux-2.5.1-pre1/drivers/ide/ide-disk.c Tue Nov 20 21:35:28 2001 +++ linux/drivers/ide/ide-disk.c Sat Dec 1 00:37:05 2001 @@ -27,6 +27,7 @@ * Version 1.09 added increment of rq->sector in ide_multwrite * added UDMA 3/4 reporting * Version 1.10 request queue changes, Ultra DMA 100 + * Version 1.11 Highmem I/O support, Jens Axboe */ #define IDEDISK_VERSION "1.10" @@ -139,7 +140,9 @@ byte stat; int i; unsigned int msect, nsect; + unsigned long flags; struct request *rq; + char *to; /* new way for dealing with premature shared PCI interrupts */ if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) { @@ -150,8 +153,8 @@ ide_set_handler(drive, &read_intr, WAIT_CMD, NULL); return ide_started; } + msect = drive->mult_count; - read_next: rq = HWGROUP(drive)->rq; if (msect) { @@ -160,14 +163,15 @@ msect -= nsect; } else nsect = 1; - idedisk_input_data(drive, rq->buffer, nsect * SECTOR_WORDS); + to = ide_map_buffer(rq, &flags); + idedisk_input_data(drive, to, nsect * SECTOR_WORDS); #ifdef DEBUG printk("%s: read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n", drive->name, rq->sector, rq->sector+nsect-1, (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect); #endif + ide_unmap_buffer(to, &flags); rq->sector += nsect; - rq->buffer += nsect<<9; rq->errors = 0; i = (rq->nr_sectors -= nsect); if (((long)(rq->current_nr_sectors -= nsect)) <= 0) @@ -201,14 +205,16 @@ #endif if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) { rq->sector++; - rq->buffer += 512; rq->errors = 0; i = --rq->nr_sectors; --rq->current_nr_sectors; if (((long)rq->current_nr_sectors) <= 0) ide_end_request(1, hwgroup); if (i > 0) { - idedisk_output_data (drive, rq->buffer, SECTOR_WORDS); + unsigned long flags; + char *to = ide_map_buffer(rq, &flags); + idedisk_output_data (drive, to, SECTOR_WORDS); + ide_unmap_buffer(to, &flags); ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); return ide_started; } @@ -238,28 +244,28 @@ do { char *buffer; int nsect = rq->current_nr_sectors; - + unsigned long flags; + if (nsect > mcount) nsect = mcount; mcount -= nsect; - buffer = rq->buffer; + buffer = ide_map_buffer(rq, &flags); rq->sector += nsect; - rq->buffer += nsect << 9; rq->nr_sectors -= nsect; rq->current_nr_sectors -= nsect; /* Do we move to the next bh after this? 
*/ if (!rq->current_nr_sectors) { - struct buffer_head *bh = rq->bh->b_reqnext; + struct bio *bio = rq->bio->bi_next; /* end early early we ran out of requests */ - if (!bh) { + if (!bio) { mcount = 0; } else { - rq->bh = bh; - rq->current_nr_sectors = bh->b_size >> 9; - rq->buffer = bh->b_data; + rq->bio = bio; + rq->current_nr_sectors = bio_sectors(bio); + rq->hard_cur_sectors = rq->current_nr_sectors; } } @@ -268,6 +274,7 @@ * re-entering us on the last transfer. */ idedisk_output_data(drive, buffer, nsect<<7); + ide_unmap_buffer(buffer, &flags); } while (mcount); return 0; @@ -279,7 +286,6 @@ static ide_startstop_t multwrite_intr (ide_drive_t *drive) { byte stat; - int i; ide_hwgroup_t *hwgroup = HWGROUP(drive); struct request *rq = &hwgroup->wrq; @@ -302,10 +308,8 @@ */ if (!rq->nr_sectors) { /* all done? */ rq = hwgroup->rq; - for (i = rq->nr_sectors; i > 0;){ - i -= rq->current_nr_sectors; - ide_end_request(1, hwgroup); - } + + __ide_end_request(hwgroup, 1, rq->nr_sectors); return ide_stopped; } } @@ -367,6 +371,8 @@ */ static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) { + unsigned long flags; + if (IDE_CONTROL_REG) OUT_BYTE(drive->ctl,IDE_CONTROL_REG); OUT_BYTE(0x00, IDE_FEATURE_REG); @@ -444,16 +450,17 @@ hwgroup->wrq = *rq; /* scratchpad */ ide_set_handler (drive, &multwrite_intr, WAIT_CMD, NULL); if (ide_multwrite(drive, drive->mult_count)) { - unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return ide_stopped; } } else { + char *buffer = ide_map_buffer(rq, &flags); ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); - idedisk_output_data(drive, rq->buffer, SECTOR_WORDS); + idedisk_output_data(drive, buffer, SECTOR_WORDS); + ide_unmap_buffer(buffer, &flags); } return ide_started; } @@ -482,7 +489,8 @@ { if (drive->removable && !drive->usage) { invalidate_bdev(inode->i_bdev, 0); - if (drive->doorlocking && ide_wait_cmd(drive, WIN_DOORUNLOCK, 0, 0, 0, NULL)) + if (drive->doorlocking && + ide_wait_cmd(drive, WIN_DOORUNLOCK, 0, 0, 0, NULL)) drive->doorlocking = 0; } MOD_DEC_USE_COUNT; @@ -495,9 +503,7 @@ static void idedisk_revalidate (ide_drive_t *drive) { - grok_partitions(HWIF(drive)->gd, drive->select.b.unit, - 1<nowerr = arg; drive->bad_wstat = arg ? 
BAD_R_STAT : BAD_W_STAT; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&ide_lock); return 0; } @@ -691,7 +697,6 @@ ide_add_setting(drive, "nowerr", SETTING_RW, HDIO_GET_NOWERR, HDIO_SET_NOWERR, TYPE_BYTE, 0, 1, 1, 1, &drive->nowerr, set_nowerr); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, 4096, PAGE_SIZE, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); ide_add_setting(drive, "lun", SETTING_RW, -1, -1, TYPE_INT, 0, 7, 1, 1, &drive->lun, NULL); ide_add_setting(drive, "failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->failures, NULL); ide_add_setting(drive, "max_failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->max_failures, NULL); diff -urN linux-2.5.1-pre1/drivers/ide/ide-dma.c linux/drivers/ide/ide-dma.c --- linux-2.5.1-pre1/drivers/ide/ide-dma.c Sun Sep 9 10:43:02 2001 +++ linux/drivers/ide/ide-dma.c Sat Dec 1 00:37:05 2001 @@ -203,30 +203,10 @@ #endif /* CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */ /* - * Our Physical Region Descriptor (PRD) table should be large enough - * to handle the biggest I/O request we are likely to see. Since requests - * can have no more than 256 sectors, and since the typical blocksize is - * two or more sectors, we could get by with a limit of 128 entries here for - * the usual worst case. Most requests seem to include some contiguous blocks, - * further reducing the number of table entries required. - * - * The driver reverts to PIO mode for individual requests that exceed - * this limit (possible with 512 byte blocksizes, eg. MSDOS f/s), so handling - * 100% of all crazy scenarios here is not necessary. - * - * As it turns out though, we must allocate a full 4KB page for this, - * so the two PRD tables (ide0 & ide1) will each get half of that, - * allowing each to have about 256 entries (8 bytes each) from this. 
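The comment deleted in this hunk is still the clearest statement of how the PRD table was sized: 8-byte descriptors, one 4KB page split between ide0 and ide1, hence about 256 entries per interface, with bit 31 of the count word marking the final entry (the *--table |= 0x80000000 retained below). A runnable user-space check of that arithmetic; the struct name is illustrative, only the sizes come from the comment:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SIZE   4096
    #define PRD_BYTES   8
    #define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES))  /* 256 per hwif */

    struct prd_entry {       /* one Physical Region Descriptor */
        uint32_t addr;       /* physical address of the region */
        uint32_t xcount;     /* byte count; bit 31 flags the last entry */
    };

    int main(void)
    {
        printf("sizeof(struct prd_entry) = %zu\n", sizeof(struct prd_entry));
        printf("entries per interface   = %d\n", PRD_ENTRIES);
        return 0;
    }
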
- */ -#define PRD_BYTES 8 -#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES)) - -/* * dma_intr() is the handler for disk read/write DMA interrupts */ ide_startstop_t ide_dma_intr (ide_drive_t *drive) { - int i; byte stat, dma_stat; dma_stat = HWIF(drive)->dmaproc(ide_dma_end, drive); @@ -234,11 +214,8 @@ if (OK_STAT(stat,DRIVE_READY,drive->bad_wstat|DRQ_STAT)) { if (!dma_stat) { struct request *rq = HWGROUP(drive)->rq; - rq = HWGROUP(drive)->rq; - for (i = rq->nr_sectors; i > 0;) { - i -= rq->current_nr_sectors; - ide_end_request(1, HWGROUP(drive)); - } + + __ide_end_request(HWGROUP(drive), 1, rq->nr_sectors); return ide_stopped; } printk("%s: dma_intr: bad DMA status (dma_stat=%x)\n", @@ -249,35 +226,18 @@ static int ide_build_sglist (ide_hwif_t *hwif, struct request *rq) { - struct buffer_head *bh; struct scatterlist *sg = hwif->sg_table; - int nents = 0; + int nents; + + nents = blk_rq_map_sg(rq->q, rq, hwif->sg_table); + + if (nents > rq->nr_segments) + printk("ide-dma: received %d segments, build %d\n", rq->nr_segments, nents); - if (hwif->sg_dma_active) - BUG(); - if (rq->cmd == READ) hwif->sg_dma_direction = PCI_DMA_FROMDEVICE; else hwif->sg_dma_direction = PCI_DMA_TODEVICE; - bh = rq->bh; - do { - unsigned char *virt_addr = bh->b_data; - unsigned int size = bh->b_size; - - if (nents >= PRD_ENTRIES) - return 0; - - while ((bh = bh->b_reqnext) != NULL) { - if ((virt_addr + size) != (unsigned char *) bh->b_data) - break; - size += bh->b_size; - } - memset(&sg[nents], 0, sizeof(*sg)); - sg[nents].address = virt_addr; - sg[nents].length = size; - nents++; - } while (bh != NULL); return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction); } @@ -289,9 +249,10 @@ */ int ide_build_dmatable (ide_drive_t *drive, ide_dma_action_t func) { - unsigned int *table = HWIF(drive)->dmatable_cpu; + ide_hwif_t *hwif = HWIF(drive); + unsigned int *table = hwif->dmatable_cpu; #ifdef CONFIG_BLK_DEV_TRM290 - unsigned int is_trm290_chipset = (HWIF(drive)->chipset == ide_trm290); + unsigned int is_trm290_chipset = (hwif->chipset == ide_trm290); #else const int is_trm290_chipset = 0; #endif @@ -299,13 +260,12 @@ int i; struct scatterlist *sg; - HWIF(drive)->sg_nents = i = ide_build_sglist(HWIF(drive), HWGROUP(drive)->rq); - + hwif->sg_nents = i = ide_build_sglist(hwif, HWGROUP(drive)->rq); if (!i) return 0; - sg = HWIF(drive)->sg_table; - while (i && sg_dma_len(sg)) { + sg = hwif->sg_table; + while (i) { u32 cur_addr; u32 cur_len; @@ -319,55 +279,53 @@ */ while (cur_len) { + u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); + if (count++ >= PRD_ENTRIES) { - printk("%s: DMA table too small\n", drive->name); - goto use_pio_instead; - } else { - u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); - - if (bcount > cur_len) - bcount = cur_len; - *table++ = cpu_to_le32(cur_addr); - xcount = bcount & 0xffff; - if (is_trm290_chipset) - xcount = ((xcount >> 2) - 1) << 16; - if (xcount == 0x0000) { - /* - * Most chipsets correctly interpret a length of 0x0000 as 64KB, - * but at least one (e.g. CS5530) misinterprets it as zero (!). - * So here we break the 64KB entry into two 32KB entries instead. 
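The 0x0000-means-64KB quirk handled in both the old and the rewritten loop deserves a concrete example: most chipsets read a PRD byte count of 0x0000 as 64KB, but some (the CS5530 is named in the comment) read it as zero, so a 64KB region must be emitted as two 32KB entries. A runnable sketch of just that splitting step; emit_prd() stands in for the *table++ writes in the patch:

    #include <stdio.h>
    #include <stdint.h>

    static void emit_prd(uint32_t addr, uint32_t count)
    {
        printf("PRD: addr=0x%08x count=0x%05x\n", addr, count);
    }

    /* bcount is at most 0x10000 and never crosses a 64KB boundary,
     * exactly as the loop above guarantees. */
    static void emit_span(uint32_t cur_addr, uint32_t bcount)
    {
        uint32_t xcount = bcount & 0xffff;

        if (xcount == 0x0000) {           /* 64KB: split into 2 x 32KB */
            emit_prd(cur_addr, 0x8000);
            emit_prd(cur_addr + 0x8000, 0x8000);
        } else {
            emit_prd(cur_addr, xcount);
        }
    }

    int main(void)
    {
        emit_span(0x00100000, 0x10000);   /* 64KB -> two entries */
        emit_span(0x00200000, 0x02000);   /* 8KB  -> one entry   */
        return 0;
    }
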
- */ - if (count++ >= PRD_ENTRIES) { - printk("%s: DMA table too small\n", drive->name); - goto use_pio_instead; - } - *table++ = cpu_to_le32(0x8000); - *table++ = cpu_to_le32(cur_addr + 0x8000); - xcount = 0x8000; + printk("ide-dma: req %p\n", HWGROUP(drive)->rq); + printk("count %d, sg_nents %d, cur_len %d, cur_addr %u\n", count, hwif->sg_nents, cur_len, cur_addr); + BUG(); + } + + if (bcount > cur_len) + bcount = cur_len; + *table++ = cpu_to_le32(cur_addr); + xcount = bcount & 0xffff; + if (is_trm290_chipset) + xcount = ((xcount >> 2) - 1) << 16; + if (xcount == 0x0000) { + /* + * Most chipsets correctly interpret a length of + * 0x0000 as 64KB, but at least one (e.g. CS5530) + * misinterprets it as zero (!). So here we break + * the 64KB entry into two 32KB entries instead. + */ + if (count++ >= PRD_ENTRIES) { + pci_unmap_sg(hwif->pci_dev, sg, + hwif->sg_nents, + hwif->sg_dma_direction); + return 0; } - *table++ = cpu_to_le32(xcount); - cur_addr += bcount; - cur_len -= bcount; + + *table++ = cpu_to_le32(0x8000); + *table++ = cpu_to_le32(cur_addr + 0x8000); + xcount = 0x8000; } + *table++ = cpu_to_le32(xcount); + cur_addr += bcount; + cur_len -= bcount; } sg++; i--; } - if (count) { - if (!is_trm290_chipset) - *--table |= cpu_to_le32(0x80000000); - return count; - } - printk("%s: empty DMA table?\n", drive->name); -use_pio_instead: - pci_unmap_sg(HWIF(drive)->pci_dev, - HWIF(drive)->sg_table, - HWIF(drive)->sg_nents, - HWIF(drive)->sg_dma_direction); - HWIF(drive)->sg_dma_active = 0; - return 0; /* revert to PIO for this request */ + if (!count) + printk("%s: empty DMA table?\n", drive->name); + else if (!is_trm290_chipset) + *--table |= cpu_to_le32(0x80000000); + + return count; } /* Teardown mappings after DMA has completed. */ @@ -378,7 +336,6 @@ int nents = HWIF(drive)->sg_nents; pci_unmap_sg(dev, sg, nents, HWIF(drive)->sg_dma_direction); - HWIF(drive)->sg_dma_active = 0; } /* @@ -532,6 +489,20 @@ } #endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */ +static void ide_toggle_bounce(ide_drive_t *drive, int on) +{ + dma64_addr_t addr = BLK_BOUNCE_HIGH; + + if (on && drive->media == ide_disk && HWIF(drive)->highmem) { + if (!PCI_DMA_BUS_IS_PHYS) + addr = BLK_BOUNCE_ANY; + else + addr = HWIF(drive)->pci_dev->dma_mask; + } + + blk_queue_bounce_limit(&drive->queue, addr); +} + /* * ide_dmaproc() initiates/aborts DMA read/write operations on a drive. 
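The ide_toggle_bounce() helper above is the consumer of all the one-line hwif->highmem = 1 additions scattered through the chipset drivers in this patch: a driver asserts that its DMA engine can address highmem pages, and the bounce limit is then raised from BLK_BOUNCE_HIGH to the device's dma_mask, or to BLK_BOUNCE_ANY when an IOMMU makes the bus non-physical. A sketch of the opt-in side, with an invented chipset name standing in for the aec62xx/piix/via82cxxx hunks:

    /* Hypothetical chipset init. Setting highmem = 1 is what later
     * lets ide_toggle_bounce() relax the queue's bounce limit. */
    extern int mychip_dmaproc(ide_dma_action_t func, ide_drive_t *drive);

    static void mychip_init_hwif(ide_hwif_t *hwif)
    {
    #ifdef CONFIG_BLK_DEV_IDEDMA
        if (hwif->dma_base) {
            hwif->dmaproc = &mychip_dmaproc;
            hwif->highmem = 1;    /* DMA engine handles highmem pages */
        }
    #endif
    }
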
* @@ -550,19 +521,20 @@ */ int ide_dmaproc (ide_dma_action_t func, ide_drive_t *drive) { -// ide_hwgroup_t *hwgroup = HWGROUP(drive); - ide_hwif_t *hwif = HWIF(drive); - unsigned long dma_base = hwif->dma_base; - byte unit = (drive->select.b.unit & 0x01); - unsigned int count, reading = 0; + ide_hwif_t *hwif = HWIF(drive); + unsigned long dma_base = hwif->dma_base; + byte unit = (drive->select.b.unit & 0x01); + unsigned int count, reading = 0, set_high = 1; byte dma_stat; switch (func) { case ide_dma_off: printk("%s: DMA disabled\n", drive->name); + set_high = 0; case ide_dma_off_quietly: outb(inb(dma_base+2) & ~(1<<(5+unit)), dma_base+2); case ide_dma_on: + ide_toggle_bounce(drive, set_high); drive->using_dma = (func == ide_dma_on); if (drive->using_dma) outb(inb(dma_base+2)|(1<<(5+unit)), dma_base+2); diff -urN linux-2.5.1-pre1/drivers/ide/ide-floppy.c linux/drivers/ide/ide-floppy.c --- linux-2.5.1-pre1/drivers/ide/ide-floppy.c Thu Oct 11 09:14:32 2001 +++ linux/drivers/ide/ide-floppy.c Sat Dec 1 00:37:05 2001 @@ -707,24 +707,24 @@ static void idefloppy_input_buffers (ide_drive_t *drive, idefloppy_pc_t *pc, unsigned int bcount) { struct request *rq = pc->rq; - struct buffer_head *bh = rq->bh; + struct bio *bio = rq->bio; int count; while (bcount) { - if (pc->b_count == bh->b_size) { + if (pc->b_count == bio_size(bio)) { rq->sector += rq->current_nr_sectors; rq->nr_sectors -= rq->current_nr_sectors; idefloppy_end_request (1, HWGROUP(drive)); - if ((bh = rq->bh) != NULL) + if ((bio = rq->bio) != NULL) pc->b_count = 0; } - if (bh == NULL) { - printk (KERN_ERR "%s: bh == NULL in idefloppy_input_buffers, bcount == %d\n", drive->name, bcount); + if (bio == NULL) { + printk (KERN_ERR "%s: bio == NULL in idefloppy_input_buffers, bcount == %d\n", drive->name, bcount); idefloppy_discard_data (drive, bcount); return; } - count = IDEFLOPPY_MIN (bh->b_size - pc->b_count, bcount); - atapi_input_bytes (drive, bh->b_data + pc->b_count, count); + count = IDEFLOPPY_MIN (bio_size(bio) - pc->b_count, bcount); + atapi_input_bytes (drive, bio_data(bio) + pc->b_count, count); bcount -= count; pc->b_count += count; } } @@ -732,7 +732,7 @@ static void idefloppy_output_buffers (ide_drive_t *drive, idefloppy_pc_t *pc, unsigned int bcount) { struct request *rq = pc->rq; - struct buffer_head *bh = rq->bh; + struct bio *bio = rq->bio; int count; while (bcount) { @@ -740,13 +740,13 @@ rq->sector += rq->current_nr_sectors; rq->nr_sectors -= rq->current_nr_sectors; idefloppy_end_request (1, HWGROUP(drive)); - if ((bh = rq->bh) != NULL) { - pc->b_data = bh->b_data; - pc->b_count = bh->b_size; + if ((bio = rq->bio) != NULL) { + pc->b_data = bio_data(bio); + pc->b_count = bio_size(bio); } } - if (bh == NULL) { - printk (KERN_ERR "%s: bh == NULL in idefloppy_output_buffers, bcount == %d\n", drive->name, bcount); + if (bio == NULL) { + printk (KERN_ERR "%s: bio == NULL in idefloppy_output_buffers, bcount == %d\n", drive->name, bcount); idefloppy_write_zeros (drive, bcount); return; } @@ -760,9 +760,9 @@ static void idefloppy_update_buffers (ide_drive_t *drive, idefloppy_pc_t *pc) { struct request *rq = pc->rq; - struct buffer_head *bh = rq->bh; + struct bio *bio = rq->bio; - while ((bh = rq->bh) != NULL) + while ((bio = rq->bio) != NULL) idefloppy_end_request (1, HWGROUP(drive)); } #endif /* CONFIG_BLK_DEV_IDEDMA */ @@ -1210,7 +1210,7 @@ pc->callback = &idefloppy_rw_callback; pc->rq = rq; pc->b_data = rq->buffer; - pc->b_count = rq->cmd == READ ? 0 : rq->bh->b_size; + pc->b_count = rq->cmd == READ ? 
0 : bio_size(rq->bio); if (rq->cmd == WRITE) set_bit (PC_WRITING, &pc->flags); pc->buffer = NULL; @@ -1778,9 +1778,7 @@ */ static void idefloppy_revalidate (ide_drive_t *drive) { - grok_partitions(HWIF(drive)->gd, drive->select.b.unit, - 1<bios_sect, NULL); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); } @@ -1930,8 +1927,7 @@ static void idefloppy_setup (ide_drive_t *drive, idefloppy_floppy_t *floppy) { struct idefloppy_id_gcw gcw; - int major = HWIF(drive)->major, i; - int minor = drive->select.b.unit << PARTN_BITS; + int i; *((unsigned short *) &gcw) = drive->id->config; drive->driver_data = floppy; @@ -1953,34 +1949,17 @@ */ if (strcmp(drive->id->model, "IOMEGA ZIP 100 ATAPI") == 0) - { - for (i = 0; i < 1 << PARTN_BITS; i++) - max_sectors[major][minor + i] = 64; - } - /* - * Guess what? The IOMEGA Clik! drive also needs the - * above fix. It makes nasty clicking noises without - * it, so please don't remove this. - */ - if (strcmp(drive->id->model, "IOMEGA Clik! 40 CZ ATAPI") == 0) - { - for (i = 0; i < 1 << PARTN_BITS; i++) - max_sectors[major][minor + i] = 64; - set_bit(IDEFLOPPY_CLIK_DRIVE, &floppy->flags); - } + blk_queue_max_sectors(&drive->queue, 64); /* * Guess what? The IOMEGA Clik! drive also needs the * above fix. It makes nasty clicking noises without * it, so please don't remove this. */ - if (strcmp(drive->id->model, "IOMEGA Clik! 40 CZ ATAPI") == 0) - { - for (i = 0; i < 1 << PARTN_BITS; i++) - max_sectors[major][minor + i] = 64; + if (strcmp(drive->id->model, "IOMEGA Clik! 
40 CZ ATAPI") == 0) { + blk_queue_max_sectors(&drive->queue, 64); set_bit(IDEFLOPPY_CLIK_DRIVE, &floppy->flags); } - (void) idefloppy_get_capacity (drive); idefloppy_add_settings(drive); diff -urN linux-2.5.1-pre1/drivers/ide/ide-pci.c linux/drivers/ide/ide-pci.c --- linux-2.5.1-pre1/drivers/ide/ide-pci.c Thu Oct 25 13:53:47 2001 +++ linux/drivers/ide/ide-pci.c Sat Dec 1 00:37:05 2001 @@ -79,6 +79,7 @@ #define DEVID_AMD7401 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_COBRA_7401}) #define DEVID_AMD7409 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7409}) #define DEVID_AMD7411 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7411}) +#define DEVID_AMD7441 ((ide_pci_devid_t){PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_7441}) #define DEVID_PDCADMA ((ide_pci_devid_t){PCI_VENDOR_ID_PDC, PCI_DEVICE_ID_PDC_1841}) #define DEVID_SLC90E66 ((ide_pci_devid_t){PCI_VENDOR_ID_EFAR, PCI_DEVICE_ID_EFAR_SLC90E66_1}) #define DEVID_OSB4 ((ide_pci_devid_t){PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4IDE}) @@ -437,6 +438,7 @@ {DEVID_AMD7401, "AMD7401", NULL, NULL, NULL, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, {DEVID_AMD7409, "AMD7409", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 }, {DEVID_AMD7411, "AMD7411", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 }, + {DEVID_AMD7441, "AMD7441", PCI_AMD74XX, ATA66_AMD74XX, INIT_AMD74XX, DMA_AMD74XX, {{0x40,0x01,0x01}, {0x40,0x02,0x02}}, ON_BOARD, 0 }, {DEVID_PDCADMA, "PDCADMA", PCI_PDCADMA, ATA66_PDCADMA, INIT_PDCADMA, DMA_PDCADMA, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, OFF_BOARD, 0 }, {DEVID_SLC90E66,"SLC90E66", PCI_SLC90E66, ATA66_SLC90E66, INIT_SLC90E66, NULL, {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, ON_BOARD, 0 }, {DEVID_OSB4, "ServerWorks OSB4", PCI_SVWKS, ATA66_SVWKS, INIT_SVWKS, NULL, {{0x00,0x00,0x00}, {0x00,0x00,0x00}}, ON_BOARD, 0 }, diff -urN linux-2.5.1-pre1/drivers/ide/ide-probe.c linux/drivers/ide/ide-probe.c --- linux-2.5.1-pre1/drivers/ide/ide-probe.c Thu Oct 11 09:14:32 2001 +++ linux/drivers/ide/ide-probe.c Sat Dec 1 00:37:05 2001 @@ -594,9 +594,21 @@ static void ide_init_queue(ide_drive_t *drive) { request_queue_t *q = &drive->queue; + int max_sectors; q->queuedata = HWGROUP(drive); - blk_init_queue(q, do_ide_request); + blk_init_queue(q, do_ide_request, drive->name); + + /* IDE can do up to 128K per request, pdc4030 needs smaller limit */ +#ifdef CONFIG_BLK_DEV_PDC4030 + max_sectors = 127; +#else + max_sectors = 255; +#endif + blk_queue_max_sectors(q, max_sectors); + + /* IDE DMA can do PRD_ENTRIES number of segments */ + q->max_segments = PRD_ENTRIES; } /* @@ -670,7 +682,7 @@ hwgroup->rq = NULL; hwgroup->handler = NULL; hwgroup->drive = NULL; - hwgroup->busy = 0; + hwgroup->flags = 0; init_timer(&hwgroup->timer); hwgroup->timer.function = &ide_timer_expiry; hwgroup->timer.data = (unsigned long) hwgroup; @@ -749,7 +761,7 @@ { struct gendisk *gd; unsigned int unit, units, minors; - int *bs, *max_sect, *max_ra; + int *bs, *max_ra; extern devfs_handle_t ide_devfs_handle; /* figure out maximum drive number on the interface */ @@ -762,23 +774,15 @@ gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL); gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL); bs = kmalloc (minors*sizeof(int), GFP_KERNEL); - max_sect = kmalloc (minors*sizeof(int), GFP_KERNEL); max_ra = kmalloc (minors*sizeof(int), GFP_KERNEL); memset(gd->part, 0, minors * sizeof(struct hd_struct)); 
/* cdroms and msdos f/s are examples of non-1024 blocksizes */ blksize_size[hwif->major] = bs; - max_sectors[hwif->major] = max_sect; max_readahead[hwif->major] = max_ra; for (unit = 0; unit < minors; ++unit) { *bs++ = BLOCK_SIZE; -#ifdef CONFIG_BLK_DEV_PDC4030 - *max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : 255); -#else - /* IDE can do up to 128K per request. */ - *max_sect++ = 255; -#endif *max_ra++ = MAX_READAHEAD; } @@ -870,13 +874,6 @@ read_ahead[hwif->major] = 8; /* (4kB) */ hwif->present = 1; /* success */ -#if (DEBUG_SPINLOCK > 0) -{ - static int done = 0; - if (!done++) - printk("io_request_lock is %p\n", &io_request_lock); /* FIXME */ -} -#endif return hwif->present; } diff -urN linux-2.5.1-pre1/drivers/ide/ide-proc.c linux/drivers/ide/ide-proc.c --- linux-2.5.1-pre1/drivers/ide/ide-proc.c Fri Sep 7 09:28:38 2001 +++ linux/drivers/ide/ide-proc.c Sat Dec 1 00:37:05 2001 @@ -190,7 +190,7 @@ if (hwif->mate && hwif->mate->hwgroup) mategroup = (ide_hwgroup_t *)(hwif->mate->hwgroup); cli(); /* all CPUs; ensure all writes are done together */ - while (mygroup->busy || (mategroup && mategroup->busy)) { + while (test_bit(IDE_BUSY, &mygroup->flags) || (mategroup && test_bit(IDE_BUSY, &mategroup->flags))) { sti(); /* all CPUs */ if (0 < (signed long)(jiffies - timeout)) { printk("/proc/ide/%s/config: channel(s) busy, cannot write\n", hwif->name); diff -urN linux-2.5.1-pre1/drivers/ide/ide-tape.c linux/drivers/ide/ide-tape.c --- linux-2.5.1-pre1/drivers/ide/ide-tape.c Mon Aug 13 14:56:19 2001 +++ linux/drivers/ide/ide-tape.c Sat Dec 1 00:37:05 2001 @@ -1887,8 +1887,7 @@ printk("ide-tape: %s: skipping over config parition..\n", tape->name); #endif tape->onstream_write_error = OS_PART_ERROR; - if (tape->waiting) - complete(tape->waiting); + complete(tape->waiting); } } remove_stage = 1; @@ -1904,8 +1903,7 @@ tape->nr_pending_stages++; tape->next_stage = tape->first_stage; rq->current_nr_sectors = rq->nr_sectors; - if (tape->waiting) - complete(tape->waiting); + complete(tape->waiting); } } } else if (rq->cmd == IDETAPE_READ_RQ) { diff -urN linux-2.5.1-pre1/drivers/ide/ide.c linux/drivers/ide/ide.c --- linux-2.5.1-pre1/drivers/ide/ide.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/ide/ide.c Sat Dec 1 00:37:05 2001 @@ -113,6 +113,8 @@ * Version 6.31 Debug Share INTR's and request queue streaming * Native ATA-100 support * Prep for Cascades Project + * Version 6.32 4GB highmem support for DMA, and mapping of those for + * PIO transfer (Jens Axboe) * * Some additional driver compile-time options are in ./include/linux/ide.h * @@ -121,8 +123,8 @@ * */ -#define REVISION "Revision: 6.31" -#define VERSION "Id: ide.c 6.31 2000/06/09" +#define REVISION "Revision: 6.32" +#define VERSION "Id: ide.c 6.32 2001/05/24" #undef REALLY_SLOW_IO /* most systems can safely undef this */ @@ -171,6 +173,14 @@ static int system_bus_speed; /* holds what we think is VESA/PCI bus speed */ static int initializing; /* set while initializing built-in drivers */ +/* + * protects global structures etc, we want to split this into per-hwgroup + * instead. + * + * anti-deadlock ordering: ide_lock -> DRIVE_LOCK + */ +spinlock_t ide_lock = SPIN_LOCK_UNLOCKED; + #ifdef CONFIG_BLK_DEV_IDEPCI static int ide_scan_direction; /* THIS was formerly 2.2.x pci=reverse */ #endif /* CONFIG_BLK_DEV_IDEPCI */ @@ -180,7 +190,7 @@ * ide_lock is used by the Atari code to obtain access to the IDE interrupt, * which is shared between several drivers. 
*/ -static int ide_lock; +static int ide_intr_lock; #endif /* __mc68000__ || CONFIG_APUS */ int noautodma = 0; @@ -542,18 +552,26 @@ return 1; /* drive ready: *might* be interrupting */ } -/* - * This is our end_request replacement function. - */ -void ide_end_request (byte uptodate, ide_hwgroup_t *hwgroup) +inline int __ide_end_request(ide_hwgroup_t *hwgroup, int uptodate, int nr_secs) { + ide_drive_t *drive = hwgroup->drive; struct request *rq; unsigned long flags; - ide_drive_t *drive = hwgroup->drive; + int ret = 1; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); rq = hwgroup->rq; + if (rq->inactive) + BUG(); + + /* + * small hack to eliminate locking from ide_end_request to grab + * the first segment number of sectors + */ + if (!nr_secs) + nr_secs = rq->hard_cur_sectors; + /* * decide whether to reenable DMA -- 3 is a random magic for now, * if we DMA timeout more than 3 times, just stay in PIO @@ -563,13 +581,26 @@ hwgroup->hwif->dmaproc(ide_dma_on, drive); } - if (!end_that_request_first(rq, uptodate, hwgroup->drive->name)) { + if (!end_that_request_first(rq, uptodate, nr_secs)) { add_blkdev_randomness(MAJOR(rq->rq_dev)); + spin_lock(DRIVE_LOCK(drive)); blkdev_dequeue_request(rq); hwgroup->rq = NULL; end_that_request_last(rq); + spin_unlock(DRIVE_LOCK(drive)); + ret = 0; } - spin_unlock_irqrestore(&io_request_lock, flags); + + spin_unlock_irqrestore(&ide_lock, flags); + return ret; +} + +/* + * This is our end_request replacement function. + */ +int ide_end_request (byte uptodate, ide_hwgroup_t *hwgroup) +{ + return __ide_end_request(hwgroup, uptodate, 0); } /* @@ -585,7 +616,7 @@ unsigned long flags; ide_hwgroup_t *hwgroup = HWGROUP(drive); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); if (hwgroup->handler != NULL) { printk("%s: ide_set_handler: handler not null; old=%p, new=%p\n", drive->name, hwgroup->handler, handler); @@ -594,7 +625,7 @@ hwgroup->expiry = expiry; hwgroup->timer.expires = jiffies + timeout; add_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); } /* @@ -844,9 +875,8 @@ unsigned long flags; struct request *rq; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); rq = HWGROUP(drive)->rq; - spin_unlock_irqrestore(&io_request_lock, flags); if (rq->cmd == IDE_DRIVE_CMD) { byte *args = (byte *) rq->buffer; @@ -869,11 +899,16 @@ args[6] = IN_BYTE(IDE_SELECT_REG); } } - spin_lock_irqsave(&io_request_lock, flags); + + spin_lock(DRIVE_LOCK(drive)); + if (rq->inactive) + BUG(); blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; end_that_request_last(rq); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock(DRIVE_LOCK(drive)); + + spin_unlock_irqrestore(&ide_lock, flags); } /* @@ -1192,17 +1227,20 @@ /* * start_request() initiates handling of a new I/O request */ -static ide_startstop_t start_request (ide_drive_t *drive) +static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) { ide_startstop_t startstop; - unsigned long block, blockend; - struct request *rq = blkdev_entry_next_request(&drive->queue.queue_head); + unsigned long block; unsigned int minor = MINOR(rq->rq_dev), unit = minor >> PARTN_BITS; ide_hwif_t *hwif = HWIF(drive); + if (rq->inactive) + BUG(); + #ifdef DEBUG printk("%s: start_request: current=0x%08lx\n", hwif->name, (unsigned long) rq); #endif + /* bail early if we've exceeded max_failures */ if (drive->max_failures && (drive->failures > 
drive->max_failures)) { goto kill_rq; @@ -1219,16 +1257,11 @@ } #endif block = rq->sector; - blockend = block + rq->nr_sectors; + /* Strange disk manager remap */ if ((rq->cmd == READ || rq->cmd == WRITE) && (drive->media == ide_disk || drive->media == ide_floppy)) { - if ((blockend < block) || (blockend > drive->part[minor&PARTN_MASK].nr_sects)) { - printk("%s%c: bad access: block=%ld, count=%ld\n", drive->name, - (minor&PARTN_MASK)?'0'+(minor&PARTN_MASK):' ', block, rq->nr_sectors); - goto kill_rq; - } - block += drive->part[minor&PARTN_MASK].start_sect + drive->sect0; + block += drive->sect0; } /* Yecch - this will shift the entire interval, possibly killing some innocent following sector */ @@ -1240,7 +1273,8 @@ #endif SELECT_DRIVE(hwif, drive); - if (ide_wait_stat(&startstop, drive, drive->ready_stat, BUSY_STAT|DRQ_STAT, WAIT_READY)) { + if (ide_wait_stat(&startstop, drive, drive->ready_stat, + BUSY_STAT|DRQ_STAT, WAIT_READY)) { printk("%s: drive not ready for command\n", drive->name); return startstop; } @@ -1251,7 +1285,8 @@ if (drive->driver != NULL) { return (DRIVER(drive)->do_request(drive, rq, block)); } - printk("%s: media type %d not supported\n", drive->name, drive->media); + printk("%s: media type %d not supported\n", + drive->name, drive->media); goto kill_rq; } return do_special(drive); @@ -1267,13 +1302,15 @@ { ide_hwgroup_t *hwgroup = HWGROUP(drive); unsigned long flags; + struct request *rq; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + rq = hwgroup->rq; + spin_unlock_irqrestore(&ide_lock, flags); - return start_request(drive); + return start_request(drive, rq); } /* @@ -1305,7 +1342,7 @@ || (drive->sleep && (!best->sleep || 0 < (signed long)(best->sleep - drive->sleep))) || (!best->sleep && 0 < (signed long)(WAKEUP(best) - WAKEUP(drive)))) { - if( !drive->queue.plugged ) + if (!blk_queue_plugged(&drive->queue)) best = drive; } } @@ -1334,7 +1371,7 @@ /* * Issue a new request to a drive from hwgroup - * Caller must have already done spin_lock_irqsave(&io_request_lock, ..); + * Caller must have already done spin_lock_irqsave(DRIVE_LOCK(drive), ...) * * A hwgroup is a serialized group of IDE interfaces. Usually there is * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640) @@ -1346,39 +1383,34 @@ * possibly along with many other devices. This is especially common in * PCI-based systems with off-board IDE controller cards. * - * The IDE driver uses the single global io_request_lock spinlock to protect - * access to the request queues, and to protect the hwgroup->busy flag. + * The IDE driver uses the queue spinlock to protect access to the request + * queues. * * The first thread into the driver for a particular hwgroup sets the - * hwgroup->busy flag to indicate that this hwgroup is now active, + * hwgroup->flags IDE_BUSY flag to indicate that this hwgroup is now active, * and then initiates processing of the top request from the request queue. * * Other threads attempting entry notice the busy setting, and will simply - * queue their new requests and exit immediately. Note that hwgroup->busy - * remains set even when the driver is merely awaiting the next interrupt. + * queue their new requests and exit immediately. Note that hwgroup->flags + * remains busy even when the driver is merely awaiting the next interrupt. * Thus, the meaning is "this hwgroup is busy processing a request". 
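The comment block here describes the hwgroup busy state; in the code, the plain hwgroup->busy int becomes an IDE_BUSY bit in hwgroup->flags, claimed with test_and_set_bit() in the loop that follows so that the claim itself is atomic. A runnable user-space sketch of the claim/release pattern, with C11 atomics standing in for the kernel bitops:

    #include <stdatomic.h>
    #include <stdio.h>

    #define IDE_BUSY  0
    #define IDE_SLEEP 1

    static atomic_ulong flags;

    /* Mirrors !test_and_set_bit(IDE_BUSY, &hwgroup->flags): returns 1
     * only for the caller that actually took the bit. */
    static int claim_hwgroup(void)
    {
        unsigned long bit = 1UL << IDE_BUSY;
        return !(atomic_fetch_or(&flags, bit) & bit);
    }

    static void release_hwgroup(void)   /* clear_bit(IDE_BUSY, ...) */
    {
        atomic_fetch_and(&flags, ~(1UL << IDE_BUSY));
    }

    int main(void)
    {
        printf("first claim:  %d\n", claim_hwgroup());  /* 1 */
        printf("second claim: %d\n", claim_hwgroup());  /* 0, busy */
        release_hwgroup();
        printf("reclaim:      %d\n", claim_hwgroup());  /* 1 */
        return 0;
    }
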
* * When processing of a request completes, the completing thread or IRQ-handler * will start the next request from the queue. If no more work remains, - * the driver will clear the hwgroup->busy flag and exit. - * - * The io_request_lock (spinlock) is used to protect all access to the - * hwgroup->busy flag, but is otherwise not needed for most processing in - * the driver. This makes the driver much more friendlier to shared IRQs - * than previous designs, while remaining 100% (?) SMP safe and capable. + * the driver will clear the hwgroup->flags IDE_BUSY flag and exit. */ static void ide_do_request(ide_hwgroup_t *hwgroup, int masked_irq) { ide_drive_t *drive; ide_hwif_t *hwif; ide_startstop_t startstop; + struct request *rq; - ide_get_lock(&ide_lock, ide_intr, hwgroup); /* for atari only: POSSIBLY BROKEN HERE(?) */ + ide_get_lock(&ide_intr_lock, ide_intr, hwgroup);/* for atari only: POSSIBLY BROKEN HERE(?) */ __cli(); /* necessary paranoia: ensure IRQs are masked on local CPU */ - while (!hwgroup->busy) { - hwgroup->busy = 1; + while (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) { drive = choose_drive(hwgroup); if (drive == NULL) { unsigned long sleep = 0; @@ -1401,13 +1433,13 @@ if (timer_pending(&hwgroup->timer)) printk("ide_set_handler: timer already active\n"); #endif - hwgroup->sleeping = 1; /* so that ide_timer_expiry knows what to do */ + set_bit(IDE_SLEEP, &hwgroup->flags); mod_timer(&hwgroup->timer, sleep); - /* we purposely leave hwgroup->busy==1 while sleeping */ + /* we purposely leave hwgroup busy while sleeping */ } else { /* Ugly, but how can we sleep for the lock otherwise? perhaps from tq_disk? */ - ide_release_lock(&ide_lock); /* for atari only */ - hwgroup->busy = 0; + ide_release_lock(&ide_intr_lock);/* for atari only */ + clear_bit(IDE_BUSY, &hwgroup->flags); } return; /* no more work for this hwgroup (for now) */ } @@ -1421,9 +1453,16 @@ drive->sleep = 0; drive->service_start = jiffies; - if ( drive->queue.plugged ) /* paranoia */ - printk("%s: Huh? nuking plugged queue\n", drive->name); - hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head); + if (blk_queue_plugged(&drive->queue)) + BUG(); + + /* + * just continuing an interrupted request maybe + */ + spin_lock(DRIVE_LOCK(drive)); + rq = hwgroup->rq = elv_next_request(&drive->queue); + spin_unlock(DRIVE_LOCK(drive)); + /* * Some systems have trouble with IDE IRQs arriving while * the driver is still setting things up. So, here we disable @@ -1434,14 +1473,14 @@ */ if (masked_irq && hwif->irq != masked_irq) disable_irq_nosync(hwif->irq); - spin_unlock(&io_request_lock); + spin_unlock(&ide_lock); ide__sti(); /* allow other IRQs while we start this request */ - startstop = start_request(drive); - spin_lock_irq(&io_request_lock); + startstop = start_request(drive, rq); + spin_lock_irq(&ide_lock); if (masked_irq && hwif->irq != masked_irq) enable_irq(hwif->irq); if (startstop == ide_stopped) - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); } } @@ -1460,7 +1499,19 @@ */ void do_ide_request(request_queue_t *q) { + unsigned long flags; + + /* + * release queue lock, grab IDE global lock and restore when + * we leave... 
+ */ + spin_unlock(&q->queue_lock); + + spin_lock_irqsave(&ide_lock, flags); ide_do_request(q->queuedata, 0); + spin_unlock_irqrestore(&ide_lock, flags); + + spin_lock(&q->queue_lock); } /* @@ -1501,9 +1552,14 @@ HWGROUP(drive)->rq = NULL; rq->errors = 0; - rq->sector = rq->bh->b_rsector; - rq->current_nr_sectors = rq->bh->b_size >> 9; - rq->buffer = rq->bh->b_data; + rq->sector = rq->bio->bi_sector; + rq->current_nr_sectors = bio_sectors(rq->bio); + + /* + * just to make sure... + */ + if (rq->bio) + rq->buffer = NULL; } /* @@ -1519,7 +1575,11 @@ unsigned long flags; unsigned long wait; - spin_lock_irqsave(&io_request_lock, flags); + /* + * a global lock protects timers etc -- shouldn't get contention + * worth mentioning + */ + spin_lock_irqsave(&ide_lock, flags); del_timer(&hwgroup->timer); if ((handler = hwgroup->handler) == NULL) { @@ -1529,10 +1589,8 @@ * or we were "sleeping" to give other devices a chance. * Either way, we don't really want to complain about anything. */ - if (hwgroup->sleeping) { - hwgroup->sleeping = 0; - hwgroup->busy = 0; - } + if (test_and_clear_bit(IDE_SLEEP, &hwgroup->flags)) + clear_bit(IDE_BUSY, &hwgroup->flags); } else { ide_drive_t *drive = hwgroup->drive; if (!drive) { @@ -1541,17 +1599,16 @@ } else { ide_hwif_t *hwif; ide_startstop_t startstop; - if (!hwgroup->busy) { - hwgroup->busy = 1; /* paranoia */ - printk("%s: ide_timer_expiry: hwgroup->busy was 0 ??\n", drive->name); - } + /* paranoia */ + if (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) + printk("%s: ide_timer_expiry: hwgroup was not busy??\n", drive->name); if ((expiry = hwgroup->expiry) != NULL) { /* continue */ if ((wait = expiry(drive)) != 0) { /* reset timer */ hwgroup->timer.expires = jiffies + wait; add_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } } @@ -1561,7 +1618,7 @@ * the handler() function, which means we need to globally * mask the specific IRQ: */ - spin_unlock(&io_request_lock); + spin_unlock(&ide_lock); hwif = HWIF(drive); #if DISABLE_IRQ_NOSYNC disable_irq_nosync(hwif->irq); @@ -1587,13 +1644,13 @@ set_recovery_timer(hwif); drive->service_time = jiffies - drive->service_start; enable_irq(hwif->irq); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&ide_lock); if (startstop == ide_stopped) - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); } } ide_do_request(hwgroup, 0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); } /* @@ -1656,13 +1713,11 @@ ide_handler_t *handler; ide_startstop_t startstop; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); hwif = hwgroup->hwif; - if (!ide_ack_intr(hwif)) { - spin_unlock_irqrestore(&io_request_lock, flags); - return; - } + if (!ide_ack_intr(hwif)) + goto out_lock; if ((handler = hwgroup->handler) == NULL || hwgroup->poll_timeout != 0) { /* @@ -1694,16 +1749,14 @@ (void) IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]); #endif /* CONFIG_BLK_DEV_IDEPCI */ } - spin_unlock_irqrestore(&io_request_lock, flags); - return; + goto out_lock; } drive = hwgroup->drive; if (!drive) { /* * This should NEVER happen, and there isn't much we could do about it here. */ - spin_unlock_irqrestore(&io_request_lock, flags); - return; + goto out_lock; } if (!drive_is_ready(drive)) { /* @@ -1712,21 +1765,19 @@ * the IRQ before their status register is up to date. Hopefully we have * enough advance overhead that the latter isn't a problem. 
*/ - spin_unlock_irqrestore(&io_request_lock, flags); - return; - } - if (!hwgroup->busy) { - hwgroup->busy = 1; /* paranoia */ - printk("%s: ide_intr: hwgroup->busy was 0 ??\n", drive->name); + goto out_lock; } + /* paranoia */ + if (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) + printk("%s: ide_intr: hwgroup was not busy??\n", drive->name); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock(&io_request_lock); + spin_unlock(&ide_lock); if (drive->unmask) ide__sti(); /* local CPU only */ startstop = handler(drive); /* service this interrupt, may set handler for next interrupt */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&ide_lock); /* * Note that handler() may have set things up for another @@ -1739,13 +1790,15 @@ drive->service_time = jiffies - drive->service_start; if (startstop == ide_stopped) { if (hwgroup->handler == NULL) { /* paranoia */ - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); ide_do_request(hwgroup, hwif->irq); } else { printk("%s: ide_intr: huh? expected NULL handler on exit\n", drive->name); } } - spin_unlock_irqrestore(&io_request_lock, flags); + +out_lock: + spin_unlock_irqrestore(&ide_lock, flags); } /* @@ -1755,9 +1808,6 @@ ide_drive_t *get_info_ptr (kdev_t i_rdev) { int major = MAJOR(i_rdev); -#if 0 - int minor = MINOR(i_rdev) & PARTN_MASK; -#endif unsigned int h; for (h = 0; h < MAX_HWIFS; ++h) { @@ -1766,11 +1816,7 @@ unsigned unit = DEVICE_NR(i_rdev); if (unit < MAX_DRIVES) { ide_drive_t *drive = &hwif->drives[unit]; -#if 0 - if ((drive->present) && (drive->part[minor].nr_sects)) -#else if (drive->present) -#endif return drive; } break; @@ -1818,7 +1864,8 @@ unsigned long flags; ide_hwgroup_t *hwgroup = HWGROUP(drive); unsigned int major = HWIF(drive)->major; - struct list_head *queue_head = &drive->queue.queue_head; + request_queue_t *q = &drive->queue; + struct list_head *queue_head = &q->queue_head; DECLARE_COMPLETION(wait); #ifdef CONFIG_BLK_DEV_PDC4030 @@ -1830,8 +1877,9 @@ rq->rq_dev = MKDEV(major,(drive->select.b.unit)<waiting = &wait; - spin_lock_irqsave(&io_request_lock, flags); - if (list_empty(queue_head) || action == ide_preempt) { + spin_lock_irqsave(&ide_lock, flags); + spin_lock(DRIVE_LOCK(drive)); + if (blk_queue_empty(&drive->queue) || action == ide_preempt) { if (action == ide_preempt) hwgroup->rq = NULL; } else { @@ -1840,9 +1888,10 @@ } else queue_head = queue_head->next; } - list_add(&rq->queue, queue_head); + q->elevator.elevator_add_req_fn(q, rq, queue_head); + spin_unlock(DRIVE_LOCK(drive)); ide_do_request(hwgroup, 0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); if (action == ide_wait) { wait_for_completion(&wait); /* wait for it to be serviced */ return rq->errors ? -EIO : 0; /* return -EIO if errors */ @@ -1851,6 +1900,16 @@ } +/* Common for ide-floppy.c and ide-disk.c */ +void ide_revalidate_drive (ide_drive_t *drive) +{ + struct gendisk *g = HWIF(drive)->gd; + int minor = (drive->select.b.unit << g->minor_shift); + kdev_t dev = MKDEV(g->major, minor); + + grok_partitions(dev, current_capacity(drive)); +} + /* * This routine is called to flush all partitions and partition tables * for a changed disk, and then re-read the new partition table. 
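Taken together, the ide.c hunks above establish the locking scheme that the new comment near the top of the file states as "anti-deadlock ordering: ide_lock -> DRIVE_LOCK": the global ide_lock is always acquired first, and a drive's queue lock is only ever nested inside it. A sketch of the idiom as it appears in __ide_end_request() and ide_do_drive_cmd(), not a complete function:

    /* Nesting discipline introduced by this patch: ide_lock outermost,
     * the per-drive queue lock (DRIVE_LOCK) only ever inside it. */
    unsigned long flags;

    spin_lock_irqsave(&ide_lock, flags);
    /* ... hwgroup and timer bookkeeping ... */
    spin_lock(DRIVE_LOCK(drive));            /* queue manipulation only */
    blkdev_dequeue_request(rq);
    end_that_request_last(rq);
    spin_unlock(DRIVE_LOCK(drive));
    spin_unlock_irqrestore(&ide_lock, flags);
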
@@ -1863,40 +1922,33 @@ { ide_drive_t *drive; ide_hwgroup_t *hwgroup; - unsigned int p, major, minor; - long flags; + unsigned long flags; + int res; if ((drive = get_info_ptr(i_rdev)) == NULL) return -ENODEV; - major = MAJOR(i_rdev); - minor = drive->select.b.unit << PARTN_BITS; hwgroup = HWGROUP(drive); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); if (drive->busy || (drive->usage > 1)) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return -EBUSY; - }; + } drive->busy = 1; MOD_INC_USE_COUNT; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); - for (p = 0; p < (1<part[p].nr_sects > 0) { - kdev_t devp = MKDEV(major, minor+p); - invalidate_device(devp, 1); - set_blocksize(devp, 1024); - } - drive->part[p].start_sect = 0; - drive->part[p].nr_sects = 0; - }; + res = wipe_partitions(i_rdev); + if (res) + goto leave; if (DRIVER(drive)->revalidate) DRIVER(drive)->revalidate(drive); + leave: drive->busy = 0; wake_up(&drive->wqueue); MOD_DEC_USE_COUNT; - return 0; + return res; } static void revalidate_drives (void) @@ -2169,11 +2221,10 @@ */ unregister_blkdev(hwif->major, hwif->name); kfree(blksize_size[hwif->major]); - kfree(max_sectors[hwif->major]); kfree(max_readahead[hwif->major]); blk_dev[hwif->major].data = NULL; blk_dev[hwif->major].queue = NULL; - blksize_size[hwif->major] = NULL; + blk_clear(hwif->major); gd = hwif->gd; if (gd) { del_gendisk(gd); @@ -2293,6 +2344,7 @@ memcpy(hwif->io_ports, hwif->hw.io_ports, sizeof(hwif->hw.io_ports)); hwif->irq = hw->irq; hwif->noprobe = 0; + hwif->chipset = hw->chipset; if (!initializing) { ide_probe_module(); @@ -2403,7 +2455,7 @@ unsigned long flags; if ((setting->rw & SETTING_READ)) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); switch(setting->data_type) { case TYPE_BYTE: val = *((u8 *) setting->data); @@ -2416,7 +2468,7 @@ val = *((u32 *) setting->data); break; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); } return val; } @@ -2426,11 +2478,11 @@ ide_hwgroup_t *hwgroup = HWGROUP(drive); unsigned long timeout = jiffies + (3 * HZ); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&ide_lock); - while (hwgroup->busy) { + while (test_bit(IDE_BUSY, &hwgroup->flags)) { unsigned long lflags; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&ide_lock); __save_flags(lflags); /* local CPU only */ __sti(); /* local CPU only; needed for jiffies */ if (0 < (signed long)(jiffies - timeout)) { @@ -2439,7 +2491,7 @@ return -EBUSY; } __restore_flags(lflags); /* local CPU only */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&ide_lock); } return 0; } @@ -2480,7 +2532,7 @@ *p = val; break; } - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&ide_lock); return 0; } @@ -2633,6 +2685,7 @@ { struct hd_big_geometry *loc = (struct hd_big_geometry *) arg; if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL; + if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT; if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT; if (put_user(drive->bios_cyl, (unsigned int *) &loc->cylinders)) return -EFAULT; @@ -2653,11 +2706,6 @@ return 0; } - case BLKGETSIZE: /* Return device size */ - return put_user(drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects, (unsigned long *) arg); - case BLKGETSIZE64: - return put_user((u64)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects << 9, 
(u64 *) arg); - case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; return ide_revalidate_disk(inode->i_rdev); @@ -2775,6 +2823,8 @@ } return 0; + case BLKGETSIZE: + case BLKGETSIZE64: case BLKROSET: case BLKROGET: case BLKFLSBUF: @@ -2784,6 +2834,8 @@ case BLKELVSET: case BLKBSZGET: case BLKBSZSET: + case BLKHASHPROF: + case BLKHASHCLEAR: return blk_ioctl(inode->i_rdev, cmd, arg); case HDIO_GET_BUSSTATE: @@ -3409,7 +3461,7 @@ #ifdef CONFIG_BLK_DEV_IDE #if defined(__mc68000__) || defined(CONFIG_APUS) if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) { - ide_get_lock(&ide_lock, NULL, NULL); /* for atari only */ + ide_get_lock(&ide_intr_lock, NULL, NULL);/* for atari only */ disable_irq(ide_hwifs[0].irq); /* disable_irq_nosync ?? */ // disable_irq_nosync(ide_hwifs[0].irq); } @@ -3420,7 +3472,7 @@ #if defined(__mc68000__) || defined(CONFIG_APUS) if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) { enable_irq(ide_hwifs[0].irq); - ide_release_lock(&ide_lock); /* for atari only */ + ide_release_lock(&ide_intr_lock);/* for atari only */ } #endif /* __mc68000__ || CONFIG_APUS */ #endif /* CONFIG_BLK_DEV_IDE */ @@ -3685,6 +3737,7 @@ EXPORT_SYMBOL(ide_do_drive_cmd); EXPORT_SYMBOL(ide_end_drive_cmd); EXPORT_SYMBOL(ide_end_request); +EXPORT_SYMBOL(__ide_end_request); EXPORT_SYMBOL(ide_revalidate_disk); EXPORT_SYMBOL(ide_cmd); EXPORT_SYMBOL(ide_wait_cmd); diff -urN linux-2.5.1-pre1/drivers/ide/pdc202xx.c linux/drivers/ide/pdc202xx.c --- linux-2.5.1-pre1/drivers/ide/pdc202xx.c Wed Nov 14 11:44:03 2001 +++ linux/drivers/ide/pdc202xx.c Sat Dec 1 00:37:05 2001 @@ -893,6 +893,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { hwif->dmaproc = &pdc202xx_dmaproc; + hwif->highmem = 1; if (!noautodma) hwif->autodma = 1; } else { diff -urN linux-2.5.1-pre1/drivers/ide/piix.c linux/drivers/ide/piix.c --- linux-2.5.1-pre1/drivers/ide/piix.c Thu Oct 25 13:53:47 2001 +++ linux/drivers/ide/piix.c Sat Dec 1 00:37:05 2001 @@ -523,6 +523,7 @@ if (!hwif->dma_base) return; + hwif->highmem = 1; #ifndef CONFIG_BLK_DEV_IDEDMA hwif->autodma = 0; #else /* CONFIG_BLK_DEV_IDEDMA */ diff -urN linux-2.5.1-pre1/drivers/ide/serverworks.c linux/drivers/ide/serverworks.c --- linux-2.5.1-pre1/drivers/ide/serverworks.c Sun Sep 9 10:43:02 2001 +++ linux/drivers/ide/serverworks.c Sat Dec 1 00:37:05 2001 @@ -593,6 +593,7 @@ if (!noautodma) hwif->autodma = 1; hwif->dmaproc = &svwks_dmaproc; + hwif->highmem = 1; } else { hwif->autodma = 0; hwif->drives[0].autotune = 1; diff -urN linux-2.5.1-pre1/drivers/ide/sis5513.c linux/drivers/ide/sis5513.c --- linux-2.5.1-pre1/drivers/ide/sis5513.c Fri Sep 7 09:28:38 2001 +++ linux/drivers/ide/sis5513.c Sat Dec 1 00:37:05 2001 @@ -671,6 +671,7 @@ case PCI_DEVICE_ID_SI_5591: if (!noautodma) hwif->autodma = 1; + hwif->highmem = 1; hwif->dmaproc = &sis5513_dmaproc; break; #endif /* CONFIG_BLK_DEV_IDEDMA */ diff -urN linux-2.5.1-pre1/drivers/ide/slc90e66.c linux/drivers/ide/slc90e66.c --- linux-2.5.1-pre1/drivers/ide/slc90e66.c Sun Jul 15 16:22:23 2001 +++ linux/drivers/ide/slc90e66.c Sat Dec 1 00:37:05 2001 @@ -373,6 +373,7 @@ return; hwif->autodma = 0; + hwif->highmem = 1; #ifdef CONFIG_BLK_DEV_IDEDMA if (!noautodma) hwif->autodma = 1; diff -urN linux-2.5.1-pre1/drivers/ide/via82cxxx.c linux/drivers/ide/via82cxxx.c --- linux-2.5.1-pre1/drivers/ide/via82cxxx.c Tue Sep 11 08:40:36 2001 +++ linux/drivers/ide/via82cxxx.c Sat Dec 1 00:37:05 2001 @@ -520,6 +520,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { + hwif->highmem = 1; hwif->dmaproc = &via82cxxx_dmaproc; #ifdef 
CONFIG_IDEDMA_AUTO if (!noautodma) diff -urN linux-2.5.1-pre1/drivers/md/lvm-snap.c linux/drivers/md/lvm-snap.c --- linux-2.5.1-pre1/drivers/md/lvm-snap.c Mon Nov 12 09:34:20 2001 +++ linux/drivers/md/lvm-snap.c Sat Dec 1 00:37:05 2001 @@ -351,7 +351,7 @@ blksize_snap = lvm_get_blksize(snap_phys_dev); max_blksize = max(blksize_org, blksize_snap); min_blksize = min(blksize_org, blksize_snap); - max_sectors = KIO_MAX_SECTORS * (min_blksize>>9); + max_sectors = LVM_MAX_SECTORS * (min_blksize>>9); if (chunk_size % (max_blksize>>9)) goto fail_blksize; @@ -363,20 +363,20 @@ iobuf->length = nr_sectors << 9; - if(!lvm_snapshot_prepare_blocks(iobuf->blocks, org_start, + if(!lvm_snapshot_prepare_blocks(lv_snap->blocks, org_start, nr_sectors, blksize_org)) goto fail_prepare; if (brw_kiovec(READ, 1, &iobuf, org_phys_dev, - iobuf->blocks, blksize_org) != (nr_sectors<<9)) + lv_snap->blocks, blksize_org) != (nr_sectors<<9)) goto fail_raw_read; - if(!lvm_snapshot_prepare_blocks(iobuf->blocks, snap_start, + if(!lvm_snapshot_prepare_blocks(lv_snap->blocks, snap_start, nr_sectors, blksize_snap)) goto fail_prepare; if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, - iobuf->blocks, blksize_snap) != (nr_sectors<<9)) + lv_snap->blocks, blksize_snap) !=(nr_sectors<<9)) goto fail_raw_write; } @@ -505,7 +505,7 @@ ret = alloc_kiovec(1, &lv_snap->lv_iobuf); if (ret) goto out; - max_sectors = KIO_MAX_SECTORS << (PAGE_SHIFT-9); + max_sectors = LVM_MAX_SECTORS << (PAGE_SHIFT-9); ret = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors); if (ret) goto out_free_kiovec; @@ -542,8 +542,6 @@ void lvm_snapshot_release(lv_t * lv) { - int nbhs = KIO_MAX_SECTORS; - if (lv->lv_block_exception) { vfree(lv->lv_block_exception); diff -urN linux-2.5.1-pre1/drivers/md/lvm.c linux/drivers/md/lvm.c --- linux-2.5.1-pre1/drivers/md/lvm.c Mon Nov 19 09:56:04 2001 +++ linux/drivers/md/lvm.c Sat Dec 1 00:37:05 2001 @@ -236,7 +236,7 @@ /* * External function prototypes */ -static int lvm_make_request_fn(request_queue_t*, int, struct buffer_head*); +static int lvm_make_request_fn(request_queue_t*, struct bio *); static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong); static int lvm_blk_open(struct inode *, struct file *); @@ -262,7 +262,7 @@ #ifdef LVM_HD_NAME extern void (*lvm_hd_name_ptr) (char *, int); #endif -static int lvm_map(struct buffer_head *, int); +static int lvm_map(struct bio *); static int lvm_do_lock_lvm(void); static int lvm_do_le_remap(vg_t *, void *); @@ -291,9 +291,9 @@ static void __update_hardsectsize(lv_t *lv); -static void _queue_io(struct buffer_head *bh, int rw); -static struct buffer_head *_dequeue_io(void); -static void _flush_io(struct buffer_head *bh); +static void _queue_io(struct bio *bh, int rw); +static struct bio *_dequeue_io(void); +static void _flush_io(struct bio *bh); static int _open_pv(pv_t *pv); static void _close_pv(pv_t *pv); @@ -346,7 +346,7 @@ static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED; static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED; -static struct buffer_head *_pe_requests; +static struct bio *_pe_requests; static DECLARE_RWSEM(_pe_lock); @@ -369,7 +369,6 @@ /* gendisk structures */ static struct hd_struct lvm_hd_struct[MAX_LV]; static int lvm_blocksizes[MAX_LV]; -static int lvm_hardsectsizes[MAX_LV]; static int lvm_size[MAX_LV]; static struct gendisk lvm_gendisk = @@ -451,9 +450,7 @@ del_gendisk(&lvm_gendisk); - blk_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; + blk_clear(MAJOR_NR); #ifdef LVM_HD_NAME /* reference 
from linux/drivers/block/genhd.c */ @@ -1037,25 +1034,25 @@ static int lvm_user_bmap(struct inode *inode, struct lv_bmap *user_result) { - struct buffer_head bh; + struct bio bio; unsigned long block; int err; if (get_user(block, &user_result->lv_block)) return -EFAULT; - memset(&bh,0,sizeof bh); - bh.b_blocknr = block; - bh.b_dev = bh.b_rdev = inode->i_rdev; - bh.b_size = lvm_get_blksize(bh.b_dev); - bh.b_rsector = block * (bh.b_size >> 9); - if ((err=lvm_map(&bh, READ)) < 0) { + memset(&bio,0,sizeof(bio)); + bio.bi_dev = inode->i_rdev; + bio.bi_io_vec.bv_len = lvm_get_blksize(bio.bi_dev); + bio.bi_sector = block * bio_sectors(&bio); + bio.bi_rw = READ; + if ((err=lvm_map(&bio)) < 0) { printk("lvm map failed: %d\n", err); return -EINVAL; } - return put_user(kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) || - put_user(bh.b_rsector/(bh.b_size>>9), &user_result->lv_block) ? + return put_user(kdev_t_to_nr(bio.bi_dev), &user_result->lv_dev) || + put_user(bio.bi_sector/bio_sectors(&bio), &user_result->lv_block) ? -EFAULT : 0; } @@ -1104,7 +1101,7 @@ (sector < (pe_lock_req.data.pv_offset + pe_size))); } -static inline int _defer_extent(struct buffer_head *bh, int rw, +static inline int _defer_extent(struct bio *bh, int rw, kdev_t pv, ulong sector, uint32_t pe_size) { if (pe_lock_req.lock == LOCK_PE) { @@ -1122,17 +1119,18 @@ return 0; } -static int lvm_map(struct buffer_head *bh, int rw) +static int lvm_map(struct bio *bh) { - int minor = MINOR(bh->b_rdev); + int minor = MINOR(bh->bi_dev); ulong index; ulong pe_start; - ulong size = bh->b_size >> 9; - ulong rsector_org = bh->b_rsector; + ulong size = bio_sectors(bh); + ulong rsector_org = bh->bi_sector; ulong rsector_map; kdev_t rdev_map; vg_t *vg_this = vg[VG_BLK(minor)]; lv_t *lv = vg_this->lv[LV_BLK(minor)]; + int rw = bio_data_dir(bh); down_read(&lv->lv_lock); @@ -1153,7 +1151,7 @@ P_MAP("%s - lvm_map minor: %d *rdev: %s *rsector: %lu size:%lu\n", lvm_name, minor, - kdevname(bh->b_rdev), + kdevname(bh->bi_dev), rsector_org, size); if (rsector_org + size > lv->lv_size) { @@ -1248,13 +1246,15 @@ } out: - bh->b_rdev = rdev_map; - bh->b_rsector = rsector_map; + if (test_bit(BIO_HASHED, &bh->bi_flags)) + BUG(); + bh->bi_dev = rdev_map; + bh->bi_sector = rsector_map; up_read(&lv->lv_lock); return 1; bad: - buffer_IO_error(bh); + bio_io_error(bh); up_read(&lv->lv_lock); return -1; } /* lvm_map() */ @@ -1287,10 +1287,9 @@ /* * make request function */ -static int lvm_make_request_fn(request_queue_t *q, - int rw, - struct buffer_head *bh) { - return (lvm_map(bh, rw) <= 0) ? 0 : 1; +static int lvm_make_request_fn(request_queue_t *q, struct bio *bio) +{ + return (lvm_map(bio) <= 0) ? 
0 : 1; } @@ -1331,7 +1330,7 @@ static int lvm_do_pe_lock_unlock(vg_t *vg_ptr, void *arg) { pe_lock_req_t new_lock; - struct buffer_head *bh; + struct bio *bh; uint p; if (vg_ptr == NULL) return -ENXIO; @@ -1820,8 +1819,6 @@ max_hardsectsize = hardsectsize; } } - - lvm_hardsectsizes[MINOR(lv->lv_dev)] = max_hardsectsize; } /* @@ -2665,7 +2662,6 @@ blk_size[MAJOR_NR] = lvm_size; blksize_size[MAJOR_NR] = lvm_blocksizes; - hardsect_size[MAJOR_NR] = lvm_hardsectsizes; return; } /* lvm_gen_init() */ @@ -2673,16 +2669,16 @@ /* Must have down_write(_pe_lock) when we enqueue buffers */ -static void _queue_io(struct buffer_head *bh, int rw) { - if (bh->b_reqnext) BUG(); - bh->b_reqnext = _pe_requests; +static void _queue_io(struct bio *bh, int rw) { + if (bh->bi_next) BUG(); + bh->bi_next = _pe_requests; _pe_requests = bh; } /* Must have down_write(_pe_lock) when we dequeue buffers */ -static struct buffer_head *_dequeue_io(void) +static struct bio *_dequeue_io(void) { - struct buffer_head *bh = _pe_requests; + struct bio *bh = _pe_requests; _pe_requests = NULL; return bh; } @@ -2697,13 +2693,14 @@ * If, for some reason, the same PE is locked again before all of these writes * have finished, then these buffers will just be re-queued (i.e. no danger). */ -static void _flush_io(struct buffer_head *bh) +static void _flush_io(struct bio *bh) { while (bh) { - struct buffer_head *next = bh->b_reqnext; - bh->b_reqnext = NULL; + struct bio *next = bh->bi_next; + bh->bi_next = NULL; /* resubmit this buffer head */ - generic_make_request(WRITE, bh); + bh->bi_rw = WRITE; /* needed? */ + generic_make_request(bh); bh = next; } } diff -urN linux-2.5.1-pre1/drivers/md/md.c linux/drivers/md/md.c --- linux-2.5.1-pre1/drivers/md/md.c Thu Oct 25 13:58:34 2001 +++ linux/drivers/md/md.c Sat Dec 1 00:37:05 2001 @@ -105,7 +105,6 @@ */ struct hd_struct md_hd_struct[MAX_MD_DEVS]; static int md_blocksizes[MAX_MD_DEVS]; -static int md_hardsect_sizes[MAX_MD_DEVS]; static int md_maxreadahead[MAX_MD_DEVS]; static mdk_thread_t *md_recovery_thread; @@ -172,14 +171,14 @@ mddev_map[minor].data = NULL; } -static int md_make_request(request_queue_t *q, int rw, struct buffer_head * bh) +static int md_make_request (request_queue_t *q, struct bio *bio) { - mddev_t *mddev = kdev_to_mddev(bh->b_rdev); + mddev_t *mddev = kdev_to_mddev(bio->bi_dev); if (mddev && mddev->pers) - return mddev->pers->make_request(mddev, rw, bh); + return mddev->pers->make_request(mddev, bio_rw(bio), bio); else { - buffer_IO_error(bh); + bio_io_error(bio); return 0; } } @@ -1701,19 +1700,14 @@ * device. 
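
Both the lvm and md hunks above follow the same mechanical conversion from buffer_head to bio: the read/write direction moves out of the function signature and into the bio itself, and the old fields map one-for-one onto bio fields. A generic before/after sketch, using the bio_rw() and bio_sectors() helpers this diff itself relies on:

/* 2.5.1-pre1 style: direction is a separate argument */
static int example_make_request(request_queue_t *q, int rw, struct buffer_head *bh)
{
	kdev_t dev = bh->b_rdev;
	unsigned long sector = bh->b_rsector;
	unsigned long nsect = bh->b_size >> 9;
	/* remap dev/sector here, then return 1 so the block
	 * layer continues with the (possibly modified) buffer */
	return 1;
}

/* 2.5.1-pre2 style: everything travels inside the bio */
static int example_make_request(request_queue_t *q, struct bio *bio)
{
	int rw = bio_rw(bio);
	kdev_t dev = bio->bi_dev;
	unsigned long sector = bio->bi_sector;
	unsigned long nsect = bio_sectors(bio);
	/* remap bio->bi_dev / bio->bi_sector here, then return 1 */
	return 1;
}
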
* Also find largest hardsector size */ - md_hardsect_sizes[mdidx(mddev)] = 512; ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->faulty) continue; invalidate_device(rdev->dev, 1); - if (get_hardsect_size(rdev->dev) - > md_hardsect_sizes[mdidx(mddev)]) - md_hardsect_sizes[mdidx(mddev)] = - get_hardsect_size(rdev->dev); - } - md_blocksizes[mdidx(mddev)] = 1024; - if (md_blocksizes[mdidx(mddev)] < md_hardsect_sizes[mdidx(mddev)]) - md_blocksizes[mdidx(mddev)] = md_hardsect_sizes[mdidx(mddev)]; + md_blocksizes[mdidx(mddev)] = 1024; + if (get_hardsect_size(rdev->dev) > md_blocksizes[mdidx(mddev)]) + md_blocksizes[mdidx(mddev)] = get_hardsect_size(rdev->dev); + } mddev->pers = pers[pnum]; err = mddev->pers->run(mddev); @@ -2769,7 +2763,7 @@ (short *) &loc->cylinders); if (err) goto abort_unlock; - err = md_put_user (md_hd_struct[minor].start_sect, + err = md_put_user (get_start_sect(dev), (long *) &loc->start); goto done_unlock; } @@ -3621,13 +3615,11 @@ for(i = 0; i < MAX_MD_DEVS; i++) { md_blocksizes[i] = 1024; md_size[i] = 0; - md_hardsect_sizes[i] = 512; md_maxreadahead[i] = MD_READAHEAD; } blksize_size[MAJOR_NR] = md_blocksizes; blk_size[MAJOR_NR] = md_size; max_readahead[MAJOR_NR] = md_maxreadahead; - hardsect_size[MAJOR_NR] = md_hardsect_sizes; dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t)); @@ -3670,7 +3662,8 @@ md_recovery_thread = md_register_thread(md_do_recovery, NULL, name); if (!md_recovery_thread) - printk(KERN_ALERT "md: bug: couldn't allocate md_recovery_thread\n"); + printk(KERN_ALERT + "md: bug: couldn't allocate md_recovery_thread\n"); md_register_reboot_notifier(&md_notifier); raid_table_header = register_sysctl_table(raid_root_table, 1); @@ -4008,15 +4001,10 @@ #endif del_gendisk(&md_gendisk); - blk_dev[MAJOR_NR].queue = NULL; - blksize_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; - max_readahead[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; - + blk_clear(MAJOR_NR); + free_device_names(); - } #endif diff -urN linux-2.5.1-pre1/drivers/message/i2o/i2o_block.c linux/drivers/message/i2o/i2o_block.c --- linux-2.5.1-pre1/drivers/message/i2o/i2o_block.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/message/i2o/i2o_block.c Sat Dec 1 00:37:05 2001 @@ -114,15 +114,16 @@ #define I2O_BSA_DSC_VOLUME_CHANGED 0x000D #define I2O_BSA_DSC_TIMEOUT 0x000E +#define I2O_UNIT(dev) (i2ob_dev[MINOR((dev)) & 0xf0]) +#define I2O_LOCK(unit) (i2ob_dev[(unit)].req_queue->queue_lock) + /* * Some of these can be made smaller later */ static int i2ob_blksizes[MAX_I2OB<<4]; -static int i2ob_hardsizes[MAX_I2OB<<4]; static int i2ob_sizes[MAX_I2OB<<4]; static int i2ob_media_change_flag[MAX_I2OB]; -static u32 i2ob_max_sectors[MAX_I2OB<<4]; static int i2ob_context; @@ -252,9 +253,9 @@ unsigned long mptr; u64 offset; struct request *req = ireq->req; - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; int count = req->nr_sectors<<9; - char *last = NULL; + unsigned long last = ~0UL; unsigned short size = 0; // printk(KERN_INFO "i2ob_send called\n"); @@ -283,30 +284,30 @@ if(req->cmd == READ) { __raw_writel(I2O_CMD_BLOCK_READ<<24|HOST_TID<<12|tid, msg+4); - while(bh!=NULL) + while(bio) { - if(bh->b_data == last) { - size += bh->b_size; - last += bh->b_size; - if(bh->b_reqnext) + if (bio_to_phys(bio) == last) { + size += bio_size(bio); + last += bio_size(bio); + if(bio->bi_next) __raw_writel(0x14000000|(size), mptr-8); else __raw_writel(0xD4000000|(size), mptr-8); } else { - if(bh->b_reqnext) - __raw_writel(0x10000000|(bh->b_size), mptr); + if(bio->bi_next) + 
__raw_writel(0x10000000|bio_size(bio), mptr); else - __raw_writel(0xD0000000|(bh->b_size), mptr); - __raw_writel(virt_to_bus(bh->b_data), mptr+4); + __raw_writel(0xD0000000|bio_size(bio), mptr); + __raw_writel(bio_to_phys(bio), mptr+4); mptr += 8; - size = bh->b_size; - last = bh->b_data + size; + size = bio_size(bio); + last = bio_to_phys(bio) + bio_size(bio); } - count -= bh->b_size; - bh = bh->b_reqnext; + count -= bio_size(bio); + bio = bio->bi_next; } /* * Heuristic for now since the block layer doesnt give @@ -322,30 +323,30 @@ else if(req->cmd == WRITE) { __raw_writel(I2O_CMD_BLOCK_WRITE<<24|HOST_TID<<12|tid, msg+4); - while(bh!=NULL) + while(bio) { - if(bh->b_data == last) { - size += bh->b_size; - last += bh->b_size; - if(bh->b_reqnext) + if (bio_to_phys(bio) == last) { + size += bio_size(bio); + last += bio_size(bio); + if(bio->bi_next) __raw_writel(0x14000000|(size), mptr-8); else __raw_writel(0xD4000000|(size), mptr-8); } else { - if(bh->b_reqnext) - __raw_writel(0x14000000|(bh->b_size), mptr); + if(bio->bi_next) + __raw_writel(0x14000000|bio_size(bio), mptr); else - __raw_writel(0xD4000000|(bh->b_size), mptr); - __raw_writel(virt_to_bus(bh->b_data), mptr+4); + __raw_writel(0xD4000000|bio_size(bio), mptr); + __raw_writel(bio_to_phys(bio), mptr+4); mptr += 8; - size = bh->b_size; - last = bh->b_data + size; + size = bio_size(bio); + last = bio_to_phys(bio) + bio_size(bio); } - count -= bh->b_size; - bh = bh->b_reqnext; + count -= bio_size(bio); + bio = bio->bi_next; } if(c->battery) @@ -409,7 +410,8 @@ * unlocked. */ - while (end_that_request_first( req, !req->errors, "i2o block" )); + while (end_that_request_first(req, !req->errors)) + ; /* * It is now ok to complete the request. @@ -417,61 +419,6 @@ end_that_request_last( req ); } -/* - * Request merging functions - */ -static inline int i2ob_new_segment(request_queue_t *q, struct request *req, - int __max_segments) -{ - int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments; - - if (__max_segments < max_segments) - max_segments = __max_segments; - - if (req->nr_segments < max_segments) { - req->nr_segments++; - return 1; - } - return 0; -} - -static int i2ob_back_merge(request_queue_t *q, struct request *req, - struct buffer_head *bh, int __max_segments) -{ - if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) - return 1; - return i2ob_new_segment(q, req, __max_segments); -} - -static int i2ob_front_merge(request_queue_t *q, struct request *req, - struct buffer_head *bh, int __max_segments) -{ - if (bh->b_data + bh->b_size == req->bh->b_data) - return 1; - return i2ob_new_segment(q, req, __max_segments); -} - -static int i2ob_merge_requests(request_queue_t *q, - struct request *req, - struct request *next, - int __max_segments) -{ - int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments; - int total_segments = req->nr_segments + next->nr_segments; - - if (__max_segments < max_segments) - max_segments = __max_segments; - - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) - total_segments--; - - if (total_segments > max_segments) - return 0; - - req->nr_segments = total_segments; - return 1; -} - static int i2ob_flush(struct i2o_controller *c, struct i2ob_device *d, int unit) { unsigned long msg; @@ -535,10 +482,10 @@ ireq=&i2ob_queues[c->unit]->request_queue[m[3]]; ireq->req->errors++; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); - spin_unlock_irqrestore(&io_request_lock, 
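
The i2ob_send loops above also change what "contiguous" means: with highmem I/O a bio need not have a kernel virtual address, so adjacent segments are detected by physical address (bio_to_phys()) rather than by comparing b_data pointers. Stripped of the message-building detail, the merge logic is:

unsigned long last = ~0UL;	/* no bio can match this, so the first
				 * segment always opens a new element */
unsigned short size = 0;

while (bio) {
	if (bio_to_phys(bio) == last) {
		/* physically contiguous: grow the current element */
		size += bio_size(bio);
		last += bio_size(bio);
	} else {
		/* start a new scatter-gather element */
		size = bio_size(bio);
		last = bio_to_phys(bio) + bio_size(bio);
	}
	bio = bio->bi_next;
}
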
flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* Now flush the message by making it a NOP */ m[0]&=0x00FFFFFF; @@ -559,12 +506,12 @@ if(msg->function == I2O_CMD_BLOCK_CFLUSH) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); dev->constipated=0; DEBUG(("unconstipated\n")); if(i2ob_backlog_request(c, dev)==0) i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -580,10 +527,10 @@ ireq=&i2ob_queues[c->unit]->request_queue[m[3]]; ireq->req->errors++; printk(KERN_WARNING "I2O Block: Data transfer to deleted device!\n"); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -629,7 +576,7 @@ */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); if(err==4) { /* @@ -674,7 +621,7 @@ */ i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* * and out @@ -682,7 +629,7 @@ return; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); printk(KERN_ERR "\n/dev/%s error: %s", dev->i2odev->dev_name, bsa_errors[m[4]&0XFFFF]); if(m[4]&0x00FF0000) @@ -697,8 +644,8 @@ * Dequeue the request. We use irqsave locks as one day we * may be running polled controllers from a BH... */ - - spin_lock_irqsave(&io_request_lock, flags); + + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); atomic_dec(&i2ob_queues[c->unit]->queue_depth); @@ -710,7 +657,7 @@ if(i2ob_backlog_request(c, dev)==0) i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); } /* @@ -789,8 +736,7 @@ for(i = unit; i <= unit+15; i++) { i2ob_sizes[i] = 0; - i2ob_hardsizes[i] = 0; - i2ob_max_sectors[i] = 0; + blk_queue_max_sectors(i2ob_dev[i].req_queue, 0); i2ob[i].nr_sects = 0; i2ob_gendisk.part[i].nr_sects = 0; } @@ -824,11 +770,11 @@ if(i2ob_query_device(&i2ob_dev[unit], 0x0004, 0, &size, 8) !=0 ) i2ob_query_device(&i2ob_dev[unit], 0x0000, 4, &size, 8); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(unit), flags); i2ob_sizes[unit] = (int)(size>>10); i2ob_gendisk.part[unit].nr_sects = size>>9; i2ob[unit].nr_sects = (int)(size>>9); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(unit), flags); break; } @@ -881,13 +827,14 @@ static void i2ob_timer_handler(unsigned long q) { + request_queue_t *req_queue = (request_queue_t *) q; unsigned long flags; /* * We cannot touch the request queue or the timer - * flag without holding the io_request_lock. + * flag without holding the queue_lock */ - spin_lock_irqsave(&io_request_lock,flags); + spin_lock_irqsave(&req_queue->queue_lock,flags); /* * Clear the timer started flag so that @@ -898,12 +845,12 @@ /* * Restart any requests. */ - i2ob_request((request_queue_t*)q); + i2ob_request(req_queue); /* * Free the lock. 
*/ - spin_unlock_irqrestore(&io_request_lock,flags); + spin_unlock_irqrestore(&req_queue->queue_lock,flags); } static int i2ob_backlog_request(struct i2o_controller *c, struct i2ob_device *dev) @@ -1132,34 +1079,23 @@ static int i2ob_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { - struct i2ob_device *dev; - int minor; - /* Anyone capable of this syscall can do *real bad* things */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!inode) + if (!inode || !inode->i_rdev) return -EINVAL; - minor = MINOR(inode->i_rdev); - if (minor >= (MAX_I2OB<<4)) - return -ENODEV; - dev = &i2ob_dev[minor]; switch (cmd) { - case BLKGETSIZE: - return put_user(i2ob[minor].nr_sects, (long *) arg); - case BLKGETSIZE64: - return put_user((u64)i2ob[minor].nr_sects << 9, (u64 *)arg); - case HDIO_GETGEO: { struct hd_geometry g; - int u=minor&0xF0; + int u = MINOR(inode->i_rdev) & 0xF0; i2o_block_biosparam(i2ob_sizes[u]<<1, &g.cylinders, &g.heads, &g.sectors); - g.start = i2ob[minor].start_sect; - return copy_to_user((void *)arg,&g, sizeof(g))?-EFAULT:0; + g.start = get_start_sect(inode->i_rdev); + return copy_to_user((void *)arg, &g, sizeof(g)) + ? -EFAULT : 0; } case BLKRRPART: @@ -1167,6 +1103,8 @@ return -EACCES; return do_i2ob_revalidate(inode->i_rdev,1); + case BLKGETSIZE: + case BLKGETSIZE64: case BLKFLSBUF: case BLKROSET: case BLKROGET: @@ -1354,8 +1292,6 @@ i2ob_query_device(dev, 0x0000, 5, &flags, 4); i2ob_query_device(dev, 0x0000, 6, &status, 4); i2ob_sizes[unit] = (int)(size>>10); - for(i=unit; i <= unit+15 ; i++) - i2ob_hardsizes[i] = blocksize; i2ob_gendisk.part[unit].nr_sects = size>>9; i2ob[unit].nr_sects = (int)(size>>9); @@ -1366,26 +1302,27 @@ /* * Max number of Scatter-Gather Elements */ - for(i=unit;i<=unit+15;i++) { - i2ob_max_sectors[i] = 256; - i2ob_dev[i].max_segments = (d->controller->status_block->inbound_frame_size - 8)/2; + request_queue_t *q = i2ob_dev[unit].req_queue; + + blk_queue_max_sectors(q, 256); + blk_queue_max_segments(q, (d->controller->status_block->inbound_frame_size - 8)/2); if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.queue_buggy == 2) i2ob_dev[i].depth = 32; if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.queue_buggy == 1) { - i2ob_max_sectors[i] = 32; - i2ob_dev[i].max_segments = 8; + blk_queue_max_sectors(q, 32); + blk_queue_max_segments(q, 8); i2ob_dev[i].depth = 4; } if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.short_req) { - i2ob_max_sectors[i] = 8; - i2ob_dev[i].max_segments = 8; + blk_queue_max_sectors(q, 8); + blk_queue_max_segments(q, 8); } } @@ -1430,7 +1367,7 @@ } printk(".\n"); printk(KERN_INFO "%s: Maximum sectors/read set to %d.\n", - d->dev_name, i2ob_max_sectors[unit]); + d->dev_name, i2ob_dev[unit].req_queue->max_sectors); /* * If this is the first I2O block device found on this IOP, @@ -1450,7 +1387,7 @@ */ dev->req_queue = &i2ob_queues[c->unit]->req_queue; - grok_partitions(&i2ob_gendisk, unit>>4, 1<<4, (long)(size>>9)); + grok_partitions(MKDEV(MAJOR_NR, unit), (long)(size>>9)); /* * Register for the events we're interested in and that the @@ -1468,6 +1405,7 @@ */ static int i2ob_init_iop(unsigned int unit) { + char name[16]; int i; i2ob_queues[unit] = (struct i2ob_iop_queue*) @@ -1491,11 +1429,9 @@ i2ob_queues[unit]->i2ob_qhead = &i2ob_queues[unit]->request_queue[0]; atomic_set(&i2ob_queues[unit]->queue_depth, 0); - blk_init_queue(&i2ob_queues[unit]->req_queue, i2ob_request); + sprintf(name, "i2o%d", unit); + blk_init_queue(&i2ob_queues[unit]->req_queue, 
i2ob_request, name); blk_queue_headactive(&i2ob_queues[unit]->req_queue, 0); - i2ob_queues[unit]->req_queue.back_merge_fn = i2ob_back_merge; - i2ob_queues[unit]->req_queue.front_merge_fn = i2ob_front_merge; - i2ob_queues[unit]->req_queue.merge_requests_fn = i2ob_merge_requests; i2ob_queues[unit]->req_queue.queuedata = &i2ob_queues[unit]; return 0; @@ -1506,11 +1442,11 @@ */ static request_queue_t* i2ob_get_queue(kdev_t dev) { - int unit = MINOR(dev)&0xF0; - - return i2ob_dev[unit].req_queue; + return I2O_UNIT(dev).req_queue; } + + /* * Probe the I2O subsytem for block class devices */ @@ -1708,7 +1644,7 @@ int i = 0; unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); /* * Need to do this...we somtimes get two events from the IRTOS @@ -1730,7 +1666,7 @@ if(unit >= MAX_I2OB<<4) { printk(KERN_ERR "i2ob_del_device called, but not in dev table!\n"); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -1743,12 +1679,11 @@ { i2ob_dev[i].i2odev = NULL; i2ob_sizes[i] = 0; - i2ob_hardsizes[i] = 0; - i2ob_max_sectors[i] = 0; + blk_queue_max_sectors(i2ob_dev[i].req_queue, 0); i2ob[i].nr_sects = 0; i2ob_gendisk.part[i].nr_sects = 0; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* * Decrease usage count for module @@ -1891,12 +1826,10 @@ */ blksize_size[MAJOR_NR] = i2ob_blksizes; - hardsect_size[MAJOR_NR] = i2ob_hardsizes; blk_size[MAJOR_NR] = i2ob_sizes; - max_sectors[MAJOR_NR] = i2ob_max_sectors; blk_dev[MAJOR_NR].queue = i2ob_get_queue; - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request); + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request, "i2o"); blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); for (i = 0; i < MAX_I2OB << 4; i++) { @@ -1909,7 +1842,6 @@ i2ob_dev[i].tail = NULL; i2ob_dev[i].depth = MAX_I2OB_DEPTH; i2ob_blksizes[i] = 1024; - i2ob_max_sectors[i] = 2; } /* @@ -1977,7 +1909,6 @@ MODULE_AUTHOR("Red Hat Software"); MODULE_DESCRIPTION("I2O Block Device OSM"); MODULE_LICENSE("GPL"); - void cleanup_module(void) { diff -urN linux-2.5.1-pre1/drivers/message/i2o/i2o_core.c linux/drivers/message/i2o/i2o_core.c --- linux-2.5.1-pre1/drivers/message/i2o/i2o_core.c Mon Oct 22 08:39:56 2001 +++ linux/drivers/message/i2o/i2o_core.c Sat Dec 1 00:37:05 2001 @@ -125,6 +125,7 @@ * Function table to send to bus specific layers * See for explanation of this */ +#ifdef CONFIG_I2O_PCI_MODULE static struct i2o_core_func_table i2o_core_functions = { i2o_install_controller, @@ -135,7 +136,6 @@ i2o_delete_controller }; -#ifdef CONFIG_I2O_PCI_MODULE extern int i2o_pci_core_attach(struct i2o_core_func_table *); extern void i2o_pci_core_detach(void); #endif /* CONFIG_I2O_PCI_MODULE */ diff -urN linux-2.5.1-pre1/drivers/mtd/ftl.c linux/drivers/mtd/ftl.c --- linux-2.5.1-pre1/drivers/mtd/ftl.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/mtd/ftl.c Sat Dec 1 00:37:05 2001 @@ -1166,7 +1166,7 @@ put_user(1, (char *)&geo->heads); put_user(8, (char *)&geo->sectors); put_user((sect>>3), (short *)&geo->cylinders); - put_user(ftl_hd[minor].start_sect, (u_long *)&geo->start); + put_user(get_start_sect(inode->i_rdev), (u_long *)&geo->start); break; case BLKGETSIZE: ret = put_user(ftl_hd[minor].nr_sects, (unsigned long *)arg); @@ -1206,42 +1206,27 @@ ======================================================================*/ -static int ftl_reread_partitions(int minor) +static int ftl_reread_partitions(kdev_t dev) { + int minor 
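
Two hunks above show that blk_init_queue() now takes a third argument, a queue name ("i2o%d" per IOP, "i2o" for the default queue). Converting a driver is mechanical; assuming the new prototype is blk_init_queue(request_queue_t *, request_fn_proc *, char *), with do_xyz_request as a placeholder request function:

/* before (2.5.1-pre1) */
blk_init_queue(q, do_xyz_request);

/* after (2.5.1-pre2): name the queue so it can be identified later */
blk_init_queue(q, do_xyz_request, "xyz");
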
= MINOR(dev); partition_t *part = myparts[minor >> 4]; - int i, whole; + int res; DEBUG(0, "ftl_cs: ftl_reread_partition(%d)\n", minor); if ((atomic_read(&part->open) > 1)) { return -EBUSY; } - whole = minor & ~(MAX_PART-1); - i = MAX_PART - 1; - while (i-- > 0) { - if (ftl_hd[whole+i].nr_sects > 0) { - kdev_t rdev = MKDEV(FTL_MAJOR, whole+i); - - invalidate_device(rdev, 1); - } - ftl_hd[whole+i].start_sect = 0; - ftl_hd[whole+i].nr_sects = 0; - } + res = wipe_partitions(dev); + if (res) + goto leave; scan_header(part); register_disk(&ftl_gendisk, whole >> PART_BITS, MAX_PART, &ftl_blk_fops, le32_to_cpu(part->header.FormattedSize)/SECTOR_SIZE); -#ifdef PCMCIA_DEBUG - for (i = 0; i < MAX_PART; i++) { - if (ftl_hd[whole+i].nr_sects > 0) - printk(KERN_INFO " %d: start %ld size %ld\n", i, - ftl_hd[whole+i].start_sect, - ftl_hd[whole+i].nr_sects); - } -#endif - return 0; + return res; } /*====================================================================== @@ -1431,7 +1416,7 @@ unregister_blkdev(FTL_MAJOR, "ftl"); blk_cleanup_queue(BLK_DEFAULT_QUEUE(FTL_MAJOR)); - blksize_size[FTL_MAJOR] = NULL; + blk_clear(FTL_MAJOR); del_gendisk(&ftl_gendisk); } diff -urN linux-2.5.1-pre1/drivers/mtd/mtdblock.c linux/drivers/mtd/mtdblock.c --- linux-2.5.1-pre1/drivers/mtd/mtdblock.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/mtd/mtdblock.c Sat Dec 1 00:37:05 2001 @@ -29,7 +29,7 @@ #if LINUX_VERSION_CODE < 0x20300 #define QUEUE_PLUGGED (blk_dev[MAJOR_NR].plug_tq.sync) #else -#define QUEUE_PLUGGED (blk_dev[MAJOR_NR].request_queue.plugged) +#define QUEUE_PLUGGED (blk_queue_plugged(QUEUE)) #endif #ifdef CONFIG_DEVFS_FS @@ -402,7 +402,7 @@ /* * This is a special request_fn because it is executed in a process context - * to be able to sleep independently of the caller. The io_request_lock + * to be able to sleep independently of the caller. The queue_lock * is held upon entry and exit. * The head of our request queue is considered active so there is no need * to dequeue requests before we are done. */ @@ -416,7 +416,7 @@ for (;;) { INIT_REQUEST; req = CURRENT; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); mtdblk = mtdblks[MINOR(req->rq_dev)]; res = 0; @@ -458,7 +458,7 @@ } end_req: - spin_lock_irq(&io_request_lock); + spin_lock_irq(&QUEUE->queue_lock); end_request(res); } } @@ -490,16 +490,16 @@ while (!leaving) { add_wait_queue(&thr_wq, &wait); set_current_state(TASK_INTERRUPTIBLE); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&QUEUE->queue_lock); if (QUEUE_EMPTY || QUEUE_PLUGGED) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); schedule(); remove_wait_queue(&thr_wq, &wait); } else { remove_wait_queue(&thr_wq, &wait); set_current_state(TASK_RUNNING); handle_mtdblock_request(); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); } } diff -urN linux-2.5.1-pre1/drivers/mtd/nftlcore.c linux/drivers/mtd/nftlcore.c --- linux-2.5.1-pre1/drivers/mtd/nftlcore.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/mtd/nftlcore.c Sat Dec 1 00:37:05 2001 @@ -59,11 +59,6 @@ /* .. for the Linux partition table handling.
*/ struct hd_struct part_table[256]; -#if LINUX_VERSION_CODE < 0x20328 -static void dummy_init (struct gendisk *crap) -{} -#endif - static struct gendisk nftl_gendisk = { major: MAJOR_NR, major_name: "nftl", @@ -166,7 +161,8 @@ #if LINUX_VERSION_CODE < 0x20328 resetup_one_dev(&nftl_gendisk, firstfree); #else - grok_partitions(&nftl_gendisk, firstfree, 1<<NFTL_PARTN_BITS, nftl->nr_sects); + grok_partitions(MKDEV(MAJOR_NR,firstfree<<NFTL_PARTN_BITS), nftl->nr_sects); #endif } @@ -786,7 +782,7 @@ static int nftl_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg) { struct NFTLrecord *nftl; - int p; + int res; nftl = NFTLs[MINOR(inode->i_rdev) >> NFTL_PARTN_BITS]; @@ -799,16 +795,9 @@ g.heads = nftl->heads; g.sectors = nftl->sectors; g.cylinders = nftl->cylinders; - g.start = part_table[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user((void *)arg, &g, sizeof g) ? -EFAULT : 0; } - case BLKGETSIZE: /* Return device size */ - return put_user(part_table[MINOR(inode->i_rdev)].nr_sects, (unsigned long *) arg); - case BLKGETSIZE64: - return put_user((u64)part_table[MINOR(inode->i_rdev)].nr_sects << 9, (u64 *)arg); - case BLKFLSBUF: if (!capable(CAP_SYS_ADMIN)) return -EACCES; fsync_dev(inode->i_rdev); @@ -825,27 +814,17 @@ * or we won't be able to re-use the partitions, * if there was a change and we don't want to reboot */ - p = (1<<NFTL_PARTN_BITS); - while (p-- > 0) { - kdev_t devp = MKDEV(MAJOR(inode->i_dev), MINOR(inode->i_dev)+p); - if (part_table[p].nr_sects > 0) - invalidate_device (devp, 1); + res = wipe_partitions(inode->i_rdev); + if (!res) + grok_partitions(inode->i_rdev, nftl->nr_sects); - part_table[MINOR(inode->i_dev)+p].start_sect = 0; - part_table[MINOR(inode->i_dev)+p].nr_sects = 0; - } - -#if LINUX_VERSION_CODE < 0x20328 - resetup_one_dev(&nftl_gendisk, MINOR(inode->i_rdev) >> NFTL_PARTN_BITS); -#else - grok_partitions(&nftl_gendisk, MINOR(inode->i_rdev) >> NFTL_PARTN_BITS, - 1<<NFTL_PARTN_BITS, nftl->nr_sects); -#endif - return 0; + return res; #if (LINUX_VERSION_CODE < 0x20303) RO_IOCTLS(inode->i_rdev, arg); /* ref. linux/blk.h */ #else + case BLKGETSIZE: + case BLKGETSIZE64: case BLKROSET: case BLKROGET: case BLKSSZGET: @@ -859,7 +838,7 @@ void nftl_request (RQFUNC_ARG) { - unsigned int dev, block, nsect; + unsigned int dev, unit, block, nsect; struct NFTLrecord *nftl; char *buffer; struct request *req; @@ -871,10 +850,11 @@ /* We can do this because the generic code knows not to touch the request at the head of the queue */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); DEBUG(MTD_DEBUG_LEVEL2, "NFTL_request\n"); - DEBUG(MTD_DEBUG_LEVEL3, "NFTL %s request, from sector 0x%04lx for 0x%04lx sectors\n", + DEBUG(MTD_DEBUG_LEVEL3, + "NFTL %s request, from sector 0x%04lx for 0x%04lx sectors\n", (req->cmd == READ) ?
"Read " : "Write", req->sector, req->current_nr_sectors); @@ -884,8 +864,8 @@ buffer = req->buffer; res = 1; /* succeed */ - if (dev >= MAX_NFTLS * (1<> NFTL_PARTN_BITS; + if (unit >= MAX_NFTLS || dev != (unit << NFTL_PARTN_BITS)) { printk("nftl: bad minor number: device = %s\n", kdevname(req->rq_dev)); res = 0; /* fail */ @@ -906,8 +886,6 @@ goto repeat; } - block += part_table[dev].start_sect; - if (req->cmd == READ) { DEBUG(MTD_DEBUG_LEVEL2, "NFTL read request of 0x%x sectors @ %x " "(req->nr_sectors == %lx)\n", nsect, block, req->nr_sectors); @@ -953,7 +931,7 @@ } repeat: DEBUG(MTD_DEBUG_LEVEL3, "end_request(%d)\n", res); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&QUEUE->queue_lock); end_request(res); } } diff -urN linux-2.5.1-pre1/drivers/net/3c509.c linux/drivers/net/3c509.c --- linux-2.5.1-pre1/drivers/net/3c509.c Sun Sep 30 12:26:06 2001 +++ linux/drivers/net/3c509.c Sat Dec 1 00:37:05 2001 @@ -175,7 +175,7 @@ }; #endif /* CONFIG_MCA */ -#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE) +#ifdef __ISAPNP__ static struct isapnp_device_id el3_isapnp_adapters[] __initdata = { { ISAPNP_ANY_ID, ISAPNP_ANY_ID, ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5090), @@ -206,7 +206,7 @@ static u16 el3_isapnp_phys_addr[8][3]; -#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */ +#endif /* __ISAPNP__ */ static int nopnp; int __init el3_probe(struct net_device *dev) @@ -217,9 +217,9 @@ u16 phys_addr[3]; static int current_tag; int mca_slot = -1; -#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE) +#ifdef __ISAPNP__ static int pnp_cards; -#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */ +#endif /* __ISAPNP__ */ if (dev) SET_MODULE_OWNER(dev); @@ -323,7 +323,7 @@ } #endif /* CONFIG_MCA */ -#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE) +#ifdef __ISAPNP__ if (nopnp == 1) goto no_pnp; @@ -359,7 +359,7 @@ } } no_pnp: -#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */ +#endif /* __ISAPNP__ */ /* Select an open I/O location at 0x1*0 to do contention select. */ for ( ; id_port < 0x200; id_port += 0x10) { @@ -405,7 +405,7 @@ phys_addr[i] = htons(id_read_eeprom(i)); } -#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE) +#ifdef __ISAPNP__ if (nopnp == 0) { /* The ISA PnP 3c509 cards respond to the ID sequence. This check is needed in order not to register them twice. 
*/ @@ -425,7 +425,7 @@ } } } -#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */ +#endif /* __ISAPNP__ */ { unsigned int iobase = id_read_eeprom(8); @@ -1017,10 +1017,10 @@ MODULE_PARM_DESC(irq, "EtherLink III IRQ number(s) (assigned)"); MODULE_PARM_DESC(xcvr,"EtherLink III tranceiver(s) (0=internal, 1=external)"); MODULE_PARM_DESC(max_interrupt_work, "EtherLink III maximum events handled per interrupt"); -#ifdef CONFIG_ISAPNP +#ifdef __ISAPNP__ MODULE_PARM(nopnp, "i"); MODULE_PARM_DESC(nopnp, "EtherLink III disable ISA PnP support (0-1)"); -#endif /* CONFIG_ISAPNP */ +#endif /* __ISAPNP__ */ int init_module(void) diff -urN linux-2.5.1-pre1/drivers/net/3c515.c linux/drivers/net/3c515.c --- linux-2.5.1-pre1/drivers/net/3c515.c Sun Sep 30 12:26:06 2001 +++ linux/drivers/net/3c515.c Sat Dec 1 00:37:05 2001 @@ -359,7 +359,7 @@ { "Default", 0, 0xFF, XCVR_10baseT, 10000}, }; -#ifdef CONFIG_ISAPNP +#ifdef __ISAPNP__ static struct isapnp_device_id corkscrew_isapnp_adapters[] = { { ISAPNP_ANY_ID, ISAPNP_ANY_ID, ISAPNP_VENDOR('T', 'C', 'M'), ISAPNP_FUNCTION(0x5051), @@ -372,7 +372,7 @@ static int corkscrew_isapnp_phys_addr[3]; static int nopnp; -#endif /* CONFIG_ISAPNP */ +#endif /* __ISAPNP__ */ static int corkscrew_scan(struct net_device *dev); static struct net_device *corkscrew_found_device(struct net_device *dev, @@ -450,12 +450,12 @@ { int cards_found = 0; static int ioaddr; -#ifdef CONFIG_ISAPNP +#ifdef __ISAPNP__ short i; static int pnp_cards; #endif -#ifdef CONFIG_ISAPNP +#ifdef __ISAPNP__ if(nopnp == 1) goto no_pnp; for(i=0; corkscrew_isapnp_adapters[i].vendor != 0; i++) { @@ -513,17 +513,17 @@ } } no_pnp: -#endif /* CONFIG_ISAPNP */ +#endif /* __ISAPNP__ */ /* Check all locations on the ISA bus -- evil! */ for (ioaddr = 0x100; ioaddr < 0x400; ioaddr += 0x20) { int irq; -#ifdef CONFIG_ISAPNP +#ifdef __ISAPNP__ /* Make sure this was not already picked up by isapnp */ if(ioaddr == corkscrew_isapnp_phys_addr[0]) continue; if(ioaddr == corkscrew_isapnp_phys_addr[1]) continue; if(ioaddr == corkscrew_isapnp_phys_addr[2]) continue; -#endif /* CONFIG_ISAPNP */ +#endif /* __ISAPNP__ */ if (check_region(ioaddr, CORKSCREW_TOTAL_SIZE)) continue; /* Check the resource configuration for a matching ioaddr. 
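
The ISA driver hunks above and below all replace the two-clause config test with a single guard. The apparent rule, inferred from the consolidated usage here rather than verified against the header: <linux/isapnp.h> defines __ISAPNP__ whenever ISA PnP support is present, built in or modular, so drivers no longer enumerate the CONFIG variants themselves.

/* before: each driver spells out both configurations */
#if defined(CONFIG_ISAPNP) || defined(CONFIG_ISAPNP_MODULE)
static struct isapnp_device_id ids[] __initdata = { /* ... */ };
#endif

/* after: one canonical test */
#ifdef __ISAPNP__
static struct isapnp_device_id ids[] __initdata = { /* ... */ };
#endif
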
*/ diff -urN linux-2.5.1-pre1/drivers/net/hamradio/baycom_epp.c linux/drivers/net/hamradio/baycom_epp.c --- linux-2.5.1-pre1/drivers/net/hamradio/baycom_epp.c Mon Sep 10 09:04:53 2001 +++ linux/drivers/net/hamradio/baycom_epp.c Sat Dec 1 00:37:05 2001 @@ -807,10 +807,11 @@ /* --------------------------------------------------------------------- */ #ifdef __i386__ +#include <asm/msr.h> #define GETTICK(x) \ ({ \ if (cpu_has_tsc) \ - __asm__ __volatile__("rdtsc" : "=a" (x) : : "dx");\ + rdtscl(x); \ }) #else /* __i386__ */ #define GETTICK(x) diff -urN linux-2.5.1-pre1/drivers/net/smc-ultra.c linux/drivers/net/smc-ultra.c --- linux-2.5.1-pre1/drivers/net/smc-ultra.c Sun Sep 30 12:26:07 2001 +++ linux/drivers/net/smc-ultra.c Sat Dec 1 00:37:05 2001 @@ -80,7 +80,7 @@ int ultra_probe(struct net_device *dev); static int ultra_probe1(struct net_device *dev, int ioaddr); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ static int ultra_probe_isapnp(struct net_device *dev); #endif @@ -100,7 +100,7 @@ const unsigned char *buf, const int start_page); static int ultra_close_card(struct net_device *dev); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ static struct isapnp_device_id ultra_device_ids[] __initdata = { { ISAPNP_VENDOR('S','M','C'), ISAPNP_FUNCTION(0x8416), ISAPNP_VENDOR('S','M','C'), ISAPNP_FUNCTION(0x8416), @@ -140,7 +140,7 @@ else if (base_addr != 0) /* Don't probe at all. */ return -ENXIO; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ /* Look for any installed ISAPnP cards */ if (isapnp_present() && (ultra_probe_isapnp(dev) == 0)) return 0; @@ -279,7 +279,7 @@ return retval; } -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ static int __init ultra_probe_isapnp(struct net_device *dev) { int i; @@ -544,7 +544,7 @@ /* NB: ultra_close_card() does free_irq */ int ioaddr = dev->base_addr - ULTRA_NIC_OFFSET; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ struct pci_dev *idev = (struct pci_dev *)ei_status.priv; if (idev) idev->deactivate(idev); diff -urN linux-2.5.1-pre1/drivers/net/tlan.c linux/drivers/net/tlan.c --- linux-2.5.1-pre1/drivers/net/tlan.c Sun Sep 30 12:26:08 2001 +++ linux/drivers/net/tlan.c Sat Dec 1 00:37:05 2001 @@ -2265,8 +2265,8 @@ printk("TLAN: Partner capability: "); for (i = 5; i <= 10; i++) if (partner & (1<base_addr, TLAN_LED_REG, TLAN_LED_LINK ); diff -urN linux-2.5.1-pre1/drivers/pci/setup-res.c linux/drivers/pci/setup-res.c --- linux-2.5.1-pre1/drivers/pci/setup-res.c Thu Oct 4 18:47:08 2001 +++ linux/drivers/pci/setup-res.c Sat Dec 1 00:37:05 2001 @@ -219,9 +219,8 @@ cmd |= PCI_COMMAND_IO; } - /* ??? Always turn on bus mastering. If the device doesn't support - it, the bit will go into the bucket. */ - cmd |= PCI_COMMAND_MASTER; + /* Do not enable bus mastering. A device could corrupt + * system memory by DMAing before a driver is ready for it. */ /* Set the cache line and default latency (32).
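
The setup-res.c hunk just above reverses an old policy: PCI bus mastering is no longer switched on for every device at resource-setup time, because a device that can DMA before its driver is attached can corrupt memory. Drivers are now expected to enable mastering themselves once they are ready; a sketch along the usual 2.4-era lines, with xyz_probe as a placeholder:

static int xyz_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	if (pci_enable_device(pdev))
		return -ENODEV;
	/* explicitly opt in to bus mastering, now that the driver
	 * is prepared to handle DMA from this device */
	pci_set_master(pdev);
	return 0;
}
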
*/ pci_write_config_word(dev, PCI_CACHE_LINE_SIZE, diff -urN linux-2.5.1-pre1/drivers/pcmcia/i82365.c linux/drivers/pcmcia/i82365.c --- linux-2.5.1-pre1/drivers/pcmcia/i82365.c Mon Nov 12 09:39:01 2001 +++ linux/drivers/pcmcia/i82365.c Sat Dec 1 00:37:05 2001 @@ -813,11 +813,7 @@ #ifdef CONFIG_ISA -#if defined(CONFIG_ISAPNP) || (defined(CONFIG_ISAPNP_MODULE) && defined(MODULE)) -#define I82365_ISAPNP -#endif - -#ifdef I82365_ISAPNP +#ifdef __ISAPNP__ static struct isapnp_device_id id_table[] __initdata = { { ISAPNP_ANY_ID, ISAPNP_ANY_ID, ISAPNP_VENDOR('P', 'N', 'P'), ISAPNP_FUNCTION(0x0e00), (unsigned long) "Intel 82365-Compatible" }, @@ -836,7 +832,7 @@ { int i, j, sock, k, ns, id; ioaddr_t port; -#ifdef I82365_ISAPNP +#ifdef __ISAPNP__ struct isapnp_device_id *devid; struct pci_dev *dev; @@ -1647,7 +1643,7 @@ i365_set(i, I365_CSCINT, 0); release_region(socket[i].ioaddr, 2); } -#if defined(CONFIG_ISA) && defined(I82365_ISAPNP) +#if defined(CONFIG_ISA) && defined(__ISAPNP__) if (i82365_pnpdev && i82365_pnpdev->deactivate) i82365_pnpdev->deactivate(i82365_pnpdev); #endif diff -urN linux-2.5.1-pre1/drivers/s390/block/dasd.c linux/drivers/s390/block/dasd.c --- linux-2.5.1-pre1/drivers/s390/block/dasd.c Fri Nov 9 14:05:02 2001 +++ linux/drivers/s390/block/dasd.c Sat Dec 1 00:37:05 2001 @@ -730,13 +730,6 @@ goto out_hardsect_size; memset (hardsect_size[major], 0, (1 << MINORBITS) * sizeof (int)); - /* init max_sectors */ - max_sectors[major] = - (int *) kmalloc ((1 << MINORBITS) * sizeof (int), GFP_ATOMIC); - if (!max_sectors[major]) - goto out_max_sectors; - memset (max_sectors[major], 0, (1 << MINORBITS) * sizeof (int)); - /* finally do the gendisk stuff */ major_info->gendisk.part = kmalloc ((1 << MINORBITS) * sizeof (struct hd_struct), @@ -755,10 +748,6 @@ /* error handling - free the prior allocated memory */ out_gendisk: - kfree (max_sectors[major]); - max_sectors[major] = NULL; - - out_max_sectors: kfree (hardsect_size[major]); hardsect_size[major] = NULL; @@ -825,12 +814,8 @@ kfree (blk_size[major]); kfree (blksize_size[major]); kfree (hardsect_size[major]); - kfree (max_sectors[major]); - blk_size[major] = NULL; - blksize_size[major] = NULL; - hardsect_size[major] = NULL; - max_sectors[major] = NULL; + blk_clear(major); rc = devfs_unregister_blkdev (major, DASD_NAME); if (rc < 0) { @@ -1704,10 +1689,6 @@ dasd_end_request (req, 0); dasd_dequeue_request (queue,req); } else { - /* relocate request according to partition table */ - req->sector += - device->major_info->gendisk. - part[MINOR (req->rq_dev)].start_sect; cqr = device->discipline->build_cp_from_req (device, req); if (cqr == NULL) { @@ -1716,10 +1697,7 @@ "on request %p\n", device->devinfo.devno, req); - /* revert relocation of request */ - req->sector -= - device->major_info->gendisk. 
- part[MINOR (req->rq_dev)].start_sect; + break; /* terminate request queue loop */ } @@ -1769,10 +1747,10 @@ dasd_run_bh (dasd_device_t * device) { long flags; - spin_lock_irqsave (&io_request_lock, flags); + spin_lock_irqsave (&device->request_queue.queue_lock, flags); atomic_set (&device->bh_scheduled, 0); dasd_process_queues (device); - spin_unlock_irqrestore (&io_request_lock, flags); + spin_unlock_irqrestore (&device->request_queue.queue_lock, flags); } /* @@ -2468,14 +2446,12 @@ dasd_info.chanq_len = 0; if (device->request_queue->request_fn) { struct list_head *l; + request_queue_t *q = drive->request_queue; ccw_req_t *cqr = device->queue.head; - spin_lock_irqsave (&io_request_lock, flags); - list_for_each (l, - &device->request_queue-> - queue_head) { + spin_lock_irqsave (&q->queue_lock, flags); + list_for_each (l, q->queue_head, queue_head) dasd_info.req_queue_len++; - } - spin_unlock_irqrestore (&io_request_lock, + spin_unlock_irqrestore (&q->queue_lock, flags); s390irq_spin_lock_irqsave (device->devinfo.irq, flags); @@ -2668,7 +2644,7 @@ /* SECTION: Management of device list */ int -dasd_fillgeo(int kdev,struct hd_geometry *geo) +dasd_fillgeo(kdev_t kdev,struct hd_geometry *geo) { dasd_device_t *device = dasd_device_from_kdev (kdev); @@ -2679,8 +2655,7 @@ return -EINVAL; device->discipline->fill_geometry (device, geo); - geo->start = device->major_info->gendisk.part[MINOR(kdev)].start_sect - >> device->sizes.s2b_shift;; + geo->start = get_start_sect(kdev); return 0; } @@ -3365,6 +3340,12 @@ int major = MAJOR(device->kdev); int minor = MINOR(device->kdev); + device->request_queue = kmalloc(sizeof(request_queue_t),GFP_KERNEL); + device->request_queue->queuedata = device; + blk_init_queue (device->request_queue, do_dasd_request); + blk_queue_headactive (device->request_queue, 0); + elevator_init (&(device->request_queue->elevator),ELEVATOR_NOOP); + for (i = 0; i < (1 << DASD_PARTN_BITS); i++) { if (i == 0) device->major_info->gendisk.sizes[minor] = @@ -3374,17 +3355,11 @@ device->major_info->gendisk.sizes[minor + i] = 0; hardsect_size[major][minor + i] = device->sizes.bp_block; blksize_size[major][minor + i] = device->sizes.bp_block; - max_sectors[major][minor + i] = - device->discipline->max_blocks << - device->sizes.s2b_shift; + blk_queue_max_sectors(device->request_queue, + device->discipline->max_blocks << device->sizes.s2b_shift); device->major_info->gendisk.part[minor+i].start_sect = 0; device->major_info->gendisk.part[minor+i].nr_sects = 0; } - device->request_queue = kmalloc(sizeof(request_queue_t),GFP_KERNEL); - device->request_queue->queuedata = device; - blk_init_queue (device->request_queue, do_dasd_request); - blk_queue_headactive (device->request_queue, 0); - elevator_init (&(device->request_queue->elevator),ELEVATOR_NOOP); return rc; } @@ -3411,7 +3386,6 @@ device->major_info->gendisk.sizes[minor + i] = 0; hardsect_size[major][minor + i] = 0; blksize_size[major][minor + i] = 0; - max_sectors[major][minor + i] = 0; } if (device->request_queue) { blk_cleanup_queue (device->request_queue); diff -urN linux-2.5.1-pre1/drivers/s390/block/xpram.c linux/drivers/s390/block/xpram.c --- linux-2.5.1-pre1/drivers/s390/block/xpram.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/s390/block/xpram.c Sat Dec 1 00:37:05 2001 @@ -1213,8 +1213,7 @@ { int i; - /* first of all, flush it all and reset all the data structures */ - + /* first of all, flush it all and reset all the data structures */ for (i=0; irequest_queue; + + spin_lock_irqsave (&q->queue_lock, flags_ior); 
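
The dasd hunk above is an ordering fix as much as a cleanup: the per-minor loop now calls blk_queue_max_sectors() on device->request_queue, so the queue must be allocated and initialized before the loop rather than after it. In outline (error handling omitted, as in the patch itself):

/* 1. create and initialize the queue ... */
device->request_queue = kmalloc(sizeof(request_queue_t), GFP_KERNEL);
device->request_queue->queuedata = device;
blk_init_queue(device->request_queue, do_dasd_request);
blk_queue_headactive(device->request_queue, 0);
elevator_init(&(device->request_queue->elevator), ELEVATOR_NOOP);

/* 2. ... only then apply per-queue limits, which dereference it */
blk_queue_max_sectors(device->request_queue,
		device->discipline->max_blocks << device->sizes.s2b_shift);
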
s390irq_spin_lock_irqsave(ti->devinfo.irq,flags_390irq); atomic_set(&ti->bh_scheduled,0); tapeblock_exec_IO(ti); s390irq_spin_unlock_irqrestore(ti->devinfo.irq,flags_390irq); - spin_unlock_irqrestore (&io_request_lock, flags_ior); + spin_unlock_irqrestore (&q->queue_lock, flags_ior); } void diff -urN linux-2.5.1-pre1/drivers/scsi/53c7,8xx.c linux/drivers/scsi/53c7,8xx.c --- linux-2.5.1-pre1/drivers/scsi/53c7,8xx.c Thu Oct 25 13:53:48 2001 +++ linux/drivers/scsi/53c7,8xx.c Sat Dec 1 00:37:05 2001 @@ -1427,13 +1427,14 @@ return -1; } -#ifdef __powerpc__ if ( ! (command & PCI_COMMAND_MASTER)) { - printk("SCSI: PCI Master Bit has not been set. Setting...\n"); + printk(KERN_INFO "SCSI: PCI Master Bit has not been set. Setting...\n"); command |= PCI_COMMAND_MASTER|PCI_COMMAND_IO; pci_write_config_word(pdev, PCI_COMMAND, command); + } - if (io_port >= 0x10000000 && is_prep ) { +#ifdef __powerpc__ + if (io_port >= 0x10000000 && is_prep ) { /* Mapping on PowerPC can't handle this! */ unsigned long new_io_port; new_io_port = (io_port & 0x00FFFFFF) | 0x01000000; @@ -1441,7 +1442,6 @@ io_port = new_io_port; pci_write_config_dword(pdev, PCI_BASE_ADDRESS_0, io_port); pdev->base_address[0] = io_port; - } } #endif diff -urN linux-2.5.1-pre1/drivers/scsi/Config.in linux/drivers/scsi/Config.in --- linux-2.5.1-pre1/drivers/scsi/Config.in Fri Nov 9 15:22:54 2001 +++ linux/drivers/scsi/Config.in Sat Dec 1 00:37:05 2001 @@ -20,10 +20,6 @@ comment 'Some SCSI devices (e.g. CD jukebox) support multiple LUNs' -#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then - bool ' Enable extra checks in new queueing code' CONFIG_SCSI_DEBUG_QUEUES -#fi - bool ' Probe all LUNs on each SCSI device' CONFIG_SCSI_MULTI_LUN bool ' Verbose SCSI error reporting (kernel size +=12K)' CONFIG_SCSI_CONSTANTS diff -urN linux-2.5.1-pre1/drivers/scsi/aic7xxx/aic7xxx_linux.c linux/drivers/scsi/aic7xxx/aic7xxx_linux.c --- linux-2.5.1-pre1/drivers/scsi/aic7xxx/aic7xxx_linux.c Wed Nov 21 14:05:29 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_linux.c Sat Dec 1 00:37:05 2001 @@ -1123,9 +1123,9 @@ if (host == NULL) return (ENOMEM); - ahc_lock(ahc, &s); *((struct ahc_softc **)host->hostdata) = ahc; ahc->platform_data->host = host; + ahc_lock(ahc, &s); host->can_queue = AHC_MAX_QUEUE; host->cmd_per_lun = 2; host->sg_tablesize = AHC_NSEG; @@ -1272,7 +1272,9 @@ TAILQ_INIT(&ahc->platform_data->completeq); TAILQ_INIT(&ahc->platform_data->device_runq); ahc->platform_data->hw_dma_mask = 0xFFFFFFFF; - ahc_lockinit(ahc); + /* + * ahc_lockinit done by scsi_register, as we don't own that lock + */ ahc_done_lockinit(ahc); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) init_MUTEX_LOCKED(&ahc->platform_data->eh_sem); @@ -1530,22 +1532,17 @@ int ahc_linux_queue(Scsi_Cmnd * cmd, void (*scsi_done) (Scsi_Cmnd *)) { - struct ahc_softc *ahc; + struct ahc_softc *ahc = *(struct ahc_softc **)cmd->host->hostdata; struct ahc_linux_device *dev; - u_long flags; - - ahc = *(struct ahc_softc **)cmd->host->hostdata; /* * Save the callback on completion function. */ cmd->scsi_done = scsi_done; - ahc_lock(ahc, &flags); dev = ahc_linux_get_device(ahc, cmd->channel, cmd->target, cmd->lun, /*alloc*/TRUE); if (dev == NULL) { - ahc_unlock(ahc, &flags); printf("aic7xxx_linux_queue: Unable to allocate device!\n"); return (-ENOMEM); } @@ -1556,7 +1553,6 @@ dev->flags |= AHC_DEV_ON_RUN_LIST; ahc_linux_run_device_queues(ahc); } - ahc_unlock(ahc, &flags); return (0); } @@ -2408,12 +2404,10 @@ flag == SCB_ABORT ? 
"n ABORT" : " TARGET RESET"); /* - * It is a bug that the upper layer takes - * this lock just prior to calling us. + * we used to drop io_request_lock and lock ahc from here, but + * now that the global lock is gone the upper layer have already + * done what ahc_lock would do /jens */ - spin_unlock_irq(&io_request_lock); - - ahc_lock(ahc, &s); /* * First determine if we currently own this command. @@ -2661,7 +2655,7 @@ ahc_unlock(ahc, &s); if (acmd != NULL) ahc_linux_run_complete_queue(ahc, acmd); - spin_lock_irq(&io_request_lock); + ahc_lock(ahc, &s); return (retval); } @@ -2704,14 +2698,7 @@ u_long s; int found; - /* - * It is a bug that the upper layer takes - * this lock just prior to calling us. - */ - spin_unlock_irq(&io_request_lock); - ahc = *(struct ahc_softc **)cmd->host->hostdata; - ahc_lock(ahc, &s); found = ahc_reset_channel(ahc, cmd->channel + 'A', /*initiate reset*/TRUE); acmd = TAILQ_FIRST(&ahc->platform_data->completeq); @@ -2724,7 +2711,7 @@ if (acmd != NULL) ahc_linux_run_complete_queue(ahc, acmd); - spin_lock_irq(&io_request_lock); + ahc_lock(ahc, &s); return SUCCESS; } diff -urN linux-2.5.1-pre1/drivers/scsi/aic7xxx/aic7xxx_linux_host.h linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h --- linux-2.5.1-pre1/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Thu Oct 25 13:53:49 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Sat Dec 1 00:37:05 2001 @@ -89,7 +89,8 @@ present: 0, /* number of 7xxx's present */\ unchecked_isa_dma: 0, /* no memory DMA restrictions */\ use_clustering: ENABLE_CLUSTERING, \ - use_new_eh_code: 1 \ + use_new_eh_code: 1, \ + highmem_io: 1 \ } #endif /* _AIC7XXX_LINUX_HOST_H_ */ diff -urN linux-2.5.1-pre1/drivers/scsi/aic7xxx/aic7xxx_osm.h linux/drivers/scsi/aic7xxx/aic7xxx_osm.h --- linux-2.5.1-pre1/drivers/scsi/aic7xxx/aic7xxx_osm.h Thu Oct 25 13:53:49 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_osm.h Sat Dec 1 00:37:05 2001 @@ -575,9 +575,6 @@ TAILQ_HEAD(, ahc_linux_device) device_runq; struct ahc_completeq completeq; -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,1,0) - spinlock_t spin_lock; -#endif u_int qfrozen; struct timer_list reset_timer; struct semaphore eh_sem; @@ -716,20 +713,20 @@ static __inline void ahc_lockinit(struct ahc_softc *ahc) { - spin_lock_init(&ahc->platform_data->spin_lock); + spin_lock_init(&ahc->platform_data->host->host_lock); } static __inline void ahc_lock(struct ahc_softc *ahc, unsigned long *flags) { *flags = 0; - spin_lock_irqsave(&ahc->platform_data->spin_lock, *flags); + spin_lock_irqsave(&ahc->platform_data->host->host_lock, *flags); } static __inline void ahc_unlock(struct ahc_softc *ahc, unsigned long *flags) { - spin_unlock_irqrestore(&ahc->platform_data->spin_lock, *flags); + spin_unlock_irqrestore(&ahc->platform_data->host->host_lock, *flags); } static __inline void @@ -741,14 +738,18 @@ static __inline void ahc_done_lock(struct ahc_softc *ahc, unsigned long *flags) { + struct Scsi_Host *host = ahc->platform_data->host; + *flags = 0; - spin_lock_irqsave(&io_request_lock, *flags); + spin_lock_irqsave(&host->host_lock, *flags); } static __inline void ahc_done_unlock(struct ahc_softc *ahc, unsigned long *flags) { - spin_unlock_irqrestore(&io_request_lock, *flags); + struct Scsi_Host *host = ahc->platform_data->host; + + spin_unlock_irqrestore(&host->host_lock, *flags); } #else /* LINUX_VERSION_CODE < KERNEL_VERSION(2,1,0) */ diff -urN linux-2.5.1-pre1/drivers/scsi/aic7xxx_old.c linux/drivers/scsi/aic7xxx_old.c --- linux-2.5.1-pre1/drivers/scsi/aic7xxx_old.c Wed Nov 21 14:05:29 2001 +++ 
linux/drivers/scsi/aic7xxx_old.c Sat Dec 1 00:37:05 2001 @@ -4127,7 +4127,7 @@ unsigned long cpu_flags = 0; struct aic7xxx_scb *scb; - spin_lock_irqsave(&io_request_lock, cpu_flags); + spin_lock_irqsave(&p->host->host_lock, cpu_flags); p->dev_timer_active &= ~(0x01 << MAX_TARGETS); if ( (p->dev_timer_active & (0x01 << p->scsi_id)) && time_after_eq(jiffies, p->dev_expires[p->scsi_id]) ) @@ -4184,7 +4184,7 @@ } aic7xxx_run_waiting_queues(p); - spin_unlock_irqrestore(&io_request_lock, cpu_flags); + spin_unlock_irqrestore(&p->host->host_lock, cpu_flags); } /*+F************************************************************************* @@ -7011,7 +7011,7 @@ p = (struct aic7xxx_host *)dev_id; if(!p) return; - spin_lock_irqsave(&io_request_lock, cpu_flags); + spin_lock_irqsave(&p->host->host_lock, cpu_flags); p->flags |= AHC_IN_ISR; do { @@ -7020,7 +7020,7 @@ aic7xxx_done_cmds_complete(p); aic7xxx_run_waiting_queues(p); p->flags &= ~AHC_IN_ISR; - spin_unlock_irqrestore(&io_request_lock, cpu_flags); + spin_unlock_irqrestore(&p->host->host_lock, cpu_flags); } /*+F************************************************************************* @@ -11148,7 +11148,7 @@ disable_irq(p->irq); aic7xxx_print_card(p); aic7xxx_print_scratch_ram(p); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&p->host->host_lock); for(;;) barrier(); } diff -urN linux-2.5.1-pre1/drivers/scsi/hosts.c linux/drivers/scsi/hosts.c --- linux-2.5.1-pre1/drivers/scsi/hosts.c Thu Jul 5 11:28:17 2001 +++ linux/drivers/scsi/hosts.c Sat Dec 1 00:37:05 2001 @@ -129,7 +129,7 @@ * once we are 100% sure that we want to use this host adapter - it is a * pain to reverse this, so we try to avoid it */ - +extern int blk_nohighio; struct Scsi_Host * scsi_register(Scsi_Host_Template * tpnt, int j){ struct Scsi_Host * retval, *shpnt, *o_shp; Scsi_Host_Name *shn, *shn2; @@ -160,6 +160,7 @@ break; } } + spin_lock_init(&retval->host_lock); atomic_set(&retval->host_active,0); retval->host_busy = 0; retval->host_failed = 0; @@ -235,6 +236,8 @@ retval->cmd_per_lun = tpnt->cmd_per_lun; retval->unchecked_isa_dma = tpnt->unchecked_isa_dma; retval->use_clustering = tpnt->use_clustering; + if (!blk_nohighio) + retval->highmem_io = tpnt->highmem_io; retval->select_queue_depths = tpnt->select_queue_depths; retval->max_sectors = tpnt->max_sectors; diff -urN linux-2.5.1-pre1/drivers/scsi/hosts.h linux/drivers/scsi/hosts.h --- linux-2.5.1-pre1/drivers/scsi/hosts.h Thu Nov 22 11:49:15 2001 +++ linux/drivers/scsi/hosts.h Sat Dec 1 00:37:05 2001 @@ -291,6 +291,8 @@ */ unsigned emulated:1; + unsigned highmem_io:1; + /* * Name of proc directory */ @@ -317,6 +319,7 @@ struct Scsi_Host * next; Scsi_Device * host_queue; + spinlock_t host_lock; struct task_struct * ehandler; /* Error recovery thread. 
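
The hosts.c/hosts.h changes above give every Scsi_Host its own spinlock, and the aic7xxx conversions show the intended pattern: interrupt and error handlers serialize on their own host->host_lock instead of the global io_request_lock. A typical converted interrupt handler (a sketch; xyz_host is a hypothetical per-adapter structure holding its Scsi_Host in p->host):

static void xyz_intr(int irq, void *dev_id, struct pt_regs *regs)
{
	struct xyz_host *p = dev_id;
	unsigned long flags;

	/* per-host lock: unrelated adapters no longer contend */
	spin_lock_irqsave(&p->host->host_lock, flags);
	/* ... ack the adapter, walk completions, restart queues ... */
	spin_unlock_irqrestore(&p->host->host_lock, flags);
}
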
*/ struct semaphore * eh_wait; /* The error recovery thread waits on @@ -390,6 +393,7 @@ unsigned in_recovery:1; unsigned unchecked_isa_dma:1; unsigned use_clustering:1; + unsigned highmem_io:1; /* * True if this host was loaded as a loadable module */ diff -urN linux-2.5.1-pre1/drivers/scsi/ide-scsi.c linux/drivers/scsi/ide-scsi.c --- linux-2.5.1-pre1/drivers/scsi/ide-scsi.c Sun Sep 30 12:26:07 2001 +++ linux/drivers/scsi/ide-scsi.c Sat Dec 1 00:37:05 2001 @@ -235,13 +235,13 @@ kfree(atapi_buf); } -static inline void idescsi_free_bh (struct buffer_head *bh) +static inline void idescsi_free_bio (struct bio *bio) { - struct buffer_head *bhp; + struct bio *bhp; - while (bh) { - bhp = bh; - bh = bh->b_reqnext; + while (bio) { + bhp = bio; + bio = bio->bi_next; kfree (bhp); } } @@ -263,6 +263,7 @@ struct request *rq = hwgroup->rq; idescsi_pc_t *pc = (idescsi_pc_t *) rq->buffer; int log = test_bit(IDESCSI_LOG_CMD, &scsi->log); + struct Scsi_Host *host; u8 *scsi_buf; unsigned long flags; @@ -291,10 +292,11 @@ } else printk("\n"); } } - spin_lock_irqsave(&io_request_lock,flags); + host = pc->scsi_cmd->host; + spin_lock_irqsave(&host->host_lock, flags); pc->done(pc->scsi_cmd); - spin_unlock_irqrestore(&io_request_lock,flags); - idescsi_free_bh (rq->bh); + spin_unlock_irqrestore(&host->host_lock, flags); + idescsi_free_bio (rq->bio); kfree(pc); kfree(rq); scsi->pc = NULL; } @@ -427,7 +429,7 @@ pc->current_position=pc->buffer; bcount = IDE_MIN (pc->request_transfer, 63 * 1024); /* Request to transfer the entire buffer at once */ - if (drive->using_dma && rq->bh) + if (drive->using_dma && rq->special) dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive); SELECT_DRIVE(HWIF(drive), drive); @@ -653,25 +655,24 @@ return -EINVAL; } -static inline struct buffer_head *idescsi_kmalloc_bh (int count) +static inline struct bio *idescsi_kmalloc_bio (int count) { - struct buffer_head *bh, *bhp, *first_bh; + struct bio *bh, *bhp, *first_bh; - if ((first_bh = bhp = bh = kmalloc (sizeof(struct buffer_head), GFP_ATOMIC)) == NULL) + if ((first_bh = bhp = bh = bio_alloc(GFP_ATOMIC, 1)) == NULL) goto abort; - memset (bh, 0, sizeof (struct buffer_head)); - bh->b_reqnext = NULL; + memset (bh, 0, sizeof (struct bio)); while (--count) { - if ((bh = kmalloc (sizeof(struct buffer_head), GFP_ATOMIC)) == NULL) + if ((bh = bio_alloc(GFP_ATOMIC, 1)) == NULL) goto abort; - memset (bh, 0, sizeof (struct buffer_head)); - bhp->b_reqnext = bh; + memset (bh, 0, sizeof (struct bio)); + bhp->bi_next = bh; bhp = bh; - bh->b_reqnext = NULL; + bh->bi_next = NULL; } return first_bh; abort: - idescsi_free_bh (first_bh); + idescsi_free_bio (first_bh); return NULL; } @@ -689,9 +690,9 @@ } } -static inline struct buffer_head *idescsi_dma_bh (ide_drive_t *drive, idescsi_pc_t *pc) +static inline struct bio *idescsi_dma_bio(ide_drive_t *drive, idescsi_pc_t *pc) { - struct buffer_head *bh = NULL, *first_bh = NULL; + struct bio *bh = NULL, *first_bh = NULL; int segments = pc->scsi_cmd->use_sg; struct scatterlist *sg = pc->scsi_cmd->request_buffer; @@ -700,25 +701,27 @@ if (idescsi_set_direction(pc)) return NULL; if (segments) { - if ((first_bh = bh = idescsi_kmalloc_bh (segments)) == NULL) + if ((first_bh = bh = idescsi_kmalloc_bio (segments)) == NULL) return NULL; #if IDESCSI_DEBUG_LOG printk ("ide-scsi: %s: building DMA table, %d segments, %dkB total\n", drive->name, segments, pc->request_transfer >> 10); #endif /* IDESCSI_DEBUG_LOG */ while (segments--) { - bh->b_data = sg->address; - bh->b_size = 
sg->length; - bh = bh->b_reqnext; + bh->bi_io_vec.bv_page = sg->page; + bh->bi_io_vec.bv_len = sg->length; + bh->bi_io_vec.bv_offset = sg->offset; + bh = bh->bi_next; sg++; } } else { - if ((first_bh = bh = idescsi_kmalloc_bh (1)) == NULL) + if ((first_bh = bh = idescsi_kmalloc_bio (1)) == NULL) return NULL; #if IDESCSI_DEBUG_LOG printk ("ide-scsi: %s: building DMA table for a single buffer (%dkB)\n", drive->name, pc->request_transfer >> 10); #endif /* IDESCSI_DEBUG_LOG */ - bh->b_data = pc->scsi_cmd->request_buffer; - bh->b_size = pc->request_transfer; + bh->bi_io_vec.bv_page = virt_to_page(pc->scsi_cmd->request_buffer); + bh->bi_io_vec.bv_len = pc->request_transfer; + bh->bi_io_vec.bv_offset = (unsigned long) pc->scsi_cmd->request_buffer & ~PAGE_MASK; } return first_bh; } @@ -783,11 +786,11 @@ ide_init_drive_cmd (rq); rq->buffer = (char *) pc; - rq->bh = idescsi_dma_bh (drive, pc); + rq->bio = idescsi_dma_bio (drive, pc); rq->cmd = IDESCSI_PC_RQ; - spin_unlock(&io_request_lock); + spin_unlock(&cmd->host->host_lock); (void) ide_do_drive_cmd (drive, rq, ide_end); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&cmd->host->host_lock); return 0; abort: if (pc) kfree (pc); diff -urN linux-2.5.1-pre1/drivers/scsi/megaraid.c linux/drivers/scsi/megaraid.c --- linux-2.5.1-pre1/drivers/scsi/megaraid.c Thu Oct 25 13:53:51 2001 +++ linux/drivers/scsi/megaraid.c Sat Dec 1 00:37:05 2001 @@ -586,8 +586,10 @@ #define DRIVER_LOCK(p) #define DRIVER_UNLOCK(p) #define IO_LOCK_T unsigned long io_flags = 0 -#define IO_LOCK spin_lock_irqsave(&io_request_lock,io_flags); -#define IO_UNLOCK spin_unlock_irqrestore(&io_request_lock,io_flags); +#define IO_LOCK(host) spin_lock_irqsave(&(host)->host_lock,io_flags) +#define IO_UNLOCK(host) spin_unlock_irqrestore(&(host)->host_lock,io_flags) +#define IO_LOCK_IRQ(host) spin_lock_irq(&(host)->host_lock) +#define IO_UNLOCK_IRQ(host) spin_unlock_irq(&(host)->host_lock) #define queue_task_irq(a,b) queue_task(a,b) #define queue_task_irq_off(a,b) queue_task(a,b) @@ -612,8 +614,8 @@ #define DRIVER_LOCK(p) #define DRIVER_UNLOCK(p) #define IO_LOCK_T unsigned long io_flags = 0 -#define IO_LOCK spin_lock_irqsave(&io_request_lock,io_flags); -#define IO_UNLOCK spin_unlock_irqrestore(&io_request_lock,io_flags); +#define IO_LOCK(host) spin_lock_irqsave(&io_request_lock,io_flags); +#define IO_UNLOCK(host) spin_unlock_irqrestore(&io_request_lock,io_flags); #define pci_free_consistent(a,b,c,d) #define pci_unmap_single(a,b,c,d) @@ -2101,7 +2103,7 @@ for (idx = 0; idx < MAX_FIRMWARE_STATUS; idx++) completed[idx] = 0; - IO_LOCK; + IO_LOCK(megaCfg->host); megaCfg->nInterrupts++; qCnt = 0xff; @@ -2220,7 +2222,7 @@ megaCfg->flag &= ~IN_ISR; /* Loop through any pending requests */ mega_runpendq (megaCfg); - IO_UNLOCK; + IO_UNLOCK(megaCfg->host); } @@ -3032,9 +3034,7 @@ sizeof (mega_mailbox64), &(megaCfg->dma_handle64)); - mega_register_mailbox (megaCfg, - virt_to_bus ((void *) megaCfg-> - mailbox64ptr)); + mega_register_mailbox (megaCfg, megaCfg->dma_handle64); #else mega_register_mailbox (megaCfg, virt_to_bus ((void *) &megaCfg-> @@ -3800,7 +3800,7 @@ if (pScb->SCpnt->cmnd[0] == M_RD_IOCTL_CMD_NEW) { init_MUTEX_LOCKED (&pScb->ioctl_sem); - spin_unlock_irq (&io_request_lock); + IO_UNLOCK_IRQ(megaCfg->host); down (&pScb->ioctl_sem); user_area = (char *)*((u32*)&pScb->SCpnt->cmnd[4]); if (copy_to_user @@ -3809,7 +3809,7 @@ ("megaraid: Error copying ioctl return value to user buffer.\n"); pScb->SCpnt->result = (DID_ERROR << 16); } - spin_lock_irq (&io_request_lock); + IO_LOCK_IRQ(megaCfg->host); 
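The aic7xxx, ide-scsi and megaraid hunks above all perform the same conversion: paths that used to serialize on the single global io_request_lock now take the spinlock embedded in their own struct Scsi_Host, so two adapters no longer contend with each other. A minimal sketch of the resulting pattern, assuming only the host_lock field this patch adds; the function name and the use of the command's scsi_done callback are illustrative, not taken from the patch:

	#include <linux/spinlock.h>
	#include "scsi.h"
	#include "hosts.h"	/* struct Scsi_Host, now carrying host_lock */

	/* complete a command while serializing against this adapter only */
	static void example_finish_cmd(struct Scsi_Host *host, Scsi_Cmnd *cmd)
	{
		unsigned long flags;

		spin_lock_irqsave(&host->host_lock, flags);
		cmd->scsi_done(cmd);
		spin_unlock_irqrestore(&host->host_lock, flags);
	}

The same shape appears in ahc_done_lock/ahc_done_unlock and in the IO_LOCK(host) macros above; only the lock instance changes, not the irqsave discipline.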
DRIVER_LOCK (megaCfg); kfree (pScb->buff_ptr); pScb->buff_ptr = NULL; @@ -4744,10 +4744,10 @@ init_MUTEX_LOCKED(&mimd_ioctl_sem); - IO_LOCK; + IO_LOCK(shpnt); megaraid_queue(scsicmd, megadev_ioctl_done); - IO_UNLOCK; + IO_UNLOCK(shpnt); down(&mimd_ioctl_sem); @@ -4893,10 +4893,10 @@ init_MUTEX_LOCKED (&mimd_ioctl_sem); - IO_LOCK; + IO_LOCK(shpnt); megaraid_queue (scsicmd, megadev_ioctl_done); - IO_UNLOCK; + IO_UNLOCK(shpnt); down (&mimd_ioctl_sem); if (!scsicmd->result && outlen) { diff -urN linux-2.5.1-pre1/drivers/scsi/megaraid.h linux/drivers/scsi/megaraid.h --- linux-2.5.1-pre1/drivers/scsi/megaraid.h Thu Oct 25 13:53:51 2001 +++ linux/drivers/scsi/megaraid.h Sat Dec 1 00:37:05 2001 @@ -223,7 +223,8 @@ cmd_per_lun: MAX_CMD_PER_LUN, /* SCSI Commands per LUN */\ present: 0, /* Present */\ unchecked_isa_dma: 0, /* Default Unchecked ISA DMA */\ - use_clustering: ENABLE_CLUSTERING /* Enable Clustering */\ + use_clustering: ENABLE_CLUSTERING, /* Enable Clustering */\ + highmem_io: 1, \ } #endif diff -urN linux-2.5.1-pre1/drivers/scsi/qlogicfc.c linux/drivers/scsi/qlogicfc.c --- linux-2.5.1-pre1/drivers/scsi/qlogicfc.c Thu Oct 25 13:53:51 2001 +++ linux/drivers/scsi/qlogicfc.c Sat Dec 1 00:37:05 2001 @@ -2042,6 +2042,7 @@ return 1; } + pci_set_master(pdev); if (!(command & PCI_COMMAND_MASTER)) { printk("qlogicfc%d : bus mastering is disabled\n", hostdata->host_id); return 1; diff -urN linux-2.5.1-pre1/drivers/scsi/qlogicfc.h linux/drivers/scsi/qlogicfc.h --- linux-2.5.1-pre1/drivers/scsi/qlogicfc.h Sun Oct 21 10:36:54 2001 +++ linux/drivers/scsi/qlogicfc.h Sat Dec 1 00:37:05 2001 @@ -95,7 +95,8 @@ cmd_per_lun: QLOGICFC_CMD_PER_LUN, \ present: 0, \ unchecked_isa_dma: 0, \ - use_clustering: ENABLE_CLUSTERING \ + use_clustering: ENABLE_CLUSTERING, \ + highmem_io: 1 \ } #endif /* _QLOGICFC_H */ diff -urN linux-2.5.1-pre1/drivers/scsi/qlogicisp.c linux/drivers/scsi/qlogicisp.c --- linux-2.5.1-pre1/drivers/scsi/qlogicisp.c Thu Oct 25 13:53:51 2001 +++ linux/drivers/scsi/qlogicisp.c Sat Dec 1 00:37:05 2001 @@ -1403,11 +1403,6 @@ command &= ~PCI_COMMAND_MEMORY; #endif - if (!(command & PCI_COMMAND_MASTER)) { - printk("qlogicisp : bus mastering is disabled\n"); - return 1; - } - sh->io_port = io_base; if (!request_region(sh->io_port, 0xff, "qlogicisp")) { @@ -1471,6 +1466,8 @@ printk("qlogicisp : can't allocate request queue\n"); goto out_unmap; } + + pci_set_master(pdev); LEAVE("isp1020_init"); diff -urN linux-2.5.1-pre1/drivers/scsi/scsi.c linux/drivers/scsi/scsi.c --- linux-2.5.1-pre1/drivers/scsi/scsi.c Fri Nov 9 14:05:06 2001 +++ linux/drivers/scsi/scsi.c Sat Dec 1 00:37:05 2001 @@ -186,10 +186,22 @@ * handler in the list - ultimately they call scsi_request_fn * to do the dirty deed. 
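The qlogicfc and qlogicisp hunks above fix the same latent bug: both drivers tested PCI_COMMAND_MASTER without ever enabling bus mastering, so the test could only succeed if firmware had already set the bit. Calling pci_set_master() first makes the check meaningful. A sketch of the intended ordering; the helper name and error value are assumptions, only pci_set_master() and the config-space read are standard kernel interfaces:

	#include <linux/pci.h>
	#include <linux/errno.h>

	/* enable bus mastering, then verify that it actually stuck */
	static int example_enable_bus_master(struct pci_dev *pdev)
	{
		u16 command;

		pci_set_master(pdev);
		pci_read_config_word(pdev, PCI_COMMAND, &command);
		if (!(command & PCI_COMMAND_MASTER))
			return -EIO;	/* bridge refused mastering */
		return 0;
	}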
*/ -void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) { - blk_init_queue(&SDpnt->request_queue, scsi_request_fn); - blk_queue_headactive(&SDpnt->request_queue, 0); - SDpnt->request_queue.queuedata = (void *) SDpnt; +void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) +{ + char name[16]; + + request_queue_t *q = &SDpnt->request_queue; + + sprintf(name, "scsi%d%d%d", SDpnt->id, SDpnt->lun, SDpnt->channel); + blk_init_queue(q, scsi_request_fn, name); + blk_queue_headactive(q, 0); + q->queuedata = (void *) SDpnt; +#ifdef DMA_CHUNK_SIZE + blk_queue_max_segments(q, 64); +#else + blk_queue_max_segments(q, SHpnt->sg_tablesize); +#endif + blk_queue_max_sectors(q, SHpnt->max_sectors); } #ifdef MODULE @@ -227,9 +239,7 @@ req = &SCpnt->request; req->rq_status = RQ_SCSI_DONE; /* Busy, but indicate request done */ - if (req->waiting != NULL) { - complete(req->waiting); - } + complete(req->waiting); } /* @@ -620,8 +630,6 @@ unsigned long flags = 0; unsigned long timeout; - ASSERT_LOCK(&io_request_lock, 0); - #if DEBUG unsigned long *ret = 0; #ifdef __mips__ @@ -633,6 +641,8 @@ host = SCpnt->host; + ASSERT_LOCK(&host->host_lock, 0); + /* Assign a unique nonzero serial_number. */ if (++serial_number == 0) serial_number = 1; @@ -692,9 +702,9 @@ * length exceeds what the host adapter can handle. */ if (CDB_SIZE(SCpnt) <= SCpnt->host->max_cmd_len) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); rtn = host->hostt->queuecommand(SCpnt, scsi_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); if (rtn != 0) { scsi_delete_timer(SCpnt); scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_HOST_BUSY); @@ -703,10 +713,11 @@ } else { SCSI_LOG_MLQUEUE(3, printk("queuecommand : command too long.\n")); SCpnt->result = (DID_ABORT << 16); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); scsi_done(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); rtn = 1; + } } else { /* @@ -714,15 +725,15 @@ * length exceeds what the host adapter can handle. 
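The scsi_initialize_queue() rewrite above moves transfer limits out of the midlayer's merge code and into the block layer: the queue itself now records its maximum segment and sector counts. A condensed sketch of that setup, assuming the three-argument blk_init_queue() and the blk_queue_max_segments()/blk_queue_max_sectors() helpers exactly as they appear in the hunk; the request function and queue name are placeholders:

	#include <linux/blkdev.h>
	#include "hosts.h"

	static void example_request_fn(request_queue_t *q) { /* stub */ }

	static void example_init_queue(request_queue_t *q, void *queuedata,
				       struct Scsi_Host *shost)
	{
		blk_init_queue(q, example_request_fn, "scsi000");
		blk_queue_headactive(q, 0);
		q->queuedata = queuedata;
	#ifdef DMA_CHUNK_SIZE
		blk_queue_max_segments(q, 64);	/* IOMMU merging caps sg use */
	#else
		blk_queue_max_segments(q, shost->sg_tablesize);
	#endif
		blk_queue_max_sectors(q, shost->max_sectors);
	}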
*/ if (CDB_SIZE(SCpnt) <= SCpnt->host->max_cmd_len) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); host->hostt->queuecommand(SCpnt, scsi_old_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } else { SCSI_LOG_MLQUEUE(3, printk("queuecommand : command too long.\n")); SCpnt->result = (DID_ABORT << 16); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); scsi_old_done(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); rtn = 1; } } @@ -730,11 +741,11 @@ int temp; SCSI_LOG_MLQUEUE(3, printk("command() : routine at %p\n", host->hostt->command)); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); temp = host->hostt->command(SCpnt); SCpnt->result = temp; #ifdef DEBUG_DELAY - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); clock = jiffies + 4 * HZ; while (time_before(jiffies, clock)) { barrier(); @@ -742,14 +753,14 @@ } printk("done(host = %d, result = %04x) : routine at %p\n", host->host_no, temp, host->hostt->command); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); #endif if (host->hostt->use_new_eh_code) { scsi_done(SCpnt); } else { scsi_old_done(SCpnt); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n")); return rtn; @@ -817,7 +828,7 @@ Scsi_Device * SDpnt = SRpnt->sr_device; struct Scsi_Host *host = SDpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCSI_LOG_MLQUEUE(4, { @@ -914,7 +925,7 @@ { struct Scsi_Host *host = SCpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCpnt->owner = SCSI_OWNER_MIDLEVEL; SRpnt->sr_command = SCpnt; @@ -1004,7 +1015,7 @@ { struct Scsi_Host *host = SCpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCpnt->pid = scsi_pid++; SCpnt->owner = SCSI_OWNER_MIDLEVEL; @@ -1355,11 +1366,11 @@ Scsi_Request * SRpnt; unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); - host = SCpnt->host; device = SCpnt->device; + ASSERT_LOCK(&host->host_lock, 0); + /* * We need to protect the decrement, as otherwise a race condition * would exist. Fiddling with SCpnt isn't a problem as the @@ -1367,10 +1378,10 @@ * one execution context, but the device and host structures are * shared. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); host->host_busy--; /* Indicate that we are free */ device->device_busy--; /* Decrement device usage counter. */ - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); /* * Clear the flags which say that the device/host is no longer @@ -1858,7 +1869,6 @@ Scsi_Device *SDpnt; struct Scsi_Device_Template *sdtpnt; const char *name; - unsigned long flags; int out_of_space = 0; if (tpnt->next || !tpnt->detect) @@ -1868,7 +1878,7 @@ /* If max_sectors isn't set, default to max */ if (!tpnt->max_sectors) - tpnt->max_sectors = MAX_SECTORS; + tpnt->max_sectors = 1024; pcount = next_scsi_host; @@ -1882,10 +1892,11 @@ using the new scsi code. NOTE: the detect routine could redefine the value tpnt->use_new_eh_code. 
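Every driver entry point in scsi_dispatch_cmnd() above now brackets the queuecommand call with the owning host's lock rather than the global one. Reduced to its core, the dispatch path looks like the sketch below; scsi_done is the midlayer completion routine named in the hunks, while the wrapper itself is illustrative:

	static int example_queue_to_driver(Scsi_Cmnd *SCpnt)
	{
		struct Scsi_Host *host = SCpnt->host;
		unsigned long flags;
		int rtn;

		spin_lock_irqsave(&host->host_lock, flags);
		rtn = host->hostt->queuecommand(SCpnt, scsi_done);
		spin_unlock_irqrestore(&host->host_lock, flags);
		return rtn;
	}

Note that ASSERT_LOCK(&host->host_lock, 0) at the top of the function guarantees the caller does not already hold the lock, which is what makes the unconditional irqsave safe.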
(DB, 13 May 1998) */ + /* + * detect should do its own locking + */ if (tpnt->use_new_eh_code) { - spin_lock_irqsave(&io_request_lock, flags); tpnt->present = tpnt->detect(tpnt); - spin_unlock_irqrestore(&io_request_lock, flags); } else tpnt->present = tpnt->detect(tpnt); diff -urN linux-2.5.1-pre1/drivers/scsi/scsi.h linux/drivers/scsi/scsi.h --- linux-2.5.1-pre1/drivers/scsi/scsi.h Thu Nov 22 11:49:15 2001 +++ linux/drivers/scsi/scsi.h Sat Dec 1 00:37:05 2001 @@ -386,15 +386,6 @@ #define ASKED_FOR_SENSE 0x20 #define SYNC_RESET 0x40 -#if defined(__mc68000__) || defined(CONFIG_APUS) -#include -#define CONTIGUOUS_BUFFERS(X,Y) \ - (virt_to_phys((X)->b_data+(X)->b_size-1)+1==virt_to_phys((Y)->b_data)) -#else -#define CONTIGUOUS_BUFFERS(X,Y) ((X->b_data+X->b_size) == Y->b_data) -#endif - - /* * This is the crap from the old error handling code. We have it in a special * place so that we can more easily delete it later on. diff -urN linux-2.5.1-pre1/drivers/scsi/scsi_error.c linux/drivers/scsi/scsi_error.c --- linux-2.5.1-pre1/drivers/scsi/scsi_error.c Sun Sep 9 10:52:35 2001 +++ linux/drivers/scsi/scsi_error.c Sat Dec 1 00:37:05 2001 @@ -423,8 +423,6 @@ unsigned char scsi_result0[256], *scsi_result = NULL; int saved_result; - ASSERT_LOCK(&io_request_lock, 0); - memcpy((void *) SCpnt->cmnd, (void *) generic_sense, sizeof(generic_sense)); @@ -583,16 +581,14 @@ STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout) { unsigned long flags; - struct Scsi_Host *host; - - ASSERT_LOCK(&io_request_lock, 0); + struct Scsi_Host *host = SCpnt->host; - host = SCpnt->host; + ASSERT_LOCK(&host->host_lock, 0); - retry: +retry: /* - * We will use a queued command if possible, otherwise we will emulate the - * queuing and calling of completion function ourselves. + * We will use a queued command if possible, otherwise we will + * emulate the queuing and calling of completion function ourselves. */ SCpnt->owner = SCSI_OWNER_LOWLEVEL; @@ -609,9 +605,9 @@ SCpnt->host->eh_action = &sem; SCpnt->request.rq_status = RQ_SCSI_BUSY; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); host->hostt->queuecommand(SCpnt, scsi_eh_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); down(&sem); @@ -634,10 +630,10 @@ * abort a timed out command or not. Not sure how * we should treat them differently anyways. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); if (SCpnt->host->hostt->eh_abort_handler) SCpnt->host->hostt->eh_abort_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); SCpnt->request.rq_status = RQ_SCSI_DONE; SCpnt->owner = SCSI_OWNER_ERROR_HANDLER; @@ -650,13 +646,13 @@ int temp; /* - * We damn well had better never use this code. There is no timeout - * protection here, since we would end up waiting in the actual low - * level driver, we don't know how to wake it up. + * We damn well had better never use this code. There is no + * timeout protection here, since we would end up waiting in + * the actual low level driver, we don't know how to wake it up. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); temp = host->hostt->command(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); SCpnt->result = temp; /* Fall through to code below to examine status. 
*/ @@ -664,8 +660,8 @@ } /* - * Now examine the actual status codes to see whether the command actually - * did complete normally. + * Now examine the actual status codes to see whether the command + * actually did complete normally. */ if (SCpnt->eh_state == SUCCESS) { int ret = scsi_eh_completed_normally(SCpnt); @@ -776,9 +772,9 @@ SCpnt->owner = SCSI_OWNER_LOWLEVEL; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_abort_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); return rtn; } @@ -808,9 +804,9 @@ } SCpnt->owner = SCSI_OWNER_LOWLEVEL; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_device_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -841,9 +837,9 @@ return FAILED; } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_bus_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -887,9 +883,9 @@ if (SCpnt->host->hostt->eh_host_reset_handler == NULL) { return FAILED; } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_host_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -1230,7 +1226,7 @@ Scsi_Device *SDpnt; unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); /* * Next free up anything directly waiting upon the host. This will be @@ -1247,19 +1243,22 @@ * now that error recovery is done, we will need to ensure that these * requests are started. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { - request_queue_t *q; + request_queue_t *q = &SDpnt->request_queue; + if ((host->can_queue > 0 && (host->host_busy >= host->can_queue)) || (host->host_blocked) || (host->host_self_blocked) || (SDpnt->device_blocked)) { break; } - q = &SDpnt->request_queue; + + spin_lock(&q->queue_lock); q->request_fn(q); + spin_unlock(&q->queue_lock); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } /* @@ -1306,7 +1305,7 @@ Scsi_Cmnd *SCdone; int timed_out; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCdone = NULL; diff -urN linux-2.5.1-pre1/drivers/scsi/scsi_lib.c linux/drivers/scsi/scsi_lib.c --- linux-2.5.1-pre1/drivers/scsi/scsi_lib.c Fri Oct 12 15:35:54 2001 +++ linux/drivers/scsi/scsi_lib.c Sat Dec 1 00:37:05 2001 @@ -61,7 +61,7 @@ * data - private data * at_head - insert request at head or tail of queue * - * Lock status: Assumed that io_request_lock is not held upon entry. + * Lock status: Assumed that queue lock is not held upon entry. 
* * Returns: Nothing */ @@ -70,13 +70,15 @@ { unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); rq->cmd = SPECIAL; rq->special = data; rq->q = NULL; + rq->bio = rq->biotail = NULL; rq->nr_segments = 0; rq->elevator_sequence = 0; + rq->inactive = 0; /* * We have the option of inserting the head or the tail of the queue. @@ -84,15 +86,15 @@ * head of the queue for things like a QUEUE_FULL message from a * device, or a host that is unable to accept a particular command. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); if (at_head) - list_add(&rq->queue, &q->queue_head); + list_add(&rq->queuelist, &q->queue_head); else - list_add_tail(&rq->queue, &q->queue_head); + list_add_tail(&rq->queuelist, &q->queue_head); q->request_fn(q); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } @@ -167,8 +169,6 @@ */ int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt) { - ASSERT_LOCK(&io_request_lock, 0); - SCpnt->owner = SCSI_OWNER_MIDLEVEL; SCpnt->reset_chain = NULL; SCpnt->serial_number = 0; @@ -250,9 +250,9 @@ Scsi_Device *SDpnt; struct Scsi_Host *SHpnt; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); if (SCpnt != NULL) { /* @@ -262,7 +262,7 @@ * the bad sector. */ SCpnt->request.special = (void *) SCpnt; - list_add(&SCpnt->request.queue, &q->queue_head); + list_add(&SCpnt->request.queuelist, &q->queue_head); } /* @@ -280,14 +280,10 @@ * with special case code, then spin off separate versions and * use function pointers to pick the right one. */ - if (SDpnt->single_lun - && list_empty(&q->queue_head) - && SDpnt->device_busy == 0) { + if (SDpnt->single_lun && blk_queue_empty(q) && SDpnt->device_busy ==0) { request_queue_t *q; - for (SDpnt = SHpnt->host_queue; - SDpnt; - SDpnt = SDpnt->next) { + for (SDpnt = SHpnt->host_queue; SDpnt; SDpnt = SDpnt->next) { if (((SHpnt->can_queue > 0) && (SHpnt->host_busy >= SHpnt->can_queue)) || (SHpnt->host_blocked) @@ -295,6 +291,7 @@ || (SDpnt->device_blocked)) { break; } + q = &SDpnt->request_queue; q->request_fn(q); } @@ -328,7 +325,7 @@ SHpnt->some_device_starved = 0; } } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } /* @@ -360,57 +357,27 @@ int requeue, int frequeue) { + request_queue_t *q = &SCpnt->device->request_queue; struct request *req; - struct buffer_head *bh; - Scsi_Device * SDpnt; - int nsect; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); req = &SCpnt->request; - req->errors = 0; - if (!uptodate) { - printk(" I/O error: dev %s, sector %lu\n", - kdevname(req->rq_dev), req->sector); - } - do { - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; - blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - sectors -= nsect; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { - req->hard_sector += nsect; - req->hard_nr_sectors -= nsect; - req->sector += nsect; - req->nr_sectors -= nsect; - - req->current_nr_sectors = bh->b_size >> 9; - if (req->nr_sectors < req->current_nr_sectors) { - req->nr_sectors = req->current_nr_sectors; - printk("scsi_end_request: buffer-list destroyed\n"); - } - } + while (end_that_request_first(req, 1, sectors)) { + if (!req->bio) { + printk("scsi_end_request: missing bio\n"); + break; } - } while (sectors && bh); + } /* * If there are blocks left over at the end, set up the command * to 
queue the remainder of them. */ - if (req->bh) { - request_queue_t *q; - - if( !requeue ) - { + if (req->bio) { + if (!requeue) return SCpnt; - } - - q = &SCpnt->device->request_queue; - req->buffer = bh->b_data; /* * Bleah. Leftovers again. Stick the leftovers in * the front of the queue, and goose the queue again. @@ -418,17 +385,15 @@ scsi_queue_next_request(q, SCpnt); return SCpnt; } + /* * This request is done. If there is someone blocked waiting for this - * request, wake them up. Typically used to wake up processes trying - * to swap a page into memory. + * request, wake them up. */ - if (req->waiting != NULL) { + if (req->waiting) complete(req->waiting); - } - add_blkdev_randomness(MAJOR(req->rq_dev)); - SDpnt = SCpnt->device; + add_blkdev_randomness(MAJOR(req->rq_dev)); /* * This will goose the queue request function at the end, so we don't @@ -436,12 +401,9 @@ */ __scsi_release_command(SCpnt); - if( frequeue ) { - request_queue_t *q; + if (frequeue) + scsi_queue_next_request(q, NULL); - q = &SDpnt->request_queue; - scsi_queue_next_request(q, NULL); - } return NULL; } @@ -489,7 +451,9 @@ */ static void scsi_release_buffers(Scsi_Cmnd * SCpnt) { - ASSERT_LOCK(&io_request_lock, 0); + struct request *req = &SCpnt->request; + + ASSERT_LOCK(&SCpnt->device->request_queue.queue_lock, 0); /* * Free up any indirection buffers we allocated for DMA purposes. @@ -510,9 +474,8 @@ } scsi_free(SCpnt->request_buffer, SCpnt->sglist_len); } else { - if (SCpnt->request_buffer != SCpnt->request.buffer) { - scsi_free(SCpnt->request_buffer, SCpnt->request_bufflen); - } + if (SCpnt->request_buffer != req->buffer) + scsi_free(SCpnt->request_buffer,SCpnt->request_bufflen); } /* @@ -548,6 +511,7 @@ int result = SCpnt->result; int this_count = SCpnt->bufflen >> 9; request_queue_t *q = &SCpnt->device->request_queue; + struct request *req = &SCpnt->request; /* * We must do one of several things here: @@ -562,7 +526,7 @@ * would be used if we just wanted to retry, for example. * */ - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); /* * Free up any indirection buffers we allocated for DMA purposes. @@ -591,10 +555,13 @@ } scsi_free(SCpnt->buffer, SCpnt->sglist_len); } else { - if (SCpnt->buffer != SCpnt->request.buffer) { - if (SCpnt->request.cmd == READ) { - memcpy(SCpnt->request.buffer, SCpnt->buffer, - SCpnt->bufflen); + if (SCpnt->buffer != req->buffer) { + if (req->cmd == READ) { + unsigned long flags; + char *to = bio_kmap_irq(req->bio, &flags); + + memcpy(to, SCpnt->buffer, SCpnt->bufflen); + bio_kunmap_irq(to, &flags); } scsi_free(SCpnt->buffer, SCpnt->bufflen); } @@ -615,11 +582,10 @@ */ if (good_sectors > 0) { SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d sectors done.\n", - SCpnt->request.nr_sectors, - good_sectors)); + req->nr_sectors, good_sectors)); SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n ", SCpnt->use_sg)); - SCpnt->request.errors = 0; + req->errors = 0; /* * If multiple sectors are requested in one buffer, then * they will have been finished off by the first command. @@ -716,7 +682,7 @@ break; case NOT_READY: printk(KERN_INFO "Device %s not ready.\n", - kdevname(SCpnt->request.rq_dev)); + kdevname(req->rq_dev)); SCpnt = scsi_end_request(SCpnt, 0, this_count); return; break; @@ -760,7 +726,7 @@ * We sometimes get this cruft in the event that a medium error * isn't properly reported.
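The scsi_end_request() rewrite above replaces the hand-rolled buffer_head walk with the block layer's own completion iterator. The control flow it relies on, shown as a sketch: end_that_request_first() returns nonzero while bios remain, exactly as in the while loop above, and req->waiting is completed only once the whole request has finished. The helper name is hypothetical:

	#include <linux/blkdev.h>
	#include <linux/completion.h>

	/* finish `sectors' sectors of req; NULL means fully completed */
	static struct request *example_end_io(struct request *req,
					      int uptodate, int sectors)
	{
		while (end_that_request_first(req, uptodate, sectors))
			if (!req->bio)
				break;	/* accounting bug, bail out */

		if (req->bio)
			return req;	/* leftovers: caller requeues */

		if (req->waiting)
			complete(req->waiting);
		return NULL;
	}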
*/ - SCpnt = scsi_end_request(SCpnt, 0, SCpnt->request.current_nr_sectors); + SCpnt = scsi_end_request(SCpnt, 0, req->current_nr_sectors); return; } } @@ -774,7 +740,7 @@ * Arguments: request - I/O request we are preparing to queue. * * Lock status: No locks assumed to be held, but as it happens the - * io_request_lock is held when this is called. + * q->queue_lock is held when this is called. * * Returns: Nothing * @@ -788,7 +754,7 @@ kdev_t dev = req->rq_dev; int major = MAJOR(dev); - ASSERT_LOCK(&io_request_lock, 1); + ASSERT_LOCK(&req->q->queue_lock, 1); for (spnt = scsi_devicelist; spnt; spnt = spnt->next) { /* @@ -846,7 +812,7 @@ struct Scsi_Host *SHpnt; struct Scsi_Device_Template *STpnt; - ASSERT_LOCK(&io_request_lock, 1); + ASSERT_LOCK(&q->queue_lock, 1); SDpnt = (Scsi_Device *) q->queuedata; if (!SDpnt) { @@ -864,10 +830,17 @@ * released the lock and grabbed it again, so each time * we need to check to see if the queue is plugged or not. */ - if (SHpnt->in_recovery || q->plugged) + if (SHpnt->in_recovery || blk_queue_plugged(q)) return; /* + * if we are at the max queue depth, don't attempt to queue + * more + */ + if (SHpnt->host_busy == SDpnt->queue_depth) + break; + + /* * If the device cannot accept another request, then quit. */ if (SDpnt->device_blocked) { @@ -913,9 +886,9 @@ */ SDpnt->was_reset = 0; if (SDpnt->removable && !in_interrupt()) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); scsi_ioctl(SDpnt, SCSI_IOCTL_DOORLOCK, 0); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); continue; } } @@ -924,14 +897,14 @@ * If we couldn't find a request that could be queued, then we * can also quit. */ - if (list_empty(&q->queue_head)) + if (blk_queue_empty(q)) break; /* - * Loop through all of the requests in this queue, and find - * one that is queueable. + * get next queueable request. cur_rq would be set if we + * previously had to abort for some reason */ - req = blkdev_entry_next_request(&q->queue_head); + req = elv_next_request(q); /* * Find the actual device driver associated with this command. @@ -951,9 +924,8 @@ if( SRpnt->sr_magic == SCSI_REQ_MAGIC ) { SCpnt = scsi_allocate_device(SRpnt->sr_device, FALSE, FALSE); - if( !SCpnt ) { + if (!SCpnt) break; - } scsi_init_cmd_from_req(SCpnt, SRpnt); } @@ -973,7 +945,7 @@ * scatter-gather segments here - the * normal case code assumes this to be * correct, as it would be a performance - * lose to always recount. Handling + * loss to always recount. Handling * errors is always unusual, of course. */ recount_segments(SCpnt); @@ -985,9 +957,8 @@ * while the queue is locked and then break out of the * loop. Otherwise loop around and try another request. */ - if (!SCpnt) { + if (!SCpnt) break; - } } /* @@ -1024,7 +995,7 @@ * another. */ req = NULL; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); if (SCpnt->request.cmd != SPECIAL) { /* @@ -1054,7 +1025,7 @@ { panic("Should not have leftover blocks\n"); } - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); SHpnt->host_busy--; SDpnt->device_busy--; continue; @@ -1070,7 +1041,7 @@ { panic("Should not have leftover blocks\n"); } - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); SHpnt->host_busy--; SDpnt->device_busy--; continue; @@ -1091,7 +1062,7 @@ * Now we need to grab the lock again. We are about to mess * with the request queue and try to find another command. 
*/ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); } } diff -urN linux-2.5.1-pre1/drivers/scsi/scsi_merge.c linux/drivers/scsi/scsi_merge.c --- linux-2.5.1-pre1/drivers/scsi/scsi_merge.c Thu Oct 25 14:05:31 2001 +++ linux/drivers/scsi/scsi_merge.c Sat Dec 1 00:37:05 2001 @@ -6,6 +6,7 @@ * Based upon conversations with large numbers * of people at Linux Expo. * Support for dynamic DMA mapping: Jakub Jelinek (jakub@redhat.com). + * Support for highmem I/O: Jens Axboe */ /* @@ -71,51 +72,6 @@ */ #define DMA_SEGMENT_SIZE_LIMITED -#ifdef CONFIG_SCSI_DEBUG_QUEUES -/* - * Enable a bunch of additional consistency checking. Turn this off - * if you are benchmarking. - */ -static int dump_stats(struct request *req, - int use_clustering, - int dma_host, - int segments) -{ - struct buffer_head *bh; - - /* - * Dump the information that we have. We know we have an - * inconsistency. - */ - printk("nr_segments is %x\n", req->nr_segments); - printk("counted segments is %x\n", segments); - printk("Flags %d %d\n", use_clustering, dma_host); - for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext) - { - printk("Segment 0x%p, blocks %d, addr 0x%lx\n", - bh, - bh->b_size >> 9, - virt_to_phys(bh->b_data - 1)); - } - panic("Ththththaats all folks. Too dangerous to continue.\n"); -} - - -/* - * Simple sanity check that we will use for the first go around - * in order to ensure that we are doing the counting correctly. - * This can be removed for optimization. - */ -#define SANITY_CHECK(req, _CLUSTER, _DMA) \ - if( req->nr_segments != __count_segments(req, _CLUSTER, _DMA, NULL) ) \ - { \ - printk("Incorrect segment count at 0x%p", current_text_addr()); \ - dump_stats(req, _CLUSTER, _DMA, __count_segments(req, _CLUSTER, _DMA, NULL)); \ - } -#else -#define SANITY_CHECK(req, _CLUSTER, _DMA) -#endif - static void dma_exhausted(Scsi_Cmnd * SCpnt, int i) { int jj; @@ -191,31 +147,23 @@ { int ret = 1; int reqsize = 0; - struct buffer_head *bh; - struct buffer_head *bhnext; + struct bio *bio, *bionext; - if( remainder != NULL ) { + if (remainder) reqsize = *remainder; - } /* * Add in the size increment for the first buffer. */ - bh = req->bh; + bio = req->bio; #ifdef DMA_SEGMENT_SIZE_LIMITED - if( reqsize + bh->b_size > PAGE_SIZE ) { + if (reqsize + bio_size(bio) > PAGE_SIZE) ret++; - reqsize = bh->b_size; - } else { - reqsize += bh->b_size; - } -#else - reqsize += bh->b_size; #endif - for (bh = req->bh, bhnext = bh->b_reqnext; - bhnext != NULL; - bh = bhnext, bhnext = bh->b_reqnext) { + for (bio = req->bio, bionext = bio->bi_next; + bionext != NULL; + bio = bionext, bionext = bio->bi_next) { if (use_clustering) { /* * See if we can do this without creating another @@ -223,11 +171,10 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(bhnext->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_phys(bionext) - 1 == ISA_DMA_THRESHOLD) { ret++; - reqsize = bhnext->b_size; - } else if (CONTIGUOUS_BUFFERS(bh, bhnext)) { + reqsize = bio_size(bionext); + } else if (BIO_CONTIG(bio, bionext)) { /* * This one is OK. Let it go. */ @@ -241,23 +188,22 @@ * kind of screwed and we need to start * another segment. 
*/ - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD - && reqsize + bhnext->b_size > PAGE_SIZE ) + if(dma_host && bio_to_phys(bionext) - 1 >= ISA_DMA_THRESHOLD + && reqsize + bio_size(bionext) > PAGE_SIZE ) { ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); continue; } #endif - reqsize += bhnext->b_size; + reqsize += bio_size(bionext); continue; } ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); } else { ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); } } if( remainder != NULL ) { @@ -304,14 +250,13 @@ } #define MERGEABLE_BUFFERS(X,Y) \ -(((((long)(X)->b_data+(X)->b_size)|((long)(Y)->b_data)) & \ +(((((long)bio_to_phys((X))+bio_size((X)))|((long)bio_to_phys((Y)))) & \ (DMA_CHUNK_SIZE - 1)) == 0) #ifdef DMA_CHUNK_SIZE static inline int scsi_new_mergeable(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { /* * pci_map_sg will be able to merge these two @@ -320,47 +265,47 @@ * scsi.c allocates for this purpose * min(64,sg_tablesize) entries. */ - if (req->nr_segments >= max_segments || - req->nr_segments >= SHpnt->sg_tablesize) + if (req->nr_segments >= q->max_segments) return 0; + req->nr_segments++; return 1; } static inline int scsi_new_segment(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { /* * pci_map_sg won't be able to map these two * into a single hardware sg entry, so we have to * check if things fit into sg_tablesize. */ - if (req->nr_hw_segments >= SHpnt->sg_tablesize || - req->nr_segments >= SHpnt->sg_tablesize) + if (req->nr_hw_segments >= q->max_segments) return 0; + else if (req->nr_segments >= q->max_segments) + return 0; + req->nr_hw_segments++; req->nr_segments++; return 1; } + #else + static inline int scsi_new_segment(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { - if (req->nr_segments < SHpnt->sg_tablesize && - req->nr_segments < max_segments) { - /* - * This will form the start of a new segment. Bump the - * counter. - */ - req->nr_segments++; - return 1; - } else { + if (req->nr_segments >= q->max_segments) return 0; - } + + /* + * This will form the start of a new segment. Bump the + * counter. + */ + req->nr_segments++; + return 1; } #endif @@ -371,7 +316,7 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * use_clustering - 1 if this host wishes to use clustering * dma_host - 1 if this host has ISA DMA issues (bus doesn't * expose all of the address lines, so that DMA cannot @@ -380,7 +325,7 @@ * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Some drivers have limited scatter-gather table sizes, and * thus they cannot queue an infinitely large command. 
This @@ -399,25 +344,17 @@ */ __inline static int __scsi_back_merge_fn(request_queue_t * q, struct request *req, - struct buffer_head *bh, - int max_segments, + struct bio *bio, int use_clustering, int dma_host) { unsigned int count; unsigned int segment_size = 0; - Scsi_Device *SDpnt; - struct Scsi_Host *SHpnt; - - SDpnt = (Scsi_Device *) q->queuedata; - SHpnt = SDpnt->host; + Scsi_Device *SDpnt = q->queuedata; -#ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; -#endif - - if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) + if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) + return 0; + else if (!BIO_PHYS_4G(req->biotail, bio)) return 0; if (use_clustering) { @@ -427,17 +364,15 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_phys(req->biotail) - 1 == ISA_DMA_THRESHOLD) { goto new_end_segment; } - if (CONTIGUOUS_BUFFERS(req->bhtail, bh)) { + if (BIO_CONTIG(req->biotail, bio)) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { + if( dma_host && bio_to_phys(bio) - 1 >= ISA_DMA_THRESHOLD ) { segment_size = 0; count = __count_segments(req, use_clustering, dma_host, &segment_size); - if( segment_size + bh->b_size > PAGE_SIZE ) { + if( segment_size + bio_size(bio) > PAGE_SIZE ) { goto new_end_segment; } } @@ -450,33 +385,25 @@ } new_end_segment: #ifdef DMA_CHUNK_SIZE - if (MERGEABLE_BUFFERS(req->bhtail, bh)) - return scsi_new_mergeable(q, req, SHpnt, max_segments); + if (MERGEABLE_BUFFERS(req->biotail, bio)) + return scsi_new_mergeable(q, req, SDpnt->host); #endif - return scsi_new_segment(q, req, SHpnt, max_segments); + return scsi_new_segment(q, req, SDpnt->host); } __inline static int __scsi_front_merge_fn(request_queue_t * q, struct request *req, - struct buffer_head *bh, - int max_segments, + struct bio *bio, int use_clustering, int dma_host) { unsigned int count; unsigned int segment_size = 0; - Scsi_Device *SDpnt; - struct Scsi_Host *SHpnt; - - SDpnt = (Scsi_Device *) q->queuedata; - SHpnt = SDpnt->host; - -#ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; -#endif + Scsi_Device *SDpnt = q->queuedata; - if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) + if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) + return 0; + else if (!BIO_PHYS_4G(bio, req->bio)) return 0; if (use_clustering) { @@ -486,15 +413,13 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. 
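With the limits attached to the queue, the back- and front-merge tests above no longer consult SHpnt->sg_tablesize or a max_segments argument at all. Their admission logic condenses to the sketch below, using the q->max_sectors and q->max_segments fields plus the BIO_PHYS_4G() check introduced by this patch; the clustering and ISA-DMA branches are deliberately omitted:

	#include <linux/blkdev.h>
	#include <linux/bio.h>

	/* may bio be appended to req as (at worst) one new segment? */
	static int example_back_merge_ok(request_queue_t *q,
					 struct request *req, struct bio *bio)
	{
		if (req->nr_sectors + bio_sectors(bio) > q->max_sectors)
			return 0;	/* transfer would exceed queue limit */
		if (!BIO_PHYS_4G(req->biotail, bio))
			return 0;	/* spans a 4GB physical boundary */
		if (req->nr_segments >= q->max_segments)
			return 0;	/* scatter-gather table is full */
		req->nr_segments++;
		return 1;
	}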
*/ - if (dma_host && - virt_to_phys(bh->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_phys(bio) - 1 == ISA_DMA_THRESHOLD) { goto new_start_segment; } - if (CONTIGUOUS_BUFFERS(bh, req->bh)) { + if (BIO_CONTIG(bio, req->bio)) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { - segment_size = bh->b_size; + if( dma_host && bio_to_phys(bio) - 1 >= ISA_DMA_THRESHOLD ) { + segment_size = bio_size(bio); count = __count_segments(req, use_clustering, dma_host, &segment_size); if( count != req->nr_segments ) { goto new_start_segment; @@ -509,10 +434,10 @@ } new_start_segment: #ifdef DMA_CHUNK_SIZE - if (MERGEABLE_BUFFERS(bh, req->bh)) - return scsi_new_mergeable(q, req, SHpnt, max_segments); + if (MERGEABLE_BUFFERS(bio, req->bio)) + return scsi_new_mergeable(q, req, SDpnt->host); #endif - return scsi_new_segment(q, req, SHpnt, max_segments); + return scsi_new_segment(q, req, SDpnt->host); } /* @@ -522,12 +447,12 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Optimized for different cases depending upon whether * ISA DMA is in use and whether clustering should be used. @@ -535,15 +460,12 @@ #define MERGEFCT(_FUNCTION, _BACK_FRONT, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct buffer_head * bh, \ - int max_segments) \ + struct bio *bio) \ { \ int ret; \ - SANITY_CHECK(req, _CLUSTER, _DMA); \ ret = __scsi_ ## _BACK_FRONT ## _merge_fn(q, \ req, \ - bh, \ - max_segments, \ + bio, \ _CLUSTER, \ _DMA); \ return ret; \ @@ -576,7 +498,7 @@ * Returns: 1 if it is OK to merge the two requests. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Some drivers have limited scatter-gather table sizes, and * thus they cannot queue an infinitely large command. This @@ -596,7 +518,6 @@ __inline static int __scsi_merge_requests_fn(request_queue_t * q, struct request *req, struct request *next, - int max_segments, int use_clustering, int dma_host) { @@ -609,31 +530,28 @@ */ if (req->special || next->special) return 0; + else if (!BIO_PHYS_4G(req->biotail, next->bio)) + return 0; SDpnt = (Scsi_Device *) q->queuedata; SHpnt = SDpnt->host; #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; - /* If it would not fit into prepared memory space for sg chain, * then don't allow the merge. */ - if (req->nr_segments + next->nr_segments - 1 > max_segments || - req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments - 1 > q->max_segments) return 0; - } - if (req->nr_hw_segments + next->nr_hw_segments - 1 > SHpnt->sg_tablesize) { + + if (req->nr_hw_segments + next->nr_hw_segments - 1 > q->max_segments) return 0; - } #else /* * If the two requests together are too large (even assuming that we * can merge the boundary requests into one segment, then don't * allow the merge. 
*/ - if (req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments - 1 > q->max_segments) { return 0; } #endif @@ -652,8 +570,7 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_phys(req->biotail) - 1 == ISA_DMA_THRESHOLD) { goto dont_combine; } #ifdef DMA_SEGMENT_SIZE_LIMITED @@ -662,8 +579,8 @@ * buffers in chunks of PAGE_SIZE or less. */ if (dma_host - && CONTIGUOUS_BUFFERS(req->bhtail, next->bh) - && virt_to_phys(req->bhtail->b_data) - 1 >= ISA_DMA_THRESHOLD ) + && BIO_CONTIG(req->biotail, next->bio) + && bio_to_phys(req->biotail) - 1 >= ISA_DMA_THRESHOLD ) { int segment_size = 0; int count = 0; @@ -675,7 +592,7 @@ } } #endif - if (CONTIGUOUS_BUFFERS(req->bhtail, next->bh)) { + if (BIO_CONTIG(req->biotail, next->bio)) { /* * This one is OK. Let it go. */ @@ -688,17 +605,16 @@ } dont_combine: #ifdef DMA_CHUNK_SIZE - if (req->nr_segments + next->nr_segments > max_segments || - req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments > q->max_segments) return 0; - } + /* If dynamic DMA mapping can merge last segment in req with * first segment in next, then the check for hw segments was * done above already, so we can always merge. */ - if (MERGEABLE_BUFFERS (req->bhtail, next->bh)) { + if (MERGEABLE_BUFFERS(req->biotail, next->bio)) { req->nr_hw_segments += next->nr_hw_segments - 1; - } else if (req->nr_hw_segments + next->nr_hw_segments > SHpnt->sg_tablesize) { + } else if (req->nr_hw_segments + next->nr_hw_segments > q->max_segments) return 0; } else { req->nr_hw_segments += next->nr_hw_segments; @@ -711,8 +627,7 @@ * Make sure we can fix something that is the sum of the two. * A slightly stricter test than we had above. */ - if (req->nr_segments + next->nr_segments > max_segments || - req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments > q->max_segments) { return 0; } else { /* @@ -732,12 +647,12 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Optimized for different cases depending upon whether * ISA DMA is in use and whether clustering should be used. @@ -745,12 +660,10 @@ #define MERGEREQFCT(_FUNCTION, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct request * next, \ - int max_segments) \ + struct request * next) \ { \ int ret; \ - SANITY_CHECK(req, _CLUSTER, _DMA); \ - ret = __scsi_merge_requests_fn(q, req, next, max_segments, _CLUSTER, _DMA); \ + ret = __scsi_merge_requests_fn(q, req, next, _CLUSTER, _DMA); \ return ret; \ } @@ -798,8 +711,8 @@ int use_clustering, int dma_host) { - struct buffer_head * bh; - struct buffer_head * bhprev; + struct bio * bio; + struct bio * bioprev; char * buff; int count; int i; @@ -810,29 +723,12 @@ void ** bbpnt; /* - * FIXME(eric) - don't inline this - it doesn't depend on the - * integer flags. Come to think of it, I don't think this is even - * needed any more. Need to play with it and see if we hit the - * panic. 
If not, then don't bother. + * now working right now */ - if (!SCpnt->request.bh) { - /* - * Case of page request (i.e. raw device), or unlinked buffer - * Typically used for swapping, but this isn't how we do - * swapping any more. - */ - panic("I believe this is dead code. If we hit this, I was wrong"); -#if 0 - SCpnt->request_bufflen = SCpnt->request.nr_sectors << 9; - SCpnt->request_buffer = SCpnt->request.buffer; - SCpnt->use_sg = 0; - /* - * FIXME(eric) - need to handle DMA here. - */ -#endif - return 1; - } + BUG_ON(dma_host); + req = &SCpnt->request; + /* * First we need to know how many scatter gather segments are needed. */ @@ -848,16 +744,15 @@ * buffer. */ if (dma_host && scsi_dma_free_sectors <= 10) { - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; goto single_segment; } + /* - * Don't bother with scatter-gather if there is only one segment. + * we used to not use scatter-gather for single segment request, + * but now we do (it makes highmem I/O easier to support without + * kmapping pages) */ - if (count == 1) { - this_count = SCpnt->request.nr_sectors; - goto single_segment; - } SCpnt->use_sg = count; /* @@ -875,29 +770,27 @@ * round it up. */ SCpnt->sglist_len = (SCpnt->sglist_len + 511) & ~511; - + sgpnt = (struct scatterlist *) scsi_malloc(SCpnt->sglist_len); - /* - * Now fill the scatter-gather table. - */ if (!sgpnt) { + struct Scsi_Host *SHpnt = SCpnt->host; + /* * If we cannot allocate the scatter-gather table, then * simply write the first buffer all by itself. */ printk("Warning - running *really* short on DMA buffers\n"); - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; + printk("SCSI: depth is %d, # segs %d, # hw segs %d\n", SHpnt->host_busy, req->nr_segments, req->nr_hw_segments); goto single_segment; } - /* - * Next, walk the list, and fill in the addresses and sizes of - * each segment. - */ + memset(sgpnt, 0, SCpnt->sglist_len); SCpnt->request_buffer = (char *) sgpnt; SCpnt->request_bufflen = 0; - bhprev = NULL; + req->buffer = NULL; + bioprev = NULL; if (dma_host) bbpnt = (void **) ((char *)sgpnt + @@ -907,62 +800,30 @@ SCpnt->bounce_buffers = bbpnt; - for (count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { - if (use_clustering && bhprev != NULL) { - if (dma_host && - virt_to_phys(bhprev->b_data) - 1 == ISA_DMA_THRESHOLD) { - /* Nothing - fall through */ - } else if (CONTIGUOUS_BUFFERS(bhprev, bh)) { - /* - * This one is OK. Let it go. Note that we - * do not have the ability to allocate - * bounce buffer segments > PAGE_SIZE, so - * for now we limit the thing. - */ - if( dma_host ) { -#ifdef DMA_SEGMENT_SIZE_LIMITED - if( virt_to_phys(bh->b_data) - 1 < ISA_DMA_THRESHOLD - || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) { - sgpnt[count - 1].length += bh->b_size; - bhprev = bh; - continue; - } -#else - sgpnt[count - 1].length += bh->b_size; - bhprev = bh; - continue; -#endif - } else { - sgpnt[count - 1].length += bh->b_size; - SCpnt->request_bufflen += bh->b_size; - bhprev = bh; - continue; - } - } - } - count++; - sgpnt[count - 1].address = bh->b_data; - sgpnt[count - 1].page = NULL; - sgpnt[count - 1].length += bh->b_size; - if (!dma_host) { - SCpnt->request_bufflen += bh->b_size; - } - bhprev = bh; - } + /* + * Next, walk the list, and fill in the addresses and sizes of + * each segment. + */ + SCpnt->request_bufflen = req->nr_sectors << 9; + count = blk_rq_map_sg(req->q, req, SCpnt->request_buffer); /* * Verify that the count is correct. 
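The central simplification in __init_io() above is that the scatterlist is no longer built by hand: blk_rq_map_sg() fills it straight from the request's bio chain, clustering included. A sketch of the call sequence, with the allocation and bounce handling of the surrounding code elided; the helper name and error convention are illustrative:

	#include <linux/blkdev.h>
	#include <linux/string.h>
	#include "scsi.h"

	/* map a request's bio chain into a preallocated scatterlist */
	static int example_map_sg(Scsi_Cmnd *SCpnt, struct scatterlist *sgpnt,
				  int max_segs)
	{
		struct request *req = &SCpnt->request;
		int count;

		memset(sgpnt, 0, max_segs * sizeof(*sgpnt));
		count = blk_rq_map_sg(req->q, req, sgpnt);
		if (count > max_segs)
			return -1;	/* count mismatch, fall back */

		SCpnt->use_sg = count;
		SCpnt->request_buffer = (char *) sgpnt;
		SCpnt->request_bufflen = req->nr_sectors << 9;
		return count;
	}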
*/ - if (count != SCpnt->use_sg) { + if (count > SCpnt->use_sg) { printk("Incorrect number of segments after building list\n"); -#ifdef CONFIG_SCSI_DEBUG_QUEUES - dump_stats(req, use_clustering, dma_host, count); -#endif + printk("counted %d, received %d\n", count, SCpnt->use_sg); + printk("req nr_sec %lu, cur_nr_sec %u\n", req->nr_sectors, req->current_nr_sectors); + scsi_free(SCpnt->request_buffer, SCpnt->sglist_len); + this_count = req->current_nr_sectors; + goto single_segment; } - if (!dma_host) { + + SCpnt->use_sg = count; + + if (!dma_host) return 1; - } + /* * Now allocate bounce buffers, if needed. */ @@ -971,7 +832,7 @@ sectors = (sgpnt[i].length >> 9); SCpnt->request_bufflen += sgpnt[i].length; if (virt_to_phys(sgpnt[i].address) + sgpnt[i].length - 1 > - ISA_DMA_THRESHOLD) { + ISA_DMA_THRESHOLD) { if( scsi_dma_free_sectors - sectors <= 10 ) { /* * If this would nearly drain the DMA @@ -989,9 +850,12 @@ break; } - bbpnt[i] = sgpnt[i].address; - sgpnt[i].address = - (char *) scsi_malloc(sgpnt[i].length); + /* + * this is not a dma host, so it will never + * be a highmem page + */ + bbpnt[i] = page_address(sgpnt[i].page) +sgpnt[i].offset; + sgpnt[i].address = (char *)scsi_malloc(sgpnt[i].length); /* * If we cannot allocate memory for this DMA bounce * buffer, then queue just what we have done so far. @@ -1005,7 +869,7 @@ } break; } - if (SCpnt->request.cmd == WRITE) { + if (req->cmd == WRITE) { memcpy(sgpnt[i].address, bbpnt[i], sgpnt[i].length); } @@ -1050,21 +914,20 @@ * single-block requests if we had hundreds of free sectors. */ if( scsi_dma_free_sectors > 30 ) { - for (this_count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { + for (this_count = 0, bio = req->bio; bio; bio = bio->bi_next) { if( scsi_dma_free_sectors - this_count < 30 || this_count == sectors ) { break; } - this_count += bh->b_size >> 9; + this_count += bio_sectors(bio); } } else { /* * Yow! Take the absolute minimum here. */ - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; } /* @@ -1077,28 +940,33 @@ * segment. Possibly the entire request, or possibly a small * chunk of the entire request. */ - bh = SCpnt->request.bh; - buff = SCpnt->request.buffer; - if (dma_host) { + bio = req->bio; + buff = req->buffer = bio_data(bio); + + if (dma_host || PageHighMem(bio_page(bio))) { /* * Allocate a DMA bounce buffer. If the allocation fails, fall * back and allocate a really small one - enough to satisfy * the first buffer. 
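Because a segment may now live in a highmem page, every copy into or out of a low-memory bounce buffer has to go through a temporary atomic mapping; a plain virt-to-virt memcpy() is no longer valid. The pattern, as used in rw_intr() earlier and in the WRITE path just below, built on the bio_kmap_irq()/bio_kunmap_irq() pair this patch introduces; the helper name is illustrative:

	#include <linux/bio.h>
	#include <linux/string.h>

	/* copy a write payload into a low-memory bounce buffer */
	static void example_bounce_out(struct bio *bio, char *bounce, int bytes)
	{
		unsigned long flags;
		char *from = bio_kmap_irq(bio, &flags);	/* highmem-safe */

		memcpy(bounce, from, bytes);
		bio_kunmap_irq(from, &flags);
	}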
*/ - if (virt_to_phys(SCpnt->request.bh->b_data) - + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) { + if (bio_to_phys(bio) + bio_size(bio) - 1 > ISA_DMA_THRESHOLD) { buff = (char *) scsi_malloc(this_count << 9); if (!buff) { printk("Warning - running low on DMA memory\n"); - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; buff = (char *) scsi_malloc(this_count << 9); if (!buff) { dma_exhausted(SCpnt, 0); + return 0; } } - if (SCpnt->request.cmd == WRITE) - memcpy(buff, (char *) SCpnt->request.buffer, this_count << 9); + if (req->cmd == WRITE) { + unsigned long flags; + char *buf = bio_kmap_irq(bio, &flags); + memcpy(buff, buf, this_count << 9); + bio_kunmap_irq(buf, &flags); + } } } SCpnt->request_bufflen = this_count << 9; @@ -1139,21 +1007,11 @@ */ void initialize_merge_fn(Scsi_Device * SDpnt) { - request_queue_t *q; - struct Scsi_Host *SHpnt; - SHpnt = SDpnt->host; - - q = &SDpnt->request_queue; + struct Scsi_Host *SHpnt = SDpnt->host; + request_queue_t *q = &SDpnt->request_queue; + dma64_addr_t bounce_limit; /* - * If the host has already selected a merge manager, then don't - * pick a new one. - */ -#if 0 - if (q->back_merge_fn && q->front_merge_fn) - return; -#endif - /* * If this host has an unlimited tablesize, then don't bother with a * merge manager. The whole point of the operation is to make sure * that requests don't grow too large, and this host isn't picky. @@ -1185,4 +1043,20 @@ q->merge_requests_fn = scsi_merge_requests_fn_dc; SDpnt->scsi_init_io_fn = scsi_init_io_vdc; } + + /* + * now enable highmem I/O, if appropriate + */ + bounce_limit = BLK_BOUNCE_HIGH; + if (SHpnt->highmem_io && (SDpnt->type == TYPE_DISK)) { + if (!PCI_DMA_BUS_IS_PHYS) + /* Platforms with virtual-DMA translation + * hardware have no practical limit. + */ + bounce_limit = BLK_BOUNCE_ANY; + else + bounce_limit = SHpnt->pci_dev->dma_mask; + } + + blk_queue_bounce_limit(q, bounce_limit); } diff -urN linux-2.5.1-pre1/drivers/scsi/scsi_obsolete.c linux/drivers/scsi/scsi_obsolete.c --- linux-2.5.1-pre1/drivers/scsi/scsi_obsolete.c Thu Jul 5 11:28:17 2001 +++ linux/drivers/scsi/scsi_obsolete.c Sat Dec 1 00:37:05 2001 @@ -145,9 +145,10 @@ void scsi_old_times_out(Scsi_Cmnd * SCpnt) { + struct Scsi_Host *host = SCpnt->host; unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); /* Set the serial_number_at_timeout to the current serial_number */ SCpnt->serial_number_at_timeout = SCpnt->serial_number; @@ -164,7 +165,7 @@ break; case IN_ABORT: printk("SCSI host %d abort (pid %ld) timed out - resetting\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); if (!scsi_reset(SCpnt, SCSI_RESET_ASYNCHRONOUS)) break; case IN_RESET: @@ -175,7 +176,7 @@ */ printk("SCSI host %d channel %d reset (pid %ld) timed out - " "trying harder\n", - SCpnt->host->host_no, SCpnt->channel, SCpnt->pid); + host->host_no, SCpnt->channel, SCpnt->pid); SCpnt->internal_timeout &= ~IN_RESET; SCpnt->internal_timeout |= IN_RESET2; scsi_reset(SCpnt, @@ -188,7 +189,7 @@ * Maybe the HBA itself crashed and this will shake it loose. 
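initialize_merge_fn() above closes the loop by telling the block layer where bouncing starts. The decision tree, restated as a sketch: everything above BLK_BOUNCE_HIGH is bounced by default; a highmem-capable disk host raises the limit to its PCI DMA mask, or drops it entirely on platforms whose IOMMU hides physical addresses. Only the helper name is invented here:

	#include <linux/blkdev.h>
	#include <linux/pci.h>
	#include "hosts.h"

	static void example_set_bounce_limit(request_queue_t *q,
					     struct Scsi_Host *sh, int is_disk)
	{
		dma64_addr_t limit = BLK_BOUNCE_HIGH;	/* bounce all highmem */

		if (sh->highmem_io && is_disk) {
			if (!PCI_DMA_BUS_IS_PHYS)
				limit = BLK_BOUNCE_ANY;	/* IOMMU: no limit */
			else
				limit = sh->pci_dev->dma_mask;
		}
		blk_queue_bounce_limit(q, limit);
	}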
*/ printk("SCSI host %d reset (pid %ld) timed out - trying to shake it loose\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); SCpnt->internal_timeout &= ~(IN_RESET | IN_RESET2); SCpnt->internal_timeout |= IN_RESET3; scsi_reset(SCpnt, @@ -197,19 +198,19 @@ default: printk("SCSI host %d reset (pid %ld) timed out again -\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); printk("probably an unrecoverable SCSI bus or device hang.\n"); break; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } /* * From what I can find in scsi_obsolete.c, this function is only called * by scsi_old_done and scsi_reset. Both of these functions run with the - * io_request_lock already held, so we need do nothing here about grabbing + * host_lock already held, so we need do nothing here about grabbing * any locks. */ static void scsi_request_sense(Scsi_Cmnd * SCpnt) @@ -217,7 +218,6 @@ SCpnt->flags |= WAS_SENSE | ASKED_FOR_SENSE; update_timeout(SCpnt, SENSE_TIMEOUT); - memcpy((void *) SCpnt->cmnd, (void *) generic_sense, sizeof(generic_sense)); memset((void *) SCpnt->sense_buffer, 0, @@ -238,9 +238,9 @@ * Ugly, ugly. The newer interfaces all assume that the lock * isn't held. Mustn't disappoint, or we deadlock the system. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&SCpnt->host->host_lock); scsi_dispatch_cmd(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&SCpnt->host->host_lock); } @@ -646,9 +646,9 @@ * assume that the lock isn't held. Mustn't * disappoint, or we deadlock the system. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); scsi_dispatch_cmd(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } break; default: @@ -674,7 +674,7 @@ * use, the upper code is run from a bottom half handler, so * it isn't an issue. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); SRpnt = SCpnt->sc_request; if( SRpnt != NULL ) { SRpnt->sr_result = SRpnt->sr_command->result; @@ -686,7 +686,7 @@ } SCpnt->done(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } #undef CMD_FINISHED #undef REDO @@ -725,10 +725,10 @@ return 0; } if (SCpnt->internal_timeout & IN_ABORT) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); while (SCpnt->internal_timeout & IN_ABORT) barrier(); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } else { SCpnt->internal_timeout |= IN_ABORT; oldto = update_timeout(SCpnt, ABORT_TIMEOUT); @@ -908,10 +908,10 @@ return 0; } if (SCpnt->internal_timeout & IN_RESET) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); while (SCpnt->internal_timeout & IN_RESET) barrier(); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } else { SCpnt->internal_timeout |= IN_RESET; update_timeout(SCpnt, RESET_TIMEOUT); diff -urN linux-2.5.1-pre1/drivers/scsi/scsi_queue.c linux/drivers/scsi/scsi_queue.c --- linux-2.5.1-pre1/drivers/scsi/scsi_queue.c Fri Feb 9 11:30:23 2001 +++ linux/drivers/scsi/scsi_queue.c Sat Dec 1 00:37:05 2001 @@ -80,6 +80,7 @@ { struct Scsi_Host *host; unsigned long flags; + request_queue_t *q = &cmd->device->request_queue; SCSI_LOG_MLQUEUE(1, printk("Inserting command %p into mlqueue\n", cmd)); @@ -137,10 +138,10 @@ * Decrement the counters, since these commands are no longer * active on the host/device. 
*/ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); cmd->host->host_busy--; cmd->device->device_busy--; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); /* * Insert this command at the head of the queue for its device. diff -urN linux-2.5.1-pre1/drivers/scsi/sd.c linux/drivers/scsi/sd.c --- linux-2.5.1-pre1/drivers/scsi/sd.c Fri Nov 9 14:05:06 2001 +++ linux/drivers/scsi/sd.c Sat Dec 1 00:37:05 2001 @@ -61,10 +61,6 @@ #include -/* - * static const char RCSid[] = "$Header:"; - */ - #define SD_MAJOR(i) (!(i) ? SCSI_DISK0_MAJOR : SCSI_DISK1_MAJOR-1+(i)) #define SCSI_DISKS_PER_MAJOR 16 @@ -72,8 +68,7 @@ #define SD_MINOR_NUMBER(i) ((i) & 255) #define MKDEV_SD_PARTITION(i) MKDEV(SD_MAJOR_NUMBER(i), (i) & 255) #define MKDEV_SD(index) MKDEV_SD_PARTITION((index) << 4) -#define N_USED_SCSI_DISKS (sd_template.dev_max + SCSI_DISKS_PER_MAJOR - 1) -#define N_USED_SD_MAJORS (N_USED_SCSI_DISKS / SCSI_DISKS_PER_MAJOR) +#define N_USED_SD_MAJORS (1 + ((sd_template.dev_max - 1) >> 4)) #define MAX_RETRIES 5 @@ -89,7 +84,6 @@ static Scsi_Disk *rscsi_disks; static int *sd_sizes; static int *sd_blocksizes; -static int *sd_hardsizes; /* Hardware sector size */ static int *sd_max_sectors; static int check_scsidisk_media_change(kdev_t); @@ -97,7 +91,6 @@ static int sd_init_onedisk(int); - static int sd_init(void); static void sd_finish(void); static int sd_attach(Scsi_Device *); @@ -124,7 +117,6 @@ init_command:sd_init_command, }; - static void rw_intr(Scsi_Cmnd * SCpnt); #if defined(CONFIG_PPC) @@ -191,11 +183,11 @@ &diskinfo[0]); else scsicam_bios_param(&rscsi_disks[DEVICE_NR(dev)], dev, &diskinfo[0]); - if (put_user(diskinfo[0], &loc->heads) || put_user(diskinfo[1], &loc->sectors) || put_user(diskinfo[2], &loc->cylinders) || - put_user(sd[SD_PARTITION(inode->i_rdev)].start_sect, &loc->start)) + put_user((unsigned) get_start_sect(inode->i_rdev), + (unsigned long *) &loc->start)) return -EFAULT; return 0; } @@ -226,7 +218,8 @@ if (put_user(diskinfo[0], &loc->heads) || put_user(diskinfo[1], &loc->sectors) || put_user(diskinfo[2], (unsigned int *) &loc->cylinders) || - put_user(sd[SD_PARTITION(inode->i_rdev)].start_sect, &loc->start)) + put_user((unsigned)get_start_sect(inode->i_rdev), + (unsigned long *)&loc->start)) return -EFAULT; return 0; } @@ -239,10 +232,12 @@ case BLKFLSBUF: case BLKSSZGET: case BLKPG: - case BLKELVGET: - case BLKELVSET: + case BLKELVGET: + case BLKELVSET: case BLKBSZGET: case BLKBSZSET: + case BLKHASHPROF: + case BLKHASHCLEAR: return blk_ioctl(inode->i_rdev, cmd, arg); case BLKRRPART: /* Re-read partition tables */ @@ -251,7 +246,8 @@ return revalidate_scsidisk(dev, 1); default: - return scsi_ioctl(rscsi_disks[DEVICE_NR(dev)].device , cmd, (void *) arg); + return scsi_ioctl(rscsi_disks[DEVICE_NR(dev)].device, + cmd, (void *) arg); } } @@ -301,7 +297,7 @@ SCSI_LOG_HLQUEUE(1, printk("Doing sd request, dev = %d, block = %d\n", devm, block)); dpnt = &rscsi_disks[dev]; - if (devm >= (sd_template.dev_max << 4) || + if (devm >= (sd_template.dev_max << 4) || (devm & 0xf) || !dpnt || !dpnt->device->online || block + SCpnt->request.nr_sectors > sd[devm].nr_sects) { @@ -309,7 +305,7 @@ SCSI_LOG_HLQUEUE(2, printk("Retry with 0x%p\n", SCpnt)); return 0; } - block += sd[devm].start_sect; + if (dpnt->device->changed) { /* * quietly refuse to do anything to a changed disc until the changed @@ -618,8 +614,8 @@ (SCpnt->sense_buffer[4] << 16) | (SCpnt->sense_buffer[5] << 8) | SCpnt->sense_buffer[6]; - if
(SCpnt->request.bh != NULL) - block_sectors = SCpnt->request.bh->b_size >> 9; + if (SCpnt->request.bio != NULL) + block_sectors = bio_sectors(SCpnt->request.bio); switch (SCpnt->device->sector_size) { case 1024: error_sector <<= 1; @@ -642,7 +638,7 @@ default: break; } - error_sector -= sd[SD_PARTITION(SCpnt->request.rq_dev)].start_sect; + error_sector &= ~(block_sectors - 1); good_sectors = error_sector - SCpnt->request.sector; if (good_sectors < 0 || good_sectors >= this_count) @@ -970,15 +966,11 @@ * So I have created this table. See ll_rw_blk.c * Jacques Gelinas (Jacques@solucorp.qc.ca) */ - int m; int hard_sector = sector_size; int sz = rscsi_disks[i].capacity * (hard_sector/256); /* There are 16 minors allocated for each major device */ - for (m = i << 4; m < ((i + 1) << 4); m++) { - sd_hardsizes[m] = hard_sector; - } - + blk_queue_hardsect_size(blk_get_queue(SD_MAJOR(i)), hard_sector); printk("SCSI device %s: " "%d %d-byte hdwr sectors (%d MB)\n", nbuff, rscsi_disks[i].capacity, @@ -1063,7 +1055,7 @@ static int sd_init() { - int i; + int i, maxparts; if (sd_template.dev_noticed == 0) return 0; @@ -1074,10 +1066,17 @@ if (sd_template.dev_max > N_SD_MAJORS * SCSI_DISKS_PER_MAJOR) sd_template.dev_max = N_SD_MAJORS * SCSI_DISKS_PER_MAJOR; + /* At most 16 partitions on each scsi disk. */ + maxparts = (sd_template.dev_max << 4); + if (maxparts == 0) + return 0; + if (!sd_registered) { for (i = 0; i < N_USED_SD_MAJORS; i++) { - if (devfs_register_blkdev(SD_MAJOR(i), "sd", &sd_fops)) { - printk("Unable to get major %d for SCSI disk\n", SD_MAJOR(i)); + if (devfs_register_blkdev(SD_MAJOR(i), "sd", + &sd_fops)) { + printk("Unable to get major %d for SCSI disk\n", + SD_MAJOR(i)); return 1; } } @@ -1087,32 +1086,26 @@ if (rscsi_disks) return 0; - rscsi_disks = kmalloc(sd_template.dev_max * sizeof(Scsi_Disk), GFP_ATOMIC); - if (!rscsi_disks) - goto cleanup_devfs; - memset(rscsi_disks, 0, sd_template.dev_max * sizeof(Scsi_Disk)); - - /* for every (necessary) major: */ - sd_sizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_sizes) - goto cleanup_disks; - memset(sd_sizes, 0, (sd_template.dev_max << 4) * sizeof(int)); - - sd_blocksizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_blocksizes) - goto cleanup_sizes; - - sd_hardsizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_hardsizes) - goto cleanup_blocksizes; - - sd_max_sectors = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_max_sectors) - goto cleanup_max_sectors; + /* allocate memory */ +#define init_mem_lth(x,n) x = kmalloc((n) * sizeof(*x), GFP_ATOMIC) +#define zero_mem_lth(x,n) memset(x, 0, (n) * sizeof(*x)) + + init_mem_lth(rscsi_disks, sd_template.dev_max); + init_mem_lth(sd_sizes, maxparts); + init_mem_lth(sd_blocksizes, maxparts); + init_mem_lth(sd, maxparts); + init_mem_lth(sd_gendisks, N_USED_SD_MAJORS); + init_mem_lth(sd_max_sectors, sd_template.dev_max << 4); + + if (!rscsi_disks || !sd_sizes || !sd_blocksizes || !sd || !sd_gendisks) + goto cleanup_mem; + + zero_mem_lth(rscsi_disks, sd_template.dev_max); + zero_mem_lth(sd_sizes, maxparts); + zero_mem_lth(sd, maxparts); - for (i = 0; i < sd_template.dev_max << 4; i++) { + for (i = 0; i < maxparts; i++) { sd_blocksizes[i] = 1024; - sd_hardsizes[i] = 512; /* * Allow lowlevel device drivers to generate 512k large scsi * commands if they know what they're doing and they ask for it @@ -1122,45 +1115,34 @@ } for (i = 0; i < N_USED_SD_MAJORS; i++) { - blksize_size[SD_MAJOR(i)] = 
sd_blocksizes + i * (SCSI_DISKS_PER_MAJOR << 4); - hardsect_size[SD_MAJOR(i)] = sd_hardsizes + i * (SCSI_DISKS_PER_MAJOR << 4); - max_sectors[SD_MAJOR(i)] = sd_max_sectors + i * (SCSI_DISKS_PER_MAJOR << 4); - } - /* - * FIXME: should unregister blksize_size, hardsect_size and max_sectors when - * the module is unloaded. - */ - sd = kmalloc((sd_template.dev_max << 4) * - sizeof(struct hd_struct), - GFP_ATOMIC); - if (!sd) - goto cleanup_sd; - memset(sd, 0, (sd_template.dev_max << 4) * sizeof(struct hd_struct)); - - if (N_USED_SD_MAJORS > 1) - sd_gendisks = kmalloc(N_USED_SD_MAJORS * sizeof(struct gendisk), GFP_ATOMIC); - if (!sd_gendisks) - goto cleanup_sd_gendisks; + request_queue_t *q = blk_get_queue(SD_MAJOR(i)); + int parts_per_major = (SCSI_DISKS_PER_MAJOR << 4); + + blksize_size[SD_MAJOR(i)] = + sd_blocksizes + i * parts_per_major; + blk_queue_hardsect_size(q, 512); + } + for (i = 0; i < N_USED_SD_MAJORS; i++) { + int N = SCSI_DISKS_PER_MAJOR; + sd_gendisks[i] = sd_gendisk; - sd_gendisks[i].de_arr = kmalloc (SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr, - GFP_ATOMIC); - if (!sd_gendisks[i].de_arr) - goto cleanup_gendisks_de_arr; - memset (sd_gendisks[i].de_arr, 0, - SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr); - sd_gendisks[i].flags = kmalloc (SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags, - GFP_ATOMIC); - if (!sd_gendisks[i].flags) - goto cleanup_gendisks_flags; - memset (sd_gendisks[i].flags, 0, - SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags); + + init_mem_lth(sd_gendisks[i].de_arr, N); + init_mem_lth(sd_gendisks[i].flags, N); + + if (!sd_gendisks[i].de_arr || !sd_gendisks[i].flags) + goto cleanup_gendisks; + + zero_mem_lth(sd_gendisks[i].de_arr, N); + zero_mem_lth(sd_gendisks[i].flags, N); + sd_gendisks[i].major = SD_MAJOR(i); sd_gendisks[i].major_name = "sd"; sd_gendisks[i].minor_shift = 4; sd_gendisks[i].max_p = 1 << 4; - sd_gendisks[i].part = sd + (i * SCSI_DISKS_PER_MAJOR << 4); - sd_gendisks[i].sizes = sd_sizes + (i * SCSI_DISKS_PER_MAJOR << 4); + sd_gendisks[i].part = sd + i * (N << 4); + sd_gendisks[i].sizes = sd_sizes + i * (N << 4); sd_gendisks[i].nr_real = 0; sd_gendisks[i].real_devices = (void *) (rscsi_disks + i * SCSI_DISKS_PER_MAJOR); @@ -1168,27 +1150,21 @@ return 0; -cleanup_gendisks_flags: - kfree(sd_gendisks[i].de_arr); -cleanup_gendisks_de_arr: - while (--i >= 0 ) { +#undef init_mem_lth +#undef zero_mem_lth + +cleanup_gendisks: + /* kfree can handle NULL, so no test is required here */ + for (i = 0; i < N_USED_SD_MAJORS; i++) { kfree(sd_gendisks[i].de_arr); kfree(sd_gendisks[i].flags); } +cleanup_mem: kfree(sd_gendisks); -cleanup_sd_gendisks: kfree(sd); -cleanup_sd: - kfree(sd_max_sectors); -cleanup_max_sectors: - kfree(sd_hardsizes); -cleanup_blocksizes: kfree(sd_blocksizes); -cleanup_sizes: kfree(sd_sizes); -cleanup_disks: kfree(rscsi_disks); -cleanup_devfs: for (i = 0; i < N_USED_SD_MAJORS; i++) { devfs_unregister_blkdev(SD_MAJOR(i), "sd"); } @@ -1203,7 +1179,7 @@ for (i = 0; i < N_USED_SD_MAJORS; i++) { blk_dev[SD_MAJOR(i)].queue = sd_find_queue; - add_gendisk(&sd_gendisks[i]); + add_gendisk(&(sd_gendisks[i])); } for (i = 0; i < sd_template.dev_max; ++i) @@ -1293,9 +1269,7 @@ int revalidate_scsidisk(kdev_t dev, int maxusage) { int target; - int max_p; - int start; - int i; + int res; target = DEVICE_NR(dev); @@ -1305,36 +1279,18 @@ } DEVICE_BUSY = 1; - max_p = sd_gendisks->max_p; - start = target << sd_gendisks->minor_shift; - - for (i = max_p - 1; i >= 0; i--) { - int index = start + i; - 
invalidate_device(MKDEV_SD_PARTITION(index), 1); - sd_gendisks->part[index].start_sect = 0; - sd_gendisks->part[index].nr_sects = 0; - /* - * Reset the blocksize for everything so that we can read - * the partition table. Technically we will determine the - * correct block size when we revalidate, but we do this just - * to make sure that everything remains consistent. - */ - sd_blocksizes[index] = 1024; - if (rscsi_disks[target].device->sector_size == 2048) - sd_blocksizes[index] = 2048; - else - sd_blocksizes[index] = 1024; - } + res = wipe_partitions(dev); + if (res) + goto leave; #ifdef MAYBE_REINIT MAYBE_REINIT; #endif - grok_partitions(&SD_GENDISK(target), target % SCSI_DISKS_PER_MAJOR, - 1<<4, CAPACITY); - + grok_partitions(dev, CAPACITY); +leave: DEVICE_BUSY = 0; - return 0; + return res; } static int fop_revalidate_scsidisk(kdev_t dev) @@ -1344,6 +1300,7 @@ static void sd_detach(Scsi_Device * SDp) { Scsi_Disk *dpnt; + kdev_t dev; int i, j; int max_p; int start; @@ -1351,18 +1308,13 @@ for (dpnt = rscsi_disks, i = 0; i < sd_template.dev_max; i++, dpnt++) if (dpnt->device == SDp) { - /* If we are disconnecting a disk driver, sync and invalidate - * everything */ max_p = sd_gendisk.max_p; start = i << sd_gendisk.minor_shift; + dev = MKDEV_SD_PARTITION(start); + wipe_partitions(dev); + for (j = max_p - 1; j >= 0; j--) + sd_sizes[start + j] = 0; - for (j = max_p - 1; j >= 0; j--) { - int index = start + j; - invalidate_device(MKDEV_SD_PARTITION(index), 1); - sd_gendisks->part[index].start_sect = 0; - sd_gendisks->part[index].nr_sects = 0; - sd_sizes[index] = 0; - } devfs_register_partitions (&SD_GENDISK (i), SD_MINOR_NUMBER (start), 1); /* unregister_disk() */ @@ -1375,7 +1327,6 @@ SD_GENDISK(i).nr_real--; return; } - return; } static int __init init_sd(void) @@ -1398,14 +1349,11 @@ kfree(rscsi_disks); kfree(sd_sizes); kfree(sd_blocksizes); - kfree(sd_hardsizes); kfree((char *) sd); } for (i = 0; i < N_USED_SD_MAJORS; i++) { - del_gendisk(&sd_gendisks[i]); - blk_size[SD_MAJOR(i)] = NULL; - hardsect_size[SD_MAJOR(i)] = NULL; - read_ahead[SD_MAJOR(i)] = 0; + del_gendisk(&(sd_gendisks[i])); + blk_clear(SD_MAJOR(i)); } sd_template.dev_max = 0; if (sd_gendisks != &sd_gendisk) diff -urN linux-2.5.1-pre1/drivers/scsi/sr.c linux/drivers/scsi/sr.c --- linux-2.5.1-pre1/drivers/scsi/sr.c Thu Oct 25 13:58:35 2001 +++ linux/drivers/scsi/sr.c Sat Dec 1 00:37:05 2001 @@ -88,7 +88,6 @@ static int *sr_sizes; static int *sr_blocksizes; -static int *sr_hardsizes; static int sr_open(struct cdrom_device_info *, int); void get_sectorsize(int); @@ -218,8 +217,8 @@ (SCpnt->sense_buffer[4] << 16) | (SCpnt->sense_buffer[5] << 8) | SCpnt->sense_buffer[6]; - if (SCpnt->request.bh != NULL) - block_sectors = SCpnt->request.bh->b_size >> 9; + if (SCpnt->request.bio != NULL) + block_sectors = bio_sectors(SCpnt->request.bio); if (block_sectors < 4) block_sectors = 4; if (scsi_CDs[device_nr].device->sector_size == 2048) @@ -663,6 +662,7 @@ scsi_CDs[i].needs_sector_size = 0; sr_sizes[i] = scsi_CDs[i].capacity >> (BLOCK_SIZE_BITS - 9); }; + blk_queue_hardsect_size(blk_get_queue(MAJOR_NR), sector_size); scsi_free(buffer, 512); } @@ -811,21 +811,14 @@ if (!sr_blocksizes) goto cleanup_sizes; - sr_hardsizes = kmalloc(sr_template.dev_max * sizeof(int), GFP_ATOMIC); - if (!sr_hardsizes) - goto cleanup_blocksizes; /* * These are good guesses for the time being. 
*/ - for (i = 0; i < sr_template.dev_max; i++) { + for (i = 0; i < sr_template.dev_max; i++) sr_blocksizes[i] = 2048; - sr_hardsizes[i] = 2048; - } + blksize_size[MAJOR_NR] = sr_blocksizes; - hardsect_size[MAJOR_NR] = sr_hardsizes; return 0; -cleanup_blocksizes: - kfree(sr_blocksizes); cleanup_sizes: kfree(sr_sizes); cleanup_cds: @@ -897,7 +890,6 @@ else read_ahead[MAJOR_NR] = 4; /* 4 sector read-ahead */ - return; } static void sr_detach(Scsi_Device * SDp) @@ -905,17 +897,18 @@ Scsi_CD *cpnt; int i; - for (cpnt = scsi_CDs, i = 0; i < sr_template.dev_max; i++, cpnt++) + for (cpnt = scsi_CDs, i = 0; i < sr_template.dev_max; i++, cpnt++) { if (cpnt->device == SDp) { /* - * Since the cdrom is read-only, no need to sync the device. + * Since the cdrom is read-only, no need to sync + * the device. * We should be kind to our buffer cache, however. */ invalidate_device(MKDEV(MAJOR_NR, i), 0); /* - * Reset things back to a sane state so that one can re-load a new - * driver (perhaps the same one). + * Reset things back to a sane state so that one can + * re-load a new driver (perhaps the same one). */ unregister_cdrom(&(cpnt->cdi)); cpnt->device = NULL; @@ -926,7 +919,7 @@ sr_sizes[i] = 0; return; } - return; + } } static int __init init_sr(void) @@ -948,13 +941,9 @@ kfree(sr_blocksizes); sr_blocksizes = NULL; - kfree(sr_hardsizes); - sr_hardsizes = NULL; } - blksize_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; read_ahead[MAJOR_NR] = 0; + blk_clear(MAJOR_NR); sr_template.dev_max = 0; } diff -urN linux-2.5.1-pre1/drivers/scsi/sym53c8xx.c linux/drivers/scsi/sym53c8xx.c --- linux-2.5.1-pre1/drivers/scsi/sym53c8xx.c Wed Oct 17 14:16:39 2001 +++ linux/drivers/scsi/sym53c8xx.c Sat Dec 1 00:37:05 2001 @@ -642,10 +642,10 @@ #define NCR_LOCK_NCB(np, flags) spin_lock_irqsave(&np->smp_lock, flags) #define NCR_UNLOCK_NCB(np, flags) spin_unlock_irqrestore(&np->smp_lock, flags) -#define NCR_LOCK_SCSI_DONE(np, flags) \ - spin_lock_irqsave(&io_request_lock, flags) -#define NCR_UNLOCK_SCSI_DONE(np, flags) \ - spin_unlock_irqrestore(&io_request_lock, flags) +#define NCR_LOCK_SCSI_DONE(host, flags) \ + spin_lock_irqsave(&((host)->host_lock), flags) +#define NCR_UNLOCK_SCSI_DONE(host, flags) \ + spin_unlock_irqrestore(&((host)->host_lock), flags) #else @@ -656,8 +656,8 @@ #define NCR_LOCK_NCB(np, flags) do { save_flags(flags); cli(); } while (0) #define NCR_UNLOCK_NCB(np, flags) do { restore_flags(flags); } while (0) -#define NCR_LOCK_SCSI_DONE(np, flags) do {;} while (0) -#define NCR_UNLOCK_SCSI_DONE(np, flags) do {;} while (0) +#define NCR_LOCK_SCSI_DONE(host, flags) do {;} while (0) +#define NCR_UNLOCK_SCSI_DONE(host, flags) do {;} while (0) #endif @@ -13676,9 +13676,9 @@ if (DEBUG_FLAGS & DEBUG_TINY) printk ("]\n"); if (done_list) { - NCR_LOCK_SCSI_DONE(np, flags); + NCR_LOCK_SCSI_DONE(done_list->host, flags); ncr_flush_done_cmds(done_list); - NCR_UNLOCK_SCSI_DONE(np, flags); + NCR_UNLOCK_SCSI_DONE(done_list->host, flags); } } @@ -13699,9 +13699,9 @@ NCR_UNLOCK_NCB(np, flags); if (done_list) { - NCR_LOCK_SCSI_DONE(np, flags); + NCR_LOCK_SCSI_DONE(done_list->host, flags); ncr_flush_done_cmds(done_list); - NCR_UNLOCK_SCSI_DONE(np, flags); + NCR_UNLOCK_SCSI_DONE(done_list->host, flags); } } diff -urN linux-2.5.1-pre1/drivers/scsi/sym53c8xx.h linux/drivers/scsi/sym53c8xx.h --- linux-2.5.1-pre1/drivers/scsi/sym53c8xx.h Thu Nov 22 11:49:48 2001 +++ linux/drivers/scsi/sym53c8xx.h Sat Dec 1 00:37:05 2001 @@ -96,8 +96,9 @@ this_id: 7, \ sg_tablesize: SCSI_NCR_SG_TABLESIZE, \ 
cmd_per_lun: SCSI_NCR_CMD_PER_LUN, \ - max_sectors: MAX_SEGMENTS*8, \ - use_clustering: DISABLE_CLUSTERING} + max_sectors: MAX_SEGMENTS*8, \ + use_clustering: DISABLE_CLUSTERING, \ + highmem_io: 1} #else diff -urN linux-2.5.1-pre1/drivers/scsi/sym53c8xx_2/sym53c8xx.h linux/drivers/scsi/sym53c8xx_2/sym53c8xx.h --- linux-2.5.1-pre1/drivers/scsi/sym53c8xx_2/sym53c8xx.h Fri Nov 9 15:22:54 2001 +++ linux/drivers/scsi/sym53c8xx_2/sym53c8xx.h Sat Dec 1 00:37:05 2001 @@ -119,7 +119,8 @@ this_id: 7, \ sg_tablesize: 0, \ cmd_per_lun: 0, \ - use_clustering: DISABLE_CLUSTERING} + use_clustering: DISABLE_CLUSTERING, \ + highmem_io: 1} #endif /* defined(HOSTS_C) || defined(MODULE) */ diff -urN linux-2.5.1-pre1/drivers/scsi/sym53c8xx_2/sym_glue.c linux/drivers/scsi/sym53c8xx_2/sym_glue.c --- linux-2.5.1-pre1/drivers/scsi/sym53c8xx_2/sym_glue.c Fri Nov 9 15:22:54 2001 +++ linux/drivers/scsi/sym53c8xx_2/sym_glue.c Sat Dec 1 00:37:05 2001 @@ -138,18 +138,11 @@ #define SYM_LOCK_DRIVER(flags) spin_lock_irqsave(&sym53c8xx_lock, flags) #define SYM_UNLOCK_DRIVER(flags) spin_unlock_irqrestore(&sym53c8xx_lock,flags) -#define SYM_INIT_LOCK_HCB(np) spin_lock_init(&np->s.smp_lock); -#define SYM_LOCK_HCB(np, flags) spin_lock_irqsave(&np->s.smp_lock, flags) -#define SYM_UNLOCK_HCB(np, flags) spin_unlock_irqrestore(&np->s.smp_lock, flags) - -#define SYM_LOCK_SCSI(np, flags) \ - spin_lock_irqsave(&io_request_lock, flags) -#define SYM_UNLOCK_SCSI(np, flags) \ - spin_unlock_irqrestore(&io_request_lock, flags) - -/* Ugly, but will make things easier if this locking will ever disappear */ -#define SYM_LOCK_SCSI_NOSAVE(np) spin_lock_irq(&io_request_lock) -#define SYM_UNLOCK_SCSI_NORESTORE(np) spin_unlock_irq(&io_request_lock) +#define SYM_INIT_LOCK_HCB(np) spin_lock_init(&np->s.host->host_lock); +#define SYM_LOCK_HCB(np, flags) \ + spin_lock_irqsave(&np->s.host->host_lock, flags) +#define SYM_UNLOCK_HCB(np, flags) \ + spin_unlock_irqrestore(&np->s.host->host_lock, flags) /* * These simple macros limit expression involving @@ -966,14 +959,18 @@ { hcb_p np = SYM_SOFTC_PTR(cmd); ucmd_p ucp = SYM_UCMD_PTR(cmd); - u_long flags; int sts = 0; +#if 0 + u_long flags; +#endif cmd->scsi_done = done; cmd->host_scribble = NULL; memset(ucp, 0, sizeof(*ucp)); +#if 0 SYM_LOCK_HCB(np, flags); +#endif /* * Shorten our settle_time if needed for @@ -999,7 +996,9 @@ sym_insque_tail(&ucp->link_cmdq, &np->s.wait_cmdq); } out: +#if 0 SYM_UNLOCK_HCB(np, flags); +#endif return 0; } @@ -1010,21 +1009,21 @@ static void sym53c8xx_intr(int irq, void *dev_id, struct pt_regs * regs) { unsigned long flags; - unsigned long flags1; hcb_p np = (hcb_p) dev_id; if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("["); - SYM_LOCK_SCSI(np, flags1); SYM_LOCK_HCB(np, flags); sym_interrupt(np); + /* + * push queue walk-through to tasklet + */ if (!sym_que_empty(&np->s.wait_cmdq) && !np->s.settle_time_valid) sym_requeue_awaiting_cmds(np); SYM_UNLOCK_HCB(np, flags); - SYM_UNLOCK_SCSI(np, flags1); if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("]\n"); } @@ -1036,9 +1035,7 @@ { hcb_p np = (hcb_p) npref; unsigned long flags; - unsigned long flags1; - SYM_LOCK_SCSI(np, flags1); SYM_LOCK_HCB(np, flags); sym_timer(np); @@ -1047,7 +1044,6 @@ sym_requeue_awaiting_cmds(np); SYM_UNLOCK_HCB(np, flags); - SYM_UNLOCK_SCSI(np, flags1); } @@ -1209,9 +1205,7 @@ ep->timer.data = (u_long)cmd; ep->timed_out = 1; /* Be pessimistic for once :) */ add_timer(&ep->timer); - SYM_UNLOCK_SCSI_NORESTORE(np); down(&ep->sem); - SYM_LOCK_SCSI_NOSAVE(np); if (ep->timed_out) sts = -2; } @@ -1975,6 +1969,7 @@ goto 
attach_failed; #endif host_data->ncb = np; + np->s.host = instance; SYM_INIT_LOCK_HCB(np); @@ -2140,6 +2135,7 @@ instance->max_cmd_len = 16; #endif instance->select_queue_depths = sym53c8xx_select_queue_depths; + instance->highmem_io = 1; SYM_UNLOCK_HCB(np, flags); diff -urN linux-2.5.1-pre1/drivers/scsi/sym53c8xx_2/sym_glue.h linux/drivers/scsi/sym53c8xx_2/sym_glue.h --- linux-2.5.1-pre1/drivers/scsi/sym53c8xx_2/sym_glue.h Thu Nov 22 10:41:14 2001 +++ linux/drivers/scsi/sym53c8xx_2/sym_glue.h Sat Dec 1 00:37:05 2001 @@ -456,10 +456,10 @@ char chip_name[8]; struct pci_dev *device; + struct Scsi_Host *host; + u_char bus; /* PCI BUS number */ u_char device_fn; /* PCI BUS device and function */ - - spinlock_t smp_lock; /* Lock for SMP threading */ vm_offset_t mmio_va; /* MMIO kernel virtual address */ vm_offset_t ram_va; /* RAM kernel virtual address */ diff -urN linux-2.5.1-pre1/drivers/scsi/sym53c8xx_2/sym_hipd.c linux/drivers/scsi/sym53c8xx_2/sym_hipd.c --- linux-2.5.1-pre1/drivers/scsi/sym53c8xx_2/sym_hipd.c Fri Nov 9 15:22:54 2001 +++ linux/drivers/scsi/sym53c8xx_2/sym_hipd.c Sat Dec 1 00:37:05 2001 @@ -4689,8 +4689,8 @@ return; out_clrack: OUTL_DSP (SCRIPTA_BA (np, clrack)); - return; out_stuck: + ; } /* @@ -5223,9 +5223,8 @@ * And accept tagged commands now. */ lp->head.itlq_tbl_sa = cpu_to_scr(vtobus(lp->itlq_tbl)); - - return; fail: + ; } /* diff -urN linux-2.5.1-pre1/drivers/scsi/sym53c8xx_2/sym_nvram.c linux/drivers/scsi/sym53c8xx_2/sym_nvram.c --- linux-2.5.1-pre1/drivers/scsi/sym53c8xx_2/sym_nvram.c Fri Nov 9 15:22:54 2001 +++ linux/drivers/scsi/sym53c8xx_2/sym_nvram.c Sat Dec 1 00:37:05 2001 @@ -505,10 +505,10 @@ return retv; } -#undef SET_BIT 0 -#undef CLR_BIT 1 -#undef SET_CLK 2 -#undef CLR_CLK 3 +#undef SET_BIT +#undef CLR_BIT +#undef SET_CLK +#undef CLR_CLK /* * Try reading Symbios NVRAM. 
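The scsi_merge.c hunks at the start of this section replace the buffer_head walk (SCpnt->request.bh, b_size >> 9) with the bio primitives (req->bio, bio_sectors(), bio_data()), and copy a WRITE's payload through bio_kmap_irq(), because the page behind a bio may now live in highmem. The fragment below is a minimal sketch of that map-copy-unmap pattern; example_copy_for_write() and its arguments are hypothetical, only the bio_kmap_irq()/bio_kunmap_irq() calls are taken from the hunk above.

#include <linux/bio.h>
#include <linux/string.h>

/*
 * Minimal sketch of the bio_kmap_irq()/bio_kunmap_irq() pattern used by
 * the scsi_init_io hunk above when a WRITE's payload must be copied into
 * a low-memory bounce buffer. The bio's page may be a highmem page, so
 * it has to be mapped before memcpy() can touch it.
 * example_copy_for_write() is a hypothetical helper, not patch code.
 */
static void example_copy_for_write(struct bio *bio, char *bounce,
				   int nr_sectors)
{
	unsigned long flags;
	char *buf;

	buf = bio_kmap_irq(bio, &flags);	/* atomic kmap; irqs off */
	memcpy(bounce, buf, nr_sectors << 9);	/* sectors are 512 bytes */
	bio_kunmap_irq(buf, &flags);		/* unmap, restore irqs */
}

Keeping interrupts disabled for the duration of the copy is what prevents the per-CPU atomic kmap slot from being reused underneath the mapping.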
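The second recurring change above (scsi_obsolete.c, scsi_queue.c, sym53c8xx*) is lock granularity: code that used to serialize on the global io_request_lock now takes the per-adapter host->host_lock, or q->queue_lock where only queue counters are touched. Below is a before/after sketch shaped like the scsi_old_times_out() conversion; the handler name and body are illustrative, not from the patch.

#include <linux/spinlock.h>
#include "scsi.h"	/* Scsi_Cmnd; assumes a drivers/scsi build context */
#include "hosts.h"	/* struct Scsi_Host with the new host_lock */

/*
 * Sketch of the io_request_lock -> host_lock conversion. Caching
 * SCpnt->host up front, as scsi_old_times_out() now does, also avoids
 * repeated pointer chasing in the diagnostics.
 */
static void example_times_out(Scsi_Cmnd * SCpnt)
{
	struct Scsi_Host *host = SCpnt->host;
	unsigned long flags;

	/* 2.4 style: spin_lock_irqsave(&io_request_lock, flags); */
	spin_lock_irqsave(&host->host_lock, flags);

	/* ... timeout handling, now serialized per adapter only ... */

	spin_unlock_irqrestore(&host->host_lock, flags);
}

Timeouts and completions on different adapters no longer contend for a single kernel-wide lock, which is the point of the conversion.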
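Finally, the initialize_merge_fn() hunk and the highmem_io: 1 template entries show the two halves of highmem I/O enabling: the low-level driver asserts that it can DMA to high pages, and the midlayer translates that into a bounce limit for the request queue. The sketch below restates that decision tree as a standalone helper; example_set_bounce_limit() is invented, while the logic and constants come from the hunk itself.

#include <linux/blkdev.h>
#include "scsi.h"	/* Scsi_Device, TYPE_DISK; drivers/scsi context assumed */
#include "hosts.h"	/* struct Scsi_Host */

/*
 * Restatement of the bounce-limit selection added to
 * initialize_merge_fn() above; the wrapper itself is hypothetical.
 */
static void example_set_bounce_limit(struct Scsi_Host *SHpnt,
				     Scsi_Device *SDpnt,
				     request_queue_t *q)
{
	dma64_addr_t bounce_limit = BLK_BOUNCE_HIGH;	/* default: bounce every highmem page */

	if (SHpnt->highmem_io && (SDpnt->type == TYPE_DISK)) {
		if (!PCI_DMA_BUS_IS_PHYS)
			/* an IOMMU translates bus addresses: no practical limit */
			bounce_limit = BLK_BOUNCE_ANY;
		else
			bounce_limit = SHpnt->pci_dev->dma_mask;
	}

	/* the block layer bounces any page above this address */
	blk_queue_bounce_limit(q, bounce_limit);
}

BLK_BOUNCE_HIGH keeps the old always-bounce behaviour for drivers that never opt in; an opted-in adapter bounces only pages above its DMA mask, and nothing at all when an IOMMU makes the bus non-physical (BLK_BOUNCE_ANY).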
diff -urN linux-2.5.1-pre1/drivers/sound/ad1816.c linux/drivers/sound/ad1816.c --- linux-2.5.1-pre1/drivers/sound/ad1816.c Fri Nov 9 15:22:54 2001 +++ linux/drivers/sound/ad1816.c Sat Dec 1 00:37:05 2001 @@ -1258,7 +1258,7 @@ static int __initdata dma = -1; static int __initdata dma2 = -1; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ struct pci_dev *ad1816_dev = NULL; static int activated = 1; @@ -1280,7 +1280,7 @@ MODULE_PARM(ad1816_clockfreq,"i"); MODULE_PARM(options,"i"); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ static struct pci_dev *activate_dev(char *devname, char *resname, struct pci_dev *dev) { @@ -1407,7 +1407,7 @@ static int __init init_ad1816(void) { -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ if(isapnp && (ad1816_probe_isapnp(&cfg) < 0) ) { printk(KERN_NOTICE "ad1816: No ISAPnP cards found, trying standard ones...\n"); isapnp = 0; @@ -1447,7 +1447,7 @@ } nr_ad1816_devs=0; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ if(activated) if(ad1816_dev) ad1816_dev->deactivate(ad1816_dev); diff -urN linux-2.5.1-pre1/drivers/sound/ad1848.c linux/drivers/sound/ad1848.c --- linux-2.5.1-pre1/drivers/sound/ad1848.c Sun Sep 30 12:26:08 2001 +++ linux/drivers/sound/ad1848.c Sat Dec 1 00:37:05 2001 @@ -162,7 +162,7 @@ ,{CAP_F_TIMER} /* MD_1845_SSCAPE */ }; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ static int isapnp = 1; static int isapnpjump = 0; static int reverse = 0; @@ -2830,7 +2830,7 @@ MODULE_PARM(deskpro_m, "i"); /* Special magic for Deskpro M box */ MODULE_PARM(soundpro, "i"); /* More special magic for SoundPro chips */ -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ MODULE_PARM(isapnp, "i"); MODULE_PARM(isapnpjump, "i"); MODULE_PARM(reverse, "i"); @@ -3000,7 +3000,7 @@ { printk(KERN_INFO "ad1848/cs4248 codec driver Copyright (C) by Hannu Savolainen 1993-1996\n"); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ if(isapnp && (ad1848_isapnp_probe(&cfg) < 0) ) { printk(KERN_NOTICE "ad1848: No ISAPnP cards found, trying standard ones...\n"); isapnp = 0; @@ -3035,7 +3035,7 @@ if(loaded) unload_ms_sound(&cfg); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ if(audio_activated) if(ad1848_dev) ad1848_dev->deactivate(ad1848_dev); diff -urN linux-2.5.1-pre1/drivers/sound/awe_wave.c linux/drivers/sound/awe_wave.c --- linux-2.5.1-pre1/drivers/sound/awe_wave.c Sun Sep 30 12:26:08 2001 +++ linux/drivers/sound/awe_wave.c Sat Dec 1 00:37:05 2001 @@ -26,9 +26,7 @@ #include #include #include -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE #include -#endif #include "sound_config.h" @@ -205,7 +203,7 @@ int io = AWE_DEFAULT_BASE_ADDR; /* Emu8000 base address */ int memsize = AWE_DEFAULT_MEM_SIZE; /* memory size in Kbytes */ -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ static int isapnp = -1; #else static int isapnp = 0; @@ -4772,7 +4770,7 @@ return 1; } -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ static struct { unsigned short card_vendor, card_device; unsigned short vendor; @@ -4841,7 +4839,7 @@ { int base; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ if (isapnp) { if (awe_probe_isapnp(&io) < 0) { printk(KERN_ERR "AWE32: No ISAPnP cards found\n"); @@ -6132,7 +6130,7 @@ void __exit unload_awe(void) { _unload_awe(); -#if defined CONFIG_ISAPNP 
|| defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ if (isapnp) awe_deactivate_isapnp(); #endif /* isapnp */ diff -urN linux-2.5.1-pre1/drivers/sound/cmpci.c linux/drivers/sound/cmpci.c --- linux-2.5.1-pre1/drivers/sound/cmpci.c Fri Nov 9 14:07:41 2001 +++ linux/drivers/sound/cmpci.c Sat Dec 1 00:37:05 2001 @@ -2496,7 +2496,6 @@ spin_unlock_irqrestore(&s->lock, flags); s->open_mode |= (file->f_mode << FMODE_MIDI_SHIFT) & (FMODE_MIDI_READ | FMODE_MIDI_WRITE); up(&s->open_sem); - MOD_INC_USE_COUNT; return 0; } @@ -2694,7 +2693,6 @@ outb(1, s->iosynth+3); /* enable OPL3 */ s->open_mode |= FMODE_DMFM; up(&s->open_sem); - MOD_INC_USE_COUNT; return 0; } diff -urN linux-2.5.1-pre1/drivers/sound/maestro3.c linux/drivers/sound/maestro3.c --- linux-2.5.1-pre1/drivers/sound/maestro3.c Fri Nov 9 13:41:42 2001 +++ linux/drivers/sound/maestro3.c Sat Dec 1 00:37:05 2001 @@ -2036,7 +2036,6 @@ set_fmt(s, fmtm, fmts); s->open_mode |= file->f_mode & (FMODE_READ | FMODE_WRITE); - MOD_INC_USE_COUNT; up(&s->open_sem); spin_unlock_irqrestore(&s->lock, flags); return 0; @@ -2075,7 +2074,6 @@ up(&s->open_sem); wake_up(&s->open_wait); - MOD_DEC_USE_COUNT; return 0; } @@ -2142,14 +2140,12 @@ int minor = MINOR(inode->i_rdev); struct m3_card *card = devs; - MOD_INC_USE_COUNT; for (card = devs; card != NULL; card = card->next) { if((card->ac97 != NULL) && (card->ac97->dev_mixer == minor)) break; } if (!card) { - MOD_DEC_USE_COUNT; return -ENODEV; } @@ -2160,7 +2156,6 @@ static int m3_release_mixdev(struct inode *inode, struct file *file) { - MOD_DEC_USE_COUNT; return 0; } @@ -2173,6 +2168,7 @@ } static struct file_operations m3_mixer_fops = { + owner: THIS_MODULE, llseek: no_llseek, ioctl: m3_ioctl_mixdev, open: m3_open_mixdev, @@ -2546,6 +2542,7 @@ } static struct file_operations m3_audio_fops = { + owner: THIS_MODULE, llseek: &no_llseek, read: &m3_read, write: &m3_write, diff -urN linux-2.5.1-pre1/drivers/sound/opl3sa2.c linux/drivers/sound/opl3sa2.c --- linux-2.5.1-pre1/drivers/sound/opl3sa2.c Thu Oct 11 09:43:30 2001 +++ linux/drivers/sound/opl3sa2.c Sat Dec 1 00:37:05 2001 @@ -99,7 +99,7 @@ #define CHIPSET_OPL3SA2 0 #define CHIPSET_OPL3SA3 1 -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ #define OPL3SA2_CARDS_MAX 4 #else #define OPL3SA2_CARDS_MAX 1 @@ -147,7 +147,7 @@ static int __initdata ymode = -1; static int __initdata loopback = -1; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ /* PnP specific parameters */ static int __initdata isapnp = 1; static int __initdata multiple = 1; @@ -191,7 +191,7 @@ MODULE_PARM(loopback, "i"); MODULE_PARM_DESC(loopback, "Set A/D input source. Useful for echo cancellation (0 = Mic Rch (default), 1 = Mono output loopback)"); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ MODULE_PARM(isapnp, "i"); MODULE_PARM_DESC(isapnp, "When set to 0, ISA PnP support will be disabled"); @@ -807,7 +807,7 @@ } -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ struct isapnp_device_id isapnp_opl3sa2_list[] __initdata = { { ISAPNP_ANY_ID, ISAPNP_ANY_ID, @@ -888,7 +888,7 @@ return 0; } -#endif /* CONFIG_ISAPNP || CONFIG_ISAPNP_MODULE */ +#endif /* __ISAPNP__ */ /* End of component functions */ @@ -909,9 +909,9 @@ max = (multiple && isapnp) ? 
OPL3SA2_CARDS_MAX : 1; for(card = 0; card < max; card++, opl3sa2_cards_num++) { -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ /* - * Please remember that even with CONFIG_ISAPNP defined one + * Please remember that even with __ISAPNP__ defined one * should still be able to disable PNP support for this * single driver! */ @@ -1039,7 +1039,7 @@ unload_opl3sa2_mss(&cfg_mss[card]); unload_opl3sa2(&cfg[card], card); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ if(opl3sa2_activated[card] && opl3sa2_dev[card]) { opl3sa2_dev[card]->deactivate(opl3sa2_dev[card]); @@ -1058,7 +1058,7 @@ static int __init setup_opl3sa2(char *str) { /* io, irq, dma, dma2,... */ -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ int ints[11]; #else int ints[9]; @@ -1073,7 +1073,7 @@ mpu_io = ints[6]; ymode = ints[7]; loopback = ints[8]; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ isapnp = ints[9]; multiple = ints[10]; #endif diff -urN linux-2.5.1-pre1/drivers/sound/sb_card.c linux/drivers/sound/sb_card.c --- linux-2.5.1-pre1/drivers/sound/sb_card.c Thu Oct 11 09:43:30 2001 +++ linux/drivers/sound/sb_card.c Sat Dec 1 00:37:05 2001 @@ -69,7 +69,7 @@ #include "sb_mixer.h" #include "sb.h" -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ #define SB_CARDS_MAX 5 #else #define SB_CARDS_MAX 1 @@ -196,7 +196,7 @@ *opl_dev[SB_CARDS_MAX] = {NULL}; -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ static int isapnp = 1; static int isapnpjump = 0; static int multiple = 1; @@ -226,7 +226,7 @@ MODULE_PARM(esstype, "i"); MODULE_PARM(acer, "i"); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ MODULE_PARM(isapnp, "i"); MODULE_PARM(isapnpjump, "i"); MODULE_PARM(multiple, "i"); @@ -251,7 +251,7 @@ MODULE_PARM_DESC(esstype, "ESS chip type"); MODULE_PARM_DESC(acer, "Set this to detect cards in some ACER notebooks"); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ /* Please add new entries at the end of the table */ static struct { @@ -909,8 +909,8 @@ printk(KERN_INFO "Soundblaster audio driver Copyright (C) by Hannu Savolainen 1993-1996\n"); for(card = 0; card < max; card++, sb_cards_num++) { -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE - /* Please remember that even with CONFIG_ISAPNP defined one +#ifdef __ISAPNP__ + /* Please remember that even with __ISAPNP__ defined one * should still be able to disable PNP support for this * single driver! 
*/ if((!pnplegacy||card>0) && isapnp && (sb_isapnp_probe(&cfg[card], &cfg_mpu[card], card) < 0) ) { @@ -997,7 +997,7 @@ if (sbmpu[i]) unload_sbmpu(&cfg_mpu[i]); -#if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE +#ifdef __ISAPNP__ if(!audio_activated[i] && sb_dev[i]) sb_dev[i]->deactivate(sb_dev[i]); if(!mpu_activated[i] && mpu_dev[i]) diff -urN linux-2.5.1-pre1/drivers/sound/sound_core.c linux/drivers/sound/sound_core.c --- linux-2.5.1-pre1/drivers/sound/sound_core.c Sun Sep 30 12:26:08 2001 +++ linux/drivers/sound/sound_core.c Sat Dec 1 00:37:05 2001 @@ -115,7 +115,6 @@ *list=s; - MOD_INC_USE_COUNT; return n; } @@ -133,7 +132,6 @@ *list=p->next; devfs_unregister (p->de); kfree(p); - MOD_DEC_USE_COUNT; return; } list=&(p->next); diff -urN linux-2.5.1-pre1/drivers/sound/ymfpci.c linux/drivers/sound/ymfpci.c --- linux-2.5.1-pre1/drivers/sound/ymfpci.c Mon Nov 19 14:53:19 2001 +++ linux/drivers/sound/ymfpci.c Sat Dec 1 00:37:05 2001 @@ -1872,7 +1872,6 @@ #endif up(&unit->open_sem); - MOD_INC_USE_COUNT; return 0; out_nodma: @@ -1921,7 +1920,6 @@ up(&codec->open_sem); - MOD_DEC_USE_COUNT; return 0; } @@ -1949,7 +1947,6 @@ match: file->private_data = unit->ac97_codec[i]; - MOD_INC_USE_COUNT; return 0; } @@ -1963,11 +1960,11 @@ static int ymf_release_mixdev(struct inode *inode, struct file *file) { - MOD_DEC_USE_COUNT; return 0; } static /*const*/ struct file_operations ymf_fops = { + owner: THIS_MODULE, llseek: no_llseek, read: ymf_read, write: ymf_write, @@ -1979,6 +1976,7 @@ }; static /*const*/ struct file_operations ymf_mixer_fops = { + owner: THIS_MODULE, llseek: no_llseek, ioctl: ymf_ioctl_mixdev, open: ymf_open_mixdev, @@ -2043,13 +2041,6 @@ ymfpci_aclink_reset(unit->pci); ymfpci_codec_ready(unit, 0, 1); /* prints diag if not ready. */ - for (i = 0; i < NR_AC97; i++) { - codec = unit->ac97_codec[i]; - if (!codec) - continue; - ac97_restore_state(codec); - } - #ifdef CONFIG_SOUND_YMFPCI_LEGACY /* XXX At this time the legacy registers are probably deprogrammed. */ #endif @@ -2063,6 +2054,13 @@ if (unit->start_count) { ymfpci_writel(unit, YDSXGR_MODE, 3); unit->active_bank = ymfpci_readl(unit, YDSXGR_CTRLSELECT) & 1; + } + + for (i = 0; i < NR_AC97; i++) { + codec = unit->ac97_codec[i]; + if (!codec) + continue; + ac97_restore_state(codec); } unit->suspended = 0; diff -urN linux-2.5.1-pre1/drivers/usb/dc2xx.c linux/drivers/usb/dc2xx.c --- linux-2.5.1-pre1/drivers/usb/dc2xx.c Fri Sep 14 14:04:07 2001 +++ linux/drivers/usb/dc2xx.c Sat Dec 1 00:37:05 2001 @@ -112,12 +112,15 @@ /* These have a different application level protocol which * is part of the Flashpoint "DigitaOS". That supports some * non-camera devices, and some non-Kodak cameras. + * Use this driver to get USB and "OpenDis" to talk. */ { USB_DEVICE(0x040a, 0x0100) }, // Kodak DC-220 { USB_DEVICE(0x040a, 0x0110) }, // Kodak DC-260 { USB_DEVICE(0x040a, 0x0111) }, // Kodak DC-265 { USB_DEVICE(0x040a, 0x0112) }, // Kodak DC-290 { USB_DEVICE(0xf003, 0x6002) }, // HP PhotoSmart C500 + { USB_DEVICE(0x03f0, 0x4102) }, // HP PhotoSmart C618 + { USB_DEVICE(0x0a17, 0x1001) }, // Pentax EI-200 /* Other USB devices may well work here too, so long as they * just stick to half duplex bulk packet exchanges. 
That diff -urN linux-2.5.1-pre1/drivers/usb/pwc-ctrl.c linux/drivers/usb/pwc-ctrl.c --- linux-2.5.1-pre1/drivers/usb/pwc-ctrl.c Wed Oct 17 14:34:06 2001 +++ linux/drivers/usb/pwc-ctrl.c Sat Dec 1 00:37:05 2001 @@ -782,7 +782,7 @@ { char buf; - if (pdev->type < 675 || pdev->release < 6) + if (pdev->type < 675 || (pdev->type < 730 && pdev->release < 6)) return 0; /* Not supported by Nala or Timon < release 6 */ if (power) diff -urN linux-2.5.1-pre1/drivers/usb/pwc-if.c linux/drivers/usb/pwc-if.c --- linux-2.5.1-pre1/drivers/usb/pwc-if.c Wed Oct 17 14:34:06 2001 +++ linux/drivers/usb/pwc-if.c Sat Dec 1 00:37:05 2001 @@ -91,6 +91,8 @@ disconnect: usb_pwc_disconnect, /* disconnect() */ }; +#define MAX_DEV_HINTS 10 + static int default_size = PSZ_QCIF; static int default_fps = 10; static int default_palette = VIDEO_PALETTE_YUV420P; /* This format is understood by most tools */ @@ -99,13 +101,17 @@ int pwc_trace = TRACE_MODULE | TRACE_FLOW | TRACE_PWCX; static int power_save = 0; static int led_on = 1, led_off = 0; /* defaults to LED that is on while in use */ -int pwc_preferred_compression = 2; /* 0..3 = uncompressed..high */ + int pwc_preferred_compression = 2; /* 0..3 = uncompressed..high */ +static struct { + int type; + char serial_number[30]; + int device_node; + struct pwc_device *pdev; +} device_hint[MAX_DEV_HINTS]; static struct semaphore mem_lock; static void *mem_leak = NULL; /* For delayed kfree()s. See below */ -static int video_nr = -1; - /***/ static int pwc_video_open(struct video_device *vdev, int mode); @@ -647,7 +653,8 @@ errmsg = "Unknown"; switch(urb->status) { case -ENOSR: errmsg = "Buffer error (overrun)"; break; - case -EPIPE: errmsg = "Babble/stalled (bad cable?)"; break; + case -EPIPE: errmsg = "Stalled (device not responding)"; break; + case -EOVERFLOW: errmsg = "Babble (bad cable?)"; break; case -EPROTO: errmsg = "Bit-stuff error (bad cable?)"; break; case -EILSEQ: errmsg = "CRC/Timeout"; break; case -ETIMEDOUT: errmsg = "NAK (device does not respond)"; break; @@ -765,6 +772,11 @@ } /* .. flen < last_packet_size */ pdev->vlast_packet_size = flen; } /* ..status == 0 */ +#ifdef PWC_DEBUG + /* This is normally not interesting to the user, unless you are really debugging something */ + else + Trace(TRACE_FLOW, "Iso frame %d of USB has error %d\n", i, fst); +#endif } if (awake) wake_up_interruptible(&pdev->frameq); @@ -1140,7 +1152,7 @@ return -ERESTARTSYS; } schedule(); - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(TASK_INTERRUPTIBLE); } remove_wait_queue(&pdev->frameq, &wait); set_current_state(TASK_RUNNING); @@ -1595,7 +1607,9 @@ struct pwc_device *pdev = NULL; struct video_device *vdev; int vendor_id, product_id, type_id; - int i; + int i, hint; + int video_nr = -1; /* default: use next available device */ + char serial_number[30]; free_mem_leak(); @@ -1698,6 +1712,10 @@ } else return NULL; /* Not Philips, Askey, Logitech or Samsung, for sure. */ + memset(serial_number, 0, 30); + usb_string(udev, udev->descriptor.iSerialNumber, serial_number, 29); + Trace(TRACE_PROBE, "Device serial number is %s\n", serial_number); + if (udev->descriptor.bNumConfigurations > 1) Info("Warning: more than 1 configuration available.\n"); @@ -1734,6 +1752,21 @@ pdev->release = udev->descriptor.bcdDevice; Trace(TRACE_PROBE, "Release: %04x\n", pdev->release); + + /* Now search device_hint[] table for a match, so we can hint a node number. 
*/ + for (hint = 0; hint < MAX_DEV_HINTS; hint++) { + if (((device_hint[hint].type == -1) || (device_hint[hint].type == pdev->type)) && + (device_hint[hint].pdev == NULL)) { + /* so far, so good... try serial number */ + if ((device_hint[hint].serial_number[0] == '*') || !strcmp(device_hint[hint].serial_number, serial_number)) { + /* match! */ + video_nr = device_hint[hint].device_node; + Trace(TRACE_PROBE, "Found hint, will try to register as /dev/video%d\n", video_nr); + break; + } + } + } + i = video_register_device(vdev, VFL_TYPE_GRABBER, video_nr); if (i < 0) { Err("Failed to register as video device (%d).\n", i); @@ -1743,6 +1776,9 @@ Trace(TRACE_PROBE, "Registered video struct at 0x%p.\n", vdev); Info("Registered as /dev/video%d.\n", vdev->minor & 0x3F); } + /* occupy slot */ + if (hint < MAX_DEV_HINTS) + device_hint[hint].pdev = pdev; #if 0 /* Shut down camera now (some people like the LED off) */ @@ -1762,6 +1798,7 @@ static void usb_pwc_disconnect(struct usb_device *udev, void *ptr) { struct pwc_device *pdev; + int hint; lock_kernel(); free_mem_leak(); @@ -1815,12 +1852,31 @@ pdev->vdev = NULL; } } + + /* search device_hint[] table if we occupy a slot, by any chance */ + for (hint = 0; hint < MAX_DEV_HINTS; hint++) + if (device_hint[hint].pdev == pdev) + device_hint[hint].pdev = NULL; + pdev->udev = NULL; unlock_kernel(); kfree(pdev); } +/* *grunt* We have to do atoi ourselves :-( */ +static int pwc_atoi(char *s) +{ + int k = 0; + + k = 0; + while (*s != '\0' && *s >= '0' && *s <= '9') { + k = 10 * k + (*s - '0'); + s++; + } + return k; +} + /* * Initialization code & module stuff @@ -1833,8 +1889,8 @@ static int trace = -1; static int compression = -1; static int leds[2] = { -1, -1 }; +static char *dev_hint[10] = { }; -MODULE_PARM(video_nr, "i"); MODULE_PARM(size, "s"); MODULE_PARM_DESC(size, "Initial image size. One of sqcif, qsif, qcif, sif, cif, vga"); MODULE_PARM(fps, "i"); @@ -1851,13 +1907,16 @@ MODULE_PARM_DESC(compression, "Preferred compression quality. Range 0 (uncompressed) to 3 (high compression)"); MODULE_PARM(leds, "2i"); MODULE_PARM_DESC(leds, "LED on,off time in milliseconds"); +MODULE_PARM(dev_hint, "0-10s"); +MODULE_PARM_DESC(dev_hint, "Device node hints"); + MODULE_DESCRIPTION("Philips USB webcam driver"); MODULE_AUTHOR("Nemosoft Unv. "); MODULE_LICENSE("GPL"); static int __init usb_pwc_init(void) { - int s; + int i, sz; char *sizenames[PSZ_MAX] = { "sqcif", "qsif", "qcif", "sif", "cif", "vga" }; Info("Philips PCA645/646 + PCVC675/680/690 + PCVC730/740/750 webcam module version " PWC_VERSION " loaded.\n"); @@ -1874,13 +1933,13 @@ if (size) { /* string; try matching with array */ - for (s = 0; s < PSZ_MAX; s++) { - if (!strcmp(sizenames[s], size)) { /* Found! */ - default_size = s; + for (sz = 0; sz < PSZ_MAX; sz++) { + if (!strcmp(sizenames[sz], size)) { /* Found! */ + default_size = sz; break; } } - if (s == PSZ_MAX) { + if (sz == PSZ_MAX) { Err("Size not recognized; try size=[sqcif | qsif | qcif | sif | cif | vga].\n"); return -EINVAL; } @@ -1920,6 +1979,74 @@ led_on = leds[0] / 100; if (leds[1] >= 0) led_off = leds[1] / 100; + + /* Big device node whoopla. Basically, it allows you to assign a + device node (/dev/videoX) to a camera, based on its type + & serial number. The format is [type[.serialnumber]:]node. + + Any camera that isn't matched by these rules gets the next + available free device node.
+ */ + for (i = 0; i < MAX_DEV_HINTS; i++) { + char *s, *colon, *dot; + + /* This loop also initializes the array */ + device_hint[i].pdev = NULL; + s = dev_hint[i]; + if (s != NULL && *s != '\0') { + device_hint[i].type = -1; /* wildcard */ + strcpy(device_hint[i].serial_number, "*"); + + /* parse string: chop at ':' & '/' */ + colon = dot = s; + while (*colon != '\0' && *colon != ':') + colon++; + while (*dot != '\0' && *dot != '.') + dot++; + /* Few sanity checks */ + if (*dot != '\0' && dot > colon) { + Err("Malformed camera hint: the colon must be after the dot.\n"); + return -EINVAL; + } + + if (*colon == '\0') { + /* No colon */ + if (*dot != '\0') { + Err("Malformed camera hint: no colon + device node given.\n"); + return -EINVAL; + } + else { + /* No type or serial number specified, just a number. */ + device_hint[i].device_node = pwc_atoi(s); + } + } + else { + /* There's a colon, so we have at least a type and a device node */ + device_hint[i].type = pwc_atoi(s); + device_hint[i].device_node = pwc_atoi(colon + 1); + if (*dot != '\0') { + /* There's a serial number as well */ + int k; + + dot++; + k = 0; + while (*dot != ':' && k < 29) { + device_hint[i].serial_number[k++] = *dot; + dot++; + } + device_hint[i].serial_number[k] = '\0'; + } + } +#ifdef PWC_DEBUG + Debug("device_hint[%d]:\n", i); + Debug(" type : %d\n", device_hint[i].type); + Debug(" serial# : %s\n", device_hint[i].serial_number); + Debug(" node : %d\n", device_hint[i].device_node); +#endif + } + else + device_hint[i].type = 0; /* not filled */ + } /* ..for MAX_DEV_HINTS */ init_MUTEX(&mem_lock); Trace(TRACE_PROBE, "Registering driver at address 0x%p.\n", &pwc_driver); diff -urN linux-2.5.1-pre1/drivers/usb/pwc.h linux/drivers/usb/pwc.h --- linux-2.5.1-pre1/drivers/usb/pwc.h Wed Oct 17 14:34:06 2001 +++ linux/drivers/usb/pwc.h Sat Dec 1 00:37:05 2001 @@ -60,8 +60,8 @@ /* Version block */ #define PWC_MAJOR 8 -#define PWC_MINOR 3 -#define PWC_VERSION "8.3" +#define PWC_MINOR 4 +#define PWC_VERSION "8.4" #define PWC_NAME "pwc" /* Turn certain features on/off */ diff -urN linux-2.5.1-pre1/drivers/usb/serial/belkin_sa.c linux/drivers/usb/serial/belkin_sa.c --- linux-2.5.1-pre1/drivers/usb/serial/belkin_sa.c Wed Oct 10 23:42:47 2001 +++ linux/drivers/usb/serial/belkin_sa.c Sat Dec 1 00:37:05 2001 @@ -140,7 +140,7 @@ MODULE_DEVICE_TABLE (usb, id_table_combined); /* All of the device info needed for the Belkin dockstation serial converter */ -struct usb_serial_device_type belkin_dockstation_device = { +static struct usb_serial_device_type belkin_dockstation_device = { name: "Belkin F5U120-PC USB Serial Adapter", id_table: belkin_dockstation_table, /* the Belkin F5U103 device */ needs_interrupt_in: MUST_HAVE, /* this device must have an interrupt in endpoint */ @@ -161,7 +161,7 @@ }; /* All of the device info needed for the Belkin serial converter */ -struct usb_serial_device_type belkin_sa_device = { +static struct usb_serial_device_type belkin_sa_device = { name: "Belkin F5U103 USB Serial Adapter", id_table: belkin_sa_table, /* the Belkin F5U103 device */ needs_interrupt_in: MUST_HAVE, /* this device must have an interrupt in endpoint */ @@ -183,7 +183,7 @@ /* This driver also supports the "old" school Belkin single port adaptor */ -struct usb_serial_device_type belkin_old_device = { +static struct usb_serial_device_type belkin_old_device = { name: "Belkin USB Serial Adapter", id_table: belkin_old_table, /* the old Belkin device */ needs_interrupt_in: MUST_HAVE, /* this device must have an interrupt in endpoint */ @@ 
-204,7 +204,7 @@ }; /* this driver also works for the Peracom single port adapter */ -struct usb_serial_device_type peracom_device = { +static struct usb_serial_device_type peracom_device = { name: "Peracom single port USB Serial Adapter", id_table: peracom_table, /* the Peracom device */ needs_interrupt_in: MUST_HAVE, /* this device must have an interrupt in endpoint */ @@ -225,7 +225,7 @@ }; /* the GoHubs Go-COM232 device is the same as the Peracom single port adapter */ -struct usb_serial_device_type gocom232_device = { +static struct usb_serial_device_type gocom232_device = { name: "GO-COM232 USB Serial Converter", id_table: gocom232_table, /* the GO-COM232 device */ needs_interrupt_in: MUST_HAVE, /* this device must have an interrupt in endpoint */ diff -urN linux-2.5.1-pre1/drivers/usb/serial/cyberjack.c linux/drivers/usb/serial/cyberjack.c --- linux-2.5.1-pre1/drivers/usb/serial/cyberjack.c Wed Oct 10 23:42:47 2001 +++ linux/drivers/usb/serial/cyberjack.c Sat Dec 1 00:37:05 2001 @@ -76,7 +76,7 @@ MODULE_DEVICE_TABLE (usb, id_table); -struct usb_serial_device_type cyberjack_device = { +static struct usb_serial_device_type cyberjack_device = { name: "Reiner SCT Cyberjack USB card reader", id_table: id_table, needs_interrupt_in: MUST_HAVE, diff -urN linux-2.5.1-pre1/drivers/usb/serial/empeg.c linux/drivers/usb/serial/empeg.c --- linux-2.5.1-pre1/drivers/usb/serial/empeg.c Wed Oct 10 23:42:47 2001 +++ linux/drivers/usb/serial/empeg.c Sat Dec 1 00:37:05 2001 @@ -113,7 +113,7 @@ MODULE_DEVICE_TABLE (usb, id_table); -struct usb_serial_device_type empeg_device = { +static struct usb_serial_device_type empeg_device = { name: "Empeg", id_table: id_table, needs_interrupt_in: MUST_HAVE_NOT, /* must not have an interrupt in endpoint */ diff -urN linux-2.5.1-pre1/drivers/usb/serial/ftdi_sio.c linux/drivers/usb/serial/ftdi_sio.c --- linux-2.5.1-pre1/drivers/usb/serial/ftdi_sio.c Tue Nov 13 09:19:41 2001 +++ linux/drivers/usb/serial/ftdi_sio.c Sat Dec 1 00:37:05 2001 @@ -173,7 +173,7 @@ /* Should rename most ftdi_sio's to ftdi_ now since there are two devices which share common code */ -struct usb_serial_device_type ftdi_sio_device = { +static struct usb_serial_device_type ftdi_sio_device = { name: "FTDI SIO", id_table: id_table_sio, needs_interrupt_in: MUST_HAVE_NOT, @@ -196,7 +196,7 @@ shutdown: ftdi_sio_shutdown, }; -struct usb_serial_device_type ftdi_8U232AM_device = { +static struct usb_serial_device_type ftdi_8U232AM_device = { name: "FTDI 8U232AM", id_table: id_table_8U232AM, needs_interrupt_in: DONT_CARE, @@ -660,7 +660,7 @@ } /* ftdi_sio_serial_read_bulk_callback */ -__u16 translate_baudrate_to_ftdi(unsigned int cflag, ftdi_type_t ftdi_type) +static __u16 translate_baudrate_to_ftdi(unsigned int cflag, ftdi_type_t ftdi_type) { /* translate_baudrate_to_ftdi */ __u16 urb_value = ftdi_sio_b9600; diff -urN linux-2.5.1-pre1/drivers/usb/serial/io_edgeport.c linux/drivers/usb/serial/io_edgeport.c --- linux-2.5.1-pre1/drivers/usb/serial/io_edgeport.c Wed Nov 21 09:59:11 2001 +++ linux/drivers/usb/serial/io_edgeport.c Sat Dec 1 00:37:05 2001 @@ -318,11 +318,6 @@ }; -/* the info for all of the devices that this driver supports */ -int EdgeportDevices[] = EDGEPORT_DEVICE_IDS; -#define NUM_EDGEPORT_DEVICES (sizeof(EdgeportDevices) / sizeof(int)) - - /* Transmit Fifo * This Transmit queue is an extension of the edgeport Rx buffer. 
* The maximum amount of data buffered in both the edgeport @@ -495,17 +490,15 @@ // ************************************************************************ // ************************************************************************ -// These functions should be in firmware.c - /************************************************************************ * * - * update_edgeport_E2PROM() Compare current versions of * + * update_edgeport_E2PROM() Compare current versions of * * Boot ROM and Manufacture * * Descriptors with versions * * embedded in this driver * * * ************************************************************************/ -void update_edgeport_E2PROM (struct edgeport_serial *edge_serial) +static void update_edgeport_E2PROM (struct edgeport_serial *edge_serial) { __u32 BootCurVer; __u32 BootNewVer; diff -urN linux-2.5.1-pre1/drivers/usb/serial/keyspan.c linux/drivers/usb/serial/keyspan.c --- linux-2.5.1-pre1/drivers/usb/serial/keyspan.c Wed Oct 10 23:42:47 2001 +++ linux/drivers/usb/serial/keyspan.c Sat Dec 1 00:37:05 2001 @@ -176,7 +176,7 @@ /* Functions used by new usb-serial code. */ -int keyspan_init (void) +static int __init keyspan_init (void) { usb_serial_register (&keyspan_usa18x_pre_device); usb_serial_register (&keyspan_usa19_pre_device); @@ -201,7 +201,7 @@ return 0; } -void keyspan_exit (void) +static void __exit keyspan_exit (void) { usb_serial_deregister (&keyspan_usa18x_pre_device); usb_serial_deregister (&keyspan_usa19_pre_device); @@ -1089,7 +1089,7 @@ return urb; } -struct callbacks { +static struct callbacks { void (*instat_callback)(urb_t *); void (*glocont_callback)(urb_t *); void (*indat_callback)(urb_t *); diff -urN linux-2.5.1-pre1/drivers/usb/serial/keyspan.h linux/drivers/usb/serial/keyspan.h --- linux-2.5.1-pre1/drivers/usb/serial/keyspan.h Tue Oct 9 15:15:02 2001 +++ linux/drivers/usb/serial/keyspan.h Sat Dec 1 00:37:05 2001 @@ -448,7 +448,7 @@ }; /* Structs for the devices, pre and post renumeration. 
*/ -struct usb_serial_device_type keyspan_usa18x_pre_device = { +static struct usb_serial_device_type keyspan_usa18x_pre_device = { name: "Keyspan USA18X - (without firmware)", id_table: keyspan_usa18x_pre_ids, needs_interrupt_in: DONT_CARE, @@ -461,7 +461,7 @@ startup: keyspan_fake_startup }; -struct usb_serial_device_type keyspan_usa19_pre_device = { +static struct usb_serial_device_type keyspan_usa19_pre_device = { name: "Keyspan USA19 - (without firmware)", id_table: keyspan_usa19_pre_ids, needs_interrupt_in: DONT_CARE, @@ -475,7 +475,7 @@ }; -struct usb_serial_device_type keyspan_usa19w_pre_device = { +static struct usb_serial_device_type keyspan_usa19w_pre_device = { name: "Keyspan USA19W - (without firmware)", id_table: keyspan_usa19w_pre_ids, needs_interrupt_in: DONT_CARE, @@ -489,7 +489,7 @@ }; -struct usb_serial_device_type keyspan_usa28_pre_device = { +static struct usb_serial_device_type keyspan_usa28_pre_device = { name: "Keyspan USA28 - (without firmware)", id_table: keyspan_usa28_pre_ids, needs_interrupt_in: DONT_CARE, @@ -502,7 +502,7 @@ startup: keyspan_fake_startup }; -struct usb_serial_device_type keyspan_usa28x_pre_device = { +static struct usb_serial_device_type keyspan_usa28x_pre_device = { name: "Keyspan USA28X - (without firmware)", id_table: keyspan_usa28x_pre_ids, needs_interrupt_in: DONT_CARE, @@ -515,7 +515,7 @@ startup: keyspan_fake_startup }; -struct usb_serial_device_type keyspan_usa28xa_pre_device = { +static struct usb_serial_device_type keyspan_usa28xa_pre_device = { name: "Keyspan USA28XA - (without firmware)", id_table: keyspan_usa28xa_pre_ids, needs_interrupt_in: DONT_CARE, @@ -528,7 +528,7 @@ startup: keyspan_fake_startup }; -struct usb_serial_device_type keyspan_usa28xb_pre_device = { +static struct usb_serial_device_type keyspan_usa28xb_pre_device = { name: "Keyspan USA28XB - (without firmware)", id_table: keyspan_usa28xb_pre_ids, needs_interrupt_in: DONT_CARE, @@ -541,7 +541,7 @@ startup: keyspan_fake_startup }; -struct usb_serial_device_type keyspan_usa49w_pre_device = { +static struct usb_serial_device_type keyspan_usa49w_pre_device = { name: "Keyspan USA49W - (without firmware)", id_table: keyspan_usa49w_pre_ids, needs_interrupt_in: DONT_CARE, @@ -554,7 +554,7 @@ startup: keyspan_fake_startup }; -struct usb_serial_device_type keyspan_usa18x_device = { +static struct usb_serial_device_type keyspan_usa18x_device = { name: "Keyspan USA18X", id_table: keyspan_usa18x_ids, needs_interrupt_in: DONT_CARE, @@ -580,7 +580,7 @@ shutdown: keyspan_shutdown, }; -struct usb_serial_device_type keyspan_usa19_device = { +static struct usb_serial_device_type keyspan_usa19_device = { name: "Keyspan USA19", id_table: keyspan_usa19_ids, needs_interrupt_in: DONT_CARE, @@ -607,7 +607,7 @@ }; -struct usb_serial_device_type keyspan_usa19w_device = { +static struct usb_serial_device_type keyspan_usa19w_device = { name: "Keyspan USA19W", id_table: keyspan_usa19w_ids, needs_interrupt_in: DONT_CARE, @@ -634,7 +634,7 @@ }; -struct usb_serial_device_type keyspan_usa28_device = { +static struct usb_serial_device_type keyspan_usa28_device = { name: "Keyspan USA28", id_table: keyspan_usa28_ids, needs_interrupt_in: DONT_CARE, @@ -652,7 +652,7 @@ }; -struct usb_serial_device_type keyspan_usa28x_device = { +static struct usb_serial_device_type keyspan_usa28x_device = { name: "Keyspan USA28X/XB", id_table: keyspan_usa28x_ids, needs_interrupt_in: DONT_CARE, @@ -679,7 +679,7 @@ }; -struct usb_serial_device_type keyspan_usa28xa_device = { +static struct usb_serial_device_type 
keyspan_usa28xa_device = { name: "Keyspan USA28XA", id_table: keyspan_usa28xa_ids, needs_interrupt_in: DONT_CARE, @@ -706,7 +706,7 @@ }; -struct usb_serial_device_type keyspan_usa49w_device = { +static struct usb_serial_device_type keyspan_usa49w_device = { name: "Keyspan USA49W", id_table: keyspan_usa49w_ids, needs_interrupt_in: DONT_CARE, diff -urN linux-2.5.1-pre1/drivers/usb/serial/mct_u232.c linux/drivers/usb/serial/mct_u232.c --- linux-2.5.1-pre1/drivers/usb/serial/mct_u232.c Mon Nov 12 09:53:56 2001 +++ linux/drivers/usb/serial/mct_u232.c Sat Dec 1 00:37:05 2001 @@ -154,7 +154,7 @@ MODULE_DEVICE_TABLE (usb, id_table_combined); -struct usb_serial_device_type mct_u232_device = { +static struct usb_serial_device_type mct_u232_device = { name: "Magic Control Technology USB-RS232", id_table: mct_u232_table, needs_interrupt_in: MUST_HAVE, /* 2 interrupt-in endpoints */ @@ -178,7 +178,7 @@ shutdown: mct_u232_shutdown, }; -struct usb_serial_device_type mct_u232_sitecom_device = { +static struct usb_serial_device_type mct_u232_sitecom_device = { name: "MCT/Sitecom USB-RS232", id_table: mct_u232_sitecom_table, needs_interrupt_in: MUST_HAVE, /* 2 interrupt-in endpoints */ @@ -202,7 +202,7 @@ shutdown: mct_u232_shutdown, }; -struct usb_serial_device_type mct_u232_du_h3sp_device = { +static struct usb_serial_device_type mct_u232_du_h3sp_device = { name: "MCT/D-Link DU-H3SP USB BAY", id_table: mct_u232_du_h3sp_table, needs_interrupt_in: MUST_HAVE, /* 2 interrupt-in endpoints */ diff -urN linux-2.5.1-pre1/drivers/usb/serial/omninet.c linux/drivers/usb/serial/omninet.c --- linux-2.5.1-pre1/drivers/usb/serial/omninet.c Wed Oct 10 23:42:47 2001 +++ linux/drivers/usb/serial/omninet.c Sat Dec 1 00:37:05 2001 @@ -87,7 +87,7 @@ MODULE_DEVICE_TABLE (usb, id_table); -struct usb_serial_device_type zyxel_omninet_device = { +static struct usb_serial_device_type zyxel_omninet_device = { name: "ZyXEL - omni.net lcd plus usb", id_table: id_table, needs_interrupt_in: MUST_HAVE, diff -urN linux-2.5.1-pre1/drivers/usb/serial/usbserial.c linux/drivers/usb/serial/usbserial.c --- linux-2.5.1-pre1/drivers/usb/serial/usbserial.c Wed Oct 10 23:42:47 2001 +++ linux/drivers/usb/serial/usbserial.c Sat Dec 1 00:37:05 2001 @@ -397,7 +397,7 @@ static struct usb_serial *serial_table[SERIAL_TTY_MINORS]; /* initially all NULL */ -LIST_HEAD(usb_serial_driver_list); +static LIST_HEAD(usb_serial_driver_list); static struct usb_serial *get_serial_by_minor (int minor) @@ -1433,7 +1433,7 @@ }; -int usb_serial_init(void) +static int __init usb_serial_init(void) { int i; int result; @@ -1473,7 +1473,7 @@ } -void usb_serial_exit(void) +static void __exit usb_serial_exit(void) { #ifdef CONFIG_USB_SERIAL_GENERIC diff -urN linux-2.5.1-pre1/drivers/usb/serial/visor.c linux/drivers/usb/serial/visor.c --- linux-2.5.1-pre1/drivers/usb/serial/visor.c Mon Nov 12 09:53:56 2001 +++ linux/drivers/usb/serial/visor.c Sat Dec 1 00:37:05 2001 @@ -184,7 +184,7 @@ /* All of the device info needed for the Handspring Visor */ -struct usb_serial_device_type handspring_device = { +static struct usb_serial_device_type handspring_device = { name: "Handspring Visor", id_table: visor_id_table, needs_interrupt_in: MUST_HAVE_NOT, /* this device must not have an interrupt in endpoint */ @@ -210,7 +210,7 @@ }; /* device info for the Palm 4.0 devices */ -struct usb_serial_device_type palm_4_0_device = { +static struct usb_serial_device_type palm_4_0_device = { name: "Palm 4.0", id_table: palm_4_0_id_table, needs_interrupt_in: MUST_HAVE_NOT, /* this device must not have 
an interrupt in endpoint */ diff -urN linux-2.5.1-pre1/drivers/usb/serial/whiteheat.c linux/drivers/usb/serial/whiteheat.c --- linux-2.5.1-pre1/drivers/usb/serial/whiteheat.c Fri Sep 14 14:04:07 2001 +++ linux/drivers/usb/serial/whiteheat.c Sat Dec 1 00:37:05 2001 @@ -131,7 +131,7 @@ static int whiteheat_startup (struct usb_serial *serial); static void whiteheat_shutdown (struct usb_serial *serial); -struct usb_serial_device_type whiteheat_fake_device = { +static struct usb_serial_device_type whiteheat_fake_device = { name: "Connect Tech - WhiteHEAT - (prerenumeration)", id_table: id_table_prerenumeration, needs_interrupt_in: DONT_CARE, /* don't have to have an interrupt in endpoint */ @@ -144,7 +144,7 @@ startup: whiteheat_startup }; -struct usb_serial_device_type whiteheat_device = { +static struct usb_serial_device_type whiteheat_device = { name: "Connect Tech - WhiteHEAT", id_table: id_table_std, needs_interrupt_in: DONT_CARE, /* don't have to have an interrupt in endpoint */ diff -urN linux-2.5.1-pre1/drivers/usb/usb.c linux/drivers/usb/usb.c --- linux-2.5.1-pre1/drivers/usb/usb.c Wed Nov 21 09:59:11 2001 +++ linux/drivers/usb/usb.c Sat Dec 1 00:37:05 2001 @@ -7,7 +7,8 @@ * (C) Copyright Gregory P. Smith 1999 * (C) Copyright Deti Fliegl 1999 (new USB architecture) * (C) Copyright Randy Dunlap 2000 - * (C) Copyright David Brownell 2000 (kernel hotplug, usb_device_id) + * (C) Copyright David Brownell 2000-2001 (kernel hotplug, usb_device_id, + more docs, etc) * (C) Copyright Yggdrasil Computing, Inc. 2000 * (usb_device_id matching changes by Adam J. Richter) * @@ -193,6 +194,22 @@ up (&usb_bus_list_lock); } +/** + * usb_ifnum_to_if - get the interface object with a given interface number + * @dev: the device whose current configuration is considered + * @ifnum: the desired interface + * + * This walks the device descriptor for the currently active configuration + * and returns a pointer to the interface with that particular interface + * number, or null. + * + * Note that configuration descriptors are not required to assign interface + * numbers sequentially, so that it would be incorrect to assume that + * the first interface in that descriptor corresponds to interface zero. + * This routine helps device drivers avoid such mistakes. + * However, you should make sure that you do the right thing with any + * alternate settings available for this interfaces. + */ struct usb_interface *usb_ifnum_to_if(struct usb_device *dev, unsigned ifnum) { int i; @@ -204,6 +221,20 @@ return NULL; } +/** + * usb_epnum_to_ep_desc - get the endpoint object with a given endpoint number + * @dev: the device whose current configuration is considered + * @epnum: the desired endpoint + * + * This walks the device descriptor for the currently active configuration, + * and returns a pointer to the endpoint with that particular endpoint + * number, or null. + * + * Note that interface descriptors are not required to assign endpont + * numbers sequentially, so that it would be incorrect to assume that + * the first endpoint in that descriptor corresponds to interface zero. + * This routine helps device drivers avoid such mistakes. 
+ */ struct usb_endpoint_descriptor *usb_epnum_to_ep_desc(struct usb_device *dev, unsigned epnum) { int i, j, k; @@ -356,7 +387,7 @@ } /** - * usb_alloc_bus - creates a new USB host controller structure + * usb_alloc_bus - creates a new USB host controller structure (usbcore-internal) * @op: pointer to a struct usb_operations that this bus structure should use * * Creates a USB host controller bus structure with the specified @@ -398,7 +429,7 @@ } /** - * usb_free_bus - frees the memory used by a bus structure + * usb_free_bus - frees the memory used by a bus structure (usbcore-internal) * @bus: pointer to the bus to free * * (For use only by USB Host Controller Drivers.) @@ -412,10 +443,12 @@ } /** - * usb_register_bus - registers the USB host controller with the usb core + * usb_register_bus - registers the USB host controller with the usb core (usbcore-internal) * @bus: pointer to the bus to register * * (For use only by USB Host Controller Drivers.) + * + * This call is synchronous, and may not be used in an interrupt context. */ void usb_register_bus(struct usb_bus *bus) { @@ -441,10 +474,12 @@ } /** - * usb_deregister_bus - deregisters the USB host controller + * usb_deregister_bus - deregisters the USB host controller (usbcore-internal) * @bus: pointer to the bus to deregister * * (For use only by USB Host Controller Drivers.) + * + * This call is synchronous, and may not be used in an interrupt context. */ void usb_deregister_bus(struct usb_bus *bus) { @@ -493,27 +528,49 @@ } -/* - * This is intended to be used by usb device drivers that need to - * claim more than one interface on a device at once when probing - * (audio and acm are good examples). No device driver should have - * to mess with the internal usb_interface or usb_device structure - * members. +/** + * usb_driver_claim_interface - bind a driver to an interface + * @driver: the driver to be bound + * @iface: the interface to which it will be bound + * @priv: driver data associated with that interface + * + * This is used by usb device drivers that need to claim more than one + * interface on a device when probing (audio and acm are current examples). + * No device driver should directly modify internal usb_interface or + * usb_device structure members. + * + * Few drivers should need to use this routine, since the most natural + * way to bind to an interface is to return the private data from + * the driver's probe() method. Any driver that does use this must + * first be sure that no other driver has claimed the interface, by + * checking with usb_interface_claimed(). */ void usb_driver_claim_interface(struct usb_driver *driver, struct usb_interface *iface, void* priv) { if (!iface || !driver) return; - dbg("%s driver claimed interface %p", driver->name, iface); + // FIXME change API to report an error in this case + if (iface->driver) + err ("%s driver booted %s off interface %p", + driver->name, iface->driver->name, iface); + else + dbg("%s driver claimed interface %p", driver->name, iface); iface->driver = driver; iface->private_data = priv; } /* usb_driver_claim_interface() */ -/* +/** + * usb_interface_claimed - returns true iff an interface is claimed + * @iface: the interface being checked + * * This should be used by drivers to check other interfaces to see if - * they are available or not. + * they are available or not. If another driver has claimed the interface, + * they may not claim it. Otherwise it's OK to claim it using + * usb_driver_claim_interface(). 
+ * + * Returns true (nonzero) iff the interface is claimed, else false (zero). */ int usb_interface_claimed(struct usb_interface *iface) { @@ -523,8 +580,19 @@ return (iface->driver != NULL); } /* usb_interface_claimed() */ -/* - * This should be used by drivers to release their claimed interfaces +/** + * usb_driver_release_interface - unbind a driver from an interface + * @driver: the driver to be unbound + * @iface: the interface from which it will be unbound + * + * This should be used by drivers to release their claimed interfaces. + * It is normally called in their disconnect() methods, and only for + * drivers that bound to more than one interface in their probe(). + * + * When the USB subsystem disconnect()s a driver from some interface, + * it automatically invokes this method for that interface. That + * means that even drivers that used usb_driver_claim_interface() + * usually won't need to call this. */ void usb_driver_release_interface(struct usb_driver *driver, struct usb_interface *iface) { @@ -923,9 +991,15 @@ } } -/* - * Only HC's should call usb_alloc_dev and usb_free_dev directly - * Anybody may use usb_inc_dev_use or usb_dec_dev_use +/** + * usb_alloc_dev - allocate a usb device structure (usbcore-internal) + * @parent: hub to which device is connected + * @bus: bus used to access the device + * + * Only hub drivers (including virtual root hub drivers for host + * controllers) should ever call this. + * + * This call is synchronous, and may not be used in an interrupt context. */ struct usb_device *usb_alloc_dev(struct usb_device *parent, struct usb_bus *bus) { @@ -952,6 +1026,8 @@ return dev; } +// usbcore-internal ... +// but usb_dec_dev_use() is #defined to this, and that's public!! void usb_free_dev(struct usb_device *dev) { if (atomic_dec_and_test(&dev->refcnt)) { @@ -964,14 +1040,25 @@ } } +/** + * usb_inc_dev_use - record another reference to a device + * @dev: the device being referenced + * + * Each live reference to a device should be refcounted. + * + * Device drivers should normally record such references in their + * open() methods. + * Drivers should then release them, using usb_dec_dev_use(), in their + * close() methods. + */ void usb_inc_dev_use(struct usb_device *dev) { atomic_inc(&dev->refcnt); } -/* ------------------------------------------------------------------------------------- +/* ---------------------------------------------------------------------- * New USB Core Functions - * -------------------------------------------------------------------------------------*/ + * ----------------------------------------------------------------------*/ /** * usb_alloc_urb - creates a new urb for a USB driver to use @@ -1017,6 +1104,58 @@ kfree(urb); } /*-------------------------------------------------------------------*/ + +/** + * usb_submit_urb - asynchronously issue a transfer request for an endpoint + * @urb: pointer to the urb describing the request + * + * This submits a transfer request, and transfers control of the URB + * describing that request to the USB subsystem. Request completion will + * indicated later, asynchronously, by calling the completion handler. + * This call may be issued in interrupt context. + * + * The caller must have correctly initialized the URB before submitting + * it. Macros such as FILL_BULK_URB() and FILL_CONTROL_URB() are + * available to ensure that most fields are correctly initialized, for + * the particular kind of transfer, although they will not initialize + * any transfer flags. 
+ * + * Successful submissions return 0; otherwise this routine returns a + * negative error number. + * + * Unreserved Bandwidth Transfers: + * + * Bulk or control requests complete only once. When the completion + * function is called, control of the URB is returned to the device + * driver which issued the request. The completion handler may then + * immediately free or reuse that URB. + * + * Bulk URBs will be queued if the USB_QUEUE_BULK transfer flag is set + * in the URB. This can be used to maximize bandwidth utilization by + * letting the USB controller start work on the next URB without any + * delay to report completion (scheduling and processing an interrupt) + * and then submit that next request. + * + * For control endpoints, the synchronous usb_control_msg() call is + * often used (in non-interrupt context) instead of this call. + * + * Reserved Bandwidth Transfers: + * + * Periodic URBs (interrupt or isochronous) are completed repeatedly, + * until the original request is aborted. When the completion callback + * indicates the URB has been unlinked (with a special status code), + * control of that URB returns to the device driver. Otherwise, the + * completion handler does not control the URB, and should not change + * any of its fields. + * + * Note that isochronous URBs should be submitted in a "ring" data + * structure (using urb->next) to ensure that they are resubmitted + * appropriately. + * + * If the USB subsystem can't reserve sufficient bandwidth to perform + * the periodic request, and bandwidth reservation is being done for + * this controller, submitting such a periodic request will fail. + */ int usb_submit_urb(urb_t *urb) { if (urb && urb->dev && urb->dev->bus && urb->dev->bus->op) @@ -1026,6 +1165,31 @@ } /*-------------------------------------------------------------------*/ + +/** + * usb_unlink_urb - abort/cancel a transfer request for an endpoint + * @urb: pointer to urb describing a previously submitted request + * + * This routine cancels an in-progress request. The requests's + * completion handler will be called with a status code indicating + * that the request has been canceled, and that control of the URB + * has been returned to that device driver. This is the only way + * to stop an interrupt transfer, so long as the device is connected. + * + * When the USB_ASYNC_UNLINK transfer flag for the URB is clear, this + * request is synchronous. Success is indicated by returning zero, + * at which time the urb will have been unlinked, + * and the completion function will see status -ENOENT. Failure is + * indicated by any other return value. This mode may not be used + * when unlinking an urb from an interrupt context, such as a bottom + * half or a completion handler, + * + * When the USB_ASYNC_UNLINK transfer flag for the URB is set, this + * request is asynchronous. Success is indicated by returning -EINPROGRESS, + * at which time the urb will normally not have been unlinked, + * and the completion function will see status -ECONNRESET. Failure is + * indicated by any other return value. 
+ */ int usb_unlink_urb(urb_t *urb) { if (urb && urb->dev && urb->dev->bus && urb->dev->bus->op) @@ -1050,7 +1214,7 @@ } /*-------------------------------------------------------------------* - * COMPATIBILITY STUFF * + * SYNCHRONOUS CALLS * *-------------------------------------------------------------------*/ // Starts urb and waits for completion or timeout @@ -1145,7 +1309,7 @@ * This function sends a simple control message to a specified endpoint * and waits for the message to complete, or timeout. * - * If successful, it returns 0, othwise a negative error number. + * If successful, it returns 0, otherwise a negative error number. * * Don't use this function from within an interrupt context, like a * bottom half handler. If you need a asyncronous message, or need to send @@ -1188,9 +1352,9 @@ * This function sends a simple bulk message to a specified endpoint * and waits for the message to complete, or timeout. * - * If successful, it returns 0, othwise a negative error number. - * The number of actual bytes transferred will be plaed in the - * actual_timeout paramater. + * If successful, it returns 0, otherwise a negative error number. + * The number of actual bytes transferred will be stored in the + * actual_length paramater. * * Don't use this function from within an interrupt context, like a * bottom half handler. If you need a asyncronous message, or need to @@ -1214,16 +1378,19 @@ return usb_start_wait_urb(urb,timeout,actual_length); } -/* - * usb_get_current_frame_number() +/** + * usb_get_current_frame_number - return current bus frame number + * @dev: the device whose bus is being queried * - * returns the current frame number for the parent USB bus/controller - * of the given USB device. + * Returns the current frame number for the USB host controller + * used with the given USB device. This can be used when scheduling + * isochronous requests. */ -int usb_get_current_frame_number(struct usb_device *usb_dev) +int usb_get_current_frame_number(struct usb_device *dev) { - return usb_dev->bus->op->get_frame_number (usb_dev); + return dev->bus->op->get_frame_number (dev); } + /*-------------------------------------------------------------------*/ static int usb_parse_endpoint(struct usb_endpoint_descriptor *endpoint, unsigned char *buffer, int size) @@ -1556,6 +1723,7 @@ return size; } +// usbcore-internal: enumeration/hub only!! void usb_destroy_configuration(struct usb_device *dev) { int c, i, j, k; @@ -1685,8 +1853,16 @@ return -1; } -/* +/** + * usb_disconnect - disconnect a device (usbcore-internal) + * @pdev: pointer to device being disconnected + * * Something got disconnected. Get rid of it, and all of its children. + * + * Only hub drivers (including virtual root hub drivers for host + * controllers) should ever call this. + * + * This call is synchronous, and may not be used in an interrupt context. */ void usb_disconnect(struct usb_device **pdev) { @@ -1735,11 +1911,17 @@ usb_free_dev(dev); } -/* +/** + * usb_connect - connects a new device during enumeration (usbcore-internal) + * @dev: partially enumerated device + * * Connect a new USB device. This basically just initializes * the USB device information and sets up the topology - it's * up to the low-level driver to reset the port and actually * do the setup (the upper levels don't know how to do that). + * + * Only hub drivers (including virtual root hub drivers for host + * controllers) should ever call this. */ void usb_connect(struct usb_device *dev) { @@ -1747,6 +1929,9 @@ // FIXME needs locking for SMP!! 
/* why? this is called only from the hub thread, * which hopefully doesn't run on multiple CPU's simultaneously 8-) + * ... it's also called from modprobe/rmmod/apmd threads as part + * of virtual root hub init/reinit. In the init case, the hub code + * won't have seen this, but not so for reinit ... */ dev->descriptor.bMaxPacketSize0 = 8; /* Start off at 8 bytes */ #ifndef DEVNUM_ROUND_ROBIN @@ -1777,12 +1962,35 @@ #endif #define SET_TIMEOUT 3 +// hub driver only!!! for enumeration int usb_set_address(struct usb_device *dev) { return usb_control_msg(dev, usb_snddefctrl(dev), USB_REQ_SET_ADDRESS, 0, dev->devnum, 0, NULL, 0, HZ * GET_TIMEOUT); } +/** + * usb_get_descriptor - issues a generic GET_DESCRIPTOR request + * @dev: the device whose descriptor is being retrieved + * @type: the descriptor type (USB_DT_*) + * @index: the number of the descriptor + * @buf: where to put the descriptor + * @size: how big is "buf"? + * + * Gets a USB descriptor. Convenience functions exist to simplify + * getting some types of descriptors. Use + * usb_get_device_descriptor() for USB_DT_DEVICE, + * and usb_get_string() or usb_string() for USB_DT_STRING. + * Configuration descriptors (USB_DT_CONFIG) are part of the device + * structure, at least for the current configuration. + * In addition to a number of USB-standard descriptors, some + * devices also use vendor-specific descriptors. + * + * This call is synchronous, and may not be used in an interrupt context. + * + * Returns zero on success, or else the status code returned by the + * underlying usb_control_msg() call. + */ int usb_get_descriptor(struct usb_device *dev, unsigned char type, unsigned char index, void *buf, int size) { int i = 5; @@ -1800,6 +2008,7 @@ return result; } +// FIXME Doesn't use USB_DT_CLASS ... but hid-core.c expects it this way int usb_get_class_descriptor(struct usb_device *dev, int ifnum, unsigned char type, unsigned char id, void *buf, int size) { @@ -1808,6 +2017,27 @@ (type << 8) + id, ifnum, buf, size, HZ * GET_TIMEOUT); } +/** + * usb_get_string - gets a string descriptor + * @dev: the device whose string descriptor is being retrieved + * @langid: code for language chosen (from string descriptor zero) + * @index: the number of the descriptor + * @buf: where to put the string + * @size: how big is "buf"? + * + * Retrieves a string, encoded using UTF-16LE (Unicode, 16 bits per character, + * in little-endian byte order). + * The usb_string() function will often be a convenient way to turn + * these strings into kernel-printable form. + * + * Strings may be referenced in device, configuration, interface, or other + * descriptors, and could also be used in vendor-specific ways. + * + * This call is synchronous, and may not be used in an interrupt context. + * + * Returns zero on success, or else the status code returned by the + * underlying usb_control_msg() call. + */ int usb_get_string(struct usb_device *dev, unsigned short langid, unsigned char index, void *buf, int size) { return usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), @@ -1815,6 +2045,24 @@ (USB_DT_STRING << 8) + index, langid, buf, size, HZ * GET_TIMEOUT); } +/** + * usb_get_device_descriptor - (re)reads the device descriptor + * @dev: the device whose device descriptor is being updated + * + * Updates the copy of the device descriptor stored in the device structure, + * which dedicates space for this purpose. 
Note that several fields are + * converted to the host CPU's byte order: the USB version (bcdUSB), and + * vendors product and version fields (idVendor, idProduct, and bcdDevice). + * That lets device drivers compare against non-byteswapped constants. + * + * There's normally no need to use this call, although some devices + * will change their descriptors after events like updating firmware. + * + * This call is synchronous, and may not be used in an interrupt context. + * + * Returns zero on success, or else the status code returned by the + * underlying usb_control_msg() call. + */ int usb_get_device_descriptor(struct usb_device *dev) { int ret = usb_get_descriptor(dev, USB_DT_DEVICE, 0, &dev->descriptor, @@ -1828,12 +2076,34 @@ return ret; } +/** + * usb_get_status - issues a GET_STATUS call + * @dev: the device whose status is being checked + * @type: USB_RECIP_*; for device, interface, or endpoint + * @target: zero (for device), else interface or endpoint number + * @data: pointer to two bytes of bitmap data + * + * Returns device, interface, or endpoint status. Normally only of + * interest to see if the device is self powered, or has enabled the + * remote wakeup facility; or whether a bulk or interrupt endpoint + * is halted ("stalled"). + * + * Bits in these status bitmaps are set using the SET_FEATURE request, + * and cleared using the CLEAR_FEATURE request. The usb_clear_halt() + * function should be used to clear halt ("stall") status. + * + * This call is synchronous, and may not be used in an interrupt context. + * + * Returns zero on success, or else the status code returned by the + * underlying usb_control_msg() call. + */ int usb_get_status(struct usb_device *dev, int type, int target, void *data) { return usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), USB_REQ_GET_STATUS, USB_DIR_IN | type, 0, target, data, 2, HZ * GET_TIMEOUT); } +// FIXME hid-specific !! DOES NOT BELONG HERE int usb_get_protocol(struct usb_device *dev, int ifnum) { unsigned char type; @@ -1847,6 +2117,7 @@ return type; } +// FIXME hid-specific !! DOES NOT BELONG HERE int usb_set_protocol(struct usb_device *dev, int ifnum, int protocol) { return usb_control_msg(dev, usb_sndctrlpipe(dev, 0), @@ -1854,6 +2125,7 @@ protocol, ifnum, NULL, 0, HZ * SET_TIMEOUT); } +// FIXME hid-specific !! DOES NOT BELONG HERE int usb_set_idle(struct usb_device *dev, int ifnum, int duration, int report_id) { return usb_control_msg(dev, usb_sndctrlpipe(dev, 0), @@ -1861,6 +2133,7 @@ (duration << 8) | report_id, ifnum, NULL, 0, HZ * SET_TIMEOUT); } +// hub-only!! void usb_set_maxpacket(struct usb_device *dev) { int i, b; @@ -1890,9 +2163,26 @@ } } -/* - * endp: endpoint number in bits 0-3; - * direction flag in bit 7 (1 = IN, 0 = OUT) +/** + * usb_clear_halt - tells device to clear endpoint halt/stall condition + * @dev: device whose endpoint is halted + * @pipe: endpoint "pipe" being cleared + * + * This is used to clear halt conditions for bulk and interrupt endpoints, + * as reported by URB completion status. Endpoints that are halted are + * sometimes referred to as being "stalled". Such endpoints are unable + * to transmit or receive data until the halt status is cleared. Any URBs + * queued queued for such an endpoint should normally be unlinked before + * clearing the halt condition. + * + * Note that control and isochronous endpoints don't halt, although control + * endpoints report "protocol stall" (for unsupported requests) using the + * same status code used to report a true stall. 
+ * + * This call is synchronous, and may not be used in an interrupt context. + * + * Returns zero on success, or else the status code returned by the + * underlying usb_control_msg() call. */ int usb_clear_halt(struct usb_device *dev, int pipe) { @@ -1941,6 +2231,33 @@ return 0; } +/** + * usb_set_interface - Makes a particular alternate setting be current + * @dev: the device whose interface is being updated + * @interface: the interface being updated + * @alternate: the setting being chosen. + * + * This is used to enable data transfers on interfaces that may not + * be enabled by default. Not all devices support such configurability. + * + * Within any given configuration, each interface may have several + * alternative settings. These are often used to control levels of + * bandwidth consumption. For example, the default setting for a high + * speed interrupt endpoint may not send more than about 4KBytes per + * microframe, and isochronous endpoints may never be part of a an + * interface's default setting. To access such bandwidth, alternate + * interface setting must be made current. + * + * Note that in the Linux USB subsystem, bandwidth associated with + * an endpoint in a given alternate setting is not reserved until an + * is submitted that needs that bandwidth. Some other operating systems + * allocate bandwidth early, when a configuration is chosen. + * + * This call is synchronous, and may not be used in an interrupt context. + * + * Returns zero on success, or else the status code returned by the + * underlying usb_control_msg() call. + */ int usb_set_interface(struct usb_device *dev, int interface, int alternate) { struct usb_interface *iface; @@ -1964,6 +2281,35 @@ return 0; } +/** + * usb_set_configuration - Makes a particular device setting be current + * @dev: the device whose configuration is being updated + * @configuration: the configuration being chosen. + * + * This is used to enable non-default device modes. Not all devices + * support this kind of configurability. By default, configuration + * zero is selected after enumeration; many devices only have a single + * configuration. + * + * USB devices may support one or more configurations, which affect + * power consumption and the functionality available. For example, + * the default configuration is limited to using 100mA of bus power, + * so that when certain device functionality requires more power, + * and the device is bus powered, that functionality will be in some + * non-default device configuration. Other device modes may also be + * reflected as configuration options, such as whether two ISDN + * channels are presented as independent 64Kb/s interfaces or as one + * bonded 128Kb/s interface. + * + * Note that USB has an additional level of device configurability, + * associated with interfaces. That configurability is accessed using + * usb_set_interface(). + * + * This call is synchronous, and may not be used in an interrupt context. + * + * Returns zero on success, or else the status code returned by the + * underlying usb_control_msg() call. + */ int usb_set_configuration(struct usb_device *dev, int configuration) { int i, ret; @@ -1992,6 +2338,7 @@ return 0; } +// FIXME hid-specific !! DOES NOT BELONG HERE int usb_get_report(struct usb_device *dev, int ifnum, unsigned char type, unsigned char id, void *buf, int size) { return usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), @@ -1999,6 +2346,7 @@ (type << 8) + id, ifnum, buf, size, HZ * GET_TIMEOUT); } +// FIXME hid-specific !! 
DOES NOT BELONG HERE int usb_set_report(struct usb_device *dev, int ifnum, unsigned char type, unsigned char id, void *buf, int size) { return usb_control_msg(dev, usb_sndctrlpipe(dev, 0), @@ -2006,6 +2354,7 @@ (type << 8) + id, ifnum, buf, size, HZ); } +// hub driver only !! int usb_get_configuration(struct usb_device *dev) { int result; @@ -2106,9 +2455,28 @@ return result; } -/* - * usb_string: - * returns string length (> 0) or error (< 0) +/** + * usb_string - returns ISO 8859-1 version of a string descriptor + * @dev: the device whose string descriptor is being retrieved + * @index: the number of the descriptor + * @buf: where to put the string + * @size: how big is "buf"? + * + * This converts the UTF-16LE encoded strings returned by devices, from + * usb_get_string_descriptor(), to null-terminated ISO-8859-1 encoded ones + * that are more usable in most kernel contexts. Note that all characters + * in the chosen descriptor that can't be encoded using ISO-8859-1 + * are converted to the question mark ("?") character, and this function + * chooses strings in the first language supported by the device. + * + * The ASCII (or, redundantly, "US-ASCII") character set is the seven-bit + * subset of ISO 8859-1. ISO-8859-1 is the eight-bit subset of Unicode, + * and is appropriate for use many uses of English and several other + * Western European languages. (But it doesn't include the "Euro" symbol.) + * + * This call is synchronous, and may not be used in an interrupt context. + * + * Returns length of the string (>= 0) or usb_control_msg status (< 0). */ int usb_string(struct usb_device *dev, int index, char *buf, size_t size) { @@ -2155,7 +2523,7 @@ if (idx >= size) break; if (tbuf[u+1]) /* high byte */ - buf[idx++] = '?'; /* non-ASCII character */ + buf[idx++] = '?'; /* non ISO-8859-1 character */ else buf[idx++] = tbuf[u]; } @@ -2173,6 +2541,11 @@ * get the ball rolling.. * * Returns 0 for success, != 0 for error. + * + * This call is synchronous, and may not be used in an interrupt context. + * + * Only hub drivers (including virtual root hub drivers for host + * controllers) should ever call this. */ int usb_new_device(struct usb_device *dev) { diff -urN linux-2.5.1-pre1/fs/Makefile linux/fs/Makefile --- linux-2.5.1-pre1/fs/Makefile Mon Nov 12 09:34:16 2001 +++ linux/fs/Makefile Sat Dec 1 00:37:05 2001 @@ -7,12 +7,12 @@ O_TARGET := fs.o -export-objs := filesystems.o open.o dcache.o buffer.o +export-objs := filesystems.o open.o dcache.o buffer.o bio.o mod-subdirs := nls obj-y := open.o read_write.o devices.o file_table.o buffer.o \ - super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \ - fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ + bio.o super.o block_dev.o char_dev.o stat.o exec.o pipe.o \ + namei.o fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \ filesystems.o namespace.o seq_file.o diff -urN linux-2.5.1-pre1/fs/bio.c linux/fs/bio.c --- linux-2.5.1-pre1/fs/bio.c Wed Dec 31 16:00:00 1969 +++ linux/fs/bio.c Sat Dec 1 00:37:05 2001 @@ -0,0 +1,999 @@ +/* + * Copyright (C) 2001 Jens Axboe + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +kmem_cache_t *bio_cachep; +static spinlock_t __cacheline_aligned bio_lock = SPIN_LOCK_UNLOCKED; +static struct bio *bio_pool; +static DECLARE_WAIT_QUEUE_HEAD(bio_pool_wait); +static DECLARE_WAIT_QUEUE_HEAD(biovec_pool_wait); + +struct bio_hash_bucket *bio_hash_table; +unsigned int bio_hash_bits, bio_hash_mask; + +static unsigned int bio_pool_free; + +#define BIOVEC_NR_POOLS 6 + +struct biovec_pool { + int bp_size; + kmem_cache_t *bp_cachep; + wait_queue_head_t bp_wait; +}; + +static struct biovec_pool bvec_list[BIOVEC_NR_POOLS]; + +/* + * if you change this list, also change bvec_alloc or things will + * break badly! + */ +static const int bvec_pool_sizes[BIOVEC_NR_POOLS] = { 1, 4, 16, 64, 128, 256 }; + +#define BIO_MAX_PAGES (bvec_pool_sizes[BIOVEC_NR_POOLS - 1]) + +#ifdef BIO_HASH_PROFILING +static struct bio_hash_stats bio_stats; +#endif + +/* + * optimized for 2^BIO_HASH_SCALE kB block size + */ +#define BIO_HASH_SCALE 3 +#define BIO_HASH_BLOCK(sector) ((sector) >> BIO_HASH_SCALE) + +/* + * pending further testing, grabbed from fs/buffer.c hash so far... + */ +#define __bio_hash(dev,block) \ + (((((dev)<<(bio_hash_bits - 6)) ^ ((dev)<<(bio_hash_bits - 9))) ^ \ + (((block)<<(bio_hash_bits - 6)) ^ ((block) >> 13) ^ \ + ((block) << (bio_hash_bits - 12)))) & bio_hash_mask) + +#define bio_hash(dev, sector) &((bio_hash_table + __bio_hash(dev, BIO_HASH_BLOCK((sector))))->hash) + +#define bio_hash_bucket(dev, sector) (bio_hash_table + __bio_hash(dev, BIO_HASH_BLOCK((sector)))) + +#define __BIO_HASH_RWLOCK(dev, sector) \ + &((bio_hash_table + __bio_hash((dev), BIO_HASH_BLOCK((sector))))->lock) +#define BIO_HASH_RWLOCK(bio) \ + __BIO_HASH_RWLOCK((bio)->bi_dev, (bio)->bi_sector) + +/* + * TODO: change this to use slab reservation scheme once that infrastructure + * is in place... 
+ */ +#define BIO_POOL_SIZE (256) + +void __init bio_hash_init(unsigned long mempages) +{ + unsigned long htable_size, order; + int i; + + /* + * need to experiment on size of hash + */ + mempages >>= 2; + + htable_size = mempages * sizeof(struct bio_hash_bucket *); + for (order = 0; (PAGE_SIZE << order) < htable_size; order++) + ; + + do { + unsigned long tmp = (PAGE_SIZE << order) / sizeof(struct bio_hash_bucket); + + bio_hash_bits = 0; + while ((tmp >>= 1UL) != 0UL) + bio_hash_bits++; + + bio_hash_table = (struct bio_hash_bucket *) __get_free_pages(GFP_ATOMIC, order); + } while (bio_hash_table == NULL && --order > 0); + + if (!bio_hash_table) + panic("Failed to allocate page hash table\n"); + + printk("Bio-cache hash table entries: %ld (order: %ld, %ld bytes)\n", + BIO_HASH_SIZE, order, (PAGE_SIZE << order)); + + for (i = 0; i < BIO_HASH_SIZE; i++) { + struct bio_hash_bucket *hb = &bio_hash_table[i]; + + rwlock_init(&hb->lock); + hb->hash = NULL; + } + + bio_hash_mask = BIO_HASH_SIZE - 1; +} + +inline void __bio_hash_remove(struct bio *bio) +{ + bio_hash_t *entry = &bio->bi_hash; + bio_hash_t **pprev = entry->pprev_hash; + + if (pprev) { + bio_hash_t *nxt = entry->next_hash; + + if (nxt) + nxt->pprev_hash = pprev; + + *pprev = nxt; +#if 1 + entry->next_hash = NULL; +#endif + entry->pprev_hash = NULL; + entry->valid_counter = 0; + bio->bi_hash_desc = NULL; +#ifdef BIO_HASH_PROFILING + atomic_dec(&bio_stats.nr_entries); +#endif + } +} + +inline void bio_hash_remove(struct bio *bio) +{ + rwlock_t *hash_lock = BIO_HASH_RWLOCK(bio); + unsigned long flags; + + write_lock_irqsave(hash_lock, flags); + __bio_hash_remove(bio); + write_unlock_irqrestore(hash_lock, flags); +} + +inline void __bio_hash_add(struct bio *bio, bio_hash_t **hash, + void *hash_desc, unsigned int vc) +{ + bio_hash_t *entry = &bio->bi_hash; + bio_hash_t *nxt = *hash; + + BUG_ON(entry->pprev_hash); + + *hash = entry; + entry->next_hash = nxt; + entry->pprev_hash = hash; + entry->valid_counter = vc; + + if (nxt) + nxt->pprev_hash = &entry->next_hash; + + bio->bi_hash_desc = hash_desc; + +#ifdef BIO_HASH_PROFILING + atomic_inc(&bio_stats.nr_inserts); + atomic_inc(&bio_stats.nr_entries); + { + int entries = atomic_read(&bio_stats.nr_entries); + if (entries > atomic_read(&bio_stats.max_entries)) + atomic_set(&bio_stats.max_entries, entries); + } +#endif +} + +inline void bio_hash_add(struct bio *bio, void *hash_desc, unsigned int vc) +{ + struct bio_hash_bucket *hb =bio_hash_bucket(bio->bi_dev,bio->bi_sector); + unsigned long flags; + + write_lock_irqsave(&hb->lock, flags); + __bio_hash_add(bio, &hb->hash, hash_desc, vc); + write_unlock_irqrestore(&hb->lock, flags); +} + +inline struct bio *__bio_hash_find(kdev_t dev, sector_t sector, + bio_hash_t **hash, unsigned int vc) +{ + bio_hash_t *next = *hash, *entry; + struct bio *bio; + int nr = 0; + +#ifdef BIO_HASH_PROFILING + atomic_inc(&bio_stats.nr_lookups); +#endif + while ((entry = next)) { + next = entry->next_hash; + prefetch(next); + bio = bio_hash_entry(entry); + + if (entry->valid_counter == vc) { + if (bio->bi_sector == sector && bio->bi_dev == dev) { +#ifdef BIO_HASH_PROFILING + if (nr > atomic_read(&bio_stats.max_bucket_size)) + atomic_set(&bio_stats.max_bucket_size, nr); + if (nr <= MAX_PROFILE_BUCKETS) + atomic_inc(&bio_stats.bucket_size[nr]); + atomic_inc(&bio_stats.nr_hits); +#endif + bio_get(bio); + return bio; + } + } + nr++; + } + + return NULL; +} + +inline struct bio *bio_hash_find(kdev_t dev, sector_t sector, unsigned int vc) +{ + struct bio_hash_bucket *hb = 
bio_hash_bucket(dev, sector); + unsigned long flags; + struct bio *bio; + + read_lock_irqsave(&hb->lock, flags); + bio = __bio_hash_find(dev, sector, &hb->hash, vc); + read_unlock_irqrestore(&hb->lock, flags); + + return bio; +} + +inline int __bio_hash_add_unique(struct bio *bio, bio_hash_t **hash, + void *hash_desc, unsigned int vc) +{ + struct bio *alias = __bio_hash_find(bio->bi_dev, bio->bi_sector, hash, vc); + + if (!alias) { + __bio_hash_add(bio, hash, hash_desc, vc); + return 0; + } + + /* + * release reference to alias + */ + bio_put(alias); + return 1; +} + +inline int bio_hash_add_unique(struct bio *bio, void *hash_desc, unsigned int vc) +{ + struct bio_hash_bucket *hb =bio_hash_bucket(bio->bi_dev,bio->bi_sector); + unsigned long flags; + int ret = 1; + + if (!bio->bi_hash.pprev_hash) { + write_lock_irqsave(&hb->lock, flags); + ret = __bio_hash_add_unique(bio, &hb->hash, hash_desc, vc); + write_unlock_irqrestore(&hb->lock, flags); + } + + return ret; +} + +/* + * increment validity counter on barrier inserts. if it wraps, we must + * prune all existing entries for this device to be completely safe + * + * q->queue_lock must be held by caller + */ +void bio_hash_invalidate(request_queue_t *q, kdev_t dev) +{ + bio_hash_t *hash; + struct bio *bio; + int i; + + if (++q->hash_valid_counter) + return; + + /* + * it wrapped... + */ + for (i = 0; i < (1 << bio_hash_bits); i++) { + struct bio_hash_bucket *hb = &bio_hash_table[i]; + unsigned long flags; + + write_lock_irqsave(&hb->lock, flags); + while ((hash = hb->hash) != NULL) { + bio = bio_hash_entry(hash); + if (bio->bi_dev != dev) + __bio_hash_remove(bio); + } + write_unlock_irqrestore(&hb->lock, flags); + } + + /* + * entries pruned, reset validity counter + */ + q->hash_valid_counter = 1; +} + + +/* + * if need be, add bio_pool_get_irq() to match... + */ +static inline struct bio *__bio_pool_get(void) +{ + struct bio *bio; + + if ((bio = bio_pool)) { + BUG_ON(bio_pool_free <= 0); + bio_pool = bio->bi_next; + bio->bi_next = NULL; + bio_pool_free--; + } + + return bio; +} + +static inline struct bio *bio_pool_get(void) +{ + unsigned long flags; + struct bio *bio; + + spin_lock_irqsave(&bio_lock, flags); + bio = __bio_pool_get(); + BUG_ON(!bio && bio_pool_free); + spin_unlock_irqrestore(&bio_lock, flags); + + return bio; +} + +static inline void bio_pool_put(struct bio *bio) +{ + unsigned long flags; + int wake_pool = 0; + + spin_lock_irqsave(&bio_lock, flags); + + /* + * if the pool has enough free entries, just slab free the bio + */ + if (bio_pool_free < BIO_POOL_SIZE) { + bio->bi_next = bio_pool; + bio_pool = bio; + bio_pool_free++; + wake_pool = waitqueue_active(&bio_pool_wait); + spin_unlock_irqrestore(&bio_lock, flags); + + if (wake_pool) + wake_up_nr(&bio_pool_wait, 1); + } else { + spin_unlock_irqrestore(&bio_lock, flags); + kmem_cache_free(bio_cachep, bio); + } +} + +#define BIO_CAN_WAIT(gfp_mask) \ + (((gfp_mask) & (__GFP_WAIT | __GFP_IO)) == (__GFP_WAIT | __GFP_IO)) + +static inline struct bio_vec_list *bvec_alloc(int gfp_mask, int nr) +{ + struct bio_vec_list *bvl = NULL; + struct biovec_pool *bp; + int idx; + + /* + * see comment near bvec_pool_sizes define! + */ + switch (nr) { + case 1: + idx = 0; + break; + case 2 ... 4: + idx = 1; + break; + case 5 ... 16: + idx = 2; + break; + case 17 ... 64: + idx = 3; + break; + case 65 ... 128: + idx = 4; + break; + case 129 ... 
256: + idx = 5; + break; + default: + return NULL; + } + bp = &bvec_list[idx]; + + /* + * ok, so idx now points to the slab we want to allocate from + */ + if ((bvl = kmem_cache_alloc(bp->bp_cachep, gfp_mask))) + goto out_gotit; + + /* + * we need slab reservations for this to be completely + * deadlock free... + */ + if (BIO_CAN_WAIT(gfp_mask)) { + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue_exclusive(&bp->bp_wait, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + bvl = kmem_cache_alloc(bp->bp_cachep, gfp_mask); + if (bvl) + goto out_gotit; + + run_task_queue(&tq_disk); + schedule(); + } + remove_wait_queue(&bp->bp_wait, &wait); + __set_current_state(TASK_RUNNING); + } + + /* + * we use bvl_max as index into bvec_pool_sizes, non-slab originated + * bvecs may use it for something else if they use their own + * destructor + */ + if (bvl) { +out_gotit: + memset(bvl, 0, bp->bp_size); + bvl->bvl_max = idx; + } + + return bvl; +} + +/* + * default destructor for a bio allocated with bio_alloc() + */ +void bio_destructor(struct bio *bio) +{ + struct biovec_pool *bp = &bvec_list[bio->bi_io_vec->bvl_max]; + + BUG_ON(bio->bi_io_vec->bvl_max >= BIOVEC_NR_POOLS); + + /* + * cloned bio doesn't own the veclist + */ + if (!(bio->bi_flags & (1 << BIO_CLONED))) + kmem_cache_free(bp->bp_cachep, bio->bi_io_vec); + + bio_pool_put(bio); +} + +static inline struct bio *__bio_alloc(int gfp_mask, bio_destructor_t *dest) +{ + struct bio *bio; + + /* + * first try our reserved pool + */ + if ((bio = bio_pool_get())) + goto gotit; + + /* + * no such luck, try slab alloc + */ + if ((bio = kmem_cache_alloc(bio_cachep, gfp_mask))) + goto gotit; + + /* + * hrmpf, not much luck. if we are allowed to wait, wait on + * bio_pool to be replenished + */ + if (BIO_CAN_WAIT(gfp_mask)) { + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue_exclusive(&bio_pool_wait, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if ((bio = bio_pool_get())) + break; + + run_task_queue(&tq_disk); + schedule(); + } + remove_wait_queue(&bio_pool_wait, &wait); + __set_current_state(TASK_RUNNING); + } + + if (bio) { +gotit: + bio->bi_next = NULL; + bio->bi_hash.pprev_hash = NULL; + atomic_set(&bio->bi_cnt, 1); + bio->bi_io_vec = NULL; + bio->bi_flags = 0; + bio->bi_rw = 0; + bio->bi_end_io = NULL; + bio->bi_hash_desc = NULL; + bio->bi_destructor = dest; + } + + return bio; +} + +/** + * bio_alloc - allocate a bio for I/O + * @gfp_mask: the GFP_ mask given to the slab allocator + * @nr_iovecs: number of iovecs to pre-allocate + * + * Description: + * bio_alloc will first try it's on internal pool to satisfy the allocation + * and if that fails fall back to the bio slab cache. In the latter case, + * the @gfp_mask specifies the priority of the allocation. In particular, + * if %__GFP_WAIT is set then we will block on the internal pool waiting + * for a &struct bio to become free. + **/ +struct bio *bio_alloc(int gfp_mask, int nr_iovecs) +{ + struct bio *bio = __bio_alloc(gfp_mask, bio_destructor); + struct bio_vec_list *bvl = NULL; + + if (unlikely(!bio)) + return NULL; + + if (!nr_iovecs || (bvl = bvec_alloc(gfp_mask, nr_iovecs))) { + bio->bi_io_vec = bvl; + return bio; + } + + bio_pool_put(bio); + return NULL; +} + +/* + * queue lock assumed held! 
+ */ +static inline void bio_free(struct bio *bio) +{ + BUG_ON(bio_is_hashed(bio)); + + bio->bi_destructor(bio); +} + +/** + * bio_put - release a reference to a bio + * @bio: bio to release reference to + * + * Description: + * Put a reference to a &struct bio, either one you have gotten with + * bio_alloc or bio_get. The last put of a bio will free it. + **/ +void bio_put(struct bio *bio) +{ + BUG_ON(!atomic_read(&bio->bi_cnt)); + + /* + * last put frees it + */ + if (atomic_dec_and_test(&bio->bi_cnt)) { + BUG_ON(bio->bi_next); + + bio_free(bio); + } +} + +/** + * bio_clone - duplicate a bio + * @bio: bio to clone + * @gfp_mask: allocation priority + * + * Duplicate a &bio. Caller will own the returned bio, but not + * the actual data it points to. Reference count of returned + * bio will be one. + */ +struct bio *bio_clone(struct bio *bio, int gfp_mask) +{ + struct bio *b = bio_alloc(gfp_mask, 0); + + if (b) { + b->bi_io_vec = bio->bi_io_vec; + + b->bi_sector = bio->bi_sector; + b->bi_dev = bio->bi_dev; + b->bi_flags |= 1 << BIO_CLONED; + b->bi_rw = bio->bi_rw; + } + + return b; +} + +/** + * bio_copy - create copy of a bio + * @bio: bio to copy + * @gfp_mask: allocation priority + * + * Create a copy of a &bio. Caller will own the returned bio and + * the actual data it points to. Reference count of returned + * bio will be one. + */ +struct bio *bio_copy(struct bio *bio, int gfp_mask) +{ + struct bio *b = bio_alloc(gfp_mask, bio->bi_io_vec->bvl_cnt); + unsigned long flags = 0; /* gcc silly */ + int i; + + if (b) { + struct bio_vec *bv; + + /* + * iterate iovec list and alloc pages + copy data + */ + bio_for_each_segment(bv, bio, i) { + struct bio_vec *bbv = &b->bi_io_vec->bvl_vec[i]; + char *vfrom, *vto; + + bbv->bv_page = alloc_page(gfp_mask); + if (bbv->bv_page == NULL) + goto oom; + + if (gfp_mask & __GFP_WAIT) { + vfrom = kmap(bv->bv_page); + vto = kmap(bv->bv_page); + } else { + __save_flags(flags); + __cli(); + vfrom = kmap_atomic(bv->bv_page, KM_BIO_IRQ); + vto = kmap_atomic(bv->bv_page, KM_BIO_IRQ); + } + + memcpy(vto + bv->bv_offset, vfrom + bv->bv_offset, bv->bv_len); + if (gfp_mask & __GFP_WAIT) { + kunmap(vto); + kunmap(vfrom); + } else { + kunmap_atomic(vto, KM_BIO_IRQ); + kunmap_atomic(vfrom, KM_BIO_IRQ); + __restore_flags(flags); + } + + bbv->bv_len = bv->bv_len; + bbv->bv_offset = bv->bv_offset; + } + + b->bi_sector = bio->bi_sector; + b->bi_dev = bio->bi_dev; + b->bi_rw = bio->bi_rw; + + b->bi_io_vec->bvl_cnt = bio->bi_io_vec->bvl_cnt; + b->bi_io_vec->bvl_size = bio->bi_io_vec->bvl_size; + } + + return b; + +oom: + while (i >= 0) { + __free_page(b->bi_io_vec->bvl_vec[i].bv_page); + i--; + } + + bio_pool_put(b); + return NULL; +} + +#ifdef BIO_PAGEIO +static int bio_end_io_page(struct bio *bio) +{ + struct page *page = bio_page(bio); + + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + SetPageError(page); + if (!PageError(page)) + SetPageUptodate(page); + + /* + * Run the hooks that have to be done when a page I/O has completed. 
+ */ + if (PageTestandClearDecrAfter(page)) + atomic_dec(&nr_async_pages); + + UnlockPage(page); + bio_put(bio); + return 1; +} +#endif + +static int bio_end_io_kio(struct bio *bio, int nr_sectors) +{ + struct kiobuf *kio = (struct kiobuf *) bio->bi_private; + struct bio_vec_list *bv = bio->bi_io_vec; + int uptodate, done; + + BUG_ON(!bv); + + done = 0; + uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + do { + int sectors = bv->bvl_vec[bv->bvl_idx].bv_len >> 9; + + nr_sectors -= sectors; + + bv->bvl_idx++; + + done = !end_kio_request(kio, uptodate); + + if (bv->bvl_idx == bv->bvl_cnt) + done = 1; + + } while (!done && nr_sectors > 0); + + /* + * all done + */ + if (done) { + bio_hash_remove(bio); + bio_put(bio); + return 0; + } + + return 1; +} + +/* + * obviously doesn't work for stacking drivers, but ll_rw_blk will split + * bio for those + */ +int get_max_segments(kdev_t dev) +{ + int segments = MAX_SEGMENTS; + request_queue_t *q; + + if ((q = blk_get_queue(dev))) + segments = q->max_segments; + + return segments; +} + +int get_max_sectors(kdev_t dev) +{ + int sectors = MAX_SECTORS; + request_queue_t *q; + + if ((q = blk_get_queue(dev))) + sectors = q->max_sectors; + + return sectors; +} + +/** + * ll_rw_kio - submit a &struct kiobuf for I/O + * @rw: %READ or %WRITE + * @kio: the kiobuf to do I/O on + * @dev: target device + * @sector: start location on disk + * + * Description: + * ll_rw_kio will map the page list inside the &struct kiobuf to + * &struct bio and queue them for I/O. The kiobuf given must describe + * a continous range of data, and must be fully prepared for I/O. + **/ +void ll_rw_kio(int rw, struct kiobuf *kio, kdev_t dev, sector_t sector) +{ + int i, offset, size, err, map_i, total_nr_pages, nr_pages; + int max_bytes, max_segments; + struct bio_vec *bvec; + struct bio *bio; + + err = 0; + if ((rw & WRITE) && is_read_only(dev)) { + printk("ll_rw_bio: WRITE to ro device %s\n", kdevname(dev)); + err = -EPERM; + goto out; + } + + if (!kio->nr_pages) { + err = -EINVAL; + goto out; + } + + /* + * rudimentary max sectors/segments checks and setup. once we are + * sure that drivers can handle requests that cannot be completed in + * one go this will die + */ + max_bytes = get_max_sectors(dev) << 9; + max_segments = get_max_segments(dev); + if ((max_bytes >> PAGE_SHIFT) < (max_segments + 1)) + max_segments = (max_bytes >> PAGE_SHIFT) + 1; + + if (max_segments > BIO_MAX_PAGES) + max_segments = BIO_MAX_PAGES; + + /* + * maybe kio is bigger than the max we can easily map into a bio. + * if so, split it up in appropriately sized chunks. 
+ */ + total_nr_pages = kio->nr_pages; + offset = kio->offset & ~PAGE_MASK; + size = kio->length; + + /* + * set I/O count to number of pages for now + */ + atomic_set(&kio->io_count, total_nr_pages); + + map_i = 0; + +next_chunk: + if ((nr_pages = total_nr_pages) > max_segments) + nr_pages = max_segments; + + /* + * allocate bio and do initial setup + */ + if ((bio = bio_alloc(GFP_NOIO, nr_pages)) == NULL) { + err = -ENOMEM; + goto out; + } + + bio->bi_sector = sector; + bio->bi_dev = dev; + bio->bi_io_vec->bvl_idx = 0; + bio->bi_flags |= 1 << BIO_PREBUILT; + bio->bi_end_io = bio_end_io_kio; + bio->bi_private = kio; + + bvec = &bio->bi_io_vec->bvl_vec[0]; + for (i = 0; i < nr_pages; i++, bvec++, map_i++) { + int nbytes = PAGE_SIZE - offset; + + if (nbytes > size) + nbytes = size; + + BUG_ON(kio->maplist[map_i] == NULL); + + if (bio->bi_io_vec->bvl_size + nbytes > max_bytes) + goto queue_io; + + bio->bi_io_vec->bvl_cnt++; + bio->bi_io_vec->bvl_size += nbytes; + + bvec->bv_page = kio->maplist[map_i]; + bvec->bv_len = nbytes; + bvec->bv_offset = offset; + + /* + * kiobuf only has an offset into the first page + */ + offset = 0; + + sector += nbytes >> 9; + size -= nbytes; + total_nr_pages--; + } + +queue_io: + submit_bio(rw, bio); + + if (total_nr_pages) + goto next_chunk; + + if (size) { + printk("ll_rw_kio: size %d left (kio %d)\n", size, kio->length); + BUG(); + } + +out: + if (err) + kio->errno = err; +} + +int bio_endio(struct bio *bio, int uptodate, int nr_sectors) +{ + if (uptodate) + set_bit(BIO_UPTODATE, &bio->bi_flags); + else + clear_bit(BIO_UPTODATE, &bio->bi_flags); + + return bio->bi_end_io(bio, nr_sectors); +} + +static int __init bio_init_pool(void) +{ + struct bio *bio; + int i; + + for (i = 0; i < BIO_POOL_SIZE; i++) { + bio = kmem_cache_alloc(bio_cachep, GFP_ATOMIC); + if (!bio) + panic("bio: cannot init bio pool\n"); + + bio_pool_put(bio); + } + + return i; +} + +static void __init biovec_init_pool(void) +{ + char name[16]; + int i, size; + + memset(&bvec_list, 0, sizeof(bvec_list)); + + for (i = 0; i < BIOVEC_NR_POOLS; i++) { + struct biovec_pool *bp = &bvec_list[i]; + + size = bvec_pool_sizes[i] * sizeof(struct bio_vec); + size += sizeof(struct bio_vec_list); + + printk("biovec: init pool %d, %d entries, %d bytes\n", i, + bvec_pool_sizes[i], size); + + snprintf(name, sizeof(name) - 1,"biovec-%d",bvec_pool_sizes[i]); + bp->bp_cachep = kmem_cache_create(name, size, 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + + if (!bp->bp_cachep) + panic("biovec: can't init slab pools\n"); + + bp->bp_size = size; + init_waitqueue_head(&bp->bp_wait); + } +} + +static int __init init_bio(void) +{ + int nr; + + bio_cachep = kmem_cache_create("bio", sizeof(struct bio), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!bio_cachep) + panic("bio: can't create bio_cachep slab cache\n"); + + nr = bio_init_pool(); + printk("BIO: pool of %d setup, %uKb (%d bytes/bio)\n", nr, nr * sizeof(struct bio) >> 10, sizeof(struct bio)); + + biovec_init_pool(); + +#ifdef BIO_HASH_PROFILING + memset(&bio_stats, 0, sizeof(bio_stats)); +#endif + + return 0; +} + +int bio_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg) +{ +#ifdef BIO_HASH_PROFILING + switch (cmd) { + case BLKHASHPROF: + if (copy_to_user((struct bio_hash_stats *) arg, &bio_stats, sizeof(bio_stats))) + return -EFAULT; + break; + case BLKHASHCLEAR: + memset(&bio_stats, 0, sizeof(bio_stats)); + break; + default: + return -ENOTTY; + } + +#endif + return 0; +} + +module_init(init_bio); + +EXPORT_SYMBOL(bio_alloc); +EXPORT_SYMBOL(bio_put); 
+EXPORT_SYMBOL(ll_rw_kio); +EXPORT_SYMBOL(bio_hash_remove); +EXPORT_SYMBOL(bio_hash_add); +EXPORT_SYMBOL(bio_hash_add_unique); +EXPORT_SYMBOL(bio_endio); diff -urN linux-2.5.1-pre1/fs/block_dev.c linux/fs/block_dev.c --- linux-2.5.1-pre1/fs/block_dev.c Wed Nov 21 14:07:25 2001 +++ linux/fs/block_dev.c Sat Dec 1 00:37:05 2001 @@ -102,7 +102,7 @@ return 0; } -static int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh, int create) +static int blkdev_get_block(struct inode * inode, sector_t iblock, struct buffer_head * bh, int create) { if (iblock >= max_block(inode->i_rdev)) return -EIO; diff -urN linux-2.5.1-pre1/fs/buffer.c linux/fs/buffer.c --- linux-2.5.1-pre1/fs/buffer.c Wed Nov 21 14:40:17 2001 +++ linux/fs/buffer.c Sat Dec 1 00:37:05 2001 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -548,7 +549,7 @@ spin_unlock(&lru_list_lock); } -struct buffer_head * get_hash_table(kdev_t dev, int block, int size) +struct buffer_head * get_hash_table(kdev_t dev, sector_t block, int size) { struct buffer_head *bh, **p = &hash(dev, block); @@ -1014,7 +1015,7 @@ * 14.02.92: changed it to sync dirty buffers a bit: better performance * when the filesystem starts to get full of dirty blocks (I hope). */ -struct buffer_head * getblk(kdev_t dev, int block, int size) +struct buffer_head * getblk(kdev_t dev, sector_t block, int size) { for (;;) { struct buffer_head * bh; @@ -1988,7 +1989,8 @@ goto done; } -int generic_block_bmap(struct address_space *mapping, long block, get_block_t *get_block) +sector_t generic_block_bmap(struct address_space *mapping, sector_t block, + get_block_t *get_block) { struct buffer_head tmp; struct inode *inode = mapping->host; @@ -2001,7 +2003,7 @@ int generic_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize, get_block_t * get_block) { int i, nr_blocks, retval; - unsigned long * blocks = iobuf->blocks; + sector_t *blocks = iobuf->blocks; nr_blocks = iobuf->length / blocksize; /* build the blocklist */ @@ -2012,7 +2014,7 @@ bh.b_dev = inode->i_dev; bh.b_size = blocksize; - retval = get_block(inode, blocknr, &bh, rw == READ ? 0 : 1); + retval = get_block(inode, blocknr, &bh, rw & 1); if (retval) goto out; @@ -2033,64 +2035,13 @@ blocks[i] = bh.b_blocknr; } - retval = brw_kiovec(rw, 1, &iobuf, inode->i_dev, iobuf->blocks, blocksize); + retval = brw_kiovec(rw, 1, &iobuf, inode->i_dev, blocks, blocksize); out: return retval; } /* - * IO completion routine for a buffer_head being used for kiobuf IO: we - * can't dispatch the kiobuf callback until io_count reaches 0. - */ - -static void end_buffer_io_kiobuf(struct buffer_head *bh, int uptodate) -{ - struct kiobuf *kiobuf; - - mark_buffer_uptodate(bh, uptodate); - - kiobuf = bh->b_private; - unlock_buffer(bh); - end_kio_request(kiobuf, uptodate); -} - -/* - * For brw_kiovec: submit a set of buffer_head temporary IOs and wait - * for them to complete. Clean up the buffer_heads afterwards. - */ - -static int wait_kio(int rw, int nr, struct buffer_head *bh[], int size) -{ - int iosize, err; - int i; - struct buffer_head *tmp; - - iosize = 0; - err = 0; - - for (i = nr; --i >= 0; ) { - iosize += size; - tmp = bh[i]; - if (buffer_locked(tmp)) { - wait_on_buffer(tmp); - } - - if (!buffer_uptodate(tmp)) { - /* We are traversing bh'es in reverse order so - clearing iosize on error calculates the - amount of IO before the first error. 
*/ - iosize = 0; - err = -EIO; - } - } - - if (iosize) - return iosize; - return err; -} - -/* * Start I/O on a physical range of kernel memory, defined by a vector * of kiobuf structs (much like a user-space iovec list). * @@ -2101,22 +2052,13 @@ * It is up to the caller to make sure that there are enough blocks * passed in to completely map the iobufs to disk. */ - -int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], - kdev_t dev, unsigned long b[], int size) +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], kdev_t dev, sector_t b[], + int size) { - int err; - int length; int transferred; int i; - int bufind; - int pageind; - int bhind; - int offset; - unsigned long blocknr; - struct kiobuf * iobuf = NULL; - struct page * map; - struct buffer_head *tmp, **bhs = NULL; + int err; + struct kiobuf * iobuf; if (!nr) return 0; @@ -2126,8 +2068,7 @@ */ for (i = 0; i < nr; i++) { iobuf = iovec[i]; - if ((iobuf->offset & (size-1)) || - (iobuf->length & (size-1))) + if ((iobuf->offset & (size-1)) || (iobuf->length & (size-1))) return -EINVAL; if (!iobuf->nr_pages) panic("brw_kiovec: iobuf not initialised"); @@ -2136,94 +2077,28 @@ /* * OK to walk down the iovec doing page IO on each page we find. */ - bufind = bhind = transferred = err = 0; for (i = 0; i < nr; i++) { iobuf = iovec[i]; - offset = iobuf->offset; - length = iobuf->length; iobuf->errno = 0; - if (!bhs) - bhs = iobuf->bh; - - for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { - map = iobuf->maplist[pageind]; - if (!map) { - err = -EFAULT; - goto finished; - } - - while (length > 0) { - blocknr = b[bufind++]; - if (blocknr == -1UL) { - if (rw == READ) { - /* there was an hole in the filesystem */ - memset(kmap(map) + offset, 0, size); - flush_dcache_page(map); - kunmap(map); - - transferred += size; - goto skip_block; - } else - BUG(); - } - tmp = bhs[bhind++]; - tmp->b_size = size; - set_bh_page(tmp, map, offset); - tmp->b_this_page = tmp; - - init_buffer(tmp, end_buffer_io_kiobuf, iobuf); - tmp->b_dev = dev; - tmp->b_blocknr = blocknr; - tmp->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req); - - if (rw == WRITE) { - set_bit(BH_Uptodate, &tmp->b_state); - clear_bit(BH_Dirty, &tmp->b_state); - } else - set_bit(BH_Uptodate, &tmp->b_state); - - atomic_inc(&iobuf->io_count); - submit_bh(rw, tmp); - /* - * Wait for IO if we have got too much - */ - if (bhind >= KIO_MAX_SECTORS) { - kiobuf_wait_for_io(iobuf); /* wake-one */ - err = wait_kio(rw, bhind, bhs, size); - if (err >= 0) - transferred += err; - else - goto finished; - bhind = 0; - } + ll_rw_kio(rw, iobuf, dev, b[i] * (size >> 9)); + } - skip_block: - length -= size; - offset += size; - - if (offset >= PAGE_SIZE) { - offset = 0; - break; - } - } /* End of block loop */ - } /* End of page loop */ - } /* End of iovec loop */ - - /* Is there any IO still left to submit? */ - if (bhind) { - kiobuf_wait_for_io(iobuf); /* wake-one */ - err = wait_kio(rw, bhind, bhs, size); - if (err >= 0) - transferred += err; - else - goto finished; - } - - finished: - if (transferred) - return transferred; - return err; + /* + * now they are all submitted, wait for completion + */ + transferred = 0; + err = 0; + for (i = 0; i < nr; i++) { + iobuf = iovec[i]; + kiobuf_wait_for_io(iobuf); + if (iobuf->errno && !err) + err = iobuf->errno; + if (!err) + transferred += iobuf->length; + } + + return err ? err : transferred; } /* @@ -2238,7 +2113,7 @@ * FIXME: we need a swapper_inode->get_block function to remove * some of the bmap kludges and interface ugliness here. 
*/ -int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size) +int brw_page(int rw, struct page *page, kdev_t dev, sector_t b[], int size) { struct buffer_head *head, *bh; @@ -2326,7 +2201,7 @@ struct buffer_head *bh; page = find_or_create_page(bdev->bd_inode->i_mapping, index, GFP_NOFS); - if (IS_ERR(page)) + if (!page) return NULL; if (!PageLocked(page)) @@ -2489,6 +2364,9 @@ int try_to_free_buffers(struct page * page, unsigned int gfp_mask) { struct buffer_head * tmp, * bh = page->buffers; + + BUG_ON(!PageLocked(page)); + BUG_ON(!bh); cleaned_buffers_try_again: spin_lock(&lru_list_lock); diff -urN linux-2.5.1-pre1/fs/devfs/base.c linux/fs/devfs/base.c --- linux-2.5.1-pre1/fs/devfs/base.c Sat Nov 3 10:06:38 2001 +++ linux/fs/devfs/base.c Sat Dec 1 00:37:05 2001 @@ -545,21 +545,23 @@ 20010919 Richard Gooch Set inode->i_mapping->a_ops for block nodes in . v0.116 - 20010927 Richard Gooch - Went back to global rwsem for symlinks (refcount scheme no good) - v0.117 20011008 Richard Gooch Fixed overrun in by removing function (not needed). - v0.118 20011009 Richard Gooch Fixed buffer underrun in . - Moved down_read() from to - v0.119 20011029 Richard Gooch Fixed race in when setting event mask. - 20011103 Richard Gooch - Avoid deadlock in by using temporary buffer. - v0.120 + 20011114 Richard Gooch + First release of new locking code. + v1.0 + 20011117 Richard Gooch + Discard temporary buffer, now use "%s" for dentry names. + 20011118 Richard Gooch + Don't generate path in : use fake entry instead. + Use "existing" directory in <_devfs_make_parent_for_leaf>. + 20011122 Richard Gooch + Use slab cache rather than fixed buffer for devfsd events. + v1.1 */ #include #include @@ -592,7 +594,7 @@ #include #include -#define DEVFS_VERSION "0.120 (20011103)" +#define DEVFS_VERSION "1.1 (20011122)" #define DEVFS_NAME "devfs" @@ -605,27 +607,30 @@ # define FALSE 0 #endif -#define IS_HIDDEN(de) (( ((de)->hide && !is_devfsd_or_child(fs_info)) || !(de)->registered)) +#define MODE_DIR (S_IFDIR | S_IWUSR | S_IRUGO | S_IXUGO) -#define DEBUG_NONE 0x00000 -#define DEBUG_MODULE_LOAD 0x00001 -#define DEBUG_REGISTER 0x00002 -#define DEBUG_UNREGISTER 0x00004 -#define DEBUG_SET_FLAGS 0x00008 -#define DEBUG_S_PUT 0x00010 -#define DEBUG_I_LOOKUP 0x00020 -#define DEBUG_I_CREATE 0x00040 -#define DEBUG_I_GET 0x00080 -#define DEBUG_I_CHANGE 0x00100 -#define DEBUG_I_UNLINK 0x00200 -#define DEBUG_I_RLINK 0x00400 -#define DEBUG_I_FLINK 0x00800 -#define DEBUG_I_MKNOD 0x01000 -#define DEBUG_F_READDIR 0x02000 -#define DEBUG_D_DELETE 0x04000 -#define DEBUG_D_RELEASE 0x08000 -#define DEBUG_D_IPUT 0x10000 -#define DEBUG_ALL 0xfffff +#define IS_HIDDEN(de) ( (de)->hide && !is_devfsd_or_child(fs_info) ) + +#define DEBUG_NONE 0x0000000 +#define DEBUG_MODULE_LOAD 0x0000001 +#define DEBUG_REGISTER 0x0000002 +#define DEBUG_UNREGISTER 0x0000004 +#define DEBUG_FREE 0x0000008 +#define DEBUG_SET_FLAGS 0x0000010 +#define DEBUG_S_READ 0x0000100 /* Break */ +#define DEBUG_I_LOOKUP 0x0001000 /* Break */ +#define DEBUG_I_CREATE 0x0002000 +#define DEBUG_I_GET 0x0004000 +#define DEBUG_I_CHANGE 0x0008000 +#define DEBUG_I_UNLINK 0x0010000 +#define DEBUG_I_RLINK 0x0020000 +#define DEBUG_I_FLINK 0x0040000 +#define DEBUG_I_MKNOD 0x0080000 +#define DEBUG_F_READDIR 0x0100000 /* Break */ +#define DEBUG_D_DELETE 0x1000000 /* Break */ +#define DEBUG_D_RELEASE 0x2000000 +#define DEBUG_D_IPUT 0x4000000 +#define DEBUG_ALL 0xfffffff #define DEBUG_DISABLED DEBUG_NONE #define OPTION_NONE 0x00 @@ -638,9 +643,11 @@ struct directory_type { + rwlock_t lock; 
/* Lock for searching(R)/updating(W) */ struct devfs_entry *first; struct devfs_entry *last; - unsigned int num_removable; + unsigned short num_removable; /* Lock for writing but not reading */ + unsigned char no_more_additions:1; }; struct file_type @@ -656,8 +663,6 @@ struct fcb_type /* File, char, block type */ { - uid_t default_uid; - gid_t default_gid; void *ops; union { @@ -678,20 +683,13 @@ char *linkname; /* This is NULL-terminated */ }; -struct fifo_type -{ - uid_t uid; - gid_t gid; -}; - -struct devfs_inode /* This structure is for "persistent" inode storage */ +struct devfs_inode /* This structure is for "persistent" inode storage */ { + struct dentry *dentry; time_t atime; time_t mtime; time_t ctime; - unsigned int ino; /* Inode number as seen in the VFS */ - struct dentry *dentry; - umode_t mode; + unsigned int ino; /* Inode number as seen in the VFS */ uid_t uid; gid_t gid; }; @@ -699,12 +697,13 @@ struct devfs_entry { void *info; + atomic_t refcount; /* When this drops to zero, it's unused */ union { struct directory_type dir; struct fcb_type fcb; struct symlink_type symlink; - struct fifo_type fifo; + const char *name; /* Only used for (mode == 0) */ } u; struct devfs_entry *prev; /* Previous entry in the parent directory */ @@ -713,12 +712,11 @@ struct devfs_entry *slave; /* Another entry to unregister */ struct devfs_inode inode; umode_t mode; - unsigned short namelen; /* I think 64k+ filenames are a way off... */ - unsigned char registered:1; + unsigned short namelen; /* I think 64k+ filenames are a way off... */ unsigned char hide:1; - unsigned char no_persistence:1; - char name[1]; /* This is just a dummy: the allocated array is - bigger. This is NULL-terminated */ + unsigned char vfs_created:1; /* Whether created by driver or VFS */ + char name[1]; /* This is just a dummy: the allocated array + is bigger. 
This is NULL-terminated */ }; /* The root of the device tree */ @@ -726,35 +724,38 @@ struct devfsd_buf_entry { - void *data; - unsigned int type; + struct devfs_entry *de; /* The name is generated with this */ + unsigned short type; /* The type of event */ umode_t mode; uid_t uid; gid_t gid; + struct devfsd_buf_entry *next; }; -struct fs_info /* This structure is for the mounted devfs */ +struct fs_info /* This structure is for the mounted devfs */ { struct super_block *sb; - volatile struct devfsd_buf_entry *devfsd_buffer; - spinlock_t devfsd_buffer_lock; - volatile unsigned int devfsd_buf_in; - volatile unsigned int devfsd_buf_out; + spinlock_t devfsd_buffer_lock; /* Lock when inserting/deleting events */ + struct devfsd_buf_entry *devfsd_first_event; + struct devfsd_buf_entry *devfsd_last_event; volatile int devfsd_sleeping; volatile struct task_struct *devfsd_task; volatile struct file *devfsd_file; struct devfsd_notify_struct *devfsd_info; volatile unsigned long devfsd_event_mask; atomic_t devfsd_overrun_count; - wait_queue_head_t devfsd_wait_queue; - wait_queue_head_t revalidate_wait_queue; + wait_queue_head_t devfsd_wait_queue; /* Wake devfsd on input */ + wait_queue_head_t revalidate_wait_queue; /* Wake when devfsd sleeps */ }; static struct fs_info fs_info = {devfsd_buffer_lock: SPIN_LOCK_UNLOCKED}; -static const int devfsd_buf_size = PAGE_SIZE / sizeof(struct devfsd_buf_entry); +static kmem_cache_t *devfsd_buf_cache; #ifdef CONFIG_DEVFS_DEBUG static unsigned int devfs_debug_init __initdata = DEBUG_NONE; static unsigned int devfs_debug = DEBUG_NONE; +static spinlock_t stat_lock = SPIN_LOCK_UNLOCKED; +static unsigned int stat_num_entries; +static unsigned int stat_num_bytes; #endif #ifdef CONFIG_DEVFS_MOUNT @@ -763,19 +764,23 @@ static unsigned int boot_options = OPTION_NONE; #endif -static DECLARE_RWSEM (symlink_rwsem); - /* Forward function declarations */ -static struct devfs_entry *search_for_entry (struct devfs_entry *dir, - const char *name, - unsigned int namelen, int mkdir, - int mkfile, int *is_new, - int traverse_symlink); +static devfs_handle_t _devfs_walk_path (struct devfs_entry *dir, + const char *name, int namelen, + int traverse_symlink); static ssize_t devfsd_read (struct file *file, char *buf, size_t len, loff_t *ppos); static int devfsd_ioctl (struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); static int devfsd_close (struct inode *inode, struct file *file); +#ifdef CONFIG_DEVFS_DEBUG +static int stat_read (struct file *file, char *buf, size_t len, + loff_t *ppos); +static struct file_operations stat_fops = +{ + read: stat_read, +}; +#endif /* Devfs daemon file operations */ @@ -791,46 +796,95 @@ /** - * search_for_entry_in_dir - Search for a devfs entry inside another devfs entry. - * @parent: The parent devfs entry. - * @name: The name of the entry. + * devfs_get - Get a reference to a devfs entry. + * @de: The devfs entry. + */ + +static struct devfs_entry *devfs_get (struct devfs_entry *de) +{ + if (de) atomic_inc (&de->refcount); + return de; +} /* End Function devfs_get */ + +/** + * devfs_put - Put (release) a reference to a devfs entry. + * @de: The devfs entry. 
+ */ + +static void devfs_put (struct devfs_entry *de) +{ + if (!de) return; + if ( !atomic_dec_and_test (&de->refcount) ) return; + if (de == root_entry) + OOPS ("%s: devfs_put(): root entry being freed\n", DEVFS_NAME); +#ifdef CONFIG_DEVFS_DEBUG + if (devfs_debug & DEBUG_FREE) + printk ("%s: devfs_put(%s): de: %p, parent: %p \"%s\"\n", + DEVFS_NAME, de->name, de, de->parent, + de->parent ? de->parent->name : "no parent"); +#endif + if ( S_ISLNK (de->mode) ) kfree (de->u.symlink.linkname); + if ( ( S_ISCHR (de->mode) || S_ISBLK (de->mode) ) && de->u.fcb.autogen ) + { + devfs_dealloc_devnum ( S_ISCHR (de->mode) ? DEVFS_SPECIAL_CHR : + DEVFS_SPECIAL_BLK, + MKDEV (de->u.fcb.u.device.major, + de->u.fcb.u.device.minor) ); + } +#ifdef CONFIG_DEVFS_DEBUG + spin_lock (&stat_lock); + --stat_num_entries; + stat_num_bytes -= sizeof *de + de->namelen; + if ( S_ISLNK (de->mode) ) stat_num_bytes -= de->u.symlink.length + 1; + spin_unlock (&stat_lock); +#endif + kfree (de); +} /* End Function devfs_put */ + +/** + * _devfs_search_dir - Search for a devfs entry in a directory. + * @dir: The directory to search. + * @name: The name of the entry to search for. * @namelen: The number of characters in @name. - * @traverse_symlink: If %TRUE then the entry is traversed if it is a symlink. * - * Search for a devfs entry inside another devfs entry and returns a pointer - * to the entry on success, else %NULL. + * Search for a devfs entry in a directory and returns a pointer to the entry + * on success, else %NULL. The directory must be locked already. + * An implicit devfs_get() is performed on the returned entry. */ -static struct devfs_entry *search_for_entry_in_dir (struct devfs_entry *parent, - const char *name, - unsigned int namelen, - int traverse_symlink) +static struct devfs_entry *_devfs_search_dir (struct devfs_entry *dir, + const char *name, + unsigned int namelen) { - struct devfs_entry *curr, *retval; + struct devfs_entry *curr; - if ( !S_ISDIR (parent->mode) ) + if ( !S_ISDIR (dir->mode) ) { - printk ("%s: entry is not a directory\n", DEVFS_NAME); + printk ("%s: search_dir(%s): not a directory\n", DEVFS_NAME,dir->name); return NULL; } - for (curr = parent->u.dir.first; curr != NULL; curr = curr->next) + for (curr = dir->u.dir.first; curr != NULL; curr = curr->next) { if (curr->namelen != namelen) continue; if (memcmp (curr->name, name, namelen) == 0) break; /* Not found: try the next one */ } - if (curr == NULL) return NULL; - if (!S_ISLNK (curr->mode) || !traverse_symlink) return curr; - /* Need to follow the link: this is a stack chomper */ - retval = curr->registered ? - search_for_entry (parent, curr->u.symlink.linkname, - curr->u.symlink.length, FALSE, FALSE, NULL, - TRUE) : NULL; - return retval; -} /* End Function search_for_entry_in_dir */ + return devfs_get (curr); +} /* End Function _devfs_search_dir */ + + +/** + * _devfs_alloc_entry - Allocate a devfs entry. + * @name: The name of the entry. + * @namelen: The number of characters in @name. + * + * Allocate a devfs entry and returns a pointer to the entry on success, else + * %NULL. 
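devfs_get()/devfs_put() above implement the usual last-put-frees discipline. A compact userspace sketch of the same pattern, using C11 atomics in place of the kernel's atomic_t (struct node and its helpers are hypothetical):

    #include <stdatomic.h>
    #include <stdlib.h>

    struct node {
        atomic_int refcount;
        /* ... payload ... */
    };

    static struct node *node_get(struct node *n)
    {
        if (n)
            atomic_fetch_add(&n->refcount, 1);
        return n;
    }

    static void node_put(struct node *n)
    {
        if (!n)
            return;
        /* fetch_sub returns the old value: old == 1 means last holder */
        if (atomic_fetch_sub(&n->refcount, 1) == 1)
            free(n);
    }

    int main(void)
    {
        struct node *n = calloc(1, sizeof *n);

        if (!n)
            return 1;
        atomic_init(&n->refcount, 1);   /* creator holds one reference */
        node_get(n);                    /* second holder */
        node_put(n);
        node_put(n);                    /* last put frees */
        return 0;
    }

As in devfs_put(), anything that must not outlive the object (the symlink name, the auto-allocated device number) is torn down only on that final decrement.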
+ */ -static struct devfs_entry *create_entry (struct devfs_entry *parent, - const char *name,unsigned int namelen) +static struct devfs_entry *_devfs_alloc_entry (const char *name, + unsigned int namelen, + umode_t mode) { struct devfs_entry *new; static unsigned long inode_counter = FIRST_INODE; @@ -839,168 +893,270 @@ if ( name && (namelen < 1) ) namelen = strlen (name); if ( ( new = kmalloc (sizeof *new + namelen, GFP_KERNEL) ) == NULL ) return NULL; - /* Magic: this will set the ctime to zero, thus subsequent lookups will - trigger the call to */ memset (new, 0, sizeof *new + namelen); + new->mode = mode; + if ( S_ISDIR (mode) ) rwlock_init (&new->u.dir.lock); + atomic_set (&new->refcount, 1); spin_lock (&counter_lock); new->inode.ino = inode_counter++; spin_unlock (&counter_lock); - new->parent = parent; if (name) memcpy (new->name, name, namelen); new->namelen = namelen; - if (parent == NULL) return new; - new->prev = parent->u.dir.last; - /* Insert into the parent directory's list of children */ - if (parent->u.dir.first == NULL) parent->u.dir.first = new; - else parent->u.dir.last->next = new; - parent->u.dir.last = new; +#ifdef CONFIG_DEVFS_DEBUG + spin_lock (&stat_lock); + ++stat_num_entries; + stat_num_bytes += sizeof *new + namelen; + spin_unlock (&stat_lock); +#endif return new; -} /* End Function create_entry */ +} /* End Function _devfs_alloc_entry */ + -static void update_devfs_inode_from_entry (struct devfs_entry *de) +/** + * _devfs_append_entry - Append a devfs entry to a directory's child list. + * @dir: The directory to add to. + * @de: The devfs entry to append. + * @removable: If TRUE, increment the count of removable devices for %dir. + * @old_de: If an existing entry exists, it will be written here. This may + * be %NULL. + * + * Append a devfs entry to a directory's list of children, checking first to + * see if an entry of the same name exists. The directory will be locked. + * The value 0 is returned on success, else a negative error code. + * On failure, an implicit devfs_put() is performed on %de. 
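_devfs_alloc_entry() above packs the header and the name into one allocation through the "char name[1]" dummy member: the byte declared in the struct doubles as room for the terminating NUL. The same trick in a standalone userspace sketch (struct entry is hypothetical; the kernel predates C99 flexible array members, hence name[1] rather than name[]):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct entry {
        unsigned short namelen;
        char name[1];   /* dummy: the allocated array is bigger */
    };

    static struct entry *alloc_entry(const char *name, unsigned int namelen)
    {
        /* sizeof *e already counts one name byte, so "+ namelen" leaves
           space for the '\0' that memset() provides */
        struct entry *e = malloc(sizeof *e + namelen);

        if (!e)
            return NULL;
        memset(e, 0, sizeof *e + namelen);
        memcpy(e->name, name, namelen);
        e->namelen = namelen;
        return e;
    }

    int main(void)
    {
        struct entry *e = alloc_entry("tty0", 4);

        if (e)
            printf("%s (%u)\n", e->name, e->namelen);
        free(e);
        return 0;
    }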
+ */ + +static int _devfs_append_entry (devfs_handle_t dir, devfs_handle_t de, + int removable, devfs_handle_t *old_de) { - if (de == NULL) return; - if ( S_ISDIR (de->mode) ) - { - de->inode.mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; - de->inode.uid = 0; - de->inode.gid = 0; - } - else if ( S_ISLNK (de->mode) ) - { - de->inode.mode = S_IFLNK | S_IRUGO | S_IXUGO; - de->inode.uid = 0; - de->inode.gid = 0; - } - else if ( S_ISFIFO (de->mode) ) + int retval; + + if (old_de) *old_de = NULL; + if ( !S_ISDIR (dir->mode) ) { - de->inode.mode = de->mode; - de->inode.uid = de->u.fifo.uid; - de->inode.gid = de->u.fifo.gid; + printk ("%s: append_entry(%s): dir: \"%s\" is not a directory\n", + DEVFS_NAME, de->name, dir->name); + devfs_put (de); + return -ENOTDIR; } + write_lock (&dir->u.dir.lock); + if (dir->u.dir.no_more_additions) retval = -ENOENT; else { - if (de->u.fcb.auto_owner) - de->inode.mode = (de->mode & ~S_IALLUGO) | S_IRUGO | S_IWUGO; - else de->inode.mode = de->mode; - de->inode.uid = de->u.fcb.default_uid; - de->inode.gid = de->u.fcb.default_gid; + struct devfs_entry *old; + + old = _devfs_search_dir (dir, de->name, de->namelen); + if (old_de) *old_de = old; + else devfs_put (old); + if (old == NULL) + { + de->parent = dir; + de->prev = dir->u.dir.last; + /* Append to the directory's list of children */ + if (dir->u.dir.first == NULL) dir->u.dir.first = de; + else dir->u.dir.last->next = de; + dir->u.dir.last = de; + if (removable) ++dir->u.dir.num_removable; + retval = 0; + } + else retval = -EEXIST; } -} /* End Function update_devfs_inode_from_entry */ + write_unlock (&dir->u.dir.lock); + if (retval) devfs_put (de); + return retval; +} /* End Function _devfs_append_entry */ + /** - * get_root_entry - Get the root devfs entry. + * _devfs_get_root_entry - Get the root devfs entry. * * Returns the root devfs entry on success, else %NULL. 
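Note that _devfs_append_entry() above performs the duplicate lookup and the insertion inside one write-locked section, so two concurrent registrations of the same name cleanly resolve to -EEXIST for the loser. A sketch of that check-then-append discipline, with a pthread rwlock standing in for the kernel rwlock_t and hypothetical dir/child types (the kernel list is doubly linked; this sketch keeps only the tail-append logic):

    #include <errno.h>
    #include <pthread.h>
    #include <string.h>

    struct child { const char *name; struct child *next; };

    struct dir {
        pthread_rwlock_t lock;
        struct child *first, *last;
    };

    static int dir_append(struct dir *d, struct child *c)
    {
        struct child *curr;
        int err = 0;

        pthread_rwlock_wrlock(&d->lock);
        for (curr = d->first; curr; curr = curr->next)
            if (strcmp(curr->name, c->name) == 0) {
                err = EEXIST;       /* duplicate seen under the lock */
                break;
            }
        if (!err) {                 /* append to the tail, as above */
            c->next = NULL;
            if (d->last) d->last->next = c;
            else d->first = c;
            d->last = c;
        }
        pthread_rwlock_unlock(&d->lock);
        return err;
    }

    int main(void)
    {
        struct dir d = { PTHREAD_RWLOCK_INITIALIZER, NULL, NULL };
        struct child a = { "tty0", NULL }, b = { "tty0", NULL };

        return dir_append(&d, &a) || dir_append(&d, &b) != EEXIST;
    }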
*/ -static struct devfs_entry *get_root_entry (void) +static struct devfs_entry *_devfs_get_root_entry (void) { kdev_t devnum; struct devfs_entry *new; + static spinlock_t root_lock = SPIN_LOCK_UNLOCKED; /* Always ensure the root is created */ - if (root_entry != NULL) return root_entry; - if ( ( root_entry = create_entry (NULL, NULL, 0) ) == NULL ) return NULL; - root_entry->mode = S_IFDIR; - /* Force an inode update, because lookup() is never done for the root */ - update_devfs_inode_from_entry (root_entry); - root_entry->registered = TRUE; + if (root_entry) return root_entry; + if ( ( new = _devfs_alloc_entry (NULL, 0,MODE_DIR) ) == NULL ) return NULL; + spin_lock (&root_lock); + if (root_entry) + { + spin_unlock (&root_lock); + devfs_put (new); + return (root_entry); + } + root_entry = new; + spin_unlock (&root_lock); /* And create the entry for ".devfsd" */ - if ( ( new = create_entry (root_entry, ".devfsd", 0) ) == NULL ) - return NULL; + if ( ( new = _devfs_alloc_entry (".devfsd", 0, S_IFCHR |S_IRUSR |S_IWUSR) ) + == NULL ) return NULL; devnum = devfs_alloc_devnum (DEVFS_SPECIAL_CHR); new->u.fcb.u.device.major = MAJOR (devnum); new->u.fcb.u.device.minor = MINOR (devnum); - new->mode = S_IFCHR | S_IRUSR | S_IWUSR; - new->u.fcb.default_uid = 0; - new->u.fcb.default_gid = 0; new->u.fcb.ops = &devfsd_fops; - new->registered = TRUE; + _devfs_append_entry (root_entry, new, FALSE, NULL); +#ifdef CONFIG_DEVFS_DEBUG + if ( ( new = _devfs_alloc_entry (".stat", 0, S_IFCHR | S_IRUGO | S_IWUGO) ) + == NULL ) return NULL; + devnum = devfs_alloc_devnum (DEVFS_SPECIAL_CHR); + new->u.fcb.u.device.major = MAJOR (devnum); + new->u.fcb.u.device.minor = MINOR (devnum); + new->u.fcb.ops = &stat_fops; + _devfs_append_entry (root_entry, new, FALSE, NULL); +#endif return root_entry; -} /* End Function get_root_entry */ +} /* End Function _devfs_get_root_entry */ /** - * search_for_entry - Search for an entry in the devfs tree. - * @dir: The parent directory to search from. If this is %NULL the root is used - * @name: The name of the entry. - * @namelen: The number of characters in @name. - * @mkdir: If %TRUE intermediate directories are created as needed. - * @mkfile: If %TRUE the file entry is created if it doesn't exist. - * @is_new: If the returned entry was newly made, %TRUE is written here. If - * this is %NULL nothing is written here. - * @traverse_symlink: If %TRUE then symbolic links are traversed. + * _devfs_descend - Descend down a tree using the next component name. + * @dir: The directory to search. + * @name: The component name to search for. + * @namelen: The length of %name. + * @next_pos: The position of the next '/' or '\0' is written here. * - * If the entry is created, then it will be in the unregistered state. - * Returns a pointer to the entry on success, else %NULL. + * Descend into a directory, searching for a component. This function forms + * the core of a tree-walking algorithm. The directory will be locked. + * The devfs entry corresponding to the component is returned. If there is + * no matching entry, %NULL is returned. + * An implicit devfs_get() is performed on the returned entry. 
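The core of _devfs_descend() (body below) is a one-component scan: find the span up to the next '/' or the end of the string, report its length through next_pos, and let the caller advance name/namelen. Extracted into a userspace sketch with an illustrative main():

    #include <stdio.h>

    /* Return the length of the leading path component, i.e. the index
       of the next '/' or of the end of the string */
    static int next_component(const char *name, int namelen)
    {
        const char *stop = name + namelen;
        const char *ptr;

        for (ptr = name; (ptr < stop) && (*ptr != '/'); ++ptr)
            ;
        return ptr - name;
    }

    int main(void)
    {
        const char *path = "bus/usb/001";
        int namelen = 11, pos;

        while (namelen > 0) {
            pos = next_component(path, namelen);
            printf("component: %.*s\n", pos, path);
            if (pos < namelen)
                ++pos;              /* step over '/', as the callers do */
            path += pos;
            namelen -= pos;
        }
        return 0;
    }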
*/ -static struct devfs_entry *search_for_entry (struct devfs_entry *dir, - const char *name, - unsigned int namelen, int mkdir, - int mkfile, int *is_new, - int traverse_symlink) +static struct devfs_entry *_devfs_descend (struct devfs_entry *dir, + const char *name, int namelen, + int *next_pos) { - int len; - const char *subname, *stop, *ptr; + const char *stop, *ptr; struct devfs_entry *entry; - if (is_new) *is_new = FALSE; - if (dir == NULL) dir = get_root_entry (); - if (dir == NULL) return NULL; - /* Extract one filename component */ - subname = name; + if ( (namelen >= 3) && (strncmp (name, "../", 3) == 0) ) + { /* Special-case going to parent directory */ + *next_pos = 3; + return devfs_get (dir->parent); + } stop = name + namelen; - while (subname < stop) - { - /* Search for a possible '/' */ - for (ptr = subname; (ptr < stop) && (*ptr != '/'); ++ptr); - if (ptr >= stop) - { - /* Look for trailing component */ - len = stop - subname; - entry = search_for_entry_in_dir (dir, subname, len, - traverse_symlink); - if (entry != NULL) return entry; - if (!mkfile) return NULL; - entry = create_entry (dir, subname, len); - if (entry && is_new) *is_new = TRUE; - return entry; - } - /* Found '/': search for directory */ - if (strncmp (subname, "../", 3) == 0) - { - /* Going up */ - dir = dir->parent; - if (dir == NULL) return NULL; /* Cannot escape from devfs */ - subname += 3; - continue; + /* Search for a possible '/' */ + for (ptr = name; (ptr < stop) && (*ptr != '/'); ++ptr); + *next_pos = ptr - name; + read_lock (&dir->u.dir.lock); + entry = _devfs_search_dir (dir, name, *next_pos); + read_unlock (&dir->u.dir.lock); + return entry; +} /* End Function _devfs_descend */ + + +static devfs_handle_t _devfs_make_parent_for_leaf (struct devfs_entry *dir, + const char *name, + int namelen, int *leaf_pos) +{ + int next_pos = 0; + + if (dir == NULL) dir = _devfs_get_root_entry (); + if (dir == NULL) return NULL; + devfs_get (dir); + /* Search for possible trailing component and ignore it */ + for (--namelen; (namelen > 0) && (name[namelen] != '/'); --namelen); + *leaf_pos = (name[namelen] == '/') ? 
(namelen + 1) : 0; + for (; namelen > 0; name += next_pos, namelen -= next_pos) + { + struct devfs_entry *de, *old; + + if ( ( de = _devfs_descend (dir, name, namelen, &next_pos) ) == NULL ) + { + de = _devfs_alloc_entry (name, next_pos, MODE_DIR); + devfs_get (de); + if ( !de || _devfs_append_entry (dir, de, FALSE, &old) ) + { + devfs_put (de); + if ( !old || !S_ISDIR (old->mode) ) + { + devfs_put (old); + devfs_put (dir); + return NULL; + } + de = old; /* Use the existing directory */ + } } - len = ptr - subname; - entry = search_for_entry_in_dir (dir, subname, len, traverse_symlink); - if (!entry && !mkdir) return NULL; - if (entry == NULL) + if (de == dir->parent) { - /* Make it */ - if ( ( entry = create_entry (dir, subname, len) ) == NULL ) - return NULL; - entry->mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; - if (is_new) *is_new = TRUE; + devfs_put (dir); + devfs_put (de); + return NULL; } - if ( !S_ISDIR (entry->mode) ) + devfs_put (dir); + dir = de; + if (name[next_pos] == '/') ++next_pos; + } + return dir; +} /* End Function _devfs_make_parent_for_leaf */ + + +static devfs_handle_t _devfs_prepare_leaf (devfs_handle_t *dir, + const char *name, umode_t mode) +{ + int namelen, leaf_pos; + struct devfs_entry *de; + + namelen = strlen (name); + if ( ( *dir = _devfs_make_parent_for_leaf (*dir, name, namelen, + &leaf_pos) ) == NULL ) + { + printk ("%s: prepare_leaf(%s): could not create parent path\n", + DEVFS_NAME, name); + return NULL; + } + if ( ( de = _devfs_alloc_entry (name + leaf_pos, namelen - leaf_pos,mode) ) + == NULL ) + { + printk ("%s: prepare_leaf(%s): could not allocate entry\n", + DEVFS_NAME, name); + devfs_put (*dir); + return NULL; + } + return de; +} /* End Function _devfs_prepare_leaf */ + + +static devfs_handle_t _devfs_walk_path (struct devfs_entry *dir, + const char *name, int namelen, + int traverse_symlink) +{ + int next_pos = 0; + + if (dir == NULL) dir = _devfs_get_root_entry (); + if (dir == NULL) return NULL; + devfs_get (dir); + for (; namelen > 0; name += next_pos, namelen -= next_pos) + { + struct devfs_entry *de, *link; + + if ( ( de = _devfs_descend (dir, name, namelen, &next_pos) ) == NULL ) { - printk ("%s: existing non-directory entry\n", DEVFS_NAME); + devfs_put (dir); return NULL; } - /* Ensure an unregistered entry is re-registered and visible */ - entry->hide = FALSE; - entry->registered = TRUE; - subname = ptr + 1; - dir = entry; + if (S_ISLNK (de->mode) && traverse_symlink) + { /* Need to follow the link: this is a stack chomper */ + link = _devfs_walk_path (dir, de->u.symlink.linkname, + de->u.symlink.length, TRUE); + devfs_put (de); + if (!link) + { + devfs_put (dir); + return NULL; + } + de = link; + } + devfs_put (dir); + dir = de; + if (name[next_pos] == '/') ++next_pos; } - return NULL; -} /* End Function search_for_entry */ + return dir; +} /* End Function _devfs_walk_path */ /** @@ -1020,20 +1176,29 @@ { struct devfs_entry *entry, *de; + devfs_get (dir); if (dir == NULL) return NULL; if ( !S_ISDIR (dir->mode) ) { printk ("%s: find_by_dev(): not a directory\n", DEVFS_NAME); + devfs_put (dir); return NULL; } /* First search files in this directory */ + read_lock (&dir->u.dir.lock); for (entry = dir->u.dir.first; entry != NULL; entry = entry->next) { if ( !S_ISCHR (entry->mode) && !S_ISBLK (entry->mode) ) continue; if ( S_ISCHR (entry->mode) && (type != DEVFS_SPECIAL_CHR) ) continue; if ( S_ISBLK (entry->mode) && (type != DEVFS_SPECIAL_BLK) ) continue; if ( (entry->u.fcb.u.device.major == major) && - (entry->u.fcb.u.device.minor == minor) 
) return entry; + (entry->u.fcb.u.device.minor == minor) ) + { + devfs_get (entry); + read_unlock (&dir->u.dir.lock); + devfs_put (dir); + return entry; + } /* Not found: try the next one */ } /* Now recursively search the subdirectories: this is a stack chomper */ @@ -1041,8 +1206,15 @@ { if ( !S_ISDIR (entry->mode) ) continue; de = find_by_dev (entry, major, minor, type); - if (de) return de; + if (de) + { + read_unlock (&dir->u.dir.lock); + devfs_put (dir); + return de; + } } + read_unlock (&dir->u.dir.lock); + devfs_put (dir); return NULL; } /* End Function find_by_dev */ @@ -1063,7 +1235,6 @@ * %DEVFS_SPECIAL_CHR or %DEVFS_SPECIAL_BLK. * @traverse_symlink: If %TRUE then symbolic links are traversed. * - * FIXME: What the hell is @handle? - ch * Returns the devfs_entry pointer on success, else %NULL. */ @@ -1095,10 +1266,7 @@ ++name; --namelen; } - if (traverse_symlink) down_read (&symlink_rwsem); - entry = search_for_entry (dir, name, namelen, FALSE, FALSE, NULL, - traverse_symlink); - if (traverse_symlink) up_read (&symlink_rwsem); + entry = _devfs_walk_path (dir, name, namelen, traverse_symlink); if (entry != NULL) return entry; } /* Have to search by major and minor: slow */ @@ -1106,42 +1274,34 @@ return find_by_dev (root_entry, major, minor, type); } /* End Function find_entry */ -static struct devfs_entry *get_devfs_entry_from_vfs_inode (struct inode *inode, - int do_check) +static struct devfs_entry *get_devfs_entry_from_vfs_inode (struct inode *inode) { - struct devfs_entry *de; - if (inode == NULL) return NULL; - de = inode->u.generic_ip; - if (!de) printk (__FUNCTION__ "(): NULL de for inode %ld\n", inode->i_ino); - if (do_check && de && !de->registered) de = NULL; - return de; + return inode->u.generic_ip; } /* End Function get_devfs_entry_from_vfs_inode */ /** - * free_dentries - Free the dentries for a device entry and invalidate inodes. + * free_dentry - Free the dentry for a device entry and invalidate inode. * @de: The entry. + * + * This must only be called after the entry has been unhooked from it's + * parent directory. */ -static void free_dentries (struct devfs_entry *de) +static void free_dentry (struct devfs_entry *de) { - struct dentry *dentry; + struct dentry *dentry = de->inode.dentry; + if (!dentry) return; spin_lock (&dcache_lock); - dentry = de->inode.dentry; - if (dentry != NULL) - { - dget_locked (dentry); - de->inode.dentry = NULL; - spin_unlock (&dcache_lock); - /* Forcefully remove the inode */ - if (dentry->d_inode != NULL) dentry->d_inode->i_nlink = 0; - d_drop (dentry); - dput (dentry); - } - else spin_unlock (&dcache_lock); -} /* End Function free_dentries */ + dget_locked (dentry); + spin_unlock (&dcache_lock); + /* Forcefully remove the inode */ + if (dentry->d_inode != NULL) dentry->d_inode->i_nlink = 0; + d_drop (dentry); + dput (dentry); +} /* End Function free_dentry */ /** @@ -1172,7 +1332,7 @@ static inline int devfsd_queue_empty (struct fs_info *fs_info) { - return (fs_info->devfsd_buf_out == fs_info->devfsd_buf_in) ? TRUE : FALSE; + return (fs_info->devfsd_last_event) ? FALSE : TRUE; } /* End Function devfsd_queue_empty */ @@ -1201,8 +1361,9 @@ /** - * devfsd_notify_one - Notify a single devfsd daemon of a change. - * @data: Data to be passed. + * devfsd_notify_de - Notify the devfsd daemon of a change. + * @de: The devfs entry that has changed. This and all parent entries will + * have their reference counts incremented if the event was queued. * @type: The type of change. * @mode: The mode of the entry. * @uid: The user ID. 
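The notification code that follows replaces the old fixed-size ring buffer with an unbounded singly linked queue: each event comes from a slab cache and is appended under devfsd_buffer_lock, so the only remaining overflow mode is allocation failure (counted in devfsd_overrun_count). A userspace sketch of that append, with a mutex standing in for the spinlock, malloc() for the slab cache, and a hypothetical queue_event() name:

    #include <pthread.h>
    #include <stdlib.h>

    struct event { int type; struct event *next; };

    static struct event *first_event, *last_event;
    static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

    static int queue_event(int type)
    {
        struct event *e = malloc(sizeof *e);

        if (!e)
            return -1;              /* caller records an overrun */
        e->type = type;
        e->next = NULL;
        pthread_mutex_lock(&queue_lock);
        /* same two tests as devfsd_notify_de(): an empty queue sets the
           head, a non-empty queue links after the current tail */
        if (!first_event) first_event = e;
        if (last_event) last_event->next = e;
        last_event = e;
        pthread_mutex_unlock(&queue_lock);
        return 0;
    }

    int main(void)
    {
        return queue_event(1) || queue_event(2);
    }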
@@ -1212,51 +1373,48 @@ * Returns %TRUE if an event was queued and devfsd woken up, else %FALSE. */ -static int devfsd_notify_one (void *data, unsigned int type, umode_t mode, - uid_t uid, gid_t gid, struct fs_info *fs_info) +static int devfsd_notify_de (struct devfs_entry *de, + unsigned short type, umode_t mode, + uid_t uid, gid_t gid, struct fs_info *fs_info) { - unsigned int next_pos; - unsigned long flags; struct devfsd_buf_entry *entry; + struct devfs_entry *curr; if ( !( fs_info->devfsd_event_mask & (1 << type) ) ) return (FALSE); - next_pos = fs_info->devfsd_buf_in + 1; - if (next_pos >= devfsd_buf_size) next_pos = 0; - if (next_pos == fs_info->devfsd_buf_out) + if ( ( entry = kmem_cache_alloc (devfsd_buf_cache, 0) ) == NULL ) { - /* Running up the arse of the reader: drop it */ atomic_inc (&fs_info->devfsd_overrun_count); return (FALSE); } - spin_lock_irqsave (&fs_info->devfsd_buffer_lock, flags); - next_pos = fs_info->devfsd_buf_in + 1; - if (next_pos >= devfsd_buf_size) next_pos = 0; - entry = (struct devfsd_buf_entry *) fs_info->devfsd_buffer + - fs_info->devfsd_buf_in; - entry->data = data; + for (curr = de; curr != NULL; curr = curr->parent) devfs_get (curr); + entry->de = de; entry->type = type; entry->mode = mode; entry->uid = uid; entry->gid = gid; - fs_info->devfsd_buf_in = next_pos; - spin_unlock_irqrestore (&fs_info->devfsd_buffer_lock, flags); + entry->next = NULL; + spin_lock (&fs_info->devfsd_buffer_lock); + if (!fs_info->devfsd_first_event) fs_info->devfsd_first_event = entry; + if (fs_info->devfsd_last_event) fs_info->devfsd_last_event->next = entry; + fs_info->devfsd_last_event = entry; + spin_unlock (&fs_info->devfsd_buffer_lock); wake_up_interruptible (&fs_info->devfsd_wait_queue); return (TRUE); -} /* End Function devfsd_notify_one */ +} /* End Function devfsd_notify_de */ /** - * devfsd_notify - Notify all devfsd daemons of a change. + * devfsd_notify - Notify the devfsd daemon of a change. * @de: The devfs entry that has changed. * @type: The type of change event. - * @wait: If TRUE, the functions waits for all daemons to finish processing + * @wait: If TRUE, the function waits for the daemon to finish processing * the event. */ -static void devfsd_notify (struct devfs_entry *de, unsigned int type, int wait) +static void devfsd_notify (struct devfs_entry *de,unsigned short type,int wait) { - if (devfsd_notify_one (de, type, de->mode, current->euid, - current->egid, &fs_info) && wait) + if (devfsd_notify_de (de, type, de->mode, current->euid, + current->egid, &fs_info) && wait) wait_for_devfsd_finished (&fs_info); } /* End Function devfsd_notify */ @@ -1287,7 +1445,7 @@ umode_t mode, void *ops, void *info) { char devtype = S_ISCHR (mode) ? DEVFS_SPECIAL_CHR : DEVFS_SPECIAL_BLK; - int is_new; + int err; kdev_t devnum = NODEV; struct devfs_entry *de; @@ -1332,146 +1490,127 @@ major = MAJOR (devnum); minor = MINOR (devnum); } - de = search_for_entry (dir, name, strlen (name), TRUE, TRUE, &is_new, - FALSE); - if (de == NULL) + if ( ( de = _devfs_prepare_leaf (&dir, name, mode) ) == NULL ) { - printk ("%s: devfs_register(): could not create entry: \"%s\"\n", + printk ("%s: devfs_register(%s): could not prepare leaf\n", DEVFS_NAME, name); if (devnum != NODEV) devfs_dealloc_devnum (devtype, devnum); return NULL; } -#ifdef CONFIG_DEVFS_DEBUG - if (devfs_debug & DEBUG_REGISTER) - printk ("%s: devfs_register(%s): de: %p %s\n", - DEVFS_NAME, name, de, is_new ? 
"new" : "existing"); -#endif - if (!is_new) - { - /* Existing entry */ - if ( !S_ISCHR (de->mode) && !S_ISBLK (de->mode) && - !S_ISREG (de->mode) ) - { - printk ("%s: devfs_register(): existing non-device/file entry: \"%s\"\n", - DEVFS_NAME, name); - if (devnum != NODEV) devfs_dealloc_devnum (devtype, devnum); - return NULL; - } - if (de->registered) - { - printk("%s: devfs_register(): device already registered: \"%s\"\n", - DEVFS_NAME, name); - if (devnum != NODEV) devfs_dealloc_devnum (devtype, devnum); - return NULL; - } - } - de->u.fcb.autogen = FALSE; if ( S_ISCHR (mode) || S_ISBLK (mode) ) { de->u.fcb.u.device.major = major; de->u.fcb.u.device.minor = minor; de->u.fcb.autogen = (devnum == NODEV) ? FALSE : TRUE; } - else if ( S_ISREG (mode) ) de->u.fcb.u.file.size = 0; - else + else if ( !S_ISREG (mode) ) { - printk ("%s: devfs_register(): illegal mode: %x\n", - DEVFS_NAME, mode); + printk ("%s: devfs_register(%s): illegal mode: %x\n", + DEVFS_NAME, name, mode); + devfs_put (de); + devfs_put (dir); return (NULL); } de->info = info; - de->mode = mode; if (flags & DEVFS_FL_CURRENT_OWNER) { - de->u.fcb.default_uid = current->uid; - de->u.fcb.default_gid = current->gid; + de->inode.uid = current->uid; + de->inode.gid = current->gid; } else { - de->u.fcb.default_uid = 0; - de->u.fcb.default_gid = 0; + de->inode.uid = 0; + de->inode.gid = 0; } de->u.fcb.ops = ops; de->u.fcb.auto_owner = (flags & DEVFS_FL_AUTO_OWNER) ? TRUE : FALSE; de->u.fcb.aopen_notify = (flags & DEVFS_FL_AOPEN_NOTIFY) ? TRUE : FALSE; - if (flags & DEVFS_FL_REMOVABLE) - { - de->u.fcb.removable = TRUE; - ++de->parent->u.dir.num_removable; - } - de->u.fcb.open = FALSE; de->hide = (flags & DEVFS_FL_HIDE) ? TRUE : FALSE; - de->no_persistence = (flags & DEVFS_FL_NO_PERSISTENCE) ? TRUE : FALSE; - de->registered = TRUE; + if (flags & DEVFS_FL_REMOVABLE) de->u.fcb.removable = TRUE; + if ( ( err = _devfs_append_entry (dir, de, de->u.fcb.removable, NULL) ) + != 0 ) + { + printk("%s: devfs_register(%s): could not append to parent, err: %d\n", + DEVFS_NAME, name, err); + devfs_put (dir); + if (devnum != NODEV) devfs_dealloc_devnum (devtype, devnum); + return NULL; + } +#ifdef CONFIG_DEVFS_DEBUG + if (devfs_debug & DEBUG_REGISTER) + printk ("%s: devfs_register(%s): de: %p dir: %p \"%s\" pp: %p\n", + DEVFS_NAME, name, de, dir, dir->name, dir->parent); +#endif devfsd_notify (de, DEVFSD_NOTIFY_REGISTERED, flags & DEVFS_FL_WAIT); + devfs_put (dir); return de; } /* End Function devfs_register */ /** - * unregister - Unregister a device entry. + * _devfs_unhook - Unhook a device entry from its parents list + * @de: The entry to unhook. + * + * Returns %TRUE if the entry was unhooked, else %FALSE if it was + * previously unhooked. + * The caller must have a write lock on the parent directory. + */ + +static int _devfs_unhook (struct devfs_entry *de) +{ + struct devfs_entry *parent; + + if ( !de || (de->prev == de) ) return FALSE; + parent = de->parent; + if (de->prev == NULL) parent->u.dir.first = de->next; + else de->prev->next = de->next; + if (de->next == NULL) parent->u.dir.last = de->prev; + else de->next->prev = de->prev; + de->prev = de; /* Indicate we're unhooked */ + de->next = NULL; /* Force early termination for */ + if ( ( S_ISREG (de->mode) || S_ISCHR (de->mode) || S_ISBLK (de->mode) ) && + de->u.fcb.removable ) + --parent->u.dir.num_removable; + return TRUE; +} /* End Function _devfs_unhook */ + + +/** + * unregister - Unregister a device entry from it's parent. + * @dir: The parent directory. * @de: The entry to unregister. 
+ * + * The caller must have a write lock on the parent directory, which is + * unlocked by this function. */ -static void unregister (struct devfs_entry *de) +static void unregister (struct devfs_entry *dir, struct devfs_entry *de) { - struct devfs_entry *child; + int unhooked = _devfs_unhook (de); - if ( (child = de->slave) != NULL ) - { - de->slave = NULL; /* Unhook first in case slave is parent directory */ - unregister (child); - } - if (de->registered) - { - devfsd_notify (de, DEVFSD_NOTIFY_UNREGISTERED, 0); - free_dentries (de); - } - de->info = NULL; - if ( S_ISCHR (de->mode) || S_ISBLK (de->mode) || S_ISREG (de->mode) ) - { - de->registered = FALSE; - de->u.fcb.ops = NULL; - if (!S_ISREG (de->mode) && de->u.fcb.autogen) - { - devfs_dealloc_devnum ( S_ISCHR (de->mode) ? DEVFS_SPECIAL_CHR : - DEVFS_SPECIAL_BLK, - MKDEV (de->u.fcb.u.device.major, - de->u.fcb.u.device.minor) ); - } - de->u.fcb.autogen = FALSE; - return; - } - if (S_ISLNK (de->mode) && de->registered) - { - de->registered = FALSE; - down_write (&symlink_rwsem); - if (de->u.symlink.linkname) kfree (de->u.symlink.linkname); - de->u.symlink.linkname = NULL; - up_write (&symlink_rwsem); - return; - } - if ( S_ISFIFO (de->mode) ) - { - de->registered = FALSE; - return; - } - if (!de->registered) return; - if ( !S_ISDIR (de->mode) ) - { - printk ("%s: unregister(): unsupported type\n", DEVFS_NAME); - return; - } - de->registered = FALSE; - /* Now recursively search the subdirectories: this is a stack chomper */ - for (child = de->u.dir.first; child != NULL; child = child->next) - { + write_unlock (&dir->u.dir.lock); + if (!unhooked) return; + devfs_get (dir); + devfs_unregister (de->slave); /* Let it handle the locking */ + devfsd_notify (de, DEVFSD_NOTIFY_UNREGISTERED, 0); + free_dentry (de); + devfs_put (dir); + if ( !S_ISDIR (de->mode) ) return; + while (TRUE) /* Recursively unregister: this is a stack chomper */ + { + struct devfs_entry *child; + + write_lock (&de->u.dir.lock); + de->u.dir.no_more_additions = TRUE; + child = de->u.dir.first; + unregister (de, child); + if (!child) break; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_UNREGISTER) printk ("%s: unregister(): child->name: \"%s\" child: %p\n", DEVFS_NAME, child->name, child); #endif - unregister (child); + devfs_put (child); } } /* End Function unregister */ @@ -1484,20 +1623,22 @@ void devfs_unregister (devfs_handle_t de) { - if (de == NULL) return; + if ( (de == NULL) || (de->parent == NULL) ) return; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_UNREGISTER) printk ("%s: devfs_unregister(): de->name: \"%s\" de: %p\n", DEVFS_NAME, de->name, de); #endif - unregister (de); + write_lock (&de->parent->u.dir.lock); + unregister (de->parent, de); + devfs_put (de); } /* End Function devfs_unregister */ static int devfs_do_symlink (devfs_handle_t dir, const char *name, unsigned int flags, const char *link, devfs_handle_t *handle, void *info) { - int is_new; + int err; unsigned int linklength; char *newlink; struct devfs_entry *de; @@ -1522,28 +1663,31 @@ return -ENOMEM; memcpy (newlink, link, linklength); newlink[linklength] = '\0'; - if ( ( de = search_for_entry (dir, name, strlen (name), TRUE, TRUE, - &is_new, FALSE) ) == NULL ) - { - kfree (newlink); - return -ENOMEM; - } - down_write (&symlink_rwsem); - if (de->registered) + if ( ( de = _devfs_prepare_leaf (&dir, name, S_IFLNK | S_IRUGO | S_IXUGO) ) + == NULL ) { - up_write (&symlink_rwsem); - kfree (newlink); - printk ("%s: devfs_do_symlink(%s): entry already exists\n", + printk ("%s: 
devfs_do_symlink(%s): could not prepare leaf\n", DEVFS_NAME, name); - return -EEXIST; + kfree (newlink); + return -ENOTDIR; } - de->mode = S_IFLNK | S_IRUGO | S_IXUGO; de->info = info; de->hide = (flags & DEVFS_FL_HIDE) ? TRUE : FALSE; de->u.symlink.linkname = newlink; de->u.symlink.length = linklength; - de->registered = TRUE; - up_write (&symlink_rwsem); + if ( ( err = _devfs_append_entry (dir, de, FALSE, NULL) ) != 0 ) + { + printk ("%s: devfs_do_symlink(%s): could not append to parent, err: %d\n", + DEVFS_NAME, name, err); + devfs_put (dir); + return err; + } + devfs_put (dir); +#ifdef CONFIG_DEVFS_DEBUG + spin_lock (&stat_lock); + stat_num_bytes += linklength + 1; + spin_unlock (&stat_lock); +#endif if (handle != NULL) *handle = de; return 0; } /* End Function devfs_do_symlink */ @@ -1593,7 +1737,7 @@ devfs_handle_t devfs_mk_dir (devfs_handle_t dir, const char *name, void *info) { - int is_new; + int err; struct devfs_entry *de; if (name == NULL) @@ -1601,36 +1745,26 @@ printk ("%s: devfs_mk_dir(): NULL name pointer\n", DEVFS_NAME); return NULL; } - de = search_for_entry (dir, name, strlen (name), TRUE, TRUE, &is_new, - FALSE); - if (de == NULL) + if ( ( de = _devfs_prepare_leaf (&dir, name, MODE_DIR) ) == NULL ) { - printk ("%s: devfs_mk_dir(): could not create entry: \"%s\"\n", + printk ("%s: devfs_mk_dir(%s): could not prepare leaf\n", DEVFS_NAME, name); return NULL; } - if (!S_ISDIR (de->mode) && de->registered) + de->info = info; + if ( ( err = _devfs_append_entry (dir, de, FALSE, NULL) ) != 0 ) { - printk ("%s: devfs_mk_dir(): existing non-directory entry: \"%s\"\n", - DEVFS_NAME, name); + printk ("%s: devfs_mk_dir(%s): could not append to dir: %p \"%s\", err: %d\n", + DEVFS_NAME, name, dir, dir->name, err); + devfs_put (dir); return NULL; } #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_REGISTER) - printk ("%s: devfs_mk_dir(%s): de: %p %s\n", - DEVFS_NAME, name, de, is_new ? "new" : "existing"); + printk ("%s: devfs_mk_dir(%s): de: %p dir: %p \"%s\"\n", + DEVFS_NAME, name, de, dir, dir->name); #endif - if (!S_ISDIR (de->mode) && !is_new) - { - /* Transmogrifying an old entry */ - de->u.dir.first = NULL; - de->u.dir.last = NULL; - } - de->mode = S_IFDIR | S_IRUGO | S_IXUGO; - de->info = info; - if (!de->registered) de->u.dir.num_removable = 0; - de->hide = FALSE; - de->registered = TRUE; + devfs_put (dir); return de; } /* End Function devfs_mk_dir */ @@ -1660,8 +1794,8 @@ if ( (name != NULL) && (name[0] == '\0') ) name = NULL; de = find_entry (dir, name, 0, major, minor, type, traverse_symlinks); - if (de == NULL) return NULL; - if (!de->registered) return NULL; + devfs_put (de); /* FIXME: in 2.5 consider dropping this and require a + call to devfs_put() */ return de; } /* End Function devfs_find_handle */ @@ -1679,7 +1813,6 @@ unsigned int fl = 0; if (de == NULL) return -EINVAL; - if (!de->registered) return -ENODEV; if (de->hide) fl |= DEVFS_FL_HIDE; if ( S_ISCHR (de->mode) || S_ISBLK (de->mode) || S_ISREG (de->mode) ) { @@ -1703,7 +1836,6 @@ int devfs_set_flags (devfs_handle_t de, unsigned int flags) { if (de == NULL) return -EINVAL; - if (!de->registered) return -ENODEV; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_SET_FLAGS) printk ("%s: devfs_set_flags(): de->name: \"%s\"\n", @@ -1714,16 +1846,6 @@ { de->u.fcb.auto_owner = (flags & DEVFS_FL_AUTO_OWNER) ? TRUE : FALSE; de->u.fcb.aopen_notify = (flags & DEVFS_FL_AOPEN_NOTIFY) ? 
TRUE:FALSE; - if ( de->u.fcb.removable && !(flags & DEVFS_FL_REMOVABLE) ) - { - de->u.fcb.removable = FALSE; - --de->parent->u.dir.num_removable; - } - else if ( !de->u.fcb.removable && (flags & DEVFS_FL_REMOVABLE) ) - { - de->u.fcb.removable = TRUE; - ++de->parent->u.dir.num_removable; - } } return 0; } /* End Function devfs_set_flags */ @@ -1742,7 +1864,6 @@ unsigned int *minor) { if (de == NULL) return -EINVAL; - if (!de->registered) return -ENODEV; if ( S_ISDIR (de->mode) ) return -EISDIR; if ( !S_ISCHR (de->mode) && !S_ISBLK (de->mode) ) return -EINVAL; if (major != NULL) *major = de->u.fcb.u.device.major; @@ -1762,7 +1883,7 @@ { if (!inode || !inode->i_sb) return NULL; if (inode->i_sb->s_magic != DEVFS_SUPER_MAGIC) return NULL; - return get_devfs_entry_from_vfs_inode (inode, TRUE); + return get_devfs_entry_from_vfs_inode (inode); } /* End Function devfs_get_handle_from_inode */ @@ -1780,19 +1901,20 @@ int devfs_generate_path (devfs_handle_t de, char *path, int buflen) { int pos; +#define NAMEOF(de) ( (de)->mode ? (de)->name : (de)->u.name ) if (de == NULL) return -EINVAL; if (de->namelen >= buflen) return -ENAMETOOLONG; /* Must be first */ path[buflen - 1] = '\0'; if (de->parent == NULL) return buflen - 1; /* Don't prepend root */ pos = buflen - de->namelen - 1; - memcpy (path + pos, de->name, de->namelen); + memcpy (path + pos, NAMEOF (de), de->namelen); for (de = de->parent; de->parent != NULL; de = de->parent) { if (pos - de->namelen - 1 < 0) return -ENAMETOOLONG; path[--pos] = '/'; pos -= de->namelen; - memcpy (path + pos, de->name, de->namelen); + memcpy (path + pos, NAMEOF (de), de->namelen); } return pos; } /* End Function devfs_generate_path */ @@ -1808,7 +1930,6 @@ void *devfs_get_ops (devfs_handle_t de) { if (de == NULL) return NULL; - if (!de->registered) return NULL; if ( S_ISCHR (de->mode) || S_ISBLK (de->mode) || S_ISREG (de->mode) ) return de->u.fcb.ops; return NULL; @@ -1826,7 +1947,6 @@ int devfs_set_file_size (devfs_handle_t de, unsigned long size) { if (de == NULL) return -EINVAL; - if (!de->registered) return -EINVAL; if ( !S_ISREG (de->mode) ) return -EINVAL; if (de->u.fcb.u.file.size == size) return 0; de->u.fcb.u.file.size = size; @@ -1846,7 +1966,6 @@ void *devfs_get_info (devfs_handle_t de) { if (de == NULL) return NULL; - if (!de->registered) return NULL; return de->info; } /* End Function devfs_get_info */ @@ -1861,7 +1980,6 @@ int devfs_set_info (devfs_handle_t de, void *info) { if (de == NULL) return -EINVAL; - if (!de->registered) return -EINVAL; de->info = info; return 0; } /* End Function devfs_set_info */ @@ -1876,7 +1994,6 @@ devfs_handle_t devfs_get_parent (devfs_handle_t de) { if (de == NULL) return NULL; - if (!de->registered) return NULL; return de->parent; } /* End Function devfs_get_parent */ @@ -1891,7 +2008,6 @@ devfs_handle_t devfs_get_first_child (devfs_handle_t de) { if (de == NULL) return NULL; - if (!de->registered) return NULL; if ( !S_ISDIR (de->mode) ) return NULL; return de->u.dir.first; } /* End Function devfs_get_first_child */ @@ -1907,7 +2023,6 @@ devfs_handle_t devfs_get_next_sibling (devfs_handle_t de) { if (de == NULL) return NULL; - if (!de->registered) return NULL; return de->next; } /* End Function devfs_get_next_sibling */ @@ -1961,7 +2076,6 @@ const char *devfs_get_name (devfs_handle_t de, unsigned int *namelen) { if (de == NULL) return NULL; - if (!de->registered) return NULL; if (namelen != NULL) *namelen = de->namelen; return de->name; } /* End Function devfs_get_name */ @@ -2057,8 +2171,10 @@ {"dmod", 
DEBUG_MODULE_LOAD, &devfs_debug_init}, {"dreg", DEBUG_REGISTER, &devfs_debug_init}, {"dunreg", DEBUG_UNREGISTER, &devfs_debug_init}, + {"dfree", DEBUG_FREE, &devfs_debug_init}, {"diget", DEBUG_I_GET, &devfs_debug_init}, {"dchange", DEBUG_SET_FLAGS, &devfs_debug_init}, + {"dsread", DEBUG_S_READ, &devfs_debug_init}, {"dichange", DEBUG_I_CHANGE, &devfs_debug_init}, {"dimknod", DEBUG_I_MKNOD, &devfs_debug_init}, {"dilookup", DEBUG_I_LOOKUP, &devfs_debug_init}, @@ -2129,34 +2245,31 @@ /** - * try_modload - Notify devfsd of an inode lookup. + * try_modload - Notify devfsd of an inode lookup by a non-devfsd process. * @parent: The parent devfs entry. * @fs_info: The filesystem info. * @name: The device name. * @namelen: The number of characters in @name. - * @buf: A working area that will be used. This must not go out of scope until - * devfsd is idle again. + * @buf: A working area that will be used. This must not go out of scope + * until devfsd is idle again. * * Returns 0 on success, else a negative error code. */ static int try_modload (struct devfs_entry *parent, struct fs_info *fs_info, const char *name, unsigned namelen, - char buf[STRING_LENGTH]) + struct devfs_entry *buf) { - int pos = STRING_LENGTH - namelen - 1; - if ( !( fs_info->devfsd_event_mask & (1 << DEVFSD_NOTIFY_LOOKUP) ) ) return -ENOENT; if ( is_devfsd_or_child (fs_info) ) return -ENOENT; - if (namelen >= STRING_LENGTH - 1) return -ENAMETOOLONG; - memcpy (buf + pos, name, namelen); - buf[STRING_LENGTH - 1] = '\0'; - if (parent->parent != NULL) pos = devfs_generate_path (parent, buf, pos); - if (pos < 0) return pos; - buf[STRING_LENGTH - namelen - 2] = '/'; - if ( !devfsd_notify_one (buf + pos, DEVFSD_NOTIFY_LOOKUP, 0, - current->euid, current->egid, fs_info) ) + memset (buf, 0, sizeof *buf); + atomic_set (&buf->refcount, 1); + buf->parent = parent; + buf->namelen = namelen; + buf->u.name = name; + if ( !devfsd_notify_de (buf, DEVFSD_NOTIFY_LOOKUP, 0, + current->euid, current->egid, fs_info) ) return -ENOENT; /* Possible success */ return 0; @@ -2206,7 +2319,6 @@ if (dir->u.dir.num_removable < 1) return; for (de = dir->u.dir.first; de != NULL; de = de->next) { - if (!de->registered) continue; if ( !S_ISBLK (de->mode) ) continue; if (!de->u.fcb.removable) continue; check_disc_changed (de); @@ -2229,7 +2341,6 @@ for (de = dir->u.dir.first; de != NULL; de = de->next) { - if (!de->registered) continue; if ( !S_ISBLK (de->mode) ) continue; if (!de->u.fcb.removable) continue; if (strcmp (de->name, "disc") == 0) return check_disc_changed (de); @@ -2258,7 +2369,7 @@ struct inode *inode = dentry->d_inode; struct fs_info *fs_info = inode->i_sb->u.generic_sbp; - de = get_devfs_entry_from_vfs_inode (inode, TRUE); + de = get_devfs_entry_from_vfs_inode (inode); if (de == NULL) return -ENODEV; retval = inode_change_ok (inode, iattr); if (retval != 0) return retval; @@ -2276,15 +2387,19 @@ #endif /* Inode is not on hash chains, thus must save permissions here rather than in a write_inode() method */ - de->inode.mode = inode->i_mode; - de->inode.uid = inode->i_uid; - de->inode.gid = inode->i_gid; + if ( ( !S_ISREG (inode->i_mode) && !S_ISCHR (inode->i_mode) && + !S_ISBLK (inode->i_mode) ) || !de->u.fcb.auto_owner ) + { + de->mode = inode->i_mode; + de->inode.uid = inode->i_uid; + de->inode.gid = inode->i_gid; + } de->inode.atime = inode->i_atime; de->inode.mtime = inode->i_mtime; de->inode.ctime = inode->i_ctime; if ( iattr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID) ) - devfsd_notify_one (de, DEVFSD_NOTIFY_CHANGE, inode->i_mode, - 
inode->i_uid, inode->i_gid, fs_info); + devfsd_notify_de (de, DEVFSD_NOTIFY_CHANGE, inode->i_mode, + inode->i_uid, inode->i_gid, fs_info); return 0; } /* End Function devfs_notify_change */ @@ -2299,11 +2414,10 @@ return 0; } /* End Function devfs_statfs */ -static void devfs_clear_inode(struct inode *inode) +static void devfs_clear_inode (struct inode *inode) { - if (S_ISBLK(inode->i_mode)) - bdput(inode->i_bdev); -} + if ( S_ISBLK (inode->i_mode) ) bdput (inode->i_bdev); +} /* End Function devfs_clear_inode */ static struct super_operations devfs_sops = { @@ -2319,32 +2433,37 @@ * @de: The devfs inode. * @dentry: The dentry to register with the devfs inode. * - * Returns the inode on success, else %NULL. + * Returns the inode on success, else %NULL. An implicit devfs_get() is + * performed if the inode is created. */ static struct inode *get_vfs_inode (struct super_block *sb, struct devfs_entry *de, struct dentry *dentry) { + int is_fcb = FALSE; struct inode *inode; - if (de->inode.dentry != NULL) - { - printk ("%s: get_vfs_inode(%u): old de->inode.dentry: %p \"%s\" new dentry: %p \"%s\"\n", - DEVFS_NAME, de->inode.ino, - de->inode.dentry, de->inode.dentry->d_name.name, - dentry, dentry->d_name.name); - printk (" old inode: %p\n", de->inode.dentry->d_inode); - return NULL; - } + if (de->prev == de) return NULL; /* Quick check to see if unhooked */ if ( ( inode = new_inode (sb) ) == NULL ) { printk ("%s: get_vfs_inode(%s): new_inode() failed, de: %p\n", DEVFS_NAME, de->name, de); return NULL; } - de->inode.dentry = dentry; - inode->u.generic_ip = de; + if (de->parent) + { + read_lock (&de->parent->u.dir.lock); + if (de->prev != de) de->inode.dentry = dentry; /* Not unhooked */ + read_unlock (&de->parent->u.dir.lock); + } + else de->inode.dentry = dentry; /* Root: no locking needed */ + if (de->inode.dentry != dentry) + { /* Must have been unhooked */ + iput (inode); + return NULL; + } + inode->u.generic_ip = devfs_get (de); inode->i_ino = de->inode.ino; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_I_GET) @@ -2356,37 +2475,45 @@ inode->i_op = &devfs_iops; inode->i_fop = &devfs_fops; inode->i_rdev = NODEV; - if ( S_ISCHR (de->inode.mode) ) + if ( S_ISCHR (de->mode) ) { inode->i_rdev = MKDEV (de->u.fcb.u.device.major, de->u.fcb.u.device.minor); - inode->i_cdev = cdget (kdev_t_to_nr(inode->i_rdev)); + inode->i_cdev = cdget ( kdev_t_to_nr (inode->i_rdev) ); + is_fcb = TRUE; } - else if ( S_ISBLK (de->inode.mode) ) + else if ( S_ISBLK (de->mode) ) { inode->i_rdev = MKDEV (de->u.fcb.u.device.major, de->u.fcb.u.device.minor); - if (bd_acquire(inode) == 0) + if (bd_acquire (inode) == 0) { if (!inode->i_bdev->bd_op && de->u.fcb.ops) inode->i_bdev->bd_op = de->u.fcb.ops; } else printk ("%s: get_vfs_inode(%d): no block device from bdget()\n", DEVFS_NAME, (int) inode->i_ino); + is_fcb = TRUE; } - else if ( S_ISFIFO (de->inode.mode) ) inode->i_fop = &def_fifo_fops; - else if ( S_ISREG (de->inode.mode) ) inode->i_size = de->u.fcb.u.file.size; - else if ( S_ISDIR (de->inode.mode) ) + else if ( S_ISFIFO (de->mode) ) inode->i_fop = &def_fifo_fops; + else if ( S_ISREG (de->mode) ) + { + inode->i_size = de->u.fcb.u.file.size; + is_fcb = TRUE; + } + else if ( S_ISDIR (de->mode) ) { inode->i_op = &devfs_dir_iops; inode->i_fop = &devfs_dir_fops; } - else if ( S_ISLNK (de->inode.mode) ) + else if ( S_ISLNK (de->mode) ) { inode->i_op = &devfs_symlink_iops; inode->i_size = de->u.symlink.length; } - inode->i_mode = de->inode.mode; + if (is_fcb && de->u.fcb.auto_owner) + inode->i_mode = (de->mode & S_IFMT) | 
S_IRUGO | S_IWUGO; + else inode->i_mode = de->mode; inode->i_uid = de->inode.uid; inode->i_gid = de->inode.gid; inode->i_atime = de->inode.atime; @@ -2409,11 +2536,11 @@ int err, count; int stored = 0; struct fs_info *fs_info; - struct devfs_entry *parent, *de; + struct devfs_entry *parent, *de, *next = NULL; struct inode *inode = file->f_dentry->d_inode; fs_info = inode->i_sb->u.generic_sbp; - parent = get_devfs_entry_from_vfs_inode (file->f_dentry->d_inode, TRUE); + parent = get_devfs_entry_from_vfs_inode (file->f_dentry->d_inode); if ( (long) file->f_pos < 0 ) return -EINVAL; #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_F_READDIR) @@ -2441,19 +2568,32 @@ default: /* Skip entries */ count = file->f_pos - 2; - for (de = parent->u.dir.first; (de != NULL) && (count > 0); - de = de->next) + read_lock (&parent->u.dir.lock); + for (de = parent->u.dir.first; de && (count > 0); de = de->next) if ( !IS_HIDDEN (de) ) --count; + devfs_get (de); + read_unlock (&parent->u.dir.lock); /* Now add all remaining entries */ - for (; de != NULL; de = de->next) + while (de) { - if ( IS_HIDDEN (de) ) continue; - err = (*filldir) (dirent, de->name, de->namelen, - file->f_pos, de->inode.ino, de->mode >> 12); + if ( IS_HIDDEN (de) ) err = 0; + else + { + err = (*filldir) (dirent, de->name, de->namelen, + file->f_pos, de->inode.ino, de->mode >> 12); + if (err >= 0) + { + file->f_pos++; + ++stored; + } + } + read_lock (&parent->u.dir.lock); + next = devfs_get (de->next); + read_unlock (&parent->u.dir.lock); + devfs_put (de); + de = next; if (err == -EINVAL) break; if (err < 0) return err; - file->f_pos++; - ++stored; } break; } @@ -2467,14 +2607,9 @@ struct devfs_entry *de; struct fs_info *fs_info = inode->i_sb->u.generic_sbp; - lock_kernel (); - de = get_devfs_entry_from_vfs_inode (inode, TRUE); - err = -ENODEV; - if (de == NULL) - goto out; - err = 0; - if ( S_ISDIR (de->mode) ) - goto out; + de = get_devfs_entry_from_vfs_inode (inode); + if (de == NULL) return -ENODEV; + if ( S_ISDIR (de->mode) ) return 0; df = &de->u.fcb; file->private_data = de->info; if ( S_ISBLK (inode->i_mode) ) @@ -2482,7 +2617,7 @@ file->f_op = &def_blk_fops; if (df->ops) inode->i_bdev->bd_op = df->ops; } - else file->f_op = fops_get ( (struct file_operations*) df->ops ); + else file->f_op = fops_get ( (struct file_operations *) df->ops ); if (file->f_op) err = file->f_op->open ? 
(*file->f_op->open) (inode, file) : 0; else @@ -2491,39 +2626,33 @@ if ( S_ISCHR (inode->i_mode) ) err = chrdev_open (inode, file); else err = -ENODEV; } - if (err < 0) goto out; + if (err < 0) return err; /* Open was successful */ - err = 0; - if (df->open) goto out; + if (df->open) return 0; df->open = TRUE; /* This is the first open */ if (df->auto_owner) { - /* Change the ownership/protection */ - de->inode.mode = (de->inode.mode & ~S_IALLUGO) |(de->mode & S_IRWXUGO); - de->inode.uid = current->euid; - de->inode.gid = current->egid; - inode->i_mode = de->inode.mode; - inode->i_uid = de->inode.uid; - inode->i_gid = de->inode.gid; + /* Change the ownership/protection to what driver specified */ + inode->i_mode = de->mode; + inode->i_uid = current->euid; + inode->i_gid = current->egid; } if (df->aopen_notify) - devfsd_notify_one (de, DEVFSD_NOTIFY_ASYNC_OPEN, inode->i_mode, - current->euid, current->egid, fs_info); -out: - unlock_kernel (); - return err; + devfsd_notify_de (de, DEVFSD_NOTIFY_ASYNC_OPEN, inode->i_mode, + current->euid, current->egid, fs_info); + return 0; } /* End Function devfs_open */ static struct file_operations devfs_fops = { - open: devfs_open, + open: devfs_open, }; static struct file_operations devfs_dir_fops = { - read: generic_read_dir, + read: generic_read_dir, readdir: devfs_readdir, - open: devfs_open, + open: devfs_open, }; @@ -2556,16 +2685,18 @@ { struct devfs_entry *de; - lock_kernel (); - de = get_devfs_entry_from_vfs_inode (inode, FALSE); + de = get_devfs_entry_from_vfs_inode (inode); #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_D_IPUT) printk ("%s: d_iput(): dentry: %p inode: %p de: %p de->dentry: %p\n", DEVFS_NAME, dentry, inode, de, de->inode.dentry); #endif - if (de->inode.dentry == dentry) de->inode.dentry = NULL; - unlock_kernel (); + if ( de->inode.dentry && (de->inode.dentry != dentry) ) + OOPS ("%s: d_iput(%s): de: %p dentry: %p de->dentry: %p\n", + DEVFS_NAME, de->name, de, dentry, de->inode.dentry); + de->inode.dentry = NULL; iput (inode); + devfs_put (de); } /* End Function devfs_d_iput */ static int devfs_d_delete (struct dentry *dentry); @@ -2610,7 +2741,7 @@ return 1; } fs_info = inode->i_sb->u.generic_sbp; - de = get_devfs_entry_from_vfs_inode (inode, TRUE); + de = get_devfs_entry_from_vfs_inode (inode); #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_D_DELETE) printk ("%s: d_delete(): dentry: %p inode: %p devfs_entry: %p\n", @@ -2622,14 +2753,11 @@ if (!de->u.fcb.open) return 0; de->u.fcb.open = FALSE; if (de->u.fcb.aopen_notify) - devfsd_notify_one (de, DEVFSD_NOTIFY_CLOSE, inode->i_mode, - current->euid, current->egid, fs_info); + devfsd_notify_de (de, DEVFSD_NOTIFY_CLOSE, inode->i_mode, + current->euid, current->egid, fs_info); if (!de->u.fcb.auto_owner) return 0; /* Change the ownership/protection back */ - de->inode.mode = (de->inode.mode & ~S_IALLUGO) | S_IRUGO | S_IWUGO; - de->inode.uid = de->u.fcb.default_uid; - de->inode.gid = de->u.fcb.default_gid; - inode->i_mode = de->inode.mode; + inode->i_mode = (de->mode & S_IFMT) | S_IRUGO | S_IWUGO; inode->i_uid = de->inode.uid; inode->i_gid = de->inode.gid; return 0; @@ -2637,59 +2765,38 @@ static int devfs_d_revalidate_wait (struct dentry *dentry, int flags) { - devfs_handle_t de = dentry->d_fsdata; - struct inode *dir; - struct fs_info *fs_info; + struct inode *dir = dentry->d_parent->d_inode; + struct fs_info *fs_info = dir->i_sb->u.generic_sbp; - lock_kernel (); - dir = dentry->d_parent->d_inode; - fs_info = dir->i_sb->u.generic_sbp; - if (!de || de->registered) + if ( 
!dentry->d_inode && is_devfsd_or_child (fs_info) ) { - if ( !dentry->d_inode && is_devfsd_or_child (fs_info) ) - { - struct inode *inode; - -#ifdef CONFIG_DEVFS_DEBUG - char txt[STRING_LENGTH]; - - memset (txt, 0, STRING_LENGTH); - memcpy (txt, dentry->d_name.name, - (dentry->d_name.len >= STRING_LENGTH) ? - (STRING_LENGTH - 1) : dentry->d_name.len); - if (devfs_debug & DEBUG_I_LOOKUP) - printk ("%s: d_revalidate(): dentry: %p name: \"%s\" by: \"%s\"\n", - DEVFS_NAME, dentry, txt, current->comm); + devfs_handle_t de; + devfs_handle_t parent = get_devfs_entry_from_vfs_inode (dir); + struct inode *inode; + +#ifdef CONFIG_DEVFS_DEBUG + if (devfs_debug & DEBUG_I_LOOKUP) + printk ("%s: d_revalidate(%s): dentry: %p by: \"%s\"\n", + DEVFS_NAME, dentry->d_name.name, dentry, current->comm); +#endif + read_lock (&parent->u.dir.lock); + de = _devfs_search_dir (parent, dentry->d_name.name, + dentry->d_name.len); + read_lock (&parent->u.dir.lock); + if (de == NULL) return 1; + /* Create an inode, now that the driver information is available */ + inode = get_vfs_inode (dir->i_sb, de, dentry); + devfs_put (de); + if (!inode) return 1; +#ifdef CONFIG_DEVFS_DEBUG + if (devfs_debug & DEBUG_I_LOOKUP) + printk ("%s: d_revalidate(): new VFS inode(%u): %p devfs_entry: %p\n", + DEVFS_NAME, de->inode.ino, inode, de); #endif - if (de == NULL) - { - devfs_handle_t parent; - - parent = get_devfs_entry_from_vfs_inode (dir, TRUE); - de = search_for_entry_in_dir (parent, dentry->d_name.name, - dentry->d_name.len, FALSE); - } - if (de == NULL) goto out; - /* Create an inode, now that the driver information is available - */ - if (de->no_persistence) update_devfs_inode_from_entry (de); - else if (de->inode.ctime == 0) update_devfs_inode_from_entry (de); - else de->inode.mode = - (de->mode & ~S_IALLUGO) | (de->inode.mode & S_IALLUGO); - if ( ( inode = get_vfs_inode (dir->i_sb, de, dentry) ) == NULL ) - goto out; -#ifdef CONFIG_DEVFS_DEBUG - if (devfs_debug & DEBUG_I_LOOKUP) - printk ("%s: d_revalidate(): new VFS inode(%u): %p devfs_entry: %p\n", - DEVFS_NAME, de->inode.ino, inode, de); -#endif - d_instantiate (dentry, inode); - goto out; - } + d_instantiate (dentry, inode); + return 1; } if ( wait_for_devfsd_finished (fs_info) ) dentry->d_op = &devfs_dops; -out: - unlock_kernel (); return 1; } /* End Function devfs_d_revalidate_wait */ @@ -2701,67 +2808,61 @@ struct fs_info *fs_info; struct devfs_entry *parent, *de; struct inode *inode; - char txt[STRING_LENGTH]; /* Set up the dentry operations before anything else, to ensure cleaning up on any error */ dentry->d_op = &devfs_dops; - memset (txt, 0, STRING_LENGTH); - memcpy (txt, dentry->d_name.name, - (dentry->d_name.len >= STRING_LENGTH) ? 
- (STRING_LENGTH - 1) : dentry->d_name.len); fs_info = dir->i_sb->u.generic_sbp; /* First try to get the devfs entry for this directory */ - parent = get_devfs_entry_from_vfs_inode (dir, TRUE); + parent = get_devfs_entry_from_vfs_inode (dir); #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_I_LOOKUP) printk ("%s: lookup(%s): dentry: %p parent: %p by: \"%s\"\n", - DEVFS_NAME, txt, dentry, parent, current->comm); + DEVFS_NAME, dentry->d_name.name, dentry, parent,current->comm); #endif if (parent == NULL) return ERR_PTR (-ENOENT); - /* Try to reclaim an existing devfs entry */ - de = search_for_entry_in_dir (parent, - dentry->d_name.name, dentry->d_name.len, - FALSE); - if ( ( (de == NULL) || !de->registered ) && - (parent->u.dir.num_removable > 0) && + read_lock (&parent->u.dir.lock); + de = _devfs_search_dir (parent, dentry->d_name.name, dentry->d_name.len); + read_unlock (&parent->u.dir.lock); + if ( (de == NULL) && (parent->u.dir.num_removable > 0) && get_removable_partition (parent, dentry->d_name.name, dentry->d_name.len) ) { - if (de == NULL) - de = search_for_entry_in_dir (parent, dentry->d_name.name, - dentry->d_name.len, FALSE); + read_lock (&parent->u.dir.lock); + de = _devfs_search_dir (parent, dentry->d_name.name, + dentry->d_name.len); + read_unlock (&parent->u.dir.lock); } - if ( (de == NULL) || !de->registered ) - { - /* Try with devfsd. For any kind of failure, leave a negative dentry + if (de == NULL) + { /* Try with devfsd. For any kind of failure, leave a negative dentry so someone else can deal with it (in the case where the sysadmin does a mknod()). It's important to do this before hashing the dentry, so that the devfsd queue is filled before revalidates can start */ + struct devfs_entry tmp; + if (try_modload (parent, fs_info, - dentry->d_name.name, dentry->d_name.len, txt) < 0) + dentry->d_name.name, dentry->d_name.len, &tmp) < 0) { d_add (dentry, NULL); return NULL; } /* devfsd claimed success */ dentry->d_op = &devfs_wait_dops; - dentry->d_fsdata = de; d_add (dentry, NULL); /* Open the floodgates */ /* Unlock directory semaphore, which will release any waiters. 
They will get the hashed dentry, and may be forced to wait for revalidation */ up (&dir->i_sem); - devfs_d_revalidate_wait (dentry, 0); /* I might have to wait too */ + devfs_d_revalidate_wait (dentry, 0); /* I might have to wait too */ down (&dir->i_sem); /* Grab it again because them's the rules */ /* If someone else has been so kind as to make the inode, we go home early */ if (dentry->d_inode) return NULL; - if (de && !de->registered) return NULL; - if (de == NULL) - de = search_for_entry_in_dir (parent, dentry->d_name.name, - dentry->d_name.len, FALSE); + read_lock (&parent->u.dir.lock); + de = _devfs_search_dir (parent, dentry->d_name.name, + dentry->d_name.len); + read_unlock (&parent->u.dir.lock); if (de == NULL) return NULL; /* OK, there's an entry now, but no VFS inode yet */ } @@ -2771,58 +2872,47 @@ d_add (dentry, NULL); /* Open the floodgates */ } /* Create an inode, now that the driver information is available */ - if (de->no_persistence) update_devfs_inode_from_entry (de); - else if (de->inode.ctime == 0) update_devfs_inode_from_entry (de); - else de->inode.mode = - (de->mode & ~S_IALLUGO) | (de->inode.mode & S_IALLUGO); - if ( ( inode = get_vfs_inode (dir->i_sb, de, dentry) ) == NULL ) - return ERR_PTR (-ENOMEM); + inode = get_vfs_inode (dir->i_sb, de, dentry); + devfs_put (de); + if (!inode) return ERR_PTR (-ENOMEM); #ifdef CONFIG_DEVFS_DEBUG if (devfs_debug & DEBUG_I_LOOKUP) printk ("%s: lookup(): new VFS inode(%u): %p devfs_entry: %p\n", DEVFS_NAME, de->inode.ino, inode, de); #endif d_instantiate (dentry, inode); - /* Unlock directory semaphore, which will release any waiters. They will - get the hashed dentry, and may be forced to wait for revalidation */ - up (&dir->i_sem); if (dentry->d_op == &devfs_wait_dops) - devfs_d_revalidate_wait (dentry, 0); /* I might have to wait too */ - down (&dir->i_sem); /* Grab it again because them's the rules */ + { /* Unlock directory semaphore, which will release any waiters. 
They + will get the hashed dentry, and may be forced to wait for + revalidation */ + up (&dir->i_sem); + devfs_d_revalidate_wait (dentry, 0); /* I might have to wait too */ + down (&dir->i_sem); /* Grab it again because them's the rules */ + } return NULL; } /* End Function devfs_lookup */ static int devfs_unlink (struct inode *dir, struct dentry *dentry) { + int unhooked; struct devfs_entry *de; struct inode *inode = dentry->d_inode; #ifdef CONFIG_DEVFS_DEBUG - char txt[STRING_LENGTH]; - if (devfs_debug & DEBUG_I_UNLINK) - { - memset (txt, 0, STRING_LENGTH); - memcpy (txt, dentry->d_name.name, dentry->d_name.len); - txt[STRING_LENGTH - 1] = '\0'; - printk ("%s: unlink(%s)\n", DEVFS_NAME, txt); - } + printk ("%s: unlink(%s)\n", DEVFS_NAME, dentry->d_name.name); #endif - - de = get_devfs_entry_from_vfs_inode (dentry->d_inode, TRUE); + de = get_devfs_entry_from_vfs_inode (inode); if (de == NULL) return -ENOENT; - devfsd_notify_one (de, DEVFSD_NOTIFY_DELETE, inode->i_mode, - inode->i_uid, inode->i_gid, dir->i_sb->u.generic_sbp); - de->registered = FALSE; - de->hide = TRUE; - if ( S_ISLNK (de->mode) ) - { - down_write (&symlink_rwsem); - if (de->u.symlink.linkname) kfree (de->u.symlink.linkname); - de->u.symlink.linkname = NULL; - up_write (&symlink_rwsem); - } - free_dentries (de); + if (!de->vfs_created) return -EPERM; + write_lock (&de->parent->u.dir.lock); + unhooked = _devfs_unhook (de); + write_unlock (&de->parent->u.dir.lock); + if (!unhooked) return -ENOENT; + devfsd_notify_de (de, DEVFSD_NOTIFY_DELETE, inode->i_mode, + inode->i_uid, inode->i_gid, dir->i_sb->u.generic_sbp); + free_dentry (de); + devfs_put (de); return 0; } /* End Function devfs_unlink */ @@ -2836,7 +2926,7 @@ fs_info = dir->i_sb->u.generic_sbp; /* First try to get the devfs entry for this directory */ - parent = get_devfs_entry_from_vfs_inode (dir, TRUE); + parent = get_devfs_entry_from_vfs_inode (dir); if (parent == NULL) return -ENOENT; err = devfs_do_symlink (parent, dentry->d_name.name, DEVFS_FL_NONE, symname, &de, NULL); @@ -2846,7 +2936,9 @@ DEVFS_NAME, err); #endif if (err < 0) return err; - de->inode.mode = de->mode; + de->vfs_created = TRUE; + de->inode.uid = current->euid; + de->inode.gid = current->egid; de->inode.atime = CURRENT_TIME; de->inode.mtime = CURRENT_TIME; de->inode.ctime = CURRENT_TIME; @@ -2857,50 +2949,33 @@ printk ("%s: symlink(): new VFS inode(%u): %p dentry: %p\n", DEVFS_NAME, de->inode.ino, inode, dentry); #endif - de->hide = FALSE; d_instantiate (dentry, inode); - devfsd_notify_one (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, - inode->i_uid, inode->i_gid, fs_info); + devfsd_notify_de (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, + inode->i_uid, inode->i_gid, fs_info); return 0; } /* End Function devfs_symlink */ static int devfs_mkdir (struct inode *dir, struct dentry *dentry, int mode) { - int is_new; + int err; struct fs_info *fs_info; struct devfs_entry *parent, *de; struct inode *inode; - mode = (mode & ~S_IFMT) | S_IFDIR; + mode = (mode & ~S_IFMT) | S_IFDIR; /* VFS doesn't pass S_IFMT part */ fs_info = dir->i_sb->u.generic_sbp; - /* First try to get the devfs entry for this directory */ - parent = get_devfs_entry_from_vfs_inode (dir, TRUE); + parent = get_devfs_entry_from_vfs_inode (dir); if (parent == NULL) return -ENOENT; - /* Try to reclaim an existing devfs entry, create if there isn't one */ - de = search_for_entry (parent, dentry->d_name.name, dentry->d_name.len, - FALSE, TRUE, &is_new, FALSE); - if (de == NULL) return -ENOMEM; - if (de->registered) - { - printk ("%s: mkdir(): existing 
entry\n", DEVFS_NAME); - return -EEXIST; - } - de->hide = FALSE; - if (!S_ISDIR (de->mode) && !is_new) - { - /* Transmogrifying an old entry */ - de->u.dir.first = NULL; - de->u.dir.last = NULL; - } - de->mode = mode; - de->u.dir.num_removable = 0; - de->inode.mode = mode; + de = _devfs_alloc_entry (dentry->d_name.name, dentry->d_name.len, mode); + if (!de) return -ENOMEM; + de->vfs_created = TRUE; + if ( ( err = _devfs_append_entry (parent, de, FALSE, NULL) ) != 0 ) + return err; de->inode.uid = current->euid; de->inode.gid = current->egid; de->inode.atime = CURRENT_TIME; de->inode.mtime = CURRENT_TIME; de->inode.ctime = CURRENT_TIME; - de->registered = TRUE; if ( ( inode = get_vfs_inode (dir->i_sb, de, dentry) ) == NULL ) return -ENOMEM; #ifdef CONFIG_DEVFS_DEBUG @@ -2909,100 +2984,73 @@ DEVFS_NAME, de->inode.ino, inode, dentry); #endif d_instantiate (dentry, inode); - devfsd_notify_one (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, - inode->i_uid, inode->i_gid, fs_info); + devfsd_notify_de (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, + inode->i_uid, inode->i_gid, fs_info); return 0; } /* End Function devfs_mkdir */ static int devfs_rmdir (struct inode *dir, struct dentry *dentry) { - int has_children = FALSE; + int err = 0; + struct devfs_entry *de; struct fs_info *fs_info; - struct devfs_entry *de, *child; struct inode *inode = dentry->d_inode; if (dir->i_sb->u.generic_sbp != inode->i_sb->u.generic_sbp) return -EINVAL; fs_info = dir->i_sb->u.generic_sbp; - de = get_devfs_entry_from_vfs_inode (inode, TRUE); + de = get_devfs_entry_from_vfs_inode (inode); if (de == NULL) return -ENOENT; if ( !S_ISDIR (de->mode) ) return -ENOTDIR; - for (child = de->u.dir.first; child != NULL; child = child->next) - { - if (child->registered) - { - has_children = TRUE; - break; - } - } - if (has_children) return -ENOTEMPTY; - devfsd_notify_one (de, DEVFSD_NOTIFY_DELETE, inode->i_mode, - inode->i_uid, inode->i_gid, fs_info); - de->hide = TRUE; - de->registered = FALSE; - free_dentries (de); + if (!de->vfs_created) return -EPERM; + /* First ensure the directory is empty and will stay thay way */ + write_lock (&de->u.dir.lock); + de->u.dir.no_more_additions = TRUE; + if (de->u.dir.first) err = -ENOTEMPTY; + write_unlock (&de->u.dir.lock); + if (err) return err; + /* Now unhook the directory from it's parent */ + write_lock (&de->parent->u.dir.lock); + if ( !_devfs_unhook (de) ) err = -ENOENT; + write_unlock (&de->parent->u.dir.lock); + if (err) return err; + devfsd_notify_de (de, DEVFSD_NOTIFY_DELETE, inode->i_mode, + inode->i_uid, inode->i_gid, fs_info); + free_dentry (de); + devfs_put (de); return 0; } /* End Function devfs_rmdir */ static int devfs_mknod (struct inode *dir, struct dentry *dentry, int mode, int rdev) { - int is_new; + int err; struct fs_info *fs_info; struct devfs_entry *parent, *de; struct inode *inode; #ifdef CONFIG_DEVFS_DEBUG - char txt[STRING_LENGTH]; - if (devfs_debug & DEBUG_I_MKNOD) - { - memset (txt, 0, STRING_LENGTH); - memcpy (txt, dentry->d_name.name, dentry->d_name.len); - txt[STRING_LENGTH - 1] = '\0'; printk ("%s: mknod(%s): mode: 0%o dev: %d\n", - DEVFS_NAME, txt, mode, rdev); - } + DEVFS_NAME, dentry->d_name.name, mode, rdev); #endif - fs_info = dir->i_sb->u.generic_sbp; - /* First try to get the devfs entry for this directory */ - parent = get_devfs_entry_from_vfs_inode (dir, TRUE); + parent = get_devfs_entry_from_vfs_inode (dir); if (parent == NULL) return -ENOENT; - /* Try to reclaim an existing devfs entry, create if there isn't one */ - de = search_for_entry (parent, 
dentry->d_name.name, dentry->d_name.len, - FALSE, TRUE, &is_new, FALSE); - if (de == NULL) return -ENOMEM; - if (de->registered) - { - printk ("%s: mknod(): existing entry\n", DEVFS_NAME); - return -EEXIST; - } - de->info = NULL; - de->mode = mode; + de = _devfs_alloc_entry (dentry->d_name.name, dentry->d_name.len, mode); + if (!de) return -ENOMEM; + de->vfs_created = TRUE; if ( S_ISBLK (mode) || S_ISCHR (mode) ) { de->u.fcb.u.device.major = MAJOR (rdev); de->u.fcb.u.device.minor = MINOR (rdev); - de->u.fcb.default_uid = current->euid; - de->u.fcb.default_gid = current->egid; - de->u.fcb.ops = NULL; - de->u.fcb.auto_owner = FALSE; - de->u.fcb.aopen_notify = FALSE; - de->u.fcb.open = FALSE; - } - else if ( S_ISFIFO (mode) ) - { - de->u.fifo.uid = current->euid; - de->u.fifo.gid = current->egid; } - de->hide = FALSE; - de->inode.mode = mode; + if ( ( err = _devfs_append_entry (parent, de, FALSE, NULL) ) != 0 ) + return err; de->inode.uid = current->euid; de->inode.gid = current->egid; de->inode.atime = CURRENT_TIME; de->inode.mtime = CURRENT_TIME; de->inode.ctime = CURRENT_TIME; - de->registered = TRUE; if ( ( inode = get_vfs_inode (dir->i_sb, de, dentry) ) == NULL ) return -ENOMEM; #ifdef CONFIG_DEVFS_DEBUG @@ -3011,8 +3059,8 @@ DEVFS_NAME, de->inode.ino, inode, dentry); #endif d_instantiate (dentry, inode); - devfsd_notify_one (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, - inode->i_uid, inode->i_gid, fs_info); + devfsd_notify_de (de, DEVFSD_NOTIFY_CREATE, inode->i_mode, + inode->i_uid, inode->i_gid, fs_info); return 0; } /* End Function devfs_mknod */ @@ -3021,12 +3069,9 @@ int err; struct devfs_entry *de; - de = get_devfs_entry_from_vfs_inode (dentry->d_inode, TRUE); + de = get_devfs_entry_from_vfs_inode (dentry->d_inode); if (!de) return -ENODEV; - down_read (&symlink_rwsem); - err = de->registered ? 
vfs_readlink (dentry, buffer, buflen, - de->u.symlink.linkname) : -ENODEV; - up_read (&symlink_rwsem); + err = vfs_readlink (dentry, buffer, buflen, de->u.symlink.linkname); return err; } /* End Function devfs_readlink */ @@ -3034,25 +3079,10 @@ { int err; struct devfs_entry *de; - char *copy; - de = get_devfs_entry_from_vfs_inode (dentry->d_inode, TRUE); + de = get_devfs_entry_from_vfs_inode (dentry->d_inode); if (!de) return -ENODEV; - down_read (&symlink_rwsem); - if (!de->registered) - { - up_read (&symlink_rwsem); - return -ENODEV; - } - copy = kmalloc (de->u.symlink.length + 1, GFP_KERNEL); - if (copy) memcpy (copy, de->u.symlink.linkname, de->u.symlink.length + 1); - up_read (&symlink_rwsem); - if (copy) - { - err = vfs_follow_link (nd, copy); - kfree (copy); - } - else err = -ENOMEM; + err = vfs_follow_link (nd, de->u.symlink.linkname); return err; } /* End Function devfs_follow_link */ @@ -3084,7 +3114,7 @@ { struct inode *root_inode = NULL; - if (get_root_entry () == NULL) goto out_no_root; + if (_devfs_get_root_entry () == NULL) goto out_no_root; atomic_set (&fs_info.devfsd_overrun_count, 0); init_waitqueue_head (&fs_info.devfsd_wait_queue); init_waitqueue_head (&fs_info.revalidate_wait_queue); @@ -3099,7 +3129,7 @@ sb->s_root = d_alloc_root (root_inode); if (!sb->s_root) goto out_no_root; #ifdef CONFIG_DEVFS_DEBUG - if (devfs_debug & DEBUG_DISABLED) + if (devfs_debug & DEBUG_S_READ) printk ("%s: read super, made devfs ptr: %p\n", DEVFS_NAME, sb->u.generic_sbp); #endif @@ -3123,6 +3153,7 @@ int done = FALSE; int ival; loff_t pos, devname_offset, tlen, rpos; + devfs_handle_t de; struct devfsd_buf_entry *entry; struct fs_info *fs_info = file->f_dentry->d_inode->i_sb->u.generic_sbp; struct devfsd_notify_struct *info = fs_info->devfsd_info; @@ -3149,40 +3180,28 @@ current->state = TASK_RUNNING; return -EINTR; } - set_current_state(TASK_INTERRUPTIBLE); + set_current_state (TASK_INTERRUPTIBLE); } remove_wait_queue (&fs_info->devfsd_wait_queue, &wait); current->state = TASK_RUNNING; /* Now play with the data */ ival = atomic_read (&fs_info->devfsd_overrun_count); - if (ival > 0) atomic_sub (ival, &fs_info->devfsd_overrun_count); info->overrun_count = ival; - entry = (struct devfsd_buf_entry *) fs_info->devfsd_buffer + - fs_info->devfsd_buf_out; + entry = fs_info->devfsd_first_event; info->type = entry->type; info->mode = entry->mode; info->uid = entry->uid; info->gid = entry->gid; - if (entry->type == DEVFSD_NOTIFY_LOOKUP) + de = entry->de; + if ( S_ISCHR (de->mode) || S_ISBLK (de->mode) ) { - info->namelen = strlen (entry->data); - pos = 0; - memcpy (info->devname, entry->data, info->namelen + 1); - } - else - { - devfs_handle_t de = entry->data; - - if ( S_ISCHR (de->mode) || S_ISBLK (de->mode) || S_ISREG (de->mode) ) - { - info->major = de->u.fcb.u.device.major; - info->minor = de->u.fcb.u.device.minor; - } - pos = devfs_generate_path (de, info->devname, DEVFS_PATHLEN); - if (pos < 0) return pos; - info->namelen = DEVFS_PATHLEN - pos - 1; - if (info->mode == 0) info->mode = de->mode; + info->major = de->u.fcb.u.device.major; + info->minor = de->u.fcb.u.device.minor; } + pos = devfs_generate_path (de, info->devname, DEVFS_PATHLEN); + if (pos < 0) return pos; + info->namelen = DEVFS_PATHLEN - pos - 1; + if (info->mode == 0) info->mode = de->mode; devname_offset = info->devname - (char *) info; rpos = *ppos; if (rpos < devname_offset) @@ -3214,10 +3233,13 @@ tlen = rpos - *ppos; if (done) { - unsigned int next_pos = fs_info->devfsd_buf_out + 1; - - if (next_pos >= devfsd_buf_size) 
next_pos = 0; - fs_info->devfsd_buf_out = next_pos; + spin_lock (&fs_info->devfsd_buffer_lock); + fs_info->devfsd_first_event = entry->next; + if (entry->next == NULL) fs_info->devfsd_last_event = NULL; + spin_unlock (&fs_info->devfsd_buffer_lock); + for (; de != NULL; de = de->parent) devfs_put (de); + kmem_cache_free (devfsd_buf_cache, entry); + if (ival > 0) atomic_sub (ival, &fs_info->devfsd_overrun_count); *ppos = 0; } else *ppos = rpos; @@ -3253,15 +3275,13 @@ fs_info->devfsd_task = current; spin_unlock (&lock); fs_info->devfsd_file = file; - fs_info->devfsd_buffer = (void *) __get_free_page (GFP_KERNEL); fs_info->devfsd_info = kmalloc (sizeof *fs_info->devfsd_info, GFP_KERNEL); - if (!fs_info->devfsd_buffer || !fs_info->devfsd_info) + if (!fs_info->devfsd_info) { devfsd_close (inode, file); return -ENOMEM; } - fs_info->devfsd_buf_out = fs_info->devfsd_buf_in; } else if (fs_info->devfsd_task != current) return -EBUSY; fs_info->devfsd_event_mask = arg; /* Let the masses come forth */ @@ -3284,29 +3304,48 @@ static int devfsd_close (struct inode *inode, struct file *file) { - unsigned long flags; + struct devfsd_buf_entry *entry; struct fs_info *fs_info = inode->i_sb->u.generic_sbp; if (fs_info->devfsd_file != file) return 0; fs_info->devfsd_event_mask = 0; fs_info->devfsd_file = NULL; - spin_lock_irqsave (&fs_info->devfsd_buffer_lock, flags); - if (fs_info->devfsd_buffer) - { - free_page ( (unsigned long) fs_info->devfsd_buffer ); - fs_info->devfsd_buffer = NULL; - } + spin_lock (&fs_info->devfsd_buffer_lock); + entry = fs_info->devfsd_first_event; + fs_info->devfsd_first_event = NULL; + fs_info->devfsd_last_event = NULL; if (fs_info->devfsd_info) { kfree (fs_info->devfsd_info); fs_info->devfsd_info = NULL; } - spin_unlock_irqrestore (&fs_info->devfsd_buffer_lock, flags); + spin_unlock (&fs_info->devfsd_buffer_lock); fs_info->devfsd_task = NULL; wake_up (&fs_info->revalidate_wait_queue); + for (; entry; entry = entry->next) + kmem_cache_free (devfsd_buf_cache, entry); return 0; } /* End Function devfsd_close */ +#ifdef CONFIG_DEVFS_DEBUG +static ssize_t stat_read (struct file *file, char *buf, size_t len, + loff_t *ppos) +{ + ssize_t num; + char txt[80]; + + num = sprintf (txt, "Number of entries: %u number of bytes: %u\n", + stat_num_entries, stat_num_bytes) + 1; + /* Can't seek (pread) on this device */ + if (ppos != &file->f_pos) return -ESPIPE; + if (*ppos >= num) return 0; + if (*ppos + len > num) len = num - *ppos; + if ( copy_to_user (buf, txt + *ppos, len) ) return -EFAULT; + *ppos += len; + return len; +} /* End Function stat_read */ +#endif + static int __init init_devfs_fs (void) { @@ -3333,6 +3372,9 @@ { int err; + devfsd_buf_cache = kmem_cache_create ("devfsd_event", + sizeof (struct devfsd_buf_entry), + 0, 0, NULL, NULL); if ( !(boot_options & OPTION_MOUNT) ) return; err = do_mount ("none", "/dev", "devfs", 0, ""); if (err == 0) printk ("Mounted devfs on /dev\n"); diff -urN linux-2.5.1-pre1/fs/ext2/inode.c linux/fs/ext2/inode.c --- linux-2.5.1-pre1/fs/ext2/inode.c Wed Nov 21 14:07:25 2001 +++ linux/fs/ext2/inode.c Sat Dec 1 00:37:05 2001 @@ -505,7 +505,7 @@ * reachable from inode. 
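
[Annotation] The devfsd hunks above replace the old page-sized ring buffer (devfsd_buffer, devfsd_buf_in/out) with a linked list of slab-allocated events, and each queued event now pins its devfs entry (and, via the parent walk in devfsd_read(), the whole chain) until devfsd has consumed it. A minimal sketch of the new queue discipline, using the field and cache names visible in the patch; the entry layout beyond next/de is abbreviated and the enqueue helper name is invented:

    struct devfsd_buf_entry
    {
        struct devfsd_buf_entry *next;
        devfs_handle_t de;      /*  pinned with devfs_get() at queue time  */
        unsigned int type;      /*  DEVFSD_NOTIFY_*                        */
        umode_t mode;
        uid_t uid;
        gid_t gid;
    };

    static void example_queue_event (struct fs_info *fs_info,
                                     struct devfsd_buf_entry *entry)
    {
        entry->next = NULL;
        spin_lock (&fs_info->devfsd_buffer_lock);
        if (fs_info->devfsd_last_event)
            fs_info->devfsd_last_event->next = entry;
        else fs_info->devfsd_first_event = entry;  /*  list was empty  */
        fs_info->devfsd_last_event = entry;
        spin_unlock (&fs_info->devfsd_buffer_lock);
    }

devfsd_read() pops from devfsd_first_event under the same lock, frees the entry back to devfsd_buf_cache, and drops the reference on the entry and each of its parents, as the hunk above shows.
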
*/ -static int ext2_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create) +static int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { int err = -EIO; int offsets[4]; diff -urN linux-2.5.1-pre1/fs/ext3/inode.c linux/fs/ext3/inode.c --- linux-2.5.1-pre1/fs/ext3/inode.c Fri Nov 9 14:25:04 2001 +++ linux/fs/ext3/inode.c Sat Dec 1 00:37:05 2001 @@ -719,7 +719,7 @@ */ static int ext3_get_block_handle(handle_t *handle, struct inode *inode, - long iblock, + sector_t iblock, struct buffer_head *bh_result, int create) { int err = -EIO; @@ -823,7 +823,7 @@ goto reread; } -static int ext3_get_block(struct inode *inode, long iblock, +static int ext3_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { handle_t *handle = 0; diff -urN linux-2.5.1-pre1/fs/iobuf.c linux/fs/iobuf.c --- linux-2.5.1-pre1/fs/iobuf.c Fri Apr 27 14:23:25 2001 +++ linux/fs/iobuf.c Sat Dec 1 00:37:05 2001 @@ -8,70 +8,45 @@ #include #include -#include -void end_kio_request(struct kiobuf *kiobuf, int uptodate) +int end_kio_request(struct kiobuf *kiobuf, int uptodate) { + int ret = 1; + if ((!uptodate) && !kiobuf->errno) kiobuf->errno = -EIO; if (atomic_dec_and_test(&kiobuf->io_count)) { + ret = 0; if (kiobuf->end_io) kiobuf->end_io(kiobuf); wake_up(&kiobuf->wait_queue); } + + return ret; } static void kiobuf_init(struct kiobuf *iobuf) { memset(iobuf, 0, sizeof(*iobuf)); init_waitqueue_head(&iobuf->wait_queue); + atomic_set(&iobuf->io_count, 0); iobuf->array_len = KIO_STATIC_PAGES; iobuf->maplist = iobuf->map_array; } -int alloc_kiobuf_bhs(struct kiobuf * kiobuf) -{ - int i; - - for (i = 0; i < KIO_MAX_SECTORS; i++) - if (!(kiobuf->bh[i] = kmem_cache_alloc(bh_cachep, SLAB_KERNEL))) { - while (i--) { - kmem_cache_free(bh_cachep, kiobuf->bh[i]); - kiobuf->bh[i] = NULL; - } - return -ENOMEM; - } - return 0; -} - -void free_kiobuf_bhs(struct kiobuf * kiobuf) -{ - int i; - - for (i = 0; i < KIO_MAX_SECTORS; i++) { - kmem_cache_free(bh_cachep, kiobuf->bh[i]); - kiobuf->bh[i] = NULL; - } -} - int alloc_kiovec(int nr, struct kiobuf **bufp) { int i; struct kiobuf *iobuf; for (i = 0; i < nr; i++) { - iobuf = vmalloc(sizeof(struct kiobuf)); + iobuf = kmalloc(sizeof(struct kiobuf), GFP_KERNEL); if (!iobuf) { free_kiovec(i, bufp); return -ENOMEM; } kiobuf_init(iobuf); - if (alloc_kiobuf_bhs(iobuf)) { - vfree(iobuf); - free_kiovec(i, bufp); - return -ENOMEM; - } bufp[i] = iobuf; } @@ -89,8 +64,7 @@ unlock_kiovec(1, &iobuf); if (iobuf->array_len > KIO_STATIC_PAGES) kfree (iobuf->maplist); - free_kiobuf_bhs(iobuf); - vfree(bufp[i]); + kfree(bufp[i]); } } diff -urN linux-2.5.1-pre1/fs/isofs/inode.c linux/fs/isofs/inode.c --- linux-2.5.1-pre1/fs/isofs/inode.c Thu Oct 25 13:53:53 2001 +++ linux/fs/isofs/inode.c Sat Dec 1 00:37:05 2001 @@ -888,7 +888,7 @@ * or getblk() if they are not. Returns the number of blocks inserted * (0 == error.) */ -int isofs_get_blocks(struct inode *inode, long iblock, +int isofs_get_blocks(struct inode *inode, sector_t iblock, struct buffer_head **bh_result, unsigned long nblocks) { unsigned long b_off; @@ -976,7 +976,7 @@ /* * Used by the standard interfaces. 
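
[Annotation] The ext2, ext3 and isofs hunks here (and the reiserfs and udf ones below) are all the same mechanical change: get_block-style callbacks take a sector_t block number instead of a long, so the block-number type can grow to 64 bits independently of the host word size. A minimal sketch of the resulting contract, for a hypothetical examplefs:

    #include <linux/fs.h>

    static int examplefs_get_block(struct inode *inode, sector_t iblock,
                                   struct buffer_head *bh_result, int create)
    {
        /* map logical block `iblock' of the file onto the device,
         * filling in bh_result (allocating if `create' is nonzero) */
        return 0;
    }
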
*/ -static int isofs_get_block(struct inode *inode, long iblock, +static int isofs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { if ( create ) { diff -urN linux-2.5.1-pre1/fs/namespace.c linux/fs/namespace.c --- linux-2.5.1-pre1/fs/namespace.c Sun Nov 11 11:23:14 2001 +++ linux/fs/namespace.c Sat Dec 1 00:37:05 2001 @@ -19,9 +19,6 @@ #include -#include -#include -#include #include struct vfsmount *do_kern_mount(char *type, int flags, char *name, void *data); @@ -198,50 +195,10 @@ seq_escape(m, s, " \t\n\\"); } -static void show_nfs_mount(struct seq_file *m, struct vfsmount *mnt) -{ - static struct proc_nfs_info { - int flag; - char *str; - char *nostr; - } nfs_info[] = { - { NFS_MOUNT_SOFT, ",soft", ",hard" }, - { NFS_MOUNT_INTR, ",intr", "" }, - { NFS_MOUNT_POSIX, ",posix", "" }, - { NFS_MOUNT_TCP, ",tcp", ",udp" }, - { NFS_MOUNT_NOCTO, ",nocto", "" }, - { NFS_MOUNT_NOAC, ",noac", "" }, - { NFS_MOUNT_NONLM, ",nolock", ",lock" }, - { NFS_MOUNT_BROKEN_SUID, ",broken_suid", "" }, - { 0, NULL, NULL } - }; - struct proc_nfs_info *nfs_infop; - struct nfs_server *nfss = &mnt->mnt_sb->u.nfs_sb.s_server; - - seq_printf(m, ",v%d", nfss->rpc_ops->version); - seq_printf(m, ",rsize=%d", nfss->rsize); - seq_printf(m, ",wsize=%d", nfss->wsize); - if (nfss->acregmin != 3*HZ) - seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); - if (nfss->acregmax != 60*HZ) - seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); - if (nfss->acdirmin != 30*HZ) - seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); - if (nfss->acdirmax != 60*HZ) - seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); - for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { - if (nfss->flags & nfs_infop->flag) - seq_puts(m, nfs_infop->str); - else - seq_puts(m, nfs_infop->nostr); - } - seq_puts(m, ",addr="); - mangle(m, nfss->hostname); -} - static int show_vfsmnt(struct seq_file *m, void *v) { struct vfsmount *mnt = v; + int err = 0; static struct proc_fs_info { int flag; char *str; @@ -281,10 +238,10 @@ if (mnt->mnt_flags & fs_infop->flag) seq_puts(m, fs_infop->str); } - if (strcmp("nfs", mnt->mnt_sb->s_type->name) == 0) - show_nfs_mount(m, mnt); + if (mnt->mnt_sb->s_op->show_options) + err = mnt->mnt_sb->s_op->show_options(m, mnt); seq_puts(m, " 0 0\n"); - return 0; + return err; } struct seq_operations mounts_op = { diff -urN linux-2.5.1-pre1/fs/nfs/inode.c linux/fs/nfs/inode.c --- linux-2.5.1-pre1/fs/nfs/inode.c Fri Nov 9 14:28:15 2001 +++ linux/fs/nfs/inode.c Sat Dec 1 00:37:05 2001 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,7 @@ static void nfs_clear_inode(struct inode *); static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct super_block *, struct statfs *); +static int nfs_show_options(struct seq_file *, struct vfsmount *); static struct super_operations nfs_sops = { read_inode: nfs_read_inode, @@ -60,6 +62,7 @@ statfs: nfs_statfs, clear_inode: nfs_clear_inode, umount_begin: nfs_umount_begin, + show_options: nfs_show_options, }; /* @@ -551,6 +554,48 @@ out_err: printk("nfs_statfs: statfs error = %d\n", -error); buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; + return 0; +} + +static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + static struct proc_nfs_info { + int flag; + char *str; + char *nostr; + } nfs_info[] = { + { NFS_MOUNT_SOFT, ",soft", ",hard" }, + { NFS_MOUNT_INTR, ",intr", "" }, + { NFS_MOUNT_POSIX, ",posix", "" }, + { NFS_MOUNT_TCP, ",tcp", ",udp" }, + { NFS_MOUNT_NOCTO, 
",nocto", "" }, + { NFS_MOUNT_NOAC, ",noac", "" }, + { NFS_MOUNT_NONLM, ",nolock", ",lock" }, + { NFS_MOUNT_BROKEN_SUID, ",broken_suid", "" }, + { 0, NULL, NULL } + }; + struct proc_nfs_info *nfs_infop; + struct nfs_server *nfss = &mnt->mnt_sb->u.nfs_sb.s_server; + + seq_printf(m, ",v%d", nfss->rpc_ops->version); + seq_printf(m, ",rsize=%d", nfss->rsize); + seq_printf(m, ",wsize=%d", nfss->wsize); + if (nfss->acregmin != 3*HZ) + seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); + if (nfss->acregmax != 60*HZ) + seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); + if (nfss->acdirmin != 30*HZ) + seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); + if (nfss->acdirmax != 60*HZ) + seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); + for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { + if (nfss->flags & nfs_infop->flag) + seq_puts(m, nfs_infop->str); + else + seq_puts(m, nfs_infop->nostr); + } + seq_puts(m, ",addr="); + seq_escape(m, nfss->hostname, " \t\n\\"); return 0; } diff -urN linux-2.5.1-pre1/fs/partitions/check.c linux/fs/partitions/check.c --- linux-2.5.1-pre1/fs/partitions/check.c Thu Oct 11 17:25:10 2001 +++ linux/fs/partitions/check.c Sat Dec 1 00:37:05 2001 @@ -1,4 +1,6 @@ /* + * fs/partitions/check.c + * * Code extracted from drivers/block/genhd.c * Copyright (C) 1991-1998 Linus Torvalds * Re-organised Feb 1998 Russell King @@ -34,8 +36,6 @@ #include "ibm.h" #include "ultrix.h" -extern int *blk_size[]; - int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ static int (*check_part[])(struct gendisk *hd, struct block_device *bdev, unsigned long first_sect, int first_minor) = { @@ -369,38 +369,50 @@ { if (!gdev) return; - grok_partitions(gdev, MINOR(dev)>>gdev->minor_shift, minors, size); + grok_partitions(dev, size); } -void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size) +void grok_partitions(kdev_t dev, long size) { - int i; - int first_minor = drive << dev->minor_shift; - int end_minor = first_minor + dev->max_p; + int i, minors, first_minor, end_minor; + struct gendisk *g = get_gendisk(dev); + + if (!g) + return; + + minors = 1 << g->minor_shift; + first_minor = MINOR(dev); + if (first_minor & (minors-1)) { + printk("grok_partitions: bad device 0x%02x:%02x\n", + MAJOR(dev), first_minor); + first_minor &= ~(minors-1); + } + end_minor = first_minor + minors; + + if (!g->sizes) + blk_size[g->major] = NULL; - if(!dev->sizes) - blk_size[dev->major] = NULL; + g->part[first_minor].nr_sects = size; - dev->part[first_minor].nr_sects = size; /* No such device or no minors to use for partitions */ if (!size || minors == 1) return; - if (dev->sizes) { - dev->sizes[first_minor] = size >> (BLOCK_SIZE_BITS - 9); + if (g->sizes) { + g->sizes[first_minor] = size >> (BLOCK_SIZE_BITS - 9); for (i = first_minor + 1; i < end_minor; i++) - dev->sizes[i] = 0; + g->sizes[i] = 0; } - blk_size[dev->major] = dev->sizes; - check_partition(dev, MKDEV(dev->major, first_minor), 1 + first_minor); + blk_size[g->major] = g->sizes; + check_partition(g, MKDEV(g->major, first_minor), 1 + first_minor); /* * We need to set the sizes array before we will be able to access * any of the partitions on this device. 
*/ - if (dev->sizes != NULL) { /* optional safeguard in ll_rw_blk.c */ + if (g->sizes != NULL) { /* optional safeguard in ll_rw_blk.c */ for (i = first_minor; i < end_minor; i++) - dev->sizes[i] = dev->part[i].nr_sects >> (BLOCK_SIZE_BITS - 9); + g->sizes[i] = g->part[i].nr_sects >> (BLOCK_SIZE_BITS - 9); } } @@ -425,4 +437,44 @@ } p->v = NULL; return NULL; +} + +int wipe_partitions(kdev_t dev) +{ + struct gendisk *g; + kdev_t devp; + int p, major, minor, minor0, max_p, res; + + g = get_gendisk(dev); + if (g == NULL) + return -EINVAL; + + max_p = 1 << g->minor_shift; + major = MAJOR(dev); + minor = MINOR(dev); + minor0 = minor & ~(max_p - 1); + if (minor0 != minor) /* for now only whole-disk reread */ + return -EINVAL; /* %%% later.. */ + + /* invalidate stuff */ + for (p = max_p - 1; p >= 0; p--) { + minor = minor0 + p; + devp = MKDEV(major,minor); +#if 0 /* %%% superfluous? */ + if (g->part[minor].nr_sects == 0) + continue; +#endif + res = invalidate_device(devp, 1); + if (res) + return res; + g->part[minor].start_sect = 0; + g->part[minor].nr_sects = 0; + } + + /* some places do blksize_size[major][minor] = 1024, + as preparation for reading partition table - superfluous */ + /* sd.c used to set blksize_size to 2048 in case + rscsi_disks[target].device->sector_size == 2048 */ + + return 0; } diff -urN linux-2.5.1-pre1/fs/partitions/check.h linux/fs/partitions/check.h --- linux-2.5.1-pre1/fs/partitions/check.h Mon Oct 1 20:03:26 2001 +++ linux/fs/partitions/check.h Sat Dec 1 00:37:05 2001 @@ -1,5 +1,5 @@ /* - * add_partition adds a partitions details to the devices partition + * add_gd_partition adds a partitions details to the devices partition * description. */ void add_gd_partition(struct gendisk *hd, int minor, int start, int size); diff -urN linux-2.5.1-pre1/fs/readdir.c linux/fs/readdir.c --- linux-2.5.1-pre1/fs/readdir.c Sun Aug 12 14:59:08 2001 +++ linux/fs/readdir.c Sat Dec 1 00:37:05 2001 @@ -79,6 +79,10 @@ while(1) { struct dentry *de = list_entry(list, struct dentry, d_child); + /* + * See comment on top of function on why we + * can just drop the lock here.. + */ if (!list_empty(&de->d_hash) && de->d_inode) { spin_unlock(&dcache_lock); if (filldir(dirent, de->d_name.name, de->d_name.len, filp->f_pos, de->d_inode->i_ino, DT_UNKNOWN) < 0) diff -urN linux-2.5.1-pre1/fs/reiserfs/inode.c linux/fs/reiserfs/inode.c --- linux-2.5.1-pre1/fs/reiserfs/inode.c Tue Oct 30 15:11:34 2001 +++ linux/fs/reiserfs/inode.c Sat Dec 1 00:37:05 2001 @@ -390,7 +390,7 @@ // this is called to create file map. So, _get_block_create_0 will not // read direct item -int reiserfs_bmap (struct inode * inode, long block, +int reiserfs_bmap (struct inode * inode, sector_t block, struct buffer_head * bh_result, int create) { if (!file_capable (inode, block)) @@ -420,7 +420,7 @@ ** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, ** don't use this function. */ -static int reiserfs_get_block_create_0 (struct inode * inode, long block, +static int reiserfs_get_block_create_0 (struct inode * inode, sector_t block, struct buffer_head * bh_result, int create) { return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ; } @@ -511,7 +511,7 @@ // determine which parts are derivative, if any, understanding that // there are only so many ways to code to a given interface. 
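
[Annotation] grok_partitions() is now keyed purely by kdev_t (it looks up the gendisk and minor range itself), and the new wipe_partitions() centralises the invalidate-and-clear loop that drivers previously open-coded before re-reading a partition table. A sketch of how a driver revalidation path might combine the two; the capacity helper is hypothetical:

    extern long example_disk_capacity(kdev_t dev);  /* hypothetical */

    static int example_revalidate(kdev_t dev)
    {
        long size = example_disk_capacity(dev);
        int err = wipe_partitions(dev);  /* whole-disk minors only */

        if (err)
            return err;
        grok_partitions(dev, size);      /* rescan and re-register */
        return 0;
    }
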
// -int reiserfs_get_block (struct inode * inode, long block, +int reiserfs_get_block (struct inode * inode, sector_t block, struct buffer_head * bh_result, int create) { int repeat, retval; @@ -1963,7 +1963,7 @@ // // this is exactly what 2.3.99-pre9's ext2_bmap is // -static int reiserfs_aop_bmap(struct address_space *as, long block) { +static int reiserfs_aop_bmap(struct address_space *as, sector_t block) { return generic_block_bmap(as, block, reiserfs_bmap) ; } diff -urN linux-2.5.1-pre1/fs/udf/inode.c linux/fs/udf/inode.c --- linux-2.5.1-pre1/fs/udf/inode.c Fri Oct 12 13:48:42 2001 +++ linux/fs/udf/inode.c Sat Dec 1 00:37:05 2001 @@ -61,7 +61,7 @@ static void udf_update_extents(struct inode *, long_ad [EXTENT_MERGE_SIZE], int, int, lb_addr, Uint32, struct buffer_head **); -static int udf_get_block(struct inode *, long, struct buffer_head *, int); +static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); /* * udf_put_inode @@ -314,7 +314,7 @@ return dbh; } -static int udf_get_block(struct inode *inode, long block, struct buffer_head *bh_result, int create) +static int udf_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create) { int err, new; struct buffer_head *bh; diff -urN linux-2.5.1-pre1/include/asm-alpha/io.h linux/include/asm-alpha/io.h --- linux-2.5.1-pre1/include/asm-alpha/io.h Fri Nov 9 13:45:35 2001 +++ linux/include/asm-alpha/io.h Sat Dec 1 00:37:05 2001 @@ -60,6 +60,8 @@ return (void *) (address + IDENT_ADDR); } +#define page_to_phys(page) (((page) - (page)->zone->zone_mem_map) << PAGE_SHIFT) + /* * Change addresses as seen by the kernel (virtual) to addresses as * seen by a device (bus), and vice versa. diff -urN linux-2.5.1-pre1/include/asm-i386/checksum.h linux/include/asm-i386/checksum.h --- linux-2.5.1-pre1/include/asm-i386/checksum.h Thu Jul 26 13:41:22 2001 +++ linux/include/asm-i386/checksum.h Sat Dec 1 00:37:05 2001 @@ -69,25 +69,24 @@ unsigned int ihl) { unsigned int sum; - __asm__ __volatile__(" - movl (%1), %0 - subl $4, %2 - jbe 2f - addl 4(%1), %0 - adcl 8(%1), %0 - adcl 12(%1), %0 -1: adcl 16(%1), %0 - lea 4(%1), %1 - decl %2 - jne 1b - adcl $0, %0 - movl %0, %2 - shrl $16, %0 - addw %w2, %w0 - adcl $0, %0 - notl %0 -2: - " + __asm__ __volatile__( + "movl (%1), %0 ;\n" + "subl $4, %2 ;\n" + "jbe 2f ;\n" + "addl 4(%1), %0 ;\n" + "adcl 8(%1), %0 ;\n" + "adcl 12(%1), %0 ;\n" +"1: adcl 16(%1), %0 ;\n" + "lea 4(%1), %1 ;\n" + "decl %2 ;\n" + "jne 1b ;\n" + "adcl $0, %0 ;\n" + "movl %0, %2 ;\n" + "shrl $16, %0 ;\n" + "addw %w2, %w0 ;\n" + "adcl $0, %0 ;\n" + "notl %0 ;\n" +"2: ;\n" /* Since the input registers which are loaded with iph and ipl are modified, we must also specify them as outputs, or gcc will assume they contain their original values. 
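
[Annotation] The checksum.h rewrites here (and the floppy.h one below) are not functional changes: newer gcc drops support for multi-line string literals, so each instruction becomes its own quoted string, relying on C's adjacent-literal concatenation, with an explicit ";\n" so the generated assembly still reads one instruction per line. Schematically, with a toy carry-fold rather than one of the routines above:

    /* old: a single literal spanning source lines, rejected by gcc 3.x:
     *
     *    __asm__("
     *        addl %1, %0
     *        adcl $0, %0
     *        " : "=r" (sum) : "g" (x), "0" (sum));
     */

    /* new: one string per line, concatenated at compile time */
    static inline unsigned int example_fold(unsigned int sum, unsigned int x)
    {
        __asm__("addl %1, %0 ;\n"
                "adcl $0, %0 ;\n"
                : "=r" (sum)
                : "g" (x), "0" (sum));
        return sum;
    }
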
*/ @@ -102,10 +101,9 @@ static inline unsigned int csum_fold(unsigned int sum) { - __asm__(" - addl %1, %0 - adcl $0xffff, %0 - " + __asm__( + "addl %1, %0 ;\n" + "adcl $0xffff, %0 ;\n" : "=r" (sum) : "r" (sum << 16), "0" (sum & 0xffff0000) ); @@ -118,12 +116,11 @@ unsigned short proto, unsigned int sum) { - __asm__(" - addl %1, %0 - adcl %2, %0 - adcl %3, %0 - adcl $0, %0 - " + __asm__( + "addl %1, %0 ;\n" + "adcl %2, %0 ;\n" + "adcl %3, %0 ;\n" + "adcl $0, %0 ;\n" : "=r" (sum) : "g" (daddr), "g"(saddr), "g"((ntohs(len)<<16)+proto*256), "0"(sum)); return sum; @@ -158,19 +155,18 @@ unsigned short proto, unsigned int sum) { - __asm__(" - addl 0(%1), %0 - adcl 4(%1), %0 - adcl 8(%1), %0 - adcl 12(%1), %0 - adcl 0(%2), %0 - adcl 4(%2), %0 - adcl 8(%2), %0 - adcl 12(%2), %0 - adcl %3, %0 - adcl %4, %0 - adcl $0, %0 - " + __asm__( + "addl 0(%1), %0" + "adcl 4(%1), %0" + "adcl 8(%1), %0" + "adcl 12(%1), %0" + "adcl 0(%2), %0" + "adcl 4(%2), %0" + "adcl 8(%2), %0" + "adcl 12(%2), %0" + "adcl %3, %0" + "adcl %4, %0" + "adcl $0, %0" : "=&r" (sum) : "r" (saddr), "r" (daddr), "r"(htonl(len)), "r"(htonl(proto)), "0"(sum)); diff -urN linux-2.5.1-pre1/include/asm-i386/floppy.h linux/include/asm-i386/floppy.h --- linux-2.5.1-pre1/include/asm-i386/floppy.h Thu Nov 22 11:46:19 2001 +++ linux/include/asm-i386/floppy.h Sat Dec 1 00:37:05 2001 @@ -75,28 +75,28 @@ #ifndef NO_FLOPPY_ASSEMBLER __asm__ ( - "testl %1,%1 - je 3f -1: inb %w4,%b0 - andb $160,%b0 - cmpb $160,%b0 - jne 2f - incw %w4 - testl %3,%3 - jne 4f - inb %w4,%b0 - movb %0,(%2) - jmp 5f -4: movb (%2),%0 - outb %b0,%w4 -5: decw %w4 - outb %0,$0x80 - decl %1 - incl %2 - testl %1,%1 - jne 1b -3: inb %w4,%b0 -2: " + "testl %1,%1" + "je 3f" +"1: inb %w4,%b0" + "andb $160,%b0" + "cmpb $160,%b0" + "jne 2f" + "incw %w4" + "testl %3,%3" + "jne 4f" + "inb %w4,%b0" + "movb %0,(%2)" + "jmp 5f" +"4: movb (%2),%0" + "outb %b0,%w4" +"5: decw %w4" + "outb %0,$0x80" + "decl %1" + "incl %2" + "testl %1,%1" + "jne 1b" +"3: inb %w4,%b0" +"2: " : "=a" ((char) st), "=c" ((long) virtual_dma_count), "=S" ((long) virtual_dma_addr) diff -urN linux-2.5.1-pre1/include/asm-i386/kmap_types.h linux/include/asm-i386/kmap_types.h --- linux-2.5.1-pre1/include/asm-i386/kmap_types.h Mon Sep 17 13:16:30 2001 +++ linux/include/asm-i386/kmap_types.h Sat Dec 1 00:37:05 2001 @@ -7,6 +7,7 @@ KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, + KM_BIO_IRQ, KM_TYPE_NR }; diff -urN linux-2.5.1-pre1/include/asm-i386/page.h linux/include/asm-i386/page.h --- linux-2.5.1-pre1/include/asm-i386/page.h Thu Nov 22 11:46:18 2001 +++ linux/include/asm-i386/page.h Sat Dec 1 00:37:05 2001 @@ -101,6 +101,12 @@ BUG(); \ } while (0) +#define BUG_ON(condition) \ + do { \ + if (unlikely((int)(condition))) \ + BUG(); \ + } while (0) + /* Pure 2^n version of get_order */ static __inline__ int get_order(unsigned long size) { diff -urN linux-2.5.1-pre1/include/asm-i386/pgalloc.h linux/include/asm-i386/pgalloc.h --- linux-2.5.1-pre1/include/asm-i386/pgalloc.h Thu Nov 22 11:46:19 2001 +++ linux/include/asm-i386/pgalloc.h Sat Dec 1 00:37:05 2001 @@ -18,15 +18,21 @@ * Allocate and free page tables. */ -#if CONFIG_X86_PAE +#if defined (CONFIG_X86_PAE) +/* + * We can't include here, thus these uglinesses. 
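
[Annotation] BUG_ON(), added to asm-i386/page.h above, is shorthand for the ubiquitous "if (bad) BUG();" pattern with the condition wrapped in unlikely() as a branch hint; bio.h below already builds its BIO_BUG_ON on top of it. Conversion is mechanical; an invented example:

    /* before */
    if (atomic_read(&bio->bi_cnt) <= 0)
        BUG();

    /* after */
    BUG_ON(atomic_read(&bio->bi_cnt) <= 0);
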
+ */ +struct kmem_cache_s; + +extern struct kmem_cache_s *pae_pgd_cachep; +extern void *kmem_cache_alloc(struct kmem_cache_s *, int); +extern void kmem_cache_free(struct kmem_cache_s *, void *); -extern void *kmalloc(size_t, int); -extern void kfree(const void *); -static __inline__ pgd_t *get_pgd_slow(void) +static inline pgd_t *get_pgd_slow(void) { int i; - pgd_t *pgd = kmalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + pgd_t *pgd = kmem_cache_alloc(pae_pgd_cachep, GFP_KERNEL); if (pgd) { for (i = 0; i < USER_PTRS_PER_PGD; i++) { @@ -36,32 +42,36 @@ clear_page(pmd); set_pgd(pgd + i, __pgd(1 + __pa(pmd))); } - memcpy(pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + memcpy(pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); } return pgd; out_oom: for (i--; i >= 0; i--) free_page((unsigned long)__va(pgd_val(pgd[i])-1)); - kfree(pgd); + kmem_cache_free(pae_pgd_cachep, pgd); return NULL; } #else -static __inline__ pgd_t *get_pgd_slow(void) +static inline pgd_t *get_pgd_slow(void) { pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); if (pgd) { memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); - memcpy(pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + memcpy(pgd + USER_PTRS_PER_PGD, + swapper_pg_dir + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); } return pgd; } -#endif +#endif /* CONFIG_X86_PAE */ -static __inline__ pgd_t *get_pgd_fast(void) +static inline pgd_t *get_pgd_fast(void) { unsigned long *ret; @@ -74,21 +84,21 @@ return (pgd_t *)ret; } -static __inline__ void free_pgd_fast(pgd_t *pgd) +static inline void free_pgd_fast(pgd_t *pgd) { *(unsigned long *)pgd = (unsigned long) pgd_quicklist; pgd_quicklist = (unsigned long *) pgd; pgtable_cache_size++; } -static __inline__ void free_pgd_slow(pgd_t *pgd) +static inline void free_pgd_slow(pgd_t *pgd) { -#if CONFIG_X86_PAE +#if defined(CONFIG_X86_PAE) int i; for (i = 0; i < USER_PTRS_PER_PGD; i++) free_page((unsigned long)__va(pgd_val(pgd[i])-1)); - kfree(pgd); + kmem_cache_free(pae_pgd_cachep, pgd); #else free_page((unsigned long)pgd); #endif @@ -104,7 +114,8 @@ return pte; } -static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address) +static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, + unsigned long address) { unsigned long *ret; @@ -116,7 +127,7 @@ return (pte_t *)ret; } -static __inline__ void pte_free_fast(pte_t *pte) +static inline void pte_free_fast(pte_t *pte) { *(unsigned long *)pte = (unsigned long) pte_quicklist; pte_quicklist = (unsigned long *) pte; @@ -128,14 +139,9 @@ free_page((unsigned long)pte); } -#define pte_free(pte) pte_free_fast(pte) -#ifdef CONFIG_X86_PAE -#define pgd_alloc(mm) get_pgd_slow() +#define pte_free(pte) pte_free_slow(pte) #define pgd_free(pgd) free_pgd_slow(pgd) -#else #define pgd_alloc(mm) get_pgd_fast() -#define pgd_free(pgd) free_pgd_fast(pgd) -#endif /* * allocating and freeing a pmd is trivial: the 1-entry pmd is diff -urN linux-2.5.1-pre1/include/asm-m68k/machdep.h linux/include/asm-m68k/machdep.h --- linux-2.5.1-pre1/include/asm-m68k/machdep.h Mon Nov 27 17:57:34 2000 +++ linux/include/asm-m68k/machdep.h Sat Dec 1 00:37:05 2001 @@ -5,7 +5,6 @@ struct kbd_repeat; struct mktime; struct hwclk_time; -struct gendisk; struct buffer_head; extern void (*mach_sched_init) (void (*handler)(int, void *, struct pt_regs *)); diff -urN 
linux-2.5.1-pre1/include/asm-ppc/kmap_types.h linux/include/asm-ppc/kmap_types.h --- linux-2.5.1-pre1/include/asm-ppc/kmap_types.h Mon Sep 17 13:16:30 2001 +++ linux/include/asm-ppc/kmap_types.h Sat Dec 1 00:37:05 2001 @@ -11,6 +11,7 @@ KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, + KM_BIO_IRQ, KM_TYPE_NR }; diff -urN linux-2.5.1-pre1/include/asm-sparc/kmap_types.h linux/include/asm-sparc/kmap_types.h --- linux-2.5.1-pre1/include/asm-sparc/kmap_types.h Mon Sep 17 13:16:30 2001 +++ linux/include/asm-sparc/kmap_types.h Sat Dec 1 00:37:05 2001 @@ -7,6 +7,7 @@ KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, + KM_BIO_IRQ, KM_TYPE_NR }; diff -urN linux-2.5.1-pre1/include/asm-sparc64/io.h linux/include/asm-sparc64/io.h --- linux-2.5.1-pre1/include/asm-sparc64/io.h Tue Nov 13 09:16:05 2001 +++ linux/include/asm-sparc64/io.h Sat Dec 1 00:37:05 2001 @@ -18,6 +18,8 @@ extern unsigned long bus_to_virt_not_defined_use_pci_map(volatile void *addr); #define bus_to_virt bus_to_virt_not_defined_use_pci_map +#define page_to_phys(page) (((page) - mem_map) << PAGE_SHIFT) + /* Different PCI controllers we support have their PCI MEM space * mapped to an either 2GB (Psycho) or 4GB (Sabre) aligned area, * so need to chop off the top 33 or 32 bits. diff -urN linux-2.5.1-pre1/include/linux/bio.h linux/include/linux/bio.h --- linux-2.5.1-pre1/include/linux/bio.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/bio.h Sat Dec 1 00:37:05 2001 @@ -0,0 +1,230 @@ +/* + * New 2.5 block I/O model + * + * Copyright (C) 2001 Jens Axboe + * + * This program is free software; you can redistribute it and/or mo + * it under the terms of the GNU General Public License as publishe + * the Free Software Foundation; either version 2 of the License, o + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ +#ifndef __LINUX_BIO_H +#define __LINUX_BIO_H + +#define BIO_DEBUG + +#ifdef BIO_DEBUG +#define BIO_BUG_ON BUG_ON +#else +#define BIO_BUG_ON +#endif + +/* + * hash profiling stuff.. 
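
[Annotation] KM_BIO_IRQ, added to the i386, ppc and sparc kmap-type enums above, plausibly gives interrupt-time block I/O completion its own atomic-kmap slot for touching highmem pages; that reading is inferred from the name and the bio work below, not stated in the patch. A sketch of the presumed usage pattern:

    #include <linux/highmem.h>
    #include <linux/string.h>

    static void example_copy_from_bvec(struct bio_vec *bv, char *dst)
    {
        char *src = kmap_atomic(bv->bv_page, KM_BIO_IRQ);

        memcpy(dst, src + bv->bv_offset, bv->bv_len);
        kunmap_atomic(src, KM_BIO_IRQ);
    }
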
+ */ +#define BIO_HASH_PROFILING + +#define BLKHASHPROF _IOR(0x12,108,sizeof(struct bio_hash_stats)) +#define BLKHASHCLEAR _IO(0x12,109) + +#define MAX_PROFILE_BUCKETS 64 + +struct bio_hash_stats { + atomic_t nr_lookups; + atomic_t nr_hits; + atomic_t nr_inserts; + atomic_t nr_entries; + atomic_t max_entries; + atomic_t max_bucket_size; + atomic_t bucket_size[MAX_PROFILE_BUCKETS + 1]; +}; + +/* + * was unsigned short, but we might as well be ready for > 64kB I/O pages + */ +struct bio_vec { + struct page *bv_page; + unsigned int bv_len; + unsigned int bv_offset; +}; + +struct bio_vec_list { + unsigned int bvl_cnt; /* how may bio_vec's */ + unsigned int bvl_idx; /* current index into bvl_vec */ + unsigned int bvl_size; /* total size in bytes */ + unsigned int bvl_max; /* max bvl_vecs we can hold, used + as index into pool */ + struct bio_vec bvl_vec[0]; /* the iovec array */ +}; + +typedef struct bio_hash_s { + struct bio_hash_s *next_hash; + struct bio_hash_s **pprev_hash; + unsigned long valid_counter; +} bio_hash_t; + +struct bio_hash_bucket { + rwlock_t lock; + bio_hash_t *hash; +} __attribute__((__aligned__(16))); + +#define BIO_HASH_BITS (bio_hash_bits) +#define BIO_HASH_SIZE (1UL << BIO_HASH_BITS) + +/* + * shamelessly stolen from the list.h implementation + */ +#define hash_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) +#define bio_hash_entry(ptr) \ + hash_entry((ptr), struct bio, bi_hash) + +/* + * main unit of I/O for the block layer and lower layers (ie drivers and + * stacking drivers) + */ +struct bio { + sector_t bi_sector; + struct bio *bi_next; /* request queue link */ + bio_hash_t bi_hash; + atomic_t bi_cnt; /* pin count */ + kdev_t bi_dev; /* will be block device */ + struct bio_vec_list *bi_io_vec; + unsigned long bi_flags; /* status, command, etc */ + unsigned long bi_rw; /* bottom bits READ/WRITE, + * top bits priority + */ + int (*bi_end_io)(struct bio *bio, int nr_sectors); + void *bi_private; + + void *bi_hash_desc; /* cookie for hash */ + + void (*bi_destructor)(struct bio *); /* destructor */ +}; + +#define BIO_SECTOR_BITS 9 +#define BIO_OFFSET_MASK ((1UL << (PAGE_CACHE_SHIFT - BIO_SECTOR_BITS)) - 1) +#define BIO_PAGE_MASK (PAGE_CACHE_SIZE - 1) + +/* + * bio flags + */ +#define BIO_UPTODATE 0 /* ok after I/O completion */ +#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ +#define BIO_EOF 2 /* out-out-bounds error */ +#define BIO_PREBUILT 3 /* not merged big */ +#define BIO_CLONED 4 /* doesn't own data */ + +#define bio_is_hashed(bio) ((bio)->bi_hash.pprev_hash) + +/* + * bio bi_rw flags + * + * bit 0 -- read (not set) or write (set) + * bit 1 -- rw-ahead when set + * bit 2 -- barrier + */ +#define BIO_RW 0 +#define BIO_RW_AHEAD 1 +#define BIO_BARRIER 2 + +/* + * various member access, note that bio_data should of course not be used + * on highmem page vectors + */ +#define bio_iovec_idx(bio, idx) (&((bio)->bi_io_vec->bvl_vec[(idx)])) +#define bio_iovec(bio) bio_iovec_idx((bio), (bio)->bi_io_vec->bvl_idx) +#define bio_page(bio) bio_iovec((bio))->bv_page +#define bio_size(bio) ((bio)->bi_io_vec->bvl_size) +#define bio_offset(bio) bio_iovec((bio))->bv_offset +#define bio_sectors(bio) (bio_size((bio)) >> BIO_SECTOR_BITS) +#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio))) +#define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_BARRIER)) + +/* + * will die + */ +#define bio_to_phys(bio) (page_to_phys(bio_page((bio))) + bio_offset((bio))) +#define bvec_to_phys(bv) 
(page_to_phys((bv)->bv_page) + (bv)->bv_offset) + +/* + * hack to avoid doing 64-bit calculations on 32-bit archs, instead use a + * pseudo-pfn check to do segment coalescing + */ +#define bio_sec_pfn(bio) \ + ((((bio_page(bio) - bio_page(bio)->zone->zone_mem_map) << PAGE_SHIFT) / bio_size(bio)) + (bio_offset(bio) >> 9)) + +/* + * queues that have highmem support enabled may still need to revert to + * PIO transfers occasionally and thus map high pages temporarily. For + * permanent PIO fall back, user is probably better off disabling highmem + * I/O completely on that queue (see ide-dma for example) + */ +#define bio_kmap(bio) kmap(bio_page((bio))) + bio_offset((bio)) +#define bio_kunmap(bio) kunmap(bio_page((bio))) + +#define BIO_CONTIG(bio, nxt) \ + (bio_to_phys((bio)) + bio_size((bio)) == bio_to_phys((nxt))) +#define __BIO_PHYS_4G(addr1, addr2) \ + (((addr1) | 0xffffffff) == (((addr2) -1 ) | 0xffffffff)) +#define BIO_PHYS_4G(b1, b2) \ + __BIO_PHYS_4G(bio_to_phys((b1)), bio_to_phys((b2)) + bio_size((b2))) + +typedef int (bio_end_io_t) (struct bio *, int); +typedef void (bio_destructor_t) (struct bio *); + +#define bio_io_error(bio) bio_endio((bio), 0, bio_sectors((bio))) + +#define bio_for_each_segment(bvl, bio, i) \ + for (bvl = bio_iovec((bio)), i = (bio)->bi_io_vec->bvl_idx; \ + i < (bio)->bi_io_vec->bvl_cnt; \ + bvl++, i++) + +/* + * get a reference to a bio, so it won't disappear. the intended use is + * something like: + * + * bio_get(bio); + * submit_bio(rw, bio); + * if (bio->bi_flags ...) + * do_something + * bio_put(bio); + * + * without the bio_get(), it could potentially complete I/O before submit_bio + * returns. and then bio would be freed memory when if (bio->bi_flags ...) + * runs + */ +#define bio_get(bio) atomic_inc(&(bio)->bi_cnt) + +extern struct bio *bio_alloc(int, int); +extern void bio_put(struct bio *); + +/* + * the hash stuff is pretty closely tied to the request queue (needed for + * locking etc anyway, and it's in no way an attempt at a generic hash) + */ +struct request_queue; + +extern inline void bio_hash_remove(struct bio *); +extern inline void bio_hash_add(struct bio *, void *, unsigned int); +extern inline struct bio *bio_hash_find(kdev_t, sector_t, unsigned int); +extern inline int bio_hash_add_unique(struct bio *, void *, unsigned int); +extern void bio_hash_invalidate(struct request_queue *, kdev_t); +extern int bio_endio(struct bio *, int, int); + +extern struct bio *bio_clone(struct bio *, int); +extern struct bio *bio_copy(struct bio *, int); + +extern int bio_ioctl(kdev_t, unsigned int, unsigned long); + +#endif /* __LINUX_BIO_H */ diff -urN linux-2.5.1-pre1/include/linux/blk.h linux/include/linux/blk.h --- linux-2.5.1-pre1/include/linux/blk.h Thu Nov 22 11:48:07 2001 +++ linux/include/linux/blk.h Sat Dec 1 00:37:05 2001 @@ -5,13 +5,7 @@ #include #include #include - -/* - * Spinlock for protecting the request queue which - * is mucked around with in interrupts on potentially - * multiple CPU's.. - */ -extern spinlock_t io_request_lock; +#include /* * Initialization functions. @@ -87,13 +81,18 @@ * code duplication in drivers. 
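
[Annotation] Putting the accessors above together: a driver that falls back to PIO walks the remaining segments of a bio with bio_for_each_segment() and maps each page only for the duration of the transfer, as the bio_kmap() comment suggests. The output routine here is hypothetical:

    extern void example_outsw(void *buf, int words);  /* hypothetical */

    static void example_pio_write(struct bio *bio)
    {
        struct bio_vec *bvl;
        int i;

        bio_for_each_segment(bvl, bio, i) {
            char *data = kmap(bvl->bv_page) + bvl->bv_offset;

            example_outsw(data, bvl->bv_len >> 1);
            kunmap(bvl->bv_page);
        }
    }
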
*/ -static inline void blkdev_dequeue_request(struct request * req) +static inline void blkdev_dequeue_request(struct request *req) { - list_del(&req->queue); + if (req->bio) + bio_hash_remove(req->bio); + if (req->biotail) + bio_hash_remove(req->biotail); + + list_del(&req->queuelist); } -int end_that_request_first(struct request *req, int uptodate, char *name); -void end_that_request_last(struct request *req); +int end_that_request_first(struct request *, int uptodate, int nr_sectors); +void end_that_request_last(struct request *); #if defined(MAJOR_NR) || defined(IDE_DRIVER) @@ -338,12 +337,16 @@ #if !defined(IDE_DRIVER) #ifndef CURRENT -#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head) +#define CURRENT elv_next_request(&blk_dev[MAJOR_NR].request_queue) +#endif +#ifndef QUEUE +#define QUEUE (&blk_dev[MAJOR_NR].request_queue) #endif #ifndef QUEUE_EMPTY -#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head) +#define QUEUE_EMPTY blk_queue_empty(QUEUE) #endif + #ifndef DEVICE_NAME #define DEVICE_NAME "unknown" #endif @@ -367,16 +370,14 @@ #endif #define INIT_REQUEST \ - if (QUEUE_EMPTY) {\ + if (QUEUE_EMPTY) { \ CLEAR_INTR; \ - return; \ + return; \ } \ if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \ panic(DEVICE_NAME ": request list destroyed"); \ - if (CURRENT->bh) { \ - if (!buffer_locked(CURRENT->bh)) \ - panic(DEVICE_NAME ": block not locked"); \ - } + if (!CURRENT->bio) \ + panic(DEVICE_NAME ": no bio"); \ #endif /* !defined(IDE_DRIVER) */ @@ -385,10 +386,11 @@ #if ! SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR) -static inline void end_request(int uptodate) { +static inline void end_request(int uptodate) +{ struct request *req = CURRENT; - if (end_that_request_first(req, uptodate, DEVICE_NAME)) + if (end_that_request_first(req, uptodate, CURRENT->hard_cur_sectors)) return; #ifndef DEVICE_NO_RANDOM diff -urN linux-2.5.1-pre1/include/linux/blkdev.h linux/include/linux/blkdev.h --- linux-2.5.1-pre1/include/linux/blkdev.h Thu Nov 22 11:47:08 2001 +++ linux/include/linux/blkdev.h Sat Dec 1 00:37:05 2001 @@ -6,60 +6,57 @@ #include #include #include +#include + +#include struct request_queue; typedef struct request_queue request_queue_t; struct elevator_s; typedef struct elevator_s elevator_t; -/* - * Ok, this is an expanded form so that we can use the same - * request for paging requests. - */ struct request { - struct list_head queue; + struct list_head queuelist; /* looking for ->queue? you must _not_ + * access it directly, use + * blkdev_dequeue_request! 
*/ int elevator_sequence; - volatile int rq_status; /* should split this into a few status bits */ -#define RQ_INACTIVE (-1) -#define RQ_ACTIVE 1 -#define RQ_SCSI_BUSY 0xffff -#define RQ_SCSI_DONE 0xfffe -#define RQ_SCSI_DISCONNECTING 0xffe0 + int inactive; /* driver hasn't seen it yet */ + int rq_status; /* should split this into a few status bits */ kdev_t rq_dev; int cmd; /* READ or WRITE */ int errors; - unsigned long sector; + sector_t sector; unsigned long nr_sectors; unsigned long hard_sector, hard_nr_sectors; - unsigned int nr_segments; - unsigned int nr_hw_segments; - unsigned long current_nr_sectors; - void * special; - char * buffer; - struct completion * waiting; - struct buffer_head * bh; - struct buffer_head * bhtail; + unsigned short nr_segments; + unsigned short nr_hw_segments; + unsigned int current_nr_sectors; + unsigned int hard_cur_sectors; + void *special; + char *buffer; + struct completion *waiting; + struct bio *bio, *biotail; request_queue_t *q; }; #include -typedef int (merge_request_fn) (request_queue_t *q, - struct request *req, - struct buffer_head *bh, - int); -typedef int (merge_requests_fn) (request_queue_t *q, - struct request *req, - struct request *req2, - int); +typedef int (merge_request_fn) (request_queue_t *, struct request *, + struct bio *); +typedef int (merge_requests_fn) (request_queue_t *, struct request *, + struct request *); typedef void (request_fn_proc) (request_queue_t *q); typedef request_queue_t * (queue_proc) (kdev_t dev); -typedef int (make_request_fn) (request_queue_t *q, int rw, struct buffer_head *bh); -typedef void (plug_device_fn) (request_queue_t *q, kdev_t device); +typedef int (make_request_fn) (request_queue_t *q, struct bio *bio); typedef void (unplug_device_fn) (void *q); +enum blk_queue_state { + Queue_down, + Queue_up, +}; + /* * Default nr free requests per queue, ll_rw_blk will scale it down * according to available RAM at init time @@ -69,6 +66,7 @@ struct request_list { unsigned int count; struct list_head free; + wait_queue_head_t wait; }; struct request_queue @@ -89,7 +87,7 @@ merge_request_fn * front_merge_fn; merge_requests_fn * merge_requests_fn; make_request_fn * make_request_fn; - plug_device_fn * plug_device_fn; + /* * The queue owner gets to use this for whatever they like. * ll_rw_blk doesn't touch it. @@ -97,33 +95,111 @@ void * queuedata; /* - * This is used to remove the plug when tq_disk runs. + * queue needs bounce pages for pages above this limit */ - struct tq_struct plug_tq; + unsigned long bounce_pfn; /* - * Boolean that indicates whether this queue is plugged or not. + * for memory zoning (<= 4GB and > 4GB) */ - char plugged; + int bounce_gfp; /* - * Boolean that indicates whether current_request is active or - * not. + * This is used to remove the plug when tq_disk runs. 
*/ - char head_active; + struct tq_struct plug_tq; /* - * Is meant to protect the queue in the future instead of - * io_request_lock + * various queue flags, see QUEUE_* below + */ + unsigned long queue_flags; + + /* + * protects queue structures from reentrancy */ spinlock_t queue_lock; /* - * Tasks wait here for free request + * queue settings */ - wait_queue_head_t wait_for_request; + unsigned short max_sectors; + unsigned short max_segments; + unsigned short hardsect_size; + unsigned int max_segment_size; + + wait_queue_head_t queue_wait; + + unsigned int hash_valid_counter; }; +#define RQ_INACTIVE (-1) +#define RQ_ACTIVE 1 +#define RQ_SCSI_BUSY 0xffff +#define RQ_SCSI_DONE 0xfffe +#define RQ_SCSI_DISCONNECTING 0xffe0 + +#define QUEUE_FLAG_PLUGGED 0 /* queue is plugged */ +#define QUEUE_FLAG_NOSPLIT 1 /* can process bio over several goes */ + +#define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) + +#define blk_mark_plugged(q) set_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) + +#define blk_queue_empty(q) elv_queue_empty(q) + +#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) + +/* + * noop, requests are automagically marked as active/inactive by I/O + * scheduler -- see elv_next_request + */ +#define blk_queue_headactive(q, head_active) + +extern unsigned long blk_max_low_pfn, blk_max_pfn; + +#define __elv_next_request(q) (q)->elevator.elevator_next_req_fn((q)) + +extern inline struct request *elv_next_request(request_queue_t *q) +{ + struct request *rq = __elv_next_request(q); + + if (rq) { + rq->inactive = 0; + wmb(); + + if (rq->bio) + bio_hash_remove(rq->bio); + if (rq->biotail) + bio_hash_remove(rq->biotail); + } + + return rq; +} + +#define BLK_BOUNCE_HIGH (blk_max_low_pfn << PAGE_SHIFT) +#define BLK_BOUNCE_ANY (blk_max_pfn << PAGE_SHIFT) + +#ifdef CONFIG_HIGHMEM + +extern void create_bounce(struct bio **bio_orig, int gfp_mask); + +extern inline void blk_queue_bounce(request_queue_t *q, struct bio **bio) +{ + struct page *page = bio_page(*bio); + + if (page - page->zone->zone_mem_map > q->bounce_pfn) + create_bounce(bio, q->bounce_gfp); +} + +#else /* CONFIG_HIGHMEM */ + +#define blk_queue_bounce(q, bio) do { } while (0) + +#endif /* CONFIG_HIGHMEM */ + +#define rq_for_each_bio(bio, rq) \ + for (bio = (rq)->bio; bio; bio = bio->bi_next) + struct blk_dev_struct { /* * queue_proc has to be atomic @@ -148,68 +224,78 @@ extern struct sec_size * blk_sec[MAX_BLKDEV]; extern struct blk_dev_struct blk_dev[MAX_BLKDEV]; -extern void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size); +extern void grok_partitions(kdev_t dev, long size); +extern int wipe_partitions(kdev_t dev); extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size); -extern void generic_make_request(int rw, struct buffer_head * bh); +extern void generic_make_request(struct bio *bio); extern inline request_queue_t *blk_get_queue(kdev_t dev); extern void blkdev_release_request(struct request *); +extern void blk_attempt_remerge(request_queue_t *, struct request *); /* * Access functions for manipulating queue properties */ -extern void blk_init_queue(request_queue_t *, request_fn_proc *); +extern int blk_init_queue(request_queue_t *, request_fn_proc *, char *); extern void blk_cleanup_queue(request_queue_t *); -extern void blk_queue_headactive(request_queue_t *, int); extern void blk_queue_make_request(request_queue_t *, make_request_fn *); +extern void 
blk_queue_bounce_limit(request_queue_t *, unsigned long long); +extern void blk_queue_max_sectors(request_queue_t *q, unsigned short); +extern void blk_queue_max_segments(request_queue_t *q, unsigned short); +extern void blk_queue_max_segment_size(request_queue_t *q, unsigned int); +extern void blk_queue_hardsect_size(request_queue_t *q, unsigned short); +extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *); extern void generic_unplug_device(void *); extern int * blk_size[MAX_BLKDEV]; extern int * blksize_size[MAX_BLKDEV]; -extern int * hardsect_size[MAX_BLKDEV]; - extern int * max_readahead[MAX_BLKDEV]; -extern int * max_sectors[MAX_BLKDEV]; - -extern int * max_segments[MAX_BLKDEV]; - #define MAX_SEGMENTS 128 #define MAX_SECTORS 255 -#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK) +#define MAX_SEGMENT_SIZE 65536 /* read-ahead in pages.. */ #define MAX_READAHEAD 31 #define MIN_READAHEAD 3 -#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queue) +#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) #define blkdev_entry_next_request(entry) blkdev_entry_to_request((entry)->next) #define blkdev_entry_prev_request(entry) blkdev_entry_to_request((entry)->prev) -#define blkdev_next_request(req) blkdev_entry_to_request((req)->queue.next) -#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queue.prev) +#define blkdev_next_request(req) blkdev_entry_to_request((req)->queuelist.next) +#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queuelist.prev) extern void drive_stat_acct (kdev_t dev, int rw, unsigned long nr_sectors, int new_io); -static inline int get_hardsect_size(kdev_t dev) +extern inline void blk_clear(int major) { + blk_size[major] = NULL; +#if 0 + blk_size_in_bytes[major] = NULL; +#endif + blksize_size[major] = NULL; + max_readahead[major] = NULL; + read_ahead[major] = 0; +} + +extern inline int get_hardsect_size(kdev_t dev) +{ + request_queue_t *q = blk_get_queue(dev); int retval = 512; - int major = MAJOR(dev); - if (hardsect_size[major]) { - int minor = MINOR(dev); - if (hardsect_size[major][minor]) - retval = hardsect_size[major][minor]; - } + if (q && q->hardsect_size) + retval = q->hardsect_size; + return retval; } #define blk_finished_io(nsects) do { } while (0) #define blk_started_io(nsects) do { } while (0) -static inline unsigned int blksize_bits(unsigned int size) +extern inline unsigned int blksize_bits(unsigned int size) { unsigned int bits = 8; do { @@ -219,7 +305,7 @@ return bits; } -static inline unsigned int block_size(kdev_t dev) +extern inline unsigned int block_size(kdev_t dev) { int retval = BLOCK_SIZE; int major = MAJOR(dev); diff -urN linux-2.5.1-pre1/include/linux/bootmem.h linux/include/linux/bootmem.h --- linux-2.5.1-pre1/include/linux/bootmem.h Thu Nov 22 11:47:23 2001 +++ linux/include/linux/bootmem.h Sat Dec 1 00:37:05 2001 @@ -18,6 +18,11 @@ extern unsigned long min_low_pfn; /* + * highest page + */ +extern unsigned long max_pfn; + +/* * node_bootmem_map is a map pointer - the bits represent all physical * memory pages (including holes) on the node. 
*/ diff -urN linux-2.5.1-pre1/include/linux/devfs_fs_kernel.h linux/include/linux/devfs_fs_kernel.h --- linux-2.5.1-pre1/include/linux/devfs_fs_kernel.h Thu Nov 22 11:47:00 2001 +++ linux/include/linux/devfs_fs_kernel.h Sat Dec 1 00:37:05 2001 @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include diff -urN linux-2.5.1-pre1/include/linux/elevator.h linux/include/linux/elevator.h --- linux-2.5.1-pre1/include/linux/elevator.h Thu Feb 15 16:58:34 2001 +++ linux/include/linux/elevator.h Sat Dec 1 00:37:05 2001 @@ -5,13 +5,20 @@ struct list_head *, struct list_head *, int); -typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *, - struct buffer_head *, int, int); +typedef int (elevator_merge_fn) (request_queue_t *, struct request **, + struct list_head *, struct bio *); typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int); typedef void (elevator_merge_req_fn) (struct request *, struct request *); +typedef struct request *(elevator_next_req_fn) (request_queue_t *); + +typedef void (elevator_add_req_fn) (request_queue_t *, struct request *, struct list_head *); + +typedef int (elevator_init_fn) (request_queue_t *, elevator_t *); +typedef void (elevator_exit_fn) (request_queue_t *, elevator_t *); + struct elevator_s { int read_latency; @@ -21,31 +28,46 @@ elevator_merge_cleanup_fn *elevator_merge_cleanup_fn; elevator_merge_req_fn *elevator_merge_req_fn; - unsigned int queue_ID; + elevator_next_req_fn *elevator_next_req_fn; + elevator_add_req_fn *elevator_add_req_fn; + + elevator_init_fn *elevator_init_fn; + elevator_exit_fn *elevator_exit_fn; + + /* + * per-elevator private data + */ + void *elevator_data; + + char queue_name[16]; }; -int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); +int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct bio *); void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int); void elevator_noop_merge_req(struct request *, struct request *); -int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); +int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct bio *); void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int); void elevator_linus_merge_req(struct request *, struct request *); +int elv_linus_init(request_queue_t *, elevator_t *); +void elv_linus_exit(request_queue_t *, elevator_t *); +struct request *elv_next_request_fn(request_queue_t *); +void elv_add_request_fn(request_queue_t *, struct request *,struct list_head *); +/* + * use the /proc/iosched interface, all the below is history -> + */ typedef struct blkelv_ioctl_arg_s { int queue_ID; int read_latency; int write_latency; int max_bomb_segments; } blkelv_ioctl_arg_t; - #define BLKELVGET _IOR(0x12,106,sizeof(blkelv_ioctl_arg_t)) #define BLKELVSET _IOW(0x12,107,sizeof(blkelv_ioctl_arg_t)) -extern int blkelvget_ioctl(elevator_t *, blkelv_ioctl_arg_t *); -extern int blkelvset_ioctl(elevator_t *, const blkelv_ioctl_arg_t *); - -extern void elevator_init(elevator_t *, elevator_t); +extern int elevator_init(request_queue_t *, elevator_t *, elevator_t, char *); +extern void elevator_exit(request_queue_t *, elevator_t *); /* * Return values from elevator merger @@ -81,6 +103,24 @@ return latency; } +/* + * will change once we move to a more complex data structure than a simple + * list for pending requests + 
*/ +#define elv_queue_empty(q) list_empty(&(q)->queue_head) + +/* + * elevator private data + */ +struct elv_linus_data { + unsigned long flags; +}; + +#define ELV_DAT(e) ((struct elv_linus_data *)(e)->elevator_data) + +#define ELV_LINUS_BACK_MERGE 1 +#define ELV_LINUS_FRONT_MERGE 2 + #define ELEVATOR_NOOP \ ((elevator_t) { \ 0, /* read_latency */ \ @@ -89,6 +129,10 @@ elevator_noop_merge, /* elevator_merge_fn */ \ elevator_noop_merge_cleanup, /* elevator_merge_cleanup_fn */ \ elevator_noop_merge_req, /* elevator_merge_req_fn */ \ + elv_next_request_fn, \ + elv_add_request_fn, \ + elv_linus_init, \ + elv_linus_exit, \ }) #define ELEVATOR_LINUS \ @@ -99,6 +143,10 @@ elevator_linus_merge, /* elevator_merge_fn */ \ elevator_linus_merge_cleanup, /* elevator_merge_cleanup_fn */ \ elevator_linus_merge_req, /* elevator_merge_req_fn */ \ + elv_next_request_fn, \ + elv_add_request_fn, \ + elv_linus_init, \ + elv_linus_exit, \ }) #endif diff -urN linux-2.5.1-pre1/include/linux/fs.h linux/include/linux/fs.h --- linux-2.5.1-pre1/include/linux/fs.h Sat Dec 1 00:36:02 2001 +++ linux/include/linux/fs.h Sat Dec 1 00:37:05 2001 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -74,6 +75,8 @@ #define FMODE_READ 1 #define FMODE_WRITE 2 +#define RW_MASK 1 +#define RWA_MASK 2 #define READ 0 #define WRITE 1 #define READA 2 /* read-ahead - don't block if no resources */ @@ -202,6 +205,7 @@ extern void update_atime (struct inode *); #define UPDATE_ATIME(inode) update_atime (inode) +extern void bio_hash_init(unsigned long); extern void buffer_init(unsigned long); extern void inode_init(unsigned long); extern void mnt_init(unsigned long); @@ -238,28 +242,24 @@ struct buffer_head { /* First cache line: */ struct buffer_head *b_next; /* Hash queue list */ - unsigned long b_blocknr; /* block number */ + sector_t b_blocknr; /* block number */ unsigned short b_size; /* block size */ unsigned short b_list; /* List that this buffer appears */ kdev_t b_dev; /* device (B_FREE = free) */ atomic_t b_count; /* users using this block */ - kdev_t b_rdev; /* Real device */ unsigned long b_state; /* buffer state bitmap (see above) */ unsigned long b_flushtime; /* Time when (dirty) buffer should be written */ struct buffer_head *b_next_free;/* lru/free list linkage */ struct buffer_head *b_prev_free;/* doubly linked list of buffers */ struct buffer_head *b_this_page;/* circular list of buffers in one page */ - struct buffer_head *b_reqnext; /* request queue */ - struct buffer_head **b_pprev; /* doubly linked list of hash-queue */ char * b_data; /* pointer to data block */ struct page *b_page; /* the page this bh is mapped to */ void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */ void *b_private; /* reserved for b_end_io */ - unsigned long b_rsector; /* Real buffer location on disk */ wait_queue_head_t b_wait; struct inode * b_inode; @@ -854,6 +854,8 @@ int (*getattr) (struct dentry *, struct iattr *); }; +struct seq_file; + /* * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called * without the big kernel lock held in all filesystems. @@ -905,6 +907,7 @@ */ struct dentry * (*fh_to_dentry)(struct super_block *sb, __u32 *fh, int len, int fhtype, int parent); int (*dentry_to_fh)(struct dentry *, __u32 *fh, int *lenp, int need_parent); + int (*show_options)(struct seq_file *, struct vfsmount *); }; /* Inode state bits.. 
*/ @@ -1170,12 +1173,25 @@ static inline void buffer_IO_error(struct buffer_head * bh) { mark_buffer_clean(bh); + /* - * b_end_io has to clear the BH_Uptodate bitflag in the error case! + * b_end_io has to clear the BH_Uptodate bitflag in the read error + * case, however buffer contents are not necessarily bad if a + * write fails */ - bh->b_end_io(bh, 0); + bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); } +/* + * return READ, READA, or WRITE + */ +#define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) + +/* + * return data direction, READ or WRITE + */ +#define bio_data_dir(bio) ((bio)->bi_rw & 1) + extern void buffer_insert_inode_queue(struct buffer_head *, struct inode *); static inline void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) { @@ -1343,10 +1359,11 @@ extern void remove_inode_hash(struct inode *); extern struct file * get_empty_filp(void); extern void file_move(struct file *f, struct list_head *list); -extern struct buffer_head * get_hash_table(kdev_t, int, int); -extern struct buffer_head * getblk(kdev_t, int, int); +extern struct buffer_head * get_hash_table(kdev_t, sector_t, int); +extern struct buffer_head * getblk(kdev_t, sector_t, int); extern void ll_rw_block(int, int, struct buffer_head * bh[]); -extern void submit_bh(int, struct buffer_head *); +extern int submit_bh(int, struct buffer_head *); +extern int submit_bio(int, struct bio *); extern int is_read_only(kdev_t); extern void __brelse(struct buffer_head *); static inline void brelse(struct buffer_head *buf) @@ -1366,9 +1383,9 @@ extern void put_unused_buffer_head(struct buffer_head * bh); extern struct buffer_head * get_unused_buffer_head(int async); -extern int brw_page(int, struct page *, kdev_t, int [], int); +extern int brw_page(int, struct page *, kdev_t, sector_t [], int); -typedef int (get_block_t)(struct inode*,long,struct buffer_head*,int); +typedef int (get_block_t)(struct inode*,sector_t,struct buffer_head*,int); /* Generic buffer handling for block filesystems.. */ extern int try_to_release_page(struct page * page, int gfp_mask); @@ -1384,7 +1401,7 @@ extern int block_commit_write(struct page *page, unsigned from, unsigned to); extern int block_sync_page(struct page *); -int generic_block_bmap(struct address_space *, long, get_block_t *); +sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int generic_commit_write(struct file *, struct page *, unsigned, unsigned); int block_truncate_page(struct address_space *, loff_t, get_block_t *); extern int generic_direct_IO(int, struct inode *, struct kiobuf *, unsigned long, int, get_block_t *); diff -urN linux-2.5.1-pre1/include/linux/genhd.h linux/include/linux/genhd.h --- linux-2.5.1-pre1/include/linux/genhd.h Thu Nov 22 11:47:05 2001 +++ linux/include/linux/genhd.h Sat Dec 1 00:37:05 2001 @@ -86,11 +86,11 @@ }; /* drivers/block/genhd.c */ -extern struct gendisk *gendisk_head; - extern void add_gendisk(struct gendisk *gp); extern void del_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(kdev_t dev); +extern unsigned long get_start_sect(kdev_t dev); +extern unsigned long get_nr_sects(kdev_t dev); #endif /* __KERNEL__ */ @@ -244,35 +244,10 @@ extern void devfs_register_partitions (struct gendisk *dev, int minor, int unregister); - - -/* - * FIXME: this should use genhd->minor_shift, but that is slow to look up. 
- */ static inline unsigned int disk_index (kdev_t dev) { - int major = MAJOR(dev); - int minor = MINOR(dev); - unsigned int index; - - switch (major) { - case DAC960_MAJOR+0: - index = (minor & 0x00f8) >> 3; - break; - case SCSI_DISK0_MAJOR: - index = (minor & 0x00f0) >> 4; - break; - case IDE0_MAJOR: /* same as HD_MAJOR */ - case XT_DISK_MAJOR: - index = (minor & 0x0040) >> 6; - break; - case IDE1_MAJOR: - index = ((minor & 0x0040) >> 6) + 2; - break; - default: - return 0; - } - return index; + struct gendisk *g = get_gendisk(dev); + return g ? (MINOR(dev) >> g->minor_shift) : 0; } #endif diff -urN linux-2.5.1-pre1/include/linux/highmem.h linux/include/linux/highmem.h --- linux-2.5.1-pre1/include/linux/highmem.h Thu Nov 22 11:46:23 2001 +++ linux/include/linux/highmem.h Sat Dec 1 00:37:05 2001 @@ -13,8 +13,7 @@ /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); -extern struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig); - +extern void create_bounce(struct bio **bio_orig, int gfp_mask); static inline char *bh_kmap(struct buffer_head *bh) { @@ -26,6 +25,42 @@ kunmap(bh->b_page); } +/* + * remember to add offset! and never ever reenable interrupts between a + * bio_kmap_irq and bio_kunmap_irq!! + */ +static inline char *bio_kmap_irq(struct bio *bio, unsigned long *flags) +{ + unsigned long addr; + + __save_flags(*flags); + + /* + * could be low + */ + if (!PageHighMem(bio_page(bio))) + return bio_data(bio); + + /* + * it's a highmem page + */ + __cli(); + addr = (unsigned long) kmap_atomic(bio_page(bio), KM_BIO_IRQ); + + if (addr & ~PAGE_MASK) + BUG(); + + return (char *) addr + bio_offset(bio); +} + +static inline void bio_kunmap_irq(char *buffer, unsigned long *flags) +{ + unsigned long ptr = (unsigned long) buffer & PAGE_MASK; + + kunmap_atomic((void *) ptr, KM_BIO_IRQ); + __restore_flags(*flags); +} + #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } @@ -39,6 +74,9 @@ #define bh_kmap(bh) ((bh)->b_data) #define bh_kunmap(bh) do { } while (0) + +#define bio_kmap_irq(bio, flags) (bio_data(bio)) +#define bio_kunmap_irq(buf, flags) do { *(flags) = 0; } while (0) #endif /* CONFIG_HIGHMEM */ diff -urN linux-2.5.1-pre1/include/linux/ide.h linux/include/linux/ide.h --- linux-2.5.1-pre1/include/linux/ide.h Thu Nov 22 11:48:07 2001 +++ linux/include/linux/ide.h Sat Dec 1 00:37:05 2001 @@ -149,6 +149,21 @@ #define DATA_READY (DRQ_STAT) /* + * Our Physical Region Descriptor (PRD) table should be large enough + * to handle the biggest I/O request we are likely to see. Since requests + * can have no more than 256 sectors, and since the typical blocksize is + * two or more sectors, we could get by with a limit of 128 entries here for + * the usual worst case. Most requests seem to include some contiguous blocks, + * further reducing the number of table entries required. + * + * As it turns out though, we must allocate a full 4KB page for this, + * so the two PRD tables (ide0 & ide1) will each get half of that, + * allowing each to have about 256 entries (8 bytes each) from this. + */ +#define PRD_BYTES 8 +#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES)) + +/* * Some more useful definitions */ #define IDE_MAJOR_NAME "hd" /* the same for all i/f; see also genhd.c */ @@ -223,6 +238,23 @@ #endif /* + * hwif_chipset_t is used to keep track of the specific hardware + * chipset used by each IDE interface, if known. 
+ */ +typedef enum { ide_unknown, ide_generic, ide_pci, + ide_cmd640, ide_dtc2278, ide_ali14xx, + ide_qd65xx, ide_umc8672, ide_ht6560b, + ide_pdc4030, ide_rz1000, ide_trm290, + ide_cmd646, ide_cy82c693, ide_4drives, + ide_pmac, ide_etrax100 +} hwif_chipset_t; + +#define IDE_CHIPSET_PCI_MASK \ + ((1<> (c)) & 1) + + +/* * Structure to hold all information about the location of this port */ typedef struct hw_regs_s { @@ -231,6 +263,7 @@ int dma; /* our dma entry */ ide_ack_intr_t *ack_intr; /* acknowledge interrupt */ void *priv; /* interface specific data */ + hwif_chipset_t chipset; } hw_regs_t; /* @@ -440,22 +473,6 @@ */ typedef int (ide_busproc_t) (struct hwif_s *, int); -/* - * hwif_chipset_t is used to keep track of the specific hardware - * chipset used by each IDE interface, if known. - */ -typedef enum { ide_unknown, ide_generic, ide_pci, - ide_cmd640, ide_dtc2278, ide_ali14xx, - ide_qd65xx, ide_umc8672, ide_ht6560b, - ide_pdc4030, ide_rz1000, ide_trm290, - ide_cmd646, ide_cy82c693, ide_4drives, - ide_pmac, ide_etrax100 -} hwif_chipset_t; - -#define IDE_CHIPSET_PCI_MASK \ - ((1<> (c)) & 1) - #ifdef CONFIG_BLK_DEV_IDEPCI typedef struct ide_pci_devid_s { unsigned short vid; @@ -488,7 +505,6 @@ struct scatterlist *sg_table; /* Scatter-gather list used to build the above */ int sg_nents; /* Current number of entries in it */ int sg_dma_direction; /* dma transfer direction */ - int sg_dma_active; /* is it in use */ struct hwif_s *mate; /* other hwif from same PCI chip */ unsigned long dma_base; /* base addr for dma ports */ unsigned dma_extra; /* extra addr for dma ports */ @@ -507,6 +523,7 @@ unsigned reset : 1; /* reset after probe */ unsigned autodma : 1; /* automatically try to enable DMA at boot */ unsigned udma_four : 1; /* 1=ATA-66 capable, 0=default */ + unsigned highmem : 1; /* can do full 32-bit dma */ byte channel; /* for dual-port chips: 0=primary, 1=secondary */ #ifdef CONFIG_BLK_DEV_IDEPCI struct pci_dev *pci_dev; /* for pci chipsets */ @@ -541,10 +558,12 @@ */ typedef int (ide_expiry_t)(ide_drive_t *); +#define IDE_BUSY 0 +#define IDE_SLEEP 1 + typedef struct hwgroup_s { ide_handler_t *handler;/* irq handler, if active */ - volatile int busy; /* BOOL: protects all fields below */ - int sleeping; /* BOOL: wake us up on timer expiry */ + unsigned long flags; /* BUSY, SLEEPING */ ide_drive_t *drive; /* current drive */ ide_hwif_t *hwif; /* ptr to current hwif in linked-list */ struct request *rq; /* current request */ @@ -711,7 +730,8 @@ #define LOCAL_END_REQUEST /* Don't generate end_request in blk.h */ #include -void ide_end_request(byte uptodate, ide_hwgroup_t *hwgroup); +inline int __ide_end_request(ide_hwgroup_t *, int, int); +int ide_end_request(byte uptodate, ide_hwgroup_t *hwgroup); /* * This is used for (nearly) all data transfers from/to the IDE interface @@ -787,6 +807,11 @@ unsigned long current_capacity (ide_drive_t *drive); /* + * Revalidate (read partition tables) + */ +void ide_revalidate_drive (ide_drive_t *drive); + +/* * Start a reset operation for an IDE interface. * The caller should return immediately after invoking this. 
*/ @@ -814,6 +839,21 @@ } ide_action_t; /* + * temporarily mapping a (possible) highmem bio for PIO transfer + */ +#define ide_rq_offset(rq) (((rq)->hard_cur_sectors - (rq)->current_nr_sectors) << 9) + +extern inline void *ide_map_buffer(struct request *rq, unsigned long *flags) +{ + return bio_kmap_irq(rq->bio, flags) + ide_rq_offset(rq); +} + +extern inline void ide_unmap_buffer(char *buffer, unsigned long *flags) +{ + bio_kunmap_irq(buffer, flags); +} + +/* * This function issues a special IDE device request * onto the request queue. * @@ -960,5 +1000,8 @@ #endif void hwif_unregister (ide_hwif_t *hwif); + +#define DRIVE_LOCK(drive) (&(drive)->queue.queue_lock) +extern spinlock_t ide_lock; #endif /* _IDE_H */ diff -urN linux-2.5.1-pre1/include/linux/iobuf.h linux/include/linux/iobuf.h --- linux-2.5.1-pre1/include/linux/iobuf.h Thu Nov 22 11:46:26 2001 +++ linux/include/linux/iobuf.h Sat Dec 1 00:37:05 2001 @@ -28,7 +28,7 @@ #define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1) #define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2) -/* The main kiobuf struct used for all our IO! */ +/* The main kiobuf struct */ struct kiobuf { @@ -48,8 +48,7 @@ /* Always embed enough struct pages for atomic IO */ struct page * map_array[KIO_STATIC_PAGES]; - struct buffer_head * bh[KIO_MAX_SECTORS]; - unsigned long blocks[KIO_MAX_SECTORS]; + sector_t blocks[KIO_MAX_SECTORS]; /* Dynamic state for IO completion: */ atomic_t io_count; /* IOs still in progress */ @@ -69,7 +68,7 @@ /* fs/iobuf.c */ -void end_kio_request(struct kiobuf *, int); +int end_kio_request(struct kiobuf *, int); void simple_wakeup_kiobuf(struct kiobuf *); int alloc_kiovec(int nr, struct kiobuf **); void free_kiovec(int nr, struct kiobuf **); @@ -81,6 +80,9 @@ /* fs/buffer.c */ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], - kdev_t dev, unsigned long b[], int size); + kdev_t dev, sector_t [], int size); + +/* fs/bio.c */ +void ll_rw_kio(int rw, struct kiobuf *kio, kdev_t dev, sector_t block); #endif /* __LINUX_IOBUF_H */ diff -urN linux-2.5.1-pre1/include/linux/iso_fs.h linux/include/linux/iso_fs.h --- linux-2.5.1-pre1/include/linux/iso_fs.h Thu Nov 22 11:47:11 2001 +++ linux/include/linux/iso_fs.h Sat Dec 1 00:37:05 2001 @@ -220,7 +220,7 @@ extern struct dentry *isofs_lookup(struct inode *, struct dentry *); extern struct buffer_head *isofs_bread(struct inode *, unsigned int, unsigned int); -extern int isofs_get_blocks(struct inode *, long, struct buffer_head **, unsigned long); +extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long); extern struct inode_operations isofs_dir_inode_operations; extern struct file_operations isofs_dir_operations; diff -urN linux-2.5.1-pre1/include/linux/loop.h linux/include/linux/loop.h --- linux-2.5.1-pre1/include/linux/loop.h Mon Sep 17 13:16:30 2001 +++ linux/include/linux/loop.h Sat Dec 1 00:37:05 2001 @@ -49,8 +49,8 @@ int old_gfp_mask; spinlock_t lo_lock; - struct buffer_head *lo_bh; - struct buffer_head *lo_bhtail; + struct bio *lo_bio; + struct bio *lo_biotail; int lo_state; struct semaphore lo_sem; struct semaphore lo_ctl_mutex; diff -urN linux-2.5.1-pre1/include/linux/lvm.h linux/include/linux/lvm.h --- linux-2.5.1-pre1/include/linux/lvm.h Sun Nov 11 10:09:32 2001 +++ linux/include/linux/lvm.h Sat Dec 1 00:37:05 2001 @@ -468,6 +468,12 @@ } lv_bmap_t; /* + * fixme... 
+ */ +#define LVM_MAX_ATOMIC_IO 512 +#define LVM_MAX_SECTORS (LVM_MAX_ATOMIC_IO * 2) + +/* * Structure Logical Volume (LV) Version 3 */ @@ -505,6 +511,7 @@ uint lv_snapshot_minor; #ifdef __KERNEL__ struct kiobuf *lv_iobuf; + sector_t blocks[LVM_MAX_SECTORS]; struct kiobuf *lv_COW_table_iobuf; struct rw_semaphore lv_lock; struct list_head *lv_snapshot_hash_table; diff -urN linux-2.5.1-pre1/include/linux/nbd.h linux/include/linux/nbd.h --- linux-2.5.1-pre1/include/linux/nbd.h Mon Oct 15 19:29:05 2001 +++ linux/include/linux/nbd.h Sat Dec 1 00:37:05 2001 @@ -37,24 +37,25 @@ static void nbd_end_request(struct request *req) { - struct buffer_head *bh; + struct bio *bio; unsigned nsect; unsigned long flags; int uptodate = (req->errors == 0) ? 1 : 0; + request_queue_t *q = req->q; #ifdef PARANOIA requests_out++; #endif - spin_lock_irqsave(&io_request_lock, flags); - while((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; + spin_lock_irqsave(&q->queue_lock, flags); + while((bio = req->bio) != NULL) { + nsect = bio_sectors(bio); blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, uptodate); + req->bio = bio->bi_next; + bio->bi_next = NULL; + bio_endio(bio, uptodate, nsect); } blkdev_release_request(req); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } #define MAX_NBD 128 diff -urN linux-2.5.1-pre1/include/linux/raid/md_k.h linux/include/linux/raid/md_k.h --- linux-2.5.1-pre1/include/linux/raid/md_k.h Mon Nov 12 09:51:56 2001 +++ linux/include/linux/raid/md_k.h Sat Dec 1 00:37:05 2001 @@ -220,7 +220,7 @@ struct mdk_personality_s { char *name; - int (*make_request)(mddev_t *mddev, int rw, struct buffer_head * bh); + int (*make_request)(mddev_t *mddev, int rw, struct bio *bio); int (*run)(mddev_t *mddev); int (*stop)(mddev_t *mddev); int (*status)(char *page, mddev_t *mddev); diff -urN linux-2.5.1-pre1/include/linux/reiserfs_fs.h linux/include/linux/reiserfs_fs.h --- linux-2.5.1-pre1/include/linux/reiserfs_fs.h Fri Nov 9 14:18:25 2001 +++ linux/include/linux/reiserfs_fs.h Sat Dec 1 00:37:05 2001 @@ -1856,7 +1856,7 @@ loff_t offset, int type, int length, int entry_count); /*void store_key (struct key * key); void forget_key (struct key * key);*/ -int reiserfs_get_block (struct inode * inode, long block, +int reiserfs_get_block (struct inode * inode, sector_t block, struct buffer_head * bh_result, int create); struct inode * reiserfs_iget (struct super_block * s, const struct cpu_key * key); diff -urN linux-2.5.1-pre1/include/linux/slab.h linux/include/linux/slab.h --- linux-2.5.1-pre1/include/linux/slab.h Thu Nov 22 11:46:20 2001 +++ linux/include/linux/slab.h Sat Dec 1 00:37:05 2001 @@ -38,6 +38,7 @@ #define SLAB_NO_REAP 0x00001000UL /* never reap from the cache */ #define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */ #define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */ +#define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */ /* flags passed to a constructor func */ #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */ @@ -76,6 +77,7 @@ extern kmem_cache_t *bh_cachep; extern kmem_cache_t *fs_cachep; extern kmem_cache_t *sigact_cachep; +extern kmem_cache_t *bio_cachep; #endif /* __KERNEL__ */ diff -urN linux-2.5.1-pre1/include/linux/types.h linux/include/linux/types.h --- linux-2.5.1-pre1/include/linux/types.h Thu Nov 22 11:46:18 2001 +++ linux/include/linux/types.h Sat Dec 1 00:37:05 2001 @@ -113,6 +113,17 @@ typedef __s64 int64_t; #endif 
+/* + * transition to 64-bit sector_t, possibly making it an option... + */ +#undef BLK_64BIT_SECTOR + +#ifdef BLK_64BIT_SECTOR +typedef u64 sector_t; +#else +typedef unsigned long sector_t; +#endif + #endif /* __KERNEL_STRICT_NAMES */ /* diff -urN linux-2.5.1-pre1/init/main.c linux/init/main.c --- linux-2.5.1-pre1/init/main.c Fri Nov 9 14:15:00 2001 +++ linux/init/main.c Sat Dec 1 00:37:05 2001 @@ -591,6 +591,8 @@ #endif mem_init(); kmem_cache_sizes_init(); + pgtable_cache_init(); + mempages = num_physpages; fork_init(mempages); @@ -598,6 +600,7 @@ vfs_caches_init(mempages); buffer_init(mempages); page_cache_init(mempages); + bio_hash_init(mempages); #if defined(CONFIG_ARCH_S390) ccwcache_init(); #endif diff -urN linux-2.5.1-pre1/kernel/exec_domain.c linux/kernel/exec_domain.c --- linux-2.5.1-pre1/kernel/exec_domain.c Sun Nov 11 10:20:21 2001 +++ linux/kernel/exec_domain.c Sat Dec 1 00:37:05 2001 @@ -102,7 +102,7 @@ } #endif - ep = NULL; + ep = &default_exec_domain; out: read_unlock(&exec_domains_lock); return (ep); @@ -162,8 +162,6 @@ struct exec_domain *ep, *oep; ep = lookup_exec_domain(personality); - if (ep == NULL) - return -EINVAL; if (ep == current->exec_domain) { current->personality = personality; return 0; diff -urN linux-2.5.1-pre1/kernel/ksyms.c linux/kernel/ksyms.c --- linux-2.5.1-pre1/kernel/ksyms.c Wed Nov 21 14:07:25 2001 +++ linux/kernel/ksyms.c Sat Dec 1 00:37:05 2001 @@ -121,6 +121,8 @@ EXPORT_SYMBOL(kunmap_high); EXPORT_SYMBOL(highmem_start_page); EXPORT_SYMBOL(create_bounce); +EXPORT_SYMBOL(kmap_prot); +EXPORT_SYMBOL(kmap_pte); #endif /* filesystem internal functions */ @@ -290,7 +292,6 @@ /* block device driver support */ EXPORT_SYMBOL(blksize_size); -EXPORT_SYMBOL(hardsect_size); EXPORT_SYMBOL(blk_size); EXPORT_SYMBOL(blk_dev); EXPORT_SYMBOL(is_read_only); @@ -307,8 +308,8 @@ EXPORT_SYMBOL(tq_disk); EXPORT_SYMBOL(init_buffer); EXPORT_SYMBOL(refile_buffer); -EXPORT_SYMBOL(max_sectors); EXPORT_SYMBOL(max_readahead); +EXPORT_SYMBOL(wipe_partitions); /* tty routines */ EXPORT_SYMBOL(tty_hangup); diff -urN linux-2.5.1-pre1/mm/bootmem.c linux/mm/bootmem.c --- linux-2.5.1-pre1/mm/bootmem.c Tue Sep 18 14:10:43 2001 +++ linux/mm/bootmem.c Sat Dec 1 00:37:05 2001 @@ -25,6 +25,7 @@ */ unsigned long max_low_pfn; unsigned long min_low_pfn; +unsigned long max_pfn; /* return the number of _pages_ that will be allocated for the boot bitmap */ unsigned long __init bootmem_bootmap_pages (unsigned long pages) diff -urN linux-2.5.1-pre1/mm/filemap.c linux/mm/filemap.c --- linux-2.5.1-pre1/mm/filemap.c Wed Nov 21 14:07:25 2001 +++ linux/mm/filemap.c Sat Dec 1 00:37:05 2001 @@ -935,7 +935,6 @@ spin_unlock(&pagecache_lock); if (!page) { struct page *newpage = alloc_page(gfp_mask); - page = ERR_PTR(-ENOMEM); if (newpage) { spin_lock(&pagecache_lock); page = __find_lock_page_helper(mapping, index, *hash); diff -urN linux-2.5.1-pre1/mm/highmem.c linux/mm/highmem.c --- linux-2.5.1-pre1/mm/highmem.c Mon Oct 22 15:01:57 2001 +++ linux/mm/highmem.c Sat Dec 1 00:37:05 2001 @@ -21,6 +21,9 @@ #include #include #include +#include + +#include /* * Virtual_count is not a pure "count". @@ -186,7 +189,7 @@ wake_up(&pkmap_map_wait); } -#define POOL_SIZE 32 +#define POOL_SIZE 64 /* * This lock gets no contention at all, normally. @@ -200,77 +203,41 @@ static LIST_HEAD(emergency_bhs); /* - * Simple bounce buffer support for highmem pages. - * This will be moved to the block layer in 2.5. + * Simple bounce buffer support for highmem pages. 
Depending on the + * queue gfp mask set, *to may or may not be a highmem page. kmap it + * always, it will do the Right Thing */ - -static inline void copy_from_high_bh (struct buffer_head *to, - struct buffer_head *from) +static inline void copy_from_high_bio(struct bio *to, struct bio *from) { - struct page *p_from; - char *vfrom; + unsigned char *vto, *vfrom; + + if (unlikely(in_interrupt())) + BUG(); + + vto = bio_kmap(to); + vfrom = bio_kmap(from); - p_from = from->b_page; + memcpy(vto, vfrom + bio_offset(from), bio_size(to)); - vfrom = kmap_atomic(p_from, KM_USER0); - memcpy(to->b_data, vfrom + bh_offset(from), to->b_size); - kunmap_atomic(vfrom, KM_USER0); + bio_kunmap(from); + bio_kunmap(to); } -static inline void copy_to_high_bh_irq (struct buffer_head *to, - struct buffer_head *from) +static inline void copy_to_high_bio_irq(struct bio *to, struct bio *from) { - struct page *p_to; - char *vto; + unsigned char *vto, *vfrom; unsigned long flags; - p_to = to->b_page; __save_flags(flags); __cli(); - vto = kmap_atomic(p_to, KM_BOUNCE_READ); - memcpy(vto + bh_offset(to), from->b_data, to->b_size); + vto = kmap_atomic(bio_page(to), KM_BOUNCE_READ); + vfrom = kmap_atomic(bio_page(from), KM_BOUNCE_READ); + memcpy(vto + bio_offset(to), vfrom, bio_size(to)); + kunmap_atomic(vfrom, KM_BOUNCE_READ); kunmap_atomic(vto, KM_BOUNCE_READ); __restore_flags(flags); } -static inline void bounce_end_io (struct buffer_head *bh, int uptodate) -{ - struct page *page; - struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private); - unsigned long flags; - - bh_orig->b_end_io(bh_orig, uptodate); - - page = bh->b_page; - - spin_lock_irqsave(&emergency_lock, flags); - if (nr_emergency_pages >= POOL_SIZE) - __free_page(page); - else { - /* - * We are abusing page->list to manage - * the highmem emergency pool: - */ - list_add(&page->list, &emergency_pages); - nr_emergency_pages++; - } - - if (nr_emergency_bhs >= POOL_SIZE) { -#ifdef HIGHMEM_DEBUG - /* Don't clobber the constructed slab cache */ - init_waitqueue_head(&bh->b_wait); -#endif - kmem_cache_free(bh_cachep, bh); - } else { - /* - * Ditto in the bh case, here we abuse b_inode_buffers: - */ - list_add(&bh->b_inode_buffers, &emergency_bhs); - nr_emergency_bhs++; - } - spin_unlock_irqrestore(&emergency_lock, flags); -} - static __init int init_emergency_pool(void) { struct sysinfo i; @@ -290,44 +257,63 @@ list_add(&page->list, &emergency_pages); nr_emergency_pages++; } - while (nr_emergency_bhs < POOL_SIZE) { - struct buffer_head * bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC); - if (!bh) { - printk("couldn't refill highmem emergency bhs"); - break; - } - list_add(&bh->b_inode_buffers, &emergency_bhs); - nr_emergency_bhs++; - } spin_unlock_irq(&emergency_lock); - printk("allocated %d pages and %d bhs reserved for the highmem bounces\n", - nr_emergency_pages, nr_emergency_bhs); - + printk("allocated %d pages reserved for the highmem bounces\n", nr_emergency_pages); return 0; } __initcall(init_emergency_pool); -static void bounce_end_io_write (struct buffer_head *bh, int uptodate) +static inline void bounce_end_io (struct bio *bio, int nr_sectors) +{ + struct bio *bio_orig = bio->bi_private; + struct page *page = bio_page(bio); + unsigned long flags; + + if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + set_bit(BIO_UPTODATE, bio_orig->bi_flags); + + bio_orig->bi_end_io(bio_orig, nr_sectors); + + spin_lock_irqsave(&emergency_lock, flags); + if (nr_emergency_pages >= POOL_SIZE) { + spin_unlock_irqrestore(&emergency_lock, flags); + __free_page(page); + } 
else { + /* + * We are abusing page->list to manage + * the highmem emergency pool: + */ + list_add(&page->list, &emergency_pages); + nr_emergency_pages++; + spin_unlock_irqrestore(&emergency_lock, flags); + } + + bio_hash_remove(bio); + bio_put(bio); +} + +static void bounce_end_io_write (struct bio *bio, int nr_sectors) { - bounce_end_io(bh, uptodate); + bounce_end_io(bio, nr_sectors); } -static void bounce_end_io_read (struct buffer_head *bh, int uptodate) +static void bounce_end_io_read (struct bio *bio, int nr_sectors) { - struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private); + struct bio *bio_orig = bio->bi_private; + + if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + copy_to_high_bio_irq(bio_orig, bio); - if (uptodate) - copy_to_high_bh_irq(bh_orig, bh); - bounce_end_io(bh, uptodate); + bounce_end_io(bio, nr_sectors); } -struct page *alloc_bounce_page (void) +struct page *alloc_bounce_page(int gfp_mask) { struct list_head *tmp; struct page *page; - page = alloc_page(GFP_NOHIGHIO); + page = alloc_page(gfp_mask); if (page) return page; /* @@ -360,91 +346,35 @@ goto repeat_alloc; } -struct buffer_head *alloc_bounce_bh (void) +void create_bounce(struct bio **bio_orig, int gfp_mask) { - struct list_head *tmp; - struct buffer_head *bh; + struct page *page; + struct bio *bio; - bh = kmem_cache_alloc(bh_cachep, SLAB_NOHIGHIO); - if (bh) - return bh; - /* - * No luck. First, kick the VM so it doesnt idle around while - * we are using up our emergency rations. - */ - wakeup_bdflush(); + bio = bio_alloc(GFP_NOHIGHIO, 1); -repeat_alloc: /* - * Try to allocate from the emergency pool. + * wasteful for 1kB fs, but machines with lots of ram are less likely + * to have 1kB fs for anything that needs to go fast. so all things + * considered, it should be ok. */ - tmp = &emergency_bhs; - spin_lock_irq(&emergency_lock); - if (!list_empty(tmp)) { - bh = list_entry(tmp->next, struct buffer_head, b_inode_buffers); - list_del(tmp->next); - nr_emergency_bhs--; - } - spin_unlock_irq(&emergency_lock); - if (bh) - return bh; + page = alloc_bounce_page(gfp_mask); - /* we need to wait I/O completion */ - run_task_queue(&tq_disk); + bio->bi_dev = (*bio_orig)->bi_dev; + bio->bi_sector = (*bio_orig)->bi_sector; + bio->bi_rw = (*bio_orig)->bi_rw; - current->policy |= SCHED_YIELD; - __set_current_state(TASK_RUNNING); - schedule(); - goto repeat_alloc; -} + bio->bi_io_vec->bvl_vec[0].bv_page = page; + bio->bi_io_vec->bvl_vec[0].bv_len = bio_size(*bio_orig); + bio->bi_io_vec->bvl_vec[0].bv_offset = 0; -struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig) -{ - struct page *page; - struct buffer_head *bh; + bio->bi_private = *bio_orig; - if (!PageHighMem(bh_orig->b_page)) - return bh_orig; - - bh = alloc_bounce_bh(); - /* - * This is wasteful for 1k buffers, but this is a stopgap measure - * and we are being ineffective anyway. This approach simplifies - * things immensly. On boxes with more than 4GB RAM this should - * not be an issue anyway. 
- */ - page = alloc_bounce_page(); - - set_bh_page(bh, page, 0); - - bh->b_next = NULL; - bh->b_blocknr = bh_orig->b_blocknr; - bh->b_size = bh_orig->b_size; - bh->b_list = -1; - bh->b_dev = bh_orig->b_dev; - bh->b_count = bh_orig->b_count; - bh->b_rdev = bh_orig->b_rdev; - bh->b_state = bh_orig->b_state; -#ifdef HIGHMEM_DEBUG - bh->b_flushtime = jiffies; - bh->b_next_free = NULL; - bh->b_prev_free = NULL; - /* bh->b_this_page */ - bh->b_reqnext = NULL; - bh->b_pprev = NULL; -#endif - /* bh->b_page */ - if (rw == WRITE) { - bh->b_end_io = bounce_end_io_write; - copy_from_high_bh(bh, bh_orig); + if (bio_rw(bio) == WRITE) { + bio->bi_end_io = bounce_end_io_write; + copy_from_high_bio(bio, *bio_orig); } else - bh->b_end_io = bounce_end_io_read; - bh->b_private = (void *)bh_orig; - bh->b_rsector = bh_orig->b_rsector; -#ifdef HIGHMEM_DEBUG - memset(&bh->b_wait, -1, sizeof(bh->b_wait)); -#endif + bio->bi_end_io = bounce_end_io_read; - return bh; + *bio_orig = bio; } - diff -urN linux-2.5.1-pre1/mm/page_io.c linux/mm/page_io.c --- linux-2.5.1-pre1/mm/page_io.c Mon Nov 19 15:19:42 2001 +++ linux/mm/page_io.c Sat Dec 1 00:37:05 2001 @@ -36,7 +36,7 @@ static int rw_swap_page_base(int rw, swp_entry_t entry, struct page *page) { unsigned long offset; - int zones[PAGE_SIZE/512]; + sector_t zones[PAGE_SIZE/512]; int zones_used; kdev_t dev = 0; int block_size; diff -urN linux-2.5.1-pre1/mm/slab.c linux/mm/slab.c --- linux-2.5.1-pre1/mm/slab.c Tue Sep 18 14:16:26 2001 +++ linux/mm/slab.c Sat Dec 1 00:37:05 2001 @@ -109,9 +109,11 @@ #if DEBUG # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ SLAB_POISON | SLAB_HWCACHE_ALIGN | \ - SLAB_NO_REAP | SLAB_CACHE_DMA) + SLAB_NO_REAP | SLAB_CACHE_DMA | \ + SLAB_MUST_HWCACHE_ALIGN) #else -# define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | SLAB_CACHE_DMA) +# define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \ + SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN) #endif /* @@ -649,7 +651,7 @@ flags &= ~SLAB_POISON; } #if FORCED_DEBUG - if (size < (PAGE_SIZE>>3)) + if ((size < (PAGE_SIZE>>3)) && !(flags & SLAB_MUST_HWCACHE_ALIGN)) /* * do not red zone large object, causes severe * fragmentation.