diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/CREDITS linux/CREDITS --- /opt/kernel/linux-2.4.14-pre6/CREDITS Wed Oct 24 12:49:57 2001 +++ linux/CREDITS Mon Oct 22 11:14:23 2001 @@ -140,9 +140,11 @@ D: VIA MVP-3/TX Pro III chipset IDE N: Jens Axboe -E: axboe@image.dk -D: Linux CD-ROM maintainer -D: jiffies wrap fixes + schedule timeouts depending on HZ == 100 +E: axboe@suse.de +D: Linux CD-ROM maintainer, DVD support +D: elevator + block layer rewrites +D: highmem I/O support +D: misc hacking on IDE, SCSI, block drivers, etc S: Peter Bangs Vej 258, 2TH S: 2500 Valby S: Denmark diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/Documentation/Configure.help linux/Documentation/Configure.help --- /opt/kernel/linux-2.4.14-pre6/Documentation/Configure.help Thu Nov 1 14:25:31 2001 +++ linux/Documentation/Configure.help Wed Oct 31 10:56:12 2001 @@ -5724,17 +5724,6 @@ Documentation/scsi.txt. The module will be called sg.o. If unsure, say N. -Debug new queueing code for SCSI -CONFIG_SCSI_DEBUG_QUEUES - This option turns on a lot of additional consistency checking for - the new queueing code. This will adversely affect performance, but - it is likely that bugs will be caught sooner if this is turned on. - This will typically cause the kernel to panic if an error is - detected, but it would have probably crashed if the panic weren't - there. Comments/questions/problems to linux-scsi mailing list - please. See http://www.andante.org/scsi_queue.html for more - up-to-date information. - Probe all LUNs on each SCSI device CONFIG_SCSI_MULTI_LUN If you have a SCSI device that supports more than one LUN (Logical diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/Documentation/DocBook/Makefile linux/Documentation/DocBook/Makefile --- /opt/kernel/linux-2.4.14-pre6/Documentation/DocBook/Makefile Wed Oct 31 09:39:12 2001 +++ linux/Documentation/DocBook/Makefile Thu Oct 25 13:08:49 2001 @@ -108,6 +108,7 @@ $(TOPDIR)/drivers/video/modedb.c \ $(TOPDIR)/fs/devfs/base.c \ $(TOPDIR)/fs/locks.c \ + $(TOPDIR)/fs/bio.c \ $(TOPDIR)/include/asm-i386/bitops.h \ $(TOPDIR)/kernel/pm.c \ $(TOPDIR)/kernel/ksyms.c \ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/Makefile linux/Makefile --- /opt/kernel/linux-2.4.14-pre6/Makefile Thu Nov 1 14:25:31 2001 +++ linux/Makefile Wed Oct 31 10:55:49 2001 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 14 -EXTRAVERSION =-pre6 +EXTRAVERSION =-pre6-bio15 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/arch/i386/kernel/setup.c linux/arch/i386/kernel/setup.c --- /opt/kernel/linux-2.4.14-pre6/arch/i386/kernel/setup.c Wed Oct 31 09:39:12 2001 +++ linux/arch/i386/kernel/setup.c Tue Oct 30 09:48:43 2001 @@ -156,6 +156,8 @@ static int disable_x86_serial_nr __initdata = 1; static int disable_x86_fxsr __initdata = 0; +unsigned long max_pfn; + /* * This is set up by the setup-routine at boot-time */ @@ -771,7 +773,7 @@ void __init setup_arch(char **cmdline_p) { unsigned long bootmap_size, low_mem_size; - unsigned long start_pfn, max_pfn, max_low_pfn; + unsigned long start_pfn, max_low_pfn; int i; #ifdef CONFIG_VISWS diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/DAC960.c linux/drivers/block/DAC960.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/DAC960.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/block/DAC960.c Tue Oct 30 09:48:43 2001 @@ -30,6 +30,7 @@ 
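The bulk of this patch converts block drivers from buffer_head based I/O to struct bio, moves drivers off the global io_request_lock, and replaces per-driver merge callbacks with per-queue limits. As a rough orientation, here is a field/helper mapping drawn only from the conversions below (a sketch, not a complete definition of either structure):

/*
 *  buffer_head world             bio world, as used in this patch
 *  ---------------------------   ------------------------------------------
 *  bh->b_rsector                 bio->bi_sector
 *  bh->b_rdev                    bio->bi_dev
 *  bh->b_reqnext                 bio->bi_next
 *  bh->b_data / bh->b_size       bio_data(bio) / bio_size(bio), or
 *                                bio_page(bio) + bio_offset(bio) for
 *                                highmem-capable mappings
 *  bh->b_size >> 9               bio_sectors(bio)
 *  bh->b_end_io(bh, uptodate)    set_bit(BIO_UPTODATE, &bio->bi_flags) then
 *                                bio->bi_end_io(bio), or bio_endio(bio, ok)
 *  rq->bh                        rq->bio (bios chained through bi_next)
 *  io_request_lock               per-queue q->queue_lock
 */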
#include #include #include +#include #include #include #include @@ -306,9 +307,9 @@ static void DAC960_WaitForCommand(DAC960_Controller_T *Controller) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&Controller->RequestQueue->queue_lock); __wait_event(Controller->CommandWaitQueue, Controller->FreeCommands); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&Controller->RequestQueue->queue_lock); } @@ -1922,76 +1923,6 @@ /* - DAC960_BackMergeFunction is the Back Merge Function for the DAC960 driver. -*/ - -static int DAC960_BackMergeFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - BufferHeader_T *BufferHeader, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - if (Request->bhtail->b_data + Request->bhtail->b_size == BufferHeader->b_data) - return true; - if (Request->nr_segments < MaxSegments && - Request->nr_segments < Controller->DriverScatterGatherLimit) - { - Request->nr_segments++; - return true; - } - return false; -} - - -/* - DAC960_FrontMergeFunction is the Front Merge Function for the DAC960 driver. -*/ - -static int DAC960_FrontMergeFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - BufferHeader_T *BufferHeader, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - if (BufferHeader->b_data + BufferHeader->b_size == Request->bh->b_data) - return true; - if (Request->nr_segments < MaxSegments && - Request->nr_segments < Controller->DriverScatterGatherLimit) - { - Request->nr_segments++; - return true; - } - return false; -} - - -/* - DAC960_MergeRequestsFunction is the Merge Requests Function for the - DAC960 driver. -*/ - -static int DAC960_MergeRequestsFunction(RequestQueue_T *RequestQueue, - IO_Request_T *Request, - IO_Request_T *NextRequest, - int MaxSegments) -{ - DAC960_Controller_T *Controller = - (DAC960_Controller_T *) RequestQueue->queuedata; - int TotalSegments = Request->nr_segments + NextRequest->nr_segments; - if (Request->bhtail->b_data + Request->bhtail->b_size - == NextRequest->bh->b_data) - TotalSegments--; - if (TotalSegments > MaxSegments || - TotalSegments > Controller->DriverScatterGatherLimit) - return false; - Request->nr_segments = TotalSegments; - return true; -} - - -/* DAC960_RegisterBlockDevice registers the Block Device structures associated with Controller. */ @@ -2015,15 +1946,15 @@ Initialize the I/O Request Queue. */ RequestQueue = BLK_DEFAULT_QUEUE(MajorNumber); - blk_init_queue(RequestQueue, DAC960_RequestFunction); + blk_init_queue(RequestQueue, DAC960_RequestFunction, "dac960"); blk_queue_headactive(RequestQueue, 0); - RequestQueue->back_merge_fn = DAC960_BackMergeFunction; - RequestQueue->front_merge_fn = DAC960_FrontMergeFunction; - RequestQueue->merge_requests_fn = DAC960_MergeRequestsFunction; RequestQueue->queuedata = Controller; + RequestQueue->max_segments = Controller->DriverScatterGatherLimit; + RequestQueue->max_sectors = Controller->MaxBlocksPerCommand; Controller->RequestQueue = RequestQueue; /* - Initialize the Max Sectors per Request array. + Initialize the Disk Partitions array, Partition Sizes array, Block Sizes + array, and Max Sectors per Request array. 
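The back/front/request merge callbacks deleted above existed only to count scatter/gather segments against the controller's limit; the bio-based block layer enforces per-queue limits itself, so queue setup shrinks to declaring them. A minimal sketch of the new registration pattern (my_request_fn, my_dev, MY_SG_LIMIT and MY_MAX_SECTORS are hypothetical placeholders; blk_init_queue() now also takes a queue name):

        request_queue_t *q = BLK_DEFAULT_QUEUE(MAJOR_NR);

        blk_init_queue(q, my_request_fn, "mydrv");
        blk_queue_headactive(q, 0);
        q->queuedata = my_dev;
        q->max_segments = MY_SG_LIMIT;             /* replaces the *_merge_fn() counting */
        blk_queue_max_sectors(q, MY_MAX_SECTORS);  /* replaces max_sectors[major] */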
*/ for (MinorNumber = 0; MinorNumber < DAC960_MinorCount; MinorNumber++) Controller->MaxSectorsPerRequest[MinorNumber] = @@ -2031,7 +1962,6 @@ Controller->GenericDiskInfo.part = Controller->DiskPartitions; Controller->GenericDiskInfo.sizes = Controller->PartitionSizes; blksize_size[MajorNumber] = Controller->BlockSizes; - max_sectors[MajorNumber] = Controller->MaxSectorsPerRequest; /* Initialize Read Ahead to 128 sectors. */ @@ -2080,9 +2010,7 @@ */ Controller->GenericDiskInfo.part = NULL; Controller->GenericDiskInfo.sizes = NULL; - blk_size[MajorNumber] = NULL; - blksize_size[MajorNumber] = NULL; - max_sectors[MajorNumber] = NULL; + blk_clear(MajorNumber); /* Remove the Generic Disk Information structure from the list. */ @@ -2813,23 +2741,24 @@ CommandMailbox->Type5.ScatterGatherCount = Command->SegmentCount; while (BufferHeader != NULL) { - if (BufferHeader->b_data == LastDataEndPointer) + if (bio_data(BufferHeader) == LastDataEndPointer) { ScatterGatherList[SegmentNumber-1].SegmentByteCount += - BufferHeader->b_size; - LastDataEndPointer += BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer += bio_size(BufferHeader); } else { ScatterGatherList[SegmentNumber].SegmentDataPointer = - Virtual_to_Bus32(BufferHeader->b_data); + Virtual_to_Bus32(bio_data(BufferHeader)); ScatterGatherList[SegmentNumber].SegmentByteCount = - BufferHeader->b_size; - LastDataEndPointer = BufferHeader->b_data + BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer = bio_data(BufferHeader) + + bio_size(BufferHeader); if (SegmentNumber++ > Controller->DriverScatterGatherLimit) panic("DAC960: Scatter/Gather Segment Overflow\n"); } - BufferHeader = BufferHeader->b_reqnext; + BufferHeader = BufferHeader->bi_next; } if (SegmentNumber != Command->SegmentCount) panic("DAC960: SegmentNumber != SegmentCount\n"); @@ -2903,23 +2832,24 @@ .ScatterGatherSegments; while (BufferHeader != NULL) { - if (BufferHeader->b_data == LastDataEndPointer) + if (bio_data(BufferHeader) == LastDataEndPointer) { ScatterGatherList[SegmentNumber-1].SegmentByteCount += - BufferHeader->b_size; - LastDataEndPointer += BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer += bio_size(BufferHeader); } else { ScatterGatherList[SegmentNumber].SegmentDataPointer = - Virtual_to_Bus64(BufferHeader->b_data); + Virtual_to_Bus64(bio_data(BufferHeader)); ScatterGatherList[SegmentNumber].SegmentByteCount = - BufferHeader->b_size; - LastDataEndPointer = BufferHeader->b_data + BufferHeader->b_size; + bio_size(BufferHeader); + LastDataEndPointer = bio_data(BufferHeader) + + bio_size(BufferHeader); if (SegmentNumber++ > Controller->DriverScatterGatherLimit) panic("DAC960: Scatter/Gather Segment Overflow\n"); } - BufferHeader = BufferHeader->b_reqnext; + BufferHeader = BufferHeader->bi_next; } if (SegmentNumber != Command->SegmentCount) panic("DAC960: SegmentNumber != SegmentCount\n"); @@ -2947,7 +2877,7 @@ while (true) { if (list_empty(RequestQueueHead)) return false; - Request = blkdev_entry_next_request(RequestQueueHead); + Request = elv_next_request(RequestQueue); Command = DAC960_AllocateCommand(Controller); if (Command != NULL) break; if (!WaitForCommand) return false; @@ -2958,12 +2888,10 @@ else Command->CommandType = DAC960_WriteCommand; Command->Completion = Request->waiting; Command->LogicalDriveNumber = DAC960_LogicalDriveNumber(Request->rq_dev); - Command->BlockNumber = - Request->sector - + Controller->GenericDiskInfo.part[MINOR(Request->rq_dev)].start_sect; + Command->BlockNumber = Request->sector; 
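Two simplifications recur in the rewritten request functions: requests are pulled through elv_next_request() rather than by peeking at the queue head, and rq->sector is used directly because partition remapping is now done by the block layer before the request reaches the driver (which is why the explicit start_sect additions disappear throughout this patch). A condensed sketch, with cmd standing in for whatever driver-specific command structure is being filled:

        struct request *rq = elv_next_request(q);  /* was blkdev_entry_next_request() */

        cmd->block   = rq->sector;      /* already absolute; no start_sect offset */
        cmd->nblocks = rq->nr_sectors;
        cmd->nsegs   = rq->nr_segments;
        cmd->bio     = rq->bio;         /* head of the request's bio chain, was rq->bh */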
Command->BlockCount = Request->nr_sectors; Command->SegmentCount = Request->nr_segments; - Command->BufferHeader = Request->bh; + Command->BufferHeader = Request->bio; Command->RequestBuffer = Request->buffer; blkdev_dequeue_request(Request); blkdev_release_request(Request); @@ -3016,8 +2944,10 @@ static inline void DAC960_ProcessCompletedBuffer(BufferHeader_T *BufferHeader, boolean SuccessfulIO) { - blk_finished_io(BufferHeader->b_size >> 9); - BufferHeader->b_end_io(BufferHeader, SuccessfulIO); + if (SuccessfulIO) + set_bit(BIO_UPTODATE, &BufferHeader->bi_flags); + blk_finished_io(bio_sectors(BufferHeader)); + BufferHeader->bi_end_io(BufferHeader); } @@ -3071,13 +3001,13 @@ Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, Command->BlockNumber, Command->BlockNumber + Command->BlockCount - 1); - if (DAC960_PartitionNumber(Command->BufferHeader->b_rdev) > 0) + if (DAC960_PartitionNumber(Command->BufferHeader->bi_dev) > 0) DAC960_Error(" /dev/rd/c%dd%dp%d: relative blocks %u..%u\n", Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, - DAC960_PartitionNumber(Command->BufferHeader->b_rdev), - Command->BufferHeader->b_rsector, - Command->BufferHeader->b_rsector + Command->BlockCount - 1); + DAC960_PartitionNumber(Command->BufferHeader->bi_dev), + Command->BufferHeader->bi_sector, + Command->BufferHeader->bi_sector + Command->BlockCount - 1); } @@ -3104,8 +3034,8 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, true); BufferHeader = NextBufferHeader; } @@ -3119,7 +3049,7 @@ else if ((CommandStatus == DAC960_V1_IrrecoverableDataError || CommandStatus == DAC960_V1_BadDataEncountered) && BufferHeader != NULL && - BufferHeader->b_reqnext != NULL) + BufferHeader->bi_next != NULL) { DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; @@ -3133,10 +3063,10 @@ Command->CommandType = DAC960_WriteRetryCommand; CommandMailbox->Type5.CommandOpcode = DAC960_V1_Write; } - Command->BlockCount = BufferHeader->b_size >> DAC960_BlockSizeBits; + Command->BlockCount = bio_size(BufferHeader) >> DAC960_BlockSizeBits; CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; CommandMailbox->Type5.BusAddress = - Virtual_to_Bus32(BufferHeader->b_data); + Virtual_to_Bus32(bio_data(BufferHeader)); DAC960_QueueCommand(Command); return; } @@ -3149,8 +3079,8 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, false); BufferHeader = NextBufferHeader; } @@ -3164,8 +3094,8 @@ else if (CommandType == DAC960_ReadRetryCommand || CommandType == DAC960_WriteRetryCommand) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; /* Perform completion processing for this single buffer. 
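Completion changes shape too: a bio's end_io hook takes no uptodate argument, so success is recorded on the bio itself first. A sketch of the convention DAC960_ProcessCompletedBuffer() adopts above (my_end_bio is a hypothetical name; the cciss and cpqarray conversions below get the same effect through the bio_endio() helper):

static inline void my_end_bio(struct bio *bio, int uptodate)
{
        if (uptodate)
                set_bit(BIO_UPTODATE, &bio->bi_flags);
        blk_finished_io(bio_sectors(bio));      /* account the completed sectors */
        bio->bi_end_io(bio);                    /* no status argument any more */
}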
*/ @@ -3182,14 +3112,14 @@ DAC960_V1_CommandMailbox_T *CommandMailbox = &Command->V1.CommandMailbox; Command->BlockNumber += - BufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(BufferHeader) >> DAC960_BlockSizeBits; Command->BlockCount = - NextBufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(NextBufferHeader) >> DAC960_BlockSizeBits; Command->BufferHeader = NextBufferHeader; CommandMailbox->Type5.LD.TransferLength = Command->BlockCount; CommandMailbox->Type5.LogicalBlockAddress = Command->BlockNumber; CommandMailbox->Type5.BusAddress = - Virtual_to_Bus32(NextBufferHeader->b_data); + Virtual_to_Bus32(bio_data(NextBufferHeader)); DAC960_QueueCommand(Command); return; } @@ -3935,13 +3865,13 @@ Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, Command->BlockNumber, Command->BlockNumber + Command->BlockCount - 1); - if (DAC960_PartitionNumber(Command->BufferHeader->b_rdev) > 0) + if (DAC960_PartitionNumber(Command->BufferHeader->bi_dev) > 0) DAC960_Error(" /dev/rd/c%dd%dp%d: relative blocks %u..%u\n", Controller, Controller->ControllerNumber, Command->LogicalDriveNumber, - DAC960_PartitionNumber(Command->BufferHeader->b_rdev), - Command->BufferHeader->b_rsector, - Command->BufferHeader->b_rsector + Command->BlockCount - 1); + DAC960_PartitionNumber(Command->BufferHeader->bi_dev), + Command->BufferHeader->bi_sector, + Command->BufferHeader->bi_sector + Command->BlockCount - 1); } @@ -4210,8 +4140,8 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, true); BufferHeader = NextBufferHeader; } @@ -4225,19 +4155,19 @@ else if (Command->V2.RequestSense.SenseKey == DAC960_SenseKey_MediumError && BufferHeader != NULL && - BufferHeader->b_reqnext != NULL) + BufferHeader->bi_next != NULL) { if (CommandType == DAC960_ReadCommand) Command->CommandType = DAC960_ReadRetryCommand; else Command->CommandType = DAC960_WriteRetryCommand; - Command->BlockCount = BufferHeader->b_size >> DAC960_BlockSizeBits; + Command->BlockCount = bio_size(BufferHeader) >> DAC960_BlockSizeBits; CommandMailbox->SCSI_10.CommandControlBits .AdditionalScatterGatherListMemory = false; CommandMailbox->SCSI_10.DataTransferSize = Command->BlockCount << DAC960_BlockSizeBits; CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0].SegmentDataPointer = - Virtual_to_Bus64(BufferHeader->b_data); + Virtual_to_Bus64(bio_data(BufferHeader)); CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0].SegmentByteCount = CommandMailbox->SCSI_10.DataTransferSize; @@ -4255,8 +4185,8 @@ */ while (BufferHeader != NULL) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; DAC960_ProcessCompletedBuffer(BufferHeader, false); BufferHeader = NextBufferHeader; } @@ -4270,8 +4200,8 @@ else if (CommandType == DAC960_ReadRetryCommand || CommandType == DAC960_WriteRetryCommand) { - BufferHeader_T *NextBufferHeader = BufferHeader->b_reqnext; - BufferHeader->b_reqnext = NULL; + BufferHeader_T *NextBufferHeader = BufferHeader->bi_next; + BufferHeader->bi_next = NULL; /* Perform completion processing for this single buffer. 
*/ @@ -4286,16 +4216,16 @@ if (NextBufferHeader != NULL) { Command->BlockNumber += - BufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(BufferHeader) >> DAC960_BlockSizeBits; Command->BlockCount = - NextBufferHeader->b_size >> DAC960_BlockSizeBits; + bio_size(NextBufferHeader) >> DAC960_BlockSizeBits; Command->BufferHeader = NextBufferHeader; CommandMailbox->SCSI_10.DataTransferSize = Command->BlockCount << DAC960_BlockSizeBits; CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0] .SegmentDataPointer = - Virtual_to_Bus64(NextBufferHeader->b_data); + Virtual_to_Bus64(bio_data(NextBufferHeader)); CommandMailbox->SCSI_10.DataTransferMemoryAddress .ScatterGatherSegments[0] .SegmentByteCount = @@ -5416,7 +5346,8 @@ int LogicalDriveNumber = DAC960_LogicalDriveNumber(Inode->i_rdev); DiskGeometry_T Geometry, *UserGeometry; DAC960_Controller_T *Controller; - int PartitionNumber; + int res; + if (File != NULL && (File->f_flags & O_NONBLOCK)) return DAC960_UserIOCTL(Inode, File, Request, Argument); if (ControllerNumber < 0 || ControllerNumber > DAC960_ControllerCount - 1) @@ -5465,61 +5396,27 @@ LogicalDeviceInfo->ConfigurableDeviceSize / (Geometry.heads * Geometry.sectors); } - Geometry.start = - Controller->GenericDiskInfo.part[MINOR(Inode->i_rdev)].start_sect; + Geometry.start = get_start_sect(Inode->i_rdev); return (copy_to_user(UserGeometry, &Geometry, sizeof(DiskGeometry_T)) ? -EFAULT : 0); case BLKGETSIZE: - /* Get Device Size. */ - if ((unsigned long *) Argument == NULL) return -EINVAL; - return put_user(Controller->GenericDiskInfo.part[MINOR(Inode->i_rdev)] - .nr_sects, - (unsigned long *) Argument); case BLKGETSIZE64: - if ((u64 *) Argument == NULL) return -EINVAL; - return put_user((u64) Controller->GenericDiskInfo - .part[MINOR(Inode->i_rdev)] - .nr_sects << 9, - (u64 *) Argument); case BLKRAGET: case BLKRASET: case BLKFLSBUF: case BLKBSZGET: case BLKBSZSET: return blk_ioctl(Inode->i_rdev, Request, Argument); + case BLKRRPART: /* Re-Read Partition Table. */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (Controller->LogicalDriveUsageCount[LogicalDriveNumber] > 1) return -EBUSY; - for (PartitionNumber = 0; - PartitionNumber < DAC960_MaxPartitions; - PartitionNumber++) - { - KernelDevice_T Device = DAC960_KernelDevice(ControllerNumber, - LogicalDriveNumber, - PartitionNumber); - int MinorNumber = DAC960_MinorNumber(LogicalDriveNumber, - PartitionNumber); - if (Controller->GenericDiskInfo.part[MinorNumber].nr_sects == 0) - continue; - /* - Flush all changes and invalidate buffered state. - */ - invalidate_device(Device, 1); - /* - Clear existing partition sizes. - */ - if (PartitionNumber > 0) - { - Controller->GenericDiskInfo.part[MinorNumber].start_sect = 0; - Controller->GenericDiskInfo.part[MinorNumber].nr_sects = 0; - } - /* - Reset the Block Size so that the partition table can be read. 
- */ - set_blocksize(Device, BLOCK_SIZE); - } + res = wipe_partitions(Inode->i_rdev); + if (res) /* nothing */ + return res; + DAC960_RegisterDisk(Controller, LogicalDriveNumber); return 0; } @@ -5641,11 +5538,11 @@ while (Controller->V1.DirectCommandActive[DCDB.Channel] [DCDB.TargetID]) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&Controller->RequestQueue->queue_lock); __wait_event(Controller->CommandWaitQueue, !Controller->V1.DirectCommandActive [DCDB.Channel][DCDB.TargetID]); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&Controller->RequestQueue->queue_lock); } Controller->V1.DirectCommandActive[DCDB.Channel] [DCDB.TargetID] = true; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/DAC960.h linux/drivers/block/DAC960.h --- /opt/kernel/linux-2.4.14-pre6/drivers/block/DAC960.h Wed Oct 24 12:49:57 2001 +++ linux/drivers/block/DAC960.h Thu Oct 18 09:45:14 2001 @@ -2191,7 +2191,7 @@ of the Linux Kernel and I/O Subsystem. */ -typedef struct buffer_head BufferHeader_T; +typedef struct bio BufferHeader_T; typedef struct file File_T; typedef struct block_device_operations BlockDeviceOperations_T; typedef struct completion Completion_T; @@ -2475,7 +2475,6 @@ DiskPartition_T DiskPartitions[DAC960_MinorCount]; int PartitionSizes[DAC960_MinorCount]; int BlockSizes[DAC960_MinorCount]; - int MaxSectorsPerRequest[DAC960_MinorCount]; unsigned char ProgressBuffer[DAC960_ProgressBufferSize]; unsigned char UserStatusBuffer[DAC960_UserMessageSize]; } @@ -2509,7 +2508,7 @@ void DAC960_AcquireControllerLock(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_lock_irqsave(&io_request_lock, *ProcessorFlags); + spin_lock_irqsave(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } @@ -2521,13 +2520,13 @@ void DAC960_ReleaseControllerLock(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_unlock_irqrestore(&io_request_lock, *ProcessorFlags); + spin_unlock_irqrestore(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } /* DAC960_AcquireControllerLockRF acquires exclusive access to Controller, - but is only called from the request function with the io_request_lock held. + but is only called from the request function with the queue lock held. */ static inline @@ -2539,7 +2538,7 @@ /* DAC960_ReleaseControllerLockRF releases exclusive access to Controller, - but is only called from the request function with the io_request_lock held. + but is only called from the request function with the queue lock held. 
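The DAC960 ioctl rework a little further up is the shape every converted driver ends up with: the per-driver BLKGETSIZE/BLKGETSIZE64 (and related) cases collapse into a blk_ioctl() pass-through, the geometry start sector comes from the new get_start_sect() helper, and BLKRRPART reduces to wipe_partitions() followed by re-registering the disk. A stripped-down sketch (my_ioctl is a hypothetical name; the driver-specific re-scan after wipe_partitions() is omitted):

static int my_ioctl(struct inode *inode, struct file *file,
                    unsigned int cmd, unsigned long arg)
{
        kdev_t dev = inode->i_rdev;

        switch (cmd) {
        case BLKGETSIZE:
        case BLKGETSIZE64:
        case BLKRAGET:
        case BLKRASET:
        case BLKFLSBUF:
        case BLKBSZGET:
        case BLKBSZSET:
                return blk_ioctl(dev, cmd, arg);   /* handled generically now */
        case BLKRRPART:
                if (!capable(CAP_SYS_ADMIN))
                        return -EACCES;
                return wipe_partitions(dev);       /* caller re-reads partitions after this */
        default:
                return -ENOTTY;
        }
}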
*/ static inline @@ -2558,7 +2557,7 @@ void DAC960_AcquireControllerLockIH(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_lock_irqsave(&io_request_lock, *ProcessorFlags); + spin_lock_irqsave(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } @@ -2571,7 +2570,7 @@ void DAC960_ReleaseControllerLockIH(DAC960_Controller_T *Controller, ProcessorFlags_T *ProcessorFlags) { - spin_unlock_irqrestore(&io_request_lock, *ProcessorFlags); + spin_unlock_irqrestore(&Controller->RequestQueue->queue_lock, *ProcessorFlags); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/Makefile linux/drivers/block/Makefile --- /opt/kernel/linux-2.4.14-pre6/drivers/block/Makefile Sun Sep 9 21:00:55 2001 +++ linux/drivers/block/Makefile Mon Oct 15 10:41:43 2001 @@ -10,9 +10,9 @@ O_TARGET := block.o -export-objs := ll_rw_blk.o blkpg.o loop.o DAC960.o genhd.o +export-objs := elevator.o ll_rw_blk.o blkpg.o loop.o DAC960.o genhd.o -obj-y := ll_rw_blk.o blkpg.o genhd.o elevator.o +obj-y := elevator.o ll_rw_blk.o blkpg.o genhd.o obj-$(CONFIG_MAC_FLOPPY) += swim3.o obj-$(CONFIG_BLK_DEV_FD) += floppy.o diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/acsi.c linux/drivers/block/acsi.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/acsi.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/block/acsi.c Tue Oct 30 09:48:43 2001 @@ -1011,7 +1011,6 @@ goto repeat; } - block += acsi_part[dev].start_sect; target = acsi_info[DEVICE_NR(dev)].target; lun = acsi_info[DEVICE_NR(dev)].lun; @@ -1123,7 +1122,7 @@ put_user( 64, &geo->heads ); put_user( 32, &geo->sectors ); put_user( acsi_info[dev].size >> 11, &geo->cylinders ); - put_user( acsi_part[MINOR(inode->i_rdev)].start_sect, &geo->start ); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; } @@ -1134,14 +1133,8 @@ put_user( 0, &((Scsi_Idlun *) arg)->host_unique_id ); return 0; - case BLKGETSIZE: /* Return device size */ - return put_user(acsi_part[MINOR(inode->i_rdev)].nr_sects, - (unsigned long *) arg); - - case BLKGETSIZE64: /* Return device size */ - return put_user((u64)acsi_part[MINOR(inode->i_rdev)].nr_sects << 9, - (u64 *) arg); - + case BLKGETSIZE: + case BLKGETSIZE64: case BLKROSET: case BLKROGET: case BLKFLSBUF: @@ -1858,7 +1851,7 @@ { int device; struct gendisk * gdev; - int max_p, start, i; + int res; struct acsi_info_struct *aip; device = DEVICE_NR(MINOR(dev)); @@ -1873,16 +1866,7 @@ DEVICE_BUSY = 1; sti(); - max_p = gdev->max_p; - start = device << gdev->minor_shift; - - for( i = max_p - 1; i >= 0 ; i-- ) { - if (gdev->part[start + i].nr_sects != 0) { - invalidate_device(MKDEV(MAJOR_NR, start + i), 1); - gdev->part[start + i].nr_sects = 0; - } - gdev->part[start+i].start_sect = 0; - }; + res = wipe_partitions(dev); stdma_lock( NULL, NULL ); @@ -1897,12 +1881,13 @@ ENABLE_IRQ(); stdma_release(); - - grok_partitions(gdev, device, (aip->type==HARDDISK)?1<<4:1, aip->size); + + if (!res) + grok_partitions(dev, aip->size); DEVICE_BUSY = 0; wake_up(&busy_wait); - return 0; + return res; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/amiflop.c linux/drivers/block/amiflop.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/amiflop.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/block/amiflop.c Tue Oct 30 09:48:43 2001 @@ -1895,10 +1895,9 @@ free_irq(IRQ_AMIGA_DSKBLK, NULL); custom.dmacon = DMAF_DISK; /* disable DMA */ amiga_chip_free(raw_buf); - blk_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; 
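Revalidation is consolidated the same way, as the acsi_revalidate() hunk above shows and cciss/cpqarray repeat below: the hand-rolled loop that invalidated each minor and zeroed its hd_struct becomes wipe_partitions(), and grok_partitions() now takes just the device and its size rather than the gendisk/minor bookkeeping. A minimal sketch, assuming capacity is the whole-device size in 512-byte sectors:

static int my_revalidate(kdev_t dev, unsigned long capacity)
{
        int res = wipe_partitions(dev);         /* flush and clear the old partitions */

        if (!res)
                grok_partitions(dev, capacity); /* re-read and register the partition table */
        return res;
}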
blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); release_mem_region(CUSTOM_PHYSADDR+0x20, 8); unregister_blkdev(MAJOR_NR, "fd"); + blk_clear(MAJOR_NR); } #endif diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/blkpg.c linux/drivers/block/blkpg.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/blkpg.c Wed Oct 24 12:49:57 2001 +++ linux/drivers/block/blkpg.c Tue Oct 16 15:35:04 2001 @@ -195,7 +195,12 @@ int blk_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg) { + struct gendisk *g; int intval; + u64 llval; + + if (!dev) + return -EINVAL; switch (cmd) { case BLKROSET: @@ -205,6 +210,7 @@ return -EFAULT; set_device_ro(dev, intval); return 0; + case BLKROGET: intval = (is_read_only(dev) != 0); return put_user(intval, (int *)(arg)); @@ -212,20 +218,47 @@ case BLKRASET: if(!capable(CAP_SYS_ADMIN)) return -EACCES; - if(!dev || arg > 0xff) + if(arg > 0xff) return -EINVAL; read_ahead[MAJOR(dev)] = arg; return 0; + case BLKRAGET: if (!arg) return -EINVAL; return put_user(read_ahead[MAJOR(dev)], (long *) arg); + case BLKFRASET: + { + int *mr; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (!(mr = max_readahead[MAJOR(dev)])) + return -EINVAL; + mr[MINOR(dev)] = arg; + return 0; + } + + case BLKFRAGET: + { + int *mr; + if (!(mr = max_readahead[MAJOR(dev)])) + return -EINVAL; + return put_user(mr[MINOR(dev)], (long *) arg); + } + + case BLKSECTGET: + { + request_queue_t *q = blk_get_queue(dev); + if (!q) + return -ENODEV; + return put_user(q->max_sectors, (unsigned short *) arg); + } + case BLKFLSBUF: if(!capable(CAP_SYS_ADMIN)) return -EACCES; - if (!dev) - return -EINVAL; fsync_dev(dev); invalidate_buffers(dev); return 0; @@ -235,18 +268,19 @@ intval = get_hardsect_size(dev); return put_user(intval, (int *) arg); -#if 0 case BLKGETSIZE: + case BLKGETSIZE64: /* Today get_gendisk() requires a linear scan; add this when dev has pointer type. */ - /* add BLKGETSIZE64 too */ g = get_gendisk(dev); if (!g) - ulongval = 0; + llval = 0; else - ulongval = g->part[MINOR(dev)].nr_sects; - return put_user(ulongval, (unsigned long *) arg); -#endif + llval = g->part[MINOR(dev)].nr_sects; + if (cmd == BLKGETSIZE) + return put_user((unsigned long) llval, (unsigned long *) arg); + else + return put_user(llval, (u64 *) arg); #if 0 case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) @@ -257,12 +291,35 @@ case BLKPG: return blkpg_ioctl(dev, (struct blkpg_ioctl_arg *) arg); + /* + * deprecated, use the /proc/iosched interface instead + */ case BLKELVGET: - return blkelvget_ioctl(&blk_get_queue(dev)->elevator, - (blkelv_ioctl_arg_t *) arg); case BLKELVSET: - return blkelvset_ioctl(&blk_get_queue(dev)->elevator, - (blkelv_ioctl_arg_t *) arg); + return -ENOTTY; + + case BLKHASHPROF: { +#ifdef BIO_HASH_PROFILING + request_queue_t *q = blk_get_queue(dev); + if (!q) + return -EINVAL; + if (copy_to_user((struct bio_hash_stats *) arg, &q->queue_hash.st, sizeof(struct bio_hash_stats))) + return -EFAULT; +#endif + return 0; + } + + case BLKHASHCLEAR: { +#ifdef BIO_HASH_PROFILING + request_queue_t *q = blk_get_queue(dev); + if (!q) + return -EINVAL; + spin_lock_irq(&q->queue_lock); + memset(&q->queue_hash.st, 0, sizeof(struct bio_hash_stats)); + spin_unlock_irq(&q->queue_lock); +#endif + return 0; + } case BLKBSZGET: /* get the logical block size (cf. 
BLKSSZGET) */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/cciss.c linux/drivers/block/cciss.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/cciss.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/block/cciss.c Tue Oct 30 09:48:43 2001 @@ -84,7 +84,7 @@ #define MAX_CONFIG_WAIT 1000 #define READ_AHEAD 128 -#define NR_CMDS 128 /* #commands that can be outstanding */ +#define NR_CMDS 384 /* #commands that can be outstanding */ #define MAX_CTLR 8 #define CCISS_DMA_MASK 0xFFFFFFFF /* 32 bit DMA */ @@ -147,7 +147,6 @@ " IRQ: %d\n" " Logical drives: %d\n" " Current Q depth: %d\n" - " Current # commands on controller %d\n" " Max Q depth since init: %d\n" " Max # commands on controller since init: %d\n" " Max SG entries since init: %d\n\n", @@ -158,8 +157,7 @@ (unsigned long)h->vaddr, (unsigned int)h->intr, h->num_luns, - h->Qdepth, h->commands_outstanding, - h->maxQsinceinit, h->max_outstanding, h->maxSG); + h->Qdepth, h->maxQsinceinit, h->max_outstanding, h->maxSG); pos += size; len += size; for(i=0; inum_luns; i++) { @@ -237,7 +235,7 @@ i = find_first_zero_bit(h->cmd_pool_bits, NR_CMDS); if (i == NR_CMDS) return NULL; - } while(test_and_set_bit(i%32, h->cmd_pool_bits+(i/32)) != 0); + } while(test_and_set_bit(i & 31, h->cmd_pool_bits+(i/32)) != 0); #ifdef CCISS_DEBUG printk(KERN_DEBUG "cciss: using command buffer %d\n", i); #endif @@ -308,13 +306,10 @@ /* for each partition */ for(j=0; jblocksizes[(i<hardsizes[ (i<block_size; - } hba[ctlr]->gendisk.nr_real++; + (BLK_DEFAULT_QUEUE(MAJOR_NR + ctlr))->hardsect_size = drv->block_size; } } /* @@ -377,8 +372,6 @@ { int ctlr = MAJOR(inode->i_rdev) - MAJOR_NR; int dsk = MINOR(inode->i_rdev) >> NWD_SHIFT; - int diskinfo[4]; - struct hd_geometry *geo = (struct hd_geometry *)arg; #ifdef CCISS_DEBUG printk(KERN_DEBUG "cciss_ioctl: Called with cmd=%x %lx\n", cmd, arg); @@ -386,6 +379,10 @@ switch(cmd) { case HDIO_GETGEO: + { + struct hd_geometry *geo = (struct hd_geometry *)arg; + int diskinfo[4]; + if (hba[ctlr]->drv[dsk].cylinders) { diskinfo[0] = hba[ctlr]->drv[dsk].heads; diskinfo[1] = hba[ctlr]->drv[dsk].sectors; @@ -393,20 +390,18 @@ } else { diskinfo[0] = 0xff; diskinfo[1] = 0x3f; - diskinfo[2] = hba[ctlr]->drv[dsk].nr_blocks / (0xff*0x3f); } + diskinfo[2] = hba[ctlr]->drv[dsk].nr_blocks / (0xff*0x3f); + } put_user(diskinfo[0], &geo->heads); put_user(diskinfo[1], &geo->sectors); put_user(diskinfo[2], &geo->cylinders); - put_user(hba[ctlr]->hd[MINOR(inode->i_rdev)].start_sect, &geo->start); - return 0; - case BLKGETSIZE: - put_user(hba[ctlr]->hd[MINOR(inode->i_rdev)].nr_sects, (unsigned long *)arg); - return 0; - case BLKGETSIZE64: - put_user((u64)hba[ctlr]->hd[MINOR(inode->i_rdev)].nr_sects << 9, (u64*)arg); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; + } case BLKRRPART: return revalidate_logvol(inode->i_rdev, 1); + case BLKGETSIZE: + case BLKGETSIZE64: case BLKFLSBUF: case BLKBSZSET: case BLKBSZGET: @@ -415,9 +410,7 @@ case BLKRASET: case BLKRAGET: case BLKPG: - case BLKELVGET: - case BLKELVSET: - return( blk_ioctl(inode->i_rdev, cmd, arg)); + return blk_ioctl(inode->i_rdev, cmd, arg); case CCISS_GETPCIINFO: { cciss_pci_info_struct pciinfo; @@ -459,16 +452,7 @@ // printk("cciss_ioctl: delay and count cannot be 0\n"); return( -EINVAL); } - spin_lock_irqsave(&io_request_lock, flags); - /* Can only safely update if no commands outstanding */ - if (c->commands_outstanding > 0 ) - { -// printk("cciss_ioctl: cannot change coalasing " -// "%d commands outstanding on controller\n", -// 
c->commands_outstanding); - spin_unlock_irqrestore(&io_request_lock, flags); - return(-EINVAL); - } + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); /* Update the field, and then ring the doorbell */ writel( intinfo.delay, &(c->cfgtable->HostWrite.CoalIntDelay)); @@ -484,7 +468,7 @@ /* delay and try again */ udelay(1000); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); if (i >= MAX_CONFIG_WAIT) return( -EFAULT); return(0); @@ -515,7 +499,7 @@ if (copy_from_user(NodeName, (void *) arg, sizeof( NodeName_type))) return -EFAULT; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); /* Update the field, and then ring the doorbell */ for(i=0;i<16;i++) @@ -531,7 +515,7 @@ /* delay and try again */ udelay(1000); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); if (i >= MAX_CONFIG_WAIT) return( -EFAULT); return(0); @@ -658,11 +642,11 @@ c->SG[0].Ext = 0; // we are not chaining } /* Put the request on the tail of the request queue */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); addQ(&h->reqQ, c); h->Qdepth++; start_io(h); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); /* Wait for completion */ while(c->cmd_type != CMD_IOCTL_DONE) @@ -710,42 +694,32 @@ int ctlr, target; struct gendisk *gdev; unsigned long flags; - int max_p; - int start; - int i; + int res; target = MINOR(dev) >> NWD_SHIFT; ctlr = MAJOR(dev) - MAJOR_NR; gdev = &(hba[ctlr]->gendisk); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); if (hba[ctlr]->drv[target].usage_count > maxusage) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); printk(KERN_WARNING "cciss: Device busy for " "revalidation (usage=%d)\n", hba[ctlr]->drv[target].usage_count); return -EBUSY; } hba[ctlr]->drv[target].usage_count++; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); - max_p = gdev->max_p; - start = target << gdev->minor_shift; + res = wipe_partitions(dev); + if (res) + goto leave; - for(i=max_p-1; i>=0; i--) { - int minor = start+i; - invalidate_device(MKDEV(MAJOR_NR + ctlr, minor), 1); - gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; - - /* reset the blocksize so we can read the partition table */ - blksize_size[MAJOR_NR+ctlr][minor] = 1024; - } /* setup partitions per disk */ - grok_partitions(gdev, target, MAX_PART, - hba[ctlr]->drv[target].nr_blocks); + grok_partitions(dev, hba[ctlr]->drv[target].nr_blocks); +leave: hba[ctlr]->drv[target].usage_count--; - return 0; + return res; } static int frevalidate_logvol(kdev_t dev) @@ -776,15 +750,15 @@ if (MINOR(dev) != 0) return -ENXIO; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(ctlr), flags); if (hba[ctlr]->usage_count > 1) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); printk(KERN_WARNING "cciss: Device busy for volume" " revalidation (usage=%d)\n", hba[ctlr]->usage_count); return -EBUSY; } - spin_unlock_irqrestore(&io_request_lock, flags); hba[ctlr]->usage_count++; + spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags); /* * Set the partition and block size structures for all volumes @@ -793,7 +767,6 @@ memset(hba[ctlr]->hd, 0, sizeof(struct hd_struct) * 256); memset(hba[ctlr]->sizes, 0, sizeof(int) * 256); 
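Throughout the cciss changes above (and cpqarray further down), the global io_request_lock is replaced by the queue_lock of the controller's default request queue; the CCISS_LOCK()/IDA_LOCK() macros added in the headers wrap that lookup so the interrupt handler, the ioctl paths and the request function all agree on the lock. The queueing pattern, in sketch form:

        /* CCISS_LOCK(ctlr) is defined in cciss.h as
         * &((BLK_DEFAULT_QUEUE(MAJOR_NR + ctlr))->queue_lock) */
        spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
        addQ(&h->reqQ, c);              /* queue the command on the controller */
        h->Qdepth++;
        start_io(h);                    /* kick the hardware */
        spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);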
memset(hba[ctlr]->blocksizes, 0, sizeof(int) * 256); - memset(hba[ctlr]->hardsizes, 0, sizeof(int) * 256); memset(hba[ctlr]->drv, 0, sizeof(drive_info_struct) * CISS_MAX_LUN); hba[ctlr]->gendisk.nr_real = 0; @@ -1089,11 +1062,11 @@ while(( c = h->reqQ) != NULL ) { /* can't do anything if fifo is full */ - if ((h->access.fifo_full(h))) - { - printk(KERN_WARNING "cciss: fifo full \n"); - return; + if ((h->access.fifo_full(h))) { + printk(KERN_WARNING "cciss: fifo full\n"); + break; } + /* Get the frist entry from the Request Q */ removeQ(&(h->reqQ), c); h->Qdepth--; @@ -1106,17 +1079,16 @@ } } -static inline void complete_buffers( struct buffer_head *bh, int status) +static inline void complete_buffers( struct bio *bio, int status) { - struct buffer_head *xbh; + struct bio *xbh; - while(bh) - { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, status); - bh = xbh; + while(bio) { + xbh = bio->bi_next; + bio->bi_next = NULL; + blk_finished_io(bio_sectors(bio)); + bio_endio(bio, status); + bio = xbh; } } /* checks the status of the job and calls complete buffers to mark all @@ -1135,7 +1107,7 @@ { temp64.val32.lower = cmd->SG[i].Addr.lower; temp64.val32.upper = cmd->SG[i].Addr.upper; - pci_unmap_single(hba[cmd->ctlr]->pdev, + pci_unmap_page(hba[cmd->ctlr]->pdev, temp64.val, cmd->SG[i].Len, (cmd->Request.Type.Direction == XFER_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); @@ -1214,79 +1186,33 @@ status=0; } } - complete_buffers(cmd->bh, status); -} - - -static inline int cpq_new_segment(request_queue_t *q, struct request *rq, - int max_segments) -{ - if (rq->nr_segments < MAXSGENTRIES) { - rq->nr_segments++; - return 1; - } - return 0; -} - -static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (bh->b_data + bh->b_size == rq->bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, - struct request *nxt, int max_segments) -{ - int total_segments = rq->nr_segments + nxt->nr_segments; - - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) - total_segments--; - - if (total_segments > MAXSGENTRIES) - return 0; - - rq->nr_segments = total_segments; - return 1; + complete_buffers(cmd->bio, status); } /* * Get a request and submit it to the controller. - * Currently we do one request at a time. Ideally we would like to send - * everything to the controller on the first call, but there is a danger - * of holding the io_request_lock for to long. 
*/ static void do_cciss_request(request_queue_t *q) { ctlr_info_t *h= q->queuedata; CommandList_struct *c; int log_unit, start_blk, seg, sect; - char *lastdataend; - struct buffer_head *bh; + unsigned long long lastdataend; + struct bio *bio; struct list_head *queue_head = &q->queue_head; struct request *creq; u64bit temp64; - struct my_sg tmp_sg[MAXSGENTRIES]; - int i; + struct scatterlist tmp_sg[MAXSGENTRIES]; + int i, dir; - // Loop till the queue is empty if or it is plugged - while (1) - { - if (q->plugged || list_empty(queue_head)) { - start_io(h); - return; - } + if (blk_queue_plugged(q)) + goto startio; + +queue: + if (list_empty(queue_head)) + goto startio; - creq = blkdev_entry_next_request(queue_head); + creq = elv_next_request(q); if (creq->nr_segments > MAXSGENTRIES) BUG(); @@ -1295,18 +1221,15 @@ printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); - complete_buffers(creq->bh, 0); - start_io(h); - return; + complete_buffers(creq->bio, 0); + goto startio; } if (( c = cmd_alloc(h, 1)) == NULL) - { - start_io(h); - return; - } + goto startio; + c->cmd_type = CMD_RWREQ; - bh = c->bh = creq->bh; + bio = c->bio = creq->bio; /* fill in the request */ log_unit = MINOR(creq->rq_dev) >> NWD_SHIFT; @@ -1321,43 +1244,43 @@ (creq->cmd == READ) ? XFER_READ: XFER_WRITE; c->Request.Timeout = 0; // Don't time out c->Request.CDB[0] = (creq->cmd == READ) ? CCISS_READ : CCISS_WRITE; - start_blk = hba[h->ctlr]->hd[MINOR(creq->rq_dev)].start_sect + creq->sector; + start_blk = creq->sector; #ifdef CCISS_DEBUG - if (bh == NULL) - panic("cciss: bh== NULL?"); + if (bio == NULL) + panic("cciss: bio== NULL?"); printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",(int) creq->sector, (int) creq->nr_sectors); #endif /* CCISS_DEBUG */ - seg = 0; - lastdataend = NULL; - sect = 0; - while(bh) - { - sect += bh->b_size/512; - if (bh->b_data == lastdataend) + seg = sect = 0; + lastdataend = ~0ULL; + while(bio) { + sect += bio_sectors(bio); + if (bio_to_phys(bio) == lastdataend) { // tack it on to the last segment - tmp_sg[seg-1].len +=bh->b_size; - lastdataend += bh->b_size; - } else - { + tmp_sg[seg-1].length += bio_size(bio); + lastdataend += bio_size(bio); + } else { if (seg == MAXSGENTRIES) BUG(); - tmp_sg[seg].len = bh->b_size; - tmp_sg[seg].start_addr = bh->b_data; - lastdataend = bh->b_data + bh->b_size; + tmp_sg[seg].length = bio_size(bio); + tmp_sg[seg].offset = bio_offset(bio); + tmp_sg[seg].page = bio_page(bio); + lastdataend = bio_to_phys(bio) + bio_size(bio); seg++; } - bh = bh->b_reqnext; + bio = bio->bi_next; } /* get the DMA records for the setup */ + if (c->Request.Type.Direction == XFER_READ) + dir = PCI_DMA_FROMDEVICE; + else + dir = PCI_DMA_TODEVICE; for (i=0; iSG[i].Len = tmp_sg[i].len; - temp64.val = (__u64) pci_map_single( h->pdev, - tmp_sg[i].start_addr, - tmp_sg[i].len, - (c->Request.Type.Direction == XFER_READ) ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + c->SG[i].Len = tmp_sg[i].length; + temp64.val = (__u64) pci_map_page(h->pdev, tmp_sg[i].page, + tmp_sg[i].offset, tmp_sg[i].length, + dir); c->SG[i].Addr.lower = temp64.val32.lower; c->SG[i].Addr.upper = temp64.val32.upper; c->SG[i].Ext = 0; // we are not chaining @@ -1381,10 +1304,8 @@ c->Request.CDB[8]= sect & 0xff; c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0; - blkdev_dequeue_request(creq); - /* * ehh, we can't really end the request here since it's not * even started yet. 
for now it shouldn't hurt though @@ -1398,7 +1319,10 @@ h->Qdepth++; if(h->Qdepth > h->maxQsinceinit) h->maxQsinceinit = h->Qdepth; - } // while loop + + goto queue; +startio: + start_io(h); } static void do_cciss_intr(int irq, void *dev_id, struct pt_regs *regs) @@ -1417,7 +1341,7 @@ * If there are completed commands in the completion queue, * we had better do something about it. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); while( h->access.intr_pending(h)) { while((a = h->access.command_completed(h)) != FIFO_EMPTY) @@ -1450,11 +1374,16 @@ } } } + /* * See if we can queue up some more IO */ +#if 0 + blk_wake_queue(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); +#else do_cciss_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); - spin_unlock_irqrestore(&io_request_lock, flags); +#endif + spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); } /* * We cannot read the structure directly, for portablity we must use @@ -1876,7 +1805,18 @@ sprintf(hba[i]->devname, "cciss%d", i); hba[i]->ctlr = i; hba[i]->pdev = pdev; - + + /* configure PCI DMA stuff */ + if (!pci_set_dma_mask(pdev, (u64) 0xffffffffffffffff)) + printk("cciss: using DAC cycles\n"); + else if (!pci_set_dma_mask(pdev, 0xffffffff)) + printk("cciss: not using DAC cycles\n"); + else { + printk("cciss: no suitable DMA available\n"); + free_hba(i); + return -ENODEV; + } + if( register_blkdev(MAJOR_NR+i, hba[i]->devname, &cciss_fops)) { printk(KERN_ERR "cciss: Unable to get major number " @@ -1945,20 +1885,16 @@ q = BLK_DEFAULT_QUEUE(MAJOR_NR + i); q->queuedata = hba[i]; - blk_init_queue(q, do_cciss_request); + blk_init_queue(q, do_cciss_request, hba[i]->devname); blk_queue_headactive(q, 0); + blk_queue_bounce_limit(q, hba[i]->pdev->dma_mask); + q->max_segments = MAXSGENTRIES; + blk_queue_max_sectors(q, 512); /* fill in the other Kernel structs */ blksize_size[MAJOR_NR+i] = hba[i]->blocksizes; - hardsect_size[MAJOR_NR+i] = hba[i]->hardsizes; read_ahead[MAJOR_NR+i] = READ_AHEAD; - /* Set the pointers to queue functions */ - q->back_merge_fn = cpq_back_merge_fn; - q->front_merge_fn = cpq_front_merge_fn; - q->merge_requests_fn = cpq_merge_requests_fn; - - /* Fill in the gendisk data */ hba[i]->gendisk.major = MAJOR_NR + i; hba[i]->gendisk.major_name = "cciss"; @@ -2007,12 +1943,11 @@ unregister_blkdev(MAJOR_NR+i, hba[i]->devname); remove_proc_entry(hba[i]->devname, proc_cciss); - /* remove it from the disk list */ del_gendisk(&(hba[i]->gendisk)); - pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(CommandList_struct), - hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle); + pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(CommandList_struct), + hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle); pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof( ErrorInfo_struct), hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle); kfree(hba[i]->cmd_pool_bits); @@ -2020,32 +1955,31 @@ } static struct pci_driver cciss_pci_driver = { - name: "cciss", - probe: cciss_init_one, - remove: cciss_remove_one, - id_table: cciss_pci_device_id, /* id_table */ + name: "cciss", + probe: cciss_init_one, + remove: cciss_remove_one, + id_table: cciss_pci_device_id, /* id_table */ }; /* -* This is it. Register the PCI driver information for the cards we control -* the OS will call our registered routines when it finds one of our cards. -*/ + * This is it. Register the PCI driver information for the cards we control + * the OS will call our registered routines when it finds one of our cards. 
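The probe-time additions above are what make highmem I/O practical: the driver negotiates the widest DMA mask the device supports and then passes that mask to the block layer as a bounce limit, so only pages the controller genuinely cannot address get bounced. The sequence, condensed from the cciss hunks above:

        /* try 64-bit (DAC) addressing first, then fall back to 32-bit */
        if (pci_set_dma_mask(pdev, 0xffffffffffffffffULL) &&
            pci_set_dma_mask(pdev, 0xffffffff))
                return -ENODEV;                         /* no usable DMA addressing */

        blk_init_queue(q, do_cciss_request, hba[i]->devname);
        blk_queue_bounce_limit(q, pdev->dma_mask);      /* bounce only unreachable pages */
        q->max_segments = MAXSGENTRIES;
        blk_queue_max_sectors(q, 512);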
+ */ int __init cciss_init(void) { - printk(KERN_INFO DRIVER_NAME "\n"); + /* Register for out PCI devices */ if (pci_register_driver(&cciss_pci_driver) > 0 ) return 0; else return -ENODEV; - } +} EXPORT_NO_SYMBOLS; static int __init init_cciss_module(void) { - return ( cciss_init()); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/cciss.h linux/drivers/block/cciss.h --- /opt/kernel/linux-2.4.14-pre6/drivers/block/cciss.h Tue May 22 19:23:16 2001 +++ linux/drivers/block/cciss.h Thu Nov 1 11:24:06 2001 @@ -15,11 +15,6 @@ #define MAJOR_NR COMPAQ_CISS_MAJOR -struct my_sg { - int len; - char *start_addr; -}; - struct ctlr_info; typedef struct ctlr_info ctlr_info_t; @@ -85,9 +80,8 @@ struct gendisk gendisk; // indexed by minor numbers struct hd_struct hd[256]; - int sizes[256]; + int sizes[256]; int blocksizes[256]; - int hardsizes[256]; }; /* Defining the diffent access_menthods */ @@ -247,5 +241,8 @@ char *product_name; struct access_method *access; }; + +#define CCISS_LOCK(i) (&((BLK_DEFAULT_QUEUE(MAJOR_NR + i))->queue_lock)) + #endif /* CCISS_H */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/cciss_cmd.h linux/drivers/block/cciss_cmd.h --- /opt/kernel/linux-2.4.14-pre6/drivers/block/cciss_cmd.h Tue May 22 19:23:16 2001 +++ linux/drivers/block/cciss_cmd.h Mon Oct 15 10:41:43 2001 @@ -7,7 +7,7 @@ //general boundary defintions #define SENSEINFOBYTES 32//note that this value may vary between host implementations -#define MAXSGENTRIES 31 +#define MAXSGENTRIES 32 #define MAXREPLYQS 256 //Command Status value @@ -228,7 +228,7 @@ int cmd_type; struct _CommandList_struct *prev; struct _CommandList_struct *next; - struct buffer_head * bh; + struct bio * bio; } CommandList_struct; //Configuration Table Structure diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/cpqarray.c linux/drivers/block/cpqarray.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/cpqarray.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/block/cpqarray.c Tue Oct 30 09:48:43 2001 @@ -100,7 +100,6 @@ static struct hd_struct * ida; static int * ida_sizes; static int * ida_blocksizes; -static int * ida_hardsizes; static struct gendisk ida_gendisk[MAX_CTLR]; static struct proc_dir_entry *proc_array; @@ -145,7 +144,7 @@ static inline void addQ(cmdlist_t **Qptr, cmdlist_t *c); static inline cmdlist_t *removeQ(cmdlist_t **Qptr, cmdlist_t *c); -static inline void complete_buffers(struct buffer_head *bh, int ok); +static inline void complete_buffers(struct bio *bio, int ok); static inline void complete_command(cmdlist_t *cmd, int timeout); static void do_ida_intr(int irq, void *dev_id, struct pt_regs * regs); @@ -176,12 +175,11 @@ ida_sizes[(ctlr<nr_blks; - for(j=0; j<16; j++) { + for(j=0; j<16; j++) ida_blocksizes[(ctlr<blk_size; - } + + (BLK_DEFAULT_QUEUE(MAJOR_NR + ctlr))->hardsect_size = drv->blk_size; ida_gendisk[ctlr].nr_real++; } @@ -341,52 +339,10 @@ remove_proc_entry("cpqarray", proc_root_driver); kfree(ida); kfree(ida_sizes); - kfree(ida_hardsizes); kfree(ida_blocksizes); } #endif /* MODULE */ -static inline int cpq_new_segment(request_queue_t *q, struct request *rq, - int max_segments) -{ - if (rq->nr_segments < SG_MAX) { - rq->nr_segments++; - return 1; - } - return 0; -} - -static int cpq_back_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (rq->bhtail->b_data + rq->bhtail->b_size == bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - 
-static int cpq_front_merge_fn(request_queue_t *q, struct request *rq, - struct buffer_head *bh, int max_segments) -{ - if (bh->b_data + bh->b_size == rq->bh->b_data) - return 1; - return cpq_new_segment(q, rq, max_segments); -} - -static int cpq_merge_requests_fn(request_queue_t *q, struct request *rq, - struct request *nxt, int max_segments) -{ - int total_segments = rq->nr_segments + nxt->nr_segments; - - if (rq->bhtail->b_data + rq->bhtail->b_size == nxt->bh->b_data) - total_segments--; - - if (total_segments > SG_MAX) - return 0; - - rq->nr_segments = total_segments; - return 1; -} - /* * This is it. Find all the controllers and register them. I really hate * stealing all these major device numbers. @@ -433,20 +389,9 @@ return(num_cntlrs_reg); } - ida_hardsizes = kmalloc(sizeof(int)*nr_ctlr*NWD*16, GFP_KERNEL); - if(ida_hardsizes==NULL) - { - kfree(ida); - kfree(ida_sizes); - kfree(ida_blocksizes); - printk( KERN_ERR "cpqarray: out of memory"); - return(num_cntlrs_reg); - } - memset(ida, 0, sizeof(struct hd_struct)*nr_ctlr*NWD*16); memset(ida_sizes, 0, sizeof(int)*nr_ctlr*NWD*16); memset(ida_blocksizes, 0, sizeof(int)*nr_ctlr*NWD*16); - memset(ida_hardsizes, 0, sizeof(int)*nr_ctlr*NWD*16); memset(ida_gendisk, 0, sizeof(struct gendisk)*MAX_CTLR); /* @@ -504,7 +449,6 @@ { kfree(ida); kfree(ida_sizes); - kfree(ida_hardsizes); kfree(ida_blocksizes); } return(num_cntlrs_reg); @@ -523,16 +467,13 @@ q = BLK_DEFAULT_QUEUE(MAJOR_NR + i); q->queuedata = hba[i]; - blk_init_queue(q, do_ida_request); + blk_init_queue(q, do_ida_request, hba[i]->devname); blk_queue_headactive(q, 0); + blk_queue_bounce_limit(q, hba[i]->pci_dev->dma_mask); + q->max_segments = SG_MAX; blksize_size[MAJOR_NR+i] = ida_blocksizes + (i*256); - hardsect_size[MAJOR_NR+i] = ida_hardsizes + (i*256); read_ahead[MAJOR_NR+i] = READ_AHEAD; - q->back_merge_fn = cpq_back_merge_fn; - q->front_merge_fn = cpq_front_merge_fn; - q->merge_requests_fn = cpq_merge_requests_fn; - ida_gendisk[i].major = MAJOR_NR + i; ida_gendisk[i].major_name = "ida"; ida_gendisk[i].minor_shift = NWD_SHIFT; @@ -912,22 +853,27 @@ ctlr_info_t *h = q->queuedata; cmdlist_t *c; int seg, sect; - char *lastdataend; struct list_head * queue_head = &q->queue_head; - struct buffer_head *bh; + struct bio *bio; struct request *creq; - struct my_sg tmp_sg[SG_MAX]; - int i; + struct scatterlist tmp_sg[SG_MAX]; + unsigned long lastdataend; + int i, dir; -// Loop till the queue is empty if or it is plugged + if (blk_queue_plugged(q)) { + start_io(h); + return; + } + +// Loop till the queue is empty while (1) { - if (q->plugged || list_empty(queue_head)) { + if (list_empty(queue_head)) { start_io(h); return; } - creq = blkdev_entry_next_request(queue_head); + creq = elv_next_request(q); if (creq->nr_segments > SG_MAX) BUG(); @@ -936,7 +882,7 @@ printk(KERN_WARNING "doreq cmd for %d, %x at %p\n", h->ctlr, creq->rq_dev, creq); blkdev_dequeue_request(creq); - complete_buffers(creq->bh, 0); + complete_buffers(creq->bio, 0); start_io(h); return; } @@ -947,47 +893,51 @@ return; } - bh = creq->bh; + bio = creq->bio; c->ctlr = h->ctlr; c->hdr.unit = MINOR(creq->rq_dev) >> NWD_SHIFT; c->hdr.size = sizeof(rblk_t) >> 2; c->size += sizeof(rblk_t); - c->req.hdr.blk = ida[(h->ctlr<rq_dev)].start_sect + creq->sector; - c->bh = bh; + c->req.hdr.blk = creq->sector; + c->bio = bio; DBGPX( - if (bh == NULL) - panic("bh == NULL?"); + if (bio == NULL) + panic("bio == NULL?"); printk("sector=%d, nr_sectors=%d\n", creq->sector, creq->nr_sectors); ); - seg = 0; lastdataend = NULL; - sect = 0; - 
while(bh) { - sect += bh->b_size/512; - if (bh->b_data == lastdataend) { - tmp_sg[seg-1].size += bh->b_size; - lastdataend += bh->b_size; + seg = sect = 0; + lastdataend = ~0UL; + while(bio) { + sect += bio_sectors(bio); + if (bio_to_phys(bio) == lastdataend) { + tmp_sg[seg-1].length += bio_size(bio); + lastdataend += bio_size(bio); } else { if (seg == SG_MAX) BUG(); - tmp_sg[seg].size = bh->b_size; - tmp_sg[seg].start_addr = bh->b_data; - lastdataend = bh->b_data + bh->b_size; + tmp_sg[seg].length = bio_size(bio); + tmp_sg[seg].page = bio_page(bio); + tmp_sg[seg].offset = bio_offset(bio); + lastdataend = bio_to_phys(bio) + bio_size(bio); seg++; } - bh = bh->b_reqnext; + bio = bio->bi_next; } /* Now do all the DMA Mappings */ + if (creq->cmd == READ) + dir = PCI_DMA_FROMDEVICE; + else + dir = PCI_DMA_TODEVICE; for( i=0; i < seg; i++) { - c->req.sg[i].size = tmp_sg[i].size; - c->req.sg[i].addr = (__u32) pci_map_single( - h->pci_dev, tmp_sg[i].start_addr, - tmp_sg[i].size, - (creq->cmd == READ) ? - PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + c->req.sg[i].size = tmp_sg[i].length; + c->req.sg[i].addr = (__u32) pci_map_page(h->pci_dev, + tmp_sg[i].page, + tmp_sg[i].offset, + tmp_sg[i].length, dir); } DBGPX( printk("Submitting %d sectors in %d segments\n", sect, seg); ); c->req.hdr.sg_cnt = seg; @@ -1049,17 +999,17 @@ } } -static inline void complete_buffers(struct buffer_head *bh, int ok) +static inline void complete_buffers(struct bio *bio, int ok) { - struct buffer_head *xbh; - while(bh) { - xbh = bh->b_reqnext; - bh->b_reqnext = NULL; + struct bio *xbh; + while(bio) { + xbh = bio->bi_next; + bio->bi_next = NULL; - blk_finished_io(bh->b_size >> 9); - bh->b_end_io(bh, ok); + blk_finished_io(bio_sectors(bio)); + bio_endio(bio, ok); - bh = xbh; + bio = xbh; } } /* @@ -1068,7 +1018,7 @@ static inline void complete_command(cmdlist_t *cmd, int timeout) { int ok=1; - int i; + int i, ddir; if (cmd->req.hdr.rcode & RCODE_NONFATAL && (hba[cmd->ctlr]->misc_tflags & MISC_NONFATAL_WARN) == 0) { @@ -1090,13 +1040,15 @@ } if (timeout) ok = 0; /* unmap the DMA mapping for all the scatter gather elements */ + if (cmd->req.hdr.cmd == IDA_READ) + ddir = PCI_DMA_FROMDEVICE; + else + ddir = PCI_DMA_TODEVICE; for(i=0; ireq.hdr.sg_cnt; i++) - { - pci_unmap_single(hba[cmd->ctlr]->pci_dev, - cmd->req.sg[i].addr, cmd->req.sg[i].size, - (cmd->req.hdr.cmd == IDA_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); - } - complete_buffers(cmd->bh, ok); + pci_unmap_page(hba[cmd->ctlr]->pci_dev, cmd->req.sg[i].addr, + cmd->req.sg[i].size, ddir); + + complete_buffers(cmd->bio, ok); } /* @@ -1121,7 +1073,7 @@ * If there are completed commands in the completion queue, * we had better do something about it. 
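Both rewritten request functions (do_cciss_request() and do_ida_request() above) build their hardware scatter/gather lists the same way: walk the request's bio chain, merge bios that are physically contiguous, describe each segment as page + offset + length, and map it with pci_map_page() so the data is allowed to live in highmem. A consolidated sketch; my_map_request() and my_fill_hw_segment() are hypothetical, and the controller-specific descriptor format is left out:

static int my_map_request(struct pci_dev *pdev, struct request *rq,
                          struct scatterlist *sg, int max_seg)
{
        unsigned long long lastdataend = ~0ULL;
        struct bio *bio;
        int seg = 0, i, dir;

        for (bio = rq->bio; bio; bio = bio->bi_next) {
                if (bio_to_phys(bio) == lastdataend) {
                        /* physically contiguous with the previous bio: extend */
                        sg[seg - 1].length += bio_size(bio);
                        lastdataend += bio_size(bio);
                } else {
                        BUG_ON(seg == max_seg);
                        sg[seg].page   = bio_page(bio);
                        sg[seg].offset = bio_offset(bio);
                        sg[seg].length = bio_size(bio);
                        lastdataend = bio_to_phys(bio) + bio_size(bio);
                        seg++;
                }
        }

        dir = (rq->cmd == READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE;
        for (i = 0; i < seg; i++) {
                dma_addr_t addr = pci_map_page(pdev, sg[i].page, sg[i].offset,
                                               sg[i].length, dir);
                my_fill_hw_segment(i, addr, sg[i].length);
        }
        return seg;
}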
*/ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(h->ctlr), flags); if (istat & FIFO_NOT_EMPTY) { while((a = h->access.command_completed(h))) { a1 = a; a &= ~3; @@ -1164,8 +1116,12 @@ /* * See if we can queue up some more IO */ +#if 0 + blk_wake_queue(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); +#else do_ida_request(BLK_DEFAULT_QUEUE(MAJOR_NR + h->ctlr)); - spin_unlock_irqrestore(&io_request_lock, flags); +#endif + spin_unlock_irqrestore(IDA_LOCK(h->ctlr), flags); } /* @@ -1211,14 +1167,10 @@ put_user(diskinfo[0], &geo->heads); put_user(diskinfo[1], &geo->sectors); put_user(diskinfo[2], &geo->cylinders); - put_user(ida[(ctlr<i_rdev)].start_sect, &geo->start); + put_user(get_start_sect(inode->i_rdev), &geo->start); return 0; case IDAGETDRVINFO: return copy_to_user(&io->c.drv,&hba[ctlr]->drv[dsk],sizeof(drv_info_t)); - case BLKGETSIZE: - return put_user(ida[(ctlr<i_rdev)].nr_sects, (unsigned long *)arg); - case BLKGETSIZE64: - return put_user((u64)(ida[(ctlr<i_rdev)].nr_sects) << 9, (u64*)arg); case BLKRRPART: return revalidate_logvol(inode->i_rdev, 1); case IDAPASSTHRU: @@ -1254,6 +1206,8 @@ return(0); } + case BLKGETSIZE: + case BLKGETSIZE64: case BLKFLSBUF: case BLKBSZSET: case BLKBSZGET: @@ -1261,8 +1215,6 @@ case BLKROGET: case BLKRASET: case BLKRAGET: - case BLKELVGET: - case BLKELVSET: case BLKPG: return blk_ioctl(inode->i_rdev, cmd, arg); @@ -1362,11 +1314,11 @@ } /* Put the request on the tail of the request queue */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); addQ(&h->reqQ, c); h->Qdepth++; start_io(h); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); /* Wait for completion */ while(c->type != CMD_IOCTL_DONE) @@ -1580,15 +1532,15 @@ if (MINOR(dev) != 0) return -ENXIO; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); if (hba[ctlr]->usage_count > 1) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); printk(KERN_WARNING "cpqarray: Device busy for volume" " revalidation (usage=%d)\n", hba[ctlr]->usage_count); return -EBUSY; } - spin_unlock_irqrestore(&io_request_lock, flags); hba[ctlr]->usage_count++; + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); /* * Set the partition and block size structures for all volumes @@ -1597,7 +1549,6 @@ memset(ida+(ctlr*256), 0, sizeof(struct hd_struct)*NWD*16); memset(ida_sizes+(ctlr*256), 0, sizeof(int)*NWD*16); memset(ida_blocksizes+(ctlr*256), 0, sizeof(int)*NWD*16); - memset(ida_hardsizes+(ctlr*256), 0, sizeof(int)*NWD*16); memset(hba[ctlr]->drv, 0, sizeof(drv_info_t)*NWD); ida_gendisk[ctlr].nr_real = 0; @@ -1625,17 +1576,15 @@ int ctlr, target; struct gendisk *gdev; unsigned long flags; - int max_p; - int start; - int i; + int res; target = DEVICE_NR(dev); ctlr = MAJOR(dev) - MAJOR_NR; gdev = &ida_gendisk[ctlr]; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(IDA_LOCK(ctlr), flags); if (hba[ctlr]->drv[target].usage_count > maxusage) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); printk(KERN_WARNING "cpqarray: Device busy for " "revalidation (usage=%d)\n", hba[ctlr]->drv[target].usage_count); @@ -1643,25 +1592,14 @@ } hba[ctlr]->drv[target].usage_count++; - spin_unlock_irqrestore(&io_request_lock, flags); - - max_p = gdev->max_p; - start = target << gdev->minor_shift; + spin_unlock_irqrestore(IDA_LOCK(ctlr), flags); - for(i=max_p-1; i>=0; i--) { - int minor = start+i; 
- invalidate_device(MKDEV(MAJOR_NR + ctlr, minor), 1); - gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; + res = wipe_partitions(dev); + if (!res) + grok_partitions(dev, hba[ctlr]->drv[target].nr_blks); - /* reset the blocksize so we can read the partition table */ - blksize_size[MAJOR_NR+ctlr][minor] = 1024; - } - - /* 16 minors per disk... */ - grok_partitions(gdev, target, 16, hba[ctlr]->drv[target].nr_blks); hba[ctlr]->drv[target].usage_count--; - return 0; + return res; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/cpqarray.h linux/drivers/block/cpqarray.h --- /opt/kernel/linux-2.4.14-pre6/drivers/block/cpqarray.h Tue May 22 19:23:16 2001 +++ linux/drivers/block/cpqarray.h Thu Nov 1 11:24:06 2001 @@ -56,11 +56,6 @@ #ifdef __KERNEL__ -struct my_sg { - int size; - char *start_addr; -}; - struct ctlr_info; typedef struct ctlr_info ctlr_info_t; @@ -121,6 +116,9 @@ struct timer_list timer; unsigned int misc_tflags; }; + +#define IDA_LOCK(i) (&((BLK_DEFAULT_QUEUE(MAJOR_NR + i))->queue_lock)) + #endif #endif /* CPQARRAY_H */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/elevator.c linux/drivers/block/elevator.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/elevator.c Fri Jul 20 05:59:41 2001 +++ linux/drivers/block/elevator.c Thu Nov 1 10:35:15 2001 @@ -18,48 +18,63 @@ * Removed tests for max-bomb-segments, which was breaking elvtune * when run without -bN * + * Jens: + * - Rework again to work with bio instead of buffer_heads + * - added merge by hash-lookup + * - loose bi_dev comparisons, partition handling is right now + * - completely modularize elevator setup and teardown + * */ - +#include #include #include #include #include +#include #include +#include +#include +#include + #include /* - * This is a bit tricky. It's given that bh and rq are for the same + * This is a bit tricky. It's given that bio and rq are for the same * device, but the next request might of course not be. Run through * the tests below to check if we want to insert here if we can't merge - * bh into an existing request + * bio into an existing request */ -inline int bh_rq_in_between(struct buffer_head *bh, struct request *rq, - struct list_head *head) +inline int bio_rq_in_between(struct bio *bio, struct request *rq, + struct list_head *head) { struct list_head *next; struct request *next_rq; - next = rq->queue.next; + /* + * if .next is a valid request + */ + next = rq->queuelist.next; if (next == head) return 0; + next_rq = list_entry(next, struct request, queuelist); + /* - * if the device is different (usually on a different partition), - * just check if bh is after rq + * if the device is different (not a normal case) just check if + * bio is after rq */ - next_rq = blkdev_entry_to_request(next); if (next_rq->rq_dev != rq->rq_dev) - return bh->b_rsector > rq->sector; + return bio->bi_sector > rq->sector; /* - * ok, rq, next_rq and bh are on the same device. if bh is in between + * ok, rq, next_rq and bio are on the same device. 
if bio is in between * the two, this is the sweet spot */ - if (bh->b_rsector < next_rq->sector && bh->b_rsector > rq->sector) + if (bio->bi_sector < next_rq->sector && bio->bi_sector > rq->sector) return 1; /* - * next_rq is ordered wrt rq, but bh is not in between the two + * next_rq is ordered wrt rq, but bio is not in between the two */ if (next_rq->sector > rq->sector) return 0; @@ -68,66 +83,135 @@ * next_rq and rq not ordered, if we happen to be either before * next_rq or after rq insert here anyway */ - if (bh->b_rsector > rq->sector || bh->b_rsector < next_rq->sector) + if (bio->bi_sector > rq->sector || bio->bi_sector < next_rq->sector) return 1; return 0; } +/* + * can we safely merge with this request? + */ +inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) +{ + if (bio_data_dir(bio) == rq->cmd) { + if (rq->rq_dev == bio->bi_dev && !rq->waiting && !rq->special) + return 1; + } + + return 0; +} + +/* + * find a struct request that has a bio linked that we can merge with + */ +inline struct request *bio_get_hash_rq(kdev_t dev, sector_t sector, int vc) +{ + struct bio *bio = bio_hash_find(dev, sector, vc); + struct request *rq = NULL; + + if (bio) { + rq = bio->bi_req; + bio_put(bio); + BUG_ON(dev != rq->rq_dev); + } + + return rq; +} int elevator_linus_merge(request_queue_t *q, struct request **req, - struct list_head * head, - struct buffer_head *bh, int rw, - int max_sectors) + struct list_head *head, struct bio *bio) { - struct list_head *entry = &q->queue_head; - unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE; + unsigned int count = bio_sectors(bio); + struct elv_linus_data *edat = q->elevator.elevator_data; + unsigned int vc = q->hash_valid_counter; + struct list_head *entry; + struct request *__rq; + + /* + * first try a back merge, then front, then give up and scan. 
this + * will of course fail for different size bios on the same queue, + * however that isn't an issue + */ + if (likely(edat->flags & ELV_LINUS_BACK_MERGE)) { + __rq = bio_get_hash_rq(bio->bi_dev, bio->bi_sector - count, vc); + if (__rq) { + if (&__rq->queuelist == head) + goto front; + + if (!elv_rq_merge_ok(__rq, bio)) + goto front; + + /* + * looks ok to merge + */ + if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { + *req = __rq; + return ELEVATOR_BACK_MERGE; + } + } + } +front: + if (likely(edat->flags & ELV_LINUS_FRONT_MERGE)) { + __rq = bio_get_hash_rq(bio->bi_dev, bio->bi_sector + count, vc); + if (__rq) { + if (&__rq->queuelist == head) + goto scan; + + if (!elv_rq_merge_ok(__rq, bio)) + goto scan; + + /* + * looks ok to merge + */ + if (__rq->sector - count == bio->bi_sector) { + *req = __rq; + return ELEVATOR_FRONT_MERGE; + } + } + } + + /* + * no merge possible, scan for insertion + */ +scan: + entry = &q->queue_head; while ((entry = entry->prev) != head) { - struct request *__rq = blkdev_entry_to_request(entry); + __rq = list_entry(entry, struct request, queuelist); + + prefetch(entry->prev); + + if (unlikely(__rq->waiting || __rq->special)) + continue; + if (!*req && bio_rq_in_between(bio, __rq, &q->queue_head)) + *req = __rq; /* * simply "aging" of requests in queue */ if (__rq->elevator_sequence-- <= 0) break; - - if (__rq->waiting) - continue; - if (__rq->rq_dev != bh->b_rdev) - continue; - if (!*req && bh_rq_in_between(bh, __rq, &q->queue_head)) - *req = __rq; - if (__rq->cmd != rw) - continue; - if (__rq->nr_sectors + count > max_sectors) - continue; - if (__rq->elevator_sequence < count) - break; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { - ret = ELEVATOR_BACK_MERGE; - *req = __rq; - break; - } else if (__rq->sector - count == bh->b_rsector) { - ret = ELEVATOR_FRONT_MERGE; - __rq->elevator_sequence -= count; - *req = __rq; + else if (__rq->elevator_sequence < count) break; - } } - return ret; + return ELEVATOR_NO_MERGE; } void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int count) { - struct list_head *entry = &req->queue, *head = &q->queue_head; + struct list_head *entry; + + BUG_ON(req->q != q); /* * second pass scan of requests that got passed over, if any */ - while ((entry = entry->next) != head) { - struct request *tmp = blkdev_entry_to_request(entry); + entry = &req->queuelist; + while ((entry = entry->next) != &q->queue_head) { + struct request *tmp =list_entry(entry,struct request,queuelist); + prefetch(entry->next); tmp->elevator_sequence -= count; } } @@ -138,85 +222,100 @@ req->elevator_sequence = next->elevator_sequence; } +int elv_linus_init(request_queue_t *q, elevator_t *e) +{ + struct elv_linus_data *edata; + + edata = kmalloc(sizeof(struct elv_linus_data), GFP_ATOMIC); + if (!edata) + return -ENOMEM; + + /* + * default to doing both front and back merges + */ + edata->flags = ELV_LINUS_BACK_MERGE | ELV_LINUS_FRONT_MERGE; + e->elevator_data = edata; + return 0; +} + +void elv_linus_exit(request_queue_t *q, elevator_t *e) +{ + kfree(e->elevator_data); +} + /* * See if we can find a request that this buffer can be coalesced with. 
*/ int elevator_noop_merge(request_queue_t *q, struct request **req, - struct list_head * head, - struct buffer_head *bh, int rw, - int max_sectors) + struct list_head * head, struct bio *bio) { - struct list_head *entry; - unsigned int count = bh->b_size >> 9; + struct request *__rq; + int count, ret; + unsigned int vc; + + count = bio_sectors(bio); + ret = ELEVATOR_NO_MERGE; + vc = q->hash_valid_counter; + + __rq = bio_get_hash_rq(bio->bi_dev, bio->bi_sector - count, vc); + if (__rq) { + if (&__rq->queuelist == head) + goto front; - if (list_empty(&q->queue_head)) - return ELEVATOR_NO_MERGE; + if (!elv_rq_merge_ok(__rq, bio)) + goto front; - entry = &q->queue_head; - while ((entry = entry->prev) != head) { - struct request *__rq = blkdev_entry_to_request(entry); + if (__rq->sector + __rq->nr_sectors == bio->bi_sector) { + ret = ELEVATOR_BACK_MERGE; + goto out; + } + } - if (__rq->cmd != rw) - continue; - if (__rq->rq_dev != bh->b_rdev) - continue; - if (__rq->nr_sectors + count > max_sectors) - continue; - if (__rq->waiting) - continue; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { - *req = __rq; - return ELEVATOR_BACK_MERGE; - } else if (__rq->sector - count == bh->b_rsector) { - *req = __rq; - return ELEVATOR_FRONT_MERGE; +front: + __rq = bio_get_hash_rq(bio->bi_dev, bio->bi_sector + count, vc); + if (__rq) { + if (&__rq->queuelist == head) + goto out; + + if (!elv_rq_merge_ok(__rq, bio)) + goto out; + + if (__rq->sector - count == bio->bi_sector) { + ret = ELEVATOR_FRONT_MERGE; + goto out; } } - *req = blkdev_entry_to_request(q->queue_head.prev); - return ELEVATOR_NO_MERGE; +out: + return ret; } void elevator_noop_merge_cleanup(request_queue_t *q, struct request *req, int count) {} void elevator_noop_merge_req(struct request *req, struct request *next) {} -int blkelvget_ioctl(elevator_t * elevator, blkelv_ioctl_arg_t * arg) +int elevator_init(request_queue_t *q, elevator_t *e, elevator_t type,char *name) { - blkelv_ioctl_arg_t output; + *e = type; - output.queue_ID = elevator->queue_ID; - output.read_latency = elevator->read_latency; - output.write_latency = elevator->write_latency; - output.max_bomb_segments = 0; + INIT_LIST_HEAD(&q->queue_head); + strncpy(e->queue_name, name, 15); - if (copy_to_user(arg, &output, sizeof(blkelv_ioctl_arg_t))) - return -EFAULT; + if (e->elevator_init_fn) + return e->elevator_init_fn(q, e); return 0; } -int blkelvset_ioctl(elevator_t * elevator, const blkelv_ioctl_arg_t * arg) +void elevator_exit(request_queue_t *q, elevator_t *e) { - blkelv_ioctl_arg_t input; - - if (copy_from_user(&input, arg, sizeof(blkelv_ioctl_arg_t))) - return -EFAULT; - - if (input.read_latency < 0) - return -EINVAL; - if (input.write_latency < 0) - return -EINVAL; - - elevator->read_latency = input.read_latency; - elevator->write_latency = input.write_latency; - return 0; + if (e->elevator_exit_fn) + e->elevator_exit_fn(q, e); } -void elevator_init(elevator_t * elevator, elevator_t type) +int elevator_global_init(void) { - static unsigned int queue_ID; - - *elevator = type; - elevator->queue_ID = queue_ID++; + return 0; } + +module_init(elevator_global_init); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/floppy.c linux/drivers/block/floppy.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/floppy.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/block/floppy.c Tue Oct 30 09:48:43 2001 @@ -576,7 +576,7 @@ static struct floppy_struct *_floppy = floppy_type; static unsigned char current_drive; static long current_count_sectors; 
-static unsigned char sector_t; /* sector in track */ +static unsigned char fsector_t; /* sector in track */ static unsigned char in_sector_offset; /* offset within physical sector, * expressed in units of 512 bytes */ @@ -2276,8 +2276,8 @@ * logical buffer */ static void request_done(int uptodate) { - int block; unsigned long flags; + int block; probing = 0; reschedule_timeout(MAXTIMEOUT, "request done %d", uptodate); @@ -2296,7 +2296,7 @@ DRS->maxtrack = 1; /* unlock chained buffers */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&QUEUE->queue_lock, flags); while (current_count_sectors && !QUEUE_EMPTY && current_count_sectors >= CURRENT->current_nr_sectors){ current_count_sectors -= CURRENT->current_nr_sectors; @@ -2304,7 +2304,7 @@ CURRENT->sector += CURRENT->current_nr_sectors; end_request(1); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&QUEUE->queue_lock, flags); if (current_count_sectors && !QUEUE_EMPTY){ /* "unlock" last subsector */ @@ -2329,9 +2329,9 @@ DRWE->last_error_sector = CURRENT->sector; DRWE->last_error_generation = DRS->generation; } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&QUEUE->queue_lock, flags); end_request(0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&QUEUE->queue_lock, flags); } } @@ -2377,7 +2377,7 @@ printk("rt=%d t=%d\n", R_TRACK, TRACK); printk("heads=%d eoc=%d\n", heads, eoc); printk("spt=%d st=%d ss=%d\n", SECT_PER_TRACK, - sector_t, ssize); + fsector_t, ssize); printk("in_sector_offset=%d\n", in_sector_offset); } #endif @@ -2424,7 +2424,7 @@ } else if (CT(COMMAND) == FD_READ){ buffer_track = raw_cmd->track; buffer_drive = current_drive; - INFBOUND(buffer_max, nr_sectors + sector_t); + INFBOUND(buffer_max, nr_sectors + fsector_t); } cont->redo(); } @@ -2432,19 +2432,19 @@ /* Compute maximal contiguous buffer size. 
*/ static int buffer_chain_size(void) { - struct buffer_head *bh; + struct bio *bio; int size; char *base; base = CURRENT->buffer; size = CURRENT->current_nr_sectors << 9; - bh = CURRENT->bh; + bio = CURRENT->bio; - if (bh){ - bh = bh->b_reqnext; - while (bh && bh->b_data == base + size){ - size += bh->b_size; - bh = bh->b_reqnext; + if (bio){ + bio = bio->bi_next; + while (bio && bio_data(bio) == base + size){ + size += bio_size(bio); + bio = bio->bi_next; } } return size >> 9; @@ -2453,13 +2453,13 @@ /* Compute the maximal transfer size */ static int transfer_size(int ssize, int max_sector, int max_size) { - SUPBOUND(max_sector, sector_t + max_size); + SUPBOUND(max_sector, fsector_t + max_size); /* alignment */ max_sector -= (max_sector % _floppy->sect) % ssize; /* transfer size, beginning not aligned */ - current_count_sectors = max_sector - sector_t ; + current_count_sectors = max_sector - fsector_t ; return max_sector; } @@ -2470,7 +2470,7 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2) { int remaining; /* number of transferred 512-byte sectors */ - struct buffer_head *bh; + struct bio *bio; char *buffer, *dma_buffer; int size; @@ -2479,8 +2479,8 @@ CURRENT->nr_sectors); if (current_count_sectors <= 0 && CT(COMMAND) == FD_WRITE && - buffer_max > sector_t + CURRENT->nr_sectors) - current_count_sectors = minimum(buffer_max - sector_t, + buffer_max > fsector_t + CURRENT->nr_sectors) + current_count_sectors = minimum(buffer_max - fsector_t, CURRENT->nr_sectors); remaining = current_count_sectors << 9; @@ -2491,7 +2491,7 @@ printk("current_count_sectors=%ld\n", current_count_sectors); printk("remaining=%d\n", remaining >> 9); printk("CURRENT->nr_sectors=%ld\n",CURRENT->nr_sectors); - printk("CURRENT->current_nr_sectors=%ld\n", + printk("CURRENT->current_nr_sectors=%u\n", CURRENT->current_nr_sectors); printk("max_sector=%d\n", max_sector); printk("ssize=%d\n", ssize); @@ -2500,9 +2500,9 @@ buffer_max = maximum(max_sector, buffer_max); - dma_buffer = floppy_track_buffer + ((sector_t - buffer_min) << 9); + dma_buffer = floppy_track_buffer + ((fsector_t - buffer_min) << 9); - bh = CURRENT->bh; + bio = CURRENT->bio; size = CURRENT->current_nr_sectors << 9; buffer = CURRENT->buffer; @@ -2514,8 +2514,8 @@ dma_buffer < floppy_track_buffer){ DPRINT("buffer overrun in copy buffer %d\n", (int) ((floppy_track_buffer - dma_buffer) >>9)); - printk("sector_t=%d buffer_min=%d\n", - sector_t, buffer_min); + printk("fsector_t=%d buffer_min=%d\n", + fsector_t, buffer_min); printk("current_count_sectors=%ld\n", current_count_sectors); if (CT(COMMAND) == FD_READ) @@ -2536,15 +2536,15 @@ break; dma_buffer += size; - bh = bh->b_reqnext; + bio = bio->bi_next; #ifdef FLOPPY_SANITY_CHECK - if (!bh){ + if (!bio){ DPRINT("bh=null in copy buffer after copy\n"); break; } #endif - size = bh->b_size; - buffer = bh->b_data; + size = bio_size(bio); + buffer = bio_data(bio); } #ifdef FLOPPY_SANITY_CHECK if (remaining){ @@ -2636,7 +2636,7 @@ max_sector = _floppy->sect * _floppy->head; TRACK = CURRENT->sector / max_sector; - sector_t = CURRENT->sector % max_sector; + fsector_t = CURRENT->sector % max_sector; if (_floppy->track && TRACK >= _floppy->track) { if (CURRENT->current_nr_sectors & 1) { current_count_sectors = 1; @@ -2644,17 +2644,17 @@ } else return 0; } - HEAD = sector_t / _floppy->sect; + HEAD = fsector_t / _floppy->sect; if (((_floppy->stretch & FD_SWAPSIDES) || TESTF(FD_NEED_TWADDLE)) && - sector_t < _floppy->sect) + fsector_t < _floppy->sect) max_sector = _floppy->sect; /* 2M disks have 
phantom sectors on the first track */ if ((_floppy->rate & FD_2M) && (!TRACK) && (!HEAD)){ max_sector = 2 * _floppy->sect / 3; - if (sector_t >= max_sector){ - current_count_sectors = minimum(_floppy->sect - sector_t, + if (fsector_t >= max_sector){ + current_count_sectors = minimum(_floppy->sect - fsector_t, CURRENT->nr_sectors); return 1; } @@ -2676,7 +2676,7 @@ GAP = _floppy->gap; CODE2SIZE; SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE; - SECTOR = ((sector_t % _floppy->sect) << 2 >> SIZECODE) + 1; + SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) + 1; /* tracksize describes the size which can be filled up with sectors * of size ssize. @@ -2684,11 +2684,11 @@ tracksize = _floppy->sect - _floppy->sect % ssize; if (tracksize < _floppy->sect){ SECT_PER_TRACK ++; - if (tracksize <= sector_t % _floppy->sect) + if (tracksize <= fsector_t % _floppy->sect) SECTOR--; /* if we are beyond tracksize, fill up using smaller sectors */ - while (tracksize <= sector_t % _floppy->sect){ + while (tracksize <= fsector_t % _floppy->sect){ while(tracksize + ssize > _floppy->sect){ SIZECODE--; ssize >>= 1; @@ -2704,12 +2704,12 @@ max_sector = _floppy->sect; } - in_sector_offset = (sector_t % _floppy->sect) % ssize; - aligned_sector_t = sector_t - in_sector_offset; + in_sector_offset = (fsector_t % _floppy->sect) % ssize; + aligned_sector_t = fsector_t - in_sector_offset; max_size = CURRENT->nr_sectors; if ((raw_cmd->track == buffer_track) && (current_drive == buffer_drive) && - (sector_t >= buffer_min) && (sector_t < buffer_max)) { + (fsector_t >= buffer_min) && (fsector_t < buffer_max)) { /* data already in track buffer */ if (CT(COMMAND) == FD_READ) { copy_buffer(1, max_sector, buffer_max); @@ -2717,8 +2717,8 @@ } } else if (in_sector_offset || CURRENT->nr_sectors < ssize){ if (CT(COMMAND) == FD_WRITE){ - if (sector_t + CURRENT->nr_sectors > ssize && - sector_t + CURRENT->nr_sectors < ssize + ssize) + if (fsector_t + CURRENT->nr_sectors > ssize && + fsector_t + CURRENT->nr_sectors < ssize + ssize) max_size = ssize + ssize; else max_size = ssize; @@ -2731,7 +2731,7 @@ int direct, indirect; indirect= transfer_size(ssize,max_sector,max_buffer_sectors*2) - - sector_t; + fsector_t; /* * Do NOT use minimum() here---MAX_DMA_ADDRESS is 64 bits wide @@ -2746,7 +2746,7 @@ if (CROSS_64KB(CURRENT->buffer, max_size << 9)) max_size = (K_64 - ((unsigned long)CURRENT->buffer) % K_64)>>9; - direct = transfer_size(ssize,max_sector,max_size) - sector_t; + direct = transfer_size(ssize,max_sector,max_size) - fsector_t; /* * We try to read tracks, but if we get too many errors, we * go back to reading just one sector at a time. 
@@ -2765,8 +2765,8 @@ raw_cmd->length = current_count_sectors << 9; if (raw_cmd->length == 0){ DPRINT("zero dma transfer attempted from make_raw_request\n"); - DPRINT("indirect=%d direct=%d sector_t=%d", - indirect, direct, sector_t); + DPRINT("indirect=%d direct=%d fsector_t=%d", + indirect, direct, fsector_t); return 0; } /* check_dma_crossing(raw_cmd->kernel_data, @@ -2784,12 +2784,12 @@ /* claim buffer track if needed */ if (buffer_track != raw_cmd->track || /* bad track */ buffer_drive !=current_drive || /* bad drive */ - sector_t > buffer_max || - sector_t < buffer_min || + fsector_t > buffer_max || + fsector_t < buffer_min || ((CT(COMMAND) == FD_READ || (!in_sector_offset && CURRENT->nr_sectors >= ssize))&& max_sector > 2 * max_buffer_sectors + buffer_min && - max_size + sector_t > 2 * max_buffer_sectors + buffer_min) + max_size + fsector_t > 2 * max_buffer_sectors + buffer_min) /* not enough space */){ buffer_track = -1; buffer_drive = current_drive; @@ -2836,7 +2836,7 @@ floppy_track_buffer) >> 9), current_count_sectors); printk("st=%d ast=%d mse=%d msi=%d\n", - sector_t, aligned_sector_t, max_sector, max_size); + fsector_t, aligned_sector_t, max_sector, max_size); printk("ssize=%x SIZECODE=%d\n", ssize, SIZECODE); printk("command=%x SECTOR=%d HEAD=%d, TRACK=%d\n", COMMAND, SECTOR, HEAD, TRACK); @@ -2854,8 +2854,8 @@ raw_cmd->kernel_data + raw_cmd->length > floppy_track_buffer + (max_buffer_sectors << 10)){ DPRINT("buffer overrun in schedule dma\n"); - printk("sector_t=%d buffer_min=%d current_count=%ld\n", - sector_t, buffer_min, + printk("fsector_t=%d buffer_min=%d current_count=%ld\n", + fsector_t, buffer_min, raw_cmd->length >> 9); printk("current_count_sectors=%ld\n", current_count_sectors); @@ -2908,8 +2908,6 @@ } if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) panic(DEVICE_NAME ": request list destroyed"); - if (CURRENT->bh && !buffer_locked(CURRENT->bh)) - panic(DEVICE_NAME ": block not locked"); device = CURRENT->rq_dev; set_fdc(DRIVE(device)); @@ -4172,7 +4170,7 @@ blk_size[MAJOR_NR] = floppy_sizes; blksize_size[MAJOR_NR] = floppy_blocksizes; - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST, "floppy"); reschedule_timeout(MAXTIMEOUT, "floppy init", MAXTIMEOUT); config_types(); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/genhd.c linux/drivers/block/genhd.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/genhd.c Wed Oct 24 12:49:57 2001 +++ linux/drivers/block/genhd.c Thu Oct 18 09:45:14 2001 @@ -28,14 +28,8 @@ /* * Global kernel list of partitioning information. - * - * XXX: you should _never_ access this directly. - * the only reason this is exported is source compatiblity. 
*/ -/*static*/ struct gendisk *gendisk_head; - -EXPORT_SYMBOL(gendisk_head); - +static struct gendisk *gendisk_head; /** * add_gendisk - add partitioning information to kernel list @@ -122,6 +116,30 @@ EXPORT_SYMBOL(get_gendisk); + +unsigned long +get_start_sect(kdev_t dev) +{ + struct gendisk *gp; + + gp = get_gendisk(dev); + if (gp) + return gp->part[MINOR(dev)].start_sect; + return 0; +} + +EXPORT_SYMBOL(get_start_sect); + +unsigned long +get_nr_sects(kdev_t dev) +{ + struct gendisk *gp; + + gp = get_gendisk(dev); + if (gp) + return gp->part[MINOR(dev)].nr_sects; + return 0; +} #ifdef CONFIG_PROC_FS int diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/ida_cmd.h linux/drivers/block/ida_cmd.h --- /opt/kernel/linux-2.4.14-pre6/drivers/block/ida_cmd.h Wed Jul 25 23:12:01 2001 +++ linux/drivers/block/ida_cmd.h Thu Nov 1 11:24:06 2001 @@ -93,7 +93,7 @@ int ctlr; struct cmdlist *prev; struct cmdlist *next; - struct buffer_head *bh; + struct bio *bio; int type; } cmdlist_t; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/ll_rw_blk.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/block/ll_rw_blk.c Thu Nov 1 15:15:35 2001 @@ -6,6 +6,7 @@ * Elevator latency, (C) 2000 Andrea Arcangeli SuSE * Queue request tables / lock, selectable elevator, Jens Axboe * kernel-doc documentation started by NeilBrown - July2000 + * bio rewrite, highmem i/o, etc, Jens Axboe - may 2001 */ /* @@ -22,7 +23,9 @@ #include #include #include +#include #include +#include #include #include @@ -50,27 +53,13 @@ */ DECLARE_TASK_QUEUE(tq_disk); -/* - * Protect the request list against multiple users.. - * - * With this spinlock the Linux block IO subsystem is 100% SMP threaded - * from the IRQ event side, and almost 100% SMP threaded from the syscall - * side (we still have protect against block device array operations, and - * the do_request() side is casually still unsafe. The kernel lock protects - * this part currently.). - * - * there is a fair chance that things will work just OK if these functions - * are called with no global kernel lock held ... - */ -spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED; - /* This specifies how many sectors to read ahead on the disk. */ int read_ahead[MAX_BLKDEV]; /* blk_dev_struct is: - * *request_fn - * *current_request + * request_queue + * *queue */ struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */ @@ -94,42 +83,28 @@ int * blksize_size[MAX_BLKDEV]; /* - * hardsect_size contains the size of the hardware sector of a device. - * - * hardsect_size[MAJOR][MINOR] - * - * if (!hardsect_size[MAJOR]) - * then 512 bytes is assumed. - * else - * sector_size is hardsect_size[MAJOR][MINOR] - * This is currently set by some scsi devices and read by the msdos fs driver. - * Other uses may appear later. - */ -int * hardsect_size[MAX_BLKDEV]; - -/* * The following tunes the read-ahead algorithm in mm/filemap.c */ int * max_readahead[MAX_BLKDEV]; /* - * Max number of sectors per request - */ -int * max_sectors[MAX_BLKDEV]; - -/* * How many reqeusts do we allocate per queue, * and how many do we "batch" on freeing them? 
*/ -static int queue_nr_requests, batch_requests; - -static inline int get_max_sectors(kdev_t dev) -{ - if (!max_sectors[MAJOR(dev)]) - return MAX_SECTORS; - return max_sectors[MAJOR(dev)][MINOR(dev)]; -} +int queue_nr_requests, batch_requests; +unsigned long blk_max_low_pfn, blk_max_pfn; +/** + * blk_get_queue: - return the queue that matches the given device + * @dev: device + * + * Description: + * Given a specific device, return the queue that will hold I/O + * for it. This is either a &struct blk_dev_struct lookup and a + * call to the ->queue() function defined, or the default queue + * stored in the same location. + * + **/ inline request_queue_t *blk_get_queue(kdev_t dev) { struct blk_dev_struct *bdev = blk_dev + MAJOR(dev); @@ -140,53 +115,6 @@ return &blk_dev[MAJOR(dev)].request_queue; } -static int __blk_cleanup_queue(struct request_list *list) -{ - struct list_head *head = &list->free; - struct request *rq; - int i = 0; - - while (!list_empty(head)) { - rq = list_entry(head->next, struct request, queue); - list_del(&rq->queue); - kmem_cache_free(request_cachep, rq); - i++; - }; - - if (i != list->count) - printk("request list leak!\n"); - - list->count = 0; - return i; -} - -/** - * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed - * @q: the request queue to be released - * - * Description: - * blk_cleanup_queue is the pair to blk_init_queue(). It should - * be called when a request queue is being released; typically - * when a block device is being de-registered. Currently, its - * primary task it to free all the &struct request structures that - * were allocated to the queue. - * Caveat: - * Hopefully the low level driver will have finished any - * outstanding requests first... - **/ -void blk_cleanup_queue(request_queue_t * q) -{ - int count = queue_nr_requests; - - count -= __blk_cleanup_queue(&q->rq[READ]); - count -= __blk_cleanup_queue(&q->rq[WRITE]); - - if (count) - printk("blk_cleanup_queue: leaked requests (%d)\n", count); - - memset(q, 0, sizeof(*q)); -} - /** * blk_queue_headactive - indicate whether head of request queue may be active * @q: The queue which this applies to. @@ -210,10 +138,9 @@ * * When a queue is plugged the head will be assumed to be inactive. **/ - void blk_queue_headactive(request_queue_t * q, int active) { - q->head_active = active; + set_bit(QUEUE_FLAG_HEADACTIVE, &q->queue_flags); } /** @@ -222,7 +149,7 @@ * @mfn: the alternate make_request function * * Description: - * The normal way for &struct buffer_heads to be passed to a device + * The normal way for &struct bios to be passed to a device * driver is for them to be collected into requests on a request * queue, and then to allow the device driver to select requests * off that queue when it is ready. This works well for many block @@ -234,19 +161,129 @@ * * Caveat: * The driver that does this *must* be able to deal appropriately - * with buffers in "highmemory", either by calling bh_kmap() to get - * a kernel mapping, to by calling create_bounce() to create a - * buffer in normal memory. + * with buffers in "highmemory". This can be accomplished by either calling + * bio_kmap() to get a temporary kernel mapping, or by calling + * blk_queue_bounce() to create a buffer in normal memory. 
**/ - void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) { + /* + * set defaults + */ + q->max_segments = MAX_SEGMENTS; q->make_request_fn = mfn; + blk_queue_max_sectors(q, MAX_SECTORS); + blk_queue_hardsect_size(q, 512); + + q->queue_state = Queue_up; + init_waitqueue_head(&q->queue_wait); + + q->queue_magic = 0x12345678; } -static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments) +/** + * blk_queue_bounce_limit - set bounce buffer limit for queue + * @q: the request queue for the device + * @dma_addr: bus address limit + * + * Description: + * Different hardware can have different requirements as to what pages + * it can do I/O directly to. A low level driver can call + * blk_queue_bounce_limit to have lower memory pages allocated as bounce + * buffers for doing I/O to pages residing above @page. By default + * the block layer sets this to the highest numbered "low" memory page. + **/ +void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) { - if (req->nr_segments < max_segments) { + unsigned long mb = dma_addr >> 20; + struct page *bounce_page = mem_map + (dma_addr >> PAGE_SHIFT); + + /* + * just make sure that no pages are considered above this one... + */ + if (dma_addr == BLK_BOUNCE_ANY) + bounce_page = (struct page *) BLK_BOUNCE_ANY; + + /* + * keep this for debugging for now... + */ + if (dma_addr != BLK_BOUNCE_HIGH) { + printk("blk: queue %p, ", q); + if (dma_addr == BLK_BOUNCE_ANY) + printk("no I/O memory limit\n"); + else + printk("I/O limit %luMb (mask %Lx)\n", mb, (u64) dma_addr); + } + + q->bounce_limit = bounce_page; +} + +/** + * blk_queue_max_sectors - set max sectors for a request for this queue + * @q: the request queue for the device + * @max_sectors: max sectors in the usual 512b unit + * + * Description: + * Enables a low level driver to set an upper limit on the size of + * received requests. + **/ +void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors) +{ + q->max_sectors = max_sectors; +} + +/** + * blk_queue_max_segments - set max segments for a request for this queue + * @q: the request queue for the device + * @max_segments: max number of segments + * + * Description: + * Enables a low level driver to set an upper limit on the number of + * data segments in a request + **/ +void blk_queue_max_segments(request_queue_t *q, unsigned short max_segments) +{ + q->max_segments = max_segments; +} + +/** + * blk_queue_hardsect_size - set hardware sector size for the queue + * @q: the request queue for the device + * @size: the hardware sector size, in bytes + * + * Description: + * This should typically be set to the lowest possible sector size + * that the hardware can operate on (possible without reverting to + * even internal read-modify-write operations). Usually the default + * of 512 covers most hardware. + **/ +void blk_queue_hardsect_size(request_queue_t *q, unsigned short size) +{ + q->hardsect_size = size; +} + +/* + * can we merge the two segments, or do we need to start a new one? 
+ */ +inline int blk_same_segment(request_queue_t *q, struct bio *bio, + struct bio *nxt) +{ + if (!BIO_CONTIG(bio, nxt)) + return 0; + + if (BIO_PHYS_4G(bio, nxt)) + return 1; + + return 0; +} + +/* + * the standard queue merge functions, can be overridden with device + * specific ones if so desired + */ +static inline int ll_new_segment(request_queue_t *q, struct request *req) +{ + if (req->nr_segments < q->max_segments) { req->nr_segments++; return 1; } @@ -254,36 +291,69 @@ } static int ll_back_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) + struct bio *bio) { - if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) + if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) + return 0; + if (blk_same_segment(q, req->biotail, bio)) return 1; - return ll_new_segment(q, req, max_segments); + + return ll_new_segment(q, req); } static int ll_front_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh, int max_segments) + struct bio *bio) { - if (bh->b_data + bh->b_size == req->bh->b_data) + if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) + return 0; + if (blk_same_segment(q, bio, req->bio)) return 1; - return ll_new_segment(q, req, max_segments); + + return ll_new_segment(q, req); } static int ll_merge_requests_fn(request_queue_t *q, struct request *req, - struct request *next, int max_segments) + struct request *next) { int total_segments = req->nr_segments + next->nr_segments; - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + if (blk_same_segment(q, req->biotail, next->bio)) total_segments--; - if (total_segments > max_segments) + if (total_segments > q->max_segments) return 0; req->nr_segments = total_segments; return 1; } +/** + * blk_wake_queue - restart a queue that wasn't fully emptied at request_fn time + * @q: The &request_queue_t in question + * + * Description: + * Sometimes hardware can run out of resources, so no more commands can + * be queued. If a driver breaks out of request_fn while there are still + * requests left on there to be serviced, it will be left in a state where + * it is still unplugged but not be recalled by the block layer. + * not be replugged, and thus request_fn will be run. Once a driver has + * freed enough resources to start queueing new requests again, it must + * call blk_wake_queue to start processing again. + * + * Return: + * 0 - queue was already plugged or did not get plugged + * 1 - queue was successfully plugged + **/ +int blk_wake_queue(request_queue_t *q) +{ + if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { + queue_task(&q->plug_tq, &tq_disk); + return 1; + } + + return 0; +} + /* * "plug" the device if there are no outstanding requests: this will * force the transfer to start only after we have put all the requests @@ -292,16 +362,15 @@ * This is called with interrupts off and no requests on the queue. 
* (and with the request spinlock acquired) */ -static void generic_plug_device(request_queue_t *q, kdev_t dev) +static int blk_plug_device(request_queue_t *q) { /* - * no need to replug device + * common case */ - if (!list_empty(&q->queue_head) || q->plugged) - return; + if (!elv_queue_empty(q)) + return 0; - q->plugged = 1; - queue_task(&q->plug_tq, &tq_disk); + return blk_wake_queue(q); } /* @@ -309,24 +378,83 @@ */ static inline void __generic_unplug_device(request_queue_t *q) { - if (q->plugged) { - q->plugged = 0; - if (!list_empty(&q->queue_head)) + if (test_and_clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) + if (!elv_queue_empty(q)) q->request_fn(q); - } } +/** + * generic_unplug_device - fire a request queue + * @q: The &request_queue_t in question + * + * Description: + * Linux uses plugging to build bigger requests queues before letting + * the device have at them. If a queue is plugged, the I/O scheduler + * is still adding and merging requests on the queue. Once the queue + * gets unplugged (either by manually calling this function, or by + * running the tq_disk task queue), the request_fn defined for the + * queue is invoked and transfers started. + **/ void generic_unplug_device(void *data) { request_queue_t *q = (request_queue_t *) data; unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); __generic_unplug_device(q); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); +} + +static int __blk_cleanup_queue(struct request_list *list) +{ + struct list_head *head = &list->free; + struct request *rq; + int i = 0; + + while (!list_empty(head)) { + rq = list_entry(head->next, struct request, queuelist); + list_del(&rq->queuelist); + kmem_cache_free(request_cachep, rq); + i++; + } + + if (i != list->count) + printk("request list leak!\n"); + + list->count = 0; + return i; +} + +/** + * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed + * @q: the request queue to be released + * + * Description: + * blk_cleanup_queue is the pair to blk_init_queue(). It should + * be called when a request queue is being released; typically + * when a block device is being de-registered. Currently, its + * primary task it to free all the &struct request structures that + * were allocated to the queue. + * Caveat: + * Hopefully the low level driver will have finished any + * outstanding requests first... 
+ **/ +void blk_cleanup_queue(request_queue_t * q) +{ + int count = queue_nr_requests; + + count -= __blk_cleanup_queue(&q->rq[READ]); + count -= __blk_cleanup_queue(&q->rq[WRITE]); + + if (count) + printk("blk_cleanup_queue: leaked requests (%d)\n", count); + + elevator_exit(q, &q->elevator); + + memset(q, 0, sizeof(*q)); } -static void blk_init_free_list(request_queue_t *q) +static int blk_init_free_list(request_queue_t *q) { struct request *rq; int i; @@ -341,22 +469,30 @@ */ for (i = 0; i < queue_nr_requests; i++) { rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL); - if (rq == NULL) { - /* We'll get a `leaked requests' message from blk_cleanup_queue */ - printk(KERN_EMERG "blk_init_free_list: error allocating requests\n"); - break; - } + if (!rq) + goto nomem; + memset(rq, 0, sizeof(struct request)); rq->rq_status = RQ_INACTIVE; - list_add(&rq->queue, &q->rq[i&1].free); - q->rq[i&1].count++; + if (i < queue_nr_requests >> 1) { + list_add(&rq->queuelist, &q->rq[READ].free); + q->rq[READ].count++; + } else { + list_add(&rq->queuelist, &q->rq[WRITE].free); + q->rq[WRITE].count++; + } } - init_waitqueue_head(&q->wait_for_request); + init_waitqueue_head(&q->wait_for_request[READ]); + init_waitqueue_head(&q->wait_for_request[WRITE]); spin_lock_init(&q->queue_lock); + return 0; +nomem: + blk_cleanup_queue(q); + return 1; } -static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh); +static int __make_request(request_queue_t *, struct bio *); /** * blk_init_queue - prepare a request queue for use with a block device @@ -379,8 +515,8 @@ * requests on the queue, it is responsible for arranging that the requests * get dealt with eventually. * - * A global spin lock $io_request_lock must be held while manipulating the - * requests on the request queue. + * The queue spin lock must be held while manipulating the requests on the + * request queue. * * The request on the head of the queue is by default assumed to be * potentially active, and it is not considered for re-ordering or merging @@ -391,33 +527,40 @@ * blk_init_queue() must be paired with a blk_cleanup_queue() call * when the block device is deactivated (such as at module unload). **/ -void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) +int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, char *name) { - INIT_LIST_HEAD(&q->queue_head); - elevator_init(&q->elevator, ELEVATOR_LINUS); - blk_init_free_list(q); + int ret; + + if (blk_init_free_list(q)) + return -ENOMEM; + + if ((ret = elevator_init(q, &q->elevator, ELEVATOR_LINUS, name))) { + blk_cleanup_queue(q); + return ret; + } + q->request_fn = rfn; q->back_merge_fn = ll_back_merge_fn; q->front_merge_fn = ll_front_merge_fn; q->merge_requests_fn = ll_merge_requests_fn; - q->make_request_fn = __make_request; q->plug_tq.sync = 0; q->plug_tq.routine = &generic_unplug_device; q->plug_tq.data = q; - q->plugged = 0; + q->queue_flags = 0; + /* - * These booleans describe the queue properties. We set the - * default (and most common) values here. Other drivers can - * use the appropriate functions to alter the queue properties. - * as appropriate. 
+ * by default assume old behaviour and bounce for any highmem page */ - q->plug_device_fn = generic_plug_device; - q->head_active = 1; + blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); + + blk_queue_make_request(q, __make_request); + blk_mark_headactive(q); + return 0; } -#define blkdev_free_rq(list) list_entry((list)->next, struct request, queue); +#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) /* - * Get a free request. io_request_lock must be held and interrupts + * Get a free request. queue lock must be held and interrupts * disabled on the way in. */ static inline struct request *get_request(request_queue_t *q, int rw) @@ -427,7 +570,7 @@ if (!list_empty(&rl->free)) { rq = blkdev_free_rq(&rl->free); - list_del(&rq->queue); + list_del(&rq->queuelist); rl->count--; rq->rq_status = RQ_ACTIVE; rq->special = NULL; @@ -440,38 +583,28 @@ /* * No available requests for this queue, unplug the device. */ -static struct request *__get_request_wait(request_queue_t *q, int rw) +static struct request *get_request_wait(request_queue_t *q, int rw) { - register struct request *rq; DECLARE_WAITQUEUE(wait, current); + struct request *rq; + + spin_lock_prefetch(&q->queue_lock); generic_unplug_device(q); - add_wait_queue(&q->wait_for_request, &wait); + add_wait_queue(&q->wait_for_request[rw], &wait); do { set_current_state(TASK_UNINTERRUPTIBLE); if (q->rq[rw].count < batch_requests) schedule(); - spin_lock_irq(&io_request_lock); - rq = get_request(q,rw); - spin_unlock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); + rq = get_request(q, rw); + spin_unlock_irq(&q->queue_lock); } while (rq == NULL); - remove_wait_queue(&q->wait_for_request, &wait); + remove_wait_queue(&q->wait_for_request[rw], &wait); current->state = TASK_RUNNING; return rq; } -static inline struct request *get_request_wait(request_queue_t *q, int rw) -{ - register struct request *rq; - - spin_lock_irq(&io_request_lock); - rq = get_request(q, rw); - spin_unlock_irq(&io_request_lock); - if (rq) - return rq; - return __get_request_wait(q, rw); -} - /* RO fail safe mechanism */ static long ro_bits[MAX_BLKDEV][8]; @@ -497,8 +630,7 @@ else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31)); } -inline void drive_stat_acct (kdev_t dev, int rw, - unsigned long nr_sectors, int new_io) +void drive_stat_acct (kdev_t dev, int rw, unsigned long nr_sectors, int new_io) { unsigned int major = MAJOR(dev); unsigned int index; @@ -520,7 +652,7 @@ /* * add-request adds a request to the linked list. - * io_request_lock is held and interrupts disabled, as we muck with the + * queue lock is held and interrupts disabled, as we muck with the * request queue list. 
* * By this point, req->cmd is always either READ/WRITE, never READA, @@ -529,24 +661,25 @@ static inline void add_request(request_queue_t * q, struct request * req, struct list_head *insert_here) { + elevator_t *e = &q->elevator; + drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); - if (!q->plugged && q->head_active && insert_here == &q->queue_head) { - spin_unlock_irq(&io_request_lock); + if (!blk_queue_plugged(q) && blk_queue_headlive(q) + && insert_here == &q->queue_head) BUG(); - } /* * elevator indicated where it wants this request to be * inserted at elevator_merge time */ - list_add(&req->queue, insert_here); + e->elevator_add_req_fn(q, req, insert_here); } /* - * Must be called with io_request_lock held and interrupts disabled + * Must be called with queue lock held and interrupts disabled */ -inline void blkdev_release_request(struct request *req) +void blkdev_release_request(struct request *req) { request_queue_t *q = req->q; int rw = req->cmd; @@ -559,165 +692,201 @@ * assume it has free buffers and check waiters */ if (q) { - list_add(&req->queue, &q->rq[rw].free); - if (++q->rq[rw].count >= batch_requests && waitqueue_active(&q->wait_for_request)) - wake_up(&q->wait_for_request); + list_add(&req->queuelist, &q->rq[rw].free); + if (++q->rq[rw].count >= batch_requests && waitqueue_active(&q->wait_for_request[rw])) + wake_up(&q->wait_for_request[rw]); } } /* * Has to be called with the request spinlock acquired */ -static void attempt_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) -{ - struct request *next; - - next = blkdev_next_request(req); +static void attempt_merge(request_queue_t *q, struct request *req) +{ + struct request *next = blkdev_next_request(req); + if (req->sector + req->nr_sectors != next->sector) return; + if (req->cmd != next->cmd || req->rq_dev != next->rq_dev - || req->nr_sectors + next->nr_sectors > max_sectors - || next->waiting) + || req->nr_sectors + next->nr_sectors > q->max_sectors + || next->waiting || next->special) return; + /* * If we are not allowed to merge these requests, then * return. If we are allowed to merge, then the count * will have been updated to the appropriate number, * and we shouldn't do it here too. 
*/ - if (!q->merge_requests_fn(q, req, next, max_segments)) - return; + if (q->merge_requests_fn(q, req, next)) { + struct bio *bio; + + q->elevator.elevator_merge_req_fn(req, next); + + blkdev_dequeue_request(next); + + for (bio = next->bio; bio; bio = bio->bi_next) + bio->bi_req = req; + + req->biotail->bi_next = next->bio; + req->biotail = next->biotail; - q->elevator.elevator_merge_req_fn(req, next); - req->bhtail->b_reqnext = next->bh; - req->bhtail = next->bhtail; - req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; - list_del(&next->queue); - blkdev_release_request(next); + req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; + + blkdev_release_request(next); + } } -static inline void attempt_back_merge(request_queue_t * q, - struct request *req, - int max_sectors, - int max_segments) +static inline void attempt_back_merge(request_queue_t *q, struct request *rq) { - if (&req->queue == q->queue_head.prev) - return; - attempt_merge(q, req, max_sectors, max_segments); + if (&rq->queuelist != q->queue_head.prev) + attempt_merge(q, rq); } -static inline void attempt_front_merge(request_queue_t * q, - struct list_head * head, - struct request *req, - int max_sectors, - int max_segments) +static inline void attempt_front_merge(request_queue_t *q, + struct list_head *head, + struct request *rq) { - struct list_head * prev; + struct list_head *prev = rq->queuelist.prev; - prev = req->queue.prev; - if (head == prev) - return; - attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments); + if (prev != head) + attempt_merge(q, blkdev_entry_to_request(prev)); +} + +static inline void __blk_attempt_remerge(request_queue_t *q, struct request *rq) +{ + if (rq->queuelist.next != &q->queue_head) + attempt_merge(q, rq); +} +/** + * blk_attempt_remerge - attempt to remerge active head with next request + * @q: The &request_queue_t belonging to the device + * @rq: The head request (usually) + * + * Description: + * For head-active devices, the queue can easily be unplugged so quickly + * that proper merging is not done on the front request. This may hurt + * performance greatly for some devices. The block layer cannot safely + * do merging on that first request for these queues, but the driver can + * call this function and make it happen any way. Only the driver knows + * when it is safe to do so. 
+ **/ +void blk_attempt_remerge(request_queue_t *q, struct request *rq) +{ + unsigned long flags; + + spin_lock_irqsave(&q->queue_lock, flags); + __blk_attempt_remerge(q, rq); + spin_unlock_irqrestore(&q->queue_lock, flags); +} + +#if 0 +int dummy_write(struct bio *bio) +{ + if (MAJOR(bio->bi_dev) == 8) + return 0; + if (MAJOR(bio->bi_dev) == 3 && MINOR(bio->bi_dev) >= 64) + return 0; + + if (bio_data_dir(bio) == WRITE) + return 1; + + return 0; } +#endif -static int __make_request(request_queue_t * q, int rw, - struct buffer_head * bh) +static int __make_request(request_queue_t *q, struct bio *bio) { - unsigned int sector, count; - int max_segments = MAX_SEGMENTS; - struct request * req, *freereq = NULL; - int rw_ahead, max_sectors, el_ret; + struct request *req, *freereq = NULL; + int el_ret, latency = 0, rw, count, barrier; struct list_head *head, *insert_here; - int latency; elevator_t *elevator = &q->elevator; + sector_t sector; - count = bh->b_size >> 9; - sector = bh->b_rsector; + sector = bio->bi_sector; + count = bio_sectors(bio); + rw = bio_data_dir(bio); - rw_ahead = 0; /* normal case; gets changed below for READA */ - switch (rw) { - case READA: - rw_ahead = 1; - rw = READ; /* drop into READ */ - case READ: - case WRITE: - latency = elevator_request_latency(elevator, rw); - break; - default: - BUG(); - goto end_io; - } + latency = elevator_request_latency(elevator, rw); - /* We'd better have a real physical mapping! - Check this bit only if the buffer was dirty and just locked - down by us so at this point flushpage will block and - won't clear the mapped bit under us. */ - if (!buffer_mapped(bh)) - BUG(); + barrier = test_bit(BIO_BARRIER, &bio->bi_flags); /* - * Temporary solution - in 2.5 this will be done by the lowlevel - * driver. Create a bounce buffer if the buffer data points into - * high memory - keep the original buffer otherwise. + * low level driver can indicate that it wants pages above a + * certain limit bounced to low memory (ie for highmem, or even + * ISA dma in theory) */ -#if CONFIG_HIGHMEM - bh = create_bounce(rw, bh); -#endif + blk_queue_bounce(q, &bio); -/* look for a free request. 
*/ - /* - * Try to coalesce the new request with old requests - */ - max_sectors = get_max_sectors(bh->b_rdev); + spin_lock_prefetch(&q->queue_lock); + +#if 0 + if (dummy_write(bio)) { + set_bit(BIO_UPTODATE, &bio->bi_flags); + goto end_io; + } +#endif again: req = NULL; head = &q->queue_head; + + spin_lock_irq(&q->queue_lock); + /* - * Now we acquire the request spinlock, we have to be mega careful - * not to schedule or do something nonatomic + * barrier write must not be passed - so insert with 0 latency at + * the back of the queue and invalidate the entire existing merge hash + * for this device */ - spin_lock_irq(&io_request_lock); + if (barrier && !freereq) { + latency = 0; + bio_hash_invalidate(q, bio->bi_dev); + } insert_here = head->prev; - if (list_empty(head)) { - q->plug_device_fn(q, bh->b_rdev); /* is atomic */ + if (blk_plug_device(q) || barrier) goto get_rq; - } else if (q->head_active && !q->plugged) + else if (blk_queue_headlive(q) && !blk_queue_plugged(q)) head = head->next; - el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw,max_sectors); + el_ret = elevator->elevator_merge_fn(q, &req, head, bio); switch (el_ret) { - case ELEVATOR_BACK_MERGE: - if (!q->back_merge_fn(q, req, bh, max_segments)) + if (!q->back_merge_fn(q, req, bio)) break; + if (req->biotail->bi_sector + bio_sectors(req->biotail) != bio->bi_sector) + BUG(); elevator->elevator_merge_cleanup_fn(q, req, count); - req->bhtail->b_reqnext = bh; - req->bhtail = bh; + BUG_ON(bio_data_dir(bio) != bio_data_dir(req->biotail)); + req->biotail->bi_next = bio; + req->biotail = bio; req->nr_sectors = req->hard_nr_sectors += count; - blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); - attempt_back_merge(q, req, max_sectors, max_segments); + attempt_back_merge(q, req); goto out; case ELEVATOR_FRONT_MERGE: - if (!q->front_merge_fn(q, req, bh, max_segments)) + if (!q->front_merge_fn(q, req, bio)) break; + if (bio->bi_sector + bio_sectors(bio) != req->bio->bi_sector) + BUG(); elevator->elevator_merge_cleanup_fn(q, req, count); - bh->b_reqnext = req->bh; - req->bh = bh; - req->buffer = bh->b_data; - req->current_nr_sectors = count; + BUG_ON(bio_data_dir(bio) != bio_data_dir(req->bio)); + bio->bi_next = req->bio; + req->bio = bio; + /* + * may not be valid. if the low level driver said + * it didn't need a bounce buffer then it better + * not touch req->buffer either... 
+ */ + req->buffer = bio_data(bio); + req->current_nr_sectors = req->hard_cur_sectors = count; req->sector = req->hard_sector = sector; req->nr_sectors = req->hard_nr_sectors += count; - blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); - attempt_front_merge(q, head, req, max_sectors, max_segments); + attempt_front_merge(q, head, req); goto out; /* @@ -730,14 +899,14 @@ * of the queue */ if (req) - insert_here = &req->queue; + insert_here = &req->queuelist; break; default: printk("elevator returned crap (%d)\n", el_ret); BUG(); } - + /* * Grab a free request from the freelist - if that is empty, check * if we are doing read ahead and abort instead of blocking for @@ -748,107 +917,134 @@ req = freereq; freereq = NULL; } else if ((req = get_request(q, rw)) == NULL) { - spin_unlock_irq(&io_request_lock); - if (rw_ahead) + + spin_unlock_irq(&q->queue_lock); + + /* + * READA bit set + */ + if (bio->bi_rw & RWA_MASK) { + set_bit(BIO_RW_BLOCK, &bio->bi_flags); goto end_io; + } - freereq = __get_request_wait(q, rw); + freereq = get_request_wait(q, rw); goto again; } -/* fill up the request-info, and add it to the queue */ + + /* + * fill up the request-info, and add it to the queue + */ req->elevator_sequence = latency; req->cmd = rw; req->errors = 0; req->hard_sector = req->sector = sector; req->hard_nr_sectors = req->nr_sectors = count; - req->current_nr_sectors = count; - req->nr_segments = 1; /* Always 1 for a new request. */ - req->nr_hw_segments = 1; /* Always 1 for a new request. */ - req->buffer = bh->b_data; + req->current_nr_sectors = req->hard_cur_sectors = count; + req->nr_segments = 1; /* Always 1 for a new request. */ + req->nr_hw_segments = 1; /* Always 1 for a new request. */ + req->buffer = bio_data(bio); /* see ->buffer comment above */ req->waiting = NULL; - req->bh = bh; - req->bhtail = bh; - req->rq_dev = bh->b_rdev; - blk_started_io(count); + req->bio = req->biotail = bio; + req->rq_dev = bio->bi_dev; add_request(q, req, insert_here); out: if (freereq) blkdev_release_request(freereq); - spin_unlock_irq(&io_request_lock); + + bio->bi_req = req; + spin_unlock_irq(&q->queue_lock); + bio_hash_add_unique(bio, q->hash_valid_counter); return 0; + end_io: - bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); + bio->bi_end_io(bio); return 0; } + +/* + * If bio->bi_dev is a partition, remap the location + */ +static inline void blk_partition_remap(struct bio *bio) +{ + int major, minor, drive, minor0; + struct gendisk *g; + kdev_t dev0; + + BUG_ON(test_bit(BIO_HASHED, &bio->bi_flags)); + + major = MAJOR(bio->bi_dev); + if ((g = get_gendisk(bio->bi_dev))) { + minor = MINOR(bio->bi_dev); + drive = (minor >> g->minor_shift); + minor0 = (drive << g->minor_shift); /* whole disk device */ + /* that is, minor0 = (minor & ~((1<<g->minor_shift)-1)); */ + dev0 = MKDEV(major, minor0); + if (dev0 != bio->bi_dev) { + bio->bi_dev = dev0; + bio->bi_sector += g->part[minor].start_sect; + } + /* lots of checks are possible */ + } +} + /** - * generic_make_request: hand a buffer head to it's device driver for I/O - * @rw: READ, WRITE, or READA - what sort of I/O is desired. - * @bh: The buffer head describing the location in memory and on the device. + * generic_make_request: hand a buffer to it's device driver for I/O + * @bio: The bio describing the location in memory and on the device. * * generic_make_request() is used to make I/O requests of block - devices. It is passed a &struct buffer_head and a &rw value.
The - * %READ and %WRITE options are (hopefully) obvious in meaning. The - * %READA value means that a read is required, but that the driver is - * free to fail the request if, for example, it cannot get needed - * resources immediately. + * devices. It is passed a &struct bio, which describes the I/O that needs + * to be done. * * generic_make_request() does not return any status. The * success/failure status of the request, along with notification of - * completion, is delivered asynchronously through the bh->b_end_io + * completion, is delivered asynchronously through the bio->bi_end_io * function described (one day) else where. * - * The caller of generic_make_request must make sure that b_page, - * b_addr, b_size are set to describe the memory buffer, that b_rdev - * and b_rsector are set to describe the device address, and the - * b_end_io and optionally b_private are set to describe how - * completion notification should be signaled. BH_Mapped should also - * be set (to confirm that b_dev and b_blocknr are valid). - * - * generic_make_request and the drivers it calls may use b_reqnext, - * and may change b_rdev and b_rsector. So the values of these fields + * The caller of generic_make_request must make sure that bi_io_vec + * are set to describe the memory buffer, and that bi_dev and bi_sector are + & set to describe the device address, and the + * bi_end_io and optionally bi_private are set to describe how + * completion notification should be signaled. + * + * generic_make_request and the drivers it calls may use bi_next if this + * bio happens to be merged with someone else, and may change bi_dev and + * bi_rsector for remaps as it sees fit. So the values of these fields * should NOT be depended on after the call to generic_make_request. - * Because of this, the caller should record the device address - * information in b_dev and b_blocknr. * - * Apart from those fields mentioned above, no other fields, and in - * particular, no other flags, are changed by generic_make_request or - * any lower level drivers. * */ -void generic_make_request (int rw, struct buffer_head * bh) +void generic_make_request(struct bio *bio) { - int major = MAJOR(bh->b_rdev); - int minorsize = 0; + int major = MAJOR(bio->bi_dev); + int minor = MINOR(bio->bi_dev); request_queue_t *q; + sector_t minorsize = 0; - if (!bh->b_end_io) - BUG(); - - /* Test device size, when known. */ + /* Test device or partition size, when known. */ if (blk_size[major]) - minorsize = blk_size[major][MINOR(bh->b_rdev)]; + minorsize = blk_size[major][minor]; if (minorsize) { unsigned long maxsector = (minorsize << 1) + 1; - unsigned long sector = bh->b_rsector; - unsigned int count = bh->b_size >> 9; + unsigned long sector = bio->bi_sector; + unsigned int count = bio_sectors(bio); if (maxsector < count || maxsector - count < sector) { - /* Yecch */ - bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped); - - /* This may well happen - the kernel calls bread() - without checking the size of the device, e.g., - when mounting a device. */ - printk(KERN_INFO - "attempt to access beyond end of device\n"); - printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", - kdevname(bh->b_rdev), rw, - (sector + count)>>1, minorsize); - - /* Yecch again */ - bh->b_end_io(bh, 0); - return; + if (blk_size[major][minor]) { + + /* This may well happen - the kernel calls + * bread() without checking the size of the + * device, e.g., when mounting a device. 
*/ + printk(KERN_INFO + "attempt to access beyond end of device\n"); + printk(KERN_INFO "%s: rw=%ld, want=%ld, limit=%Lu\n", + kdevname(bio->bi_dev), bio->bi_rw, + (sector + count)>>1, + (u64) blk_size[major][minor]); + } + set_bit(BIO_EOF, &bio->bi_flags); + goto end_io; } } @@ -856,63 +1052,129 @@ * Resolve the mapping until finished. (drivers are * still free to implement/resolve their own stacking * by explicitly returning 0) - */ - /* NOTE: we don't repeat the blk_size check for each new device. + * + * NOTE: we don't repeat the blk_size check for each new device. * Stacking drivers are expected to know what they are doing. */ do { - q = blk_get_queue(bh->b_rdev); + enum blk_queue_state state; + + q = blk_get_queue(bio->bi_dev); if (!q) { printk(KERN_ERR - "generic_make_request: Trying to access " - "nonexistent block-device %s (%ld)\n", - kdevname(bh->b_rdev), bh->b_rsector); - buffer_IO_error(bh); + "generic_make_request: Trying to access nonexistent block-device %s (%Lu)\n", + kdevname(bio->bi_dev), (u64) bio->bi_sector); +end_io: + bio->bi_end_io(bio); break; } - } while (q->make_request_fn(q, rw, bh)); + + BUG_ON(q->queue_magic != 0x12345678); + + /* + * change state to flush queue, for instance + */ + state = q->queue_state; + while (state != Queue_up) { + BUG(); /* just testing... */ + wait_event(q->queue_wait, q->queue_state != state); + state = q->queue_state; + } + + /* + * If this device has partitions, remap block n + * of partition p to block n+start(p) of the disk. + */ + blk_partition_remap(bio); + + } while (q->make_request_fn(q, bio)); } +/* + * our default bio end_io callback handler for a buffer_head mapping. + */ +static void end_bio_bh_io_sync(struct bio *bio) +{ + struct buffer_head *bh = bio->bi_private; + + bio_hash_remove(bio); + bio->bi_req = NULL; + + bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags)); + bio_put(bio); +} /** - * submit_bh: submit a buffer_head to the block device later for I/O + * submit_bio: submit a bio to the block device layer for I/O * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) - * @bh: The &struct buffer_head which describes the I/O + * @bio: The &struct bio which describes the I/O * - * submit_bh() is very similar in purpose to generic_make_request(), and - * uses that function to do most of the work. + * submit_bio() is very similar in purpose to generic_make_request(), and + * uses that function to do most of the work. Both are fairly rough + * interfaces, @bio must be presetup and ready for I/O. * - * The extra functionality provided by submit_bh is to determine - * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev. - * This is is appropriate for IO requests that come from the buffer - * cache and page cache which (currently) always use aligned blocks. */ -void submit_bh(int rw, struct buffer_head * bh) +int submit_bio(int rw, struct bio *bio) { - int count = bh->b_size >> 9; + int count = bio_sectors(bio); + + /* + * do some validity checks... 
+ */ + BUG_ON(!bio->bi_end_io); - if (!test_bit(BH_Lock, &bh->b_state)) + if (bio_size(bio) > PAGE_SIZE) { + printk("bio: invalid size %d\n", bio_size(bio)); + BUG(); + } else if ((bio_offset(bio) + bio_size(bio)) > PAGE_SIZE) { + printk("bio: size/off %d/%d\n", bio_size(bio), bio_offset(bio)); BUG(); + } + + bio->bi_rw = rw; + + if (rw & WRITE) + kstat.pgpgout += count; + else + kstat.pgpgin += count; + + generic_make_request(bio); + return 1; +} + +/** + * submit_bh: submit a buffer_head to the block device layer for I/O + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) + * @bh: The &struct buffer_head which describes the I/O + * + **/ +int submit_bh(int rw, struct buffer_head * bh) +{ + struct bio *bio; + + BUG_ON(!test_bit(BH_Lock, &bh->b_state)); + BUG_ON(!buffer_mapped(bh)); + BUG_ON(!bh->b_end_io); set_bit(BH_Req, &bh->b_state); /* - * First step, 'identity mapping' - RAID or LVM might - * further remap this. + * from here on down, it's all bio -- do the initial mapping, + * submit_bio -> generic_make_request may further map this bio around */ - bh->b_rdev = bh->b_dev; - bh->b_rsector = bh->b_blocknr * count; + bio = bio_alloc(GFP_NOIO); - generic_make_request(rw, bh); + bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_dev = bh->b_dev; + bio->bi_next = NULL; + bio->bi_private = bh; + bio->bi_end_io = end_bio_bh_io_sync; + + bio->bi_io_vec.bv_page = bh->b_page; + bio->bi_io_vec.bv_len = bh->b_size; + bio->bi_io_vec.bv_offset = bh_offset(bh); - switch (rw) { - case WRITE: - kstat.pgpgout += count; - break; - default: - kstat.pgpgin += count; - break; - } + return submit_bio(rw, bio); } /** @@ -944,8 +1206,9 @@ * * Caveat: * All of the buffers must be for the same device, and must also be - * of the current approved size for the device. */ - + * a multiple of the current approved size for the device. + * + **/ void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]) { unsigned int major; @@ -963,7 +1226,7 @@ /* Verify requested block sizes. */ for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (bh->b_size % correct_size) { + if (bh->b_size & (correct_size - 1)) { printk(KERN_NOTICE "ll_rw_block: device %s: " "only %d-char blocks implemented (%u)\n", kdevname(bhs[0]->b_dev), @@ -1024,12 +1287,11 @@ extern int stram_device_init (void); #endif - /** * end_that_request_first - end I/O on one buffer. + * &q: queue that finished request * @req: the request being processed * @uptodate: 0 for I/O error - * @name: the name printed for an I/O error * * Description: * Ends I/O on the first buffer attached to @req, and sets it up @@ -1038,40 +1300,42 @@ * Return: * 0 - we are done with this request, call end_that_request_last() * 1 - still buffers pending for this request - * - * Caveat: - * Drivers implementing their own end_request handling must call - * blk_finished_io() appropriately. 
**/ -int end_that_request_first (struct request *req, int uptodate, char *name) +int end_that_request_first(struct request *req, int uptodate) { - struct buffer_head * bh; + struct bio *bio; int nsect; req->errors = 0; if (!uptodate) - printk("end_request: I/O error, dev %s (%s), sector %lu\n", - kdevname(req->rq_dev), name, req->sector); + printk("end_request: I/O error, dev %s, sector %lu\n", + kdevname(req->rq_dev), req->sector); - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; - blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { + if ((bio = req->bio) != NULL) { + int rw = bio_data_dir(bio); + if (rw != req->cmd) + BUG(); + nsect = bio_sectors(bio); + req->bio = bio->bi_next; + bio->bi_next = NULL; + bio_endio(bio, uptodate); + if ((bio = req->bio) != NULL) { + if (rw != bio_data_dir(bio)) + BUG(); req->hard_sector += nsect; req->hard_nr_sectors -= nsect; req->sector = req->hard_sector; req->nr_sectors = req->hard_nr_sectors; - req->current_nr_sectors = bh->b_size >> 9; + req->current_nr_sectors = bio_sectors(bio); + req->hard_cur_sectors = req->current_nr_sectors; if (req->nr_sectors < req->current_nr_sectors) { - req->nr_sectors = req->current_nr_sectors; printk("end_request: buffer-list destroyed\n"); + printk("%s: %lu < %u\n", kdevname(req->rq_dev), req->nr_sectors, req->current_nr_sectors); + req->nr_sectors = req->current_nr_sectors; } - req->buffer = bh->b_data; + req->buffer = bio_data(bio); return 1; } } @@ -1080,9 +1344,7 @@ void end_that_request_last(struct request *req) { - if (req->waiting != NULL) - complete(req->waiting); - + complete(req->waiting); blkdev_release_request(req); } @@ -1105,7 +1367,6 @@ memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, sizeof(max_readahead)); - memset(max_sectors, 0, sizeof(max_sectors)); total_ram = nr_free_pages() << (PAGE_SHIFT - 10); @@ -1115,122 +1376,30 @@ */ queue_nr_requests = 64; if (total_ram > MB(32)) - queue_nr_requests = 128; + queue_nr_requests = 256; /* * Batch frees according to queue length */ - batch_requests = queue_nr_requests/4; + batch_requests = queue_nr_requests / 4; printk("block: %d slots per queue, batch=%d\n", queue_nr_requests, batch_requests); -#ifdef CONFIG_AMIGA_Z2RAM - z2_init(); -#endif -#ifdef CONFIG_STRAM_SWAP - stram_device_init(); -#endif -#ifdef CONFIG_BLK_DEV_RAM - rd_init(); -#endif -#ifdef CONFIG_ISP16_CDI - isp16_init(); -#endif + blk_max_low_pfn = max_low_pfn; + blk_max_pfn = max_pfn; + #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE) ide_init(); /* this MUST precede hd_init */ #endif #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD) hd_init(); #endif -#ifdef CONFIG_BLK_DEV_PS2 - ps2esdi_init(); -#endif -#ifdef CONFIG_BLK_DEV_XD - xd_init(); -#endif -#ifdef CONFIG_BLK_DEV_MFM - mfm_init(); -#endif -#ifdef CONFIG_PARIDE - { extern void paride_init(void); paride_init(); }; -#endif -#ifdef CONFIG_MAC_FLOPPY - swim3_init(); -#endif -#ifdef CONFIG_BLK_DEV_SWIM_IOP - swimiop_init(); -#endif -#ifdef CONFIG_AMIGA_FLOPPY - amiga_floppy_init(); -#endif -#ifdef CONFIG_ATARI_FLOPPY - atari_floppy_init(); -#endif -#ifdef CONFIG_BLK_DEV_FD - floppy_init(); -#else #if defined(__i386__) /* Do we even need this? 
*/ outb_p(0xc, 0x3f2); #endif -#endif -#ifdef CONFIG_CDU31A - cdu31a_init(); -#endif -#ifdef CONFIG_ATARI_ACSI - acsi_init(); -#endif -#ifdef CONFIG_MCD - mcd_init(); -#endif -#ifdef CONFIG_MCDX - mcdx_init(); -#endif -#ifdef CONFIG_SBPCD - sbpcd_init(); -#endif -#ifdef CONFIG_AZTCD - aztcd_init(); -#endif -#ifdef CONFIG_CDU535 - sony535_init(); -#endif -#ifdef CONFIG_GSCD - gscd_init(); -#endif -#ifdef CONFIG_CM206 - cm206_init(); -#endif -#ifdef CONFIG_OPTCD - optcd_init(); -#endif -#ifdef CONFIG_SJCD - sjcd_init(); -#endif -#ifdef CONFIG_APBLOCK - ap_init(); -#endif -#ifdef CONFIG_DDV - ddv_init(); -#endif -#ifdef CONFIG_MDISK - mdisk_init(); -#endif -#ifdef CONFIG_DASD - dasd_init(); -#endif -#if defined(CONFIG_S390_TAPE) && defined(CONFIG_S390_TAPE_BLOCK) - tapeblock_init(); -#endif -#ifdef CONFIG_BLK_DEV_XPRAM - xpram_init(); -#endif -#ifdef CONFIG_SUN_JSFLASH - jsfd_init(); -#endif return 0; }; -EXPORT_SYMBOL(io_request_lock); EXPORT_SYMBOL(end_that_request_first); EXPORT_SYMBOL(end_that_request_last); EXPORT_SYMBOL(blk_init_queue); @@ -1238,6 +1407,13 @@ EXPORT_SYMBOL(blk_cleanup_queue); EXPORT_SYMBOL(blk_queue_headactive); EXPORT_SYMBOL(blk_queue_make_request); +EXPORT_SYMBOL(blk_queue_bounce_limit); EXPORT_SYMBOL(generic_make_request); EXPORT_SYMBOL(blkdev_release_request); EXPORT_SYMBOL(generic_unplug_device); +EXPORT_SYMBOL(blk_wake_queue); +EXPORT_SYMBOL(blk_attempt_remerge); +EXPORT_SYMBOL(blk_max_low_pfn); +EXPORT_SYMBOL(blk_queue_max_sectors); +EXPORT_SYMBOL(blk_queue_max_segments); +EXPORT_SYMBOL(blk_queue_hardsect_size); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/loop.c linux/drivers/block/loop.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/loop.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/block/loop.c Tue Oct 30 09:48:43 2001 @@ -76,7 +76,7 @@ #define MAJOR_NR LOOP_MAJOR static int max_loop = 8; -static struct loop_device *loop_dev; +static struct loop_device *loop_dev, **loop_lookup; static int *loop_sizes; static int *loop_blksizes; static devfs_handle_t devfs_handle; /* For the directory */ @@ -168,8 +168,7 @@ lo->lo_device); } -static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +static int lo_send(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ struct address_space *mapping = file->f_dentry->d_inode->i_mapping; @@ -182,8 +181,8 @@ index = pos >> PAGE_CACHE_SHIFT; offset = pos & (PAGE_CACHE_SIZE - 1); - len = bh->b_size; - data = bh->b_data; + len = bio_size(bio); + data = bio_data(bio); while (len > 0) { int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize; size = PAGE_CACHE_SIZE - offset; @@ -255,18 +254,17 @@ return size; } -static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize, - loff_t pos) +static int lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos) { struct lo_read_data cookie; read_descriptor_t desc; struct file *file; cookie.lo = lo; - cookie.data = bh->b_data; + cookie.data = bio_data(bio); cookie.bsize = bsize; desc.written = 0; - desc.count = bh->b_size; + desc.count = bio_size(bio); desc.buf = (char*)&cookie; desc.error = 0; spin_lock_irq(&lo->lo_lock); @@ -302,46 +300,46 @@ return IV; } -static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw) +static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) { loff_t pos; int ret; - pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset; + pos = 
((loff_t) bio->bi_sector << 9) + lo->lo_offset; - if (rw == WRITE) - ret = lo_send(lo, bh, loop_get_bs(lo), pos); + if (bio_rw(bio) == WRITE) + ret = lo_send(lo, bio, loop_get_bs(lo), pos); else - ret = lo_receive(lo, bh, loop_get_bs(lo), pos); + ret = lo_receive(lo, bio, loop_get_bs(lo), pos); return ret; } -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate); -static void loop_put_buffer(struct buffer_head *bh) +static void loop_end_io_transfer(struct bio *); +static void loop_put_buffer(struct bio *bio) { /* - * check b_end_io, may just be a remapped bh and not an allocated one + * check bi_end_io, may just be a remapped bio */ - if (bh && bh->b_end_io == loop_end_io_transfer) { - __free_page(bh->b_page); - kmem_cache_free(bh_cachep, bh); + if (bio && bio->bi_end_io == loop_end_io_transfer) { + __free_page(bio_page(bio)); + bio_put(bio); } } /* - * Add buffer_head to back of pending list + * Add bio to back of pending list */ -static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh) +static void loop_add_bio(struct loop_device *lo, struct bio *bio) { unsigned long flags; spin_lock_irqsave(&lo->lo_lock, flags); - if (lo->lo_bhtail) { - lo->lo_bhtail->b_reqnext = bh; - lo->lo_bhtail = bh; + if (lo->lo_biotail) { + lo->lo_biotail->bi_next = bio; + lo->lo_biotail = bio; } else - lo->lo_bh = lo->lo_bhtail = bh; + lo->lo_bio = lo->lo_biotail = bio; spin_unlock_irqrestore(&lo->lo_lock, flags); up(&lo->lo_bh_mutex); @@ -350,70 +348,58 @@ /* * Grab first pending buffer */ -static struct buffer_head *loop_get_bh(struct loop_device *lo) +static struct bio *loop_get_bio(struct loop_device *lo) { - struct buffer_head *bh; + struct bio *bio; spin_lock_irq(&lo->lo_lock); - if ((bh = lo->lo_bh)) { - if (bh == lo->lo_bhtail) - lo->lo_bhtail = NULL; - lo->lo_bh = bh->b_reqnext; - bh->b_reqnext = NULL; + if ((bio = lo->lo_bio)) { + if (bio == lo->lo_biotail) + lo->lo_biotail = NULL; + lo->lo_bio = bio->bi_next; + bio->bi_next = NULL; } spin_unlock_irq(&lo->lo_lock); - return bh; + return bio; } /* - * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE - * and lo->transfer stuff has already been done. if not, it was a READ - * so queue it for the loop thread and let it do the transfer out of - * b_end_io context (we don't want to do decrypt of a page with irqs + * if this was a WRITE lo->transfer stuff has already been done. 
for READs, + * queue it for the loop thread and let it do the transfer out of + * bi_end_io context (we don't want to do decrypt of a page with irqs * disabled) */ -static void loop_end_io_transfer(struct buffer_head *bh, int uptodate) +static void loop_end_io_transfer(struct bio *bio) { - struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)]; + struct loop_device *lo = loop_lookup[MINOR(bio->bi_dev)]; + int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) { - struct buffer_head *rbh = bh->b_private; + if (!uptodate || bio_rw(bio) == WRITE) { + struct bio *rbh = bio->bi_private; - rbh->b_end_io(rbh, uptodate); + bio_endio(rbh, uptodate); if (atomic_dec_and_test(&lo->lo_pending)) up(&lo->lo_bh_mutex); - loop_put_buffer(bh); + loop_put_buffer(bio); } else - loop_add_bh(lo, bh); + loop_add_bio(lo, bio); } -static struct buffer_head *loop_get_buffer(struct loop_device *lo, - struct buffer_head *rbh) +static struct bio *loop_get_buffer(struct loop_device *lo, struct bio *rbh) { - struct buffer_head *bh; + struct page *page; + struct bio *bio; /* * for xfer_funcs that can operate on the same bh, do that */ if (lo->lo_flags & LO_FLAGS_BH_REMAP) { - bh = rbh; + bio = rbh; goto out_bh; } - do { - bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO); - if (bh) - break; - - run_task_queue(&tq_disk); - schedule_timeout(HZ); - } while (1); - memset(bh, 0, sizeof(*bh)); - - bh->b_size = rbh->b_size; - bh->b_dev = rbh->b_rdev; - bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock); + bio = bio_alloc(GFP_NOIO); /* * easy way out, although it does waste some memory for < PAGE_SIZE @@ -421,41 +407,42 @@ * so can we :-) */ do { - bh->b_page = alloc_page(GFP_NOIO); - if (bh->b_page) + page = alloc_page(GFP_NOIO); + if (page) break; run_task_queue(&tq_disk); schedule_timeout(HZ); } while (1); - bh->b_data = page_address(bh->b_page); - bh->b_end_io = loop_end_io_transfer; - bh->b_private = rbh; - init_waitqueue_head(&bh->b_wait); + bio->bi_io_vec.bv_page = page; + bio->bi_io_vec.bv_len = bio_size(rbh); + bio->bi_io_vec.bv_offset = bio_offset(rbh); + + bio->bi_end_io = loop_end_io_transfer; + bio->bi_private = rbh; out_bh: - bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9); + bio->bi_sector = rbh->bi_sector + (lo->lo_offset >> 9); + bio->bi_rw = rbh->bi_rw; spin_lock_irq(&lo->lo_lock); - bh->b_rdev = lo->lo_device; + bio->bi_dev = lo->lo_device; spin_unlock_irq(&lo->lo_lock); - return bh; + return bio; } -static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh) +static int loop_make_request(request_queue_t *q, struct bio *rbh) { - struct buffer_head *bh = NULL; + struct bio *bh = NULL; struct loop_device *lo; unsigned long IV; + int rw = bio_rw(rbh); - if (!buffer_locked(rbh)) - BUG(); - - if (MINOR(rbh->b_rdev) >= max_loop) + if (MINOR(rbh->bi_dev) >= max_loop) goto out; - lo = &loop_dev[MINOR(rbh->b_rdev)]; + lo = &loop_dev[MINOR(rbh->bi_dev)]; spin_lock_irq(&lo->lo_lock); if (lo->lo_state != Lo_bound) goto inactive; @@ -468,25 +455,17 @@ } else if (rw == READA) { rw = READ; } else if (rw != READ) { - printk(KERN_ERR "loop: unknown command (%d)\n", rw); + printk(KERN_ERR "loop: unknown command (%x)\n", rw); goto err; } -#if CONFIG_HIGHMEM - rbh = create_bounce(rw, rbh); -#endif + blk_queue_bounce(q, &rbh); /* * file backed, queue for loop_thread to handle */ if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - /* - * rbh locked at this point, noone else should clear - * the dirty flag - */ - if (rw == WRITE) - set_bit(BH_Dirty, 
&rbh->b_state); - loop_add_bh(lo, rbh); + loop_add_bio(lo, rbh); return 0; } @@ -494,15 +473,14 @@ * piggy old buffer on original, and submit for I/O */ bh = loop_get_buffer(lo, rbh); - IV = loop_get_iv(lo, rbh->b_rsector); + IV = loop_get_iv(lo, rbh->bi_sector); if (rw == WRITE) { - set_bit(BH_Dirty, &bh->b_state); - if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data, - bh->b_size, IV)) + if (lo_do_transfer(lo, WRITE, bio_data(bh), bio_data(rbh), + bio_size(bh), IV)) goto err; } - generic_make_request(rw, bh); + generic_make_request(bh); return 0; err: @@ -510,14 +488,14 @@ up(&lo->lo_bh_mutex); loop_put_buffer(bh); out: - buffer_IO_error(rbh); + bio_io_error(rbh); return 0; inactive: spin_unlock_irq(&lo->lo_lock); goto out; } -static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh) +static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) { int ret; @@ -525,19 +503,17 @@ * For block backed loop, we know this is a READ */ if (lo->lo_flags & LO_FLAGS_DO_BMAP) { - int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state); - - ret = do_bh_filebacked(lo, bh, rw); - bh->b_end_io(bh, !ret); + ret = do_bio_filebacked(lo, bio); + bio_endio(bio, !ret); } else { - struct buffer_head *rbh = bh->b_private; - unsigned long IV = loop_get_iv(lo, rbh->b_rsector); + struct bio *rbh = bio->bi_private; + unsigned long IV = loop_get_iv(lo, rbh->bi_sector); - ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data, - bh->b_size, IV); + ret = lo_do_transfer(lo, READ, bio_data(bio), bio_data(rbh), + bio_size(bio), IV); - rbh->b_end_io(rbh, !ret); - loop_put_buffer(bh); + bio_endio(rbh, !ret); + loop_put_buffer(bio); } } @@ -550,7 +526,7 @@ static int loop_thread(void *data) { struct loop_device *lo = data; - struct buffer_head *bh; + struct bio *bio; daemonize(); exit_files(current); @@ -584,12 +560,12 @@ if (!atomic_read(&lo->lo_pending)) break; - bh = loop_get_bh(lo); - if (!bh) { - printk("loop: missing bh\n"); + bio = loop_get_bio(lo); + if (!bio) { + printk("loop: missing bio\n"); continue; } - loop_handle_bh(lo, bh); + loop_handle_bio(lo, bio); /* * upped both for pending work and tear-down, lo_pending @@ -666,6 +642,7 @@ figure_loop_size(lo); lo->old_gfp_mask = inode->i_mapping->gfp_mask; inode->i_mapping->gfp_mask = GFP_NOIO; + loop_lookup[MINOR(lo_device)] = lo; bs = 0; if (blksize_size[MAJOR(lo_device)]) @@ -675,7 +652,7 @@ set_blocksize(dev, bs); - lo->lo_bh = lo->lo_bhtail = NULL; + lo->lo_bio = lo->lo_biotail = NULL; kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); down(&lo->lo_sem); @@ -865,7 +842,7 @@ err = -ENXIO; break; } - err = put_user((unsigned long)loop_sizes[lo->lo_number] << 1, (unsigned long *) arg); + err = put_user((unsigned long) loop_sizes[lo->lo_number] << 1, (unsigned long *) arg); break; case BLKGETSIZE64: if (lo->lo_state != Lo_bound) { @@ -1009,13 +986,17 @@ if (!loop_dev) return -ENOMEM; + loop_lookup = kmalloc(max_loop*sizeof(struct loop_device *),GFP_KERNEL); + if (!loop_lookup) + goto out_mem; + loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); if (!loop_sizes) - goto out_sizes; + goto out_mem; loop_blksizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); if (!loop_blksizes) - goto out_blksizes; + goto out_mem; blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request); @@ -1031,6 +1012,7 @@ memset(loop_sizes, 0, max_loop * sizeof(int)); memset(loop_blksizes, 0, max_loop * sizeof(int)); + memset(loop_lookup, 0, max_loop * sizeof(struct loop_device *)); blk_size[MAJOR_NR] = loop_sizes; 
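A note on the helpers leaned on throughout the loop.c conversion above: bio_data(), bio_size(), bio_offset(), bio_page() and bio_sectors() come from the bio infrastructure introduced elsewhere in this patch. For the single-segment bios used here, where bi_io_vec is one embedded struct bio_vec rather than an array, they reduce to roughly the following sketch; the exact definitions in the patch may differ:

	/* illustrative sketch only -- assumes a single embedded bio_vec */
	#define bio_page(bio)		((bio)->bi_io_vec.bv_page)
	#define bio_offset(bio)		((bio)->bi_io_vec.bv_offset)
	#define bio_size(bio)		((bio)->bi_io_vec.bv_len)
	#define bio_sectors(bio)	(bio_size(bio) >> 9)
	#define bio_data(bio)		(page_address(bio_page(bio)) + bio_offset(bio))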
blksize_size[MAJOR_NR] = loop_blksizes; for (i = 0; i < max_loop; i++) @@ -1039,9 +1021,9 @@ printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop); return 0; -out_sizes: +out_mem: kfree(loop_dev); -out_blksizes: + kfree(loop_lookup); kfree(loop_sizes); printk(KERN_ERR "loop: ran out of memory\n"); return -ENOMEM; @@ -1054,6 +1036,7 @@ printk(KERN_WARNING "loop: cannot unregister blkdev\n"); kfree(loop_dev); + kfree(loop_lookup); kfree(loop_sizes); kfree(loop_blksizes); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/nbd.c linux/drivers/block/nbd.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/nbd.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/block/nbd.c Tue Oct 30 09:48:43 2001 @@ -165,14 +165,14 @@ FAIL("Sendmsg failed for control."); if (req->cmd == WRITE) { - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; DEBUG("data, "); do { - result = nbd_xmit(1, sock, bh->b_data, bh->b_size, bh->b_reqnext == NULL ? 0 : MSG_MORE); + result = nbd_xmit(1, sock, bio_data(bio), bio_size(bio), bio->bi_next == NULL ? 0 : MSG_MORE); if (result <= 0) FAIL("Send data failed."); - bh = bh->b_reqnext; - } while(bh); + bio = bio->bi_next; + } while(bio); } return; @@ -205,14 +205,14 @@ if (ntohl(reply.error)) FAIL("Other side returned error."); if (req->cmd == READ) { - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; DEBUG("data, "); do { - result = nbd_xmit(0, lo->sock, bh->b_data, bh->b_size, MSG_WAITALL); + result = nbd_xmit(0, lo->sock, bio_data(bio), bio_size(bio), MSG_WAITALL); if (result <= 0) HARDFAIL("Recv data failed."); - bh = bh->b_reqnext; - } while(bh); + bio = bio->bi_next; + } while(bio); } DEBUG("done.\n"); return req; @@ -250,7 +250,7 @@ goto out; } #endif - list_del(&req->queue); + blkdev_dequeue_request(req); up (&lo->queue_lock); nbd_end_request(req); @@ -285,7 +285,7 @@ } #endif req->errors++; - list_del(&req->queue); + blkdev_dequeue_request(req); up(&lo->queue_lock); nbd_end_request(req); @@ -333,22 +333,22 @@ #endif req->errors = 0; blkdev_dequeue_request(req); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); down (&lo->queue_lock); - list_add(&req->queue, &lo->queue_head); + list_add(&req->queuelist, &lo->queue_head); nbd_send_req(lo->sock, req); /* Why does this block? 
*/ up (&lo->queue_lock); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); continue; error_out: req->errors++; blkdev_dequeue_request(req); - spin_unlock(&io_request_lock); + spin_unlock(&q->queue_lock); nbd_end_request(req); - spin_lock(&io_request_lock); + spin_lock(&q->queue_lock); } return; } @@ -501,7 +501,7 @@ #endif blksize_size[MAJOR_NR] = nbd_blksizes; blk_size[MAJOR_NR] = nbd_sizes; - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request); + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request, "nbd"); blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); for (i = 0; i < MAX_NBD; i++) { nbd_dev[i].refcnt = 0; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/paride/pd.c linux/drivers/block/paride/pd.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/paride/pd.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/block/paride/pd.c Tue Oct 30 09:48:43 2001 @@ -287,7 +287,6 @@ static struct hd_struct pd_hd[PD_DEVS]; static int pd_sizes[PD_DEVS]; static int pd_blocksizes[PD_DEVS]; -static int pd_maxsectors[PD_DEVS]; #define PD_NAMELEN 8 @@ -330,7 +329,6 @@ static int pd_cmd; /* current command READ/WRITE */ static int pd_unit; /* unit of current request */ static int pd_dev; /* minor of current request */ -static int pd_poffs; /* partition offset of current minor */ static char * pd_buf; /* buffer for request in progress */ static DECLARE_WAIT_QUEUE_HEAD(pd_wait_open); @@ -397,6 +395,7 @@ } q = BLK_DEFAULT_QUEUE(MAJOR_NR); blk_init_queue(q, DEVICE_REQUEST); + blk_queue_max_sectors(q, cluster); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read ahead */ pd_gendisk.major = major; @@ -406,9 +405,6 @@ for(i=0;ii_rdev)) return -EINVAL; - dev = MINOR(inode->i_rdev); + if (!inode || !inode->i_rdev) + return -EINVAL; unit = DEVICE_NR(inode->i_rdev); - if (dev >= PD_DEVS) return -EINVAL; - if (!PD.present) return -ENODEV; + if (!PD.present) + return -ENODEV; - switch (cmd) { + switch (cmd) { case CDROMEJECT: if (PD.access == 1) pd_eject(unit); return 0; - case HDIO_GETGEO: - if (!geo) return -EINVAL; - err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); - if (err) return err; + case HDIO_GETGEO: + if (!geo) return -EINVAL; + err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); + if (err) return err; if (PD.alt_geom) { - put_user(PD.capacity/(PD_LOG_HEADS*PD_LOG_SECTS), + put_user(PD.capacity/(PD_LOG_HEADS*PD_LOG_SECTS), (short *) &geo->cylinders); - put_user(PD_LOG_HEADS, (char *) &geo->heads); - put_user(PD_LOG_SECTS, (char *) &geo->sectors); + put_user(PD_LOG_HEADS, (char *) &geo->heads); + put_user(PD_LOG_SECTS, (char *) &geo->sectors); } else { - put_user(PD.cylinders, (short *) &geo->cylinders); - put_user(PD.heads, (char *) &geo->heads); - put_user(PD.sectors, (char *) &geo->sectors); + put_user(PD.cylinders, (short *) &geo->cylinders); + put_user(PD.heads, (char *) &geo->heads); + put_user(PD.sectors, (char *) &geo->sectors); } - put_user(pd_hd[dev].start_sect,(long *)&geo->start); - return 0; - case BLKGETSIZE: - if (!arg) return -EINVAL; - err = verify_area(VERIFY_WRITE,(unsigned long *) arg,sizeof(unsigned long)); - if (err) return (err); - put_user(pd_hd[dev].nr_sects,(unsigned long *) arg); - return (0); - case BLKGETSIZE64: - return put_user((u64)pd_hd[dev].nr_sects << 9, (u64 *)arg); - case BLKRRPART: + put_user(get_start_sect(inode->i_rdev), (long *)&geo->start); + return 0; + case BLKRRPART: if (!capable(CAP_SYS_ADMIN)) return -EACCES; - return pd_revalidate(inode->i_rdev); + return pd_revalidate(inode->i_rdev); + case 
BLKGETSIZE: + case BLKGETSIZE64: case BLKROSET: case BLKROGET: case BLKRASET: @@ -494,9 +484,9 @@ case BLKFLSBUF: case BLKPG: return blk_ioctl(inode->i_rdev, cmd, arg); - default: - return -EINVAL; - } + default: + return -EINVAL; + } } static int pd_release (struct inode *inode, struct file *file) @@ -532,36 +522,32 @@ } static int pd_revalidate(kdev_t dev) +{ + int unit, res; + long flags; -{ int p, unit, minor; - long flags; - - unit = DEVICE_NR(dev); - if ((unit >= PD_UNITS) || (!PD.present)) return -ENODEV; - - save_flags(flags); - cli(); - if (PD.access > 1) { - restore_flags(flags); - return -EBUSY; - } - pd_valid = 0; - restore_flags(flags); + unit = DEVICE_NR(dev); + if ((unit >= PD_UNITS) || !PD.present) + return -ENODEV; - for (p=(PD_PARTNS-1);p>=0;p--) { - minor = p + unit*PD_PARTNS; - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - pd_hd[minor].start_sect = 0; - pd_hd[minor].nr_sects = 0; - } + save_flags(flags); + cli(); + if (PD.access > 1) { + restore_flags(flags); + return -EBUSY; + } + pd_valid = 0; + restore_flags(flags); - if (pd_identify(unit)) - grok_partitions(&pd_gendisk,unit,1<cmd; - pd_poffs = pd_hd[pd_dev].start_sect; - pd_block += pd_poffs; pd_buf = CURRENT->buffer; pd_retries = 0; @@ -908,7 +890,7 @@ (CURRENT->cmd != pd_cmd) || (MINOR(CURRENT->rq_dev) != pd_dev) || (CURRENT->rq_status == RQ_INACTIVE) || - (CURRENT->sector+pd_poffs != pd_block)) + (CURRENT->sector != pd_block)) printk("%s: OUCH: request list changed unexpectedly\n", PD.name); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/paride/pf.c linux/drivers/block/paride/pf.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/paride/pf.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/block/paride/pf.c Tue Oct 30 09:48:43 2001 @@ -458,7 +458,7 @@ if (PF.access == 1) { pf_eject(unit); return 0; - } + } case HDIO_GETGEO: if (!geo) return -EINVAL; err = verify_area(VERIFY_WRITE,geo,sizeof(*geo)); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/ps2esdi.c linux/drivers/block/ps2esdi.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/ps2esdi.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/block/ps2esdi.c Tue Oct 30 09:48:43 2001 @@ -65,6 +65,7 @@ #define TYPE_0_CMD_BLK_LENGTH 2 #define TYPE_1_CMD_BLK_LENGTH 4 +#define PS2ESDI_LOCK (&((BLK_DEFAULT_QUEUE(MAJOR_NR))->queue_lock)) static void reset_ctrl(void); @@ -117,7 +118,6 @@ static char ps2esdi_valid[MAX_HD]; static int ps2esdi_sizes[MAX_HD << 6]; static int ps2esdi_blocksizes[MAX_HD << 6]; -static int ps2esdi_maxsect[MAX_HD << 6]; static int ps2esdi_drives; static struct hd_struct ps2esdi[MAX_HD << 6]; static u_short io_base; @@ -220,8 +220,7 @@ } void -cleanup_module(void) -{ +cleanup_module(void) { if(ps2esdi_slot) { mca_mark_as_unused(ps2esdi_slot); mca_set_adapter_procfn(ps2esdi_slot, NULL, NULL); @@ -230,8 +229,9 @@ free_dma(dma_arb_level); free_irq(PS2ESDI_IRQ, NULL); devfs_unregister_blkdev(MAJOR_NR, "ed"); - del_gendisk(&ps2esdi_gendisk); blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); + del_gendisk(&ps2esdi_gendisk); + blk_clear(MAJOR_NR); } #endif /* MODULE */ @@ -414,16 +414,13 @@ ps2esdi_gendisk.nr_real = ps2esdi_drives; - /* 128 was old default, maybe maxsect=255 is ok too? - Paul G. 
*/ - for (i = 0; i < (MAX_HD << 6); i++) { - ps2esdi_maxsect[i] = 128; + for (i = 0; i < (MAX_HD << 6); i++) ps2esdi_blocksizes[i] = 1024; - } request_dma(dma_arb_level, "ed"); request_region(io_base, 4, "ed"); blksize_size[MAJOR_NR] = ps2esdi_blocksizes; - max_sectors[MAJOR_NR] = ps2esdi_maxsect; + blk_queue_max_sectors(BLK_DEFAULT_QUEUE(MAJOR_NR), 128); for (i = 0; i < ps2esdi_drives; i++) { register_disk(&ps2esdi_gendisk,MKDEV(MAJOR_NR,i<<6),1<<6, @@ -494,13 +491,9 @@ CURRENT->current_nr_sectors); #endif - - block = CURRENT->sector + ps2esdi[MINOR(CURRENT->rq_dev)].start_sect; - -#if 0 - printk("%s: blocknumber : %d\n", DEVICE_NAME, block); -#endif + block = CURRENT->sector; count = CURRENT->current_nr_sectors; + switch (CURRENT->cmd) { case READ: ps2esdi_readwrite(READ, CURRENT_DEV, block, count); @@ -957,10 +950,10 @@ break; } if(ending != -1) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(PS2ESDI_LOCK, flags); end_request(ending); do_ps2esdi_request(BLK_DEFAULT_QUEUE(MAJOR_NR)); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(PS2ESDI_LOCK, flags); } } /* handle interrupts */ @@ -1099,31 +1092,20 @@ put_user(ps2esdi_info[dev].head, (char *) &geometry->heads); put_user(ps2esdi_info[dev].sect, (char *) &geometry->sectors); put_user(ps2esdi_info[dev].cyl, (short *) &geometry->cylinders); - put_user(ps2esdi[MINOR(inode->i_rdev)].start_sect, + put_user(get_start_sect(inode->i_rdev), (long *) &geometry->start); - return (0); - } - break; - - case BLKGETSIZE: - if (arg) { - if ((err = verify_area(VERIFY_WRITE, (unsigned long *) arg, sizeof(unsigned long)))) - return (err); - put_user(ps2esdi[MINOR(inode->i_rdev)].nr_sects, (unsigned long *) arg); - - return (0); + return 0; } break; - case BLKGETSIZE64: - return put_user((u64)ps2esdi[MINOR(inode->i_rdev)].nr_sects << 9, (u64 *) arg); - case BLKRRPART: if (!capable(CAP_SYS_ADMIN)) return -EACCES; return (ps2esdi_reread_partitions(inode->i_rdev)); + case BLKGETSIZE: + case BLKGETSIZE64: case BLKROSET: case BLKROGET: case BLKRASET: @@ -1142,8 +1124,7 @@ static int ps2esdi_reread_partitions(kdev_t dev) { int target = DEVICE_NR(dev); - int start = target << ps2esdi_gendisk.minor_shift; - int partition; + int res; cli(); ps2esdi_valid[target] = (access_count[target] != 1); @@ -1151,21 +1132,16 @@ if (ps2esdi_valid[target]) return (-EBUSY); - for (partition = ps2esdi_gendisk.max_p - 1; - partition >= 0; partition--) { - int minor = (start | partition); - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - ps2esdi_gendisk.part[minor].start_sect = 0; - ps2esdi_gendisk.part[minor].nr_sects = 0; - } - - grok_partitions(&ps2esdi_gendisk, target, 1<<6, - ps2esdi_info[target].head * ps2esdi_info[target].cyl * ps2esdi_info[target].sect); - + res = wipe_partitions(dev); + if (res == 0) + grok_partitions(dev, ps2esdi_info[target].head + * ps2esdi_info[target].cyl + * ps2esdi_info[target].sect); + ps2esdi_valid[target] = 1; wake_up(&ps2esdi_wait_open); - return (0); + return (res); } static void ps2esdi_reset_timer(unsigned long unused) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/rd.c linux/drivers/block/rd.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/rd.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/block/rd.c Tue Oct 30 09:48:43 2001 @@ -98,7 +98,7 @@ static unsigned long rd_length[NUM_RAMDISKS]; /* Size of RAM disks in bytes */ static int rd_hardsec[NUM_RAMDISKS]; /* Size of real blocks in bytes */ static int rd_blocksizes[NUM_RAMDISKS]; /* Size of 1024 byte blocks 
:) */ -static int rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */ +static int rd_kbsize[NUM_RAMDISKS]; /* Size in blocks of 1024 bytes */ static devfs_handle_t devfs_handle; static struct block_device *rd_bdev[NUM_RAMDISKS];/* Protected device data */ @@ -227,19 +227,18 @@ commit_write: ramdisk_commit_write, }; -static int rd_blkdev_pagecache_IO(int rw, struct buffer_head * sbh, int minor) +static int rd_blkdev_pagecache_IO(int rw, struct bio *sbh, int minor) { struct address_space * mapping; unsigned long index; int offset, size, err; err = -EIO; - err = 0; mapping = rd_bdev[minor]->bd_inode->i_mapping; - index = sbh->b_rsector >> (PAGE_CACHE_SHIFT - 9); - offset = (sbh->b_rsector << 9) & ~PAGE_CACHE_MASK; - size = sbh->b_size; + index = sbh->bi_sector >> (PAGE_CACHE_SHIFT - 9); + offset = (sbh->bi_sector << 9) & ~PAGE_CACHE_MASK; + size = bio_size(sbh); do { int count; @@ -276,18 +275,18 @@ if (rw == READ) { src = kmap(page); src += offset; - dst = bh_kmap(sbh); + dst = bio_kmap(sbh); } else { dst = kmap(page); dst += offset; - src = bh_kmap(sbh); + src = bio_kmap(sbh); } offset = 0; memcpy(dst, src, count); kunmap(page); - bh_kunmap(sbh); + bio_kunmap(sbh); if (rw == READ) { flush_dcache_page(page); @@ -311,19 +310,19 @@ * 19-JAN-1998 Richard Gooch Added devfs support * */ -static int rd_make_request(request_queue_t * q, int rw, struct buffer_head *sbh) +static int rd_make_request(request_queue_t * q, struct bio *sbh) { unsigned int minor; unsigned long offset, len; + int rw = sbh->bi_rw; - minor = MINOR(sbh->b_rdev); + minor = MINOR(sbh->bi_dev); if (minor >= NUM_RAMDISKS) goto fail; - - offset = sbh->b_rsector << 9; - len = sbh->b_size; + offset = sbh->bi_sector << 9; + len = bio_size(sbh); if ((offset + len) > rd_length[minor]) goto fail; @@ -338,10 +337,11 @@ if (rd_blkdev_pagecache_IO(rw, sbh, minor)) goto fail; - sbh->b_end_io(sbh,1); + set_bit(BIO_UPTODATE, &sbh->bi_flags); + sbh->bi_end_io(sbh); return 0; fail: - sbh->b_end_io(sbh,0); + bio_io_error(sbh); return 0; } @@ -477,9 +477,7 @@ devfs_unregister (devfs_handle); unregister_blkdev( MAJOR_NR, "ramdisk" ); - hardsect_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; + blk_clear(MAJOR_NR); } #endif @@ -524,7 +522,6 @@ register_disk(NULL, MKDEV(MAJOR_NR,INITRD_MINOR), 1, &rd_bd_op, rd_size<<1); #endif - hardsect_size[MAJOR_NR] = rd_hardsec; /* Size of the RAM disk blocks */ blksize_size[MAJOR_NR] = rd_blocksizes; /* Avoid set_blocksize() check */ blk_size[MAJOR_NR] = rd_kbsize; /* Size of the RAM disk in kB */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/block/xd.c linux/drivers/block/xd.c --- /opt/kernel/linux-2.4.14-pre6/drivers/block/xd.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/block/xd.c Tue Oct 30 09:48:43 2001 @@ -257,7 +257,6 @@ } xd_gendisk.nr_real = xd_drives; - } /* xd_open: open a device */ @@ -292,7 +291,7 @@ if (CURRENT_DEV < xd_drives && CURRENT->sector + CURRENT->nr_sectors <= xd_struct[MINOR(CURRENT->rq_dev)].nr_sects) { - block = CURRENT->sector + xd_struct[MINOR(CURRENT->rq_dev)].start_sect; + block = CURRENT->sector; count = CURRENT->nr_sectors; switch (CURRENT->cmd) { @@ -329,20 +328,16 @@ g.heads = xd_info[dev].heads; g.sectors = xd_info[dev].sectors; g.cylinders = xd_info[dev].cylinders; - g.start = xd_struct[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user(geometry, &g, sizeof g) ?
-EFAULT : 0; } - case BLKGETSIZE: - if (!arg) return -EINVAL; - return put_user(xd_struct[MINOR(inode->i_rdev)].nr_sects,(unsigned long *) arg); - case BLKGETSIZE64: - return put_user((u64)xd_struct[MINOR(inode->i_rdev)].nr_sects << 9, (u64 *)arg); case HDIO_SET_DMA: if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (xdc_busy) return -EBUSY; nodma = !arg; if (nodma && xd_dma_buffer) { - xd_dma_mem_free((unsigned long)xd_dma_buffer, xd_maxsectors * 0x200); + xd_dma_mem_free((unsigned long)xd_dma_buffer, + xd_maxsectors * 0x200); xd_dma_buffer = 0; } return 0; @@ -355,6 +350,8 @@ return -EACCES; return xd_reread_partitions(inode->i_rdev); + case BLKGETSIZE: + case BLKGETSIZE64: case BLKFLSBUF: case BLKROSET: case BLKROGET: @@ -381,11 +378,9 @@ static int xd_reread_partitions(kdev_t dev) { int target; - int start; - int partition; + int res; target = DEVICE_NR(dev); - start = target << xd_gendisk.minor_shift; cli(); xd_valid[target] = (xd_access[target] != 1); @@ -393,20 +388,16 @@ if (xd_valid[target]) return -EBUSY; - for (partition = xd_gendisk.max_p - 1; partition >= 0; partition--) { - int minor = (start | partition); - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - xd_gendisk.part[minor].start_sect = 0; - xd_gendisk.part[minor].nr_sects = 0; - }; - - grok_partitions(&xd_gendisk, target, 1<<6, - xd_info[target].heads * xd_info[target].cylinders * xd_info[target].sectors); + res = wipe_partitions(dev); + if (!res) + grok_partitions(dev, xd_info[target].heads + * xd_info[target].cylinders + * xd_info[target].sectors); xd_valid[target] = 1; wake_up(&xd_wait_open); - return 0; + return res; } /* xd_readwrite: handle a read/write request */ @@ -1108,12 +1099,9 @@ static void xd_done (void) { - blksize_size[MAJOR_NR] = NULL; blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); - blk_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; - read_ahead[MAJOR_NR] = 0; del_gendisk(&xd_gendisk); + blk_clear(MAJOR_NR); release_region(xd_iobase,4); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/cdrom/cdu31a.c linux/drivers/cdrom/cdu31a.c --- /opt/kernel/linux-2.4.14-pre6/drivers/cdrom/cdu31a.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/cdrom/cdu31a.c Tue Oct 30 09:48:43 2001 @@ -1583,7 +1583,10 @@ /* Make sure we have a valid TOC. */ sony_get_toc(); - spin_unlock_irq(&io_request_lock); + /* + * jens: driver has lots of races + */ + spin_unlock_irq(&q->queue_lock); /* Make sure the timer is cancelled. */ del_timer(&cdu31a_abort_timer); @@ -1730,7 +1733,7 @@ } end_do_cdu31a_request: - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); #if 0 /* After finished, cancel any pending operations. 
*/ abort_read(); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/cdrom/cm206.c linux/drivers/cdrom/cm206.c --- /opt/kernel/linux-2.4.14-pre6/drivers/cdrom/cm206.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/cdrom/cm206.c Tue Oct 30 09:48:43 2001 @@ -866,7 +866,7 @@ end_request(0); continue; } - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); error = 0; for (i = 0; i < CURRENT->nr_sectors; i++) { int e1, e2; @@ -893,7 +893,7 @@ debug(("cm206_request: %d %d\n", e1, e2)); } } - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); end_request(!error); } } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/cdrom/sbpcd.c linux/drivers/cdrom/sbpcd.c --- /opt/kernel/linux-2.4.14-pre6/drivers/cdrom/sbpcd.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/cdrom/sbpcd.c Tue Oct 30 09:48:43 2001 @@ -4930,7 +4930,7 @@ sbpcd_end_request(req, 0); if (req -> sector == -1) sbpcd_end_request(req, 0); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); down(&ioctl_read_sem); if (req->cmd != READ) @@ -4970,7 +4970,7 @@ xnr, req, req->sector, req->nr_sectors, jiffies); #endif up(&ioctl_read_sem); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 1); goto request_loop; } @@ -5011,7 +5011,7 @@ xnr, req, req->sector, req->nr_sectors, jiffies); #endif up(&ioctl_read_sem); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 1); goto request_loop; } @@ -5027,7 +5027,7 @@ #endif up(&ioctl_read_sem); sbp_sleep(0); /* wait a bit, try again */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); sbpcd_end_request(req, 0); goto request_loop; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/char/raw.c linux/drivers/char/raw.c --- /opt/kernel/linux-2.4.14-pre6/drivers/char/raw.c Sun Sep 23 05:35:43 2001 +++ linux/drivers/char/raw.c Mon Oct 15 10:41:43 2001 @@ -126,10 +126,8 @@ if (is_mounted(rdev)) { if (blksize_size[MAJOR(rdev)]) sector_size = blksize_size[MAJOR(rdev)][MINOR(rdev)]; - } else { - if (hardsect_size[MAJOR(rdev)]) - sector_size = hardsect_size[MAJOR(rdev)][MINOR(rdev)]; - } + } else + sector_size = get_hardsect_size(rdev); set_blocksize(rdev, sector_size); raw_devices[minor].sector_size = sector_size; @@ -273,16 +271,14 @@ struct kiobuf * iobuf; int new_iobuf; int err = 0; - unsigned long blocknr, blocks; + unsigned long blocks; size_t transferred; int iosize; - int i; int minor; kdev_t dev; unsigned long limit; - int sector_size, sector_bits, sector_mask; - int max_sectors; + sector_t blocknr; /* * First, a few checks on device size limits @@ -307,7 +303,6 @@ sector_size = raw_devices[minor].sector_size; sector_bits = raw_devices[minor].sector_bits; sector_mask = sector_size- 1; - max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9); if (blk_size[MAJOR(dev)]) limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits; @@ -325,18 +320,10 @@ if ((*offp >> sector_bits) >= limit) goto out_free; - /* - * Split the IO into KIO_MAX_SECTORS chunks, mapping and - * unmapping the single kiobuf as we go to perform each chunk of - * IO. 
- */ - transferred = 0; blocknr = *offp >> sector_bits; while (size > 0) { blocks = size >> sector_bits; - if (blocks > max_sectors) - blocks = max_sectors; if (blocks > limit - blocknr) blocks = limit - blocknr; if (!blocks) @@ -348,10 +335,7 @@ if (err) break; - for (i=0; i < blocks; i++) - iobuf->blocks[i] = blocknr++; - - err = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, sector_size); + err = brw_kiovec(rw, 1, &iobuf, dev, &blocknr, sector_size); if (rw == READ && err > 0) mark_dirty_kiobuf(iobuf, err); @@ -361,6 +345,8 @@ size -= err; buf += err; } + + blocknr += blocks; unmap_kiobuf(iobuf); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/aec62xx.c linux/drivers/ide/aec62xx.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/aec62xx.c Tue Jun 20 16:52:36 2000 +++ linux/drivers/ide/aec62xx.c Mon Oct 15 10:41:43 2001 @@ -557,6 +557,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) hwif->dmaproc = &aec62xx_dmaproc; + hwif->highmem = 1; #else /* !CONFIG_BLK_DEV_IDEDMA */ hwif->drives[0].autotune = 1; hwif->drives[1].autotune = 1; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/cmd64x.c linux/drivers/ide/cmd64x.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/cmd64x.c Fri Jul 28 01:40:57 2000 +++ linux/drivers/ide/cmd64x.c Mon Oct 15 10:41:43 2001 @@ -795,5 +795,7 @@ default: break; } + + hwif->highmem = 1; #endif /* CONFIG_BLK_DEV_IDEDMA */ } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/cs5530.c linux/drivers/ide/cs5530.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/cs5530.c Wed Jan 3 01:58:45 2001 +++ linux/drivers/ide/cs5530.c Mon Oct 15 10:41:43 2001 @@ -352,9 +352,10 @@ unsigned int basereg, d0_timings; #ifdef CONFIG_BLK_DEV_IDEDMA - hwif->dmaproc = &cs5530_dmaproc; + hwif->dmaproc = &cs5530_dmaproc; + hwif->highmem = 1; #else - hwif->autodma = 0; + hwif->autodma = 0; #endif /* CONFIG_BLK_DEV_IDEDMA */ hwif->tuneproc = &cs5530_tuneproc; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/cy82c693.c linux/drivers/ide/cy82c693.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/cy82c693.c Sun May 20 02:43:06 2001 +++ linux/drivers/ide/cy82c693.c Mon Oct 15 10:41:43 2001 @@ -441,6 +441,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { + hwif->highmem = 1; hwif->dmaproc = &cy82c693_dmaproc; if (!noautodma) hwif->autodma = 1; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/hd.c linux/drivers/ide/hd.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/hd.c Wed Oct 24 12:49:57 2001 +++ linux/drivers/ide/hd.c Mon Oct 15 10:41:43 2001 @@ -107,7 +107,6 @@ static int hd_sizes[MAX_HD<<6]; static int hd_blocksizes[MAX_HD<<6]; static int hd_hardsectsizes[MAX_HD<<6]; -static int hd_maxsect[MAX_HD<<6]; static struct timer_list device_timer; @@ -560,19 +559,18 @@ dev = MINOR(CURRENT->rq_dev); block = CURRENT->sector; nsect = CURRENT->nr_sectors; - if (dev >= (NR_HD<<6) || block >= hd[dev].nr_sects || ((block+nsect) > hd[dev].nr_sects)) { -#ifdef DEBUG - if (dev >= (NR_HD<<6)) + if (dev >= (NR_HD<<6) || (dev & 0x3f) || + block >= hd[dev].nr_sects || ((block+nsect) > hd[dev].nr_sects)) { + if (dev >= (NR_HD<<6) || (dev & 0x3f)) printk("hd: bad minor number: device=%s\n", kdevname(CURRENT->rq_dev)); else printk("hd%c: bad access: block=%d, count=%d\n", (MINOR(CURRENT->rq_dev)>>6)+'a', block, nsect); -#endif end_request(0); goto repeat; } - block += hd[dev].start_sect; + dev >>= 6; if (special_op[dev]) { if 
(do_special_op(dev)) @@ -634,22 +632,17 @@ g.heads = hd_info[dev].head; g.sectors = hd_info[dev].sect; g.cylinders = hd_info[dev].cyl; - g.start = hd[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user(loc, &g, sizeof g) ? -EFAULT : 0; } - case BLKGETSIZE: /* Return device size */ - return put_user(hd[MINOR(inode->i_rdev)].nr_sects, - (unsigned long *) arg); - case BLKGETSIZE64: - return put_user((u64)hd[MINOR(inode->i_rdev)].nr_sects << 9, - (u64 *) arg); - case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; return revalidate_hddisk(inode->i_rdev, 1); + case BLKGETSIZE: + case BLKGETSIZE64: case BLKROSET: case BLKROGET: case BLKRASET: @@ -733,11 +726,9 @@ for(drive=0; drive < (MAX_HD << 6); drive++) { hd_blocksizes[drive] = 1024; hd_hardsectsizes[drive] = 512; - hd_maxsect[drive]=255; } blksize_size[MAJOR_NR] = hd_blocksizes; hardsect_size[MAJOR_NR] = hd_hardsectsizes; - max_sectors[MAJOR_NR] = hd_maxsect; #ifdef __i386__ if (!NR_HD) { @@ -840,6 +831,7 @@ return -1; } blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + blk_queue_max_sectors(BLK_DEFAULT_QUEUE(MAJOR_NR), 255); read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ add_gendisk(&hd_gendisk); init_timer(&device_timer); @@ -868,9 +860,7 @@ { int target; struct gendisk * gdev; - int max_p; - int start; - int i; + int res; long flags; target = DEVICE_NR(dev); @@ -885,25 +875,20 @@ DEVICE_BUSY = 1; restore_flags(flags); - max_p = gdev->max_p; - start = target << gdev->minor_shift; - - for (i=max_p - 1; i >=0 ; i--) { - int minor = start + i; - invalidate_device(MKDEV(MAJOR_NR, minor), 1); - gdev->part[minor].start_sect = 0; - gdev->part[minor].nr_sects = 0; - } + res = wipe_partitions(dev); + if (res) + goto leave; #ifdef MAYBE_REINIT MAYBE_REINIT; #endif - grok_partitions(gdev, target, 1<<6, CAPACITY); + grok_partitions(dev, CAPACITY); +leave: DEVICE_BUSY = 0; wake_up(&busy_wait); - return 0; + return res; } static int parse_hd_setup (char *line) { diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/hpt34x.c linux/drivers/ide/hpt34x.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/hpt34x.c Sun May 20 02:43:06 2001 +++ linux/drivers/ide/hpt34x.c Mon Oct 15 10:41:43 2001 @@ -425,6 +425,7 @@ hwif->autodma = 0; hwif->dmaproc = &hpt34x_dmaproc; + hwif->highmem = 1; } else { hwif->drives[0].autotune = 1; hwif->drives[1].autotune = 1; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/hpt366.c linux/drivers/ide/hpt366.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/hpt366.c Wed Aug 15 05:01:07 2001 +++ linux/drivers/ide/hpt366.c Mon Oct 15 10:41:43 2001 @@ -730,6 +730,7 @@ hwif->autodma = 1; else hwif->autodma = 0; + hwif->highmem = 1; } else { hwif->autodma = 0; hwif->drives[0].autotune = 1; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide-cd.c linux/drivers/ide/ide-cd.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide-cd.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/ide/ide-cd.c Tue Oct 30 16:17:48 2001 @@ -926,7 +926,7 @@ /* If we're not done filling the current buffer, complain. Otherwise, complete the command normally. 
*/ if (rq->current_nr_sectors > 0) { - printk ("%s: cdrom_read_intr: data underrun (%ld blocks)\n", + printk ("%s: cdrom_read_intr: data underrun (%u blocks)\n", drive->name, rq->current_nr_sectors); cdrom_end_request (0, drive); } else @@ -959,8 +959,7 @@ /* First, figure out if we need to bit-bucket any of the leading sectors. */ - nskip = MIN ((int)(rq->current_nr_sectors - (rq->bh->b_size >> SECTOR_BITS)), - sectors_to_transfer); + nskip = MIN(rq->current_nr_sectors - bio_sectors(rq->bio), sectors_to_transfer); while (nskip > 0) { /* We need to throw away a sector. */ @@ -1058,7 +1057,7 @@ represent the number of sectors to skip at the start of a transfer will fail. I think that this will never happen, but let's be paranoid and check. */ - if (rq->current_nr_sectors < (rq->bh->b_size >> SECTOR_BITS) && + if (rq->current_nr_sectors < bio_sectors(rq->bio) && (rq->sector % SECTORS_PER_FRAME) != 0) { printk ("%s: cdrom_read_from_buffer: buffer botch (%ld)\n", drive->name, rq->sector); @@ -1097,9 +1096,9 @@ nskip = (sector % SECTORS_PER_FRAME); if (nskip > 0) { /* Sanity check... */ - if (rq->current_nr_sectors != (rq->bh->b_size >> SECTOR_BITS) && + if (rq->current_nr_sectors != bio_sectors(rq->bio) && (rq->sector % CD_FRAMESIZE != 0)) { - printk ("%s: cdrom_start_read_continuation: buffer botch (%lu)\n", + printk ("%s: cdrom_start_read_continuation: buffer botch (%u)\n", drive->name, rq->current_nr_sectors); cdrom_end_request (0, drive); return ide_stopped; @@ -1192,66 +1191,17 @@ return cdrom_start_packet_command (drive, 0, cdrom_start_seek_continuation); } -static inline int cdrom_merge_requests(struct request *rq, struct request *nxt) -{ - int ret = 1; - - /* - * partitions not really working, but better check anyway... - */ - if (rq->cmd == nxt->cmd && rq->rq_dev == nxt->rq_dev) { - rq->nr_sectors += nxt->nr_sectors; - rq->hard_nr_sectors += nxt->nr_sectors; - rq->bhtail->b_reqnext = nxt->bh; - rq->bhtail = nxt->bhtail; - list_del(&nxt->queue); - blkdev_release_request(nxt); - ret = 0; - } - - return ret; -} - -/* - * the current request will always be the first one on the list - */ -static void cdrom_attempt_remerge(ide_drive_t *drive, struct request *rq) -{ - struct list_head *entry; - struct request *nxt; - unsigned long flags; - - spin_lock_irqsave(&io_request_lock, flags); - - while (1) { - entry = rq->queue.next; - if (entry == &drive->queue.queue_head) - break; - - nxt = blkdev_entry_to_request(entry); - if (rq->sector + rq->nr_sectors != nxt->sector) - break; - else if (rq->nr_sectors + nxt->nr_sectors > SECTORS_MAX) - break; - - if (cdrom_merge_requests(rq, nxt)) - break; - } - - spin_unlock_irqrestore(&io_request_lock, flags); -} - /* Fix up a possibly partially-processed request so that we can - start it over entirely, or even put it back on the request queue. 
*/ + start it over entirely */ static void restore_request (struct request *rq) { - if (rq->buffer != rq->bh->b_data) { - int n = (rq->buffer - rq->bh->b_data) / SECTOR_SIZE; - rq->buffer = rq->bh->b_data; + if (rq->buffer != bio_data(rq->bio)) { + int n = (rq->buffer - (char *) bio_data(rq->bio)) / SECTOR_SIZE; + rq->buffer = bio_data(rq->bio); rq->nr_sectors += n; rq->sector -= n; } - rq->current_nr_sectors = rq->bh->b_size >> SECTOR_BITS; + rq->hard_cur_sectors = rq->current_nr_sectors = bio_sectors(rq->bio); rq->hard_nr_sectors = rq->nr_sectors; rq->hard_sector = rq->sector; } @@ -1281,7 +1231,7 @@ if (cdrom_read_from_buffer(drive)) return ide_stopped; - cdrom_attempt_remerge(drive, rq); + blk_attempt_remerge(&drive->queue, rq); /* Clear the local sector buffer. */ info->nsectors_buffered = 0; @@ -1577,7 +1527,7 @@ */ uptodate = 1; if (rq->current_nr_sectors > 0) { - printk("%s: write_intr: data underrun (%ld blocks)\n", + printk("%s: write_intr: data underrun (%u blocks)\n", drive->name, rq->current_nr_sectors); uptodate = 0; } @@ -1674,7 +1624,7 @@ * remerge requests, often the plugging will not have had time * to do this properly */ - cdrom_attempt_remerge(drive, rq); + blk_attempt_remerge(&drive->queue, rq); info->nsectors_buffered = 0; @@ -2202,7 +2152,9 @@ pc.quiet = cgc->quiet; pc.timeout = cgc->timeout; pc.sense = cgc->sense; - return cgc->stat = cdrom_queue_packet_command(drive, &pc); + cgc->stat = cdrom_queue_packet_command(drive, &pc); + cgc->buflen -= pc.buflen; + return cgc->stat; } static @@ -2711,7 +2663,6 @@ ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); ide_add_setting(drive, "dsc_overlap", SETTING_RW, -1, -1, TYPE_BYTE, 0, 1, 1, 1, &drive->dsc_overlap, NULL); } @@ -2875,7 +2826,7 @@ MOD_INC_USE_COUNT; if (info->buffer == NULL) info->buffer = (char *) kmalloc(SECTOR_BUFFER_SIZE, GFP_KERNEL); - if ((info->buffer == NULL) || (rc = cdrom_open(ip, fp))) { + if ((info->buffer == NULL) || (rc = cdrom_open(ip, fp))) { drive->usage--; MOD_DEC_USE_COUNT; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide-cd.h linux/drivers/ide/ide-cd.h --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide-cd.h Thu Oct 11 08:45:00 2001 +++ linux/drivers/ide/ide-cd.h Thu Nov 1 11:24:08 2001 @@ -435,7 +435,7 @@ byte curlba[3]; byte nslots; - __u8 short slot_tablelen; + __u16 short slot_tablelen; }; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide-disk.c linux/drivers/ide/ide-disk.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide-disk.c Wed Oct 24 12:49:57 2001 +++ linux/drivers/ide/ide-disk.c Thu Nov 1 13:46:13 2001 @@ -27,6 +27,7 @@ * Version 1.09 added increment of rq->sector in ide_multwrite * added UDMA 3/4 reporting * Version 1.10 request queue changes, Ultra DMA 100 + * Version 1.11 Highmem I/O support, Jens Axboe */ #define IDEDISK_VERSION "1.10" @@ -139,7 +140,9 @@ byte stat; int i; unsigned int msect, nsect; + unsigned long flags; struct request *rq; + char *to; /* new way for dealing with premature shared PCI interrupts */ if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) { @@ -150,8 +153,8 @@ ide_set_handler(drive, &read_intr, WAIT_CMD, NULL); 
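The ide-disk.c hunks in this area stop dereferencing rq->buffer directly and instead bracket each PIO transfer with ide_map_buffer()/ide_unmap_buffer(), since the page behind the current bio segment may live in highmem. Those helpers are added to the IDE headers elsewhere in this patch; the following is only a rough sketch of the intent, with the atomic-kmap slot name and exact form assumed rather than taken from the patch:

	/* sketch only: map/unmap the page behind the current segment of rq->bio */
	static inline char *ide_map_buffer(struct request *rq, unsigned long *flags)
	{
		local_irq_save(*flags);
		return (char *) kmap_atomic(bio_page(rq->bio), KM_BIO_IRQ /* hypothetical slot */)
			+ bio_offset(rq->bio);
	}

	static inline void ide_unmap_buffer(char *buffer, unsigned long *flags)
	{
		kunmap_atomic((void *) ((unsigned long) buffer & PAGE_MASK), KM_BIO_IRQ);
		local_irq_restore(*flags);
	}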
return ide_started; } + msect = drive->mult_count; - read_next: rq = HWGROUP(drive)->rq; if (msect) { @@ -160,14 +163,15 @@ msect -= nsect; } else nsect = 1; - idedisk_input_data(drive, rq->buffer, nsect * SECTOR_WORDS); + to = ide_map_buffer(rq, &flags); + idedisk_input_data(drive, to, nsect * SECTOR_WORDS); #ifdef DEBUG printk("%s: read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n", drive->name, rq->sector, rq->sector+nsect-1, (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect); #endif + ide_unmap_buffer(to, &flags); rq->sector += nsect; - rq->buffer += nsect<<9; rq->errors = 0; i = (rq->nr_sectors -= nsect); if (((long)(rq->current_nr_sectors -= nsect)) <= 0) @@ -201,14 +205,16 @@ #endif if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) { rq->sector++; - rq->buffer += 512; rq->errors = 0; i = --rq->nr_sectors; --rq->current_nr_sectors; if (((long)rq->current_nr_sectors) <= 0) ide_end_request(1, hwgroup); if (i > 0) { - idedisk_output_data (drive, rq->buffer, SECTOR_WORDS); + unsigned long flags; + char *to = ide_map_buffer(rq, &flags); + idedisk_output_data (drive, to, SECTOR_WORDS); + ide_unmap_buffer(to, &flags); ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); return ide_started; } @@ -238,28 +244,28 @@ do { char *buffer; int nsect = rq->current_nr_sectors; - + unsigned long flags; + if (nsect > mcount) nsect = mcount; mcount -= nsect; - buffer = rq->buffer; + buffer = ide_map_buffer(rq, &flags); rq->sector += nsect; - rq->buffer += nsect << 9; rq->nr_sectors -= nsect; rq->current_nr_sectors -= nsect; /* Do we move to the next bh after this? */ if (!rq->current_nr_sectors) { - struct buffer_head *bh = rq->bh->b_reqnext; + struct bio *bio = rq->bio->bi_next; /* end early early we ran out of requests */ - if (!bh) { + if (!bio) { mcount = 0; } else { - rq->bh = bh; - rq->current_nr_sectors = bh->b_size >> 9; - rq->buffer = bh->b_data; + rq->bio = bio; + rq->current_nr_sectors = bio_sectors(bio); + rq->hard_cur_sectors = rq->current_nr_sectors; } } @@ -268,6 +274,7 @@ * re-entering us on the last transfer. 
*/ idedisk_output_data(drive, buffer, nsect<<7); + ide_unmap_buffer(buffer, &flags); } while (mcount); return 0; @@ -367,6 +374,8 @@ */ static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) { + unsigned long flags; + if (IDE_CONTROL_REG) OUT_BYTE(drive->ctl,IDE_CONTROL_REG); OUT_BYTE(0x00, IDE_FEATURE_REG); @@ -444,16 +453,17 @@ hwgroup->wrq = *rq; /* scratchpad */ ide_set_handler (drive, &multwrite_intr, WAIT_CMD, NULL); if (ide_multwrite(drive, drive->mult_count)) { - unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return ide_stopped; } } else { + char *buffer = ide_map_buffer(rq, &flags); ide_set_handler (drive, &write_intr, WAIT_CMD, NULL); - idedisk_output_data(drive, rq->buffer, SECTOR_WORDS); + idedisk_output_data(drive, buffer, SECTOR_WORDS); + ide_unmap_buffer(buffer, &flags); } return ide_started; } @@ -482,7 +492,8 @@ { if (drive->removable && !drive->usage) { invalidate_bdev(inode->i_bdev, 0); - if (drive->doorlocking && ide_wait_cmd(drive, WIN_DOORUNLOCK, 0, 0, 0, NULL)) + if (drive->doorlocking && + ide_wait_cmd(drive, WIN_DOORUNLOCK, 0, 0, 0, NULL)) drive->doorlocking = 0; } MOD_DEC_USE_COUNT; @@ -495,9 +506,7 @@ static void idedisk_revalidate (ide_drive_t *drive) { - grok_partitions(HWIF(drive)->gd, drive->select.b.unit, - 1<nowerr = arg; drive->bad_wstat = arg ? BAD_R_STAT : BAD_W_STAT; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&ide_lock); return 0; } @@ -691,7 +700,6 @@ ide_add_setting(drive, "nowerr", SETTING_RW, HDIO_GET_NOWERR, HDIO_SET_NOWERR, TYPE_BYTE, 0, 1, 1, 1, &drive->nowerr, set_nowerr); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); ide_add_setting(drive, "lun", SETTING_RW, -1, -1, TYPE_INT, 0, 7, 1, 1, &drive->lun, NULL); ide_add_setting(drive, "failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->failures, NULL); ide_add_setting(drive, "max_failures", SETTING_RW, -1, -1, TYPE_INT, 0, 65535, 1, 1, &drive->max_failures, NULL); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide-dma.c linux/drivers/ide/ide-dma.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide-dma.c Sun Sep 9 19:43:02 2001 +++ linux/drivers/ide/ide-dma.c Thu Nov 1 14:02:02 2001 @@ -203,25 +203,6 @@ #endif /* CONFIG_IDEDMA_NEW_DRIVE_LISTINGS */ /* - * Our Physical Region Descriptor (PRD) table should be large enough - * to handle the biggest I/O request we are likely to see. Since requests - * can have no more than 256 sectors, and since the typical blocksize is - * two or more sectors, we could get by with a limit of 128 entries here for - * the usual worst case. Most requests seem to include some contiguous blocks, - * further reducing the number of table entries required. - * - * The driver reverts to PIO mode for individual requests that exceed - * this limit (possible with 512 byte blocksizes, eg. MSDOS f/s), so handling - * 100% of all crazy scenarios here is not necessary. 
- * - * As it turns out though, we must allocate a full 4KB page for this, - * so the two PRD tables (ide0 & ide1) will each get half of that, - * allowing each to have about 256 entries (8 bytes each) from this. - */ -#define PRD_BYTES 8 -#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES)) - -/* * dma_intr() is the handler for disk read/write DMA interrupts */ ide_startstop_t ide_dma_intr (ide_drive_t *drive) @@ -249,35 +230,42 @@ static int ide_build_sglist (ide_hwif_t *hwif, struct request *rq) { - struct buffer_head *bh; struct scatterlist *sg = hwif->sg_table; - int nents = 0; + struct bio *bio = rq->bio; + unsigned long lastdataend; + int nents; - if (hwif->sg_dma_active) - BUG(); - if (rq->cmd == READ) hwif->sg_dma_direction = PCI_DMA_FROMDEVICE; else hwif->sg_dma_direction = PCI_DMA_TODEVICE; - bh = rq->bh; - do { - unsigned char *virt_addr = bh->b_data; - unsigned int size = bh->b_size; - if (nents >= PRD_ENTRIES) - return 0; + lastdataend = ~0UL; + bio = rq->bio; + nents = 0; + do { + /* + * continue segment from before? + */ + if (bio_to_phys(bio) == lastdataend) { + sg[nents - 1].length += bio_size(bio); + lastdataend += bio_size(bio); + } else { + /* + * start new segment + */ + if (nents >= PRD_ENTRIES) + BUG(); + + sg[nents].address = NULL; + sg[nents].page = bio_page(bio); + sg[nents].length = bio_size(bio); + sg[nents].offset = bio_offset(bio); - while ((bh = bh->b_reqnext) != NULL) { - if ((virt_addr + size) != (unsigned char *) bh->b_data) - break; - size += bh->b_size; + lastdataend = bio_to_phys(bio) + bio_size(bio); + nents++; } - memset(&sg[nents], 0, sizeof(*sg)); - sg[nents].address = virt_addr; - sg[nents].length = size; - nents++; - } while (bh != NULL); + } while ((bio = bio->bi_next) != NULL); return pci_map_sg(hwif->pci_dev, sg, nents, hwif->sg_dma_direction); } @@ -289,9 +277,10 @@ */ int ide_build_dmatable (ide_drive_t *drive, ide_dma_action_t func) { - unsigned int *table = HWIF(drive)->dmatable_cpu; + ide_hwif_t *hwif = HWIF(drive); + unsigned int *table = hwif->dmatable_cpu; #ifdef CONFIG_BLK_DEV_TRM290 - unsigned int is_trm290_chipset = (HWIF(drive)->chipset == ide_trm290); + unsigned int is_trm290_chipset = (hwif->chipset == ide_trm290); #else const int is_trm290_chipset = 0; #endif @@ -299,13 +288,12 @@ int i; struct scatterlist *sg; - HWIF(drive)->sg_nents = i = ide_build_sglist(HWIF(drive), HWGROUP(drive)->rq); - + hwif->sg_nents = i = ide_build_sglist(hwif, HWGROUP(drive)->rq); if (!i) return 0; - sg = HWIF(drive)->sg_table; - while (i && sg_dma_len(sg)) { + sg = hwif->sg_table; + while (i) { u32 cur_addr; u32 cur_len; @@ -319,55 +307,50 @@ */ while (cur_len) { - if (count++ >= PRD_ENTRIES) { - printk("%s: DMA table too small\n", drive->name); - goto use_pio_instead; - } else { - u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); - - if (bcount > cur_len) - bcount = cur_len; - *table++ = cpu_to_le32(cur_addr); - xcount = bcount & 0xffff; - if (is_trm290_chipset) - xcount = ((xcount >> 2) - 1) << 16; - if (xcount == 0x0000) { - /* - * Most chipsets correctly interpret a length of 0x0000 as 64KB, - * but at least one (e.g. CS5530) misinterprets it as zero (!). - * So here we break the 64KB entry into two 32KB entries instead. 
- */ - if (count++ >= PRD_ENTRIES) { - printk("%s: DMA table too small\n", drive->name); - goto use_pio_instead; - } - *table++ = cpu_to_le32(0x8000); - *table++ = cpu_to_le32(cur_addr + 0x8000); - xcount = 0x8000; + u32 xcount, bcount = 0x10000 - (cur_addr & 0xffff); + + if (count++ >= PRD_ENTRIES) + BUG(); + + if (bcount > cur_len) + bcount = cur_len; + *table++ = cpu_to_le32(cur_addr); + xcount = bcount & 0xffff; + if (is_trm290_chipset) + xcount = ((xcount >> 2) - 1) << 16; + if (xcount == 0x0000) { + /* + * Most chipsets correctly interpret a length of + * 0x0000 as 64KB, but at least one (e.g. CS5530) + * misinterprets it as zero (!). So here we break + * the 64KB entry into two 32KB entries instead. + */ + if (count++ >= PRD_ENTRIES) { + pci_unmap_sg(hwif->pci_dev, sg, + hwif->sg_nents, + hwif->sg_dma_direction); + return 0; } - *table++ = cpu_to_le32(xcount); - cur_addr += bcount; - cur_len -= bcount; + + *table++ = cpu_to_le32(0x8000); + *table++ = cpu_to_le32(cur_addr + 0x8000); + xcount = 0x8000; } + *table++ = cpu_to_le32(xcount); + cur_addr += bcount; + cur_len -= bcount; } sg++; i--; } - if (count) { - if (!is_trm290_chipset) - *--table |= cpu_to_le32(0x80000000); - return count; - } - printk("%s: empty DMA table?\n", drive->name); -use_pio_instead: - pci_unmap_sg(HWIF(drive)->pci_dev, - HWIF(drive)->sg_table, - HWIF(drive)->sg_nents, - HWIF(drive)->sg_dma_direction); - HWIF(drive)->sg_dma_active = 0; - return 0; /* revert to PIO for this request */ + if (!count) + printk("%s: empty DMA table?\n", drive->name); + else if (!is_trm290_chipset) + *--table |= cpu_to_le32(0x80000000); + + return count; } /* Teardown mappings after DMA has completed. */ @@ -378,7 +361,6 @@ int nents = HWIF(drive)->sg_nents; pci_unmap_sg(dev, sg, nents, HWIF(drive)->sg_dma_direction); - HWIF(drive)->sg_dma_active = 0; } /* @@ -532,6 +514,20 @@ } #endif /* CONFIG_BLK_DEV_IDEDMA_TIMEOUT */ +static void ide_toggle_bounce(ide_drive_t *drive, int on) +{ + dma64_addr_t addr = BLK_BOUNCE_HIGH; + + if (on && drive->media == ide_disk && HWIF(drive)->highmem) { + if (!PCI_DMA_BUS_IS_PHYS) + addr = BLK_BOUNCE_ANY; + else + addr = HWIF(drive)->pci_dev->dma_mask; + } + + blk_queue_bounce_limit(&drive->queue, addr); +} + /* * ide_dmaproc() initiates/aborts DMA read/write operations on a drive. 
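As an aside, the PRD fill loop above can be modelled in a few lines of user-space C: each DMA segment is cut so that no descriptor crosses a 64 KiB boundary, and a full 64 KiB descriptor, whose length field would be 0x0000 and be misread as zero by chipsets like the CS5530, is emitted as two 32 KiB descriptors instead. This is only an illustrative sketch; struct prd and fill_prd() are invented names, not kernel interfaces.

#include <stdint.h>
#include <stdio.h>

/* invented stand-in for a physical region descriptor */
struct prd {
        uint32_t addr;
        uint32_t len;
};

/* Fill PRDs for one physical segment; returns the number of entries used.
 * The caller must provide a table large enough for the segment. */
static int fill_prd(struct prd *tab, uint32_t cur_addr, uint32_t cur_len)
{
        int count = 0;

        while (cur_len) {
                /* never let one descriptor cross a 64 KiB boundary */
                uint32_t bcount = 0x10000 - (cur_addr & 0xffff);

                if (bcount > cur_len)
                        bcount = cur_len;

                if ((bcount & 0xffff) == 0) {
                        /* 0x0000 means 64 KiB on most chipsets but zero on
                         * some (e.g. CS5530): emit two 32 KiB entries instead */
                        tab[count].addr = cur_addr;
                        tab[count++].len = 0x8000;
                        tab[count].addr = cur_addr + 0x8000;
                        tab[count++].len = 0x8000;
                } else {
                        tab[count].addr = cur_addr;
                        tab[count++].len = bcount & 0xffff;
                }
                cur_addr += bcount;
                cur_len -= bcount;
        }
        return count;
}

int main(void)
{
        struct prd tab[8];
        /* 128 KiB segment starting 4 KiB below a 64 KiB boundary */
        int i, n = fill_prd(tab, 0x1f000, 0x20000);

        for (i = 0; i < n; i++)
                printf("PRD %d: addr=%#x len=%#x\n", i,
                       (unsigned int) tab[i].addr, (unsigned int) tab[i].len);
        return 0;
}

Fed that 128 KiB segment, the sketch produces four descriptors of 0x1000, 0x8000, 0x8000 and 0xf000 bytes; the real table builder does the same before additionally marking the last entry with the end-of-table bit.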
* @@ -550,19 +546,20 @@ */ int ide_dmaproc (ide_dma_action_t func, ide_drive_t *drive) { -// ide_hwgroup_t *hwgroup = HWGROUP(drive); - ide_hwif_t *hwif = HWIF(drive); - unsigned long dma_base = hwif->dma_base; - byte unit = (drive->select.b.unit & 0x01); - unsigned int count, reading = 0; + ide_hwif_t *hwif = HWIF(drive); + unsigned long dma_base = hwif->dma_base; + byte unit = (drive->select.b.unit & 0x01); + unsigned int count, reading = 0, set_high = 1; byte dma_stat; switch (func) { case ide_dma_off: printk("%s: DMA disabled\n", drive->name); + set_high = 0; case ide_dma_off_quietly: outb(inb(dma_base+2) & ~(1<<(5+unit)), dma_base+2); case ide_dma_on: + ide_toggle_bounce(drive, set_high); drive->using_dma = (func == ide_dma_on); if (drive->using_dma) outb(inb(dma_base+2)|(1<<(5+unit)), dma_base+2); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide-floppy.c linux/drivers/ide/ide-floppy.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide-floppy.c Wed Oct 24 12:49:57 2001 +++ linux/drivers/ide/ide-floppy.c Mon Oct 15 10:41:43 2001 @@ -707,24 +707,24 @@ static void idefloppy_input_buffers (ide_drive_t *drive, idefloppy_pc_t *pc, unsigned int bcount) { struct request *rq = pc->rq; - struct buffer_head *bh = rq->bh; + struct bio *bio = rq->bio; int count; while (bcount) { - if (pc->b_count == bh->b_size) { + if (pc->b_count == bio_size(bio)) { rq->sector += rq->current_nr_sectors; rq->nr_sectors -= rq->current_nr_sectors; idefloppy_end_request (1, HWGROUP(drive)); - if ((bh = rq->bh) != NULL) + if ((bio = rq->bio) != NULL) pc->b_count = 0; } - if (bh == NULL) { - printk (KERN_ERR "%s: bh == NULL in idefloppy_input_buffers, bcount == %d\n", drive->name, bcount); + if (bio == NULL) { + printk (KERN_ERR "%s: bio == NULL in idefloppy_input_buffers, bcount == %d\n", drive->name, bcount); idefloppy_discard_data (drive, bcount); return; } - count = IDEFLOPPY_MIN (bh->b_size - pc->b_count, bcount); - atapi_input_bytes (drive, bh->b_data + pc->b_count, count); + count = IDEFLOPPY_MIN (bio_size(bio) - pc->b_count, bcount); + atapi_input_bytes (drive, bio_data(bio) + pc->b_count, count); bcount -= count; pc->b_count += count; } } @@ -732,7 +732,7 @@ static void idefloppy_output_buffers (ide_drive_t *drive, idefloppy_pc_t *pc, unsigned int bcount) { struct request *rq = pc->rq; - struct buffer_head *bh = rq->bh; + struct bio *bio = rq->bio; int count; while (bcount) { @@ -740,13 +740,13 @@ rq->sector += rq->current_nr_sectors; rq->nr_sectors -= rq->current_nr_sectors; idefloppy_end_request (1, HWGROUP(drive)); - if ((bh = rq->bh) != NULL) { - pc->b_data = bh->b_data; - pc->b_count = bh->b_size; + if ((bio = rq->bio) != NULL) { + pc->b_data = bio_data(bio); + pc->b_count = bio_size(bio); } } - if (bh == NULL) { - printk (KERN_ERR "%s: bh == NULL in idefloppy_output_buffers, bcount == %d\n", drive->name, bcount); + if (bio == NULL) { + printk (KERN_ERR "%s: bio == NULL in idefloppy_output_buffers, bcount == %d\n", drive->name, bcount); idefloppy_write_zeros (drive, bcount); return; } @@ -760,9 +760,9 @@ static void idefloppy_update_buffers (ide_drive_t *drive, idefloppy_pc_t *pc) { struct request *rq = pc->rq; - struct buffer_head *bh = rq->bh; + struct bio *bio = rq->bio; - while ((bh = rq->bh) != NULL) + while ((bio = rq->bio) != NULL) idefloppy_end_request (1, HWGROUP(drive)); } #endif /* CONFIG_BLK_DEV_IDEDMA */ @@ -1210,7 +1210,7 @@ pc->callback = &idefloppy_rw_callback; pc->rq = rq; pc->b_data = rq->buffer; - pc->b_count = rq->cmd == READ ? 
0 : rq->bh->b_size; + pc->b_count = rq->cmd == READ ? 0 : bio_size(rq->bio); if (rq->cmd == WRITE) set_bit (PC_WRITING, &pc->flags); pc->buffer = NULL; @@ -1778,9 +1778,7 @@ */ static void idefloppy_revalidate (ide_drive_t *drive) { - grok_partitions(HWIF(drive)->gd, drive->select.b.unit, - 1<bios_sect, NULL); ide_add_setting(drive, "breada_readahead", SETTING_RW, BLKRAGET, BLKRASET, TYPE_INT, 0, 255, 1, 2, &read_ahead[major], NULL); ide_add_setting(drive, "file_readahead", SETTING_RW, BLKFRAGET, BLKFRASET, TYPE_INTA, 0, INT_MAX, 1, 1024, &max_readahead[major][minor], NULL); - ide_add_setting(drive, "max_kb_per_request", SETTING_RW, BLKSECTGET, BLKSECTSET, TYPE_INTA, 1, 255, 1, 2, &max_sectors[major][minor], NULL); } @@ -1930,8 +1927,7 @@ static void idefloppy_setup (ide_drive_t *drive, idefloppy_floppy_t *floppy) { struct idefloppy_id_gcw gcw; - int major = HWIF(drive)->major, i; - int minor = drive->select.b.unit << PARTN_BITS; + int i; *((unsigned short *) &gcw) = drive->id->config; drive->driver_data = floppy; @@ -1953,34 +1949,17 @@ */ if (strcmp(drive->id->model, "IOMEGA ZIP 100 ATAPI") == 0) - { - for (i = 0; i < 1 << PARTN_BITS; i++) - max_sectors[major][minor + i] = 64; - } - /* - * Guess what? The IOMEGA Clik! drive also needs the - * above fix. It makes nasty clicking noises without - * it, so please don't remove this. - */ - if (strcmp(drive->id->model, "IOMEGA Clik! 40 CZ ATAPI") == 0) - { - for (i = 0; i < 1 << PARTN_BITS; i++) - max_sectors[major][minor + i] = 64; - set_bit(IDEFLOPPY_CLIK_DRIVE, &floppy->flags); - } + blk_queue_max_sectors(&drive->queue, 64); /* * Guess what? The IOMEGA Clik! drive also needs the * above fix. It makes nasty clicking noises without * it, so please don't remove this. */ - if (strcmp(drive->id->model, "IOMEGA Clik! 40 CZ ATAPI") == 0) - { - for (i = 0; i < 1 << PARTN_BITS; i++) - max_sectors[major][minor + i] = 64; + if (strcmp(drive->id->model, "IOMEGA Clik! 
40 CZ ATAPI") == 0) { + blk_queue_max_sectors(&drive->queue, 64); set_bit(IDEFLOPPY_CLIK_DRIVE, &floppy->flags); } - (void) idefloppy_get_capacity (drive); idefloppy_add_settings(drive); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide-probe.c linux/drivers/ide/ide-probe.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide-probe.c Wed Oct 24 12:49:57 2001 +++ linux/drivers/ide/ide-probe.c Mon Oct 15 10:41:43 2001 @@ -594,9 +594,21 @@ static void ide_init_queue(ide_drive_t *drive) { request_queue_t *q = &drive->queue; + int max_sectors; q->queuedata = HWGROUP(drive); - blk_init_queue(q, do_ide_request); + blk_init_queue(q, do_ide_request, drive->name); + + /* IDE can do up to 128K per request, pdc4030 needs smaller limit */ +#ifdef CONFIG_BLK_DEV_PDC4030 + max_sectors = 127; +#else + max_sectors = 255; +#endif + blk_queue_max_sectors(q, max_sectors); + + /* IDE DMA can do PRD_ENTRIES number of segments */ + q->max_segments = PRD_ENTRIES; } /* @@ -670,7 +682,7 @@ hwgroup->rq = NULL; hwgroup->handler = NULL; hwgroup->drive = NULL; - hwgroup->busy = 0; + hwgroup->flags = 0; init_timer(&hwgroup->timer); hwgroup->timer.function = &ide_timer_expiry; hwgroup->timer.data = (unsigned long) hwgroup; @@ -749,7 +761,7 @@ { struct gendisk *gd; unsigned int unit, units, minors; - int *bs, *max_sect, *max_ra; + int *bs, *max_ra; extern devfs_handle_t ide_devfs_handle; /* figure out maximum drive number on the interface */ @@ -762,23 +774,15 @@ gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL); gd->part = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL); bs = kmalloc (minors*sizeof(int), GFP_KERNEL); - max_sect = kmalloc (minors*sizeof(int), GFP_KERNEL); max_ra = kmalloc (minors*sizeof(int), GFP_KERNEL); memset(gd->part, 0, minors * sizeof(struct hd_struct)); /* cdroms and msdos f/s are examples of non-1024 blocksizes */ blksize_size[hwif->major] = bs; - max_sectors[hwif->major] = max_sect; max_readahead[hwif->major] = max_ra; for (unit = 0; unit < minors; ++unit) { *bs++ = BLOCK_SIZE; -#ifdef CONFIG_BLK_DEV_PDC4030 - *max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : 255); -#else - /* IDE can do up to 128K per request. 
*/ - *max_sect++ = 255; -#endif *max_ra++ = MAX_READAHEAD; } @@ -870,13 +874,6 @@ read_ahead[hwif->major] = 8; /* (4kB) */ hwif->present = 1; /* success */ -#if (DEBUG_SPINLOCK > 0) -{ - static int done = 0; - if (!done++) - printk("io_request_lock is %p\n", &io_request_lock); /* FIXME */ -} -#endif return hwif->present; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide-proc.c linux/drivers/ide/ide-proc.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide-proc.c Fri Sep 7 18:28:38 2001 +++ linux/drivers/ide/ide-proc.c Mon Oct 15 10:41:43 2001 @@ -190,7 +190,7 @@ if (hwif->mate && hwif->mate->hwgroup) mategroup = (ide_hwgroup_t *)(hwif->mate->hwgroup); cli(); /* all CPUs; ensure all writes are done together */ - while (mygroup->busy || (mategroup && mategroup->busy)) { + while (test_bit(IDE_BUSY, &mygroup->flags) || (mategroup && test_bit(IDE_BUSY, &mategroup->flags))) { sti(); /* all CPUs */ if (0 < (signed long)(jiffies - timeout)) { printk("/proc/ide/%s/config: channel(s) busy, cannot write\n", hwif->name); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide-tape.c linux/drivers/ide/ide-tape.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide-tape.c Mon Aug 13 23:56:19 2001 +++ linux/drivers/ide/ide-tape.c Mon Oct 15 10:41:43 2001 @@ -1887,8 +1887,7 @@ printk("ide-tape: %s: skipping over config parition..\n", tape->name); #endif tape->onstream_write_error = OS_PART_ERROR; - if (tape->waiting) - complete(tape->waiting); + complete(tape->waiting); } } remove_stage = 1; @@ -1904,8 +1903,7 @@ tape->nr_pending_stages++; tape->next_stage = tape->first_stage; rq->current_nr_sectors = rq->nr_sectors; - if (tape->waiting) - complete(tape->waiting); + complete(tape->waiting); } } } else if (rq->cmd == IDETAPE_READ_RQ) { diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide.c linux/drivers/ide/ide.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/ide.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/ide/ide.c Thu Nov 1 13:57:16 2001 @@ -113,6 +113,8 @@ * Version 6.31 Debug Share INTR's and request queue streaming * Native ATA-100 support * Prep for Cascades Project + * Version 6.32 4GB highmem support for DMA, and mapping of those for + * PIO transfer (Jens Axboe) * * Some additional driver compile-time options are in ./include/linux/ide.h * @@ -121,8 +123,8 @@ * */ -#define REVISION "Revision: 6.31" -#define VERSION "Id: ide.c 6.31 2000/06/09" +#define REVISION "Revision: 6.32" +#define VERSION "Id: ide.c 6.32 2001/05/24" #undef REALLY_SLOW_IO /* most systems can safely undef this */ @@ -170,6 +172,7 @@ static int idebus_parameter; /* holds the "idebus=" parameter */ static int system_bus_speed; /* holds what we think is VESA/PCI bus speed */ static int initializing; /* set while initializing built-in drivers */ +spinlock_t ide_lock = SPIN_LOCK_UNLOCKED; #ifdef CONFIG_BLK_DEV_IDEPCI static int ide_scan_direction; /* THIS was formerly 2.2.x pci=reverse */ @@ -180,7 +183,7 @@ * ide_lock is used by the Atari code to obtain access to the IDE interrupt, * which is shared between several drivers. 
*/ -static int ide_lock; +static int ide_intr_lock; #endif /* __mc68000__ || CONFIG_APUS */ int noautodma = 0; @@ -551,7 +554,7 @@ unsigned long flags; ide_drive_t *drive = hwgroup->drive; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); rq = hwgroup->rq; /* @@ -563,13 +566,15 @@ hwgroup->hwif->dmaproc(ide_dma_on, drive); } - if (!end_that_request_first(rq, uptodate, hwgroup->drive->name)) { + if (!end_that_request_first(rq, uptodate)) { add_blkdev_randomness(MAJOR(rq->rq_dev)); + spin_lock(DRIVE_LOCK(drive)); blkdev_dequeue_request(rq); hwgroup->rq = NULL; end_that_request_last(rq); + spin_unlock(DRIVE_LOCK(drive)); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); } /* @@ -585,7 +590,7 @@ unsigned long flags; ide_hwgroup_t *hwgroup = HWGROUP(drive); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); if (hwgroup->handler != NULL) { printk("%s: ide_set_handler: handler not null; old=%p, new=%p\n", drive->name, hwgroup->handler, handler); @@ -594,7 +599,7 @@ hwgroup->expiry = expiry; hwgroup->timer.expires = jiffies + timeout; add_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); } /* @@ -844,9 +849,9 @@ unsigned long flags; struct request *rq; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); rq = HWGROUP(drive)->rq; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); if (rq->cmd == IDE_DRIVE_CMD) { byte *args = (byte *) rq->buffer; @@ -869,11 +874,13 @@ args[6] = IN_BYTE(IDE_SELECT_REG); } } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); + spin_lock(DRIVE_LOCK(drive)); blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; end_that_request_last(rq); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock(DRIVE_LOCK(drive)); + spin_unlock_irqrestore(&ide_lock, flags); } /* @@ -1195,14 +1202,15 @@ static ide_startstop_t start_request (ide_drive_t *drive) { ide_startstop_t startstop; - unsigned long block, blockend; - struct request *rq = blkdev_entry_next_request(&drive->queue.queue_head); + unsigned long block; + struct request *rq = HWGROUP(drive)->rq; unsigned int minor = MINOR(rq->rq_dev), unit = minor >> PARTN_BITS; ide_hwif_t *hwif = HWIF(drive); #ifdef DEBUG printk("%s: start_request: current=0x%08lx\n", hwif->name, (unsigned long) rq); #endif + /* bail early if we've exceeded max_failures */ if (drive->max_failures && (drive->failures > drive->max_failures)) { goto kill_rq; @@ -1219,16 +1227,11 @@ } #endif block = rq->sector; - blockend = block + rq->nr_sectors; + /* Strange disk manager remap */ if ((rq->cmd == READ || rq->cmd == WRITE) && (drive->media == ide_disk || drive->media == ide_floppy)) { - if ((blockend < block) || (blockend > drive->part[minor&PARTN_MASK].nr_sects)) { - printk("%s%c: bad access: block=%ld, count=%ld\n", drive->name, - (minor&PARTN_MASK)?'0'+(minor&PARTN_MASK):' ', block, rq->nr_sectors); - goto kill_rq; - } - block += drive->part[minor&PARTN_MASK].start_sect + drive->sect0; + block += drive->sect0; } /* Yecch - this will shift the entire interval, possibly killing some innocent following sector */ @@ -1240,7 +1243,8 @@ #endif SELECT_DRIVE(hwif, drive); - if (ide_wait_stat(&startstop, drive, drive->ready_stat, BUSY_STAT|DRQ_STAT, WAIT_READY)) { + if (ide_wait_stat(&startstop, drive, drive->ready_stat, + BUSY_STAT|DRQ_STAT, WAIT_READY)) { printk("%s: 
drive not ready for command\n", drive->name); return startstop; } @@ -1251,7 +1255,8 @@ if (drive->driver != NULL) { return (DRIVER(drive)->do_request(drive, rq, block)); } - printk("%s: media type %d not supported\n", drive->name, drive->media); + printk("%s: media type %d not supported\n", + drive->name, drive->media); goto kill_rq; } return do_special(drive); @@ -1268,10 +1273,10 @@ ide_hwgroup_t *hwgroup = HWGROUP(drive); unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return start_request(drive); } @@ -1305,7 +1310,7 @@ || (drive->sleep && (!best->sleep || 0 < (signed long)(best->sleep - drive->sleep))) || (!best->sleep && 0 < (signed long)(WAKEUP(best) - WAKEUP(drive)))) { - if( !drive->queue.plugged ) + if (!blk_queue_plugged(&drive->queue)) best = drive; } } @@ -1334,7 +1339,7 @@ /* * Issue a new request to a drive from hwgroup - * Caller must have already done spin_lock_irqsave(&io_request_lock, ..); + * Caller must have already done spin_lock_irqsave(DRIVE_LOCK(drive), ...) * * A hwgroup is a serialized group of IDE interfaces. Usually there is * exactly one hwif (interface) per hwgroup, but buggy controllers (eg. CMD640) @@ -1346,26 +1351,21 @@ * possibly along with many other devices. This is especially common in * PCI-based systems with off-board IDE controller cards. * - * The IDE driver uses the single global io_request_lock spinlock to protect - * access to the request queues, and to protect the hwgroup->busy flag. + * The IDE driver uses the queue spinlock to protect access to the request + * queues. * * The first thread into the driver for a particular hwgroup sets the - * hwgroup->busy flag to indicate that this hwgroup is now active, + * hwgroup->flags IDE_BUSY flag to indicate that this hwgroup is now active, * and then initiates processing of the top request from the request queue. * * Other threads attempting entry notice the busy setting, and will simply - * queue their new requests and exit immediately. Note that hwgroup->busy - * remains set even when the driver is merely awaiting the next interrupt. + * queue their new requests and exit immediately. Note that hwgroup->flags + * remains busy even when the driver is merely awaiting the next interrupt. * Thus, the meaning is "this hwgroup is busy processing a request". * * When processing of a request completes, the completing thread or IRQ-handler * will start the next request from the queue. If no more work remains, - * the driver will clear the hwgroup->busy flag and exit. - * - * The io_request_lock (spinlock) is used to protect all access to the - * hwgroup->busy flag, but is otherwise not needed for most processing in - * the driver. This makes the driver much more friendlier to shared IRQs - * than previous designs, while remaining 100% (?) SMP safe and capable. + * the driver will clear the hwgroup->flags IDE_BUSY flag and exit. */ static void ide_do_request(ide_hwgroup_t *hwgroup, int masked_irq) { @@ -1373,12 +1373,11 @@ ide_hwif_t *hwif; ide_startstop_t startstop; - ide_get_lock(&ide_lock, ide_intr, hwgroup); /* for atari only: POSSIBLY BROKEN HERE(?) */ + ide_get_lock(&ide_intr_lock, ide_intr, hwgroup);/* for atari only: POSSIBLY BROKEN HERE(?) 
*/ __cli(); /* necessary paranoia: ensure IRQs are masked on local CPU */ - while (!hwgroup->busy) { - hwgroup->busy = 1; + while (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) { drive = choose_drive(hwgroup); if (drive == NULL) { unsigned long sleep = 0; @@ -1401,13 +1400,13 @@ if (timer_pending(&hwgroup->timer)) printk("ide_set_handler: timer already active\n"); #endif - hwgroup->sleeping = 1; /* so that ide_timer_expiry knows what to do */ + set_bit(IDE_SLEEP, &hwgroup->flags); mod_timer(&hwgroup->timer, sleep); - /* we purposely leave hwgroup->busy==1 while sleeping */ + /* we purposely leave hwgroup busy while sleeping */ } else { /* Ugly, but how can we sleep for the lock otherwise? perhaps from tq_disk? */ - ide_release_lock(&ide_lock); /* for atari only */ - hwgroup->busy = 0; + ide_release_lock(&ide_intr_lock);/* for atari only */ + clear_bit(IDE_BUSY, &hwgroup->flags); } return; /* no more work for this hwgroup (for now) */ } @@ -1421,9 +1420,16 @@ drive->sleep = 0; drive->service_start = jiffies; - if ( drive->queue.plugged ) /* paranoia */ + if (blk_queue_plugged(&drive->queue)) /* paranoia */ printk("%s: Huh? nuking plugged queue\n", drive->name); - hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head); + + /* + * just continuing an interrupted request maybe + */ + spin_lock(DRIVE_LOCK(drive)); + hwgroup->rq = elv_next_request(&drive->queue); + spin_unlock(DRIVE_LOCK(drive)); + /* * Some systems have trouble with IDE IRQs arriving while * the driver is still setting things up. So, here we disable @@ -1434,14 +1440,14 @@ */ if (masked_irq && hwif->irq != masked_irq) disable_irq_nosync(hwif->irq); - spin_unlock(&io_request_lock); + spin_unlock(&ide_lock); ide__sti(); /* allow other IRQs while we start this request */ startstop = start_request(drive); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&ide_lock); if (masked_irq && hwif->irq != masked_irq) enable_irq(hwif->irq); if (startstop == ide_stopped) - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); } } @@ -1460,7 +1466,19 @@ */ void do_ide_request(request_queue_t *q) { + unsigned long flags; + + /* + * release queue lock, grab IDE global lock and restore when + * we leave... + */ + spin_unlock(&q->queue_lock); + + spin_lock_irqsave(&ide_lock, flags); ide_do_request(q->queuedata, 0); + spin_unlock_irqrestore(&ide_lock, flags); + + spin_lock(&q->queue_lock); } /* @@ -1501,9 +1519,14 @@ HWGROUP(drive)->rq = NULL; rq->errors = 0; - rq->sector = rq->bh->b_rsector; - rq->current_nr_sectors = rq->bh->b_size >> 9; - rq->buffer = rq->bh->b_data; + rq->sector = rq->bio->bi_sector; + rq->current_nr_sectors = bio_sectors(rq->bio); + + /* + * just to make sure... + */ + if (rq->bio) + rq->buffer = NULL; } /* @@ -1519,7 +1542,11 @@ unsigned long flags; unsigned long wait; - spin_lock_irqsave(&io_request_lock, flags); + /* + * a global lock protects timers etc -- shouldn't get contention + * worth mentioning + */ + spin_lock_irqsave(&ide_lock, flags); del_timer(&hwgroup->timer); if ((handler = hwgroup->handler) == NULL) { @@ -1529,10 +1556,8 @@ * or we were "sleeping" to give other devices a chance. * Either way, we don't really want to complain about anything. 
*/ - if (hwgroup->sleeping) { - hwgroup->sleeping = 0; - hwgroup->busy = 0; - } + if (test_and_clear_bit(IDE_SLEEP, &hwgroup->flags)) + clear_bit(IDE_BUSY, &hwgroup->flags); } else { ide_drive_t *drive = hwgroup->drive; if (!drive) { @@ -1541,17 +1566,16 @@ } else { ide_hwif_t *hwif; ide_startstop_t startstop; - if (!hwgroup->busy) { - hwgroup->busy = 1; /* paranoia */ - printk("%s: ide_timer_expiry: hwgroup->busy was 0 ??\n", drive->name); - } + /* paranoia */ + if (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) + printk("%s: ide_timer_expiry: hwgroup was not busy??\n", drive->name); if ((expiry = hwgroup->expiry) != NULL) { /* continue */ if ((wait = expiry(drive)) != 0) { /* reset timer */ hwgroup->timer.expires = jiffies + wait; add_timer(&hwgroup->timer); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } } @@ -1561,7 +1585,7 @@ * the handler() function, which means we need to globally * mask the specific IRQ: */ - spin_unlock(&io_request_lock); + spin_unlock(&ide_lock); hwif = HWIF(drive); #if DISABLE_IRQ_NOSYNC disable_irq_nosync(hwif->irq); @@ -1587,13 +1611,13 @@ set_recovery_timer(hwif); drive->service_time = jiffies - drive->service_start; enable_irq(hwif->irq); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&ide_lock); if (startstop == ide_stopped) - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); } } ide_do_request(hwgroup, 0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); } /* @@ -1656,11 +1680,11 @@ ide_handler_t *handler; ide_startstop_t startstop; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); hwif = hwgroup->hwif; if (!ide_ack_intr(hwif)) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } @@ -1694,7 +1718,7 @@ (void) IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]); #endif /* CONFIG_BLK_DEV_IDEPCI */ } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } drive = hwgroup->drive; @@ -1702,7 +1726,7 @@ /* * This should NEVER happen, and there isn't much we could do about it here. */ - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } if (!drive_is_ready(drive)) { @@ -1712,21 +1736,20 @@ * the IRQ before their status register is up to date. Hopefully we have * enough advance overhead that the latter isn't a problem. */ - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return; } - if (!hwgroup->busy) { - hwgroup->busy = 1; /* paranoia */ - printk("%s: ide_intr: hwgroup->busy was 0 ??\n", drive->name); - } + /* paranoia */ + if (!test_and_set_bit(IDE_BUSY, &hwgroup->flags)) + printk("%s: ide_intr: hwgroup was not busy??\n", drive->name); hwgroup->handler = NULL; del_timer(&hwgroup->timer); - spin_unlock(&io_request_lock); + spin_unlock(&ide_lock); if (drive->unmask) ide__sti(); /* local CPU only */ startstop = handler(drive); /* service this interrupt, may set handler for next interrupt */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&ide_lock); /* * Note that handler() may have set things up for another @@ -1739,13 +1762,13 @@ drive->service_time = jiffies - drive->service_start; if (startstop == ide_stopped) { if (hwgroup->handler == NULL) { /* paranoia */ - hwgroup->busy = 0; + clear_bit(IDE_BUSY, &hwgroup->flags); ide_do_request(hwgroup, hwif->irq); } else { printk("%s: ide_intr: huh? 
expected NULL handler on exit\n", drive->name); } } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); } /* @@ -1755,9 +1778,6 @@ ide_drive_t *get_info_ptr (kdev_t i_rdev) { int major = MAJOR(i_rdev); -#if 0 - int minor = MINOR(i_rdev) & PARTN_MASK; -#endif unsigned int h; for (h = 0; h < MAX_HWIFS; ++h) { @@ -1766,11 +1786,7 @@ unsigned unit = DEVICE_NR(i_rdev); if (unit < MAX_DRIVES) { ide_drive_t *drive = &hwif->drives[unit]; -#if 0 - if ((drive->present) && (drive->part[minor].nr_sects)) -#else if (drive->present) -#endif return drive; } break; @@ -1830,7 +1846,8 @@ rq->rq_dev = MKDEV(major,(drive->select.b.unit)<waiting = &wait; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); + spin_lock(DRIVE_LOCK(drive)); if (list_empty(queue_head) || action == ide_preempt) { if (action == ide_preempt) hwgroup->rq = NULL; @@ -1840,9 +1857,10 @@ } else queue_head = queue_head->next; } - list_add(&rq->queue, queue_head); + list_add(&rq->queuelist, queue_head); + spin_unlock(DRIVE_LOCK(drive)); ide_do_request(hwgroup, 0); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); if (action == ide_wait) { wait_for_completion(&wait); /* wait for it to be serviced */ return rq->errors ? -EIO : 0; /* return -EIO if errors */ @@ -1851,6 +1869,16 @@ } +/* Common for ide-floppy.c and ide-disk.c */ +void ide_revalidate_drive (ide_drive_t *drive) +{ + struct gendisk *g = HWIF(drive)->gd; + int minor = (drive->select.b.unit << g->minor_shift); + kdev_t dev = MKDEV(g->major, minor); + + grok_partitions(dev, current_capacity(drive)); +} + /* * This routine is called to flush all partitions and partition tables * for a changed disk, and then re-read the new partition table. 
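For orientation, the locking convention used in ide_do_drive_cmd() above (and in ide_end_drive_cmd()) is: take the global ide_lock first, then take the per-drive queue lock only around the request-list manipulation, and keep ide_lock held across the call into ide_do_request(). A minimal user-space model of that ordering, with pthread mutexes standing in for the spinlocks and every name invented for the example:

/* build with: cc -pthread lock_model.c */
#include <pthread.h>
#include <stdio.h>

/* ide_lock guards hwgroup state; each drive's queue has its own lock
 * (DRIVE_LOCK(drive) in the patch). All names here are invented. */
static pthread_mutex_t ide_lock = PTHREAD_MUTEX_INITIALIZER;

struct drive {
        pthread_mutex_t queue_lock;
        int queued;                     /* stand-in for the request list */
};

/* Model of the ide_do_drive_cmd() insertion path: outer ide_lock,
 * inner queue lock only around the list manipulation. */
static void queue_request(struct drive *d)
{
        pthread_mutex_lock(&ide_lock);
        pthread_mutex_lock(&d->queue_lock);
        d->queued++;                    /* list_add(&rq->queuelist, ...) */
        pthread_mutex_unlock(&d->queue_lock);
        /* ide_do_request() would run here, still under ide_lock */
        pthread_mutex_unlock(&ide_lock);
}

int main(void)
{
        static struct drive d = { PTHREAD_MUTEX_INITIALIZER, 0 };

        queue_request(&d);
        printf("requests queued: %d\n", d.queued);
        return 0;
}

The point of the split is that the request queues are no longer serialized behind the single global io_request_lock; only the short list operations take the per-queue lock.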
@@ -1863,40 +1891,33 @@ { ide_drive_t *drive; ide_hwgroup_t *hwgroup; - unsigned int p, major, minor; - long flags; + unsigned long flags; + int res; if ((drive = get_info_ptr(i_rdev)) == NULL) return -ENODEV; - major = MAJOR(i_rdev); - minor = drive->select.b.unit << PARTN_BITS; hwgroup = HWGROUP(drive); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); if (drive->busy || (drive->usage > 1)) { - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); return -EBUSY; - }; + } drive->busy = 1; MOD_INC_USE_COUNT; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); - for (p = 0; p < (1<part[p].nr_sects > 0) { - kdev_t devp = MKDEV(major, minor+p); - invalidate_device(devp, 1); - set_blocksize(devp, 1024); - } - drive->part[p].start_sect = 0; - drive->part[p].nr_sects = 0; - }; + res = wipe_partitions(i_rdev); + if (res) + goto leave; if (DRIVER(drive)->revalidate) DRIVER(drive)->revalidate(drive); + leave: drive->busy = 0; wake_up(&drive->wqueue); MOD_DEC_USE_COUNT; - return 0; + return res; } static void revalidate_drives (void) @@ -2169,11 +2190,10 @@ */ unregister_blkdev(hwif->major, hwif->name); kfree(blksize_size[hwif->major]); - kfree(max_sectors[hwif->major]); kfree(max_readahead[hwif->major]); blk_dev[hwif->major].data = NULL; blk_dev[hwif->major].queue = NULL; - blksize_size[hwif->major] = NULL; + blk_clear(hwif->major); gd = hwif->gd; if (gd) { del_gendisk(gd); @@ -2403,7 +2423,7 @@ unsigned long flags; if ((setting->rw & SETTING_READ)) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&ide_lock, flags); switch(setting->data_type) { case TYPE_BYTE: val = *((u8 *) setting->data); @@ -2416,7 +2436,7 @@ val = *((u32 *) setting->data); break; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&ide_lock, flags); } return val; } @@ -2426,11 +2446,11 @@ ide_hwgroup_t *hwgroup = HWGROUP(drive); unsigned long timeout = jiffies + (3 * HZ); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&ide_lock); - while (hwgroup->busy) { + while (test_bit(IDE_BUSY, &hwgroup->flags)) { unsigned long lflags; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&ide_lock); __save_flags(lflags); /* local CPU only */ __sti(); /* local CPU only; needed for jiffies */ if (0 < (signed long)(jiffies - timeout)) { @@ -2439,7 +2459,7 @@ return -EBUSY; } __restore_flags(lflags); /* local CPU only */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&ide_lock); } return 0; } @@ -2480,7 +2500,7 @@ *p = val; break; } - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&ide_lock); return 0; } @@ -2620,24 +2640,14 @@ { struct hd_geometry *loc = (struct hd_geometry *) arg; unsigned short bios_cyl = drive->bios_cyl; /* truncate */ - if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL; - if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT; - if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT; - if (put_user(bios_cyl, (unsigned short *) &loc->cylinders)) return -EFAULT; - if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect, - (unsigned long *) &loc->start)) return -EFAULT; - return 0; - } - - case HDIO_GETGEO_BIG: - { - struct hd_big_geometry *loc = (struct hd_big_geometry *) arg; - if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL; - if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT; - if 
(put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT; - if (put_user(drive->bios_cyl, (unsigned int *) &loc->cylinders)) return -EFAULT; - if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect, - (unsigned long *) &loc->start)) return -EFAULT; + if (!loc || (drive->media != ide_disk && drive->media != ide_floppy)) + return -EINVAL; + if (put_user(drive->bios_head, &loc->heads) || + put_user(drive->bios_sect, &loc->sectors) || + put_user(bios_cyl, &loc->cylinders) || + put_user(get_start_sect(inode->i_rdev), + &loc->start)) + return -EFAULT; return 0; } @@ -2653,11 +2663,6 @@ return 0; } - case BLKGETSIZE: /* Return device size */ - return put_user(drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects, (unsigned long *) arg); - case BLKGETSIZE64: - return put_user((u64)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects << 9, (u64 *) arg); - case BLKRRPART: /* Re-read partition tables */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; return ide_revalidate_disk(inode->i_rdev); @@ -2775,6 +2780,8 @@ } return 0; + case BLKGETSIZE: + case BLKGETSIZE64: case BLKROSET: case BLKROGET: case BLKFLSBUF: @@ -2784,6 +2791,8 @@ case BLKELVSET: case BLKBSZGET: case BLKBSZSET: + case BLKHASHPROF: + case BLKHASHCLEAR: return blk_ioctl(inode->i_rdev, cmd, arg); case HDIO_GET_BUSSTATE: @@ -3409,7 +3418,7 @@ #ifdef CONFIG_BLK_DEV_IDE #if defined(__mc68000__) || defined(CONFIG_APUS) if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) { - ide_get_lock(&ide_lock, NULL, NULL); /* for atari only */ + ide_get_lock(&ide_intr_lock, NULL, NULL);/* for atari only */ disable_irq(ide_hwifs[0].irq); /* disable_irq_nosync ?? */ // disable_irq_nosync(ide_hwifs[0].irq); } @@ -3420,7 +3429,7 @@ #if defined(__mc68000__) || defined(CONFIG_APUS) if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) { enable_irq(ide_hwifs[0].irq); - ide_release_lock(&ide_lock); /* for atari only */ + ide_release_lock(&ide_intr_lock);/* for atari only */ } #endif /* __mc68000__ || CONFIG_APUS */ #endif /* CONFIG_BLK_DEV_IDE */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/pdc202xx.c linux/drivers/ide/pdc202xx.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/pdc202xx.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/ide/pdc202xx.c Tue Oct 30 09:48:43 2001 @@ -892,6 +892,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { hwif->dmaproc = &pdc202xx_dmaproc; + hwif->highmem = 1; if (!noautodma) hwif->autodma = 1; } else { diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/piix.c linux/drivers/ide/piix.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/piix.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/ide/piix.c Tue Oct 30 09:48:43 2001 @@ -523,6 +523,7 @@ if (!hwif->dma_base) return; + hwif->highmem = 1; #ifndef CONFIG_BLK_DEV_IDEDMA hwif->autodma = 0; #else /* CONFIG_BLK_DEV_IDEDMA */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/serverworks.c linux/drivers/ide/serverworks.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/serverworks.c Sun Sep 9 19:43:02 2001 +++ linux/drivers/ide/serverworks.c Mon Oct 15 10:41:43 2001 @@ -593,6 +593,7 @@ if (!noautodma) hwif->autodma = 1; hwif->dmaproc = &svwks_dmaproc; + hwif->highmem = 1; } else { hwif->autodma = 0; hwif->drives[0].autotune = 1; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/sis5513.c linux/drivers/ide/sis5513.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/sis5513.c Fri Sep 7 18:28:38 2001 +++ linux/drivers/ide/sis5513.c Mon 
Oct 15 10:41:43 2001 @@ -671,6 +671,7 @@ case PCI_DEVICE_ID_SI_5591: if (!noautodma) hwif->autodma = 1; + hwif->highmem = 1; hwif->dmaproc = &sis5513_dmaproc; break; #endif /* CONFIG_BLK_DEV_IDEDMA */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/slc90e66.c linux/drivers/ide/slc90e66.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/slc90e66.c Mon Jul 16 01:22:23 2001 +++ linux/drivers/ide/slc90e66.c Mon Oct 15 10:41:43 2001 @@ -373,6 +373,7 @@ return; hwif->autodma = 0; + hwif->highmem = 1; #ifdef CONFIG_BLK_DEV_IDEDMA if (!noautodma) hwif->autodma = 1; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/ide/via82cxxx.c linux/drivers/ide/via82cxxx.c --- /opt/kernel/linux-2.4.14-pre6/drivers/ide/via82cxxx.c Tue Sep 11 17:40:36 2001 +++ linux/drivers/ide/via82cxxx.c Mon Oct 15 10:41:43 2001 @@ -520,6 +520,7 @@ #ifdef CONFIG_BLK_DEV_IDEDMA if (hwif->dma_base) { + hwif->highmem = 1; hwif->dmaproc = &via82cxxx_dmaproc; #ifdef CONFIG_IDEDMA_AUTO if (!noautodma) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/md/lvm-snap.c linux/drivers/md/lvm-snap.c --- /opt/kernel/linux-2.4.14-pre6/drivers/md/lvm-snap.c Mon Sep 10 17:00:55 2001 +++ linux/drivers/md/lvm-snap.c Mon Oct 15 10:41:43 2001 @@ -273,7 +273,7 @@ COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t)); /* COW table block to write next */ - iobuf->blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10); + lv_snap->blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10); /* store new COW_table entry */ lv_COW_table[idx_COW_table].pv_org_number = cpu_to_le64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[idx].rdev_org)); @@ -289,7 +289,7 @@ iobuf->nr_pages = 1; if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, - iobuf->blocks, blksize_snap) != blksize_snap) + lv_snap->blocks, blksize_snap) != blksize_snap) goto fail_raw_write; @@ -308,11 +308,12 @@ snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size; blksize_snap = lvm_get_blksize(snap_phys_dev); - iobuf->blocks[0] = snap_pe_start >> (blksize_snap >> 10); - } else iobuf->blocks[0]++; + lv_snap->blocks[0] = snap_pe_start >> (blksize_snap >> 10); + } else + lv_snap->blocks[0]++; if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, - iobuf->blocks, blksize_snap) != blksize_snap) + lv_snap->blocks, blksize_snap) != blksize_snap) goto fail_raw_write; } @@ -388,7 +389,7 @@ blksize_snap = lvm_get_blksize(snap_phys_dev); max_blksize = max(blksize_org, blksize_snap); min_blksize = min(blksize_org, blksize_snap); - max_sectors = KIO_MAX_SECTORS * (min_blksize>>9); + max_sectors = LVM_MAX_SECTORS * (min_blksize>>9); if (chunk_size % (max_blksize>>9)) goto fail_blksize; @@ -400,16 +401,16 @@ iobuf->length = nr_sectors << 9; - lvm_snapshot_prepare_blocks(iobuf->blocks, org_start, + lvm_snapshot_prepare_blocks(lv_snap->blocks, org_start, nr_sectors, blksize_org); if (brw_kiovec(READ, 1, &iobuf, org_phys_dev, - iobuf->blocks, blksize_org) != (nr_sectors<<9)) + lv_snap->blocks, blksize_org) != (nr_sectors<<9)) goto fail_raw_read; - lvm_snapshot_prepare_blocks(iobuf->blocks, snap_start, + lvm_snapshot_prepare_blocks(lv_snap->blocks, snap_start, nr_sectors, blksize_snap); if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, - iobuf->blocks, blksize_snap) != (nr_sectors<<9)) + lv_snap->blocks, blksize_snap) 
!=(nr_sectors<<9)) goto fail_raw_write; } @@ -534,7 +535,7 @@ goto out; blocksize = lvm_blocksizes[MINOR(lv_snap->lv_dev)]; - max_sectors = KIO_MAX_SECTORS << (PAGE_SHIFT-9); + max_sectors = LVM_MAX_SECTORS << (PAGE_SHIFT-9); err = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors); if (err) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/md/lvm.c linux/drivers/md/lvm.c --- /opt/kernel/linux-2.4.14-pre6/drivers/md/lvm.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/md/lvm.c Tue Oct 30 09:48:43 2001 @@ -231,7 +231,7 @@ /* * External function prototypes */ -static int lvm_make_request_fn(request_queue_t*, int, struct buffer_head*); +static int lvm_make_request_fn(request_queue_t *, struct bio *); static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong); static int lvm_blk_open(struct inode *, struct file *); @@ -270,7 +270,7 @@ #ifdef LVM_HD_NAME extern void (*lvm_hd_name_ptr) (char *, int); #endif -static int lvm_map(struct buffer_head *, int); +static int lvm_map(struct bio *, int); static int lvm_do_lock_lvm(void); static int lvm_do_le_remap(vg_t *, void *); @@ -415,14 +415,18 @@ lvm_proc_dir = create_proc_entry (LVM_DIR, S_IFDIR, &proc_root); if (lvm_proc_dir != NULL) { - lvm_proc_vg_subdir = create_proc_entry (LVM_VG_SUBDIR, S_IFDIR, lvm_proc_dir); + lvm_proc_vg_subdir = + create_proc_entry (LVM_VG_SUBDIR, S_IFDIR, + lvm_proc_dir); pde = create_proc_entry(LVM_GLOBAL, S_IFREG, lvm_proc_dir); - if ( pde != NULL) pde->read_proc = &lvm_proc_get_global_info; + if (pde != NULL) + pde->read_proc = &lvm_proc_get_global_info; } lvm_init_vars(); lvm_geninit(&lvm_gendisk); + /* insert our gendisk at the corresponding major */ add_gendisk(&lvm_gendisk); #ifdef LVM_HD_NAME @@ -435,7 +439,7 @@ /* optional read root VGDA */ /* - if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg); + if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg); */ printk(KERN_INFO @@ -466,12 +470,8 @@ printk(KERN_ERR "%s -- unregister_blkdev failed\n", lvm_name); } - del_gendisk(&lvm_gendisk); - - blk_size[MAJOR_NR] = NULL; - blksize_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; + blk_clear(MAJOR_NR); remove_proc_entry(LVM_GLOBAL, lvm_proc_dir); remove_proc_entry(LVM_VG_SUBDIR, lvm_proc_dir); @@ -483,7 +483,6 @@ #endif printk(KERN_INFO "%s -- Module successfully deactivated\n", lvm_name); - return; } /* lvm_cleanup() */ @@ -882,7 +881,6 @@ return -EFAULT; break; - case BLKFLSBUF: /* flush buffer cache */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -1045,24 +1043,25 @@ static int lvm_user_bmap(struct inode *inode, struct lv_bmap *user_result) { - struct buffer_head bh; + struct bio bio; + struct bio_vec *bvec = &bio.bi_io_vec; unsigned long block; int err; if (get_user(block, &user_result->lv_block)) return -EFAULT; - memset(&bh,0,sizeof bh); - bh.b_blocknr = block; - bh.b_dev = bh.b_rdev = inode->i_rdev; - bh.b_size = lvm_get_blksize(bh.b_dev); - if ((err=lvm_map(&bh, READ)) < 0) { + memset(&bio,0,sizeof(struct bio)); + bio.bi_dev = inode->i_rdev; + bvec->bv_len = lvm_get_blksize(bio.bi_dev); + bio.bi_sector = block * (bvec->bv_len >> 9); + if ((err=lvm_map(&bio, READ)) < 0) { printk("lvm map failed: %d\n", err); return -EINVAL; } - return put_user(kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) || - put_user(bh.b_rsector/(bh.b_size>>9), &user_result->lv_block) ? + return put_user(kdev_t_to_nr(bio.bi_dev), &user_result->lv_dev) || + put_user(bio.bi_sector/(bvec->bv_len>>9), &user_result->lv_block) ? 
-EFAULT : 0; } @@ -1453,16 +1452,16 @@ * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c * (see init_module/lvm_init) */ -static int lvm_map(struct buffer_head *bh, int rw) +static int lvm_map(struct bio *bio, int rw) { - int minor = MINOR(bh->b_rdev); + int minor = MINOR(bio->bi_dev); int ret = 0; ulong index; ulong pe_start; - ulong size = bh->b_size >> 9; - ulong rsector_tmp = bh->b_rsector; + ulong size = bio_sectors(bio); + ulong rsector_tmp = bio->bi_sector; ulong rsector_sav; - kdev_t rdev_tmp = bh->b_rdev; + kdev_t rdev_tmp = bio->bi_dev; kdev_t rdev_sav; vg_t *vg_this = vg[VG_BLK(minor)]; lv_t *lv = vg_this->lv[LV_BLK(minor)]; @@ -1615,8 +1614,8 @@ up(&lv->lv_snapshot_sem); } } - bh->b_rdev = rdev_tmp; - bh->b_rsector = rsector_tmp; + bio->bi_dev = rdev_tmp; + bio->bi_sector = rsector_tmp; return ret; } /* lvm_map() */ @@ -1649,14 +1648,12 @@ /* * make request function */ -static int lvm_make_request_fn(request_queue_t *q, - int rw, - struct buffer_head *bh) +static int lvm_make_request_fn(request_queue_t *q, struct bio *bio) { - if (lvm_map(bh, rw) >= 0) + if (lvm_map(bio, bio_rw(bio)) >= 0) return 1; - buffer_IO_error(bh); + bio_io_error(bio); return 0; } @@ -3019,7 +3016,6 @@ blk_size[MAJOR_NR] = lvm_size; blksize_size[MAJOR_NR] = lvm_blocksizes; - hardsect_size[MAJOR_NR] = lvm_blocksizes; return; } /* lvm_gen_init() */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/md/md.c linux/drivers/md/md.c --- /opt/kernel/linux-2.4.14-pre6/drivers/md/md.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/md/md.c Tue Oct 30 09:48:43 2001 @@ -105,7 +105,6 @@ */ struct hd_struct md_hd_struct[MAX_MD_DEVS]; static int md_blocksizes[MAX_MD_DEVS]; -static int md_hardsect_sizes[MAX_MD_DEVS]; static int md_maxreadahead[MAX_MD_DEVS]; static mdk_thread_t *md_recovery_thread; @@ -172,14 +171,14 @@ mddev_map[minor].data = NULL; } -static int md_make_request(request_queue_t *q, int rw, struct buffer_head * bh) +static int md_make_request (request_queue_t *q, struct bio *bio) { - mddev_t *mddev = kdev_to_mddev(bh->b_rdev); + mddev_t *mddev = kdev_to_mddev(bio->bi_dev); if (mddev && mddev->pers) - return mddev->pers->make_request(mddev, rw, bh); + return mddev->pers->make_request(mddev, bio_rw(bio), bio); else { - buffer_IO_error(bh); + bio_io_error(bio); return 0; } } @@ -1701,19 +1700,14 @@ * device. 
* Also find largest hardsector size */ - md_hardsect_sizes[mdidx(mddev)] = 512; ITERATE_RDEV(mddev,rdev,tmp) { if (rdev->faulty) continue; invalidate_device(rdev->dev, 1); - if (get_hardsect_size(rdev->dev) - > md_hardsect_sizes[mdidx(mddev)]) - md_hardsect_sizes[mdidx(mddev)] = - get_hardsect_size(rdev->dev); - } - md_blocksizes[mdidx(mddev)] = 1024; - if (md_blocksizes[mdidx(mddev)] < md_hardsect_sizes[mdidx(mddev)]) - md_blocksizes[mdidx(mddev)] = md_hardsect_sizes[mdidx(mddev)]; + md_blocksizes[mdidx(mddev)] = 1024; + if (get_hardsect_size(rdev->dev) > md_blocksizes[mdidx(mddev)]) + md_blocksizes[mdidx(mddev)] = get_hardsect_size(rdev->dev); + } mddev->pers = pers[pnum]; err = mddev->pers->run(mddev); @@ -2769,7 +2763,7 @@ (short *) &loc->cylinders); if (err) goto abort_unlock; - err = md_put_user (md_hd_struct[minor].start_sect, + err = md_put_user (get_start_sect(dev), (long *) &loc->start); goto done_unlock; } @@ -3621,13 +3615,11 @@ for(i = 0; i < MAX_MD_DEVS; i++) { md_blocksizes[i] = 1024; md_size[i] = 0; - md_hardsect_sizes[i] = 512; md_maxreadahead[i] = MD_READAHEAD; } blksize_size[MAJOR_NR] = md_blocksizes; blk_size[MAJOR_NR] = md_size; max_readahead[MAJOR_NR] = md_maxreadahead; - hardsect_size[MAJOR_NR] = md_hardsect_sizes; dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t)); @@ -3670,7 +3662,8 @@ md_recovery_thread = md_register_thread(md_do_recovery, NULL, name); if (!md_recovery_thread) - printk(KERN_ALERT "md: bug: couldn't allocate md_recovery_thread\n"); + printk(KERN_ALERT + "md: bug: couldn't allocate md_recovery_thread\n"); md_register_reboot_notifier(&md_notifier); raid_table_header = register_sysctl_table(raid_root_table, 1); @@ -4008,15 +4001,10 @@ #endif del_gendisk(&md_gendisk); - blk_dev[MAJOR_NR].queue = NULL; - blksize_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; - max_readahead[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; - + blk_clear(MAJOR_NR); + free_device_names(); - } #endif diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/message/i2o/i2o_block.c linux/drivers/message/i2o/i2o_block.c --- /opt/kernel/linux-2.4.14-pre6/drivers/message/i2o/i2o_block.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/message/i2o/i2o_block.c Tue Oct 30 09:51:53 2001 @@ -114,15 +114,16 @@ #define I2O_BSA_DSC_VOLUME_CHANGED 0x000D #define I2O_BSA_DSC_TIMEOUT 0x000E +#define I2O_UNIT(dev) (i2ob_dev[MINOR((dev)) & 0xf0]) +#define I2O_LOCK(unit) (i2ob_dev[(unit)].req_queue->queue_lock) + /* * Some of these can be made smaller later */ static int i2ob_blksizes[MAX_I2OB<<4]; -static int i2ob_hardsizes[MAX_I2OB<<4]; static int i2ob_sizes[MAX_I2OB<<4]; static int i2ob_media_change_flag[MAX_I2OB]; -static u32 i2ob_max_sectors[MAX_I2OB<<4]; static int i2ob_context; @@ -252,9 +253,9 @@ unsigned long mptr; u64 offset; struct request *req = ireq->req; - struct buffer_head *bh = req->bh; + struct bio *bio = req->bio; int count = req->nr_sectors<<9; - char *last = NULL; + unsigned long last = ~0UL; unsigned short size = 0; // printk(KERN_INFO "i2ob_send called\n"); @@ -283,30 +284,30 @@ if(req->cmd == READ) { __raw_writel(I2O_CMD_BLOCK_READ<<24|HOST_TID<<12|tid, msg+4); - while(bh!=NULL) + while(bio) { - if(bh->b_data == last) { - size += bh->b_size; - last += bh->b_size; - if(bh->b_reqnext) + if (bio_to_phys(bio) == last) { + size += bio_size(bio); + last += bio_size(bio); + if(bio->bi_next) __raw_writel(0x14000000|(size), mptr-8); else __raw_writel(0xD4000000|(size), mptr-8); } else { - if(bh->b_reqnext) - 
__raw_writel(0x10000000|(bh->b_size), mptr); + if(bio->bi_next) + __raw_writel(0x10000000|bio_size(bio), mptr); else - __raw_writel(0xD0000000|(bh->b_size), mptr); - __raw_writel(virt_to_bus(bh->b_data), mptr+4); + __raw_writel(0xD0000000|bio_size(bio), mptr); + __raw_writel(bio_to_phys(bio), mptr+4); mptr += 8; - size = bh->b_size; - last = bh->b_data + size; + size = bio_size(bio); + last = bio_to_phys(bio) + bio_size(bio); } - count -= bh->b_size; - bh = bh->b_reqnext; + count -= bio_size(bio); + bio = bio->bi_next; } /* * Heuristic for now since the block layer doesnt give @@ -322,30 +323,30 @@ else if(req->cmd == WRITE) { __raw_writel(I2O_CMD_BLOCK_WRITE<<24|HOST_TID<<12|tid, msg+4); - while(bh!=NULL) + while(bio) { - if(bh->b_data == last) { - size += bh->b_size; - last += bh->b_size; - if(bh->b_reqnext) + if (bio_to_phys(bio) == last) { + size += bio_size(bio); + last += bio_size(bio); + if(bio->bi_next) __raw_writel(0x14000000|(size), mptr-8); else __raw_writel(0xD4000000|(size), mptr-8); } else { - if(bh->b_reqnext) - __raw_writel(0x14000000|(bh->b_size), mptr); + if(bio->bi_next) + __raw_writel(0x14000000|bio_size(bio), mptr); else - __raw_writel(0xD4000000|(bh->b_size), mptr); - __raw_writel(virt_to_bus(bh->b_data), mptr+4); + __raw_writel(0xD4000000|bio_size(bio), mptr); + __raw_writel(bio_to_phys(bio), mptr+4); mptr += 8; - size = bh->b_size; - last = bh->b_data + size; + size = bio_size(bio); + last = bio_to_phys(bio) + bio_size(bio); } - count -= bh->b_size; - bh = bh->b_reqnext; + count -= bio_size(bio); + bio = bio->bi_next; } if(c->battery) @@ -409,7 +410,8 @@ * unlocked. */ - while (end_that_request_first( req, !req->errors, "i2o block" )); + while (end_that_request_first(req, !req->errors)) + ; /* * It is now ok to complete the request. 
@@ -417,61 +419,6 @@ end_that_request_last( req ); } -/* - * Request merging functions - */ -static inline int i2ob_new_segment(request_queue_t *q, struct request *req, - int __max_segments) -{ - int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments; - - if (__max_segments < max_segments) - max_segments = __max_segments; - - if (req->nr_segments < max_segments) { - req->nr_segments++; - return 1; - } - return 0; -} - -static int i2ob_back_merge(request_queue_t *q, struct request *req, - struct buffer_head *bh, int __max_segments) -{ - if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data) - return 1; - return i2ob_new_segment(q, req, __max_segments); -} - -static int i2ob_front_merge(request_queue_t *q, struct request *req, - struct buffer_head *bh, int __max_segments) -{ - if (bh->b_data + bh->b_size == req->bh->b_data) - return 1; - return i2ob_new_segment(q, req, __max_segments); -} - -static int i2ob_merge_requests(request_queue_t *q, - struct request *req, - struct request *next, - int __max_segments) -{ - int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments; - int total_segments = req->nr_segments + next->nr_segments; - - if (__max_segments < max_segments) - max_segments = __max_segments; - - if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) - total_segments--; - - if (total_segments > max_segments) - return 0; - - req->nr_segments = total_segments; - return 1; -} - static int i2ob_flush(struct i2o_controller *c, struct i2ob_device *d, int unit) { unsigned long msg; @@ -535,10 +482,10 @@ ireq=&i2ob_queues[c->unit]->request_queue[m[3]]; ireq->req->errors++; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* Now flush the message by making it a NOP */ m[0]&=0x00FFFFFF; @@ -559,12 +506,12 @@ if(msg->function == I2O_CMD_BLOCK_CFLUSH) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); dev->constipated=0; DEBUG(("unconstipated\n")); if(i2ob_backlog_request(c, dev)==0) i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -580,10 +527,10 @@ ireq=&i2ob_queues[c->unit]->request_queue[m[3]]; ireq->req->errors++; printk(KERN_WARNING "I2O Block: Data transfer to deleted device!\n"); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -629,7 +576,7 @@ */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); if(err==4) { /* @@ -674,7 +621,7 @@ */ i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* * and out @@ -682,7 +629,7 @@ return; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); printk(KERN_ERR "\n/dev/%s error: %s", dev->i2odev->dev_name, bsa_errors[m[4]&0XFFFF]); if(m[4]&0x00FF0000) @@ -697,8 +644,8 @@ * Dequeue the request. We use irqsave locks as one day we * may be running polled controllers from a BH... 
*/ - - spin_lock_irqsave(&io_request_lock, flags); + + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); i2ob_unhook_request(ireq, c->unit); i2ob_end_request(ireq->req); atomic_dec(&i2ob_queues[c->unit]->queue_depth); @@ -710,7 +657,7 @@ if(i2ob_backlog_request(c, dev)==0) i2ob_request(dev->req_queue); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); } /* @@ -789,8 +736,7 @@ for(i = unit; i <= unit+15; i++) { i2ob_sizes[i] = 0; - i2ob_hardsizes[i] = 0; - i2ob_max_sectors[i] = 0; + blk_queue_max_sectors(i2ob_dev[i].req_queue, 0); i2ob[i].nr_sects = 0; i2ob_gendisk.part[i].nr_sects = 0; } @@ -824,11 +770,11 @@ if(i2ob_query_device(&i2ob_dev[unit], 0x0004, 0, &size, 8) !=0 ) i2ob_query_device(&i2ob_dev[unit], 0x0000, 4, &size, 8); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(unit), flags); i2ob_sizes[unit] = (int)(size>>10); i2ob_gendisk.part[unit].nr_sects = size>>9; i2ob[unit].nr_sects = (int)(size>>9); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(unit), flags); break; } @@ -881,13 +827,14 @@ static void i2ob_timer_handler(unsigned long q) { + request_queue_t *req_queue = (request_queue_t *) q; unsigned long flags; /* * We cannot touch the request queue or the timer - * flag without holding the io_request_lock. + * flag without holding the queue_lock */ - spin_lock_irqsave(&io_request_lock,flags); + spin_lock_irqsave(&req_queue->queue_lock,flags); /* * Clear the timer started flag so that @@ -898,12 +845,12 @@ /* * Restart any requests. */ - i2ob_request((request_queue_t*)q); + i2ob_request(req_queue); /* * Free the lock. */ - spin_unlock_irqrestore(&io_request_lock,flags); + spin_unlock_irqrestore(&req_queue->queue_lock,flags); } static int i2ob_backlog_request(struct i2o_controller *c, struct i2ob_device *dev) @@ -1132,34 +1079,23 @@ static int i2ob_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { - struct i2ob_device *dev; - int minor; - /* Anyone capable of this syscall can do *real bad* things */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!inode) + if (!inode || !inode->i_rdev) return -EINVAL; - minor = MINOR(inode->i_rdev); - if (minor >= (MAX_I2OB<<4)) - return -ENODEV; - dev = &i2ob_dev[minor]; switch (cmd) { - case BLKGETSIZE: - return put_user(i2ob[minor].nr_sects, (long *) arg); - case BLKGETSIZE64: - return put_user((u64)i2ob[minor].nr_sects << 9, (u64 *)arg); - case HDIO_GETGEO: { struct hd_geometry g; - int u=minor&0xF0; + int u = MINOR(inode->i_rdev) & 0xF0; i2o_block_biosparam(i2ob_sizes[u]<<1, &g.cylinders, &g.heads, &g.sectors); - g.start = i2ob[minor].start_sect; - return copy_to_user((void *)arg,&g, sizeof(g))?-EFAULT:0; + g.start = get_start_sect(inode->i_rdev); + return copy_to_user((void *)arg, &g, sizeof(g)) + ? 
-EFAULT : 0; } case BLKRRPART: @@ -1167,6 +1103,8 @@ return -EACCES; return do_i2ob_revalidate(inode->i_rdev,1); + case BLKGETSIZE: + case BLKGETSIZE64: case BLKFLSBUF: case BLKROSET: case BLKROGET: @@ -1354,8 +1292,6 @@ i2ob_query_device(dev, 0x0000, 5, &flags, 4); i2ob_query_device(dev, 0x0000, 6, &status, 4); i2ob_sizes[unit] = (int)(size>>10); - for(i=unit; i <= unit+15 ; i++) - i2ob_hardsizes[i] = blocksize; i2ob_gendisk.part[unit].nr_sects = size>>9; i2ob[unit].nr_sects = (int)(size>>9); @@ -1366,26 +1302,27 @@ /* * Max number of Scatter-Gather Elements */ - for(i=unit;i<=unit+15;i++) { - i2ob_max_sectors[i] = 256; - i2ob_dev[i].max_segments = (d->controller->status_block->inbound_frame_size - 8)/2; + request_queue_t *q = i2ob_dev[unit].req_queue; + + blk_queue_max_sectors(q, 256); + blk_queue_max_segments(q, (d->controller->status_block->inbound_frame_size - 8)/2); if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.queue_buggy == 2) i2ob_dev[i].depth = 32; if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.queue_buggy == 1) { - i2ob_max_sectors[i] = 32; - i2ob_dev[i].max_segments = 8; + blk_queue_max_sectors(q, 32); + blk_queue_max_segments(q, 8); i2ob_dev[i].depth = 4; } if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.short_req) { - i2ob_max_sectors[i] = 8; - i2ob_dev[i].max_segments = 8; + blk_queue_max_sectors(q, 8); + blk_queue_max_segments(q, 8); } } @@ -1430,7 +1367,7 @@ } printk(".\n"); printk(KERN_INFO "%s: Maximum sectors/read set to %d.\n", - d->dev_name, i2ob_max_sectors[unit]); + d->dev_name, i2ob_dev[unit].req_queue->max_sectors); /* * If this is the first I2O block device found on this IOP, @@ -1450,7 +1387,7 @@ */ dev->req_queue = &i2ob_queues[c->unit]->req_queue; - grok_partitions(&i2ob_gendisk, unit>>4, 1<<4, (long)(size>>9)); + grok_partitions(MKDEV(MAJOR_NR, unit), (long)(size>>9)); /* * Register for the events we're interested in and that the @@ -1468,6 +1405,7 @@ */ static int i2ob_init_iop(unsigned int unit) { + char name[16]; int i; i2ob_queues[unit] = (struct i2ob_iop_queue*) @@ -1491,11 +1429,9 @@ i2ob_queues[unit]->i2ob_qhead = &i2ob_queues[unit]->request_queue[0]; atomic_set(&i2ob_queues[unit]->queue_depth, 0); - blk_init_queue(&i2ob_queues[unit]->req_queue, i2ob_request); + sprintf(name, "i2o%d", unit); + blk_init_queue(&i2ob_queues[unit]->req_queue, i2ob_request, name); blk_queue_headactive(&i2ob_queues[unit]->req_queue, 0); - i2ob_queues[unit]->req_queue.back_merge_fn = i2ob_back_merge; - i2ob_queues[unit]->req_queue.front_merge_fn = i2ob_front_merge; - i2ob_queues[unit]->req_queue.merge_requests_fn = i2ob_merge_requests; i2ob_queues[unit]->req_queue.queuedata = &i2ob_queues[unit]; return 0; @@ -1506,11 +1442,11 @@ */ static request_queue_t* i2ob_get_queue(kdev_t dev) { - int unit = MINOR(dev)&0xF0; - - return i2ob_dev[unit].req_queue; + return I2O_UNIT(dev).req_queue; } + + /* * Probe the I2O subsytem for block class devices */ @@ -1708,7 +1644,7 @@ int i = 0; unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&I2O_LOCK(c->unit), flags); /* * Need to do this...we somtimes get two events from the IRTOS @@ -1730,7 +1666,7 @@ if(unit >= MAX_I2OB<<4) { printk(KERN_ERR "i2ob_del_device called, but not in dev table!\n"); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); return; } @@ -1743,12 +1679,11 @@ { i2ob_dev[i].i2odev = NULL; i2ob_sizes[i] = 0; - i2ob_hardsizes[i] = 0; - i2ob_max_sectors[i] = 0; + 
blk_queue_max_sectors(i2ob_dev[i].req_queue, 0); i2ob[i].nr_sects = 0; i2ob_gendisk.part[i].nr_sects = 0; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&I2O_LOCK(c->unit), flags); /* * Decrease usage count for module @@ -1891,12 +1826,10 @@ */ blksize_size[MAJOR_NR] = i2ob_blksizes; - hardsect_size[MAJOR_NR] = i2ob_hardsizes; blk_size[MAJOR_NR] = i2ob_sizes; - max_sectors[MAJOR_NR] = i2ob_max_sectors; blk_dev[MAJOR_NR].queue = i2ob_get_queue; - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request); + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request, "i2o"); blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); for (i = 0; i < MAX_I2OB << 4; i++) { @@ -1909,7 +1842,6 @@ i2ob_dev[i].tail = NULL; i2ob_dev[i].depth = MAX_I2OB_DEPTH; i2ob_blksizes[i] = 1024; - i2ob_max_sectors[i] = 2; } /* @@ -1977,7 +1909,6 @@ MODULE_AUTHOR("Red Hat Software"); MODULE_DESCRIPTION("I2O Block Device OSM"); MODULE_LICENSE("GPL"); - void cleanup_module(void) { diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/message/i2o/i2o_core.c linux/drivers/message/i2o/i2o_core.c --- /opt/kernel/linux-2.4.14-pre6/drivers/message/i2o/i2o_core.c Wed Oct 24 12:49:57 2001 +++ linux/drivers/message/i2o/i2o_core.c Wed Oct 24 09:27:48 2001 @@ -125,6 +125,7 @@ * Function table to send to bus specific layers * See for explanation of this */ +#ifdef CONFIG_I2O_PCI_MODULE static struct i2o_core_func_table i2o_core_functions = { i2o_install_controller, @@ -135,7 +136,6 @@ i2o_delete_controller }; -#ifdef CONFIG_I2O_PCI_MODULE extern int i2o_pci_core_attach(struct i2o_core_func_table *); extern void i2o_pci_core_detach(void); #endif /* CONFIG_I2O_PCI_MODULE */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/mtd/ftl.c linux/drivers/mtd/ftl.c --- /opt/kernel/linux-2.4.14-pre6/drivers/mtd/ftl.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/mtd/ftl.c Tue Oct 30 09:48:43 2001 @@ -1166,7 +1166,7 @@ put_user(1, (char *)&geo->heads); put_user(8, (char *)&geo->sectors); put_user((sect>>3), (short *)&geo->cylinders); - put_user(ftl_hd[minor].start_sect, (u_long *)&geo->start); + put_user(get_start_sect(inode->i_rdev), (u_long *)&geo->start); break; case BLKGETSIZE: ret = put_user(ftl_hd[minor].nr_sects, (unsigned long *)arg); @@ -1206,42 +1206,27 @@ ======================================================================*/ -static int ftl_reread_partitions(int minor) +static int ftl_reread_partitions(kdev_t dev) { + int minor = MINOR(dev); partition_t *part = myparts[minor >> 4]; - int i, whole; + int res; DEBUG(0, "ftl_cs: ftl_reread_partition(%d)\n", minor); if ((atomic_read(&part->open) > 1)) { return -EBUSY; } - whole = minor & ~(MAX_PART-1); - i = MAX_PART - 1; - while (i-- > 0) { - if (ftl_hd[whole+i].nr_sects > 0) { - kdev_t rdev = MKDEV(FTL_MAJOR, whole+i); - - invalidate_device(rdev, 1); - } - ftl_hd[whole+i].start_sect = 0; - ftl_hd[whole+i].nr_sects = 0; - } + res = wipe_partitions(dev); + if (res) + goto leave; scan_header(part); register_disk(&ftl_gendisk, whole >> PART_BITS, MAX_PART, &ftl_blk_fops, le32_to_cpu(part->header.FormattedSize)/SECTOR_SIZE); -#ifdef PCMCIA_DEBUG - for (i = 0; i < MAX_PART; i++) { - if (ftl_hd[whole+i].nr_sects > 0) - printk(KERN_INFO " %d: start %ld size %ld\n", i, - ftl_hd[whole+i].start_sect, - ftl_hd[whole+i].nr_sects); - } -#endif - return 0; + return res; } /*====================================================================== @@ -1431,7 +1416,7 @@ unregister_blkdev(FTL_MAJOR, "ftl"); 
blk_cleanup_queue(BLK_DEFAULT_QUEUE(FTL_MAJOR)); - blksize_size[FTL_MAJOR] = NULL; + blk_clear(FTL_MAJOR); del_gendisk(&ftl_gendisk); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/mtd/mtdblock.c linux/drivers/mtd/mtdblock.c --- /opt/kernel/linux-2.4.14-pre6/drivers/mtd/mtdblock.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/mtd/mtdblock.c Tue Oct 30 09:48:43 2001 @@ -29,7 +29,7 @@ #if LINUX_VERSION_CODE < 0x20300 #define QUEUE_PLUGGED (blk_dev[MAJOR_NR].plug_tq.sync) #else -#define QUEUE_PLUGGED (blk_dev[MAJOR_NR].request_queue.plugged) +#define QUEUE_PLUGGED (blk_queue_plugged(QUEUE)) #endif #ifdef CONFIG_DEVFS_FS @@ -402,7 +402,7 @@ /* * This is a special request_fn because it is executed in a process context - * to be able to sleep independently of the caller. The io_request_lock + * to be able to sleep independently of the caller. The queue_lock * is held upon entry and exit. * The head of our request queue is considered active so there is no need * to dequeue requests before we are done. @@ -416,7 +416,7 @@ for (;;) { INIT_REQUEST; req = CURRENT; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); mtdblk = mtdblks[MINOR(req->rq_dev)]; res = 0; @@ -458,7 +458,7 @@ } end_req: - spin_lock_irq(&io_request_lock); + spin_lock_irq(&QUEUE->queue_lock); end_request(res); } } @@ -490,16 +490,16 @@ while (!leaving) { add_wait_queue(&thr_wq, &wait); set_current_state(TASK_INTERRUPTIBLE); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&QUEUE->queue_lock); if (QUEUE_EMPTY || QUEUE_PLUGGED) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); schedule(); remove_wait_queue(&thr_wq, &wait); } else { remove_wait_queue(&thr_wq, &wait); set_current_state(TASK_RUNNING); handle_mtdblock_request(); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); } } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/mtd/nftlcore.c linux/drivers/mtd/nftlcore.c --- /opt/kernel/linux-2.4.14-pre6/drivers/mtd/nftlcore.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/mtd/nftlcore.c Tue Oct 30 09:48:43 2001 @@ -59,11 +59,6 @@ /* .. for the Linux partition table handling. */ struct hd_struct part_table[256]; -#if LINUX_VERSION_CODE < 0x20328 -static void dummy_init (struct gendisk *crap) -{} -#endif - static struct gendisk nftl_gendisk = { major: MAJOR_NR, major_name: "nftl", @@ -166,7 +161,8 @@ #if LINUX_VERSION_CODE < 0x20328 resetup_one_dev(&nftl_gendisk, firstfree); #else - grok_partitions(&nftl_gendisk, firstfree, 1<<NFTL_PARTN_BITS, nftl->nr_sects); + grok_partitions(MKDEV(MAJOR_NR,firstfree<<NFTL_PARTN_BITS), nftl->nr_sects); #endif } @@ -786,7 +782,7 @@ static int nftl_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg) { struct NFTLrecord *nftl; - int p; + int res; nftl = NFTLs[MINOR(inode->i_rdev) >> NFTL_PARTN_BITS]; @@ -799,16 +795,9 @@ g.heads = nftl->heads; g.sectors = nftl->sectors; g.cylinders = nftl->cylinders; - g.start = part_table[MINOR(inode->i_rdev)].start_sect; + g.start = get_start_sect(inode->i_rdev); return copy_to_user((void *)arg, &g, sizeof g) ?
-EFAULT : 0; } - case BLKGETSIZE: /* Return device size */ - return put_user(part_table[MINOR(inode->i_rdev)].nr_sects, - (unsigned long *) arg); - case BLKGETSIZE64: - return put_user((u64)part_table[MINOR(inode->i_rdev)].nr_sects << 9, - (u64 *)arg); - case BLKFLSBUF: if (!capable(CAP_SYS_ADMIN)) return -EACCES; fsync_dev(inode->i_rdev); @@ -825,27 +814,17 @@ * or we won't be able to re-use the partitions, * if there was a change and we don't want to reboot */ - p = (1<<NFTL_PARTN_BITS) - 1; - while (p-- > 0) { - kdev_t devp = MKDEV(MAJOR(inode->i_dev), MINOR(inode->i_dev)+p); - if (part_table[p].nr_sects > 0) - invalidate_device (devp, 1); + res = wipe_partitions(inode->i_rdev); + if (!res) + grok_partitions(inode->i_rdev, nftl->nr_sects); - part_table[MINOR(inode->i_dev)+p].start_sect = 0; - part_table[MINOR(inode->i_dev)+p].nr_sects = 0; - } - -#if LINUX_VERSION_CODE < 0x20328 - resetup_one_dev(&nftl_gendisk, MINOR(inode->i_rdev) >> NFTL_PARTN_BITS); -#else - grok_partitions(&nftl_gendisk, MINOR(inode->i_rdev) >> NFTL_PARTN_BITS, - 1<<NFTL_PARTN_BITS, nftl->nr_sects); -#endif - return 0; + return res; #if (LINUX_VERSION_CODE < 0x20303) RO_IOCTLS(inode->i_rdev, arg); /* ref. linux/blk.h */ #else + case BLKGETSIZE: + case BLKGETSIZE64: case BLKROSET: case BLKROGET: case BLKSSZGET: @@ -859,7 +838,7 @@ void nftl_request(RQFUNC_ARG) { - unsigned int dev, block, nsect; + unsigned int dev, unit, block, nsect; struct NFTLrecord *nftl; char *buffer; struct request *req; @@ -871,10 +850,11 @@ /* We can do this because the generic code knows not to touch the request at the head of the queue */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&QUEUE->queue_lock); DEBUG(MTD_DEBUG_LEVEL2, "NFTL_request\n"); - DEBUG(MTD_DEBUG_LEVEL3, "NFTL %s request, from sector 0x%04lx for 0x%04lx sectors\n", + DEBUG(MTD_DEBUG_LEVEL3, + "NFTL %s request, from sector 0x%04lx for 0x%04lx sectors\n", (req->cmd == READ) ?
"Read " : "Write", req->sector, req->current_nr_sectors); @@ -884,8 +864,8 @@ buffer = req->buffer; res = 1; /* succeed */ - if (dev >= MAX_NFTLS * (1<<NFTL_PARTN_BITS)) { + unit = dev >> NFTL_PARTN_BITS; + if (unit >= MAX_NFTLS || dev != (unit << NFTL_PARTN_BITS)) { printk("nftl: bad minor number: device = %s\n", kdevname(req->rq_dev)); res = 0; /* fail */ @@ -906,8 +886,6 @@ goto repeat; } - block += part_table[dev].start_sect; - if (req->cmd == READ) { DEBUG(MTD_DEBUG_LEVEL2, "NFTL read request of 0x%x sectors @ %x " "(req->nr_sectors == %lx)\n", nsect, block, req->nr_sectors); @@ -953,7 +931,7 @@ } repeat: DEBUG(MTD_DEBUG_LEVEL3, "end_request(%d)\n", res); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&QUEUE->queue_lock); end_request(res); } } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/s390/block/dasd.c linux/drivers/s390/block/dasd.c --- /opt/kernel/linux-2.4.14-pre6/drivers/s390/block/dasd.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/s390/block/dasd.c Tue Oct 30 09:48:43 2001 @@ -726,13 +726,6 @@ goto out_hardsect_size; memset (hardsect_size[major], 0, (1 << MINORBITS) * sizeof (int)); - /* init max_sectors */ - max_sectors[major] = - (int *) kmalloc ((1 << MINORBITS) * sizeof (int), GFP_ATOMIC); - if (!max_sectors[major]) - goto out_max_sectors; - memset (max_sectors[major], 0, (1 << MINORBITS) * sizeof (int)); - /* finally do the gendisk stuff */ major_info->gendisk.part = kmalloc ((1 << MINORBITS) * sizeof (struct hd_struct), @@ -751,10 +744,6 @@ /* error handling - free the prior allocated memory */ out_gendisk: - kfree (max_sectors[major]); - max_sectors[major] = NULL; - - out_max_sectors: kfree (hardsect_size[major]); hardsect_size[major] = NULL; @@ -821,12 +810,8 @@ kfree (blk_size[major]); kfree (blksize_size[major]); kfree (hardsect_size[major]); - kfree (max_sectors[major]); - blk_size[major] = NULL; - blksize_size[major] = NULL; - hardsect_size[major] = NULL; - max_sectors[major] = NULL; + blk_clear(major); rc = devfs_unregister_blkdev (major, DASD_NAME); if (rc < 0) { @@ -1685,10 +1670,6 @@ dasd_end_request (req, 0); dasd_dequeue_request (queue,req); } else { - /* relocate request according to partition table */ - req->sector += - device->major_info->gendisk. - part[MINOR (req->rq_dev)].start_sect; cqr = device->discipline->build_cp_from_req (device, req); if (cqr == NULL) { @@ -1697,10 +1678,7 @@ "on request %p\n", device->devinfo.devno, req); - /* revert relocation of request */ - req->sector -= - device->major_info->gendisk.
- part[MINOR (req->rq_dev)].start_sect; + break; /* terminate request queue loop */ } @@ -1750,10 +1728,10 @@ dasd_run_bh (dasd_device_t * device) { long flags; - spin_lock_irqsave (&io_request_lock, flags); + spin_lock_irqsave (&device->request_queue.queue_lock, flags); atomic_set (&device->bh_scheduled, 0); dasd_process_queues (device); - spin_unlock_irqrestore (&io_request_lock, flags); + spin_unlock_irqrestore (&device->request_queue.queue_lock, flags); } /* @@ -2461,14 +2439,12 @@ dasd_info.chanq_len = 0; if (device->request_queue->request_fn) { struct list_head *l; + request_queue_t *q = drive->request_queue; ccw_req_t *cqr = device->queue.head; - spin_lock_irqsave (&io_request_lock, flags); - list_for_each (l, - &device->request_queue-> - queue_head) { + spin_lock_irqsave (&q->queue_lock, flags); + list_for_each (l, q->queue_head, queue_head) dasd_info.req_queue_len++; - } - spin_unlock_irqrestore (&io_request_lock, + spin_unlock_irqrestore (&q->queue_lock, flags); s390irq_spin_lock_irqsave (device->devinfo.irq, flags); @@ -2658,15 +2634,14 @@ /* SECTION: Management of device list */ int -dasd_fillgeo(int kdev,struct hd_geometry *geo) +dasd_fillgeo(kdev_t kdev,struct hd_geometry *geo) { dasd_device_t *device = dasd_device_from_kdev (kdev); if (!device->discipline->fill_geometry) return -EINVAL; device->discipline->fill_geometry (device, geo); - geo->start = device->major_info->gendisk.part[MINOR(kdev)].start_sect - >> device->sizes.s2b_shift;; + geo->start = get_start_sect(kdev); return 0; } @@ -3349,6 +3324,12 @@ int major = MAJOR(device->kdev); int minor = MINOR(device->kdev); + device->request_queue = kmalloc(sizeof(request_queue_t),GFP_KERNEL); + device->request_queue->queuedata = device; + blk_init_queue (device->request_queue, do_dasd_request); + blk_queue_headactive (device->request_queue, 0); + elevator_init (&(device->request_queue->elevator),ELEVATOR_NOOP); + for (i = 0; i < (1 << DASD_PARTN_BITS); i++) { if (i == 0) device->major_info->gendisk.sizes[minor] = @@ -3358,17 +3339,11 @@ device->major_info->gendisk.sizes[minor + i] = 0; hardsect_size[major][minor + i] = device->sizes.bp_block; blksize_size[major][minor + i] = device->sizes.bp_block; - max_sectors[major][minor + i] = - device->discipline->max_blocks << - device->sizes.s2b_shift; + blk_queue_max_sectors(device->request_queue, + device->discipline->max_blocks << device->sizes.s2b_shift); device->major_info->gendisk.part[minor+i].start_sect = 0; device->major_info->gendisk.part[minor+i].nr_sects = 0; } - device->request_queue = kmalloc(sizeof(request_queue_t),GFP_KERNEL); - device->request_queue->queuedata = device; - blk_init_queue (device->request_queue, do_dasd_request); - blk_queue_headactive (device->request_queue, 0); - elevator_init (&(device->request_queue->elevator),ELEVATOR_NOOP); return rc; } @@ -3395,7 +3370,6 @@ device->major_info->gendisk.sizes[minor + i] = 0; hardsect_size[major][minor + i] = 0; blksize_size[major][minor + i] = 0; - max_sectors[major][minor + i] = 0; } if (device->request_queue) { blk_cleanup_queue (device->request_queue); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/s390/block/xpram.c linux/drivers/s390/block/xpram.c --- /opt/kernel/linux-2.4.14-pre6/drivers/s390/block/xpram.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/s390/block/xpram.c Tue Oct 30 09:48:43 2001 @@ -1213,8 +1213,7 @@ { int i; - /* first of all, flush it all and reset all the data structures */ - + /* first of all, flush it all and reset all the data structures */ for (i=0; 
irequest_queue; + + spin_lock_irqsave (&q->queue_lock, flags_ior); s390irq_spin_lock_irqsave(ti->devinfo.irq,flags_390irq); atomic_set(&ti->bh_scheduled,0); tapeblock_exec_IO(ti); s390irq_spin_unlock_irqrestore(ti->devinfo.irq,flags_390irq); - spin_unlock_irqrestore (&io_request_lock, flags_ior); + spin_unlock_irqrestore (&q->queue_lock, flags_ior); } void diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/Config.in linux/drivers/scsi/Config.in --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/Config.in Wed Oct 24 12:49:58 2001 +++ linux/drivers/scsi/Config.in Mon Oct 15 10:41:43 2001 @@ -20,10 +20,6 @@ comment 'Some SCSI devices (e.g. CD jukebox) support multiple LUNs' -#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then - bool ' Enable extra checks in new queueing code' CONFIG_SCSI_DEBUG_QUEUES -#fi - bool ' Probe all LUNs on each SCSI device' CONFIG_SCSI_MULTI_LUN bool ' Verbose SCSI error reporting (kernel size +=12K)' CONFIG_SCSI_CONSTANTS diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/aic7xxx/aic7xxx_linux.c linux/drivers/scsi/aic7xxx/aic7xxx_linux.c --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/aic7xxx/aic7xxx_linux.c Wed Oct 31 09:39:12 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_linux.c Tue Oct 30 09:48:43 2001 @@ -1122,9 +1122,9 @@ if (host == NULL) return (ENOMEM); - ahc_lock(ahc, &s); *((struct ahc_softc **)host->hostdata) = ahc; ahc->platform_data->host = host; + ahc_lock(ahc, &s); host->can_queue = AHC_MAX_QUEUE; host->cmd_per_lun = 2; host->sg_tablesize = AHC_NSEG; @@ -1271,7 +1271,9 @@ TAILQ_INIT(&ahc->platform_data->completeq); TAILQ_INIT(&ahc->platform_data->device_runq); ahc->platform_data->hw_dma_mask = 0xFFFFFFFF; - ahc_lockinit(ahc); + /* + * ahc_lockinit done by scsi_register, as we don't own that lock + */ ahc_done_lockinit(ahc); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,0) init_MUTEX_LOCKED(&ahc->platform_data->eh_sem); @@ -1529,22 +1531,17 @@ int ahc_linux_queue(Scsi_Cmnd * cmd, void (*scsi_done) (Scsi_Cmnd *)) { - struct ahc_softc *ahc; + struct ahc_softc *ahc = *(struct ahc_softc **)cmd->host->hostdata; struct ahc_linux_device *dev; - u_long flags; - - ahc = *(struct ahc_softc **)cmd->host->hostdata; /* * Save the callback on completion function. */ cmd->scsi_done = scsi_done; - ahc_lock(ahc, &flags); dev = ahc_linux_get_device(ahc, cmd->channel, cmd->target, cmd->lun, /*alloc*/TRUE); if (dev == NULL) { - ahc_unlock(ahc, &flags); printf("aic7xxx_linux_queue: Unable to allocate device!\n"); return (-ENOMEM); } @@ -1555,7 +1552,6 @@ dev->flags |= AHC_DEV_ON_RUN_LIST; ahc_linux_run_device_queues(ahc); } - ahc_unlock(ahc, &flags); return (0); } @@ -2407,12 +2403,10 @@ flag == SCB_ABORT ? "n ABORT" : " TARGET RESET"); /* - * It is a bug that the upper layer takes - * this lock just prior to calling us. + * we used to drop io_request_lock and lock ahc from here, but + * now that the global lock is gone the upper layer have already + * done what ahc_lock would do /jens */ - spin_unlock_irq(&io_request_lock); - - ahc_lock(ahc, &s); /* * First determine if we currently own this command. @@ -2660,7 +2654,7 @@ ahc_unlock(ahc, &s); if (acmd != NULL) ahc_linux_run_complete_queue(ahc, acmd); - spin_lock_irq(&io_request_lock); + ahc_lock(ahc, &s); return (retval); } @@ -2703,14 +2697,7 @@ u_long s; int found; - /* - * It is a bug that the upper layer takes - * this lock just prior to calling us. 
- */ - spin_unlock_irq(&io_request_lock); - ahc = *(struct ahc_softc **)cmd->host->hostdata; - ahc_lock(ahc, &s); found = ahc_reset_channel(ahc, cmd->channel + 'A', /*initiate reset*/TRUE); acmd = TAILQ_FIRST(&ahc->platform_data->completeq); @@ -2723,7 +2710,7 @@ if (acmd != NULL) ahc_linux_run_complete_queue(ahc, acmd); - spin_lock_irq(&io_request_lock); + ahc_lock(ahc, &s); return SUCCESS; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/aic7xxx/aic7xxx_linux_host.h linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Wed Oct 31 09:39:12 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_linux_host.h Tue Oct 30 09:48:43 2001 @@ -89,7 +89,8 @@ present: 0, /* number of 7xxx's present */\ unchecked_isa_dma: 0, /* no memory DMA restrictions */\ use_clustering: ENABLE_CLUSTERING, \ - use_new_eh_code: 1 \ + use_new_eh_code: 1, \ + can_dma_32: 1 \ } #endif /* _AIC7XXX_LINUX_HOST_H_ */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/aic7xxx/aic7xxx_osm.h linux/drivers/scsi/aic7xxx/aic7xxx_osm.h --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/aic7xxx/aic7xxx_osm.h Wed Oct 31 09:39:12 2001 +++ linux/drivers/scsi/aic7xxx/aic7xxx_osm.h Thu Nov 1 11:38:39 2001 @@ -575,9 +575,6 @@ TAILQ_HEAD(, ahc_linux_device) device_runq; struct ahc_completeq completeq; -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,1,0) - spinlock_t spin_lock; -#endif u_int qfrozen; struct timer_list reset_timer; struct semaphore eh_sem; @@ -716,20 +713,20 @@ static __inline void ahc_lockinit(struct ahc_softc *ahc) { - spin_lock_init(&ahc->platform_data->spin_lock); + spin_lock_init(&ahc->platform_data->host->host_lock); } static __inline void ahc_lock(struct ahc_softc *ahc, unsigned long *flags) { *flags = 0; - spin_lock_irqsave(&ahc->platform_data->spin_lock, *flags); + spin_lock_irqsave(&ahc->platform_data->host->host_lock, *flags); } static __inline void ahc_unlock(struct ahc_softc *ahc, unsigned long *flags) { - spin_unlock_irqrestore(&ahc->platform_data->spin_lock, *flags); + spin_unlock_irqrestore(&ahc->platform_data->host->host_lock, *flags); } static __inline void @@ -741,14 +738,18 @@ static __inline void ahc_done_lock(struct ahc_softc *ahc, unsigned long *flags) { + struct Scsi_Host *host = ahc->platform_data->host; + *flags = 0; - spin_lock_irqsave(&io_request_lock, *flags); + spin_lock_irqsave(&host->host_lock, *flags); } static __inline void ahc_done_unlock(struct ahc_softc *ahc, unsigned long *flags) { - spin_unlock_irqrestore(&io_request_lock, *flags); + struct Scsi_Host *host = ahc->platform_data->host; + + spin_unlock_irqrestore(&host->host_lock, *flags); } #else /* LINUX_VERSION_CODE < KERNEL_VERSION(2,1,0) */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/aic7xxx_old.c linux/drivers/scsi/aic7xxx_old.c --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/aic7xxx_old.c Wed Oct 24 12:49:58 2001 +++ linux/drivers/scsi/aic7xxx_old.c Mon Oct 15 10:41:43 2001 @@ -4126,7 +4126,7 @@ unsigned long cpu_flags = 0; struct aic7xxx_scb *scb; - spin_lock_irqsave(&io_request_lock, cpu_flags); + spin_lock_irqsave(&p->host->host_lock, cpu_flags); p->dev_timer_active &= ~(0x01 << MAX_TARGETS); if ( (p->dev_timer_active & (0x01 << p->scsi_id)) && time_after_eq(jiffies, p->dev_expires[p->scsi_id]) ) @@ -4183,7 +4183,7 @@ } aic7xxx_run_waiting_queues(p); - spin_unlock_irqrestore(&io_request_lock, cpu_flags); + spin_unlock_irqrestore(&p->host->host_lock, 
cpu_flags); } /*+F************************************************************************* @@ -7010,7 +7010,7 @@ p = (struct aic7xxx_host *)dev_id; if(!p) return; - spin_lock_irqsave(&io_request_lock, cpu_flags); + spin_lock_irqsave(&p->host->host_lock, cpu_flags); p->flags |= AHC_IN_ISR; do { @@ -7019,7 +7019,7 @@ aic7xxx_done_cmds_complete(p); aic7xxx_run_waiting_queues(p); p->flags &= ~AHC_IN_ISR; - spin_unlock_irqrestore(&io_request_lock, cpu_flags); + spin_unlock_irqrestore(&p->host->host_lock, cpu_flags); } /*+F************************************************************************* @@ -11147,7 +11147,7 @@ disable_irq(p->irq); aic7xxx_print_card(p); aic7xxx_print_scratch_ram(p); - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&p->host->host_lock); for(;;) barrier(); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/hosts.c linux/drivers/scsi/hosts.c --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/hosts.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/hosts.c Mon Oct 15 10:41:43 2001 @@ -160,6 +160,7 @@ break; } } + spin_lock_init(&retval->host_lock); atomic_set(&retval->host_active,0); retval->host_busy = 0; retval->host_failed = 0; @@ -235,6 +236,7 @@ retval->cmd_per_lun = tpnt->cmd_per_lun; retval->unchecked_isa_dma = tpnt->unchecked_isa_dma; retval->use_clustering = tpnt->use_clustering; + retval->can_dma_32 = tpnt->can_dma_32; retval->select_queue_depths = tpnt->select_queue_depths; retval->max_sectors = tpnt->max_sectors; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/hosts.h linux/drivers/scsi/hosts.h --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/hosts.h Thu Oct 11 08:47:24 2001 +++ linux/drivers/scsi/hosts.h Thu Nov 1 11:21:14 2001 @@ -291,6 +291,8 @@ */ unsigned emulated:1; + unsigned can_dma_32:1; + /* * Name of proc directory */ @@ -317,6 +319,7 @@ struct Scsi_Host * next; Scsi_Device * host_queue; + spinlock_t host_lock; struct task_struct * ehandler; /* Error recovery thread. 
*/ struct semaphore * eh_wait; /* The error recovery thread waits on @@ -390,6 +393,7 @@ unsigned in_recovery:1; unsigned unchecked_isa_dma:1; unsigned use_clustering:1; + unsigned can_dma_32:1; /* * True if this host was loaded as a loadable module */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/ide-scsi.c linux/drivers/scsi/ide-scsi.c --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/ide-scsi.c Sun Sep 30 21:26:07 2001 +++ linux/drivers/scsi/ide-scsi.c Mon Oct 15 10:41:43 2001 @@ -235,13 +235,13 @@ kfree(atapi_buf); } -static inline void idescsi_free_bh (struct buffer_head *bh) +static inline void idescsi_free_bio (struct bio *bio) { - struct buffer_head *bhp; + struct bio *bhp; - while (bh) { - bhp = bh; - bh = bh->b_reqnext; + while (bio) { + bhp = bio; + bio = bio->bi_next; kfree (bhp); } } @@ -263,6 +263,7 @@ struct request *rq = hwgroup->rq; idescsi_pc_t *pc = (idescsi_pc_t *) rq->buffer; int log = test_bit(IDESCSI_LOG_CMD, &scsi->log); + struct Scsi_Host *host; u8 *scsi_buf; unsigned long flags; @@ -291,10 +292,11 @@ } else printk("\n"); } } - spin_lock_irqsave(&io_request_lock,flags); + host = pc->scsi_cmd->host; + spin_lock_irqsave(&host->host_lock, flags); pc->done(pc->scsi_cmd); - spin_unlock_irqrestore(&io_request_lock,flags); - idescsi_free_bh (rq->bh); + spin_unlock_irqrestore(&host->host_lock, flags); + idescsi_free_bio (rq->bio); kfree(pc); kfree(rq); scsi->pc = NULL; } @@ -427,7 +429,7 @@ pc->current_position=pc->buffer; bcount = IDE_MIN (pc->request_transfer, 63 * 1024); /* Request to transfer the entire buffer at once */ - if (drive->using_dma && rq->bh) + if (drive->using_dma && rq->special) dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive); SELECT_DRIVE(HWIF(drive), drive); @@ -653,25 +655,24 @@ return -EINVAL; } -static inline struct buffer_head *idescsi_kmalloc_bh (int count) +static inline struct bio *idescsi_kmalloc_bio (int count) { - struct buffer_head *bh, *bhp, *first_bh; + struct bio *bh, *bhp, *first_bh; - if ((first_bh = bhp = bh = kmalloc (sizeof(struct buffer_head), GFP_ATOMIC)) == NULL) + if ((first_bh = bhp = bh = bio_alloc(GFP_ATOMIC)) == NULL) goto abort; - memset (bh, 0, sizeof (struct buffer_head)); - bh->b_reqnext = NULL; + memset (bh, 0, sizeof (struct bio)); while (--count) { - if ((bh = kmalloc (sizeof(struct buffer_head), GFP_ATOMIC)) == NULL) + if ((bh = bio_alloc(GFP_ATOMIC)) == NULL) goto abort; - memset (bh, 0, sizeof (struct buffer_head)); - bhp->b_reqnext = bh; + memset (bh, 0, sizeof (struct bio)); + bhp->bi_next = bh; bhp = bh; - bh->b_reqnext = NULL; + bh->bi_next = NULL; } return first_bh; abort: - idescsi_free_bh (first_bh); + idescsi_free_bio (first_bh); return NULL; } @@ -689,9 +690,9 @@ } } -static inline struct buffer_head *idescsi_dma_bh (ide_drive_t *drive, idescsi_pc_t *pc) +static inline struct bio *idescsi_dma_bio(ide_drive_t *drive, idescsi_pc_t *pc) { - struct buffer_head *bh = NULL, *first_bh = NULL; + struct bio *bh = NULL, *first_bh = NULL; int segments = pc->scsi_cmd->use_sg; struct scatterlist *sg = pc->scsi_cmd->request_buffer; @@ -700,25 +701,27 @@ if (idescsi_set_direction(pc)) return NULL; if (segments) { - if ((first_bh = bh = idescsi_kmalloc_bh (segments)) == NULL) + if ((first_bh = bh = idescsi_kmalloc_bio (segments)) == NULL) return NULL; #if IDESCSI_DEBUG_LOG printk ("ide-scsi: %s: building DMA table, %d segments, %dkB total\n", drive->name, segments, pc->request_transfer >> 10); #endif /* IDESCSI_DEBUG_LOG */ while 
(segments--) { - bh->b_data = sg->address; - bh->b_size = sg->length; - bh = bh->b_reqnext; + bh->bi_io_vec.bv_page = sg->page; + bh->bi_io_vec.bv_len = sg->length; + bh->bi_io_vec.bv_offset = sg->offset; + bh = bh->bi_next; sg++; } } else { - if ((first_bh = bh = idescsi_kmalloc_bh (1)) == NULL) + if ((first_bh = bh = idescsi_kmalloc_bio (1)) == NULL) return NULL; #if IDESCSI_DEBUG_LOG printk ("ide-scsi: %s: building DMA table for a single buffer (%dkB)\n", drive->name, pc->request_transfer >> 10); #endif /* IDESCSI_DEBUG_LOG */ - bh->b_data = pc->scsi_cmd->request_buffer; - bh->b_size = pc->request_transfer; + bh->bi_io_vec.bv_page = virt_to_page(pc->scsi_cmd->request_buffer); + bh->bi_io_vec.bv_len = pc->request_transfer; + bh->bi_io_vec.bv_offset = (unsigned long) pc->scsi_cmd->request_buffer & ~PAGE_MASK; } return first_bh; } @@ -783,11 +786,11 @@ ide_init_drive_cmd (rq); rq->buffer = (char *) pc; - rq->bh = idescsi_dma_bh (drive, pc); + rq->bio = idescsi_dma_bio (drive, pc); rq->cmd = IDESCSI_PC_RQ; - spin_unlock(&io_request_lock); + spin_unlock(&cmd->host->host_lock); (void) ide_do_drive_cmd (drive, rq, ide_end); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&cmd->host->host_lock); return 0; abort: if (pc) kfree (pc); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/megaraid.c linux/drivers/scsi/megaraid.c --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/megaraid.c Wed Oct 31 09:39:13 2001 +++ linux/drivers/scsi/megaraid.c Tue Oct 30 09:48:43 2001 @@ -586,8 +586,10 @@ #define DRIVER_LOCK(p) #define DRIVER_UNLOCK(p) #define IO_LOCK_T unsigned long io_flags = 0 -#define IO_LOCK spin_lock_irqsave(&io_request_lock,io_flags); -#define IO_UNLOCK spin_unlock_irqrestore(&io_request_lock,io_flags); +#define IO_LOCK(host) spin_lock_irqsave(&(host)->host_lock,io_flags) +#define IO_UNLOCK(host) spin_unlock_irqrestore(&(host)->host_lock,io_flags) +#define IO_LOCK_IRQ(host) spin_lock_irq(&(host)->host_lock) +#define IO_UNLOCK_IRQ(host) spin_unlock_irq(&(host)->host_lock) #define queue_task_irq(a,b) queue_task(a,b) #define queue_task_irq_off(a,b) queue_task(a,b) @@ -612,8 +614,8 @@ #define DRIVER_LOCK(p) #define DRIVER_UNLOCK(p) #define IO_LOCK_T unsigned long io_flags = 0 -#define IO_LOCK spin_lock_irqsave(&io_request_lock,io_flags); -#define IO_UNLOCK spin_unlock_irqrestore(&io_request_lock,io_flags); +#define IO_LOCK(host) spin_lock_irqsave(&io_request_lock,io_flags); +#define IO_UNLOCK(host) spin_unlock_irqrestore(&io_request_lock,io_flags); #define pci_free_consistent(a,b,c,d) #define pci_unmap_single(a,b,c,d) @@ -2101,7 +2103,7 @@ for (idx = 0; idx < MAX_FIRMWARE_STATUS; idx++) completed[idx] = 0; - IO_LOCK; + IO_LOCK(megaCfg->host); megaCfg->nInterrupts++; qCnt = 0xff; @@ -2220,7 +2222,7 @@ megaCfg->flag &= ~IN_ISR; /* Loop through any pending requests */ mega_runpendq (megaCfg); - IO_UNLOCK; + IO_UNLOCK(megaCfg->host); } @@ -3032,9 +3034,7 @@ sizeof (mega_mailbox64), &(megaCfg->dma_handle64)); - mega_register_mailbox (megaCfg, - virt_to_bus ((void *) megaCfg-> - mailbox64ptr)); + mega_register_mailbox (megaCfg, megaCfg->dma_handle64); #else mega_register_mailbox (megaCfg, virt_to_bus ((void *) &megaCfg-> @@ -3800,7 +3800,7 @@ if (pScb->SCpnt->cmnd[0] == M_RD_IOCTL_CMD_NEW) { init_MUTEX_LOCKED (&pScb->ioctl_sem); - spin_unlock_irq (&io_request_lock); + IO_UNLOCK_IRQ(megaCfg->host); down (&pScb->ioctl_sem); user_area = (char *)*((u32*)&pScb->SCpnt->cmnd[4]); if (copy_to_user @@ -3809,7 +3809,7 @@ ("megaraid: Error copying ioctl return value to user 
buffer.\n"); pScb->SCpnt->result = (DID_ERROR << 16); } - spin_lock_irq (&io_request_lock); + IO_LOCK_IRQ(megaCfg->host); DRIVER_LOCK (megaCfg); kfree (pScb->buff_ptr); pScb->buff_ptr = NULL; @@ -4744,10 +4744,10 @@ init_MUTEX_LOCKED(&mimd_ioctl_sem); - IO_LOCK; + IO_LOCK(shpnt); megaraid_queue(scsicmd, megadev_ioctl_done); - IO_UNLOCK; + IO_UNLOCK(shpnt); down(&mimd_ioctl_sem); @@ -4893,10 +4893,10 @@ init_MUTEX_LOCKED (&mimd_ioctl_sem); - IO_LOCK; + IO_LOCK(shpnt); megaraid_queue (scsicmd, megadev_ioctl_done); - IO_UNLOCK; + IO_UNLOCK(shpnt); down (&mimd_ioctl_sem); if (!scsicmd->result && outlen) { diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/megaraid.h linux/drivers/scsi/megaraid.h --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/megaraid.h Wed Oct 31 09:39:13 2001 +++ linux/drivers/scsi/megaraid.h Tue Oct 30 09:48:43 2001 @@ -223,7 +223,8 @@ cmd_per_lun: MAX_CMD_PER_LUN, /* SCSI Commands per LUN */\ present: 0, /* Present */\ unchecked_isa_dma: 0, /* Default Unchecked ISA DMA */\ - use_clustering: ENABLE_CLUSTERING /* Enable Clustering */\ + use_clustering: ENABLE_CLUSTERING, /* Enable Clustering */\ + can_dma_32: 1, \ } #endif diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/qlogicfc.h linux/drivers/scsi/qlogicfc.h --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/qlogicfc.h Wed Oct 24 12:49:58 2001 +++ linux/drivers/scsi/qlogicfc.h Mon Oct 22 11:14:23 2001 @@ -95,7 +95,8 @@ cmd_per_lun: QLOGICFC_CMD_PER_LUN, \ present: 0, \ unchecked_isa_dma: 0, \ - use_clustering: ENABLE_CLUSTERING \ + use_clustering: ENABLE_CLUSTERING, \ + can_dma_32: 1 \ } #endif /* _QLOGICFC_H */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/scsi.c linux/drivers/scsi/scsi.c --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/scsi.c Wed Oct 24 12:49:58 2001 +++ linux/drivers/scsi/scsi.c Thu Oct 18 09:45:13 2001 @@ -178,10 +178,17 @@ * handler in the list - ultimately they call scsi_request_fn * to do the dirty deed. */ -void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) { - blk_init_queue(&SDpnt->request_queue, scsi_request_fn); - blk_queue_headactive(&SDpnt->request_queue, 0); - SDpnt->request_queue.queuedata = (void *) SDpnt; +void scsi_initialize_queue(Scsi_Device * SDpnt, struct Scsi_Host * SHpnt) +{ + char name[16]; + + request_queue_t *q = &SDpnt->request_queue; + + sprintf(name, "scsi%d%d%d", SDpnt->id, SDpnt->lun, SDpnt->channel); + blk_init_queue(q, scsi_request_fn, name); + blk_queue_headactive(q, 0); + q->queuedata = (void *) SDpnt; + q->max_segments = SHpnt->sg_tablesize; } #ifdef MODULE @@ -219,9 +226,7 @@ req = &SCpnt->request; req->rq_status = RQ_SCSI_DONE; /* Busy, but indicate request done */ - if (req->waiting != NULL) { - complete(req->waiting); - } + complete(req->waiting); } /* @@ -612,8 +617,6 @@ unsigned long flags = 0; unsigned long timeout; - ASSERT_LOCK(&io_request_lock, 0); - #if DEBUG unsigned long *ret = 0; #ifdef __mips__ @@ -625,6 +628,8 @@ host = SCpnt->host; + ASSERT_LOCK(&host->host_lock, 0); + /* Assign a unique nonzero serial_number. */ if (++serial_number == 0) serial_number = 1; @@ -679,41 +684,41 @@ * passes a meaningful return value. 
*/ if (host->hostt->use_new_eh_code) { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); rtn = host->hostt->queuecommand(SCpnt, scsi_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); if (rtn != 0) { scsi_delete_timer(SCpnt); scsi_mlqueue_insert(SCpnt, SCSI_MLQUEUE_HOST_BUSY); SCSI_LOG_MLQUEUE(3, printk("queuecommand : request rejected\n")); } } else { - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); host->hostt->queuecommand(SCpnt, scsi_old_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } } else { int temp; SCSI_LOG_MLQUEUE(3, printk("command() : routine at %p\n", host->hostt->command)); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); temp = host->hostt->command(SCpnt); SCpnt->result = temp; #ifdef DEBUG_DELAY - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); clock = jiffies + 4 * HZ; while (time_before(jiffies, clock)) barrier(); printk("done(host = %d, result = %04x) : routine at %p\n", host->host_no, temp, host->hostt->command); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); #endif if (host->hostt->use_new_eh_code) { scsi_done(SCpnt); } else { scsi_old_done(SCpnt); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } SCSI_LOG_MLQUEUE(3, printk("leaving scsi_dispatch_cmnd()\n")); return rtn; @@ -781,7 +786,7 @@ Scsi_Device * SDpnt = SRpnt->sr_device; struct Scsi_Host *host = SDpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCSI_LOG_MLQUEUE(4, { @@ -877,7 +882,7 @@ { struct Scsi_Host *host = SCpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCpnt->owner = SCSI_OWNER_MIDLEVEL; SRpnt->sr_command = SCpnt; @@ -967,7 +972,7 @@ { struct Scsi_Host *host = SCpnt->host; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCpnt->pid = scsi_pid++; SCpnt->owner = SCSI_OWNER_MIDLEVEL; @@ -1317,11 +1322,11 @@ Scsi_Request * SRpnt; unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); - host = SCpnt->host; device = SCpnt->device; + ASSERT_LOCK(&host->host_lock, 0); + /* * We need to protect the decrement, as otherwise a race condition * would exist. Fiddling with SCpnt isn't a problem as the @@ -1329,10 +1334,10 @@ * one execution context, but the device and host structures are * shared. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); host->host_busy--; /* Indicate that we are free */ device->device_busy--; /* Decrement device usage counter. */ - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); /* * Clear the flags which say that the device/host is no longer @@ -1823,7 +1828,6 @@ Scsi_Device *SDpnt; struct Scsi_Device_Template *sdtpnt; const char *name; - unsigned long flags; int out_of_space = 0; if (tpnt->next || !tpnt->detect) @@ -1833,7 +1837,7 @@ /* If max_sectors isn't set, default to max */ if (!tpnt->max_sectors) - tpnt->max_sectors = MAX_SECTORS; + tpnt->max_sectors = 1024; pcount = next_scsi_host; @@ -1847,10 +1851,11 @@ using the new scsi code. NOTE: the detect routine could redefine the value tpnt->use_new_eh_code. 
(DB, 13 May 1998) */ + /* + * detect should do its own locking + */ if (tpnt->use_new_eh_code) { - spin_lock_irqsave(&io_request_lock, flags); tpnt->present = tpnt->detect(tpnt); - spin_unlock_irqrestore(&io_request_lock, flags); } else tpnt->present = tpnt->detect(tpnt); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/scsi.h linux/drivers/scsi/scsi.h --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/scsi.h Wed Oct 24 12:49:58 2001 +++ linux/drivers/scsi/scsi.h Thu Nov 1 11:21:14 2001 @@ -386,15 +386,6 @@ #define ASKED_FOR_SENSE 0x20 #define SYNC_RESET 0x40 -#if defined(__mc68000__) || defined(CONFIG_APUS) -#include -#define CONTIGUOUS_BUFFERS(X,Y) \ - (virt_to_phys((X)->b_data+(X)->b_size-1)+1==virt_to_phys((Y)->b_data)) -#else -#define CONTIGUOUS_BUFFERS(X,Y) ((X->b_data+X->b_size) == Y->b_data) -#endif - - /* * This is the crap from the old error handling code. We have it in a special * place so that we can more easily delete it later on. diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/scsi_error.c linux/drivers/scsi/scsi_error.c --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/scsi_error.c Sun Sep 9 19:52:35 2001 +++ linux/drivers/scsi/scsi_error.c Mon Oct 15 10:41:43 2001 @@ -423,8 +423,6 @@ unsigned char scsi_result0[256], *scsi_result = NULL; int saved_result; - ASSERT_LOCK(&io_request_lock, 0); - memcpy((void *) SCpnt->cmnd, (void *) generic_sense, sizeof(generic_sense)); @@ -583,16 +581,14 @@ STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout) { unsigned long flags; - struct Scsi_Host *host; - - ASSERT_LOCK(&io_request_lock, 0); + struct Scsi_Host *host = SCpnt->host; - host = SCpnt->host; + ASSERT_LOCK(&host->host_lock, 0); - retry: +retry: /* - * We will use a queued command if possible, otherwise we will emulate the - * queuing and calling of completion function ourselves. + * We will use a queued command if possible, otherwise we will + * emulate the queuing and calling of completion function ourselves. */ SCpnt->owner = SCSI_OWNER_LOWLEVEL; @@ -609,9 +605,9 @@ SCpnt->host->eh_action = &sem; SCpnt->request.rq_status = RQ_SCSI_BUSY; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); host->hostt->queuecommand(SCpnt, scsi_eh_done); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); down(&sem); @@ -634,10 +630,10 @@ * abort a timed out command or not. Not sure how * we should treat them differently anyways. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); if (SCpnt->host->hostt->eh_abort_handler) SCpnt->host->hostt->eh_abort_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); SCpnt->request.rq_status = RQ_SCSI_DONE; SCpnt->owner = SCSI_OWNER_ERROR_HANDLER; @@ -650,13 +646,13 @@ int temp; /* - * We damn well had better never use this code. There is no timeout - * protection here, since we would end up waiting in the actual low - * level driver, we don't know how to wake it up. + * We damn well had better never use this code. There is no + * timeout protection here, since we would end up waiting in + * the actual low level driver, we don't know how to wake it up. 
*/ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); temp = host->hostt->command(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); SCpnt->result = temp; /* Fall through to code below to examine status. */ @@ -664,8 +660,8 @@ } /* - * Now examine the actual status codes to see whether the command actually - * did complete normally. + * Now examine the actual status codes to see whether the command + * actually did complete normally. */ if (SCpnt->eh_state == SUCCESS) { int ret = scsi_eh_completed_normally(SCpnt); @@ -776,9 +772,9 @@ SCpnt->owner = SCSI_OWNER_LOWLEVEL; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_abort_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); return rtn; } @@ -808,9 +804,9 @@ } SCpnt->owner = SCSI_OWNER_LOWLEVEL; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_device_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -841,9 +837,9 @@ return FAILED; } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_bus_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -887,9 +883,9 @@ if (SCpnt->host->hostt->eh_host_reset_handler == NULL) { return FAILED; } - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&SCpnt->host->host_lock, flags); rtn = SCpnt->host->hostt->eh_host_reset_handler(SCpnt); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&SCpnt->host->host_lock, flags); if (rtn == SUCCESS) SCpnt->eh_state = SUCCESS; @@ -1230,7 +1226,7 @@ Scsi_Device *SDpnt; unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); /* * Next free up anything directly waiting upon the host. This will be @@ -1247,19 +1243,22 @@ * now that error recovery is done, we will need to ensure that these * requests are started. 
*/ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) { - request_queue_t *q; + request_queue_t *q = &SDpnt->request_queue; + if ((host->can_queue > 0 && (host->host_busy >= host->can_queue)) || (host->host_blocked) || (host->host_self_blocked) || (SDpnt->device_blocked)) { break; } - q = &SDpnt->request_queue; + + spin_lock(&q->queue_lock); q->request_fn(q); + spin_unlock(&q->queue_lock); } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } /* @@ -1306,7 +1305,7 @@ Scsi_Cmnd *SCdone; int timed_out; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&host->host_lock, 0); SCdone = NULL; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/scsi_lib.c linux/drivers/scsi/scsi_lib.c --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/scsi_lib.c Wed Oct 24 12:49:58 2001 +++ linux/drivers/scsi/scsi_lib.c Tue Oct 16 11:55:42 2001 @@ -61,7 +61,7 @@ * data - private data * at_head - insert request at head or tail of queue * - * Lock status: Assumed that io_request_lock is not held upon entry. + * Lock status: Assumed that queue lock is not held upon entry. * * Returns: Nothing */ @@ -70,11 +70,12 @@ { unsigned long flags; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); rq->cmd = SPECIAL; rq->special = data; rq->q = NULL; + rq->bio = rq->biotail = NULL; rq->nr_segments = 0; rq->elevator_sequence = 0; @@ -84,15 +85,15 @@ * head of the queue for things like a QUEUE_FULL message from a * device, or a host that is unable to accept a particular command. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); if (at_head) - list_add(&rq->queue, &q->queue_head); + list_add(&rq->queuelist, &q->queue_head); else - list_add_tail(&rq->queue, &q->queue_head); + list_add_tail(&rq->queuelist, &q->queue_head); q->request_fn(q); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } @@ -167,8 +168,6 @@ */ int scsi_init_cmd_errh(Scsi_Cmnd * SCpnt) { - ASSERT_LOCK(&io_request_lock, 0); - SCpnt->owner = SCSI_OWNER_MIDLEVEL; SCpnt->reset_chain = NULL; SCpnt->serial_number = 0; @@ -250,9 +249,9 @@ Scsi_Device *SDpnt; struct Scsi_Host *SHpnt; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); if (SCpnt != NULL) { /* @@ -262,7 +261,7 @@ * the bad sector. */ SCpnt->request.special = (void *) SCpnt; - list_add(&SCpnt->request.queue, &q->queue_head); + list_add(&SCpnt->request.queuelist, &q->queue_head); } /* @@ -280,14 +279,10 @@ * with special case code, then spin off separate versions and * use function pointers to pick the right one. 
*/ - if (SDpnt->single_lun - && list_empty(&q->queue_head) - && SDpnt->device_busy == 0) { + if (SDpnt->single_lun && list_empty(&q->queue_head) && SDpnt->device_busy == 0) { request_queue_t *q; - for (SDpnt = SHpnt->host_queue; - SDpnt; - SDpnt = SDpnt->next) { + for (SDpnt = SHpnt->host_queue; SDpnt; SDpnt = SDpnt->next) { if (((SHpnt->can_queue > 0) && (SHpnt->host_busy >= SHpnt->can_queue)) || (SHpnt->host_blocked) @@ -295,6 +290,7 @@ || (SDpnt->device_blocked)) { break; } + q = &SDpnt->request_queue; q->request_fn(q); } @@ -328,7 +324,7 @@ SHpnt->some_device_starved = 0; } } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } /* @@ -360,57 +356,30 @@ int requeue, int frequeue) { + request_queue_t *q = &SCpnt->device->request_queue; struct request *req; - struct buffer_head *bh; - Scsi_Device * SDpnt; - int nsect; - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); req = &SCpnt->request; - req->errors = 0; - if (!uptodate) { - printk(" I/O error: dev %s, sector %lu\n", - kdevname(req->rq_dev), req->sector); - } do { - if ((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; - blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - sectors -= nsect; - bh->b_end_io(bh, uptodate); - if ((bh = req->bh) != NULL) { - req->hard_sector += nsect; - req->hard_nr_sectors -= nsect; - req->sector += nsect; - req->nr_sectors -= nsect; - - req->current_nr_sectors = bh->b_size >> 9; - if (req->nr_sectors < req->current_nr_sectors) { - req->nr_sectors = req->current_nr_sectors; - printk("scsi_end_request: buffer-list destroyed\n"); - } - } + if (!req->bio) { + printk("scsi_end_request: missing bio\n"); + break; } - } while (sectors && bh); + sectors -= bio_sectors(req->bio); + if (!end_that_request_first(req, 1)) + break; + } while (sectors > 0); /* * If there are blocks left over at the end, set up the command * to queue the remainder of them. */ - if (req->bh) { - request_queue_t *q; - - if( !requeue ) - { + if (req->bio) { + if (!requeue) return SCpnt; - } - - q = &SCpnt->device->request_queue; - req->buffer = bh->b_data; /* * Bleah. Leftovers again. Stick the leftovers in * the front of the queue, and goose the queue again. @@ -418,17 +387,14 @@ scsi_queue_next_request(q, SCpnt); return SCpnt; } + /* * This request is done. If there is someone blocked waiting for this - * request, wake them up. Typically used to wake up processes trying - * to swap a page into memory. + * request, wake them up. */ - if (req->waiting != NULL) { - complete(req->waiting); - } - add_blkdev_randomness(MAJOR(req->rq_dev)); + complete(req->waiting); - SDpnt = SCpnt->device; + add_blkdev_randomness(MAJOR(req->rq_dev)); /* * This will goose the queue request function at the end, so we don't @@ -436,12 +402,9 @@ */ __scsi_release_command(SCpnt); - if( frequeue ) { - request_queue_t *q; + if (frequeue) + scsi_queue_next_request(q, NULL); - q = &SDpnt->request_queue; - scsi_queue_next_request(q, NULL); - } return NULL; } @@ -489,7 +452,9 @@ */ static void scsi_release_buffers(Scsi_Cmnd * SCpnt) { - ASSERT_LOCK(&io_request_lock, 0); + struct request *req = &SCpnt->request; + + ASSERT_LOCK(&SCpnt->device->request_queue.queue_lock, 0); /* * Free up any indirection buffers we allocated for DMA purposes. 
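The scsi_end_request() rework above replaces the old buffer_head walk (b_reqnext, b_end_io) with the generic block-layer completion helper. Here is a minimal sketch of that idiom, assuming the helpers named in the hunks (end_that_request_first(), bio_sectors()) behave as shown there; it is an illustration of the pattern, not the driver code itself:

static struct request *complete_some_sectors(struct request *req,
                                             int uptodate, int sectors)
{
        do {
                if (!req->bio)                  /* nothing left to finish */
                        break;
                sectors -= bio_sectors(req->bio);
                if (!end_that_request_first(req, uptodate))
                        break;                  /* request fully completed */
        } while (sectors > 0);

        if (req->bio)                           /* leftovers: caller requeues */
                return req;

        if (req->waiting)                       /* wake any waiter */
                complete(req->waiting);

        return NULL;
}

The point of the conversion is that the SCSI mid-layer no longer touches buffer heads at all; a partially completed request is simply handed back and re-queued.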
@@ -510,9 +475,8 @@ } scsi_free(SCpnt->request_buffer, SCpnt->sglist_len); } else { - if (SCpnt->request_buffer != SCpnt->request.buffer) { - scsi_free(SCpnt->request_buffer, SCpnt->request_bufflen); - } + if (SCpnt->request_buffer != req->buffer) + scsi_free(SCpnt->request_buffer,SCpnt->request_bufflen); } /* @@ -548,6 +512,7 @@ int result = SCpnt->result; int this_count = SCpnt->bufflen >> 9; request_queue_t *q = &SCpnt->device->request_queue; + struct request *req = &SCpnt->request; /* * We must do one of several things here: @@ -562,7 +527,7 @@ * would be used if we just wanted to retry, for example. * */ - ASSERT_LOCK(&io_request_lock, 0); + ASSERT_LOCK(&q->queue_lock, 0); /* * Free up any indirection buffers we allocated for DMA purposes. @@ -591,10 +556,13 @@ } scsi_free(SCpnt->buffer, SCpnt->sglist_len); } else { - if (SCpnt->buffer != SCpnt->request.buffer) { - if (SCpnt->request.cmd == READ) { - memcpy(SCpnt->request.buffer, SCpnt->buffer, - SCpnt->bufflen); + if (SCpnt->buffer != req->buffer) { + if (req->cmd == READ) { + unsigned long flags; + char *to = bio_kmap_irq(req->bio, &flags); + + memcpy(to, SCpnt->buffer, SCpnt->bufflen); + bio_kunmap_irq(to, &flags); } scsi_free(SCpnt->buffer, SCpnt->bufflen); } @@ -615,11 +583,10 @@ */ if (good_sectors > 0) { SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d sectors done.\n", - SCpnt->request.nr_sectors, - good_sectors)); + req->nr_sectors, good_sectors)); SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n ", SCpnt->use_sg)); - SCpnt->request.errors = 0; + req->errors = 0; /* * If multiple sectors are requested in one buffer, then * they will have been finished off by the first command. @@ -716,7 +683,7 @@ break; case NOT_READY: printk(KERN_INFO "Device %s not ready.\n", - kdevname(SCpnt->request.rq_dev)); + kdevname(req->rq_dev)); SCpnt = scsi_end_request(SCpnt, 0, this_count); return; break; @@ -760,7 +727,7 @@ * We sometimes get this cruft in the event that a medium error * isn't properly reported. */ - SCpnt = scsi_end_request(SCpnt, 0, SCpnt->request.current_nr_sectors); + SCpnt = scsi_end_request(SCpnt, 0, req->current_nr_sectors); return; } } @@ -774,7 +741,7 @@ * Arguments: request - I/O request we are preparing to queue. * * Lock status: No locks assumed to be held, but as it happens the - * io_request_lock is held when this is called. + * q->queue_lock is held when this is called. * * Returns: Nothing * @@ -788,7 +755,7 @@ kdev_t dev = req->rq_dev; int major = MAJOR(dev); - ASSERT_LOCK(&io_request_lock, 1); + ASSERT_LOCK(&req->q->queue_lock, 1); for (spnt = scsi_devicelist; spnt; spnt = spnt->next) { /* @@ -846,7 +813,7 @@ struct Scsi_Host *SHpnt; struct Scsi_Device_Template *STpnt; - ASSERT_LOCK(&io_request_lock, 1); + ASSERT_LOCK(&q->queue_lock, 1); SDpnt = (Scsi_Device *) q->queuedata; if (!SDpnt) { @@ -864,10 +831,17 @@ * released the lock and grabbed it again, so each time * we need to check to see if the queue is plugged or not. */ - if (SHpnt->in_recovery || q->plugged) + if (SHpnt->in_recovery || blk_queue_plugged(q)) return; /* + * if we are at the max queue depth, don't attempt to queue + * more + */ + if (SHpnt->host_busy == SDpnt->queue_depth) + break; + + /* + * If the device cannot accept another request, then quit.
*/ if (SDpnt->device_blocked) { @@ -913,9 +887,9 @@ */ SDpnt->was_reset = 0; if (SDpnt->removable && !in_interrupt()) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); scsi_ioctl(SDpnt, SCSI_IOCTL_DOORLOCK, 0); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); continue; } } @@ -924,14 +898,14 @@ * If we couldn't find a request that could be queued, then we * can also quit. */ - if (list_empty(&q->queue_head)) + if (blk_queue_empty(q)) break; /* - * Loop through all of the requests in this queue, and find - * one that is queueable. + * get next queueable request. cur_rq would be set if we + * previously had to abort for some reason */ - req = blkdev_entry_next_request(&q->queue_head); + req = elv_next_request(q); /* * Find the actual device driver associated with this command. @@ -951,9 +925,8 @@ if( SRpnt->sr_magic == SCSI_REQ_MAGIC ) { SCpnt = scsi_allocate_device(SRpnt->sr_device, FALSE, FALSE); - if( !SCpnt ) { + if (!SCpnt) break; - } scsi_init_cmd_from_req(SCpnt, SRpnt); } @@ -973,7 +946,7 @@ * scatter-gather segments here - the * normal case code assumes this to be * correct, as it would be a performance - * lose to always recount. Handling + * loss to always recount. Handling * errors is always unusual, of course. */ recount_segments(SCpnt); @@ -985,9 +958,8 @@ * while the queue is locked and then break out of the * loop. Otherwise loop around and try another request. */ - if (!SCpnt) { + if (!SCpnt) break; - } } /* @@ -1024,7 +996,7 @@ * another. */ req = NULL; - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&q->queue_lock); if (SCpnt->request.cmd != SPECIAL) { /* @@ -1054,7 +1026,7 @@ { panic("Should not have leftover blocks\n"); } - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); SHpnt->host_busy--; SDpnt->device_busy--; continue; @@ -1070,7 +1042,7 @@ { panic("Should not have leftover blocks\n"); } - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); SHpnt->host_busy--; SDpnt->device_busy--; continue; @@ -1091,7 +1063,7 @@ * Now we need to grab the lock again. We are about to mess * with the request queue and try to find another command. */ - spin_lock_irq(&io_request_lock); + spin_lock_irq(&q->queue_lock); } } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/scsi_merge.c linux/drivers/scsi/scsi_merge.c --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/scsi_merge.c Wed Oct 31 09:39:13 2001 +++ linux/drivers/scsi/scsi_merge.c Thu Nov 1 12:30:19 2001 @@ -6,6 +6,7 @@ * Based upon conversations with large numbers * of people at Linux Expo. * Support for dynamic DMA mapping: Jakub Jelinek (jakub@redhat.com). + * Support for highmem I/O: Jens Axboe */ /* @@ -71,51 +72,6 @@ */ #define DMA_SEGMENT_SIZE_LIMITED -#ifdef CONFIG_SCSI_DEBUG_QUEUES -/* - * Enable a bunch of additional consistency checking. Turn this off - * if you are benchmarking. - */ -static int dump_stats(struct request *req, - int use_clustering, - int dma_host, - int segments) -{ - struct buffer_head *bh; - - /* - * Dump the information that we have. We know we have an - * inconsistency. - */ - printk("nr_segments is %x\n", req->nr_segments); - printk("counted segments is %x\n", segments); - printk("Flags %d %d\n", use_clustering, dma_host); - for (bh = req->bh; bh->b_reqnext != NULL; bh = bh->b_reqnext) - { - printk("Segment 0x%p, blocks %d, addr 0x%lx\n", - bh, - bh->b_size >> 9, - virt_to_phys(bh->b_data - 1)); - } - panic("Ththththaats all folks. 
Too dangerous to continue.\n"); -} - - -/* - * Simple sanity check that we will use for the first go around - * in order to ensure that we are doing the counting correctly. - * This can be removed for optimization. - */ -#define SANITY_CHECK(req, _CLUSTER, _DMA) \ - if( req->nr_segments != __count_segments(req, _CLUSTER, _DMA, NULL) ) \ - { \ - printk("Incorrect segment count at 0x%p", current_text_addr()); \ - dump_stats(req, _CLUSTER, _DMA, __count_segments(req, _CLUSTER, _DMA, NULL)); \ - } -#else -#define SANITY_CHECK(req, _CLUSTER, _DMA) -#endif - static void dma_exhausted(Scsi_Cmnd * SCpnt, int i) { int jj; @@ -191,8 +147,7 @@ { int ret = 1; int reqsize = 0; - struct buffer_head *bh; - struct buffer_head *bhnext; + struct bio *bio, *bionext; if( remainder != NULL ) { reqsize = *remainder; @@ -201,21 +156,21 @@ /* * Add in the size increment for the first buffer. */ - bh = req->bh; + bio = req->bio; #ifdef DMA_SEGMENT_SIZE_LIMITED - if( reqsize + bh->b_size > PAGE_SIZE ) { + if( reqsize + bio_size(bio) > PAGE_SIZE ) { ret++; - reqsize = bh->b_size; + reqsize = bio_size(bio); } else { - reqsize += bh->b_size; + reqsize += bio_size(bio); } #else - reqsize += bh->b_size; + reqsize += bio_size(bio); #endif - for (bh = req->bh, bhnext = bh->b_reqnext; - bhnext != NULL; - bh = bhnext, bhnext = bh->b_reqnext) { + for (bio = req->bio, bionext = bio->bi_next; + bionext != NULL; + bio = bionext, bionext = bio->bi_next) { if (use_clustering) { /* * See if we can do this without creating another @@ -223,11 +178,10 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(bhnext->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_phys(bionext) - 1 == ISA_DMA_THRESHOLD) { ret++; - reqsize = bhnext->b_size; - } else if (CONTIGUOUS_BUFFERS(bh, bhnext)) { + reqsize = bio_size(bionext); + } else if (BIO_CONTIG(bio, bionext)) { /* * This one is OK. Let it go. */ @@ -241,23 +195,22 @@ * kind of screwed and we need to start * another segment. */ - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD - && reqsize + bhnext->b_size > PAGE_SIZE ) + if(dma_host && bio_to_phys(bionext) - 1 >= ISA_DMA_THRESHOLD + && reqsize + bio_size(bionext) > PAGE_SIZE ) { ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); continue; } #endif - reqsize += bhnext->b_size; + reqsize += bio_size(bionext); continue; } ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); } else { ret++; - reqsize = bhnext->b_size; + reqsize = bio_size(bionext); } } if( remainder != NULL ) { @@ -304,14 +257,13 @@ } #define MERGEABLE_BUFFERS(X,Y) \ -(((((long)(X)->b_data+(X)->b_size)|((long)(Y)->b_data)) & \ +(((((long)bio_to_phys((X))+bio_size((X)))|((long)bio_to_phys((Y)))) & \ (DMA_CHUNK_SIZE - 1)) == 0) #ifdef DMA_CHUNK_SIZE static inline int scsi_new_mergeable(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { /* * pci_map_sg will be able to merge these two @@ -320,26 +272,26 @@ * scsi.c allocates for this purpose * min(64,sg_tablesize) entries. 
*/ - if (req->nr_segments >= max_segments || - req->nr_segments >= SHpnt->sg_tablesize) + if (req->nr_segments >= q->max_segments) return 0; + req->nr_segments++; return 1; } static inline int scsi_new_segment(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { /* * pci_map_sg won't be able to map these two * into a single hardware sg entry, so we have to * check if things fit into sg_tablesize. */ - if (req->nr_hw_segments >= SHpnt->sg_tablesize || - req->nr_segments >= SHpnt->sg_tablesize) + if (req->nr_hw_segments >= q->max_segments || + req->nr_segments >= q->max_segments) return 0; + req->nr_hw_segments++; req->nr_segments++; return 1; @@ -347,20 +299,18 @@ #else static inline int scsi_new_segment(request_queue_t * q, struct request * req, - struct Scsi_Host *SHpnt, - int max_segments) + struct Scsi_Host *SHpnt) { - if (req->nr_segments < SHpnt->sg_tablesize && - req->nr_segments < max_segments) { + if (req->nr_segments < q->max_segments) { /* * This will form the start of a new segment. Bump the * counter. */ req->nr_segments++; return 1; - } else { - return 0; } + + return 0; } #endif @@ -371,7 +321,7 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * use_clustering - 1 if this host wishes to use clustering * dma_host - 1 if this host has ISA DMA issues (bus doesn't * expose all of the address lines, so that DMA cannot @@ -380,7 +330,7 @@ * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Some drivers have limited scatter-gather table sizes, and * thus they cannot queue an infinitely large command. This @@ -399,8 +349,7 @@ */ __inline static int __scsi_back_merge_fn(request_queue_t * q, struct request *req, - struct buffer_head *bh, - int max_segments, + struct bio *bio, int use_clustering, int dma_host) { @@ -412,12 +361,16 @@ SDpnt = (Scsi_Device *) q->queuedata; SHpnt = SDpnt->host; + /* + * FIXME: remember to look into this /jens + */ #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; #endif - if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) + if (req->nr_sectors + bio_sectors(bio) > SHpnt->max_sectors) + return 0; + else if (!BIO_PHYS_4G(req->biotail, bio)) return 0; if (use_clustering) { @@ -427,17 +380,15 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. 
*/ - if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_phys(req->biotail) - 1 == ISA_DMA_THRESHOLD) { goto new_end_segment; } - if (CONTIGUOUS_BUFFERS(req->bhtail, bh)) { + if (BIO_CONTIG(req->biotail, bio)) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { + if( dma_host && bio_to_phys(bio) - 1 >= ISA_DMA_THRESHOLD ) { segment_size = 0; count = __count_segments(req, use_clustering, dma_host, &segment_size); - if( segment_size + bh->b_size > PAGE_SIZE ) { + if( segment_size + bio_size(bio) > PAGE_SIZE ) { goto new_end_segment; } } @@ -450,16 +401,15 @@ } new_end_segment: #ifdef DMA_CHUNK_SIZE - if (MERGEABLE_BUFFERS(req->bhtail, bh)) - return scsi_new_mergeable(q, req, SHpnt, max_segments); + if (MERGEABLE_BUFFERS(req->biotail, bio)) + return scsi_new_mergeable(q, req, SHpnt); #endif - return scsi_new_segment(q, req, SHpnt, max_segments); + return scsi_new_segment(q, req, SHpnt); } __inline static int __scsi_front_merge_fn(request_queue_t * q, struct request *req, - struct buffer_head *bh, - int max_segments, + struct bio *bio, int use_clustering, int dma_host) { @@ -472,11 +422,12 @@ SHpnt = SDpnt->host; #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; #endif - if ((req->nr_sectors + (bh->b_size >> 9)) > SHpnt->max_sectors) + if (req->nr_sectors + bio_sectors(bio) > SHpnt->max_sectors) + return 0; + else if (!BIO_PHYS_4G(bio, req->bio)) return 0; if (use_clustering) { @@ -486,15 +437,13 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(bh->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_phys(bio) - 1 == ISA_DMA_THRESHOLD) { goto new_start_segment; } - if (CONTIGUOUS_BUFFERS(bh, req->bh)) { + if (BIO_CONTIG(bio, req->bio)) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( dma_host - && virt_to_phys(bh->b_data) - 1 >= ISA_DMA_THRESHOLD ) { - segment_size = bh->b_size; + if( dma_host && bio_to_phys(bio) - 1 >= ISA_DMA_THRESHOLD ) { + segment_size = bio_size(bio); count = __count_segments(req, use_clustering, dma_host, &segment_size); if( count != req->nr_segments ) { goto new_start_segment; @@ -509,10 +458,10 @@ } new_start_segment: #ifdef DMA_CHUNK_SIZE - if (MERGEABLE_BUFFERS(bh, req->bh)) - return scsi_new_mergeable(q, req, SHpnt, max_segments); + if (MERGEABLE_BUFFERS(bio, req->bio)) + return scsi_new_mergeable(q, req, SHpnt); #endif - return scsi_new_segment(q, req, SHpnt, max_segments); + return scsi_new_segment(q, req, SHpnt); } /* @@ -522,12 +471,12 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Optimized for different cases depending upon whether * ISA DMA is in use and whether clustering should be used. 
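The merge-function hunks above drop the per-call max_segments argument in favour of the queue-wide q->max_segments limit and test struct bio instead of buffer_head. A condensed sketch of the back-merge decision follows, assuming the helpers shown above (bio_sectors(), BIO_CONTIG(), BIO_PHYS_4G()); it illustrates the logic rather than reproducing __scsi_back_merge_fn():

static int can_back_merge(request_queue_t *q, struct request *req,
                          struct bio *bio, struct Scsi_Host *shost)
{
        /* per-command transfer size limit of the host */
        if (req->nr_sectors + bio_sectors(bio) > shost->max_sectors)
                return 0;

        /* both ends must be DMA-reachable within the same 4GB segment */
        if (!BIO_PHYS_4G(req->biotail, bio))
                return 0;

        /* physically contiguous data extends the tail segment for free */
        if (BIO_CONTIG(req->biotail, bio))
                return 1;

        /* otherwise the merge costs one more scatter-gather segment */
        if (req->nr_segments >= q->max_segments)
                return 0;

        req->nr_segments++;
        return 1;
}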
@@ -535,15 +484,12 @@ #define MERGEFCT(_FUNCTION, _BACK_FRONT, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct buffer_head * bh, \ - int max_segments) \ + struct bio *bio) \ { \ int ret; \ - SANITY_CHECK(req, _CLUSTER, _DMA); \ ret = __scsi_ ## _BACK_FRONT ## _merge_fn(q, \ req, \ - bh, \ - max_segments, \ + bio, \ _CLUSTER, \ _DMA); \ return ret; \ @@ -576,7 +522,7 @@ * Returns: 1 if it is OK to merge the two requests. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Some drivers have limited scatter-gather table sizes, and * thus they cannot queue an infinitely large command. This @@ -596,7 +542,6 @@ __inline static int __scsi_merge_requests_fn(request_queue_t * q, struct request *req, struct request *next, - int max_segments, int use_clustering, int dma_host) { @@ -609,31 +554,30 @@ */ if (req->special || next->special) return 0; + else if (!BIO_PHYS_4G(req->biotail, next->bio)) + return 0; SDpnt = (Scsi_Device *) q->queuedata; SHpnt = SDpnt->host; #ifdef DMA_CHUNK_SIZE - if (max_segments > 64) - max_segments = 64; + q->max_segments = 64; /* If it would not fit into prepared memory space for sg chain, * then don't allow the merge. */ - if (req->nr_segments + next->nr_segments - 1 > max_segments || - req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments - 1 > q->max_segments) return 0; - } - if (req->nr_hw_segments + next->nr_hw_segments - 1 > SHpnt->sg_tablesize) { + + if (req->nr_hw_segments + next->nr_hw_segments - 1 > q->max_segments) return 0; - } #else /* * If the two requests together are too large (even assuming that we * can merge the boundary requests into one segment, then don't * allow the merge. */ - if (req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments - 1 > q->max_segments) { return 0; } #endif @@ -652,8 +596,7 @@ * DMA capable host, make sure that a segment doesn't span * the DMA threshold boundary. */ - if (dma_host && - virt_to_phys(req->bhtail->b_data) - 1 == ISA_DMA_THRESHOLD) { + if (dma_host && bio_to_phys(req->biotail) - 1 == ISA_DMA_THRESHOLD) { goto dont_combine; } #ifdef DMA_SEGMENT_SIZE_LIMITED @@ -662,8 +605,8 @@ * buffers in chunks of PAGE_SIZE or less. */ if (dma_host - && CONTIGUOUS_BUFFERS(req->bhtail, next->bh) - && virt_to_phys(req->bhtail->b_data) - 1 >= ISA_DMA_THRESHOLD ) + && BIO_CONTIG(req->biotail, next->bio) + && bio_to_phys(req->biotail) - 1 >= ISA_DMA_THRESHOLD ) { int segment_size = 0; int count = 0; @@ -675,7 +618,7 @@ } } #endif - if (CONTIGUOUS_BUFFERS(req->bhtail, next->bh)) { + if (BIO_CONTIG(req->biotail, next->bio)) { /* * This one is OK. Let it go. */ @@ -688,17 +631,16 @@ } dont_combine: #ifdef DMA_CHUNK_SIZE - if (req->nr_segments + next->nr_segments > max_segments || - req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments > q->max_segments) return 0; - } + /* If dynamic DMA mapping can merge last segment in req with * first segment in next, then the check for hw segments was * done above already, so we can always merge. 
*/ - if (MERGEABLE_BUFFERS (req->bhtail, next->bh)) { + if (MERGEABLE_BUFFERS (req->biotail, next->bio)) { req->nr_hw_segments += next->nr_hw_segments - 1; - } else if (req->nr_hw_segments + next->nr_hw_segments > SHpnt->sg_tablesize) { + } else if (req->nr_hw_segments + next->nr_hw_segments > q->max_segments) return 0; } else { req->nr_hw_segments += next->nr_hw_segments; @@ -711,8 +653,7 @@ * Make sure we can fix something that is the sum of the two. * A slightly stricter test than we had above. */ - if (req->nr_segments + next->nr_segments > max_segments || - req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments > q->max_segments) { return 0; } else { /* @@ -732,12 +673,12 @@ * * Arguments: q - Queue for which we are merging request. * req - request into which we wish to merge. - * bh - Block which we may wish to merge into request + * bio - Block which we may wish to merge into request * * Returns: 1 if it is OK to merge the block into the request. 0 * if it is not OK. * - * Lock status: io_request_lock is assumed to be held here. + * Lock status: queue lock is assumed to be held here. * * Notes: Optimized for different cases depending upon whether * ISA DMA is in use and whether clustering should be used. @@ -745,12 +686,10 @@ #define MERGEREQFCT(_FUNCTION, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct request * next, \ - int max_segments) \ + struct request * next) \ { \ int ret; \ - SANITY_CHECK(req, _CLUSTER, _DMA); \ - ret = __scsi_merge_requests_fn(q, req, next, max_segments, _CLUSTER, _DMA); \ + ret = __scsi_merge_requests_fn(q, req, next, _CLUSTER, _DMA); \ return ret; \ } @@ -798,8 +737,8 @@ int use_clustering, int dma_host) { - struct buffer_head * bh; - struct buffer_head * bhprev; + struct bio * bio; + struct bio * bioprev; char * buff; int count; int i; @@ -809,30 +748,8 @@ int this_count; void ** bbpnt; - /* - * FIXME(eric) - don't inline this - it doesn't depend on the - * integer flags. Come to think of it, I don't think this is even - * needed any more. Need to play with it and see if we hit the - * panic. If not, then don't bother. - */ - if (!SCpnt->request.bh) { - /* - * Case of page request (i.e. raw device), or unlinked buffer - * Typically used for swapping, but this isn't how we do - * swapping any more. - */ - panic("I believe this is dead code. If we hit this, I was wrong"); -#if 0 - SCpnt->request_bufflen = SCpnt->request.nr_sectors << 9; - SCpnt->request_buffer = SCpnt->request.buffer; - SCpnt->use_sg = 0; - /* - * FIXME(eric) - need to handle DMA here. - */ -#endif - return 1; - } req = &SCpnt->request; + /* * First we need to know how many scatter gather segments are needed. */ @@ -848,16 +765,15 @@ * buffer. */ if (dma_host && scsi_dma_free_sectors <= 10) { - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; goto single_segment; } + /* - * Don't bother with scatter-gather if there is only one segment. + * we used to not use scatter-gather for single segment request, + * but now we do (it makes highmem I/O easier to support without + * kmapping pages) */ - if (count == 1) { - this_count = SCpnt->request.nr_sectors; - goto single_segment; - } SCpnt->use_sg = count; /* @@ -875,19 +791,22 @@ * round it up. */ SCpnt->sglist_len = (SCpnt->sglist_len + 511) & ~511; - + sgpnt = (struct scatterlist *) scsi_malloc(SCpnt->sglist_len); /* * Now fill the scatter-gather table. 
*/ if (!sgpnt) { + struct Scsi_Host *SHpnt = SCpnt->host; + /* * If we cannot allocate the scatter-gather table, then * simply write the first buffer all by itself. */ printk("Warning - running *really* short on DMA buffers\n"); - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; + printk("SCSI: depth is %d, # segs %d, # hw segs %d\n", SHpnt->host_busy, req->nr_segments, req->nr_hw_segments); goto single_segment; } /* @@ -897,7 +816,8 @@ memset(sgpnt, 0, SCpnt->sglist_len); SCpnt->request_buffer = (char *) sgpnt; SCpnt->request_bufflen = 0; - bhprev = NULL; + req->buffer = NULL; + bioprev = NULL; if (dma_host) bbpnt = (void **) ((char *)sgpnt + @@ -907,13 +827,11 @@ SCpnt->bounce_buffers = bbpnt; - for (count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { - if (use_clustering && bhprev != NULL) { - if (dma_host && - virt_to_phys(bhprev->b_data) - 1 == ISA_DMA_THRESHOLD) { + for (count = 0, bio = req->bio; bio; bio = bio->bi_next) { + if (use_clustering && bioprev != NULL) { + if (dma_host && bio_to_phys(bioprev) - 1 == ISA_DMA_THRESHOLD) { /* Nothing - fall through */ - } else if (CONTIGUOUS_BUFFERS(bhprev, bh)) { + } else if (BIO_CONTIG(bioprev, bio)) { /* * This one is OK. Let it go. Note that we * do not have the ability to allocate @@ -922,33 +840,45 @@ */ if( dma_host ) { #ifdef DMA_SEGMENT_SIZE_LIMITED - if( virt_to_phys(bh->b_data) - 1 < ISA_DMA_THRESHOLD - || sgpnt[count - 1].length + bh->b_size <= PAGE_SIZE ) { - sgpnt[count - 1].length += bh->b_size; - bhprev = bh; + if( bio_to_phys(bio) - 1 < ISA_DMA_THRESHOLD + || sgpnt[count - 1].length + bio_size(bio) <= PAGE_SIZE ) { + sgpnt[count - 1].length += bio_size(bio); + bioprev = bio; continue; } #else - sgpnt[count - 1].length += bh->b_size; - bhprev = bh; + sgpnt[count - 1].length += bio_size(bio); + bioprev = bio; continue; #endif } else { - sgpnt[count - 1].length += bh->b_size; - SCpnt->request_bufflen += bh->b_size; - bhprev = bh; + sgpnt[count - 1].length += bio_size(bio); + SCpnt->request_bufflen += bio_size(bio); + bioprev = bio; continue; } } } - count++; - sgpnt[count - 1].address = bh->b_data; - sgpnt[count - 1].page = NULL; - sgpnt[count - 1].length += bh->b_size; - if (!dma_host) { - SCpnt->request_bufflen += bh->b_size; + + if (SCpnt->host->can_dma_32) { + sgpnt[count].address = NULL; + sgpnt[count].page = bio_page(bio); + sgpnt[count].offset = bio_offset(bio); + } else { + if (PageHighMem(bio_page(bio))) + BUG(); + + sgpnt[count].address = bio_data(bio); + sgpnt[count].page = NULL; } - bhprev = bh; + + sgpnt[count].length = bio_size(bio); + + if (!dma_host) + SCpnt->request_bufflen += bio_size(bio); + + count++; + bioprev = bio; } /* @@ -956,13 +886,14 @@ */ if (count != SCpnt->use_sg) { printk("Incorrect number of segments after building list\n"); -#ifdef CONFIG_SCSI_DEBUG_QUEUES - dump_stats(req, use_clustering, dma_host, count); -#endif + scsi_free(SCpnt->request_buffer, SCpnt->sglist_len); + this_count = req->current_nr_sectors; + goto single_segment; } - if (!dma_host) { + + if (!dma_host) return 1; - } + /* * Now allocate bounce buffers, if needed. 
*/ @@ -971,7 +902,7 @@ sectors = (sgpnt[i].length >> 9); SCpnt->request_bufflen += sgpnt[i].length; if (virt_to_phys(sgpnt[i].address) + sgpnt[i].length - 1 > - ISA_DMA_THRESHOLD) { + ISA_DMA_THRESHOLD) { if( scsi_dma_free_sectors - sectors <= 10 ) { /* * If this would nearly drain the DMA @@ -989,9 +920,12 @@ break; } - bbpnt[i] = sgpnt[i].address; - sgpnt[i].address = - (char *) scsi_malloc(sgpnt[i].length); + /* + * this is not a dma host, so it will never + * be a highmem page + */ + bbpnt[i] = page_address(sgpnt[i].page) +sgpnt[i].offset; + sgpnt[i].address = (char *)scsi_malloc(sgpnt[i].length); /* * If we cannot allocate memory for this DMA bounce * buffer, then queue just what we have done so far. @@ -1005,7 +939,7 @@ } break; } - if (SCpnt->request.cmd == WRITE) { + if (req->cmd == WRITE) { memcpy(sgpnt[i].address, bbpnt[i], sgpnt[i].length); } @@ -1050,21 +984,20 @@ * single-block requests if we had hundreds of free sectors. */ if( scsi_dma_free_sectors > 30 ) { - for (this_count = 0, bh = SCpnt->request.bh; - bh; bh = bh->b_reqnext) { + for (this_count = 0, bio = req->bio; bio; bio = bio->bi_next) { if( scsi_dma_free_sectors - this_count < 30 || this_count == sectors ) { break; } - this_count += bh->b_size >> 9; + this_count += bio_sectors(bio); } } else { /* * Yow! Take the absolute minimum here. */ - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; } /* @@ -1077,28 +1010,33 @@ * segment. Possibly the entire request, or possibly a small * chunk of the entire request. */ - bh = SCpnt->request.bh; - buff = SCpnt->request.buffer; - if (dma_host) { + bio = req->bio; + buff = req->buffer = bio_data(bio); + + if (dma_host || PageHighMem(bio_page(bio))) { /* * Allocate a DMA bounce buffer. If the allocation fails, fall * back and allocate a really small one - enough to satisfy * the first buffer. */ - if (virt_to_phys(SCpnt->request.bh->b_data) - + (this_count << 9) - 1 > ISA_DMA_THRESHOLD) { + if (bio_to_phys(bio) + bio_size(bio) - 1 > ISA_DMA_THRESHOLD) { buff = (char *) scsi_malloc(this_count << 9); if (!buff) { printk("Warning - running low on DMA memory\n"); - this_count = SCpnt->request.current_nr_sectors; + this_count = req->current_nr_sectors; buff = (char *) scsi_malloc(this_count << 9); if (!buff) { dma_exhausted(SCpnt, 0); + return 0; } } - if (SCpnt->request.cmd == WRITE) - memcpy(buff, (char *) SCpnt->request.buffer, this_count << 9); + if (req->cmd == WRITE) { + unsigned long flags; + char *buf = bio_kmap_irq(bio, &flags); + memcpy(buff, buf, this_count << 9); + bio_kunmap_irq(buf, &flags); + } } } SCpnt->request_bufflen = this_count << 9; @@ -1139,11 +1077,9 @@ */ void initialize_merge_fn(Scsi_Device * SDpnt) { - request_queue_t *q; - struct Scsi_Host *SHpnt; - SHpnt = SDpnt->host; - - q = &SDpnt->request_queue; + struct Scsi_Host *SHpnt = SDpnt->host; + request_queue_t *q = &SDpnt->request_queue; + dma64_addr_t bounce_limit; /* * If the host has already selected a merge manager, then don't @@ -1185,4 +1121,20 @@ q->merge_requests_fn = scsi_merge_requests_fn_dc; SDpnt->scsi_init_io_fn = scsi_init_io_vdc; } + + /* + * now enable highmem I/O, if appropriate + */ + bounce_limit = BLK_BOUNCE_HIGH; + if (SHpnt->can_dma_32 && (SDpnt->type == TYPE_DISK)) { + if (!PCI_DMA_BUS_IS_PHYS) + /* Platforms with virtual-DMA translation + * hardware have no practical limit. 
+ */ + bounce_limit = BLK_BOUNCE_ANY; + else + bounce_limit = SHpnt->pci_dev->dma_mask; + } + + blk_queue_bounce_limit(q, bounce_limit); } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/scsi_obsolete.c linux/drivers/scsi/scsi_obsolete.c --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/scsi_obsolete.c Thu Jul 5 20:28:17 2001 +++ linux/drivers/scsi/scsi_obsolete.c Mon Oct 15 10:41:43 2001 @@ -145,9 +145,10 @@ void scsi_old_times_out(Scsi_Cmnd * SCpnt) { + struct Scsi_Host *host = SCpnt->host; unsigned long flags; - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&host->host_lock, flags); /* Set the serial_number_at_timeout to the current serial_number */ SCpnt->serial_number_at_timeout = SCpnt->serial_number; @@ -164,7 +165,7 @@ break; case IN_ABORT: printk("SCSI host %d abort (pid %ld) timed out - resetting\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); if (!scsi_reset(SCpnt, SCSI_RESET_ASYNCHRONOUS)) break; case IN_RESET: @@ -175,7 +176,7 @@ */ printk("SCSI host %d channel %d reset (pid %ld) timed out - " "trying harder\n", - SCpnt->host->host_no, SCpnt->channel, SCpnt->pid); + host->host_no, SCpnt->channel, SCpnt->pid); SCpnt->internal_timeout &= ~IN_RESET; SCpnt->internal_timeout |= IN_RESET2; scsi_reset(SCpnt, @@ -188,7 +189,7 @@ * Maybe the HBA itself crashed and this will shake it loose. */ printk("SCSI host %d reset (pid %ld) timed out - trying to shake it loose\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); SCpnt->internal_timeout &= ~(IN_RESET | IN_RESET2); SCpnt->internal_timeout |= IN_RESET3; scsi_reset(SCpnt, @@ -197,19 +198,19 @@ default: printk("SCSI host %d reset (pid %ld) timed out again -\n", - SCpnt->host->host_no, SCpnt->pid); + host->host_no, SCpnt->pid); printk("probably an unrecoverable SCSI bus or device hang.\n"); break; } - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&host->host_lock, flags); } /* * From what I can find in scsi_obsolete.c, this function is only called * by scsi_old_done and scsi_reset. Both of these functions run with the - * io_request_lock already held, so we need do nothing here about grabbing + * host_lock already held, so we need do nothing here about grabbing * any locks. */ static void scsi_request_sense(Scsi_Cmnd * SCpnt) @@ -217,7 +218,6 @@ SCpnt->flags |= WAS_SENSE | ASKED_FOR_SENSE; update_timeout(SCpnt, SENSE_TIMEOUT); - memcpy((void *) SCpnt->cmnd, (void *) generic_sense, sizeof(generic_sense)); memset((void *) SCpnt->sense_buffer, 0, @@ -238,9 +238,9 @@ * Ugly, ugly. The newer interfaces all assume that the lock * isn't held. Mustn't disappoint, or we deadlock the system. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&SCpnt->host->host_lock); scsi_dispatch_cmd(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&SCpnt->host->host_lock); } @@ -646,9 +646,9 @@ * assume that the lock isn't held. Mustn't * disappoint, or we deadlock the system. */ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); scsi_dispatch_cmd(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } break; default: @@ -674,7 +674,7 @@ * use, the upper code is run from a bottom half handler, so * it isn't an issue. 
*/ - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); SRpnt = SCpnt->sc_request; if( SRpnt != NULL ) { SRpnt->sr_result = SRpnt->sr_command->result; @@ -686,7 +686,7 @@ } SCpnt->done(SCpnt); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } #undef CMD_FINISHED #undef REDO @@ -725,10 +725,10 @@ return 0; } if (SCpnt->internal_timeout & IN_ABORT) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); while (SCpnt->internal_timeout & IN_ABORT) barrier(); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } else { SCpnt->internal_timeout |= IN_ABORT; oldto = update_timeout(SCpnt, ABORT_TIMEOUT); @@ -908,10 +908,10 @@ return 0; } if (SCpnt->internal_timeout & IN_RESET) { - spin_unlock_irq(&io_request_lock); + spin_unlock_irq(&host->host_lock); while (SCpnt->internal_timeout & IN_RESET) barrier(); - spin_lock_irq(&io_request_lock); + spin_lock_irq(&host->host_lock); } else { SCpnt->internal_timeout |= IN_RESET; update_timeout(SCpnt, RESET_TIMEOUT); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/scsi_queue.c linux/drivers/scsi/scsi_queue.c --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/scsi_queue.c Fri Feb 9 20:30:23 2001 +++ linux/drivers/scsi/scsi_queue.c Mon Oct 15 10:41:43 2001 @@ -80,6 +80,7 @@ { struct Scsi_Host *host; unsigned long flags; + request_queue_t *q = &cmd->device->request_queue; SCSI_LOG_MLQUEUE(1, printk("Inserting command %p into mlqueue\n", cmd)); @@ -137,10 +138,10 @@ * Decrement the counters, since these commands are no longer * active on the host/device. */ - spin_lock_irqsave(&io_request_lock, flags); + spin_lock_irqsave(&q->queue_lock, flags); cmd->host->host_busy--; cmd->device->device_busy--; - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); /* * Insert this command at the head of the queue for it's device. diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/sd.c linux/drivers/scsi/sd.c --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/sd.c Wed Oct 31 09:39:13 2001 +++ linux/drivers/scsi/sd.c Tue Oct 30 09:48:43 2001 @@ -61,10 +61,6 @@ #include -/* - * static const char RCSid[] = "$Header:"; - */ - #define SD_MAJOR(i) (!(i) ? 
SCSI_DISK0_MAJOR : SCSI_DISK1_MAJOR-1+(i)) #define SCSI_DISKS_PER_MAJOR 16 @@ -72,8 +68,7 @@ #define SD_MINOR_NUMBER(i) ((i) & 255) #define MKDEV_SD_PARTITION(i) MKDEV(SD_MAJOR_NUMBER(i), (i) & 255) #define MKDEV_SD(index) MKDEV_SD_PARTITION((index) << 4) -#define N_USED_SCSI_DISKS (sd_template.dev_max + SCSI_DISKS_PER_MAJOR - 1) -#define N_USED_SD_MAJORS (N_USED_SCSI_DISKS / SCSI_DISKS_PER_MAJOR) +#define N_USED_SD_MAJORS (1 + ((sd_template.dev_max - 1) >> 4)) #define MAX_RETRIES 5 @@ -89,7 +84,6 @@ static Scsi_Disk *rscsi_disks; static int *sd_sizes; static int *sd_blocksizes; -static int *sd_hardsizes; /* Hardware sector size */ static int *sd_max_sectors; static int check_scsidisk_media_change(kdev_t); @@ -97,7 +91,6 @@ static int sd_init_onedisk(int); - static int sd_init(void); static void sd_finish(void); static int sd_attach(Scsi_Device *); @@ -124,7 +117,6 @@ init_command:sd_init_command, }; - static void rw_intr(Scsi_Cmnd * SCpnt); #if defined(CONFIG_PPC) @@ -181,9 +173,11 @@ diskinfo[0] = 0x40; diskinfo[1] = 0x20; - diskinfo[2] = rscsi_disks[DEVICE_NR(dev)].capacity >> 11; + diskinfo[2] = + rscsi_disks[DEVICE_NR(dev)].capacity >> 11; - /* override with calculated, extended default, or driver values */ + /* override with calculated, extended default, + or driver values */ if(host->hostt->bios_param != NULL) host->hostt->bios_param(&rscsi_disks[DEVICE_NR(dev)], @@ -193,48 +187,15 @@ dev, &diskinfo[0]); if (put_user(diskinfo[0], &loc->heads) || - put_user(diskinfo[1], &loc->sectors) || - put_user(diskinfo[2], &loc->cylinders) || - put_user(sd[SD_PARTITION(inode->i_rdev)].start_sect, &loc->start)) - return -EFAULT; - return 0; - } - case HDIO_GETGEO_BIG: - { - struct hd_big_geometry *loc = (struct hd_big_geometry *) arg; - - if(!loc) - return -EINVAL; - - host = rscsi_disks[DEVICE_NR(dev)].device->host; - - /* default to most commonly used values */ - - diskinfo[0] = 0x40; - diskinfo[1] = 0x20; - diskinfo[2] = rscsi_disks[DEVICE_NR(dev)].capacity >> 11; - - /* override with calculated, extended default, or driver values */ - - if(host->hostt->bios_param != NULL) - host->hostt->bios_param(&rscsi_disks[DEVICE_NR(dev)], - dev, - &diskinfo[0]); - else scsicam_bios_param(&rscsi_disks[DEVICE_NR(dev)], - dev, &diskinfo[0]); - - if (put_user(diskinfo[0], &loc->heads) || - put_user(diskinfo[1], &loc->sectors) || - put_user(diskinfo[2], (unsigned int *) &loc->cylinders) || - put_user(sd[SD_PARTITION(inode->i_rdev)].start_sect, &loc->start)) + put_user(diskinfo[1], &loc->sectors) || + put_user(diskinfo[2], &loc->cylinders) || + put_user(get_start_sect(inode->i_rdev), + &loc->start)) return -EFAULT; return 0; } - case BLKGETSIZE: /* Return device size */ - return put_user(sd[SD_PARTITION(inode->i_rdev)].nr_sects, (unsigned long *) arg); + case BLKGETSIZE: case BLKGETSIZE64: - return put_user((u64)sd[SD_PARTITION(inode->i_rdev)].nr_sects << 9, (u64 *)arg); - case BLKROSET: case BLKROGET: case BLKRASET: @@ -242,10 +203,12 @@ case BLKFLSBUF: case BLKSSZGET: case BLKPG: - case BLKELVGET: - case BLKELVSET: + case BLKELVGET: + case BLKELVSET: case BLKBSZGET: case BLKBSZSET: + case BLKHASHPROF: + case BLKHASHCLEAR: return blk_ioctl(inode->i_rdev, cmd, arg); case BLKRRPART: /* Re-read partition tables */ @@ -254,7 +217,8 @@ return revalidate_scsidisk(dev, 1); default: - return scsi_ioctl(rscsi_disks[DEVICE_NR(dev)].device , cmd, (void *) arg); + return scsi_ioctl(rscsi_disks[DEVICE_NR(dev)].device, + cmd, (void *) arg); } } @@ -304,7 +268,7 @@ SCSI_LOG_HLQUEUE(1, printk("Doing sd request, dev = %d, 
block = %d\n", devm, block)); dpnt = &rscsi_disks[dev]; - if (devm >= (sd_template.dev_max << 4) || + if (devm >= (sd_template.dev_max << 4) || (devm & 0xf) || !dpnt || !dpnt->device->online || block + SCpnt->request.nr_sectors > sd[devm].nr_sects) { @@ -312,7 +276,7 @@ SCSI_LOG_HLQUEUE(2, printk("Retry with 0x%p\n", SCpnt)); return 0; } - block += sd[devm].start_sect; + if (dpnt->device->changed) { /* * quietly refuse to do anything to a changed disc until the changed @@ -619,8 +583,8 @@ (SCpnt->sense_buffer[4] << 16) | (SCpnt->sense_buffer[5] << 8) | SCpnt->sense_buffer[6]; - if (SCpnt->request.bh != NULL) - block_sectors = SCpnt->request.bh->b_size >> 9; + if (SCpnt->request.bio != NULL) + block_sectors = bio_sectors(SCpnt->request.bio); switch (SCpnt->device->sector_size) { case 1024: error_sector <<= 1; @@ -643,7 +607,7 @@ default: break; } - error_sector -= sd[SD_PARTITION(SCpnt->request.rq_dev)].start_sect; + error_sector &= ~(block_sectors - 1); good_sectors = error_sector - SCpnt->request.sector; if (good_sectors < 0 || good_sectors >= this_count) @@ -971,15 +935,11 @@ * So I have created this table. See ll_rw_blk.c * Jacques Gelinas (Jacques@solucorp.qc.ca) */ - int m; int hard_sector = sector_size; int sz = rscsi_disks[i].capacity * (hard_sector/256); /* There are 16 minors allocated for each major device */ - for (m = i << 4; m < ((i + 1) << 4); m++) { - sd_hardsizes[m] = hard_sector; - } - + blk_queue_hardsect_size(blk_get_queue(SD_MAJOR(i)), hard_sector); printk("SCSI device %s: " "%d %d-byte hdwr sectors (%d MB)\n", nbuff, rscsi_disks[i].capacity, @@ -1064,7 +1024,7 @@ static int sd_init() { - int i; + int i, maxparts; if (sd_template.dev_noticed == 0) return 0; @@ -1075,10 +1035,17 @@ if (sd_template.dev_max > N_SD_MAJORS * SCSI_DISKS_PER_MAJOR) sd_template.dev_max = N_SD_MAJORS * SCSI_DISKS_PER_MAJOR; + /* At most 16 partitions on each scsi disk. 
*/ + maxparts = (sd_template.dev_max << 4); + if (maxparts == 0) + return 0; + if (!sd_registered) { for (i = 0; i < N_USED_SD_MAJORS; i++) { - if (devfs_register_blkdev(SD_MAJOR(i), "sd", &sd_fops)) { - printk("Unable to get major %d for SCSI disk\n", SD_MAJOR(i)); + if (devfs_register_blkdev(SD_MAJOR(i), "sd", + &sd_fops)) { + printk("Unable to get major %d for SCSI disk\n", + SD_MAJOR(i)); return 1; } } @@ -1088,32 +1055,26 @@ if (rscsi_disks) return 0; - rscsi_disks = kmalloc(sd_template.dev_max * sizeof(Scsi_Disk), GFP_ATOMIC); - if (!rscsi_disks) - goto cleanup_devfs; - memset(rscsi_disks, 0, sd_template.dev_max * sizeof(Scsi_Disk)); - - /* for every (necessary) major: */ - sd_sizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_sizes) - goto cleanup_disks; - memset(sd_sizes, 0, (sd_template.dev_max << 4) * sizeof(int)); - - sd_blocksizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_blocksizes) - goto cleanup_sizes; - - sd_hardsizes = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_hardsizes) - goto cleanup_blocksizes; - - sd_max_sectors = kmalloc((sd_template.dev_max << 4) * sizeof(int), GFP_ATOMIC); - if (!sd_max_sectors) - goto cleanup_max_sectors; + /* allocate memory */ +#define init_mem_lth(x,n) x = kmalloc((n) * sizeof(*x), GFP_ATOMIC) +#define zero_mem_lth(x,n) memset(x, 0, (n) * sizeof(*x)) + + init_mem_lth(rscsi_disks, sd_template.dev_max); + init_mem_lth(sd_sizes, maxparts); + init_mem_lth(sd_blocksizes, maxparts); + init_mem_lth(sd, maxparts); + init_mem_lth(sd_gendisks, N_USED_SD_MAJORS); + init_mem_lth(sd_max_sectors, sd_template.dev_max << 4); + + if (!rscsi_disks || !sd_sizes || !sd_blocksizes || !sd || !sd_gendisks) + goto cleanup_mem; + + zero_mem_lth(rscsi_disks, sd_template.dev_max); + zero_mem_lth(sd_sizes, maxparts); + zero_mem_lth(sd, maxparts); - for (i = 0; i < sd_template.dev_max << 4; i++) { + for (i = 0; i < maxparts; i++) { sd_blocksizes[i] = 1024; - sd_hardsizes[i] = 512; /* * Allow lowlevel device drivers to generate 512k large scsi * commands if they know what they're doing and they ask for it @@ -1123,45 +1084,34 @@ } for (i = 0; i < N_USED_SD_MAJORS; i++) { - blksize_size[SD_MAJOR(i)] = sd_blocksizes + i * (SCSI_DISKS_PER_MAJOR << 4); - hardsect_size[SD_MAJOR(i)] = sd_hardsizes + i * (SCSI_DISKS_PER_MAJOR << 4); - max_sectors[SD_MAJOR(i)] = sd_max_sectors + i * (SCSI_DISKS_PER_MAJOR << 4); - } - /* - * FIXME: should unregister blksize_size, hardsect_size and max_sectors when - * the module is unloaded. 
- */ - sd = kmalloc((sd_template.dev_max << 4) * - sizeof(struct hd_struct), - GFP_ATOMIC); - if (!sd) - goto cleanup_sd; - memset(sd, 0, (sd_template.dev_max << 4) * sizeof(struct hd_struct)); - - if (N_USED_SD_MAJORS > 1) - sd_gendisks = kmalloc(N_USED_SD_MAJORS * sizeof(struct gendisk), GFP_ATOMIC); - if (!sd_gendisks) - goto cleanup_sd_gendisks; + request_queue_t *q = blk_get_queue(SD_MAJOR(i)); + int parts_per_major = (SCSI_DISKS_PER_MAJOR << 4); + + blksize_size[SD_MAJOR(i)] = + sd_blocksizes + i * parts_per_major; + blk_queue_hardsect_size(q, 512); + } + for (i = 0; i < N_USED_SD_MAJORS; i++) { + int N = SCSI_DISKS_PER_MAJOR; + sd_gendisks[i] = sd_gendisk; - sd_gendisks[i].de_arr = kmalloc (SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr, - GFP_ATOMIC); - if (!sd_gendisks[i].de_arr) - goto cleanup_gendisks_de_arr; - memset (sd_gendisks[i].de_arr, 0, - SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].de_arr); - sd_gendisks[i].flags = kmalloc (SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags, - GFP_ATOMIC); - if (!sd_gendisks[i].flags) - goto cleanup_gendisks_flags; - memset (sd_gendisks[i].flags, 0, - SCSI_DISKS_PER_MAJOR * sizeof *sd_gendisks[i].flags); + + init_mem_lth(sd_gendisks[i].de_arr, N); + init_mem_lth(sd_gendisks[i].flags, N); + + if (!sd_gendisks[i].de_arr || !sd_gendisks[i].flags) + goto cleanup_gendisks; + + zero_mem_lth(sd_gendisks[i].de_arr, N); + zero_mem_lth(sd_gendisks[i].flags, N); + sd_gendisks[i].major = SD_MAJOR(i); sd_gendisks[i].major_name = "sd"; sd_gendisks[i].minor_shift = 4; sd_gendisks[i].max_p = 1 << 4; - sd_gendisks[i].part = sd + (i * SCSI_DISKS_PER_MAJOR << 4); - sd_gendisks[i].sizes = sd_sizes + (i * SCSI_DISKS_PER_MAJOR << 4); + sd_gendisks[i].part = sd + i * (N << 4); + sd_gendisks[i].sizes = sd_sizes + i * (N << 4); sd_gendisks[i].nr_real = 0; sd_gendisks[i].real_devices = (void *) (rscsi_disks + i * SCSI_DISKS_PER_MAJOR); @@ -1169,27 +1119,21 @@ return 0; -cleanup_gendisks_flags: - kfree(sd_gendisks[i].de_arr); -cleanup_gendisks_de_arr: - while (--i >= 0 ) { +#undef init_mem_lth +#undef zero_mem_lth + +cleanup_gendisks: + /* kfree can handle NULL, so no test is required here */ + for (i = 0; i < N_USED_SD_MAJORS; i++) { kfree(sd_gendisks[i].de_arr); kfree(sd_gendisks[i].flags); } +cleanup_mem: kfree(sd_gendisks); -cleanup_sd_gendisks: kfree(sd); -cleanup_sd: - kfree(sd_max_sectors); -cleanup_max_sectors: - kfree(sd_hardsizes); -cleanup_blocksizes: kfree(sd_blocksizes); -cleanup_sizes: kfree(sd_sizes); -cleanup_disks: kfree(rscsi_disks); -cleanup_devfs: for (i = 0; i < N_USED_SD_MAJORS; i++) { devfs_unregister_blkdev(SD_MAJOR(i), "sd"); } @@ -1204,7 +1148,7 @@ for (i = 0; i < N_USED_SD_MAJORS; i++) { blk_dev[SD_MAJOR(i)].queue = sd_find_queue; - add_gendisk(&sd_gendisks[i]); + add_gendisk(&(sd_gendisks[i])); } for (i = 0; i < sd_template.dev_max; ++i) @@ -1294,9 +1238,7 @@ int revalidate_scsidisk(kdev_t dev, int maxusage) { int target; - int max_p; - int start; - int i; + int res; target = DEVICE_NR(dev); @@ -1306,36 +1248,18 @@ } DEVICE_BUSY = 1; - max_p = sd_gendisks->max_p; - start = target << sd_gendisks->minor_shift; - - for (i = max_p - 1; i >= 0; i--) { - int index = start + i; - invalidate_device(MKDEV_SD_PARTITION(index), 1); - sd_gendisks->part[index].start_sect = 0; - sd_gendisks->part[index].nr_sects = 0; - /* - * Reset the blocksize for everything so that we can read - * the partition table. 
Technically we will determine the - * correct block size when we revalidate, but we do this just - * to make sure that everything remains consistent. - */ - sd_blocksizes[index] = 1024; - if (rscsi_disks[target].device->sector_size == 2048) - sd_blocksizes[index] = 2048; - else - sd_blocksizes[index] = 1024; - } + res = wipe_partitions(dev); + if (res) + goto leave; #ifdef MAYBE_REINIT MAYBE_REINIT; #endif - grok_partitions(&SD_GENDISK(target), target % SCSI_DISKS_PER_MAJOR, - 1<<4, CAPACITY); - + grok_partitions(dev, CAPACITY); +leave: DEVICE_BUSY = 0; - return 0; + return res; } static int fop_revalidate_scsidisk(kdev_t dev) @@ -1345,6 +1269,7 @@ static void sd_detach(Scsi_Device * SDp) { Scsi_Disk *dpnt; + kdev_t dev; int i, j; int max_p; int start; @@ -1352,18 +1277,13 @@ for (dpnt = rscsi_disks, i = 0; i < sd_template.dev_max; i++, dpnt++) if (dpnt->device == SDp) { - /* If we are disconnecting a disk driver, sync and invalidate - * everything */ max_p = sd_gendisk.max_p; start = i << sd_gendisk.minor_shift; + dev = MKDEV_SD_PARTITION(start); + wipe_partitions(dev); + for (j = max_p - 1; j >= 0; j--) + sd_sizes[start + j] = 0; - for (j = max_p - 1; j >= 0; j--) { - int index = start + j; - invalidate_device(MKDEV_SD_PARTITION(index), 1); - sd_gendisks->part[index].start_sect = 0; - sd_gendisks->part[index].nr_sects = 0; - sd_sizes[index] = 0; - } devfs_register_partitions (&SD_GENDISK (i), SD_MINOR_NUMBER (start), 1); /* unregister_disk() */ @@ -1376,7 +1296,6 @@ SD_GENDISK(i).nr_real--; return; } - return; } static int __init init_sd(void) @@ -1399,14 +1318,11 @@ kfree(rscsi_disks); kfree(sd_sizes); kfree(sd_blocksizes); - kfree(sd_hardsizes); kfree((char *) sd); } for (i = 0; i < N_USED_SD_MAJORS; i++) { - del_gendisk(&sd_gendisks[i]); - blk_size[SD_MAJOR(i)] = NULL; - hardsect_size[SD_MAJOR(i)] = NULL; - read_ahead[SD_MAJOR(i)] = 0; + del_gendisk(&(sd_gendisks[i])); + blk_clear(SD_MAJOR(i)); } sd_template.dev_max = 0; if (sd_gendisks != &sd_gendisk) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/sr.c linux/drivers/scsi/sr.c --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/sr.c Wed Oct 31 09:39:13 2001 +++ linux/drivers/scsi/sr.c Tue Oct 30 09:48:43 2001 @@ -88,7 +88,6 @@ static int *sr_sizes; static int *sr_blocksizes; -static int *sr_hardsizes; static int sr_open(struct cdrom_device_info *, int); void get_sectorsize(int); @@ -218,8 +217,8 @@ (SCpnt->sense_buffer[4] << 16) | (SCpnt->sense_buffer[5] << 8) | SCpnt->sense_buffer[6]; - if (SCpnt->request.bh != NULL) - block_sectors = SCpnt->request.bh->b_size >> 9; + if (SCpnt->request.bio != NULL) + block_sectors = bio_sectors(SCpnt->request.bio); if (block_sectors < 4) block_sectors = 4; if (scsi_CDs[device_nr].device->sector_size == 2048) @@ -663,6 +662,7 @@ scsi_CDs[i].needs_sector_size = 0; sr_sizes[i] = scsi_CDs[i].capacity >> (BLOCK_SIZE_BITS - 9); }; + blk_queue_hardsect_size(blk_get_queue(MAJOR_NR), sector_size); scsi_free(buffer, 512); } @@ -811,21 +811,14 @@ if (!sr_blocksizes) goto cleanup_sizes; - sr_hardsizes = kmalloc(sr_template.dev_max * sizeof(int), GFP_ATOMIC); - if (!sr_hardsizes) - goto cleanup_blocksizes; /* * These are good guesses for the time being. 
*/ - for (i = 0; i < sr_template.dev_max; i++) { + for (i = 0; i < sr_template.dev_max; i++) sr_blocksizes[i] = 2048; - sr_hardsizes[i] = 2048; - } + blksize_size[MAJOR_NR] = sr_blocksizes; - hardsect_size[MAJOR_NR] = sr_hardsizes; return 0; -cleanup_blocksizes: - kfree(sr_blocksizes); cleanup_sizes: kfree(sr_sizes); cleanup_cds: @@ -897,7 +890,6 @@ else read_ahead[MAJOR_NR] = 4; /* 4 sector read-ahead */ - return; } static void sr_detach(Scsi_Device * SDp) @@ -905,17 +897,18 @@ Scsi_CD *cpnt; int i; - for (cpnt = scsi_CDs, i = 0; i < sr_template.dev_max; i++, cpnt++) + for (cpnt = scsi_CDs, i = 0; i < sr_template.dev_max; i++, cpnt++) { if (cpnt->device == SDp) { /* - * Since the cdrom is read-only, no need to sync the device. + * Since the cdrom is read-only, no need to sync + * the device. * We should be kind to our buffer cache, however. */ invalidate_device(MKDEV(MAJOR_NR, i), 0); /* - * Reset things back to a sane state so that one can re-load a new - * driver (perhaps the same one). + * Reset things back to a sane state so that one can + * re-load a new driver (perhaps the same one). */ unregister_cdrom(&(cpnt->cdi)); cpnt->device = NULL; @@ -926,7 +919,7 @@ sr_sizes[i] = 0; return; } - return; + } } static int __init init_sr(void) @@ -948,13 +941,9 @@ kfree(sr_blocksizes); sr_blocksizes = NULL; - kfree(sr_hardsizes); - sr_hardsizes = NULL; } - blksize_size[MAJOR_NR] = NULL; - hardsect_size[MAJOR_NR] = NULL; - blk_size[MAJOR_NR] = NULL; read_ahead[MAJOR_NR] = 0; + blk_clear(MAJOR_NR); sr_template.dev_max = 0; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/sym53c8xx.c linux/drivers/scsi/sym53c8xx.c --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/sym53c8xx.c Wed Oct 24 12:49:58 2001 +++ linux/drivers/scsi/sym53c8xx.c Thu Oct 18 09:45:13 2001 @@ -642,10 +642,10 @@ #define NCR_LOCK_NCB(np, flags) spin_lock_irqsave(&np->smp_lock, flags) #define NCR_UNLOCK_NCB(np, flags) spin_unlock_irqrestore(&np->smp_lock, flags) -#define NCR_LOCK_SCSI_DONE(np, flags) \ - spin_lock_irqsave(&io_request_lock, flags) -#define NCR_UNLOCK_SCSI_DONE(np, flags) \ - spin_unlock_irqrestore(&io_request_lock, flags) +#define NCR_LOCK_SCSI_DONE(host, flags) \ + spin_lock_irqsave(&((host)->host_lock), flags) +#define NCR_UNLOCK_SCSI_DONE(host, flags) \ + spin_unlock_irqrestore(&((host)->host_lock), flags) #else @@ -656,8 +656,8 @@ #define NCR_LOCK_NCB(np, flags) do { save_flags(flags); cli(); } while (0) #define NCR_UNLOCK_NCB(np, flags) do { restore_flags(flags); } while (0) -#define NCR_LOCK_SCSI_DONE(np, flags) do {;} while (0) -#define NCR_UNLOCK_SCSI_DONE(np, flags) do {;} while (0) +#define NCR_LOCK_SCSI_DONE(host, flags) do {;} while (0) +#define NCR_UNLOCK_SCSI_DONE(host, flags) do {;} while (0) #endif @@ -13676,9 +13676,9 @@ if (DEBUG_FLAGS & DEBUG_TINY) printk ("]\n"); if (done_list) { - NCR_LOCK_SCSI_DONE(np, flags); + NCR_LOCK_SCSI_DONE(done_list->host, flags); ncr_flush_done_cmds(done_list); - NCR_UNLOCK_SCSI_DONE(np, flags); + NCR_UNLOCK_SCSI_DONE(done_list->host, flags); } } @@ -13699,9 +13699,9 @@ NCR_UNLOCK_NCB(np, flags); if (done_list) { - NCR_LOCK_SCSI_DONE(np, flags); + NCR_LOCK_SCSI_DONE(done_list->host, flags); ncr_flush_done_cmds(done_list); - NCR_UNLOCK_SCSI_DONE(np, flags); + NCR_UNLOCK_SCSI_DONE(done_list->host, flags); } } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/drivers/scsi/sym53c8xx.h linux/drivers/scsi/sym53c8xx.h --- /opt/kernel/linux-2.4.14-pre6/drivers/scsi/sym53c8xx.h Thu Oct 11 08:48:01 2001 +++ 
linux/drivers/scsi/sym53c8xx.h Wed Oct 31 11:04:33 2001 @@ -96,8 +96,9 @@ this_id: 7, \ sg_tablesize: SCSI_NCR_SG_TABLESIZE, \ cmd_per_lun: SCSI_NCR_CMD_PER_LUN, \ - max_sectors: MAX_SEGMENTS*8, \ - use_clustering: DISABLE_CLUSTERING} + max_sectors: MAX_SEGMENTS*8, \ + use_clustering: DISABLE_CLUSTERING, \ + can_dma_32: 1} #else diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/fs/Makefile linux/fs/Makefile --- /opt/kernel/linux-2.4.14-pre6/fs/Makefile Fri Oct 5 00:13:18 2001 +++ linux/fs/Makefile Mon Oct 15 10:41:43 2001 @@ -7,12 +7,12 @@ O_TARGET := fs.o -export-objs := filesystems.o open.o dcache.o +export-objs := filesystems.o open.o dcache.o bio.o mod-subdirs := nls obj-y := open.o read_write.o devices.o file_table.o buffer.o \ - super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \ - fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ + bio.o super.o block_dev.o char_dev.o stat.o exec.o pipe.o \ + namei.o fcntl.o ioctl.o readdir.o select.o fifo.o locks.o \ dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \ filesystems.o namespace.o diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/fs/bio.c linux/fs/bio.c --- /opt/kernel/linux-2.4.14-pre6/fs/bio.c Thu Jan 1 01:00:00 1970 +++ linux/fs/bio.c Thu Nov 1 10:13:18 2001 @@ -0,0 +1,553 @@ +/* + * Copyright (C) 2001 Jens Axboe + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +kmem_cache_t *bio_cachep; +static spinlock_t bio_lock = SPIN_LOCK_UNLOCKED; +static struct bio *bio_pool; +static DECLARE_WAIT_QUEUE_HEAD(bio_pool_wait); + +struct bio_hash_bucket *bio_hash_table; +unsigned int bio_hash_bits, bio_hash_mask; + +static unsigned int bio_pool_free; + +/* + * optimized for 2^BIO_HASH_SCALE kB block size + */ +#define BIO_HASH_SCALE 3 +#define BIO_HASH_BLOCK(sector) ((sector) >> BIO_HASH_SCALE) + +/* + * pending further testing, grabbed from fs/buffer.c hash so far... + */ +#define __bio_hash(dev,block) \ + (((((dev)<<(bio_hash_bits - 6)) ^ ((dev)<<(bio_hash_bits - 9))) ^ \ + (((block)<<(bio_hash_bits - 6)) ^ ((block) >> 13) ^ \ + ((block) << (bio_hash_bits - 12)))) & bio_hash_mask) + +#define bio_hash(dev, sector) &((bio_hash_table + __bio_hash(dev, BIO_HASH_BLOCK((sector))))->hash) + +#define bio_hash_bucket(dev, sector) (bio_hash_table + __bio_hash(dev, BIO_HASH_BLOCK((sector)))) + +#define __BIO_HASH_RWLOCK(dev, sector) \ + &((bio_hash_table + __bio_hash((dev), BIO_HASH_BLOCK((sector))))->lock) +#define BIO_HASH_RWLOCK(bio) \ + __BIO_HASH_RWLOCK((bio)->bi_dev, (bio)->bi_sector) + +/* + * TODO: change this to use slab reservation scheme once that infrastructure + * is in place... 
+ */ +#define BIO_POOL_SIZE 256 + +void __init bio_hash_init(unsigned long mempages) +{ + unsigned long htable_size, order; + int i; + + htable_size = mempages * sizeof(struct bio_hash_bucket *); + for (order = 0; (PAGE_SIZE << order) < htable_size; order++) + ; + + do { + unsigned long tmp = (PAGE_SIZE << order) / sizeof(struct bio_hash_bucket); + + bio_hash_bits = 0; + while ((tmp >>= 1UL) != 0UL) + bio_hash_bits++; + + bio_hash_table = (struct bio_hash_bucket *) __get_free_pages(GFP_ATOMIC, order); + } while (bio_hash_table == NULL && --order > 0); + + if (!bio_hash_table) + panic("Failed to allocate page hash table\n"); + + printk("Bio-cache hash table entries: %ld (order: %ld, %ld bytes)\n", + BIO_HASH_SIZE, order, (PAGE_SIZE << order)); + + for (i = 0; i < BIO_HASH_SIZE; i++) { + struct bio_hash_bucket *hb = &bio_hash_table[i]; + + rwlock_init(&hb->lock); + hb->hash = NULL; + } + + bio_hash_mask = BIO_HASH_SIZE - 1; +} + +inline void __bio_hash_remove(struct bio *bio) +{ + bio_hash_t *entry = &bio->bi_hash; + + if (test_and_clear_bit(BIO_HASHED, &bio->bi_flags)) { + bio_hash_t *nxt = entry->next_hash; + bio_hash_t **pprev = entry->pprev_hash; + + if (nxt) + nxt->pprev_hash = pprev; + + *pprev = nxt; + entry->pprev_hash = NULL; + entry->valid_counter = 0; + } +} + +inline void bio_hash_remove(struct bio *bio) +{ + rwlock_t *hash_lock = BIO_HASH_RWLOCK(bio); + unsigned long flags; + + write_lock_irqsave(hash_lock, flags); + __bio_hash_remove(bio); + write_unlock_irqrestore(hash_lock, flags); +} + +inline void __bio_hash_add(struct bio *bio, bio_hash_t **hash, unsigned int vc) +{ + bio_hash_t *entry = &bio->bi_hash; + bio_hash_t *nxt = *hash; + + BUG_ON(test_and_set_bit(BIO_HASHED, &bio->bi_flags)); + + *hash = entry; + entry->next_hash = nxt; + entry->pprev_hash = hash; + entry->valid_counter = vc; + + if (nxt) + nxt->pprev_hash = &entry->next_hash; +} + +inline void bio_hash_add(struct bio *bio, unsigned int vc) +{ + struct bio_hash_bucket *hb =bio_hash_bucket(bio->bi_dev,bio->bi_sector); + unsigned long flags; + + write_lock_irqsave(&hb->lock, flags); + __bio_hash_add(bio, &hb->hash, vc); + write_unlock_irqrestore(&hb->lock, flags); +} + +inline struct bio *__bio_hash_find(kdev_t dev, sector_t sector, + bio_hash_t **hash, unsigned int vc) +{ + bio_hash_t *next = *hash, *entry; + struct bio *bio; + int nr = 0; + + while ((entry = next)) { + next = entry->next_hash; + prefetch(next); + bio = bio_hash_entry(entry); + + BUG_ON(!test_bit(BIO_HASHED, &bio->bi_flags)); + + if (entry->valid_counter == vc) { + if (bio->bi_sector == sector && bio->bi_dev == dev) { +#ifdef BIO_HASH_PROFILING + if (nr > htable->st.max_bucket_size) + htable->st.max_bucket_size = nr; + if (nr <= MAX_PROFILE_BUCKETS) + htable->st.bucket_size[nr]++; +#endif + bio_get(bio); + return bio; + } + } + nr++; + } + + return NULL; +} + +inline struct bio *bio_hash_find(kdev_t dev, sector_t sector, unsigned int vc) +{ + struct bio_hash_bucket *hb = bio_hash_bucket(dev, sector); + unsigned long flags; + struct bio *bio; + + read_lock_irqsave(&hb->lock, flags); + bio = __bio_hash_find(dev, sector, &hb->hash, vc); + read_unlock_irqrestore(&hb->lock, flags); + + return bio; +} + +inline int __bio_hash_add_unique(struct bio *bio, bio_hash_t **hash, + unsigned int vc) +{ + struct bio *alias = __bio_hash_find(bio->bi_dev, bio->bi_sector, hash, vc); + + if (!alias) { + __bio_hash_add(bio, hash, vc); + return 0; + } + + /* + * release reference to alias + */ + bio_put(alias); + return 1; +} + +inline int bio_hash_add_unique(struct 
bio *bio, unsigned int vc) +{ + struct bio_hash_bucket *hb =bio_hash_bucket(bio->bi_dev,bio->bi_sector); + unsigned long flags; + int ret; + + write_lock_irqsave(&hb->lock, flags); + ret = __bio_hash_add_unique(bio, &hb->hash, vc); + write_unlock_irqrestore(&hb->lock, flags); + + return ret; +} + +/* + * increment validity counter on barrier inserts. if it wraps, we must + * prune all existing entries for this device to be completely safe + * + * q->queue_lock must be held by caller + */ +void bio_hash_invalidate(request_queue_t *q, kdev_t dev) +{ + bio_hash_t *hash; + struct bio *bio; + int i; + + if (++q->hash_valid_counter) + return; + + /* + * it wrapped... + */ + for (i = 0; i < (1 << bio_hash_bits); i++) { + struct bio_hash_bucket *hb = &bio_hash_table[i]; + unsigned long flags; + + write_lock_irqsave(&hb->lock, flags); + while ((hash = hb->hash) != NULL) { + bio = bio_hash_entry(hash); + if (bio->bi_dev != dev) + __bio_hash_remove(bio); + } + write_unlock_irqrestore(&hb->lock, flags); + } + + /* + * entries pruned, reset validity counter + */ + q->hash_valid_counter = 1; +} + + +/* + * if need be, add bio_pool_get_irq() to match... + */ +static inline struct bio *__bio_pool_get(void) +{ + struct bio *bio; + + if ((bio = bio_pool)) { + bio_pool = bio->bi_next; + bio->bi_next = NULL; + bio_pool_free--; + } + + return bio; +} + +static inline struct bio *bio_pool_get(void) +{ + unsigned long flags; + struct bio *bio; + + spin_lock_irqsave(&bio_lock, flags); + bio = __bio_pool_get(); + spin_unlock_irqrestore(&bio_lock, flags); + + return bio; +} + +static inline void bio_pool_put(struct bio *bio) +{ + unsigned long flags; + + spin_lock_irqsave(&bio_lock, flags); + + /* + * if the pool has enough free entries, just slab free the bio + */ + if (bio_pool_free >= BIO_POOL_SIZE) { + spin_unlock_irqrestore(&bio_lock, flags); + kmem_cache_free(bio_cachep, bio); + } else { + bio->bi_next = bio_pool; + bio_pool = bio; + bio_pool_free++; + spin_unlock_irqrestore(&bio_lock, flags); + } + + /* + * use wake-one + */ + if (waitqueue_active(&bio_pool_wait)) + wake_up_nr(&bio_pool_wait, 1); +} + +/** + * bio_alloc - allocate a bio for I/O + * @gfp_mask: the GFP_ mask given to the slab allocator + * + * Description: + * bio_alloc will first try it's on internal pool to satisfy the allocation + * and if that fails fall back to the bio slab cache. In the latter case, + * the @gfp_mask specifies the priority of the allocation. In particular, + * if %__GFP_WAIT is set then we will block on the internal pool waiting + * for a &struct bio to become free. + **/ +struct bio *bio_alloc(int gfp_mask) +{ + DECLARE_WAITQUEUE(wait, current); + struct bio *bio; + + /* + * first try our reserved pool + */ + if ((bio = bio_pool_get())) + goto gotit; + + /* + * no such luck, try slab alloc + */ + if ((bio = kmem_cache_alloc(bio_cachep, gfp_mask))) + goto gotit; + + /* + * hrmpf, not much luck. if __GFP_WAIT is set, wait on + * bio_pool. + */ + if ((gfp_mask & (__GFP_WAIT | __GFP_IO)) == (__GFP_WAIT | __GFP_IO)) { + add_wait_queue_exclusive(&bio_pool_wait, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if ((bio = bio_pool_get())) + break; + + run_task_queue(&tq_disk); + schedule(); + } + remove_wait_queue(&bio_pool_wait, &wait); + __set_current_state(TASK_RUNNING); + } + + if (bio) { +gotit: + /* + * FIXME: optimize this + */ + memset(bio, 0, sizeof(*bio)); + atomic_set(&bio->bi_cnt, 1); + } + + return bio; +} + +/* + * queue lock assumed held! 
+ */ +static inline void bio_free(struct bio *bio) +{ + BUG_ON(test_bit(BIO_HASHED, &bio->bi_flags)); + + bio_pool_put(bio); +} + +/** + * bio_put - release a reference to a bio + * @bio: bio to release reference to + * + * Description: + * Put a reference to a &struct bio, either one you have gotten with + * bio_alloc or bio_get. The last put of a bio will free it. + **/ +void bio_put(struct bio *bio) +{ + BUG_ON(!atomic_read(&bio->bi_cnt)); + + /* + * last put frees it + */ + if (atomic_dec_and_test(&bio->bi_cnt)) { + BUG_ON(bio->bi_next); + + bio_free(bio); + } +} + +#ifdef BIO_PAGEIO +static int bio_end_io_page(struct bio *bio) +{ + struct page *page = bio_page(bio); + + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + SetPageError(page); + if (!PageError(page)) + SetPageUptodate(page); + + /* + * Run the hooks that have to be done when a page I/O has completed. + */ + if (PageTestandClearDecrAfter(page)) + atomic_dec(&nr_async_pages); + + UnlockPage(page); + bio_put(bio); + return 1; +} +#endif + +static void bio_end_io_kio(struct bio *bio) +{ + struct kiobuf *kio = (struct kiobuf *) bio->bi_private; + + end_kio_request(kio, test_bit(BIO_UPTODATE, &bio->bi_flags)); + bio_put(bio); +} + +/** + * ll_rw_kio - submit a &struct kiobuf for I/O + * @rw: %READ or %WRITE + * @kio: the kiobuf to do I/O on + * @dev: target device + * @sector: start location on disk + * + * Description: + * ll_rw_kio will map the page list inside the &struct kiobuf to + * &struct bio and queue them for I/O. The kiobuf given must describe + * a continous range of data, and must be fully prepared for I/O. + **/ +void ll_rw_kio(int rw, struct kiobuf *kio, kdev_t dev, sector_t sector) +{ + struct bio *bio; + int i, offset, len, size; + + if ((rw & WRITE) && is_read_only(dev)) { + printk("ll_rw_bio: WRITE to ro device %s\n", kdevname(dev)); + kio->errno = -EPERM; + return; + } + + offset = kio->offset & ~PAGE_MASK; + + len = kio->length; + for (i = 0; i < kio->nr_pages; i++) { + bio = bio_alloc(GFP_NOIO); + + bio->bi_dev = dev; + bio->bi_sector = sector; + + size = PAGE_SIZE - offset; + if (size > len) + size = len; + + bio->bi_io_vec.bv_page = kio->maplist[i]; + bio->bi_io_vec.bv_len = size; + bio->bi_io_vec.bv_offset = offset; + + bio->bi_end_io = bio_end_io_kio; + bio->bi_private = kio; + + /* + * kiobuf only has an offset into the first page + */ + offset = 0; + len -= size; + sector += (size >> 9); + + atomic_inc(&kio->io_count); + submit_bio(rw, bio); + } +} + +static void bio_init(void *foo, kmem_cache_t *cachep, unsigned long flg) +{ + if ((flg & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { + struct bio *bio = foo; + + bio->bi_next = NULL; + bio->bi_flags = 0; + bio->bi_end_io = NULL; + } +} + +static int __init bio_init_pool(void) +{ + struct bio *bio; + int i; + + for (i = 0; i < BIO_POOL_SIZE; i++) { + bio = kmem_cache_alloc(bio_cachep, GFP_ATOMIC); + if (!bio) + panic("bio: cannot init pool\n"); + + bio_pool_put(bio); + } + + return i; +} + +static int __init init_bio(void) +{ + int nr; + + bio_cachep = kmem_cache_create("bio", sizeof(struct bio), 0, + SLAB_HWCACHE_ALIGN, bio_init, NULL); + if (!bio_cachep) + panic("bio: can't create bio_cachep slab cache\n"); + + nr = bio_init_pool(); + printk("BIO: pool of %d setup, %uKb (%d bytes/bio)\n", nr, nr * sizeof(struct bio) >> 10, sizeof(struct bio)); + + return 0; +} + +module_init(init_bio); + +EXPORT_SYMBOL(bio_alloc); +EXPORT_SYMBOL(bio_put); +EXPORT_SYMBOL(ll_rw_kio); +EXPORT_SYMBOL(bio_hash_remove); +EXPORT_SYMBOL(bio_hash_add); 
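To show how the pieces above hang together, here is the shape of a typical user, modeled on ll_rw_kio(): allocate a bio, fill in the single segment, hook up a private completion handler, and submit. Everything prefixed my_ is illustrative and not part of the patch.

        struct my_ctx {                         /* illustrative completion cookie */
                struct completion done;         /* caller runs init_completion() */
                int error;
        };

        static void my_end_io(struct bio *bio)
        {
                struct my_ctx *ctx = bio->bi_private;

                if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
                        ctx->error = -EIO;

                complete(&ctx->done);
                bio_put(bio);                   /* drop the submission reference */
        }

        static void my_read_page(kdev_t dev, sector_t sector, struct page *page,
                                 struct my_ctx *ctx)
        {
                struct bio *bio = bio_alloc(GFP_NOIO); /* as ll_rw_kio() does */

                bio->bi_dev = dev;
                bio->bi_sector = sector;
                bio->bi_io_vec.bv_page = page;
                bio->bi_io_vec.bv_len = PAGE_SIZE;
                bio->bi_io_vec.bv_offset = 0;
                bio->bi_end_io = my_end_io;
                bio->bi_private = ctx;

                submit_bio(READ, bio);
        }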
+EXPORT_SYMBOL(bio_hash_add_unique); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/fs/block_dev.c linux/fs/block_dev.c --- /opt/kernel/linux-2.4.14-pre6/fs/block_dev.c Wed Oct 31 09:39:13 2001 +++ linux/fs/block_dev.c Tue Oct 30 09:48:43 2001 @@ -102,7 +102,7 @@ return 0; } -static int blkdev_get_block(struct inode * inode, long iblock, struct buffer_head * bh, int create) +static int blkdev_get_block(struct inode * inode, sector_t iblock, struct buffer_head * bh, int create) { if (iblock >= max_block(inode->i_rdev)) return -EIO; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/fs/buffer.c linux/fs/buffer.c --- /opt/kernel/linux-2.4.14-pre6/fs/buffer.c Wed Oct 31 09:39:13 2001 +++ linux/fs/buffer.c Tue Oct 30 09:48:43 2001 @@ -538,7 +538,7 @@ __remove_from_lru_list(bh); } -struct buffer_head * get_hash_table(kdev_t dev, int block, int size) +struct buffer_head * get_hash_table(kdev_t dev, sector_t block, int size) { struct buffer_head *bh, **p = &hash(dev, block); @@ -1000,7 +1000,7 @@ * 14.02.92: changed it to sync dirty buffers a bit: better performance * when the filesystem starts to get full of dirty blocks (I hope). */ -struct buffer_head * getblk(kdev_t dev, int block, int size) +struct buffer_head * getblk(kdev_t dev, sector_t block, int size) { for (;;) { struct buffer_head * bh; @@ -1931,7 +1931,8 @@ goto done; } -int generic_block_bmap(struct address_space *mapping, long block, get_block_t *get_block) +sector_t generic_block_bmap(struct address_space *mapping, sector_t block, + get_block_t *get_block) { struct buffer_head tmp; struct inode *inode = mapping->host; @@ -1942,57 +1943,6 @@ } /* - * IO completion routine for a buffer_head being used for kiobuf IO: we - * can't dispatch the kiobuf callback until io_count reaches 0. - */ - -static void end_buffer_io_kiobuf(struct buffer_head *bh, int uptodate) -{ - struct kiobuf *kiobuf; - - mark_buffer_uptodate(bh, uptodate); - - kiobuf = bh->b_private; - unlock_buffer(bh); - end_kio_request(kiobuf, uptodate); -} - -/* - * For brw_kiovec: submit a set of buffer_head temporary IOs and wait - * for them to complete. Clean up the buffer_heads afterwards. - */ - -static int wait_kio(int rw, int nr, struct buffer_head *bh[], int size) -{ - int iosize, err; - int i; - struct buffer_head *tmp; - - iosize = 0; - err = 0; - - for (i = nr; --i >= 0; ) { - iosize += size; - tmp = bh[i]; - if (buffer_locked(tmp)) { - wait_on_buffer(tmp); - } - - if (!buffer_uptodate(tmp)) { - /* We are traversing bh'es in reverse order so - clearing iosize on error calculates the - amount of IO before the first error. */ - iosize = 0; - err = -EIO; - } - } - - if (iosize) - return iosize; - return err; -} - -/* * Start I/O on a physical range of kernel memory, defined by a vector * of kiobuf structs (much like a user-space iovec list). * @@ -2004,21 +1954,12 @@ * passed in to completely map the iobufs to disk. 
*/ -int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], - kdev_t dev, unsigned long b[], int size) +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], kdev_t dev, sector_t b[], + int size) { - int err; - int length; int transferred; int i; - int bufind; - int pageind; - int bhind; - int offset; - unsigned long blocknr; - struct kiobuf * iobuf = NULL; - struct page * map; - struct buffer_head *tmp, **bhs = NULL; + struct kiobuf * iobuf; if (!nr) return 0; @@ -2028,8 +1969,7 @@ */ for (i = 0; i < nr; i++) { iobuf = iovec[i]; - if ((iobuf->offset & (size-1)) || - (iobuf->length & (size-1))) + if ((iobuf->offset & (size-1)) || (iobuf->length & (size-1))) return -EINVAL; if (!iobuf->nr_pages) panic("brw_kiovec: iobuf not initialised"); @@ -2038,94 +1978,24 @@ /* * OK to walk down the iovec doing page IO on each page we find. */ - bufind = bhind = transferred = err = 0; for (i = 0; i < nr; i++) { iobuf = iovec[i]; - offset = iobuf->offset; - length = iobuf->length; iobuf->errno = 0; - if (!bhs) - bhs = iobuf->bh; - for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { - map = iobuf->maplist[pageind]; - if (!map) { - err = -EFAULT; - goto finished; - } - - while (length > 0) { - blocknr = b[bufind++]; - if (blocknr == -1UL) { - if (rw == READ) { - /* there was an hole in the filesystem */ - memset(kmap(map) + offset, 0, size); - flush_dcache_page(map); - kunmap(map); - - transferred += size; - goto skip_block; - } else - BUG(); - } - tmp = bhs[bhind++]; + ll_rw_kio(rw, iobuf, dev, b[i] * (size >> 9)); + } - tmp->b_size = size; - set_bh_page(tmp, map, offset); - tmp->b_this_page = tmp; - - init_buffer(tmp, end_buffer_io_kiobuf, iobuf); - tmp->b_dev = dev; - tmp->b_blocknr = blocknr; - tmp->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req); - - if (rw == WRITE) { - set_bit(BH_Uptodate, &tmp->b_state); - clear_bit(BH_Dirty, &tmp->b_state); - } else - set_bit(BH_Uptodate, &tmp->b_state); - - atomic_inc(&iobuf->io_count); - submit_bh(rw, tmp); - /* - * Wait for IO if we have got too much - */ - if (bhind >= KIO_MAX_SECTORS) { - kiobuf_wait_for_io(iobuf); /* wake-one */ - err = wait_kio(rw, bhind, bhs, size); - if (err >= 0) - transferred += err; - else - goto finished; - bhind = 0; - } + /* + * now they are all submitted, wait for completion + */ + transferred = 0; + for (i = 0; i < nr; i++) { + iobuf = iovec[i]; + kiobuf_wait_for_io(iobuf); + transferred += iobuf->length; + } - skip_block: - length -= size; - offset += size; - - if (offset >= PAGE_SIZE) { - offset = 0; - break; - } - } /* End of block loop */ - } /* End of page loop */ - } /* End of iovec loop */ - - /* Is there any IO still left to submit? */ - if (bhind) { - kiobuf_wait_for_io(iobuf); /* wake-one */ - err = wait_kio(rw, bhind, bhs, size); - if (err >= 0) - transferred += err; - else - goto finished; - } - - finished: - if (transferred) - return transferred; - return err; + return transferred; } /* @@ -2140,7 +2010,7 @@ * FIXME: we need a swapper_inode->get_block function to remove * some of the bmap kludges and interface ugliness here. */ -int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size) +int brw_page(int rw, struct page *page, kdev_t dev, sector_t b[], int size) { struct buffer_head *head, *bh; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/fs/ext2/inode.c linux/fs/ext2/inode.c --- /opt/kernel/linux-2.4.14-pre6/fs/ext2/inode.c Wed Oct 24 12:49:58 2001 +++ linux/fs/ext2/inode.c Mon Oct 15 10:41:43 2001 @@ -505,7 +505,7 @@ * reachable from inode. 
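Note that the size >> 9 factor is the entire block-to-sector conversion now: with a 4096-byte block size it is 8, so block b[i] starts at sector b[i] * 8. Callers (the raw driver, LVM, etc.) keep using the same entry point; sketch with illustrative variables:

        int err = brw_kiovec(READ, 1, &iobuf, dev, blocks, 4096);

        if (err < 0)
                return err;     /* -EINVAL for a misaligned iobuf */

        /* otherwise err is the number of bytes transferred */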
*/ -static int ext2_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create) +static int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { int err = -EIO; int offsets[4]; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/fs/iobuf.c linux/fs/iobuf.c --- /opt/kernel/linux-2.4.14-pre6/fs/iobuf.c Fri Apr 27 23:23:25 2001 +++ linux/fs/iobuf.c Mon Oct 15 10:41:43 2001 @@ -8,7 +8,6 @@ #include #include -#include void end_kio_request(struct kiobuf *kiobuf, int uptodate) { @@ -26,52 +25,23 @@ { memset(iobuf, 0, sizeof(*iobuf)); init_waitqueue_head(&iobuf->wait_queue); + atomic_set(&iobuf->io_count, 0); iobuf->array_len = KIO_STATIC_PAGES; iobuf->maplist = iobuf->map_array; } -int alloc_kiobuf_bhs(struct kiobuf * kiobuf) -{ - int i; - - for (i = 0; i < KIO_MAX_SECTORS; i++) - if (!(kiobuf->bh[i] = kmem_cache_alloc(bh_cachep, SLAB_KERNEL))) { - while (i--) { - kmem_cache_free(bh_cachep, kiobuf->bh[i]); - kiobuf->bh[i] = NULL; - } - return -ENOMEM; - } - return 0; -} - -void free_kiobuf_bhs(struct kiobuf * kiobuf) -{ - int i; - - for (i = 0; i < KIO_MAX_SECTORS; i++) { - kmem_cache_free(bh_cachep, kiobuf->bh[i]); - kiobuf->bh[i] = NULL; - } -} - int alloc_kiovec(int nr, struct kiobuf **bufp) { int i; struct kiobuf *iobuf; for (i = 0; i < nr; i++) { - iobuf = vmalloc(sizeof(struct kiobuf)); + iobuf = kmalloc(sizeof(struct kiobuf), GFP_KERNEL); if (!iobuf) { free_kiovec(i, bufp); return -ENOMEM; } kiobuf_init(iobuf); - if (alloc_kiobuf_bhs(iobuf)) { - vfree(iobuf); - free_kiovec(i, bufp); - return -ENOMEM; - } bufp[i] = iobuf; } @@ -89,8 +59,7 @@ unlock_kiovec(1, &iobuf); if (iobuf->array_len > KIO_STATIC_PAGES) kfree (iobuf->maplist); - free_kiobuf_bhs(iobuf); - vfree(bufp[i]); + kfree(bufp[i]); } } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/fs/isofs/inode.c linux/fs/isofs/inode.c --- /opt/kernel/linux-2.4.14-pre6/fs/isofs/inode.c Wed Oct 31 09:39:13 2001 +++ linux/fs/isofs/inode.c Wed Oct 31 11:03:49 2001 @@ -888,7 +888,7 @@ * or getblk() if they are not. Returns the number of blocks inserted * (0 == error.) */ -int isofs_get_blocks(struct inode *inode, long iblock, +int isofs_get_blocks(struct inode *inode, sector_t iblock, struct buffer_head **bh_result, unsigned long nblocks) { unsigned long b_off; @@ -976,7 +976,7 @@ /* * Used by the standard interfaces. 
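Callers of the kiobuf API are unaffected by the switch from vmalloc to kmalloc and by the removal of the preallocated buffer_heads; the usual pattern still holds (sketch):

        struct kiobuf *iobuf;

        if (alloc_kiovec(1, &iobuf))            /* -ENOMEM on failure */
                return -ENOMEM;

        /* ... map the pages, then brw_kiovec()/ll_rw_kio() ... */

        free_kiovec(1, &iobuf);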
*/ -static int isofs_get_block(struct inode *inode, long iblock, +static int isofs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { if ( create ) { diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/fs/partitions/check.c linux/fs/partitions/check.c --- /opt/kernel/linux-2.4.14-pre6/fs/partitions/check.c Wed Oct 24 12:49:58 2001 +++ linux/fs/partitions/check.c Mon Oct 15 10:41:43 2001 @@ -1,4 +1,6 @@ /* + * fs/partitions/check.c + * * Code extracted from drivers/block/genhd.c * Copyright (C) 1991-1998 Linus Torvalds * Re-organised Feb 1998 Russell King @@ -34,8 +36,6 @@ #include "ibm.h" #include "ultrix.h" -extern int *blk_size[]; - int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ static int (*check_part[])(struct gendisk *hd, struct block_device *bdev, unsigned long first_sect, int first_minor) = { @@ -369,38 +369,50 @@ { if (!gdev) return; - grok_partitions(gdev, MINOR(dev)>>gdev->minor_shift, minors, size); + grok_partitions(dev, size); } -void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size) +void grok_partitions(kdev_t dev, long size) { - int i; - int first_minor = drive << dev->minor_shift; - int end_minor = first_minor + dev->max_p; + int i, minors, first_minor, end_minor; + struct gendisk *g = get_gendisk(dev); + + if (!g) + return; + + minors = 1 << g->minor_shift; + first_minor = MINOR(dev); + if (first_minor & (minors-1)) { + printk("grok_partitions: bad device 0x%02x:%02x\n", + MAJOR(dev), first_minor); + first_minor &= ~(minors-1); + } + end_minor = first_minor + minors; + + if (!g->sizes) + blk_size[g->major] = NULL; - if(!dev->sizes) - blk_size[dev->major] = NULL; + g->part[first_minor].nr_sects = size; - dev->part[first_minor].nr_sects = size; /* No such device or no minors to use for partitions */ if (!size || minors == 1) return; - if (dev->sizes) { - dev->sizes[first_minor] = size >> (BLOCK_SIZE_BITS - 9); + if (g->sizes) { + g->sizes[first_minor] = size >> (BLOCK_SIZE_BITS - 9); for (i = first_minor + 1; i < end_minor; i++) - dev->sizes[i] = 0; + g->sizes[i] = 0; } - blk_size[dev->major] = dev->sizes; - check_partition(dev, MKDEV(dev->major, first_minor), 1 + first_minor); + blk_size[g->major] = g->sizes; + check_partition(g, MKDEV(g->major, first_minor), 1 + first_minor); /* * We need to set the sizes array before we will be able to access * any of the partitions on this device. */ - if (dev->sizes != NULL) { /* optional safeguard in ll_rw_blk.c */ + if (g->sizes != NULL) { /* optional safeguard in ll_rw_blk.c */ for (i = first_minor; i < end_minor; i++) - dev->sizes[i] = dev->part[i].nr_sects >> (BLOCK_SIZE_BITS - 9); + g->sizes[i] = g->part[i].nr_sects >> (BLOCK_SIZE_BITS - 9); } } @@ -425,4 +437,44 @@ } p->v = NULL; return NULL; +} + +int wipe_partitions(kdev_t dev) +{ + struct gendisk *g; + kdev_t devp; + int p, major, minor, minor0, max_p, res; + + g = get_gendisk(dev); + if (g == NULL) + return -EINVAL; + + max_p = 1 << g->minor_shift; + major = MAJOR(dev); + minor = MINOR(dev); + minor0 = minor & ~(max_p - 1); + if (minor0 != minor) /* for now only whole-disk reread */ + return -EINVAL; /* %%% later.. */ + + /* invalidate stuff */ + for (p = max_p - 1; p >= 0; p--) { + minor = minor0 + p; + devp = MKDEV(major,minor); +#if 0 /* %%% superfluous? 
*/ + if (g->part[minor].nr_sects == 0) + continue; +#endif + res = invalidate_device(devp, 1); + if (res) + return res; + g->part[minor].start_sect = 0; + g->part[minor].nr_sects = 0; + } + + /* some places do blksize_size[major][minor] = 1024, + as preparation for reading partition table - superfluous */ + /* sd.c used to set blksize_size to 2048 in case + rscsi_disks[target].device->sector_size == 2048 */ + + return 0; } diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/fs/partitions/check.h linux/fs/partitions/check.h --- /opt/kernel/linux-2.4.14-pre6/fs/partitions/check.h Tue Oct 2 05:03:26 2001 +++ linux/fs/partitions/check.h Mon Oct 15 10:41:43 2001 @@ -1,5 +1,5 @@ /* - * add_partition adds a partitions details to the devices partition + * add_gd_partition adds a partitions details to the devices partition * description. */ void add_gd_partition(struct gendisk *hd, int minor, int start, int size); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/fs/udf/inode.c linux/fs/udf/inode.c --- /opt/kernel/linux-2.4.14-pre6/fs/udf/inode.c Wed Oct 24 12:49:58 2001 +++ linux/fs/udf/inode.c Mon Oct 15 10:41:43 2001 @@ -61,7 +61,7 @@ static void udf_update_extents(struct inode *, long_ad [EXTENT_MERGE_SIZE], int, int, lb_addr, Uint32, struct buffer_head **); -static int udf_get_block(struct inode *, long, struct buffer_head *, int); +static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); /* * udf_put_inode @@ -314,7 +314,7 @@ return dbh; } -static int udf_get_block(struct inode *inode, long block, struct buffer_head *bh_result, int create) +static int udf_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create) { int err, new; struct buffer_head *bh; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/asm-i386/checksum.h linux/include/asm-i386/checksum.h --- /opt/kernel/linux-2.4.14-pre6/include/asm-i386/checksum.h Thu Jul 26 22:41:22 2001 +++ linux/include/asm-i386/checksum.h Mon Oct 15 10:41:43 2001 @@ -69,25 +69,24 @@ unsigned int ihl) { unsigned int sum; - __asm__ __volatile__(" - movl (%1), %0 - subl $4, %2 - jbe 2f - addl 4(%1), %0 - adcl 8(%1), %0 - adcl 12(%1), %0 -1: adcl 16(%1), %0 - lea 4(%1), %1 - decl %2 - jne 1b - adcl $0, %0 - movl %0, %2 - shrl $16, %0 - addw %w2, %w0 - adcl $0, %0 - notl %0 -2: - " + __asm__ __volatile__( + "movl (%1), %0 ;\n" + "subl $4, %2 ;\n" + "jbe 2f ;\n" + "addl 4(%1), %0 ;\n" + "adcl 8(%1), %0 ;\n" + "adcl 12(%1), %0 ;\n" +"1: adcl 16(%1), %0 ;\n" + "lea 4(%1), %1 ;\n" + "decl %2 ;\n" + "jne 1b ;\n" + "adcl $0, %0 ;\n" + "movl %0, %2 ;\n" + "shrl $16, %0 ;\n" + "addw %w2, %w0 ;\n" + "adcl $0, %0 ;\n" + "notl %0 ;\n" +"2: ;\n" /* Since the input registers which are loaded with iph and ipl are modified, we must also specify them as outputs, or gcc will assume they contain their original values. 
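The intended pairing for a partition table re-read (a BLKRRPART-style or revalidate path) is roughly the following; nsects stands for the device capacity in 512-byte sectors and is illustrative:

        int res = wipe_partitions(dev);         /* invalidate and clear old entries */
        if (res)
                return res;

        grok_partitions(dev, nsects);           /* re-scan the on-disk table */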
*/ @@ -102,10 +101,9 @@ static inline unsigned int csum_fold(unsigned int sum) { - __asm__(" - addl %1, %0 - adcl $0xffff, %0 - " + __asm__( + "addl %1, %0 ;\n" + "adcl $0xffff, %0 ;\n" : "=r" (sum) : "r" (sum << 16), "0" (sum & 0xffff0000) ); @@ -118,12 +116,11 @@ unsigned short proto, unsigned int sum) { - __asm__(" - addl %1, %0 - adcl %2, %0 - adcl %3, %0 - adcl $0, %0 - " + __asm__( + "addl %1, %0 ;\n" + "adcl %2, %0 ;\n" + "adcl %3, %0 ;\n" + "adcl $0, %0 ;\n" : "=r" (sum) : "g" (daddr), "g"(saddr), "g"((ntohs(len)<<16)+proto*256), "0"(sum)); return sum; @@ -158,19 +155,18 @@ unsigned short proto, unsigned int sum) { - __asm__(" - addl 0(%1), %0 - adcl 4(%1), %0 - adcl 8(%1), %0 - adcl 12(%1), %0 - adcl 0(%2), %0 - adcl 4(%2), %0 - adcl 8(%2), %0 - adcl 12(%2), %0 - adcl %3, %0 - adcl %4, %0 - adcl $0, %0 - " + __asm__( + "addl 0(%1), %0" + "adcl 4(%1), %0" + "adcl 8(%1), %0" + "adcl 12(%1), %0" + "adcl 0(%2), %0" + "adcl 4(%2), %0" + "adcl 8(%2), %0" + "adcl 12(%2), %0" + "adcl %3, %0" + "adcl %4, %0" + "adcl $0, %0" : "=&r" (sum) : "r" (saddr), "r" (daddr), "r"(htonl(len)), "r"(htonl(proto)), "0"(sum)); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/asm-i386/floppy.h linux/include/asm-i386/floppy.h --- /opt/kernel/linux-2.4.14-pre6/include/asm-i386/floppy.h Wed Oct 31 09:39:13 2001 +++ linux/include/asm-i386/floppy.h Thu Nov 1 11:20:13 2001 @@ -75,28 +75,28 @@ #ifndef NO_FLOPPY_ASSEMBLER __asm__ ( - "testl %1,%1 - je 3f -1: inb %w4,%b0 - andb $160,%b0 - cmpb $160,%b0 - jne 2f - incw %w4 - testl %3,%3 - jne 4f - inb %w4,%b0 - movb %0,(%2) - jmp 5f -4: movb (%2),%0 - outb %b0,%w4 -5: decw %w4 - outb %0,$0x80 - decl %1 - incl %2 - testl %1,%1 - jne 1b -3: inb %w4,%b0 -2: " + "testl %1,%1" + "je 3f" +"1: inb %w4,%b0" + "andb $160,%b0" + "cmpb $160,%b0" + "jne 2f" + "incw %w4" + "testl %3,%3" + "jne 4f" + "inb %w4,%b0" + "movb %0,(%2)" + "jmp 5f" +"4: movb (%2),%0" + "outb %b0,%w4" +"5: decw %w4" + "outb %0,$0x80" + "decl %1" + "incl %2" + "testl %1,%1" + "jne 1b" +"3: inb %w4,%b0" +"2: " : "=a" ((char) st), "=c" ((long) virtual_dma_count), "=S" ((long) virtual_dma_addr) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/asm-i386/kmap_types.h linux/include/asm-i386/kmap_types.h --- /opt/kernel/linux-2.4.14-pre6/include/asm-i386/kmap_types.h Mon Sep 17 22:16:30 2001 +++ linux/include/asm-i386/kmap_types.h Mon Oct 15 10:41:43 2001 @@ -7,6 +7,7 @@ KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, + KM_BIO_IRQ, KM_TYPE_NR }; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/asm-i386/page.h linux/include/asm-i386/page.h --- /opt/kernel/linux-2.4.14-pre6/include/asm-i386/page.h Wed Oct 24 12:49:58 2001 +++ linux/include/asm-i386/page.h Thu Nov 1 11:20:04 2001 @@ -101,6 +101,12 @@ BUG(); \ } while (0) +#define BUG_ON(condition) \ + do { \ + if (unlikely((int)(condition))) \ + BUG(); \ + } while (0) + /* Pure 2^n version of get_order */ static __inline__ int get_order(unsigned long size) { diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/asm-m68k/machdep.h linux/include/asm-m68k/machdep.h --- /opt/kernel/linux-2.4.14-pre6/include/asm-m68k/machdep.h Tue Nov 28 02:57:34 2000 +++ linux/include/asm-m68k/machdep.h Mon Oct 15 10:41:43 2001 @@ -5,7 +5,6 @@ struct kbd_repeat; struct mktime; struct hwclk_time; -struct gendisk; struct buffer_head; extern void (*mach_sched_init) (void (*handler)(int, void *, struct pt_regs *)); diff -urN --exclude-from /home/axboe/exclude 
/opt/kernel/linux-2.4.14-pre6/include/asm-ppc/kmap_types.h linux/include/asm-ppc/kmap_types.h --- /opt/kernel/linux-2.4.14-pre6/include/asm-ppc/kmap_types.h Mon Sep 17 22:16:30 2001 +++ linux/include/asm-ppc/kmap_types.h Thu Nov 1 14:33:06 2001 @@ -11,6 +11,7 @@ KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, + KM_BIO_IRQ, KM_TYPE_NR }; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/asm-sparc/kmap_types.h linux/include/asm-sparc/kmap_types.h --- /opt/kernel/linux-2.4.14-pre6/include/asm-sparc/kmap_types.h Mon Sep 17 22:16:30 2001 +++ linux/include/asm-sparc/kmap_types.h Thu Nov 1 14:33:25 2001 @@ -7,6 +7,7 @@ KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, + KM_BIO_IRQ, KM_TYPE_NR }; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/bio.h linux/include/linux/bio.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/bio.h Thu Jan 1 01:00:00 1970 +++ linux/include/linux/bio.h Wed Oct 31 14:49:07 2001 @@ -0,0 +1,196 @@ +/* + * New 2.5 block I/O model + * + * Copyright (C) 2001 Jens Axboe + * + * This program is free software; you can redistribute it and/or mo + * it under the terms of the GNU General Public License as publishe + * the Free Software Foundation; either version 2 of the License, o + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ +#ifndef __LINUX_BIO_H +#define __LINUX_BIO_H + +struct bio_vec { + struct page *bv_page; + unsigned short bv_len; + unsigned short bv_offset; +}; + +typedef struct bio_hash_s { + struct bio_hash_s *next_hash; + struct bio_hash_s **pprev_hash; + unsigned int valid_counter; +} bio_hash_t; + +struct bio_hash_bucket { + rwlock_t lock; + bio_hash_t *hash; +} __attribute__((__aligned__(16))); + +#define BIO_HASH_BITS (bio_hash_bits) +#define BIO_HASH_SIZE (1UL << BIO_HASH_BITS) + +#define BLKHASHPROF _IOR(0x12,108,sizeof(struct bio_hash_stats)) +#define BLKHASHCLEAR _IO(0x12,109) + +#define MAX_PROFILE_BUCKETS 64 + +struct bio_hash_stats { + unsigned long nr_lookups; + unsigned long nr_hits; + unsigned long nr_inserts; + unsigned long max_bucket_size; + unsigned long bucket_size[MAX_PROFILE_BUCKETS + 1]; + + unsigned long q_nr_back_lookups; + unsigned long q_nr_back_hits; + unsigned long q_nr_back_merges; + unsigned long q_nr_front_lookups; + unsigned long q_nr_front_hits; + unsigned long q_nr_front_merges; +}; + +/* + * hash table must be a power of two + */ +typedef struct bio_hash_table_s { + bio_hash_t **table; + unsigned short shift; + unsigned int valid_counter; +#ifdef BIO_HASH_PROFILING + struct bio_hash_stats st; +#endif +} bio_hash_table_t; + +/* + * shamelessly stolen from the list.h implementation + */ +#define hash_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) +#define bio_hash_entry(ptr) \ + hash_entry((ptr), struct bio, bi_hash) + +/* + * main unit of I/O for the block layer and lower layers (ie drivers) + */ +struct bio { + sector_t bi_sector; + struct bio *bi_next; /* request queue link */ + bio_hash_t bi_hash; + atomic_t bi_cnt; /* free when it hits zero */ + kdev_t bi_dev; + struct bio_vec 
bi_io_vec; + unsigned long bi_flags; /* status, command, etc */ + unsigned long bi_rw; /* bottom bits READ/WRITE, + * top bits priority + */ + void (*bi_end_io)(struct bio *bio); + void *bi_private; + struct request *bi_req; /* linked to this request */ +}; + +#define BIO_SECTOR_BITS 9 +#define BIO_OFFSET_MASK ((1UL << (PAGE_CACHE_SHIFT - BIO_SECTOR_BITS)) - 1) +#define BIO_PAGE_MASK (PAGE_CACHE_SIZE - 1) + +/* + * bio flags, do NOT change the order of these (rw mask is important) + */ +#define BIO_UPTODATE 0 /* ok after I/O completion */ +#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ +#define BIO_EOF 2 /* out-out-bounds error */ +#define BIO_HASHED 3 /* on hash */ + +/* + * bio bi_rw flags + * + * bit 0 -- read (not set) or write (set) + * bit 1 -- rw-ahead when set + * bit 2 -- barrier + */ +#define BIO_BARRIER 2 + +/* + * various member access, note that bio_data should of course not be used + * on highmem page vectors + */ +#define bio_iovec(bio) (&(bio)->bi_io_vec) +#define bio_page(bio) bio_iovec((bio))->bv_page +#define bio_size(bio) bio_iovec((bio))->bv_len +#define bio_offset(bio) bio_iovec((bio))->bv_offset +#define bio_sectors(bio) (bio_size((bio)) >> BIO_SECTOR_BITS) +#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio))) +#define bio_to_phys(bio) (page_to_phys(bio_page((bio))) + bio_offset((bio))) +#define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_BARRIER)) + +/* + * queues that have highmem support enabled may still need to revert to + * PIO transfers occasionally and thus map high pages temporarily. For + * permanent PIO fall back, user is probably better off disabling highmem + * I/O completely on that queue (see ide-dma for example) + */ +#define bio_kmap(bio) (kmap(bio_page((bio))) + bio_offset((bio))) +#define bio_kunmap(bio) kunmap(bio_page((bio))) + +#define BIO_CONTIG(bio, nxt) \ + (bio_to_phys((bio)) + bio_size((bio)) == bio_to_phys((nxt))) +#define BIO_PHYS_4G(b1, b2) \ + ((bio_to_phys((b1)) | 0xffffffff) == ((bio_to_phys((b2)) + bio_size((b2)) - 1) | 0xffffffff)) + +typedef void (bi_end_io_t) (struct bio *); + +#define bio_endio(bio, ok) \ + do { \ + if (ok) \ + set_bit(BIO_UPTODATE, &(bio)->bi_flags); \ + else \ + clear_bit(BIO_UPTODATE, &(bio)->bi_flags); \ + \ + (bio)->bi_end_io((bio)); \ + } while (0) + +#define bio_io_error(bio) bio_endio((bio), 0) + +/* + * get a reference to a bio, so it won't disappear. the intended use is + * something like: + * + * bio_get(bio); + * submit_bio(rw, bio); + * if (bio->bi_flags ...) + * do_something + * bio_put(bio); + * + * without the bio_get(), it could potentially complete I/O before submit_bio + * returns. and then bio would be freed memory when if (bio->bi_flags ...) 
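From a driver's point of view, completion goes through the bio_endio() helper above; a minimal sketch (the hardware status handling is illustrative):

        static void mydrv_complete_bio(struct bio *bio, int hw_ok)
        {
                if (hw_ok)
                        bio_endio(bio, 1);      /* sets BIO_UPTODATE, runs bi_end_io */
                else
                        bio_io_error(bio);      /* bio_endio(bio, 0) */
        }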
+ * runs + */ +#define bio_get(bio) atomic_inc(&(bio)->bi_cnt) + +extern struct bio *bio_alloc(int); +extern void bio_put(struct bio *); + +/* + * the hash stuff is pretty closely tied to the request queue (needed for + * locking etc anyway, and it's in no way an attempt at a generic hash) + */ +struct request_queue; + +extern inline void bio_hash_remove(struct bio *); +extern inline void bio_hash_add(struct bio *, unsigned int); +extern inline struct bio *bio_hash_find(kdev_t, sector_t, unsigned int); +extern inline int bio_hash_add_unique(struct bio *, unsigned int); +extern void bio_hash_invalidate(struct request_queue *, kdev_t); + +#endif /* __LINUX_BIO_H */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/blk.h linux/include/linux/blk.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/blk.h Thu Oct 11 08:47:07 2001 +++ linux/include/linux/blk.h Thu Nov 1 11:23:46 2001 @@ -7,13 +7,6 @@ #include /* - * Spinlock for protecting the request queue which - * is mucked around with in interrupts on potentially - * multiple CPU's.. - */ -extern spinlock_t io_request_lock; - -/* * Initialization functions. */ extern int isp16_init(void); @@ -85,13 +78,13 @@ * code duplication in drivers. */ -static inline void blkdev_dequeue_request(struct request * req) +static inline void blkdev_dequeue_request(struct request *req) { - list_del(&req->queue); + list_del(&req->queuelist); } -int end_that_request_first(struct request *req, int uptodate, char *name); -void end_that_request_last(struct request *req); +int end_that_request_first(struct request *, int uptodate); +void end_that_request_last(struct request *); #if defined(MAJOR_NR) || defined(IDE_DRIVER) @@ -336,12 +329,16 @@ #if !defined(IDE_DRIVER) #ifndef CURRENT -#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head) +#define CURRENT elv_next_request(&blk_dev[MAJOR_NR].request_queue) +#endif +#ifndef QUEUE +#define QUEUE (&blk_dev[MAJOR_NR].request_queue) #endif #ifndef QUEUE_EMPTY -#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head) +#define QUEUE_EMPTY blk_queue_empty(QUEUE) #endif + #ifndef DEVICE_NAME #define DEVICE_NAME "unknown" #endif @@ -365,16 +362,14 @@ #endif #define INIT_REQUEST \ - if (QUEUE_EMPTY) {\ + if (QUEUE_EMPTY) { \ CLEAR_INTR; \ - return; \ + return; \ } \ if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) \ panic(DEVICE_NAME ": request list destroyed"); \ - if (CURRENT->bh) { \ - if (!buffer_locked(CURRENT->bh)) \ - panic(DEVICE_NAME ": block not locked"); \ - } + if (!CURRENT->bio) \ + panic(DEVICE_NAME ": no bio"); \ #endif /* !defined(IDE_DRIVER) */ @@ -383,10 +378,11 @@ #if ! SCSI_BLK_MAJOR(MAJOR_NR) && (MAJOR_NR != COMPAQ_SMART2_MAJOR) -static inline void end_request(int uptodate) { +static inline void end_request(int uptodate) +{ struct request *req = CURRENT; - if (end_that_request_first(req, uptodate, DEVICE_NAME)) + if (end_that_request_first(req, uptodate)) return; #ifndef DEVICE_NO_RANDOM diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/blkdev.h linux/include/linux/blkdev.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/blkdev.h Wed Oct 31 09:39:13 2001 +++ linux/include/linux/blkdev.h Thu Nov 1 11:20:04 2001 @@ -12,54 +12,46 @@ struct elevator_s; typedef struct elevator_s elevator_t; -/* - * Ok, this is an expanded form so that we can use the same - * request for paging requests. - */ struct request { - struct list_head queue; + struct list_head queuelist; /* looking for ->queue? 
you must _not_ + * access it directly, use + * blkdev_dequeue_request! */ int elevator_sequence; - volatile int rq_status; /* should split this into a few status bits */ -#define RQ_INACTIVE (-1) -#define RQ_ACTIVE 1 -#define RQ_SCSI_BUSY 0xffff -#define RQ_SCSI_DONE 0xfffe -#define RQ_SCSI_DISCONNECTING 0xffe0 - + int rq_status; /* should split this into a few status bits */ kdev_t rq_dev; int cmd; /* READ or WRITE */ int errors; - unsigned long sector; + sector_t sector; unsigned long nr_sectors; unsigned long hard_sector, hard_nr_sectors; - unsigned int nr_segments; - unsigned int nr_hw_segments; - unsigned long current_nr_sectors; - void * special; - char * buffer; - struct completion * waiting; - struct buffer_head * bh; - struct buffer_head * bhtail; + unsigned short nr_segments; + unsigned short nr_hw_segments; + unsigned short current_nr_sectors; + unsigned short hard_cur_sectors; + void *special; + char *buffer; + struct completion *waiting; + struct bio *bio, *biotail; request_queue_t *q; }; #include -typedef int (merge_request_fn) (request_queue_t *q, - struct request *req, - struct buffer_head *bh, - int); -typedef int (merge_requests_fn) (request_queue_t *q, - struct request *req, - struct request *req2, - int); +typedef int (merge_request_fn) (request_queue_t *, struct request *, + struct bio *); +typedef int (merge_requests_fn) (request_queue_t *, struct request *, + struct request *); typedef void (request_fn_proc) (request_queue_t *q); typedef request_queue_t * (queue_proc) (kdev_t dev); -typedef int (make_request_fn) (request_queue_t *q, int rw, struct buffer_head *bh); -typedef void (plug_device_fn) (request_queue_t *q, kdev_t device); +typedef int (make_request_fn) (request_queue_t *q, struct bio *bio); typedef void (unplug_device_fn) (void *q); +enum blk_queue_state { + Queue_down, + Queue_up, +}; + /* * Default nr free requests per queue, ll_rw_blk will scale it down * according to available RAM at init time @@ -89,7 +81,7 @@ merge_request_fn * front_merge_fn; merge_requests_fn * merge_requests_fn; make_request_fn * make_request_fn; - plug_device_fn * plug_device_fn; + /* * The queue owner gets to use this for whatever they like. * ll_rw_blk doesn't touch it. @@ -97,33 +89,116 @@ void * queuedata; /* - * This is used to remove the plug when tq_disk runs. + * queue needs bounce pages for pages above this limit (phys addr) */ - struct tq_struct plug_tq; + struct page *bounce_limit; + int bounce_gfp; /* - * Boolean that indicates whether this queue is plugged or not. + * This is used to remove the plug when tq_disk runs. */ - char plugged; + struct tq_struct plug_tq; /* - * Boolean that indicates whether current_request is active or - * not. 
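To see how the new request fields fit together with the blk.h macros earlier in the patch, here is a skeleton request function; the mydrv_ names are illustrative and the transfer completes synchronously only to keep the sketch short.

        static void mydrv_request_fn(request_queue_t *q)
        {
                struct request *rq;

                while (!blk_queue_empty(q)) {
                        rq = elv_next_request(q);

                        /* program the hardware from rq->sector, rq->bio, ... */
                        mydrv_do_transfer(rq);

                        if (!end_that_request_first(rq, 1)) {
                                /* 0 == whole request finished */
                                blkdev_dequeue_request(rq);
                                end_that_request_last(rq);
                        }
                }
        }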
+ * various queue flags, see QUEUE_* below */ - char head_active; + unsigned long queue_flags; /* - * Is meant to protect the queue in the future instead of - * io_request_lock + * protects queue structures from reentrancy */ spinlock_t queue_lock; /* * Tasks wait here for free request */ - wait_queue_head_t wait_for_request; + wait_queue_head_t wait_for_request[2]; + + /* + * queue settings + */ + unsigned short max_sectors; + unsigned short max_segments; + unsigned short hardsect_size; + + /* + * queue state + */ + enum blk_queue_state queue_state; + + wait_queue_head_t queue_wait; + + unsigned int hash_valid_counter; + + unsigned long queue_magic; }; +#define RQ_INACTIVE (-1) +#define RQ_ACTIVE 1 +#define RQ_SCSI_BUSY 0xffff +#define RQ_SCSI_DONE 0xfffe +#define RQ_SCSI_DISCONNECTING 0xffe0 + +#define QUEUE_FLAG_PLUGGED 0 /* queue is plugged */ +#define QUEUE_FLAG_HEADACTIVE 1 /* has active head (going away) */ + +#define blk_queue_flag(q, flag) test_bit(QUEUE_FLAG_##flag, &(q)->queue_flags) +#define blk_set_flag(q, flag) set_bit(QUEUE_FLAG_##flag, &(q)->queue_flags) +#define blk_clear_flag(q, flag) clear_bit(QUEUE_FLAG_##flag, &(q)->queue_flags) + +#define blk_queue_plugged(q) blk_queue_flag(q, PLUGGED) +#define blk_queue_headlive(q) blk_queue_flag(q, HEADACTIVE) + +#define blk_mark_plugged(q) blk_set_flag(q, PLUGGED) +#define blk_mark_headactive(q) blk_set_flag(q, HEADACTIVE) + +#define blk_queue_empty(q) elv_queue_empty(q) + +#ifdef CONFIG_HIGHMEM +extern void create_bounce(struct bio **, int); +extern inline void blk_queue_bounce(request_queue_t *q, struct bio **bio) +{ + if (bio_page(*bio) > q->bounce_limit) + create_bounce(bio, q->bounce_gfp); +} +#else +#define blk_queue_bounce(q, bio) do { } while (0) +#endif + +extern unsigned long blk_max_low_pfn, blk_max_pfn; + +static inline struct request *elv_next_request(request_queue_t *q) +{ + struct request *rq = q->elevator.elevator_next_req_fn(q); + + if (rq) { + if (rq->bio) + bio_hash_remove(rq->bio); + if (rq->biotail) + bio_hash_remove(rq->biotail); + } + + return rq; +} + +static inline void elv_add_request_fn(request_queue_t *q, struct request *rq, + struct list_head *insert_here) +{ + /* + * insert into queue pending list, merge hash, and possible latency + * list + */ + list_add(&rq->queuelist, insert_here); +} + +static inline struct request *elv_next_request_fn(request_queue_t *q) +{ + return list_entry(q->queue_head.next, struct request, queuelist); +} + +#define BLK_BOUNCE_HIGH (blk_max_low_pfn * PAGE_SIZE) +#define BLK_BOUNCE_ANY (blk_max_pfn * PAGE_SIZE) + struct blk_dev_struct { /* * queue_proc has to be atomic @@ -148,58 +223,66 @@ extern struct sec_size * blk_sec[MAX_BLKDEV]; extern struct blk_dev_struct blk_dev[MAX_BLKDEV]; -extern void grok_partitions(struct gendisk *dev, int drive, unsigned minors, long size); +extern void grok_partitions(kdev_t dev, long size); +extern int wipe_partitions(kdev_t dev); extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size); -extern void generic_make_request(int rw, struct buffer_head * bh); +extern void generic_make_request(struct bio *bio); extern inline request_queue_t *blk_get_queue(kdev_t dev); extern void blkdev_release_request(struct request *); +extern int blk_wake_queue(request_queue_t *); +extern void blk_attempt_remerge(request_queue_t *, struct request *); /* * Access functions for manipulating queue properties */ -extern void blk_init_queue(request_queue_t *, request_fn_proc *); +extern int 
blk_init_queue(request_queue_t *, request_fn_proc *, char *); extern void blk_cleanup_queue(request_queue_t *); extern void blk_queue_headactive(request_queue_t *, int); extern void blk_queue_make_request(request_queue_t *, make_request_fn *); +extern void blk_queue_bounce_limit(request_queue_t *, unsigned long long); +extern void blk_queue_max_sectors(request_queue_t *q, unsigned short); +extern void blk_queue_max_segments(request_queue_t *q, unsigned short); +extern void blk_queue_hardsect_size(request_queue_t *q, unsigned short); extern void generic_unplug_device(void *); extern int * blk_size[MAX_BLKDEV]; extern int * blksize_size[MAX_BLKDEV]; -extern int * hardsect_size[MAX_BLKDEV]; - extern int * max_readahead[MAX_BLKDEV]; -extern int * max_sectors[MAX_BLKDEV]; - -extern int * max_segments[MAX_BLKDEV]; - #define MAX_SEGMENTS 128 #define MAX_SECTORS 255 -#define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK) - /* read-ahead in pages.. */ #define MAX_READAHEAD 31 #define MIN_READAHEAD 3 -#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queue) +#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) #define blkdev_entry_next_request(entry) blkdev_entry_to_request((entry)->next) #define blkdev_entry_prev_request(entry) blkdev_entry_to_request((entry)->prev) -#define blkdev_next_request(req) blkdev_entry_to_request((req)->queue.next) -#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queue.prev) +#define blkdev_next_request(req) blkdev_entry_to_request((req)->queuelist.next) +#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queuelist.prev) extern void drive_stat_acct (kdev_t dev, int rw, unsigned long nr_sectors, int new_io); +static inline void blk_clear(int major) +{ + blk_size[major] = NULL; +#if 0 + blk_size_in_bytes[major] = NULL; +#endif + blksize_size[major] = NULL; + max_readahead[major] = NULL; + read_ahead[major] = 0; +} + static inline int get_hardsect_size(kdev_t dev) { - extern int *hardsect_size[]; - if (hardsect_size[MAJOR(dev)] != NULL) - return hardsect_size[MAJOR(dev)][MINOR(dev)]; - else - return 512; + request_queue_t *q = blk_get_queue(dev); + + return q ? q->hardsect_size : 512; } #define blk_finished_io(nsects) do { } while (0) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/bootmem.h linux/include/linux/bootmem.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/bootmem.h Thu Oct 11 08:46:36 2001 +++ linux/include/linux/bootmem.h Thu Nov 1 11:20:04 2001 @@ -18,6 +18,11 @@ extern unsigned long min_low_pfn; /* + * highest page + */ +extern unsigned long max_pfn; + +/* * node_bootmem_map is a map pointer - the bits represent all physical * memory pages (including holes) on the node. 
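With per-queue limits replacing the old max_sectors[]/max_segments[]/hardsect_size[] arrays, a driver's init path reduces to something like this. The limit values and names are illustrative, and treating a non-zero return from the new blk_init_queue() as failure is an assumption based on its int prototype.

        request_queue_t *q = BLK_DEFAULT_QUEUE(MAJOR_NR);

        if (blk_init_queue(q, mydrv_request_fn, "mydrv"))
                return -ENOMEM;                         /* assumption, see above */

        blk_queue_headactive(q, 0);
        blk_queue_max_segments(q, 32);
        blk_queue_max_sectors(q, 128);
        blk_queue_hardsect_size(q, 512);
        blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);     /* bounce anything above low mem */
        q->queuedata = mydrv_private;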
*/ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/devfs_fs_kernel.h linux/include/linux/devfs_fs_kernel.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/devfs_fs_kernel.h Thu Oct 11 08:46:04 2001 +++ linux/include/linux/devfs_fs_kernel.h Thu Nov 1 11:20:04 2001 @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/elevator.h linux/include/linux/elevator.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/elevator.h Fri Feb 16 01:58:34 2001 +++ linux/include/linux/elevator.h Mon Oct 15 10:41:43 2001 @@ -5,13 +5,20 @@ struct list_head *, struct list_head *, int); -typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *, - struct buffer_head *, int, int); +typedef int (elevator_merge_fn) (request_queue_t *, struct request **, + struct list_head *, struct bio *); typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int); typedef void (elevator_merge_req_fn) (struct request *, struct request *); +typedef struct request *(elevator_next_req_fn) (request_queue_t *); + +typedef void (elevator_add_req_fn) (request_queue_t *, struct request *, struct list_head *); + +typedef int (elevator_init_fn) (request_queue_t *, elevator_t *); +typedef void (elevator_exit_fn) (request_queue_t *, elevator_t *); + struct elevator_s { int read_latency; @@ -21,31 +28,44 @@ elevator_merge_cleanup_fn *elevator_merge_cleanup_fn; elevator_merge_req_fn *elevator_merge_req_fn; - unsigned int queue_ID; + elevator_next_req_fn *elevator_next_req_fn; + elevator_add_req_fn *elevator_add_req_fn; + + elevator_init_fn *elevator_init_fn; + elevator_exit_fn *elevator_exit_fn; + + /* + * per-elevator private data + */ + void *elevator_data; + + char queue_name[16]; }; -int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); +int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct bio *); void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int); void elevator_noop_merge_req(struct request *, struct request *); -int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int); +int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct bio *); void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int); void elevator_linus_merge_req(struct request *, struct request *); +int elv_linus_init(request_queue_t *, elevator_t *); +void elv_linus_exit(request_queue_t *, elevator_t *); +/* + * use the /proc/iosched interface, all the below is history -> + */ typedef struct blkelv_ioctl_arg_s { int queue_ID; int read_latency; int write_latency; int max_bomb_segments; } blkelv_ioctl_arg_t; - #define BLKELVGET _IOR(0x12,106,sizeof(blkelv_ioctl_arg_t)) #define BLKELVSET _IOW(0x12,107,sizeof(blkelv_ioctl_arg_t)) -extern int blkelvget_ioctl(elevator_t *, blkelv_ioctl_arg_t *); -extern int blkelvset_ioctl(elevator_t *, const blkelv_ioctl_arg_t *); - -extern void elevator_init(elevator_t *, elevator_t); +extern int elevator_init(request_queue_t *, elevator_t *, elevator_t, char *); +extern void elevator_exit(request_queue_t *, elevator_t *); /* * Return values from elevator merger @@ -81,6 +101,24 @@ return latency; } +/* + * will change once we move to a more complex data structure than a simple + * list for pending requests + */ +#define 
elv_queue_empty(q) list_empty(&(q)->queue_head) + +/* + * elevator private data + */ +struct elv_linus_data { + unsigned long flags; +}; + +#define ELV_DAT(e) ((struct elv_linus_data *)(e)->elevator_data) + +#define ELV_LINUS_BACK_MERGE 1 +#define ELV_LINUS_FRONT_MERGE 2 + #define ELEVATOR_NOOP \ ((elevator_t) { \ 0, /* read_latency */ \ @@ -89,6 +127,10 @@ elevator_noop_merge, /* elevator_merge_fn */ \ elevator_noop_merge_cleanup, /* elevator_merge_cleanup_fn */ \ elevator_noop_merge_req, /* elevator_merge_req_fn */ \ + elv_next_request_fn, \ + elv_add_request_fn, \ + elv_linus_init, \ + elv_linus_exit, \ }) #define ELEVATOR_LINUS \ @@ -99,6 +141,10 @@ elevator_linus_merge, /* elevator_merge_fn */ \ elevator_linus_merge_cleanup, /* elevator_merge_cleanup_fn */ \ elevator_linus_merge_req, /* elevator_merge_req_fn */ \ + elv_next_request_fn, \ + elv_add_request_fn, \ + elv_linus_init, \ + elv_linus_exit, \ }) #endif diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/fs.h linux/include/linux/fs.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/fs.h Wed Oct 31 09:39:13 2001 +++ linux/include/linux/fs.h Thu Nov 1 11:20:04 2001 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -74,6 +75,8 @@ #define FMODE_READ 1 #define FMODE_WRITE 2 +#define RW_MASK 1 +#define RWA_MASK 2 #define READ 0 #define WRITE 1 #define READA 2 /* read-ahead - don't block if no resources */ @@ -201,6 +204,7 @@ extern void update_atime (struct inode *); #define UPDATE_ATIME(inode) update_atime (inode) +extern void bio_hash_init(unsigned long); extern void buffer_init(unsigned long); extern void inode_init(unsigned long); extern void mnt_init(unsigned long); @@ -236,28 +240,24 @@ struct buffer_head { /* First cache line: */ struct buffer_head *b_next; /* Hash queue list */ - unsigned long b_blocknr; /* block number */ + sector_t b_blocknr; /* block number */ unsigned short b_size; /* block size */ unsigned short b_list; /* List that this buffer appears */ kdev_t b_dev; /* device (B_FREE = free) */ atomic_t b_count; /* users using this block */ - kdev_t b_rdev; /* Real device */ unsigned long b_state; /* buffer state bitmap (see above) */ unsigned long b_flushtime; /* Time when (dirty) buffer should be written */ struct buffer_head *b_next_free;/* lru/free list linkage */ struct buffer_head *b_prev_free;/* doubly linked list of buffers */ struct buffer_head *b_this_page;/* circular list of buffers in one page */ - struct buffer_head *b_reqnext; /* request queue */ - struct buffer_head **b_pprev; /* doubly linked list of hash-queue */ char * b_data; /* pointer to data block */ struct page *b_page; /* the page this bh is mapped to */ void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */ void *b_private; /* reserved for b_end_io */ - unsigned long b_rsector; /* Real buffer location on disk */ wait_queue_head_t b_wait; struct inode * b_inode; @@ -1157,12 +1157,18 @@ static inline void buffer_IO_error(struct buffer_head * bh) { mark_buffer_clean(bh); + /* - * b_end_io has to clear the BH_Uptodate bitflag in the error case! 
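The elevator is now set up per queue through the new init/exit pair; presumably blk_init_queue() does this internally with the name it is handed, but the signatures are used like so (sketch, failure convention assumed):

        if (elevator_init(q, &q->elevator, ELEVATOR_LINUS, "mydrv"))
                return -ENOMEM;                 /* assumption: non-zero on failure */

        /* and on queue teardown: */
        elevator_exit(q, &q->elevator);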
+ * b_end_io has to clear the BH_Uptodate bitflag in the read error + * case, however buffer contents are not necessarily bad if a + * write fails */ - bh->b_end_io(bh, 0); + bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); } +#define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) +#define bio_data_dir(bio) (!!((bio)->bi_rw & 1)) + extern void buffer_insert_inode_queue(struct buffer_head *, struct inode *); static inline void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) { @@ -1329,10 +1335,11 @@ extern void remove_inode_hash(struct inode *); extern struct file * get_empty_filp(void); extern void file_move(struct file *f, struct list_head *list); -extern struct buffer_head * get_hash_table(kdev_t, int, int); -extern struct buffer_head * getblk(kdev_t, int, int); +extern struct buffer_head * get_hash_table(kdev_t, sector_t, int); +extern struct buffer_head * getblk(kdev_t, sector_t, int); extern void ll_rw_block(int, int, struct buffer_head * bh[]); -extern void submit_bh(int, struct buffer_head *); +extern int submit_bh(int, struct buffer_head *); +extern int submit_bio(int, struct bio *); extern int is_read_only(kdev_t); extern void __brelse(struct buffer_head *); static inline void brelse(struct buffer_head *buf) @@ -1350,9 +1357,9 @@ extern struct buffer_head * bread(kdev_t, int, int); extern void wakeup_bdflush(void); -extern int brw_page(int, struct page *, kdev_t, int [], int); +extern int brw_page(int, struct page *, kdev_t, sector_t [], int); -typedef int (get_block_t)(struct inode*,long,struct buffer_head*,int); +typedef int (get_block_t)(struct inode*,sector_t,struct buffer_head*,int); /* Generic buffer handling for block filesystems.. */ extern int discard_bh_page(struct page *, unsigned long, int); @@ -1367,7 +1374,7 @@ extern int block_commit_write(struct page *page, unsigned from, unsigned to); extern int block_sync_page(struct page *); -int generic_block_bmap(struct address_space *, long, get_block_t *); +sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int generic_commit_write(struct file *, struct page *, unsigned, unsigned); int block_truncate_page(struct address_space *, loff_t, get_block_t *); extern void create_empty_buffers(struct page *, kdev_t, unsigned long); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/genhd.h linux/include/linux/genhd.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/genhd.h Wed Oct 24 12:49:58 2001 +++ linux/include/linux/genhd.h Thu Nov 1 11:20:04 2001 @@ -86,11 +86,11 @@ }; /* drivers/block/genhd.c */ -extern struct gendisk *gendisk_head; - extern void add_gendisk(struct gendisk *gp); extern void del_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(kdev_t dev); +extern unsigned long get_start_sect(kdev_t dev); +extern unsigned long get_nr_sects(kdev_t dev); #endif /* __KERNEL__ */ @@ -244,35 +244,10 @@ extern void devfs_register_partitions (struct gendisk *dev, int minor, int unregister); - - -/* - * FIXME: this should use genhd->minor_shift, but that is slow to look up. 
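The two helpers added above replace open-coded tests of bi_rw; usage is simply (illustrative names):

        if (bio_data_dir(bio) == WRITE)         /* bit 0 of bi_rw */
                mydrv_queue_write(bio);
        else
                mydrv_queue_read(bio);          /* bio_rw() additionally tells READ from READA */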
- */ static inline unsigned int disk_index (kdev_t dev) { - int major = MAJOR(dev); - int minor = MINOR(dev); - unsigned int index; - - switch (major) { - case DAC960_MAJOR+0: - index = (minor & 0x00f8) >> 3; - break; - case SCSI_DISK0_MAJOR: - index = (minor & 0x00f0) >> 4; - break; - case IDE0_MAJOR: /* same as HD_MAJOR */ - case XT_DISK_MAJOR: - index = (minor & 0x0040) >> 6; - break; - case IDE1_MAJOR: - index = ((minor & 0x0040) >> 6) + 2; - break; - default: - return 0; - } - return index; + struct gendisk *g = get_gendisk(dev); + return g ? (MINOR(dev) >> g->minor_shift) : 0; } #endif diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/highmem.h linux/include/linux/highmem.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/highmem.h Thu Oct 11 08:44:34 2001 +++ linux/include/linux/highmem.h Thu Nov 1 11:22:32 2001 @@ -2,6 +2,7 @@ #define _LINUX_HIGHMEM_H #include +#include #include #ifdef CONFIG_HIGHMEM @@ -13,7 +14,7 @@ /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); -extern struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig); +extern void create_bounce(struct bio **bio_orig, int gfp_mask); static inline char *bh_kmap(struct buffer_head *bh) @@ -26,6 +27,44 @@ kunmap(bh->b_page); } +/* + * remember to add offset! and never ever reenable interrupts between a + * bio_kmap_irq and bio_kunmap_irq!! + */ +static inline char *bio_kmap_irq(struct bio *bio, unsigned long *flags) +{ + unsigned long addr; + + __save_flags(*flags); + + /* + * could be low + */ + if (!PageHighMem(bio_page(bio))) + return bio_data(bio); + + /* + * it's a highmem page + */ + __cli(); + addr = (unsigned long) kmap_atomic(bio_page(bio), KM_BIO_IRQ); + + if (addr & ~PAGE_MASK) + BUG(); + + return (char *) addr + bio_offset(bio); +} + +static inline void bio_kunmap_irq(char *buffer, unsigned long *flags) +{ + unsigned long ptr = (unsigned long) buffer & PAGE_MASK; + + kunmap_atomic((void *) ptr, KM_BIO_IRQ); + __restore_flags(*flags); +} + + + #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } @@ -39,6 +78,9 @@ #define bh_kmap(bh) ((bh)->b_data) #define bh_kunmap(bh) do { } while (0) + +#define bio_kmap_irq(bio, flags) (bio_data(bio)) +#define bio_kunmap_irq(buf, flags) do { *(flags) = 0; } while (0) #endif /* CONFIG_HIGHMEM */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/ide.h linux/include/linux/ide.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/ide.h Thu Oct 11 08:47:07 2001 +++ linux/include/linux/ide.h Thu Nov 1 11:24:08 2001 @@ -149,6 +149,21 @@ #define DATA_READY (DRQ_STAT) /* + * Our Physical Region Descriptor (PRD) table should be large enough + * to handle the biggest I/O request we are likely to see. Since requests + * can have no more than 256 sectors, and since the typical blocksize is + * two or more sectors, we could get by with a limit of 128 entries here for + * the usual worst case. Most requests seem to include some contiguous blocks, + * further reducing the number of table entries required. + * + * As it turns out though, we must allocate a full 4KB page for this, + * so the two PRD tables (ide0 & ide1) will each get half of that, + * allowing each to have about 256 entries (8 bytes each) from this. 
+ */ +#define PRD_BYTES 8 +#define PRD_ENTRIES (PAGE_SIZE / (2 * PRD_BYTES)) + +/* * Some more useful definitions */ #define IDE_MAJOR_NAME "hd" /* the same for all i/f; see also genhd.c */ @@ -488,7 +503,6 @@ struct scatterlist *sg_table; /* Scatter-gather list used to build the above */ int sg_nents; /* Current number of entries in it */ int sg_dma_direction; /* dma transfer direction */ - int sg_dma_active; /* is it in use */ struct hwif_s *mate; /* other hwif from same PCI chip */ unsigned long dma_base; /* base addr for dma ports */ unsigned dma_extra; /* extra addr for dma ports */ @@ -507,6 +521,7 @@ unsigned reset : 1; /* reset after probe */ unsigned autodma : 1; /* automatically try to enable DMA at boot */ unsigned udma_four : 1; /* 1=ATA-66 capable, 0=default */ + unsigned highmem : 1; /* can do full 32-bit dma */ byte channel; /* for dual-port chips: 0=primary, 1=secondary */ #ifdef CONFIG_BLK_DEV_IDEPCI struct pci_dev *pci_dev; /* for pci chipsets */ @@ -541,10 +556,12 @@ */ typedef int (ide_expiry_t)(ide_drive_t *); +#define IDE_BUSY 0 +#define IDE_SLEEP 1 + typedef struct hwgroup_s { ide_handler_t *handler;/* irq handler, if active */ - volatile int busy; /* BOOL: protects all fields below */ - int sleeping; /* BOOL: wake us up on timer expiry */ + unsigned long flags; /* BUSY, SLEEPING */ ide_drive_t *drive; /* current drive */ ide_hwif_t *hwif; /* ptr to current hwif in linked-list */ struct request *rq; /* current request */ @@ -787,6 +804,11 @@ unsigned long current_capacity (ide_drive_t *drive); /* + * Revalidate (read partition tables) + */ +void ide_revalidate_drive (ide_drive_t *drive); + +/* * Start a reset operation for an IDE interface. * The caller should return immediately after invoking this. */ @@ -814,6 +836,21 @@ } ide_action_t; /* + * temporarily mapping a (possible) highmem bio for PIO transfer + */ +#define ide_rq_offset(rq) (((rq)->hard_cur_sectors - (rq)->current_nr_sectors) << 9) + +extern inline void *ide_map_buffer(struct request *rq, unsigned long *flags) +{ + return bio_kmap_irq(rq->bio, flags) + ide_rq_offset(rq); +} + +extern inline void ide_unmap_buffer(char *buffer, unsigned long *flags) +{ + bio_kunmap_irq(buffer, flags); +} + +/* * This function issues a special IDE device request * onto the request queue. * @@ -960,5 +997,8 @@ #endif void hwif_unregister (ide_hwif_t *hwif); + +#define DRIVE_LOCK(drive) (&(drive)->queue.queue_lock) +extern spinlock_t ide_lock; #endif /* _IDE_H */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/iobuf.h linux/include/linux/iobuf.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/iobuf.h Thu Oct 11 08:44:51 2001 +++ linux/include/linux/iobuf.h Thu Nov 1 11:20:04 2001 @@ -26,7 +26,6 @@ #define KIO_MAX_ATOMIC_IO 512 /* in kb */ #define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1) -#define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2) /* The main kiobuf struct used for all our IO! 
*/ @@ -48,8 +47,6 @@ /* Always embed enough struct pages for atomic IO */ struct page * map_array[KIO_STATIC_PAGES]; - struct buffer_head * bh[KIO_MAX_SECTORS]; - unsigned long blocks[KIO_MAX_SECTORS]; /* Dynamic state for IO completion: */ atomic_t io_count; /* IOs still in progress */ @@ -81,6 +78,9 @@ /* fs/buffer.c */ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], - kdev_t dev, unsigned long b[], int size); + kdev_t dev, sector_t [], int size); + +/* fs/bio.c */ +void ll_rw_kio(int rw, struct kiobuf *kio, kdev_t dev, sector_t block); #endif /* __LINUX_IOBUF_H */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/iso_fs.h linux/include/linux/iso_fs.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/iso_fs.h Wed Oct 31 09:39:13 2001 +++ linux/include/linux/iso_fs.h Thu Nov 1 11:20:20 2001 @@ -220,7 +220,7 @@ extern struct dentry *isofs_lookup(struct inode *, struct dentry *); extern struct buffer_head *isofs_bread(struct inode *, unsigned int, unsigned int); -extern int isofs_get_blocks(struct inode *, long, struct buffer_head **, unsigned long); +extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long); extern struct inode_operations isofs_dir_inode_operations; extern struct file_operations isofs_dir_operations; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/loop.h linux/include/linux/loop.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/loop.h Mon Sep 17 22:16:30 2001 +++ linux/include/linux/loop.h Mon Oct 15 10:41:43 2001 @@ -49,8 +49,8 @@ int old_gfp_mask; spinlock_t lo_lock; - struct buffer_head *lo_bh; - struct buffer_head *lo_bhtail; + struct bio *lo_bio; + struct bio *lo_biotail; int lo_state; struct semaphore lo_sem; struct semaphore lo_ctl_mutex; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/lvm.h linux/include/linux/lvm.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/lvm.h Mon Sep 17 22:25:26 2001 +++ linux/include/linux/lvm.h Mon Oct 15 10:41:43 2001 @@ -584,6 +584,12 @@ } lv_bmap_t; /* + * fixme... + */ +#define LVM_MAX_ATOMIC_IO 512 +#define LVM_MAX_SECTORS (LVM_MAX_ATOMIC_IO * 2) + +/* * Structure Logical Volume (LV) Version 3 */ @@ -621,6 +627,7 @@ uint lv_snapshot_minor; #ifdef __KERNEL__ struct kiobuf *lv_iobuf; + sector_t blocks[LVM_MAX_SECTORS]; struct semaphore lv_snapshot_sem; struct list_head *lv_snapshot_hash_table; ulong lv_snapshot_hash_table_size; diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/nbd.h linux/include/linux/nbd.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/nbd.h Tue May 1 23:20:25 2001 +++ linux/include/linux/nbd.h Mon Oct 15 10:41:43 2001 @@ -37,24 +37,25 @@ static void nbd_end_request(struct request *req) { - struct buffer_head *bh; + struct bio *bio; unsigned nsect; unsigned long flags; int uptodate = (req->errors == 0) ? 
1 : 0; + request_queue_t *q = req->q; #ifdef PARANOIA requests_out++; #endif - spin_lock_irqsave(&io_request_lock, flags); - while((bh = req->bh) != NULL) { - nsect = bh->b_size >> 9; + spin_lock_irqsave(&q->queue_lock, flags); + while((bio = req->bio) != NULL) { + nsect = bio_sectors(bio); blk_finished_io(nsect); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - bh->b_end_io(bh, uptodate); + req->bio = bio->bi_next; + bio->bi_next = NULL; + bio_endio(bio, uptodate); } blkdev_release_request(req); - spin_unlock_irqrestore(&io_request_lock, flags); + spin_unlock_irqrestore(&q->queue_lock, flags); } #define MAX_NBD 128 diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/raid/md_k.h linux/include/linux/raid/md_k.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/raid/md_k.h Fri Sep 14 23:21:51 2001 +++ linux/include/linux/raid/md_k.h Mon Oct 15 10:41:43 2001 @@ -219,7 +219,7 @@ struct mdk_personality_s { char *name; - int (*make_request)(mddev_t *mddev, int rw, struct buffer_head * bh); + int (*make_request)(mddev_t *mddev, int rw, struct bio *bio); int (*run)(mddev_t *mddev); int (*stop)(mddev_t *mddev); int (*status)(char *page, mddev_t *mddev); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/slab.h linux/include/linux/slab.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/slab.h Wed Oct 24 12:49:58 2001 +++ linux/include/linux/slab.h Thu Nov 1 11:20:04 2001 @@ -76,6 +76,7 @@ extern kmem_cache_t *bh_cachep; extern kmem_cache_t *fs_cachep; extern kmem_cache_t *sigact_cachep; +extern kmem_cache_t *bio_cachep; #endif /* __KERNEL__ */ diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/include/linux/types.h linux/include/linux/types.h --- /opt/kernel/linux-2.4.14-pre6/include/linux/types.h Thu Oct 11 08:44:33 2001 +++ linux/include/linux/types.h Thu Nov 1 11:20:04 2001 @@ -113,6 +113,17 @@ typedef __s64 int64_t; #endif +/* + * transition to 64-bit sector_t, possibly making it an option... 
+ */ +#undef BLK_64BIT_SECTOR + +#ifdef BLK_64BIT_SECTOR +typedef u64 sector_t; +#else +typedef unsigned long sector_t; +#endif + #endif /* __KERNEL_STRICT_NAMES */ /* diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/init/main.c linux/init/main.c --- /opt/kernel/linux-2.4.14-pre6/init/main.c Thu Nov 1 14:25:31 2001 +++ linux/init/main.c Mon Oct 15 10:42:30 2001 @@ -636,6 +636,7 @@ vfs_caches_init(mempages); buffer_init(mempages); page_cache_init(mempages); + bio_hash_init(mempages); #if defined(CONFIG_ARCH_S390) ccwcache_init(); #endif diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/kernel/ksyms.c linux/kernel/ksyms.c --- /opt/kernel/linux-2.4.14-pre6/kernel/ksyms.c Thu Nov 1 14:25:31 2001 +++ linux/kernel/ksyms.c Wed Oct 31 10:58:17 2001 @@ -134,6 +134,8 @@ EXPORT_SYMBOL(kunmap_high); EXPORT_SYMBOL(highmem_start_page); EXPORT_SYMBOL(create_bounce); +EXPORT_SYMBOL(kmap_prot); +EXPORT_SYMBOL(kmap_pte); #endif /* filesystem internal functions */ @@ -300,7 +302,6 @@ /* block device driver support */ EXPORT_SYMBOL(blksize_size); -EXPORT_SYMBOL(hardsect_size); EXPORT_SYMBOL(blk_size); EXPORT_SYMBOL(blk_dev); EXPORT_SYMBOL(is_read_only); @@ -317,8 +318,8 @@ EXPORT_SYMBOL(tq_disk); EXPORT_SYMBOL(init_buffer); EXPORT_SYMBOL(refile_buffer); -EXPORT_SYMBOL(max_sectors); EXPORT_SYMBOL(max_readahead); +EXPORT_SYMBOL(wipe_partitions); /* tty routines */ EXPORT_SYMBOL(tty_hangup); diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/kernel/sched.c linux/kernel/sched.c --- /opt/kernel/linux-2.4.14-pre6/kernel/sched.c Wed Oct 24 12:49:58 2001 +++ linux/kernel/sched.c Thu Oct 18 09:45:13 2001 @@ -768,12 +768,13 @@ void complete(struct completion *x) { - unsigned long flags; - - spin_lock_irqsave(&x->wait.lock, flags); - x->done++; - __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, 0); - spin_unlock_irqrestore(&x->wait.lock, flags); + if (x) { + unsigned long flags; + spin_lock_irqsave(&x->wait.lock, flags); + x->done++; + __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, 0); + spin_unlock_irqrestore(&x->wait.lock, flags); + } } void wait_for_completion(struct completion *x) diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/mm/highmem.c linux/mm/highmem.c --- /opt/kernel/linux-2.4.14-pre6/mm/highmem.c Wed Oct 24 12:49:58 2001 +++ linux/mm/highmem.c Wed Oct 24 09:26:46 2001 @@ -21,6 +21,9 @@ #include #include #include +#include + +#include /* * Virtual_count is not a pure "count". @@ -186,7 +189,7 @@ wake_up(&pkmap_map_wait); } -#define POOL_SIZE 32 +#define POOL_SIZE 64 /* * This lock gets no contention at all, normally. @@ -200,77 +203,41 @@ static LIST_HEAD(emergency_bhs); /* - * Simple bounce buffer support for highmem pages. - * This will be moved to the block layer in 2.5. + * Simple bounce buffer support for highmem pages. Depending on the + * queue gfp mask set, *to may or may not be a highmem page. 
kmap it + * always, it will do the Right Thing */ - -static inline void copy_from_high_bh (struct buffer_head *to, - struct buffer_head *from) +static inline void copy_from_high_bio(struct bio *to, struct bio *from) { - struct page *p_from; - char *vfrom; + unsigned char *vto, *vfrom; + + if (unlikely(in_interrupt())) + BUG(); + + vto = bio_kmap(to); + vfrom = bio_kmap(from); - p_from = from->b_page; + memcpy(vto, vfrom + bio_offset(from), bio_size(to)); - vfrom = kmap_atomic(p_from, KM_USER0); - memcpy(to->b_data, vfrom + bh_offset(from), to->b_size); - kunmap_atomic(vfrom, KM_USER0); + bio_kunmap(from); + bio_kunmap(to); } -static inline void copy_to_high_bh_irq (struct buffer_head *to, - struct buffer_head *from) +static inline void copy_to_high_bio_irq(struct bio *to, struct bio *from) { - struct page *p_to; - char *vto; + unsigned char *vto, *vfrom; unsigned long flags; - p_to = to->b_page; __save_flags(flags); __cli(); - vto = kmap_atomic(p_to, KM_BOUNCE_READ); - memcpy(vto + bh_offset(to), from->b_data, to->b_size); + vto = kmap_atomic(bio_page(to), KM_BOUNCE_READ); + vfrom = kmap_atomic(bio_page(from), KM_BOUNCE_READ); + memcpy(vto + bio_offset(to), vfrom, bio_size(to)); + kunmap_atomic(vfrom, KM_BOUNCE_READ); kunmap_atomic(vto, KM_BOUNCE_READ); __restore_flags(flags); } -static inline void bounce_end_io (struct buffer_head *bh, int uptodate) -{ - struct page *page; - struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private); - unsigned long flags; - - bh_orig->b_end_io(bh_orig, uptodate); - - page = bh->b_page; - - spin_lock_irqsave(&emergency_lock, flags); - if (nr_emergency_pages >= POOL_SIZE) - __free_page(page); - else { - /* - * We are abusing page->list to manage - * the highmem emergency pool: - */ - list_add(&page->list, &emergency_pages); - nr_emergency_pages++; - } - - if (nr_emergency_bhs >= POOL_SIZE) { -#ifdef HIGHMEM_DEBUG - /* Don't clobber the constructed slab cache */ - init_waitqueue_head(&bh->b_wait); -#endif - kmem_cache_free(bh_cachep, bh); - } else { - /* - * Ditto in the bh case, here we abuse b_inode_buffers: - */ - list_add(&bh->b_inode_buffers, &emergency_bhs); - nr_emergency_bhs++; - } - spin_unlock_irqrestore(&emergency_lock, flags); -} - static __init int init_emergency_pool(void) { struct sysinfo i; @@ -290,44 +257,61 @@ list_add(&page->list, &emergency_pages); nr_emergency_pages++; } - while (nr_emergency_bhs < POOL_SIZE) { - struct buffer_head * bh = kmem_cache_alloc(bh_cachep, SLAB_ATOMIC); - if (!bh) { - printk("couldn't refill highmem emergency bhs"); - break; - } - list_add(&bh->b_inode_buffers, &emergency_bhs); - nr_emergency_bhs++; - } spin_unlock_irq(&emergency_lock); - printk("allocated %d pages and %d bhs reserved for the highmem bounces\n", - nr_emergency_pages, nr_emergency_bhs); - + printk("allocated %d pages reserved for the highmem bounces\n", nr_emergency_pages); return 0; } __initcall(init_emergency_pool); -static void bounce_end_io_write (struct buffer_head *bh, int uptodate) +static inline void bounce_end_io (struct bio *bio) { - bounce_end_io(bh, uptodate); + struct bio *bio_orig = bio->bi_private; + struct page *page = bio_page(bio); + unsigned long flags; + + bio_endio(bio_orig, test_bit(BIO_UPTODATE, &bio->bi_flags)); + + spin_lock_irqsave(&emergency_lock, flags); + if (nr_emergency_pages >= POOL_SIZE) { + spin_unlock_irqrestore(&emergency_lock, flags); + __free_page(page); + } else { + /* + * We are abusing page->list to manage + * the highmem emergency pool: + */ + list_add(&page->list, &emergency_pages); + 
nr_emergency_pages++; + spin_unlock_irqrestore(&emergency_lock, flags); + } + + bio->bi_req = NULL; + bio_hash_remove(bio); + bio_put(bio); } -static void bounce_end_io_read (struct buffer_head *bh, int uptodate) +static void bounce_end_io_write (struct bio *bio) { - struct buffer_head *bh_orig = (struct buffer_head *)(bh->b_private); + bounce_end_io(bio); +} + +static void bounce_end_io_read (struct bio *bio) +{ + struct bio *bio_orig = bio->bi_private; - if (uptodate) - copy_to_high_bh_irq(bh_orig, bh); - bounce_end_io(bh, uptodate); + if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + copy_to_high_bio_irq(bio_orig, bio); + + bounce_end_io(bio); } -struct page *alloc_bounce_page (void) +struct page *alloc_bounce_page(int gfp_mask) { struct list_head *tmp; struct page *page; - page = alloc_page(GFP_NOHIGHIO); + page = alloc_page(gfp_mask); if (page) return page; /* @@ -360,91 +344,35 @@ goto repeat_alloc; } -struct buffer_head *alloc_bounce_bh (void) +void create_bounce(struct bio **bio_orig, int gfp_mask) { - struct list_head *tmp; - struct buffer_head *bh; + struct page *page; + struct bio *bio; - bh = kmem_cache_alloc(bh_cachep, SLAB_NOHIGHIO); - if (bh) - return bh; - /* - * No luck. First, kick the VM so it doesnt idle around while - * we are using up our emergency rations. - */ - wakeup_bdflush(); + bio = bio_alloc(GFP_NOHIGHIO); -repeat_alloc: /* - * Try to allocate from the emergency pool. + * wasteful for 1kB fs, but machines with lots of ram are less likely + * to have 1kB fs for anything that needs to go fast. so all things + * considered, it should be ok. */ - tmp = &emergency_bhs; - spin_lock_irq(&emergency_lock); - if (!list_empty(tmp)) { - bh = list_entry(tmp->next, struct buffer_head, b_inode_buffers); - list_del(tmp->next); - nr_emergency_bhs--; - } - spin_unlock_irq(&emergency_lock); - if (bh) - return bh; + page = alloc_bounce_page(gfp_mask); - /* we need to wait I/O completion */ - run_task_queue(&tq_disk); + bio->bi_dev = (*bio_orig)->bi_dev; + bio->bi_sector = (*bio_orig)->bi_sector; + bio->bi_rw = (*bio_orig)->bi_rw; - current->policy |= SCHED_YIELD; - __set_current_state(TASK_RUNNING); - schedule(); - goto repeat_alloc; -} + bio->bi_io_vec.bv_page = page; + bio->bi_io_vec.bv_len = bio_size(*bio_orig); + bio->bi_io_vec.bv_offset = 0; -struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig) -{ - struct page *page; - struct buffer_head *bh; + bio->bi_private = *bio_orig; - if (!PageHighMem(bh_orig->b_page)) - return bh_orig; - - bh = alloc_bounce_bh(); - /* - * This is wasteful for 1k buffers, but this is a stopgap measure - * and we are being ineffective anyway. This approach simplifies - * things immensly. On boxes with more than 4GB RAM this should - * not be an issue anyway. 
- */ - page = alloc_bounce_page(); - - set_bh_page(bh, page, 0); - - bh->b_next = NULL; - bh->b_blocknr = bh_orig->b_blocknr; - bh->b_size = bh_orig->b_size; - bh->b_list = -1; - bh->b_dev = bh_orig->b_dev; - bh->b_count = bh_orig->b_count; - bh->b_rdev = bh_orig->b_rdev; - bh->b_state = bh_orig->b_state; -#ifdef HIGHMEM_DEBUG - bh->b_flushtime = jiffies; - bh->b_next_free = NULL; - bh->b_prev_free = NULL; - /* bh->b_this_page */ - bh->b_reqnext = NULL; - bh->b_pprev = NULL; -#endif - /* bh->b_page */ - if (rw == WRITE) { - bh->b_end_io = bounce_end_io_write; - copy_from_high_bh(bh, bh_orig); + if (bio_rw(bio) == WRITE) { + bio->bi_end_io = bounce_end_io_write; + copy_from_high_bio(bio, *bio_orig); } else - bh->b_end_io = bounce_end_io_read; - bh->b_private = (void *)bh_orig; - bh->b_rsector = bh_orig->b_rsector; -#ifdef HIGHMEM_DEBUG - memset(&bh->b_wait, -1, sizeof(bh->b_wait)); -#endif + bio->bi_end_io = bounce_end_io_read; - return bh; + *bio_orig = bio; } - diff -urN --exclude-from /home/axboe/exclude /opt/kernel/linux-2.4.14-pre6/mm/page_io.c linux/mm/page_io.c --- /opt/kernel/linux-2.4.14-pre6/mm/page_io.c Wed Oct 31 09:39:13 2001 +++ linux/mm/page_io.c Tue Oct 30 09:48:43 2001 @@ -36,7 +36,7 @@ static int rw_swap_page_base(int rw, swp_entry_t entry, struct page *page) { unsigned long offset; - int zones[PAGE_SIZE/512]; + sector_t zones[PAGE_SIZE/512]; int zones_used; kdev_t dev = 0; int block_size;
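
A minimal sketch of how a driver is expected to classify requests with the new bio_rw()/bio_data_dir() helpers added to fs.h above, assuming <linux/fs.h> and the bio definitions are included; the function name is illustrative only and not part of the patch:

static int example_classify(struct bio *bio)
{
	/*
	 * bio_rw() keeps the READA bit, so a read-ahead can still be
	 * dropped if the driver is short on resources
	 */
	if (bio_rw(bio) == READA)
		return 0;

	/* bio_data_dir() masks READA away: 0 is a read, 1 is a write */
	return bio_data_dir(bio);
}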
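
The bio_kmap_irq()/bio_kunmap_irq() pair in highmem.h and the ide_map_buffer()/ide_unmap_buffer() wrappers in ide.h above must be used as a strictly nested pair, with interrupts left disabled in between. A minimal sketch of the intended PIO pattern; transfer_one_sector() is a stand-in for whatever routine the driver uses to move data and is not part of this patch:

static void example_pio_transfer(struct request *rq)
{
	unsigned long flags;
	char *buf;

	/* maps the current bio (atomically if highmem) and adds the offset */
	buf = ide_map_buffer(rq, &flags);

	/* no sleeping and no re-enabling of interrupts until the unmap */
	transfer_one_sector(buf);

	/* drops the atomic kmap and restores the saved interrupt state */
	ide_unmap_buffer(buf, &flags);
}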
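
create_bounce() now rewrites the bio in place instead of returning a replacement buffer_head: the caller passes a struct bio ** and, on return, *bio points at a low-memory bounce bio whose ->bi_private holds the original, with bounce_end_io_read()/write() doing the copy-back and pool accounting on completion. A minimal sketch of a submission-side caller; the highmem test here is deliberately simplified (a real queue would key this off what the device can actually address), and the function name is illustrative only:

static void example_maybe_bounce(struct bio **bio)
{
	/* only bounce pages the device cannot reach; test simplified here */
	if (PageHighMem(bio_page(*bio)))
		create_bounce(bio, GFP_NOHIGHIO);

	/* *bio is now safe to map or DMA; the original completes via the
	 * bounce bio's end_io handler */
}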