Index: drivers/block/ll_rw_blk.c =================================================================== RCS file: /cvs/linux-2.4-xfs/linux/drivers/block/ll_rw_blk.c,v retrieving revision 1.62 diff -u -r1.62 ll_rw_blk.c --- drivers/block/ll_rw_blk.c 2001/02/27 00:36:49 1.62 +++ drivers/block/ll_rw_blk.c 2001/03/06 15:21:46 @@ -1561,9 +1561,6 @@ #ifdef CONFIG_BLK_DEV_RAM rd_init(); #endif -#ifdef CONFIG_BLK_DEV_LOOP - loop_init(); -#endif #ifdef CONFIG_ISP16_CDI isp16_init(); #endif Index: drivers/block/loop.c =================================================================== RCS file: /cvs/linux-2.4-xfs/linux/drivers/block/loop.c,v retrieving revision 1.28 diff -u -r1.28 loop.c --- drivers/block/loop.c 2001/02/22 21:09:04 1.28 +++ drivers/block/loop.c 2001/03/06 15:21:46 @@ -31,11 +31,14 @@ * max_loop=<1-255> to the kernel on boot. * Erik I. Bolsø, , Oct 31, 1999 * + * Completely rewrite request handling to be make_request_fn style and + * non blocking, pushing work to a helper thread. Lots of fixes from + * Al Viro too. + * Jens Axboe , Nov 2000 + * * Still To Fix: * - Advisory locking is ignored here. * - Should use an own CAP_* category instead of CAP_SYS_ADMIN - * - Should use the underlying filesystems/devices read function if possible - * to support read ahead (and for write) * * WARNING/FIXME: * - The block number as IV passing to low level transfer functions is broken: @@ -48,6 +51,7 @@ * number. 
*/ +#include #include #include @@ -56,9 +60,13 @@ #include #include #include - +#include +#include #include #include +#include +#include +#include #include @@ -66,40 +74,28 @@ #define MAJOR_NR LOOP_MAJOR -#define DEVICE_NAME "loop" -#define DEVICE_REQUEST do_lo_request -#define DEVICE_NR(device) (MINOR(device)) -#define DEVICE_ON(device) -#define DEVICE_OFF(device) -#define DEVICE_NO_RANDOM -#define TIMEOUT_VALUE (6 * HZ) -#include - -#include static int max_loop = 8; static struct loop_device *loop_dev; static int *loop_sizes; static int *loop_blksizes; static devfs_handle_t devfs_handle; /* For the directory */ -#define FALSE 0 -#define TRUE (!FALSE) - /* * Transfer functions */ static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf, - char *loop_buf, int size, int real_block) + char *loop_buf, int size, int real_block) { if (cmd == READ) memcpy(loop_buf, raw_buf, size); else memcpy(raw_buf, loop_buf, size); + return 0; } static int transfer_xor(struct loop_device *lo, int cmd, char *raw_buf, - char *loop_buf, int size, int real_block) + char *loop_buf, int size, int real_block) { char *in, *out, *key; int i, keysize; @@ -111,17 +107,18 @@ in = loop_buf; out = raw_buf; } + key = lo->lo_encrypt_key; keysize = lo->lo_encrypt_key_size; - for (i=0; i < size; i++) + for (i = 0; i < size; i++) *out++ = *in++ ^ key[(i & 511) % keysize]; return 0; } static int none_status(struct loop_device *lo, struct loop_info *info) { - return 0; -} + return 0; +} static int xor_status(struct loop_device *lo, struct loop_info *info) { @@ -133,7 +130,7 @@ struct loop_func_table none_funcs = { number: LO_CRYPT_NONE, transfer: transfer_none, - init: none_status + init: none_status, }; struct loop_func_table xor_funcs = { @@ -150,39 +147,41 @@ #define MAX_DISK_SIZE 1024*1024*1024 -static void figure_loop_size(struct loop_device *lo) +static int compute_loop_size(struct loop_device *lo, struct dentry * lo_dentry, kdev_t lodev) { - int size; - - if 
(S_ISREG(lo->lo_dentry->d_inode->i_mode)) - size = (lo->lo_dentry->d_inode->i_size - lo->lo_offset) >> BLOCK_SIZE_BITS; - else { - kdev_t lodev = lo->lo_device; - if (blk_size[MAJOR(lodev)]) - size = blk_size[MAJOR(lodev)][MINOR(lodev)] - + if (S_ISREG(lo_dentry->d_inode->i_mode)) + return (lo_dentry->d_inode->i_size - lo->lo_offset) >> BLOCK_SIZE_BITS; + if (blk_size[MAJOR(lodev)]) + return blk_size[MAJOR(lodev)][MINOR(lodev)] - (lo->lo_offset >> BLOCK_SIZE_BITS); - else - size = MAX_DISK_SIZE; - } + return MAX_DISK_SIZE; +} - loop_sizes[lo->lo_number] = size; +static void figure_loop_size(struct loop_device *lo) +{ + loop_sizes[lo->lo_number] = compute_loop_size(lo, + lo->lo_backing_file->f_dentry, + lo->lo_device); } -static int lo_send(struct loop_device *lo, char *data, int len, loff_t pos, - int blksize) +static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize, + loff_t pos) { struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */ - struct address_space *mapping = lo->lo_dentry->d_inode->i_mapping; + struct address_space *mapping = file->f_dentry->d_inode->i_mapping; struct address_space_operations *aops = mapping->a_ops; struct page *page; - char *kaddr; + char *kaddr, *data; unsigned long index; unsigned size, offset; + int len; index = pos >> PAGE_CACHE_SHIFT; offset = pos & (PAGE_CACHE_SIZE - 1); + len = bh->b_size; + data = bh->b_data; while (len > 0) { - int IV = index * (PAGE_CACHE_SIZE/blksize) + offset/blksize; + int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize; size = PAGE_CACHE_SIZE - offset; if (size > len) size = len; @@ -193,7 +192,8 @@ if (aops->prepare_write(file, page, offset, offset+size)) goto unlock; kaddr = page_address(page); - if ((lo->transfer)(lo, WRITE, kaddr+offset, data, size, IV)) + flush_dcache_page(page); + if (lo_do_transfer(lo, WRITE, kaddr + offset, data, size, IV)) goto write_fail; if (aops->commit_write(file, page, offset, offset+size)) goto unlock; @@ -203,6 +203,7 @@ index++; pos += 
size; UnlockPage(page); + deactivate_page(page); page_cache_release(page); } return 0; @@ -213,6 +214,7 @@ kunmap(page); unlock: UnlockPage(page); + deactivate_page(page); page_cache_release(page); fail: return -1; @@ -221,7 +223,7 @@ struct lo_read_data { struct loop_device *lo; char *data; - int blksize; + int bsize; }; static int lo_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) @@ -230,16 +232,15 @@ unsigned long count = desc->count; struct lo_read_data *p = (struct lo_read_data*)desc->buf; struct loop_device *lo = p->lo; - int IV = page->index * (PAGE_CACHE_SIZE/p->blksize) + offset/p->blksize; + int IV = page->index * (PAGE_CACHE_SIZE/p->bsize) + offset/p->bsize; if (size > count) size = count; kaddr = kmap(page); - if ((lo->transfer)(lo,READ,kaddr+offset,p->data,size,IV)) { + if (lo_do_transfer(lo, READ, kaddr + offset, p->data, size, IV)) { size = 0; - printk(KERN_ERR "loop: transfer error block %ld\n", - page->index); + printk(KERN_ERR "loop: transfer error block %ld\n",page->index); desc->error = -EINVAL; } kunmap(page); @@ -250,160 +251,347 @@ return size; } -static int lo_receive(struct loop_device *lo, char *data, int len, loff_t pos, - int blksize) +static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize, + loff_t pos) { - struct file *file = lo->lo_backing_file; struct lo_read_data cookie; read_descriptor_t desc; + struct file *file; cookie.lo = lo; - cookie.data = data; - cookie.blksize = blksize; + cookie.data = bh->b_data; + cookie.bsize = bsize; desc.written = 0; - desc.count = len; + desc.count = bh->b_size; desc.buf = (char*)&cookie; desc.error = 0; + spin_lock_irq(&lo->lo_lock); + file = lo->lo_backing_file; + spin_unlock_irq(&lo->lo_lock); do_generic_file_read(file, &pos, &desc, lo_read_actor); return desc.error; } -static void do_lo_request(request_queue_t * q) +static inline int loop_get_bs(struct loop_device *lo) { - int block, offset, len, blksize, size; - char 
*dest_addr; - struct loop_device *lo; - struct buffer_head *bh; - struct request *current_request; + int bs = 0; + + if (blksize_size[MAJOR(lo->lo_device)]) + bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)]; + if (!bs) + bs = BLOCK_SIZE; + + return bs; +} + +static inline unsigned long loop_get_iv(struct loop_device *lo, + unsigned long sector) +{ + int bs = loop_get_bs(lo); + unsigned long offset, IV; + + IV = sector / (bs >> 9) + lo->lo_offset / bs; + offset = ((sector % (bs >> 9)) << 9) + lo->lo_offset % bs; + if (offset >= bs) + IV++; + + return IV; +} + +static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw) +{ loff_t pos; + int ret; -repeat: - INIT_REQUEST; - current_request=CURRENT; - blkdev_dequeue_request(current_request); - if (MINOR(current_request->rq_dev) >= max_loop) - goto error_out; - lo = &loop_dev[MINOR(current_request->rq_dev)]; - if (!lo->lo_dentry || !lo->transfer) - goto error_out; - if (current_request->cmd == WRITE) { + pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset; + + if (rw == WRITE) + ret = lo_send(lo, bh, loop_get_bs(lo), pos); + else + ret = lo_receive(lo, bh, loop_get_bs(lo), pos); + + return ret; +} + +static void loop_put_buffer(struct buffer_head *bh) +{ + if (bh) { + kunmap(bh->b_page); + __free_page(bh->b_page); + kmem_cache_free(bh_cachep, bh); + } +} + +/* + * Add buffer_head to back of pending list + */ +static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh) +{ + unsigned long flags; + + spin_lock_irqsave(&lo->lo_lock, flags); + if (lo->lo_bhtail) { + lo->lo_bhtail->b_reqnext = bh; + lo->lo_bhtail = bh; + } else + lo->lo_bh = lo->lo_bhtail = bh; + spin_unlock_irqrestore(&lo->lo_lock, flags); + + up(&lo->lo_bh_mutex); +} + +/* + * Grab first pending buffer + */ +static struct buffer_head *loop_get_bh(struct loop_device *lo) +{ + struct buffer_head *bh; + + spin_lock_irq(&lo->lo_lock); + if ((bh = lo->lo_bh)) { + if (bh == lo->lo_bhtail) + lo->lo_bhtail = NULL; + 
lo->lo_bh = bh->b_reqnext; + bh->b_reqnext = NULL; + } + spin_unlock_irq(&lo->lo_lock); + + return bh; +} + +/* + * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE + * and lo->transfer stuff has already been done. if not, it was a READ + * so queue it for the loop thread and let it do the transfer out of + * b_end_io context (we don't want to do decrypt of a page with irqs + * disabled) + */ +static void loop_end_io_transfer(struct buffer_head *bh, int uptodate) +{ + struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)]; + + if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) { + struct buffer_head *rbh = bh->b_private; + + rbh->b_end_io(rbh, uptodate); + if (atomic_dec_and_test(&lo->lo_pending)) + up(&lo->lo_bh_mutex); + loop_put_buffer(bh); + } else + loop_add_bh(lo, bh); +} + +static struct buffer_head *loop_get_buffer(struct loop_device *lo, + struct buffer_head *rbh) +{ + struct buffer_head *bh; + + do { + bh = kmem_cache_alloc(bh_cachep, SLAB_BUFFER); + if (bh) + break; + + run_task_queue(&tq_disk); + schedule_timeout(HZ); + } while (1); + memset(bh, 0, sizeof(*bh)); + + bh->b_size = rbh->b_size; + bh->b_dev = rbh->b_rdev; + spin_lock_irq(&lo->lo_lock); + bh->b_rdev = lo->lo_device; + spin_unlock_irq(&lo->lo_lock); + bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock); + + /* + * easy way out, although it does waste some memory for < PAGE_SIZE + * blocks... 
if highmem bounce buffering can get away with it, + * so can we :-) + */ + bh->b_page = alloc_page(GFP_BUFFER); + bh->b_data = kmap(bh->b_page); + + bh->b_end_io = loop_end_io_transfer; + bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9); + init_waitqueue_head(&bh->b_wait); + + return bh; +} + +static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh, + struct kiobuf *kio, kdev_t dev, unsigned int block, + unsigned int bsize) +{ + struct buffer_head *bh = NULL; + struct loop_device *lo; + unsigned long IV; + + if (!buffer_locked(rbh)) + BUG(); + + if (MINOR(rbh->b_rdev) >= max_loop) + goto out; + + lo = &loop_dev[MINOR(rbh->b_rdev)]; + spin_lock_irq(&lo->lo_lock); + if (lo->lo_state != Lo_bound) + goto inactive; + atomic_inc(&lo->lo_pending); + spin_unlock_irq(&lo->lo_lock); + + if (rw == WRITE) { if (lo->lo_flags & LO_FLAGS_READ_ONLY) - goto error_out; - } else if (current_request->cmd != READ) { - printk(KERN_ERR "unknown loop device command (%d)?!?", - current_request->cmd); - goto error_out; + goto err; + } else if (rw == READA) { + rw = READ; + } else if (rw != READ) { + printk(KERN_ERR "loop: unknown command (%d)\n", rw); + goto err; } - dest_addr = current_request->buffer; - len = current_request->current_nr_sectors << 9; +#if CONFIG_HIGHMEM + rbh = create_bounce(rw, rbh); +#endif - blksize = BLOCK_SIZE; - if (blksize_size[MAJOR(lo->lo_device)]) { - blksize = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)]; - if (!blksize) - blksize = BLOCK_SIZE; + /* + * file backed, queue for loop_thread to handle + */ + if (lo->lo_flags & LO_FLAGS_DO_BMAP) { + if (rw == WRITE) + set_bit(BH_Dirty, &rbh->b_state); + loop_add_bh(lo, rbh); + return 0; } - if (lo->lo_flags & LO_FLAGS_DO_BMAP) - goto file_backed; + /* + * piggy old buffer on original, and submit for I/O + */ + bh = loop_get_buffer(lo, rbh); + bh->b_private = rbh; + IV = loop_get_iv(lo, bh->b_rsector); + if (rw == WRITE) { + set_bit(BH_Dirty, &bh->b_state); + if 
(lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data, bh->b_size, IV)) + goto err; + } - if (blksize < 512) { - block = current_request->sector * (512/blksize); - offset = 0; + generic_make_request(rw, bh, NULL, 0, 0, 0); + return 0; + +err: + if (atomic_dec_and_test(&lo->lo_pending)) + up(&lo->lo_bh_mutex); + loop_put_buffer(bh); +out: + buffer_IO_error(rbh); + return 0; +inactive: + spin_unlock_irq(&lo->lo_lock); + goto out; +} + +static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh) +{ + int ret; + + /* + * For block backed loop, we know this is a READ + */ + if (lo->lo_flags & LO_FLAGS_DO_BMAP) { + int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state); + + ret = do_bh_filebacked(lo, bh, rw); + bh->b_end_io(bh, !ret); } else { - block = current_request->sector / (blksize >> 9); - offset = (current_request->sector % (blksize >> 9)) << 9; - } - block += lo->lo_offset / blksize; - offset += lo->lo_offset % blksize; - if (offset >= blksize) { - block++; - offset -= blksize; + struct buffer_head *rbh = bh->b_private; + unsigned long IV = loop_get_iv(lo, rbh->b_rsector); + + ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data, + bh->b_size, IV); + + rbh->b_end_io(rbh, !ret); + loop_put_buffer(bh); } - spin_unlock_irq(&io_request_lock); +} - while (len > 0) { +/* + * worker thread that handles reads/writes to file backed loop devices, + * to avoid blocking in our make_request_fn. it also does loop decrypting + * on reads for block backed loop, as that is too heavy to do from + * b_end_io context where irqs may be disabled. 
+ */ +static int loop_thread(void *data) +{ + struct loop_device *lo = data; + struct buffer_head *bh; - size = blksize - offset; - if (size > len) - size = len; + daemonize(); + exit_files(current); - bh = getblk(lo->lo_device, block, blksize); - if (!bh) { - printk(KERN_ERR "loop: device %s: getblk(-, %d, %d) returned NULL", - kdevname(lo->lo_device), - block, blksize); - goto error_out_lock; - } - if (!buffer_uptodate(bh) && ((current_request->cmd == READ) || - (offset || (len < blksize)))) { - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - brelse(bh); - goto error_out_lock; - } - } + sprintf(current->comm, "loop%d", lo->lo_number); - if ((lo->transfer)(lo, current_request->cmd, - bh->b_data + offset, - dest_addr, size, block)) { - printk(KERN_ERR "loop: transfer error block %d\n", - block); - brelse(bh); - goto error_out_lock; - } + spin_lock_irq(&current->sigmask_lock); + sigfillset(&current->blocked); + flush_signals(current); + spin_unlock_irq(&current->sigmask_lock); + + current->policy = SCHED_OTHER; + current->nice = -20; + + spin_lock_irq(&lo->lo_lock); + lo->lo_state = Lo_bound; + atomic_inc(&lo->lo_pending); + spin_unlock_irq(&lo->lo_lock); + + /* + * up sem, we are running + */ + up(&lo->lo_sem); - if (current_request->cmd == WRITE) { - mark_buffer_uptodate(bh, 1); - mark_buffer_dirty(bh); + for (;;) { + down_interruptible(&lo->lo_bh_mutex); + /* + * could be upped because of tear-down, not because of + * pending work + */ + if (!atomic_read(&lo->lo_pending)) + break; + + bh = loop_get_bh(lo); + if (!bh) { + printk("loop: missing bh\n"); + continue; } - brelse(bh); - dest_addr += size; - len -= size; - offset = 0; - block++; - } - goto done; + loop_handle_bh(lo, bh); -file_backed: - pos = ((loff_t)current_request->sector << 9) + lo->lo_offset; - spin_unlock_irq(&io_request_lock); - if (current_request->cmd == WRITE) { - if (lo_send(lo, dest_addr, len, pos, blksize)) - goto error_out_lock; - } else { - if (lo_receive(lo, dest_addr, len, 
pos, blksize)) - goto error_out_lock; + /* + * upped both for pending work and tear-down, lo_pending + * will hit zero then + */ + if (atomic_dec_and_test(&lo->lo_pending)) + break; } -done: - spin_lock_irq(&io_request_lock); - current_request->sector += current_request->current_nr_sectors; - current_request->nr_sectors -= current_request->current_nr_sectors; - list_add(&current_request->queue, &q->queue_head); - end_request(1); - goto repeat; -error_out_lock: - spin_lock_irq(&io_request_lock); -error_out: - list_add(&current_request->queue, &q->queue_head); - end_request(0); - goto repeat; + + up(&lo->lo_sem); + return 0; } -static int loop_set_fd(struct loop_device *lo, kdev_t dev, unsigned int arg) +static int loop_set_fd(struct loop_device *lo, struct file *lo_file, kdev_t dev, + unsigned int arg) { struct file *file; struct inode *inode; - int error; + kdev_t lo_device; + int lo_flags = 0; + int error; + int bs; MOD_INC_USE_COUNT; error = -EBUSY; - if (lo->lo_dentry) + if (lo->lo_state != Lo_unbound) goto out; - + error = -EBADF; file = fget(arg); if (!file) @@ -412,24 +600,13 @@ error = -EINVAL; inode = file->f_dentry->d_inode; - if (S_ISBLK(inode->i_mode)) { - /* dentry will be wired, so... */ - error = blkdev_get(inode->i_bdev, file->f_mode, - file->f_flags, BDEV_FILE); - - lo->lo_device = inode->i_rdev; - lo->lo_flags = 0; - - /* Backed by a block device - don't need to hold onto - a file structure */ - lo->lo_backing_file = NULL; + if (!(file->f_mode & FMODE_WRITE)) + lo_flags |= LO_FLAGS_READ_ONLY; - if (error) - goto out_putf; + if (S_ISBLK(inode->i_mode)) { + lo_device = inode->i_rdev; } else if (S_ISREG(inode->i_mode)) { - struct address_space_operations *aops; - - aops = inode->i_mapping->a_ops; + struct address_space_operations *aops = inode->i_mapping->a_ops; /* * If we can't read - sorry. If we only can't write - well, * it's going to be read-only. 
@@ -439,57 +616,50 @@ goto out_putf; if (!aops->prepare_write || !aops->commit_write) - lo->lo_flags |= LO_FLAGS_READ_ONLY; + lo_flags |= LO_FLAGS_READ_ONLY; - error = get_write_access(inode); - if (error) - goto out_putf; - - /* Backed by a regular file - we need to hold onto a file - structure for this file. Friggin' NFS can't live without - it on write and for reading we use do_generic_file_read(), - so... We create a new file structure based on the one - passed to us via 'arg'. This is to avoid changing the file - structure that the caller is using */ - - lo->lo_device = inode->i_dev; - lo->lo_flags |= LO_FLAGS_DO_BMAP; - - error = -ENFILE; - lo->lo_backing_file = get_empty_filp(); - if (lo->lo_backing_file == NULL) { - put_write_access(inode); - goto out_putf; - } - - lo->lo_backing_file->f_mode = file->f_mode; - lo->lo_backing_file->f_pos = file->f_pos; - lo->lo_backing_file->f_flags = file->f_flags; - lo->lo_backing_file->f_owner = file->f_owner; - lo->lo_backing_file->f_dentry = file->f_dentry; - lo->lo_backing_file->f_vfsmnt = mntget(file->f_vfsmnt); - lo->lo_backing_file->f_op = fops_get(file->f_op); - lo->lo_backing_file->private_data = file->private_data; - file_moveto(lo->lo_backing_file, file); - + lo_device = inode->i_dev; + lo_flags |= LO_FLAGS_DO_BMAP; error = 0; - } + } else + goto out_putf; + + get_file(file); - if (IS_RDONLY (inode) || is_read_only(lo->lo_device)) - lo->lo_flags |= LO_FLAGS_READ_ONLY; + if (IS_RDONLY (inode) || is_read_only(lo_device) + || !(lo_file->f_mode & FMODE_WRITE)) + lo_flags |= LO_FLAGS_READ_ONLY; - set_device_ro(dev, (lo->lo_flags & LO_FLAGS_READ_ONLY)!=0); + set_device_ro(dev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); - lo->lo_dentry = dget(file->f_dentry); + lo->lo_device = lo_device; + lo->lo_flags = lo_flags; + lo->lo_backing_file = file; lo->transfer = NULL; lo->ioctl = NULL; figure_loop_size(lo); + lo->old_gfp_mask = inode->i_mapping->gfp_mask; + inode->i_mapping->gfp_mask = GFP_BUFFER; + + bs = 0; + if 
(blksize_size[MAJOR(inode->i_rdev)]) + bs = blksize_size[MAJOR(inode->i_rdev)][MINOR(inode->i_rdev)]; + if (!bs) + bs = BLOCK_SIZE; + + set_blocksize(dev, bs); + + lo->lo_bh = lo->lo_bhtail = NULL; + kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + down(&lo->lo_sem); + + fput(file); + return 0; out_putf: fput(file); out: - if (error) - MOD_DEC_USE_COUNT; + MOD_DEC_USE_COUNT; return error; } @@ -525,27 +695,25 @@ static int loop_clr_fd(struct loop_device *lo, kdev_t dev) { - struct dentry *dentry = lo->lo_dentry; + struct file *filp = lo->lo_backing_file; + int gfp = lo->old_gfp_mask; - if (!dentry) + if (lo->lo_state != Lo_bound) return -ENXIO; if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */ return -EBUSY; + if (filp==NULL) + return -EINVAL; - if (S_ISBLK(dentry->d_inode->i_mode)) - blkdev_put(dentry->d_inode->i_bdev, BDEV_FILE); + spin_lock_irq(&lo->lo_lock); + lo->lo_state = Lo_rundown; + if (atomic_dec_and_test(&lo->lo_pending)) + up(&lo->lo_bh_mutex); + spin_unlock_irq(&lo->lo_lock); - lo->lo_dentry = NULL; + down(&lo->lo_sem); - if (lo->lo_backing_file != NULL) { - struct file *filp = lo->lo_backing_file; - if ((filp->f_mode & FMODE_WRITE) == 0) - put_write_access(filp->f_dentry->d_inode); - fput(filp); - lo->lo_backing_file = NULL; - } else { - dput(dentry); - } + lo->lo_backing_file = NULL; loop_release_xfer(lo); lo->transfer = NULL; @@ -554,10 +722,14 @@ lo->lo_encrypt_type = 0; lo->lo_offset = 0; lo->lo_encrypt_key_size = 0; + lo->lo_flags = 0; memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); memset(lo->lo_name, 0, LO_NAME_SIZE); loop_sizes[lo->lo_number] = 0; invalidate_buffers(dev); + filp->f_dentry->d_inode->i_mapping->gfp_mask = gfp; + lo->lo_state = Lo_unbound; + fput(filp); MOD_DEC_USE_COUNT; return 0; } @@ -571,7 +743,7 @@ if (lo->lo_encrypt_key_size && lo->lo_key_owner != current->uid && !capable(CAP_SYS_ADMIN)) return -EPERM; - if (!lo->lo_dentry) + if (lo->lo_state != Lo_bound) return -ENXIO; if 
(copy_from_user(&info, arg, sizeof (struct loop_info))) return -EFAULT; @@ -608,15 +780,16 @@ static int loop_get_status(struct loop_device *lo, struct loop_info *arg) { struct loop_info info; + struct file *file = lo->lo_backing_file; - if (!lo->lo_dentry) + if (lo->lo_state != Lo_bound) return -ENXIO; if (!arg) return -EINVAL; memset(&info, 0, sizeof(info)); info.lo_number = lo->lo_number; - info.lo_device = kdev_t_to_nr(lo->lo_dentry->d_inode->i_dev); - info.lo_inode = lo->lo_dentry->d_inode->i_ino; + info.lo_device = kdev_t_to_nr(file->f_dentry->d_inode->i_dev); + info.lo_inode = file->f_dentry->d_inode->i_ino; info.lo_rdevice = kdev_t_to_nr(lo->lo_device); info.lo_offset = lo->lo_offset; info.lo_flags = lo->lo_flags; @@ -634,7 +807,7 @@ unsigned int cmd, unsigned long arg) { struct loop_device *lo; - int dev; + int dev, err; if (!inode) return -EINVAL; @@ -647,25 +820,36 @@ if (dev >= max_loop) return -ENODEV; lo = &loop_dev[dev]; + down(&lo->lo_ctl_mutex); switch (cmd) { case LOOP_SET_FD: - return loop_set_fd(lo, inode->i_rdev, arg); + err = loop_set_fd(lo, file, inode->i_rdev, arg); + break; case LOOP_CLR_FD: - return loop_clr_fd(lo, inode->i_rdev); + err = loop_clr_fd(lo, inode->i_rdev); + break; case LOOP_SET_STATUS: - return loop_set_status(lo, (struct loop_info *) arg); + err = loop_set_status(lo, (struct loop_info *) arg); + break; case LOOP_GET_STATUS: - return loop_get_status(lo, (struct loop_info *) arg); - case BLKGETSIZE: /* Return device size */ - if (!lo->lo_dentry) - return -ENXIO; - if (!arg) - return -EINVAL; - return put_user(loop_sizes[lo->lo_number] << 1, (long *) arg); + err = loop_get_status(lo, (struct loop_info *) arg); + break; + case BLKGETSIZE: + if (lo->lo_state != Lo_bound) { + err = -ENXIO; + break; + } + if (!arg) { + err = -EINVAL; + break; + } + err = put_user(loop_sizes[lo->lo_number] << 1, (long *) arg); + break; default: - return lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; + err = lo->ioctl ? 
lo->ioctl(lo, cmd, arg) : -EINVAL; } - return 0; + up(&lo->lo_ctl_mutex); + return err; } static int lo_open(struct inode *inode, struct file *file) @@ -673,7 +857,6 @@ struct loop_device *lo; int dev, type; - if (!inode) return -EINVAL; if (MAJOR(inode->i_rdev) != MAJOR_NR) { @@ -681,23 +864,25 @@ return -ENODEV; } dev = MINOR(inode->i_rdev); - if (dev >= max_loop) { + if (dev >= max_loop) return -ENODEV; - } + lo = &loop_dev[dev]; + MOD_INC_USE_COUNT; + down(&lo->lo_ctl_mutex); type = lo->lo_encrypt_type; if (type && xfer_funcs[type] && xfer_funcs[type]->lock) xfer_funcs[type]->lock(lo); lo->lo_refcnt++; - MOD_INC_USE_COUNT; + up(&lo->lo_ctl_mutex); return 0; } static int lo_release(struct inode *inode, struct file *file) { struct loop_device *lo; - int dev; + int dev, type; if (!inode) return 0; @@ -709,17 +894,16 @@ dev = MINOR(inode->i_rdev); if (dev >= max_loop) return 0; + lo = &loop_dev[dev]; - if (lo->lo_refcnt <= 0) - printk(KERN_ERR "lo_release: refcount(%d) <= 0\n", - lo->lo_refcnt); - else { - int type = lo->lo_encrypt_type; - --lo->lo_refcnt; - if (xfer_funcs[type] && xfer_funcs[type]->unlock) - xfer_funcs[type]->unlock(lo); - MOD_DEC_USE_COUNT; - } + down(&lo->lo_ctl_mutex); + type = lo->lo_encrypt_type; + --lo->lo_refcnt; + if (xfer_funcs[type] && xfer_funcs[type]->unlock) + xfer_funcs[type]->unlock(lo); + + up(&lo->lo_ctl_mutex); + MOD_DEC_USE_COUNT; return 0; } @@ -732,11 +916,8 @@ /* * And now the modules code and kernel interface. 
*/ -#ifdef MODULE -#define loop_init init_module MODULE_PARM(max_loop, "i"); MODULE_PARM_DESC(max_loop, "Maximum number of loop devices (1-255)"); -#endif int loop_register_transfer(struct loop_func_table *funcs) { @@ -767,88 +948,88 @@ EXPORT_SYMBOL(loop_register_transfer); EXPORT_SYMBOL(loop_unregister_transfer); -static void no_plug_device(request_queue_t *q, kdev_t device) -{ -} - int __init loop_init(void) { int i; - if (devfs_register_blkdev(MAJOR_NR, "loop", &lo_fops)) { - printk(KERN_WARNING "Unable to get major number %d for loop device\n", - MAJOR_NR); - return -EIO; - } - devfs_handle = devfs_mk_dir (NULL, "loop", NULL); - devfs_register_series (devfs_handle, "%u", max_loop, DEVFS_FL_DEFAULT, - MAJOR_NR, 0, - S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, - &lo_fops, NULL); - if ((max_loop < 1) || (max_loop > 255)) { - printk (KERN_WARNING "loop: invalid max_loop (must be between 1 and 255), using default (8)\n"); + printk(KERN_WARNING "loop: invalid max_loop (must be between" + " 1 and 255), using default (8)\n"); max_loop = 8; } - - printk(KERN_INFO "loop: enabling %d loop devices\n", max_loop); - loop_dev = kmalloc (max_loop * sizeof(struct loop_device), GFP_KERNEL); - if (!loop_dev) { - printk (KERN_ERR "loop: Unable to create loop_dev\n"); - return -ENOMEM; + if (devfs_register_blkdev(MAJOR_NR, "loop", &lo_fops)) { + printk(KERN_WARNING "Unable to get major number %d for loop" + " device\n", MAJOR_NR); + return -EIO; } - loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); - if (!loop_sizes) { - printk (KERN_ERR "loop: Unable to create loop_sizes\n"); - kfree (loop_dev); - return -ENOMEM; - } + devfs_handle = devfs_mk_dir(NULL, "loop", NULL); + devfs_register_series(devfs_handle, "%u", max_loop, DEVFS_FL_DEFAULT, + MAJOR_NR, 0, + S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, + &lo_fops, NULL); - loop_blksizes = kmalloc (max_loop * sizeof(int), GFP_KERNEL); - if (!loop_blksizes) { - printk (KERN_ERR "loop: Unable to create loop_blksizes\n"); - kfree 
(loop_dev); - kfree (loop_sizes); + loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL); + if (!loop_dev) return -ENOMEM; - } + + loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); + if (!loop_sizes) + goto out_sizes; - blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); - blk_queue_pluggable(BLK_DEFAULT_QUEUE(MAJOR_NR), no_plug_device); - blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); - for (i=0; i < max_loop; i++) { - memset(&loop_dev[i], 0, sizeof(struct loop_device)); - loop_dev[i].lo_number = i; + loop_blksizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL); + if (!loop_blksizes) + goto out_blksizes; + + blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request); + + for (i = 0; i < max_loop; i++) { + struct loop_device *lo = &loop_dev[i]; + memset(lo, 0, sizeof(struct loop_device)); + init_MUTEX(&lo->lo_ctl_mutex); + init_MUTEX_LOCKED(&lo->lo_sem); + init_MUTEX_LOCKED(&lo->lo_bh_mutex); + lo->lo_number = i; + spin_lock_init(&lo->lo_lock); } + memset(loop_sizes, 0, max_loop * sizeof(int)); memset(loop_blksizes, 0, max_loop * sizeof(int)); blk_size[MAJOR_NR] = loop_sizes; blksize_size[MAJOR_NR] = loop_blksizes; - for (i=0; i < max_loop; i++) - register_disk(NULL, MKDEV(MAJOR_NR,i), 1, &lo_fops, 0); + for (i = 0; i < max_loop; i++) + register_disk(NULL, MKDEV(MAJOR_NR, i), 1, &lo_fops, 0); + printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop); return 0; + +out_sizes: + kfree(loop_dev); +out_blksizes: + kfree(loop_sizes); + printk(KERN_ERR "loop: ran out of memory\n"); + return -ENOMEM; } -#ifdef MODULE -void cleanup_module(void) +void loop_exit(void) { - devfs_unregister (devfs_handle); - if (devfs_unregister_blkdev(MAJOR_NR, "loop") != 0) + devfs_unregister(devfs_handle); + if (devfs_unregister_blkdev(MAJOR_NR, "loop")) printk(KERN_WARNING "loop: cannot unregister blkdev\n"); - blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); - kfree (loop_dev); - kfree (loop_sizes); - kfree (loop_blksizes); + 
kfree(loop_dev); + kfree(loop_sizes); + kfree(loop_blksizes); } -#endif + +module_init(loop_init); +module_exit(loop_exit); #ifndef MODULE static int __init max_loop_setup(char *str) { - max_loop = simple_strtol(str,NULL,0); + max_loop = simple_strtol(str, NULL, 0); return 1; } Index: fs/Makefile =================================================================== RCS file: /cvs/linux-2.4-xfs/linux/fs/Makefile,v retrieving revision 1.27 diff -u -r1.27 Makefile --- fs/Makefile 2001/02/01 17:10:24 1.27 +++ fs/Makefile 2001/03/06 15:21:47 @@ -7,7 +7,7 @@ O_TARGET := fs.o -export-objs := filesystems.o +export-objs := filesystems.o dcache.o mod-subdirs := nls obj-y := open.o read_write.o devices.o file_table.o buffer.o \ Index: fs/inode.c =================================================================== RCS file: /cvs/linux-2.4-xfs/linux/fs/inode.c,v retrieving revision 1.37 diff -u -r1.37 inode.c --- fs/inode.c 2001/02/22 21:09:04 1.37 +++ fs/inode.c 2001/03/06 15:21:48 @@ -613,6 +613,7 @@ inode->i_bdev = NULL; inode->i_data.a_ops = &empty_aops; inode->i_data.host = inode; + inode->i_data.gfp_mask = GFP_HIGHUSER; inode->i_mapping = &inode->i_data; } Index: fs/nfs/dir.c =================================================================== RCS file: /cvs/linux-2.4-xfs/linux/fs/nfs/dir.c,v retrieving revision 1.25 diff -u -r1.25 dir.c --- fs/nfs/dir.c 2001/02/22 21:09:04 1.25 +++ fs/nfs/dir.c 2001/03/06 15:21:49 @@ -321,7 +321,7 @@ desc->page = NULL; } - page = page_cache_alloc(); + page = alloc_page(GFP_HIGHUSER); if (!page) { status = -ENOMEM; goto out; Index: include/linux/fs.h =================================================================== RCS file: /cvs/linux-2.4-xfs/linux/include/linux/fs.h,v retrieving revision 1.82 diff -u -r1.82 fs.h --- include/linux/fs.h 2001/02/28 03:12:02 1.82 +++ include/linux/fs.h 2001/03/06 15:21:49 @@ -396,6 +396,7 @@ struct vm_area_struct *i_mmap; /* list of private mappings */ struct vm_area_struct *i_mmap_shared; /* list of shared 
mappings */ spinlock_t i_shared_lock; /* and spinlock protecting it */ + int gfp_mask; /* how to allocate the pages */ }; struct block_device { Index: include/linux/loop.h =================================================================== RCS file: /cvs/linux-2.4-xfs/linux/include/linux/loop.h,v retrieving revision 1.2 diff -u -r1.2 loop.h --- include/linux/loop.h 1999/06/25 17:32:48 1.2 +++ include/linux/loop.h 2001/03/06 15:21:49 @@ -9,17 +9,23 @@ * Written by Theodore Ts'o, 3/29/93. * * Copyright 1993 by Theodore Ts'o. Redistribution of this file is - * permitted under the GNU Public License. + * permitted under the GNU General Public License. */ #define LO_NAME_SIZE 64 #define LO_KEY_SIZE 32 #ifdef __KERNEL__ - + +/* Possible states of device */ +enum { + Lo_unbound, + Lo_bound, + Lo_rundown, +}; + struct loop_device { int lo_number; - struct dentry *lo_dentry; int lo_refcnt; kdev_t lo_device; int lo_offset; @@ -39,19 +45,38 @@ struct file * lo_backing_file; void *key_data; char key_reserved[48]; /* for use by the filter modules */ + + int old_gfp_mask; + + spinlock_t lo_lock; + struct buffer_head *lo_bh; + struct buffer_head *lo_bhtail; + int lo_state; + struct semaphore lo_sem; + struct semaphore lo_ctl_mutex; + struct semaphore lo_bh_mutex; + atomic_t lo_pending; }; typedef int (* transfer_proc_t)(struct loop_device *, int cmd, char *raw_buf, char *loop_buf, int size, int real_block); +extern inline int lo_do_transfer(struct loop_device *lo, int cmd, char *rbuf, + char *lbuf, int size, int rblock) +{ + if (!lo->transfer) + return 0; + + return lo->transfer(lo, cmd, rbuf, lbuf, size, rblock); +} #endif /* __KERNEL__ */ /* * Loop flags */ -#define LO_FLAGS_DO_BMAP 0x00000001 -#define LO_FLAGS_READ_ONLY 0x00000002 +#define LO_FLAGS_DO_BMAP 1 +#define LO_FLAGS_READ_ONLY 2 /* * Note that this structure gets the wrong offsets when directly used @@ -102,9 +127,8 @@ /* Support for loadable transfer modules */ struct loop_func_table { int number; /* filter type */ - 
int (*transfer)(struct loop_device *lo, int cmd, - char *raw_buf, char *loop_buf, int size, - int real_block); + int (*transfer)(struct loop_device *lo, int cmd, char *raw_buf, + char *loop_buf, int size, int real_block); int (*init)(struct loop_device *, struct loop_info *); /* release is called from loop_unregister_transfer or clr_fd */ int (*release)(struct loop_device *); Index: include/linux/pagemap.h =================================================================== RCS file: /cvs/linux-2.4-xfs/linux/include/linux/pagemap.h,v retrieving revision 1.23 diff -u -r1.23 pagemap.h --- include/linux/pagemap.h 2001/03/01 03:17:37 1.23 +++ include/linux/pagemap.h 2001/03/06 15:21:49 @@ -29,9 +29,13 @@ #define PAGE_CACHE_ALIGN(addr) (((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK) #define page_cache_get(x) get_page(x) -#define page_cache_alloc() alloc_pages(GFP_HIGHUSER, 0) #define page_cache_free(x) __free_page(x) #define page_cache_release(x) __free_page(x) + +static inline struct page *page_cache_alloc(struct address_space *x) +{ + return alloc_pages(x->gfp_mask, 0); +} /* * From a kernel address, get the "struct page *" Index: init/main.c =================================================================== RCS file: /cvs/linux-2.4-xfs/linux/init/main.c,v retrieving revision 1.49 diff -u -r1.49 main.c --- init/main.c 2001/02/22 21:09:04 1.49 +++ init/main.c 2001/03/06 15:21:50 @@ -159,6 +159,7 @@ { "nfs", 0x00ff }, { "hda", 0x0300 }, { "hdb", 0x0340 }, + { "loop", 0x0700 }, { "hdc", 0x1600 }, { "hdd", 0x1640 }, { "hde", 0x2100 }, Index: kernel/ksyms.c =================================================================== RCS file: /cvs/linux-2.4-xfs/linux/kernel/ksyms.c,v retrieving revision 1.81 diff -u -r1.81 ksyms.c --- kernel/ksyms.c 2001/03/05 18:53:08 1.81 +++ kernel/ksyms.c 2001/03/06 15:21:50 @@ -135,6 +135,7 @@ EXPORT_SYMBOL(kmap_high); EXPORT_SYMBOL(kunmap_high); EXPORT_SYMBOL(highmem_start_page); +EXPORT_SYMBOL(create_bounce); #endif /* filesystem internal 
functions */ Index: mm/filemap.c =================================================================== RCS file: /cvs/linux-2.4-xfs/linux/mm/filemap.c,v retrieving revision 1.70 diff -u -r1.70 filemap.c --- mm/filemap.c 2001/03/01 03:17:37 1.70 +++ mm/filemap.c 2001/03/06 15:21:50 @@ -597,7 +597,7 @@ if (page) return 0; - page = page_cache_alloc(); + page = page_cache_alloc(mapping); if (!page) return -ENOMEM; @@ -1236,7 +1236,7 @@ #endif if (!cached_page) { spin_unlock(&pagecache_lock); - cached_page = page_cache_alloc(); + cached_page = page_cache_alloc(mapping); if (!cached_page) { desc->error = -ENOMEM; break; @@ -1536,7 +1536,7 @@ */ old_page = page; if (no_share) { - struct page *new_page = page_cache_alloc(); + struct page *new_page = alloc_page(GFP_HIGHUSER); if (new_page) { copy_user_highpage(new_page, old_page, address); @@ -2381,7 +2381,7 @@ page = __find_get_page(mapping, index, hash); if (!page) { if (!cached_page) { - cached_page = page_cache_alloc(); + cached_page = page_cache_alloc(mapping); if (!cached_page) return ERR_PTR(-ENOMEM); } @@ -2444,7 +2444,7 @@ page = __find_lock_page(mapping, index, hash); if (!page) { if (!*cached_page) { - *cached_page = page_cache_alloc(); + *cached_page = page_cache_alloc(mapping); if (!*cached_page) return NULL; } Index: mm/memory.c =================================================================== RCS file: /cvs/linux-2.4-xfs/linux/mm/memory.c,v retrieving revision 1.45 diff -u -r1.45 memory.c --- mm/memory.c 2001/02/22 21:09:04 1.45 +++ mm/memory.c 2001/03/06 15:21:50 @@ -874,7 +874,7 @@ * Ok, we need to copy. Oh, well.. 
*/ spin_unlock(&mm->page_table_lock); - new_page = page_cache_alloc(); + new_page = alloc_page(GFP_HIGHUSER); if (!new_page) return -1; spin_lock(&mm->page_table_lock); Index: mm/shmem.c =================================================================== RCS file: /cvs/linux-2.4-xfs/linux/mm/shmem.c,v retrieving revision 1.3 diff -u -r1.3 shmem.c --- mm/shmem.c 2001/02/01 17:10:24 1.3 +++ mm/shmem.c 2001/03/06 15:21:50 @@ -321,7 +321,7 @@ inode->i_sb->u.shmem_sb.free_blocks--; spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock); /* Ok, get a new page */ - page = page_cache_alloc(); + page = page_cache_alloc(mapping); if (!page) goto oom; clear_user_highpage(page, address); @@ -338,7 +338,7 @@ up(&inode->i_sem); if (no_share) { - struct page *new_page = page_cache_alloc(); + struct page *new_page = page_cache_alloc(inode->i_mapping); if (new_page) { copy_user_highpage(new_page, page, address);