Binary files rawioref/ID and rawio/ID differ diff -urN rawioref/drivers/char/Makefile rawio/drivers/char/Makefile --- rawioref/drivers/char/Makefile Fri Apr 20 22:31:16 2001 +++ rawio/drivers/char/Makefile Sat Apr 21 16:34:38 2001 @@ -20,7 +20,7 @@ O_TARGET := char.o M_OBJS := -O_OBJS := tty_io.o n_tty.o tty_ioctl.o mem.o random.o +O_OBJS := tty_io.o n_tty.o tty_ioctl.o mem.o random.o raw.o OX_OBJS := pty.o misc.o obj-y := obj-m := diff -urN rawioref/drivers/char/raw.c rawio/drivers/char/raw.c --- rawioref/drivers/char/raw.c Thu Jan 1 01:00:00 1970 +++ rawio/drivers/char/raw.c Sat Apr 21 18:52:22 2001 @@ -0,0 +1,438 @@ +/* + * linux/drivers/char/raw.c + * + * Front-end raw character devices. These can be bound to any block + * devices to provide genuine Unix raw character device semantics. + * + * We reserve minor number 0 for a control interface. ioctl()s on this + * device are used to bind the other minor numbers to block devices. + */ + +#include +#include +#include +#include +#include +#include + +#define dprintk(x...) + +typedef struct raw_device_data_s { + struct kiobuf * iobuf; + long iobuf_lock; + kdev_t binding; + int inuse, sector_size, sector_bits; + struct semaphore mutex; +} raw_device_data_t; + +static raw_device_data_t raw_devices[256]; + +extern struct file_operations * get_blkfops(unsigned int major); + +static ssize_t rw_raw_dev(int rw, struct file *, char *, size_t, loff_t *); + +ssize_t raw_read(struct file *, char *, size_t, loff_t *); +ssize_t raw_write(struct file *, const char *, size_t, loff_t *); +int raw_open(struct inode *, struct file *); +int raw_release(struct inode *, struct file *); +int raw_ctl_ioctl(struct inode *, struct file *, unsigned int, unsigned long); + + +static struct file_operations raw_fops = { + NULL, /* llseek */ + raw_read, /* read */ + raw_write, /* write */ + NULL, /* readdir */ + NULL, /* poll */ + NULL, /* ioctl */ + NULL, /* mmap */ + raw_open, /* open */ + NULL, /* flush */ + raw_release, /* release */ + NULL /* fsync */ +}; + +static struct file_operations raw_ctl_fops = { + NULL, /* llseek */ + NULL, /* read */ + NULL, /* write */ + NULL, /* readdir */ + NULL, /* poll */ + raw_ctl_ioctl, /* ioctl */ + NULL, /* mmap */ + raw_open, /* open */ + NULL, /* flush */ + NULL, /* no special release code */ + NULL /* fsync */ +}; + +static int __init raw_init(void) +{ + int i; + register_chrdev(RAW_MAJOR, "raw", &raw_fops); + + for (i = 0; i < 256; i++) { + init_MUTEX(&raw_devices[i].mutex); + raw_devices[i].binding = NODEV; + } + + return 0; +} + +__initcall(raw_init); + +/* + * The raw IO open and release code needs to fake appropriate + * open/release calls to the underlying block devices. + */ + +static int bdev_open(kdev_t dev, int mode) +{ + int err = 0; + struct file dummy_file = {}; + struct dentry dummy_dentry = {}; + struct inode * inode = get_empty_inode(); + + if (!inode) + return -ENOMEM; + + dummy_file.f_op = get_blkfops(MAJOR(dev)); + if (!dummy_file.f_op) { + err = -ENODEV; + goto done; + } + + if (dummy_file.f_op->open) { + inode->i_rdev = dev; + dummy_dentry.d_inode = inode; + dummy_file.f_dentry = &dummy_dentry; + dummy_file.f_mode = mode; + err = dummy_file.f_op->open(inode, &dummy_file); + } + + done: + iput(inode); + return err; +} + +static int bdev_close(kdev_t dev) +{ + int err; + struct inode * inode = get_empty_inode(); + + if (!inode) + return -ENOMEM; + + inode->i_rdev = dev; + err = blkdev_release(inode); + iput(inode); + return err; +} + + + +/* + * Open/close code for raw IO. 
+ */ + +int raw_open(struct inode *inode, struct file *filp) +{ + int minor; + kdev_t bdev; + int err; + int sector_size; + int sector_bits; + + minor = MINOR(inode->i_rdev); + + /* + * Is it the control device? + */ + + if (minor == 0) { + filp->f_op = &raw_ctl_fops; + return 0; + } + + down(&raw_devices[minor].mutex); + /* + * No, it is a normal raw device. All we need to do on open is + * to check that the device is bound, and force the underlying + * block device to a sector-size blocksize. + */ + + bdev = raw_devices[minor].binding; + err = -ENODEV; + if (bdev == NODEV) + goto out; + + err = bdev_open(bdev, filp->f_mode); + if (err) + goto out; + + /* + * Don't change the blocksize if we already have users using + * this device + */ + + if (raw_devices[minor].inuse++) + goto out; + + err = alloc_kiovec(1, &raw_devices[minor].iobuf); + if (err) { + raw_devices[minor].inuse--; + up(&raw_devices[minor].mutex); + bdev_close(bdev); + return err; + } + + /* + * Don't interfere with mounted devices: we cannot safely set + * the blocksize on a device which is already mounted. + */ + + sector_size = 512; + if (lookup_vfsmnt(bdev) != NULL) { + if (blksize_size[MAJOR(bdev)]) + sector_size = blksize_size[MAJOR(bdev)][MINOR(bdev)]; + } else { + if (hardsect_size[MAJOR(bdev)]) + sector_size = hardsect_size[MAJOR(bdev)][MINOR(bdev)]; + } + + set_blocksize(bdev, sector_size); + raw_devices[minor].sector_size = sector_size; + + for (sector_bits = 0; !(sector_size & 1); ) + sector_size>>=1, sector_bits++; + raw_devices[minor].sector_bits = sector_bits; + + out: + up(&raw_devices[minor].mutex); + return err; +} + +int raw_release(struct inode *inode, struct file *filp) +{ + int minor; + kdev_t bdev; + + minor = MINOR(inode->i_rdev); + down(&raw_devices[minor].mutex); + bdev = raw_devices[minor].binding; + if (!--raw_devices[minor].inuse) + free_kiovec(1, &raw_devices[minor].iobuf); + up(&raw_devices[minor].mutex); + bdev_close(bdev); + return 0; +} + + + +/* + * Deal with ioctls against the raw-device control interface, to bind + * and unbind other raw devices. + */ + +int raw_ctl_ioctl(struct inode *inode, + struct file *flip, + unsigned int command, + unsigned long arg) +{ + struct raw_config_request rq; + int err = 0; + int minor; + + switch (command) { + case RAW_SETBIND: + case RAW_GETBIND: + + /* First, find out which raw minor we want */ + + err = copy_from_user(&rq, (void *) arg, sizeof(rq)); + if (err) + break; + + minor = rq.raw_minor; + if (minor == 0 || minor > MINORMASK) { + err = -EINVAL; + break; + } + + if (command == RAW_SETBIND) { + /* + * This is like making block devices, so demand the + * same capability + */ + if (!capable(CAP_SYS_ADMIN)) { + err = -EPERM; + break; + } + + /* + * For now, we don't need to check that the underlying + * block device is present or not: we can do that when + * the raw device is opened. Just check that the + * major/minor numbers make sense. 
+ */ + + if ((rq.block_major == NODEV && + rq.block_minor != NODEV) || + rq.block_major > MAX_BLKDEV || + rq.block_minor > MINORMASK) { + err = -EINVAL; + break; + } + + down(&raw_devices[minor].mutex); + if (raw_devices[minor].inuse) { + up(&raw_devices[minor].mutex); + err = -EBUSY; + break; + } + raw_devices[minor].binding = + MKDEV(rq.block_major, rq.block_minor); + up(&raw_devices[minor].mutex); + } else { + rq.block_major = MAJOR(raw_devices[minor].binding); + rq.block_minor = MINOR(raw_devices[minor].binding); + err = copy_to_user((void *) arg, &rq, sizeof(rq)); + } + break; + + default: + err = -EINVAL; + } + + return err; +} + + + +ssize_t raw_read(struct file *filp, char * buf, + size_t size, loff_t *offp) +{ + return rw_raw_dev(READ, filp, buf, size, offp); +} + +ssize_t raw_write(struct file *filp, const char *buf, + size_t size, loff_t *offp) +{ + return rw_raw_dev(WRITE, filp, (char *) buf, size, offp); +} + +#define SECTOR_BITS 9 +#define SECTOR_SIZE (1U << SECTOR_BITS) +#define SECTOR_MASK (SECTOR_SIZE - 1) + +ssize_t rw_raw_dev(int rw, struct file *filp, char *buf, + size_t size, loff_t *offp) +{ + struct kiobuf * iobuf; + int new_iobuf; + int err = 0; + unsigned long blocknr, blocks; + size_t transferred; + int iosize; + int i; + int minor; + kdev_t dev; + unsigned long limit; + + int sector_size, sector_bits, sector_mask; + int max_sectors; + + /* + * First, a few checks on device size limits + */ + + minor = MINOR(filp->f_dentry->d_inode->i_rdev); + + new_iobuf = 0; + iobuf = raw_devices[minor].iobuf; + if (test_and_set_bit(0, &raw_devices[minor].iobuf_lock)) { + /* + * A parallel read/write is using the preallocated iobuf + * so just run slow and allocate a new one. + */ + err = alloc_kiovec(1, &iobuf); + if (err) + goto out; + new_iobuf = 1; + } + + dev = raw_devices[minor].binding; + sector_size = raw_devices[minor].sector_size; + sector_bits = raw_devices[minor].sector_bits; + sector_mask = sector_size- 1; + max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9); + + if (blk_size[MAJOR(dev)]) + limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits; + else + limit = INT_MAX; + dprintk ("rw_raw_dev: dev %d:%d (+%d)\n", + MAJOR(dev), MINOR(dev), limit); + + err = -EINVAL; + if ((*offp & sector_mask) || (size & sector_mask)) + goto out_free; + err = 0; + if (size) + err = -ENXIO; + if ((*offp >> sector_bits) >= limit) + goto out_free; + + /* + * Split the IO into KIO_MAX_SECTORS chunks, mapping and + * unmapping the single kiobuf as we go to perform each chunk of + * IO. 
+ */ + + transferred = 0; + blocknr = *offp >> sector_bits; + while (size > 0) { + blocks = size >> sector_bits; + if (blocks > max_sectors) + blocks = max_sectors; + if (blocks > limit - blocknr) + blocks = limit - blocknr; + if (!blocks) + break; + + iosize = blocks << sector_bits; + + err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize); + if (err) + break; + + for (i=0; i < blocks; i++) + iobuf->blocks[i] = blocknr++; + + err = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, sector_size); + + if (err >= 0) { + transferred += err; + size -= err; + buf += err; + } + + unmap_kiobuf(iobuf); + + if (err != iosize) + break; + } + + if (transferred) { + *offp += transferred; + err = transferred; + } + + out_free: + if (!new_iobuf) + clear_bit(0, &raw_devices[minor].iobuf_lock); + else + free_kiovec(1, &iobuf); + out: + return err; +} diff -urN rawioref/fs/Makefile rawio/fs/Makefile --- rawioref/fs/Makefile Thu Aug 26 14:20:19 1999 +++ rawio/fs/Makefile Sat Apr 21 16:34:38 2001 @@ -13,7 +13,7 @@ O_OBJS = open.o read_write.o devices.o file_table.o buffer.o \ super.o block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o fifo.o locks.o filesystems.o \ - dcache.o inode.o attr.o bad_inode.o file.o $(BINFMTS) + dcache.o inode.o attr.o bad_inode.o file.o iobuf.o $(BINFMTS) MOD_LIST_NAME := FS_MODULES ALL_SUB_DIRS = coda minix ext2 fat msdos vfat proc isofs nfs umsdos ntfs \ diff -urN rawioref/fs/buffer.c rawio/fs/buffer.c --- rawioref/fs/buffer.c Sat Apr 21 16:34:05 2001 +++ rawio/fs/buffer.c Sat Apr 21 18:48:11 2001 @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -1239,6 +1240,33 @@ wake_up(&buffer_wait); } +int alloc_kiobuf_bhs(struct kiobuf * kiobuf) +{ + int i; + + for (i = 0; i < KIO_MAX_SECTORS; i++) + if (!(kiobuf->bh[i] = get_unused_buffer_head(0))) { + while (i--) { + put_unused_buffer_head(kiobuf->bh[i]); + kiobuf->bh[i] = NULL; + } + wake_up(&buffer_wait); + return -ENOMEM; + } + return 0; +} + +void free_kiobuf_bhs(struct kiobuf * kiobuf) +{ + int i; + + for (i = 0; i < KIO_MAX_SECTORS; i++) { + put_unused_buffer_head(kiobuf->bh[i]); + kiobuf->bh[i] = NULL; + } + wake_up(&buffer_wait); +} + static void end_buffer_io_async(struct buffer_head * bh, int uptodate) { unsigned long flags; @@ -1299,6 +1327,215 @@ bad_count: printk ("Whoops: end_buffer_io_async: b_count != 1 on async io.\n"); return; +} + +/* + * IO completion routine for a buffer_head being used for kiobuf IO: we + * can't dispatch the kiobuf callback until io_count reaches 0. + */ + +static void end_buffer_io_kiobuf(struct buffer_head *bh, int uptodate) +{ + struct kiobuf *kiobuf; + + mark_buffer_uptodate(bh, uptodate); + + kiobuf = bh->b_dev_id; + unlock_buffer(bh); + end_kio_request(kiobuf, uptodate); +} + +/* + * For brw_kiovec: submit a set of buffer_head temporary IOs and wait + * for them to complete. Clean up the buffer_heads afterwards. + */ + +static int wait_kio(int rw, int nr, struct buffer_head *bh[], int size) +{ + int iosize, err; + int i; + struct buffer_head *tmp; + + iosize = 0; + err = 0; + + for (i = nr; --i >= 0; ) { + iosize += size; + tmp = bh[i]; + wait_on_buffer(tmp); + if (!buffer_uptodate(tmp)) { + /* We are traversing bh'es in reverse order so + clearing iosize on error calculates the + amount of IO before the first error. */ + iosize = 0; + err = -EIO; + } + } + + if (iosize) + return iosize; + return err; +} + +/* + * Clean up the bounce buffers potentially used by brw_kiovec. 
All of + * the kiovec's bounce buffers must be cleared of temporarily allocated + * bounce pages, but only READ pages for whom IO completed successfully + * can actually be transferred back to user space. + */ + +void cleanup_bounce_buffers(int rw, int nr, struct kiobuf *iovec[], + int transferred) +{ + int i; + for (i = 0; i < nr; i++) { + struct kiobuf *iobuf = iovec[i]; + if (iobuf->bounced) { + if (transferred > 0 && !(rw & WRITE)) + kiobuf_copy_bounce(iobuf, COPY_FROM_BOUNCE, + transferred); + + clear_kiobuf_bounce_pages(iobuf); + } + transferred -= iobuf->length; + } +} + +/* + * Start I/O on a physical range of kernel memory, defined by a vector + * of kiobuf structs (much like a user-space iovec list). + * + * IO is submitted asynchronously: you need to check page->locked, + * page->uptodate, and maybe wait on page->wait. + * + * It is up to the caller to make sure that there are enough blocks + * passed in to completely map the iobufs to disk. + */ + +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], + kdev_t dev, unsigned long b[], int size) +{ + int err; + int length; + int transferred; + int i; + int bufind; + int pageind; + int bhind; + int offset; + unsigned long blocknr; + struct kiobuf * iobuf = NULL; + unsigned long page; + unsigned long bounce; + struct page * map; + struct buffer_head *tmp, **bhs = NULL; + + /* + * First, do some alignment and validity checks + */ + for (i = 0; i < nr; i++) { + iobuf = iovec[i]; + if ((iobuf->offset & (size-1)) || + (iobuf->length & (size-1))) + return -EINVAL; + if (!iobuf->nr_pages) + panic("brw_kiovec: iobuf not initialised"); + } + + /* DEBUG */ +#if 0 + return iobuf->length; +#endif + + /* + * OK to walk down the iovec doing page IO on each page we find. + */ + bufind = bhind = transferred = err = 0; + for (i = 0; i < nr; i++) { + iobuf = iovec[i]; + err = setup_kiobuf_bounce_pages(iobuf, GFP_USER); + if (err) + goto finished; + if (rw & WRITE) + kiobuf_copy_bounce(iobuf, COPY_TO_BOUNCE, -1); + + offset = iobuf->offset; + length = iobuf->length; + if (!bhs) + bhs = iobuf->bh; + + for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { + map = iobuf->maplist[pageind]; + bounce = iobuf->bouncelist[pageind]; + + if (bounce) + page = bounce; + else + page = iobuf->pagelist[pageind]; + + while (length > 0) { + blocknr = b[bufind++]; + tmp = bhs[bhind++]; + + tmp->b_dev = B_FREE; + tmp->b_size = size; + tmp->b_data = (char *) (page + offset); + tmp->b_this_page = tmp; + + init_buffer(tmp, dev, blocknr, + end_buffer_io_kiobuf, iobuf); + if (rw == WRITE) { + set_bit(BH_Uptodate, &tmp->b_state); + set_bit(BH_Dirty, &tmp->b_state); + } else + clear_bit(BH_Uptodate, &tmp->b_state); + + length -= size; + offset += size; + + atomic_inc(&iobuf->io_count); + + /* + * Start the IO if we have got too much or if + * this is the end of the last iobuf + */ + if (bhind >= KIO_MAX_SECTORS) { + ll_rw_block(rw, bhind, bhs); + kiobuf_wait_for_io(iobuf); + err = wait_kio(rw, bhind, bhs, size); + if (err >= 0) + transferred += err; + else + goto finished; + bhind = 0; + } + + if (offset >= PAGE_SIZE) { + offset = 0; + break; + } + } /* End of block loop */ + } /* End of page loop */ + } /* End of iovec loop */ + + /* Is there any IO still left to submit? 
*/ + if (bhind) { + ll_rw_block(rw, bhind, bhs); + kiobuf_wait_for_io(iobuf); + err = wait_kio(rw, bhind, bhs, size); + if (err >= 0) + transferred += err; + else + goto finished; + } + + finished: + + cleanup_bounce_buffers(rw, nr, iovec, transferred); + + if (transferred) + return transferred; + return err; } /* diff -urN rawioref/fs/iobuf.c rawio/fs/iobuf.c --- rawioref/fs/iobuf.c Thu Jan 1 01:00:00 1970 +++ rawio/fs/iobuf.c Sat Apr 21 18:51:38 2001 @@ -0,0 +1,269 @@ +/* + * iobuf.c + * + * Keep track of the general-purpose IO-buffer structures used to track + * abstract kernel-space io buffers. + * + */ + +#include +#include +#include +#include +#include +#include + +void end_kio_request(struct kiobuf *kiobuf, int uptodate) +{ + if ((!uptodate) && !kiobuf->errno) + kiobuf->errno = -EIO; + + if (atomic_dec_and_test(&kiobuf->io_count)) { + if (kiobuf->end_io) + kiobuf->end_io(kiobuf); + wake_up(&kiobuf->wait_queue); + } +} + +int alloc_kiovec(int nr, struct kiobuf **bufp) +{ + int i; + struct kiobuf *iobuf; + + for (i = 0; i < nr; i++) { + lock_kernel(); + iobuf = vmalloc(sizeof(struct kiobuf)); + unlock_kernel(); + if (!iobuf) { + free_kiovec(i, bufp); + return -ENOMEM; + } + + memset(iobuf, 0, sizeof(*iobuf)); + iobuf->array_len = KIO_STATIC_PAGES; + iobuf->pagelist = iobuf->page_array; + iobuf->maplist = iobuf->map_array; + iobuf->bouncelist = iobuf->bounce_array; + init_waitqueue_head(&iobuf->wait_queue); + if (alloc_kiobuf_bhs(iobuf)) { + lock_kernel(); + vfree(iobuf); + unlock_kernel(); + free_kiovec(i, bufp); + return -ENOMEM; + } + *bufp++ = iobuf; + } + + return 0; +} + +void clear_kiobuf_bounce_pages(struct kiobuf *iobuf) +{ + int i; + + if (!iobuf->bounced) + return; + + for (i = 0; i < iobuf->nr_pages; i++) { + unsigned long page = iobuf->bouncelist[i]; + if (page) + free_page(page); + } + iobuf->bounced = 0; +} + +void free_kiovec(int nr, struct kiobuf **bufp) +{ + struct kiobuf *iobuf; + int i; + + for (i = 0; i < nr; i++) { + iobuf = bufp[i]; + clear_kiobuf_bounce_pages(iobuf); + if (iobuf->array_len > KIO_STATIC_PAGES) + kfree (iobuf->pagelist); + free_kiobuf_bhs(iobuf); + lock_kernel(); + vfree(bufp[i]); + unlock_kernel(); + } +} + +int expand_kiobuf(struct kiobuf *iobuf, int wanted) +{ + unsigned long * pagelist, * bouncelist; + struct page ** maplist; + + if (iobuf->array_len >= wanted) + return 0; + + /* + * kmalloc enough space for the page, map and bounce lists all + * at once. + */ + pagelist = (unsigned long *) + kmalloc(3 * wanted * sizeof(unsigned long), GFP_KERNEL); + if (!pagelist) + return -ENOMEM; + + /* Did it grow while we waited? 
*/ + if (iobuf->array_len >= wanted) { + kfree(pagelist); + return 0; + } + + maplist = (struct page **) (pagelist + wanted); + bouncelist = pagelist + 2 * wanted; + + memcpy (pagelist, iobuf->pagelist, + iobuf->array_len * sizeof(unsigned long)); + memcpy (maplist, iobuf->maplist, + iobuf->array_len * sizeof(struct page **)); + memcpy (bouncelist, iobuf->bouncelist, + iobuf->array_len * sizeof(unsigned long)); + + if (iobuf->array_len > KIO_STATIC_PAGES) + kfree (iobuf->pagelist); + + iobuf->pagelist = pagelist; + iobuf->maplist = maplist; + iobuf->bouncelist = bouncelist; + iobuf->array_len = wanted; + return 0; +} + +void kiobuf_wait_for_io(struct kiobuf *kiobuf) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + if (atomic_read(&kiobuf->io_count) == 0) + return; + + add_wait_queue(&kiobuf->wait_queue, &wait); +repeat: + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (atomic_read(&kiobuf->io_count) != 0) { + run_task_queue(&tq_disk); + schedule(); + if (atomic_read(&kiobuf->io_count) != 0) + goto repeat; + } + tsk->state = TASK_RUNNING; + remove_wait_queue(&kiobuf->wait_queue, &wait); +} + +/* + * Test whether a given page from the bounce buffer matches the given + * gfp_mask. Return true if a bounce buffer is required for this + * page. + */ + +static inline int test_bounce_page(unsigned long page, + struct page * map, + int gfp_mask) +{ + /* Unmapped pages from PCI memory or BIGMEM pages always need a + * bounce buffer unless the caller is prepared to accept + * GFP_BIGMEM pages. */ + + if (!map || PageBIGMEM(map) ) + /* Careful, the following must return the right value + * even if CONFIG_BIGMEM is not set */ + return !(gfp_mask & __GFP_BIGMEM); + + /* A DMA-able page never needs a bounce buffer */ + if (PageDMA(map)) + return 0; + + /* Otherwise it is a non-ISA-DMA-capable page and needs bounce + * buffers if GFP_DMA is requested */ + return gfp_mask & __GFP_DMA; +} + +int setup_kiobuf_bounce_pages(struct kiobuf *iobuf, int gfp_mask) +{ + int i; + + clear_kiobuf_bounce_pages(iobuf); + + for (i = 0; i < iobuf->nr_pages; i++) { + struct page *map = iobuf->maplist[i]; + unsigned long page = iobuf->pagelist[i]; + unsigned long bounce_page; + + if (!test_bounce_page(page, map, gfp_mask)) { + iobuf->bouncelist[i] = 0; + continue; + } + + bounce_page = __get_free_page(gfp_mask); + if (!bounce_page) + goto error; + + iobuf->bouncelist[i] = bounce_page; + iobuf->bounced = 1; + } + return 0; + + error: + clear_kiobuf_bounce_pages(iobuf); + return -ENOMEM; +} + +/* + * Copy a bounce buffer. For completion of partially-failed read IOs, + * we need to be able to place an upper limit on the data successfully + * transferred from bounce buffers to the user's own buffers. 
+ */ + +void kiobuf_copy_bounce(struct kiobuf *iobuf, int direction, int max) +{ + int i; + int offset, length; + + if (!iobuf->bounced) + return; + + offset = iobuf->offset; + length = iobuf->length; + if (max >= 0 && length > max) + length = max; + + i = 0; + + if (offset > PAGE_SIZE) { + i = (offset >> PAGE_SHIFT); + offset &= ~PAGE_MASK; + } + + for (; i < iobuf->nr_pages && length > 0; i++) { + unsigned long page = iobuf->pagelist[i]; + unsigned long bounce_page = iobuf->bouncelist[i]; + unsigned long kin, kout; + int pagelen = length; + + if ((pagelen+offset) > PAGE_SIZE) + pagelen = PAGE_SIZE - offset; + + if (bounce_page) { + if (direction == COPY_TO_BOUNCE) { + kin = kmap(page, KM_READ); + kout = kmap(bounce_page, KM_WRITE); + } else { + kin = kmap(bounce_page, KM_READ); + kout = kmap(page, KM_WRITE); + } + + memcpy((char *) (kout+offset), + (char *) (kin+offset), + pagelen); + kunmap(kout, KM_WRITE); + kunmap(kin, KM_READ); + } + + length -= pagelen; + offset = 0; + } +} diff -urN rawioref/include/linux/iobuf.h rawio/include/linux/iobuf.h --- rawioref/include/linux/iobuf.h Thu Jan 1 01:00:00 1970 +++ rawio/include/linux/iobuf.h Sat Apr 21 19:11:46 2001 @@ -0,0 +1,94 @@ +/* + * iobuf.h + * + * Defines the structures used to track abstract kernel-space io buffers. + * + */ + +#ifndef __LINUX_IOBUF_H +#define __LINUX_IOBUF_H + +#include +#include + +/* + * The kiobuf structure describes a physical set of pages reserved + * locked for IO. The reference counts on each page will have been + * incremented, and the flags field will indicate whether or not we have + * pre-locked all of the pages for IO. + * + * kiobufs may be passed in arrays to form a kiovec, but we must + * preserve the property that no page is present more than once over the + * entire iovec. + */ + +#define KIO_MAX_ATOMIC_IO 128 /* in kb */ +#define KIO_MAX_ATOMIC_BYTES (64 * 1024) +#define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1) +#define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2) + +struct kiobuf +{ + int nr_pages; /* Pages actually referenced */ + int array_len; /* Space in the allocated lists */ + int offset; /* Offset to start of valid data */ + int length; /* Number of valid bytes of data */ + + /* Keep separate track of the physical addresses and page + * structs involved. If we do IO to a memory-mapped device + * region, there won't necessarily be page structs defined for + * every address. 
*/ + + unsigned long * pagelist; + struct page ** maplist; + unsigned long * bouncelist; + + unsigned int locked : 1; /* If set, pages has been locked */ + unsigned int bounced : 1; /* If set, bounce pages are set up */ + + /* Always embed enough struct pages for 64k of IO */ + unsigned long page_array[KIO_STATIC_PAGES]; + struct page * map_array[KIO_STATIC_PAGES]; + unsigned long bounce_array[KIO_STATIC_PAGES]; + struct buffer_head * bh[KIO_MAX_SECTORS]; + unsigned long blocks[KIO_MAX_SECTORS]; + + /* Dynamic state for IO completion: */ + atomic_t io_count; /* IOs still in progress */ + int errno; /* Status of completed IO */ + void (*end_io) (struct kiobuf *); /* Completion callback */ + wait_queue_head_t wait_queue; +}; + + +/* mm/memory.c */ + +int map_user_kiobuf(int rw, struct kiobuf *, unsigned long va, size_t len); +void unmap_kiobuf(struct kiobuf *iobuf); + +/* fs/iobuf.c */ + +extern void end_kio_request(struct kiobuf *, int); +extern void simple_wakeup_kiobuf(struct kiobuf *); +int alloc_kiovec(int nr, struct kiobuf **); +void free_kiovec(int nr, struct kiobuf **); +int expand_kiobuf(struct kiobuf *, int); +int setup_kiobuf_bounce_pages(struct kiobuf *, int gfp_mask); +void clear_kiobuf_bounce_pages(struct kiobuf *); +void kiobuf_copy_bounce(struct kiobuf *, int direction, int max); +extern void kiobuf_wait_for_io(struct kiobuf *); +extern int alloc_kiobuf_bhs(struct kiobuf *); +extern void free_kiobuf_bhs(struct kiobuf *); + +/* Direction codes for kiobuf_copy_bounce: */ +enum { + COPY_TO_BOUNCE, + COPY_FROM_BOUNCE +}; + +/* fs/buffer.c */ + +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], + kdev_t dev, unsigned long b[], int size); + +#endif /* __LINUX_IOBUF_H */ diff -urN rawioref/include/linux/major.h rawio/include/linux/major.h --- rawioref/include/linux/major.h Fri Apr 20 22:31:21 2001 +++ rawio/include/linux/major.h Sat Apr 21 16:34:38 2001 @@ -126,6 +126,8 @@ #define AURORA_MAJOR 79 +#define RAW_MAJOR 162 + #define UNIX98_PTY_MASTER_MAJOR 128 #define UNIX98_PTY_MAJOR_COUNT 8 #define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) diff -urN rawioref/include/linux/raw.h rawio/include/linux/raw.h --- rawioref/include/linux/raw.h Thu Jan 1 01:00:00 1970 +++ rawio/include/linux/raw.h Sat Apr 21 18:57:08 2001 @@ -0,0 +1,16 @@ +#ifndef __LINUX_RAW_H +#define __LINUX_RAW_H + +#include + +#define RAW_SETBIND _IO( 0xac, 0 ) +#define RAW_GETBIND _IO( 0xac, 1 ) + +struct raw_config_request +{ + int raw_minor; + __u64 block_major; + __u64 block_minor; +}; + +#endif /* __LINUX_RAW_H */ diff -urN rawioref/init/main.c rawio/init/main.c --- rawioref/init/main.c Fri Apr 20 22:31:21 2001 +++ rawio/init/main.c Sat Apr 21 18:52:00 2001 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff -urN rawioref/kernel/ksyms.c rawio/kernel/ksyms.c --- rawioref/kernel/ksyms.c Mon Dec 11 16:58:06 2000 +++ rawio/kernel/ksyms.c Sat Apr 21 16:34:38 2001 @@ -37,6 +37,7 @@ #include #include #include +#include #if defined(CONFIG_PROC_FS) #include @@ -252,6 +253,14 @@ EXPORT_SYMBOL(max_sectors); EXPORT_SYMBOL(max_segments); EXPORT_SYMBOL(max_readahead); + +/* kiobuf support */ +EXPORT_SYMBOL(map_user_kiobuf); +EXPORT_SYMBOL(unmap_kiobuf); +EXPORT_SYMBOL(alloc_kiovec); +EXPORT_SYMBOL(free_kiovec); +EXPORT_SYMBOL(expand_kiobuf); +EXPORT_SYMBOL(brw_kiovec); /* tty routines */ EXPORT_SYMBOL(tty_hangup); diff -urN rawioref/mm/memory.c rawio/mm/memory.c --- rawioref/mm/memory.c Sat Apr 21 16:34:24 2001 +++ rawio/mm/memory.c Sat Apr 21 16:34:38 2001 @@ -41,6 
+41,8 @@ #include #include #include +#include +#include #include #include @@ -399,6 +401,223 @@ if (mm->rss < 0) mm->rss = 0; } +} + + +/* + * Do a quick page-table lookup for a single page. + */ +static unsigned long get_page(unsigned long address, int write) +{ + pgd_t *pgd; + pmd_t *pmd; + + pgd = pgd_offset(current->mm, address); + pmd = pmd_offset(pgd, address); + if (pmd) { + pte_t * pte = pte_offset(pmd, address); + if (pte && pte_present(*pte)) { + if (!write || + (pte_write(*pte) && pte_dirty(*pte))) + return pte_page(*pte); + } + } + + return 0; +} + +/* + * Given a physical address, is there a useful struct page pointing to it? + */ + +static struct page * get_page_map(unsigned long page) +{ + struct page *map; + + if (MAP_NR(page) >= max_mapnr) + return 0; + if (page == ZERO_PAGE(page)) + return 0; + map = mem_map + MAP_NR(page); + if (PageReserved(map)) + return 0; + return map; +} + +/* + * Force in an entire range of pages from the current process's user VA, + * and pin and lock the pages for IO. + */ + +#define dprintk(x...) +int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) +{ + unsigned long ptr, end; + int err; + struct mm_struct * mm; + struct vm_area_struct * vma, * prev_vma; + unsigned long page; + struct page * map; + int doublepage = 0; + int repeat = 0; + int i; + int write = (rw == READ); /* if we read from disk + it means we write + to memory */ + + /* Make sure the iobuf is not already mapped somewhere. */ + if (iobuf->nr_pages) + return -EINVAL; + + mm = current->mm; + dprintk ("map_user_kiobuf: begin\n"); + + ptr = va & PAGE_MASK; + end = (va + len + PAGE_SIZE - 1) & PAGE_MASK; + err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT); + if (err) + return err; + + repeat: + down(&mm->mmap_sem); + + err = -EFAULT; + iobuf->locked = write; + iobuf->offset = va & ~PAGE_MASK; + iobuf->length = len; + vma = NULL; + + i = 0; + + /* + * First of all, try to fault in all of the necessary pages + */ + while (ptr < end) { + if (!vma || ptr >= vma->vm_end) { + vma = find_vma_prev(mm, ptr, &prev_vma); + if (!vma) + goto out; + if (vma->vm_start > ptr) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto out; + if (expand_stack(vma, ptr, prev_vma)) + goto out; + } + err = -EACCES; + if (write) { + if (!(vma->vm_flags & VM_WRITE)) + goto out; + } else { + if (!(vma->vm_flags & VM_READ)) + goto out; + } + err = -EFAULT; + } + while (!(page = get_page(ptr, write))) { + int ret; + + ret = handle_mm_fault(current, vma, ptr, write); + if (ret <= 0) { + if (!ret) + goto out; + else { + err = -ENOMEM; + goto out; + } + } + } + map = get_page_map(page); + if (map) { + if (write) { + /* + * Lock down the pages only if we're going + * to write to memory. If we're reading + * from memory we're free to go ahead + * only after pinning the page on the + * physical side. + */ + if (PageLocked(map)) + goto retry; + set_bit(PG_locked, &map->flags); + } + flush_dcache_page(page_address(map)); + atomic_inc(&map->count); + } + dprintk ("Installing page %p %p: %d\n", (void *)page, map, i); + iobuf->pagelist[i] = page; + iobuf->maplist[i] = map; + iobuf->nr_pages = ++i; + + ptr += PAGE_SIZE; + } + + up(&mm->mmap_sem); + dprintk ("map_user_kiobuf: end OK\n"); + return 0; + + out: + up(&mm->mmap_sem); + unmap_kiobuf(iobuf); + dprintk ("map_user_kiobuf: end %d\n", err); + return err; + + + retry: + + /* + * Undo the locking so far, wait on the page we got to, and try again. 
+ */ + up(&mm->mmap_sem); + unmap_kiobuf(iobuf); + ptr = va & PAGE_MASK; + + /* + * Did the release also unlock the page we got stuck on? + */ + if (!PageLocked(map)) { + /* If so, we may well have the page mapped twice in the + * IO address range. Bad news. Of course, it _might_ + * just be a coincidence, but if it happens more than + * once, chances are we have a double-mapped page. */ + if (++doublepage >= 3) { + return -EINVAL; + } + } + + /* + * Try again... + */ + wait_on_page(map); + if (++repeat < 16) + goto repeat; + return -EAGAIN; +} + + +/* + * Unmap all of the pages referenced by a kiobuf. We release the pages, + * and unlock them if they were locked. + */ + +void unmap_kiobuf (struct kiobuf *iobuf) +{ + int i; + struct page *map; + + for (i = 0; i < iobuf->nr_pages; i++) { + map = iobuf->maplist[i]; + + if (map) { + if (iobuf->locked) { + clear_bit(PG_locked, &map->flags); + wake_up(&map->wait); + } + __free_page(map); + } + } + + iobuf->nr_pages = 0; + iobuf->locked = 0; } static inline void zeromap_pte_range(pte_t * pte, unsigned long address,
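For illustration, a rough userspace sketch of the binding interface declared in include/linux/raw.h above: the control device is character major RAW_MAJOR (162), minor 0, and RAW_SETBIND takes a struct raw_config_request naming the raw minor to bind plus the block device's major and minor numbers; RAW_GETBIND reads a binding back. The /dev node names and the choice of block device below are assumptions, not something this patch creates.

/* Sketch: bind raw minor 1 to an (assumed) block device, then query it. */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/sysmacros.h>      /* major(), minor() */
#include <linux/raw.h>          /* RAW_SETBIND, RAW_GETBIND, raw_config_request */

int main(void)
{
        struct raw_config_request rq;
        struct stat st;
        int ctl;

        if (stat("/dev/sda1", &st) < 0)         /* assumed block device */
                return 1;

        ctl = open("/dev/rawctl", O_RDWR);      /* char 162, minor 0; node name assumed */
        if (ctl < 0)
                return 1;

        memset(&rq, 0, sizeof(rq));
        rq.raw_minor = 1;                       /* bind raw minor 1 ...     */
        rq.block_major = major(st.st_rdev);     /* ... to this block device */
        rq.block_minor = minor(st.st_rdev);
        if (ioctl(ctl, RAW_SETBIND, &rq) < 0)   /* needs CAP_SYS_ADMIN */
                return 1;

        memset(&rq, 0, sizeof(rq));
        rq.raw_minor = 1;
        if (ioctl(ctl, RAW_GETBIND, &rq) == 0)
                printf("raw1 bound to %d:%d\n",
                       (int) rq.block_major, (int) rq.block_minor);

        close(ctl);
        return 0;
}

raw_ctl_ioctl() refuses raw minor 0 and anything above MINORMASK, and rebinding returns -EBUSY while the raw device is held open.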
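Once a minor is bound, I/O through it has to be sector-aligned: rw_raw_dev() returns -EINVAL unless the file offset and the transfer length are both multiples of the sector size chosen at open time (512 bytes unless the underlying device dictates otherwise), and the user buffer itself must be sector-aligned or brw_kiovec()'s offset check rejects the kiobuf. A minimal sketch, assuming 512-byte sectors and a /dev/raw1 node name:

/* Sketch: sector-aligned read through a bound raw device. */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>

#define SECTOR_SIZE 512                 /* assumed; see raw_open() above */

int main(void)
{
        void *buf;
        ssize_t n;
        int fd;

        /* The buffer must be sector-aligned too, or brw_kiovec()
         * fails its (iobuf->offset & (size-1)) check with -EINVAL. */
        if (posix_memalign(&buf, SECTOR_SIZE, 8 * SECTOR_SIZE))
                return 1;

        fd = open("/dev/raw1", O_RDONLY);       /* node name assumed */
        if (fd < 0)
                return 1;

        /* Offset and length are whole sectors. */
        if (lseek(fd, 16 * SECTOR_SIZE, SEEK_SET) < 0)
                return 1;
        n = read(fd, buf, 8 * SECTOR_SIZE);
        printf("read %ld bytes\n", (long) n);

        close(fd);
        free(buf);
        return 0;
}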
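On the kernel side, the calling sequence used by rw_raw_dev() above, and now exported from kernel/ksyms.c, is alloc_kiovec(), map_user_kiobuf(), brw_kiovec(), unmap_kiobuf() and free_kiovec(). The sketch below condenses it to a single chunk of at most KIO_MAX_SECTORS blocks, assuming 512-byte sectors; dev, buf, first_block and nblocks are illustrative parameters, and larger transfers have to be split the way rw_raw_dev() splits them.

/* Illustrative sketch of one single-chunk kiobuf read; dev, buf,
 * first_block and nblocks are assumed caller-supplied values. */
#include <linux/fs.h>
#include <linux/iobuf.h>

static int example_read_blocks(kdev_t dev, unsigned long buf /* user VA */,
                               unsigned long first_block, int nblocks)
{
        struct kiobuf *iobuf;
        int i, err;

        err = alloc_kiovec(1, &iobuf);
        if (err)
                return err;

        /* READ means "write to user memory": the pages are faulted in,
         * pinned and locked by map_user_kiobuf(). */
        err = map_user_kiobuf(READ, iobuf, buf, nblocks * 512);
        if (err)
                goto out_free;

        /* One sector number per 512-byte block of the mapped range. */
        for (i = 0; i < nblocks; i++)
                iobuf->blocks[i] = first_block + i;

        /* Submits the buffer_heads and waits; returns bytes done or -errno. */
        err = brw_kiovec(READ, 1, &iobuf, dev, iobuf->blocks, 512);

        unmap_kiobuf(iobuf);
 out_free:
        free_kiovec(1, &iobuf);
        return err;
}

rw_raw_dev() additionally serialises access to the per-minor preallocated kiobuf with the iobuf_lock bit and falls back to a freshly allocated kiobuf when that bit is already taken; any caller sharing one kiobuf needs equivalent locking.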
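For reference, the chunking arithmetic behind those constants: with 4 KB pages, KIO_MAX_ATOMIC_IO = 128 KB gives KIO_STATIC_PAGES = 128/4 + 1 = 33 embedded page slots and KIO_MAX_SECTORS = 128 * 2 = 256 buffer_heads, so one pass of the brw_kiovec() loop submits at most 256 sectors of 512 bytes, i.e. 128 KB. For a larger sector size rw_raw_dev() scales the sector count down instead: max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9), so a 2 KB sector size (sector_bits = 11) allows 256 >> 2 = 64 sectors per chunk, which is still 128 KB.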