Binary files rawioref/ID and rawio/ID differ diff -urN rawioref/drivers/char/Makefile rawio/drivers/char/Makefile --- rawioref/drivers/char/Makefile Fri Apr 20 22:31:16 2001 +++ rawio/drivers/char/Makefile Sat Apr 21 16:34:38 2001 @@ -20,7 +20,7 @@ O_TARGET := char.o M_OBJS := -O_OBJS := tty_io.o n_tty.o tty_ioctl.o mem.o random.o +O_OBJS := tty_io.o n_tty.o tty_ioctl.o mem.o random.o raw.o OX_OBJS := pty.o misc.o obj-y := obj-m := diff -urN rawioref/drivers/char/raw.c rawio/drivers/char/raw.c --- rawioref/drivers/char/raw.c Thu Jan 1 01:00:00 1970 +++ rawio/drivers/char/raw.c Sat Apr 21 18:52:22 2001 @@ -0,0 +1,438 @@ +/* + * linux/drivers/char/raw.c + * + * Front-end raw character devices. These can be bound to any block + * devices to provide genuine Unix raw character device semantics. + * + * We reserve minor number 0 for a control interface. ioctl()s on this + * device are used to bind the other minor numbers to block devices. + */ + +#include +#include +#include +#include +#include +#include + +#define dprintk(x...) + +typedef struct raw_device_data_s { + struct kiobuf * iobuf; + long iobuf_lock; + kdev_t binding; + int inuse, sector_size, sector_bits; + struct semaphore mutex; +} raw_device_data_t; + +static raw_device_data_t raw_devices[256]; + +extern struct file_operations * get_blkfops(unsigned int major); + +static ssize_t rw_raw_dev(int rw, struct file *, char *, size_t, loff_t *); + +ssize_t raw_read(struct file *, char *, size_t, loff_t *); +ssize_t raw_write(struct file *, const char *, size_t, loff_t *); +int raw_open(struct inode *, struct file *); +int raw_release(struct inode *, struct file *); +int raw_ctl_ioctl(struct inode *, struct file *, unsigned int, unsigned long); + + +static struct file_operations raw_fops = { + NULL, /* llseek */ + raw_read, /* read */ + raw_write, /* write */ + NULL, /* readdir */ + NULL, /* poll */ + NULL, /* ioctl */ + NULL, /* mmap */ + raw_open, /* open */ + NULL, /* flush */ + raw_release, /* release */ + NULL /* fsync */ +}; + +static struct file_operations raw_ctl_fops = { + NULL, /* llseek */ + NULL, /* read */ + NULL, /* write */ + NULL, /* readdir */ + NULL, /* poll */ + raw_ctl_ioctl, /* ioctl */ + NULL, /* mmap */ + raw_open, /* open */ + NULL, /* flush */ + NULL, /* no special release code */ + NULL /* fsync */ +}; + +static int __init raw_init(void) +{ + int i; + register_chrdev(RAW_MAJOR, "raw", &raw_fops); + + for (i = 0; i < 256; i++) { + init_MUTEX(&raw_devices[i].mutex); + raw_devices[i].binding = NODEV; + } + + return 0; +} + +__initcall(raw_init); + +/* + * The raw IO open and release code needs to fake appropriate + * open/release calls to the underlying block devices. + */ + +static int bdev_open(kdev_t dev, int mode) +{ + int err = 0; + struct file dummy_file = {}; + struct dentry dummy_dentry = {}; + struct inode * inode = get_empty_inode(); + + if (!inode) + return -ENOMEM; + + dummy_file.f_op = get_blkfops(MAJOR(dev)); + if (!dummy_file.f_op) { + err = -ENODEV; + goto done; + } + + if (dummy_file.f_op->open) { + inode->i_rdev = dev; + dummy_dentry.d_inode = inode; + dummy_file.f_dentry = &dummy_dentry; + dummy_file.f_mode = mode; + err = dummy_file.f_op->open(inode, &dummy_file); + } + + done: + iput(inode); + return err; +} + +static int bdev_close(kdev_t dev) +{ + int err; + struct inode * inode = get_empty_inode(); + + if (!inode) + return -ENOMEM; + + inode->i_rdev = dev; + err = blkdev_release(inode); + iput(inode); + return err; +} + + + +/* + * Open/close code for raw IO. 
+ */ + +int raw_open(struct inode *inode, struct file *filp) +{ + int minor; + kdev_t bdev; + int err; + int sector_size; + int sector_bits; + + minor = MINOR(inode->i_rdev); + + /* + * Is it the control device? + */ + + if (minor == 0) { + filp->f_op = &raw_ctl_fops; + return 0; + } + + down(&raw_devices[minor].mutex); + /* + * No, it is a normal raw device. All we need to do on open is + * to check that the device is bound, and force the underlying + * block device to a sector-size blocksize. + */ + + bdev = raw_devices[minor].binding; + err = -ENODEV; + if (bdev == NODEV) + goto out; + + err = bdev_open(bdev, filp->f_mode); + if (err) + goto out; + + /* + * Don't change the blocksize if we already have users using + * this device + */ + + if (raw_devices[minor].inuse++) + goto out; + + err = alloc_kiovec(1, &raw_devices[minor].iobuf); + if (err) { + raw_devices[minor].inuse--; + up(&raw_devices[minor].mutex); + bdev_close(bdev); + return err; + } + + /* + * Don't interfere with mounted devices: we cannot safely set + * the blocksize on a device which is already mounted. + */ + + sector_size = 512; + if (lookup_vfsmnt(bdev) != NULL) { + if (blksize_size[MAJOR(bdev)]) + sector_size = blksize_size[MAJOR(bdev)][MINOR(bdev)]; + } else { + if (hardsect_size[MAJOR(bdev)]) + sector_size = hardsect_size[MAJOR(bdev)][MINOR(bdev)]; + } + + set_blocksize(bdev, sector_size); + raw_devices[minor].sector_size = sector_size; + + for (sector_bits = 0; !(sector_size & 1); ) + sector_size>>=1, sector_bits++; + raw_devices[minor].sector_bits = sector_bits; + + out: + up(&raw_devices[minor].mutex); + return err; +} + +int raw_release(struct inode *inode, struct file *filp) +{ + int minor; + kdev_t bdev; + + minor = MINOR(inode->i_rdev); + down(&raw_devices[minor].mutex); + bdev = raw_devices[minor].binding; + if (!--raw_devices[minor].inuse) + free_kiovec(1, &raw_devices[minor].iobuf); + up(&raw_devices[minor].mutex); + bdev_close(bdev); + return 0; +} + + + +/* + * Deal with ioctls against the raw-device control interface, to bind + * and unbind other raw devices. + */ + +int raw_ctl_ioctl(struct inode *inode, + struct file *flip, + unsigned int command, + unsigned long arg) +{ + struct raw_config_request rq; + int err = 0; + int minor; + + switch (command) { + case RAW_SETBIND: + case RAW_GETBIND: + + /* First, find out which raw minor we want */ + + err = copy_from_user(&rq, (void *) arg, sizeof(rq)); + if (err) + break; + + minor = rq.raw_minor; + if (minor == 0 || minor > MINORMASK) { + err = -EINVAL; + break; + } + + if (command == RAW_SETBIND) { + /* + * This is like making block devices, so demand the + * same capability + */ + if (!capable(CAP_SYS_ADMIN)) { + err = -EPERM; + break; + } + + /* + * For now, we don't need to check that the underlying + * block device is present or not: we can do that when + * the raw device is opened. Just check that the + * major/minor numbers make sense. 
+ */ + + if ((rq.block_major == NODEV && + rq.block_minor != NODEV) || + rq.block_major > MAX_BLKDEV || + rq.block_minor > MINORMASK) { + err = -EINVAL; + break; + } + + down(&raw_devices[minor].mutex); + if (raw_devices[minor].inuse) { + up(&raw_devices[minor].mutex); + err = -EBUSY; + break; + } + raw_devices[minor].binding = + MKDEV(rq.block_major, rq.block_minor); + up(&raw_devices[minor].mutex); + } else { + rq.block_major = MAJOR(raw_devices[minor].binding); + rq.block_minor = MINOR(raw_devices[minor].binding); + err = copy_to_user((void *) arg, &rq, sizeof(rq)); + } + break; + + default: + err = -EINVAL; + } + + return err; +} + + + +ssize_t raw_read(struct file *filp, char * buf, + size_t size, loff_t *offp) +{ + return rw_raw_dev(READ, filp, buf, size, offp); +} + +ssize_t raw_write(struct file *filp, const char *buf, + size_t size, loff_t *offp) +{ + return rw_raw_dev(WRITE, filp, (char *) buf, size, offp); +} + +#define SECTOR_BITS 9 +#define SECTOR_SIZE (1U << SECTOR_BITS) +#define SECTOR_MASK (SECTOR_SIZE - 1) + +ssize_t rw_raw_dev(int rw, struct file *filp, char *buf, + size_t size, loff_t *offp) +{ + struct kiobuf * iobuf; + int new_iobuf; + int err = 0; + unsigned long blocknr, blocks; + size_t transferred; + int iosize; + int i; + int minor; + kdev_t dev; + unsigned long limit; + + int sector_size, sector_bits, sector_mask; + int max_sectors; + + /* + * First, a few checks on device size limits + */ + + minor = MINOR(filp->f_dentry->d_inode->i_rdev); + + new_iobuf = 0; + iobuf = raw_devices[minor].iobuf; + if (test_and_set_bit(0, &raw_devices[minor].iobuf_lock)) { + /* + * A parallel read/write is using the preallocated iobuf + * so just run slow and allocate a new one. + */ + err = alloc_kiovec(1, &iobuf); + if (err) + goto out; + new_iobuf = 1; + } + + dev = raw_devices[minor].binding; + sector_size = raw_devices[minor].sector_size; + sector_bits = raw_devices[minor].sector_bits; + sector_mask = sector_size- 1; + max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9); + + if (blk_size[MAJOR(dev)]) + limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits; + else + limit = INT_MAX; + dprintk ("rw_raw_dev: dev %d:%d (+%d)\n", + MAJOR(dev), MINOR(dev), limit); + + err = -EINVAL; + if ((*offp & sector_mask) || (size & sector_mask)) + goto out_free; + err = 0; + if (size) + err = -ENXIO; + if ((*offp >> sector_bits) >= limit) + goto out_free; + + /* + * Split the IO into KIO_MAX_SECTORS chunks, mapping and + * unmapping the single kiobuf as we go to perform each chunk of + * IO. 
+ */ + + transferred = 0; + blocknr = *offp >> sector_bits; + while (size > 0) { + blocks = size >> sector_bits; + if (blocks > max_sectors) + blocks = max_sectors; + if (blocks > limit - blocknr) + blocks = limit - blocknr; + if (!blocks) + break; + + iosize = blocks << sector_bits; + + err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize); + if (err) + break; + + for (i=0; i < blocks; i++) + iobuf->blocks[i] = blocknr++; + + err = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, sector_size); + + if (err >= 0) { + transferred += err; + size -= err; + buf += err; + } + + unmap_kiobuf(iobuf); + + if (err != iosize) + break; + } + + if (transferred) { + *offp += transferred; + err = transferred; + } + + out_free: + if (!new_iobuf) + clear_bit(0, &raw_devices[minor].iobuf_lock); + else + free_kiovec(1, &iobuf); + out: + return err; +} diff -urN rawioref/fs/Makefile rawio/fs/Makefile --- rawioref/fs/Makefile Thu Aug 26 14:20:19 1999 +++ rawio/fs/Makefile Sat Apr 21 16:34:38 2001 @@ -13,7 +13,7 @@ O_OBJS = open.o read_write.o devices.o file_table.o buffer.o \ super.o block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o fifo.o locks.o filesystems.o \ - dcache.o inode.o attr.o bad_inode.o file.o $(BINFMTS) + dcache.o inode.o attr.o bad_inode.o file.o iobuf.o $(BINFMTS) MOD_LIST_NAME := FS_MODULES ALL_SUB_DIRS = coda minix ext2 fat msdos vfat proc isofs nfs umsdos ntfs \ diff -urN rawioref/fs/buffer.c rawio/fs/buffer.c --- rawioref/fs/buffer.c Sat Apr 21 16:34:05 2001 +++ rawio/fs/buffer.c Sat Apr 21 18:48:11 2001 @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -1239,6 +1240,33 @@ wake_up(&buffer_wait); } +int alloc_kiobuf_bhs(struct kiobuf * kiobuf) +{ + int i; + + for (i = 0; i < KIO_MAX_SECTORS; i++) + if (!(kiobuf->bh[i] = get_unused_buffer_head(0))) { + while (i--) { + put_unused_buffer_head(kiobuf->bh[i]); + kiobuf->bh[i] = NULL; + } + wake_up(&buffer_wait); + return -ENOMEM; + } + return 0; +} + +void free_kiobuf_bhs(struct kiobuf * kiobuf) +{ + int i; + + for (i = 0; i < KIO_MAX_SECTORS; i++) { + put_unused_buffer_head(kiobuf->bh[i]); + kiobuf->bh[i] = NULL; + } + wake_up(&buffer_wait); +} + static void end_buffer_io_async(struct buffer_head * bh, int uptodate) { unsigned long flags; @@ -1299,6 +1327,215 @@ bad_count: printk ("Whoops: end_buffer_io_async: b_count != 1 on async io.\n"); return; +} + +/* + * IO completion routine for a buffer_head being used for kiobuf IO: we + * can't dispatch the kiobuf callback until io_count reaches 0. + */ + +static void end_buffer_io_kiobuf(struct buffer_head *bh, int uptodate) +{ + struct kiobuf *kiobuf; + + mark_buffer_uptodate(bh, uptodate); + + kiobuf = bh->b_dev_id; + unlock_buffer(bh); + end_kio_request(kiobuf, uptodate); +} + +/* + * For brw_kiovec: submit a set of buffer_head temporary IOs and wait + * for them to complete. Clean up the buffer_heads afterwards. + */ + +static int wait_kio(int rw, int nr, struct buffer_head *bh[], int size) +{ + int iosize, err; + int i; + struct buffer_head *tmp; + + iosize = 0; + err = 0; + + for (i = nr; --i >= 0; ) { + iosize += size; + tmp = bh[i]; + wait_on_buffer(tmp); + if (!buffer_uptodate(tmp)) { + /* We are traversing bh'es in reverse order so + clearing iosize on error calculates the + amount of IO before the first error. */ + iosize = 0; + err = -EIO; + } + } + + if (iosize) + return iosize; + return err; +} + +/* + * Clean up the bounce buffers potentially used by brw_kiovec. 
All of + * the kiovec's bounce buffers must be cleared of temporarily allocated + * bounce pages, but only READ pages for whom IO completed successfully + * can actually be transferred back to user space. + */ + +void cleanup_bounce_buffers(int rw, int nr, struct kiobuf *iovec[], + int transferred) +{ + int i; + for (i = 0; i < nr; i++) { + struct kiobuf *iobuf = iovec[i]; + if (iobuf->bounced) { + if (transferred > 0 && !(rw & WRITE)) + kiobuf_copy_bounce(iobuf, COPY_FROM_BOUNCE, + transferred); + + clear_kiobuf_bounce_pages(iobuf); + } + transferred -= iobuf->length; + } +} + +/* + * Start I/O on a physical range of kernel memory, defined by a vector + * of kiobuf structs (much like a user-space iovec list). + * + * IO is submitted asynchronously: you need to check page->locked, + * page->uptodate, and maybe wait on page->wait. + * + * It is up to the caller to make sure that there are enough blocks + * passed in to completely map the iobufs to disk. + */ + +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], + kdev_t dev, unsigned long b[], int size) +{ + int err; + int length; + int transferred; + int i; + int bufind; + int pageind; + int bhind; + int offset; + unsigned long blocknr; + struct kiobuf * iobuf = NULL; + unsigned long page; + unsigned long bounce; + struct page * map; + struct buffer_head *tmp, **bhs = NULL; + + /* + * First, do some alignment and validity checks + */ + for (i = 0; i < nr; i++) { + iobuf = iovec[i]; + if ((iobuf->offset & (size-1)) || + (iobuf->length & (size-1))) + return -EINVAL; + if (!iobuf->nr_pages) + panic("brw_kiovec: iobuf not initialised"); + } + + /* DEBUG */ +#if 0 + return iobuf->length; +#endif + + /* + * OK to walk down the iovec doing page IO on each page we find. + */ + bufind = bhind = transferred = err = 0; + for (i = 0; i < nr; i++) { + iobuf = iovec[i]; + err = setup_kiobuf_bounce_pages(iobuf, GFP_USER); + if (err) + goto finished; + if (rw & WRITE) + kiobuf_copy_bounce(iobuf, COPY_TO_BOUNCE, -1); + + offset = iobuf->offset; + length = iobuf->length; + if (!bhs) + bhs = iobuf->bh; + + for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { + map = iobuf->maplist[pageind]; + bounce = iobuf->bouncelist[pageind]; + + if (bounce) + page = bounce; + else + page = iobuf->pagelist[pageind]; + + while (length > 0) { + blocknr = b[bufind++]; + tmp = bhs[bhind++]; + + tmp->b_dev = B_FREE; + tmp->b_size = size; + tmp->b_data = (char *) (page + offset); + tmp->b_this_page = tmp; + + init_buffer(tmp, dev, blocknr, + end_buffer_io_kiobuf, iobuf); + if (rw == WRITE) { + set_bit(BH_Uptodate, &tmp->b_state); + set_bit(BH_Dirty, &tmp->b_state); + } else + clear_bit(BH_Uptodate, &tmp->b_state); + + length -= size; + offset += size; + + atomic_inc(&iobuf->io_count); + + /* + * Start the IO if we have got too much or if + * this is the end of the last iobuf + */ + if (bhind >= KIO_MAX_SECTORS) { + ll_rw_block(rw, bhind, bhs); + kiobuf_wait_for_io(iobuf); + err = wait_kio(rw, bhind, bhs, size); + if (err >= 0) + transferred += err; + else + goto finished; + bhind = 0; + } + + if (offset >= PAGE_SIZE) { + offset = 0; + break; + } + } /* End of block loop */ + } /* End of page loop */ + } /* End of iovec loop */ + + /* Is there any IO still left to submit? 
*/ + if (bhind) { + ll_rw_block(rw, bhind, bhs); + kiobuf_wait_for_io(iobuf); + err = wait_kio(rw, bhind, bhs, size); + if (err >= 0) + transferred += err; + else + goto finished; + } + + finished: + + cleanup_bounce_buffers(rw, nr, iovec, transferred); + + if (transferred) + return transferred; + return err; } /* diff -urN rawioref/fs/iobuf.c rawio/fs/iobuf.c --- rawioref/fs/iobuf.c Thu Jan 1 01:00:00 1970 +++ rawio/fs/iobuf.c Sat Apr 21 18:51:38 2001 @@ -0,0 +1,269 @@ +/* + * iobuf.c + * + * Keep track of the general-purpose IO-buffer structures used to track + * abstract kernel-space io buffers. + * + */ + +#include +#include +#include +#include +#include +#include + +void end_kio_request(struct kiobuf *kiobuf, int uptodate) +{ + if ((!uptodate) && !kiobuf->errno) + kiobuf->errno = -EIO; + + if (atomic_dec_and_test(&kiobuf->io_count)) { + if (kiobuf->end_io) + kiobuf->end_io(kiobuf); + wake_up(&kiobuf->wait_queue); + } +} + +int alloc_kiovec(int nr, struct kiobuf **bufp) +{ + int i; + struct kiobuf *iobuf; + + for (i = 0; i < nr; i++) { + lock_kernel(); + iobuf = vmalloc(sizeof(struct kiobuf)); + unlock_kernel(); + if (!iobuf) { + free_kiovec(i, bufp); + return -ENOMEM; + } + + memset(iobuf, 0, sizeof(*iobuf)); + iobuf->array_len = KIO_STATIC_PAGES; + iobuf->pagelist = iobuf->page_array; + iobuf->maplist = iobuf->map_array; + iobuf->bouncelist = iobuf->bounce_array; + init_waitqueue_head(&iobuf->wait_queue); + if (alloc_kiobuf_bhs(iobuf)) { + lock_kernel(); + vfree(iobuf); + unlock_kernel(); + free_kiovec(i, bufp); + return -ENOMEM; + } + *bufp++ = iobuf; + } + + return 0; +} + +void clear_kiobuf_bounce_pages(struct kiobuf *iobuf) +{ + int i; + + if (!iobuf->bounced) + return; + + for (i = 0; i < iobuf->nr_pages; i++) { + unsigned long page = iobuf->bouncelist[i]; + if (page) + free_page(page); + } + iobuf->bounced = 0; +} + +void free_kiovec(int nr, struct kiobuf **bufp) +{ + struct kiobuf *iobuf; + int i; + + for (i = 0; i < nr; i++) { + iobuf = bufp[i]; + clear_kiobuf_bounce_pages(iobuf); + if (iobuf->array_len > KIO_STATIC_PAGES) + kfree (iobuf->pagelist); + free_kiobuf_bhs(iobuf); + lock_kernel(); + vfree(bufp[i]); + unlock_kernel(); + } +} + +int expand_kiobuf(struct kiobuf *iobuf, int wanted) +{ + unsigned long * pagelist, * bouncelist; + struct page ** maplist; + + if (iobuf->array_len >= wanted) + return 0; + + /* + * kmalloc enough space for the page, map and bounce lists all + * at once. + */ + pagelist = (unsigned long *) + kmalloc(3 * wanted * sizeof(unsigned long), GFP_KERNEL); + if (!pagelist) + return -ENOMEM; + + /* Did it grow while we waited? 
*/ + if (iobuf->array_len >= wanted) { + kfree(pagelist); + return 0; + } + + maplist = (struct page **) (pagelist + wanted); + bouncelist = pagelist + 2 * wanted; + + memcpy (pagelist, iobuf->pagelist, + iobuf->array_len * sizeof(unsigned long)); + memcpy (maplist, iobuf->maplist, + iobuf->array_len * sizeof(struct page **)); + memcpy (bouncelist, iobuf->bouncelist, + iobuf->array_len * sizeof(unsigned long)); + + if (iobuf->array_len > KIO_STATIC_PAGES) + kfree (iobuf->pagelist); + + iobuf->pagelist = pagelist; + iobuf->maplist = maplist; + iobuf->bouncelist = bouncelist; + iobuf->array_len = wanted; + return 0; +} + +void kiobuf_wait_for_io(struct kiobuf *kiobuf) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + if (atomic_read(&kiobuf->io_count) == 0) + return; + + add_wait_queue(&kiobuf->wait_queue, &wait); +repeat: + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (atomic_read(&kiobuf->io_count) != 0) { + run_task_queue(&tq_disk); + schedule(); + if (atomic_read(&kiobuf->io_count) != 0) + goto repeat; + } + tsk->state = TASK_RUNNING; + remove_wait_queue(&kiobuf->wait_queue, &wait); +} + +/* + * Test whether a given page from the bounce buffer matches the given + * gfp_mask. Return true if a bounce buffer is required for this + * page. + */ + +static inline int test_bounce_page(unsigned long page, + struct page * map, + int gfp_mask) +{ + /* Unmapped pages from PCI memory or BIGMEM pages always need a + * bounce buffer unless the caller is prepared to accept + * GFP_BIGMEM pages. */ + + if (!map || PageBIGMEM(map) ) + /* Careful, the following must return the right value + * even if CONFIG_BIGMEM is not set */ + return !(gfp_mask & __GFP_BIGMEM); + + /* A DMA-able page never needs a bounce buffer */ + if (PageDMA(map)) + return 0; + + /* Otherwise it is a non-ISA-DMA-capable page and needs bounce + * buffers if GFP_DMA is requested */ + return gfp_mask & __GFP_DMA; +} + +int setup_kiobuf_bounce_pages(struct kiobuf *iobuf, int gfp_mask) +{ + int i; + + clear_kiobuf_bounce_pages(iobuf); + + for (i = 0; i < iobuf->nr_pages; i++) { + struct page *map = iobuf->maplist[i]; + unsigned long page = iobuf->pagelist[i]; + unsigned long bounce_page; + + if (!test_bounce_page(page, map, gfp_mask)) { + iobuf->bouncelist[i] = 0; + continue; + } + + bounce_page = __get_free_page(gfp_mask); + if (!bounce_page) + goto error; + + iobuf->bouncelist[i] = bounce_page; + iobuf->bounced = 1; + } + return 0; + + error: + clear_kiobuf_bounce_pages(iobuf); + return -ENOMEM; +} + +/* + * Copy a bounce buffer. For completion of partially-failed read IOs, + * we need to be able to place an upper limit on the data successfully + * transferred from bounce buffers to the user's own buffers. 
+ */ + +void kiobuf_copy_bounce(struct kiobuf *iobuf, int direction, int max) +{ + int i; + int offset, length; + + if (!iobuf->bounced) + return; + + offset = iobuf->offset; + length = iobuf->length; + if (max >= 0 && length > max) + length = max; + + i = 0; + + if (offset > PAGE_SIZE) { + i = (offset >> PAGE_SHIFT); + offset &= ~PAGE_MASK; + } + + for (; i < iobuf->nr_pages && length > 0; i++) { + unsigned long page = iobuf->pagelist[i]; + unsigned long bounce_page = iobuf->bouncelist[i]; + unsigned long kin, kout; + int pagelen = length; + + if ((pagelen+offset) > PAGE_SIZE) + pagelen = PAGE_SIZE - offset; + + if (bounce_page) { + if (direction == COPY_TO_BOUNCE) { + kin = kmap(page, KM_READ); + kout = kmap(bounce_page, KM_WRITE); + } else { + kin = kmap(bounce_page, KM_READ); + kout = kmap(page, KM_WRITE); + } + + memcpy((char *) (kout+offset), + (char *) (kin+offset), + pagelen); + kunmap(kout, KM_WRITE); + kunmap(kin, KM_READ); + } + + length -= pagelen; + offset = 0; + } +} diff -urN rawioref/include/linux/iobuf.h rawio/include/linux/iobuf.h --- rawioref/include/linux/iobuf.h Thu Jan 1 01:00:00 1970 +++ rawio/include/linux/iobuf.h Sat Apr 21 19:11:46 2001 @@ -0,0 +1,94 @@ +/* + * iobuf.h + * + * Defines the structures used to track abstract kernel-space io buffers. + * + */ + +#ifndef __LINUX_IOBUF_H +#define __LINUX_IOBUF_H + +#include +#include + +/* + * The kiobuf structure describes a physical set of pages reserved + * locked for IO. The reference counts on each page will have been + * incremented, and the flags field will indicate whether or not we have + * pre-locked all of the pages for IO. + * + * kiobufs may be passed in arrays to form a kiovec, but we must + * preserve the property that no page is present more than once over the + * entire iovec. + */ + +#define KIO_MAX_ATOMIC_IO 128 /* in kb */ +#define KIO_MAX_ATOMIC_BYTES (64 * 1024) +#define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1) +#define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2) + +struct kiobuf +{ + int nr_pages; /* Pages actually referenced */ + int array_len; /* Space in the allocated lists */ + int offset; /* Offset to start of valid data */ + int length; /* Number of valid bytes of data */ + + /* Keep separate track of the physical addresses and page + * structs involved. If we do IO to a memory-mapped device + * region, there won't necessarily be page structs defined for + * every address. 
*/ + + unsigned long * pagelist; + struct page ** maplist; + unsigned long * bouncelist; + + unsigned int locked : 1; /* If set, pages has been locked */ + unsigned int bounced : 1; /* If set, bounce pages are set up */ + + /* Always embed enough struct pages for 64k of IO */ + unsigned long page_array[KIO_STATIC_PAGES]; + struct page * map_array[KIO_STATIC_PAGES]; + unsigned long bounce_array[KIO_STATIC_PAGES]; + struct buffer_head * bh[KIO_MAX_SECTORS]; + unsigned long blocks[KIO_MAX_SECTORS]; + + /* Dynamic state for IO completion: */ + atomic_t io_count; /* IOs still in progress */ + int errno; /* Status of completed IO */ + void (*end_io) (struct kiobuf *); /* Completion callback */ + wait_queue_head_t wait_queue; +}; + + +/* mm/memory.c */ + +int map_user_kiobuf(int rw, struct kiobuf *, unsigned long va, size_t len); +void unmap_kiobuf(struct kiobuf *iobuf); + +/* fs/iobuf.c */ + +extern void end_kio_request(struct kiobuf *, int); +extern void simple_wakeup_kiobuf(struct kiobuf *); +int alloc_kiovec(int nr, struct kiobuf **); +void free_kiovec(int nr, struct kiobuf **); +int expand_kiobuf(struct kiobuf *, int); +int setup_kiobuf_bounce_pages(struct kiobuf *, int gfp_mask); +void clear_kiobuf_bounce_pages(struct kiobuf *); +void kiobuf_copy_bounce(struct kiobuf *, int direction, int max); +extern void kiobuf_wait_for_io(struct kiobuf *); +extern int alloc_kiobuf_bhs(struct kiobuf *); +extern void free_kiobuf_bhs(struct kiobuf *); + +/* Direction codes for kiobuf_copy_bounce: */ +enum { + COPY_TO_BOUNCE, + COPY_FROM_BOUNCE +}; + +/* fs/buffer.c */ + +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], + kdev_t dev, unsigned long b[], int size); + +#endif /* __LINUX_IOBUF_H */ diff -urN rawioref/include/linux/major.h rawio/include/linux/major.h --- rawioref/include/linux/major.h Fri Apr 20 22:31:21 2001 +++ rawio/include/linux/major.h Sat Apr 21 16:34:38 2001 @@ -126,6 +126,8 @@ #define AURORA_MAJOR 79 +#define RAW_MAJOR 162 + #define UNIX98_PTY_MASTER_MAJOR 128 #define UNIX98_PTY_MAJOR_COUNT 8 #define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) diff -urN rawioref/include/linux/raw.h rawio/include/linux/raw.h --- rawioref/include/linux/raw.h Thu Jan 1 01:00:00 1970 +++ rawio/include/linux/raw.h Sat Apr 21 18:57:08 2001 @@ -0,0 +1,16 @@ +#ifndef __LINUX_RAW_H +#define __LINUX_RAW_H + +#include + +#define RAW_SETBIND _IO( 0xac, 0 ) +#define RAW_GETBIND _IO( 0xac, 1 ) + +struct raw_config_request +{ + int raw_minor; + __u64 block_major; + __u64 block_minor; +}; + +#endif /* __LINUX_RAW_H */ diff -urN rawioref/init/main.c rawio/init/main.c --- rawioref/init/main.c Fri Apr 20 22:31:21 2001 +++ rawio/init/main.c Sat Apr 21 18:52:00 2001 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff -urN rawioref/kernel/ksyms.c rawio/kernel/ksyms.c --- rawioref/kernel/ksyms.c Mon Dec 11 16:58:06 2000 +++ rawio/kernel/ksyms.c Sat Apr 21 16:34:38 2001 @@ -37,6 +37,7 @@ #include #include #include +#include #if defined(CONFIG_PROC_FS) #include @@ -252,6 +253,14 @@ EXPORT_SYMBOL(max_sectors); EXPORT_SYMBOL(max_segments); EXPORT_SYMBOL(max_readahead); + +/* kiobuf support */ +EXPORT_SYMBOL(map_user_kiobuf); +EXPORT_SYMBOL(unmap_kiobuf); +EXPORT_SYMBOL(alloc_kiovec); +EXPORT_SYMBOL(free_kiovec); +EXPORT_SYMBOL(expand_kiobuf); +EXPORT_SYMBOL(brw_kiovec); /* tty routines */ EXPORT_SYMBOL(tty_hangup); diff -urN rawioref/mm/memory.c rawio/mm/memory.c --- rawioref/mm/memory.c Sat Apr 21 16:34:24 2001 +++ rawio/mm/memory.c Sat Apr 21 16:34:38 2001 @@ -41,6 
+41,8 @@ #include #include #include +#include +#include #include #include @@ -399,6 +401,223 @@ if (mm->rss < 0) mm->rss = 0; } +} + + +/* + * Do a quick page-table lookup for a single page. + */ +static unsigned long get_page(unsigned long address, int write) +{ + pgd_t *pgd; + pmd_t *pmd; + + pgd = pgd_offset(current->mm, address); + pmd = pmd_offset(pgd, address); + if (pmd) { + pte_t * pte = pte_offset(pmd, address); + if (pte && pte_present(*pte)) { + if (!write || + (pte_write(*pte) && pte_dirty(*pte))) + return pte_page(*pte); + } + } + + return 0; +} + +/* + * Given a physical address, is there a useful struct page pointing to it? + */ + +static struct page * get_page_map(unsigned long page) +{ + struct page *map; + + if (MAP_NR(page) >= max_mapnr) + return 0; + if (page == ZERO_PAGE(page)) + return 0; + map = mem_map + MAP_NR(page); + if (PageReserved(map)) + return 0; + return map; +} + +/* + * Force in an entire range of pages from the current process's user VA, + * and pin and lock the pages for IO. + */ + +#define dprintk(x...) +int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) +{ + unsigned long ptr, end; + int err; + struct mm_struct * mm; + struct vm_area_struct * vma, * prev_vma; + unsigned long page; + struct page * map; + int doublepage = 0; + int repeat = 0; + int i; + int write = (rw == READ); /* if we read from disk + it means we write + to memory */ + + /* Make sure the iobuf is not already mapped somewhere. */ + if (iobuf->nr_pages) + return -EINVAL; + + mm = current->mm; + dprintk ("map_user_kiobuf: begin\n"); + + ptr = va & PAGE_MASK; + end = (va + len + PAGE_SIZE - 1) & PAGE_MASK; + err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT); + if (err) + return err; + + repeat: + down(&mm->mmap_sem); + + err = -EFAULT; + iobuf->locked = write; + iobuf->offset = va & ~PAGE_MASK; + iobuf->length = len; + vma = NULL; + + i = 0; + + /* + * First of all, try to fault in all of the necessary pages + */ + while (ptr < end) { + if (!vma || ptr >= vma->vm_end) { + vma = find_vma_prev(mm, ptr, &prev_vma); + if (!vma) + goto out; + if (vma->vm_start > ptr) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto out; + if (expand_stack(vma, ptr, prev_vma)) + goto out; + } + err = -EACCES; + if (write) { + if (!(vma->vm_flags & VM_WRITE)) + goto out; + } else { + if (!(vma->vm_flags & VM_READ)) + goto out; + } + err = -EFAULT; + } + while (!(page = get_page(ptr, write))) { + int ret; + + ret = handle_mm_fault(current, vma, ptr, write); + if (ret <= 0) { + if (!ret) + goto out; + else { + err = -ENOMEM; + goto out; + } + } + } + map = get_page_map(page); + if (map) { + if (write) { + /* + * Lock down the pages only if we're going + * to write to memory. If we're reading + * from memory we're free to go ahead + * only after pinning the page on the + * physical side. + */ + if (PageLocked(map)) + goto retry; + set_bit(PG_locked, &map->flags); + } + flush_dcache_page(page_address(map)); + atomic_inc(&map->count); + } + dprintk ("Installing page %p %p: %d\n", (void *)page, map, i); + iobuf->pagelist[i] = page; + iobuf->maplist[i] = map; + iobuf->nr_pages = ++i; + + ptr += PAGE_SIZE; + } + + up(&mm->mmap_sem); + dprintk ("map_user_kiobuf: end OK\n"); + return 0; + + out: + up(&mm->mmap_sem); + unmap_kiobuf(iobuf); + dprintk ("map_user_kiobuf: end %d\n", err); + return err; + + + retry: + + /* + * Undo the locking so far, wait on the page we got to, and try again. 
+ */ + up(&mm->mmap_sem); + unmap_kiobuf(iobuf); + ptr = va & PAGE_MASK; + + /* + * Did the release also unlock the page we got stuck on? + */ + if (!PageLocked(map)) { + /* If so, we may well have the page mapped twice in the + * IO address range. Bad news. Of course, it _might_ + * just be a coincidence, but if it happens more than + * once, chances are we have a double-mapped page. */ + if (++doublepage >= 3) { + return -EINVAL; + } + } + + /* + * Try again... + */ + wait_on_page(map); + if (++repeat < 16) + goto repeat; + return -EAGAIN; +} + + +/* + * Unmap all of the pages referenced by a kiobuf. We release the pages, + * and unlock them if they were locked. + */ + +void unmap_kiobuf (struct kiobuf *iobuf) +{ + int i; + struct page *map; + + for (i = 0; i < iobuf->nr_pages; i++) { + map = iobuf->maplist[i]; + + if (map) { + if (iobuf->locked) { + clear_bit(PG_locked, &map->flags); + wake_up(&map->wait); + } + __free_page(map); + } + } + + iobuf->nr_pages = 0; + iobuf->locked = 0; } static inline void zeromap_pte_range(pte_t * pte, unsigned long address,
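For illustration, a rough userspace sketch of the binding interface declared in include/linux/raw.h above: the control device is character major RAW_MAJOR (162), minor 0, and RAW_SETBIND takes a struct raw_config_request naming the raw minor to bind plus the block device's major and minor numbers; RAW_GETBIND reads a binding back. The /dev node names and the choice of block device below are assumptions, not something this patch creates.

/* Sketch: bind raw minor 1 to an (assumed) block device, then query it. */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/sysmacros.h>      /* major(), minor() */
#include <linux/raw.h>          /* RAW_SETBIND, RAW_GETBIND, raw_config_request */

int main(void)
{
        struct raw_config_request rq;
        struct stat st;
        int ctl;

        if (stat("/dev/sda1", &st) < 0)         /* assumed block device */
                return 1;

        ctl = open("/dev/rawctl", O_RDWR);      /* char 162, minor 0; node name assumed */
        if (ctl < 0)
                return 1;

        memset(&rq, 0, sizeof(rq));
        rq.raw_minor = 1;                       /* bind raw minor 1 ...     */
        rq.block_major = major(st.st_rdev);     /* ... to this block device */
        rq.block_minor = minor(st.st_rdev);
        if (ioctl(ctl, RAW_SETBIND, &rq) < 0)   /* needs CAP_SYS_ADMIN */
                return 1;

        memset(&rq, 0, sizeof(rq));
        rq.raw_minor = 1;
        if (ioctl(ctl, RAW_GETBIND, &rq) == 0)
                printf("raw1 bound to %d:%d\n",
                       (int) rq.block_major, (int) rq.block_minor);

        close(ctl);
        return 0;
}

raw_ctl_ioctl() refuses raw minor 0 and anything above MINORMASK, and rebinding returns -EBUSY while the raw device is held open.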
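Once a minor is bound, I/O through it has to be sector-aligned: rw_raw_dev() returns -EINVAL unless the file offset and the transfer length are both multiples of the sector size chosen at open time (512 bytes unless the underlying device dictates otherwise), and the user buffer itself must be sector-aligned or brw_kiovec()'s offset check rejects the kiobuf. A minimal sketch, assuming 512-byte sectors and a /dev/raw1 node name:

/* Sketch: sector-aligned read through a bound raw device. */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>

#define SECTOR_SIZE 512                 /* assumed; see raw_open() above */

int main(void)
{
        void *buf;
        ssize_t n;
        int fd;

        /* The buffer must be sector-aligned too, or brw_kiovec()
         * fails its (iobuf->offset & (size-1)) check with -EINVAL. */
        if (posix_memalign(&buf, SECTOR_SIZE, 8 * SECTOR_SIZE))
                return 1;

        fd = open("/dev/raw1", O_RDONLY);       /* node name assumed */
        if (fd < 0)
                return 1;

        /* Offset and length are whole sectors. */
        if (lseek(fd, 16 * SECTOR_SIZE, SEEK_SET) < 0)
                return 1;
        n = read(fd, buf, 8 * SECTOR_SIZE);
        printf("read %ld bytes\n", (long) n);

        close(fd);
        free(buf);
        return 0;
}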
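On the kernel side, the calling sequence used by rw_raw_dev() above, and now exported from kernel/ksyms.c, is alloc_kiovec(), map_user_kiobuf(), brw_kiovec(), unmap_kiobuf() and free_kiovec(). The sketch below condenses it to a single chunk of at most KIO_MAX_SECTORS blocks, assuming 512-byte sectors; dev, buf, first_block and nblocks are illustrative parameters, and larger transfers have to be split the way rw_raw_dev() splits them.

/* Illustrative sketch of one single-chunk kiobuf read; dev, buf,
 * first_block and nblocks are assumed caller-supplied values. */
#include <linux/fs.h>
#include <linux/iobuf.h>

static int example_read_blocks(kdev_t dev, unsigned long buf /* user VA */,
                               unsigned long first_block, int nblocks)
{
        struct kiobuf *iobuf;
        int i, err;

        err = alloc_kiovec(1, &iobuf);
        if (err)
                return err;

        /* READ means "write to user memory": the pages are faulted in,
         * pinned and locked by map_user_kiobuf(). */
        err = map_user_kiobuf(READ, iobuf, buf, nblocks * 512);
        if (err)
                goto out_free;

        /* One sector number per 512-byte block of the mapped range. */
        for (i = 0; i < nblocks; i++)
                iobuf->blocks[i] = first_block + i;

        /* Submits the buffer_heads and waits; returns bytes done or -errno. */
        err = brw_kiovec(READ, 1, &iobuf, dev, iobuf->blocks, 512);

        unmap_kiobuf(iobuf);
 out_free:
        free_kiovec(1, &iobuf);
        return err;
}

rw_raw_dev() additionally serialises access to the per-minor preallocated kiobuf with the iobuf_lock bit and falls back to a freshly allocated kiobuf when that bit is already taken; any caller sharing one kiobuf needs equivalent locking.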
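For reference, the chunking arithmetic behind those constants: with 4 KB pages, KIO_MAX_ATOMIC_IO = 128 KB gives KIO_STATIC_PAGES = 128/4 + 1 = 33 embedded page slots and KIO_MAX_SECTORS = 128 * 2 = 256 buffer_heads, so one pass of the brw_kiovec() loop submits at most 256 sectors of 512 bytes, i.e. 128 KB. For a larger sector size rw_raw_dev() scales the sector count down instead: max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9), so a 2 KB sector size (sector_bits = 11) allows 256 >> 2 = 64 sectors per chunk, which is still 128 KB.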