diff -urN 2.3.46pre1/CREDITS 2.3.46pre1aa1/CREDITS --- 2.3.46pre1/CREDITS Tue Feb 15 03:06:47 2000 +++ 2.3.46pre1aa1/CREDITS Wed Feb 16 00:28:24 2000 @@ -1519,6 +1519,13 @@ D: XF86_8514 D: cfdisk (curses based disk partitioning program) +N: Heinz Mauelshagen +E: mge@EZ-Darmstadt.Telekom.de +D: Logical Volume Manager +S: Bartningstr. 12 +S: 64289 Darmstadt +S: Germany + N: Mike McLagan E: mike.mclagan@linux.org W: http://www.invlogic.com/~mmclagan diff -urN 2.3.46pre1/Documentation/Configure.help 2.3.46pre1aa1/Documentation/Configure.help --- 2.3.46pre1/Documentation/Configure.help Fri Feb 11 00:05:31 2000 +++ 2.3.46pre1aa1/Documentation/Configure.help Wed Feb 16 00:28:24 2000 @@ -1260,6 +1260,30 @@ called on26.o. You must also have a high-level driver for the type of device that you want to support. +Logical Volume Manager (LVM) support +CONFIG_BLK_DEV_LVM + This driver lets you combine several hard disks, hard disk partitions, + multiple devices or even loop devices (for evaluation purposes) into + a volume group. Imagine a volume group as a kind of virtual disk. + Logical volumes, which can be thought of as virtual partitions, + can be created in the volume group. You can resize volume groups and + logical volumes after creation to match new capacity needs. + Logical volumes are accessed as block devices named + /dev/VolumeGroupName/LogicalVolumeName. + + For details see /usr/src/linux/Documentation/LVM-HOWTO. + + To get the newest software see <http://linux.msede.com/lvm>. + +Logical Volume Manager proc filesystem information +CONFIG_LVM_PROC_FS + If you say Y here, you can access overall Logical Volume Manager, + Volume Group, Logical Volume and Physical Volume information in /proc/lvm. + + To use this option, make sure that "proc filesystem support" + (CONFIG_PROC_FS) is enabled too. + + Multiple devices driver support CONFIG_BLK_DEV_MD This driver lets you combine several hard disk partitions into one diff -urN 2.3.46pre1/Documentation/LVM-HOWTO 2.3.46pre1aa1/Documentation/LVM-HOWTO --- 2.3.46pre1/Documentation/LVM-HOWTO Thu Jan 1 01:00:00 1970 +++ 2.3.46pre1aa1/Documentation/LVM-HOWTO Wed Feb 16 00:28:24 2000 @@ -0,0 +1,118 @@ +Heinz Mauelshagen's LVM (Logical Volume Manager) howto. 01/28/1999 + + +Abstract: +--------- +The LVM adds virtual disk and virtual partition functionality +to the Linux operating system. + +It achieves this by adding an additional layer between the physical peripherals +and the i/o interface in the kernel. + +This allows the concatenation of several disk partitions or whole disks +(so-called physical volumes or PVs) or even multiple devices +to form a storage pool (a so-called Volume Group or VG) with +allocation units called physical extents (PEs). +You can think of the volume group as a virtual disk. +Please see the scenario below. + +Some or all PEs of this VG can then be allocated to so-called Logical Volumes +or LVs in units called logical extents or LEs. +Each LE is mapped to a corresponding PE. +LEs and PEs are equal in size. +Logical volumes are a kind of virtual partition. + + +The LVs can be used through device special files named +/dev/VolumeGroupName/LogicalVolumeName, similar to the familiar +/dev/sd[a-z]* or /dev/hd[a-z]*. + +But going beyond this, you are able to extend or reduce +VGs _AND_ LVs at runtime! + +So... +If for example the capacity of an LV gets too small and your VG containing +this LV is full, you could add another PV to that VG and simply extend +the LV afterwards.
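As a minimal sketch of that scenario (assuming the vgextend tool from the LVM userspace distribution; /dev/sdf1 and the sizes are example values only):

    pvcreate /dev/sdf1                      # prepare the new physical volume
    vgextend test_vg /dev/sdf1              # add it to the full volume group
    lvextend -L+500 /dev/test_vg/test_lv    # now the LV can grow by 500MB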
+If you reduce or delete an LV you can use the freed capacity for different +LVs in the same VG. + + +The above scenario looks like this: + + /------------------------------------------\ + | /--PV1---\ VG 1 /--PVn---\ | + | |-VGDA---| |-VGDA---| | + | |PE1PE2..| |PE1PE2..| | + | | | ...... | | | + | | | | | | + | | /-----------------------\ | | + | | \-------LV 1------------/ | | + | | ..PEn| | ..PEn| | + | \--------/ \--------/ | + \------------------------------------------/ + +PV 1 could be /dev/sdc1 sized 3GB +PV n could be /dev/sde1 sized 4GB +VG 1 could be test_vg +LV 1 could be /dev/test_vg/test_lv +VGDA is the volume group descriptor area holding the LVM metadata +PE1 up to PEn is the number of physical extents on each disk (partition) + + +For installation steps see INSTALL; use insmod(1)/modprobe(1) or kmod/kerneld(8) +to load the logical volume manager module if you did not build it +into the kernel. + + +Configuration steps for setting up the above scenario: + +1. Set the partition system id to 0xFE on /dev/sdc1 and /dev/sde1. + +2. do a "pvcreate /dev/sd[ce]1" + For testing purposes you can use more than one partition on a disk. + You should not do so in real use, because in the case of + a striped LV over one disk you'll have a performance breakdown. + +3. do a "vgcreate test_vg /dev/sd[ce]1" to create the new VG named "test_vg" + which has the total capacity of both partitions. + vgcreate also activates the new volume group (it transfers the metadata + into the LVM driver in the kernel) so that LVs can be created in the next step. + +4. do a "lvcreate -L1500 -ntest_lv test_vg" to get a 1500MB linear LV named + "test_lv" and its block device special file "/dev/test_vg/test_lv". + + Or do a "lvcreate -i2 -I4 -l1500 -nanother_test_lv test_vg" to get a 1500 LE + large logical volume with 2 stripes and a stripe size of 4 KB. + +5. For example, generate a filesystem in one LV with + "mke2fs /dev/test_vg/test_lv" and mount it. + +6. extend /dev/test_vg/test_lv to 1600MB with relative size by + "lvextend -L+100 /dev/test_vg/test_lv" + or with absolute size by + "lvextend -L1600 /dev/test_vg/test_lv" + +7. reduce /dev/test_vg/test_lv to 900 logical extents with relative extents by + "lvreduce -l-700 /dev/test_vg/test_lv" + or with absolute extents by + "lvreduce -l900 /dev/test_vg/test_lv" + +8. rename a VG by deactivating it with + "vgchange -an test_vg" # only VGs with _no_ open LVs can be deactivated! + "vgrename test_vg whatever" + and reactivate it again by + "vgchange -ay whatever" + +9. rename an LV after closing it by + "lvchange -an /dev/whatever/test_lv" # only closed LVs can be deactivated + "lvrename /dev/whatever/test_lv /dev/whatever/whatvolume" + or by + "lvrename whatever test_lv whatvolume" + and reactivate it again by + "lvchange -ay /dev/whatever/whatvolume" + +10. if you own Ted Ts'o's resize2fs program, you are able to resize the + ext2 type filesystems contained in logical volumes without destroying + the data by + "e2fsadm -L+100 /dev/test_vg/another_test_lv"
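The result of the steps above can be sanity-checked with the display tools that ship with the same LVM userspace suite (a sketch, using the names from the scenario):

    pvdisplay /dev/sdc1                # PV size and PE allocation
    vgdisplay test_vg                  # VG capacity, PE size, free PEs
    lvdisplay /dev/test_vg/test_lv     # LV size, status and stripe setup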
diff -urN 2.3.46pre1/MAINTAINERS 2.3.46pre1aa1/MAINTAINERS --- 2.3.46pre1/MAINTAINERS Sat Feb 12 21:03:23 2000 +++ 2.3.46pre1aa1/MAINTAINERS Wed Feb 16 00:28:24 2000 @@ -578,6 +578,13 @@ W: http://people.redhat.com/zab/maestro/ S: Supported +LOGICAL VOLUME MANAGER +P: Heinz Mauelshagen +M: linux-LVM@EZ-Darmstadt.Telekom.de +L: linux-LVM@msede.com +W: http://linux.msede.com/lvm +S: Maintained + M68K P: Jes Sorensen M: Jes.Sorensen@cern.ch diff -urN 2.3.46pre1/Makefile 2.3.46pre1aa1/Makefile --- 2.3.46pre1/Makefile Tue Feb 15 03:07:27 2000 +++ 2.3.46pre1aa1/Makefile Wed Feb 16 00:28:25 2000 @@ -330,13 +330,15 @@ echo \#define LINUX_COMPILE_DOMAIN ; \ fi >> .ver @echo \#define LINUX_COMPILER \"`$(CC) $(CFLAGS) -v 2>&1 | tail -1`\" >> .ver - @mv -f .ver $@ + @cp .ver $@ + @rm .ver include/linux/version.h: ./Makefile @echo \#define UTS_RELEASE \"$(KERNELRELEASE)\" > .ver @echo \#define LINUX_VERSION_CODE `expr $(VERSION) \\* 65536 + $(PATCHLEVEL) \\* 256 + $(SUBLEVEL)` >> .ver @echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))' >>.ver - @mv -f .ver $@ + @cp .ver $@ + @rm .ver init/version.o: init/version.c include/linux/compile.h include/config/MARKER $(CC) $(CFLAGS) -DUTS_MACHINE='"$(ARCH)"' -c -o init/version.o init/version.c diff -urN 2.3.46pre1/arch/alpha/kernel/Makefile 2.3.46pre1aa1/arch/alpha/kernel/Makefile --- 2.3.46pre1/arch/alpha/kernel/Makefile Fri Feb 11 00:05:32 2000 +++ 2.3.46pre1aa1/arch/alpha/kernel/Makefile Wed Feb 16 00:28:24 2000 @@ -14,7 +14,7 @@ O_TARGET := kernel.o O_OBJS := entry.o traps.o process.o osf_sys.o irq.o signal.o setup.o \ - ptrace.o time.o semaphore.o + ptrace.o time.o semaphore.o i8259.o rtc_irq.o OX_OBJS := alpha_ksyms.o diff -urN 2.3.46pre1/arch/alpha/kernel/alpha_ksyms.c 2.3.46pre1aa1/arch/alpha/kernel/alpha_ksyms.c --- 2.3.46pre1/arch/alpha/kernel/alpha_ksyms.c Fri Feb 11 00:05:32 2000 +++ 2.3.46pre1aa1/arch/alpha/kernel/alpha_ksyms.c Wed Feb 16 00:28:24 2000 @@ -36,6 +36,7 @@ extern struct hwrpb_struct *hwrpb; extern void dump_thread(struct pt_regs *, struct user *); extern int dump_fpu(struct pt_regs *, elf_fpregset_t *); +extern spinlock_t kernel_flag; /* these are C runtime functions with special calling conventions: */ extern void __divl (void); @@ -158,13 +159,16 @@ */ #ifdef __SMP__ +EXPORT_SYMBOL(kernel_flag); EXPORT_SYMBOL(synchronize_irq); EXPORT_SYMBOL(flush_tlb_all); EXPORT_SYMBOL(flush_tlb_mm); EXPORT_SYMBOL(flush_tlb_page); EXPORT_SYMBOL(flush_tlb_range); +EXPORT_SYMBOL(smp_imb); EXPORT_SYMBOL(cpu_data); EXPORT_SYMBOL(__cpu_number_map); +EXPORT_SYMBOL(smp_num_cpus); EXPORT_SYMBOL(global_irq_holder); EXPORT_SYMBOL(__global_cli); EXPORT_SYMBOL(__global_sti); diff -urN 2.3.46pre1/arch/alpha/kernel/i8259.c 2.3.46pre1aa1/arch/alpha/kernel/i8259.c --- 2.3.46pre1/arch/alpha/kernel/i8259.c Thu Jan 1 01:00:00 1970 +++ 2.3.46pre1aa1/arch/alpha/kernel/i8259.c Wed Feb 16 00:28:24 2000 @@ -0,0 +1,123 @@ +/* started hacking from linux-2.3.30pre6/arch/i386/kernel/i8259.c */ + +#include +#include +#include +#include +#include + +#include +#include + +/* + * This is the 'legacy' 8259A Programmable Interrupt Controller, + * present in the majority of PC/AT boxes.
+ */ + +static void enable_8259A_irq(unsigned int irq); +static void disable_8259A_irq(unsigned int irq); + +/* shutdown is same as "disable" */ +#define end_8259A_irq enable_8259A_irq +#define shutdown_8259A_irq disable_8259A_irq + +static void mask_and_ack_8259A(unsigned int); + +static unsigned int startup_8259A_irq(unsigned int irq) +{ + enable_8259A_irq(irq); + return 0; /* never anything pending */ +} + +static struct hw_interrupt_type i8259A_irq_type = { + "XT-PIC", + startup_8259A_irq, + shutdown_8259A_irq, + enable_8259A_irq, + disable_8259A_irq, + mask_and_ack_8259A, + end_8259A_irq +}; + +/* + * 8259A PIC functions to handle ISA devices: + */ + +/* + * This contains the irq mask for both 8259A irq controllers, + */ +static unsigned int cached_irq_mask = 0xffff; + +#define __byte(x,y) (((unsigned char *)&(y))[x]) +#define cached_21 (__byte(0,cached_irq_mask)) +#define cached_A1 (__byte(1,cached_irq_mask)) + +/* + * These have to be protected by the irq controller spinlock + * before being called. + */ +static void disable_8259A_irq(unsigned int irq) +{ + unsigned int mask = 1 << irq; + cached_irq_mask |= mask; + if (irq & 8) + outb(cached_A1,0xA1); + else + outb(cached_21,0x21); +} + +static void enable_8259A_irq(unsigned int irq) +{ + unsigned int mask = ~(1 << irq); + cached_irq_mask &= mask; + if (irq & 8) + outb(cached_A1,0xA1); + else + outb(cached_21,0x21); +} + +static void mask_and_ack_8259A(unsigned int irq) +{ + disable_8259A_irq(irq); + + /* Ack the interrupt making it the lowest priority */ + /* First the slave .. */ + if (irq > 7) { + outb(0xE0 | (irq - 8), 0xa0); + irq = 2; + } + /* .. then the master */ + outb(0xE0 | irq, 0x20); +} + +static void init_8259A(void) +{ + outb(0xff, 0x21); /* mask all of 8259A-1 */ + outb(0xff, 0xA1); /* mask all of 8259A-2 */ +} + +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ +static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL}; + +void __init +init_ISA_irqs (void) +{ + int i; + + for (i = 0; i < NR_IRQS; i++) { + if (i == RTC_IRQ) + continue; + if (i >= 16) + break; + irq_desc[i].status = IRQ_DISABLED; + /* + * 16 old-style INTA-cycle interrupts: + */ + irq_desc[i].handler = &i8259A_irq_type; + } + + init_8259A(); + setup_irq(2, &irq2); +} diff -urN 2.3.46pre1/arch/alpha/kernel/irq.c 2.3.46pre1aa1/arch/alpha/kernel/irq.c --- 2.3.46pre1/arch/alpha/kernel/irq.c Fri Feb 11 00:05:32 2000 +++ 2.3.46pre1aa1/arch/alpha/kernel/irq.c Wed Feb 16 00:28:24 2000 @@ -39,6 +39,7 @@ #ifndef __SMP__ int __local_irq_count; int __local_bh_count; +unsigned long __irq_attempt[NR_IRQS]; #endif #if NR_IRQS > 128 @@ -57,12 +58,6 @@ /* - * Shadow-copy of masked interrupts. - */ - -unsigned long _alpha_irq_masks[2] = { ~0UL, ~0UL }; - -/* * The ack_irq routine used by 80% of the systems. */ @@ -135,7 +130,7 @@ return; } } - handle_irq(j, j, regs); + handle_irq(j, regs); #else unsigned long pic; @@ -169,77 +164,201 @@ void srm_device_interrupt(unsigned long vector, struct pt_regs * regs) { - int irq, ack; + int irq; - ack = irq = (vector - 0x800) >> 4; - handle_irq(irq, ack, regs); + irq = (vector - 0x800) >> 4; + handle_irq(irq, regs); } /* + * Special irq handlers. + */ + +void no_action(int cpl, void *dev_id, struct pt_regs *regs) { } + +/* * Initial irq handlers. */ -static struct irqaction timer_irq = { NULL, 0, 0, NULL, NULL, NULL}; -spinlock_t irq_controller_lock = SPIN_LOCK_UNLOCKED; -irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = { [0 ... 
NR_IRQS-1] = {0,} }; +static void enable_none(unsigned int irq) { } +static unsigned int startup_none(unsigned int irq) { return 0; } +static void disable_none(unsigned int irq) { } +static void ack_none(unsigned int irq) +{ + printk("unexpected IRQ trap at vector %02x\n", irq); +} + +/* startup is the same as "enable", shutdown is same as "disable" */ +#define shutdown_none disable_none +#define end_none enable_none + +struct hw_interrupt_type no_irq_type = { + "none", + startup_none, + shutdown_none, + enable_none, + disable_none, + ack_none, + end_none +}; +spinlock_t irq_controller_lock = SPIN_LOCK_UNLOCKED; +irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = + { [0 ... NR_IRQS-1] = { 0, &no_irq_type, }}; -static inline void -mask_irq(unsigned long irq) +int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action) { - set_bit(irq, _alpha_irq_masks); - alpha_mv.update_irq_hw(irq, alpha_irq_mask, 0); -} + int status; + int cpu = smp_processor_id(); -static inline void -unmask_irq(unsigned long irq) -{ - clear_bit(irq, _alpha_irq_masks); - alpha_mv.update_irq_hw(irq, alpha_irq_mask, 1); + kstat.irqs[cpu][irq]++; + irq_enter(cpu, irq); + + status = 1; /* Force the "do bottom halves" bit */ + + do { + if (!(action->flags & SA_INTERRUPT)) + __sti(); + else + __cli(); + + status |= action->flags; + action->handler(irq, action->dev_id, regs); + action = action->next; + } while (action); + if (status & SA_SAMPLE_RANDOM) + add_interrupt_randomness(irq); + __cli(); + + irq_exit(cpu, irq); + + return status; } +/* + * Generic enable/disable code: this just calls + * down into the PIC-specific version for the actual + * hardware disable after having gotten the irq + * controller lock. + */ void -disable_irq_nosync(unsigned int irq_nr) +disable_irq_nosync(unsigned int irq) { unsigned long flags; - save_and_cli(flags); - mask_irq(irq_nr); - restore_flags(flags); + spin_lock_irqsave(&irq_controller_lock, flags); + if (!irq_desc[irq].depth++) { + irq_desc[irq].status |= IRQ_DISABLED; + irq_desc[irq].handler->disable(irq); + } + spin_unlock_irqrestore(&irq_controller_lock, flags); } +/* + * Synchronous version of the above, making sure the IRQ is + * no longer running on any other IRQ.. + */ void -disable_irq(unsigned int irq_nr) +disable_irq(unsigned int irq) { - /* This works non-SMP, and SMP until we write code to distribute - interrupts to more that cpu 0. */ - disable_irq_nosync(irq_nr); + disable_irq_nosync(irq); + + if (!local_irq_count(smp_processor_id())) { + do { + barrier(); + } while (irq_desc[irq].status & IRQ_INPROGRESS); + } } void -enable_irq(unsigned int irq_nr) +enable_irq(unsigned int irq) { unsigned long flags; - save_and_cli(flags); - unmask_irq(irq_nr); - restore_flags(flags); + spin_lock_irqsave(&irq_controller_lock, flags); + switch (irq_desc[irq].depth) { + case 1: { + unsigned int status = irq_desc[irq].status & ~IRQ_DISABLED; + irq_desc[irq].status = status; + if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { + irq_desc[irq].status = status | IRQ_REPLAY; + hw_resend_irq(irq_desc[irq].handler,irq); /* noop */ + } + irq_desc[irq].handler->enable(irq); + /* fall-through */ + } + default: + irq_desc[irq].depth--; + break; + case 0: + printk("enable_irq() unbalanced from %p\n", + __builtin_return_address(0)); + } + spin_unlock_irqrestore(&irq_controller_lock, flags); } int -check_irq(unsigned int irq) +setup_irq(unsigned int irq, struct irqaction * new) { - return irq_desc[irq].action ? 
-EBUSY : 0; + int shared = 0; + struct irqaction *old, **p; + unsigned long flags; + + /* + * Some drivers like serial.c use request_irq() heavily, + * so we have to be careful not to interfere with a + * running system. + */ + if (new->flags & SA_SAMPLE_RANDOM) { + /* + * This function might sleep, we want to call it first, + * outside of the atomic block. + * Yes, this might clear the entropy pool if the wrong + * driver is attempted to be loaded, without actually + * installing a new handler, but is this really a problem, + * only the sysadmin is able to do this. + */ + rand_initialize_irq(irq); + } + + /* + * The following block of code has to be executed atomically + */ + spin_lock_irqsave(&irq_controller_lock,flags); + p = &irq_desc[irq].action; + if ((old = *p) != NULL) { + /* Can't share interrupts unless both agree to */ + if (!(old->flags & new->flags & SA_SHIRQ)) { + spin_unlock_irqrestore(&irq_controller_lock,flags); + return -EBUSY; + } + + /* add new interrupt at end of irq queue */ + do { + p = &old->next; + old = *p; + } while (old); + shared = 1; + } + + *p = new; + + if (!shared) { + irq_desc[irq].depth = 0; + irq_desc[irq].status &= ~IRQ_DISABLED; + irq_desc[irq].handler->startup(irq); + } + spin_unlock_irqrestore(&irq_controller_lock,flags); + return 0; } int request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *), unsigned long irqflags, const char * devname, void *dev_id) { - int shared = 0; - struct irqaction * action, **p; - unsigned long flags; + int retval; + struct irqaction * action; if (irq >= ACTUAL_NR_IRQS) return -EINVAL; @@ -248,36 +367,25 @@ if (!handler) return -EINVAL; - p = &irq_desc[irq].action; - action = *p; - if (action) { - /* Can't share interrupts unless both agree to */ - if (!(action->flags & irqflags & SA_SHIRQ)) - return -EBUSY; - - /* Can't share interrupts unless both are same type */ - if ((action->flags ^ irqflags) & SA_INTERRUPT) - return -EBUSY; - - /* Add new interrupt at end of irq queue */ - do { - p = &action->next; - action = *p; - } while (action); - shared = 1; +#if 1 + /* + * Sanity-check: shared interrupts should REALLY pass in + * a real dev-ID, otherwise we'll have trouble later trying + * to figure out which interrupt is which (messes up the + * interrupt freeing logic etc). 
+ */ + if (irqflags & SA_SHIRQ) { + if (!dev_id) + printk("Bad boy: %s (at %p) called us without a dev_id!\n", + devname, __builtin_return_address(0)); } +#endif - action = &timer_irq; - if (irq != TIMER_IRQ) { - action = (struct irqaction *) + action = (struct irqaction *) kmalloc(sizeof(struct irqaction), GFP_KERNEL); - } if (!action) return -ENOMEM; - if (irqflags & SA_SAMPLE_RANDOM) - rand_initialize_irq(irq); - action->handler = handler; action->flags = irqflags; action->mask = 0; @@ -285,20 +393,16 @@ action->next = NULL; action->dev_id = dev_id; - save_and_cli(flags); - *p = action; - - if (!shared) - unmask_irq(irq); - - restore_flags(flags); - return 0; + retval = setup_irq(irq, action); + if (retval) + kfree(action); + return retval; } - + void free_irq(unsigned int irq, void *dev_id) { - struct irqaction * action, **p; + struct irqaction **p; unsigned long flags; if (irq >= ACTUAL_NR_IRQS) { @@ -309,25 +413,39 @@ printk("Trying to free reserved IRQ %d\n", irq); return; } - for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) { - if (action->dev_id != dev_id) - continue; + spin_lock_irqsave(&irq_controller_lock,flags); + p = &irq_desc[irq].action; + for (;;) { + struct irqaction * action = *p; + if (action) { + struct irqaction **pp = p; + p = &action->next; + if (action->dev_id != dev_id) + continue; - /* Found it - now free it */ - save_and_cli(flags); - *p = action->next; - if (!irq_desc[irq].action) - mask_irq(irq); - restore_flags(flags); - kfree(action); + /* Found it - now remove it from the list of entries */ + *pp = action->next; + if (!irq_desc[irq].action) { + irq_desc[irq].status |= IRQ_DISABLED; + irq_desc[irq].handler->shutdown(irq); + } + spin_unlock_irqrestore(&irq_controller_lock,flags); + + /* Wait to make sure it's not being used on another CPU */ + while (irq_desc[irq].status & IRQ_INPROGRESS) + barrier(); + kfree(action); + return; + } + printk("Trying to free free IRQ%d\n",irq); + spin_unlock_irqrestore(&irq_controller_lock,flags); return; } - printk("Trying to free free IRQ%d\n",irq); } int get_irq_list(char *buf) { - int i; + int i, j; struct irqaction * action; char *p = buf; @@ -335,6 +453,8 @@ p += sprintf(p, " "); for (i = 0; i < smp_num_cpus; i++) p += sprintf(p, "CPU%d ", i); + for (i = 0; i < smp_num_cpus; i++) + p += sprintf(p, "TRY%d ", i); *p++ = '\n'; #endif @@ -346,13 +466,14 @@ #ifndef __SMP__ p += sprintf(p, "%10u ", kstat_irqs(i)); #else - { - int j; - for (j = 0; j < smp_num_cpus; j++) - p += sprintf(p, "%10u ", - kstat.irqs[cpu_logical_map(j)][i]); - } + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10u ", + kstat.irqs[cpu_logical_map(j)][i]); + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10lu ", + irq_attempt(cpu_logical_map(j), i)); #endif + p += sprintf(p, " %14s", irq_desc[i].handler->typename); p += sprintf(p, " %c%s", (action->flags & SA_INTERRUPT)?'+':' ', action->name); @@ -364,6 +485,13 @@ } *p++ = '\n'; } +#if CONFIG_SMP + p += sprintf(p, "LOC: "); + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10lu ", + cpu_data[cpu_logical_map(j)].smp_local_irq_count); + p += sprintf(p, "\n"); +#endif return p - buf; } @@ -605,139 +733,157 @@ } #endif /* __SMP__ */ -static void -unexpected_irq(int irq, struct pt_regs * regs) -{ -#if 0 -#if 1 - printk("device_interrupt: unexpected interrupt %d\n", irq); -#else - struct irqaction *action; - int i; - - printk("IO device interrupt, irq = %d\n", irq); - printk("PC = %016lx PS=%04lx\n", regs->pc, regs->ps); - printk("Expecting: "); - for (i = 0; i < 
ACTUAL_NR_IRQS; i++) - if ((action = irq_desc[i].action)) - while (action->handler) { - printk("[%s:%d] ", action->name, i); - action = action->next; - } - printk("\n"); -#endif -#endif - -#if defined(CONFIG_ALPHA_JENSEN) - /* ??? Is all this just debugging, or are the inb's and outb's - necessary to make things work? */ - printk("64=%02x, 60=%02x, 3fa=%02x 2fa=%02x\n", - inb(0x64), inb(0x60), inb(0x3fa), inb(0x2fa)); - outb(0x0c, 0x3fc); - outb(0x0c, 0x2fc); - outb(0,0x61); - outb(0,0x461); -#endif -} - +/* + * do_IRQ handles all normal device IRQ's (the special + * SMP cross-CPU interrupts have their own specific + * handlers). + */ void -handle_irq(int irq, int ack, struct pt_regs * regs) -{ - struct irqaction * action; +handle_irq(int irq, struct pt_regs * regs) +{ + /* + * We ack quickly, we don't want the irq controller + * thinking we're snobs just because some other CPU has + * disabled global interrupts (we have already done the + * INT_ACK cycles, it's too late to try to pretend to the + * controller that we aren't taking the interrupt). + * + * 0 return value means that this irq is already being + * handled by some other CPU. (or is disabled) + */ int cpu = smp_processor_id(); + irq_desc_t *desc; + struct irqaction * action; + unsigned int status; if ((unsigned) irq > ACTUAL_NR_IRQS) { printk("device_interrupt: illegal interrupt %d\n", irq); return; } -#if 0 - /* A useful bit of code to find out if an interrupt is going wild. */ - { - static unsigned int last_msg, last_cc; - static int last_irq, count; - unsigned int cc; - - __asm __volatile("rpcc %0" : "=r"(cc)); - ++count; - if (cc - last_msg > 150000000 || irq != last_irq) { - printk("handle_irq: irq %d count %d cc %u @ %p\n", - irq, count, cc-last_cc, regs->pc); - count = 0; - last_msg = cc; - last_irq = irq; - } - last_cc = cc; + irq_attempt(cpu, irq)++; + desc = irq_desc + irq; + spin_lock_irq(&irq_controller_lock); /* mask also the RTC */ + desc->handler->ack(irq); + /* + REPLAY is when Linux resends an IRQ that was dropped earlier + WAITING is used by probe to mark irqs that are being tested + */ + status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); + status |= IRQ_PENDING; /* we _want_ to handle it */ + + /* + * If the IRQ is disabled for whatever reason, we cannot + * use the action we have. + */ + action = NULL; + if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { + action = desc->action; + status &= ~IRQ_PENDING; /* we commit to handling */ + status |= IRQ_INPROGRESS; /* we are handling it */ } -#endif + desc->status = status; + spin_unlock(&irq_controller_lock); - irq_enter(cpu, irq); - kstat.irqs[cpu][irq] += 1; - action = irq_desc[irq].action; + /* + * If there is no IRQ handler or it was disabled, exit early. + Since we set PENDING, if another processor is handling + a different instance of this same irq, the other processor + will take care of it. + */ + if (!action) + return; /* - * For normal interrupts, we mask it out, and then ACK it. - * This way another (more timing-critical) interrupt can - * come through while we're doing this one. - * - * Note! An irq without a handler gets masked and acked, but - * never unmasked. The autoirq stuff depends on this (it looks - * at the masks before and after doing the probing). 
- */ - if (ack >= 0) { - mask_irq(ack); - alpha_mv.ack_irq(ack); - } - if (action) { - if (action->flags & SA_SAMPLE_RANDOM) - add_interrupt_randomness(irq); - do { - action->handler(irq, action->dev_id, regs); - action = action->next; - } while (action); - if (ack >= 0) - unmask_irq(ack); - } else { - unexpected_irq(irq, regs); + * Edge triggered interrupts need to remember + * pending events. + * This applies to any hw interrupts that allow a second + * instance of the same irq to arrive while we are in do_IRQ + * or in the handler. But the code here only handles the _second_ + * instance of the irq, not the third or fourth. So it is mostly + * useful for irq hardware that does not mask cleanly in an + * SMP environment. + */ + for (;;) { + handle_IRQ_event(irq, regs, action); + spin_lock(&irq_controller_lock); + + if (!(desc->status & IRQ_PENDING) + || (desc->status & IRQ_LEVEL)) + break; + desc->status &= ~IRQ_PENDING; + spin_unlock(&irq_controller_lock); } - irq_exit(cpu, irq); + desc->status &= ~IRQ_INPROGRESS; + if (!(desc->status & IRQ_DISABLED)) + desc->handler->end(irq); + spin_unlock(&irq_controller_lock); } - /* - * Start listening for interrupts.. + * IRQ autodetection code.. + * + * This depends on the fact that any interrupt that + * comes in on to an unassigned handler will get stuck + * with "IRQ_WAITING" cleared and the interrupt + * disabled. */ - unsigned long probe_irq_on(void) { - struct irqaction * action; - unsigned long irqs = 0; - unsigned long delay; unsigned int i; + unsigned long delay; - /* Handle only the first 64 IRQs here. This is enough for - [E]ISA, which is the only thing that needs probing anyway. */ - for (i = (ACTUAL_NR_IRQS - 1) & 63; i > 0; i--) { - if (!(PROBE_MASK & (1UL << i))) { - continue; - } - action = irq_desc[i].action; - if (!action) { - enable_irq(i); - irqs |= (1UL << i); + /* Something may have generated an irq long ago and we want to + flush such a longstanding irq before considering it as spurious. */ + spin_lock_irq(&irq_controller_lock); + for (i = NR_IRQS-1; i > 0; i--) + if (!irq_desc[i].action) + irq_desc[i].handler->startup(i); + spin_unlock_irq(&irq_controller_lock); + + /* Wait for longstanding interrupts to trigger. */ + for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) + /* about 20ms delay */ synchronize_irq(); + + /* enable any unassigned irqs (we must startup again here because + if a longstanding irq happened in the previous stage, it may have + masked itself). */ + spin_lock_irq(&irq_controller_lock); + for (i = NR_IRQS-1; i > 0; i--) { + if (!irq_desc[i].action) { + irq_desc[i].status |= IRQ_AUTODETECT | IRQ_WAITING; + if(irq_desc[i].handler->startup(i)) + irq_desc[i].status |= IRQ_PENDING; + } + } + spin_unlock_irq(&irq_controller_lock); + + /* + * Wait for spurious interrupts to trigger + */ + for (delay = jiffies + HZ/10; time_after(delay, jiffies); ) + /* about 100ms delay */ synchronize_irq(); + + /* + * Now filter out any obviously spurious interrupts + */ + spin_lock_irq(&irq_controller_lock); + for (i=0; i<NR_IRQS; i++) { + unsigned int status = irq_desc[i].status; + + if (!(status & IRQ_AUTODETECT)) + continue; + + /* It triggered already - consider it spurious. */ + if (!(status & IRQ_WAITING)) { + irq_desc[i].status = status & ~IRQ_AUTODETECT; + irq_desc[i].handler->shutdown(i); + } + } + spin_unlock_irq(&irq_controller_lock); + + return 0x12345678; } /* @@ -747,19 +893,35 @@ */ int -probe_irq_off(unsigned long irqs) +probe_irq_off(unsigned long unused) { - int i; - - /* Handle only the first 64 IRQs here. This is enough for - [E]ISA, which is the only thing that needs probing anyway.
*/ - irqs &= alpha_irq_mask; - if (!irqs) - return 0; - i = ffz(~irqs); - if (irqs != (1UL << i)) - i = -i; - return i; + int i, irq_found, nr_irqs; + + if (unused != 0x12345678) + printk("Bad IRQ probe from %lx\n", (&unused)[-1]); + + nr_irqs = 0; + irq_found = 0; + spin_lock_irq(&irq_controller_lock); + for (i=0; i<NR_IRQS; i++) { + unsigned int status = irq_desc[i].status; + + if (!(status & IRQ_AUTODETECT)) + continue; + + if (!(status & IRQ_WAITING)) { + if (!nr_irqs) + irq_found = i; + nr_irqs++; + } + irq_desc[i].status = status & ~IRQ_AUTODETECT; + irq_desc[i].handler->shutdown(i); + } + spin_unlock_irq(&irq_controller_lock); + + if (nr_irqs > 1) + irq_found = -irq_found; + return irq_found; } @@ -782,7 +944,12 @@ #endif break; case 1: - handle_irq(RTC_IRQ, -1, &regs); +#ifdef __SMP__ + cpu_data[smp_processor_id()].smp_local_irq_count++; + smp_percpu_timer_interrupt(&regs); + if (smp_processor_id() == smp_boot_cpuid) +#endif + handle_irq(RTC_IRQ, &regs); return; case 2: alpha_mv.machine_check(vector, la_ptr, &regs); diff -urN 2.3.46pre1/arch/alpha/kernel/process.c 2.3.46pre1aa1/arch/alpha/kernel/process.c --- 2.3.46pre1/arch/alpha/kernel/process.c Wed Dec 8 00:05:25 1999 +++ 2.3.46pre1aa1/arch/alpha/kernel/process.c Wed Feb 16 00:28:24 2000 @@ -30,9 +30,11 @@ #include #include +#if 0 #ifdef CONFIG_RTC #include #endif +#endif #include #include @@ -139,9 +141,11 @@ #endif } +#if 0 #ifdef CONFIG_RTC /* Reset rtc to defaults. */ rtc_kill_pit(); +#endif #endif if (alpha_mv.kill_arch) diff -urN 2.3.46pre1/arch/alpha/kernel/rtc_irq.c 2.3.46pre1aa1/arch/alpha/kernel/rtc_irq.c --- 2.3.46pre1/arch/alpha/kernel/rtc_irq.c Thu Jan 1 01:00:00 1970 +++ 2.3.46pre1aa1/arch/alpha/kernel/rtc_irq.c Wed Feb 16 00:28:24 2000 @@ -0,0 +1,26 @@ +/* RTC irq callbacks, 1999 Andrea Arcangeli */ + +#include +#include +#include + +static void enable_rtc(unsigned int irq) { } +static unsigned int startup_rtc(unsigned int irq) { return 0; } +#define shutdown_rtc enable_rtc +#define end_rtc enable_rtc +#define ack_rtc enable_rtc +#define disable_rtc enable_rtc + +void __init +init_RTC_irq(void) +{ + static struct hw_interrupt_type rtc_irq_type = { "RTC", + startup_rtc, + shutdown_rtc, + enable_rtc, + disable_rtc, + ack_rtc, + end_rtc }; + irq_desc[RTC_IRQ].status = IRQ_DISABLED; + irq_desc[RTC_IRQ].handler = &rtc_irq_type; +} diff -urN 2.3.46pre1/arch/alpha/kernel/setup.c 2.3.46pre1aa1/arch/alpha/kernel/setup.c --- 2.3.46pre1/arch/alpha/kernel/setup.c Fri Feb 11 00:05:32 2000 +++ 2.3.46pre1aa1/arch/alpha/kernel/setup.c Wed Feb 16 00:28:24 2000 @@ -30,9 +30,11 @@ #include #include +#if 0 #ifdef CONFIG_RTC #include #endif +#endif #ifdef CONFIG_BLK_DEV_INITRD #include #endif @@ -453,6 +455,7 @@ /* Reserve standard resources. */ reserve_std_resources(); +#if 0 /* Initialize the timers. */ /* ??? There is some circumstantial evidence that this needs to be done now rather than later in time_init, which would @@ -461,6 +464,7 @@ rtc_init_pit(); #else alpha_mv.init_pit(); +#endif #endif /* diff -urN 2.3.46pre1/arch/alpha/kernel/signal.c 2.3.46pre1aa1/arch/alpha/kernel/signal.c --- 2.3.46pre1/arch/alpha/kernel/signal.c Wed Nov 24 18:22:03 1999 +++ 2.3.46pre1aa1/arch/alpha/kernel/signal.c Wed Feb 16 00:28:25 2000 @@ -437,6 +437,8 @@ err |= __copy_to_user(frame->extramask, &set->sig[1], sizeof(frame->extramask)); } + if (err) + goto give_sigsegv; /* Set up to return from userspace. If provided, use a stub already in userspace. */ @@ -499,6 +501,8 @@ err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, sw, set->sig[0], oldsp); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; /* Set up to return from userspace. If provided, use a stub already in userspace.
*/ diff -urN 2.3.46pre1/arch/alpha/kernel/smp.c 2.3.46pre1aa1/arch/alpha/kernel/smp.c --- 2.3.46pre1/arch/alpha/kernel/smp.c Fri Feb 11 00:05:32 2000 +++ 2.3.46pre1aa1/arch/alpha/kernel/smp.c Wed Feb 16 00:28:24 2000 @@ -62,6 +62,7 @@ static unsigned long smp_secondary_alive; unsigned long cpu_present_mask; /* Which cpus ids came online. */ +static unsigned long __cpu_present_mask __initdata = 0; /* cpu reported in the hwrpb */ static int max_cpus = -1; /* Command-line limitation. */ int smp_boot_cpuid; /* Which processor we booted from. */ @@ -506,7 +507,7 @@ if ((cpu->flags & 0x1cc) == 0x1cc) { smp_num_probed++; /* Assume here that "whami" == index */ - cpu_present_mask |= (1L << i); + __cpu_present_mask |= (1L << i); cpu->pal_revision = boot_cpu_palrev; } @@ -517,11 +518,12 @@ } } else { smp_num_probed = 1; - cpu_present_mask = (1L << smp_boot_cpuid); + __cpu_present_mask = (1L << smp_boot_cpuid); } + cpu_present_mask = 1L << smp_boot_cpuid; printk(KERN_INFO "SMP: %d CPUs probed -- cpu_present_mask = %lx\n", - smp_num_probed, cpu_present_mask); + smp_num_probed, __cpu_present_mask); } /* @@ -565,12 +567,13 @@ if (i == smp_boot_cpuid) continue; - if (((cpu_present_mask >> i) & 1) == 0) + if (((__cpu_present_mask >> i) & 1) == 0) continue; if (smp_boot_one_cpu(i, cpu_count)) continue; + cpu_present_mask |= 1L << i; cpu_count++; } @@ -865,6 +868,22 @@ } return 0; +} + +static void +ipi_imb(void) +{ + imb(); +} + +void +smp_imb(void) +{ + /* Must wait for the other processors to flush their icache before continuing. */ + if (smp_call_function(ipi_imb, NULL, 1, 1)) + printk(KERN_CRIT "smp_imb: timed out\n"); + + imb(); } static void diff -urN 2.3.46pre1/arch/alpha/kernel/sys_dp264.c 2.3.46pre1aa1/arch/alpha/kernel/sys_dp264.c --- 2.3.46pre1/arch/alpha/kernel/sys_dp264.c Wed Dec 8 00:05:25 1999 +++ 2.3.46pre1aa1/arch/alpha/kernel/sys_dp264.c Wed Feb 16 00:28:24 2000 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -36,60 +37,158 @@ * HACK ALERT! only the boot cpu is used for interrupts.
*/ +static void enable_tsunami_irq(unsigned int irq); +static void disable_tsunami_irq(unsigned int irq); +static void enable_clipper_irq(unsigned int irq); +static void disable_clipper_irq(unsigned int irq); + +#define end_tsunami_irq enable_tsunami_irq +#define shutdown_tsunami_irq disable_tsunami_irq +#define mask_and_ack_tsunami_irq disable_tsunami_irq + +#define end_clipper_irq enable_clipper_irq +#define shutdown_clipper_irq disable_clipper_irq +#define mask_and_ack_clipper_irq disable_clipper_irq + + +static unsigned int +startup_tsunami_irq(unsigned int irq) +{ + enable_tsunami_irq(irq); + return 0; /* never anything pending */ +} + +static unsigned int +startup_clipper_irq(unsigned int irq) +{ + enable_clipper_irq(irq); + return 0; /* never anything pending */ +} + +static struct hw_interrupt_type tsunami_irq_type = { + "TSUNAMI", + startup_tsunami_irq, + shutdown_tsunami_irq, + enable_tsunami_irq, + disable_tsunami_irq, + mask_and_ack_tsunami_irq, + end_tsunami_irq +}; + +static struct hw_interrupt_type clipper_irq_type = { + "CLIPPER", + startup_clipper_irq, + shutdown_clipper_irq, + enable_clipper_irq, + disable_clipper_irq, + mask_and_ack_clipper_irq, + end_clipper_irq +}; + +static unsigned long cached_irq_mask = ~0UL; + +#define TSUNAMI_SET_IRQ_MASK(cpu, value) \ +do { \ + volatile unsigned long *csr; \ + \ + csr = &TSUNAMI_cchip->dim##cpu##.csr; \ + *csr = (value); \ + mb(); \ + *csr; \ +} while(0) + +static inline void +do_flush_irq_mask(unsigned long value) +{ + switch (TSUNAMI_bootcpu) + { + case 0: + TSUNAMI_SET_IRQ_MASK(0, value); + break; + case 1: + TSUNAMI_SET_IRQ_MASK(1, value); + break; + case 2: + TSUNAMI_SET_IRQ_MASK(2, value); + break; + case 3: + TSUNAMI_SET_IRQ_MASK(3, value); + break; + } +} + +#ifdef CONFIG_SMP +static inline void +do_flush_smp_irq_mask(unsigned long value) +{ + extern unsigned long cpu_present_mask; + unsigned long other_cpus = cpu_present_mask & ~(1L << TSUNAMI_bootcpu); + + if (other_cpus & 1) + TSUNAMI_SET_IRQ_MASK(0, value); + if (other_cpus & 2) + TSUNAMI_SET_IRQ_MASK(1, value); + if (other_cpus & 4) + TSUNAMI_SET_IRQ_MASK(2, value); + if (other_cpus & 8) + TSUNAMI_SET_IRQ_MASK(3, value); +} +#endif + static void -dp264_update_irq_hw(unsigned long irq, unsigned long mask, int unmask_p) +dp264_flush_irq_mask(unsigned long mask) { - volatile unsigned long *csr; + unsigned long value; - if (TSUNAMI_bootcpu < 2) { - if (!TSUNAMI_bootcpu) - csr = &TSUNAMI_cchip->dim0.csr; - else - csr = &TSUNAMI_cchip->dim1.csr; - } else { - if (TSUNAMI_bootcpu == 2) - csr = &TSUNAMI_cchip->dim2.csr; - else - csr = &TSUNAMI_cchip->dim3.csr; - } +#ifdef CONFIG_SMP + value = ~mask; + do_flush_smp_irq_mask(value); +#endif - *csr = ~mask; - mb(); - *csr; - - if (irq < 16) { - if (irq >= 8) - outb(mask >> 8, 0xA1); /* ISA PIC2 */ - else - outb(mask, 0x21); /* ISA PIC1 */ - } + value = ~mask | (1UL << 55) | 0xffff; /* isa irqs always enabled */ + do_flush_irq_mask(value); } static void -clipper_update_irq_hw(unsigned long irq, unsigned long mask, int unmask_p) +enable_tsunami_irq(unsigned int irq) { - if (irq >= 16) { - volatile unsigned long *csr; + cached_irq_mask &= ~(1UL << irq); + dp264_flush_irq_mask(cached_irq_mask); +} - if (TSUNAMI_bootcpu < 2) - if (!TSUNAMI_bootcpu) - csr = &TSUNAMI_cchip->dim0.csr; - else - csr = &TSUNAMI_cchip->dim1.csr; - else - if (TSUNAMI_bootcpu == 2) - csr = &TSUNAMI_cchip->dim2.csr; - else - csr = &TSUNAMI_cchip->dim3.csr; - - *csr = (~mask >> 16) | (1UL << 55); /* master ISA enable */ - mb(); - *csr; - } - else if (irq >= 8) 
- outb(mask >> 8, 0xA1); /* ISA PIC2 */ - else - outb(mask, 0x21); /* ISA PIC1 */ +static void +disable_tsunami_irq(unsigned int irq) +{ + cached_irq_mask |= 1UL << irq; + dp264_flush_irq_mask(cached_irq_mask); +} + +static void +clipper_flush_irq_mask(unsigned long mask) +{ + unsigned long value; + +#ifdef CONFIG_SMP + value = ~mask >> 16; + do_flush_smp_irq_mask(value); +#endif + + value = (~mask >> 16) | (1UL << 55); /* master ISA enable */ + do_flush_irq_mask(value); +} + +static void +enable_clipper_irq(unsigned int irq) +{ + cached_irq_mask &= ~(1UL << irq); + clipper_flush_irq_mask(cached_irq_mask); +} + +static void +disable_clipper_irq(unsigned int irq) +{ + cached_irq_mask |= 1UL << irq; + clipper_flush_irq_mask(cached_irq_mask); } static void @@ -126,9 +225,9 @@ static void dp264_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) { - int irq, ack; + int irq; - ack = irq = (vector - 0x800) >> 4; + irq = (vector - 0x800) >> 4; /* * The SRM console reports PCI interrupts with a vector calculated by: @@ -142,17 +241,17 @@ * so we don't count them. */ if (irq >= 32) - ack = irq = irq - 16; + irq -= 16; - handle_irq(irq, ack, regs); + handle_irq(irq, regs); } static void clipper_srm_device_interrupt(unsigned long vector, struct pt_regs * regs) { - int irq, ack; + int irq; - ack = irq = (vector - 0x800) >> 4; + irq = (vector - 0x800) >> 4; /* * The SRM console reports PCI interrupts with a vector calculated by: @@ -166,7 +265,22 @@ * * Eg IRQ 24 is DRIR bit 8, etc, etc */ - handle_irq(irq, ack, regs); + handle_irq(irq, regs); +} + +static void __init +init_TSUNAMI_irqs(struct hw_interrupt_type * ops) +{ + int i; + + for (i = 0; i < NR_IRQS; i++) { + if (i == RTC_IRQ) + continue; + if (i < 16) + continue; + irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; + irq_desc[i].handler = ops; + } } static void __init @@ -180,10 +294,11 @@ if (alpha_using_srm) alpha_mv.device_interrupt = dp264_srm_device_interrupt; - dp264_update_irq_hw(16, alpha_irq_mask, 0); + init_ISA_irqs(); + init_RTC_irq(); + init_TSUNAMI_irqs(&tsunami_irq_type); - enable_irq(55); /* Enable ISA interrupt controller. */ - enable_irq(2); + dp264_flush_irq_mask(~0UL); } static void __init @@ -197,10 +312,11 @@ if (alpha_using_srm) alpha_mv.device_interrupt = clipper_srm_device_interrupt; - clipper_update_irq_hw(16, alpha_irq_mask, 0); + init_ISA_irqs(); + init_RTC_irq(); + init_TSUNAMI_irqs(&clipper_irq_type); - enable_irq(55); /* Enable ISA interrupt controller. 
*/ - enable_irq(2); + clipper_flush_irq_mask(~0UL); } @@ -431,9 +547,6 @@ min_mem_address: DEFAULT_MEM_BASE, nr_irqs: 64, - irq_probe_mask: TSUNAMI_PROBE_MASK, - update_irq_hw: dp264_update_irq_hw, - ack_irq: common_ack_irq, device_interrupt: dp264_device_interrupt, init_arch: tsunami_init_arch, @@ -458,9 +571,6 @@ min_mem_address: DEFAULT_MEM_BASE, nr_irqs: 64, - irq_probe_mask: TSUNAMI_PROBE_MASK, - update_irq_hw: dp264_update_irq_hw, - ack_irq: common_ack_irq, device_interrupt: dp264_device_interrupt, init_arch: tsunami_init_arch, @@ -484,9 +594,6 @@ min_mem_address: DEFAULT_MEM_BASE, nr_irqs: 64, - irq_probe_mask: TSUNAMI_PROBE_MASK, - update_irq_hw: dp264_update_irq_hw, - ack_irq: common_ack_irq, device_interrupt: dp264_device_interrupt, init_arch: tsunami_init_arch, @@ -510,9 +617,6 @@ min_mem_address: DEFAULT_MEM_BASE, nr_irqs: 64, - irq_probe_mask: TSUNAMI_PROBE_MASK, - update_irq_hw: clipper_update_irq_hw, - ack_irq: common_ack_irq, device_interrupt: dp264_device_interrupt, init_arch: tsunami_init_arch, diff -urN 2.3.46pre1/arch/alpha/kernel/sys_sx164.c 2.3.46pre1aa1/arch/alpha/kernel/sys_sx164.c --- 2.3.46pre1/arch/alpha/kernel/sys_sx164.c Wed Dec 8 00:05:25 1999 +++ 2.3.46pre1aa1/arch/alpha/kernel/sys_sx164.c Wed Feb 16 00:28:24 2000 @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include @@ -26,47 +28,83 @@ #include #include "proto.h" -#include #include "pci_impl.h" #include "machvec_impl.h" +/* Note invert on MASK bits. */ +static unsigned long cached_irq_mask; + +static inline void +sx164_change_irq_mask(unsigned long mask) +{ + *(vulp)PYXIS_INT_MASK = mask; + mb(); + *(vulp)PYXIS_INT_MASK; +} + +static inline void +sx164_enable_irq(unsigned int irq) +{ + sx164_change_irq_mask(cached_irq_mask |= 1UL << (irq - 16)); +} + static void -sx164_update_irq_hw(unsigned long irq, unsigned long mask, int unmask_p) +sx164_disable_irq(unsigned int irq) { - if (irq >= 16) { - /* Make CERTAIN none of the bogus ints get enabled */ - *(vulp)PYXIS_INT_MASK = - ~((long)mask >> 16) & ~0x000000000000003bUL; - mb(); - /* ... and read it back to make sure it got written. 
*/ - *(vulp)PYXIS_INT_MASK; - } - else if (irq >= 8) - outb(mask >> 8, 0xA1); /* ISA PIC2 */ - else - outb(mask, 0x21); /* ISA PIC1 */ + sx164_change_irq_mask(cached_irq_mask &= ~(1UL << (irq - 16))); +} + +static unsigned int +sx164_startup_irq(unsigned int irq) +{ + sx164_enable_irq(irq); + return 0; +} + +static inline void +sx164_srm_enable_irq(unsigned int irq) +{ + cserve_ena(irq - 16); } static void -sx164_srm_update_irq_hw(unsigned long irq, unsigned long mask, int unmask_p) +sx164_srm_disable_irq(unsigned int irq) { - if (irq >= 16) { - if (unmask_p) - cserve_ena(irq - 16); - else - cserve_dis(irq - 16); - } - else if (irq >= 8) - outb(mask >> 8, 0xA1); /* ISA PIC2 */ - else - outb(mask, 0x21); /* ISA PIC1 */ + cserve_dis(irq - 16); } +static unsigned int +sx164_srm_startup_irq(unsigned int irq) +{ + sx164_srm_enable_irq(irq); + return 0; +} + +static struct hw_interrupt_type sx164_irq_type = { + typename: "SX164", + startup: sx164_startup_irq, + shutdown: sx164_disable_irq, + enable: sx164_enable_irq, + disable: sx164_disable_irq, + ack: sx164_disable_irq, + end: sx164_enable_irq, +}; + +static struct hw_interrupt_type sx164_srm_irq_type = { + typename: "SX164-SRM", + startup: sx164_srm_startup_irq, + shutdown: sx164_srm_disable_irq, + enable: sx164_srm_enable_irq, + disable: sx164_srm_disable_irq, + ack: sx164_srm_disable_irq, + end: sx164_srm_enable_irq, +}; + static void sx164_device_interrupt(unsigned long vector, struct pt_regs *regs) { - unsigned long pld, tmp; + unsigned long pld; unsigned int i; /* Read the interrupt summary register of PYXIS */ @@ -93,35 +131,48 @@ continue; } else { /* if not timer int */ - handle_irq(16 + i, 16 + i, regs); + handle_irq(16 + i, regs); } - *(vulp)PYXIS_INT_REQ = 1UL << i; mb(); - tmp = *(vulp)PYXIS_INT_REQ; + + *(vulp)PYXIS_INT_REQ = 1UL << i; + mb(); + *(vulp)PYXIS_INT_REQ; } } static void sx164_init_irq(void) { + struct hw_interrupt_type *ops; + long i; + outb(0, DMA1_RESET_REG); outb(0, DMA2_RESET_REG); outb(DMA_MODE_CASCADE, DMA2_MODE_REG); outb(0, DMA2_MASK_REG); + init_ISA_irqs(); + init_RTC_irq(); + if (alpha_using_srm) { - alpha_mv.update_irq_hw = sx164_srm_update_irq_hw; alpha_mv.device_interrupt = srm_device_interrupt; + ops = &sx164_srm_irq_type; } else { - /* Note invert on MASK bits. */ - *(vulp)PYXIS_INT_MASK = ~((long)alpha_irq_mask >> 16); - mb(); - *(vulp)PYXIS_INT_MASK; + sx164_change_irq_mask(0); + ops = &sx164_irq_type; + } + + for (i = 16; i < 40; ++i) { + /* Make CERTAIN none of the bogus ints get enabled. */ + if ((0x3b0000 >> i) & 1) + continue; + irq_desc[i].status = IRQ_DISABLED; + irq_desc[i].handler = ops; } - enable_irq(16 + 6); /* enable timer */ - enable_irq(16 + 7); /* enable ISA PIC cascade */ - enable_irq(2); /* enable cascade */ + ops->startup(16 + 6); /* enable timer */ + ops->startup(16 + 7); /* enable ISA PIC cascade */ } /* @@ -202,9 +253,6 @@ min_mem_address: DEFAULT_MEM_BASE, nr_irqs: 40, - irq_probe_mask: _PROBE_MASK(40), - update_irq_hw: sx164_update_irq_hw, - ack_irq: common_ack_irq, device_interrupt: sx164_device_interrupt, init_arch: pyxis_init_arch, diff -urN 2.3.46pre1/arch/alpha/kernel/time.c 2.3.46pre1aa1/arch/alpha/kernel/time.c --- 2.3.46pre1/arch/alpha/kernel/time.c Wed Dec 8 00:05:25 1999 +++ 2.3.46pre1aa1/arch/alpha/kernel/time.c Wed Feb 16 00:28:24 2000 @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include #include @@ -88,13 +90,7 @@ __u32 now; long nticks; -#ifdef __SMP__ - /* When SMP, do this for *all* CPUs, but only do the rest for - the boot CPU. 
*/ - smp_percpu_timer_interrupt(regs); - if (smp_processor_id() != smp_boot_cpuid) - return; -#else +#ifndef __SMP__ /* Not SMP, do kernel PC profiling here. */ if (!user_mode(regs)) alpha_do_profile(regs->pc); @@ -167,6 +163,7 @@ )*60 + sec; /* finally seconds */ } +#if 0 /* * Initialize Programmable Interval Timers with standard values. Some * drivers depend on them being initialized (e.g., joystick driver). @@ -213,6 +210,7 @@ sti(); } #endif +#endif void common_init_pit (void) @@ -248,10 +246,15 @@ void time_init(void) { - void (*irq_handler)(int, void *, struct pt_regs *); unsigned int year, mon, day, hour, min, sec, cc1, cc2; unsigned long cycle_freq, one_percent; long diff; + static struct irqaction timer_irqaction = { timer_interrupt, + SA_INTERRUPT, 0, "timer", + NULL, NULL}; + + /* Startup the timer source. */ + alpha_mv.init_pit(); /* * The Linux interpretation of the CMOS clock register contents: @@ -337,9 +340,7 @@ state.partial_tick = 0L; /* setup timer */ - irq_handler = timer_interrupt; - if (request_irq(TIMER_IRQ, irq_handler, 0, "timer", NULL)) - panic("Could not allocate timer IRQ!"); + setup_irq(TIMER_IRQ, &timer_irqaction); } /* diff -urN 2.3.46pre1/arch/alpha/mm/fault.c 2.3.46pre1aa1/arch/alpha/mm/fault.c --- 2.3.46pre1/arch/alpha/mm/fault.c Wed Nov 24 18:22:03 1999 +++ 2.3.46pre1aa1/arch/alpha/mm/fault.c Wed Feb 16 00:28:25 2000 @@ -130,13 +130,13 @@ * make sure we exit gracefully rather than endlessly redo * the fault. */ +survive: fault = handle_mm_fault(current, vma, address, cause > 0); - up(&mm->mmap_sem); - if (fault < 0) goto out_of_memory; if (fault == 0) goto do_sigbus; + up(&mm->mmap_sem); return; @@ -177,13 +177,23 @@ * us unable to handle the page fault gracefully. */ out_of_memory: - printk(KERN_ALERT "VM: killing process %s(%d)\n", - current->comm, current->pid); - if (!user_mode(regs)) - goto no_context; - do_exit(SIGKILL); + if (current->pid == 1) + { + current->policy |= SCHED_YIELD; + schedule(); + goto survive; + } + up(&mm->mmap_sem); + if (user_mode(regs)) + { + printk(KERN_ALERT "VM: killing process %s(%d)\n", + current->comm, current->pid); + do_exit(SIGKILL); + } + goto no_context; do_sigbus: + up(&mm->mmap_sem); /* * Send a sigbus, regardless of whether we were in kernel * or user mode. diff -urN 2.3.46pre1/arch/i386/kernel/irq.c 2.3.46pre1aa1/arch/i386/kernel/irq.c --- 2.3.46pre1/arch/i386/kernel/irq.c Fri Feb 11 00:05:32 2000 +++ 2.3.46pre1aa1/arch/i386/kernel/irq.c Wed Feb 16 00:28:25 2000 @@ -679,8 +679,24 @@ unsigned long delay; unsigned long val; + /* + * something may have generated an irq long ago and we want to + * flush such a longstanding irq before considering it as spurious. + */ + spin_lock_irq(&irq_controller_lock); + for (i = NR_IRQS-1; i > 0; i--) + if (!irq_desc[i].action) + irq_desc[i].handler->startup(i); + spin_unlock_irq(&irq_controller_lock); + + /* Wait for longstanding interrupts to trigger. */ + for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) + /* about 20ms delay */ synchronize_irq(); + /* - * first, enable any unassigned irqs + * enable any unassigned irqs + * (we must startup again here because if a longstanding irq + * happened in the previous stage, it may have masked itself) */ spin_lock_irq(&irq_controller_lock); for (i = NR_IRQS-1; i > 0; i--) { diff -urN 2.3.46pre1/arch/i386/kernel/signal.c 2.3.46pre1aa1/arch/i386/kernel/signal.c --- 2.3.46pre1/arch/i386/kernel/signal.c Sun Jan 30 15:43:34 2000 +++ 2.3.46pre1aa1/arch/i386/kernel/signal.c Wed Feb 16 00:28:25 2000 @@ -419,13 +419,19 @@ ? 
current->exec_domain->signal_invmap[sig] : sig), &frame->sig); + if (err) + goto give_sigsegv; err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); + if (err) + goto give_sigsegv; if (_NSIG_WORDS > 1) { err |= __copy_to_user(frame->extramask, &set->sig[1], sizeof(frame->extramask)); } + if (err) + goto give_sigsegv; /* Set up to return from userspace. If provided, use a stub already in userspace. */ @@ -486,6 +492,8 @@ err |= __put_user(&frame->info, &frame->pinfo); err |= __put_user(&frame->uc, &frame->puc); err |= __copy_to_user(&frame->info, info, sizeof(*info)); + if (err) + goto give_sigsegv; /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); @@ -497,6 +505,8 @@ err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + goto give_sigsegv; /* Set up to return from userspace. If provided, use a stub already in userspace. */ diff -urN 2.3.46pre1/arch/i386/mm/fault.c 2.3.46pre1aa1/arch/i386/mm/fault.c --- 2.3.46pre1/arch/i386/mm/fault.c Sun Jan 30 15:43:27 2000 +++ 2.3.46pre1aa1/arch/i386/mm/fault.c Wed Feb 16 00:28:25 2000 @@ -32,6 +32,7 @@ { struct vm_area_struct * vma; unsigned long start = (unsigned long) addr; + int fault; if (!size) return 1; @@ -51,8 +52,12 @@ start &= PAGE_MASK; for (;;) { - if (handle_mm_fault(current, vma, start, 1) <= 0) - goto bad_area; +survive: + fault = handle_mm_fault(current, vma, start, 1); + if (!fault) + goto do_sigbus; + if (fault < 0) + goto out_of_memory; if (!size) break; size--; @@ -75,6 +80,19 @@ bad_area: return 0; + +do_sigbus: + force_sig(SIGBUS, current); + goto bad_area; + +out_of_memory: + if (current->pid == 1) + { + current->policy |= SCHED_YIELD; + schedule(); + goto survive; + } + goto bad_area; } static void __init handle_wp_test (void) @@ -192,6 +210,7 @@ * make sure we exit gracefully rather than endlessly redo * the fault. */ +survive: { int fault = handle_mm_fault(tsk, vma, address, write); if (fault < 0) @@ -288,10 +307,39 @@ * us unable to handle the page fault gracefully. */ out_of_memory: + if (tsk->pid == 1) + { + tsk->policy |= SCHED_YIELD; + schedule(); + goto survive; + } up(&mm->mmap_sem); - printk("VM: killing process %s\n", tsk->comm); if (error_code & 4) - do_exit(SIGKILL); + { + if (tsk->oom_kill_try++ > 10 || + !((regs->eflags >> 12) & 3)) + { + printk(KERN_ALERT "VM: killing process %s\n", + tsk->comm); + do_exit(SIGKILL); + } + else + { + /* + * The task is running with privileges and so we + * trust it and give it a chance to die gracefully.
+ */ + printk(KERN_ALERT "VM: terminating process %s\n", + tsk->comm); + force_sig(SIGTERM, current); + if (tsk->oom_kill_try > 1) + { + tsk->policy |= SCHED_YIELD; + schedule(); + } + return; + } + } goto no_context; do_sigbus: diff -urN 2.3.46pre1/arch/m68k/atari/stram.c 2.3.46pre1aa1/arch/m68k/atari/stram.c --- 2.3.46pre1/arch/m68k/atari/stram.c Sun Jan 30 15:43:34 2000 +++ 2.3.46pre1aa1/arch/m68k/atari/stram.c Wed Feb 16 00:28:24 2000 @@ -1168,7 +1168,7 @@ { unsigned long start, len; - while( CURRENT ) { + while( !QUEUE_EMPTY ) { if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) panic("stram: request list destroyed"); if (CURRENT->bh) { diff -urN 2.3.46pre1/drivers/acorn/block/fd1772.c 2.3.46pre1aa1/drivers/acorn/block/fd1772.c --- 2.3.46pre1/drivers/acorn/block/fd1772.c Thu Jan 13 05:17:19 2000 +++ 2.3.46pre1aa1/drivers/acorn/block/fd1772.c Wed Feb 16 00:28:24 2000 @@ -591,7 +591,7 @@ { printk("FDC1772: fd_error\n"); /*panic("fd1772: fd_error"); *//* DAG tmp */ - if (!CURRENT) + if (QUEUE_EMPTY) return; CURRENT->errors++; if (CURRENT->errors >= MAX_ERRORS) { @@ -1230,14 +1230,14 @@ DPRINT(("redo_fd_request: CURRENT=%08lx CURRENT->rq_dev=%04x CURRENT->sector=%ld\n", (unsigned long) CURRENT, CURRENT ? CURRENT->rq_dev : 0, - CURRENT ? CURRENT->sector : 0)); + !QUEUE_EMPTY ? CURRENT->sector : 0)); - if (CURRENT && CURRENT->rq_status == RQ_INACTIVE) + if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE) goto the_end; repeat: - if (!CURRENT) + if (QUEUE_EMPTY) goto the_end; if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) diff -urN 2.3.46pre1/drivers/acorn/block/mfmhd.c 2.3.46pre1aa1/drivers/acorn/block/mfmhd.c --- 2.3.46pre1/drivers/acorn/block/mfmhd.c Sun Jan 30 15:43:27 2000 +++ 2.3.46pre1aa1/drivers/acorn/block/mfmhd.c Wed Feb 16 00:28:24 2000 @@ -758,7 +758,7 @@ /* No - its the end of the line */ /* end_request's should have happened at the end of sector DMAs */ /* Turns Drive LEDs off - may slow it down? 
*/ - if (!CURRENT) + if (QUEUE_EMPTY) issue_command(CMD_CKV, block, 2); Busy = 0; @@ -891,7 +891,7 @@ { DBG("mfm_request CURRENT=%p Busy=%d\n", CURRENT, Busy); - if (!CURRENT) { + if (QUEUE_EMPTY) { DBG("mfm_request: Exited due to NULL Current 1\n"); return; } @@ -918,7 +918,7 @@ DBG("mfm_request: before INIT_REQUEST\n"); - if (!CURRENT) { + if (QUEUE_EMPTY) { printk("mfm_request: Exiting due to !CURRENT (pre)\n"); CLEAR_INTR; Busy = 0; diff -urN 2.3.46pre1/drivers/block/Config.in 2.3.46pre1aa1/drivers/block/Config.in --- 2.3.46pre1/drivers/block/Config.in Fri Feb 11 00:05:33 2000 +++ 2.3.46pre1aa1/drivers/block/Config.in Wed Feb 16 00:28:24 2000 @@ -198,6 +198,10 @@ comment 'Additional Block Devices' +tristate 'Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM N +if [ "$CONFIG_BLK_DEV_LVM" != "n" ]; then + bool ' LVM information in proc filesystem' CONFIG_LVM_PROC_FS Y +fi tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP if [ "$CONFIG_NET" = "y" ]; then tristate 'Network block device support' CONFIG_BLK_DEV_NBD diff -urN 2.3.46pre1/drivers/block/DAC960.c 2.3.46pre1aa1/drivers/block/DAC960.c --- 2.3.46pre1/drivers/block/DAC960.c Sun Jan 30 15:43:37 2000 +++ 2.3.46pre1aa1/drivers/block/DAC960.c Wed Feb 16 00:28:24 2000 @@ -1010,16 +1010,19 @@ static int DAC_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh) + struct buffer_head *bh, int __max_segments) { int max_segments; DAC960_Controller_T * Controller = q->queuedata; max_segments = Controller->MaxSegmentsPerRequest[MINOR(req->rq_dev)]; + if (__max_segments < max_segments) + max_segments = __max_segments; if (req->bhtail->b_data + req->bhtail->b_size != bh->b_data) { if (req->nr_segments < max_segments) { req->nr_segments++; + q->nr_segments++; return 1; } return 0; @@ -1030,16 +1033,22 @@ static int DAC_merge_requests_fn(request_queue_t *q, struct request *req, - struct request *next) + struct request *next, + int __max_segments) { int max_segments; DAC960_Controller_T * Controller = q->queuedata; int total_segments = req->nr_segments + next->nr_segments; max_segments = Controller->MaxSegmentsPerRequest[MINOR(req->rq_dev)]; + if (__max_segments < max_segments) + max_segments = __max_segments; if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + { total_segments--; + q->nr_segments--; + } if (total_segments > max_segments) return 0; @@ -1156,7 +1165,6 @@ blk_size[MajorNumber] = NULL; blksize_size[MajorNumber] = NULL; max_sectors[MajorNumber] = NULL; - max_segments[MajorNumber] = NULL; /* Remove the Generic Disk Information structure from the list. 
@@ -1305,15 +1313,17 @@ static boolean DAC960_ProcessRequest(DAC960_Controller_T *Controller, boolean WaitForCommand) { - IO_Request_T **RequestQueuePointer = - &blk_dev[DAC960_MAJOR + Controller->ControllerNumber].request_queue.current_request; + struct list_head * queue_head; IO_Request_T *Request; DAC960_Command_T *Command; char *RequestBuffer; + + queue_head = &blk_dev[DAC960_MAJOR + Controller->ControllerNumber].request_queue.queue_head; while (true) { - Request = *RequestQueuePointer; - if (Request == NULL || Request->rq_status == RQ_INACTIVE) return false; + if (list_empty(queue_head)) return false; + Request = blkdev_entry_next_request(queue_head); + if (Request->rq_status == RQ_INACTIVE) return false; Command = DAC960_AllocateCommand(Controller); if (Command != NULL) break; if (!WaitForCommand) return false; @@ -1335,7 +1345,7 @@ Command->BufferHeader = Request->bh; RequestBuffer = Request->buffer; Request->rq_status = RQ_INACTIVE; - *RequestQueuePointer = Request->next; + blkdev_dequeue_request(Request); wake_up(&wait_for_request); if (Command->SegmentCount == 1) { diff -urN 2.3.46pre1/drivers/block/Makefile 2.3.46pre1aa1/drivers/block/Makefile --- 2.3.46pre1/drivers/block/Makefile Fri Feb 11 00:05:33 2000 +++ 2.3.46pre1aa1/drivers/block/Makefile Wed Feb 16 00:28:24 2000 @@ -326,6 +326,14 @@ endif endif +ifeq ($(CONFIG_BLK_DEV_LVM),y) +L_OBJS += lvm.o lvm-snap.o +else + ifeq ($(CONFIG_BLK_DEV_LVM),m) + M_OBJS += lvm-mod.o + endif +endif + ifeq ($(CONFIG_BLK_DEV_MD),y) LX_OBJS += md.o @@ -407,3 +415,6 @@ ide-probe-mod.o: ide-probe.o ide-geometry.o $(LD) $(LD_RFLAG) -r -o $@ ide-probe.o ide-geometry.o + +lvm-mod.o: lvm.o lvm-snap.o + $(LD) -r -o $@ lvm.o lvm-snap.o diff -urN 2.3.46pre1/drivers/block/README.lvm 2.3.46pre1aa1/drivers/block/README.lvm --- 2.3.46pre1/drivers/block/README.lvm Thu Jan 1 01:00:00 1970 +++ 2.3.46pre1aa1/drivers/block/README.lvm Wed Feb 16 00:28:24 2000 @@ -0,0 +1,8 @@ + +This is the Logical Volume Manager driver for Linux. + +Tools and a library for managing logical volumes can be found +at . + +There you can also obtain current driver versions. + diff -urN 2.3.46pre1/drivers/block/acsi.c 2.3.46pre1aa1/drivers/block/acsi.c --- 2.3.46pre1/drivers/block/acsi.c Sun Jan 30 15:43:28 2000 +++ 2.3.46pre1aa1/drivers/block/acsi.c Wed Feb 16 00:28:24 2000 @@ -769,7 +769,7 @@ static void bad_rw_intr( void ) { - if (!CURRENT) + if (QUEUE_EMPTY) return; if (++CURRENT->errors >= MAX_ERRORS) @@ -843,7 +843,7 @@ DEVICE_INTR = NULL; printk( KERN_ERR "ACSI timeout\n" ); - if (!CURRENT) return; + if (QUEUE_EMPTY) return; if (++CURRENT->errors >= MAX_ERRORS) { #ifdef DEBUG printk( KERN_ERR "ACSI: too many errors.\n" ); #endif @@ -953,7 +953,7 @@ unsigned long pbuffer; struct buffer_head *bh; - if (CURRENT && CURRENT->rq_status == RQ_INACTIVE) { + if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE) { if (!DEVICE_INTR) { ENABLE_IRQ(); stdma_release(); @@ -969,7 +969,7 @@ /* Another check here: An interrupt or timer event could have * happened since the last check!
*/ - if (CURRENT && CURRENT->rq_status == RQ_INACTIVE) { + if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE) { if (!DEVICE_INTR) { ENABLE_IRQ(); stdma_release(); @@ -979,7 +979,7 @@ if (DEVICE_INTR) return; - if (!CURRENT) { + if (QUEUE_EMPTY) { CLEAR_INTR; ENABLE_IRQ(); stdma_release(); diff -urN 2.3.46pre1/drivers/block/amiflop.c 2.3.46pre1aa1/drivers/block/amiflop.c --- 2.3.46pre1/drivers/block/amiflop.c Sun Jan 30 15:43:37 2000 +++ 2.3.46pre1aa1/drivers/block/amiflop.c Wed Feb 16 00:28:24 2000 @@ -1385,12 +1385,12 @@ char *data; unsigned long flags; - if (CURRENT && CURRENT->rq_status == RQ_INACTIVE){ + if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE){ return; } repeat: - if (!CURRENT) { + if (QUEUE_EMPTY) { /* Nothing left to do */ return; } diff -urN 2.3.46pre1/drivers/block/ataflop.c 2.3.46pre1aa1/drivers/block/ataflop.c --- 2.3.46pre1/drivers/block/ataflop.c Thu Jan 13 05:17:19 2000 +++ 2.3.46pre1aa1/drivers/block/ataflop.c Wed Feb 16 00:28:24 2000 @@ -624,7 +624,7 @@ return; } - if (!CURRENT) return; + if (QUEUE_EMPTY) return; CURRENT->errors++; if (CURRENT->errors >= MAX_ERRORS) { printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive ); @@ -1450,18 +1450,18 @@ int device, drive, type; DPRINT(("redo_fd_request: CURRENT=%08lx CURRENT->dev=%04x CURRENT->sector=%ld\n", - (unsigned long)CURRENT, CURRENT ? CURRENT->rq_dev : 0, - CURRENT ? CURRENT->sector : 0 )); + (unsigned long)CURRENT, !QUEUE_EMPTY ? CURRENT->rq_dev : 0, + !QUEUE_EMPTY ? CURRENT->sector : 0 )); IsFormatting = 0; - if (CURRENT && CURRENT->rq_status == RQ_INACTIVE){ + if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE){ return; } repeat: - if (!CURRENT) + if (QUEUE_EMPTY) goto the_end; if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) diff -urN 2.3.46pre1/drivers/block/cpqarray.c 2.3.46pre1aa1/drivers/block/cpqarray.c --- 2.3.46pre1/drivers/block/cpqarray.c Sun Jan 30 15:43:28 2000 +++ 2.3.46pre1aa1/drivers/block/cpqarray.c Wed Feb 16 00:28:24 2000 @@ -880,14 +880,16 @@ cmdlist_t *c; int seg, sect; char *lastdataend; - request_queue_t * q; + struct list_head * queue_head; struct buffer_head *bh; struct request *creq; - q = &blk_dev[MAJOR_NR+ctlr].request_queue; + queue_head = &blk_dev[MAJOR_NR+ctlr].request_queue.queue_head; - creq = q->current_request; - if (creq == NULL || creq->rq_status == RQ_INACTIVE) + if (list_empty(queue_head)) + goto doreq_done; + creq = blkdev_entry_next_request(queue_head); + if (creq->rq_status == RQ_INACTIVE) goto doreq_done; if (ctlr != MAJOR(creq->rq_dev)-MAJOR_NR || @@ -961,10 +963,9 @@ bh->b_reqnext = NULL; DBGPX( printk("More to do on same request %p\n", creq); ); } else { -DBGPX( printk("Done with %p, queueing %p\n", creq, creq->next); ); - creq->rq_status = RQ_INACTIVE; - q->current_request = creq->next; - wake_up(&wait_for_request); +DBGPX( printk("Done with %p\n", creq); ); + blkdev_dequeue_request(creq); + end_that_request_last(creq); } c->req.hdr.cmd = (creq->cmd == READ) ? 
IDA_READ : IDA_WRITE; diff -urN 2.3.46pre1/drivers/block/floppy.c 2.3.46pre1aa1/drivers/block/floppy.c --- 2.3.46pre1/drivers/block/floppy.c Fri Feb 11 00:05:33 2000 +++ 2.3.46pre1aa1/drivers/block/floppy.c Wed Feb 16 00:28:24 2000 @@ -2274,7 +2274,7 @@ probing = 0; reschedule_timeout(MAXTIMEOUT, "request done %d", uptodate); - if (!CURRENT){ + if (QUEUE_EMPTY){ DPRINT("request list destroyed in floppy request done\n"); return; } @@ -2288,14 +2288,14 @@ DRS->maxtrack = 1; /* unlock chained buffers */ - while (current_count_sectors && CURRENT && + while (current_count_sectors && !QUEUE_EMPTY && current_count_sectors >= CURRENT->current_nr_sectors){ current_count_sectors -= CURRENT->current_nr_sectors; CURRENT->nr_sectors -= CURRENT->current_nr_sectors; CURRENT->sector += CURRENT->current_nr_sectors; end_request(1); } - if (current_count_sectors && CURRENT){ + if (current_count_sectors && !QUEUE_EMPTY){ /* "unlock" last subsector */ CURRENT->buffer += current_count_sectors <<9; CURRENT->current_nr_sectors -= current_count_sectors; @@ -2304,7 +2304,7 @@ return; } - if (current_count_sectors && !CURRENT) + if (current_count_sectors && QUEUE_EMPTY) DPRINT("request list destroyed in floppy request done\n"); } else { @@ -2867,14 +2867,14 @@ if (current_drive < N_DRIVE) floppy_off(current_drive); - if (CURRENT && CURRENT->rq_status == RQ_INACTIVE){ + if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE){ CLEAR_INTR; unlock_fdc(); return; } while(1){ - if (!CURRENT) { + if (QUEUE_EMPTY) { CLEAR_INTR; unlock_fdc(); return; diff -urN 2.3.46pre1/drivers/block/hd.c 2.3.46pre1aa1/drivers/block/hd.c --- 2.3.46pre1/drivers/block/hd.c Sun Jan 30 15:43:28 2000 +++ 2.3.46pre1aa1/drivers/block/hd.c Wed Feb 16 00:28:24 2000 @@ -145,7 +145,7 @@ unsigned long flags; char devc; - devc = CURRENT ? 'a' + DEVICE_NR(CURRENT->rq_dev) : '?'; + devc = !QUEUE_EMPTY ? 
'a' + DEVICE_NR(CURRENT->rq_dev) : '?'; save_flags (flags); sti(); #ifdef VERBOSE_ERRORS @@ -174,7 +174,7 @@ if (hd_error & (BBD_ERR|ECC_ERR|ID_ERR|MARK_ERR)) { printk(", CHS=%d/%d/%d", (inb(HD_HCYL)<<8) + inb(HD_LCYL), inb(HD_CURRENT) & 0xf, inb(HD_SECTOR)); - if (CURRENT) + if (!QUEUE_EMPTY) printk(", sector=%ld", CURRENT->sector); } printk("\n"); @@ -351,7 +351,7 @@ { int dev; - if (!CURRENT) + if (QUEUE_EMPTY) return; dev = DEVICE_NR(CURRENT->rq_dev); if (++CURRENT->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) { @@ -414,7 +414,7 @@ #if (HD_DELAY > 0) last_req = read_timer(); #endif - if (CURRENT) + if (!QUEUE_EMPTY) hd_request(); return; } @@ -475,7 +475,7 @@ unsigned int dev; DEVICE_INTR = NULL; - if (!CURRENT) + if (QUEUE_EMPTY) return; disable_irq(HD_IRQ); sti(); @@ -522,7 +522,7 @@ { unsigned int dev, block, nsect, sec, track, head, cyl; - if (CURRENT && CURRENT->rq_status == RQ_INACTIVE) return; + if (!QUEUE_EMPTY && CURRENT->rq_status == RQ_INACTIVE) return; if (DEVICE_INTR) return; repeat: diff -urN 2.3.46pre1/drivers/block/ide.c 2.3.46pre1aa1/drivers/block/ide.c --- 2.3.46pre1/drivers/block/ide.c Fri Feb 11 00:05:33 2000 +++ 2.3.46pre1aa1/drivers/block/ide.c Wed Feb 16 00:28:25 2000 @@ -501,8 +501,7 @@ if (!end_that_request_first(rq, uptodate, hwgroup->drive->name)) { add_blkdev_randomness(MAJOR(rq->rq_dev)); - hwgroup->drive->queue.current_request = rq->next; - blk_dev[MAJOR(rq->rq_dev)].request_queue.current_request = NULL; + blkdev_dequeue_request(rq); hwgroup->rq = NULL; end_that_request_last(rq); } @@ -772,8 +771,7 @@ } } spin_lock_irqsave(&io_request_lock, flags); - drive->queue.current_request = rq->next; - blk_dev[MAJOR(rq->rq_dev)].request_queue.current_request = NULL; + blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; rq->rq_status = RQ_INACTIVE; spin_unlock_irqrestore(&io_request_lock, flags); @@ -1076,7 +1074,7 @@ { ide_startstop_t startstop; unsigned long block, blockend; - struct request *rq = drive->queue.current_request; + struct request *rq = blkdev_entry_next_request(&drive->queue.queue_head); unsigned int minor = MINOR(rq->rq_dev), unit = minor >> PARTN_BITS; ide_hwif_t *hwif = HWIF(drive); @@ -1159,7 +1157,7 @@ best = NULL; drive = hwgroup->drive; do { - if (drive->queue.current_request && (!drive->sleep || 0 <= (signed long)(jiffies - drive->sleep))) { + if (!list_empty(&drive->queue.queue_head) && (!drive->sleep || 0 <= (signed long)(jiffies - drive->sleep))) { if (!best || (drive->sleep && (!best->sleep || 0 < (signed long)(best->sleep - drive->sleep))) || (!best->sleep && 0 < (signed long)(WAKEUP(best) - WAKEUP(drive)))) @@ -1247,8 +1245,6 @@ drive = hwgroup->drive; do { bdev = &blk_dev[HWIF(drive)->major]; - if( !bdev->request_queue.plugged ) - bdev->request_queue.current_request = NULL; /* (broken since patch-2.1.15) */ if (drive->sleep && (!sleep || 0 < (signed long)(sleep - drive->sleep))) sleep = drive->sleep; } while ((drive = drive->next) != hwgroup->drive); @@ -1288,7 +1284,7 @@ bdev = &blk_dev[hwif->major]; if ( bdev->request_queue.plugged ) /* FIXME: paranoia */ printk("%s: Huh? nuking plugged queue\n", drive->name); - bdev->request_queue.current_request = hwgroup->rq = drive->queue.current_request; + hwgroup->rq = blkdev_entry_next_request(&drive->queue.queue_head); /* * Some systems have trouble with IDE IRQs arriving while * the driver is still setting things up. 
So, here we disable @@ -1670,7 +1666,7 @@ rq->sem = NULL; rq->bh = NULL; rq->bhtail = NULL; - rq->next = NULL; + rq->q = NULL; } /* @@ -1703,7 +1699,7 @@ unsigned long flags; ide_hwgroup_t *hwgroup = HWGROUP(drive); unsigned int major = HWIF(drive)->major; - struct request *cur_rq; + struct list_head * queue_head; DECLARE_MUTEX_LOCKED(sem); #ifdef CONFIG_BLK_DEV_PDC4030 @@ -1716,20 +1712,17 @@ if (action == ide_wait) rq->sem = &sem; spin_lock_irqsave(&io_request_lock, flags); - cur_rq = drive->queue.current_request; - if (cur_rq == NULL || action == ide_preempt) { - rq->next = cur_rq; - drive->queue.current_request = rq; + queue_head = &drive->queue.queue_head; + if (list_empty(queue_head) || action == ide_preempt) { if (action == ide_preempt) hwgroup->rq = NULL; } else { if (action == ide_wait || action == ide_end) { - while (cur_rq->next != NULL) /* find end of list */ - cur_rq = cur_rq->next; - } - rq->next = cur_rq->next; - cur_rq->next = rq; + queue_head = queue_head->prev; + } else + queue_head = queue_head->next; } + list_add(&rq->queue, queue_head); ide_do_request(hwgroup, 0); spin_unlock_irqrestore(&io_request_lock, flags); if (action == ide_wait) { diff -urN 2.3.46pre1/drivers/block/ll_rw_blk.c 2.3.46pre1aa1/drivers/block/ll_rw_blk.c --- 2.3.46pre1/drivers/block/ll_rw_blk.c Fri Feb 11 00:05:33 2000 +++ 2.3.46pre1aa1/drivers/block/ll_rw_blk.c Wed Feb 16 00:28:25 2000 @@ -3,6 +3,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1994, Karl Keyte: Added support for disk statistics + * Elevator latency, (C) 2000 Andrea Arcangeli SuSE */ /* @@ -27,6 +28,8 @@ #include +#define DEBUG_ELEVATOR + /* * MAC Floppy IWM hooks */ @@ -147,6 +150,18 @@ return ret; } +static inline int get_request_latency(elevator_t * elevator, int rw) +{ + int latency; + + if (rw != READ) + latency = elevator->write_latency; + else + latency = elevator->read_latency; + + return latency; +} + void blk_cleanup_queue(request_queue_t * q) { memset(q, 0, sizeof(*q)); @@ -168,11 +183,12 @@ } static int ll_merge_fn(request_queue_t *q, struct request *req, - struct buffer_head *bh) + struct buffer_head *bh, int max_segments) { if (req->bhtail->b_data + req->bhtail->b_size != bh->b_data) { - if (req->nr_segments < MAX_SEGMENTS) { + if (req->nr_segments < max_segments) { req->nr_segments++; + q->nr_segments++; return 1; } return 0; @@ -181,14 +197,17 @@ } static int ll_merge_requests_fn(request_queue_t *q, struct request *req, - struct request *next) + struct request *next, int max_segments) { int total_segments = req->nr_segments + next->nr_segments; if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) + { total_segments--; + q->nr_segments--; + } - if (total_segments > MAX_SEGMENTS) + if (total_segments > max_segments) return 0; req->nr_segments = total_segments; @@ -197,8 +216,9 @@ void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) { + INIT_LIST_HEAD(&q->queue_head); + q->elevator = ELEVATOR_DEFAULTS; q->request_fn = rfn; - q->current_request = NULL; q->merge_fn = ll_merge_fn; q->merge_requests_fn = ll_merge_requests_fn; q->make_request_fn = NULL; @@ -230,13 +250,15 @@ spin_unlock_irq(&io_request_lock); BUG(); } - if (q->current_request) + if (!list_empty(&q->queue_head)) return; q->plugged = 1; queue_task(&q->plug_tq, &tq_disk); } +void plug_device_noop(request_queue_t *q, kdev_t dev) { } + /* * remove the plug and let it rip.. */
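The elevator added here bounds starvation with per-queue latencies: get_request_latency() above picks read_latency or write_latency depending on the direction, and every request is stamped with a sequence deadline derived from it (elevator_sequence(), next hunk). A deadline is consumed in units of queue segments as later I/O passes the request. A rough standalone sketch of just that arithmetic; the field names follow the patch, the numeric values are invented for illustration:

    /* Sketch of the deadline arithmetic only (plain C, values invented).
     * A request stamped with deadline = sequence + latency may be passed
     * by at most "latency" worth of queue segments before it has to be
     * served; the per-queue sequence counter advances as I/O is queued. */
    #include <stdio.h>

    typedef struct {
            int sequence;           /* advances while requests are queued */
            int read_latency;       /* how far a READ may be bypassed */
            int write_latency;      /* same bound for WRITEs */
    } elevator_t;

    #define READ  0
    #define WRITE 1

    static int request_latency(elevator_t *e, int rw)
    {
            return rw == READ ? e->read_latency : e->write_latency;
    }

    int main(void)
    {
            elevator_t e = { 1000, 128, 8192 };     /* invented numbers */
            int deadline = e.sequence + request_latency(&e, READ);

            /* once e.sequence catches up with the deadline, the request
             * is "starving" and nothing may be placed in front of it */
            printf("READ queued at %d must run by sequence %d\n",
                   e.sequence, deadline);
            return 0;
    }

Giving reads a much smaller latency than writes is what keeps interactive reads responsive under a heavy write load.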
@@ -248,7 +270,7 @@ spin_lock_irqsave(&io_request_lock,flags); if (q->plugged) { q->plugged = 0; - if (q->current_request) + if (!list_empty(&q->queue_head)) (q->request_fn)(q); } spin_unlock_irqrestore(&io_request_lock,flags); @@ -298,7 +320,8 @@ add_wait_queue(&wait_for_request, &wait); for (;;) { - current->state = TASK_UNINTERRUPTIBLE; + /* FIFO wake-one wakeup will handle starvation even better */ + __set_current_state(TASK_UNINTERRUPTIBLE|TASK_EXCLUSIVE); spin_lock_irqsave(&io_request_lock,flags); req = get_request(n, dev); spin_unlock_irqrestore(&io_request_lock,flags); @@ -388,6 +411,109 @@ printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n"); } +/* elevator */ + +static inline struct list_head * seek_to_not_starving_chunk(request_queue_t * q, + int * lat, int * starving) +{ + int sequence = q->elevator.sequence; + struct list_head * entry = q->queue_head.prev; + int pos = 0; + + do { + struct request * req = blkdev_entry_to_request(entry); + if (time_before_eq(req->elevator_sequence, sequence)) + { + *lat -= q->nr_segments - pos; + *starving = 1; + return entry; + } + pos += req->nr_segments; + } while ((entry = entry->prev) != &q->queue_head); + + *starving = 0; + + return entry->next; +} + +static inline void elevator_merge_requests(elevator_t * e, struct request * req, struct request * next) +{ + if (time_before(next->elevator_sequence, req->elevator_sequence)) + req->elevator_sequence = next->elevator_sequence; + if (req->cmd == READ) + e->read_pendings--; + +} + +static inline int elevator_sequence(elevator_t * e, int latency) +{ + return latency + e->sequence; +} + +#define elevator_merge_before(q, req, lat) __elevator_merge((q), (req), (lat), 0) +#define elevator_merge_after(q, req, lat) __elevator_merge((q), (req), (lat), 1) +static inline void __elevator_merge(request_queue_t * q, struct request * req, int latency, int after) +{ + int sequence = elevator_sequence(&q->elevator, latency); + if (after) + sequence -= req->nr_segments; + if (time_before(sequence, req->elevator_sequence)) + req->elevator_sequence = sequence; +} + +static inline void elevator_queue(request_queue_t * q, + struct request * req, + struct list_head * entry, + int latency, int starving) +{ + struct request * tmp, * __tmp; + int __latency = latency; + + __tmp = tmp = blkdev_entry_to_request(entry); + + for (;; tmp = blkdev_next_request(tmp)) + { + if ((latency -= tmp->nr_segments) <= 0) + { + tmp = __tmp; + latency = __latency; + + if (starving) + break; + + if (q->head_active && !q->plugged) + { + latency -= tmp->nr_segments; + break; + } + + list_add(&req->queue, &q->queue_head); + goto after_link; + } + + if (tmp->queue.next == &q->queue_head) + break; + + { + const int after_current = IN_ORDER(tmp,req); + const int before_next = IN_ORDER(req,blkdev_next_request(tmp)); + + if (!IN_ORDER(tmp,blkdev_next_request(tmp))) { + if (after_current || before_next) + break; + } else { + if (after_current && before_next) + break; + } + } + } + + list_add(&req->queue, &tmp->queue); + + after_link: + req->elevator_sequence = elevator_sequence(&q->elevator, latency); + } + /* * add-request adds a request to the linked list. * It disables interrupts (acquires the request spinlock) so that it can muck * which is important for drive_stat_acct() above.
*/ -static inline void __add_request(request_queue_t * q, struct request * req) +static inline void __add_request(request_queue_t * q, struct request * req, + int empty, struct list_head * entry, + int latency, int starving) { - int major = MAJOR(req->rq_dev); - struct request * tmp; + int major; drive_stat_acct(req, req->nr_sectors, 1); - req->next = NULL; - if (!(tmp = q->current_request)) { - q->current_request = req; + if (empty) { + req->elevator_sequence = elevator_sequence(&q->elevator, latency); + list_add(&req->queue, &q->queue_head); return; } - for ( ; tmp->next ; tmp = tmp->next) { - const int after_current = IN_ORDER(tmp,req); - const int before_next = IN_ORDER(req,tmp->next); - - if (!IN_ORDER(tmp,tmp->next)) { - if (after_current || before_next) - break; - } else { - if (after_current && before_next) - break; - } - } - req->next = tmp->next; - tmp->next = req; + elevator_queue(q, req, entry, latency, starving); /* * FIXME(eric) I don't understand why there is a need for this @@ -432,6 +546,7 @@ * I am leaving this in here until I hear back from the COMPAQ * people. */ + major = MAJOR(req->rq_dev); if (major >= COMPAQ_SMART2_MAJOR+0 && major <= COMPAQ_SMART2_MAJOR+7) { (q->request_fn)(q); @@ -448,12 +563,14 @@ */ static inline void attempt_merge (request_queue_t * q, struct request *req, - int max_sectors) + int max_sectors, + int max_segments) { - struct request *next = req->next; - - if (!next) + struct request *next; + + if (req->queue.next == &q->queue_head) return; + next = blkdev_next_request(req); if (req->sector + req->nr_sectors != next->sector) return; if (next->sem || req->cmd != next->cmd || req->rq_dev != next->rq_dev || req->nr_sectors + next->nr_sectors > max_sectors) @@ -464,25 +581,79 @@ * will have been updated to the appropriate number, * and we shouldn't do it here too. 
*/ - if(!(q->merge_requests_fn)(q, req, next)) + if(!(q->merge_requests_fn)(q, req, next, max_segments)) return; + elevator_merge_requests(&q->elevator, req, next); req->bhtail->b_reqnext = next->bh; req->bhtail = next->bhtail; req->nr_sectors += next->nr_sectors; next->rq_status = RQ_INACTIVE; - req->next = next->next; + list_del(&next->queue); wake_up (&wait_for_request); } +static inline void elevator_debug(request_queue_t * q, kdev_t dev) +{ +#ifdef DEBUG_ELEVATOR + int read_pendings = 0, nr_segments = 0; + elevator_t * elevator = &q->elevator; + struct list_head * entry = &q->queue_head; + static int counter; + + if (counter++ % 100) + return; + + while ((entry = entry->next) != &q->queue_head) + { + struct request * req; + + req = blkdev_entry_to_request(entry); + if (!req->q) + continue; + if (req->cmd == READ) + read_pendings++; + nr_segments += req->nr_segments; + } + + if (read_pendings != elevator->read_pendings) + { + printk(KERN_WARNING + "%s: elevator read_pendings %d should be %d\n", + kdevname(dev), elevator->read_pendings, + read_pendings); + elevator->read_pendings = read_pendings; + } + if (nr_segments != q->nr_segments) + { + printk(KERN_WARNING + "%s: elevator nr_segments %d should be %d\n", + kdevname(dev), q->nr_segments, + nr_segments); + q->nr_segments = nr_segments; + } +#endif +} + +static inline void elevator_account_request(request_queue_t * q, struct request * req) +{ + q->elevator.sequence++; + if (req->cmd == READ) + q->elevator.read_pendings++; + q->nr_segments++; +} +
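elevator_account_request() above is what keeps the starvation check cheap: the queue maintains a running sequence number, a count of pending reads, and a total segment count, and seek_to_not_starving_chunk() (earlier hunk) walks the queue from the tail looking for the first request whose stamped sequence has already expired. A simplified standalone model of that scan, using a plain array instead of the kernel's list_head so it compiles on its own:

    /* Simplified model of the starvation scan (plain C). Walk the queue
     * from the tail towards the head and stop at the first request whose
     * elevator_sequence has already been reached; new I/O must not be
     * inserted or merged in front of it. Values are invented. */
    #include <stdio.h>

    struct req { int elevator_sequence; };

    static int first_starving(struct req *q, int n, int sequence)
    {
            int i;
            for (i = n - 1; i >= 0; i--)            /* tail towards head */
                    if (q[i].elevator_sequence <= sequence)
                            return i;               /* this one is starving */
            return -1;                              /* nobody is starving */
    }

    int main(void)
    {
            struct req q[4] = { {90}, {105}, {140}, {200} };
            int idx = first_starving(q, 4, 100);    /* current sequence: 100 */
            printf("starving request at index %d\n", idx);  /* prints 0 */
            return 0;
    }

The real code additionally converts the scan position into a remaining-latency budget, which is why it tracks nr_segments alongside the list walk.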
static inline void __make_request(request_queue_t * q, int rw, struct buffer_head * bh) { int major = MAJOR(bh->b_rdev); unsigned int sector, count; - struct request * req; + int max_segments = MAX_SEGMENTS; + struct request * req, * prev; int rw_ahead, max_req, max_sectors; unsigned long flags; + int orig_latency, latency, __latency, starving, __starving, empty; + struct list_head * entry, * __entry; count = bh->b_size >> 9; sector = bh->b_rsector; @@ -569,13 +740,18 @@ */ max_sectors = get_max_sectors(bh->b_rdev); + __latency = orig_latency = get_request_latency(&q->elevator, rw); + /* * Now we acquire the request spinlock, we have to be mega careful * not to schedule or do something nonatomic */ spin_lock_irqsave(&io_request_lock,flags); - req = q->current_request; - if (!req) { + elevator_debug(q, bh->b_rdev); + + empty = 0; + if (list_empty(&q->queue_head)) { + empty = 1; /* MD and loop can't handle plugging without deadlocking */ if (q->plug_device_fn) q->plug_device_fn(q, bh->b_rdev); /* is atomic */ @@ -584,6 +760,17 @@ goto get_rq; } + /* avoid write-bombs to not hurt interactiveness of reads */ + if (rw != READ && q->elevator.read_pendings) + max_segments = q->elevator.max_bomb_segments; + + entry = seek_to_not_starving_chunk(q, &__latency, &starving); + + __entry = entry; + __starving = starving; + + latency = __latency; + if (q->head_active && !q->plugged) { /* * The scsi disk and cdrom drivers completely remove the request * entry may be busy being processed and we thus can't change * it. */ - if ((req = req->next) == NULL) - goto get_rq; + if (entry == q->queue_head.next) { + latency -= blkdev_entry_to_request(entry)->nr_segments; + if ((entry = entry->next) == &q->queue_head) + goto get_rq; + starving = 0; + } } + prev = NULL; do { + req = blkdev_entry_to_request(entry); + if (req->sem) continue; if (req->cmd != rw) continue; @@ -610,6 +804,8 @@ continue; /* Can we add it to the end of this request? */ if (req->sector + req->nr_sectors == sector) { + if (latency - req->nr_segments < 0) + break; /* * The merge_fn is a more advanced way * of accomplishing the same task. Instead * may suggest that we shouldn't merge * this */ - if(!(q->merge_fn)(q, req, bh)) + if(!(q->merge_fn)(q, req, bh, max_segments)) continue; req->bhtail->b_reqnext = bh; req->bhtail = bh; req->nr_sectors += count; drive_stat_acct(req, count, 0); + + elevator_merge_after(q, req, latency); + /* Can we now merge this req with the next? */ - attempt_merge(q, req, max_sectors); + attempt_merge(q, req, max_sectors, max_segments); /* or to the beginning? */ } else if (req->sector - count == sector) { + if (!prev && starving) + continue; /* * The merge_fn is a more advanced way * of accomplishing the same task. Instead * may suggest that we shouldn't merge * this */ - if(!(q->merge_fn)(q, req, bh)) + if(!(q->merge_fn)(q, req, bh, max_segments)) continue; bh->b_reqnext = req->bh; req->bh = bh; req->buffer = bh->b_data; req->current_nr_sectors = count; req->sector = sector; req->nr_sectors += count; drive_stat_acct(req, count, 0); + + elevator_merge_before(q, req, latency); + + if (prev) + attempt_merge(q, prev, max_sectors, max_segments); } else continue; + q->elevator.sequence++; spin_unlock_irqrestore(&io_request_lock,flags); return; - } while ((req = req->next) != NULL); + } while (prev = req, + (latency -= req->nr_segments) >= 0 && + (entry = entry->next) != &q->queue_head); /* find an unused request. */ get_rq: @@ -675,6 +884,14 @@ goto end_io; req = __get_request_wait(max_req, bh->b_rdev); spin_lock_irqsave(&io_request_lock,flags); + + /* lock got dropped so revalidate elevator */ + empty = 1; + if (!list_empty(&q->queue_head)) { + empty = 0; + __latency = orig_latency; + __entry = seek_to_not_starving_chunk(q, &__latency, &__starving); + } } /* * Don't start the IO if the buffer has been @@ -707,8 +924,10 @@ req->sem = NULL; req->bh = bh; req->bhtail = bh; - req->next = NULL; - __add_request(q, req); + req->q = q; + __add_request(q, req, empty, __entry, __latency, __starving); + elevator_account_request(q, req); + spin_unlock_irqrestore(&io_request_lock, flags); return; @@ -867,6 +1086,8 @@ void end_that_request_last(struct request *req) { + if (req->q) + BUG(); if (req->sem != NULL) up(req->sem); req->rq_status = RQ_INACTIVE; @@ -886,7 +1107,6 @@ req = all_requests + NR_REQUEST; while (--req >= all_requests) { req->rq_status = RQ_INACTIVE; - req->next = NULL; } memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, sizeof(max_readahead)); @@ -977,6 +1197,9 @@ #ifdef CONFIG_SJCD sjcd_init(); #endif CONFIG_SJCD +#ifdef CONFIG_BLK_DEV_LVM + lvm_init(); +#endif #ifdef CONFIG_BLK_DEV_MD md_init(); #endif CONFIG_BLK_DEV_MD diff -urN 2.3.46pre1/drivers/block/loop.c 2.3.46pre1aa1/drivers/block/loop.c --- 2.3.46pre1/drivers/block/loop.c Tue Feb 15 03:06:47 2000 +++ 2.3.46pre1aa1/drivers/block/loop.c Wed Feb 16 00:28:25 2000 @@ -277,7 +277,7 @@ repeat: INIT_REQUEST; current_request=CURRENT; - CURRENT=current_request->next; + blkdev_dequeue_request(current_request); if (MINOR(current_request->rq_dev) >= max_loop) goto error_out; lo = &loop_dev[MINOR(current_request->rq_dev)]; @@ -375,15 +375,13 @@ spin_lock_irq(&io_request_lock); current_request->sector += current_request->current_nr_sectors; current_request->nr_sectors -= current_request->current_nr_sectors; - current_request->next=CURRENT; - CURRENT=current_request; + list_add(&current_request->queue, &current_request->q->queue_head); end_request(1); goto repeat; error_out_lock: spin_lock_irq(&io_request_lock); error_out: - current_request->next=CURRENT; - CURRENT=current_request; + list_add(&current_request->queue, &current_request->q->queue_head); end_request(0); goto repeat; } @@ -790,6 +788,7 @@ } blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); for (i=0; i < max_loop; i++) { memset(&loop_dev[i], 0, sizeof(struct loop_device)); loop_dev[i].lo_number = i;
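Because requests now live on a list, loop.c can no longer relink a partially served request through rq->next: it dequeues the request up front and, when sectors remain (or on error), pushes what is left back onto the head of its queue with list_add(); blk_queue_headactive(..., 0) tells the block layer not to assume the head request is currently being serviced. A plain-C model of that push-back, with stand-in list helpers rather than the kernel's:

    /* Plain-C model of the loop driver's requeue: take the request off
     * the queue, advance it, and push the remainder back on the head.
     * Minimal list helpers again, for illustration only. */
    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };

    static void list_add(struct list_head *entry, struct list_head *head)
    {
            entry->prev = head;
            entry->next = head->next;
            head->next->prev = entry;
            head->next = entry;
    }

    static void list_del(struct list_head *entry)
    {
            entry->prev->next = entry->next;
            entry->next->prev = entry->prev;
    }

    struct request {
            long sector, nr_sectors, current_nr_sectors;
            struct list_head queue;
    };

    int main(void)
    {
            struct list_head queue_head = { &queue_head, &queue_head };
            struct request rq = { 0, 8, 2, { 0, 0 } };

            list_add(&rq.queue, &queue_head);       /* initially queued */

            list_del(&rq.queue);                    /* blkdev_dequeue_request() */
            rq.sector += rq.current_nr_sectors;     /* part of it is done... */
            rq.nr_sectors -= rq.current_nr_sectors;
            list_add(&rq.queue, &queue_head);       /* ...requeue the rest */

            printf("requeued at sector %ld, %ld sectors left\n",
                   rq.sector, rq.nr_sectors);
            return 0;
    }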
diff -urN 2.3.46pre1/drivers/block/lvm-snap.c 2.3.46pre1aa1/drivers/block/lvm-snap.c --- 2.3.46pre1/drivers/block/lvm-snap.c Thu Jan 1 01:00:00 1970 +++ 2.3.46pre1aa1/drivers/block/lvm-snap.c Wed Feb 16 00:28:24 2000 @@ -0,0 +1,414 @@ +/* linux/drivers/block/lvm-snap.c + + Copyright (C) 2000 Andrea Arcangeli SuSE + + LVM snapshotting */ + +#include <linux/kernel.h> +#include <linux/vmalloc.h> +#include <linux/blkdev.h> +#include <linux/iobuf.h> +#include <linux/lvm.h> + + +extern const char *const lvm_name; +extern int lvm_blocksizes[]; + +void lvm_snapshot_release(lv_t *); + +#define hashfn(dev,block,mask,chunk_size) \ + ((HASHDEV(dev)^((block)/(chunk_size))) & (mask)) + +static inline lv_block_exception_t * +lvm_find_exception_table(kdev_t org_dev, unsigned long org_start, lv_t * lv) +{ + struct list_head * hash_table = lv->lv_snapshot_hash_table, * next; + unsigned long mask = lv->lv_snapshot_hash_mask; + int chunk_size = lv->lv_chunk_size; + lv_block_exception_t * ret; + int i = 0; + + hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)]; + ret = NULL; + for (next = hash_table->next; next != hash_table; next = next->next) + { + lv_block_exception_t * exception; + + exception = list_entry(next, lv_block_exception_t, hash); + if (exception->rsector_org == org_start && + exception->rdev_org == org_dev) + { + if (i) + { + /* fun, isn't it? :) */ + list_del(next); + list_add(next, hash_table); + } + ret = exception; + break; + } + i++; + } + return ret; +} + +static inline void lvm_hash_link(lv_block_exception_t * exception, + kdev_t org_dev, unsigned long org_start, + lv_t * lv) +{ + struct list_head * hash_table = lv->lv_snapshot_hash_table; + unsigned long mask = lv->lv_snapshot_hash_mask; + int chunk_size = lv->lv_chunk_size; + + hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)]; + list_add(&exception->hash, hash_table); +} + +int lvm_snapshot_remap_block(kdev_t * org_dev, unsigned long * org_sector, + unsigned long pe_start, lv_t * lv) +{ + int ret; + unsigned long pe_off, pe_adjustment, __org_start; + kdev_t __org_dev; + int chunk_size = lv->lv_chunk_size; + lv_block_exception_t * exception; + + pe_off = pe_start % chunk_size; + pe_adjustment = (*org_sector-pe_off) % chunk_size; + __org_start = *org_sector - pe_adjustment; + __org_dev = *org_dev; + + ret = 0; + exception = lvm_find_exception_table(__org_dev, __org_start, lv); + if (exception) + { + *org_dev = exception->rdev_new; + *org_sector = exception->rsector_new + pe_adjustment; + ret = 1; + } + return ret; +}
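lvm_snapshot_remap_block() above decides whether a block already has a copy-on-write exception: the faulting sector is rounded down to its chunk boundary (taking the physical-extent offset pe_off into account), and that chunk address is looked up in the exception hash. The rounding is the subtle part; a standalone sketch of it with invented numbers:

    /* Sketch of the remap arithmetic only (plain C, invented values).
     * chunk_size is in 512-byte sectors; chunks are aligned relative to
     * the start of the physical extent, not to sector zero. */
    #include <stdio.h>

    int main(void)
    {
            unsigned long pe_start = 100, sector = 10000;   /* invented */
            unsigned long chunk_size = 64;          /* sectors per chunk */

            unsigned long pe_off = pe_start % chunk_size;           /* 36 */
            unsigned long pe_adjustment = (sector - pe_off) % chunk_size;
            unsigned long chunk_start = sector - pe_adjustment;

            printf("chunk starts at sector %lu, offset in chunk %lu\n",
                   chunk_start, pe_adjustment);     /* 9956 and 44 */
            /* hash key, as in hashfn():
             * (dev ^ (chunk_start / chunk_size)) & hash_mask */
            return 0;
    }

On a hit, the same in-chunk offset (pe_adjustment) is simply re-applied on the snapshot device, so only whole chunks ever need to be copied.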
 +static void lvm_drop_snapshot(lv_t * lv_snap, const char * reason) +{ + kdev_t last_dev; + int i; + + /* no exception storage space available for this snapshot + or error on this snapshot --> release it */ + invalidate_buffers(lv_snap->lv_dev); + + for (i = last_dev = 0; i < lv_snap->lv_remap_ptr; i++) { + if ( lv_snap->lv_block_exception[i].rdev_new != last_dev) { + last_dev = lv_snap->lv_block_exception[i].rdev_new; + invalidate_buffers(last_dev); + } + } + + lvm_snapshot_release(lv_snap); + + printk(KERN_INFO + "%s -- giving up snapshotting %s on %s due to %s\n", + lvm_name, lv_snap->lv_snapshot_org->lv_name, lv_snap->lv_name, + reason); +} + +static inline void lvm_snapshot_prepare_blocks(unsigned long * blocks, + unsigned long start, + int nr_sectors, + int blocksize) +{ + int i, sectors_per_block, nr_blocks; + + sectors_per_block = blocksize >> 9; + nr_blocks = nr_sectors / sectors_per_block; + start /= sectors_per_block; + + for (i = 0; i < nr_blocks; i++) + blocks[i] = start++; +} + +static inline int get_blksize(kdev_t dev) +{ + int correct_size = BLOCK_SIZE, i, major; + + major = MAJOR(dev); + if (blksize_size[major]) + { + i = blksize_size[major][MINOR(dev)]; + if (i) + correct_size = i; + } + return correct_size; +} + +#ifdef DEBUG_SNAPSHOT +static inline void invalidate_snap_cache(unsigned long start, unsigned long nr, + kdev_t dev) +{ + struct buffer_head * bh; + int sectors_per_block, i, blksize, minor; + + minor = MINOR(dev); + blksize = lvm_blocksizes[minor]; + sectors_per_block = blksize >> 9; + nr /= sectors_per_block; + start /= sectors_per_block; + + for (i = 0; i < nr; i++) + { + bh = get_hash_table(dev, start++, blksize); + if (bh) + bforget(bh); + } +} +#endif + +/* + * copy on write handler for one snapshot logical volume + * + * read the original blocks and store them on the new one(s). + * if there is no free exception storage space left --> release the snapshot. + * + * this routine gets called for each _first_ write to a physical chunk. + */ +int lvm_snapshot_COW(kdev_t org_phys_dev, + unsigned long org_phys_sector, + unsigned long org_pe_start, + unsigned long org_virt_sector, + lv_t * lv_snap) +{ + const char * reason; + unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off; + int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size; + struct kiobuf * iobuf; + unsigned long blocks[KIO_MAX_SECTORS]; + int blksize_snap, blksize_org, min_blksize, max_blksize; + int max_sectors, nr_sectors; + + /* check if we are out of snapshot space */ + if (idx >= lv_snap->lv_remap_end) + goto fail_out_of_space; + + /* calculate physical boundaries of source chunk */ + pe_off = org_pe_start % chunk_size; + org_start = org_phys_sector - ((org_phys_sector-pe_off) % chunk_size); + virt_start = org_virt_sector - (org_phys_sector - org_start); + + /* calculate physical boundaries of destination chunk */ + snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; + snap_start = lv_snap->lv_block_exception[idx].rsector_new; + +#ifdef DEBUG_SNAPSHOT + printk(KERN_INFO + "%s -- COW: " + "org %02d:%02d faulting %lu start %lu, " + "snap %02d:%02d start %lu, " + "size %d, pe_start %lu pe_off %lu, virt_sec %lu\n", + lvm_name, + MAJOR(org_phys_dev), MINOR(org_phys_dev), org_phys_sector, + org_start, + MAJOR(snap_phys_dev), MINOR(snap_phys_dev), snap_start, + chunk_size, + org_pe_start, pe_off, + org_virt_sector); +#endif + + iobuf = lv_snap->lv_iobuf; + + blksize_org = get_blksize(org_phys_dev); + blksize_snap = get_blksize(snap_phys_dev); + max_blksize = max(blksize_org, blksize_snap); + min_blksize = min(blksize_org, blksize_snap); + max_sectors = KIO_MAX_SECTORS * (min_blksize>>9); + + if (chunk_size % (max_blksize>>9)) + goto fail_blksize; + + while (chunk_size) + { + nr_sectors = min(chunk_size, max_sectors); + chunk_size -= nr_sectors; + + iobuf->length = nr_sectors << 9; + + lvm_snapshot_prepare_blocks(blocks, org_start, + nr_sectors, blksize_org); + if (brw_kiovec(READ, 1, &iobuf, org_phys_dev, + blocks, blksize_org) != (nr_sectors<<9)) + goto fail_raw_read; + + lvm_snapshot_prepare_blocks(blocks, snap_start, + nr_sectors, blksize_snap); + if (brw_kiovec(WRITE, 1, &iobuf,
snap_phys_dev, + blocks, blksize_snap) != (nr_sectors<<9)) + goto fail_raw_write; + } + +#ifdef DEBUG_SNAPSHOT + /* invalidate the logical snapshot buffer cache */ + invalidate_snap_cache(virt_start, lv_snap->lv_chunk_size, + lv_snap->lv_dev); +#endif + + /* the original chunk is now stored on the snapshot volume + so update the exception table */ + lv_snap->lv_block_exception[idx].rdev_org = org_phys_dev; + lv_snap->lv_block_exception[idx].rsector_org = org_start; + lvm_hash_link(lv_snap->lv_block_exception + idx, + org_phys_dev, org_start, lv_snap); + lv_snap->lv_remap_ptr = idx + 1; + return 0; + + /* slow path */ + out: + lvm_drop_snapshot(lv_snap, reason); + return 1; + + fail_out_of_space: + reason = "out of space"; + goto out; + fail_raw_read: + reason = "read error"; + goto out; + fail_raw_write: + reason = "write error"; + goto out; + fail_blksize: + reason = "blocksize error"; + goto out; +} + +static int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors) +{ + int bytes, nr_pages, err, i; + + bytes = sectors << 9; + nr_pages = (bytes + ~PAGE_MASK) >> PAGE_SHIFT; + err = expand_kiobuf(iobuf, nr_pages); + if (err) + goto out; + + err = -ENOMEM; + iobuf->locked = 1; + iobuf->nr_pages = 0; + for (i = 0; i < nr_pages; i++) + { + struct page * page; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,27) + page = alloc_page(GFP_KERNEL); + if (!page) + goto out; +#else + { + unsigned long addr = __get_free_page(GFP_USER); + if (!addr) + goto out; + iobuf->pagelist[i] = addr; + page = mem_map + MAP_NR(addr); + } +#endif + + iobuf->maplist[i] = page; + /* the only point to lock the page here is to be allowed + to share unmap_kiobuf() in the fail-path */ +#ifndef LockPage +#define LockPage(map) set_bit(PG_locked, &(map)->flags) +#endif + LockPage(page); + iobuf->nr_pages++; + } + iobuf->offset = 0; + + err = 0; + out: + return err; +} + +static int calc_max_buckets(void) +{ + unsigned long mem; + + mem = num_physpages << PAGE_SHIFT; + mem /= 100; + mem *= 2; + mem /= sizeof(struct list_head); + + return mem; +} + +static int lvm_snapshot_alloc_hash_table(lv_t * lv) +{ + int err; + unsigned long buckets, max_buckets, size; + struct list_head * hash; + + buckets = lv->lv_remap_end; + max_buckets = calc_max_buckets(); + buckets = min(buckets, max_buckets); + while (buckets & (buckets-1)) + buckets &= (buckets-1); + + size = buckets * sizeof(struct list_head); + + err = -ENOMEM; + hash = vmalloc(size); + lv->lv_snapshot_hash_table = hash; + + if (!hash) + goto out; + + lv->lv_snapshot_hash_mask = buckets-1; + while (buckets--) + INIT_LIST_HEAD(hash+buckets); + err = 0; + out: + return err; +} + +int lvm_snapshot_alloc(lv_t * lv_snap) +{ + int err, blocksize, max_sectors; + + err = alloc_kiovec(1, &lv_snap->lv_iobuf); + if (err) + goto out; + + blocksize = lvm_blocksizes[MINOR(lv_snap->lv_dev)]; + max_sectors = KIO_MAX_SECTORS << (PAGE_SHIFT-9); + + err = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors); + if (err) + goto out_free_kiovec; + + err = lvm_snapshot_alloc_hash_table(lv_snap); + if (err) + goto out_free_kiovec; + out: + return err; + + out_free_kiovec: + unmap_kiobuf(lv_snap->lv_iobuf); + free_kiovec(1, &lv_snap->lv_iobuf); + goto out; +} + +void lvm_snapshot_release(lv_t * lv) +{ + if (lv->lv_block_exception) + { + vfree(lv->lv_block_exception); + lv->lv_block_exception = NULL; + } + if (lv->lv_snapshot_hash_table) + { + vfree(lv->lv_snapshot_hash_table); + lv->lv_snapshot_hash_table = NULL; + } + if (lv->lv_iobuf) + { + free_kiovec(1, &lv->lv_iobuf); + lv->lv_iobuf = NULL; + } +}
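The hash table sizing in lvm_snapshot_alloc_hash_table() is worth spelling out: ideally one bucket per remappable chunk, capped by calc_max_buckets() at roughly 2% of physical memory, then rounded down to a power of two so lookups can mask instead of divide. The same arithmetic as a standalone example (memory size and remap count are invented):

    /* Standalone version of the bucket-sizing policy (plain C).
     * sizeof(void *) * 2 stands in for sizeof(struct list_head). */
    #include <stdio.h>

    int main(void)
    {
            unsigned long mem = 64UL << 20;          /* pretend 64 MB RAM */
            unsigned long entry = sizeof(void *) * 2;
            unsigned long max_buckets = mem / 100 * 2 / entry;  /* ~2% cap */
            unsigned long buckets = 100000;          /* lv_remap_end */

            if (buckets > max_buckets)
                    buckets = max_buckets;
            while (buckets & (buckets - 1))          /* round down to 2^n */
                    buckets &= buckets - 1;

            printf("%lu buckets, mask 0x%lx\n", buckets, buckets - 1);
            return 0;
    }

With the example numbers this settles on 65536 buckets and a mask of 0xffff, which is exactly the lv_snapshot_hash_mask form hashfn() expects.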
 diff -urN 2.3.46pre1/drivers/block/lvm.c 2.3.46pre1aa1/drivers/block/lvm.c --- 2.3.46pre1/drivers/block/lvm.c Thu Jan 1 01:00:00 1970 +++ 2.3.46pre1aa1/drivers/block/lvm.c Wed Feb 16 00:28:24 2000 @@ -0,0 +1,2616 @@ +/* + * kernel/lvm.c + * + * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Germany + * + * February-November 1997 + * April-May,July-August,November 1998 + * January-March,May,July,September,October 1999 + * + * + * LVM driver is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * LVM driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +/* + * Changelog + * + * 09/11/1997 - added chr ioctls VG_STATUS_GET_COUNT + * and VG_STATUS_GET_NAMELIST + * 18/01/1998 - change lvm_chr_open/close lock handling + * 30/04/1998 - changed LV_STATUS ioctl to LV_STATUS_BYNAME and + * - added LV_STATUS_BYINDEX ioctl + * - used lvm_status_byname_req_t and + * lvm_status_byindex_req_t vars + * 04/05/1998 - added multiple device support + * 08/05/1998 - added support to set/clear extendable flag in volume group + * 09/05/1998 - changed output of lvm_proc_get_info() because of + * support for free (eg. longer) logical volume names + * 12/05/1998 - added spin_locks (thanks to Pascal van Dam + * ) + * 25/05/1998 - fixed handling of locked PEs in lvm_map() and lvm_chr_ioctl() + * 26/05/1998 - reactivated verify_area by access_ok + * 07/06/1998 - used vmalloc/vfree instead of kmalloc/kfree to go + * beyond 128/256 KB max allocation limit per call + * - #ifdef blocked spin_lock calls to avoid compile errors + * with 2.0.x + * 11/06/1998 - another enhancement to spinlock code in lvm_chr_open() + * and use of LVM_VERSION_CODE instead of my own macros + * (thanks to Michael Marxmeier ) + * 07/07/1998 - added statistics in lvm_map() + * 08/07/1998 - saved statistics in do_lv_extend_reduce() + * 25/07/1998 - used __initfunc macro + * 02/08/1998 - changes for official char/block major numbers + * 07/08/1998 - avoided init_module() and cleanup_module() to be static + * 30/08/1998 - changed VG lv_open counter from sum of LV lv_open counters + * to sum of LVs open (no matter how often each is) + * 01/09/1998 - fixed lvm_gendisk.part[] index error + * 07/09/1998 - added copying of lv_current_pe-array + * in LV_STATUS_BYINDEX ioctl + * 17/11/1998 - added KERN_* levels to printk + * 13/01/1999 - fixed LV index bug in do_lv_create() which hit lvrename + * 07/02/1999 - fixed spinlock handling bug in case of LVM_RESET + * by moving spinlock code from lvm_chr_open() + * to lvm_chr_ioctl() + * - added LVM_LOCK_LVM ioctl to lvm_chr_ioctl() + * - allowed LVM_RESET and retrieval commands to go ahead; + * only other update ioctls are blocked now + * - fixed pv->pe to NULL for pv_status + * - using lv_req structure in lvm_chr_ioctl() now + * - fixed NULL ptr reference bug in do_lv_extend_reduce() + * caused by uncontiguous PV array in lvm_chr_ioctl(VG_REDUCE) + * 09/02/1999 - changed BLKRASET and BLKRAGET in
lvm_chr_ioctl() to + * handle logical volume private read ahead sector + * - implemented LV read_ahead handling with lvm_blk_read() + * and lvm_blk_write() + * 10/02/1999 - implemented 2.[12].* support function lvm_hd_name() + * to be used in drivers/block/genhd.c by disk_name() + * 12/02/1999 - fixed index bug in lvm_blk_ioctl(), HDIO_GETGEO + * - enhanced gendisk insert/remove handling + * 16/02/1999 - changed to dynamic block minor number allocation to + * have as many as 99 volume groups with 256 logical volumes + * as the grand total; this allows having 1 volume group with + * up to 256 logical volumes in it + * 21/02/1999 - added LV open count information to proc filesystem + * - substituted redundant LVM_RESET code by calls + * to do_vg_remove() + * 22/02/1999 - used schedule_timeout() to be more responsive + * in case of do_vg_remove() with lots of logical volumes + * 19/03/1999 - fixed NULL pointer bug in module_init/lvm_init + * 17/05/1999 - used DECLARE_WAIT_QUEUE_HEAD macro (>2.3.0) + * - enhanced lvm_hd_name support + * 03/07/1999 - avoided use of KERNEL_VERSION macro based ifdefs and + * memcpy_tofs/memcpy_fromfs macro redefinitions + * 06/07/1999 - corrected reads/writes statistic counter copy in case + * of striped logical volume + * 28/07/1999 - implemented snapshot logical volumes + * - lvm_chr_ioctl + * - LV_STATUS_BYINDEX + * - LV_STATUS_BYNAME + * - do_lv_create + * - do_lv_remove + * - lvm_map + * - new lvm_snapshot_remap_block + * - new lvm_snapshot_remap_new_block + * 08/10/1999 - implemented support for multiple snapshots per + * original logical volume + * 12/10/1999 - support for 2.3.19 + * 11/11/1999 - support for 2.3.28 + * 21/11/1999 - changed lvm_map() interface to buffer_head based + * 19/12/1999 - support for 2.3.33 + * 01/01/2000 - changed locking concept in lvm_map(), + * do_vg_create() and do_lv_remove() + * + */ + + +/* + * TODO + * + * - implement special handling of unavailable physical volumes + * + */ + +char *lvm_version = "LVM version 0.8e by Heinz Mauelshagen (4/1/2000)\n"; +char *lvm_short_version = "version 0.8e (4/1/2000)"; + +#define MAJOR_NR LVM_BLK_MAJOR +#define DEVICE_OFF(device) + +#include +#include + +#ifdef MODVERSIONS +# undef MODULE +# define MODULE +# include +#endif + +#ifdef MODULE +# include +#endif + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_KERNELD +#include +#endif + +#include +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 0) +#include +#endif + +#include +#include + +#define LVM_CORRECT_READ_AHEAD(a) \ +do { \ + if ((a) < LVM_MIN_READ_AHEAD) \ + (a) = LVM_MIN_READ_AHEAD; \ + if ((a) > LVM_MAX_READ_AHEAD) \ + (a) = LVM_MAX_READ_AHEAD; \ +} while(0) + +#define suser() ( current->uid == 0 && current->euid == 0) + + +/* + * External function prototypes + */ +#ifdef MODULE +int init_module ( void); +void cleanup_module ( void); +#else +extern int lvm_init ( void); +#endif + +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) +static void lvm_dummy_device_request ( request_queue_t*); +#else +static void lvm_dummy_device_request ( void); +#endif +static int lvm_blk_ioctl ( struct inode *, struct file *, uint, ulong); +static int lvm_blk_open ( struct inode *, struct file *); + +static int lvm_chr_open ( struct inode *, struct file *); + +static int lvm_chr_release ( struct inode *, struct file *); +static int lvm_blk_release ( struct inode *, struct file *); + +static int lvm_chr_ioctl ( struct inode *, struct
file *, uint, ulong); + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) +static int lvm_proc_get_info ( char *, char **, off_t, int); +static int (*lvm_proc_get_info_ptr)(char *, char **, off_t, int) = + &lvm_proc_get_info; +#else +static int lvm_proc_get_info ( char *, char **, off_t, int, int); +#endif +#endif + +#ifdef LVM_HD_NAME +void lvm_hd_name ( char*, int); +#endif + +/* external snapshot calls */ +int lvm_snapshot_remap_block ( kdev_t*, ulong*, unsigned long, lv_t*); +int lvm_snapshot_COW(kdev_t, unsigned long, unsigned long, + unsigned long, lv_t *); +int lvm_snapshot_alloc(lv_t *); +void lvm_snapshot_release(lv_t *); + +/* End external function prototypes */ + + +/* + * Internal function prototypes + */ +static void lvm_init_vars ( void); +#if LINUX_VERSION_CODE < KERNEL_VERSION( 2, 3, 43) +extern int (*lvm_map_ptr) ( struct buffer_head*, int); +#endif + + +#ifdef LVM_HD_NAME +extern void (*lvm_hd_name_ptr) ( char*, int); +#endif +static int lvm_map ( struct buffer_head*, int); +static int do_vg_create ( int, void *); +static int do_vg_remove ( int); +static int do_lv_create ( int, char *, lv_t *); +static int do_lv_remove ( int, char *, int); +static int do_lv_extend_reduce ( int, char *, lv_t *); +static void lvm_geninit ( struct gendisk *); +#ifdef LVM_GET_INODE + static struct inode *lvm_get_inode ( int); + void lvm_clear_inode ( struct inode *); +#endif +inline int lvm_strlen ( char *); +inline void lvm_memcpy ( char *, char *, int); +inline int lvm_strcmp ( char *, char *); +inline char *lvm_strrchr ( char *, char c); +/* END Internal function prototypes */ + + +/* volume group descriptor area pointers */ +static vg_t *vg[ABS_MAX_VG + 1]; +static pv_t *pvp = NULL; +static lv_t *lvp = NULL; +static pe_t *pep = NULL; +static pe_t *pep1 = NULL; + + +/* map from block minor number to VG and LV numbers */ +typedef struct { + int vg_number; + int lv_number; +} vg_lv_map_t; +static vg_lv_map_t vg_lv_map[ABS_MAX_LV]; + + +/* Request structures (lvm_chr_ioctl()) */ +static pv_change_req_t pv_change_req; +static pv_flush_req_t pv_flush_req; +static pv_status_req_t pv_status_req; +static pe_lock_req_t pe_lock_req; +static le_remap_req_t le_remap_req; +static lv_req_t lv_req; + +#ifdef LVM_TOTAL_RESET +static int lvm_reset_spindown = 0; +#endif + +static char pv_name[NAME_LEN]; +/* static char rootvg[NAME_LEN] = { 0, }; */ +static uint lv_open = 0; +const char *const lvm_name = LVM_NAME; +static int lock = 0; +static int loadtime = 0; +static uint vg_count = 0; +static long lvm_chr_open_count = 0; +static ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION; +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 0) +static DECLARE_WAIT_QUEUE_HEAD ( lvm_wait); +static DECLARE_WAIT_QUEUE_HEAD ( lvm_map_wait); +#else +struct wait_queue *lvm_wait = NULL; +struct wait_queue *lvm_map_wait = NULL; +#endif + +static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED; + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +#if LINUX_VERSION_CODE < KERNEL_VERSION ( 2, 3, 31) +static struct proc_dir_entry lvm_proc_entry = { + 0, 3, LVM_NAME, S_IFREG | S_IRUGO, + 1, 0, 0, 0, + NULL, + lvm_proc_get_info, + NULL, NULL, NULL, NULL, NULL, +}; +#endif +#endif + +static struct file_operations lvm_chr_fops = { + ioctl: lvm_chr_ioctl, + open: lvm_chr_open, + release: lvm_chr_release, +}; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 3, 38) +static struct file_operations lvm_blk_fops = { + read: block_read, + write: block_write, + ioctl: lvm_blk_ioctl, + 
open: lvm_blk_open, + release: lvm_blk_release, + fsync: block_fsync, +}; +#else +static struct block_device_operations lvm_blk_fops = +{ + open: lvm_blk_open, + release: lvm_blk_release, + ioctl: lvm_blk_ioctl, +}; +#endif + +/* gendisk structures */ +static struct hd_struct lvm_hd_struct[MAX_LV]; +int lvm_blocksizes[MAX_LV] = { 0, }; +static int lvm_size[MAX_LV] = { 0, }; +static struct gendisk lvm_gendisk = { + MAJOR_NR, /* major # */ + LVM_NAME, /* name of major */ + 0, /* number of times minor is shifted + to get real minor */ + 1, /* maximum partitions per device */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 3, 40) + MAX_LV, /* maximum number of real devices */ + lvm_geninit, /* initialization called before we + do other things */ +#endif + lvm_hd_struct, /* partition table */ + lvm_size, /* device size in blocks, copied + to block_size[] */ + MAX_LV, /* number of real devices */ + NULL, /* internal */ + NULL, /* pointer to next gendisk struct (internal) */ +}; + + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 3, 43) +static void lvm_make_request (int rw, struct buffer_head * bh) +{ +#ifdef CONFIG_BLK_DEV_MD + request_queue_t * q; + unsigned long flags; +#endif + + if (lvm_map(bh, rw)) + goto sorry; + +#ifdef CONFIG_BLK_DEV_MD + q = blk_get_queue(bh->b_rdev); + + if (q->make_request_fn) + q->make_request_fn(rw, bh); + else +#endif + generic_make_request(rw, bh); + return; + + sorry: + printk(KERN_ERR "Bad lvm_map in ll_rw_block\n"); +} +#endif + +#ifdef MODULE +/* + * Module initialization... + */ +int init_module ( void) +#else +/* + * Driver initialization... + */ +#ifdef __initfunc +__initfunc ( int lvm_init ( void)) +#else +int __init lvm_init ( void) +#endif +#endif /* #ifdef MODULE */ +{ + struct gendisk *gendisk_ptr = NULL; + + lvm_init_vars (); + + /* insert our gendisk at the corresponding major */ + lvm_geninit ( &lvm_gendisk); + if ( gendisk_head != NULL) { + gendisk_ptr = gendisk_head; + while ( gendisk_ptr->next != NULL && + gendisk_ptr->major > lvm_gendisk.major) { + gendisk_ptr = gendisk_ptr->next; + } + lvm_gendisk.next = gendisk_ptr->next; + gendisk_ptr->next = &lvm_gendisk; + } else { + gendisk_head = &lvm_gendisk; + lvm_gendisk.next = NULL; + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 3, 43) + /* reference from drivers/block/ll_rw_blk.c */ + lvm_map_ptr = lvm_map; +#endif + +#ifdef LVM_HD_NAME + /* reference from drivers/block/genhd.c */ + lvm_hd_name_ptr = lvm_hd_name; +#endif + +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) + blk_init_queue ( BLK_DEFAULT_QUEUE ( MAJOR_NR), lvm_dummy_device_request); +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 3, 43) + blk_queue_pluggable(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); +#else + blk_queue_pluggable(BLK_DEFAULT_QUEUE(MAJOR_NR), plug_device_noop); + blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_make_request); +#endif +#else + blk_dev[MAJOR_NR].request_fn = lvm_dummy_device_request; + blk_dev[MAJOR_NR].current_request = NULL; +#endif + + /* optional read root VGDA */ +/* + if ( *rootvg != 0) { + vg_read_with_pv_and_lv ( rootvg, &vg); + } +*/ + + if ( register_chrdev ( LVM_CHAR_MAJOR, lvm_name, &lvm_chr_fops) < 0) { + printk ( KERN_ERR "%s -- register_chrdev failed\n", lvm_name); + return -EIO; + } + if ( register_blkdev ( MAJOR_NR, lvm_name, &lvm_blk_fops) < 0) { + printk ( "%s -- register_blkdev failed\n", lvm_name); + if ( unregister_chrdev ( LVM_CHAR_MAJOR, lvm_name) < 0) + printk ( KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name); + return -EIO; + } + +#if defined CONFIG_LVM_PROC_FS && defined
CONFIG_PROC_FS +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 25) + create_proc_info_entry ( LVM_NAME, S_IFREG | S_IRUGO, + &proc_root, lvm_proc_get_info_ptr); +# else + proc_register ( &proc_root, &lvm_proc_entry); +# endif +#endif + + printk ( KERN_INFO + "%s%s -- " +#ifdef MODULE + "Module" +#else + "Driver" +#endif + " successfully initialized\n", + lvm_version, lvm_name); + + return 0; +} /* init_module () / lvm_init () */ + + +#ifdef MODULE +/* + * Module cleanup... + */ +void cleanup_module ( void) { + struct gendisk *gendisk_ptr = NULL, *gendisk_ptr_prev = NULL; + + if ( unregister_chrdev ( LVM_CHAR_MAJOR, lvm_name) < 0) { + printk ( KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name); + } + if ( unregister_blkdev ( MAJOR_NR, lvm_name) < 0) { + printk ( KERN_ERR "%s -- unregister_blkdev failed\n", lvm_name); + } + +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) + blk_cleanup_queue ( BLK_DEFAULT_QUEUE ( MAJOR_NR)); +#else + blk_dev[MAJOR_NR].request_fn = NULL; + blk_dev[MAJOR_NR].current_request = NULL; +#endif + + gendisk_ptr = gendisk_ptr_prev = gendisk_head; + while ( gendisk_ptr != NULL) { + if ( gendisk_ptr == &lvm_gendisk) break; + gendisk_ptr_prev = gendisk_ptr; + gendisk_ptr = gendisk_ptr->next; + } + /* delete our gendisk from chain */ + if ( gendisk_ptr == &lvm_gendisk) gendisk_ptr_prev->next = gendisk_ptr->next; + + blk_size[MAJOR_NR] = NULL; + blksize_size[MAJOR_NR] = NULL; + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) + remove_proc_entry ( LVM_NAME, &proc_root); +# else + proc_unregister ( &proc_root, lvm_proc_entry.low_ino); +# endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 3, 43) + /* reference from linux/drivers/block/ll_rw_blk.c */ + lvm_map_ptr = NULL; +#endif + +#ifdef LVM_HD_NAME + /* reference from linux/drivers/block/genhd.c */ + lvm_hd_name_ptr = NULL; +#endif + + printk ( KERN_INFO "%s -- Module successfully deactivated\n", lvm_name); + + return; +} /* void cleanup_module () */ +#endif /* #ifdef MODULE */ + + +/* + * support function to initialize lvm variables + */ +#ifdef __initfunc +__initfunc ( void lvm_init_vars ( void)) +#else +void __init lvm_init_vars ( void) +#endif +{ + int v; + + loadtime = CURRENT_TIME; + + lvm_lock = SPIN_LOCK_UNLOCKED; + + pe_lock_req.lock = UNLOCK_PE; + pe_lock_req.data.lv_dev = \ + pe_lock_req.data.pv_dev = \ + pe_lock_req.data.pv_offset = 0; + + /* Initialize VG pointers */ + for ( v = 0; v <= ABS_MAX_VG; v++) vg[v] = NULL; + + /* Initialize LV -> VG association */ + for ( v = 0; v < ABS_MAX_LV; v++) { + /* index ABS_MAX_VG never used for real VG */ + vg_lv_map[v].vg_number = ABS_MAX_VG; + vg_lv_map[v].lv_number = -1; + } + + return; +} /* lvm_init_vars () */ + + +/******************************************************************** + * + * Character device functions + * + ********************************************************************/ + +/* + * character device open routine + */ +static int lvm_chr_open ( struct inode *inode, + struct file *file) { + int minor = MINOR ( inode->i_rdev); + +#ifdef DEBUG + printk ( KERN_DEBUG + "%s -- lvm_chr_open MINOR: %d VG#: %d mode: 0x%X lock: %d\n", + lvm_name, minor, VG_CHR(minor), file->f_mode, lock); +#endif + + /* super user validation */ + if ( ! 
suser()) return -EACCES; + + /* Group special file open */ + if ( VG_CHR(minor) > MAX_VG) return -ENXIO; + +#ifdef MODULE + MOD_INC_USE_COUNT; +#endif + + lvm_chr_open_count++; + return 0; +} /* lvm_chr_open () */ + + +/* + * character device i/o-control routine + * + * Only one changing process can do ioctl at one time, others will block. + * + */ +static int lvm_chr_ioctl ( struct inode *inode, struct file *file, + uint command, ulong a) { + int minor = MINOR ( inode->i_rdev); + int extendable; + ulong l, le, p, v; + ulong size; + void *arg = ( void*) a; +#ifdef LVM_GET_INODE + struct inode *inode_sav; +#endif + lv_status_byname_req_t lv_status_byname_req; + lv_status_byindex_req_t lv_status_byindex_req; + lv_t lv; + + /* otherwise cc will complain about unused variables */ + ( void) lvm_lock; + + +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_chr_ioctl: command: 0x%X MINOR: %d " + "VG#: %d mode: 0x%X\n", + lvm_name, command, minor, VG_CHR(minor), file->f_mode); +#endif + +#ifdef LVM_TOTAL_RESET + if ( lvm_reset_spindown > 0) return -EACCES; +#endif + + + /* Main command switch */ + switch ( command) { + /* lock the LVM */ + case LVM_LOCK_LVM: +lock_try_again: + spin_lock ( &lvm_lock); + if( lock != 0 && lock != current->pid ) { +#ifdef DEBUG_IOCTL + printk ( KERN_INFO "lvm_chr_ioctl: %s is locked by pid %d ...\n", + lvm_name, lock); +#endif + spin_unlock ( &lvm_lock); + interruptible_sleep_on ( &lvm_wait); + if ( current->sigpending != 0) return -EINTR; +#ifdef LVM_TOTAL_RESET + if ( lvm_reset_spindown > 0) return -EACCES; +#endif + goto lock_try_again; + } + lock = current->pid; + spin_unlock ( &lvm_lock); + return 0; + + + /* check lvm version to ensure driver/tools+lib interoperability */ + case LVM_GET_IOP_VERSION: + if ( copy_to_user ( arg, &lvm_iop_version, sizeof ( ushort)) != 0) + return -EFAULT; + return 0; + + +#ifdef LVM_TOTAL_RESET + /* lock reset function */ + case LVM_RESET: + lvm_reset_spindown = 1; + for ( v = 0; v < ABS_MAX_VG; v++) { + if ( vg[v] != NULL) { + do_vg_remove ( v); + } + } + +#ifdef MODULE + while ( GET_USE_COUNT ( &__this_module) < 1) + MOD_INC_USE_COUNT; + while ( GET_USE_COUNT ( &__this_module) > 1) + MOD_DEC_USE_COUNT; +#endif /* MODULE */ + lock = 0; /* release lock */ + wake_up_interruptible ( &lvm_wait); + return 0; +#endif /* LVM_TOTAL_RESET */ + + + /* lock/unlock i/o to a physical extent to move it to another + physical volume (move's done in user space's pvmove) */ + case PE_LOCK_UNLOCK: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &pe_lock_req, arg, sizeof ( pe_lock_req_t)) != 0) + return -EFAULT; + + switch ( pe_lock_req.lock) { + case LOCK_PE: + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + if ( vg[VG_CHR(minor)]->pv[p] != NULL && + pe_lock_req.data.pv_dev == + vg[VG_CHR(minor)]->pv[p]->pv_dev) + break; + } + + if ( p == vg[VG_CHR(minor)]->pv_max) return -ENXIO; + + pe_lock_req.lock = UNLOCK_PE; + fsync_dev ( pe_lock_req.data.lv_dev); + pe_lock_req.lock = LOCK_PE; + break; + + case UNLOCK_PE: + pe_lock_req.lock = UNLOCK_PE; + pe_lock_req.data.lv_dev = \ + pe_lock_req.data.pv_dev = \ + pe_lock_req.data.pv_offset = 0; + wake_up ( &lvm_map_wait); + break; + + default: + return -EINVAL; + } + + return 0; + + + /* remap a logical extent (after moving the physical extent) */ + case LE_REMAP: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &le_remap_req, arg, + sizeof ( le_remap_req_t)) != 0) + return -EFAULT; + + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( 
vg[VG_CHR(minor)]->lv[l] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->lv[l]->lv_name, + le_remap_req.lv_name) == 0) { + for ( le = 0; le < vg[VG_CHR(minor)]->lv[l]->lv_allocated_le; + le++) { + if ( vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev == + le_remap_req.old_dev && + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].pe == + le_remap_req.old_pe) { + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev = + le_remap_req.new_dev; + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].pe = + le_remap_req.new_pe; + return 0; + } + } + return -EINVAL; + } + } + + return -ENXIO; + + + /* create a VGDA */ + case VG_CREATE: + return do_vg_create ( minor, arg); + + + /* remove an inactive VGDA */ + case VG_REMOVE: + return do_vg_remove ( minor); + + + /* extend a volume group */ + case VG_EXTEND: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( vg[VG_CHR(minor)]->pv_cur < vg[VG_CHR(minor)]->pv_max) { + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + if ( vg[VG_CHR(minor)]->pv[p] == NULL) { + if ( ( vg[VG_CHR(minor)]->pv[p] = + kmalloc ( sizeof ( pv_t), GFP_USER)) == NULL) { + printk ( KERN_CRIT + "%s -- VG_EXTEND: kmalloc error PV at line %d\n", + lvm_name, __LINE__); + return -ENOMEM; + } + if ( copy_from_user ( vg[VG_CHR(minor)]->pv[p], arg, + sizeof ( pv_t)) != 0) + return -EFAULT; + + vg[VG_CHR(minor)]->pv[p]->pv_status = PV_ACTIVE; + /* We don't need the PE list + in kernel space as with the LV's pe_t list */ + vg[VG_CHR(minor)]->pv[p]->pe = NULL; + vg[VG_CHR(minor)]->pv_cur++; + vg[VG_CHR(minor)]->pv_act++; + vg[VG_CHR(minor)]->pe_total += + vg[VG_CHR(minor)]->pv[p]->pe_total; +#ifdef LVM_GET_INODE + /* insert a dummy inode for fs_may_mount */ + vg[VG_CHR(minor)]->pv[p]->inode = + lvm_get_inode ( vg[VG_CHR(minor)]->pv[p]->pv_dev); +#endif + return 0; + } + } + } + return -EPERM; + + + /* reduce a volume group */ + case VG_REDUCE: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( pv_name, arg, sizeof ( pv_name)) != 0) + return -EFAULT; + + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + if ( vg[VG_CHR(minor)]->pv[p] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->pv[p]->pv_name, + pv_name) == 0) { + if ( vg[VG_CHR(minor)]->pv[p]->lv_cur > 0) return -EPERM; + vg[VG_CHR(minor)]->pe_total -= + vg[VG_CHR(minor)]->pv[p]->pe_total; + vg[VG_CHR(minor)]->pv_cur--; + vg[VG_CHR(minor)]->pv_act--; +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG + "%s -- kfree %d\n", lvm_name, __LINE__); +#endif +#ifdef LVM_GET_INODE + lvm_clear_inode ( vg[VG_CHR(minor)]->pv[p]->inode); +#endif + kfree ( vg[VG_CHR(minor)]->pv[p]); + /* Make PV pointer array contiguous */ + for ( ; p < vg[VG_CHR(minor)]->pv_max-1; p++) + vg[VG_CHR(minor)]->pv[p] = vg[VG_CHR(minor)]->pv[p + 1]; + vg[VG_CHR(minor)]->pv[p] = NULL; + return 0; + } + } + return -ENXIO; + + + /* set/clear extendability flag of volume group */ + case VG_SET_EXTENDABLE: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &extendable, arg, sizeof ( extendable)) != 0) + return -EFAULT; + + if ( extendable == VG_EXTENDABLE || + extendable == ~VG_EXTENDABLE) { + if ( extendable == VG_EXTENDABLE) + vg[VG_CHR(minor)]->vg_status |= VG_EXTENDABLE; + else + vg[VG_CHR(minor)]->vg_status &= ~VG_EXTENDABLE; + } else return -EINVAL; + return 0; + + + /* get volume group data (only the vg_t struct) */ + case VG_STATUS: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_to_user ( arg, vg[VG_CHR(minor)], sizeof ( vg_t)) != 0) + return -EFAULT; + + return 0; + 
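+ /* + * Illustration (assumed user-space usage, not part of this patch): + * VG_STATUS is a plain ioctl on the VG's character device, which the + * tools conventionally open via /dev/VolumeGroupName/group (the path + * is an assumption here): + * + * vg_t vg_buf; + * int fd = open ( "/dev/test_vg/group", O_RDONLY); + * if ( fd >= 0 && ioctl ( fd, VG_STATUS, &vg_buf) == 0) + * printf ( "%u/%u PVs active\n", vg_buf.pv_act, vg_buf.pv_cur); + */ + 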
+ /* get volume group count */ + case VG_STATUS_GET_COUNT: + if ( copy_to_user ( arg, &vg_count, sizeof ( vg_count)) != 0) + return -EFAULT; + + return 0; + + + /* get volume group name list */ + case VG_STATUS_GET_NAMELIST: + for ( l = v = 0; v < ABS_MAX_VG; v++) { + if ( vg[v] != NULL) { + if ( copy_to_user ( arg + l++ * NAME_LEN, + vg[v]->vg_name, + NAME_LEN) != 0) + return -EFAULT; + } + } + return 0; + + + /* create, remove, extend or reduce a logical volume */ + case LV_CREATE: + case LV_REMOVE: + case LV_EXTEND: + case LV_REDUCE: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &lv_req, arg, sizeof ( lv_req)) != 0) + return -EFAULT; + + if ( command != LV_REMOVE) { + if ( copy_from_user ( &lv, lv_req.lv, sizeof ( lv_t)) != 0) + return -EFAULT; + } + + switch ( command) { + case LV_CREATE: + return do_lv_create ( minor, lv_req.lv_name, &lv); + + case LV_REMOVE: + return do_lv_remove ( minor, lv_req.lv_name, -1); + + case LV_EXTEND: + case LV_REDUCE: + return do_lv_extend_reduce ( minor, lv_req.lv_name, &lv); + } + + + /* get status of a logical volume by name */ + case LV_STATUS_BYNAME: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &lv_status_byname_req, arg, + sizeof ( lv_status_byname_req_t)) != 0) + return -EFAULT; + + if ( lv_status_byname_req.lv == NULL) return -EINVAL; + if ( copy_from_user ( &lv, lv_status_byname_req.lv, + sizeof ( lv_t)) != 0) + return -EFAULT; + + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->lv[l]->lv_name, + lv_status_byname_req.lv_name) == 0) { + if ( copy_to_user ( lv_status_byname_req.lv, + vg[VG_CHR(minor)]->lv[l], + sizeof ( lv_t)) != 0) + return -EFAULT; + + if ( lv.lv_current_pe != NULL) { + size = vg[VG_CHR(minor)]->lv[l]->lv_allocated_le * + sizeof ( pe_t); + if ( copy_to_user ( lv.lv_current_pe, + vg[VG_CHR(minor)]->lv[l]->lv_current_pe, + size) != 0) + return -EFAULT; + } + return 0; + } + } + return -ENXIO; + + + /* get status of a logical volume by index */ + case LV_STATUS_BYINDEX: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &lv_status_byindex_req, arg, + sizeof ( lv_status_byindex_req)) != 0) + return -EFAULT; + + if ( ( lvp = lv_status_byindex_req.lv) == NULL) return -EINVAL; + l = lv_status_byindex_req.lv_index; + if ( l >= vg[VG_CHR(minor)]->lv_max || + vg[VG_CHR(minor)]->lv[l] == NULL) return -ENXIO; + + if ( copy_from_user ( &lv, lvp, sizeof ( lv_t)) != 0) + return -EFAULT; + + if ( copy_to_user ( lvp, vg[VG_CHR(minor)]->lv[l], + sizeof ( lv_t)) != 0) + return -EFAULT; + + if ( lv.lv_current_pe != NULL) { + size = vg[VG_CHR(minor)]->lv[l]->lv_allocated_le * sizeof ( pe_t); + if ( copy_to_user ( lv.lv_current_pe, + vg[VG_CHR(minor)]->lv[l]->lv_current_pe, + size) != 0) + return -EFAULT; + } + return 0; + 
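+ /* + * Illustration (assumed caller behaviour, not part of this patch): + * LV_STATUS_BYNAME/LV_STATUS_BYINDEX above copy the lv_t back into + * the caller's buffer and, when the caller leaves lv_current_pe + * pointing at a user-space array, also fill that array with the + * per-PE read/write counters, roughly: + * + * lv_status_byname_req_t req; + * lv_t lv_buf; + * pe_t pe_buf[1024]; -- hypothetical bound + * lv_buf.lv_current_pe = pe_buf; + * req.lv = &lv_buf; + * strcpy ( req.lv_name, "/dev/test_vg/test_lv"); -- assumes a NAME_LEN array + * ioctl ( fd, LV_STATUS_BYNAME, &req); + */ + 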
+ /* change a physical volume */ + case PV_CHANGE: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &pv_change_req, arg, + sizeof ( pv_change_req)) != 0) + return -EFAULT; + + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + if ( vg[VG_CHR(minor)]->pv[p] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->pv[p]->pv_name, + pv_change_req.pv_name) == 0) { +#ifdef LVM_GET_INODE + inode_sav = vg[VG_CHR(minor)]->pv[p]->inode; +#endif + if ( copy_from_user ( vg[VG_CHR(minor)]->pv[p], + pv_change_req.pv, + sizeof ( pv_t)) != 0) + return -EFAULT; + + /* We don't need the PE list + in kernel space as with the LV's pe_t list */ + vg[VG_CHR(minor)]->pv[p]->pe = NULL; +#ifdef LVM_GET_INODE + vg[VG_CHR(minor)]->pv[p]->inode = inode_sav; +#endif + return 0; + } + } + return -ENXIO; + + + /* get physical volume data (pv_t structure only) */ + case PV_STATUS: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &pv_status_req, arg, + sizeof ( pv_status_req)) != 0) + return -EFAULT; + + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + if ( vg[VG_CHR(minor)]->pv[p] != NULL) { + if ( lvm_strcmp ( vg[VG_CHR(minor)]->pv[p]->pv_name, + pv_status_req.pv_name) == 0) { + if ( copy_to_user ( pv_status_req.pv, + vg[VG_CHR(minor)]->pv[p], + sizeof ( pv_t)) != 0) + return -EFAULT; + return 0; + } + } + } + return -ENXIO; + + + /* physical volume buffer flush/invalidate */ + case PV_FLUSH: + if ( copy_from_user ( &pv_flush_req, arg, sizeof ( pv_flush_req)) != 0) + return -EFAULT; + + for ( v = 0; v < ABS_MAX_VG; v++) { + if ( vg[v] == NULL) continue; + for ( p = 0; p < vg[v]->pv_max; p++) { + if ( vg[v]->pv[p] != NULL && + lvm_strcmp ( vg[v]->pv[p]->pv_name, + pv_flush_req.pv_name) == 0) { + fsync_dev ( vg[v]->pv[p]->pv_dev); + invalidate_buffers ( vg[v]->pv[p]->pv_dev); + return 0; + } + } + } + return 0; + + + default: + printk ( KERN_WARNING + "%s -- lvm_chr_ioctl: unknown command %x\n", + lvm_name, command); + return -EINVAL; + } + + return 0; +} /* lvm_chr_ioctl */ + + +/* + * character device close routine + */ +static int lvm_chr_release ( struct inode *inode, struct file *file) +{ +#ifdef DEBUG + int minor = MINOR ( inode->i_rdev); + printk ( KERN_DEBUG + "%s -- lvm_chr_release VG#: %d\n", lvm_name, VG_CHR(minor)); +#endif + +#ifdef MODULE + if ( GET_USE_COUNT ( &__this_module) > 0) MOD_DEC_USE_COUNT; +#endif + +#ifdef LVM_TOTAL_RESET + if ( lvm_reset_spindown > 0) { + lvm_reset_spindown = 0; + lvm_chr_open_count = 1; + } +#endif + + if ( lvm_chr_open_count > 0) lvm_chr_open_count--; + if ( lock == current->pid) { + lock = 0; /* release lock */ + wake_up_interruptible ( &lvm_wait); + } + + return 0; +} /* lvm_chr_release () */ + 
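+/* + * Note on units (derived from the code below; the user-space side is + * assumed): lv_size is kept in 512-byte sectors, so the byte size of an + * open LV follows from the standard BLKGETSIZE convention: + * + * long sectors = 0; + * ioctl ( fd, BLKGETSIZE, &sectors); + * long long bytes = ( long long) sectors * 512; + */ + 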
+/******************************************************************** + * + * Block device functions + * + ********************************************************************/ + +/* + * block device open routine + */ +static int lvm_blk_open ( struct inode *inode, struct file *file) { + int minor = MINOR ( inode->i_rdev); + +#ifdef DEBUG_LVM_BLK_OPEN + printk ( KERN_DEBUG + "%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d mode: 0x%X\n", + lvm_name, minor, VG_BLK(minor), LV_BLK(minor), file->f_mode); +#endif + +#ifdef LVM_TOTAL_RESET + if ( lvm_reset_spindown > 0) return -EPERM; +#endif + + if ( vg[VG_BLK(minor)] != NULL && + ( vg[VG_BLK(minor)]->vg_status & VG_ACTIVE) && + LV_BLK(minor) >= 0 && + LV_BLK(minor) < vg[VG_BLK(minor)]->lv_max && + vg[VG_BLK(minor)]->lv[LV_BLK(minor)] != NULL) { + + /* Check parallel LV spindown (LV remove) */ + if ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_status & LV_SPINDOWN) + return -EPERM; + + /* Check inactive LV and open for read/write */ + if ( file->f_mode & O_RDWR) { + if ( ! ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_status & LV_ACTIVE)) + return -EPERM; + if ( ! ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_access & LV_WRITE)) + return -EACCES; + } + + if ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open == 0) + vg[VG_BLK(minor)]->lv_open++; + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open++; + +#ifdef MODULE + MOD_INC_USE_COUNT; +#endif + +#ifdef DEBUG_LVM_BLK_OPEN + printk ( KERN_DEBUG + "%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d size: %d\n", + lvm_name, minor, VG_BLK(minor), LV_BLK(minor), + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_size); +#endif + + return 0; + } + + return -ENXIO; +} /* lvm_blk_open () */ + + +/* + * block device i/o-control routine + */ +static int lvm_blk_ioctl (struct inode *inode, struct file *file, + uint command, ulong a) { + int minor = MINOR ( inode->i_rdev); + void *arg = ( void*) a; + struct hd_geometry *hd = ( struct hd_geometry *) a; + +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl MINOR: %d command: 0x%X arg: %X " + "VG#: %d LV#: %d\n", + lvm_name, minor, command, ( ulong) arg, + VG_BLK(minor), LV_BLK(minor)); +#endif + + switch ( command) { + /* return device size */ + case BLKGETSIZE: +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- BLKGETSIZE: %u\n", + lvm_name, vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_size); +#endif + if ( copy_to_user ( ( long*) arg, &vg[VG_BLK(minor)]->\ + lv[LV_BLK(minor)]->lv_size, + sizeof ( vg[VG_BLK(minor)]->\ + lv[LV_BLK(minor)]->lv_size)) != 0) + return -EFAULT; + break; + + + /* flush buffer cache */ + case BLKFLSBUF: + /* super user validation */ + if ( ! suser ()) return -EACCES; + +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- BLKFLSBUF\n", lvm_name); +#endif + fsync_dev ( inode->i_rdev); + invalidate_buffers(inode->i_rdev); + break; + + + /* set read ahead for block device */ + case BLKRASET: + /* super user validation */ + if ( ! suser ()) return -EACCES; + +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- BLKRASET: %d sectors for %02X:%02X\n", + lvm_name, ( long) arg, MAJOR( inode->i_rdev), minor); +#endif + if ( ( long) arg < LVM_MIN_READ_AHEAD || + ( long) arg > LVM_MAX_READ_AHEAD) return -EINVAL; + read_ahead[MAJOR_NR] = + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_read_ahead = ( long) arg; + break; + + + /* get current read ahead setting */ + case BLKRAGET: +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- BLKRAGET\n", lvm_name); +#endif + if ( copy_to_user ( ( long*) arg, + &vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_read_ahead, + sizeof ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->\ + lv_read_ahead)) != 0) + return -EFAULT; + break; + + + /* get disk geometry */ + case HDIO_GETGEO: +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- HDIO_GETGEO\n", lvm_name); +#endif + if ( hd == NULL) return -EINVAL; + { + unsigned char heads = 64; + unsigned char sectors = 32; + long start = 0; + short cylinders = vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_size / + heads / sectors; + + if ( copy_to_user ( ( char*) &hd->heads, &heads, + sizeof ( heads)) != 0 || + copy_to_user ( ( char*) &hd->sectors, &sectors, + sizeof ( sectors)) != 0 || + copy_to_user ( ( short*) &hd->cylinders, + &cylinders, sizeof ( cylinders)) != 0 || + copy_to_user ( ( long*) &hd->start, &start, + sizeof ( start)) != 0) + return -EFAULT; + +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- cylinders: %d\n", + lvm_name, cylinders); +#endif + } + break; + + + /* set access flags of a logical volume */ + case LV_SET_ACCESS: + /* super user validation */ + if ( ! 
suser ()) return -EACCES; + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_access = ( ulong) arg; + break; + + + /* set status flags of a logical volume */ + case LV_SET_STATUS: + /* super user validation */ + if ( ! suser ()) return -EACCES; + if ( ! ( ( ulong) arg & LV_ACTIVE) && + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open > 1) return -EPERM; + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_status = ( ulong) arg; + break; + + + /* set allocation flags of a logical volume */ + case LV_SET_ALLOCATION: + /* super user validation */ + if ( ! suser ()) return -EACCES; + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_allocation = ( ulong) arg; + break; + + + default: + printk ( KERN_WARNING + "%s -- lvm_blk_ioctl: unknown command %d\n", + lvm_name, command); + return -EINVAL; + } + + return 0; +} /* lvm_blk_ioctl () */ + + +/* + * block device close routine + */ +static int lvm_blk_release ( struct inode *inode, struct file *file) +{ + int minor = MINOR ( inode->i_rdev); + +#ifdef DEBUG + printk ( KERN_DEBUG + "%s -- lvm_blk_release MINOR: %d VG#: %d LV#: %d\n", + lvm_name, minor, VG_BLK(minor), LV_BLK(minor)); +#endif + + sync_dev ( inode->i_rdev); + if ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open == 1) + vg[VG_BLK(minor)]->lv_open--; + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open--; + +#ifdef MODULE + MOD_DEC_USE_COUNT; +#endif + + return 0; +} /* lvm_blk_release () */ + + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +/* + * Support function /proc-Filesystem + */ +#define LVM_PROC_BUF ( i == 0 ? dummy_buf : &buf[sz]) + +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 25) +static int lvm_proc_get_info ( char *page, char **start, off_t pos, int count) +#else +static int lvm_proc_get_info ( char *page, char **start, off_t pos, + int count, int whence) +#endif +{ + int c, i, l, p, v, vg_counter, pv_counter, lv_counter, lv_open_counter, + lv_open_total, pe_t_bytes, lv_block_exception_t_bytes, seconds; + static off_t sz; + off_t sz_last; + char allocation_flag, inactive_flag, rw_flag, stripes_flag; + char *lv_name = NULL; + static char *buf = NULL; + static char dummy_buf[160]; /* sized for 2 lines */ + +#ifdef DEBUG_LVM_PROC_GET_INFO + printk ( KERN_DEBUG + "%s - lvm_proc_get_info CALLED pos: %lu count: %d whence: %d\n", + lvm_name, pos, count, whence); +#endif + + if ( pos == 0 || buf == NULL) { + sz_last = vg_counter = pv_counter = lv_counter = lv_open_counter = \ + lv_open_total = pe_t_bytes = lv_block_exception_t_bytes = 0; + + /* search for activity */ + for ( v = 0; v < ABS_MAX_VG; v++) { + if ( vg[v] != NULL) { + vg_counter++; + pv_counter += vg[v]->pv_cur; + lv_counter += vg[v]->lv_cur; + if ( vg[v]->lv_cur > 0) { + for ( l = 0; l < vg[v]->lv_max; l++) { + if ( vg[v]->lv[l] != NULL) { + pe_t_bytes += vg[v]->lv[l]->lv_allocated_le; + if ( vg[v]->lv[l]->lv_block_exception != NULL) { + lv_block_exception_t_bytes += + vg[v]->lv[l]->lv_remap_end; + } + if ( vg[v]->lv[l]->lv_open > 0) { + lv_open_counter++; + lv_open_total += vg[v]->lv[l]->lv_open; + } + } + } + } + } + } + pe_t_bytes *= sizeof ( pe_t); + lv_block_exception_t_bytes *= sizeof ( lv_block_exception_t); + + if ( buf != NULL) { +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG + "%s -- vfree %d\n", lvm_name, __LINE__); +#endif + vfree ( buf); + buf = NULL; + } + + /* 2 times: first to get size to allocate buffer, + 2nd to fill the vmalloced buffer */ + for ( i = 0; i < 2; i++) { + sz = 0; + sz += sprintf ( LVM_PROC_BUF, + "LVM " +#ifdef MODULE + "module" +#else + "driver" +#endif + " %s\n\n" + "Total: %d VG%s %d PV%s %d LV%s ", + 
lvm_short_version, + vg_counter, vg_counter == 1 ? "" : "s", + pv_counter, pv_counter == 1 ? "" : "s", + lv_counter, lv_counter == 1 ? "" : "s"); + sz += sprintf ( LVM_PROC_BUF, + "(%d LV%s open", + lv_open_counter, + lv_open_counter == 1 ? "" : "s"); + if ( lv_open_total > 0) sz += sprintf ( LVM_PROC_BUF, + " %d times)\n", + lv_open_total); + else sz += sprintf ( LVM_PROC_BUF, ")"); + sz += sprintf ( LVM_PROC_BUF, + "\nGlobal: %lu bytes vmalloced IOP version: %d ", + vg_counter * sizeof ( vg_t) + + pv_counter * sizeof ( pv_t) + + lv_counter * sizeof ( lv_t) + + pe_t_bytes + lv_block_exception_t_bytes + sz_last, + lvm_iop_version); + + seconds = CURRENT_TIME - loadtime; + if ( seconds < 0) loadtime = CURRENT_TIME + seconds; + if ( seconds / 86400 > 0) { + sz += sprintf ( LVM_PROC_BUF, "%d day%s ", + seconds / 86400, + seconds / 86400 == 0 || + seconds / 86400 > 1 ? "s": ""); + } + sz += sprintf ( LVM_PROC_BUF, "%d:%02d:%02d active\n", + ( seconds % 86400) / 3600, + ( seconds % 3600) / 60, + seconds % 60); + + if ( vg_counter > 0) { + for ( v = 0; v < ABS_MAX_VG; v++) { + /* volume group */ + if ( vg[v] != NULL) { + inactive_flag = ' '; + if ( ! ( vg[v]->vg_status & VG_ACTIVE)) + inactive_flag = 'I'; + sz += sprintf ( LVM_PROC_BUF, + "\nVG: %c%s [%d PV, %d LV/%d open] " + " PE Size: %d KB\n" + " Usage [KB/PE]: %d /%d total " + "%d /%d used %d /%d free", + inactive_flag, + vg[v]->vg_name, + vg[v]->pv_cur, + vg[v]->lv_cur, + vg[v]->lv_open, + vg[v]->pe_size >> 1, + vg[v]->pe_size * vg[v]->pe_total >> 1, + vg[v]->pe_total, + vg[v]->pe_allocated * vg[v]->pe_size >> 1, + vg[v]->pe_allocated, + ( vg[v]->pe_total - vg[v]->pe_allocated) * + vg[v]->pe_size >> 1, + vg[v]->pe_total - vg[v]->pe_allocated); + + /* physical volumes */ + sz += sprintf ( LVM_PROC_BUF, + "\n PV%s ", + vg[v]->pv_cur == 1 ? ": " : "s:"); + c = 0; + for ( p = 0; p < vg[v]->pv_max; p++) { + if ( vg[v]->pv[p] != NULL) { + inactive_flag = 'A'; + if ( ! ( vg[v]->pv[p]->pv_status & PV_ACTIVE)) + inactive_flag = 'I'; + allocation_flag = 'A'; + if ( ! ( vg[v]->pv[p]->pv_allocatable & PV_ALLOCATABLE)) + allocation_flag = 'N'; + sz += sprintf ( LVM_PROC_BUF, + "[%c%c] %-21s %8d /%-6d " + "%8d /%-6d %8d /%-6d", + inactive_flag, + allocation_flag, + vg[v]->pv[p]->pv_name, + vg[v]->pv[p]->pe_total * + vg[v]->pv[p]->pe_size >> 1, + vg[v]->pv[p]->pe_total, + vg[v]->pv[p]->pe_allocated * + vg[v]->pv[p]->pe_size >> 1, + vg[v]->pv[p]->pe_allocated, + ( vg[v]->pv[p]->pe_total - + vg[v]->pv[p]->pe_allocated) * + vg[v]->pv[p]->pe_size >> 1, + vg[v]->pv[p]->pe_total - + vg[v]->pv[p]->pe_allocated); + c++; + if ( c < vg[v]->pv_cur) sz += sprintf ( LVM_PROC_BUF, + "\n "); + } + } + + /* logical volumes */ + sz += sprintf ( LVM_PROC_BUF, + "\n LV%s ", + vg[v]->lv_cur == 1 ? ": " : "s:"); + c = 0; + for ( l = 0; l < vg[v]->lv_max; l++) { + if ( vg[v]->lv[l] != NULL) { + inactive_flag = 'A'; + if ( ! 
( vg[v]->lv[l]->lv_status & LV_ACTIVE)) + inactive_flag = 'I'; + rw_flag = 'R'; + if ( vg[v]->lv[l]->lv_access & LV_WRITE) rw_flag = 'W'; + allocation_flag = 'D'; + if ( vg[v]->lv[l]->lv_allocation & LV_CONTIGUOUS) + allocation_flag = 'C'; + stripes_flag = 'L'; + if ( vg[v]->lv[l]->lv_stripes > 1) stripes_flag = 'S'; + sz += sprintf ( LVM_PROC_BUF, + "[%c%c%c%c", + inactive_flag, + rw_flag, + allocation_flag, + stripes_flag); + if ( vg[v]->lv[l]->lv_stripes > 1) + sz += sprintf ( LVM_PROC_BUF, "%-2d", + vg[v]->lv[l]->lv_stripes); + else + sz += sprintf ( LVM_PROC_BUF, " "); + lv_name = lvm_strrchr ( vg[v]->lv[l]->lv_name, '/'); + if ( lv_name != NULL) lv_name++; + else lv_name = vg[v]->lv[l]->lv_name; + sz += sprintf ( LVM_PROC_BUF, "] %-25s", lv_name); + if ( lvm_strlen ( lv_name) > 25) + sz += sprintf ( LVM_PROC_BUF, + "\n "); + sz += sprintf ( LVM_PROC_BUF, "%9d /%-6d ", + vg[v]->lv[l]->lv_size >> 1, + vg[v]->lv[l]->lv_size / vg[v]->pe_size); + + if ( vg[v]->lv[l]->lv_open == 0) + sz += sprintf ( LVM_PROC_BUF, "close"); + else + sz += sprintf ( LVM_PROC_BUF, "%dx open", + vg[v]->lv[l]->lv_open); + c++; + if ( c < vg[v]->lv_cur) sz += sprintf ( LVM_PROC_BUF, + "\n "); + } + } + if ( vg[v]->lv_cur == 0) + sz += sprintf ( LVM_PROC_BUF, "none"); + sz += sprintf ( LVM_PROC_BUF, "\n"); + } + } + } + + if ( buf == NULL) { + if ( ( buf = vmalloc ( sz)) == NULL) { + sz = 0; + return sprintf ( page, "%s - vmalloc error at line %d\n", + lvm_name, __LINE__); + } + } + sz_last = sz; + } + } + + if ( pos > sz - 1) { + vfree ( buf); + buf = NULL; + return 0; + } + + *start = &buf[pos]; + if ( sz - pos < count) return sz - pos; + else return count; +} /* lvm_proc_get_info () */ +#endif /* #if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS */ + + +/* + * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c + * (see init_module/lvm_init) + */ +static int lvm_map ( struct buffer_head *bh, int rw) { + int minor = MINOR ( bh->b_dev); + int ret = 0; + ulong index; + ulong size = bh->b_size >> 9; + ulong rsector_tmp = bh->b_blocknr * size; + ulong rsector_sav; + kdev_t rdev_tmp = bh->b_dev; + kdev_t rdev_sav; + lv_t *lv = vg[VG_BLK(minor)]->lv[LV_BLK(minor)]; + unsigned long pe_start; + + + if ( ! ( lv->lv_status & LV_ACTIVE)) { + printk ( KERN_ALERT + "%s - lvm_map: ll_rw_blk for inactive LV %s\n", + lvm_name, lv->lv_name); + return -1; + } + +/* +if ( lv->lv_access & LV_SNAPSHOT) +printk ( "%s -- %02d:%02d block: %lu rw: %d\n", lvm_name, MAJOR ( bh->b_dev), MINOR ( bh->b_dev), bh->b_blocknr, rw); +*/ + + /* take care of snapshot chunk writes before + check for writable logical volume */ + if ( ( lv->lv_access & LV_SNAPSHOT) && + MAJOR ( bh->b_dev) != 0 && + MAJOR ( bh->b_dev) != MAJOR_NR && +#ifdef WRITEA + ( rw == WRITEA || rw == WRITE)) +#else + rw == WRITE) +#endif + { +/* +printk ( "%s -- doing snapshot write for %02d:%02d[%02d:%02d] b_blocknr: %lu b_rsector: %lu\n", lvm_name, MAJOR ( bh->b_dev), MINOR ( bh->b_dev), MAJOR ( bh->b_dev), MINOR ( bh->b_dev), bh->b_blocknr, bh->b_rsector); +*/ + return 0; + } + +#ifdef WRITEA + if ( ( rw == WRITE || rw == WRITEA) && +#else + if ( rw == WRITE && +#endif + ! 
( lv->lv_access & LV_WRITE)) { + printk ( KERN_CRIT + "%s - lvm_map: ll_rw_blk write for readonly LV %s\n", + lvm_name, lv->lv_name); + return -1; + } + + +#ifdef DEBUG_MAP + printk ( KERN_DEBUG + "%s - lvm_map minor:%d *rdev: %02d:%02d *rsector: %lu " + "size:%lu\n", + lvm_name, minor, + MAJOR ( rdev_tmp), + MINOR ( rdev_tmp), + rsector_tmp, size); +#endif + + if ( rsector_tmp + size > lv->lv_size) { + printk ( KERN_ALERT + "%s - lvm_map *rsector: %lu or size: %lu wrong for" + " minor: %2d\n", lvm_name, rsector_tmp, size, minor); + return -1; + } + + rsector_sav = rsector_tmp; + rdev_sav = rdev_tmp; + +lvm_second_remap: + /* linear mapping */ + if ( lv->lv_stripes < 2) { + index = rsector_tmp / vg[VG_BLK(minor)]->pe_size; /* get the index */ + pe_start = lv->lv_current_pe[index].pe; + rsector_tmp = lv->lv_current_pe[index].pe + + ( rsector_tmp % vg[VG_BLK(minor)]->pe_size); + rdev_tmp = lv->lv_current_pe[index].dev; + +#ifdef DEBUG_MAP + printk ( KERN_DEBUG + "lv_current_pe[%ld].pe: %d rdev: %02d:%02d rsector:%ld\n", + index, + lv->lv_current_pe[index].pe, + MAJOR ( rdev_tmp), + MINOR ( rdev_tmp), + rsector_tmp); +#endif + + /* striped mapping */ + } else { + ulong stripe_index; + ulong stripe_length; + + stripe_length = vg[VG_BLK(minor)]->pe_size * lv->lv_stripes; + stripe_index = ( rsector_tmp % stripe_length) / lv->lv_stripesize; + index = rsector_tmp / stripe_length + + ( stripe_index % lv->lv_stripes) * + ( lv->lv_allocated_le / lv->lv_stripes); + pe_start = lv->lv_current_pe[index].pe; + rsector_tmp = lv->lv_current_pe[index].pe + + ( rsector_tmp % stripe_length) - + ( stripe_index % lv->lv_stripes) * lv->lv_stripesize - + stripe_index / lv->lv_stripes * + ( lv->lv_stripes - 1) * lv->lv_stripesize; + rdev_tmp = lv->lv_current_pe[index].dev; + +#ifdef DEBUG_MAP + printk(KERN_DEBUG + "lv_current_pe[%ld].pe: %d rdev: %02d:%02d rsector:%ld\n" + "stripe_length: %ld stripe_index: %ld\n", + index, + lv->lv_current_pe[index].pe, + MAJOR ( rdev_tmp), + MINOR ( rdev_tmp), + rsector_tmp, + stripe_length, + stripe_index); +#endif + } + + /* handle physical extents on the move */ + if ( pe_lock_req.lock == LOCK_PE) { + if ( rdev_tmp == pe_lock_req.data.pv_dev && + rsector_tmp >= pe_lock_req.data.pv_offset && + rsector_tmp < ( pe_lock_req.data.pv_offset + + vg[VG_BLK(minor)]->pe_size)) { + sleep_on ( &lvm_map_wait); + rsector_tmp = rsector_sav; + rdev_tmp = rdev_sav; + goto lvm_second_remap; + } + } + + /* statistics */ +#ifdef WRITEA + if ( rw == WRITE || rw == WRITEA) +#else + if ( rw == WRITE) +#endif + lv->lv_current_pe[index].writes++; + else + lv->lv_current_pe[index].reads++; + + /* snapshot volume exception handling based on physical device addresses */ + if ( lv->lv_access & ( LV_SNAPSHOT | LV_SNAPSHOT_ORG)) { + /* original logical volume */ + if ( lv->lv_access & LV_SNAPSHOT_ORG) { +#ifdef WRITEA + if ( rw == WRITE || rw == WRITEA) +#else + if ( rw == WRITE) +#endif + { + lv_t *lv_ptr; + + /* start with first snapshot and loop through all of them */ + for ( lv_ptr = lv->lv_snapshot_next; + lv_ptr != NULL; + lv_ptr = lv_ptr->lv_snapshot_next) { + down(&lv_ptr->lv_snapshot_sem); + /* do we still have free exception storage for this snapshot? 
*/ + if ( lv_ptr->lv_block_exception != NULL) { + kdev_t __dev; + unsigned long __sector; + + __dev = rdev_tmp; + __sector = rsector_tmp; + if (!lvm_snapshot_remap_block(&rdev_tmp, + &rsector_tmp, + pe_start, + lv_ptr)) + /* create a new mapping */ + ret = lvm_snapshot_COW(rdev_tmp, + rsector_tmp, + pe_start, + rsector_sav, + lv_ptr); + rdev_tmp = __dev; + rsector_tmp = __sector; + } + up(&lv_ptr->lv_snapshot_sem); + } + } + } else { + /* remap snapshot logical volume */ + down(&lv->lv_snapshot_sem); + if ( lv->lv_block_exception != NULL) + lvm_snapshot_remap_block ( &rdev_tmp, &rsector_tmp, pe_start, lv); + up(&lv->lv_snapshot_sem); + } + } + + bh->b_rdev = rdev_tmp; + bh->b_rsector = rsector_tmp; + + return ret; +} /* lvm_map () */ + + +/* + * lvm_map snapshot logical volume support functions + */ + +/* + * end lvm_map snapshot logical volume support functions + */ + + +/* + * internal support functions + */ + +#ifdef LVM_HD_NAME +/* + * generate "hard disk" name + */ +void lvm_hd_name ( char *buf, int minor) { + int len = 0; + + if ( vg[VG_BLK(minor)] == NULL || + vg[VG_BLK(minor)]->lv[LV_BLK(minor)] == NULL) return; + len = lvm_strlen ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_name) - 5; + lvm_memcpy ( buf, &vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_name[5], len); + buf[len] = 0; + return; +} +#endif + + +/* + * this one never should be called... + */ +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) +static void lvm_dummy_device_request ( request_queue_t *t) +#else +static void lvm_dummy_device_request ( void) +#endif +{ + printk ( KERN_EMERG + "%s -- oops, got lvm request for %02d:%02d [sector: %lu]\n", + lvm_name, + MAJOR ( CURRENT->rq_dev), + MINOR ( CURRENT->rq_dev), + CURRENT->sector); + return; +} + + +/* + * character device support function VGDA create + */ +int do_vg_create ( int minor, void *arg) { + int snaporg_minor = 0; + ulong l, p; + lv_t lv; + vg_t *vg_ptr; + + if ( vg[VG_CHR(minor)] != NULL) return -EPERM; + + if ( ( vg_ptr = kmalloc ( sizeof ( vg_t), GFP_USER)) == NULL) { + printk ( KERN_CRIT + "%s -- VG_CREATE: kmalloc error VG at line %d\n", + lvm_name, __LINE__); + return -ENOMEM; + } + + /* get the volume group structure */ + if ( copy_from_user ( vg_ptr, arg, sizeof ( vg_t)) != 0) { + kfree ( vg_ptr); + return -EFAULT; + } + + /* we are not that active so far... 
*/ + vg_ptr->vg_status &= ~VG_ACTIVE; + vg[VG_CHR(minor)] = vg_ptr; + + vg[VG_CHR(minor)]->pe_allocated = 0; + if ( vg[VG_CHR(minor)]->pv_max > ABS_MAX_PV) { + printk ( KERN_WARNING + "%s -- Can't activate VG: ABS_MAX_PV too small\n", + lvm_name); + kfree ( vg[VG_CHR(minor)]); + vg[VG_CHR(minor)] = NULL; + return -EPERM; + } + if ( vg[VG_CHR(minor)]->lv_max > ABS_MAX_LV) { + printk ( KERN_WARNING + "%s -- Can't activate VG: ABS_MAX_LV too small for %u\n", + lvm_name, vg[VG_CHR(minor)]->lv_max); + kfree ( vg[VG_CHR(minor)]); + vg[VG_CHR(minor)] = NULL; + return -EPERM; + } + + /* get the physical volume structures */ + vg[VG_CHR(minor)]->pv_act = vg[VG_CHR(minor)]->pv_cur = 0; + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + /* user space address */ + if ( ( pvp = vg[VG_CHR(minor)]->pv[p]) != NULL) { + vg[VG_CHR(minor)]->pv[p] = kmalloc ( sizeof ( pv_t), GFP_USER); + if ( vg[VG_CHR(minor)]->pv[p] == NULL) { + printk ( KERN_CRIT + "%s -- VG_CREATE: kmalloc error PV at line %d\n", + lvm_name, __LINE__); + do_vg_remove ( minor); + return -ENOMEM; + } + if ( copy_from_user ( vg[VG_CHR(minor)]->pv[p], pvp, + sizeof ( pv_t)) != 0) { + do_vg_remove ( minor); + return -EFAULT; + } + + /* We don't need the PE list + in kernel space as with the LV's pe_t list (see below) */ + vg[VG_CHR(minor)]->pv[p]->pe = NULL; + vg[VG_CHR(minor)]->pv[p]->pe_allocated = 0; + vg[VG_CHR(minor)]->pv[p]->pv_status = PV_ACTIVE; + vg[VG_CHR(minor)]->pv_act++; + vg[VG_CHR(minor)]->pv_cur++; + +#ifdef LVM_GET_INODE + /* insert a dummy inode for fs_may_mount */ + vg[VG_CHR(minor)]->pv[p]->inode = + lvm_get_inode ( vg[VG_CHR(minor)]->pv[p]->pv_dev); +#endif + } + } + + /* get the logical volume structures */ + vg[VG_CHR(minor)]->lv_cur = 0; + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + /* user space address */ + if ( ( lvp = vg[VG_CHR(minor)]->lv[l]) != NULL) { + if ( copy_from_user ( &lv, lvp, sizeof ( lv_t)) != 0) { + do_vg_remove ( minor); + return -EFAULT; + } + vg[VG_CHR(minor)]->lv[l] = NULL; + { + int err; + + err = do_lv_create(minor, lv.lv_name, &lv); + if (err) + { + do_vg_remove(minor); + return err; + } + } + } + } + + /* Second pass to correct snapshot logical volumes which are not + in place during the first pass above */ + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT) { + snaporg_minor = vg[VG_CHR(minor)]->lv[l]->lv_snapshot_minor; + if ( vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)] != NULL) { + /* get pointer to original logical volume */ + lv_t *lv_ptr = vg[VG_CHR(minor)]->lv[l]->lv_snapshot_org = + vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]; + + /* set necessary fields of original logical volume */ + lv_ptr->lv_access |= LV_SNAPSHOT_ORG; + lv_ptr->lv_snapshot_minor = 0; + lv_ptr->lv_snapshot_org = lv_ptr; + lv_ptr->lv_snapshot_prev = NULL; + + /* find last snapshot logical volume in the chain */ + while ( lv_ptr->lv_snapshot_next != NULL) + lv_ptr = lv_ptr->lv_snapshot_next; + + /* set back pointer to this last one in our new logical volume */ + vg[VG_CHR(minor)]->lv[l]->lv_snapshot_prev = lv_ptr; + + /* last logical volume now points to our new snapshot volume */ + lv_ptr->lv_snapshot_next = vg[VG_CHR(minor)]->lv[l]; + + /* now point to the new one */ + lv_ptr = lv_ptr->lv_snapshot_next; + + /* set necessary fields of new snapshot logical volume */ + lv_ptr->lv_snapshot_next = NULL; + lv_ptr->lv_current_pe = + vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_current_pe; + 
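+ /* + * At this point the chain is a doubly linked list rooted at the + * origin (org <-> snap1 <-> ... <-> new snapshot -> NULL), with + * every snapshot's lv_snapshot_org pointing back at org. The + * fields below are simply borrowed from the origin: a snapshot + * has no PE allocation of its own, so lvm_map () resolves its + * blocks through the origin's map unless the exception table + * redirects them. + */ + 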
lv_ptr->lv_allocated_le = + vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_allocated_le; + lv_ptr->lv_current_le = + vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_current_le; + lv_ptr->lv_size = + vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_size; + } + } + } + + vg_count++; + + /* let's go active */ + vg[VG_CHR(minor)]->vg_status |= VG_ACTIVE; + +#ifdef MODULE + MOD_INC_USE_COUNT; +#endif + return 0; +} /* do_vg_create () */ + + +/* + * character device support function VGDA remove + */ +static int do_vg_remove ( int minor) { + int i; + + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + +#ifdef LVM_TOTAL_RESET + if ( vg[VG_CHR(minor)]->lv_open > 0 && lvm_reset_spindown == 0) +#else + if ( vg[VG_CHR(minor)]->lv_open > 0) +#endif + return -EPERM; + + /* let's go inactive */ + vg[VG_CHR(minor)]->vg_status &= ~VG_ACTIVE; + + /* free LVs */ + /* first free snapshot logical volumes */ + for ( i = 0; i < vg[VG_CHR(minor)]->lv_max; i++) { + if ( vg[VG_CHR(minor)]->lv[i] != NULL && + vg[VG_CHR(minor)]->lv[i]->lv_access & LV_SNAPSHOT) { + do_lv_remove ( minor, NULL, i); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout ( 1); + } + } + /* then free the rest */ + for ( i = 0; i < vg[VG_CHR(minor)]->lv_max; i++) { + if ( vg[VG_CHR(minor)]->lv[i] != NULL) { + do_lv_remove ( minor, NULL, i); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout ( 1); + } + } + + /* free PVs */ + for ( i = 0; i < vg[VG_CHR(minor)]->pv_max; i++) { + if ( vg[VG_CHR(minor)]->pv[i] != NULL) { +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG + "%s -- kfree %d\n", lvm_name, __LINE__); +#endif +#ifdef LVM_GET_INODE + lvm_clear_inode ( vg[VG_CHR(minor)]->pv[i]->inode); +#endif + kfree ( vg[VG_CHR(minor)]->pv[i]); + vg[VG_CHR(minor)]->pv[i] = NULL; + } + } + +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__); +#endif + kfree ( vg[VG_CHR(minor)]); + vg[VG_CHR(minor)] = NULL; + + vg_count--; + +#ifdef MODULE + MOD_DEC_USE_COUNT; +#endif + return 0; +} /* do_vg_remove () */ + 
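+/* + * Illustration (assumed tool behaviour, not part of this patch): the + * vgcreate/vgchange binaries drive the two support functions above + * through the VG's character device, roughly: + * + * vg_t vg_buf; -- filled from the on-disk VGDA by the tool + * ioctl ( fd, VG_CREATE, &vg_buf); -- builds and activates the VGDA + * ioctl ( fd, VG_REMOVE, 0); -- allowed only with no open LVs + */ + 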
+ +/* + * character device support function logical volume create + */ +static int do_lv_create ( int minor, char *lv_name, lv_t *lv) { + int l, le, l_new, p, size; + ulong lv_status_save; + lv_block_exception_t *lvbe = lv->lv_block_exception; + lv_t *lv_ptr = NULL; + + if ( ( pep = lv->lv_current_pe) == NULL) return -EINVAL; + if ( lv->lv_chunk_size > LVM_SNAPSHOT_MAX_CHUNK) return -EINVAL; + + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->lv[l]->lv_name, lv_name) == 0) + return -EEXIST; + } + + /* in case of an lv_remove(), lv_create() pair; e.g. lvrename does this */ + l_new = -1; + if ( vg[VG_CHR(minor)]->lv[lv->lv_number] == NULL) l_new = lv->lv_number; + else { + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] == NULL) if ( l_new == -1) l_new = l; + } + } + if ( l_new == -1) return -EPERM; + l = l_new; + + if ( ( lv_ptr = kmalloc ( sizeof ( lv_t), GFP_USER)) == NULL) { + printk ( KERN_CRIT "%s -- LV_CREATE: kmalloc error LV at line %d\n", + lvm_name, __LINE__); + return -ENOMEM; + } + + /* copy preloaded LV */ + lvm_memcpy ( ( char*) lv_ptr, ( char *) lv, sizeof ( lv_t)); + + lv_status_save = lv_ptr->lv_status; + lv_ptr->lv_status &= ~LV_ACTIVE; + lv_ptr->lv_snapshot_org = \ + lv_ptr->lv_snapshot_prev = \ + lv_ptr->lv_snapshot_next = NULL; + lv_ptr->lv_block_exception = NULL; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 3, 4) + lv_ptr->lv_snapshot_sem = MUTEX; +#else + init_MUTEX(&lv_ptr->lv_snapshot_sem); +#endif + vg[VG_CHR(minor)]->lv[l] = lv_ptr; + + /* get the PE structures from user space if this + is no snapshot logical volume */ + if ( ! ( lv_ptr->lv_access & LV_SNAPSHOT)) { + size = lv_ptr->lv_allocated_le * sizeof ( pe_t); + if ( ( lv_ptr->lv_current_pe = vmalloc ( size)) == NULL) { + printk ( KERN_CRIT + "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d Byte " + "at line %d\n", + lvm_name, size, __LINE__); +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG "%s -- vfree %d\n", lvm_name, __LINE__); +#endif + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -ENOMEM; + } + + if ( copy_from_user ( lv_ptr->lv_current_pe, pep, size)) { + vfree ( lv_ptr->lv_current_pe); + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -EFAULT; + } + + /* correct the PE count in PVs */ + for ( le = 0; le < lv_ptr->lv_allocated_le; le++) { + vg[VG_CHR(minor)]->pe_allocated++; + for ( p = 0; p < vg[VG_CHR(minor)]->pv_cur; p++) { + if ( vg[VG_CHR(minor)]->pv[p]->pv_dev == + lv_ptr->lv_current_pe[le].dev) + vg[VG_CHR(minor)]->pv[p]->pe_allocated++; + } + } + } else { + /* Get snapshot exception data and block list */ + if ( lvbe != NULL) { + lv_ptr->lv_snapshot_org = + vg[VG_CHR(minor)]->lv[LV_BLK(lv_ptr->lv_snapshot_minor)]; + if ( lv_ptr->lv_snapshot_org != NULL) { + size = lv_ptr->lv_remap_end * sizeof ( lv_block_exception_t); + if ( ( lv_ptr->lv_block_exception = vmalloc ( size)) == NULL) { + printk ( KERN_CRIT + "%s -- do_lv_create: vmalloc error LV_BLOCK_EXCEPTION " + "of %d byte at line %d\n", + lvm_name, size, __LINE__); +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG "%s -- vfree %d\n", lvm_name, __LINE__); +#endif + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -ENOMEM; + } + + if ( copy_from_user ( lv_ptr->lv_block_exception, lvbe, size)) { + vfree ( lv_ptr->lv_block_exception); + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -EFAULT; + } + + /* get pointer to original logical volume */ + lv_ptr = lv_ptr->lv_snapshot_org; + + lv_ptr->lv_snapshot_minor = 0; + lv_ptr->lv_snapshot_org = lv_ptr; + lv_ptr->lv_snapshot_prev = NULL; + /* walk through the snapshot list */ + while ( lv_ptr->lv_snapshot_next != NULL) + lv_ptr = lv_ptr->lv_snapshot_next; + /* now lv_ptr points to the last existing snapshot in the chain */ + vg[VG_CHR(minor)]->lv[l]->lv_snapshot_prev = lv_ptr; + /* our new one now points back to the previous last in the chain */ + lv_ptr = vg[VG_CHR(minor)]->lv[l]; + /* now lv_ptr points to our new last snapshot logical volume */ + lv_ptr->lv_snapshot_org = lv_ptr->lv_snapshot_prev->lv_snapshot_org; + lv_ptr->lv_snapshot_next = NULL; + 
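+ /* the new snapshot borrows the origin's PE map and geometry + below; only blocks present in the exception table are + remapped by lvm_map () at run time */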
lv_ptr->lv_current_pe = lv_ptr->lv_snapshot_org->lv_current_pe; + lv_ptr->lv_allocated_le = lv_ptr->lv_snapshot_org->lv_allocated_le; + lv_ptr->lv_current_le = lv_ptr->lv_snapshot_org->lv_current_le; + lv_ptr->lv_size = lv_ptr->lv_snapshot_org->lv_size; + lv_ptr->lv_stripes = lv_ptr->lv_snapshot_org->lv_stripes; + lv_ptr->lv_stripesize = lv_ptr->lv_snapshot_org->lv_stripesize; + { + int err; + + err = lvm_snapshot_alloc(lv_ptr); + if (err) + { + vfree(lv_ptr->lv_block_exception); + kfree(lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return err; + } + } + } else { + vfree ( lv_ptr->lv_block_exception); + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -EFAULT; + } + } else { + kfree ( vg[VG_CHR(minor)]->lv[l]); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -EINVAL; + } + } /* if ( vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT) */ + + lv_ptr = vg[VG_CHR(minor)]->lv[l]; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size; + lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; + vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = vg[VG_CHR(minor)]->vg_number; + vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = lv_ptr->lv_number; + LVM_CORRECT_READ_AHEAD ( lv_ptr->lv_read_ahead); + read_ahead[MAJOR_NR] = lv_ptr->lv_read_ahead; + vg[VG_CHR(minor)]->lv_cur++; + lv_ptr->lv_status = lv_status_save; + + /* optionally add our new snapshot LV */ + if ( lv_ptr->lv_access & LV_SNAPSHOT) { + /* sync the original logical volume */ + fsync_dev ( lv_ptr->lv_snapshot_org->lv_dev); + /* put ourselves into the chain */ + lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr; + lv_ptr->lv_snapshot_org->lv_access |= LV_SNAPSHOT_ORG; + } + + return 0; +} /* do_lv_create () */ + + +/* + * character device support function logical volume remove + */ +static int do_lv_remove ( int minor, char *lv_name, int l) { + uint le, p; + lv_t *lv_ptr = NULL; + + if ( l == -1) { + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->lv[l]->lv_name, lv_name) == 0) { + break; + } + } + } + + if ( l < vg[VG_CHR(minor)]->lv_max) { + lv_ptr = vg[VG_CHR(minor)]->lv[l]; +#ifdef LVM_TOTAL_RESET + if ( lv_ptr->lv_open > 0 && lvm_reset_spindown == 0) +#else + if ( lv_ptr->lv_open > 0) +#endif + return -EBUSY; + + /* check for deletion of snapshot source while + snapshot volume still exists */ + if ( ( lv_ptr->lv_access & LV_SNAPSHOT_ORG) && + lv_ptr->lv_snapshot_next != NULL) + return -EPERM; + + lv_ptr->lv_status |= LV_SPINDOWN; + + /* sync the buffers */ + fsync_dev ( lv_ptr->lv_dev); + + lv_ptr->lv_status &= ~LV_ACTIVE; + + /* invalidate the buffers */ + invalidate_buffers ( lv_ptr->lv_dev); + + /* reset generic hd */ + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = -1; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = 0; + lvm_size[MINOR(lv_ptr->lv_dev)] = 0; + + /* reset VG/LV mapping */ + vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = ABS_MAX_VG; + vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = -1; + + /* correct the PE count in PVs if this is no snapshot logical volume */ + if ( ! 
( lv_ptr->lv_access & LV_SNAPSHOT)) { + /* only if this is no snapshot logical volume because we share + the lv_current_pe[] structs with the original logical volume */ + for ( le = 0; le < lv_ptr->lv_allocated_le; le++) { + vg[VG_CHR(minor)]->pe_allocated--; + for ( p = 0; p < vg[VG_CHR(minor)]->pv_cur; p++) { + if ( vg[VG_CHR(minor)]->pv[p]->pv_dev == + lv_ptr->lv_current_pe[le].dev) + vg[VG_CHR(minor)]->pv[p]->pe_allocated--; + } + } + vfree ( lv_ptr->lv_current_pe); + /* LV_SNAPSHOT */ + } else { +/* + if ( lv_ptr->lv_block_exception != NULL) { + int i; + kdev_t last_dev; + for ( i = last_dev = 0; i < lv_ptr->lv_remap_ptr; i++) { + if ( lv_ptr->lv_block_exception[i].rdev_new != last_dev) { + last_dev = lv_ptr->lv_block_exception[i].rdev_new; + invalidate_buffers ( last_dev); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout ( 1); + } + } + } +*/ + /* remove this snapshot logical volume from the chain */ + lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next; + if ( lv_ptr->lv_snapshot_next != NULL) { + lv_ptr->lv_snapshot_next->lv_snapshot_prev = + lv_ptr->lv_snapshot_prev; + } + /* no more snapshots? */ + if ( lv_ptr->lv_snapshot_org->lv_snapshot_next == NULL) + lv_ptr->lv_snapshot_org->lv_access &= ~LV_SNAPSHOT_ORG; + lvm_snapshot_release(lv_ptr); + } + +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__); +#endif + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + vg[VG_CHR(minor)]->lv_cur--; + return 0; + } + + return -ENXIO; +} /* do_lv_remove () */ + + +/* + * character device support function logical volume extend / reduce + */ +static int do_lv_extend_reduce ( int minor, char *lv_name, lv_t *lv) { + int l, le, p, size, old_allocated_le; + uint32_t end, lv_status_save; + pe_t *pe; + + if ( ( pep = lv->lv_current_pe) == NULL) return -EINVAL; + + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->lv[l]->lv_name, lv_name) == 0) + break; + } + if ( l == vg[VG_CHR(minor)]->lv_max) return -ENXIO; + + /* check for active snapshot */ + if ( lv->lv_access & ( LV_SNAPSHOT|LV_SNAPSHOT_ORG)) return -EPERM; + + if ( ( pe = vmalloc ( size = lv->lv_current_le * sizeof ( pe_t))) == NULL) { + printk ( KERN_CRIT + "%s -- do_lv_extend_reduce: vmalloc error LV_CURRENT_PE " + "of %d Byte at line %d\n", + lvm_name, size, __LINE__); + return -ENOMEM; + } + + /* get the PE structures from user space */ + if ( copy_from_user ( pe, pep, size)) { + vfree ( pe); + return -EFAULT; + } + +#ifdef DEBUG + printk ( KERN_DEBUG + "%s -- fsync_dev and " + "invalidate_buffers for %s [%s] in %s\n", + lvm_name, vg[VG_CHR(minor)]->lv[l]->lv_name, + kdevname ( vg[VG_CHR(minor)]->lv[l]->lv_dev), + vg[VG_CHR(minor)]->vg_name); +#endif + + vg[VG_CHR(minor)]->lv[l]->lv_status |= LV_SPINDOWN; + fsync_dev ( vg[VG_CHR(minor)]->lv[l]->lv_dev); + vg[VG_CHR(minor)]->lv[l]->lv_status &= ~LV_ACTIVE; + invalidate_buffers ( vg[VG_CHR(minor)]->lv[l]->lv_dev); + + /* reduce allocation counters on PV(s) */ + for ( le = 0; le < vg[VG_CHR(minor)]->lv[l]->lv_allocated_le; le++) { + vg[VG_CHR(minor)]->pe_allocated--; + for ( p = 0; p < vg[VG_CHR(minor)]->pv_cur; p++) { + if ( vg[VG_CHR(minor)]->pv[p]->pv_dev == + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev) { + vg[VG_CHR(minor)]->pv[p]->pe_allocated--; + break; + } + } + } + +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG "%s -- vfree %d\n", lvm_name, __LINE__); +#endif + + /* save pointer to "old" lv/pe pointer array */ + pep1 = 
vg[VG_CHR(minor)]->lv[l]->lv_current_pe; + end = vg[VG_CHR(minor)]->lv[l]->lv_current_le; + + /* save open counter */ + lv_open = vg[VG_CHR(minor)]->lv[l]->lv_open; + + /* save # of old allocated logical extents */ + old_allocated_le = vg[VG_CHR(minor)]->lv[l]->lv_allocated_le; + + /* copy preloaded LV */ + lv_status_save = lv->lv_status; + lv->lv_status |= LV_SPINDOWN; + lv->lv_status &= ~LV_ACTIVE; + lvm_memcpy ( ( char*) vg[VG_CHR(minor)]->lv[l], ( char*) lv, sizeof ( lv_t)); + vg[VG_CHR(minor)]->lv[l]->lv_current_pe = pe; + vg[VG_CHR(minor)]->lv[l]->lv_open = lv_open; + + /* save available i/o statistics */ + /* linear logical volume */ + if ( vg[VG_CHR(minor)]->lv[l]->lv_stripes < 2) { + /* Check which last LE shall be used */ + if ( end > vg[VG_CHR(minor)]->lv[l]->lv_current_le) + end = vg[VG_CHR(minor)]->lv[l]->lv_current_le; + for ( le = 0; le < end; le++) { + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].reads = pep1[le].reads; + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].writes = pep1[le].writes; + } + /* striped logical volume */ + } else { + uint i, j, source, dest, end, old_stripe_size, new_stripe_size; + + old_stripe_size = old_allocated_le / vg[VG_CHR(minor)]->lv[l]->lv_stripes; + new_stripe_size = vg[VG_CHR(minor)]->lv[l]->lv_allocated_le / + vg[VG_CHR(minor)]->lv[l]->lv_stripes; + end = old_stripe_size; + if ( end > new_stripe_size) end = new_stripe_size; + for ( i = source = dest = 0; + i < vg[VG_CHR(minor)]->lv[l]->lv_stripes; i++) { + for ( j = 0; j < end; j++) { + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[dest+j].reads = + pep1[source+j].reads; + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[dest+j].writes = + pep1[source+j].writes; + } + source += old_stripe_size; + dest += new_stripe_size; + } + } + vfree ( pep1); pep1 = NULL; + + + /* extend the PE count in PVs */ + for ( le = 0; le < vg[VG_CHR(minor)]->lv[l]->lv_allocated_le; le++) { + vg[VG_CHR(minor)]->pe_allocated++; + for ( p = 0; p < vg[VG_CHR(minor)]->pv_cur; p++) { + if ( vg[VG_CHR(minor)]->pv[p]->pv_dev == + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev) { + vg[VG_CHR(minor)]->pv[p]->pe_allocated++; + break; + } + } + } + + lvm_gendisk.part[MINOR(vg[VG_CHR(minor)]->lv[l]->lv_dev)].start_sect = 0; + lvm_gendisk.part[MINOR(vg[VG_CHR(minor)]->lv[l]->lv_dev)].nr_sects = + vg[VG_CHR(minor)]->lv[l]->lv_size; + lvm_size[MINOR(vg[VG_CHR(minor)]->lv[l]->lv_dev)] = + vg[VG_CHR(minor)]->lv[l]->lv_size >> 1; + /* vg_lv_map array doesn't have to be changed here */ + + LVM_CORRECT_READ_AHEAD ( vg[VG_CHR(minor)]->lv[l]->lv_read_ahead); + read_ahead[MAJOR_NR] = vg[VG_CHR(minor)]->lv[l]->lv_read_ahead; + vg[VG_CHR(minor)]->lv[l]->lv_status = lv_status_save; + + return 0; +} /* do_lv_extend_reduce () */ + + +/* + * support function to initialize gendisk variables + */ +#ifdef __initfunc +__initfunc ( void lvm_geninit ( struct gendisk *lvm_gdisk)) +#else +void __init lvm_geninit ( struct gendisk *lvm_gdisk) +#endif +{ + int i = 0; + +#ifdef DEBUG_GENDISK + printk ( KERN_DEBUG "%s -- lvm_gendisk\n", lvm_name); +#endif + + for ( i = 0; i < MAX_LV; i++) { + lvm_gendisk.part[i].start_sect = -1; /* avoid partition check */ + lvm_size[i] = lvm_gendisk.part[i].nr_sects = 0; + lvm_blocksizes[i] = BLOCK_SIZE; + } + + blksize_size[MAJOR_NR] = lvm_blocksizes; + blk_size[MAJOR_NR] = lvm_size; + + return; +} /* lvm_geninit () */ + 
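+/* + * Note on units (derived from lvm_geninit () above): lv_size is kept in + * 512-byte sectors, so blk_size[MAJOR_NR][minor] = lv_size >> 1 is the + * size in 1 KB blocks, which is the convention the block layer expects. + */ + 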
+#ifdef LVM_GET_INODE +/* + * support function to get an empty inode + * + * Gets an empty inode to be inserted into the inode hash, + * so that a physical volume can't be mounted. + * This is analogous to drivers/block/md.c + * + * Is this the real thing? + * + */ +struct inode *lvm_get_inode ( int dev) { + struct inode *inode_this = NULL; + + /* Lock the device by inserting a dummy inode. */ + inode_this = get_empty_inode (); + inode_this->i_dev = dev; + insert_inode_hash ( inode_this); + return inode_this; +} + + +/* + * support function to clear an inode + * + */ +void lvm_clear_inode ( struct inode *inode) { +#ifdef I_FREEING + inode->i_state |= I_FREEING; +#endif + clear_inode ( inode); + return; +} +#endif /* #ifdef LVM_GET_INODE */ + + +/* my strlen */ +inline int lvm_strlen ( char *s1) { + int len = 0; + + while ( s1[len] != 0) len++; + return len; +} + + +/* my strcmp */ +inline int lvm_strcmp ( char *s1, char *s2) { + while ( *s1 != 0 && *s2 != 0) { + if ( *s1 != *s2) return -1; + s1++; s2++; + } + if ( *s1 == 0 && *s2 == 0) return 0; + return -1; +} + + +/* my strrchr */ +inline char *lvm_strrchr ( char *s1, char c) { + char *s2 = NULL; + + while ( *s1 != 0) { + if ( *s1 == c) s2 = s1; + s1++; + } + return s2; +} + + +/* my memcpy */ +inline void lvm_memcpy ( char *dest, char *source, int size) { + for ( ;size > 0; size--) *dest++ = *source++; +} diff -urN 2.3.46pre1/drivers/block/nbd.c 2.3.46pre1aa1/drivers/block/nbd.c --- 2.3.46pre1/drivers/block/nbd.c Sun Jan 30 15:43:28 2000 +++ 2.3.46pre1aa1/drivers/block/nbd.c Wed Feb 16 00:28:25 2000 @@ -184,10 +184,10 @@ DEBUG("reading control, "); reply.magic = 0; result = nbd_xmit(0, lo->sock, (char *) &reply, sizeof(reply)); - req = lo->tail; if (result <= 0) HARDFAIL("Recv control failed."); memcpy(&xreq, reply.handle, sizeof(xreq)); + req = blkdev_entry_prev_request(&lo->queue_head); if (xreq != req) FAIL("Unexpected handle received.\n"); @@ -216,47 +216,42 @@ { struct request *req; - while (1) { + down (&lo->queue_lock); + while (!list_empty(&lo->queue_head)) { req = nbd_read_stat(lo); if (!req) - return; - down (&lo->queue_lock); + goto out; #ifdef PARANOIA - if (req != lo->tail) { + if (req != blkdev_entry_prev_request(&lo->queue_head)) { printk(KERN_ALERT "NBD: I have problem...\n"); } if (lo != &nbd_dev[MINOR(req->rq_dev)]) { printk(KERN_ALERT "NBD: request corrupted!\n"); - goto next; + continue; } if (lo->magic != LO_MAGIC) { printk(KERN_ALERT "NBD: nbd_dev[] corrupted: Not enough magic\n"); - up (&lo->queue_lock); - return; + goto out; } #endif - nbd_end_request(req); - if (lo->tail == lo->head) { -#ifdef PARANOIA - if (lo->tail->next) - printk(KERN_ERR "NBD: I did not expect this\n"); -#endif - lo->head = NULL; - } - lo->tail = lo->tail->next; - next: + list_del(&req->queue); up (&lo->queue_lock); + + nbd_end_request(req); + + down (&lo->queue_lock); } + out: + up (&lo->queue_lock); } void nbd_clear_que(struct nbd_device *lo) { struct request *req; + unsigned long flags; - while (1) { - req = lo->tail; - if (!req) - return; + while (!list_empty(&lo->queue_head)) { + req = blkdev_entry_prev_request(&lo->queue_head); #ifdef PARANOIA if (lo != &nbd_dev[MINOR(req->rq_dev)]) { printk(KERN_ALERT "NBD: request corrupted when clearing!\n"); @@ -268,15 +263,12 @@ } #endif req->errors++; + list_del(&req->queue); + up(&lo->queue_lock); + nbd_end_request(req); - if (lo->tail == lo->head) { -#ifdef PARANOIA - if (lo->tail->next) - printk(KERN_ERR "NBD: I did not assume this\n"); -#endif - lo->head = NULL; - } - lo->tail = lo->tail->next; + + down(&lo->queue_lock); } } @@ -296,7 +288,7 @@ int dev; struct nbd_device *lo; - while (CURRENT) { + while (!QUEUE_EMPTY) { req = CURRENT; dev = MINOR(req->rq_dev); #ifdef 
PARANOIA @@ -314,28 +306,23 @@ requests_in++; #endif req->errors = 0; - CURRENT = CURRENT->next; - req->next = NULL; - + blkdev_dequeue_request(req); spin_unlock_irq(&io_request_lock); - down (&lo->queue_lock); - if (lo->head == NULL) { - lo->head = req; - lo->tail = req; - } else { - lo->head->next = req; - lo->head = req; - } + down (&lo->queue_lock); + list_add(&req->queue, &lo->queue_head); nbd_send_req(lo->sock, req); /* Why does this block? */ up (&lo->queue_lock); + spin_lock_irq(&io_request_lock); continue; error_out: req->errors++; + blkdev_dequeue_request(req); + spin_unlock(&io_request_lock); nbd_end_request(req); - CURRENT = CURRENT->next; + spin_lock(&io_request_lock); } return; } @@ -359,11 +346,14 @@ lo = &nbd_dev[dev]; switch (cmd) { case NBD_CLEAR_SOCK: + down(&lo->queue_lock); nbd_clear_que(lo); - if (lo->head || lo->tail) { + if (!list_empty(&lo->queue_head)) { + up(&lo->queue_lock); printk(KERN_ERR "nbd: Some requests are in progress -> can not turn off.\n"); return -EBUSY; } + up(&lo->queue_lock); file = lo->file; if (!file) return -EINVAL; @@ -415,8 +405,8 @@ return 0; #ifdef PARANOIA case NBD_PRINT_DEBUG: - printk(KERN_INFO "NBD device %d: head = %lx, tail = %lx. Global: in %d, out %d\n", - dev, (long) lo->head, (long) lo->tail, requests_in, requests_out); + printk(KERN_INFO "NBD device %d: queue_head = %p. Global: in %d, out %d\n", + dev, lo->queue_head, requests_in, requests_out); return 0; #endif case BLKGETSIZE: @@ -480,6 +470,7 @@ blksize_size[MAJOR_NR] = nbd_blksizes; blk_size[MAJOR_NR] = nbd_sizes; blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), do_nbd_request); + blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); for (i = 0; i < MAX_NBD; i++) { nbd_dev[i].refcnt = 0; nbd_dev[i].file = NULL; diff -urN 2.3.46pre1/drivers/block/paride/pcd.c 2.3.46pre1aa1/drivers/block/paride/pcd.c --- 2.3.46pre1/drivers/block/paride/pcd.c Tue Dec 14 15:48:49 1999 +++ 2.3.46pre1aa1/drivers/block/paride/pcd.c Wed Feb 16 00:28:25 2000 @@ -756,7 +756,7 @@ if (pcd_busy) return; while (1) { - if ((!CURRENT) || (CURRENT->rq_status == RQ_INACTIVE)) return; + if (QUEUE_EMPTY || (CURRENT->rq_status == RQ_INACTIVE)) return; INIT_REQUEST; if (CURRENT->cmd == READ) { unit = MINOR(CURRENT->rq_dev); diff -urN 2.3.46pre1/drivers/block/paride/pd.c 2.3.46pre1aa1/drivers/block/paride/pd.c --- 2.3.46pre1/drivers/block/paride/pd.c Sun Jan 30 15:43:28 2000 +++ 2.3.46pre1aa1/drivers/block/paride/pd.c Wed Feb 16 00:28:25 2000 @@ -868,7 +868,7 @@ if (pd_busy) return; repeat: - if ((!CURRENT) || (CURRENT->rq_status == RQ_INACTIVE)) return; + if (QUEUE_EMPTY || (CURRENT->rq_status == RQ_INACTIVE)) return; INIT_REQUEST; pd_dev = MINOR(CURRENT->rq_dev); @@ -890,7 +890,7 @@ pd_cmd = CURRENT->cmd; pd_run = pd_count; while ((pd_run <= cluster) && - (req = req->next) && + (req = blkdev_next_request(req)) && (pd_block+pd_run == req->sector) && (pd_cmd == req->cmd) && (pd_dev == MINOR(req->rq_dev))) @@ -922,7 +922,7 @@ /* paranoia */ - if ((!CURRENT) || + if (QUEUE_EMPTY || (CURRENT->cmd != pd_cmd) || (MINOR(CURRENT->rq_dev) != pd_dev) || (CURRENT->rq_status == RQ_INACTIVE) || diff -urN 2.3.46pre1/drivers/block/paride/pf.c 2.3.46pre1aa1/drivers/block/paride/pf.c --- 2.3.46pre1/drivers/block/paride/pf.c Sun Jan 30 15:43:28 2000 +++ 2.3.46pre1aa1/drivers/block/paride/pf.c Wed Feb 16 00:28:25 2000 @@ -854,7 +854,7 @@ if (pf_busy) return; repeat: - if ((!CURRENT) || (CURRENT->rq_status == RQ_INACTIVE)) return; + if (QUEUE_EMPTY || (CURRENT->rq_status == RQ_INACTIVE)) return; INIT_REQUEST; pf_unit = unit = 
DEVICE_NR(CURRENT->rq_dev); @@ -874,7 +874,7 @@ pf_cmd = CURRENT->cmd; pf_run = pf_count; while ((pf_run <= cluster) && - (req = req->next) && + (req = blkdev_next_request(req)) && (pf_block+pf_run == req->sector) && (pf_cmd == req->cmd) && (pf_unit == DEVICE_NR(req->rq_dev))) @@ -904,7 +904,7 @@ /* paranoia */ - if ((!CURRENT) || + if (QUEUE_EMPTY || (CURRENT->cmd != pf_cmd) || (DEVICE_NR(CURRENT->rq_dev) != pf_unit) || (CURRENT->rq_status == RQ_INACTIVE) || diff -urN 2.3.46pre1/drivers/block/ps2esdi.c 2.3.46pre1aa1/drivers/block/ps2esdi.c --- 2.3.46pre1/drivers/block/ps2esdi.c Sun Jan 30 15:43:28 2000 +++ 2.3.46pre1aa1/drivers/block/ps2esdi.c Wed Feb 16 00:28:25 2000 @@ -476,7 +476,7 @@ if (virt_to_bus(CURRENT->buffer + CURRENT->nr_sectors * 512) > 16 * MB) { printk("%s: DMA above 16MB not supported\n", DEVICE_NAME); end_request(FAIL); - if (CURRENT) + if (!QUEUE_EMPTY) do_ps2esdi_request(q); return; } /* check for above 16Mb dmas */ @@ -510,7 +510,7 @@ default: printk("%s: Unknown command\n", DEVICE_NAME); end_request(FAIL); - if (CURRENT) + if (!QUEUE_EMPTY) do_ps2esdi_request(q); break; } /* handle different commands */ @@ -520,7 +520,7 @@ printk("Grrr. error. ps2esdi_drives: %d, %lu %lu\n", ps2esdi_drives, CURRENT->sector, ps2esdi[MINOR(CURRENT->rq_dev)].nr_sects); end_request(FAIL); - if (CURRENT) + if (!QUEUE_EMPTY) do_ps2esdi_request(q); } @@ -591,7 +591,7 @@ return do_ps2esdi_request(NULL); else { end_request(FAIL); - if (CURRENT) + if (!QUEUE_EMPTY) do_ps2esdi_request(NULL); } } @@ -894,7 +894,7 @@ do_ps2esdi_request(NULL); else { end_request(FAIL); - if (CURRENT) + if (!QUEUE_EMPTY) do_ps2esdi_request(NULL); } break; @@ -940,7 +940,7 @@ do_ps2esdi_request(NULL); else { end_request(FAIL); - if (CURRENT) + if (!QUEUE_EMPTY) do_ps2esdi_request(NULL); } break; @@ -950,7 +950,7 @@ outb((int_ret_code & 0xe0) | ATT_EOI, ESDI_ATTN); outb(CTRL_ENABLE_INTR, ESDI_CONTROL); end_request(FAIL); - if (CURRENT) + if (!QUEUE_EMPTY) do_ps2esdi_request(NULL); break; @@ -986,7 +986,7 @@ do_ps2esdi_request(NULL); } else { end_request(SUCCES); - if (CURRENT) + if (!QUEUE_EMPTY) do_ps2esdi_request(NULL); } } diff -urN 2.3.46pre1/drivers/block/swim3.c 2.3.46pre1aa1/drivers/block/swim3.c --- 2.3.46pre1/drivers/block/swim3.c Thu Feb 3 06:05:57 2000 +++ 2.3.46pre1aa1/drivers/block/swim3.c Wed Feb 16 00:28:25 2000 @@ -305,7 +305,7 @@ wake_up(&fs->wait); return; } - while (CURRENT && fs->state == idle) { + while (!QUEUE_EMPTY && fs->state == idle) { if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) panic(DEVICE_NAME ": request list destroyed"); if (CURRENT->bh && !buffer_locked(CURRENT->bh)) diff -urN 2.3.46pre1/drivers/block/swim_iop.c 2.3.46pre1aa1/drivers/block/swim_iop.c --- 2.3.46pre1/drivers/block/swim_iop.c Thu Feb 3 06:05:57 2000 +++ 2.3.46pre1aa1/drivers/block/swim_iop.c Wed Feb 16 00:28:25 2000 @@ -550,7 +550,7 @@ wake_up(&fs->wait); return; } - while (CURRENT && fs->state == idle) { + while (!QUEUE_EMPTY && fs->state == idle) { if (MAJOR(CURRENT->rq_dev) != MAJOR_NR) panic(DEVICE_NAME ": request list destroyed"); if (CURRENT->bh && !buffer_locked(CURRENT->bh)) diff -urN 2.3.46pre1/drivers/block/xd.c 2.3.46pre1aa1/drivers/block/xd.c --- 2.3.46pre1/drivers/block/xd.c Sun Jan 30 15:43:28 2000 +++ 2.3.46pre1aa1/drivers/block/xd.c Wed Feb 16 00:28:25 2000 @@ -287,7 +287,7 @@ sti(); if (xdc_busy) return; - while (code = 0, CURRENT) { + while (code = 0, !QUEUE_EMPTY) { INIT_REQUEST; /* do some checking on the request structure */ if (CURRENT_DEV < xd_drives diff -urN 2.3.46pre1/drivers/cdrom/aztcd.c 
2.3.46pre1aa1/drivers/cdrom/aztcd.c --- 2.3.46pre1/drivers/cdrom/aztcd.c Sun Jan 30 15:43:28 2000 +++ 2.3.46pre1aa1/drivers/cdrom/aztcd.c Wed Feb 16 00:28:25 2000 @@ -234,7 +234,7 @@ #endif #define CURRENT_VALID \ - (CURRENT && MAJOR(CURRENT -> rq_dev) == MAJOR_NR && CURRENT -> cmd == READ \ + (!QUEUE_EMPTY && MAJOR(CURRENT -> rq_dev) == MAJOR_NR && CURRENT -> cmd == READ \ && CURRENT -> sector != -1) #define AFL_STATUSorDATA (AFL_STATUS | AFL_DATA) diff -urN 2.3.46pre1/drivers/cdrom/cdu31a.c 2.3.46pre1aa1/drivers/cdrom/cdu31a.c --- 2.3.46pre1/drivers/cdrom/cdu31a.c Tue Dec 14 15:48:50 1999 +++ 2.3.46pre1aa1/drivers/cdrom/cdu31a.c Wed Feb 16 00:28:25 2000 @@ -1672,7 +1672,7 @@ if (signal_pending(current)) { restore_flags(flags); - if (CURRENT && CURRENT->rq_status != RQ_INACTIVE) + if (!QUEUE_EMPTY && CURRENT->rq_status != RQ_INACTIVE) { end_request(0); } @@ -1705,7 +1705,7 @@ * The beginning here is stolen from the hard disk driver. I hope * it's right. */ - if (!(CURRENT) || CURRENT->rq_status == RQ_INACTIVE) + if (QUEUE_EMPTY || CURRENT->rq_status == RQ_INACTIVE) { goto end_do_cdu31a_request; } diff -urN 2.3.46pre1/drivers/cdrom/cm206.c 2.3.46pre1aa1/drivers/cdrom/cm206.c --- 2.3.46pre1/drivers/cdrom/cm206.c Tue Dec 14 15:48:50 1999 +++ 2.3.46pre1aa1/drivers/cdrom/cm206.c Wed Feb 16 00:28:25 2000 @@ -816,7 +816,7 @@ while(1) { /* repeat until all requests have been satisfied */ INIT_REQUEST; - if (CURRENT == NULL || CURRENT->rq_status == RQ_INACTIVE) + if (QUEUE_EMPTY || CURRENT->rq_status == RQ_INACTIVE) return; if (CURRENT->cmd != READ) { debug(("Non-read command %d on cdrom\n", CURRENT->cmd)); diff -urN 2.3.46pre1/drivers/cdrom/gscd.c 2.3.46pre1aa1/drivers/cdrom/gscd.c --- 2.3.46pre1/drivers/cdrom/gscd.c Sun Jan 30 15:43:28 2000 +++ 2.3.46pre1aa1/drivers/cdrom/gscd.c Wed Feb 16 00:28:25 2000 @@ -279,13 +279,13 @@ unsigned int nsect; repeat: - if (!(CURRENT) || CURRENT->rq_status == RQ_INACTIVE) return; + if (QUEUE_EMPTY || CURRENT->rq_status == RQ_INACTIVE) return; INIT_REQUEST; dev = MINOR(CURRENT->rq_dev); block = CURRENT->sector; nsect = CURRENT->nr_sectors; - if (CURRENT == NULL || CURRENT -> sector == -1) + if (QUEUE_EMPTY || CURRENT -> sector == -1) return; if (CURRENT -> cmd != READ) diff -urN 2.3.46pre1/drivers/cdrom/mcd.c 2.3.46pre1aa1/drivers/cdrom/mcd.c --- 2.3.46pre1/drivers/cdrom/mcd.c Fri Feb 11 00:05:33 2000 +++ 2.3.46pre1aa1/drivers/cdrom/mcd.c Wed Feb 16 00:28:25 2000 @@ -134,7 +134,7 @@ /* #define DOUBLE_QUICK_ONLY */ #define CURRENT_VALID \ -(CURRENT && MAJOR(CURRENT -> rq_dev) == MAJOR_NR && CURRENT -> cmd == READ \ +(!QUEUE_EMPTY && MAJOR(CURRENT -> rq_dev) == MAJOR_NR && CURRENT -> cmd == READ \ && CURRENT -> sector != -1) #define MFL_STATUSorDATA (MFL_STATUS | MFL_DATA) diff -urN 2.3.46pre1/drivers/cdrom/mcdx.c 2.3.46pre1aa1/drivers/cdrom/mcdx.c --- 2.3.46pre1/drivers/cdrom/mcdx.c Tue Dec 14 15:48:50 1999 +++ 2.3.46pre1aa1/drivers/cdrom/mcdx.c Wed Feb 16 00:28:25 2000 @@ -530,7 +530,7 @@ again: - if (CURRENT == NULL) { + if (QUEUE_EMPTY) { xtrace(REQUEST, "end_request(0): CURRENT == NULL\n"); return; } diff -urN 2.3.46pre1/drivers/cdrom/optcd.c 2.3.46pre1aa1/drivers/cdrom/optcd.c --- 2.3.46pre1/drivers/cdrom/optcd.c Sun Jan 30 15:43:28 2000 +++ 2.3.46pre1aa1/drivers/cdrom/optcd.c Wed Feb 16 00:28:25 2000 @@ -980,7 +980,7 @@ #define CURRENT_VALID \ - (CURRENT && MAJOR(CURRENT -> rq_dev) == MAJOR_NR \ + (!QUEUE_EMPTY && MAJOR(CURRENT -> rq_dev) == MAJOR_NR \ && CURRENT -> cmd == READ && CURRENT -> sector != -1) diff -urN 2.3.46pre1/drivers/cdrom/sbpcd.c 
2.3.46pre1aa1/drivers/cdrom/sbpcd.c --- 2.3.46pre1/drivers/cdrom/sbpcd.c Tue Dec 14 15:48:50 1999 +++ 2.3.46pre1aa1/drivers/cdrom/sbpcd.c Wed Feb 16 00:28:25 2000 @@ -4791,9 +4791,7 @@ */ #undef DEBUG_GTL static inline void sbpcd_end_request(struct request *req, int uptodate) { - req->next=CURRENT; - CURRENT=req; - up(&ioctl_read_sem); + list_add(&req->queue, &req->q->queue_head); end_request(uptodate); } /*==========================================================================*/ @@ -4815,7 +4813,7 @@ #ifdef DEBUG_GTL xnr=++xx_nr; - if(!CURRENT) + if(QUEUE_EMPTY) { printk( "do_sbpcd_request[%di](NULL), Pid:%d, Time:%li\n", xnr, current->pid, jiffies); @@ -4830,15 +4828,15 @@ #endif INIT_REQUEST; req=CURRENT; /* take out our request so no other */ - CURRENT=req->next; /* task can fuck it up GTL */ - spin_unlock_irq(&io_request_lock); /* FIXME!!!! */ + blkdev_dequeue_request(req); /* task can fuck it up GTL */ - down(&ioctl_read_sem); if (req->rq_status == RQ_INACTIVE) sbpcd_end_request(req, 0); if (req -> sector == -1) sbpcd_end_request(req, 0); + spin_unlock_irq(&io_request_lock); + down(&ioctl_read_sem); if (req->cmd != READ) { msg(DBG_INF, "bad cmd %d\n", req->cmd); @@ -4875,8 +4873,9 @@ printk(" do_sbpcd_request[%do](%p:%ld+%ld) end 2, Time:%li\n", xnr, req, req->sector, req->nr_sectors, jiffies); #endif + up(&ioctl_read_sem); + spin_lock_irq(&io_request_lock); sbpcd_end_request(req, 1); - spin_lock_irq(&io_request_lock); /* FIXME!!!! */ goto request_loop; } @@ -4915,8 +4914,9 @@ printk(" do_sbpcd_request[%do](%p:%ld+%ld) end 3, Time:%li\n", xnr, req, req->sector, req->nr_sectors, jiffies); #endif + up(&ioctl_read_sem); + spin_lock_irq(&io_request_lock); sbpcd_end_request(req, 1); - spin_lock_irq(&io_request_lock); /* FIXME!!!! */ goto request_loop; } } @@ -4929,9 +4929,10 @@ printk(" do_sbpcd_request[%do](%p:%ld+%ld) end 4 (error), Time:%li\n", xnr, req, req->sector, req->nr_sectors, jiffies); #endif - sbpcd_end_request(req, 0); + up(&ioctl_read_sem); sbp_sleep(0); /* wait a bit, try again */ - spin_lock_irq(&io_request_lock); /* FIXME!!!! */ + spin_lock_irq(&io_request_lock); + sbpcd_end_request(req, 0); goto request_loop; } /*==========================================================================*/ @@ -5741,6 +5742,7 @@ #endif MODULE } blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); read_ahead[MAJOR_NR] = buffers * (CD_FRAMESIZE / 512); request_region(CDo_command,4,major_name); diff -urN 2.3.46pre1/drivers/cdrom/sjcd.c 2.3.46pre1aa1/drivers/cdrom/sjcd.c --- 2.3.46pre1/drivers/cdrom/sjcd.c Sun Jan 30 15:43:28 2000 +++ 2.3.46pre1aa1/drivers/cdrom/sjcd.c Wed Feb 16 00:28:25 2000 @@ -938,7 +938,7 @@ */ #define CURRENT_IS_VALID \ - ( CURRENT != NULL && MAJOR( CURRENT->rq_dev ) == MAJOR_NR && \ + ( !QUEUE_EMPTY && MAJOR( CURRENT->rq_dev ) == MAJOR_NR && \ CURRENT->cmd == READ && CURRENT->sector != -1 ) static void sjcd_transfer( void ){ diff -urN 2.3.46pre1/drivers/cdrom/sonycd535.c 2.3.46pre1aa1/drivers/cdrom/sonycd535.c --- 2.3.46pre1/drivers/cdrom/sonycd535.c Sun Jan 30 15:43:28 2000 +++ 2.3.46pre1aa1/drivers/cdrom/sonycd535.c Wed Feb 16 00:28:25 2000 @@ -803,7 +803,7 @@ * The beginning here is stolen from the hard disk driver. I hope * it's right. 
*/ - if (!(CURRENT) || CURRENT->rq_status == RQ_INACTIVE) { + if (QUEUE_EMPTY || CURRENT->rq_status == RQ_INACTIVE) { return; } INIT_REQUEST; diff -urN 2.3.46pre1/drivers/char/rtc.c 2.3.46pre1aa1/drivers/char/rtc.c --- 2.3.46pre1/drivers/char/rtc.c Fri Feb 11 00:05:34 2000 +++ 2.3.46pre1aa1/drivers/char/rtc.c Wed Feb 16 00:28:24 2000 @@ -97,14 +97,18 @@ static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); +#ifndef __alpha__ static unsigned int rtc_poll(struct file *file, poll_table *wait); +#endif static void get_rtc_time (struct rtc_time *rtc_tm); static void get_rtc_alm_time (struct rtc_time *alm_tm); +#ifndef __alpha__ static void rtc_dropped_irq(unsigned long data); static void set_rtc_irq_bit(unsigned char bit); static void mask_rtc_irq_bit(unsigned char bit); +#endif static inline unsigned char rtc_is_updating(void); @@ -132,6 +136,7 @@ static const unsigned char days_in_mo[] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; +#ifndef __alpha__ /* * A very tiny interrupt handler. It runs with SA_INTERRUPT set, * so that there is no possibility of conflicting with the @@ -162,6 +167,7 @@ if (atomic_read(&rtc_status) & RTC_TIMER_ON) mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); } +#endif /* * Now all the various file operations that we export. @@ -175,6 +181,9 @@ static ssize_t rtc_read(struct file *file, char *buf, size_t count, loff_t *ppos) { +#ifdef __alpha__ + return -EIO; +#else DECLARE_WAITQUEUE(wait, current); unsigned long data; ssize_t retval; @@ -206,6 +215,7 @@ remove_wait_queue(&rtc_wait, &wait); return retval; +#endif } static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, @@ -216,6 +226,7 @@ struct rtc_time wtime; switch (cmd) { +#ifndef __alpha__ case RTC_AIE_OFF: /* Mask alarm int. enab. bit */ { mask_rtc_irq_bit(RTC_AIE); @@ -265,6 +276,7 @@ set_rtc_irq_bit(RTC_UIE); return 0; } +#endif case RTC_ALM_READ: /* Read the present alarm time */ { /* @@ -398,6 +410,7 @@ spin_unlock_irqrestore(&rtc_lock, flags); return 0; } +#ifndef __alpha__ case RTC_IRQP_READ: /* Read the periodic IRQ rate. */ { return put_user(rtc_freq, (unsigned long *)arg); @@ -437,7 +450,7 @@ spin_unlock_irqrestore(&rtc_lock, flags); return 0; } -#ifdef __alpha__ +#else case RTC_EPOCH_READ: /* Read the epoch. */ { return put_user (epoch, (unsigned long *)arg); @@ -494,13 +507,14 @@ static int rtc_release(struct inode *inode, struct file *file) { + unsigned long flags; +#ifndef __alpha__ /* * Turn off all interrupts once the device is no longer * in use, and clear the data. */ unsigned char tmp; - unsigned long flags; spin_lock_irqsave(&rtc_lock, flags); tmp = CMOS_READ(RTC_CONTROL); @@ -520,6 +534,7 @@ rtc_fasync (-1, file, 0); } +#endif MOD_DEC_USE_COUNT; spin_lock_irqsave (&rtc_lock, flags); @@ -529,6 +544,7 @@ return 0; } +#ifndef __alpha__ static unsigned int rtc_poll(struct file *file, poll_table *wait) { unsigned long l, flags; @@ -543,6 +559,7 @@ return POLLIN | POLLRDNORM; return 0; } +#endif /* * The various file operations we support. @@ -551,7 +568,9 @@ static struct file_operations rtc_fops = { llseek: rtc_llseek, read: rtc_read, +#ifndef __alpha__ poll: rtc_poll, +#endif ioctl: rtc_ioctl, open: rtc_open, release: rtc_release, @@ -612,12 +631,14 @@ return -EIO; } +#ifndef __alpha__ if(request_irq(RTC_IRQ, rtc_interrupt, SA_INTERRUPT, "rtc", NULL)) { /* Yeah right, seeing as irq 8 doesn't even hit the bus. 
*/ printk(KERN_ERR "rtc: IRQ %d is not free.\n", RTC_IRQ); return -EIO; } +#endif request_region(RTC_PORT(0), RTC_IO_EXTENT, "rtc"); #endif /* __sparc__ vs. others */ @@ -654,12 +675,14 @@ if (guess) printk("rtc: %s epoch (%lu) detected\n", guess, epoch); #endif +#ifndef __alpha__ init_timer(&rtc_irq_timer); rtc_irq_timer.function = rtc_dropped_irq; spin_lock_irqsave(&rtc_lock, flags); /* Initialize periodic freq. to CMOS reset default, which is 1024Hz */ CMOS_WRITE(((CMOS_READ(RTC_FREQ_SELECT) & 0xF0) | 0x06), RTC_FREQ_SELECT); spin_unlock_irqrestore(&rtc_lock, flags); +#endif rtc_freq = 1024; printk(KERN_INFO "Real Time Clock Driver v" RTC_VERSION "\n"); @@ -689,6 +712,7 @@ module_exit(rtc_exit); EXPORT_NO_SYMBOLS; +#ifndef __alpha__ /* * At IRQ rates >= 4096Hz, an interrupt may get lost altogether. * (usually during an IDE disk interrupt, with IRQ unmasking off) @@ -714,6 +738,7 @@ rtc_irq_data |= (CMOS_READ(RTC_INTR_FLAGS) & 0xF0); /* restart */ spin_unlock_irqrestore(&rtc_lock, flags); } +#endif /* * Info exported via "/proc/driver/rtc". @@ -902,6 +927,7 @@ } } +#ifndef __alpha__ /* * Used to disable/enable interrupts for any one of UIE, AIE, PIE. * Rumour has it that if you frob the interrupt enable/disable @@ -939,3 +965,4 @@ rtc_irq_data = 0; spin_unlock_irqrestore(&rtc_lock, flags); } +#endif diff -urN 2.3.46pre1/drivers/i2o/i2o_block.c 2.3.46pre1aa1/drivers/i2o/i2o_block.c --- 2.3.46pre1/drivers/i2o/i2o_block.c Sun Jan 30 15:43:28 2000 +++ 2.3.46pre1aa1/drivers/i2o/i2o_block.c Wed Feb 16 00:28:25 2000 @@ -461,7 +461,7 @@ struct i2ob_device *dev; u32 m; - while (CURRENT) { + while (!QUEUE_EMPTY) { /* * On an IRQ completion if there is an inactive * request on the queue head it means it isnt yet @@ -515,8 +515,7 @@ } } req->errors = 0; - CURRENT = CURRENT->next; - req->next = NULL; + blkdev_dequeue_request(req); req->sem = NULL; ireq = i2ob_qhead; diff -urN 2.3.46pre1/drivers/scsi/scsi.c 2.3.46pre1aa1/drivers/scsi/scsi.c --- 2.3.46pre1/drivers/scsi/scsi.c Fri Feb 11 00:05:35 2000 +++ 2.3.46pre1aa1/drivers/scsi/scsi.c Wed Feb 16 00:28:25 2000 @@ -2193,19 +2193,24 @@ /* Now dump the request lists for each block device */ printk("Dump of pending block device requests\n"); for (i = 0; i < MAX_BLKDEV; i++) { - if (blk_dev[i].request_queue.current_request) { + struct list_head * queue_head; + + queue_head = &blk_dev[i].request_queue.queue_head; + if (!list_empty(queue_head)) { struct request *req; + struct list_head * entry; + printk("%d: ", i); - req = blk_dev[i].request_queue.current_request; - while (req) { + entry = queue_head->next; + do { + req = blkdev_entry_to_request(entry); printk("(%s %d %ld %ld %ld) ", kdevname(req->rq_dev), req->cmd, req->sector, req->nr_sectors, req->current_nr_sectors); - req = req->next; - } + } while ((entry = entry->next) != queue_head); printk("\n"); } } diff -urN 2.3.46pre1/drivers/scsi/scsi_lib.c 2.3.46pre1aa1/drivers/scsi/scsi_lib.c --- 2.3.46pre1/drivers/scsi/scsi_lib.c Fri Feb 11 00:05:35 2000 +++ 2.3.46pre1aa1/drivers/scsi/scsi_lib.c Wed Feb 16 00:28:25 2000 @@ -86,6 +86,7 @@ q = &SCpnt->device->request_queue; SCpnt->request.cmd = SPECIAL; SCpnt->request.special = (void *) SCpnt; + SCpnt->request.q = NULL; /* * We have the option of inserting the head or the tail of the queue. 
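The two scsi_lib.c hunks around this point replace open-coded pointer juggling on q->current_request with the generic list macros. A minimal sketch of the new idiom, not part of the patch itself (sketch_requeue is an illustrative name; the struct request "queue" member and the queue_head list come from the blkdev.h changes later in this patch):

static void sketch_requeue(request_queue_t *q, struct request *req,
			   int at_head)
{
	if (at_head)
		/* back to the front: next request the driver sees */
		list_add(&req->queue, &q->queue_head);
	else
		/* to the back: handled after everything already queued */
		list_add_tail(&req->queue, &q->queue_head);
}

Either way a later blkdev_dequeue_request() removes the request in O(1), without the caller having to know at which end it was inserted.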
@@ -96,8 +97,7 @@ spin_lock_irqsave(&io_request_lock, flags); if (at_head) { - SCpnt->request.next = q->current_request; - q->current_request = &SCpnt->request; + list_add(&SCpnt->request.queue, &q->queue_head); } else { /* * FIXME(eric) - we always insert at the tail of the @@ -107,19 +107,7 @@ * request might not float high enough in the queue * to be scheduled. */ - SCpnt->request.next = NULL; - if (q->current_request == NULL) { - q->current_request = &SCpnt->request; - } else { - struct request *req; - - for (req = q->current_request; req; req = req->next) { - if (req->next == NULL) { - req->next = &SCpnt->request; - break; - } - } - } + list_add_tail(&SCpnt->request.queue, &q->queue_head); } /* @@ -239,9 +227,8 @@ * in which case we need to request the blocks that come after * the bad sector. */ - SCpnt->request.next = q->current_request; - q->current_request = &SCpnt->request; SCpnt->request.special = (void *) SCpnt; + list_add(&SCpnt->request.queue, &q->queue_head); } /* @@ -260,7 +247,7 @@ * use function pointers to pick the right one. */ if (SDpnt->single_lun - && q->current_request == NULL + && list_empty(&q->queue_head) && SDpnt->device_busy == 0) { request_queue_t *q; @@ -850,18 +837,18 @@ } /* - * Loop through all of the requests in this queue, and find - * one that is queueable. - */ - req = q->current_request; - - /* * If we couldn't find a request that could be queued, then we * can also quit. */ - if (!req) { + if (list_empty(&q->queue_head)) break; - } + + /* + * Loop through all of the requests in this queue, and find + * one that is queueable. + */ + req = blkdev_entry_next_request(&q->queue_head); + /* * Find the actual device driver associated with this command. * The SPECIAL requests are things like character device or @@ -922,8 +909,7 @@ * reason to search the list, because all of the commands * in this queue are for the same device. */ - q->current_request = req->next; - SCpnt->request.next = NULL; + blkdev_dequeue_request(req); if (req != &SCpnt->request) { memcpy(&SCpnt->request, req, sizeof(struct request)); @@ -932,7 +918,6 @@ * We have copied the data out of the request block - it is now in * a field in SCpnt. Release the request block. */ - req->next = NULL; req->rq_status = RQ_INACTIVE; wake_up(&wait_for_request); } diff -urN 2.3.46pre1/drivers/scsi/scsi_merge.c 2.3.46pre1aa1/drivers/scsi/scsi_merge.c --- 2.3.46pre1/drivers/scsi/scsi_merge.c Fri Feb 11 00:05:35 2000 +++ 2.3.46pre1aa1/drivers/scsi/scsi_merge.c Wed Feb 16 00:28:25 2000 @@ -343,6 +343,7 @@ __inline static int __scsi_merge_fn(request_queue_t * q, struct request *req, struct buffer_head *bh, + int max_segments, int use_clustering, int dma_host) { @@ -357,6 +358,9 @@ count = bh->b_size >> 9; sector = bh->b_rsector; + if (max_segments > 64) + max_segments = 64; + /* * We come in here in one of two cases. The first is that we * are checking to see if we can add the buffer to the end of the @@ -447,10 +451,11 @@ * scsi.c allocates for this purpose * min(64,sg_tablesize) entries. */ - if (req->nr_segments >= 64 && + if (req->nr_segments >= max_segments && req->nr_segments >= SHpnt->sg_tablesize) return 0; req->nr_segments++; + q->nr_segments++; return 1; new_segment: /* @@ -459,20 +464,25 @@ * check if things fit into sg_tablesize. 
*/ if (req->nr_hw_segments >= SHpnt->sg_tablesize || - (req->nr_segments >= 64 && + (req->nr_segments >= max_segments && req->nr_segments >= SHpnt->sg_tablesize)) return 0; + if (req->nr_segments >= max_segments) + return 0; req->nr_hw_segments++; req->nr_segments++; + q->nr_segments++; return 1; #else new_segment: - if (req->nr_segments < SHpnt->sg_tablesize) { + if (req->nr_segments < SHpnt->sg_tablesize && + req->nr_segments < max_segments) { /* * This will form the start of a new segment. Bump the * counter. */ req->nr_segments++; + q->nr_segments++; return 1; } else { return 0; @@ -500,11 +510,12 @@ #define MERGEFCT(_FUNCTION, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct buffer_head * bh) \ + struct buffer_head * bh, \ + int max_segments) \ { \ int ret; \ SANITY_CHECK(req, _CLUSTER, _DMA); \ - ret = __scsi_merge_fn(q, req, bh, _CLUSTER, _DMA); \ + ret = __scsi_merge_fn(q, req, bh, max_segments, _CLUSTER, _DMA); \ return ret; \ } @@ -550,6 +561,7 @@ __inline static int __scsi_merge_requests_fn(request_queue_t * q, struct request *req, struct request *next, + int max_segments, int use_clustering, int dma_host) { @@ -559,11 +571,14 @@ SDpnt = (Scsi_Device *) q->queuedata; SHpnt = SDpnt->host; + if (max_segments > 64) + max_segments = 64; + #ifdef DMA_CHUNK_SIZE /* If it would not fit into prepared memory space for sg chain, * then don't allow the merge. */ - if (req->nr_segments + next->nr_segments - 1 > 64 && + if (req->nr_segments + next->nr_segments - 1 > max_segments && req->nr_segments + next->nr_segments - 1 > SHpnt->sg_tablesize) { return 0; } @@ -619,6 +634,7 @@ * This one is OK. Let it go. */ req->nr_segments += next->nr_segments - 1; + q->nr_segments--; #ifdef DMA_CHUNK_SIZE req->nr_hw_segments += next->nr_hw_segments - 1; #endif @@ -627,7 +643,7 @@ } dont_combine: #ifdef DMA_CHUNK_SIZE - if (req->nr_segments + next->nr_segments > 64 && + if (req->nr_segments + next->nr_segments > max_segments && req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { return 0; } @@ -650,7 +666,8 @@ * Make sure we can fix something that is the sum of the two. * A slightly stricter test than we had above. 
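 * (With this patch, the test below refuses the merge only when the combined segment count exceeds both the caller-supplied max_segments, itself clamped to 64 at the top of this helper, and the adapter's sg_tablesize.)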
*/ - if (req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { + if (req->nr_segments + next->nr_segments > max_segments && + req->nr_segments + next->nr_segments > SHpnt->sg_tablesize) { return 0; } else { /* @@ -683,11 +700,12 @@ #define MERGEREQFCT(_FUNCTION, _CLUSTER, _DMA) \ static int _FUNCTION(request_queue_t * q, \ struct request * req, \ - struct request * next) \ + struct request * next, \ + int max_segments) \ { \ int ret; \ SANITY_CHECK(req, _CLUSTER, _DMA); \ - ret = __scsi_merge_requests_fn(q, req, next, _CLUSTER, _DMA); \ + ret = __scsi_merge_requests_fn(q, req, next, max_segments, _CLUSTER, _DMA); \ return ret; \ } diff -urN 2.3.46pre1/fs/buffer.c 2.3.46pre1aa1/fs/buffer.c --- 2.3.46pre1/fs/buffer.c Fri Feb 11 00:05:36 2000 +++ 2.3.46pre1aa1/fs/buffer.c Wed Feb 16 00:28:25 2000 @@ -148,9 +148,9 @@ atomic_inc(&bh->b_count); add_wait_queue(&bh->b_wait, &wait); repeat: - run_task_queue(&tq_disk); set_task_state(tsk, TASK_UNINTERRUPTIBLE); if (buffer_locked(bh)) { + run_task_queue(&tq_disk); schedule(); goto repeat; } diff -urN 2.3.46pre1/fs/dcache.c 2.3.46pre1aa1/fs/dcache.c --- 2.3.46pre1/fs/dcache.c Sun Jan 30 15:43:39 2000 +++ 2.3.46pre1aa1/fs/dcache.c Wed Feb 16 00:28:25 2000 @@ -57,6 +57,15 @@ int dummy[2]; } dentry_stat = {0, 0, 45, 0,}; +struct { + /* Enlarging too much is not a good idea since an overly large cache + may generate too many collisions in the hash, potentially + slowing down the system. */ + int limit_percent; +} dcache_ctl = { 2, }; +int dcache_ctl_min[] = { 0, }; +int dcache_ctl_max[] = { 100, }; + static inline void d_free(struct dentry *dentry) { if (dentry->d_op && dentry->d_op->d_release) @@ -428,6 +437,20 @@ return 0; } +static inline void preshrink_dcache_memory(void) +{ + unsigned long size, limit; + + size = (dentry_stat.nr_unused * sizeof(struct dentry)) >> PAGE_SHIFT; + limit = num_physpages * dcache_ctl.limit_percent / 100; + if (size > limit) + { + lock_kernel(); + prune_dcache(dentry_stat.nr_unused >> 2); + unlock_kernel(); + } +} + #define NAME_ALLOC_LEN(len) ((len+16) & ~15) struct dentry * d_alloc(struct dentry * parent, const struct qstr *name) @@ -435,6 +458,7 @@ char * str; struct dentry *dentry; + preshrink_dcache_memory(); dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); if (!dentry) return NULL; diff -urN 2.3.46pre1/fs/exec.c 2.3.46pre1aa1/fs/exec.c --- 2.3.46pre1/fs/exec.c Fri Feb 11 00:05:36 2000 +++ 2.3.46pre1aa1/fs/exec.c Wed Feb 16 00:28:25 2000 @@ -277,13 +277,13 @@ pmd = pmd_alloc(pgd, address); if (!pmd) { __free_page(page); - oom(tsk); + force_sig(SIGKILL, tsk); return; } pte = pte_alloc(pmd, address); if (!pte) { __free_page(page); - oom(tsk); + force_sig(SIGKILL, tsk); return; } if (!pte_none(*pte)) { diff -urN 2.3.46pre1/fs/inode.c 2.3.46pre1aa1/fs/inode.c --- 2.3.46pre1/fs/inode.c Tue Feb 15 03:06:49 2000 +++ 2.3.46pre1aa1/fs/inode.c Wed Feb 16 00:28:25 2000 @@ -70,10 +70,19 @@ int dummy[5]; } inodes_stat = {0, 0,}; +struct { + /* Enlarging too much is not a good idea since an overly large cache + may generate too many collisions in the hash, potentially + slowing down the system.
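+ For example, with the default limit_percent of 2 on a machine with 128 MB of RAM (32768 pages of 4 KB, purely as an illustration), the preshrink helper below starts pruning a quarter of the unused inodes at a time once the unused inodes alone occupy more than 655 pages, i.e. roughly 2.5 MB.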
*/ + int limit_percent; +} icache_ctl = { 2, }; +int icache_ctl_min[] = { 0, }; +int icache_ctl_max[] = { 100, }; + static kmem_cache_t * inode_cachep; #define alloc_inode() \ - ((struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL)) + (preshrink_icache_memory(), (struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL)) #define destroy_inode(inode) kmem_cache_free(inode_cachep, (inode)) /* @@ -411,6 +420,16 @@ kmem_cache_shrink(inode_cachep); return 0; +} + +static inline void preshrink_icache_memory(void) +{ + unsigned long size, limit; + + size = (inodes_stat.nr_unused * sizeof(struct inode)) >> PAGE_SHIFT; + limit = num_physpages * icache_ctl.limit_percent / 100; + if (size > limit) + prune_icache(inodes_stat.nr_unused >> 2); } static inline void __iget(struct inode * inode) diff -urN 2.3.46pre1/fs/partitions/check.c 2.3.46pre1aa1/fs/partitions/check.c --- 2.3.46pre1/fs/partitions/check.c Fri Feb 11 00:05:37 2000 +++ 2.3.46pre1aa1/fs/partitions/check.c Wed Feb 16 00:28:24 2000 @@ -37,6 +37,11 @@ extern void rd_load(void); extern void initrd_load(void); +#if defined CONFIG_BLK_DEV_LVM || defined CONFIG_BLK_DEV_LVM_MODULE +#include <linux/lvm.h> +void ( *lvm_hd_name_ptr) ( char *, int) = NULL; +#endif + struct gendisk *gendisk_head; static int (*check_part[])(struct gendisk *hd, kdev_t dev, unsigned long first_sect, int first_minor) = { @@ -88,6 +93,13 @@ * This requires special handling here. */ switch (hd->major) { +#if defined CONFIG_BLK_DEV_LVM || defined CONFIG_BLK_DEV_LVM_MODULE + case LVM_BLK_MAJOR: + *buf = 0; + if ( lvm_hd_name_ptr != NULL) + ( lvm_hd_name_ptr) ( buf, minor); + return buf; +#endif case IDE9_MAJOR: unit += 2; case IDE8_MAJOR: diff -urN 2.3.46pre1/include/asm-alpha/hardirq.h 2.3.46pre1aa1/include/asm-alpha/hardirq.h --- 2.3.46pre1/include/asm-alpha/hardirq.h Tue Feb 15 16:37:45 2000 +++ 2.3.46pre1aa1/include/asm-alpha/hardirq.h Wed Feb 16 00:28:24 2000 @@ -8,8 +8,11 @@ #ifndef __SMP__ extern int __local_irq_count; #define local_irq_count(cpu) ((void)(cpu), __local_irq_count) +extern unsigned long __irq_attempt[]; +#define irq_attempt(cpu, irq) ((void)(cpu), __irq_attempt[irq]) #else #define local_irq_count(cpu) (cpu_data[cpu].irq_count) +#define irq_attempt(cpu, irq) (cpu_data[cpu].irq_attempt[irq]) #endif /* diff -urN 2.3.46pre1/include/asm-alpha/hw_irq.h 2.3.46pre1aa1/include/asm-alpha/hw_irq.h --- 2.3.46pre1/include/asm-alpha/hw_irq.h Tue Feb 15 03:16:53 2000 +++ 2.3.46pre1aa1/include/asm-alpha/hw_irq.h Wed Feb 16 00:28:24 2000 @@ -18,21 +18,22 @@ outb(0, DMA1_CLR_MASK_REG); \ outb(0, DMA2_CLR_MASK_REG) -extern unsigned long _alpha_irq_masks[2]; -#define alpha_irq_mask _alpha_irq_masks[0] - extern void common_ack_irq(unsigned long irq); extern void isa_device_interrupt(unsigned long vector, struct pt_regs * regs); extern void srm_device_interrupt(unsigned long vector, struct pt_regs * regs); -extern void handle_irq(int irq, int ack, struct pt_regs * regs); +extern void handle_irq(int irq, struct pt_regs * regs); #define RTC_IRQ 8 +#if 0 /* on Alpha we want to use only the RTC as the timer, for SMP issues */ #ifdef CONFIG_RTC #define TIMER_IRQ 0 /* timer is the pit */ #else #define TIMER_IRQ RTC_IRQ /* timer is the rtc */ #endif +#else +#define TIMER_IRQ RTC_IRQ /* timer is the rtc */ +#endif /* * PROBE_MASK is the bitset of irqs that we consider for autoprobing.
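The irq_attempt() macro added to hardirq.h above gives the alpha a per-irq delivery counter on both UP and SMP (per-cpu in the SMP case, via the new irq_attempt[] field added to struct cpuinfo_alpha below). A sketch of the intended use from the arch interrupt path; illustrative only, since the arch/alpha/kernel side of this patch is not among these hunks, and note that the UP variant relies on gcc's comma-expression lvalue extension of that era:

static void sketch_count_irq(int irq)
{
	int cpu = smp_processor_id();

	/* irq_attempt() expands to an lvalue, so it can be incremented */
	irq_attempt(cpu, irq)++;
}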
@@ -71,10 +72,11 @@ #endif -extern char _stext; static inline void alpha_do_profile (unsigned long pc) { if (prof_buffer && current->pid) { + extern char _stext; + pc -= (unsigned long) &_stext; pc >>= prof_shift; /* @@ -87,5 +89,10 @@ atomic_inc((atomic_t *)&prof_buffer[pc]); } } + +static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {} +extern void no_action(int cpl, void *dev_id, struct pt_regs *regs); +extern void init_ISA_irqs(void); +extern void init_RTC_irq(void); #endif diff -urN 2.3.46pre1/include/asm-alpha/pgalloc.h 2.3.46pre1aa1/include/asm-alpha/pgalloc.h --- 2.3.46pre1/include/asm-alpha/pgalloc.h Tue Feb 15 03:15:06 2000 +++ 2.3.46pre1aa1/include/asm-alpha/pgalloc.h Wed Feb 16 00:28:24 2000 @@ -3,13 +3,28 @@ #include -/* Caches aren't brain-dead on the Alpha. */ -#define flush_cache_all() do { } while (0) +/* The icache is not coherent with the dcache on the alpha, so before + running self-modified code we must always issue an imb(). + Using flush_cache_all() for this is real overkill, as it is called from + vmalloc() before accessing pagetables, and on the alpha we are not required + to flush the icache before doing that; but the semantics of flush_cache_all() + require us to flush _all_ the caches, so we must be correct here. It is + really vmalloc() that should be changed to use a more fine-grained cache + flush operation (I suspect other archs don't need an icache + flush while handling pagetables either). OTOH vmalloc is not a performance-critical + path, so we can live with it for now. */ +#define flush_cache_all() flush_icache_range(0, 0) #define flush_cache_mm(mm) do { } while (0) #define flush_cache_range(mm, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr) do { } while (0) #define flush_page_to_ram(page) do { } while (0) -#define flush_icache_range(start, end) do { } while (0) +#ifndef __SMP__ +#define flush_icache_range(start, end) imb() +#else +#define flush_icache_range(start, end) smp_imb() +extern void smp_imb(void); +#endif +#define flush_icache_page(vma,pg) do { } while (0) /* * Use a few helper functions to hide the ugly broken ASN diff -urN 2.3.46pre1/include/asm-alpha/smp.h 2.3.46pre1aa1/include/asm-alpha/smp.h --- 2.3.46pre1/include/asm-alpha/smp.h Tue Feb 15 03:15:06 2000 +++ 2.3.46pre1aa1/include/asm-alpha/smp.h Wed Feb 16 00:28:24 2000 @@ -20,6 +20,7 @@ #ifdef __SMP__ #include +#include struct cpuinfo_alpha { unsigned long loops_per_sec; @@ -28,6 +29,8 @@ unsigned long *pte_cache; unsigned long pgtable_cache_sz; unsigned long ipi_count; + unsigned long irq_attempt[NR_IRQS]; + unsigned long smp_local_irq_count; unsigned long prof_multiplier; unsigned long prof_counter; int irq_count, bh_count; diff -urN 2.3.46pre1/include/linux/blk.h 2.3.46pre1aa1/include/linux/blk.h --- 2.3.46pre1/include/linux/blk.h Tue Feb 15 03:17:30 2000 +++ 2.3.46pre1aa1/include/linux/blk.h Wed Feb 16 00:28:25 2000 @@ -96,6 +96,18 @@ * code duplication in drivers.
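 * (blkdev_dequeue_request() below is the helper drivers now call instead of advancing CURRENT by hand: it unlinks the request from the queue's list and, while the request still belongs to a queue, removes its contribution from the queue's nr_segments and, for reads, from the elevator's read_pendings.)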
*/ +extern inline void blkdev_dequeue_request(struct request * req) +{ + if (req->q) + { + if (req->cmd == READ) + req->q->elevator.read_pendings--; + req->q->nr_segments -= req->nr_segments; + req->q = NULL; + } + list_del(&req->queue); +} + int end_that_request_first(struct request *req, int uptodate, char *name); void end_that_request_last(struct request *req); @@ -373,7 +385,10 @@ #if !defined(IDE_DRIVER) #ifndef CURRENT -#define CURRENT (blk_dev[MAJOR_NR].request_queue.current_request) +#define CURRENT blkdev_entry_next_request(&blk_dev[MAJOR_NR].request_queue.queue_head) +#endif +#ifndef QUEUE_EMPTY +#define QUEUE_EMPTY list_empty(&blk_dev[MAJOR_NR].request_queue.queue_head) #endif #ifndef DEVICE_NAME @@ -418,7 +433,7 @@ #endif #define INIT_REQUEST \ - if (!CURRENT) {\ + if (QUEUE_EMPTY) {\ CLEAR_INTR; \ return; \ } \ @@ -446,7 +461,7 @@ add_blkdev_randomness(MAJOR(req->rq_dev)); #endif DEVICE_OFF(req->rq_dev); - CURRENT = req->next; + blkdev_dequeue_request(req); end_that_request_last(req); } diff -urN 2.3.46pre1/include/linux/blkdev.h 2.3.46pre1aa1/include/linux/blkdev.h --- 2.3.46pre1/include/linux/blkdev.h Tue Feb 15 03:15:06 2000 +++ 2.3.46pre1aa1/include/linux/blkdev.h Wed Feb 16 00:28:25 2000 @@ -5,6 +5,10 @@ #include #include #include +#include + +struct request_queue; +typedef struct request_queue request_queue_t; /* * Ok, this is an expanded form so that we can use the same @@ -13,6 +17,9 @@ * for read/write completion. */ struct request { + struct list_head queue; + int elevator_sequence; + volatile int rq_status; /* should split this into a few status bits */ #define RQ_INACTIVE (-1) #define RQ_ACTIVE 1 @@ -33,25 +40,39 @@ struct semaphore * sem; struct buffer_head * bh; struct buffer_head * bhtail; - struct request * next; + request_queue_t * q; }; -typedef struct request_queue request_queue_t; typedef int (merge_request_fn) (request_queue_t *q, struct request *req, - struct buffer_head *bh); + struct buffer_head *bh, + int); typedef int (merge_requests_fn) (request_queue_t *q, struct request *req, - struct request *req2); + struct request *req2, + int); typedef void (request_fn_proc) (request_queue_t *q); typedef request_queue_t * (queue_proc) (kdev_t dev); typedef void (make_request_fn) (int rw, struct buffer_head *bh); typedef void (plug_device_fn) (request_queue_t *q, kdev_t device); typedef void (unplug_device_fn) (void *q); +typedef struct elevator_s +{ + int sequence; + int read_latency; + int write_latency; + int max_bomb_segments; + int read_pendings; +} elevator_t; + struct request_queue { - struct request * current_request; + struct list_head queue_head; + /* together with queue_head for cacheline sharing */ + elevator_t elevator; + unsigned int nr_segments; + request_fn_proc * request_fn; merge_request_fn * merge_fn; merge_requests_fn * merge_requests_fn; @@ -108,6 +129,7 @@ extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size); extern void generic_unplug_device(void * data); extern void generic_plug_device (request_queue_t *q, kdev_t dev); +extern void plug_device_noop(request_queue_t *q, kdev_t dev); extern void generic_make_request(int rw, struct buffer_head * bh); extern request_queue_t * blk_get_queue(kdev_t dev); @@ -141,5 +163,13 @@ /* read-ahead in pages.. 
*/ #define MAX_READAHEAD 31 #define MIN_READAHEAD 3 + +#define ELEVATOR_DEFAULTS ((elevator_t) { 0, NR_REQUEST>>1, NR_REQUEST<<5, 4, 0, }) + +#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queue) +#define blkdev_entry_next_request(entry) blkdev_entry_to_request((entry)->next) +#define blkdev_entry_prev_request(entry) blkdev_entry_to_request((entry)->prev) +#define blkdev_next_request(req) blkdev_entry_to_request((req)->queue.next) +#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queue.prev) #endif diff -urN 2.3.46pre1/include/linux/irq.h 2.3.46pre1aa1/include/linux/irq.h --- 2.3.46pre1/include/linux/irq.h Tue Feb 15 03:17:05 2000 +++ 2.3.46pre1aa1/include/linux/irq.h Wed Feb 16 00:28:24 2000 @@ -11,6 +11,7 @@ #define IRQ_REPLAY 8 /* IRQ has been replayed but not acked yet */ #define IRQ_AUTODETECT 16 /* IRQ is being autodetected */ #define IRQ_WAITING 32 /* IRQ not yet seen - for autodetection */ +#define IRQ_LEVEL 64 /* IRQ level triggered */ /* * Interrupt controller descriptor. This is all we need diff -urN 2.3.46pre1/include/linux/lvm.h 2.3.46pre1aa1/include/linux/lvm.h --- 2.3.46pre1/include/linux/lvm.h Thu Jan 1 01:00:00 1970 +++ 2.3.46pre1aa1/include/linux/lvm.h Wed Feb 16 00:28:24 2000 @@ -0,0 +1,827 @@ +/* + * kernel/lvm.h + * + * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Germany + * + * February-November 1997 + * May-July 1998 + * January-March, July, September, October, December 1999 + * January 2000 + * + * lvm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * lvm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +/* + * Changelog + * + * 10/10/1997 - beginning of new structure creation + * 12/05/1998 - incorporated structures from lvm_v1.h and deleted lvm_v1.h + * 07/06/1998 - avoided LVM_KMALLOC_MAX define by using vmalloc/vfree + * instead of kmalloc/kfree + * 01/07/1998 - fixed wrong LVM_MAX_SIZE + * 07/07/1998 - extended pe_t structure by ios member (for statistics) + * 02/08/1998 - changes for official char/block major numbers + * 07/08/1998 - avoided making init_module() and cleanup_module() static + * 29/08/1998 - separated core and disk structure type definitions + * 01/09/1998 - merged kernel integration version (mike) + * 20/01/1999 - added LVM_PE_DISK_OFFSET macro for use in + * vg_read_with_pv_and_lv(), pv_move_pe(), pv_show_pe_text()...
+ 18/02/1999 - added definition of time_disk_t structure; + * keeps time stamps on disk for nonatomic writes (future) + * 15/03/1999 - corrected LV() and VG() macro definition to use argument + * instead of minor + * 03/07/1999 - define for genhd.c name handling + * 23/07/1999 - implemented snapshot part + * 08/12/1999 - changed LVM_LV_SIZE_MAX macro to reflect current 1TB limit + * 01/01/2000 - extended lv_v2 core structure by wait_queue member + * + */ + + +#ifndef _LVM_H_INCLUDE +#define _LVM_H_INCLUDE + +#define _LVM_H_VERSION "LVM 0.8 (1/1/2000)" + +/* + * preprocessor definitions + */ +/* if you like emergency reset code in the driver */ +#define LVM_TOTAL_RESET + +#define LVM_GET_INODE +#define LVM_HD_NAME + +/* lots of debugging output (see driver source) +#define DEBUG_LVM_GET_INFO +#define DEBUG +#define DEBUG_MAP +#define DEBUG_MAP_SIZE +#define DEBUG_IOCTL +#define DEBUG_READ +#define DEBUG_GENDISK +#define DEBUG_VG_CREATE +#define DEBUG_LVM_BLK_OPEN +#define DEBUG_VFREE +#define DEBUG_SNAPSHOT +*/ +/* + * end of preprocessor definitions + */ + +#ifndef LINUX_VERSION_CODE +# include + /* for 2.0.x series */ +# ifndef KERNEL_VERSION +# define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) +# endif +#endif + +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION ( 2, 3 ,0) +# include +#else +# include +#endif + +/* leave this for now until major.h is updated (mike) */ +#ifndef LVM_BLK_MAJOR +# define LVM_BLK_MAJOR 58 +#endif +#ifndef LVM_CHAR_MAJOR +# define LVM_CHAR_MAJOR 109 +#endif + +#if !defined ( LVM_BLK_MAJOR) || !defined ( LVM_CHAR_MAJOR) + #error Bad include/linux/major.h - LVM MAJOR undefined +#endif + + +#if LINUX_VERSION_CODE < KERNEL_VERSION ( 2, 1 ,0) +# ifndef uint8_t +# define uint8_t __u8 +# endif +# ifndef uint16_t +# define uint16_t __u16 +# endif +# ifndef uint32_t +# define uint32_t __u32 +# endif +# ifndef uint64_t +# define uint64_t __u64 +# endif +#endif + +#define LVM_STRUCT_VERSION 1 /* structure version */ + +#ifndef min +#define min(a,b) (((a)<(b))?(a):(b)) +#endif +#ifndef max +#define max(a,b) (((a)>(b))?(a):(b)) +#endif + +/* set the default structure version */ +#if ( LVM_STRUCT_VERSION == 1) +# define pv_t pv_v1_t +# define lv_t lv_v2_t +# define vg_t vg_v1_t +# define pv_disk_t pv_disk_v1_t +# define lv_disk_t lv_disk_v1_t +# define vg_disk_t vg_disk_v1_t +# define lv_exception_t lv_v2_exception_t +#endif + + +/* + * i/o protocol version + * + * defined here for the driver and defined separately in the + * user-land LVM parts + * + */ +#define LVM_DRIVER_IOP_VERSION 6 + +#define LVM_NAME "lvm" + +/* + * VG/LV indexing macros + */ +/* character minor maps directly to volume group */ +#define VG_CHR(a) ( a) + +/* block minor indexes into a volume group/logical volume indirection table */ +#define VG_BLK(a) ( vg_lv_map[a].vg_number) +#define LV_BLK(a) ( vg_lv_map[a].lv_number) + +/* + * absolute limits for VGs, PVs per VG and LVs per VG + */ +#define ABS_MAX_VG 99 +#define ABS_MAX_PV 256 +#define ABS_MAX_LV 256 /* caused by 8 bit minor */ + +#define MAX_VG ABS_MAX_VG +#define MAX_LV ABS_MAX_LV +#define MAX_PV ABS_MAX_PV + +#if ( MAX_VG > ABS_MAX_VG) +# undef MAX_VG +# define MAX_VG ABS_MAX_VG +#endif + +#if ( MAX_LV > ABS_MAX_LV) +# undef MAX_LV +# define MAX_LV ABS_MAX_LV +#endif + + +/* + * VGDA: default disk spaces and offsets + * + * there's space after the structures for later extensions.
+ * + * offset what size + * --------------- ---------------------------------- ------------ + * 0 physical volume structure ~500 byte + * + * 1K volume group structure ~200 byte + * + * 5K time stamp structure ~ + * + * 6K namelist of physical volumes 128 byte each + * + * 6k + n * 128byte n logical volume structures ~300 byte each + * + * + m * 328byte m physical extent alloc. structs 4 byte each + * + * End of disk - first physical extent typical 4 megabyte + * PE total * + * PE size + * + * + */ + +/* DONT TOUCH THESE !!! */ +/* base of PV structure in disk partition */ +#define LVM_PV_DISK_BASE 0L + +/* size reserved for PV structure on disk */ +#define LVM_PV_DISK_SIZE 1024L + +/* base of VG structure in disk partition */ +#define LVM_VG_DISK_BASE LVM_PV_DISK_SIZE + +/* size reserved for VG structure */ +#define LVM_VG_DISK_SIZE ( 9 * 512L) + +/* size reserved for timekeeping */ +#define LVM_TIMESTAMP_DISK_BASE ( LVM_VG_DISK_BASE + LVM_VG_DISK_SIZE) +#define LVM_TIMESTAMP_DISK_SIZE 512L /* reserved for timekeeping */ + +/* name list of physical volumes on disk */ +#define LVM_PV_NAMELIST_DISK_BASE ( LVM_TIMESTAMP_DISK_BASE + \ + LVM_TIMESTAMP_DISK_SIZE) + +/* now for the dynamically calculated parts of the VGDA */ +#define LVM_LV_DISK_OFFSET(a, b) ( (a)->lv_on_disk.base + sizeof ( lv_t) * b) +#define LVM_DISK_SIZE(pv) ( (pv)->pe_on_disk.base + \ + (pv)->pe_on_disk.size) +#define LVM_PE_DISK_OFFSET(pe, pv) ( pe * pv->pe_size + \ + ( LVM_DISK_SIZE ( pv) / SECTOR_SIZE)) +#define LVM_PE_ON_DISK_BASE(pv) \ + { int rest; \ + pv->pe_on_disk.base = pv->lv_on_disk.base + pv->lv_on_disk.size; \ + if ( ( rest = pv->pe_on_disk.base % SECTOR_SIZE) != 0) \ + pv->pe_on_disk.base += ( SECTOR_SIZE - rest); \ + } +/* END default disk spaces and offsets for PVs */ + + +/* + * LVM_PE_T_MAX corresponds to: + * + * 8KB PE size can map a ~512 MB logical volume at the cost of 1MB memory, + * + * 128MB PE size can map a 8TB logical volume at the same cost of memory. + * + * Default PE size of 4 MB gives a maximum logical volume size of 256 GB. + * + * Maximum PE size of 16GB gives a maximum logical volume size of 1024 TB. + * + * AFAIK, the actual kernels limit this to 1 TB. + * + * Should be a sufficient spectrum ;*) + */ + +/* This is the usable size of disk_pe_t.le_num !!! v v */ +#define LVM_PE_T_MAX ( ( 1 << ( sizeof ( uint16_t) * 8)) - 2) + +#define LVM_LV_SIZE_MAX(a) ( ( long long) LVM_PE_T_MAX * (a)->pe_size > ( long long) 2*1024*1024*1024 ? 
( long long) 2*1024*1024*1024 : ( long long) LVM_PE_T_MAX * (a)->pe_size) +#define LVM_MIN_PE_SIZE ( 8L * 2) /* 8 KB in sectors */ +#define LVM_MAX_PE_SIZE ( 16L * 1024L * 1024L * 2) /* 16GB in sectors */ +#define LVM_DEFAULT_PE_SIZE ( 4096L * 2) /* 4 MB in sectors */ +#define LVM_DEFAULT_STRIPE_SIZE 16L /* 16 KB */ +#define LVM_MIN_STRIPE_SIZE 2L /* 1 KB in sectors */ +#define LVM_MAX_STRIPE_SIZE ( 512L * 2) /* 512 KB in sectors */ +#define LVM_MAX_STRIPES 128 /* max # of stripes */ +#define LVM_MAX_SIZE ( 1024LU * 1024 * 1024 * 2) /* 1TB[sectors] */ +#define LVM_MAX_MIRRORS 2 /* future use */ +#define LVM_MIN_READ_AHEAD 0 /* minimum read ahead sectors */ +#define LVM_MAX_READ_AHEAD 256 /* maximum read ahead sectors */ +#define LVM_DEF_READ_AHEAD ((LVM_MAX_READ_AHEAD-LVM_MIN_READ_AHEAD)/2 + LVM_MIN_READ_AHEAD) +#define LVM_MAX_LV_IO_TIMEOUT 60 /* seconds I/O timeout (future use) */ +#define LVM_PARTITION 0xfe /* LVM partition id */ +#define LVM_NEW_PARTITION 0x8e /* new LVM partition id (10/09/1999) */ +#define LVM_PE_SIZE_PV_SIZE_REL 5 /* max relation PV size and PE size */ + +#define LVM_SNAPSHOT_MAX_CHUNK 256 /* 256 KB */ +#define LVM_SNAPSHOT_DEF_CHUNK 64 /* 64 KB */ +#define LVM_SNAPSHOT_MIN_CHUNK 1 /* 1 KB */ + +#define UNDEF -1 +#define FALSE 0 +#define TRUE 1 + + +/* + * ioctls + */ +/* volume group */ +#define VG_CREATE _IOW ( 0xfe, 0x00, 1) +#define VG_REMOVE _IOW ( 0xfe, 0x01, 1) + +#define VG_EXTEND _IOW ( 0xfe, 0x03, 1) +#define VG_REDUCE _IOW ( 0xfe, 0x04, 1) + +#define VG_STATUS _IOWR ( 0xfe, 0x05, 1) +#define VG_STATUS_GET_COUNT _IOWR ( 0xfe, 0x06, 1) +#define VG_STATUS_GET_NAMELIST _IOWR ( 0xfe, 0x07, 1) + +#define VG_SET_EXTENDABLE _IOW ( 0xfe, 0x08, 1) + + +/* logical volume */ +#define LV_CREATE _IOW ( 0xfe, 0x20, 1) +#define LV_REMOVE _IOW ( 0xfe, 0x21, 1) + +#define LV_ACTIVATE _IO ( 0xfe, 0x22) +#define LV_DEACTIVATE _IO ( 0xfe, 0x23) + +#define LV_EXTEND _IOW ( 0xfe, 0x24, 1) +#define LV_REDUCE _IOW ( 0xfe, 0x25, 1) + +#define LV_STATUS_BYNAME _IOWR ( 0xfe, 0x26, 1) +#define LV_STATUS_BYINDEX _IOWR ( 0xfe, 0x27, 1) + +#define LV_SET_ACCESS _IOW ( 0xfe, 0x28, 1) +#define LV_SET_ALLOCATION _IOW ( 0xfe, 0x29, 1) +#define LV_SET_STATUS _IOW ( 0xfe, 0x2a, 1) + +#define LE_REMAP _IOW ( 0xfe, 0x2b, 1) + + +/* physical volume */ +#define PV_STATUS _IOWR ( 0xfe, 0x40, 1) +#define PV_CHANGE _IOWR ( 0xfe, 0x41, 1) +#define PV_FLUSH _IOW ( 0xfe, 0x42, 1) + +/* physical extent */ +#define PE_LOCK_UNLOCK _IOW ( 0xfe, 0x50, 1) + +/* i/o protocol version */ +#define LVM_GET_IOP_VERSION _IOR ( 0xfe, 0x98, 1) + +#ifdef LVM_TOTAL_RESET +/* special reset function for testing purposes */ +#define LVM_RESET _IO ( 0xfe, 0x99) +#endif + +/* lock the logical volume manager */ +#define LVM_LOCK_LVM _IO ( 0xfe, 0x100) +/* END ioctls */ + + +/* + * Status flags + */ +/* volume group */ +#define VG_ACTIVE 0x01 /* vg_status */ +#define VG_EXPORTED 0x02 /* " */ +#define VG_EXTENDABLE 0x04 /* " */ + +#define VG_READ 0x01 /* vg_access */ +#define VG_WRITE 0x02 /* " */ + +/* logical volume */ +#define LV_ACTIVE 0x01 /* lv_status */ +#define LV_SPINDOWN 0x02 /* " */ + +#define LV_READ 0x01 /* lv_access */ +#define LV_WRITE 0x02 /* " */ +#define LV_SNAPSHOT 0x04 /* " */ +#define LV_SNAPSHOT_ORG 0x08 /* " */ + +#define LV_BADBLOCK_ON 0x01 /* lv_badblock */ + +#define LV_STRICT 0x01 /* lv_allocation */ +#define LV_CONTIGUOUS 0x02 /* " */ + +/* physical volume */ +#define PV_ACTIVE 0x01 /* pv_status */ +#define PV_ALLOCATABLE 0x02 /* pv_allocatable */ + + +/* + * Structure definitions core/disk follow + 
* + * conditional conversion takes place on big endian architectures + * in functions * pv_copy_*(), vg_copy_*() and lv_copy_*() + * + */ + +#define NAME_LEN 128 /* don't change!!! */ +#define UUID_LEN 16 /* don't change!!! */ + +/* remap physical sector/rdev pairs */ +typedef struct { + struct list_head hash; + ulong rsector_org; + kdev_t rdev_org; + ulong rsector_new; + kdev_t rdev_new; +} lv_block_exception_t; + + +/* disk stored pe information */ +typedef struct { + uint16_t lv_num; + uint16_t le_num; +} disk_pe_t; + +/* disk stored PV, VG, LV and PE size and offset information */ +typedef struct { + uint32_t base; + uint32_t size; +} lvm_disk_data_t; + + +/* + * Structure Physical Volume (PV) Version 1 + */ + +/* core */ +typedef struct { + uint8_t id[2]; /* Identifier */ + uint16_t version; /* HM lvm version */ + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_namelist_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + uint8_t pv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ + kdev_t pv_dev; + uint32_t pv_number; + uint32_t pv_status; + uint32_t pv_allocatable; + uint32_t pv_size; /* HM */ + uint32_t lv_cur; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; + uint32_t pe_stale; /* for future use */ + disk_pe_t *pe; /* HM */ + struct inode *inode; /* HM */ +} pv_v1_t; + +/* disk */ +typedef struct { + uint8_t id[2]; /* Identifier */ + uint16_t version; /* HM lvm version */ + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_namelist_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + uint8_t pv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ + uint32_t pv_major; + uint32_t pv_number; + uint32_t pv_status; + uint32_t pv_allocatable; + uint32_t pv_size; /* HM */ + uint32_t lv_cur; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; + uint32_t dummy1; + uint32_t dummy2; + uint32_t dummy3; +} pv_disk_v1_t; + + +/* + * Structure Physical Volume (PV) Version 2 (future!) 
+ */ + +typedef struct { + uint8_t id[2]; /* Identifier */ + uint16_t version; /* HM lvm version */ + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_uuid_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + uint8_t pv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ + kdev_t pv_dev; + uint32_t pv_number; + uint32_t pv_status; + uint32_t pv_allocatable; + uint32_t pv_size; /* HM */ + uint32_t lv_cur; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; + uint32_t pe_stale; /* for future use */ + disk_pe_t *pe; /* HM */ + struct inode *inode; /* HM */ + /* delta to version 1 starts here */ + uint8_t pv_uuid[UUID_LEN]; + uint32_t pv_atime; /* PV access time */ + uint32_t pv_ctime; /* PV creation time */ + uint32_t pv_mtime; /* PV modification time */ +} pv_v2_t; + + +/* + * Structures for Logical Volume (LV) + */ + +/* core PE information */ +typedef struct { + kdev_t dev; + uint32_t pe; /* to be changed if > 2TB */ + uint32_t reads; + uint32_t writes; +} pe_t; + +typedef struct { + uint8_t lv_name[NAME_LEN]; + kdev_t old_dev; + kdev_t new_dev; + ulong old_pe; + ulong new_pe; +} le_remap_req_t; + + + +/* + * Structure Logical Volume (LV) Version 1 + */ + +/* core */ +typedef struct { + uint8_t lv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint32_t lv_access; + uint32_t lv_status; + uint32_t lv_open; /* HM */ + kdev_t lv_dev; /* HM */ + uint32_t lv_number; /* HM */ + uint32_t lv_mirror_copies; /* for future use */ + uint32_t lv_recovery; /* " */ + uint32_t lv_schedule; /* " */ + uint32_t lv_size; + pe_t *lv_current_pe; /* HM */ + uint32_t lv_current_le; /* for future use */ + uint32_t lv_allocated_le; + uint32_t lv_stripes; + uint32_t lv_stripesize; + uint32_t lv_badblock; /* for future use */ + uint32_t lv_allocation; + uint32_t lv_io_timeout; /* for future use */ + uint32_t lv_read_ahead; +} lv_v1_t; + +/* disk */ +typedef struct { + uint8_t lv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint32_t lv_access; + uint32_t lv_status; + uint32_t lv_open; /* HM */ + uint32_t lv_dev; /* HM */ + uint32_t lv_number; /* HM */ + uint32_t lv_mirror_copies; /* for future use */ + uint32_t lv_recovery; /* " */ + uint32_t lv_schedule; /* " */ + uint32_t lv_size; + uint32_t dummy; + uint32_t lv_current_le; /* for future use */ + uint32_t lv_allocated_le; + uint32_t lv_stripes; + uint32_t lv_stripesize; + uint32_t lv_badblock; /* for future use */ + uint32_t lv_allocation; + uint32_t lv_io_timeout; /* for future use */ + uint32_t lv_read_ahead; /* HM, for future use */ +} lv_disk_v1_t; + + +/* + * Structure Logical Volume (LV) Version 2 + */ + +/* core */ +typedef struct lv_v2 { + uint8_t lv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint32_t lv_access; + uint32_t lv_status; + uint32_t lv_open; /* HM */ + kdev_t lv_dev; /* HM */ + uint32_t lv_number; /* HM */ + uint32_t lv_mirror_copies; /* for future use */ + uint32_t lv_recovery; /* " */ + uint32_t lv_schedule; /* " */ + uint32_t lv_size; + pe_t *lv_current_pe; /* HM */ + uint32_t lv_current_le; /* for future use */ + uint32_t lv_allocated_le; + uint32_t lv_stripes; + uint32_t lv_stripesize; + uint32_t lv_badblock; /* for future use */ + uint32_t lv_allocation; + uint32_t lv_io_timeout; /* for future use */ + uint32_t lv_read_ahead; + /* delta to version 1 starts here */ + struct lv_v2 *lv_snapshot_org; + struct lv_v2 *lv_snapshot_prev; + struct lv_v2 *lv_snapshot_next; + lv_block_exception_t *lv_block_exception; + uint8_t 
__unused2; + uint32_t lv_remap_ptr; + uint32_t lv_remap_end; + uint32_t lv_chunk_size; + uint32_t lv_snapshot_minor; + struct kiobuf * lv_iobuf; + struct semaphore lv_snapshot_sem; + struct list_head * lv_snapshot_hash_table; + unsigned long lv_snapshot_hash_mask; +} lv_v2_t; + +/* disk */ +typedef struct { + uint8_t lv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint32_t lv_access; + uint32_t lv_status; + uint32_t lv_open; /* HM */ + uint32_t lv_dev; /* HM */ + uint32_t lv_number; /* HM */ + uint32_t lv_mirror_copies; /* for future use */ + uint32_t lv_recovery; /* " */ + uint32_t lv_schedule; /* " */ + uint32_t lv_size; + uint32_t dummy; + uint32_t lv_current_le; /* for future use */ + uint32_t lv_allocated_le; + uint32_t lv_stripes; + uint32_t lv_stripesize; + uint32_t lv_badblock; /* for future use */ + uint32_t lv_allocation; + uint32_t lv_io_timeout; /* for future use */ + uint32_t lv_read_ahead; /* HM, for future use */ +} lv_disk_v2_t; + + +/* + * Structure Volume Group (VG) Version 1 + */ + +typedef struct { + uint8_t vg_name[NAME_LEN]; /* volume group name */ + uint32_t vg_number; /* volume group number */ + uint32_t vg_access; /* read/write */ + uint32_t vg_status; /* active or not */ + uint32_t lv_max; /* maximum logical volumes */ + uint32_t lv_cur; /* current logical volumes */ + uint32_t lv_open; /* open logical volumes */ + uint32_t pv_max; /* maximum physical volumes */ + uint32_t pv_cur; /* current physical volumes FU */ + uint32_t pv_act; /* active physical volumes */ + uint32_t dummy; /* was obsolete max_pe_per_pv */ + uint32_t vgda; /* volume group descriptor arrays FU */ + uint32_t pe_size; /* physical extent size in sectors */ + uint32_t pe_total; /* total of physical extents */ + uint32_t pe_allocated; /* allocated physical extents */ + uint32_t pvg_total; /* physical volume groups FU */ + struct proc_dir_entry *proc; + pv_t *pv[ABS_MAX_PV+1]; /* physical volume struct pointers */ + lv_t *lv[ABS_MAX_LV+1]; /* logical volume struct pointers */ +} vg_v1_t; + +typedef struct { + uint8_t vg_name[NAME_LEN]; /* volume group name */ + uint32_t vg_number; /* volume group number */ + uint32_t vg_access; /* read/write */ + uint32_t vg_status; /* active or not */ + uint32_t lv_max; /* maximum logical volumes */ + uint32_t lv_cur; /* current logical volumes */ + uint32_t lv_open; /* open logical volumes */ + uint32_t pv_max; /* maximum physical volumes */ + uint32_t pv_cur; /* current physical volumes FU */ + uint32_t pv_act; /* active physical volumes */ + uint32_t dummy; + uint32_t vgda; /* volume group descriptor arrays FU */ + uint32_t pe_size; /* physical extent size in sectors */ + uint32_t pe_total; /* total of physical extents */ + uint32_t pe_allocated; /* allocated physical extents */ + uint32_t pvg_total; /* physical volume groups FU */ +} vg_disk_v1_t; + +/* + * Structure Volume Group (VG) Version 2 + */ + +typedef struct { + uint8_t vg_name[NAME_LEN]; /* volume group name */ + uint32_t vg_number; /* volume group number */ + uint32_t vg_access; /* read/write */ + uint32_t vg_status; /* active or not */ + uint32_t lv_max; /* maximum logical volumes */ + uint32_t lv_cur; /* current logical volumes */ + uint32_t lv_open; /* open logical volumes */ + uint32_t pv_max; /* maximum physical volumes */ + uint32_t pv_cur; /* current physical volumes FU */ + uint32_t pv_act; /* future: active physical volumes */ + uint32_t max_pe_per_pv; /* OBSOLETE maximum PE/PV */ + uint32_t vgda; /* volume group descriptor arrays FU */ + uint32_t pe_size; /* physical extent size in 
sectors */ + uint32_t pe_total; /* total of physical extents */ + uint32_t pe_allocated; /* allocated physical extents */ + uint32_t pvg_total; /* physical volume groups FU */ + struct proc_dir_entry *proc; + pv_t *pv[ABS_MAX_PV+1]; /* physical volume struct pointers */ + lv_t *lv[ABS_MAX_LV+1]; /* logical volume struct pointers */ + /* delta to version 1 starts here */ + uint8_t vg_uuid[UUID_LEN]; /* volume group UUID */ + time_t vg_atime; /* VG access time */ + time_t vg_ctime; /* VG creation time */ + time_t vg_mtime; /* VG modification time */ +} vg_v2_t; + + +/* + * Timekeeping structure on disk (0.7 feature) + * + * Holds several timestamps for start/stop time of non + * atomic VGDA disk i/o operations + * + */ + +typedef struct { + uint32_t seconds; /* seconds since the epoch */ + uint32_t jiffies; /* micro timer */ +} lvm_time_t; + +#define TIMESTAMP_ID_SIZE 2 +typedef struct { + uint8_t id[TIMESTAMP_ID_SIZE]; /* Identifier */ + lvm_time_t pv_vg_lv_pe_io_begin; + lvm_time_t pv_vg_lv_pe_io_end; + lvm_time_t pv_io_begin; + lvm_time_t pv_io_end; + lvm_time_t vg_io_begin; + lvm_time_t vg_io_end; + lvm_time_t lv_io_begin; + lvm_time_t lv_io_end; + lvm_time_t pe_io_begin; + lvm_time_t pe_io_end; + lvm_time_t pe_move_io_begin; + lvm_time_t pe_move_io_end; + uint8_t dummy[LVM_TIMESTAMP_DISK_SIZE - + TIMESTAMP_ID_SIZE - + 12 * sizeof(lvm_time_t)]; + /* ATTENTION ^^ */ +} timestamp_disk_t; + +/* same on disk and in core so far */ +typedef timestamp_disk_t timestamp_t; + +/* function identifiers for timestamp actions */ +typedef enum { PV_VG_LV_PE_IO_BEGIN, + PV_VG_LV_PE_IO_END, + PV_IO_BEGIN, + PV_IO_END, + VG_IO_BEGIN, + VG_IO_END, + LV_IO_BEGIN, + LV_IO_END, + PE_IO_BEGIN, + PE_IO_END, + PE_MOVE_IO_BEGIN, + PE_MOVE_IO_END} ts_fct_id_t; + + +/* + * Request structures for ioctls + */ + +/* Request structure PV_STATUS */ +typedef struct { + char pv_name[NAME_LEN]; + pv_t *pv; +} pv_status_req_t, pv_change_req_t; + +/* Request structure PV_FLUSH */ +typedef struct { + char pv_name[NAME_LEN]; +} pv_flush_req_t; + + +/* Request structure PE_MOVE */ +typedef struct { + enum { LOCK_PE, UNLOCK_PE} lock; + struct { + kdev_t lv_dev; + kdev_t pv_dev; + uint32_t pv_offset; + } data; +} pe_lock_req_t; + + +/* Request structure LV_STATUS_BYNAME */ +typedef struct { + char lv_name[NAME_LEN]; + lv_t *lv; +} lv_status_byname_req_t, lv_req_t; + +/* Request structure LV_STATUS_BYINDEX */ +typedef struct { + ulong lv_index; + lv_t *lv; +} lv_status_byindex_req_t; + +#endif /* #ifndef _LVM_H_INCLUDE */ diff -urN 2.3.46pre1/include/linux/major.h 2.3.46pre1aa1/include/linux/major.h --- 2.3.46pre1/include/linux/major.h Sun Jan 30 15:43:30 2000 +++ 2.3.46pre1aa1/include/linux/major.h Wed Feb 16 00:28:24 2000 @@ -92,8 +92,6 @@ #define SCSI_DISK7_MAJOR 71 -#define LVM_BLK_MAJOR 58 /* Logical Volume Manager */ - #define COMPAQ_SMART2_MAJOR 72 #define COMPAQ_SMART2_MAJOR1 73 #define COMPAQ_SMART2_MAJOR2 74 diff -urN 2.3.46pre1/include/linux/mm.h 2.3.46pre1aa1/include/linux/mm.h --- 2.3.46pre1/include/linux/mm.h Tue Feb 15 17:19:32 2000 +++ 2.3.46pre1aa1/include/linux/mm.h Wed Feb 16 00:28:25 2000 @@ -399,7 +399,6 @@ unsigned long * zones_size, unsigned long zone_start_paddr); extern void mem_init(void); extern void show_mem(void); -extern void oom(struct task_struct * tsk); extern void si_meminfo(struct sysinfo * val); extern void swapin_readahead(swp_entry_t); diff -urN 2.3.46pre1/include/linux/nbd.h 2.3.46pre1aa1/include/linux/nbd.h --- 2.3.46pre1/include/linux/nbd.h Sat Feb 12 05:15:10 2000 +++ 
2.3.46pre1aa1/include/linux/nbd.h Wed Feb 16 00:28:25 2000 @@ -60,8 +60,7 @@ struct socket * sock; struct file * file; /* If == NULL, device is not ready, yet */ int magic; /* FIXME: not if debugging is off */ - struct request *head; /* Requests are added here... */ - struct request *tail; + struct list_head queue_head; /* Requests are added here... */ struct semaphore queue_lock; }; #endif diff -urN 2.3.46pre1/include/linux/rbtree.h 2.3.46pre1aa1/include/linux/rbtree.h --- 2.3.46pre1/include/linux/rbtree.h Thu Jan 1 01:00:00 1970 +++ 2.3.46pre1aa1/include/linux/rbtree.h Wed Feb 16 00:28:25 2000 @@ -0,0 +1,128 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree.h + + To use rbtrees you'll have to implement your own insert and search cores. + This avoids the use of callbacks, which would cost a dramatic amount of + performance. I know it's not the cleanest way, but it is what it takes in + C (as opposed to C++) to get both performance and genericity... + + Examples of insert and search follow. The search is a plain, ordinary + search over an ordered tree. The insert, however, must be implemented in + two steps: first the code must insert the element in order as a red leaf + in the tree, then the support library function rb_insert_color() must be + called. That function does the non-trivial work of rebalancing the rbtree + if necessary.
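Before the in-tree page cache example that follows, here is a minimal sketch of the erase side, which the comment above does not demonstrate: rb_erase() needs no two-step protocol, since a single call both unlinks the node and rebalances the tree. The int-keyed int_node type and its helpers are invented for illustration and are not part of this patch; only rb_node_t, rb_root_t, rb_entry() and rb_erase() come from the header itself.

    /* A hypothetical container keyed by a plain int. */
    struct int_node {
        rb_node_t rb;                   /* embedded rbtree linkage */
        int key;
    };

    /* Hand-rolled search core, as the comment above prescribes. */
    static inline struct int_node * int_tree_search(rb_root_t * root, int key)
    {
        rb_node_t * n = root->rb_node;

        while (n) {
            struct int_node * node = rb_entry(n, struct int_node, rb);

            if (key < node->key)
                n = n->rb_left;
            else if (key > node->key)
                n = n->rb_right;
            else
                return node;
        }
        return NULL;
    }

    /* Erase by key: find the node, then one call unlinks and rebalances. */
    static inline void int_tree_erase(rb_root_t * root, int key)
    {
        struct int_node * node = int_tree_search(root, key);

        if (node)
            rb_erase(&node->rb, root);
    }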
+ +----------------------------------------------------------------------- +static inline struct page * rb_search_page_cache(struct inode * inode, + unsigned long offset) +{ + rb_node_t * n = inode->i_rb_page_cache.rb_node; + struct page * page; + + while (n) + { + page = rb_entry(n, struct page, rb_page_cache); + + if (offset < page->offset) + n = n->rb_left; + else if (offset > page->offset) + n = n->rb_right; + else + return page; + } + return NULL; +} + +static inline struct page * __rb_insert_page_cache(struct inode * inode, + unsigned long offset, + rb_node_t * node) +{ + rb_node_t ** p = &inode->i_rb_page_cache.rb_node; + rb_node_t * parent = NULL; + struct page * page; + + while (*p) + { + parent = *p; + page = rb_entry(parent, struct page, rb_page_cache); + + if (offset < page->offset) + p = &(*p)->rb_left; + else if (offset > page->offset) + p = &(*p)->rb_right; + else + return page; + } + + node->rb_parent = parent; + node->rb_color = RB_RED; + node->rb_left = node->rb_right = NULL; + + *p = node; + + return NULL; +} + +static inline struct page * rb_insert_page_cache(struct inode * inode, + unsigned long offset, + rb_node_t * node) +{ + struct page * ret; + if ((ret = __rb_insert_page_cache(inode, offset, node))) + goto out; + rb_insert_color(node, &inode->i_rb_page_cache); + out: + return ret; +} +----------------------------------------------------------------------- +*/ + +#ifndef _LINUX_RBTREE_H +#define _LINUX_RBTREE_H + +#include <linux/kernel.h> +#include <linux/stddef.h> + +typedef struct rb_node_s +{ + struct rb_node_s * rb_parent; + int rb_color; +#define RB_RED 0 +#define RB_BLACK 1 + struct rb_node_s * rb_right; + struct rb_node_s * rb_left; +} +rb_node_t; + +typedef struct rb_root_s +{ + struct rb_node_s * rb_node; +} +rb_root_t; + +#define RB_ROOT (rb_root_t) { NULL, } +#define rb_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + +extern void rb_insert_color(rb_node_t *, rb_root_t *); +extern void rb_erase(rb_node_t *, rb_root_t *); + +#endif /* _LINUX_RBTREE_H */ diff -urN 2.3.46pre1/include/linux/sched.h 2.3.46pre1aa1/include/linux/sched.h --- 2.3.46pre1/include/linux/sched.h Tue Feb 15 17:19:32 2000 +++ 2.3.46pre1aa1/include/linux/sched.h Wed Feb 16 00:28:25 2000 @@ -356,6 +356,9 @@ u32 self_exec_id; /* Protection of fields allocation/deallocation */ struct semaphore exit_sem; + +/* oom handling, left at the end since it's not critical info */ + int oom_kill_try; }; /* @@ -426,6 +429,7 @@ /* signals */ SPIN_LOCK_UNLOCKED, &init_signals, {{0}}, {{0}}, NULL, &init_task.sigqueue, 0, 0, \ /* exec cts */ 0,0, \ /* exit_sem */ __MUTEX_INITIALIZER(name.exit_sem), \ +/* oom */ 0, \ } #ifndef INIT_TASK_SIZE diff -urN 2.3.46pre1/include/linux/sysctl.h 2.3.46pre1aa1/include/linux/sysctl.h --- 2.3.46pre1/include/linux/sysctl.h Fri Feb 11 00:05:38 2000 +++ 2.3.46pre1aa1/include/linux/sysctl.h Wed Feb 16 00:28:25 2000 @@ -501,7 +501,9 @@ FS_NRSUPER=9, /* int:current number of allocated super_blocks */ FS_MAXSUPER=10, /* int:maximum number of super_blocks that can be allocated */ FS_OVERFLOWUID=11, /* int: overflow UID */ - FS_OVERFLOWGID=12 /* int: overflow GID */ + FS_OVERFLOWGID=12, /* int: overflow GID */ + FS_DCACHE_CTL=13, /* dentry cache controls */ + FS_ICACHE_CTL=14, /* inode cache controls */ }; /* CTL_DEBUG names: */ diff -urN 2.3.46pre1/include/linux/timer.h 2.3.46pre1aa1/include/linux/timer.h --- 2.3.46pre1/include/linux/timer.h Tue Feb 15 03:15:06 2000 +++ 2.3.46pre1aa1/include/linux/timer.h Wed Feb 16 00:28:25 2000 @@ -105,10 +105,10 @@ * good
compiler would generate better code (and a really good compiler * wouldn't care). Gcc is currently neither. */ -#define time_after(a,b) ((long)(b) - (long)(a) < 0) +#define time_after(a,b) ((signed)(b) - (signed)(a) < 0) #define time_before(a,b) time_after(b,a) -#define time_after_eq(a,b) ((long)(a) - (long)(b) >= 0) +#define time_after_eq(a,b) ((signed)(a) - (signed)(b) >= 0) #define time_before_eq(a,b) time_after_eq(b,a) #endif diff -urN 2.3.46pre1/kernel/ksyms.c 2.3.46pre1aa1/kernel/ksyms.c --- 2.3.46pre1/kernel/ksyms.c Tue Feb 15 03:06:49 2000 +++ 2.3.46pre1aa1/kernel/ksyms.c Wed Feb 16 00:28:24 2000 @@ -71,6 +71,15 @@ }; #endif +#ifdef CONFIG_BLK_DEV_LVM_MODULE +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 3, 43) + extern int (*lvm_map_ptr) ( int, kdev_t *, unsigned long *, + unsigned long, int); + EXPORT_SYMBOL(lvm_map_ptr); +#endif + extern void (*lvm_hd_name_ptr) ( char*, int); + EXPORT_SYMBOL(lvm_hd_name_ptr); +#endif #ifdef CONFIG_KMOD EXPORT_SYMBOL(request_module); @@ -159,6 +168,8 @@ EXPORT_SYMBOL(free_kiovec); EXPORT_SYMBOL(brw_kiovec); EXPORT_SYMBOL(alloc_kiovec); +EXPORT_SYMBOL(expand_kiobuf); +EXPORT_SYMBOL(unmap_kiobuf); EXPORT_SYMBOL(get_empty_filp); EXPORT_SYMBOL(init_private_file); EXPORT_SYMBOL(filp_open); diff -urN 2.3.46pre1/kernel/ptrace.c 2.3.46pre1aa1/kernel/ptrace.c --- 2.3.46pre1/kernel/ptrace.c Tue Feb 15 03:06:49 2000 +++ 2.3.46pre1aa1/kernel/ptrace.c Wed Feb 16 00:28:25 2000 @@ -26,6 +26,7 @@ unsigned long mapnr; unsigned long maddr; struct page *page; + int fault; repeat: pgdir = pgd_offset(vma->vm_mm, addr); @@ -65,8 +66,12 @@ fault_in_page: /* -1: out of memory. 0 - unmapped page */ - if (handle_mm_fault(tsk, vma, addr, write) > 0) + fault = handle_mm_fault(tsk, vma, addr, write); + if (fault > 0) goto repeat; + if (fault < 0) + /* the out-of-memory condition was triggered by the current task */ + force_sig(SIGKILL, current); return 0; bad_pgd: diff -urN 2.3.46pre1/kernel/sched.c 2.3.46pre1aa1/kernel/sched.c --- 2.3.46pre1/kernel/sched.c Fri Feb 11 00:05:39 2000 +++ 2.3.46pre1aa1/kernel/sched.c Wed Feb 16 00:28:25 2000 @@ -141,7 +141,7 @@ #endif /* ..
and a slight advantage to the current MM */ - if (p->mm == this_mm) + if (p->mm == this_mm || !p->mm) weight += 1; weight += p->priority; @@ -173,7 +173,7 @@ */ static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu) { - return goodness(p, cpu, prev->mm) - goodness(prev, cpu, prev->mm); + return goodness(p, cpu, prev->active_mm) - goodness(prev, cpu, prev->active_mm); } /* diff -urN 2.3.46pre1/kernel/sysctl.c 2.3.46pre1aa1/kernel/sysctl.c --- 2.3.46pre1/kernel/sysctl.c Fri Feb 11 00:05:39 2000 +++ 2.3.46pre1aa1/kernel/sysctl.c Wed Feb 16 00:28:25 2000 @@ -47,6 +47,9 @@ static int maxolduid = 65535; static int minolduid = 0; +extern int dcache_ctl[], dcache_ctl_min[], dcache_ctl_max[]; +extern int icache_ctl[], icache_ctl_min[], icache_ctl_max[]; + #ifdef CONFIG_KMOD extern char modprobe_path[]; #endif @@ -294,6 +297,12 @@ {FS_OVERFLOWGID, "overflowgid", &fs_overflowgid, sizeof(int), 0644, NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL, &minolduid, &maxolduid}, + {FS_DCACHE_CTL, "dcache_ctl", &dcache_ctl, 1*sizeof(int), 0644, NULL, + &proc_dointvec_minmax, &sysctl_intvec, NULL, + &dcache_ctl_min, &dcache_ctl_max}, + {FS_ICACHE_CTL, "icache_ctl", &icache_ctl, 1*sizeof(int), 0644, NULL, + &proc_dointvec_minmax, &sysctl_intvec, NULL, + &icache_ctl_min, &icache_ctl_max}, {0} }; diff -urN 2.3.46pre1/kernel/timer.c 2.3.46pre1aa1/kernel/timer.c --- 2.3.46pre1/kernel/timer.c Fri Feb 11 00:05:39 2000 +++ 2.3.46pre1aa1/kernel/timer.c Wed Feb 16 00:28:25 2000 @@ -101,6 +101,8 @@ (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5 }; +static struct timer_list ** run_timer_list_running; + #define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0])) static unsigned long timer_jiffies = 0; @@ -125,7 +127,13 @@ unsigned long idx = expires - timer_jiffies; struct timer_list ** vec; - if (idx < TVR_SIZE) { + if (run_timer_list_running) { + if ((signed) idx < 0) + printk(KERN_WARNING __FUNCTION__ + ": potential recursion idx %ld from %p\n", + (signed) idx, __builtin_return_address(0)); + vec = run_timer_list_running; + } else if (idx < TVR_SIZE) { int i = expires & TVR_MASK; vec = tv1.vec + i; } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { @@ -266,13 +274,14 @@ { spin_lock_irq(&timerlist_lock); while ((long)(jiffies - timer_jiffies) >= 0) { - struct timer_list *timer; + struct timer_list *timer, * queued = NULL; if (!tv1.index) { int n = 1; do { cascade_timers(tvecs[n]); } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS); } + run_timer_list_running = &queued; while ((timer = tv1.vec[tv1.index])) { void (*fn)(unsigned long) = timer->function; unsigned long data = timer->data; @@ -283,8 +292,14 @@ fn(data); spin_lock_irq(&timerlist_lock); } + run_timer_list_running = NULL; ++timer_jiffies; tv1.index = (tv1.index + 1) & TVR_MASK; + while (queued) { + timer = queued; + queued = queued->next; + internal_add_timer(timer); + } } spin_unlock_irq(&timerlist_lock); } diff -urN 2.3.46pre1/lib/Makefile 2.3.46pre1aa1/lib/Makefile --- 2.3.46pre1/lib/Makefile Mon Jan 18 02:27:00 1999 +++ 2.3.46pre1aa1/lib/Makefile Wed Feb 16 00:28:25 2000 @@ -7,6 +7,6 @@ # L_TARGET := lib.a -L_OBJS := errno.o ctype.o string.o vsprintf.o +L_OBJS := errno.o ctype.o string.o vsprintf.o rbtree.o include $(TOPDIR)/Rules.make diff -urN 2.3.46pre1/lib/rbtree.c 2.3.46pre1aa1/lib/rbtree.c --- 2.3.46pre1/lib/rbtree.c Thu Jan 1 01:00:00 1970 +++ 2.3.46pre1aa1/lib/rbtree.c Wed Feb 16 00:28:25 2000 @@ -0,0 +1,293 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + + This program is free software; you can 
redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/lib/rbtree.c +*/ + +#include <linux/rbtree.h> + +static void __rb_rotate_left(rb_node_t * node, rb_root_t * root) +{ + rb_node_t * right = node->rb_right; + + if ((node->rb_right = right->rb_left)) + right->rb_left->rb_parent = node; + right->rb_left = node; + + if ((right->rb_parent = node->rb_parent)) + { + if (node == node->rb_parent->rb_left) + node->rb_parent->rb_left = right; + else + node->rb_parent->rb_right = right; + } + else + root->rb_node = right; + node->rb_parent = right; +} + +static void __rb_rotate_right(rb_node_t * node, rb_root_t * root) +{ + rb_node_t * left = node->rb_left; + + if ((node->rb_left = left->rb_right)) + left->rb_right->rb_parent = node; + left->rb_right = node; + + if ((left->rb_parent = node->rb_parent)) + { + if (node == node->rb_parent->rb_right) + node->rb_parent->rb_right = left; + else + node->rb_parent->rb_left = left; + } + else + root->rb_node = left; + node->rb_parent = left; +} + +void rb_insert_color(rb_node_t * node, rb_root_t * root) +{ + rb_node_t * parent, * gparent; + + while ((parent = node->rb_parent) && parent->rb_color == RB_RED) + { + gparent = parent->rb_parent; + + if (parent == gparent->rb_left) + { + { + register rb_node_t * uncle = gparent->rb_right; + if (uncle && uncle->rb_color == RB_RED) + { + uncle->rb_color = RB_BLACK; + parent->rb_color = RB_BLACK; + gparent->rb_color = RB_RED; + node = gparent; + continue; + } + } + + if (parent->rb_right == node) + { + register rb_node_t * tmp; + __rb_rotate_left(parent, root); + tmp = parent; + parent = node; + node = tmp; + } + + parent->rb_color = RB_BLACK; + gparent->rb_color = RB_RED; + __rb_rotate_right(gparent, root); + } else { + { + register rb_node_t * uncle = gparent->rb_left; + if (uncle && uncle->rb_color == RB_RED) + { + uncle->rb_color = RB_BLACK; + parent->rb_color = RB_BLACK; + gparent->rb_color = RB_RED; + node = gparent; + continue; + } + } + + if (parent->rb_left == node) + { + register rb_node_t * tmp; + __rb_rotate_right(parent, root); + tmp = parent; + parent = node; + node = tmp; + } + + parent->rb_color = RB_BLACK; + gparent->rb_color = RB_RED; + __rb_rotate_left(gparent, root); + } + } + + root->rb_node->rb_color = RB_BLACK; +} + +static void __rb_erase_color(rb_node_t * node, rb_node_t * parent, + rb_root_t * root) +{ + rb_node_t * other; + + while ((!node || node->rb_color == RB_BLACK) && node != root->rb_node) + { + if (parent->rb_left == node) + { + other = parent->rb_right; + if (other->rb_color == RB_RED) + { + other->rb_color = RB_BLACK; + parent->rb_color = RB_RED; + __rb_rotate_left(parent, root); + other = parent->rb_right; + } + if ((!other->rb_left || + other->rb_left->rb_color == RB_BLACK) + && (!other->rb_right || + other->rb_right->rb_color == RB_BLACK)) + { + other->rb_color = RB_RED; + node = parent; + parent = node->rb_parent; + } + else + { + if (!other->rb_right || + other->rb_right->rb_color ==
RB_BLACK) + { + register rb_node_t * o_left; + if ((o_left = other->rb_left)) + o_left->rb_color = RB_BLACK; + other->rb_color = RB_RED; + __rb_rotate_right(other, root); + other = parent->rb_right; + } + other->rb_color = parent->rb_color; + parent->rb_color = RB_BLACK; + if (other->rb_right) + other->rb_right->rb_color = RB_BLACK; + __rb_rotate_left(parent, root); + node = root->rb_node; + break; + } + } + else + { + other = parent->rb_left; + if (other->rb_color == RB_RED) + { + other->rb_color = RB_BLACK; + parent->rb_color = RB_RED; + __rb_rotate_right(parent, root); + other = parent->rb_left; + } + if ((!other->rb_left || + other->rb_left->rb_color == RB_BLACK) + && (!other->rb_right || + other->rb_right->rb_color == RB_BLACK)) + { + other->rb_color = RB_RED; + node = parent; + parent = node->rb_parent; + } + else + { + if (!other->rb_left || + other->rb_left->rb_color == RB_BLACK) + { + register rb_node_t * o_right; + if ((o_right = other->rb_right)) + o_right->rb_color = RB_BLACK; + other->rb_color = RB_RED; + __rb_rotate_left(other, root); + other = parent->rb_left; + } + other->rb_color = parent->rb_color; + parent->rb_color = RB_BLACK; + if (other->rb_left) + other->rb_left->rb_color = RB_BLACK; + __rb_rotate_right(parent, root); + node = root->rb_node; + break; + } + } + } + if (node) + node->rb_color = RB_BLACK; +} + +void rb_erase(rb_node_t * node, rb_root_t * root) +{ + rb_node_t * child, * parent; + int color; + + if (!node->rb_left) + child = node->rb_right; + else if (!node->rb_right) + child = node->rb_left; + else + { + rb_node_t * old = node, * left; + + node = node->rb_right; + while ((left = node->rb_left)) + node = left; + child = node->rb_right; + parent = node->rb_parent; + color = node->rb_color; + + if (child) + child->rb_parent = parent; + if (parent) + { + if (parent->rb_left == node) + parent->rb_left = child; + else + parent->rb_right = child; + } + else + root->rb_node = child; + + if (node->rb_parent == old) + parent = node; + node->rb_parent = old->rb_parent; + node->rb_color = old->rb_color; + node->rb_right = old->rb_right; + node->rb_left = old->rb_left; + + if (old->rb_parent) + { + if (old->rb_parent->rb_left == old) + old->rb_parent->rb_left = node; + else + old->rb_parent->rb_right = node; + } else + root->rb_node = node; + + old->rb_left->rb_parent = node; + if (old->rb_right) + old->rb_right->rb_parent = node; + goto color; + } + + parent = node->rb_parent; + color = node->rb_color; + + if (child) + child->rb_parent = parent; + if (parent) + { + if (parent->rb_left == node) + parent->rb_left = child; + else + parent->rb_right = child; + } + else + root->rb_node = child; + + color: + if (color == RB_BLACK) + __rb_erase_color(child, parent, root); +} diff -urN 2.3.46pre1/mm/filemap.c 2.3.46pre1aa1/mm/filemap.c --- 2.3.46pre1/mm/filemap.c Tue Feb 15 03:06:49 2000 +++ 2.3.46pre1aa1/mm/filemap.c Wed Feb 16 00:28:25 2000 @@ -586,10 +586,10 @@ add_wait_queue(&page->wait, &wait); do { - run_task_queue(&tq_disk); set_task_state(tsk, TASK_UNINTERRUPTIBLE); if (!PageLocked(page)) break; + run_task_queue(&tq_disk); schedule(); } while (PageLocked(page)); tsk->state = TASK_RUNNING; @@ -631,13 +631,13 @@ struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); - run_task_queue(&tq_disk); - __set_task_state(tsk, TASK_UNINTERRUPTIBLE); add_wait_queue(&page->wait, &wait); - if (PageLocked(page)) + if (PageLocked(page)) { + run_task_queue(&tq_disk); schedule(); + } __set_task_state(tsk, TASK_RUNNING); remove_wait_queue(&page->wait, &wait); @@ -681,13 
+681,13 @@ struct task_struct *tsk = current; DECLARE_WAITQUEUE(wait, tsk); - run_task_queue(&tq_disk); - __set_task_state(tsk, TASK_UNINTERRUPTIBLE); add_wait_queue(&page->wait, &wait); - if (PageLocked(page)) + if (PageLocked(page)) { + run_task_queue(&tq_disk); schedule(); + } __set_task_state(tsk, TASK_RUNNING); remove_wait_queue(&page->wait, &wait); diff -urN 2.3.46pre1/mm/memory.c 2.3.46pre1aa1/mm/memory.c --- 2.3.46pre1/mm/memory.c Tue Feb 15 03:06:49 2000 +++ 2.3.46pre1aa1/mm/memory.c Wed Feb 16 00:28:25 2000 @@ -70,16 +70,6 @@ mem_map_t * mem_map = NULL; /* - * oom() prints a message (so that the user knows why the process died), - * and gives the process an untrappable SIGKILL. - */ -void oom(struct task_struct * task) -{ - printk("\nOut of memory for %s.\n", task->comm); - force_sig(SIGKILL, task); -} - -/* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. */ diff -urN 2.3.46pre1/mm/vmscan.c 2.3.46pre1aa1/mm/vmscan.c --- 2.3.46pre1/mm/vmscan.c Sat Feb 12 21:03:24 2000 +++ 2.3.46pre1aa1/mm/vmscan.c Wed Feb 16 00:28:25 2000 @@ -325,6 +325,7 @@ struct task_struct * p; int counter; int __ret = 0; + int assign = 0; lock_kernel(); /* @@ -344,12 +345,9 @@ counter = nr_threads / (priority+1); if (counter < 1) counter = 1; - if (counter > nr_threads) - counter = nr_threads; for (; counter >= 0; counter--) { - int assign = 0; - int max_cnt = 0; + unsigned long max_cnt = 0; struct mm_struct *best = NULL; int pid = 0; select: @@ -362,7 +360,7 @@ if (mm->rss <= 0) continue; /* Refresh swap_cnt? */ - if (assign) + if (assign == 1) mm->swap_cnt = mm->rss; if (mm->swap_cnt > max_cnt) { max_cnt = mm->swap_cnt; @@ -371,6 +369,8 @@ } } read_unlock(&tasklist_lock); + if (assign == 1) + assign = 2; if (!best) { if (!assign) { assign = 1;
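A usage note on the time_after()/time_after_eq() change in the timer.h hunk above: the macros stay correct when jiffies wraps around, because they compare the signed difference of the two counters rather than their raw values. A minimal sketch of the intended use follows; device_ready() and the five-second budget are invented for illustration and are not part of this patch, while jiffies, HZ, schedule() and time_after() are the kernel's own:

    #include <linux/sched.h>    /* jiffies, HZ, schedule() */
    #include <linux/timer.h>    /* time_after() */
    #include <linux/errno.h>    /* ETIMEDOUT */

    extern int device_ready(void);  /* hypothetical readiness test */

    /* Poll a (hypothetical) device for at most five seconds. */
    static int wait_for_device(void)
    {
        unsigned long timeout = jiffies + 5 * HZ;

        while (!device_ready()) {
            /* signed difference: correct even across a jiffies wrap */
            if (time_after(jiffies, timeout))
                return -ETIMEDOUT;
            schedule();
        }
        return 0;
    }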