diff -urN 2.2.14/CREDITS 2.2.14aa6/CREDITS --- 2.2.14/CREDITS Fri Jan 7 18:19:26 2000 +++ 2.2.14aa6/CREDITS Wed Feb 2 02:31:43 2000 @@ -1338,6 +1338,13 @@ D: XF86_8514 D: cfdisk (curses based disk partitioning program) +N: Heinz Mauelshagen +E: mge@EZ-Darmstadt.Telekom.de +D: Logical Volume Manager +S: Bartningstr. 12 +S: 64289 Darmstadt +S: Germany + N: Mike McLagan E: mike.mclagan@linux.org W: http://www.invlogic.com/~mmclagan diff -urN 2.2.14/Documentation/Configure.help 2.2.14aa6/Documentation/Configure.help --- 2.2.14/Documentation/Configure.help Wed Jan 5 14:16:51 2000 +++ 2.2.14aa6/Documentation/Configure.help Wed Feb 2 02:31:43 2000 @@ -168,6 +168,11 @@ on the Alpha. The only time you would ever not say Y is to say M in order to debug the code. Say Y unless you know what you are doing. +Big memory support +CONFIG_BIGMEM + This option is required if you want to use physical memory which + is not covered by the kernel virtual address space (> 1GB). + Normal PC floppy disk support CONFIG_BLK_DEV_FD If you want to use the floppy disk drive(s) of your PC under Linux, @@ -938,6 +943,30 @@ called on26.o. You must also have a high-level driver for the type of device that you want to support. +Logical Volume Manager (LVM) support +CONFIG_BLK_DEV_LVM + This driver lets you combine several hard disks, hard disk partitions, + multiple devices or even loop devices (for evaluation purposes) into + a volume group. Imagine a volume group as a kind of virtual disk. + Logical volumes, which can be thought of as virtual partitions, + can be created in the volume group. You can resize volume groups and + logical volumes after creation, as your capacity needs change. + Logical volumes are accessed as block devices named + /dev/VolumeGroupName/LogicalVolumeName. + + For details see /usr/src/linux/Documentation/LVM-HOWTO. + + To get the newest software see http://linux.msede.com/lvm. + +Logical Volume Manager proc filesystem information +CONFIG_LVM_PROC_FS + If you say Y here, you can access overall Logical Volume Manager, + Volume Group, Logical Volume and Physical Volume information in /proc/lvm. + + To use this option, make sure that "proc filesystem support" + (CONFIG_PROC_FS) is enabled as well. + + Multiple devices driver support CONFIG_BLK_DEV_MD This driver lets you combine several hard disk partitions into one @@ -9278,6 +9307,20 @@ If you think you have a use for such a device (such as periodic data sampling), then say Y here, and read Documentation/rtc.txt for details. + For DEC Alpha users it is highly recommended to say Y here; if you + don't need all the features, you can choose the lightweight version + afterwards. + +Use only lightweight version (no interrupts) +CONFIG_RTC_LIGHT + This option turns off the extended features of the RTC driver that deal + with interrupts (periodic signals and alarm). If you only need this + driver to read and set your system hardware clock, say Y here. + If you are on DEC Alpha, enabling this option allows the kernel + to receive system clock interrupts in the standard, traditional + manner (that is, from the RTC device). The fully featured RTC driver + would instead move the clock signal source to the PIT (Programmable + Interval Timer), as on a PC. Tadpole ANA H8 Support CONFIG_H8 diff -urN 2.2.14/Documentation/LVM-HOWTO 2.2.14aa6/Documentation/LVM-HOWTO --- 2.2.14/Documentation/LVM-HOWTO Thu Jan 1 01:00:00 1970 +++ 2.2.14aa6/Documentation/LVM-HOWTO Wed Feb 2 02:31:43 2000 @@ -0,0 +1,118 @@ +Heinz Mauelshagen's LVM (Logical Volume Manager) howto.
01/28/1999 + + +Abstract: +--------- +The LVM adds virtual disk and virtual partition functionality +to the Linux operating system. + +It achieves this by adding an additional layer between the physical peripherals +and the I/O interface in the kernel. + +This allows the concatenation of several disk partitions or whole disks +(so-called physical volumes or PVs) or even multiple devices +into a storage pool (a so-called Volume Group or VG) with +allocation units called physical extents (PEs). +You can think of the volume group as a virtual disk. +Please see the scenario below. + +Some or all PEs of this VG can then be allocated to so-called Logical Volumes +or LVs in units called logical extents or LEs. +Each LE is mapped to a corresponding PE. +LEs and PEs are equal in size (see the mapping sketch after this howto). +Logical volumes are a kind of virtual partition. + + +The LVs can be used through device special files named +/dev/VolumeGroupName/LogicalVolumeName, similar to the familiar +/dev/sd[a-z]* or /dev/hd[a-z]*. + +But going beyond this, you are able to extend or reduce +VGs _AND_ LVs at runtime! + +So, if for example the capacity of an LV gets too small and the VG containing +this LV is full, you can add another PV to that VG and simply extend +the LV afterwards. +If you reduce or delete an LV you can use the freed capacity for other +LVs in the same VG. + + +The above scenario looks like this: + + /------------------------------------------\ + | /--PV 1--\ VG 1 /--PV n--\ | + | |-VGDA---| |-VGDA---| | + | |PE1PE2..| |PE1PE2..| | + | | | ...... | | | + | | | | | | + | | /-----------------------\ | | + | | \-------LV 1------------/ | | + | | ..PEn| | ..PEn| | + | \--------/ \--------/ | + \------------------------------------------/ +PV 1 could be /dev/sdc1 sized 3GB +PV n could be /dev/sde1 sized 4GB +VG 1 could be test_vg +LV 1 could be /dev/test_vg/test_lv +VGDA is the volume group descriptor area holding the LVM metadata +PE1 up to PEn are the physical extents on each disk (partition) + + + +For installation steps see INSTALL; use insmod(1)/modprobe(1) or kmod/kerneld(8) +to load the logical volume manager module if you did not build it +into the kernel. + + +Configuration steps for getting the above scenario: + +1. Set the partition system id to 0xFE on /dev/sdc1 and /dev/sde1. + +2. Do a "pvcreate /dev/sd[ce]1". + For testing purposes you can use more than one partition on a disk, + but you should not do so in real use, because a striped LV across + partitions of the same disk causes a performance breakdown. + +3. Do a "vgcreate test_vg /dev/sd[ce]1" to create the new VG named "test_vg" + which has the total capacity of both partitions. + vgcreate also activates the new volume group (it transfers the metadata + into the LVM driver in the kernel) so that LVs can be created in the next step. + +4. Do a "lvcreate -L1500 -ntest_lv test_vg" to get a 1500MB linear LV named + "test_lv" and its block device special "/dev/test_vg/test_lv". + + Or do a "lvcreate -i2 -I4 -l100 -nanother_test_lv test_vg" to get a 100 LE + large logical volume with 2 stripes and stripe size 4 KB. + +5. For example, create a filesystem in one LV with + "mke2fs /dev/test_vg/test_lv" and mount it. + +6. Extend /dev/test_vg/test_lv to 1600MB with a relative size by + "lvextend -L+100 /dev/test_vg/test_lv" + or with an absolute size by + "lvextend -L1600 /dev/test_vg/test_lv" + +7. Reduce /dev/test_vg/test_lv to 900 logical extents with relative extents by + "lvreduce -l-700 /dev/test_vg/test_lv" + or with absolute extents by + "lvreduce -l900 /dev/test_vg/test_lv" + +8. Rename a VG by deactivating it with + "vgchange -an test_vg" # only VGs with _no_ open LVs can be deactivated! + "vgrename test_vg whatever" + and reactivate it again by + "vgchange -ay whatever" + +9. Rename an LV after closing it by + "lvchange -an /dev/whatever/test_lv" # only closed LVs can be deactivated + "lvrename /dev/whatever/test_lv /dev/whatever/whatvolume" + or by + "lvrename whatever test_lv whatvolume" + and reactivate it again by + "lvchange -ay /dev/whatever/whatvolume" + +10. If you have Ted Ts'o's resize2fs program, you can resize the + ext2 filesystems contained in logical volumes without destroying + the data by + "e2fsadm -L+100 /dev/test_vg/another_test_lv"
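The abstract above describes a one-to-one, equal-size mapping from LEs to PEs, and step 4 creates a striped LV with 2 stripes and a 4 KB stripe size. The following stand-alone C sketch illustrates the arithmetic such a mapping implies; the extent size, stripe parameters and function names are invented for the example and are not taken from the driver.

/* Illustrative sketch only: linear LE-to-PE mapping plus the stripe
   arithmetic behind "lvcreate -i2 -I4".  All sizes/names are made up. */
#include <stdio.h>

#define LE_SIZE   (4UL * 1024 * 1024)   /* assume 4MB extents */
#define STRIPES   2                     /* -i2 */
#define STRIPE_SZ (4UL * 1024)          /* -I4 => 4KB stripe size */

/* linear LV: logical extent n simply becomes physical extent n */
static unsigned long linear_le_to_pe(unsigned long byte_offset)
{
        return byte_offset / LE_SIZE;
}

/* striped LV: consecutive STRIPE_SZ chunks alternate between stripes */
static void striped_map(unsigned long byte_offset,
                        unsigned long *stripe, unsigned long *stripe_offset)
{
        unsigned long chunk = byte_offset / STRIPE_SZ;

        *stripe        = chunk % STRIPES;
        *stripe_offset = (chunk / STRIPES) * STRIPE_SZ
                         + byte_offset % STRIPE_SZ;
}

int main(void)
{
        unsigned long off = 6UL * 1024 * 1024 + 5000;   /* some LV offset */
        unsigned long stripe, soff;

        printf("linear:  offset %lu lies in LE/PE %lu\n",
               off, linear_le_to_pe(off));
        striped_map(off, &stripe, &soff);
        printf("striped: offset %lu -> stripe %lu, offset %lu within it\n",
               off, stripe, soff);
        return 0;
}

Compile it with any C compiler and run it to see where a given byte offset of the LV lands in the linear and in the striped case.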
diff -urN 2.2.14/MAINTAINERS 2.2.14aa6/MAINTAINERS --- 2.2.14/MAINTAINERS Fri Jan 7 18:19:26 2000 +++ 2.2.14aa6/MAINTAINERS Wed Feb 2 02:31:43 2000 @@ -507,6 +507,13 @@ W: http://people.redhat.com/zab/maestro/ S: Supported +LOGICAL VOLUME MANAGER +P: Heinz Mauelshagen +M: linux-LVM@EZ-Darmstadt.Telekom.de +L: linux-LVM@msede.com +W: http://linux.msede.com/lvm +S: Maintained + M68K P: Jes Sorensen M: Jes.Sorensen@cern.ch diff -urN 2.2.14/Makefile 2.2.14aa6/Makefile --- 2.2.14/Makefile Wed Jan 5 14:16:51 2000 +++ 2.2.14aa6/Makefile Wed Feb 2 02:31:42 2000 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 2 SUBLEVEL = 14 -EXTRAVERSION = +EXTRAVERSION = aa6 ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) diff -urN 2.2.14/arch/alpha/config.in 2.2.14aa6/arch/alpha/config.in --- 2.2.14/arch/alpha/config.in Wed Jan 5 14:16:51 2000 +++ 2.2.14aa6/arch/alpha/config.in Wed Feb 2 02:31:42 2000 @@ -21,6 +21,7 @@ mainmenu_option next_comment comment 'General setup' +bool 'BIGMEM support' CONFIG_BIGMEM choice 'Alpha system type' \ "Generic CONFIG_ALPHA_GENERIC \ Alcor/Alpha-XLT CONFIG_ALPHA_ALCOR \ diff -urN 2.2.14/arch/alpha/defconfig 2.2.14aa6/arch/alpha/defconfig --- 2.2.14/arch/alpha/defconfig Wed Jan 5 14:16:51 2000 +++ 2.2.14aa6/arch/alpha/defconfig Wed Feb 2 02:31:42 2000 @@ -255,7 +255,8 @@ # CONFIG_QIC02_TAPE is not set # CONFIG_WATCHDOG is not set # CONFIG_NVRAM is not set -# CONFIG_RTC is not set +CONFIG_RTC=y +CONFIG_RTC_LIGHT=y # # Video For Linux diff -urN 2.2.14/arch/alpha/kernel/alpha_ksyms.c 2.2.14aa6/arch/alpha/kernel/alpha_ksyms.c --- 2.2.14/arch/alpha/kernel/alpha_ksyms.c Fri Jan 7 18:19:06 2000 +++ 2.2.14aa6/arch/alpha/kernel/alpha_ksyms.c Wed Feb 2 02:31:42 2000 @@ -160,6 +160,7 @@ EXPORT_SYMBOL(flush_tlb_mm); EXPORT_SYMBOL(flush_tlb_page); EXPORT_SYMBOL(flush_tlb_range); +EXPORT_SYMBOL(smp_imb); EXPORT_SYMBOL(cpu_data); EXPORT_SYMBOL(cpu_number_map); EXPORT_SYMBOL(global_bh_lock); diff -urN 2.2.14/arch/alpha/kernel/irq.h 2.2.14aa6/arch/alpha/kernel/irq.h --- 2.2.14/arch/alpha/kernel/irq.h Wed Jan 19 05:56:50 2000 +++ 2.2.14aa6/arch/alpha/kernel/irq.h Wed Feb 2 02:35:20 2000 @@ -44,7 +44,7 @@ } #define RTC_IRQ 8 -#ifdef CONFIG_RTC +#if defined(CONFIG_RTC) && !defined(CONFIG_RTC_LIGHT) #define TIMER_IRQ 0 /* timer is the pit */ #else #define TIMER_IRQ RTC_IRQ /* timer is the rtc */ diff -urN 2.2.14/arch/alpha/kernel/process.c 2.2.14aa6/arch/alpha/kernel/process.c --- 2.2.14/arch/alpha/kernel/process.c Wed Jan 5 14:16:51 2000 +++ 2.2.14aa6/arch/alpha/kernel/process.c Wed Feb 2 02:31:42 2000 @@ -30,7 +30,7 @@ #include #include -#ifdef CONFIG_RTC +#if defined(CONFIG_RTC)
&& !defined(CONFIG_RTC_LIGHT) #include #endif @@ -150,7 +150,7 @@ } #endif /* __SMP__ */ -#ifdef CONFIG_RTC +#if defined(CONFIG_RTC) && !defined(CONFIG_RTC_LIGHT) /* Reset rtc to defaults. */ { unsigned char control; diff -urN 2.2.14/arch/alpha/kernel/setup.c 2.2.14aa6/arch/alpha/kernel/setup.c --- 2.2.14/arch/alpha/kernel/setup.c Wed Jan 5 14:16:51 2000 +++ 2.2.14aa6/arch/alpha/kernel/setup.c Wed Feb 2 02:31:42 2000 @@ -25,8 +25,9 @@ #include #include #include +#include -#ifdef CONFIG_RTC +#if defined(CONFIG_RTC) && !defined(CONFIG_RTC_LIGHT) #include #endif #ifdef CONFIG_BLK_DEV_INITRD @@ -277,8 +278,18 @@ if (initrd_end > *memory_end_p) { printk("initrd extends beyond end of memory " "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - initrd_end, (unsigned long) memory_end_p); + initrd_end, *memory_end_p); initrd_start = initrd_end = 0; + } else { + /* move initrd from the middle of the RAM to the + start of the RAM so we won't risk to rewrite + initrd while allocating the memory at boot time */ + memmove((char *) *memory_start_p, + (char *) initrd_start, INITRD_SIZE); + initrd_start = *memory_start_p; + initrd_end = initrd_start + INITRD_SIZE; + *memory_start_p = PAGE_ALIGN(initrd_end); + initrd_below_start_ok = 1; } } #endif @@ -292,7 +303,7 @@ /* ??? There is some circumstantial evidence that this needs to be done now rather than later in time_init, which would be more natural. Someone please explain or refute. */ -#if defined(CONFIG_RTC) +#if defined(CONFIG_RTC) && !defined(CONFIG_RTC_LIGHT) rtc_init_pit(); #else alpha_mv.init_pit(); @@ -352,16 +363,42 @@ high = tmp; } - /* Round it up to an even number of pages. */ - high = (high + PAGE_SIZE) & (PAGE_MASK*2); +#ifndef CONFIG_BIGMEM +#define MAX_MEMORY 0x80000000UL +#else +#define LOW_MEMORY 0x80000000UL +#define MAX_MEMORY (VMALLOC_START-PAGE_OFFSET) +#endif /* Enforce maximum of 2GB even if there is more, * but only if the platform (support) cannot handle it. */ - if (high > 0x80000000UL) { - printk("Cropping memory from %luMB to 2048MB\n", high >> 20); - high = 0x80000000UL; + if (high > MAX_MEMORY) { + printk("Cropping memory from %luMB to %luMB\n", + high>>20, MAX_MEMORY>>20); + high = MAX_MEMORY; + } + +#ifdef CONFIG_BIGMEM + bigmem_start = bigmem_end = high; + if (high > LOW_MEMORY) + { + high = bigmem_start = LOW_MEMORY; + printk(KERN_NOTICE "%luMB BIGMEM available\n", + (bigmem_end-bigmem_start)>>20); } +#ifdef BIGMEM_DEBUG + else + { + high -= high/4; + bigmem_start = high; + printk(KERN_NOTICE "emulating %luMB BIGMEM\n", + (bigmem_end-bigmem_start)>>20); + } +#endif + bigmem_start += PAGE_OFFSET; + bigmem_end += PAGE_OFFSET; +#endif return (unsigned long) __va(high); } diff -urN 2.2.14/arch/alpha/kernel/smp.c 2.2.14aa6/arch/alpha/kernel/smp.c --- 2.2.14/arch/alpha/kernel/smp.c Wed Jan 5 14:16:51 2000 +++ 2.2.14aa6/arch/alpha/kernel/smp.c Wed Feb 2 02:31:42 2000 @@ -839,6 +839,22 @@ } static void +ipi_imb(void) +{ + imb(); +} + +void +smp_imb(void) +{ + /* Must wait other processors to flush their icache before continue. 
*/ + if (smp_call_function(ipi_imb, NULL, 1, 1)) + printk(KERN_CRIT "smp_imb: timed out\n"); + + imb(); +} + +static void ipi_flush_tlb_all(void *ignored) { tbia(); diff -urN 2.2.14/arch/alpha/kernel/sys_nautilus.c 2.2.14aa6/arch/alpha/kernel/sys_nautilus.c --- 2.2.14/arch/alpha/kernel/sys_nautilus.c Fri Jan 7 18:19:06 2000 +++ 2.2.14aa6/arch/alpha/kernel/sys_nautilus.c Wed Feb 2 02:31:42 2000 @@ -83,7 +83,7 @@ nautilus_kill_arch (int mode, char *restart_cmd) { -#ifdef CONFIG_RTC +#if defined(CONFIG_RTC) && !defined(CONFIG_RTC_LIGHT) /* Reset rtc to defaults. */ { unsigned char control; diff -urN 2.2.14/arch/alpha/kernel/time.c 2.2.14aa6/arch/alpha/kernel/time.c --- 2.2.14/arch/alpha/kernel/time.c Sun Jan 2 18:26:32 2000 +++ 2.2.14aa6/arch/alpha/kernel/time.c Wed Feb 2 02:31:42 2000 @@ -173,7 +173,7 @@ * drivers depend on them being initialized (e.g., joystick driver). */ -#ifdef CONFIG_RTC +#if defined(CONFIG_RTC) && !defined(CONFIG_RTC_LIGHT) void rtc_init_pit (void) { diff -urN 2.2.14/arch/alpha/mm/init.c 2.2.14aa6/arch/alpha/mm/init.c --- 2.2.14/arch/alpha/mm/init.c Wed Jan 5 14:16:51 2000 +++ 2.2.14aa6/arch/alpha/mm/init.c Wed Feb 2 02:31:42 2000 @@ -18,6 +18,7 @@ #ifdef CONFIG_BLK_DEV_INITRD #include #endif +#include #include #include @@ -30,6 +31,9 @@ extern void die_if_kernel(char *,struct pt_regs *,long); extern void show_net_buffers(void); +#ifdef CONFIG_BIGMEM +unsigned long bigmem_start, bigmem_end; +#endif struct thread_struct original_pcb; #ifndef __SMP__ @@ -196,7 +200,11 @@ struct thread_struct *original_pcb_ptr; /* initialize mem_map[] */ +#ifndef CONFIG_BIGMEM start_mem = free_area_init(start_mem, end_mem); +#else + start_mem = free_area_init(start_mem, bigmem_end); +#endif /* find free clusters, update mem_map[] accordingly */ memdesc = (struct memdesc_struct *) @@ -305,8 +313,18 @@ { unsigned long tmp; +#ifdef CONFIG_BIGMEM + bigmem_start = PAGE_ALIGN(bigmem_start); + bigmem_end &= PAGE_MASK; +#endif end_mem &= PAGE_MASK; +#ifndef CONFIG_BIGMEM max_mapnr = num_physpages = MAP_NR(end_mem); +#else + max_mapnr = num_physpages = MAP_NR(bigmem_end); + /* cache the bigmem_mapnr */ + bigmem_mapnr = MAP_NR(bigmem_start); +#endif high_memory = (void *) end_mem; start_mem = PAGE_ALIGN(start_mem); @@ -326,13 +344,23 @@ continue; atomic_set(&mem_map[MAP_NR(tmp)].count, 1); #ifdef CONFIG_BLK_DEV_INITRD - if (initrd_start && tmp >= initrd_start && tmp < initrd_end) + if (initrd_start && tmp >= (initrd_start & PAGE_MASK) && tmp < initrd_end) continue; #endif kill_page(tmp); free_page(tmp); } - tmp = nr_free_pages << (PAGE_SHIFT - 10); +#ifdef CONFIG_BIGMEM + for (tmp = bigmem_start; tmp < bigmem_end; tmp += PAGE_SIZE) + { + clear_bit(PG_reserved, &mem_map[MAP_NR(tmp)].flags); + set_bit(PG_BIGMEM, &mem_map[MAP_NR(tmp)].flags); + atomic_set(&mem_map[MAP_NR(tmp)].count, 1); + kill_page(tmp); + free_page(tmp); + } +#endif + tmp = (unsigned long) nr_free_pages << (PAGE_SHIFT - 10); printk("Memory: %luk available\n", tmp); return; } @@ -364,15 +392,20 @@ val->sharedram = 0; val->freeram = ((unsigned long)nr_free_pages) << PAGE_SHIFT; val->bufferram = buffermem; + val->totalbig = 0; + val->freebig = (unsigned long) nr_free_bigpages << PAGE_SHIFT; while (i-- > 0) { if (PageReserved(mem_map+i)) continue; val->totalram++; + if (PageBIGMEM(mem_map+i)) + val->totalbig++; if (!atomic_read(&mem_map[i].count)) continue; val->sharedram += atomic_read(&mem_map[i].count) - 1; } val->totalram <<= PAGE_SHIFT; val->sharedram <<= PAGE_SHIFT; + val->totalbig <<= PAGE_SHIFT; return; } diff -urN 
2.2.14/arch/i386/config.in 2.2.14aa6/arch/i386/config.in --- 2.2.14/arch/i386/config.in Wed Jan 5 14:16:51 2000 +++ 2.2.14aa6/arch/i386/config.in Wed Feb 2 02:31:42 2000 @@ -54,6 +54,7 @@ mainmenu_option next_comment comment 'General setup' +bool 'BIGMEM support' CONFIG_BIGMEM bool 'Networking support' CONFIG_NET bool 'PCI support' CONFIG_PCI if [ "$CONFIG_PCI" = "y" ]; then diff -urN 2.2.14/arch/i386/kernel/irq.c 2.2.14aa6/arch/i386/kernel/irq.c --- 2.2.14/arch/i386/kernel/irq.c Sat Oct 23 15:31:08 1999 +++ 2.2.14aa6/arch/i386/kernel/irq.c Wed Feb 2 02:31:42 2000 @@ -971,8 +971,24 @@ unsigned int i; unsigned long delay; + /* + * something may have generated an irq long ago and we want to + * flush such a longstanding irq before considering it as spurious. + */ + spin_lock_irq(&irq_controller_lock); + for (i = NR_IRQS-1; i > 0; i--) + if (!irq_desc[i].action) + irq_desc[i].handler->startup(i); + spin_unlock_irq(&irq_controller_lock); + + /* Wait for longstanding interrupts to trigger. */ + for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) + /* about 20ms delay */ synchronize_irq(); + /* - * first, enable any unassigned irqs + * enable any unassigned irqs + * (we must startup again here because if a longstanding irq + * happened in the previous stage, it may have masked itself) */ spin_lock_irq(&irq_controller_lock); for (i = NR_IRQS-1; i > 0; i--) { diff -urN 2.2.14/arch/i386/kernel/ptrace.c 2.2.14aa6/arch/i386/kernel/ptrace.c --- 2.2.14/arch/i386/kernel/ptrace.c Wed Jan 5 14:16:51 2000 +++ 2.2.14aa6/arch/i386/kernel/ptrace.c Wed Feb 2 02:31:42 2000 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -80,6 +81,7 @@ pmd_t * pgmiddle; pte_t * pgtable; unsigned long page; + unsigned long retval; int fault; repeat: @@ -125,7 +127,10 @@ if (MAP_NR(page) >= max_mapnr) return 0; page += addr & ~PAGE_MASK; - return *(unsigned long *) page; + page = kmap(page, KM_READ); + retval = *(unsigned long *) page; + kunmap(page, KM_READ); + return retval; } /* @@ -195,7 +200,13 @@ } /* this is a hack for non-kernel-mapped video buffers and similar */ if (MAP_NR(page) < max_mapnr) - *(unsigned long *) (page + (addr & ~PAGE_MASK)) = data; + { + unsigned long vaddr; + + vaddr = kmap(page, KM_WRITE); + *(unsigned long *) (vaddr + (addr & ~PAGE_MASK)) = data; + kunmap(vaddr, KM_WRITE); + } /* we're bypassing pagetables, so we have to set the dirty bit ourselves */ /* this should also re-instate whatever read-only mode there was before */ set_pte(pgtable, pte_mkdirty(mk_pte(page, vma->vm_page_prot))); diff -urN 2.2.14/arch/i386/kernel/setup.c 2.2.14aa6/arch/i386/kernel/setup.c --- 2.2.14/arch/i386/kernel/setup.c Wed Jan 5 14:16:51 2000 +++ 2.2.14aa6/arch/i386/kernel/setup.c Wed Feb 2 02:31:42 2000 @@ -23,6 +23,8 @@ * * Improved Intel cache detection. * Dave Jones , October 1999 + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ /* @@ -50,6 +52,7 @@ #ifdef CONFIG_BLK_DEV_RAM #include #endif +#include #include #include #include @@ -383,12 +386,31 @@ #define VMALLOC_RESERVE (64 << 20) /* 64MB for vmalloc */ #define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)) +#ifdef CONFIG_BIGMEM + bigmem_start = bigmem_end = memory_end; +#endif if (memory_end > MAXMEM) { +#ifdef CONFIG_BIGMEM +#define MAXBIGMEM ((unsigned long)(~(VMALLOC_RESERVE-1))) + bigmem_start = MAXMEM; + bigmem_end = (memory_end < MAXBIGMEM) ? 
memory_end : MAXBIGMEM; +#endif memory_end = MAXMEM; +#ifdef CONFIG_BIGMEM + printk(KERN_NOTICE "%ldMB BIGMEM available.\n", + (bigmem_end-bigmem_start)>>20); +#else printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20); +#endif } +#if defined(CONFIG_BIGMEM) && defined(BIGMEM_DEBUG) + else { + memory_end -= memory_end/4; + bigmem_start = memory_end; + } +#endif memory_end += PAGE_OFFSET; *memory_start_p = memory_start; diff -urN 2.2.14/arch/i386/mm/Makefile 2.2.14aa6/arch/i386/mm/Makefile --- 2.2.14/arch/i386/mm/Makefile Mon Jan 18 02:28:56 1999 +++ 2.2.14aa6/arch/i386/mm/Makefile Wed Feb 2 02:31:42 2000 @@ -10,4 +10,8 @@ O_TARGET := mm.o O_OBJS := init.o fault.o ioremap.o extable.o +ifeq ($(CONFIG_BIGMEM),y) +O_OBJS += bigmem.o +endif + include $(TOPDIR)/Rules.make diff -urN 2.2.14/arch/i386/mm/bigmem.c 2.2.14aa6/arch/i386/mm/bigmem.c --- 2.2.14/arch/i386/mm/bigmem.c Thu Jan 1 01:00:00 1970 +++ 2.2.14aa6/arch/i386/mm/bigmem.c Wed Feb 2 02:31:42 2000 @@ -0,0 +1,35 @@ +/* + * BIGMEM IA32 code and variables. + * + * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de + * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de + */ + +#include +#include + +unsigned long bigmem_start, bigmem_end; + +/* NOTE: fixmap_init alloc all the fixmap pagetables contigous on the + physical space so we can cache the place of the first one and move + around without checking the pgd every time. */ +pte_t *kmap_pte; +pgprot_t kmap_prot; + +#define kmap_get_fixmap_pte(vaddr) \ + pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) + +void __init kmap_init(void) +{ + unsigned long kmap_vstart; + + /* cache the first kmap pte */ + kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); + kmap_pte = kmap_get_fixmap_pte(kmap_vstart); + + kmap_prot = PAGE_KERNEL; +#if 0 + if (boot_cpu_data.x86_capability & X86_FEATURE_PGE) + pgprot_val(kmap_prot) |= _PAGE_GLOBAL; +#endif +} diff -urN 2.2.14/arch/i386/mm/fault.c 2.2.14aa6/arch/i386/mm/fault.c --- 2.2.14/arch/i386/mm/fault.c Mon Jan 17 16:44:33 2000 +++ 2.2.14aa6/arch/i386/mm/fault.c Wed Feb 2 02:31:42 2000 @@ -291,7 +291,8 @@ up(&mm->mmap_sem); if (error_code & 4) { - if (!((regs->eflags >> 12) & 3)) + if (tsk->oom_kill_try++ > 10 || + !((regs->eflags >> 12) & 3)) { printk("VM: killing process %s\n", tsk->comm); do_exit(SIGKILL); @@ -304,6 +305,11 @@ */ printk("VM: terminating process %s\n", tsk->comm); force_sig(SIGTERM, current); + if (tsk->oom_kill_try > 1) + { + tsk->policy |= SCHED_YIELD; + schedule(); + } return; } } diff -urN 2.2.14/arch/i386/mm/init.c 2.2.14aa6/arch/i386/mm/init.c --- 2.2.14/arch/i386/mm/init.c Sat Oct 23 15:31:08 1999 +++ 2.2.14aa6/arch/i386/mm/init.c Wed Feb 2 02:31:42 2000 @@ -2,6 +2,8 @@ * linux/arch/i386/mm/init.c * * Copyright (C) 1995 Linus Torvalds + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ #include @@ -20,6 +22,7 @@ #ifdef CONFIG_BLK_DEV_INITRD #include #endif +#include #include #include @@ -148,6 +151,7 @@ { int i,free = 0,total = 0,reserved = 0; int shared = 0, cached = 0; + int bigmem = 0; printk("Mem-info:\n"); show_free_areas(); @@ -155,6 +159,8 @@ i = max_mapnr; while (i-- > 0) { total++; + if (PageBIGMEM(mem_map+i)) + bigmem++; if (PageReserved(mem_map+i)) reserved++; else if (PageSwapCache(mem_map+i)) @@ -165,6 +171,7 @@ shared += atomic_read(&mem_map[i].count) - 1; } printk("%d pages of RAM\n",total); + printk("%d pages of BIGMEM\n",bigmem); printk("%d reserved pages\n",reserved); printk("%d pages shared\n",shared); printk("%d pages swap cached\n",cached); @@ -344,7 
+351,12 @@ #endif local_flush_tlb(); +#ifndef CONFIG_BIGMEM return free_area_init(start_mem, end_mem); +#else + kmap_init(); /* run after fixmap_init */ + return free_area_init(start_mem, bigmem_end + PAGE_OFFSET); +#endif } /* @@ -393,11 +405,22 @@ int reservedpages = 0; int datapages = 0; int initpages = 0; + int bigpages = 0; unsigned long tmp; end_mem &= PAGE_MASK; +#ifdef CONFIG_BIGMEM + bigmem_start = PAGE_ALIGN(bigmem_start); + bigmem_end &= PAGE_MASK; +#endif high_memory = (void *) end_mem; +#ifndef CONFIG_BIGMEM max_mapnr = num_physpages = MAP_NR(end_mem); +#else + max_mapnr = num_physpages = PHYSMAP_NR(bigmem_end); + /* cache the bigmem_mapnr */ + bigmem_mapnr = PHYSMAP_NR(bigmem_start); +#endif /* clear the zero-page */ memset(empty_zero_page, 0, PAGE_SIZE); @@ -452,13 +475,34 @@ #endif free_page(tmp); } - printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n", +#ifdef CONFIG_BIGMEM + for (tmp = bigmem_start; tmp < bigmem_end; tmp += PAGE_SIZE) { + /* + RMQUEUE_ORDER in page_alloc.c returns PAGE_OFFSET + tmp + which cannot be allowed to be 0 since the callers of + __get_free_pages treat 0 as an allocation failure. To + avoid this possibility, do not allow allocation of the + BIGMEM page which would map to 0. + + Leonard N. Zubkoff, 30 October 1999 + */ + if (tmp + PAGE_OFFSET != 0) { + clear_bit(PG_reserved, &mem_map[PHYSMAP_NR(tmp)].flags); + set_bit(PG_BIGMEM, &mem_map[PHYSMAP_NR(tmp)].flags); + atomic_set(&mem_map[PHYSMAP_NR(tmp)].count, 1); + free_page(tmp + PAGE_OFFSET); + bigpages++; + } + } +#endif + printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %dk bigmem)\n", (unsigned long) nr_free_pages << (PAGE_SHIFT-10), max_mapnr << (PAGE_SHIFT-10), codepages << (PAGE_SHIFT-10), reservedpages << (PAGE_SHIFT-10), datapages << (PAGE_SHIFT-10), - initpages << (PAGE_SHIFT-10)); + initpages << (PAGE_SHIFT-10), + bigpages << (PAGE_SHIFT-10)); if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); @@ -486,15 +530,20 @@ val->sharedram = 0; val->freeram = nr_free_pages << PAGE_SHIFT; val->bufferram = buffermem; + val->totalbig = 0; + val->freebig = nr_free_bigpages << PAGE_SHIFT; while (i-- > 0) { if (PageReserved(mem_map+i)) continue; val->totalram++; + if (PageBIGMEM(mem_map+i)) + val->totalbig++; if (!atomic_read(&mem_map[i].count)) continue; val->sharedram += atomic_read(&mem_map[i].count) - 1; } val->totalram <<= PAGE_SHIFT; val->sharedram <<= PAGE_SHIFT; + val->totalbig <<= PAGE_SHIFT; return; } diff -urN 2.2.14/arch/ppc/mm/init.c 2.2.14aa6/arch/ppc/mm/init.c --- 2.2.14/arch/ppc/mm/init.c Mon Jan 17 16:44:35 2000 +++ 2.2.14aa6/arch/ppc/mm/init.c Wed Feb 2 02:31:42 2000 @@ -1315,6 +1315,26 @@ } #endif /* CONFIG_MBX */ #ifndef CONFIG_8xx +static void __init apply_ram_limit(struct mem_pieces * mp) +{ + int i; + + for (i = 0; i < mp->n_regions; i++) + { + if (mp->regions[i].address >= __max_memory) + { + mp->n_regions = i; + break; + } + if (mp->regions[i].address+mp->regions[i].size > __max_memory) + { + mp->regions[i].size = __max_memory - mp->regions[i].address; + mp->n_regions = i+1; + break; + } + } +} + /* * On systems with Open Firmware, collect information about * physical RAM and which pieces are already in use. 
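The setup.c changes above split physical memory at MAXMEM: everything the kernel can map directly stays normal memory, and everything above it becomes BIGMEM that is only reachable through kmap(). A minimal stand-alone sketch of that split, assuming the usual i386 values (3 GB PAGE_OFFSET, 64 MB vmalloc reserve, so MAXMEM is 960 MB and MAXBIGMEM is 4096 MB - 64 MB) and a made-up 2 GB machine; the constants are hard-coded for illustration only.

/* Sketch of the i386 BIGMEM split computed in setup.c above.
   MAXMEM    = 4GB - PAGE_OFFSET(3GB) - 64MB vmalloc reserve = 960MB
   MAXBIGMEM = 4GB - 64MB                                    = 4032MB */
#include <stdio.h>

#define MAXMEM     (960UL << 20)
#define MAXBIGMEM  (4032UL << 20)

int main(void)
{
        unsigned long memory_end = 2048UL << 20;   /* pretend 2GB of RAM */
        unsigned long bigmem_start, bigmem_end;

        bigmem_start = bigmem_end = memory_end;
        if (memory_end > MAXMEM) {
                /* everything past MAXMEM is only reachable via kmap() */
                bigmem_start = MAXMEM;
                bigmem_end = memory_end < MAXBIGMEM ? memory_end : MAXBIGMEM;
                memory_end = MAXMEM;
        }
        printf("directly mapped: %4lu MB\n", memory_end >> 20);
        printf("BIGMEM:          %4lu MB (from %lu MB to %lu MB)\n",
               (bigmem_end - bigmem_start) >> 20,
               bigmem_start >> 20, bigmem_end >> 20);
        return 0;
}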
@@ -1380,6 +1400,7 @@ if (boot_infos == 0) { /* record which bits the prom is using */ get_mem_prop("available", &phys_avail); + apply_ram_limit(&phys_avail); prom_mem = phys_mem; for (i = 0; i < phys_avail.n_regions; ++i) remove_mem_piece(&prom_mem, diff -urN 2.2.14/drivers/block/Config.in 2.2.14aa6/drivers/block/Config.in --- 2.2.14/drivers/block/Config.in Fri Jan 7 18:19:10 2000 +++ 2.2.14aa6/drivers/block/Config.in Wed Feb 2 02:31:43 2000 @@ -95,6 +95,10 @@ comment 'Additional Block Devices' +tristate 'Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM N +if [ "$CONFIG_BLK_DEV_LVM" != "n" ]; then + bool ' LVM information in proc filesystem' CONFIG_LVM_PROC_FS Y +fi tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP if [ "$CONFIG_NET" = "y" ]; then tristate 'Network block device support' CONFIG_BLK_DEV_NBD diff -urN 2.2.14/drivers/block/Makefile 2.2.14aa6/drivers/block/Makefile --- 2.2.14/drivers/block/Makefile Fri Jan 7 18:19:10 2000 +++ 2.2.14aa6/drivers/block/Makefile Wed Feb 2 02:31:43 2000 @@ -250,6 +250,14 @@ endif endif +ifeq ($(CONFIG_BLK_DEV_LVM),y) +L_OBJS += lvm.o lvm-snap.o +else + ifeq ($(CONFIG_BLK_DEV_LVM),m) + M_OBJS += lvm-mod.o + endif +endif + ifeq ($(CONFIG_BLK_DEV_MD),y) LX_OBJS += md.o @@ -308,3 +316,6 @@ ide-mod.o: ide.o $(IDE_OBJS) $(LD) $(LD_RFLAG) -r -o $@ ide.o $(IDE_OBJS) + +lvm-mod.o: lvm.o lvm-snap.o + $(LD) -r -o $@ lvm.o lvm-snap.o diff -urN 2.2.14/drivers/block/README.lvm 2.2.14aa6/drivers/block/README.lvm --- 2.2.14/drivers/block/README.lvm Thu Jan 1 01:00:00 1970 +++ 2.2.14aa6/drivers/block/README.lvm Wed Feb 2 02:31:43 2000 @@ -0,0 +1,8 @@ + +This is the Logical Volume Manager driver for Linux, + +Tools, library that manage logical volumes can be found +at . + +There you can obtain actual driver versions too. + diff -urN 2.2.14/drivers/block/genhd.c 2.2.14aa6/drivers/block/genhd.c --- 2.2.14/drivers/block/genhd.c Fri Jan 7 18:19:10 2000 +++ 2.2.14aa6/drivers/block/genhd.c Wed Feb 2 02:31:43 2000 @@ -50,6 +50,11 @@ le32_to_cpu(__a); \ }) +#if defined CONFIG_BLK_DEV_LVM || defined CONFIG_BLK_DEV_LVM_MODULE +#include +void ( *lvm_hd_name_ptr) ( char *, int) = NULL; +#endif + struct gendisk *gendisk_head = NULL; static int current_minor = 0; @@ -103,6 +108,13 @@ * MD devices are named md0, md1, ... md15, fix it up here. 
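The genhd.c hunk above (and the lvm_map_ptr hook further down in ll_rw_blk.c) route device naming and block remapping through function pointers that stay NULL until the LVM driver, possibly loaded as a module, registers itself at init time; that is what keeps the core kernel buildable with LVM configured as a module. A minimal userspace sketch of that hook pattern, with invented names standing in for lvm_hd_name_ptr:

/* Sketch of the "NULL until the module registers" hook pattern.
   Userspace stand-in with invented names, not kernel code. */
#include <stdio.h>

/* core side: pointer stays NULL until some module fills it in */
static void (*hd_name_hook)(char *buf, int minor) = NULL;

static const char *disk_name(int minor, char *buf)
{
        if (hd_name_hook) {                  /* module present: let it name it */
                hd_name_hook(buf, minor);
                return buf;
        }
        sprintf(buf, "hd%c", 'a' + minor);   /* fallback naming */
        return buf;
}

/* "module" side: provides the callback and registers it at init time */
static void lvm_style_name(char *buf, int minor)
{
        sprintf(buf, "vg00/lvol%d", minor);
}

int main(void)
{
        char buf[32];

        printf("before init: %s\n", disk_name(3, buf));
        hd_name_hook = lvm_style_name;       /* module init would do this */
        printf("after init:  %s\n", disk_name(3, buf));
        return 0;
}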
*/ switch (hd->major) { +#if defined CONFIG_BLK_DEV_LVM || defined CONFIG_BLK_DEV_LVM_MODULE + case LVM_BLK_MAJOR: + *buf = 0; + if ( lvm_hd_name_ptr != NULL) + ( lvm_hd_name_ptr) ( buf, minor); + return buf; +#endif case IDE5_MAJOR: unit += 2; case IDE4_MAJOR: diff -urN 2.2.14/drivers/block/ll_rw_blk.c 2.2.14aa6/drivers/block/ll_rw_blk.c --- 2.2.14/drivers/block/ll_rw_blk.c Fri Jan 7 18:19:10 2000 +++ 2.2.14aa6/drivers/block/ll_rw_blk.c Wed Feb 2 02:31:43 2000 @@ -3,6 +3,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1994, Karl Keyte: Added support for disk statistics + * Elevator latency, (C) 2000 Andrea Arcangeli SuSE */ /* @@ -24,6 +25,14 @@ #include +#if defined CONFIG_BLK_DEV_LVM || defined CONFIG_BLK_DEV_LVM_MODULE +#include + /* function pointer to the LVM driver remapping function + which will be setup during driver/module init; neccessary + to be able to load LVM as a module */ +int (*lvm_map_ptr) (struct buffer_head *, int) = NULL; +#endif + /* * The request-struct contains all necessary data * to load a nr of sectors into memory @@ -53,11 +62,11 @@ /* * used to wait on when there are no free requests */ -struct wait_queue * wait_for_request = NULL; +struct wait_queue * wait_for_request; /* This specifies how many sectors to read ahead on the disk. */ -int read_ahead[MAX_BLKDEV] = {0, }; +int read_ahead[MAX_BLKDEV]; /* blk_dev_struct is: * *request_fn @@ -73,7 +82,7 @@ * * if (!blk_size[MAJOR]) then no minor size checking is done. */ -int * blk_size[MAX_BLKDEV] = { NULL, NULL, }; +int * blk_size[MAX_BLKDEV]; /* * blksize_size contains the size of all block-devices: @@ -82,7 +91,7 @@ * * if (!blksize_size[MAJOR]) then 1024 bytes is assumed. */ -int * blksize_size[MAX_BLKDEV] = { NULL, NULL, }; +int * blksize_size[MAX_BLKDEV]; /* * hardsect_size contains the size of the hardware sector of a device. @@ -96,22 +105,22 @@ * This is currently set by some scsi devices and read by the msdos fs driver. * Other uses may appear later. 
*/ -int * hardsect_size[MAX_BLKDEV] = { NULL, NULL, }; +int * hardsect_size[MAX_BLKDEV]; /* * The following tunes the read-ahead algorithm in mm/filemap.c */ -int * max_readahead[MAX_BLKDEV] = { NULL, NULL, }; +int * max_readahead[MAX_BLKDEV]; /* * Max number of sectors per request */ -int * max_sectors[MAX_BLKDEV] = { NULL, NULL, }; +int * max_sectors[MAX_BLKDEV]; /* * Max number of segments per request */ -int * max_segments[MAX_BLKDEV] = { NULL, NULL, }; +int * max_segments[MAX_BLKDEV]; static inline int get_max_sectors(kdev_t dev) { @@ -291,6 +300,129 @@ printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n"); } +static inline int seek_to_not_starving_chunk(struct request ** req, int * lat) +{ + struct request * tmp = *req; + int found = 0, pos = 0; + int last_pos = 0; + + while (tmp) + { + if (tmp->elevator_latency <= 0) + { + *req = tmp; + found = 1; + last_pos = pos; + } + tmp = tmp->next; + pos++; + } + *lat -= last_pos; + + return found; +} + +#define CASE_COALESCE_BUT_FIRST_REQUEST_MAYBE_BUSY \ + case IDE0_MAJOR: /* same as HD_MAJOR */ \ + case IDE1_MAJOR: \ + case FLOPPY_MAJOR: \ + case IDE2_MAJOR: \ + case IDE3_MAJOR: \ + case IDE4_MAJOR: \ + case IDE5_MAJOR: \ + case ACSI_MAJOR: \ + case MFM_ACORN_MAJOR: \ + case MDISK_MAJOR: \ + case DASD_MAJOR: +#define CASE_COALESCE_ALSO_FIRST_REQUEST \ + case SCSI_DISK0_MAJOR: \ + case SCSI_DISK1_MAJOR: \ + case SCSI_DISK2_MAJOR: \ + case SCSI_DISK3_MAJOR: \ + case SCSI_DISK4_MAJOR: \ + case SCSI_DISK5_MAJOR: \ + case SCSI_DISK6_MAJOR: \ + case SCSI_DISK7_MAJOR: \ + case SCSI_CDROM_MAJOR: \ + case DAC960_MAJOR+0: \ + case DAC960_MAJOR+1: \ + case DAC960_MAJOR+2: \ + case DAC960_MAJOR+3: \ + case DAC960_MAJOR+4: \ + case DAC960_MAJOR+5: \ + case DAC960_MAJOR+6: \ + case DAC960_MAJOR+7: \ + case COMPAQ_SMART2_MAJOR+0: \ + case COMPAQ_SMART2_MAJOR+1: \ + case COMPAQ_SMART2_MAJOR+2: \ + case COMPAQ_SMART2_MAJOR+3: \ + case COMPAQ_SMART2_MAJOR+4: \ + case COMPAQ_SMART2_MAJOR+5: \ + case COMPAQ_SMART2_MAJOR+6: \ + case COMPAQ_SMART2_MAJOR+7: + +#define elevator_starve_rest_of_queue(req) \ +do { \ + for ((req) = (req)->next; (req); (req) = (req)->next) \ + (req)->elevator_latency--; \ +} while (0) + +static inline void elevator_queue(struct request * req, + struct request * tmp, + int latency, + struct request ** current_request, + int major) +{ + struct request * head; + int starving; + + starving = seek_to_not_starving_chunk(&tmp, &latency); + head = tmp; + + for ( ; tmp->next ; tmp = tmp->next) { + const int after_current = IN_ORDER(tmp,req); + const int before_next = IN_ORDER(req,tmp->next); + + if (!IN_ORDER(tmp,tmp->next)) { + if (after_current || before_next) + break; + } else { + if (after_current && before_next) + break; + } + + if (--latency <= 0) + { + tmp = head; + + if (starving) + break; + + switch (major) + { + CASE_COALESCE_BUT_FIRST_REQUEST_MAYBE_BUSY + if (head == blk_dev[major].current_request) + default: + goto link; + CASE_COALESCE_ALSO_FIRST_REQUEST + } + + req->next = tmp; + *current_request = req; + goto after_link; + } + } + + link: + req->next = tmp->next; + tmp->next = req; + + after_link: + req->elevator_latency = latency; + + elevator_starve_rest_of_queue(req); +} + /* * add-request adds a request to the linked list. 
* It disables interrupts (aquires the request spinlock) so that it can muck @@ -309,6 +441,7 @@ short disk_index; unsigned long flags; int queue_new_request = 0; + int latency; switch (major) { case DAC960_MAJOR+0: @@ -333,7 +466,10 @@ break; } - req->next = NULL; + if (req->cmd != READ) + latency = dev->elevator.write_latency; + else + latency = dev->elevator.read_latency; /* * We use the goto to reduce locking complexity @@ -344,28 +480,17 @@ if (req->bh) mark_buffer_clean(req->bh); if (!(tmp = *current_request)) { + req->next = NULL; + req->elevator_latency = latency; *current_request = req; if (dev->current_request != &dev->plug) queue_new_request = 1; goto out; } - for ( ; tmp->next ; tmp = tmp->next) { - const int after_current = IN_ORDER(tmp,req); - const int before_next = IN_ORDER(req,tmp->next); - - if (!IN_ORDER(tmp,tmp->next)) { - if (after_current || before_next) - break; - } else { - if (after_current && before_next) - break; - } - } - req->next = tmp->next; - tmp->next = req; + elevator_queue(req, tmp, latency, current_request, major); /* for SCSI devices, call request_fn unconditionally */ - if (scsi_blk_major(major) || + if ((0 && scsi_blk_major(major)) || (major >= DAC960_MAJOR+0 && major <= DAC960_MAJOR+7) || (major >= COMPAQ_SMART2_MAJOR+0 && major <= COMPAQ_SMART2_MAJOR+7)) @@ -399,6 +524,8 @@ total_segments--; if (total_segments > max_segments) return; + if (next->elevator_latency < req->elevator_latency) + req->elevator_latency = next->elevator_latency; req->bhtail->b_reqnext = next->bh; req->bhtail = next->bhtail; req->nr_sectors += next->nr_sectors; @@ -408,12 +535,29 @@ wake_up (&wait_for_request); } +#define read_pendings(req) \ +({ \ + int __ret = 0; \ + struct request * tmp = (req); \ + while (tmp) \ + { \ + if (tmp->cmd == READ) \ + { \ + __ret = 1; \ + break; \ + } \ + tmp = tmp->next; \ + } \ + __ret; \ +}) + void make_request(int major, int rw, struct buffer_head * bh) { unsigned int sector, count; struct request * req; int rw_ahead, max_req, max_sectors, max_segments; unsigned long flags; + int latency; count = bh->b_size >> 9; sector = bh->b_rsector; @@ -490,6 +634,11 @@ max_sectors = get_max_sectors(bh->b_rdev); max_segments = get_max_segments(bh->b_rdev); + if (rw != READ) + latency = blk_dev[major].elevator.write_latency; + else + latency = blk_dev[major].elevator.read_latency; + /* * Now we acquire the request spinlock, we have to be mega careful * not to schedule or do something nonatomic @@ -499,20 +648,14 @@ if (!req) { /* MD and loop can't handle plugging without deadlocking */ if (major != MD_MAJOR && major != LOOP_MAJOR && +#if defined CONFIG_BLK_DEV_LVM || defined CONFIG_BLK_DEV_LVM_MODULE + major != LVM_BLK_MAJOR && +#endif major != DDV_MAJOR && major != NBD_MAJOR) plug_device(blk_dev + major); /* is atomic */ - } else switch (major) { - case IDE0_MAJOR: /* same as HD_MAJOR */ - case IDE1_MAJOR: - case FLOPPY_MAJOR: - case IDE2_MAJOR: - case IDE3_MAJOR: - case IDE4_MAJOR: - case IDE5_MAJOR: - case ACSI_MAJOR: - case MFM_ACORN_MAJOR: - case MDISK_MAJOR: - case DASD_MAJOR: + } else if (rw == READ || !read_pendings(req)) switch (major) { + struct request * prev; + CASE_COALESCE_BUT_FIRST_REQUEST_MAYBE_BUSY /* * The scsi disk and cdrom drivers completely remove the request * from the queue when they start processing an entry. For this @@ -523,38 +666,24 @@ * entry may be busy being processed and we thus can't change it. 
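The elevator code above gives every queued request a latency budget (ELEVATOR_READ_LATENCY/ELEVATOR_WRITE_LATENCY); each time a later request is sorted in ahead of it the budget shrinks, and once it reaches zero no further request may pass it. A toy stand-alone simulation of that bounded reordering, with an invented budget and a plain array instead of the kernel request list:

/* Toy simulation of the bounded-latency elevator: new requests may be
   sorted ahead of queued ones, but never ahead of a request whose
   budget has dropped to zero, and every request they pass loses one
   unit of budget.  Constants and names are invented. */
#include <stdio.h>

#define LAT 4                      /* per-request latency budget */

struct req { long sector; int lat; };

static struct req q[64];
static int n;

static void add_request(long sector)
{
        int i, pos = n;
        int fence = 0;

        /* never pass a request that has used up its budget */
        for (i = 0; i < n; i++)
                if (q[i].lat <= 0)
                        fence = i + 1;

        /* plain sector-order insertion from that point on */
        for (i = fence; i < n; i++)
                if (sector < q[i].sector) { pos = i; break; }

        for (i = n; i > pos; i--)
                q[i] = q[i - 1];
        q[pos].sector = sector;
        q[pos].lat = LAT;
        n++;

        /* every request left behind the new one gets closer to starving */
        for (i = pos + 1; i < n; i++)
                q[i].lat--;
}

int main(void)
{
        long input[] = { 500, 100, 90, 80, 70, 60, 50 };
        int i;

        for (i = 0; i < 7; i++)
                add_request(input[i]);
        for (i = 0; i < n; i++)
                printf("sector %4ld  latency left %d\n", q[i].sector, q[i].lat);
        return 0;
}

Running it shows the request for sector 500 being passed only until its budget is exhausted; later requests then queue behind it instead of starving it indefinitely.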
*/ if (req == blk_dev[major].current_request) + { req = req->next; + latency--; + } if (!req) break; /* fall through */ + CASE_COALESCE_ALSO_FIRST_REQUEST - case SCSI_DISK0_MAJOR: - case SCSI_DISK1_MAJOR: - case SCSI_DISK2_MAJOR: - case SCSI_DISK3_MAJOR: - case SCSI_DISK4_MAJOR: - case SCSI_DISK5_MAJOR: - case SCSI_DISK6_MAJOR: - case SCSI_DISK7_MAJOR: - case SCSI_CDROM_MAJOR: - case DAC960_MAJOR+0: - case DAC960_MAJOR+1: - case DAC960_MAJOR+2: - case DAC960_MAJOR+3: - case DAC960_MAJOR+4: - case DAC960_MAJOR+5: - case DAC960_MAJOR+6: - case DAC960_MAJOR+7: - case COMPAQ_SMART2_MAJOR+0: - case COMPAQ_SMART2_MAJOR+1: - case COMPAQ_SMART2_MAJOR+2: - case COMPAQ_SMART2_MAJOR+3: - case COMPAQ_SMART2_MAJOR+4: - case COMPAQ_SMART2_MAJOR+5: - case COMPAQ_SMART2_MAJOR+6: - case COMPAQ_SMART2_MAJOR+7: - - do { + if (seek_to_not_starving_chunk(&req, &latency)) + { + req = req->next; + latency--; + } + for (prev = NULL; + req && latency >= 0; + prev = req, req = req->next, latency--) + { if (req->sem) continue; if (req->cmd != rw) @@ -571,11 +700,18 @@ req->nr_segments++; else continue; } + if (!latency) + continue; req->bhtail->b_reqnext = bh; req->bhtail = bh; req->nr_sectors += count; /* Can we now merge this req with the next? */ attempt_merge(req, max_sectors, max_segments); + + /* latency stuff */ + if (--latency < req->elevator_latency) + req->elevator_latency = latency; + elevator_starve_rest_of_queue(req); /* or to the beginning? */ } else if (req->sector - count == sector) { if (bh->b_data + bh->b_size @@ -590,6 +726,15 @@ req->current_nr_sectors = count; req->sector = sector; req->nr_sectors += count; + /* Can we now merge this req with the prev? */ + if (prev) + attempt_merge(prev, max_sectors, + max_segments); + + /* latency stuff */ + if (latency < --req->elevator_latency) + req->elevator_latency = latency; + elevator_starve_rest_of_queue(req); } else continue; @@ -597,7 +742,7 @@ spin_unlock_irqrestore(&io_request_lock,flags); return; - } while ((req = req->next) != NULL); + } } /* find an unused request. */ @@ -675,13 +820,34 @@ correct_size, bh[i]->b_size); goto sorry; } - - /* Md remaps blocks now */ + /* LVM and MD remap blocks now */ +#if defined CONFIG_BLK_DEV_LVM || defined CONFIG_BLK_DEV_LVM_MODULE + major = MAJOR(bh[i]->b_dev); + if (major == LVM_BLK_MAJOR) { + if (lvm_map_ptr == NULL) { + printk(KERN_ERR + "Bad lvm_map_ptr in ll_rw_block\n"); + goto sorry; + } + if ((lvm_map_ptr) (bh[i], rw) != 0) { + printk(KERN_ERR + "Bad lvm_map in ll_rw_block\n"); + goto sorry; + } + /* remap major too ... 
*/ + major = MAJOR(bh[i]->b_rdev); + } else { + bh[i]->b_rdev = bh[i]->b_dev; + bh[i]->b_rsector = bh[i]->b_blocknr * (bh[i]->b_size >> 9); + } +#else bh[i]->b_rdev = bh[i]->b_dev; bh[i]->b_rsector=bh[i]->b_blocknr*(bh[i]->b_size >> 9); +#endif #ifdef CONFIG_BLK_DEV_MD if (major==MD_MAJOR && - md_map (MINOR(bh[i]->b_dev), &bh[i]->b_rdev, + /* changed v to allow LVM to remap */ + md_map (MINOR(bh[i]->b_rdev), &bh[i]->b_rdev, &bh[i]->b_rsector, bh[i]->b_size >> 9)) { printk (KERN_ERR "Bad md_map in ll_rw_block\n"); @@ -701,7 +867,8 @@ set_bit(BH_Req, &bh[i]->b_state); #ifdef CONFIG_BLK_DEV_MD if (MAJOR(bh[i]->b_dev) == MD_MAJOR) { - md_make_request(MINOR (bh[i]->b_dev), rw, bh[i]); + /* remap device for MD too v */ + md_make_request(MINOR (bh[i]->b_rdev), rw, bh[i]); continue; } #endif @@ -792,6 +959,8 @@ dev->plug_tq.sync = 0; dev->plug_tq.routine = &unplug_device; dev->plug_tq.data = dev; + dev->elevator.read_latency = ELEVATOR_READ_LATENCY; + dev->elevator.write_latency = ELEVATOR_WRITE_LATENCY; } req = all_requests + NR_REQUEST; @@ -885,6 +1054,9 @@ #ifdef CONFIG_SJCD sjcd_init(); #endif CONFIG_SJCD +#ifdef CONFIG_BLK_DEV_LVM + lvm_init(); +#endif #ifdef CONFIG_BLK_DEV_MD md_init(); #endif CONFIG_BLK_DEV_MD diff -urN 2.2.14/drivers/block/lvm-snap.c 2.2.14aa6/drivers/block/lvm-snap.c --- 2.2.14/drivers/block/lvm-snap.c Thu Jan 1 01:00:00 1970 +++ 2.2.14aa6/drivers/block/lvm-snap.c Wed Feb 2 02:31:43 2000 @@ -0,0 +1,414 @@ +/* linux/drivers/block/lvm-snap.c + + Copyright (C) 2000 Andrea Arcangeli SuSE + + LVM snapshotting */ + +#include +#include +#include +#include +#include + + +extern const char *const lvm_name; +extern int lvm_blocksizes[]; + +void lvm_snapshot_release(lv_t *); + +#define hashfn(dev,block,mask,chunk_size) \ + ((HASHDEV(dev)^((block)/(chunk_size))) & (mask)) + +static inline lv_block_exception_t * +lvm_find_exception_table(kdev_t org_dev, unsigned long org_start, lv_t * lv) +{ + struct list_head * hash_table = lv->lv_snapshot_hash_table, * next; + unsigned long mask = lv->lv_snapshot_hash_mask; + int chunk_size = lv->lv_chunk_size; + lv_block_exception_t * ret; + int i = 0; + + hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)]; + ret = NULL; + for (next = hash_table->next; next != hash_table; next = next->next) + { + lv_block_exception_t * exception; + + exception = list_entry(next, lv_block_exception_t, hash); + if (exception->rsector_org == org_start && + exception->rdev_org == org_dev) + { + if (i) + { + /* fun, isn't it? 
:) */ + list_del(next); + list_add(next, hash_table); + } + ret = exception; + break; + } + i++; + } + return ret; +} + +static inline void lvm_hash_link(lv_block_exception_t * exception, + kdev_t org_dev, unsigned long org_start, + lv_t * lv) +{ + struct list_head * hash_table = lv->lv_snapshot_hash_table; + unsigned long mask = lv->lv_snapshot_hash_mask; + int chunk_size = lv->lv_chunk_size; + + hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)]; + list_add(&exception->hash, hash_table); +} + +int lvm_snapshot_remap_block(kdev_t * org_dev, unsigned long * org_sector, + unsigned long pe_start, lv_t * lv) +{ + int ret; + unsigned long pe_off, pe_adjustment, __org_start; + kdev_t __org_dev; + int chunk_size = lv->lv_chunk_size; + lv_block_exception_t * exception; + + pe_off = pe_start % chunk_size; + pe_adjustment = (*org_sector-pe_off) % chunk_size; + __org_start = *org_sector - pe_adjustment; + __org_dev = *org_dev; + + ret = 0; + exception = lvm_find_exception_table(__org_dev, __org_start, lv); + if (exception) + { + *org_dev = exception->rdev_new; + *org_sector = exception->rsector_new + pe_adjustment; + ret = 1; + } + return ret; +} + +static void lvm_drop_snapshot(lv_t * lv_snap, const char * reason) +{ + kdev_t last_dev; + int i; + + /* no exception storage space available for this snapshot + or error on this snapshot --> release it */ + invalidate_buffers(lv_snap->lv_dev); + + for (i = last_dev = 0; i < lv_snap->lv_remap_ptr; i++) { + if ( lv_snap->lv_block_exception[i].rdev_new != last_dev) { + last_dev = lv_snap->lv_block_exception[i].rdev_new; + invalidate_buffers(last_dev); + } + } + + lvm_snapshot_release(lv_snap); + + printk(KERN_INFO + "%s -- giving up to snapshot %s on %s due %s\n", + lvm_name, lv_snap->lv_snapshot_org->lv_name, lv_snap->lv_name, + reason); +} + +static inline void lvm_snapshot_prepare_blocks(unsigned long * blocks, + unsigned long start, + int nr_sectors, + int blocksize) +{ + int i, sectors_per_block, nr_blocks; + + sectors_per_block = blocksize >> 9; + nr_blocks = nr_sectors / sectors_per_block; + start /= sectors_per_block; + + for (i = 0; i < nr_blocks; i++) + blocks[i] = start++; +} + +static inline int get_blksize(kdev_t dev) +{ + int correct_size = BLOCK_SIZE, i, major; + + major = MAJOR(dev); + if (blksize_size[major]) + { + i = blksize_size[major][MINOR(dev)]; + if (i) + correct_size = i; + } + return correct_size; +} + +#ifdef DEBUG_SNAPSHOT +static inline void invalidate_snap_cache(unsigned long start, unsigned long nr, + kdev_t dev) +{ + struct buffer_head * bh; + int sectors_per_block, i, blksize, minor; + + minor = MINOR(dev); + blksize = lvm_blocksizes[minor]; + sectors_per_block = blksize >> 9; + nr /= sectors_per_block; + start /= sectors_per_block; + + for (i = 0; i < nr; i++) + { + bh = get_hash_table(dev, start++, blksize); + if (bh) + bforget(bh); + } +} +#endif + +/* + * copy on write handler for one snapshot logical volume + * + * read the original blocks and store it/them on the new one(s). + * if there is no exception storage space free any longer --> release snapshot. + * + * this routine gets called for each _first_ write to a physical chunk. 
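lvm_snapshot_remap_block() above rounds the faulting sector down to the start of its chunk (taking the physical extent offset into account) and looks that chunk up in the per-snapshot exception table; lvm_snapshot_COW() fills the table on the first write to a chunk. A small stand-alone sketch of the rounding and lookup, with an invented chunk size and table contents:

/* Sketch of the snapshot chunk arithmetic: round a sector down to its
   chunk start and look it up in a tiny exception table.  The table and
   sizes are made up for the example. */
#include <stdio.h>

#define CHUNK  16            /* chunk size in sectors (illustrative) */

struct exception { unsigned long org_start, snap_start; };

static struct exception table[] = {     /* chunks already copied out */
        {  32, 1000 },
        {  96, 1016 },
};

static int remap(unsigned long pe_start, unsigned long *sector)
{
        unsigned long pe_off = pe_start % CHUNK;
        unsigned long adj    = (*sector - pe_off) % CHUNK;
        unsigned long start  = *sector - adj;     /* chunk-aligned origin */
        unsigned int i;

        for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
                if (table[i].org_start == start) {
                        /* old data lives in the snapshot copy */
                        *sector = table[i].snap_start + adj;
                        return 1;
                }
        return 0;              /* not copied yet: first write triggers COW */
}

int main(void)
{
        unsigned long s = 100;                 /* falls inside the chunk at 96 */
        unsigned long pe_start = 0;

        if (remap(pe_start, &s))
                printf("remapped to snapshot sector %lu\n", s);
        else
                printf("sector %lu not copied yet\n", s);
        return 0;
}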
+ */ +int lvm_snapshot_COW(kdev_t org_phys_dev, + unsigned long org_phys_sector, + unsigned long org_pe_start, + unsigned long org_virt_sector, + lv_t * lv_snap) +{ + const char * reason; + unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off; + int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size; + struct kiobuf * iobuf; + unsigned long blocks[KIO_MAX_SECTORS]; + int blksize_snap, blksize_org, min_blksize, max_blksize; + int max_sectors, nr_sectors; + + /* check if we are out of snapshot space */ + if (idx >= lv_snap->lv_remap_end) + goto fail_out_of_space; + + /* calculate physical boundaries of source chunk */ + pe_off = org_pe_start % chunk_size; + org_start = org_phys_sector - ((org_phys_sector-pe_off) % chunk_size); + virt_start = org_virt_sector - (org_phys_sector - org_start); + + /* calculate physical boundaries of destination chunk */ + snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; + snap_start = lv_snap->lv_block_exception[idx].rsector_new; + +#ifdef DEBUG_SNAPSHOT + printk(KERN_INFO + "%s -- COW: " + "org %02d:%02d faulting %lu start %lu, " + "snap %02d:%02d start %lu, " + "size %d, pe_start %lu pe_off %lu, virt_sec %lu\n", + lvm_name, + MAJOR(org_phys_dev), MINOR(org_phys_dev), org_phys_sector, + org_start, + MAJOR(snap_phys_dev), MINOR(snap_phys_dev), snap_start, + chunk_size, + org_pe_start, pe_off, + org_virt_sector); +#endif + + iobuf = lv_snap->lv_iobuf; + + blksize_org = get_blksize(org_phys_dev); + blksize_snap = get_blksize(snap_phys_dev); + max_blksize = max(blksize_org, blksize_snap); + min_blksize = min(blksize_org, blksize_snap); + max_sectors = KIO_MAX_SECTORS * (min_blksize>>9); + + if (chunk_size % (max_blksize>>9)) + goto fail_blksize; + + while (chunk_size) + { + nr_sectors = min(chunk_size, max_sectors); + chunk_size -= nr_sectors; + + iobuf->length = nr_sectors << 9; + + lvm_snapshot_prepare_blocks(blocks, org_start, + nr_sectors, blksize_org); + if (brw_kiovec(READ, 1, &iobuf, org_phys_dev, + blocks, blksize_org) != (nr_sectors<<9)) + goto fail_raw_read; + + lvm_snapshot_prepare_blocks(blocks, snap_start, + nr_sectors, blksize_snap); + if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, + blocks, blksize_snap) != (nr_sectors<<9)) + goto fail_raw_write; + } + +#ifdef DEBUG_SNAPSHOT + /* invalidate the logcial snapshot buffer cache */ + invalidate_snap_cache(virt_start, lv_snap->lv_chunk_size, + lv_snap->lv_dev); +#endif + + /* the original chunk is now stored on the snapshot volume + so update the execption table */ + lv_snap->lv_block_exception[idx].rdev_org = org_phys_dev; + lv_snap->lv_block_exception[idx].rsector_org = org_start; + lvm_hash_link(lv_snap->lv_block_exception + idx, + org_phys_dev, org_start, lv_snap); + lv_snap->lv_remap_ptr = idx + 1; + return 0; + + /* slow path */ + out: + lvm_drop_snapshot(lv_snap, reason); + return 1; + + fail_out_of_space: + reason = "out of space"; + goto out; + fail_raw_read: + reason = "read error"; + goto out; + fail_raw_write: + reason = "write error"; + goto out; + fail_blksize: + reason = "blocksize error"; + goto out; +} + +static int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors) +{ + int bytes, nr_pages, err, i; + + bytes = sectors << 9; + nr_pages = (bytes + ~PAGE_MASK) >> PAGE_SHIFT; + err = expand_kiobuf(iobuf, nr_pages); + if (err) + goto out; + + err = -ENOMEM; + iobuf->locked = 1; + iobuf->nr_pages = 0; + for (i = 0; i < nr_pages; i++) + { + struct page * page; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,27) + page = 
alloc_page(GFP_KERNEL); + if (!page) + goto out; +#else + { + unsigned long addr = __get_free_page(GFP_USER); + if (!addr) + goto out; + iobuf->pagelist[i] = addr; + page = mem_map + MAP_NR(addr); + } +#endif + + iobuf->maplist[i] = page; + /* the only point to lock the page here is to be allowed + to share unmap_kiobuf() in the fail-path */ +#ifndef LockPage +#define LockPage(map) set_bit(PG_locked, &(map)->flags) +#endif + LockPage(page); + iobuf->nr_pages++; + } + iobuf->offset = 0; + + err = 0; + out: + return err; +} + +static int calc_max_buckets(void) +{ + unsigned long mem; + + mem = num_physpages << PAGE_SHIFT; + mem /= 100; + mem *= 2; + mem /= sizeof(struct list_head); + + return mem; +} + +static int lvm_snapshot_alloc_hash_table(lv_t * lv) +{ + int err; + unsigned long buckets, max_buckets, size; + struct list_head * hash; + + buckets = lv->lv_remap_end; + max_buckets = calc_max_buckets(); + buckets = min(buckets, max_buckets); + while (buckets & (buckets-1)) + buckets &= (buckets-1); + + size = buckets * sizeof(struct list_head); + + err = -ENOMEM; + hash = vmalloc(size); + lv->lv_snapshot_hash_table = hash; + + if (!hash) + goto out; + + lv->lv_snapshot_hash_mask = buckets-1; + while (buckets--) + INIT_LIST_HEAD(hash+buckets); + err = 0; + out: + return err; +} + +int lvm_snapshot_alloc(lv_t * lv_snap) +{ + int err, blocksize, max_sectors; + + err = alloc_kiovec(1, &lv_snap->lv_iobuf); + if (err) + goto out; + + blocksize = lvm_blocksizes[MINOR(lv_snap->lv_dev)]; + max_sectors = KIO_MAX_SECTORS << (PAGE_SHIFT-9); + + err = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors); + if (err) + goto out_free_kiovec; + + err = lvm_snapshot_alloc_hash_table(lv_snap); + if (err) + goto out_free_kiovec; + out: + return err; + + out_free_kiovec: + unmap_kiobuf(lv_snap->lv_iobuf); + free_kiovec(1, &lv_snap->lv_iobuf); + goto out; +} + +void lvm_snapshot_release(lv_t * lv) +{ + if (lv->lv_block_exception) + { + vfree(lv->lv_block_exception); + lv->lv_block_exception = NULL; + } + if (lv->lv_snapshot_hash_table) + { + vfree(lv->lv_snapshot_hash_table); + lv->lv_snapshot_hash_table = NULL; + } + if (lv->lv_iobuf) + { + free_kiovec(1, &lv->lv_iobuf); + lv->lv_iobuf = NULL; + } +} diff -urN 2.2.14/drivers/block/lvm.c 2.2.14aa6/drivers/block/lvm.c --- 2.2.14/drivers/block/lvm.c Thu Jan 1 01:00:00 1970 +++ 2.2.14aa6/drivers/block/lvm.c Wed Feb 2 02:31:43 2000 @@ -0,0 +1,2576 @@ +/* + * kernel/lvm.c + * + * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Germany + * + * February-November 1997 + * April-May,July-August,November 1998 + * January-March,May,July,September,October 1999 + * + * + * LVM driver is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * LVM driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. 
+ * + */ + +/* + * Changelog + * + * 09/11/1997 - added chr ioctls VG_STATUS_GET_COUNT + * and VG_STATUS_GET_NAMELIST + * 18/01/1998 - change lvm_chr_open/close lock handling + * 30/04/1998 - changed LV_STATUS ioctl to LV_STATUS_BYNAME and + * - added LV_STATUS_BYINDEX ioctl + * - used lvm_status_byname_req_t and + * lvm_status_byindex_req_t vars + * 04/05/1998 - added multiple device support + * 08/05/1998 - added support to set/clear extendable flag in volume group + * 09/05/1998 - changed output of lvm_proc_get_info() because of + * support for free (eg. longer) logical volume names + * 12/05/1998 - added spin_locks (thanks to Pascal van Dam + * ) + * 25/05/1998 - fixed handling of locked PEs in lvm_map() and lvm_chr_ioctl() + * 26/05/1998 - reactivated verify_area by access_ok + * 07/06/1998 - used vmalloc/vfree instead of kmalloc/kfree to go + * beyond 128/256 KB max allocation limit per call + * - #ifdef blocked spin_lock calls to avoid compile errors + * with 2.0.x + * 11/06/1998 - another enhancement to spinlock code in lvm_chr_open() + * and use of LVM_VERSION_CODE instead of my own macros + * (thanks to Michael Marxmeier ) + * 07/07/1998 - added statistics in lvm_map() + * 08/07/1998 - saved statistics in do_lv_extend_reduce() + * 25/07/1998 - used __initfunc macro + * 02/08/1998 - changes for official char/block major numbers + * 07/08/1998 - avoided init_module() and cleanup_module() to be static + * 30/08/1998 - changed VG lv_open counter from sum of LV lv_open counters + * to sum of LVs open (no matter how often each is) + * 01/09/1998 - fixed lvm_gendisk.part[] index error + * 07/09/1998 - added copying of lv_current_pe-array + * in LV_STATUS_BYINDEX ioctl + * 17/11/1998 - added KERN_* levels to printk + * 13/01/1999 - fixed LV index bug in do_lv_create() which hit lvrename + * 07/02/1999 - fixed spinlock handling bug in case of LVM_RESET + * by moving spinlock code from lvm_chr_open() + * to lvm_chr_ioctl() + * - added LVM_LOCK_LVM ioctl to lvm_chr_ioctl() + * - allowed LVM_RESET and retrieval commands to go ahead; + * only other update ioctls are blocked now + * - fixed pv->pe to NULL for pv_status + * - using lv_req structure in lvm_chr_ioctl() now + * - fixed NULL ptr reference bug in do_lv_extend_reduce() + * caused by uncontiguous PV array in lvm_chr_ioctl(VG_REDUCE) + * 09/02/1999 - changed BLKRASET and BLKRAGET in lvm_chr_ioctl() to + * handle lgoical volume private read ahead sector + * - implemented LV read_ahead handling with lvm_blk_read() + * and lvm_blk_write() + * 10/02/1999 - implemented 2.[12].* support function lvm_hd_name() + * to be used in drivers/block/genhd.c by disk_name() + * 12/02/1999 - fixed index bug in lvm_blk_ioctl(), HDIO_GETGEO + * - enhanced gendisk insert/remove handling + * 16/02/1999 - changed to dynamic block minor number allocation to + * have as much as 99 volume groups with 256 logical volumes + * as the grand total; this allows having 1 volume group with + * up to 256 logical volumes in it + * 21/02/1999 - added LV open count information to proc filesystem + * - substituted redundant LVM_RESET code by calls + * to do_vg_remove() + * 22/02/1999 - used schedule_timeout() to be more responsive + * in case of do_vg_remove() with lots of logical volumes + * 19/03/1999 - fixed NULL pointer bug in module_init/lvm_init + * 17/05/1999 - used DECLARE_WAIT_QUEUE_HEAD macro (>2.3.0) + * - enhanced lvm_hd_name support + * 03/07/1999 - avoided use of KERNEL_VERSION macro based ifdefs and + * memcpy_tofs/memcpy_fromfs macro redefinitions + * 
06/07/1999 - corrected reads/writes statistic counter copy in case + * of striped logical volume + * 28/07/1999 - implemented snapshot logical volumes + * - lvm_chr_ioctl + * - LV_STATUS_BYINDEX + * - LV_STATUS_BYNAME + * - do_lv_create + * - do_lv_remove + * - lvm_map + * - new lvm_snapshot_remap_block + * - new lvm_snapshot_remap_new_block + * 08/10/1999 - implemented support for multiple snapshots per + * original logical volume + * 12/10/1999 - support for 2.3.19 + * 11/11/1999 - support for 2.3.28 + * 21/11/1999 - changed lvm_map() interface to buffer_head based + * 19/12/1999 - support for 2.3.33 + * 01/01/2000 - changed locking concept in lvm_map(), + * do_vg_create() and do_lv_remove() + * + */ + + +/* + * TODO + * + * - implement special handling of unavailable physical volumes + * + */ + +char *lvm_version = "LVM version 0.8e by Heinz Mauelshagen (4/1/2000)\n"; +char *lvm_short_version = "version 0.8e (4/1/2000)"; + +#define MAJOR_NR LVM_BLK_MAJOR +#define DEVICE_OFF(device) + +#include +#include + +#ifdef MODVERSIONS +# undef MODULE +# define MODULE +# include +#endif + +#ifdef MODULE +# include +#endif + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_KERNELD +#include +#endif + +#include +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 0) +#include +#endif + +#include +#include + +#define LVM_CORRECT_READ_AHEAD(a) \ +do { \ + if ((a) < LVM_MIN_READ_AHEAD) \ + (a) = LVM_MIN_READ_AHEAD; \ + if ((a) > LVM_MAX_READ_AHEAD) \ + (a) = LVM_MAX_READ_AHEAD; \ +} while(0) + +#define suser() ( current->uid == 0 && current->euid == 0) + + +/* + * External function prototypes + */ +#ifdef MODULE +int init_module ( void); +void cleanup_module ( void); +#else +extern int lvm_init ( void); +#endif + +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) +static void lvm_dummy_device_request ( request_queue_t*); +#else +static void lvm_dummy_device_request ( void); +#endif +static int lvm_blk_ioctl ( struct inode *, struct file *, uint, ulong); +static int lvm_blk_open ( struct inode *, struct file *); + +static int lvm_chr_open ( struct inode *, struct file *); + +static int lvm_chr_release ( struct inode *, struct file *); +static int lvm_blk_release ( struct inode *, struct file *); + +static int lvm_chr_ioctl ( struct inode *, struct file *, uint, ulong); + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) +static int lvm_proc_get_info ( char *, char **, off_t, int); +static int (*lvm_proc_get_info_ptr)(char *, char **, off_t, int) = + &lvm_proc_get_info; +#else +static int lvm_proc_get_info ( char *, char **, off_t, int, int); +#endif +#endif + +#ifdef LVM_HD_NAME +void lvm_hd_name ( char*, int); +#endif + +/* external snapshot calls */ +int lvm_snapshot_remap_block ( kdev_t*, ulong*, unsigned long, lv_t*); +int lvm_snapshot_COW(kdev_t, unsigned long, unsigned long, + unsigned long, lv_t *); +int lvm_snapshot_alloc(lv_t *); +void lvm_snapshot_release(lv_t *); + +/* End external function prototypes */ + + +/* + * Internal function prototypes + */ +static void lvm_init_vars ( void); +extern int (*lvm_map_ptr) ( struct buffer_head*, int); + + +#ifdef LVM_HD_NAME +extern void (*lvm_hd_name_ptr) ( char*, int); +#endif +static int lvm_map ( struct buffer_head*, int); +static int do_vg_create ( int, void *); +static int do_vg_remove ( int); +static int do_lv_create ( int, char *, lv_t *); +static int do_lv_remove ( int, char *, 
int); +static int do_lv_extend_reduce ( int, char *, lv_t *); +static void lvm_geninit ( struct gendisk *); +#ifdef LVM_GET_INODE + static struct inode *lvm_get_inode ( int); + void lvm_clear_inode ( struct inode *); +#endif +inline int lvm_strlen ( char *); +inline void lvm_memcpy ( char *, char *, int); +inline int lvm_strcmp ( char *, char *); +inline char *lvm_strrchr ( char *, char c); +/* END Internal function prototypes */ + + +/* volume group descriptor area pointers */ +static vg_t *vg[ABS_MAX_VG + 1]; +static pv_t *pvp = NULL; +static lv_t *lvp = NULL; +static pe_t *pep = NULL; +static pe_t *pep1 = NULL; + + +/* map from block minor number to VG and LV numbers */ +typedef struct { + int vg_number; + int lv_number; +} vg_lv_map_t; +static vg_lv_map_t vg_lv_map[ABS_MAX_LV]; + + +/* Request structures (lvm_chr_ioctl()) */ +static pv_change_req_t pv_change_req; +static pv_flush_req_t pv_flush_req; +static pv_status_req_t pv_status_req; +static pe_lock_req_t pe_lock_req; +static le_remap_req_t le_remap_req; +static lv_req_t lv_req; + +#ifdef LVM_TOTAL_RESET +static int lvm_reset_spindown = 0; +#endif + +static char pv_name[NAME_LEN]; +/* static char rootvg[NAME_LEN] = { 0, }; */ +static uint lv_open = 0; +const char *const lvm_name = LVM_NAME; +static int lock = 0; +static int loadtime = 0; +static uint vg_count = 0; +static long lvm_chr_open_count = 0; +static ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION; +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 0) +static DECLARE_WAIT_QUEUE_HEAD ( lvm_wait); +static DECLARE_WAIT_QUEUE_HEAD ( lvm_map_wait); +#else +struct wait_queue *lvm_wait = NULL; +struct wait_queue *lvm_map_wait = NULL; +#endif + +static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED; + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +#if LINUX_VERSION_CODE < KERNEL_VERSION ( 2, 3, 31) +static struct proc_dir_entry lvm_proc_entry = { + 0, 3, LVM_NAME, S_IFREG | S_IRUGO, + 1, 0, 0, 0, + NULL, + lvm_proc_get_info, + NULL, NULL, NULL, NULL, NULL, +}; +#endif +#endif + +static struct file_operations lvm_chr_fops = { + ioctl: lvm_chr_ioctl, + open: lvm_chr_open, + release: lvm_chr_release, +}; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 3, 38) +static struct file_operations lvm_blk_fops = { + read: block_read, + write: block_write, + ioctl: lvm_blk_ioctl, + open: lvm_blk_open, + release: lvm_blk_release, + fsync: block_fsync, +}; +#else +static struct block_device_operations lvm_blk_fops = +{ + open: lvm_blk_open, + release: lvm_blk_release, + ioctl: lvm_blk_ioctl, +}; +#endif + +/* gendisk structures */ +static struct hd_struct lvm_hd_struct[MAX_LV]; +int lvm_blocksizes[MAX_LV] = { 0, }; +static int lvm_size[MAX_LV] = { 0, }; +static struct gendisk lvm_gendisk = { + MAJOR_NR, /* major # */ + LVM_NAME, /* name of major */ + 0, /* number of times minor is shifted + to get real minor */ + 1, /* maximum partitions per device */ + MAX_LV, /* maximum number of real devices */ + lvm_geninit, /* initialization called before we + do other things */ + lvm_hd_struct, /* partition table */ + lvm_size, /* device size in blocks, copied + to block_size[] */ + MAX_LV, /* number or real devices */ + NULL, /* internal */ + NULL, /* pointer to next gendisk struct (internal) */ +}; + + +#ifdef MODULE +/* + * Module initialization... + */ +int init_module ( void) +#else +/* + * Driver initialization... 
+ */ +#ifdef __initfunc +__initfunc ( int lvm_init ( void)) +#else +int __init lvm_init ( void) +#endif +#endif /* #ifdef MODULE */ +{ + struct gendisk *gendisk_ptr = NULL; + + lvm_init_vars (); + + /* insert our gendisk at the corresponding major */ + lvm_geninit ( &lvm_gendisk); + if ( gendisk_head != NULL) { + gendisk_ptr = gendisk_head; + while ( gendisk_ptr->next != NULL && + gendisk_ptr->major > lvm_gendisk.major) { + gendisk_ptr = gendisk_ptr->next; + } + lvm_gendisk.next = gendisk_ptr->next; + gendisk_ptr->next = &lvm_gendisk; + } else { + gendisk_head = &lvm_gendisk; + lvm_gendisk.next = NULL; + } + + /* reference from drivers/block/ll_rw_blk.c */ + lvm_map_ptr = lvm_map; + +#ifdef LVM_HD_NAME + /* reference from drivers/block/genhd.c */ + lvm_hd_name_ptr = lvm_hd_name; +#endif + +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) + blk_init_queue ( BLK_DEFAULT_QUEUE ( MAJOR_NR), lvm_dummy_device_request); +#else + blk_dev[MAJOR_NR].request_fn = lvm_dummy_device_request; + blk_dev[MAJOR_NR].current_request = NULL; +#endif + + /* optional read root VGDA */ +/* + if ( *rootvg != 0) { + vg_read_with_pv_and_lv ( rootvg, &vg); + } +*/ + + if ( register_chrdev ( LVM_CHAR_MAJOR, lvm_name, &lvm_chr_fops) < 0) { + printk ( KERN_ERR "%s -- register_chrdev failed\n", lvm_name); + return -EIO; + } + if ( register_blkdev ( MAJOR_NR, lvm_name, &lvm_blk_fops) < 0) { + printk ( "%s -- register_blkdev failed\n", lvm_name); + if ( unregister_chrdev ( LVM_CHAR_MAJOR, lvm_name) < 0) + printk ( KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name); + return -EIO; + } + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 25) + create_proc_info_entry ( LVM_NAME, S_IFREG | S_IRUGO, + &proc_root, lvm_proc_get_info_ptr); +# else + proc_register ( &proc_root, &lvm_proc_entry); +# endif +#endif + + printk ( KERN_INFO + "%s%s -- " +#ifdef MODULE + "Module" +#else + "Driver" +#endif + " successfully initialized\n", + lvm_version, lvm_name); + + return 0; +} /* init_module () / lvm_init () */ + + +#ifdef MODULE +/* + * Module cleanup... 
+ */ +void cleanup_module ( void) { + struct gendisk *gendisk_ptr = NULL, *gendisk_ptr_prev = NULL; + + if ( unregister_chrdev ( LVM_CHAR_MAJOR, lvm_name) < 0) { + printk ( KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name); + } + if ( unregister_blkdev ( MAJOR_NR, lvm_name) < 0) { + printk ( KERN_ERR "%s -- unregister_blkdev failed\n", lvm_name); + } + +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) + blk_cleanup_queue ( BLK_DEFAULT_QUEUE ( MAJOR_NR)); +#else + blk_dev[MAJOR_NR].request_fn = NULL; + blk_dev[MAJOR_NR].current_request = NULL; +#endif + + gendisk_ptr = gendisk_ptr_prev = gendisk_head; + while ( gendisk_ptr != NULL) { + if ( gendisk_ptr == &lvm_gendisk) break; + gendisk_ptr_prev = gendisk_ptr; + gendisk_ptr = gendisk_ptr->next; + } + /* delete our gendisk from chain */ + if ( gendisk_ptr == &lvm_gendisk) gendisk_ptr_prev->next = gendisk_ptr->next; + + blk_size[MAJOR_NR] = NULL; + blksize_size[MAJOR_NR] = NULL; + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) + remove_proc_entry ( LVM_NAME, &proc_root); +# else + proc_unregister ( &proc_root, lvm_proc_entry.low_ino); +# endif +#endif + + /* reference from linux/drivers/block/ll_rw_blk.c */ + lvm_map_ptr = NULL; + +#ifdef LVM_HD_NAME + /* reference from linux/drivers/block/genhd.c */ + lvm_hd_name_ptr = NULL; +#endif + + printk ( KERN_INFO "%s -- Module successfully deactivated\n", lvm_name); + + return; +} /* void cleanup_module () */ +#endif /* #ifdef MODULE */ + + +/* + * support function to initialize lvm variables + */ +#ifdef __initfunc +__initfunc ( void lvm_init_vars ( void)) +#else +void __init lvm_init_vars ( void) +#endif +{ + int v; + + loadtime = CURRENT_TIME; + + lvm_lock = SPIN_LOCK_UNLOCKED; + + pe_lock_req.lock = UNLOCK_PE; + pe_lock_req.data.lv_dev = \ + pe_lock_req.data.pv_dev = \ + pe_lock_req.data.pv_offset = 0; + + /* Initialize VG pointers */ + for ( v = 0; v <= ABS_MAX_VG; v++) vg[v] = NULL; + + /* Initialize LV -> VG association */ + for ( v = 0; v < ABS_MAX_LV; v++) { + /* index ABS_MAX_VG never used for real VG */ + vg_lv_map[v].vg_number = ABS_MAX_VG; + vg_lv_map[v].lv_number = -1; + } + + return; +} /* lvm_init_vars () */ + + +/******************************************************************** + * + * Character device functions + * + ********************************************************************/ + +/* + * character device open routine + */ +static int lvm_chr_open ( struct inode *inode, + struct file *file) { + int minor = MINOR ( inode->i_rdev); + +#ifdef DEBUG + printk ( KERN_DEBUG + "%s -- lvm_chr_open MINOR: %d VG#: %d mode: 0x%X lock: %d\n", + lvm_name, minor, VG_CHR(minor), file->f_mode, lock); +#endif + + /* super user validation */ + if ( ! suser()) return -EACCES; + + /* Group special file open */ + if ( VG_CHR(minor) > MAX_VG) return -ENXIO; + +#ifdef MODULE + MOD_INC_USE_COUNT; +#endif + + lvm_chr_open_count++; + return 0; +} /* lvm_chr_open () */ + + +/* + * character device i/o-control routine + * + * Only one changing process can do ioctl at one time, others will block. 
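+ * Editor's illustration (not part of the original patch): user space is
+ * expected to serialize updates by taking the lock first, e.g.
+ *     ioctl(fd, LVM_LOCK_LVM, 0);
+ *     ioctl(fd, VG_EXTEND, pv);
+ * and to drop it by closing the character device, which clears 'lock' in
+ * lvm_chr_release() and wakes any waiters on lvm_wait. The exact call
+ * sequence used by the LVM tools is an assumption here.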
+ * + */ +static int lvm_chr_ioctl ( struct inode *inode, struct file *file, + uint command, ulong a) { + int minor = MINOR ( inode->i_rdev); + int extendable; + ulong l, le, p, v; + ulong size; + void *arg = ( void*) a; +#ifdef LVM_GET_INODE + struct inode *inode_sav; +#endif + lv_status_byname_req_t lv_status_byname_req; + lv_status_byindex_req_t lv_status_byindex_req; + lv_t lv; + + /* otherwise cc will complain about unused variables */ + ( void) lvm_lock; + + +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_chr_ioctl: command: 0x%X MINOR: %d " + "VG#: %d mode: 0x%X\n", + lvm_name, command, minor, VG_CHR(minor), file->f_mode); +#endif + +#ifdef LVM_TOTAL_RESET + if ( lvm_reset_spindown > 0) return -EACCES; +#endif + + + /* Main command switch */ + switch ( command) { + /* lock the LVM */ + case LVM_LOCK_LVM: +lock_try_again: + spin_lock ( &lvm_lock); + if( lock != 0 && lock != current->pid ) { +#ifdef DEBUG_IOCTL + printk ( KERN_INFO "lvm_chr_ioctl: %s is locked by pid %d ...\n", + lvm_name, lock); +#endif + spin_unlock ( &lvm_lock); + interruptible_sleep_on ( &lvm_wait); + if ( current->sigpending != 0) return -EINTR; +#ifdef LVM_TOTAL_RESET + if ( lvm_reset_spindown > 0) return -EACCES; +#endif + goto lock_try_again; + } + lock = current->pid; + spin_unlock ( &lvm_lock); + return 0; + + + /* check lvm version to ensure driver/tools+lib interoperability */ + case LVM_GET_IOP_VERSION: + if ( copy_to_user ( arg, &lvm_iop_version, sizeof ( ushort)) != 0) + return -EFAULT; + return 0; + + +#ifdef LVM_TOTAL_RESET + /* lock reset function */ + case LVM_RESET: + lvm_reset_spindown = 1; + for ( v = 0; v < ABS_MAX_VG; v++) { + if ( vg[v] != NULL) { + do_vg_remove ( v); + } + } + +#ifdef MODULE + while ( GET_USE_COUNT ( &__this_module) < 1) + MOD_INC_USE_COUNT; + while ( GET_USE_COUNT ( &__this_module) > 1) + MOD_DEC_USE_COUNT; +#endif /* MODULE */ + lock = 0; /* release lock */ + wake_up_interruptible ( &lvm_wait); + return 0; +#endif /* LVM_TOTAL_RESET */ + + + /* lock/unlock i/o to a physical extent to move it to another + physical volume (move's done in user space's pvmove) */ + case PE_LOCK_UNLOCK: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &pe_lock_req, arg, sizeof ( pe_lock_req_t)) != 0) + return -EFAULT; + + switch ( pe_lock_req.lock) { + case LOCK_PE: + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + if ( vg[VG_CHR(minor)]->pv[p] != NULL && + pe_lock_req.data.pv_dev == + vg[VG_CHR(minor)]->pv[p]->pv_dev) + break; + } + + if ( p == vg[VG_CHR(minor)]->pv_max) return -ENXIO; + + pe_lock_req.lock = UNLOCK_PE; + fsync_dev ( pe_lock_req.data.lv_dev); + pe_lock_req.lock = LOCK_PE; + break; + + case UNLOCK_PE: + pe_lock_req.lock = UNLOCK_PE; + pe_lock_req.data.lv_dev = \ + pe_lock_req.data.pv_dev = \ + pe_lock_req.data.pv_offset = 0; + wake_up ( &lvm_map_wait); + break; + + default: + return -EINVAL; + } + + return 0; + + + /* remap a logical extent (after moving the physical extent) */ + case LE_REMAP: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &le_remap_req, arg, + sizeof ( le_remap_req_t)) != 0) + return -EFAULT; + + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->lv[l]->lv_name, + le_remap_req.lv_name) == 0) { + for ( le = 0; le < vg[VG_CHR(minor)]->lv[l]->lv_allocated_le; + le++) { + if ( vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev == + le_remap_req.old_dev && + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].pe == + le_remap_req.old_pe) { 
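+ /* Editor's note (not part of the original patch): this is the remap step
+ of a pvmove - the logical extent 'le' still points at the old physical
+ extent here, and the two assignments below switch it to the copy, e.g.
+ from (old_dev 08:01, old_pe 42) to (new_dev 08:11, new_pe 42); the
+ device numbers are invented purely for illustration. */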
+ vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev = + le_remap_req.new_dev; + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].pe = + le_remap_req.new_pe; + return 0; + } + } + return -EINVAL; + } + } + + return -ENXIO; + + + /* create a VGDA */ + case VG_CREATE: + return do_vg_create ( minor, arg); + + + /* remove an inactive VGDA */ + case VG_REMOVE: + return do_vg_remove ( minor); + + + /* extend a volume group */ + case VG_EXTEND: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( vg[VG_CHR(minor)]->pv_cur < vg[VG_CHR(minor)]->pv_max) { + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + if ( vg[VG_CHR(minor)]->pv[p] == NULL) { + if ( ( vg[VG_CHR(minor)]->pv[p] = + kmalloc ( sizeof ( pv_t), GFP_USER)) == NULL) { + printk ( KERN_CRIT + "%s -- VG_EXTEND: kmalloc error PV at line %d\n", + lvm_name, __LINE__); + return -ENOMEM; + } + if ( copy_from_user ( vg[VG_CHR(minor)]->pv[p], arg, + sizeof ( pv_t)) != 0) + return -EFAULT; + + vg[VG_CHR(minor)]->pv[p]->pv_status = PV_ACTIVE; + /* We don't need the PE list + in kernel space like LVs pe_t list */ + vg[VG_CHR(minor)]->pv[p]->pe = NULL; + vg[VG_CHR(minor)]->pv_cur++; + vg[VG_CHR(minor)]->pv_act++; + vg[VG_CHR(minor)]->pe_total += + vg[VG_CHR(minor)]->pv[p]->pe_total; +#ifdef LVM_GET_INODE + /* insert a dummy inode for fs_may_mount */ + vg[VG_CHR(minor)]->pv[p]->inode = + lvm_get_inode ( vg[VG_CHR(minor)]->pv[p]->pv_dev); +#endif + return 0; + } + } + } + return -EPERM; + + + /* reduce a volume group */ + case VG_REDUCE: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( pv_name, arg, sizeof ( pv_name)) != 0) + return -EFAULT; + + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + if ( vg[VG_CHR(minor)]->pv[p] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->pv[p]->pv_name, + pv_name) == 0) { + if ( vg[VG_CHR(minor)]->pv[p]->lv_cur > 0) return -EPERM; + vg[VG_CHR(minor)]->pe_total -= + vg[VG_CHR(minor)]->pv[p]->pe_total; + vg[VG_CHR(minor)]->pv_cur--; + vg[VG_CHR(minor)]->pv_act--; +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG + "%s -- kfree %d\n", lvm_name, __LINE__); +#endif +#ifdef LVM_GET_INODE + lvm_clear_inode ( vg[VG_CHR(minor)]->pv[p]->inode); +#endif + kfree ( vg[VG_CHR(minor)]->pv[p]); + /* Make PV pointer array contiguous */ + for ( ; p < vg[VG_CHR(minor)]->pv_max-1; p++) + vg[VG_CHR(minor)]->pv[p] = vg[VG_CHR(minor)]->pv[p + 1]; + vg[VG_CHR(minor)]->pv[p + 1] = NULL; + return 0; + } + } + return -ENXIO; + + + /* set/clear extendability flag of volume group */ + case VG_SET_EXTENDABLE: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &extendable, arg, sizeof ( extendable)) != 0) + return -EFAULT; + + if ( extendable == VG_EXTENDABLE || + extendable == ~VG_EXTENDABLE) { + if ( extendable == VG_EXTENDABLE) + vg[VG_CHR(minor)]->vg_status |= VG_EXTENDABLE; + else + vg[VG_CHR(minor)]->vg_status &= ~VG_EXTENDABLE; + } else return -EINVAL; + return 0; + + + /* get volume group data (only the vg_t struct) */ + case VG_STATUS: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_to_user ( arg, vg[VG_CHR(minor)], sizeof ( vg_t)) != 0) + return -EFAULT; + + return 0; + + + /* get volume group count */ + case VG_STATUS_GET_COUNT: + if ( copy_to_user ( arg, &vg_count, sizeof ( vg_count)) != 0) + return -EFAULT; + + return 0; + + + /* get volume group count */ + case VG_STATUS_GET_NAMELIST: + for ( l = v = 0; v < ABS_MAX_VG; v++) { + if ( vg[v] != NULL) { + if ( copy_to_user ( arg + l++ * NAME_LEN, + vg[v]->vg_name, + NAME_LEN) != 0) + return -EFAULT; + } + } + return 0; + + + /* create, 
remove, extend or reduce a logical volume */ + case LV_CREATE: + case LV_REMOVE: + case LV_EXTEND: + case LV_REDUCE: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &lv_req, arg, sizeof ( lv_req)) != 0) + return -EFAULT; + + if ( command != LV_REMOVE) { + if ( copy_from_user ( &lv, lv_req.lv, sizeof ( lv_t)) != 0) + return -EFAULT; + } + + switch ( command) { + case LV_CREATE: + return do_lv_create ( minor, lv_req.lv_name, &lv); + + case LV_REMOVE: + return do_lv_remove ( minor, lv_req.lv_name, -1); + + case LV_EXTEND: + case LV_REDUCE: + return do_lv_extend_reduce ( minor, lv_req.lv_name, &lv); + } + + + /* get status of a logical volume by name */ + case LV_STATUS_BYNAME: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &lv_status_byname_req, arg, + sizeof ( lv_status_byname_req_t)) != 0) + return -EFAULT; + + if ( lv_status_byname_req.lv == NULL) return -EINVAL; + if ( copy_from_user ( &lv, lv_status_byname_req.lv, + sizeof ( lv_t)) != 0) + return -EFAULT; + + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->lv[l]->lv_name, + lv_status_byname_req.lv_name) == 0) { + if ( copy_to_user ( lv_status_byname_req.lv, + vg[VG_CHR(minor)]->lv[l], + sizeof ( lv_t)) != 0) + return -EFAULT; + + if ( lv.lv_current_pe != NULL) { + size = vg[VG_CHR(minor)]->lv[l]->lv_allocated_le * + sizeof ( pe_t); + if ( copy_to_user ( lv.lv_current_pe, + vg[VG_CHR(minor)]->lv[l]->lv_current_pe, + size) != 0) + return -EFAULT; + } + return 0; + } + } + return -ENXIO; + + + /* get status of a logical volume by index */ + case LV_STATUS_BYINDEX: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &lv_status_byindex_req, arg, + sizeof ( lv_status_byindex_req)) != 0) + return -EFAULT; + + if ( ( lvp = lv_status_byindex_req.lv) == NULL) return -EINVAL; + l = lv_status_byindex_req.lv_index; + if ( vg[VG_CHR(minor)]->lv[l] == NULL) return -ENXIO; + + if ( copy_from_user ( &lv, lvp, sizeof ( lv_t)) != 0) + return -EFAULT; + + if ( copy_to_user ( lvp, vg[VG_CHR(minor)]->lv[l], + sizeof ( lv_t)) != 0) + return -EFAULT; + + if ( lv.lv_current_pe != NULL) { + size = vg[VG_CHR(minor)]->lv[l]->lv_allocated_le * sizeof ( pe_t); + if ( copy_to_user ( lv.lv_current_pe, + vg[VG_CHR(minor)]->lv[l]->lv_current_pe, + size) != 0) + return -EFAULT; + } + return 0; + + + /* change a physical volume */ + case PV_CHANGE: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &pv_change_req, arg, + sizeof ( pv_change_req)) != 0) + return -EFAULT; + + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + if ( vg[VG_CHR(minor)]->pv[p] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->pv[p]->pv_name, + pv_change_req.pv_name) == 0) { +#ifdef LVM_GET_INODE + inode_sav = vg[VG_CHR(minor)]->pv[p]->inode; +#endif + if ( copy_from_user ( vg[VG_CHR(minor)]->pv[p], + pv_change_req.pv, + sizeof ( pv_t)) != 0) + return -EFAULT; + + /* We don't need the PE list + in kernel space as with LVs pe_t list */ + vg[VG_CHR(minor)]->pv[p]->pe = NULL; +#ifdef LVM_GET_INODE + vg[VG_CHR(minor)]->pv[p]->inode = inode_sav; +#endif + return 0; + } + } + return -ENXIO; + + + /* get physical volume data (pv_t structure only) */ + case PV_STATUS: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &pv_status_req, arg, + sizeof ( pv_status_req)) != 0) + return -EFAULT; + + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + if ( vg[VG_CHR(minor)]->pv[p] != NULL) { + if ( lvm_strcmp ( 
vg[VG_CHR(minor)]->pv[p]->pv_name, + pv_status_req.pv_name) == 0) { + if ( copy_to_user ( pv_status_req.pv, + vg[VG_CHR(minor)]->pv[p], + sizeof ( pv_t)) != 0) + return -EFAULT; + return 0; + } + } + } + return -ENXIO; + + + /* physical volume buffer flush/invalidate */ + case PV_FLUSH: + if ( copy_from_user ( &pv_flush_req, arg, sizeof ( pv_flush_req)) != 0) + return -EFAULT; + + for ( v = 0; v < ABS_MAX_VG; v++) { + if ( vg[v] == NULL) continue; + for ( p = 0; p < vg[v]->pv_max; p++) { + if ( vg[v]->pv[p] != NULL && + lvm_strcmp ( vg[v]->pv[p]->pv_name, + pv_flush_req.pv_name) == 0) { + fsync_dev ( vg[v]->pv[p]->pv_dev); + invalidate_buffers ( vg[v]->pv[p]->pv_dev); + return 0; + } + } + } + return 0; + + + default: + printk ( KERN_WARNING + "%s -- lvm_chr_ioctl: unknown command %x\n", + lvm_name, command); + return -EINVAL; + } + + return 0; +} /* lvm_chr_ioctl */ + + +/* + * character device close routine + */ +static int lvm_chr_release ( struct inode *inode, struct file *file) +{ +#ifdef DEBUG + int minor = MINOR ( inode->i_rdev); + printk ( KERN_DEBUG + "%s -- lvm_chr_release VG#: %d\n", lvm_name, VG_CHR(minor)); +#endif + +#ifdef MODULE + if ( GET_USE_COUNT ( &__this_module) > 0) MOD_DEC_USE_COUNT; +#endif + +#ifdef LVM_TOTAL_RESET + if ( lvm_reset_spindown > 0) { + lvm_reset_spindown = 0; + lvm_chr_open_count = 1; + } +#endif + + if ( lvm_chr_open_count > 0) lvm_chr_open_count--; + if ( lock == current->pid) { + lock = 0; /* release lock */ + wake_up_interruptible ( &lvm_wait); + } + + return 0; +} /* lvm_chr_release () */ + + + +/******************************************************************** + * + * Block device functions + * + ********************************************************************/ + +/* + * block device open routine + */ +static int lvm_blk_open ( struct inode *inode, struct file *file) { + int minor = MINOR ( inode->i_rdev); + +#ifdef DEBUG_LVM_BLK_OPEN + printk ( KERN_DEBUG + "%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d mode: 0x%X\n", + lvm_name, minor, VG_BLK(minor), LV_BLK(minor), file->f_mode); +#endif + +#ifdef LVM_TOTAL_RESET + if ( lvm_reset_spindown > 0) return -EPERM; +#endif + + if ( vg[VG_BLK(minor)] != NULL && + ( vg[VG_BLK(minor)]->vg_status & VG_ACTIVE) && + vg[VG_BLK(minor)]->lv[LV_BLK(minor)] != NULL && + LV_BLK(minor) >= 0 && + LV_BLK(minor) < vg[VG_BLK(minor)]->lv_max) { + + /* Check parallel LV spindown (LV remove) */ + if ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_status & LV_SPINDOWN) + return -EPERM; + + /* Check inactive LV and open for read/write */ + if ( file->f_mode & O_RDWR) { + if ( ! ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_status & LV_ACTIVE)) + return -EPERM; + if ( ! 
( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_access & LV_WRITE)) + return -EACCES; + } + + if ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open == 0) + vg[VG_BLK(minor)]->lv_open++; + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open++; + +#ifdef MODULE + MOD_INC_USE_COUNT; +#endif + +#ifdef DEBUG_LVM_BLK_OPEN + printk ( KERN_DEBUG + "%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d size: %d\n", + lvm_name, minor, VG_BLK(minor), LV_BLK(minor), + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_size); +#endif + + return 0; + } + + return -ENXIO; +} /* lvm_blk_open () */ + + +/* + * block device i/o-control routine + */ +static int lvm_blk_ioctl (struct inode *inode, struct file *file, + uint command, ulong a) { + int minor = MINOR ( inode->i_rdev); + void *arg = ( void*) a; + struct hd_geometry *hd = ( struct hd_geometry *) a; + +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl MINOR: %d command: 0x%X arg: %X " + "VG#: %dl LV#: %d\n", + lvm_name, minor, command, ( ulong) arg, + VG_BLK(minor), LV_BLK(minor)); +#endif + + switch ( command) { + /* return device size */ + case BLKGETSIZE: +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- BLKGETSIZE: %u\n", + lvm_name, vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_size); +#endif + copy_to_user ( ( long*) arg, &vg[VG_BLK(minor)]->\ + lv[LV_BLK(minor)]->lv_size, + sizeof ( vg[VG_BLK(minor)]->\ + lv[LV_BLK(minor)]->lv_size)); + break; + + + /* flush buffer cache */ + case BLKFLSBUF: + /* super user validation */ + if ( ! suser ()) return -EACCES; + +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- BLKFLSBUF\n", lvm_name); +#endif + fsync_dev ( inode->i_rdev); + invalidate_buffers(inode->i_rdev); + break; + + + /* set read ahead for block device */ + case BLKRASET: + /* super user validation */ + if ( ! suser ()) return -EACCES; + +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- BLKRASET: %d sectors for %02X:%02X\n", + lvm_name, ( long) arg, MAJOR( inode->i_rdev), minor); +#endif + if ( ( long) arg < LVM_MIN_READ_AHEAD || + ( long) arg > LVM_MAX_READ_AHEAD) return -EINVAL; + read_ahead[MAJOR_NR] = + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_read_ahead = ( long) arg; + break; + + + /* get current read ahead setting */ + case BLKRAGET: +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- BLKRAGET\n", lvm_name); +#endif + copy_to_user ( ( long*) arg, + &vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_read_ahead, + sizeof ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->\ + lv_read_ahead)); + break; + + + /* get disk geometry */ + case HDIO_GETGEO: +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- HDIO_GETGEO\n", lvm_name); +#endif + if ( hd == NULL) return -EINVAL; + { + unsigned char heads = 64; + unsigned char sectors = 32; + long start = 0; + short cylinders = vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_size / + heads / sectors; + + if ( copy_to_user ( ( char*) &hd->heads, &heads, + sizeof ( heads)) != 0 || + copy_to_user ( ( char*) &hd->sectors, &sectors, + sizeof ( sectors)) != 0 || + copy_to_user ( ( short*) &hd->cylinders, + &cylinders, sizeof ( cylinders)) != 0 || + copy_to_user ( ( long*) &hd->start, &start, + sizeof ( start)) != 0) + return -EFAULT; + } + +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- cylinders: %d\n", + lvm_name, vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->\ + lv_size / heads / sectors); +#endif + break; + + + /* set access flags of a logical volume */ + case LV_SET_ACCESS: + /* super user validation */ + if ( !
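/* Editor's worked example (not part of the original patch): the geometry
returned by HDIO_GETGEO above is synthetic - with heads = 64 and
sectors = 32, a 2 GB logical volume (lv_size = 4194304 sectors) reports
4194304 / 64 / 32 = 2048 cylinders. */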
suser ()) return -EACCES; + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_access = ( ulong) arg; + break; + + + /* set status flags of a logical volume */ + case LV_SET_STATUS: + /* super user validation */ + if ( ! suser ()) return -EACCES; + if ( ! ( ( ulong) arg & LV_ACTIVE) && + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open > 1) return -EPERM; + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_status = ( ulong) arg; + break; + + + /* set allocation flags of a logical volume */ + case LV_SET_ALLOCATION: + /* super user validation */ + if ( ! suser ()) return -EACCES; + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_allocation = ( ulong) arg; + break; + + + default: + printk ( KERN_WARNING + "%s -- lvm_blk_ioctl: unknown command %d\n", + lvm_name, command); + return -EINVAL; + } + + return 0; +} /* lvm_blk_ioctl () */ + + +/* + * block device close routine + */ +static int lvm_blk_release ( struct inode *inode, struct file *file) +{ + int minor = MINOR ( inode->i_rdev); + +#ifdef DEBUG + printk ( KERN_DEBUG + "%s -- lvm_blk_release MINOR: %d VG#: %d LV#: %d\n", + lvm_name, minor, VG_BLK(minor), LV_BLK(minor)); +#endif + + sync_dev ( inode->i_rdev); + if ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open == 1) + vg[VG_BLK(minor)]->lv_open--; + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open--; + +#ifdef MODULE + MOD_DEC_USE_COUNT; +#endif + + return 0; +} /* lvm_blk_release () */ + + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +/* + * Support function /proc-Filesystem + */ +#define LVM_PROC_BUF ( i == 0 ? dummy_buf : &buf[sz]) + +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 25) +static int lvm_proc_get_info ( char *page, char **start, off_t pos, int count) +#else +static int lvm_proc_get_info ( char *page, char **start, off_t pos, + int count, int whence) +#endif +{ + int c, i, l, p, v, vg_counter, pv_counter, lv_counter, lv_open_counter, + lv_open_total, pe_t_bytes, lv_block_exception_t_bytes, seconds; + static off_t sz; + off_t sz_last; + char allocation_flag, inactive_flag, rw_flag, stripes_flag; + char *lv_name = NULL; + static char *buf = NULL; + static char dummy_buf[160]; /* sized for 2 lines */ + +#ifdef DEBUG_LVM_PROC_GET_INFO + printk ( KERN_DEBUG + "%s - lvm_proc_get_info CALLED pos: %lu count: %d whence: %d\n", + lvm_name, pos, count, whence); +#endif + + if ( pos == 0 || buf == NULL) { + sz_last = vg_counter = pv_counter = lv_counter = lv_open_counter = \ + lv_open_total = pe_t_bytes = lv_block_exception_t_bytes = 0; + + /* search for activity */ + for ( v = 0; v < ABS_MAX_VG; v++) { + if ( vg[v] != NULL) { + vg_counter++; + pv_counter += vg[v]->pv_cur; + lv_counter += vg[v]->lv_cur; + if ( vg[v]->lv_cur > 0) { + for ( l = 0; l < vg[v]->lv_max; l++) { + if ( vg[v]->lv[l] != NULL) { + pe_t_bytes += vg[v]->lv[l]->lv_allocated_le; + if ( vg[v]->lv[l]->lv_block_exception != NULL) { + lv_block_exception_t_bytes += + vg[v]->lv[l]->lv_remap_end; + } + if ( vg[v]->lv[l]->lv_open > 0) { + lv_open_counter++; + lv_open_total += vg[v]->lv[l]->lv_open; + } + } + } + } + } + } + pe_t_bytes *= sizeof ( pe_t); + lv_block_exception_t_bytes *= sizeof ( lv_block_exception_t); + + if ( buf != NULL) { +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG + "%s -- vfree %d\n", lvm_name, __LINE__); +#endif + vfree ( buf); + buf = NULL; + } + + /* 2 times: first to get size to allocate buffer, + 2nd to fill the vmalloced buffer */ + for ( i = 0; i < 2; i++) { + sz = 0; + sz += sprintf ( LVM_PROC_BUF, + "LVM " +#ifdef MODULE + "module" +#else + "driver" +#endif + " %s\n\n" + "Total: %d VG%s %d PV%s %d LV%s ", + 
lvm_short_version, + vg_counter, vg_counter == 1 ? "" : "s", + pv_counter, pv_counter == 1 ? "" : "s", + lv_counter, lv_counter == 1 ? "" : "s"); + sz += sprintf ( LVM_PROC_BUF, + "(%d LV%s open", + lv_open_counter, + lv_open_counter == 1 ? "" : "s"); + if ( lv_open_total > 0) sz += sprintf ( LVM_PROC_BUF, + " %d times)\n", + lv_open_total); + else sz += sprintf ( LVM_PROC_BUF, ")"); + sz += sprintf ( LVM_PROC_BUF, + "\nGlobal: %lu bytes vmalloced IOP version: %d ", + vg_counter * sizeof ( vg_t) + + pv_counter * sizeof ( pv_t) + + lv_counter * sizeof ( lv_t) + + pe_t_bytes + lv_block_exception_t_bytes + sz_last, + lvm_iop_version); + + seconds = CURRENT_TIME - loadtime; + if ( seconds < 0) loadtime = CURRENT_TIME + seconds; + if ( seconds / 86400 > 0) { + sz += sprintf ( LVM_PROC_BUF, "%d day%s ", + seconds / 86400, + seconds / 86400 == 0 || + seconds / 86400 > 1 ? "s": ""); + } + sz += sprintf ( LVM_PROC_BUF, "%d:%02d:%02d active\n", + ( seconds % 86400) / 3600, + ( seconds % 3600) / 60, + seconds % 60); + + if ( vg_counter > 0) { + for ( v = 0; v < ABS_MAX_VG; v++) { + /* volume group */ + if ( vg[v] != NULL) { + inactive_flag = ' '; + if ( ! ( vg[v]->vg_status & VG_ACTIVE)) + inactive_flag = 'I'; + sz += sprintf ( LVM_PROC_BUF, + "\nVG: %c%s [%d PV, %d LV/%d open] " + " PE Size: %d KB\n" + " Usage [KB/PE]: %d /%d total " + "%d /%d used %d /%d free", + inactive_flag, + vg[v]->vg_name, + vg[v]->pv_cur, + vg[v]->lv_cur, + vg[v]->lv_open, + vg[v]->pe_size >> 1, + vg[v]->pe_size * vg[v]->pe_total >> 1, + vg[v]->pe_total, + vg[v]->pe_allocated * vg[v]->pe_size >> 1, + vg[v]->pe_allocated, + ( vg[v]->pe_total - vg[v]->pe_allocated) * + vg[v]->pe_size >> 1, + vg[v]->pe_total - vg[v]->pe_allocated); + + /* physical volumes */ + sz += sprintf ( LVM_PROC_BUF, + "\n PV%s ", + vg[v]->pv_cur == 1 ? ": " : "s:"); + c = 0; + for ( p = 0; p < vg[v]->pv_max; p++) { + if ( vg[v]->pv[p] != NULL) { + inactive_flag = 'A'; + if ( ! ( vg[v]->pv[p]->pv_status & PV_ACTIVE)) + inactive_flag = 'I'; + allocation_flag = 'A'; + if ( ! ( vg[v]->pv[p]->pv_allocatable & PV_ALLOCATABLE)) + allocation_flag = 'N'; + sz += sprintf ( LVM_PROC_BUF, + "[%c%c] %-21s %8d /%-6d " + "%8d /%-6d %8d /%-6d", + inactive_flag, + allocation_flag, + vg[v]->pv[p]->pv_name, + vg[v]->pv[p]->pe_total * + vg[v]->pv[p]->pe_size >> 1, + vg[v]->pv[p]->pe_total, + vg[v]->pv[p]->pe_allocated * + vg[v]->pv[p]->pe_size >> 1, + vg[v]->pv[p]->pe_allocated, + ( vg[v]->pv[p]->pe_total - + vg[v]->pv[p]->pe_allocated) * + vg[v]->pv[p]->pe_size >> 1, + vg[v]->pv[p]->pe_total - + vg[v]->pv[p]->pe_allocated); + c++; + if ( c < vg[v]->pv_cur) sz += sprintf ( LVM_PROC_BUF, + "\n "); + } + } + + /* logical volumes */ + sz += sprintf ( LVM_PROC_BUF, + "\n LV%s ", + vg[v]->lv_cur == 1 ? ": " : "s:"); + c = 0; + for ( l = 0; l < vg[v]->lv_max; l++) { + if ( vg[v]->lv[l] != NULL) { + inactive_flag = 'A'; + if ( ! 
( vg[v]->lv[l]->lv_status & LV_ACTIVE)) + inactive_flag = 'I'; + rw_flag = 'R'; + if ( vg[v]->lv[l]->lv_access & LV_WRITE) rw_flag = 'W'; + allocation_flag = 'D'; + if ( vg[v]->lv[l]->lv_allocation & LV_CONTIGUOUS) + allocation_flag = 'C'; + stripes_flag = 'L'; + if ( vg[v]->lv[l]->lv_stripes > 1) stripes_flag = 'S'; + sz += sprintf ( LVM_PROC_BUF, + "[%c%c%c%c", + inactive_flag, + rw_flag, + allocation_flag, + stripes_flag); + if ( vg[v]->lv[l]->lv_stripes > 1) + sz += sprintf ( LVM_PROC_BUF, "%-2d", + vg[v]->lv[l]->lv_stripes); + else + sz += sprintf ( LVM_PROC_BUF, " "); + lv_name = lvm_strrchr ( vg[v]->lv[l]->lv_name, '/'); + if ( lv_name != NULL) lv_name++; + else lv_name = vg[v]->lv[l]->lv_name; + sz += sprintf ( LVM_PROC_BUF, "] %-25s", lv_name); + if ( lvm_strlen ( lv_name) > 25) + sz += sprintf ( LVM_PROC_BUF, + "\n "); + sz += sprintf ( LVM_PROC_BUF, "%9d /%-6d ", + vg[v]->lv[l]->lv_size >> 1, + vg[v]->lv[l]->lv_size / vg[v]->pe_size); + + if ( vg[v]->lv[l]->lv_open == 0) + sz += sprintf ( LVM_PROC_BUF, "close"); + else + sz += sprintf ( LVM_PROC_BUF, "%dx open", + vg[v]->lv[l]->lv_open); + c++; + if ( c < vg[v]->lv_cur) sz += sprintf ( LVM_PROC_BUF, + "\n "); + } + } + if ( vg[v]->lv_cur == 0) + sz += sprintf ( LVM_PROC_BUF, "none"); + sz += sprintf ( LVM_PROC_BUF, "\n"); + } + } + } + + if ( buf == NULL) { + if ( ( buf = vmalloc ( sz)) == NULL) { + sz = 0; + return sprintf ( page, "%s - vmalloc error at line %d\n", + lvm_name, __LINE__); + } + } + sz_last = sz; + } + } + + if ( pos > sz - 1) { + vfree ( buf); + buf = NULL; + return 0; + } + + *start = &buf[pos]; + if ( sz - pos < count) return sz - pos; + else return count; +} /* lvm_proc_get_info () */ +#endif /* #if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS */ + + +/* + * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c + * (see init_module/lvm_init) + */ +static int lvm_map ( struct buffer_head *bh, int rw) { + int minor = MINOR ( bh->b_dev); + int ret = 0; + ulong index; + ulong size = bh->b_size >> 9; + ulong rsector_tmp = bh->b_blocknr * size; + ulong rsector_sav; + kdev_t rdev_tmp = bh->b_dev; + kdev_t rdev_sav; + lv_t *lv = vg[VG_BLK(minor)]->lv[LV_BLK(minor)]; + unsigned long pe_start; + + + if ( ! ( lv->lv_status & LV_ACTIVE)) { + printk ( KERN_ALERT + "%s - lvm_map: ll_rw_blk for inactive LV %s\n", + lvm_name, lv->lv_name); + return -1; + } + +/* +if ( lv->lv_access & LV_SNAPSHOT) +printk ( "%s -- %02d:%02d block: %lu rw: %d\n", lvm_name, MAJOR ( bh->b_dev), MINOR ( bh->b_dev), bh->b_blocknr, rw); +*/ + + /* take care of snapshot chunk writes before + check for writable logical volume */ + if ( ( lv->lv_access & LV_SNAPSHOT) && + MAJOR ( bh->b_dev) != 0 && + MAJOR ( bh->b_dev) != MAJOR_NR && +#ifdef WRITEA + ( rw == WRITEA || rw == WRITE)) +#else + rw == WRITE) +#endif + { +/* +printk ( "%s -- doing snapshot write for %02d:%02d[%02d:%02d] b_blocknr: %lu b_rsector: %lu\n", lvm_name, MAJOR ( bh->b_dev), MINOR ( bh->b_dev), MAJOR ( bh->b_dev), MINOR ( bh->b_dev), bh->b_blocknr, bh->b_rsector); +*/ + return 0; + } + +#ifdef WRITEA + if ( ( rw == WRITE || rw == WRITEA) && +#else + if ( rw == WRITE && +#endif + ! 
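/* Editor's worked example (not part of the original patch): for the linear
mapping further down, with a PE size of 8192 sectors (4 MB) a request at
rsector 20000 yields index = 20000 / 8192 = 2 and offset 20000 % 8192 = 3616,
so the buffer is redirected to lv_current_pe[2].dev at sector
lv_current_pe[2].pe + 3616; the numbers are chosen only for illustration. */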
( lv->lv_access & LV_WRITE)) { + printk ( KERN_CRIT + "%s - lvm_map: ll_rw_blk write for readonly LV %s\n", + lvm_name, lv->lv_name); + return -1; + } + + +#ifdef DEBUG_MAP + printk ( KERN_DEBUG + "%s - lvm_map minor:%d *rdev: %02d:%02d *rsector: %lu " + "size:%lu\n", + lvm_name, minor, + MAJOR ( rdev_tmp), + MINOR ( rdev_tmp), + rsector_tmp, size); +#endif + + if ( rsector_tmp + size > lv->lv_size) { + printk ( KERN_ALERT + "%s - lvm_map *rsector: %lu or size: %lu wrong for" + " minor: %2d\n", lvm_name, rsector_tmp, size, minor); + return -1; + } + + rsector_sav = rsector_tmp; + rdev_sav = rdev_tmp; + +lvm_second_remap: + /* linear mapping */ + if ( lv->lv_stripes < 2) { + index = rsector_tmp / vg[VG_BLK(minor)]->pe_size; /* get the index */ + pe_start = lv->lv_current_pe[index].pe; + rsector_tmp = lv->lv_current_pe[index].pe + + ( rsector_tmp % vg[VG_BLK(minor)]->pe_size); + rdev_tmp = lv->lv_current_pe[index].dev; + +#ifdef DEBUG_MAP + printk ( KERN_DEBUG + "lv_current_pe[%ld].pe: %d rdev: %02d:%02d rsector:%ld\n", + index, + lv->lv_current_pe[index].pe, + MAJOR ( rdev_tmp), + MINOR ( rdev_tmp), + rsector_tmp); +#endif + + /* striped mapping */ + } else { + ulong stripe_index; + ulong stripe_length; + + stripe_length = vg[VG_BLK(minor)]->pe_size * lv->lv_stripes; + stripe_index = ( rsector_tmp % stripe_length) / lv->lv_stripesize; + index = rsector_tmp / stripe_length + + ( stripe_index % lv->lv_stripes) * + ( lv->lv_allocated_le / lv->lv_stripes); + pe_start = lv->lv_current_pe[index].pe; + rsector_tmp = lv->lv_current_pe[index].pe + + ( rsector_tmp % stripe_length) - + ( stripe_index % lv->lv_stripes) * lv->lv_stripesize - + stripe_index / lv->lv_stripes * + ( lv->lv_stripes - 1) * lv->lv_stripesize; + rdev_tmp = lv->lv_current_pe[index].dev; + +#ifdef DEBUG_MAP + printk(KERN_DEBUG + "lv_current_pe[%ld].pe: %d rdev: %02d:%02d rsector:%ld\n" + "stripe_length: %ld stripe_index: %ld\n", + index, + lv->lv_current_pe[index].pe, + MAJOR ( rdev_tmp), + MINOR ( rdev_tmp), + rsector_tmp, + stripe_length, + stripe_index); +#endif + } + + /* handle physical extents on the move */ + if ( pe_lock_req.lock == LOCK_PE) { + if ( rdev_tmp == pe_lock_req.data.pv_dev && + rsector_tmp >= pe_lock_req.data.pv_offset && + rsector_tmp < ( pe_lock_req.data.pv_offset + + vg[VG_BLK(minor)]->pe_size)) { + sleep_on ( &lvm_map_wait); + rsector_tmp = rsector_sav; + rdev_tmp = rdev_sav; + goto lvm_second_remap; + } + } + + /* statistic */ +#ifdef WRITEA + if ( rw == WRITE || rw == WRITEA) +#else + if ( rw == WRITE) +#endif + lv->lv_current_pe[index].writes++; + else + lv->lv_current_pe[index].reads++; + + /* snapshot volume exception handling on physical device address base */ + if ( lv->lv_access & ( LV_SNAPSHOT | LV_SNAPSHOT_ORG)) { + /* original logical volume */ + if ( lv->lv_access & LV_SNAPSHOT_ORG) { +#ifdef WRITEA + if ( rw == WRITE || rw == WRITEA) +#else + if ( rw == WRITE) +#endif + { + lv_t *lv_ptr; + + /* start with first snapshot and loop thrugh all of them */ + for ( lv_ptr = lv->lv_snapshot_next; + lv_ptr != NULL; + lv_ptr = lv_ptr->lv_snapshot_next) { + down(&lv_ptr->lv_snapshot_sem); + /* do we still have exception storage for this snapshot free? 
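Editor's aside (not part of the original patch): if it does, and
lvm_snapshot_remap_block() finds no exception entry for this chunk yet,
lvm_snapshot_COW() copies the original data into the snapshot's exception
store before the write to the origin goes ahead - the copy-on-write step.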
*/ + if ( lv_ptr->lv_block_exception != NULL) { + kdev_t __dev; + unsigned long __sector; + + __dev = rdev_tmp; + __sector = rsector_tmp; + if (!lvm_snapshot_remap_block(&rdev_tmp, + &rsector_tmp, + pe_start, + lv_ptr)) + /* create a new mapping */ + ret = lvm_snapshot_COW(rdev_tmp, + rsector_tmp, + pe_start, + rsector_sav, + lv_ptr); + rdev_tmp = __dev; + rsector_tmp = __sector; + } + up(&lv_ptr->lv_snapshot_sem); + } + } + } else { + /* remap snapshot logical volume */ + down(&lv->lv_snapshot_sem); + if ( lv->lv_block_exception != NULL) + lvm_snapshot_remap_block ( &rdev_tmp, &rsector_tmp, pe_start, lv); + up(&lv->lv_snapshot_sem); + } + } + + bh->b_rdev = rdev_tmp; + bh->b_rsector = rsector_tmp; + + return ret; +} /* lvm_map () */ + + +/* + * lvm_map snapshot logical volume support functions + */ + +/* + * end lvm_map snapshot logical volume support functions + */ + + +/* + * internal support functions + */ + +#ifdef LVM_HD_NAME +/* + * generate "hard disk" name + */ +void lvm_hd_name ( char *buf, int minor) { + int len = 0; + + if ( vg[VG_BLK(minor)] == NULL || + vg[VG_BLK(minor)]->lv[LV_BLK(minor)] == NULL) return; + len = lvm_strlen ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_name) - 5; + lvm_memcpy ( buf, &vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_name[5], len); + buf[len] = 0; + return; +} +#endif + + +/* + * this one never should be called... + */ +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) +static void lvm_dummy_device_request ( request_queue_t *t) +#else +static void lvm_dummy_device_request ( void) +#endif +{ + printk ( KERN_EMERG + "%s -- oops, got lvm request for %02d:%02d [sector: %lu]\n", + lvm_name, + MAJOR ( CURRENT->rq_dev), + MINOR ( CURRENT->rq_dev), + CURRENT->sector); + return; +} + + +/* + * character device support function VGDA create + */ +int do_vg_create ( int minor, void *arg) { + int snaporg_minor = 0; + ulong l, p; + lv_t lv; + vg_t *vg_ptr; + + if ( vg[VG_CHR(minor)] != NULL) return -EPERM; + + if ( ( vg_ptr = kmalloc ( sizeof ( vg_t), GFP_USER)) == NULL) { + printk ( KERN_CRIT + "%s -- VG_CREATE: kmalloc error VG at line %d\n", + lvm_name, __LINE__); + return -ENOMEM; + } + + /* get the volume group structure */ + if ( copy_from_user ( vg_ptr, arg, sizeof ( vg_t)) != 0) { + kfree ( vg_ptr); + return -EFAULT; + } + + /* we are not that active so far... 
*/ + vg_ptr->vg_status &= ~VG_ACTIVE; + vg[VG_CHR(minor)] = vg_ptr; + + vg[VG_CHR(minor)]->pe_allocated = 0; + if ( vg[VG_CHR(minor)]->pv_max > ABS_MAX_PV) { + printk ( KERN_WARNING + "%s -- Can't activate VG: ABS_MAX_PV too small\n", + lvm_name); + kfree ( vg[VG_CHR(minor)]); + vg[VG_CHR(minor)] = NULL; + return -EPERM; + } + if ( vg[VG_CHR(minor)]->lv_max > ABS_MAX_LV) { + printk ( KERN_WARNING + "%s -- Can't activate VG: ABS_MAX_LV too small for %u\n", + lvm_name, vg[VG_CHR(minor)]->lv_max); + kfree ( vg[VG_CHR(minor)]); + vg[VG_CHR(minor)] = NULL; + return -EPERM; + } + + /* get the physical volume structures */ + vg[VG_CHR(minor)]->pv_act = vg[VG_CHR(minor)]->pv_cur = 0; + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + /* user space address */ + if ( ( pvp = vg[VG_CHR(minor)]->pv[p]) != NULL) { + vg[VG_CHR(minor)]->pv[p] = kmalloc ( sizeof ( pv_t), GFP_USER); + if ( vg[VG_CHR(minor)]->pv[p] == NULL) { + printk ( KERN_CRIT + "%s -- VG_CREATE: kmalloc error PV at line %d\n", + lvm_name, __LINE__); + do_vg_remove ( minor); + return -ENOMEM; + } + if ( copy_from_user ( vg[VG_CHR(minor)]->pv[p], pvp, + sizeof ( pv_t)) != 0) { + do_vg_remove ( minor); + return -EFAULT; + } + + /* We don't need the PE list + in kernel space as with LVs pe_t list (see below) */ + vg[VG_CHR(minor)]->pv[p]->pe = NULL; + vg[VG_CHR(minor)]->pv[p]->pe_allocated = 0; + vg[VG_CHR(minor)]->pv[p]->pv_status = PV_ACTIVE; + vg[VG_CHR(minor)]->pv_act++; + vg[VG_CHR(minor)]->pv_cur++; + +#ifdef LVM_GET_INODE + /* insert a dummy inode for fs_may_mount */ + vg[VG_CHR(minor)]->pv[p]->inode = + lvm_get_inode ( vg[VG_CHR(minor)]->pv[p]->pv_dev); +#endif + } + } + + /* get the logical volume structures */ + vg[VG_CHR(minor)]->lv_cur = 0; + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + /* user space address */ + if ( ( lvp = vg[VG_CHR(minor)]->lv[l]) != NULL) { + if ( copy_from_user ( &lv, lvp, sizeof ( lv_t)) != 0) { + do_vg_remove ( minor); + return -EFAULT; + } + vg[VG_CHR(minor)]->lv[l] = NULL; + { + int err; + + err = do_lv_create(minor, lv.lv_name, &lv); + if (err) + { + do_vg_remove(minor); + return err; + } + } + } + } + + /* Second path to correct snapshot logical volumes which are not + in place during first path above */ + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT) { + snaporg_minor = vg[VG_CHR(minor)]->lv[l]->lv_snapshot_minor; + if ( vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)] != NULL) { + /* get pointer to original logical volume */ + lv_t *lv_ptr = vg[VG_CHR(minor)]->lv[l]->lv_snapshot_org = + vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]; + + /* set necessary fields of original logical volume */ + lv_ptr->lv_access |= LV_SNAPSHOT_ORG; + lv_ptr->lv_snapshot_minor = 0; + lv_ptr->lv_snapshot_org = lv_ptr; + lv_ptr->lv_snapshot_prev = NULL; + + /* find last snapshot logical volume in the chain */ + while ( lv_ptr->lv_snapshot_next != NULL) + lv_ptr = lv_ptr->lv_snapshot_next; + + /* set back pointer to this last one in our new logical volume */ + vg[VG_CHR(minor)]->lv[l]->lv_snapshot_prev = lv_ptr; + + /* last logical volume now points to our new snapshot volume */ + lv_ptr->lv_snapshot_next = vg[VG_CHR(minor)]->lv[l]; + + /* now point to the new one */ + lv_ptr = lv_ptr->lv_snapshot_next; + + /* set necessary fields of new snapshot logical volume */ + lv_ptr->lv_snapshot_next = NULL; + lv_ptr->lv_current_pe = + vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_current_pe; + lv_ptr->lv_allocated_le = + 
vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_allocated_le; + lv_ptr->lv_current_le = + vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_current_le; + lv_ptr->lv_size = + vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_size; + } + } + } + + vg_count++; + + /* let's go active */ + vg[VG_CHR(minor)]->vg_status |= VG_ACTIVE; + +#ifdef MODULE + MOD_INC_USE_COUNT; +#endif + return 0; +} /* do_vg_create () */ + + +/* + * character device support function VGDA remove + */ +static int do_vg_remove ( int minor) { + int i; + + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + +#ifdef LVM_TOTAL_RESET + if ( vg[VG_CHR(minor)]->lv_open > 0 && lvm_reset_spindown == 0) +#else + if ( vg[VG_CHR(minor)]->lv_open > 0) +#endif + return -EPERM; + + /* let's go inactive */ + vg[VG_CHR(minor)]->vg_status &= ~VG_ACTIVE; + + /* free LVs */ + /* first free snapshot logical volumes */ + for ( i = 0; i < vg[VG_CHR(minor)]->lv_max; i++) { + if ( vg[VG_CHR(minor)]->lv[i] != NULL && + vg[VG_CHR(minor)]->lv[i]->lv_access & LV_SNAPSHOT) { + do_lv_remove ( minor, NULL, i); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout ( 1); + } + } + /* then free the rest */ + for ( i = 0; i < vg[VG_CHR(minor)]->lv_max; i++) { + if ( vg[VG_CHR(minor)]->lv[i] != NULL) { + do_lv_remove ( minor, NULL, i); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout ( 1); + } + } + + /* free PVs */ + for ( i = 0; i < vg[VG_CHR(minor)]->pv_max; i++) { + if ( vg[VG_CHR(minor)]->pv[i] != NULL) { +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG + "%s -- kfree %d\n", lvm_name, __LINE__); +#endif +#ifdef LVM_GET_INODE + lvm_clear_inode ( vg[VG_CHR(minor)]->pv[i]->inode); +#endif + kfree ( vg[VG_CHR(minor)]->pv[i]); + vg[VG_CHR(minor)]->pv[i] = NULL; + } + } + +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__); +#endif + kfree ( vg[VG_CHR(minor)]); + vg[VG_CHR(minor)] = NULL; + + vg_count--; + +#ifdef MODULE + MOD_DEC_USE_COUNT; +#endif + return 0; +} /* do_vg_remove () */ + + +/* + * character device support function logical volume create + */ +static int do_lv_create ( int minor, char *lv_name, lv_t *lv) { + int l, le, l_new, p, size; + ulong lv_status_save; + lv_block_exception_t *lvbe = lv->lv_block_exception; + lv_t *lv_ptr = NULL; + + if ( ( pep = lv->lv_current_pe) == NULL) return -EINVAL; + if ( lv->lv_chunk_size > LVM_SNAPSHOT_MAX_CHUNK) return -EINVAL; + + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->lv[l]->lv_name, lv_name) == 0) + return -EEXIST; + } + + /* in case of lv_remove(), lv_create() pair; for eg. 
lvrename does this */ + l_new = -1; + if ( vg[VG_CHR(minor)]->lv[lv->lv_number] == NULL) l_new = lv->lv_number; + else { + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] == NULL) if ( l_new == -1) l_new = l; + } + } + if ( l_new == -1) return -EPERM; + l = l_new; + + if ( ( lv_ptr = kmalloc ( sizeof ( lv_t), GFP_USER)) == NULL) {; + printk ( KERN_CRIT "%s -- LV_CREATE: kmalloc error LV at line %d\n", + lvm_name, __LINE__); + return -ENOMEM; + } + + /* copy preloaded LV */ + lvm_memcpy ( ( char*) lv_ptr, ( char *) lv, sizeof ( lv_t)); + + lv_status_save = lv_ptr->lv_status; + lv_ptr->lv_status &= ~LV_ACTIVE; + lv_ptr->lv_snapshot_org = \ + lv_ptr->lv_snapshot_prev = \ + lv_ptr->lv_snapshot_next = NULL; + lv_ptr->lv_block_exception = NULL; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 3, 4) + lv_ptr->lv_snapshot_sem = MUTEX; +#else + init_MUTEX(&lv_ptr->lv_snapshot_sem); +#endif + vg[VG_CHR(minor)]->lv[l] = lv_ptr; + + /* get the PE structures from user space if this + is no snapshot logical volume */ + if ( ! ( lv_ptr->lv_access & LV_SNAPSHOT)) { + size = lv_ptr->lv_allocated_le * sizeof ( pe_t); + if ( ( lv_ptr->lv_current_pe = vmalloc ( size)) == NULL) { + printk ( KERN_CRIT + "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d Byte " + "at line %d\n", + lvm_name, size, __LINE__); +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG "%s -- vfree %d\n", lvm_name, __LINE__); +#endif + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -ENOMEM; + } + + if ( copy_from_user ( lv_ptr->lv_current_pe, pep, size)) { + vfree ( lv_ptr->lv_current_pe); + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -EFAULT; + } + + /* correct the PE count in PVs */ + for ( le = 0; le < lv_ptr->lv_allocated_le; le++) { + vg[VG_CHR(minor)]->pe_allocated++; + for ( p = 0; p < vg[VG_CHR(minor)]->pv_cur; p++) { + if ( vg[VG_CHR(minor)]->pv[p]->pv_dev == + lv_ptr->lv_current_pe[le].dev) + vg[VG_CHR(minor)]->pv[p]->pe_allocated++; + } + } + } else { + /* Get snapshot exception data and block list */ + if ( lvbe != NULL) { + lv_ptr->lv_snapshot_org = + vg[VG_CHR(minor)]->lv[LV_BLK(lv_ptr->lv_snapshot_minor)]; + if ( lv_ptr->lv_snapshot_org != NULL) { + size = lv_ptr->lv_remap_end * sizeof ( lv_block_exception_t); + if ( ( lv_ptr->lv_block_exception = vmalloc ( size)) == NULL) { + printk ( KERN_CRIT + "%s -- do_lv_create: vmalloc error LV_BLOCK_EXCEPTION " + "of %d byte at line %d\n", + lvm_name, size, __LINE__); +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG "%s -- vfree %d\n", lvm_name, __LINE__); +#endif + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -ENOMEM; + } + + if ( copy_from_user ( lv_ptr->lv_block_exception, lvbe, size)) { + vfree ( lv_ptr->lv_block_exception); + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -EFAULT; + } + + /* get pointer to original logical volume */ + lv_ptr = lv_ptr->lv_snapshot_org; + + lv_ptr->lv_snapshot_minor = 0; + lv_ptr->lv_snapshot_org = lv_ptr; + lv_ptr->lv_snapshot_prev = NULL; + /* walk thrugh the snapshot list */ + while ( lv_ptr->lv_snapshot_next != NULL) + lv_ptr = lv_ptr->lv_snapshot_next; + /* now lv_ptr points to the last existing snapshot in the chain */ + vg[VG_CHR(minor)]->lv[l]->lv_snapshot_prev = lv_ptr; + /* our new one now back points to the previous last in the chain */ + lv_ptr = vg[VG_CHR(minor)]->lv[l]; + /* now lv_ptr points to our new last snapshot logical volume */ + lv_ptr->lv_snapshot_org = lv_ptr->lv_snapshot_prev->lv_snapshot_org; + lv_ptr->lv_snapshot_next = NULL; + 
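/* Editor's note (not part of the original patch): at this point the new
snapshot's back pointer (lv_snapshot_prev) refers to the previously last
snapshot in the origin's chain; the forward link is completed near the end
of do_lv_create(). The assignments below make the snapshot share the
origin's lv_current_pe map, which is why do_lv_remove() does not vfree()
that array for snapshot volumes. */ +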
lv_ptr->lv_current_pe = lv_ptr->lv_snapshot_org->lv_current_pe; + lv_ptr->lv_allocated_le = lv_ptr->lv_snapshot_org->lv_allocated_le; + lv_ptr->lv_current_le = lv_ptr->lv_snapshot_org->lv_current_le; + lv_ptr->lv_size = lv_ptr->lv_snapshot_org->lv_size; + lv_ptr->lv_stripes = lv_ptr->lv_snapshot_org->lv_stripes; + lv_ptr->lv_stripesize = lv_ptr->lv_snapshot_org->lv_stripesize; + { + int err; + + err = lvm_snapshot_alloc(lv_ptr); + if (err) + { + vfree(lv_ptr->lv_block_exception); + kfree(lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return err; + } + } + } else { + vfree ( lv_ptr->lv_block_exception); + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -EFAULT; + } + } else { + kfree ( vg[VG_CHR(minor)]->lv[l]); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -EINVAL; + } + } /* if ( vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT) */ + + lv_ptr = vg[VG_CHR(minor)]->lv[l]; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size; + lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; + vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = vg[VG_CHR(minor)]->vg_number; + vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = lv_ptr->lv_number; + LVM_CORRECT_READ_AHEAD ( lv_ptr->lv_read_ahead); + read_ahead[MAJOR_NR] = lv_ptr->lv_read_ahead; + vg[VG_CHR(minor)]->lv_cur++; + lv_ptr->lv_status = lv_status_save; + + /* optionally add our new snapshot LV */ + if ( lv_ptr->lv_access & LV_SNAPSHOT) { + /* sync the original logical volume */ + fsync_dev ( lv_ptr->lv_snapshot_org->lv_dev); + /* put ourselve into the chain */ + lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr; + lv_ptr->lv_snapshot_org->lv_access |= LV_SNAPSHOT_ORG; + } + + return 0; +} /* do_lv_create () */ + + +/* + * character device support function logical volume remove + */ +static int do_lv_remove ( int minor, char *lv_name, int l) { + uint le, p; + lv_t *lv_ptr; + + if ( l == -1) { + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->lv[l]->lv_name, lv_name) == 0) { + break; + } + } + } + + lv_ptr = vg[VG_CHR(minor)]->lv[l]; + if ( l < vg[VG_CHR(minor)]->lv_max) { +#ifdef LVM_TOTAL_RESET + if ( lv_ptr->lv_open > 0 && lvm_reset_spindown == 0) +#else + if ( lv_ptr->lv_open > 0) +#endif + return -EBUSY; + + /* check for deletion of snapshot source while + snapshot volume still exists */ + if ( ( lv_ptr->lv_access & LV_SNAPSHOT_ORG) && + lv_ptr->lv_snapshot_next != NULL) + return -EPERM; + + lv_ptr->lv_status |= LV_SPINDOWN; + + /* sync the buffers */ + fsync_dev ( lv_ptr->lv_dev); + + lv_ptr->lv_status &= ~LV_ACTIVE; + + /* invalidate the buffers */ + invalidate_buffers ( lv_ptr->lv_dev); + + /* reset generic hd */ + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = -1; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = 0; + lvm_size[MINOR(lv_ptr->lv_dev)] = 0; + + /* reset VG/LV mapping */ + vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = ABS_MAX_VG; + vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = -1; + + /* correct the PE count in PVs if this is no snapshot logical volume */ + if ( ! 
( lv_ptr->lv_access & LV_SNAPSHOT)) { + /* only if this is no snapshot logical volume because we share + the lv_current_pe[] structs with the original logical volume */ + for ( le = 0; le < lv_ptr->lv_allocated_le; le++) { + vg[VG_CHR(minor)]->pe_allocated--; + for ( p = 0; p < vg[VG_CHR(minor)]->pv_cur; p++) { + if ( vg[VG_CHR(minor)]->pv[p]->pv_dev == + lv_ptr->lv_current_pe[le].dev) + vg[VG_CHR(minor)]->pv[p]->pe_allocated--; + } + } + vfree ( lv_ptr->lv_current_pe); + /* LV_SNAPSHOT */ + } else { +/* + if ( lv_ptr->lv_block_exception != NULL) { + int i; + kdev_t last_dev; + for ( i = last_dev = 0; i < lv_ptr->lv_remap_ptr; i++) { + if ( lv_ptr->lv_block_exception[i].rdev_new != last_dev) { + last_dev = lv_ptr->lv_block_exception[i].rdev_new; + invalidate_buffers ( last_dev); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout ( 1); + } + } + } +*/ + /* remove this snapshot logical volume from the chain */ + lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next; + if ( lv_ptr->lv_snapshot_next != NULL) { + lv_ptr->lv_snapshot_next->lv_snapshot_prev = + lv_ptr->lv_snapshot_prev; + } + /* no more snapshots? */ + if ( lv_ptr->lv_snapshot_org->lv_snapshot_next == NULL) + lv_ptr->lv_snapshot_org->lv_access &= ~LV_SNAPSHOT_ORG; + lvm_snapshot_release(lv_ptr); + } + +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__); +#endif + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + vg[VG_CHR(minor)]->lv_cur--; + return 0; + } + + return -ENXIO; +} /* do_lv_remove () */ + + +/* + * character device support function logical volume extend / reduce + */ +static int do_lv_extend_reduce ( int minor, char *lv_name, lv_t *lv) { + int l, le, p, size, old_allocated_le; + uint32_t end, lv_status_save; + pe_t *pe; + + if ( ( pep = lv->lv_current_pe) == NULL) return -EINVAL; + + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->lv[l]->lv_name, lv_name) == 0) + break; + } + if ( l == vg[VG_CHR(minor)]->lv_max) return -ENXIO; + + /* check for active snapshot */ + if ( lv->lv_access & ( LV_SNAPSHOT|LV_SNAPSHOT_ORG)) return -EPERM; + + if ( ( pe = vmalloc ( size = lv->lv_current_le * sizeof ( pe_t))) == NULL) { + printk ( KERN_CRIT + "%s -- do_lv_extend_reduce: vmalloc error LV_CURRENT_PE " + "of %d Byte at line %d\n", + lvm_name, size, __LINE__); + return -ENOMEM; + } + + /* get the PE structures from user space */ + if ( copy_from_user ( pe, pep, size)) { + vfree ( pe); + return -EFAULT; + } + +#ifdef DEBUG + printk ( KERN_DEBUG + "%s -- fsync_dev and " + "invalidate_buffers for %s [%s] in %s\n", + lvm_name, vg[VG_CHR(minor)]->lv[l]->lv_name, + kdevname ( vg[VG_CHR(minor)]->lv[l]->lv_dev), + vg[VG_CHR(minor)]->vg_name); +#endif + + vg[VG_CHR(minor)]->lv[l]->lv_status |= LV_SPINDOWN; + fsync_dev ( vg[VG_CHR(minor)]->lv[l]->lv_dev); + vg[VG_CHR(minor)]->lv[l]->lv_status &= ~LV_ACTIVE; + invalidate_buffers ( vg[VG_CHR(minor)]->lv[l]->lv_dev); + + /* reduce allocation counters on PV(s) */ + for ( le = 0; le < vg[VG_CHR(minor)]->lv[l]->lv_allocated_le; le++) { + vg[VG_CHR(minor)]->pe_allocated--; + for ( p = 0; p < vg[VG_CHR(minor)]->pv_cur; p++) { + if ( vg[VG_CHR(minor)]->pv[p]->pv_dev == + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev) { + vg[VG_CHR(minor)]->pv[p]->pe_allocated--; + break; + } + } + } + +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG "%s -- vfree %d\n", lvm_name, __LINE__); +#endif + + /* save pointer to "old" lv/pe pointer array */ + pep1 = 
vg[VG_CHR(minor)]->lv[l]->lv_current_pe; + end = vg[VG_CHR(minor)]->lv[l]->lv_current_le; + + /* save open counter */ + lv_open = vg[VG_CHR(minor)]->lv[l]->lv_open; + + /* save # of old allocated logical extents */ + old_allocated_le = vg[VG_CHR(minor)]->lv[l]->lv_allocated_le; + + /* copy preloaded LV */ + lv_status_save = lv->lv_status; + lv->lv_status |= LV_SPINDOWN; + lv->lv_status &= ~LV_ACTIVE; + lvm_memcpy ( ( char*) vg[VG_CHR(minor)]->lv[l], ( char*) lv, sizeof ( lv_t)); + vg[VG_CHR(minor)]->lv[l]->lv_current_pe = pe; + vg[VG_CHR(minor)]->lv[l]->lv_open = lv_open; + + /* save availiable i/o statistic data */ + /* linear logical volume */ + if ( vg[VG_CHR(minor)]->lv[l]->lv_stripes < 2) { + /* Check what last LE shall be used */ + if ( end > vg[VG_CHR(minor)]->lv[l]->lv_current_le) + end = vg[VG_CHR(minor)]->lv[l]->lv_current_le; + for ( le = 0; le < end; le++) { + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].reads = pep1[le].reads; + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].writes = pep1[le].writes; + } + /* striped logical volume */ + } else { + uint i, j, source, dest, end, old_stripe_size, new_stripe_size; + + old_stripe_size = old_allocated_le / vg[VG_CHR(minor)]->lv[l]->lv_stripes; + new_stripe_size = vg[VG_CHR(minor)]->lv[l]->lv_allocated_le / + vg[VG_CHR(minor)]->lv[l]->lv_stripes; + end = old_stripe_size; + if ( end > new_stripe_size) end = new_stripe_size; + for ( i = source = dest = 0; + i < vg[VG_CHR(minor)]->lv[l]->lv_stripes; i++) { + for ( j = 0; j < end; j++) { + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[dest+j].reads = + pep1[source+j].reads; + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[dest+j].writes = + pep1[source+j].writes; + } + source += old_stripe_size; + dest += new_stripe_size; + } + } + vfree ( pep1); pep1 = NULL; + + + /* extend the PE count in PVs */ + for ( le = 0; le < vg[VG_CHR(minor)]->lv[l]->lv_allocated_le; le++) { + vg[VG_CHR(minor)]->pe_allocated++; + for ( p = 0; p < vg[VG_CHR(minor)]->pv_cur; p++) { + if ( vg[VG_CHR(minor)]->pv[p]->pv_dev == + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev) { + vg[VG_CHR(minor)]->pv[p]->pe_allocated++; + break; + } + } + } + + lvm_gendisk.part[MINOR(vg[VG_CHR(minor)]->lv[l]->lv_dev)].start_sect = 0; + lvm_gendisk.part[MINOR(vg[VG_CHR(minor)]->lv[l]->lv_dev)].nr_sects = + vg[VG_CHR(minor)]->lv[l]->lv_size; + lvm_size[MINOR(vg[VG_CHR(minor)]->lv[l]->lv_dev)] = + vg[VG_CHR(minor)]->lv[l]->lv_size >> 1; + /* vg_lv_map array doesn't have to be changed here */ + + LVM_CORRECT_READ_AHEAD ( vg[VG_CHR(minor)]->lv[l]->lv_read_ahead); + read_ahead[MAJOR_NR] = vg[VG_CHR(minor)]->lv[l]->lv_read_ahead; + vg[VG_CHR(minor)]->lv[l]->lv_status = lv_status_save; + + return 0; +} /* do_lv_extend_reduce () */ + + +/* + * support function initialize gendisk variables + */ +#ifdef __initfunc +__initfunc ( void lvm_geninit ( struct gendisk *lvm_gdisk)) +#else +void __init lvm_geninit ( struct gendisk *lvm_gdisk) +#endif +{ + int i = 0; + +#ifdef DEBUG_GENDISK + printk ( KERN_DEBUG "%s -- lvm_gendisk\n", lvm_name); +#endif + + for ( i = 0; i < MAX_LV; i++) { + lvm_gendisk.part[i].start_sect = -1; /* avoid partition check */ + lvm_size[i] = lvm_gendisk.part[i].nr_sects = 0; + lvm_blocksizes[i] = BLOCK_SIZE; + } + + blksize_size[MAJOR_NR] = lvm_blocksizes; + blk_size[MAJOR_NR] = lvm_size; + + return; +} /* lvm_gen_init () */ + + +#ifdef LVM_GET_INODE +/* + * support function to get an empty inode + * + * Gets an empty inode to be inserted into the inode hash, + * so that a physical volume can't be mounted. 
+ * This is analog to drivers/block/md.c + * + * Is this the real thing? + * + */ +struct inode *lvm_get_inode ( int dev) { + struct inode *inode_this = NULL; + + /* Lock the device by inserting a dummy inode. */ + inode_this = get_empty_inode (); + inode_this->i_dev = dev; + insert_inode_hash ( inode_this); + return inode_this; +} + + +/* + * support function to clear an inode + * + */ +void lvm_clear_inode ( struct inode *inode) { +#ifdef I_FREEING + inode->i_state |= I_FREEING; +#endif + clear_inode ( inode); + return; +} +#endif /* #ifdef LVM_GET_INODE */ + + +/* my strlen */ +inline int lvm_strlen ( char *s1) { + int len = 0; + + while ( s1[len] != 0) len++; + return len; +} + + +/* my strcmp */ +inline int lvm_strcmp ( char *s1, char *s2) { + while ( *s1 != 0 && *s2 != 0) { + if ( *s1 != *s2) return -1; + s1++; s2++; + } + if ( *s1 == 0 && *s2 == 0) return 0; + return -1; +} + + +/* my strrchr */ +inline char *lvm_strrchr ( char *s1, char c) { + char *s2 = NULL; + + while ( *s1 != 0) { + if ( *s1 == c) s2 = s1; + s1++; + } + return s2; +} + + +/* my memcpy */ +inline void lvm_memcpy ( char *dest, char *source, int size) { + for ( ;size > 0; size--) *dest++ = *source++; +} diff -urN 2.2.14/drivers/block/raid1.c 2.2.14aa6/drivers/block/raid1.c --- 2.2.14/drivers/block/raid1.c Mon Jan 17 16:44:36 2000 +++ 2.2.14aa6/drivers/block/raid1.c Wed Feb 2 02:31:42 2000 @@ -211,7 +211,11 @@ while (!( /* FIXME: now we are rather fault tolerant than nice */ r1_bh = kmalloc (sizeof (struct raid1_bh), GFP_KERNEL) ) ) + { printk ("raid1_make_request(#1): out of memory\n"); + current->policy |= SCHED_YIELD; + schedule(); + } memset (r1_bh, 0, sizeof (struct raid1_bh)); /* @@ -299,7 +303,11 @@ while (!( /* FIXME: now we are rather fault tolerant than nice */ mirror_bh[i] = kmalloc (sizeof (struct buffer_head), GFP_KERNEL) ) ) + { printk ("raid1_make_request(#2): out of memory\n"); + current->policy |= SCHED_YIELD; + schedule(); + } memset (mirror_bh[i], 0, sizeof (struct buffer_head)); /* @@ -711,7 +719,11 @@ while (!( /* FIXME: now we are rather fault tolerant than nice */ mddev->private = kmalloc (sizeof (struct raid1_data), GFP_KERNEL) ) ) + { printk ("raid1_run(): out of memory\n"); + current->policy |= SCHED_YIELD; + schedule(); + } raid_conf = mddev->private; memset(raid_conf, 0, sizeof(*raid_conf)); diff -urN 2.2.14/drivers/block/rd.c 2.2.14aa6/drivers/block/rd.c --- 2.2.14/drivers/block/rd.c Fri Jan 7 18:19:11 2000 +++ 2.2.14aa6/drivers/block/rd.c Wed Feb 2 02:31:42 2000 @@ -177,7 +177,7 @@ if (CURRENT->cmd == READ) memset(CURRENT->buffer, 0, len); else - set_bit(BH_Protected, &CURRENT->bh->b_state); + mark_buffer_protected(CURRENT->bh); end_request(1); goto repeat; diff -urN 2.2.14/drivers/char/Config.in 2.2.14aa6/drivers/char/Config.in --- 2.2.14/drivers/char/Config.in Wed Jan 5 14:16:52 2000 +++ 2.2.14aa6/drivers/char/Config.in Wed Feb 2 02:31:42 2000 @@ -112,6 +112,9 @@ tristate '/dev/nvram support' CONFIG_NVRAM bool 'Enhanced Real Time Clock Support' CONFIG_RTC +if [ "$CONFIG_RTC" = "y" -a "$ARCH" = "alpha" ]; then + bool ' Use only lightweight version (no interrupts)' CONFIG_RTC_LIGHT +fi if [ "$CONFIG_ALPHA_BOOK1" = "y" ]; then bool 'Tadpole ANA H8 Support' CONFIG_H8 fi diff -urN 2.2.14/drivers/char/Makefile 2.2.14aa6/drivers/char/Makefile --- 2.2.14/drivers/char/Makefile Wed Jan 5 14:16:52 2000 +++ 2.2.14aa6/drivers/char/Makefile Wed Feb 2 02:31:43 2000 @@ -20,7 +20,7 @@ L_TARGET := char.a M_OBJS := -L_OBJS := tty_io.o n_tty.o tty_ioctl.o mem.o random.o +L_OBJS := tty_io.o n_tty.o 
tty_ioctl.o mem.o random.o raw.o LX_OBJS := pty.o misc.o ifdef CONFIG_VT diff -urN 2.2.14/drivers/char/mem.c 2.2.14aa6/drivers/char/mem.c --- 2.2.14/drivers/char/mem.c Wed Jan 5 14:16:52 2000 +++ 2.2.14aa6/drivers/char/mem.c Wed Feb 2 02:31:43 2000 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -620,6 +621,7 @@ if (register_chrdev(MEM_MAJOR,"mem",&memory_fops)) printk("unable to get major %d for memory devs\n", MEM_MAJOR); rand_initialize(); + raw_init(); #ifdef CONFIG_USB #ifdef CONFIG_USB_UHCI uhci_init(); diff -urN 2.2.14/drivers/char/raw.c 2.2.14aa6/drivers/char/raw.c --- 2.2.14/drivers/char/raw.c Thu Jan 1 01:00:00 1970 +++ 2.2.14aa6/drivers/char/raw.c Wed Feb 2 02:31:43 2000 @@ -0,0 +1,384 @@ +/* + * linux/drivers/char/raw.c + * + * Front-end raw character devices. These can be bound to any block + * devices to provide genuine Unix raw character device semantics. + * + * We reserve minor number 0 for a control interface. ioctl()s on this + * device are used to bind the other minor numbers to block devices. + */ + +#include +#include +#include +#include +#include +#include + +#define dprintk(x...) + +static kdev_t raw_device_bindings[256] = {}; +static int raw_device_inuse[256] = {}; +static int raw_device_sector_size[256] = {}; +static int raw_device_sector_bits[256] = {}; + +extern struct file_operations * get_blkfops(unsigned int major); + +static ssize_t rw_raw_dev(int rw, struct file *, char *, size_t, loff_t *); + +ssize_t raw_read(struct file *, char *, size_t, loff_t *); +ssize_t raw_write(struct file *, const char *, size_t, loff_t *); +int raw_open(struct inode *, struct file *); +int raw_release(struct inode *, struct file *); +int raw_ctl_ioctl(struct inode *, struct file *, unsigned int, unsigned long); + + +static struct file_operations raw_fops = { + NULL, /* llseek */ + raw_read, /* read */ + raw_write, /* write */ + NULL, /* readdir */ + NULL, /* poll */ + NULL, /* ioctl */ + NULL, /* mmap */ + raw_open, /* open */ + NULL, /* flush */ + raw_release, /* release */ + NULL /* fsync */ +}; + +static struct file_operations raw_ctl_fops = { + NULL, /* llseek */ + NULL, /* read */ + NULL, /* write */ + NULL, /* readdir */ + NULL, /* poll */ + raw_ctl_ioctl, /* ioctl */ + NULL, /* mmap */ + raw_open, /* open */ + NULL, /* flush */ + NULL, /* no special release code */ + NULL /* fsync */ +}; + + + +void __init raw_init(void) +{ + register_chrdev(RAW_MAJOR, "raw", &raw_fops); +} + + +/* + * The raw IO open and release code needs to fake appropriate + * open/release calls to the underlying block devices. + */ + +static int bdev_open(kdev_t dev, int mode) +{ + int err = 0; + struct file dummy_file = {}; + struct dentry dummy_dentry = {}; + struct inode * inode = get_empty_inode(); + + if (!inode) + return -ENOMEM; + + dummy_file.f_op = get_blkfops(MAJOR(dev)); + if (!dummy_file.f_op) { + err = -ENODEV; + goto done; + } + + if (dummy_file.f_op->open) { + inode->i_rdev = dev; + dummy_dentry.d_inode = inode; + dummy_file.f_dentry = &dummy_dentry; + dummy_file.f_mode = mode; + err = dummy_file.f_op->open(inode, &dummy_file); + } + + done: + iput(inode); + return err; +} + +static int bdev_close(kdev_t dev) +{ + int err; + struct inode * inode = get_empty_inode(); + + if (!inode) + return -ENOMEM; + + inode->i_rdev = dev; + err = blkdev_release(inode); + iput(inode); + return err; +} + + + +/* + * Open/close code for raw IO. 
+ */ + +int raw_open(struct inode *inode, struct file *filp) +{ + int minor; + kdev_t bdev; + int err; + int sector_size; + int sector_bits; + + minor = MINOR(inode->i_rdev); + + /* + * Is it the control device? + */ + + if (minor == 0) { + filp->f_op = &raw_ctl_fops; + return 0; + } + + /* + * No, it is a normal raw device. All we need to do on open is + * to check that the device is bound, and force the underlying + * block device to a sector-size blocksize. + */ + + bdev = raw_device_bindings[minor]; + if (bdev == NODEV) + return -ENODEV; + + err = bdev_open(bdev, filp->f_mode); + if (err) + return err; + + /* + * Don't change the blocksize if we already have users using + * this device + */ + + if (raw_device_inuse[minor]++) + return 0; + + /* + * Don't interfere with mounted devices: we cannot safely set + * the blocksize on a device which is already mounted. + */ + + sector_size = 512; + if (lookup_vfsmnt(bdev) != NULL) { + if (blksize_size[MAJOR(bdev)]) + sector_size = blksize_size[MAJOR(bdev)][MINOR(bdev)]; + } else { + if (hardsect_size[MAJOR(bdev)]) + sector_size = hardsect_size[MAJOR(bdev)][MINOR(bdev)]; + } + + set_blocksize(bdev, sector_size); + raw_device_sector_size[minor] = sector_size; + + for (sector_bits = 0; !(sector_size & 1); ) + sector_size>>=1, sector_bits++; + raw_device_sector_bits[minor] = sector_bits; + + return 0; +} + +int raw_release(struct inode *inode, struct file *filp) +{ + int minor; + kdev_t bdev; + + minor = MINOR(inode->i_rdev); + bdev = raw_device_bindings[minor]; + bdev_close(bdev); + raw_device_inuse[minor]--; + return 0; +} + + + +/* + * Deal with ioctls against the raw-device control interface, to bind + * and unbind other raw devices. + */ + +int raw_ctl_ioctl(struct inode *inode, + struct file *flip, + unsigned int command, + unsigned long arg) +{ + struct raw_config_request rq; + int err = 0; + int minor; + + switch (command) { + case RAW_SETBIND: + case RAW_GETBIND: + + /* First, find out which raw minor we want */ + + err = copy_from_user(&rq, (void *) arg, sizeof(rq)); + if (err) + break; + + minor = rq.raw_minor; + if (minor == 0 || minor > MINORMASK) { + err = -EINVAL; + break; + } + + if (command == RAW_SETBIND) { + /* + * For now, we don't need to check that the underlying + * block device is present or not: we can do that when + * the raw device is opened. Just check that the + * major/minor numbers make sense. 
+ */ + + if (rq.block_major == NODEV || + rq.block_major > MAX_BLKDEV || + rq.block_minor > MINORMASK) { + err = -EINVAL; + break; + } + + if (raw_device_inuse[minor]) { + err = -EBUSY; + break; + } + raw_device_bindings[minor] = + MKDEV(rq.block_major, rq.block_minor); + } else { + rq.block_major = MAJOR(raw_device_bindings[minor]); + rq.block_minor = MINOR(raw_device_bindings[minor]); + err = copy_to_user((void *) arg, &rq, sizeof(rq)); + } + break; + + default: + err = -EINVAL; + } + + return err; +} + + + +ssize_t raw_read(struct file *filp, char * buf, + size_t size, loff_t *offp) +{ + return rw_raw_dev(READ, filp, buf, size, offp); +} + +ssize_t raw_write(struct file *filp, const char *buf, + size_t size, loff_t *offp) +{ + return rw_raw_dev(WRITE, filp, (char *) buf, size, offp); +} + +#define SECTOR_BITS 9 +#define SECTOR_SIZE (1U << SECTOR_BITS) +#define SECTOR_MASK (SECTOR_SIZE - 1) + +ssize_t rw_raw_dev(int rw, struct file *filp, char *buf, + size_t size, loff_t *offp) +{ + struct kiobuf * iobuf; + int err; + unsigned long blocknr, blocks; + unsigned long b[KIO_MAX_SECTORS]; + size_t transferred; + int iosize; + int i; + int minor; + kdev_t dev; + unsigned long limit; + + int sector_size, sector_bits, sector_mask; + int max_sectors; + + /* + * First, a few checks on device size limits + */ + + minor = MINOR(filp->f_dentry->d_inode->i_rdev); + dev = raw_device_bindings[minor]; + sector_size = raw_device_sector_size[minor]; + sector_bits = raw_device_sector_bits[minor]; + sector_mask = sector_size- 1; + max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9); + + if (blk_size[MAJOR(dev)]) + limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits; + else + limit = INT_MAX; + dprintk ("rw_raw_dev: dev %d:%d (+%d)\n", + MAJOR(dev), MINOR(dev), limit); + + if ((*offp & sector_mask) || (size & sector_mask)) + return -EINVAL; + if ((*offp >> sector_bits) > limit) + return 0; + + /* + * We'll just use one kiobuf + */ + + err = alloc_kiovec(1, &iobuf); + if (err) + return err; + + /* + * Split the IO into KIO_MAX_SECTORS chunks, mapping and + * unmapping the single kiobuf as we go to perform each chunk of + * IO. + */ + + transferred = 0; + blocknr = *offp >> sector_bits; + while (size > 0) { + blocks = size >> sector_bits; + if (blocks > max_sectors) + blocks = max_sectors; + if (blocks > limit - blocknr) + blocks = limit - blocknr; + if (!blocks) + break; + + iosize = blocks << sector_bits; + + err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize); + if (err) + break; + + for (i=0; i < blocks; i++) + b[i] = blocknr++; + + err = brw_kiovec(rw, 1, &iobuf, dev, b, sector_size); + + if (err >= 0) { + transferred += err; + size -= err; + buf += err; + } + + unmap_kiobuf(iobuf); + + if (err != iosize) + break; + } + + free_kiovec(1, &iobuf); + + if (transferred) { + *offp += transferred; + return transferred; + } + + return err; +} diff -urN 2.2.14/drivers/char/rtc.c 2.2.14aa6/drivers/char/rtc.c --- 2.2.14/drivers/char/rtc.c Sun Jan 2 18:26:37 2000 +++ 2.2.14aa6/drivers/char/rtc.c Wed Feb 2 02:31:42 2000 @@ -120,6 +120,7 @@ * (See ./arch/XXXX/kernel/time.c for the set_rtc_mmss() function.) */ +#ifndef CONFIG_RTC_LIGHT static void rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) { /* @@ -137,6 +138,7 @@ if (rtc_status & RTC_TIMER_ON) mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); } +#endif /* * Now all the various file operations that we export. 
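
[Illustrative aside, not part of the patch] The raw driver above is configured entirely through ioctl()s on its minor-0 control device: RAW_SETBIND records a block major/minor in raw_device_bindings[], and raw_open() later forces the bound block device to a sector-sized blocksize. A minimal user-space sketch of such a bind is below; the /dev node name and the header providing struct raw_config_request are assumptions that depend on how the system sets up its device nodes for this driver.

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/raw.h>          /* assumed location of struct raw_config_request / RAW_SETBIND */

	int main(void)
	{
		struct raw_config_request rq;
		int fd = open("/dev/raw0", O_RDWR);   /* assumed name of the minor-0 control node */

		if (fd < 0) {
			perror("open");
			return 1;
		}

		rq.raw_minor   = 1;     /* bind raw minor 1 ...                  */
		rq.block_major = 8;     /* ... to block device major 8, minor 1  */
		rq.block_minor = 1;     /*     (e.g. /dev/sda1)                  */

		if (ioctl(fd, RAW_SETBIND, &rq) < 0)  /* driver rejects minor 0 and busy minors */
			perror("RAW_SETBIND");

		close(fd);
		return 0;
	}

Reads and writes on the bound raw minor then bypass the buffer cache and must be sector-aligned in both offset and length, as enforced by rw_raw_dev() above.
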
@@ -150,6 +152,9 @@ static ssize_t rtc_read(struct file *file, char *buf, size_t count, loff_t *ppos) { +#ifdef CONFIG_RTC_LIGHT + return -EIO; +#else struct wait_queue wait = { current, NULL }; unsigned long data; ssize_t retval; @@ -181,6 +186,7 @@ remove_wait_queue(&rtc_wait, &wait); return retval; +#endif } static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, @@ -191,6 +197,7 @@ struct rtc_time wtime; switch (cmd) { +#ifndef CONFIG_RTC_LIGHT case RTC_AIE_OFF: /* Mask alarm int. enab. bit */ { mask_rtc_irq_bit(RTC_AIE); @@ -238,6 +245,7 @@ set_rtc_irq_bit(RTC_UIE); return 0; } +#endif case RTC_ALM_READ: /* Read the present alarm time */ { /* @@ -377,6 +385,7 @@ { return put_user(rtc_freq, (unsigned long *)arg); } +#ifndef CONFIG_RTC_LIGHT case RTC_IRQP_SET: /* Set periodic IRQ rate. */ { int tmp = 0; @@ -413,6 +422,7 @@ restore_flags(flags); return 0; } +#endif #ifdef __alpha__ case RTC_EPOCH_READ: /* Read the epoch. */ { @@ -462,6 +472,7 @@ * in use, and clear the data. */ +#ifndef CONFIG_RTC_LIGHT unsigned char tmp; unsigned long flags; @@ -481,10 +492,12 @@ } rtc_irq_data = 0; +#endif rtc_status &= ~RTC_IS_OPEN; return 0; } +#ifndef CONFIG_RTC_LIGHT static unsigned int rtc_poll(struct file *file, poll_table *wait) { poll_wait(file, &rtc_wait, wait); @@ -492,6 +505,7 @@ return POLLIN | POLLRDNORM; return 0; } +#endif /* * The various file operations we support. @@ -502,7 +516,11 @@ rtc_read, NULL, /* No write */ NULL, /* No readdir */ +#ifdef CONFIG_RTC_LIGHT + NULL, +#else rtc_poll, +#endif rtc_ioctl, NULL, /* No mmap */ rtc_open, @@ -526,12 +544,14 @@ char *guess = NULL; #endif printk(KERN_INFO "Real Time Clock Driver v%s\n", RTC_VERSION); +#ifndef CONFIG_RTC_LIGHT if(request_irq(RTC_IRQ, rtc_interrupt, SA_INTERRUPT, "rtc", NULL)) { /* Yeah right, seeing as irq 8 doesn't even hit the bus. */ printk(KERN_ERR "rtc: IRQ %d is not free.\n", RTC_IRQ); return -EIO; } +#endif misc_register(&rtc_dev); /* Check region? Naaah! Just snarf it up. */ request_region(RTC_PORT(0), RTC_IO_EXTENT, "rtc"); @@ -565,6 +585,7 @@ if (guess) printk("rtc: %s epoch (%lu) detected\n", guess, epoch); #endif +#ifndef CONFIG_RTC_LIGHT init_timer(&rtc_irq_timer); rtc_irq_timer.function = rtc_dropped_irq; rtc_wait = NULL; @@ -573,6 +594,7 @@ /* Initialize periodic freq. to CMOS reset default, which is 1024Hz */ CMOS_WRITE(((CMOS_READ(RTC_FREQ_SELECT) & 0xF0) | 0x06), RTC_FREQ_SELECT); restore_flags(flags); +#endif rtc_freq = 1024; return 0; } @@ -589,6 +611,7 @@ * for something that requires a steady > 1KHz signal anyways.) */ +#ifndef CONFIG_RTC_LIGHT void rtc_dropped_irq(unsigned long data) { unsigned long flags; @@ -603,6 +626,7 @@ rtc_irq_data |= (CMOS_READ(RTC_INTR_FLAGS) & 0xF0); /* restart */ restore_flags(flags); } +#endif /* * Info exported via "/proc/rtc". @@ -789,6 +813,7 @@ * meddles with the interrupt enable/disable bits. 
*/ +#ifndef CONFIG_RTC_LIGHT void mask_rtc_irq_bit(unsigned char bit) { unsigned char val; @@ -818,3 +843,4 @@ rtc_irq_data = 0; restore_flags(flags); } +#endif diff -urN 2.2.14/fs/Makefile 2.2.14aa6/fs/Makefile --- 2.2.14/fs/Makefile Thu Aug 26 14:20:19 1999 +++ 2.2.14aa6/fs/Makefile Wed Feb 2 02:31:43 2000 @@ -13,7 +13,7 @@ O_OBJS = open.o read_write.o devices.o file_table.o buffer.o \ super.o block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o fifo.o locks.o filesystems.o \ - dcache.o inode.o attr.o bad_inode.o file.o $(BINFMTS) + dcache.o inode.o attr.o bad_inode.o file.o iobuf.o $(BINFMTS) MOD_LIST_NAME := FS_MODULES ALL_SUB_DIRS = coda minix ext2 fat msdos vfat proc isofs nfs umsdos ntfs \ diff -urN 2.2.14/fs/buffer.c 2.2.14aa6/fs/buffer.c --- 2.2.14/fs/buffer.c Wed Jan 5 14:16:55 2000 +++ 2.2.14aa6/fs/buffer.c Wed Feb 2 02:31:43 2000 @@ -27,6 +27,8 @@ /* invalidate_buffers/set_blocksize/sync_dev race conditions and fs corruption fixes, 1999, Andrea Arcangeli */ +/* async buffer flushing, 1999 Andrea Arcangeli */ + #include #include #include @@ -39,6 +41,8 @@ #include #include #include +#include +#include #include #include @@ -78,6 +82,7 @@ static int nr_buffers = 0; static int nr_buffers_type[NR_LIST] = {0,}; +static unsigned long size_buffers_type[NR_LIST]; static int nr_buffer_heads = 0; static int nr_unused_buffer_heads = 0; static int nr_hashed_buffers = 0; @@ -140,8 +145,9 @@ add_wait_queue(&bh->b_wait, &wait); repeat: tsk->state = TASK_UNINTERRUPTIBLE; - run_task_queue(&tq_disk); + mb(); if (buffer_locked(bh)) { + run_task_queue(&tq_disk); schedule(); goto repeat; } @@ -469,6 +475,7 @@ return; } nr_buffers_type[bh->b_list]--; + size_buffers_type[bh->b_list] -= bh->b_size; remove_from_hash_queue(bh); remove_from_lru_list(bh); } @@ -518,6 +525,7 @@ (*bhp)->b_prev_free = bh; nr_buffers_type[bh->b_list]++; + size_buffers_type[bh->b_list] += bh->b_size; /* Put the buffer in new hash-queue if it has a device. 
*/ bh->b_next = NULL; @@ -678,21 +686,26 @@ bhnext = bh->b_next_free; if (bh->b_dev != dev || bh->b_size == size) continue; - if (buffer_dirty(bh)) - printk(KERN_ERR "set_blocksize: dev %s buffer_dirty %lu size %lu\n", kdevname(dev), bh->b_blocknr, bh->b_size); if (buffer_locked(bh)) { slept = 1; wait_on_buffer(bh); } + if (buffer_dirty(bh)) + printk(KERN_WARNING "set_blocksize: dev %s buffer_dirty %lu size %lu\n", kdevname(dev), bh->b_blocknr, bh->b_size); if (!bh->b_count) put_last_free(bh); else - printk(KERN_ERR + { + mark_buffer_clean(bh); + clear_bit(BH_Uptodate, &bh->b_state); + clear_bit(BH_Req, &bh->b_state); + printk(KERN_WARNING "set_blocksize: " - "b_count %d, dev %s, block %lu!\n", + "b_count %d, dev %s, block %lu, from %p\n", bh->b_count, bdevname(bh->b_dev), - bh->b_blocknr); + bh->b_blocknr, __builtin_return_address(0)); + } if (slept) goto again; } @@ -805,6 +818,46 @@ insert_into_queues(bh); } +/* -1 -> no need to flush + 0 -> async flush + 1 -> sync flush (wait for I/O completation) */ +static int balance_dirty_state(kdev_t dev) +{ + unsigned long dirty, tot, hard_dirty_limit, soft_dirty_limit; + + dirty = size_buffers_type[BUF_DIRTY] >> PAGE_SHIFT; + tot = (buffermem >> PAGE_SHIFT) + nr_free_pages - nr_free_bigpages; + tot -= size_buffers_type[BUF_PROTECTED] >> PAGE_SHIFT; + + dirty *= 200; + soft_dirty_limit = tot * bdf_prm.b_un.nfract; + hard_dirty_limit = soft_dirty_limit * 2; + + if (dirty > soft_dirty_limit) + { + if (dirty > hard_dirty_limit) + return 1; + return 0; + } + return -1; +} + +/* + * if a new dirty buffer is created we need to balance bdflush. + * + * in the future we might want to make bdflush aware of different + * pressures on different devices - thus the (currently unused) + * 'dev' parameter. + */ +void balance_dirty(kdev_t dev) +{ + int state = balance_dirty_state(dev); + + if (state < 0) + return; + wakeup_bdflush(state); +} + /* * A buffer may need to be moved from one buffer list to another * (e.g. in case it is not shared any more). Handle this. @@ -817,7 +870,9 @@ printk("Attempt to refile free buffer\n"); return; } - if (buffer_dirty(buf)) + if (buffer_protected(buf)) + dispose = BUF_PROTECTED; + else if (buffer_dirty(buf)) dispose = BUF_DIRTY; else if (buffer_locked(buf)) dispose = BUF_LOCKED; @@ -826,13 +881,7 @@ if(dispose != buf->b_list) { file_buffer(buf, dispose); if(dispose == BUF_DIRTY) { - int too_many = (nr_buffers * bdf_prm.b_un.nfract/100); - - /* This buffer is dirty, maybe we need to start flushing. - * If too high a percentage of the buffers are dirty... - */ - if (nr_buffers_type[BUF_DIRTY] > too_many) - wakeup_bdflush(1); + balance_dirty(buf->b_dev); /* If this is a loop device, and * more than half of the buffers are dirty... @@ -1250,6 +1299,225 @@ return; } + +/* + * For brw_kiovec: submit a set of buffer_head temporary IOs and wait + * for them to complete. Clean up the buffer_heads afterwards. + */ + +#define dprintk(x...) + +static int do_kio(int rw, int nr, struct buffer_head *bh[], int size) +{ + int iosize; + int i; + int err; + struct buffer_head *tmp; + + dprintk ("do_kio start\n"); + + ll_rw_block(rw, nr, bh); + iosize = err = 0; + + for (i = nr; --i >= 0; ) { + tmp = bh[i]; + wait_on_buffer(tmp); + if (!buffer_uptodate(tmp)) { + err = -EIO; + /* We are waiting on bh'es in reverse order so + clearing iosize on error calculates the + amount of IO before the first error. 
*/ + iosize = 0; + } + + free_async_buffers(tmp); + iosize += size; + } + + dprintk ("do_kio end %d %d\n", iosize, err); + + if (iosize) + return iosize; + else + return err; +} + +/* + * Clean up the bounce buffers potentially used by brw_kiovec. All of + * the kiovec's bounce buffers must be cleared of temporarily allocated + * bounce pages, but only READ pages for whom IO completed successfully + * can actually be transferred back to user space. + */ + +void cleanup_bounce_buffers(int rw, int nr, struct kiobuf *iovec[], + int transferred) +{ + int i; + for (i = 0; i < nr; i++) { + struct kiobuf *iobuf = iovec[i]; + if (iobuf->bounced) { + if (transferred > 0 && !(rw & WRITE)) + kiobuf_copy_bounce(iobuf, COPY_FROM_BOUNCE, + transferred); + + clear_kiobuf_bounce_pages(iobuf); + } + transferred -= iobuf->length; + } +} + +/* + * Start I/O on a physical range of kernel memory, defined by a vector + * of kiobuf structs (much like a user-space iovec list). + * + * The kiobuf must already be locked for IO. IO is submitted + * asynchronously: you need to check page->locked, page->uptodate, and + * maybe wait on page->wait. + * + * It is up to the caller to make sure that there are enough blocks + * passed in to completely map the iobufs to disk. + */ + +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], + kdev_t dev, unsigned long b[], int size) +{ + int err; + int length; + int transferred; + int i; + int bufind; + int pageind; + int bhind; + int offset; + unsigned long blocknr; + struct kiobuf * iobuf = NULL; + unsigned long page; + unsigned long bounce; + struct page * map; + struct buffer_head *tmp, *bh[KIO_MAX_SECTORS]; + + /* + * First, do some alignment and validity checks + */ + for (i = 0; i < nr; i++) { + iobuf = iovec[i]; + if ((iobuf->offset & (size-1)) || + (iobuf->length & (size-1))) + return -EINVAL; + if (!iobuf->locked) + panic("brw_kiovec: iobuf not locked for I/O"); + if (!iobuf->nr_pages) + panic("brw_kiovec: iobuf not initialised"); + } + + /* DEBUG */ +#if 0 + return iobuf->length; +#endif + dprintk ("brw_kiovec: start\n"); + + /* + * OK to walk down the iovec doing page IO on each page we find. 
+ */ + bufind = bhind = transferred = err = 0; + for (i = 0; i < nr; i++) { + iobuf = iovec[i]; + err = setup_kiobuf_bounce_pages(iobuf, GFP_USER); + if (err) + goto finished; + if (rw & WRITE) + kiobuf_copy_bounce(iobuf, COPY_TO_BOUNCE, -1); + + offset = iobuf->offset; + length = iobuf->length; + dprintk ("iobuf %d %d %d\n", offset, length, size); + + for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { + map = iobuf->maplist[pageind]; + bounce = iobuf->bouncelist[pageind]; + + if (bounce) + page = bounce; + else + page = iobuf->pagelist[pageind]; + + while (length > 0) { + blocknr = b[bufind++]; + tmp = get_unused_buffer_head(0); + if (!tmp) { + err = -ENOMEM; + goto error; + } + + tmp->b_dev = B_FREE; + tmp->b_size = size; + tmp->b_data = (char *) (page + offset); + tmp->b_this_page = tmp; + + init_buffer(tmp, dev, blocknr, + end_buffer_io_sync, NULL); + if (rw == WRITE) { + set_bit(BH_Uptodate, &tmp->b_state); + set_bit(BH_Dirty, &tmp->b_state); + } + + dprintk ("buffer %d (%d) at %p\n", + bhind, tmp->b_blocknr, tmp->b_data); + bh[bhind++] = tmp; + length -= size; + offset += size; + + /* + * Start the IO if we have got too much or if + * this is the end of the last iobuf + */ + if (bhind >= KIO_MAX_SECTORS) { + err = do_kio(rw, bhind, bh, size); + if (err >= 0) + transferred += err; + else + goto finished; + bhind = 0; + } + + if (offset >= PAGE_SIZE) { + offset = 0; + break; + } + } /* End of block loop */ + } /* End of page loop */ + } /* End of iovec loop */ + + /* Is there any IO still left to submit? */ + if (bhind) { + err = do_kio(rw, bhind, bh, size); + if (err >= 0) + transferred += err; + else + goto finished; + } + + finished: + dprintk ("brw_kiovec: end (%d, %d)\n", transferred, err); + + cleanup_bounce_buffers(rw, nr, iovec, transferred); + + if (transferred) + return transferred; + return err; + + error: + /* We got an error allocation the bh'es. Just free the current + buffer_heads and exit. */ + for (i = bhind; --i >= 0; ) { + free_async_buffers(bh[bhind]); + } + + clear_kiobuf_bounce_pages(iobuf); + + goto finished; +} + /* * Start I/O on a page. * This function expects the page to be locked and may return before I/O is complete. 
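
[Illustrative aside, not part of the patch] rw_raw_dev() above splits each request into chunks of at most max_sectors sectors (KIO_MAX_SECTORS scaled by the sector size) and clamps the final chunk to the end of the device before handing each piece to brw_kiovec(). A standalone sketch of that chunking arithmetic, with purely illustrative names:

	#include <stddef.h>

	struct chunk { unsigned long start_sector; unsigned long nr_sectors; };

	/* Decompose a byte range (offset, size) into per-submission chunks.
	 * Returns the number of chunks written into out[]. */
	size_t split_raw_io(unsigned long long offset, size_t size,
			    int sector_bits, unsigned long max_sectors,
			    unsigned long device_limit,     /* device size in sectors */
			    struct chunk *out, size_t out_len)
	{
		unsigned long blocknr = offset >> sector_bits;
		size_t n = 0;

		while (size > 0 && n < out_len) {
			unsigned long blocks = size >> sector_bits;

			if (blocks > max_sectors)               /* kiobuf limit per submission */
				blocks = max_sectors;
			if (blocks > device_limit - blocknr)    /* don't run past end of device */
				blocks = device_limit - blocknr;
			if (!blocks)
				break;

			out[n].start_sector = blocknr;
			out[n].nr_sectors = blocks;
			n++;

			blocknr += blocks;
			size -= (size_t)blocks << sector_bits;
		}
		return n;
	}

In the driver itself, each such chunk maps the corresponding piece of the user buffer with map_user_kiobuf() and is submitted as one batch of buffer heads through brw_kiovec()/do_kio().
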
@@ -1472,7 +1740,8 @@ if (!buffer_busy(p)) continue; - wakeup_bdflush(0); + if (buffer_dirty(bh)) + wakeup_bdflush(0); return 0; } while (tmp != bh); @@ -1503,7 +1772,7 @@ int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0; int protected = 0; int nlist; - static char *buf_types[NR_LIST] = {"CLEAN","LOCKED","DIRTY"}; + static char *buf_types[NR_LIST] = {"CLEAN","LOCKED","DIRTY","PROTECTED",}; printk("Buffer memory: %8ldkB\n",buffermem>>10); printk("Buffer heads: %6d\n",nr_buffer_heads); @@ -1527,7 +1796,7 @@ used++, lastused = found; bh = bh->b_next_free; } while (bh != lru_list[nlist]); - printk("%8s: %d buffers, %d used (last=%d), " + printk("%9s: %d buffers, %d used (last=%d), " "%d locked, %d protected, %d dirty\n", buf_types[nlist], found, used, lastused, locked, protected, dirty); @@ -1699,7 +1968,6 @@ if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount); printk("Wrote %d/%d buffers\n", nwritten, ndirty); #endif - run_task_queue(&tq_disk); return 0; } @@ -1872,7 +2140,8 @@ /* If there are still a lot of dirty buffers around, skip the sleep and flush some more */ - if(ndirty == 0 || nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) { + if (!ndirty || balance_dirty_state(NODEV) < 0) + { spin_lock_irq(¤t->sigmask_lock); flush_signals(current); spin_unlock_irq(¤t->sigmask_lock); @@ -1896,13 +2165,18 @@ tsk->session = 1; tsk->pgrp = 1; strcpy(tsk->comm, "kupdate"); + + /* sigstop and sigcont will stop and wakeup kupdate */ + spin_lock_irq(&tsk->sigmask_lock); sigfillset(&tsk->blocked); - /* sigcont will wakeup kupdate after setting interval to 0 */ sigdelset(&tsk->blocked, SIGCONT); + sigdelset(&tsk->blocked, SIGSTOP); + spin_unlock_irq(&tsk->sigmask_lock); lock_kernel(); for (;;) { + /* update interval */ interval = bdf_prm.b_un.interval; if (interval) { @@ -1911,8 +2185,24 @@ } else { + stop_kupdate: tsk->state = TASK_STOPPED; schedule(); /* wait for SIGCONT */ + } + /* check for sigstop */ + if (signal_pending(tsk)) + { + int stopped = 0; + spin_lock_irq(&tsk->sigmask_lock); + if (sigismember(&tsk->signal, SIGSTOP)) + { + sigdelset(&tsk->signal, SIGSTOP); + stopped = 1; + } + recalc_sigpending(tsk); + spin_unlock_irq(&tsk->sigmask_lock); + if (stopped) + goto stop_kupdate; } #ifdef DEBUG printk("kupdate() activated...\n"); diff -urN 2.2.14/fs/dcache.c 2.2.14aa6/fs/dcache.c --- 2.2.14/fs/dcache.c Wed Jan 5 14:16:55 2000 +++ 2.2.14aa6/fs/dcache.c Wed Feb 2 02:31:42 2000 @@ -476,7 +476,7 @@ { if (gfp_mask & __GFP_IO) { int count = 0; - if (priority) + if (priority > 1) count = dentry_stat.nr_unused / priority; prune_dcache(count, -1); } diff -urN 2.2.14/fs/dquot.c 2.2.14aa6/fs/dquot.c --- 2.2.14/fs/dquot.c Fri Jan 7 18:19:17 2000 +++ 2.2.14aa6/fs/dquot.c Wed Feb 2 02:31:42 2000 @@ -539,7 +539,6 @@ struct dquot *get_empty_dquot(void) { struct dquot *dquot; - int count; repeat: dquot = find_best_free(); @@ -569,10 +568,10 @@ /* * Try pruning the dcache to free up some dquots ... */ - printk(KERN_DEBUG "get_empty_dquot: pruning %d\n", count); + printk(KERN_DEBUG "get_empty_dquot: pruning\n"); if (prune_dcache(0, 128)) { - free_inode_memory(count); + free_inode_memory(); goto repeat; } diff -urN 2.2.14/fs/inode.c 2.2.14aa6/fs/inode.c --- 2.2.14/fs/inode.c Fri Jan 7 18:19:18 2000 +++ 2.2.14aa6/fs/inode.c Wed Feb 2 02:31:42 2000 @@ -435,7 +435,7 @@ * This is the externally visible routine for * inode memory management. 
*/ -void free_inode_memory(int goal) +void free_inode_memory(void) { spin_lock(&inode_lock); free_inodes(); diff -urN 2.2.14/fs/iobuf.c 2.2.14aa6/fs/iobuf.c --- 2.2.14/fs/iobuf.c Thu Jan 1 01:00:00 1970 +++ 2.2.14aa6/fs/iobuf.c Wed Feb 2 02:31:43 2000 @@ -0,0 +1,236 @@ +/* + * iobuf.c + * + * Keep track of the general-purpose IO-buffer structures used to track + * abstract kernel-space io buffers. + * + */ + +#include +#include +#include +#include + +static kmem_cache_t *kiobuf_cachep; + +void __init kiobuf_init(void) +{ + kiobuf_cachep = kmem_cache_create("kiobuf", + sizeof(struct kiobuf), + 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if(!kiobuf_cachep) + panic("Cannot create kernel iobuf cache\n"); +} + + +int alloc_kiovec(int nr, struct kiobuf **bufp) +{ + int i; + struct kiobuf *iobuf; + + for (i = 0; i < nr; i++) { + iobuf = kmem_cache_alloc(kiobuf_cachep, SLAB_KERNEL); + if (!iobuf) { + free_kiovec(i, bufp); + return -ENOMEM; + } + + memset(iobuf, 0, sizeof(*iobuf)); + iobuf->array_len = KIO_STATIC_PAGES; + iobuf->pagelist = iobuf->page_array; + iobuf->maplist = iobuf->map_array; + iobuf->bouncelist = iobuf->bounce_array; + *bufp++ = iobuf; + } + + return 0; +} + +void clear_kiobuf_bounce_pages(struct kiobuf *iobuf) +{ + int i; + + if (!iobuf->bounced) + return; + + for (i = 0; i < iobuf->nr_pages; i++) { + unsigned long page = iobuf->bouncelist[i]; + if (page) + free_page(page); + } + iobuf->bounced = 0; +} + +void free_kiovec(int nr, struct kiobuf **bufp) +{ + struct kiobuf *iobuf; + int i; + + for (i = 0; i < nr; i++) { + iobuf = bufp[i]; + clear_kiobuf_bounce_pages(iobuf); + if (iobuf->array_len > KIO_STATIC_PAGES) { + kfree (iobuf->pagelist); + } + kmem_cache_free(kiobuf_cachep, bufp[i]); + } +} + +int expand_kiobuf(struct kiobuf *iobuf, int wanted) +{ + unsigned long * pagelist, * bouncelist; + struct page ** maplist; + + if (iobuf->array_len >= wanted) + return 0; + + /* + * kmalloc enough space for the page, map and bounce lists all + * at once. + */ + pagelist = (unsigned long *) + kmalloc(3 * wanted * sizeof(unsigned long), GFP_KERNEL); + if (!pagelist) + return -ENOMEM; + + /* Did it grow while we waited? */ + if (iobuf->array_len >= wanted) { + kfree(pagelist); + return 0; + } + + maplist = (struct page **) (pagelist + wanted); + bouncelist = pagelist + 2 * wanted; + + memcpy (pagelist, iobuf->pagelist, + iobuf->array_len * sizeof(unsigned long)); + memcpy (maplist, iobuf->maplist, + iobuf->array_len * sizeof(struct page **)); + memcpy (bouncelist, iobuf->bouncelist, + iobuf->array_len * sizeof(unsigned long)); + + if (iobuf->array_len > KIO_STATIC_PAGES) + kfree (iobuf->pagelist); + + iobuf->pagelist = pagelist; + iobuf->maplist = maplist; + iobuf->bouncelist = bouncelist; + iobuf->array_len = wanted; + return 0; +} + + +/* + * Test whether a given page from the bounce buffer matches the given + * gfp_mask. Return true if a bounce buffer is required for this + * page. + */ + +static inline int test_bounce_page(unsigned long page, + struct page * map, + int gfp_mask) +{ + /* Unmapped pages from PCI memory or BIGMEM pages always need a + * bounce buffer unless the caller is prepared to accept + * GFP_BIGMEM pages. 
*/ + + if (!map || PageBIGMEM(map) ) + /* Careful, the following must return the right value + * even if CONFIG_BIGMEM is not set */ + return !(gfp_mask & __GFP_BIGMEM); + + /* A DMA-able page never needs a bounce buffer */ + if (PageDMA(map)) + return 0; + + /* Otherwise it is a non-ISA-DMA-capable page and needs bounce + * buffers if GFP_DMA is requested */ + return gfp_mask & __GFP_DMA; +} + +int setup_kiobuf_bounce_pages(struct kiobuf *iobuf, int gfp_mask) +{ + int i; + + clear_kiobuf_bounce_pages(iobuf); + + for (i = 0; i < iobuf->nr_pages; i++) { + struct page *map = iobuf->maplist[i]; + unsigned long page = iobuf->pagelist[i]; + unsigned long bounce_page; + + if (!test_bounce_page(page, map, gfp_mask)) { + iobuf->bouncelist[i] = 0; + continue; + } + + bounce_page = __get_free_page(gfp_mask); + if (!bounce_page) + goto error; + + iobuf->bouncelist[i] = bounce_page; + iobuf->bounced = 1; + } + return 0; + + error: + clear_kiobuf_bounce_pages(iobuf); + return -ENOMEM; +} + +/* + * Copy a bounce buffer. For completion of partially-failed read IOs, + * we need to be able to place an upper limit on the data successfully + * transferred from bounce buffers to the user's own buffers. + */ + +void kiobuf_copy_bounce(struct kiobuf *iobuf, int direction, int max) +{ + int i; + int offset, length; + + if (!iobuf->bounced) + return; + + offset = iobuf->offset; + length = iobuf->length; + if (max >= 0 && length > max) + length = max; + + i = 0; + + if (offset > PAGE_SIZE) { + i = (offset >> PAGE_SHIFT); + offset &= ~PAGE_MASK; + } + + for (; i < iobuf->nr_pages && length > 0; i++) { + unsigned long page = iobuf->pagelist[i]; + unsigned long bounce_page = iobuf->bouncelist[i]; + unsigned long kin, kout; + int pagelen = length; + + if (bounce_page) { + if (pagelen > PAGE_SIZE) + pagelen = PAGE_SIZE; + + if (direction == COPY_TO_BOUNCE) { + kin = kmap(page, KM_READ); + kout = kmap(bounce_page, KM_WRITE); + } else { + kin = kmap(bounce_page, KM_READ); + kout = kmap(page, KM_WRITE); + } + + memcpy((char *) (kout+offset), + (char *) (kin+offset), + pagelen); + kunmap(kout, KM_WRITE); + kunmap(kin, KM_READ); + } + + length -= pagelen; + offset = 0; + } +} diff -urN 2.2.14/fs/proc/array.c 2.2.14aa6/fs/proc/array.c --- 2.2.14/fs/proc/array.c Wed Jan 5 14:16:55 2000 +++ 2.2.14aa6/fs/proc/array.c Wed Feb 2 02:31:42 2000 @@ -42,6 +42,8 @@ * Alan Cox : security fixes. 
* * + * Gerhard Wichert : added BIGMEM support + * Siemens AG */ #include @@ -374,6 +376,8 @@ "MemShared: %8lu kB\n" "Buffers: %8lu kB\n" "Cached: %8lu kB\n" + "BigTotal: %8lu kB\n" + "BigFree: %8lu kB\n" "SwapTotal: %8lu kB\n" "SwapFree: %8lu kB\n", i.totalram >> 10, @@ -381,6 +385,8 @@ i.sharedram >> 10, i.bufferram >> 10, page_cache_size << (PAGE_SHIFT - 10), + i.totalbig >> 10, + i.freebig >> 10, i.totalswap >> 10, i.freeswap >> 10); } @@ -436,6 +442,8 @@ return pte_page(pte) + (ptr & ~PAGE_MASK); } +#include + static int get_array(struct task_struct *p, unsigned long start, unsigned long end, char * buffer) { unsigned long addr; @@ -448,6 +456,7 @@ addr = get_phys_addr(p, start); if (!addr) return result; + addr = kmap(addr, KM_READ); do { c = *(char *) addr; if (!c) @@ -455,12 +464,19 @@ if (size < PAGE_SIZE) buffer[size++] = c; else + { + kunmap(addr, KM_READ); return result; + } addr++; start++; if (!c && start >= end) + { + kunmap(addr, KM_READ); return result; + } } while (addr & ~PAGE_MASK); + kunmap(addr-1, KM_READ); } return result; } diff -urN 2.2.14/fs/proc/fd.c 2.2.14aa6/fs/proc/fd.c --- 2.2.14/fs/proc/fd.c Sun Oct 31 23:31:32 1999 +++ 2.2.14aa6/fs/proc/fd.c Wed Feb 2 02:31:42 2000 @@ -87,7 +87,6 @@ fd = 0; len = dentry->d_name.len; name = dentry->d_name.name; - if (len > 1 && *name == '0') goto out; while (len-- > 0) { c = *name - '0'; name++; diff -urN 2.2.14/fs/proc/mem.c 2.2.14aa6/fs/proc/mem.c --- 2.2.14/fs/proc/mem.c Wed Jan 5 14:16:55 2000 +++ 2.2.14aa6/fs/proc/mem.c Wed Feb 2 02:31:42 2000 @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -120,7 +121,9 @@ i = PAGE_SIZE-(addr & ~PAGE_MASK); if (i > scount) i = scount; + page = (char *) kmap((unsigned long) page, KM_READ); copy_to_user(tmp, page, i); + kunmap((unsigned long) page, KM_READ); addr += i; tmp += i; scount -= i; @@ -177,7 +180,9 @@ i = PAGE_SIZE-(addr & ~PAGE_MASK); if (i > count) i = count; + page = (unsigned long) kmap((unsigned long) page, KM_WRITE); copy_from_user(page, tmp, i); + kunmap((unsigned long) page, KM_WRITE); addr += i; tmp += i; count -= i; diff -urN 2.2.14/fs/proc/root.c 2.2.14aa6/fs/proc/root.c --- 2.2.14/fs/proc/root.c Sun Oct 31 23:31:32 1999 +++ 2.2.14aa6/fs/proc/root.c Wed Feb 2 02:31:42 2000 @@ -845,7 +845,6 @@ } pid *= 10; pid += c; - if (!pid) break; if (pid & 0xffff0000) { pid = 0; break; diff -urN 2.2.14/include/asm-alpha/bigmem.h 2.2.14aa6/include/asm-alpha/bigmem.h --- 2.2.14/include/asm-alpha/bigmem.h Thu Jan 1 01:00:00 1970 +++ 2.2.14aa6/include/asm-alpha/bigmem.h Wed Feb 2 02:31:42 2000 @@ -0,0 +1,27 @@ +/* + * linux/include/asm-alpha/bigmem.h + * + * On alpha we can address all the VM with a flat mapping. We need + * to differentiate BIGMEM memory only because the default PCI DMA window + * is currently limited to 2g. Thus kmap/kunmap are noops here. + * + * With bigmem support the alpha now is capable of allocating up to + * 2048Giga of memory. 
+ * + * Copyright (C) 2000 Andrea Arcangeli , SuSE GmbH + */ + +#ifndef _ASM_BIGMEM_H +#define _ASM_BIGMEM_H + +#include + +#undef BIGMEM_DEBUG /* undef for production */ + +/* declarations for bigmem.c */ +extern unsigned long bigmem_start, bigmem_end; + +#define kmap(kaddr, type) kaddr +#define kunmap(vaddr, type) do { } while (0) + +#endif /* _ASM_BIGMEM_H */ diff -urN 2.2.14/include/asm-alpha/pgtable.h 2.2.14aa6/include/asm-alpha/pgtable.h --- 2.2.14/include/asm-alpha/pgtable.h Fri Jan 21 03:31:05 2000 +++ 2.2.14aa6/include/asm-alpha/pgtable.h Wed Feb 2 02:33:49 2000 @@ -17,13 +17,27 @@ #include /* For the task lock */ -/* Caches aren't brain-dead on the Alpha. */ -#define flush_cache_all() do { } while (0) +/* The icache is not coherent with the dcache on alpha, thus before + running self modified code we must always run an imb(). + Actually flush_cache_all() is real overkill as it's recalled from + vmalloc() before accessing pagetables and on the Alpha we are not required + to flush the icache before doing that, but the semantic of flush_cache_all() + requires us to flush _all_ the caches and so we must be correct here. It's + instead vmalloc that should be changed to use a more finegrined cache + flush operation (I suspect that also other archs doesn't need an icache + flush while handling pagetables). OTOH vmalloc is not a performance critical + path so after all we can live with it for now. */ +#define flush_cache_all() flush_icache_range(0, 0) #define flush_cache_mm(mm) do { } while (0) #define flush_cache_range(mm, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr) do { } while (0) #define flush_page_to_ram(page) do { } while (0) -#define flush_icache_range(start, end) do { } while (0) +#ifndef __SMP__ +#define flush_icache_range(start, end) imb() +#else +#define flush_icache_range(start, end) smp_imb() +extern void smp_imb(void); +#endif /* * Use a few helper functions to hide the ugly broken ASN diff -urN 2.2.14/include/asm-i386/bigmem.h 2.2.14aa6/include/asm-i386/bigmem.h --- 2.2.14/include/asm-i386/bigmem.h Thu Jan 1 01:00:00 1970 +++ 2.2.14aa6/include/asm-i386/bigmem.h Wed Feb 2 23:41:47 2000 @@ -0,0 +1,69 @@ +/* + * bigmem.h: virtual kernel memory mappings for big memory + * + * Used in CONFIG_BIGMEM systems for memory pages which are not + * addressable by direct kernel virtual adresses. 
+ * + * Copyright (C) 1999 Gerhard Wichert, Siemens AG + * Gerhard.Wichert@pdb.siemens.de + */ + +#ifndef _ASM_BIGMEM_H +#define _ASM_BIGMEM_H + +#include + +#undef BIGMEM_DEBUG /* undef for production */ + +/* declarations for bigmem.c */ +extern unsigned long bigmem_start, bigmem_end; +extern int nr_free_bigpages; + +extern pte_t *kmap_pte; +extern pgprot_t kmap_prot; + +extern void kmap_init(void) __init; + +/* kmap helper functions necessary to access the bigmem pages in kernel */ +#include +#include + +extern inline unsigned long kmap(unsigned long kaddr, enum km_type type) +{ + if (__pa(kaddr) < bigmem_start) + return kaddr; + { + enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id(); + unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN+idx); + +#ifdef BIGMEM_DEBUG + if (!pte_none(*(kmap_pte-idx))) + { + __label__ here; + here: + printk(KERN_ERR "not null pte on CPU %d from %p\n", + smp_processor_id(), &&here); + } +#endif + set_pte(kmap_pte-idx, mk_pte(kaddr & PAGE_MASK, kmap_prot)); + __flush_tlb_one(vaddr); + + return vaddr | (kaddr & ~PAGE_MASK); + } +} + +extern inline void kunmap(unsigned long vaddr, enum km_type type) +{ +#ifdef BIGMEM_DEBUG + enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id(); + if ((vaddr & PAGE_MASK) == __fix_to_virt(FIX_KMAP_BEGIN+idx)) + { + /* force other mappings to Oops if they'll try to access + this pte without first remap it */ + pte_clear(kmap_pte-idx); + __flush_tlb_one(vaddr); + } +#endif +} + +#endif /* _ASM_BIGMEM_H */ diff -urN 2.2.14/include/asm-i386/fixmap.h 2.2.14aa6/include/asm-i386/fixmap.h --- 2.2.14/include/asm-i386/fixmap.h Wed Jan 5 14:16:55 2000 +++ 2.2.14aa6/include/asm-i386/fixmap.h Wed Feb 2 23:37:00 2000 @@ -6,6 +6,8 @@ * for more details. * * Copyright (C) 1998 Ingo Molnar + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ #ifndef _ASM_FIXMAP_H @@ -14,6 +16,10 @@ #include #include #include +#ifdef CONFIG_BIGMEM +#include +#include +#endif /* * Here we define all the compile-time 'special' virtual @@ -55,6 +61,10 @@ FIX_CO_APIC, /* Cobalt APIC Redirection Table */ FIX_LI_PCIA, /* Lithium PCI Bridge A */ FIX_LI_PCIB, /* Lithium PCI Bridge B */ +#endif +#ifdef CONFIG_BIGMEM + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, #endif __end_of_fixed_addresses }; diff -urN 2.2.14/include/asm-i386/io.h 2.2.14aa6/include/asm-i386/io.h --- 2.2.14/include/asm-i386/io.h Tue Dec 28 16:10:32 1999 +++ 2.2.14aa6/include/asm-i386/io.h Wed Feb 2 23:37:00 2000 @@ -27,6 +27,7 @@ /* * Bit simplified and optimized by Jan Hubicka + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. 
*/ #ifdef SLOW_IO_BY_JUMPING @@ -109,12 +110,20 @@ */ extern inline unsigned long virt_to_phys(volatile void * address) { +#ifdef CONFIG_BIGMEM + return __pa(address); +#else return __io_phys(address); +#endif } extern inline void * phys_to_virt(unsigned long address) { +#ifdef CONFIG_BIGMEM + return __va(address); +#else return __io_virt(address); +#endif } extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); diff -urN 2.2.14/include/asm-i386/kmap_types.h 2.2.14aa6/include/asm-i386/kmap_types.h --- 2.2.14/include/asm-i386/kmap_types.h Thu Jan 1 01:00:00 1970 +++ 2.2.14aa6/include/asm-i386/kmap_types.h Wed Feb 2 02:31:42 2000 @@ -0,0 +1,10 @@ +#ifndef _ASM_KMAP_TYPES_H +#define _ASM_KMAP_TYPES_H + +enum km_type { + KM_READ, + KM_WRITE, + KM_TYPE_NR, +}; + +#endif diff -urN 2.2.14/include/asm-i386/page.h 2.2.14aa6/include/asm-i386/page.h --- 2.2.14/include/asm-i386/page.h Tue Dec 28 16:10:28 1999 +++ 2.2.14aa6/include/asm-i386/page.h Wed Feb 2 23:37:00 2000 @@ -88,6 +88,7 @@ #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) #define MAP_NR(addr) (__pa(addr) >> PAGE_SHIFT) +#define PHYSMAP_NR(addr) ((unsigned long)(addr) >> PAGE_SHIFT) #endif /* __KERNEL__ */ diff -urN 2.2.14/include/linux/bigmem.h 2.2.14aa6/include/linux/bigmem.h --- 2.2.14/include/linux/bigmem.h Thu Jan 1 01:00:00 1970 +++ 2.2.14aa6/include/linux/bigmem.h Wed Feb 2 23:41:47 2000 @@ -0,0 +1,50 @@ +#ifndef _LINUX_BIGMEM_H +#define _LINUX_BIGMEM_H + +#include + +#ifdef CONFIG_BIGMEM + +#include + +/* declarations for linux/mm/bigmem.c */ +extern unsigned long bigmem_mapnr; +extern int nr_free_bigpages; + +extern struct page * prepare_bigmem_swapout(struct page *); +extern struct page * replace_with_bigmem(struct page *); +extern unsigned long prepare_bigmem_shm_swapin(unsigned long); + +#else /* CONFIG_BIGMEM */ + +#define prepare_bigmem_swapout(page) page +#define replace_with_bigmem(page) page +#define prepare_bigmem_shm_swapin(page) page +#define kmap(kaddr, type) kaddr +#define kunmap(vaddr, type) do { } while (0) +#define nr_free_bigpages 0 + +#endif /* CONFIG_BIGMEM */ + +/* when CONFIG_BIGMEM is not set these will be plain clear/copy_page */ +extern inline void clear_bigpage(unsigned long kaddr) +{ + unsigned long vaddr; + + vaddr = kmap(kaddr, KM_WRITE); + clear_page(vaddr); + kunmap(vaddr, KM_WRITE); +} + +extern inline void copy_bigpage(unsigned long to, unsigned long from) +{ + unsigned long vfrom, vto; + + vfrom = kmap(from, KM_READ); + vto = kmap(to, KM_WRITE); + copy_page(vto, vfrom); + kunmap(vfrom, KM_READ); + kunmap(vto, KM_WRITE); +} + +#endif /* _LINUX_BIGMEM_H */ diff -urN 2.2.14/include/linux/blkdev.h 2.2.14aa6/include/linux/blkdev.h --- 2.2.14/include/linux/blkdev.h Sat Jan 22 15:37:27 2000 +++ 2.2.14aa6/include/linux/blkdev.h Wed Feb 2 23:37:00 2000 @@ -32,10 +32,16 @@ struct buffer_head * bh; struct buffer_head * bhtail; struct request * next; + int elevator_latency; }; typedef void (request_fn_proc) (void); typedef struct request ** (queue_proc) (kdev_t dev); +typedef struct elevator_s +{ + int read_latency; + int write_latency; +} elevator_t; struct blk_dev_struct { request_fn_proc *request_fn; @@ -47,6 +53,7 @@ struct request *current_request; struct request plug; struct tq_struct plug_tq; + elevator_t elevator; }; struct sec_size { @@ -90,5 +97,8 @@ #define MAX_READAHEAD PageAlignSize(4096*31) #define MIN_READAHEAD PageAlignSize(4096*3) #endif + +#define ELEVATOR_READ_LATENCY (NR_REQUEST>>2) +#define 
ELEVATOR_WRITE_LATENCY (NR_REQUEST<<2) #endif diff -urN 2.2.14/include/linux/dcache.h 2.2.14aa6/include/linux/dcache.h --- 2.2.14/include/linux/dcache.h Fri Jan 7 18:19:21 2000 +++ 2.2.14aa6/include/linux/dcache.h Wed Feb 2 02:31:42 2000 @@ -143,7 +143,7 @@ /* dcache memory management */ extern void shrink_dcache_memory(int, unsigned int); extern void check_dcache_memory(void); -extern void free_inode_memory(int); /* defined in fs/inode.c */ +extern void free_inode_memory(void); /* defined in fs/inode.c */ /* only used at mount-time */ extern struct dentry * d_alloc_root(struct inode * root_inode, struct dentry * old_root); diff -urN 2.2.14/include/linux/fs.h 2.2.14aa6/include/linux/fs.h --- 2.2.14/include/linux/fs.h Fri Jan 21 03:31:05 2000 +++ 2.2.14aa6/include/linux/fs.h Wed Feb 2 23:37:00 2000 @@ -759,9 +759,18 @@ #define BUF_CLEAN 0 #define BUF_LOCKED 1 /* Buffers scheduled for write */ #define BUF_DIRTY 2 /* Dirty buffers, not yet scheduled for write */ -#define NR_LIST 3 +#define BUF_PROTECTED 3 /* Ramdisk persistent storage */ +#define NR_LIST 4 void mark_buffer_uptodate(struct buffer_head * bh, int on); + +extern inline void mark_buffer_protected(struct buffer_head * bh) +{ + if (!test_and_set_bit(BH_Protected, &bh->b_state)) { + if (bh->b_list != BUF_PROTECTED) + refile_buffer(bh); + } +} extern inline void mark_buffer_clean(struct buffer_head * bh) { diff -urN 2.2.14/include/linux/iobuf.h 2.2.14aa6/include/linux/iobuf.h --- 2.2.14/include/linux/iobuf.h Thu Jan 1 01:00:00 1970 +++ 2.2.14aa6/include/linux/iobuf.h Wed Feb 2 23:37:00 2000 @@ -0,0 +1,82 @@ +/* + * iobuf.h + * + * Defines the structures used to track abstract kernel-space io buffers. + * + */ + +#ifndef __LINUX_IOBUF_H +#define __LINUX_IOBUF_H + +#include +#include + +/* + * The kiobuf structure describes a physical set of pages reserved + * locked for IO. The reference counts on each page will have been + * incremented, and the flags field will indicate whether or not we have + * pre-locked all of the pages for IO. + * + * kiobufs may be passed in arrays to form a kiovec, but we must + * preserve the property that no page is present more than once over the + * entire iovec. + */ + +#define KIO_MAX_ATOMIC_IO 64 /* in kb */ +#define KIO_MAX_ATOMIC_BYTES (64 * 1024) +#define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10)) +#define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2) + +struct kiobuf +{ + int nr_pages; /* Pages actually referenced */ + int array_len; /* Space in the allocated lists */ + int offset; /* Offset to start of valid data */ + int length; /* Number of valid bytes of data */ + + /* Keep separate track of the physical addresses and page + * structs involved. If we do IO to a memory-mapped device + * region, there won't necessarily be page structs defined for + * every address. 
*/ + + unsigned long * pagelist; + struct page ** maplist; + unsigned long * bouncelist; + + unsigned int locked : 1; /* If set, pages has been locked */ + unsigned int bounced : 1; /* If set, bounce pages are set up */ + + /* Always embed enough struct pages for 64k of IO */ + unsigned long page_array[KIO_STATIC_PAGES]; + struct page * map_array[KIO_STATIC_PAGES]; + unsigned long bounce_array[KIO_STATIC_PAGES]; +}; + + +/* mm/memory.c */ + +int map_user_kiobuf(int rw, struct kiobuf *, unsigned long va, size_t len); +void unmap_kiobuf(struct kiobuf *iobuf); + +/* fs/iobuf.c */ + +void __init kiobuf_init(void); +int alloc_kiovec(int nr, struct kiobuf **); +void free_kiovec(int nr, struct kiobuf **); +int expand_kiobuf(struct kiobuf *, int); +int setup_kiobuf_bounce_pages(struct kiobuf *, int gfp_mask); +void clear_kiobuf_bounce_pages(struct kiobuf *); +void kiobuf_copy_bounce(struct kiobuf *, int direction, int max); + +/* Direction codes for kiobuf_copy_bounce: */ +enum { + COPY_TO_BOUNCE, + COPY_FROM_BOUNCE +}; + +/* fs/buffer.c */ + +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], + kdev_t dev, unsigned long b[], int size); + +#endif /* __LINUX_IOBUF_H */ diff -urN 2.2.14/include/linux/kernel.h 2.2.14aa6/include/linux/kernel.h --- 2.2.14/include/linux/kernel.h Wed Jan 19 05:56:22 2000 +++ 2.2.14aa6/include/linux/kernel.h Wed Feb 2 02:31:42 2000 @@ -90,7 +90,9 @@ unsigned long totalswap; /* Total swap space size */ unsigned long freeswap; /* swap space still available */ unsigned short procs; /* Number of current processes */ - char _f[22]; /* Pads structure to 64 bytes */ + unsigned long totalbig; /* Total big memory size */ + unsigned long freebig; /* Available big memory size */ + char _f[20-2*sizeof(long)]; /* Padding: libc5 uses this.. */ }; #endif diff -urN 2.2.14/include/linux/lvm.h 2.2.14aa6/include/linux/lvm.h --- 2.2.14/include/linux/lvm.h Thu Jan 1 01:00:00 1970 +++ 2.2.14aa6/include/linux/lvm.h Wed Feb 2 23:51:26 2000 @@ -0,0 +1,827 @@ +/* + * kernel/lvm.h + * + * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Germany + * + * February-November 1997 + * May-July 1998 + * January-March,July,September,October,Dezember 1999 + * January 2000 + * + * lvm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * lvm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. 
+ * + */ + +/* + * Changelog + * + * 10/10/1997 - beginning of new structure creation + * 12/05/1998 - incorporated structures from lvm_v1.h and deleted lvm_v1.h + * 07/06/1998 - avoided LVM_KMALLOC_MAX define by using vmalloc/vfree + * instead of kmalloc/kfree + * 01/07/1998 - fixed wrong LVM_MAX_SIZE + * 07/07/1998 - extended pe_t structure by ios member (for statistic) + * 02/08/1998 - changes for official char/block major numbers + * 07/08/1998 - avoided init_module() and cleanup_module() to be static + * 29/08/1998 - seprated core and disk structure type definitions + * 01/09/1998 - merged kernel integration version (mike) + * 20/01/1999 - added LVM_PE_DISK_OFFSET macro for use in + * vg_read_with_pv_and_lv(), pv_move_pe(), pv_show_pe_text()... + * 18/02/1999 - added definition of time_disk_t structure for; + * keeps time stamps on disk for nonatomic writes (future) + * 15/03/1999 - corrected LV() and VG() macro definition to use argument + * instead of minor + * 03/07/1999 - define for genhd.c name handling + * 23/07/1999 - implemented snapshot part + * 08/12/1999 - changed LVM_LV_SIZE_MAX macro to reflect current 1TB limit + * 01/01/2000 - extended lv_v2 core structure by wait_queue member + * + */ + + +#ifndef _LVM_H_INCLUDE +#define _LVM_H_INCLUDE + +#define _LVM_H_VERSION "LVM 0.8 (1/1/2000)" + +/* + * preprocessor definitions + */ +/* if you like emergency reset code in the driver */ +#define LVM_TOTAL_RESET + +#define LVM_GET_INODE +#define LVM_HD_NAME + +/* lots of debugging output (see driver source) +#define DEBUG_LVM_GET_INFO +#define DEBUG +#define DEBUG_MAP +#define DEBUG_MAP_SIZE +#define DEBUG_IOCTL +#define DEBUG_READ +#define DEBUG_GENDISK +#define DEBUG_VG_CREATE +#define DEBUG_LVM_BLK_OPEN +#define DEBUG_VFREE +#define DEBUG_SNAPSHOT +*/ +/* + * end of preprocessor definitions + */ + +#ifndef LINUX_VERSION_CODE +# include + /* for 2.0.x series */ +# ifndef KERNEL_VERSION +# define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) +# endif +#endif + +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION ( 2, 3 ,0) +# include +#else +# include +#endif + +/* leave this for now until major.h is updated (mike) */ +#ifndef LVM_BLK_MAJOR +# define LVM_BLK_MAJOR 58 +#endif +#ifndef LVM_CHAR_MAJOR +# define LVM_CHAR_MAJOR 109 +#endif + +#if !defined ( LVM_BLK_MAJOR) || !defined ( LVM_CHAR_MAJOR) + #error Bad include/linux/major.h - LVM MAJOR undefined +#endif + + +#if LINUX_VERSION_CODE < KERNEL_VERSION ( 2, 1 ,0) +# ifndef uint8_t +# define uint8_t __u8 +# endif +# ifndef uint16_t +# define uint16_t __u16 +# endif +# ifndef uint32_t +# define uint32_t __u32 +# endif +# ifndef uint64_t +# define uint64_t __u64 +# endif +#endif + +#define LVM_STRUCT_VERSION 1 /* structure version */ + +#ifndef min +#define min(a,b) (((a)<(b))?(a):(b)) +#endif +#ifndef max +#define max(a,b) (((a)>(b))?(a):(b)) +#endif + +/* set the default structure version */ +#if ( LVM_STRUCT_VERSION == 1) +# define pv_t pv_v1_t +# define lv_t lv_v2_t +# define vg_t vg_v1_t +# define pv_disk_t pv_disk_v1_t +# define lv_disk_t lv_disk_v1_t +# define vg_disk_t vg_disk_v1_t +# define lv_exception_t lv_v2_exception_t +#endif + + +/* + * i/o protocoll version + * + * defined here for the driver and defined seperate in the + * user land LVM parts + * + */ +#define LVM_DRIVER_IOP_VERSION 6 + +#define LVM_NAME "lvm" + +/* + * VG/LV indexing macros + */ +/* character minor maps directly to volume group */ +#define VG_CHR(a) ( a) + +/* block minor indexes into a volume group/logical volume 
indirection table */ +#define VG_BLK(a) ( vg_lv_map[a].vg_number) +#define LV_BLK(a) ( vg_lv_map[a].lv_number) + +/* + * absolute limits for VGs, PVs per VG and LVs per VG + */ +#define ABS_MAX_VG 99 +#define ABS_MAX_PV 256 +#define ABS_MAX_LV 256 /* caused by 8 bit minor */ + +#define MAX_VG ABS_MAX_VG +#define MAX_LV ABS_MAX_LV +#define MAX_PV ABS_MAX_PV + +#if ( MAX_VG > ABS_MAX_VG) +# undef MAX_VG +# define MAX_VG ABS_MAX_VG +#endif + +#if ( MAX_LV > ABS_MAX_LV) +# undef MAX_LV +# define MAX_LV ABS_MAX_LV +#endif + + +/* + * VGDA: default disk spaces and offsets + * + * there's space after the structures for later extensions. + * + * offset what size + * --------------- ---------------------------------- ------------ + * 0 physical volume structure ~500 byte + * + * 1K volume group structure ~200 byte + * + * 5K time stamp structure ~ + * + * 6K namelist of physical volumes 128 byte each + * + * 6k + n * 128byte n logical volume structures ~300 byte each + * + * + m * 328byte m physical extent alloc. structs 4 byte each + * + * End of disk - first physical extent typical 4 megabyte + * PE total * + * PE size + * + * + */ + +/* DONT TOUCH THESE !!! */ +/* base of PV structure in disk partition */ +#define LVM_PV_DISK_BASE 0L + +/* size reserved for PV structure on disk */ +#define LVM_PV_DISK_SIZE 1024L + +/* base of VG structure in disk partition */ +#define LVM_VG_DISK_BASE LVM_PV_DISK_SIZE + +/* size reserved for VG structure */ +#define LVM_VG_DISK_SIZE ( 9 * 512L) + +/* size reserved for timekeeping */ +#define LVM_TIMESTAMP_DISK_BASE ( LVM_VG_DISK_BASE + LVM_VG_DISK_SIZE) +#define LVM_TIMESTAMP_DISK_SIZE 512L /* reserved for timekeeping */ + +/* name list of physical volumes on disk */ +#define LVM_PV_NAMELIST_DISK_BASE ( LVM_TIMESTAMP_DISK_BASE + \ + LVM_TIMESTAMP_DISK_SIZE) + +/* now for the dynamically calculated parts of the VGDA */ +#define LVM_LV_DISK_OFFSET(a, b) ( (a)->lv_on_disk.base + sizeof ( lv_t) * b) +#define LVM_DISK_SIZE(pv) ( (pv)->pe_on_disk.base + \ + (pv)->pe_on_disk.size) +#define LVM_PE_DISK_OFFSET(pe, pv) ( pe * pv->pe_size + \ + ( LVM_DISK_SIZE ( pv) / SECTOR_SIZE)) +#define LVM_PE_ON_DISK_BASE(pv) \ + { int rest; \ + pv->pe_on_disk.base = pv->lv_on_disk.base + pv->lv_on_disk.size; \ + if ( ( rest = pv->pe_on_disk.base % SECTOR_SIZE) != 0) \ + pv->pe_on_disk.base += ( SECTOR_SIZE - rest); \ + } +/* END default disk spaces and offsets for PVs */ + + +/* + * LVM_PE_T_MAX corresponds to: + * + * 8KB PE size can map a ~512 MB logical volume at the cost of 1MB memory, + * + * 128MB PE size can map a 8TB logical volume at the same cost of memory. + * + * Default PE size of 4 MB gives a maximum logical volume size of 256 GB. + * + * Maximum PE size of 16GB gives a maximum logical volume size of 1024 TB. + * + * AFAIK, the actual kernels limit this to 1 TB. + * + * Should be a sufficient spectrum ;*) + */ + +/* This is the usable size of disk_pe_t.le_num !!! v v */ +#define LVM_PE_T_MAX ( ( 1 << ( sizeof ( uint16_t) * 8)) - 2) + +#define LVM_LV_SIZE_MAX(a) ( ( long long) LVM_PE_T_MAX * (a)->pe_size > ( long long) 2*1024*1024*1024 ? 
( long long) 2*1024*1024*1024 : ( long long) LVM_PE_T_MAX * (a)->pe_size) +#define LVM_MIN_PE_SIZE ( 8L * 2) /* 8 KB in sectors */ +#define LVM_MAX_PE_SIZE ( 16L * 1024L * 1024L * 2) /* 16GB in sectors */ +#define LVM_DEFAULT_PE_SIZE ( 4096L * 2) /* 4 MB in sectors */ +#define LVM_DEFAULT_STRIPE_SIZE 16L /* 16 KB */ +#define LVM_MIN_STRIPE_SIZE 2L /* 1 KB in sectors */ +#define LVM_MAX_STRIPE_SIZE ( 512L * 2) /* 512 KB in sectors */ +#define LVM_MAX_STRIPES 128 /* max # of stripes */ +#define LVM_MAX_SIZE ( 1024LU * 1024 * 1024 * 2) /* 1TB[sectors] */ +#define LVM_MAX_MIRRORS 2 /* future use */ +#define LVM_MIN_READ_AHEAD 0 /* minimum read ahead sectors */ +#define LVM_MAX_READ_AHEAD 256 /* maximum read ahead sectors */ +#define LVM_DEF_READ_AHEAD ((LVM_MAX_READ_AHEAD-LVM_MIN_READ_AHEAD)/2 + LVM_MIN_READ_AHEAD) +#define LVM_MAX_LV_IO_TIMEOUT 60 /* seconds I/O timeout (future use) */ +#define LVM_PARTITION 0xfe /* LVM partition id */ +#define LVM_NEW_PARTITION 0x8e /* new LVM partition id (10/09/1999) */ +#define LVM_PE_SIZE_PV_SIZE_REL 5 /* max relation PV size and PE size */ + +#define LVM_SNAPSHOT_MAX_CHUNK 256 /* 256 KB */ +#define LVM_SNAPSHOT_DEF_CHUNK 64 /* 64 KB */ +#define LVM_SNAPSHOT_MIN_CHUNK 1 /* 1 KB */ + +#define UNDEF -1 +#define FALSE 0 +#define TRUE 1 + + +/* + * ioctls + */ +/* volume group */ +#define VG_CREATE _IOW ( 0xfe, 0x00, 1) +#define VG_REMOVE _IOW ( 0xfe, 0x01, 1) + +#define VG_EXTEND _IOW ( 0xfe, 0x03, 1) +#define VG_REDUCE _IOW ( 0xfe, 0x04, 1) + +#define VG_STATUS _IOWR ( 0xfe, 0x05, 1) +#define VG_STATUS_GET_COUNT _IOWR ( 0xfe, 0x06, 1) +#define VG_STATUS_GET_NAMELIST _IOWR ( 0xfe, 0x07, 1) + +#define VG_SET_EXTENDABLE _IOW ( 0xfe, 0x08, 1) + + +/* logical volume */ +#define LV_CREATE _IOW ( 0xfe, 0x20, 1) +#define LV_REMOVE _IOW ( 0xfe, 0x21, 1) + +#define LV_ACTIVATE _IO ( 0xfe, 0x22) +#define LV_DEACTIVATE _IO ( 0xfe, 0x23) + +#define LV_EXTEND _IOW ( 0xfe, 0x24, 1) +#define LV_REDUCE _IOW ( 0xfe, 0x25, 1) + +#define LV_STATUS_BYNAME _IOWR ( 0xfe, 0x26, 1) +#define LV_STATUS_BYINDEX _IOWR ( 0xfe, 0x27, 1) + +#define LV_SET_ACCESS _IOW ( 0xfe, 0x28, 1) +#define LV_SET_ALLOCATION _IOW ( 0xfe, 0x29, 1) +#define LV_SET_STATUS _IOW ( 0xfe, 0x2a, 1) + +#define LE_REMAP _IOW ( 0xfe, 0x2b, 1) + + +/* physical volume */ +#define PV_STATUS _IOWR ( 0xfe, 0x40, 1) +#define PV_CHANGE _IOWR ( 0xfe, 0x41, 1) +#define PV_FLUSH _IOW ( 0xfe, 0x42, 1) + +/* physical extent */ +#define PE_LOCK_UNLOCK _IOW ( 0xfe, 0x50, 1) + +/* i/o protocol version */ +#define LVM_GET_IOP_VERSION _IOR ( 0xfe, 0x98, 1) + +#ifdef LVM_TOTAL_RESET +/* special reset function for testing purposes */ +#define LVM_RESET _IO ( 0xfe, 0x99) +#endif + +/* lock the logical volume manager */ +#define LVM_LOCK_LVM _IO ( 0xfe, 0x100) +/* END ioctls */ + + +/* + * Status flags + */ +/* volume group */ +#define VG_ACTIVE 0x01 /* vg_status */ +#define VG_EXPORTED 0x02 /* " */ +#define VG_EXTENDABLE 0x04 /* " */ + +#define VG_READ 0x01 /* vg_access */ +#define VG_WRITE 0x02 /* " */ + +/* logical volume */ +#define LV_ACTIVE 0x01 /* lv_status */ +#define LV_SPINDOWN 0x02 /* " */ + +#define LV_READ 0x01 /* lv_access */ +#define LV_WRITE 0x02 /* " */ +#define LV_SNAPSHOT 0x04 /* " */ +#define LV_SNAPSHOT_ORG 0x08 /* " */ + +#define LV_BADBLOCK_ON 0x01 /* lv_badblock */ + +#define LV_STRICT 0x01 /* lv_allocation */ +#define LV_CONTIGUOUS 0x02 /* " */ + +/* physical volume */ +#define PV_ACTIVE 0x01 /* pv_status */ +#define PV_ALLOCATABLE 0x02 /* pv_allocatable */ + + +/* + * Structure definitions core/disk follow + 
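+ * (Note on the naming: each object has an in-core variant, e.g.
+ *  pv_v1_t, and an on-disk variant, e.g. pv_disk_v1_t.  The disk
+ *  variants carry only fixed-width integer fields -- pointers and
+ *  kdev_t members of the core variants become uint32_t or dummy
+ *  fields -- so the VGDA layout written to disk does not depend on
+ *  the architecture the volume group was created on.)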
* + * conditional conversion takes place on big endian architectures + * in functions * pv_copy_*(), vg_copy_*() and lv_copy_*() + * + */ + +#define NAME_LEN 128 /* don't change!!! */ +#define UUID_LEN 16 /* don't change!!! */ + +/* remap physical sector/rdev pairs */ +typedef struct { + struct list_head hash; + ulong rsector_org; + kdev_t rdev_org; + ulong rsector_new; + kdev_t rdev_new; +} lv_block_exception_t; + + +/* disk stored pe information */ +typedef struct { + uint16_t lv_num; + uint16_t le_num; +} disk_pe_t; + +/* disk stored PV, VG, LV and PE size and offset information */ +typedef struct { + uint32_t base; + uint32_t size; +} lvm_disk_data_t; + + +/* + * Structure Physical Volume (PV) Version 1 + */ + +/* core */ +typedef struct { + uint8_t id[2]; /* Identifier */ + uint16_t version; /* HM lvm version */ + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_namelist_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + uint8_t pv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ + kdev_t pv_dev; + uint32_t pv_number; + uint32_t pv_status; + uint32_t pv_allocatable; + uint32_t pv_size; /* HM */ + uint32_t lv_cur; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; + uint32_t pe_stale; /* for future use */ + disk_pe_t *pe; /* HM */ + struct inode *inode; /* HM */ +} pv_v1_t; + +/* disk */ +typedef struct { + uint8_t id[2]; /* Identifier */ + uint16_t version; /* HM lvm version */ + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_namelist_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + uint8_t pv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ + uint32_t pv_major; + uint32_t pv_number; + uint32_t pv_status; + uint32_t pv_allocatable; + uint32_t pv_size; /* HM */ + uint32_t lv_cur; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; + uint32_t dummy1; + uint32_t dummy2; + uint32_t dummy3; +} pv_disk_v1_t; + + +/* + * Structure Physical Volume (PV) Version 2 (future!) 
+ */ + +typedef struct { + uint8_t id[2]; /* Identifier */ + uint16_t version; /* HM lvm version */ + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_uuid_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + uint8_t pv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ + kdev_t pv_dev; + uint32_t pv_number; + uint32_t pv_status; + uint32_t pv_allocatable; + uint32_t pv_size; /* HM */ + uint32_t lv_cur; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; + uint32_t pe_stale; /* for future use */ + disk_pe_t *pe; /* HM */ + struct inode *inode; /* HM */ + /* delta to version 1 starts here */ + uint8_t pv_uuid[UUID_LEN]; + uint32_t pv_atime; /* PV access time */ + uint32_t pv_ctime; /* PV creation time */ + uint32_t pv_mtime; /* PV modification time */ +} pv_v2_t; + + +/* + * Structures for Logical Volume (LV) + */ + +/* core PE information */ +typedef struct { + kdev_t dev; + uint32_t pe; /* to be changed if > 2TB */ + uint32_t reads; + uint32_t writes; +} pe_t; + +typedef struct { + uint8_t lv_name[NAME_LEN]; + kdev_t old_dev; + kdev_t new_dev; + ulong old_pe; + ulong new_pe; +} le_remap_req_t; + + + +/* + * Structure Logical Volume (LV) Version 1 + */ + +/* core */ +typedef struct { + uint8_t lv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint32_t lv_access; + uint32_t lv_status; + uint32_t lv_open; /* HM */ + kdev_t lv_dev; /* HM */ + uint32_t lv_number; /* HM */ + uint32_t lv_mirror_copies; /* for future use */ + uint32_t lv_recovery; /* " */ + uint32_t lv_schedule; /* " */ + uint32_t lv_size; + pe_t *lv_current_pe; /* HM */ + uint32_t lv_current_le; /* for future use */ + uint32_t lv_allocated_le; + uint32_t lv_stripes; + uint32_t lv_stripesize; + uint32_t lv_badblock; /* for future use */ + uint32_t lv_allocation; + uint32_t lv_io_timeout; /* for future use */ + uint32_t lv_read_ahead; +} lv_v1_t; + +/* disk */ +typedef struct { + uint8_t lv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint32_t lv_access; + uint32_t lv_status; + uint32_t lv_open; /* HM */ + uint32_t lv_dev; /* HM */ + uint32_t lv_number; /* HM */ + uint32_t lv_mirror_copies; /* for future use */ + uint32_t lv_recovery; /* " */ + uint32_t lv_schedule; /* " */ + uint32_t lv_size; + uint32_t dummy; + uint32_t lv_current_le; /* for future use */ + uint32_t lv_allocated_le; + uint32_t lv_stripes; + uint32_t lv_stripesize; + uint32_t lv_badblock; /* for future use */ + uint32_t lv_allocation; + uint32_t lv_io_timeout; /* for future use */ + uint32_t lv_read_ahead; /* HM, for future use */ +} lv_disk_v1_t; + + +/* + * Structure Logical Volume (LV) Version 2 + */ + +/* core */ +typedef struct lv_v2 { + uint8_t lv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint32_t lv_access; + uint32_t lv_status; + uint32_t lv_open; /* HM */ + kdev_t lv_dev; /* HM */ + uint32_t lv_number; /* HM */ + uint32_t lv_mirror_copies; /* for future use */ + uint32_t lv_recovery; /* " */ + uint32_t lv_schedule; /* " */ + uint32_t lv_size; + pe_t *lv_current_pe; /* HM */ + uint32_t lv_current_le; /* for future use */ + uint32_t lv_allocated_le; + uint32_t lv_stripes; + uint32_t lv_stripesize; + uint32_t lv_badblock; /* for future use */ + uint32_t lv_allocation; + uint32_t lv_io_timeout; /* for future use */ + uint32_t lv_read_ahead; + /* delta to version 1 starts here */ + struct lv_v2 *lv_snapshot_org; + struct lv_v2 *lv_snapshot_prev; + struct lv_v2 *lv_snapshot_next; + lv_block_exception_t *lv_block_exception; + uint8_t 
__unused2; + uint32_t lv_remap_ptr; + uint32_t lv_remap_end; + uint32_t lv_chunk_size; + uint32_t lv_snapshot_minor; + struct kiobuf * lv_iobuf; + struct semaphore lv_snapshot_sem; + struct list_head * lv_snapshot_hash_table; + unsigned long lv_snapshot_hash_mask; +} lv_v2_t; + +/* disk */ +typedef struct { + uint8_t lv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint32_t lv_access; + uint32_t lv_status; + uint32_t lv_open; /* HM */ + uint32_t lv_dev; /* HM */ + uint32_t lv_number; /* HM */ + uint32_t lv_mirror_copies; /* for future use */ + uint32_t lv_recovery; /* " */ + uint32_t lv_schedule; /* " */ + uint32_t lv_size; + uint32_t dummy; + uint32_t lv_current_le; /* for future use */ + uint32_t lv_allocated_le; + uint32_t lv_stripes; + uint32_t lv_stripesize; + uint32_t lv_badblock; /* for future use */ + uint32_t lv_allocation; + uint32_t lv_io_timeout; /* for future use */ + uint32_t lv_read_ahead; /* HM, for future use */ +} lv_disk_v2_t; + + +/* + * Structure Volume Group (VG) Version 1 + */ + +typedef struct { + uint8_t vg_name[NAME_LEN]; /* volume group name */ + uint32_t vg_number; /* volume group number */ + uint32_t vg_access; /* read/write */ + uint32_t vg_status; /* active or not */ + uint32_t lv_max; /* maximum logical volumes */ + uint32_t lv_cur; /* current logical volumes */ + uint32_t lv_open; /* open logical volumes */ + uint32_t pv_max; /* maximum physical volumes */ + uint32_t pv_cur; /* current physical volumes FU */ + uint32_t pv_act; /* active physical volumes */ + uint32_t dummy; /* was obsolete max_pe_per_pv */ + uint32_t vgda; /* volume group descriptor arrays FU */ + uint32_t pe_size; /* physical extent size in sectors */ + uint32_t pe_total; /* total of physical extents */ + uint32_t pe_allocated; /* allocated physical extents */ + uint32_t pvg_total; /* physical volume groups FU */ + struct proc_dir_entry *proc; + pv_t *pv[ABS_MAX_PV+1]; /* physical volume struct pointers */ + lv_t *lv[ABS_MAX_LV+1]; /* logical volume struct pointers */ +} vg_v1_t; + +typedef struct { + uint8_t vg_name[NAME_LEN]; /* volume group name */ + uint32_t vg_number; /* volume group number */ + uint32_t vg_access; /* read/write */ + uint32_t vg_status; /* active or not */ + uint32_t lv_max; /* maximum logical volumes */ + uint32_t lv_cur; /* current logical volumes */ + uint32_t lv_open; /* open logical volumes */ + uint32_t pv_max; /* maximum physical volumes */ + uint32_t pv_cur; /* current physical volumes FU */ + uint32_t pv_act; /* active physical volumes */ + uint32_t dummy; + uint32_t vgda; /* volume group descriptor arrays FU */ + uint32_t pe_size; /* physical extent size in sectors */ + uint32_t pe_total; /* total of physical extents */ + uint32_t pe_allocated; /* allocated physical extents */ + uint32_t pvg_total; /* physical volume groups FU */ +} vg_disk_v1_t; + +/* + * Structure Volume Group (VG) Version 2 + */ + +typedef struct { + uint8_t vg_name[NAME_LEN]; /* volume group name */ + uint32_t vg_number; /* volume group number */ + uint32_t vg_access; /* read/write */ + uint32_t vg_status; /* active or not */ + uint32_t lv_max; /* maximum logical volumes */ + uint32_t lv_cur; /* current logical volumes */ + uint32_t lv_open; /* open logical volumes */ + uint32_t pv_max; /* maximum physical volumes */ + uint32_t pv_cur; /* current physical volumes FU */ + uint32_t pv_act; /* future: active physical volumes */ + uint32_t max_pe_per_pv; /* OBSOLETE maximum PE/PV */ + uint32_t vgda; /* volume group descriptor arrays FU */ + uint32_t pe_size; /* physical extent size in 
sectors */ + uint32_t pe_total; /* total of physical extents */ + uint32_t pe_allocated; /* allocated physical extents */ + uint32_t pvg_total; /* physical volume groups FU */ + struct proc_dir_entry *proc; + pv_t *pv[ABS_MAX_PV+1]; /* physical volume struct pointers */ + lv_t *lv[ABS_MAX_LV+1]; /* logical volume struct pointers */ + /* delta to version 1 starts here */ + uint8_t vg_uuid[UUID_LEN]; /* volume group UUID */ + time_t vg_atime; /* VG access time */ + time_t vg_ctime; /* VG creation time */ + time_t vg_mtime; /* VG modification time */ +} vg_v2_t; + + +/* + * Timekeeping structure on disk (0.7 feature) + * + * Holds several timestamps for start/stop time of non + * atomic VGDA disk i/o operations + * + */ + +typedef struct { + uint32_t seconds; /* seconds since the epoch */ + uint32_t jiffies; /* micro timer */ +} lvm_time_t; + +#define TIMESTAMP_ID_SIZE 2 +typedef struct { + uint8_t id[TIMESTAMP_ID_SIZE]; /* Identifier */ + lvm_time_t pv_vg_lv_pe_io_begin; + lvm_time_t pv_vg_lv_pe_io_end; + lvm_time_t pv_io_begin; + lvm_time_t pv_io_end; + lvm_time_t vg_io_begin; + lvm_time_t vg_io_end; + lvm_time_t lv_io_begin; + lvm_time_t lv_io_end; + lvm_time_t pe_io_begin; + lvm_time_t pe_io_end; + lvm_time_t pe_move_io_begin; + lvm_time_t pe_move_io_end; + uint8_t dummy[LVM_TIMESTAMP_DISK_SIZE - + TIMESTAMP_ID_SIZE - + 12 * sizeof(lvm_time_t)]; + /* ATTENTION ^^ */ +} timestamp_disk_t; + +/* same on disk and in core so far */ +typedef timestamp_disk_t timestamp_t; + +/* function identifiers for timestamp actions */ +typedef enum { PV_VG_LV_PE_IO_BEGIN, + PV_VG_LV_PE_IO_END, + PV_IO_BEGIN, + PV_IO_END, + VG_IO_BEGIN, + VG_IO_END, + LV_IO_BEGIN, + LV_IO_END, + PE_IO_BEGIN, + PE_IO_END, + PE_MOVE_IO_BEGIN, + PE_MOVE_IO_END} ts_fct_id_t; + + +/* + * Request structures for ioctls + */ + +/* Request structure PV_STATUS */ +typedef struct { + char pv_name[NAME_LEN]; + pv_t *pv; +} pv_status_req_t, pv_change_req_t; + +/* Request structure PV_FLUSH */ +typedef struct { + char pv_name[NAME_LEN]; +} pv_flush_req_t; + + +/* Request structure PE_MOVE */ +typedef struct { + enum { LOCK_PE, UNLOCK_PE} lock; + struct { + kdev_t lv_dev; + kdev_t pv_dev; + uint32_t pv_offset; + } data; +} pe_lock_req_t; + + +/* Request structure LV_STATUS_BYNAME */ +typedef struct { + char lv_name[NAME_LEN]; + lv_t *lv; +} lv_status_byname_req_t, lv_req_t; + +/* Request structure LV_STATUS_BYINDEX */ +typedef struct { + ulong lv_index; + lv_t *lv; +} lv_status_byindex_req_t; + +#endif /* #ifndef _LVM_H_INCLUDE */ diff -urN 2.2.14/include/linux/major.h 2.2.14aa6/include/linux/major.h --- 2.2.14/include/linux/major.h Wed Jan 5 14:16:56 2000 +++ 2.2.14aa6/include/linux/major.h Wed Feb 2 02:31:43 2000 @@ -113,6 +113,8 @@ #define AURORA_MAJOR 79 +#define RAW_MAJOR 162 + #define UNIX98_PTY_MASTER_MAJOR 128 #define UNIX98_PTY_MAJOR_COUNT 8 #define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) diff -urN 2.2.14/include/linux/mm.h 2.2.14aa6/include/linux/mm.h --- 2.2.14/include/linux/mm.h Fri Jan 21 03:31:05 2000 +++ 2.2.14aa6/include/linux/mm.h Wed Feb 2 23:37:00 2000 @@ -144,6 +144,7 @@ #define PG_Slab 9 #define PG_swap_cache 10 #define PG_skip 11 +#define PG_BIGMEM 12 #define PG_reserved 31 /* Make it prettier to test the above... 
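 * (PG_BIGMEM above marks page structures whose physical page lies in
 *  the BIGMEM region, i.e. memory not covered by the kernel's normal
 *  virtual mapping; the PageBIGMEM() test added below is defined to 0
 *  when CONFIG_BIGMEM is off, so the extra checks compile away.)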
*/ @@ -175,6 +176,11 @@ (test_and_clear_bit(PG_dirty, &(page)->flags)) #define PageTestandClearSwapCache(page) \ (test_and_clear_bit(PG_swap_cache, &(page)->flags)) +#ifdef CONFIG_BIGMEM +#define PageBIGMEM(page) (test_bit(PG_BIGMEM, &(page)->flags)) +#else +#define PageBIGMEM(page) 0 /* needed to optimize away at compile time */ +#endif /* * Various page->flags bits: @@ -275,7 +281,8 @@ #define free_page(addr) free_pages((addr),0) extern void FASTCALL(free_pages(unsigned long addr, unsigned long order)); -extern void FASTCALL(__free_page(struct page *)); +#define __free_page(page) __free_pages((page),0) +extern void FASTCALL(__free_pages(struct page *, unsigned long)); extern void show_free_areas(void); extern unsigned long put_dirty_page(struct task_struct * tsk,unsigned long page, @@ -331,11 +338,17 @@ #define __GFP_HIGH 0x08 #define __GFP_IO 0x10 #define __GFP_SWAP 0x20 +#ifdef CONFIG_BIGMEM +#define __GFP_BIGMEM 0x40 +#else +#define __GFP_BIGMEM 0x0 /* noop */ +#endif #define __GFP_DMA 0x80 -#define GFP_BUFFER (__GFP_LOW | __GFP_WAIT) +#define GFP_BUFFER (__GFP_MED | __GFP_WAIT) #define GFP_ATOMIC (__GFP_HIGH) +#define GFP_BIGUSER (__GFP_LOW | __GFP_WAIT | __GFP_IO | __GFP_BIGMEM) #define GFP_USER (__GFP_LOW | __GFP_WAIT | __GFP_IO) #define GFP_KERNEL (__GFP_MED | __GFP_WAIT | __GFP_IO) #define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO) @@ -345,6 +358,11 @@ platforms, used as appropriate on others */ #define GFP_DMA __GFP_DMA + +/* Flag - indicates that the buffer can be taken from big memory which is not + directly addressable by the kernel */ + +#define GFP_BIGMEM __GFP_BIGMEM /* vma is the first one with address < vma->vm_end, * and even address < vma->vm_start. Have to extend vma. */ diff -urN 2.2.14/include/linux/raw.h 2.2.14aa6/include/linux/raw.h --- 2.2.14/include/linux/raw.h Thu Jan 1 01:00:00 1970 +++ 2.2.14aa6/include/linux/raw.h Wed Feb 2 02:31:43 2000 @@ -0,0 +1,23 @@ +#ifndef __LINUX_RAW_H +#define __LINUX_RAW_H + +#include + +#define RAW_SETBIND _IO( 0xac, 0 ) +#define RAW_GETBIND _IO( 0xac, 1 ) + +struct raw_config_request +{ + int raw_minor; + __u64 block_major; + __u64 block_minor; +}; + +#ifdef __KERNEL__ + +/* drivers/char/raw.c */ +extern void raw_init(void); + +#endif /* __KERNEL__ */ + +#endif /* __LINUX_RAW_H */ diff -urN 2.2.14/include/linux/sched.h 2.2.14aa6/include/linux/sched.h --- 2.2.14/include/linux/sched.h Fri Jan 21 03:31:05 2000 +++ 2.2.14aa6/include/linux/sched.h Wed Feb 2 23:37:00 2000 @@ -291,6 +291,8 @@ /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap; int swappable:1; + int trashing_mem:1; + int trashing_bigmem:1; /* process credentials */ uid_t uid,euid,suid,fsuid; gid_t gid,egid,sgid,fsgid; @@ -328,6 +330,9 @@ /* Thread group tracking */ u32 parent_exec_id; u32 self_exec_id; + +/* oom handling */ + int oom_kill_try; }; /* @@ -378,7 +383,7 @@ /* utime */ {0,0,0,0},0, \ /* per CPU times */ {0, }, {0, }, \ /* flt */ 0,0,0,0,0,0, \ -/* swp */ 0, \ +/* swp */ 0,0,0, \ /* process credentials */ \ /* uid etc */ 0,0,0,0,0,0,0,0, \ /* suppl grps*/ 0, {0,}, \ @@ -395,6 +400,7 @@ /* mm */ &init_mm, \ /* signals */ SPIN_LOCK_UNLOCKED, &init_signals, {{0}}, {{0}}, NULL, &init_task.sigqueue, 0, 0, \ /* exec cts */ 0,0, \ +/* oom */ 0, \ } union task_union { diff -urN 2.2.14/init/main.c 2.2.14aa6/init/main.c --- 2.2.14/init/main.c Wed Jan 5 14:16:56 2000 +++ 2.2.14aa6/init/main.c Wed Feb 2 02:31:43 2000 @@ -22,6 +22,7 @@ #include #include 
#include +#include #include #include @@ -1274,6 +1275,7 @@ vma_init(); buffer_init(memory_end-memory_start); page_cache_init(memory_end-memory_start); + kiobuf_init(); signals_init(); inode_init(); file_table_init(); diff -urN 2.2.14/ipc/shm.c 2.2.14aa6/ipc/shm.c --- 2.2.14/ipc/shm.c Wed Jan 5 14:16:56 2000 +++ 2.2.14aa6/ipc/shm.c Wed Feb 2 02:31:42 2000 @@ -4,6 +4,7 @@ * Many improvements/fixes by Bruno Haible. * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994. * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli. + * BIGMEM support, Andrea Arcangeli */ #include @@ -12,6 +13,8 @@ #include #include #include +#include +#include #include #include @@ -638,21 +641,29 @@ pte = __pte(shp->shm_pages[idx]); if (!pte_present(pte)) { - unsigned long page = get_free_page(GFP_USER); + unsigned long page = __get_free_page(GFP_BIGUSER); if (!page) return -1; + clear_bigpage(page); pte = __pte(shp->shm_pages[idx]); if (pte_present(pte)) { free_page (page); /* doesn't sleep */ goto done; } if (!pte_none(pte)) { + struct page * page_map; + + page = prepare_bigmem_shm_swapin(page); + if (!page) + return -1; rw_swap_page_nocache(READ, pte_val(pte), (char *)page); pte = __pte(shp->shm_pages[idx]); if (pte_present(pte)) { free_page (page); /* doesn't sleep */ goto done; } + page_map = replace_with_bigmem(&mem_map[MAP_NR(page)]); + page = page_address(page_map); swap_free(pte_val(pte)); shm_swp--; } @@ -669,7 +680,7 @@ } /* - * Goes through counter = (shm_rss >> prio) present shm pages. + * Goes through counter = (shm_rss / prio) present shm pages. */ static unsigned long swap_id = 0; /* currently being swapped */ static unsigned long swap_idx = 0; /* next to swap */ @@ -682,8 +693,9 @@ unsigned long id, idx; int loop = 0; int counter; + struct page * page_map; - counter = shm_rss >> prio; + counter = shm_rss / prio; if (!counter || !(swap_nr = get_swap_page())) return 0; @@ -710,7 +722,10 @@ page = __pte(shp->shm_pages[idx]); if (!pte_present(page)) goto check_table; - if ((gfp_mask & __GFP_DMA) && !PageDMA(&mem_map[MAP_NR(pte_page(page))])) + page_map = &mem_map[MAP_NR(pte_page(page))]; + if ((gfp_mask & __GFP_DMA) && !PageDMA(page_map)) + goto check_table; + if (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page_map)) goto check_table; swap_attempts++; @@ -719,11 +734,13 @@ swap_free (swap_nr); return 0; } - if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) != 1) + if (atomic_read(&page_map->count) != 1) + goto check_table; + if (!(page_map = prepare_bigmem_swapout(page_map))) goto check_table; shp->shm_pages[idx] = swap_nr; - rw_swap_page_nocache (WRITE, swap_nr, (char *) pte_page(page)); - free_page(pte_page(page)); + rw_swap_page_nocache (WRITE, swap_nr, (char *) page_address(page_map)); + __free_page(page_map); swap_successes++; shm_swp++; shm_rss--; diff -urN 2.2.14/kernel/ksyms.c 2.2.14aa6/kernel/ksyms.c --- 2.2.14/kernel/ksyms.c Wed Jan 5 14:16:56 2000 +++ 2.2.14aa6/kernel/ksyms.c Wed Feb 2 02:31:43 2000 @@ -39,6 +39,7 @@ #include #include #include +#include #if defined(CONFIG_PROC_FS) #include @@ -72,6 +73,13 @@ }; #endif +#ifdef CONFIG_BLK_DEV_LVM_MODULE + extern int (*lvm_map_ptr) ( int, kdev_t *, unsigned long *, + unsigned long, int); + extern void (*lvm_hd_name_ptr) ( char*, int); + EXPORT_SYMBOL(lvm_map_ptr); + EXPORT_SYMBOL(lvm_hd_name_ptr); +#endif #ifdef CONFIG_KMOD EXPORT_SYMBOL(request_module); @@ -93,7 +101,7 @@ /* internal kernel memory management */ EXPORT_SYMBOL(__get_free_pages); EXPORT_SYMBOL(free_pages); -EXPORT_SYMBOL(__free_page); 
+EXPORT_SYMBOL(__free_pages); EXPORT_SYMBOL(kmem_find_general_cachep); EXPORT_SYMBOL(kmem_cache_create); EXPORT_SYMBOL(kmem_cache_shrink); @@ -107,6 +115,7 @@ EXPORT_SYMBOL(mem_map); EXPORT_SYMBOL(remap_page_range); EXPORT_SYMBOL(max_mapnr); +EXPORT_SYMBOL(num_physpages); EXPORT_SYMBOL(high_memory); EXPORT_SYMBOL(update_vm_cache); EXPORT_SYMBOL(update_vm_cache_conditional); @@ -238,6 +247,13 @@ EXPORT_SYMBOL(max_sectors); EXPORT_SYMBOL(max_segments); EXPORT_SYMBOL(max_readahead); + +/* rawio */ +EXPORT_SYMBOL(alloc_kiovec); +EXPORT_SYMBOL(expand_kiobuf); +EXPORT_SYMBOL(unmap_kiobuf); +EXPORT_SYMBOL(brw_kiovec); +EXPORT_SYMBOL(free_kiovec); /* tty routines */ EXPORT_SYMBOL(tty_hangup); diff -urN 2.2.14/kernel/sched.c 2.2.14aa6/kernel/sched.c --- 2.2.14/kernel/sched.c Wed Jan 5 14:16:56 2000 +++ 2.2.14aa6/kernel/sched.c Wed Feb 2 02:31:42 2000 @@ -211,69 +211,14 @@ return goodness(prev, p, cpu) - goodness(prev, prev, cpu); } -/* - * If there is a dependency between p1 and p2, - * don't be too eager to go into the slow schedule. - * In particular, if p1 and p2 both want the kernel - * lock, there is no point in trying to make them - * extremely parallel.. - * - * (No lock - lock_depth < 0) - * - * There are two additional metrics here: - * - * first, a 'cutoff' interval, currently 0-200 usecs on - * x86 CPUs, depending on the size of the 'SMP-local cache'. - * If the current process has longer average timeslices than - * this, then we utilize the idle CPU. - * - * second, if the wakeup comes from a process context, - * then the two processes are 'related'. (they form a - * 'gang') - * - * An idle CPU is almost always a bad thing, thus we skip - * the idle-CPU utilization only if both these conditions - * are true. (ie. a 'process-gang' rescheduling with rather - * high frequency should stay on the same CPU). - * - * [We can switch to something more finegrained in 2.3.] - * - * do not 'guess' if the to-be-scheduled task is RT. - */ -#define related(p1,p2) (((p1)->lock_depth >= 0) && (p2)->lock_depth >= 0) && \ - (((p2)->policy == SCHED_OTHER) && ((p1)->avg_slice < cacheflush_time)) - -static inline void reschedule_idle_slow(struct task_struct * p) +static void reschedule_idle(struct task_struct * p) { #ifdef __SMP__ -/* - * (see reschedule_idle() for an explanation first ...) - * - * Pass #2 - * - * We try to find another (idle) CPU for this woken-up process. - * - * On SMP, we mostly try to see if the CPU the task used - * to run on is idle.. but we will use another idle CPU too, - * at this point we already know that this CPU is not - * willing to reschedule in the near future. - * - * An idle CPU is definitely wasted, especially if this CPU is - * running long-timeslice processes. The following algorithm is - * pretty good at finding the best idle CPU to send this process - * to. - * - * [We can try to preempt low-priority processes on other CPUs in - * 2.3. Also we can try to use the avg_slice value to predict - * 'likely reschedule' events even on other CPUs.] 
- */ int this_cpu = smp_processor_id(), target_cpu; struct task_struct *tsk, *target_tsk; - int cpu, best_cpu, weight, best_weight, i; + int cpu, best_cpu, i; unsigned long flags; - best_weight = 0; /* prevents negative weight */ - spin_lock_irqsave(&runqueue_lock, flags); /* @@ -289,20 +234,50 @@ for (i = 0; i < smp_num_cpus; i++) { cpu = cpu_logical_map(i); tsk = cpu_curr(cpu); - if (related(tsk, p)) - goto out_no_target; - weight = preemption_goodness(tsk, p, cpu); - if (weight > best_weight) { - best_weight = weight; + if (tsk == idle_task(cpu)) target_tsk = tsk; - } } + if (target_tsk && p->avg_slice > cacheflush_time) + goto send_now; + + tsk = cpu_curr(best_cpu); + if (preemption_goodness(tsk, p, best_cpu) > 0) + target_tsk = tsk; + /* * found any suitable CPU? */ if (!target_tsk) - goto out_no_target; + { + int best_weight; + +#if 1 + if ((p->policy & ~SCHED_YIELD) == SCHED_OTHER) + goto out_no_target; +#endif + + /* This is a realtime task so try to reschedule all + CPUs (that maybe are running SCHED_OTHER tasks), + to decrease the scheduler latency. */ + for (best_weight = i = 0; i < smp_num_cpus; i++) { + int weight; + + cpu = cpu_logical_map(i); + if (cpu == best_cpu) + /* just checked previously */ + continue; + tsk = cpu_curr(cpu); + weight = preemption_goodness(tsk, p, cpu); + if (weight > best_weight) + { + best_weight = weight; + target_tsk = tsk; + } + } + if (!target_tsk) + goto out_no_target; + } send_now: target_cpu = target_tsk->processor; @@ -328,35 +303,6 @@ #endif } -static void reschedule_idle(struct task_struct * p) -{ -#ifdef __SMP__ - int cpu = smp_processor_id(); - /* - * ("wakeup()" should not be called before we've initialized - * SMP completely. - * Basically a not-yet initialized SMP subsystem can be - * considered as a not-yet working scheduler, simply dont use - * it before it's up and running ...) - * - * SMP rescheduling is done in 2 passes: - * - pass #1: faster: 'quick decisions' - * - pass #2: slower: 'lets try and find a suitable CPU' - */ - - /* - * Pass #1. (subtle. We might be in the middle of __switch_to, so - * to preserve scheduling atomicity we have to use cpu_curr) - */ - if ((p->processor == cpu) && related(cpu_curr(cpu), p)) - return; -#endif /* __SMP__ */ - /* - * Pass #2 - */ - reschedule_idle_slow(p); -} - /* * Careful! 
* @@ -498,17 +444,19 @@ (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5 }; +static struct timer_list ** run_timer_list_running; + #define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0])) static unsigned long timer_jiffies = 0; static inline void insert_timer(struct timer_list *timer, - struct timer_list **vec, int idx) + struct timer_list **vec) { - if ((timer->next = vec[idx])) - vec[idx]->prev = timer; - vec[idx] = timer; - timer->prev = (struct timer_list *)&vec[idx]; + if ((timer->next = *vec)) + (*vec)->prev = timer; + *vec = timer; + timer->prev = (struct timer_list *)vec; } static inline void internal_add_timer(struct timer_list *timer) @@ -518,31 +466,36 @@ */ unsigned long expires = timer->expires; unsigned long idx = expires - timer_jiffies; + struct timer_list ** vec; - if (idx < TVR_SIZE) { + if (run_timer_list_running) + vec = run_timer_list_running; + else if (idx < TVR_SIZE) { int i = expires & TVR_MASK; - insert_timer(timer, tv1.vec, i); + vec = tv1.vec + i; } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { int i = (expires >> TVR_BITS) & TVN_MASK; - insert_timer(timer, tv2.vec, i); + vec = tv2.vec + i; } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; - insert_timer(timer, tv3.vec, i); + vec = tv3.vec + i; } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; - insert_timer(timer, tv4.vec, i); + vec = tv4.vec + i; } else if ((signed long) idx < 0) { /* can happen if you add a timer with expires == jiffies, * or you set a timer to go off in the past */ - insert_timer(timer, tv1.vec, tv1.index); + vec = tv1.vec + tv1.index; } else if (idx <= 0xffffffffUL) { int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; - insert_timer(timer, tv5.vec, i); + vec = tv5.vec + i; } else { /* Can only get here on architectures with 64-bit jiffies */ timer->next = timer->prev = timer; + return; } + insert_timer(timer, vec); } spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED; @@ -1128,13 +1081,14 @@ { spin_lock_irq(&timerlist_lock); while ((long)(jiffies - timer_jiffies) >= 0) { - struct timer_list *timer; + struct timer_list *timer, * queued = NULL; if (!tv1.index) { int n = 1; do { cascade_timers(tvecs[n]); } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS); } + run_timer_list_running = &queued; while ((timer = tv1.vec[tv1.index])) { void (*fn)(unsigned long) = timer->function; unsigned long data = timer->data; @@ -1144,8 +1098,15 @@ fn(data); spin_lock_irq(&timerlist_lock); } + run_timer_list_running = NULL; ++timer_jiffies; tv1.index = (tv1.index + 1) & TVR_MASK; + while (queued) + { + timer = queued; + queued = queued->next; + internal_add_timer(timer); + } } spin_unlock_irq(&timerlist_lock); } diff -urN 2.2.14/mm/Makefile 2.2.14aa6/mm/Makefile --- 2.2.14/mm/Makefile Mon Jan 18 02:27:01 1999 +++ 2.2.14aa6/mm/Makefile Wed Feb 2 02:31:42 2000 @@ -12,4 +12,8 @@ vmalloc.o slab.o \ swap.o vmscan.o page_io.o page_alloc.o swap_state.o swapfile.o +ifeq ($(CONFIG_BIGMEM),y) +O_OBJS += bigmem.o +endif + include $(TOPDIR)/Rules.make diff -urN 2.2.14/mm/bigmem.c 2.2.14aa6/mm/bigmem.c --- 2.2.14/mm/bigmem.c Thu Jan 1 01:00:00 1970 +++ 2.2.14aa6/mm/bigmem.c Wed Feb 2 02:31:42 2000 @@ -0,0 +1,87 @@ +/* + * BIGMEM common code and variables. 
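+ *
+ * BIGMEM pages live above bigmem_mapnr and have no permanent kernel
+ * mapping, so block drivers cannot do I/O on them directly.  The
+ * helpers in this file therefore bounce the data through an ordinary
+ * page around swap I/O, following the pattern used below:
+ *
+ *	vaddr = kmap(page_address(page), KM_READ);
+ *	copy_page(regular_page, vaddr);
+ *	kunmap(vaddr, KM_READ);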
+ * + * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de + * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de + */ + +#include +#include +#include + +unsigned long bigmem_mapnr; +int nr_free_bigpages = 0; + +struct page * prepare_bigmem_swapout(struct page * page) +{ + /* if this is a bigmem page so it can't be swapped out directly + otherwise the b_data buffer addresses will break + the lowlevel device drivers. */ + if (PageBIGMEM(page)) + { + unsigned long regular_page; + unsigned long vaddr; + + regular_page = __get_free_page(GFP_ATOMIC); + if (!regular_page) + return NULL; + + vaddr = kmap(page_address(page), KM_READ); + copy_page(regular_page, vaddr); + kunmap(vaddr, KM_READ); + + /* ok, we can just forget about our bigmem page since + we stored its data into the new regular_page. */ + __free_page(page); + + page = MAP_NR(regular_page) + mem_map; + } + return page; +} + +struct page * replace_with_bigmem(struct page * page) +{ + if (!PageBIGMEM(page) && nr_free_bigpages) + { + unsigned long kaddr; + + kaddr = __get_free_page(GFP_ATOMIC|GFP_BIGMEM); + if (kaddr) + { + struct page * bigmem_page; + + bigmem_page = MAP_NR(kaddr) + mem_map; + if (PageBIGMEM(bigmem_page)) + { + unsigned long vaddr; + + vaddr = kmap(kaddr, KM_WRITE); + copy_page(vaddr, page_address(page)); + kunmap(vaddr, KM_WRITE); + + /* Preserve the caching of the swap_entry. */ + bigmem_page->offset = page->offset; + + /* We can just forget the old page since + we stored its data into the new + bigmem_page. */ + __free_page(page); + + page = bigmem_page; + } + } + } + return page; +} + +unsigned long prepare_bigmem_shm_swapin(unsigned long page) +{ + if (!PageBIGMEM(&mem_map[MAP_NR(page)])) + return page; + + free_page(page); + + /* no need to clear the page since it will be rewrited by the + swapin. */ + return __get_free_page(GFP_ATOMIC); +} diff -urN 2.2.14/mm/filemap.c 2.2.14aa6/mm/filemap.c --- 2.2.14/mm/filemap.c Wed Jan 5 14:16:56 2000 +++ 2.2.14aa6/mm/filemap.c Wed Feb 2 02:31:43 2000 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -138,11 +139,15 @@ int shrink_mmap(int priority, int gfp_mask) { static unsigned long clock = 0; +#ifndef CONFIG_BIGMEM unsigned long limit = num_physpages; +#else + unsigned long limit = bigmem_mapnr; +#endif struct page * page; int count; - count = limit >> priority; + count = limit / priority; page = mem_map + clock; do { @@ -154,7 +159,11 @@ */ page++; clock++; +#ifndef CONFIG_BIGMEM if (clock >= max_mapnr) { +#else + if (clock >= bigmem_mapnr) { +#endif clock = 0; page = mem_map; } @@ -176,6 +185,9 @@ if (atomic_read(&page->count) != 1) continue; + if (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page)) + continue; + count--; /* @@ -335,8 +347,9 @@ add_wait_queue(&page->wait, &wait); repeat: tsk->state = TASK_UNINTERRUPTIBLE; - run_task_queue(&tq_disk); + mb(); if (PageLocked(page)) { + run_task_queue(&tq_disk); schedule(); goto repeat; } diff -urN 2.2.14/mm/memory.c 2.2.14aa6/mm/memory.c --- 2.2.14/mm/memory.c Wed Jan 5 14:16:56 2000 +++ 2.2.14aa6/mm/memory.c Wed Feb 2 02:31:43 2000 @@ -31,12 +31,18 @@ /* * 05.04.94 - Multi-page memory management added for v1.1. 
* Idea by Alex Bligh (alex@cconcepts.co.uk) + * + * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG + * (Gerhard.Wichert@pdb.siemens.de) */ #include #include #include #include +#include +#include +#include #include #include @@ -53,10 +59,10 @@ static inline void copy_cow_page(unsigned long from, unsigned long to) { if (from == ZERO_PAGE(to)) { - clear_page(to); + clear_bigpage(to); return; } - copy_page(to, from); + copy_bigpage(to, from); } mem_map_t * mem_map = NULL; @@ -397,6 +403,183 @@ } } + +/* + * Do a quick page-table lookup for a single page. + */ +static unsigned long get_page(unsigned long address) +{ + pgd_t *pgd; + pmd_t *pmd; + + pgd = pgd_offset(current->mm, address); + pmd = pmd_offset(pgd, address); + if (pmd) { + pte_t * pte = pte_offset(pmd, address); + if (pte && pte_present(*pte)) { + return pte_page(*pte); + } + } + + printk(KERN_ERR "Missing page in lock_down_page\n"); + return 0; +} + +/* + * Given a physical address, is there a useful struct page pointing to it? + */ + +static struct page * get_page_map(unsigned long page) +{ + struct page *map; + + if (MAP_NR(page) >= max_mapnr) + return 0; + if (page == ZERO_PAGE(page)) + return 0; + map = mem_map + MAP_NR(page); + if (PageReserved(map)) + return 0; + return map; +} + +/* + * Force in an entire range of pages from the current process's user VA, + * and pin and lock the pages for IO. + */ + +#define dprintk(x...) +int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) +{ + unsigned long ptr, end; + int err; + struct mm_struct * mm; + struct vm_area_struct * vma = 0; + unsigned long page; + struct page * map; + int doublepage = 0; + int repeat = 0; + int i; + + /* Make sure the iobuf is not already mapped somewhere. */ + if (iobuf->nr_pages) + return -EINVAL; + + mm = current->mm; + dprintk ("map_user_kiobuf: begin\n"); + + ptr = va & PAGE_MASK; + end = (va + len + PAGE_SIZE - 1) & PAGE_MASK; + err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT); + if (err) + return err; + + repeat: + down(&mm->mmap_sem); + + err = -EFAULT; + iobuf->locked = 1; + iobuf->offset = va & ~PAGE_MASK; + iobuf->length = len; + + i = 0; + + /* + * First of all, try to fault in all of the necessary pages + */ + while (ptr < end) { + if (!vma || ptr >= vma->vm_end) { + vma = find_vma(current->mm, ptr); + if (!vma) + goto out_unlock; + } + if (!handle_mm_fault(current, vma, ptr, (rw==READ))) + goto out_unlock; + page = get_page(ptr); + if (!page) { + printk (KERN_ERR "Missing page in map_user_kiobuf\n"); + goto out_unlock; + } + map = get_page_map(page); + if (map) { + if (PageLocked(map)) + goto retry; + atomic_inc(&map->count); + set_bit(PG_locked, &map->flags); + } + dprintk ("Installing page %p %p: %d\n", (void *)page, map, i); + iobuf->pagelist[i] = page; + iobuf->maplist[i] = map; + iobuf->nr_pages = ++i; + + ptr += PAGE_SIZE; + } + + up(&mm->mmap_sem); + dprintk ("map_user_kiobuf: end OK\n"); + return 0; + + out_unlock: + up(&mm->mmap_sem); + unmap_kiobuf(iobuf); + dprintk ("map_user_kiobuf: end %d\n", err); + return err; + + retry: + + /* + * Undo the locking so far, wait on the page we got to, and try again. + */ + unmap_kiobuf(iobuf); + up(&mm->mmap_sem); + + /* + * Did the release also unlock the page we got stuck on? + */ + if (!PageLocked(map)) { + /* If so, we may well have the page mapped twice in the + * IO address range. Bad news. Of course, it _might_ + * just be a coincidence, but if it happens more than + * once, chances are we have a double-mapped page. 
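+	 * This is also the situation the comment at the top of
+	 * iobuf.h warns about: a page may appear only once in a
+	 * kiovec, because a second occurrence would find the page
+	 * already locked by ourselves and we would keep retrying;
+	 * after a few attempts we give up with -EINVAL instead of
+	 * looping forever.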
*/ + if (++doublepage >= 3) { + return -EINVAL; + } + } + + /* + * Try again... + */ + wait_on_page(map); + if (++repeat < 16) + goto repeat; + return -EAGAIN; +} + + +/* + * Unmap all of the pages referenced by a kiobuf. We release the pages, + * and unlock them if they were locked. + */ + +void unmap_kiobuf (struct kiobuf *iobuf) +{ + int i; + struct page *map; + + for (i = 0; i < iobuf->nr_pages; i++) { + map = iobuf->maplist[i]; + + if (map && iobuf->locked) { + clear_bit(PG_locked, &map->flags); + wake_up(&map->wait); + __free_page(map); + } + } + + iobuf->nr_pages = 0; + iobuf->locked = 0; +} + static inline void zeromap_pte_range(pte_t * pte, unsigned long address, unsigned long size, pgprot_t prot) { @@ -613,7 +796,7 @@ struct page * page_map; pte = *page_table; - new_page = __get_free_page(GFP_USER); + new_page = __get_free_page(GFP_BIGUSER); /* Did swap_out() unmapped the protected page while we slept? */ if (pte_val(*page_table) != pte_val(pte)) goto end_wp_page; @@ -807,10 +990,10 @@ { pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); if (write_access) { - unsigned long page = __get_free_page(GFP_USER); + unsigned long page = __get_free_page(GFP_BIGUSER); if (!page) return -1; - clear_page(page); + clear_bigpage(page); entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); vma->vm_mm->rss++; tsk->min_flt++; diff -urN 2.2.14/mm/page_alloc.c 2.2.14aa6/mm/page_alloc.c --- 2.2.14/mm/page_alloc.c Wed Jan 5 14:16:56 2000 +++ 2.2.14aa6/mm/page_alloc.c Wed Feb 2 02:31:43 2000 @@ -3,6 +3,7 @@ * * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds * Swap reorganised 29.12.95, Stephen Tweedie + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ #include @@ -13,6 +14,7 @@ #include #include #include +#include /* export bigmem vars */ #include #include /* for copy_to/from_user */ @@ -35,7 +37,11 @@ #else #define NR_MEM_LISTS 10 #endif +#ifndef CONFIG_BIGMEM #define NR_MEM_TYPES 2 /* GFP_DMA vs not for now. 
*/ +#else +#define NR_MEM_TYPES 3 +#endif /* The start of this MUST match the start of "struct page" */ struct free_area_struct { @@ -104,6 +110,13 @@ #define list(x) (mem_map+(x)) +#ifdef CONFIG_BIGMEM + if (map_nr >= bigmem_mapnr) + { + area = free_area[2] + order; + nr_free_bigpages -= mask; + } +#endif map_nr &= mask; nr_free_pages -= mask; while (mask + (1 << (NR_MEM_LISTS-1))) { @@ -123,7 +136,7 @@ spin_unlock_irqrestore(&page_alloc_lock, flags); } -static inline void __free_pages(struct page *page, unsigned long order) +void __free_pages(struct page *page, unsigned long order) { if (!PageReserved(page) && atomic_dec_and_test(&page->count)) { if (PageSwapCache(page)) @@ -134,11 +147,6 @@ } } -void __free_page(struct page *page) -{ - __free_pages(page, 0); -} - void free_pages(unsigned long addr, unsigned long order) { unsigned long map_nr = MAP_NR(addr); @@ -153,6 +161,17 @@ #define MARK_USED(index, order, area) \ change_bit((index) >> (1+(order)), (area)->map) #define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT)) +#ifdef CONFIG_BIGMEM +#define UPDATE_NR_FREE_BIGPAGES(map_nr, order) \ + do \ + { \ + if ((map_nr) >= bigmem_mapnr) \ + nr_free_bigpages -= 1 << (order); \ + } \ + while (0) +#else +#define UPDATE_NR_FREE_BIGPAGES(map_nr, order) do { } while (0) +#endif #define RMQUEUE_TYPE(order, type) \ do { struct free_area_struct * area = free_area[type]+order; \ unsigned long new_order = order; \ @@ -163,6 +182,7 @@ map_nr = ret - mem_map; \ MARK_USED(map_nr, new_order, area); \ nr_free_pages -= 1 << order; \ + UPDATE_NR_FREE_BIGPAGES(map_nr, order); \ area->count--; \ EXPAND(ret, map_nr, order, new_order, area); \ spin_unlock_irqrestore(&page_alloc_lock, flags); \ @@ -184,8 +204,6 @@ atomic_set(&map->count, 1); \ } while (0) -int low_on_memory = 0; - unsigned long __get_free_pages(int gfp_mask, unsigned long order) { unsigned long flags; @@ -211,17 +229,66 @@ */ if (!(current->flags & PF_MEMALLOC)) { int freed; + extern struct wait_queue * kswapd_wait; - if (nr_free_pages > freepages.min) { - if (!low_on_memory) +#ifndef CONFIG_BIGMEM + if (nr_free_pages >= freepages.high) + { + /* share RO cachelines in fast path */ + if (current->trashing_mem) + current->trashing_mem = 0; + goto ok_to_allocate; + } + else + { + if (nr_free_pages < freepages.low) + wake_up_interruptible(&kswapd_wait); + if (nr_free_pages > freepages.min && !current->trashing_mem) goto ok_to_allocate; - if (nr_free_pages >= freepages.high) { - low_on_memory = 0; + } + + current->trashing_mem = 1; +#else + if (gfp_mask & __GFP_BIGMEM) + { + if (nr_free_pages >= freepages.high) + { + /* share RO cachelines in fast path */ + if (current->trashing_bigmem) + current->trashing_bigmem = 0; goto ok_to_allocate; } + else + { + if (nr_free_pages < freepages.low) + wake_up_interruptible(&kswapd_wait); + if (nr_free_pages > freepages.min && !current->trashing_bigmem) + goto ok_to_allocate; + } + + current->trashing_bigmem = 1; } + else + { + if (nr_free_pages-nr_free_bigpages >= freepages.high) + { + /* share RO cachelines in fast path */ + if (current->trashing_mem) + current->trashing_mem = 0; + goto ok_to_allocate; + } + else + { + if (nr_free_pages-nr_free_bigpages < freepages.low) + wake_up_interruptible(&kswapd_wait); + if (nr_free_pages-nr_free_bigpages > freepages.min && !current->trashing_mem) + goto ok_to_allocate; + } + + current->trashing_mem = 1; + } +#endif - low_on_memory = 1; current->flags |= PF_MEMALLOC; freed = try_to_free_pages(gfp_mask); current->flags &= ~PF_MEMALLOC; @@ -233,20 +300,16 @@ 
spin_lock_irqsave(&page_alloc_lock, flags); /* if it's not a dma request, try non-dma first */ if (!(gfp_mask & __GFP_DMA)) + { +#ifdef CONFIG_BIGMEM + if (gfp_mask & __GFP_BIGMEM) + RMQUEUE_TYPE(order, 2); +#endif RMQUEUE_TYPE(order, 0); + } RMQUEUE_TYPE(order, 1); spin_unlock_irqrestore(&page_alloc_lock, flags); - /* - * If we can schedule, do so, and make sure to yield. - * We may be a real-time process, and if kswapd is - * waiting for us we need to allow it to run a bit. - */ - if (gfp_mask & __GFP_WAIT) { - current->policy |= SCHED_YIELD; - schedule(); - } - nopage: return 0; } @@ -262,7 +325,9 @@ unsigned type; spin_lock_irqsave(&page_alloc_lock, flags); - printk("Free pages: %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10)); + printk("Free pages: %6dkB (%6dkB BigMem)\n ( ", + nr_free_pages<<(PAGE_SHIFT-10), + nr_free_bigpages<<(PAGE_SHIFT-10)); printk("Free: %d (%d %d %d)\n", nr_free_pages, freepages.min, @@ -270,7 +335,19 @@ freepages.high); for (type = 0; type < NR_MEM_TYPES; type++) { unsigned long total = 0; +#ifdef CONFIG_BIGMEM + switch (type) + { + case 0: + case 1: +#endif printk("%sDMA: ", type ? "" : "Non"); +#ifdef CONFIG_BIGMEM + break; + case 2: + printk("BIGMEM: "); + } +#endif for (order=0 ; order < NR_MEM_LISTS; order++) { unsigned long nr = free_area[type][order].count; @@ -422,6 +499,8 @@ * this process. */ delete_from_swap_cache(page_map); + page_map = replace_with_bigmem(page_map); + page = page_address(page_map); set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)))); return 1; } diff -urN 2.2.14/mm/page_io.c 2.2.14aa6/mm/page_io.c --- 2.2.14/mm/page_io.c Mon Jan 17 16:44:50 2000 +++ 2.2.14aa6/mm/page_io.c Wed Feb 2 02:31:42 2000 @@ -86,9 +86,22 @@ if (PageSwapCache(page)) { /* Make sure we are the only process doing I/O with this swap page. 
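	 * The explicit wait loop below is used instead of sleep_on():
	 * the task state is set to TASK_UNINTERRUPTIBLE and a memory
	 * barrier issued before the lock bit is re-tested, so a wakeup
	 * arriving between the test and schedule() is not lost.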
*/ - while (test_and_set_bit(offset,p->swap_lockmap)) { - run_task_queue(&tq_disk); - sleep_on(&lock_queue); + if (test_and_set_bit(offset, p->swap_lockmap)) + { + struct wait_queue __wait; + + __wait.task = current; + add_wait_queue(&lock_queue, &__wait); + for (;;) { + current->state = TASK_UNINTERRUPTIBLE; + mb(); + if (!test_and_set_bit(offset, p->swap_lockmap)) + break; + run_task_queue(&tq_disk); + schedule(); + } + current->state = TASK_RUNNING; + remove_wait_queue(&lock_queue, &__wait); } /* diff -urN 2.2.14/mm/swap.c 2.2.14aa6/mm/swap.c --- 2.2.14/mm/swap.c Mon Jan 18 02:27:01 1999 +++ 2.2.14aa6/mm/swap.c Wed Feb 2 02:31:42 2000 @@ -47,13 +47,13 @@ atomic_t nr_async_pages = ATOMIC_INIT(0); buffer_mem_t buffer_mem = { - 2, /* minimum percent buffer */ + 0, /* minimum percent buffer */ 10, /* borrow percent buffer */ 60 /* maximum percent buffer */ }; buffer_mem_t page_cache = { - 2, /* minimum percent page cache */ + 0, /* minimum percent page cache */ 15, /* borrow percent page cache */ 75 /* maximum */ }; diff -urN 2.2.14/mm/vmalloc.c 2.2.14aa6/mm/vmalloc.c --- 2.2.14/mm/vmalloc.c Tue Jul 13 00:33:04 1999 +++ 2.2.14aa6/mm/vmalloc.c Wed Feb 2 02:31:43 2000 @@ -2,6 +2,7 @@ * linux/mm/vmalloc.c * * Copyright (C) 1993 Linus Torvalds + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ #include @@ -94,7 +95,7 @@ unsigned long page; if (!pte_none(*pte)) printk("alloc_area_pte: page already exists\n"); - page = __get_free_page(GFP_KERNEL); + page = __get_free_page(GFP_KERNEL|GFP_BIGMEM); if (!page) return -ENOMEM; set_pte(pte, mk_pte(page, PAGE_KERNEL)); diff -urN 2.2.14/mm/vmscan.c 2.2.14aa6/mm/vmscan.c --- 2.2.14/mm/vmscan.c Wed Jan 5 14:16:56 2000 +++ 2.2.14aa6/mm/vmscan.c Wed Feb 2 02:31:43 2000 @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -32,7 +33,7 @@ * have died while we slept). */ static int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma, - unsigned long address, pte_t * page_table, int gfp_mask) + unsigned long address, pte_t * page_table, int gfp_mask, int * IO) { pte_t pte; unsigned long entry; @@ -60,7 +61,8 @@ if (PageReserved(page_map) || PageLocked(page_map) - || ((gfp_mask & __GFP_DMA) && !PageDMA(page_map))) + || ((gfp_mask & __GFP_DMA) && !PageDMA(page_map)) + || (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page_map))) return 0; /* @@ -151,21 +153,29 @@ if (!entry) return 0; /* No swap space left */ + if (!(page_map = prepare_bigmem_swapout(page_map))) + goto out_swap_free; + vma->vm_mm->rss--; tsk->nswap++; set_pte(page_table, __pte(entry)); flush_tlb_page(vma, address); swap_duplicate(entry); /* One for the process, one for the swap cache */ + *IO = 1; add_to_swap_cache(page_map, entry); /* We checked we were unlocked way up above, and we have been careful not to stall until here */ set_bit(PG_locked, &page_map->flags); /* OK, do a physical asynchronous write to swap. 
*/ - rw_swap_page(WRITE, entry, (char *) page, 0); + rw_swap_page(WRITE, entry, (char *) page_address(page_map), 0); __free_page(page_map); return 1; + + out_swap_free: + swap_free(entry); + return 0; } /* @@ -183,7 +193,7 @@ */ static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct * vma, - pmd_t *dir, unsigned long address, unsigned long end, int gfp_mask) + pmd_t *dir, unsigned long address, unsigned long end, int gfp_mask, int * IO) { pte_t * pte; unsigned long pmd_end; @@ -205,7 +215,7 @@ do { int result; tsk->mm->swap_address = address + PAGE_SIZE; - result = try_to_swap_out(tsk, vma, address, pte, gfp_mask); + result = try_to_swap_out(tsk, vma, address, pte, gfp_mask, IO); if (result) return result; address += PAGE_SIZE; @@ -215,7 +225,7 @@ } static inline int swap_out_pgd(struct task_struct * tsk, struct vm_area_struct * vma, - pgd_t *dir, unsigned long address, unsigned long end, int gfp_mask) + pgd_t *dir, unsigned long address, unsigned long end, int gfp_mask, int * IO) { pmd_t * pmd; unsigned long pgd_end; @@ -235,7 +245,7 @@ end = pgd_end; do { - int result = swap_out_pmd(tsk, vma, pmd, address, end, gfp_mask); + int result = swap_out_pmd(tsk, vma, pmd, address, end, gfp_mask, IO); if (result) return result; address = (address + PMD_SIZE) & PMD_MASK; @@ -245,7 +255,7 @@ } static int swap_out_vma(struct task_struct * tsk, struct vm_area_struct * vma, - unsigned long address, int gfp_mask) + unsigned long address, int gfp_mask, int * IO) { pgd_t *pgdir; unsigned long end; @@ -258,7 +268,7 @@ end = vma->vm_end; while (address < end) { - int result = swap_out_pgd(tsk, vma, pgdir, address, end, gfp_mask); + int result = swap_out_pgd(tsk, vma, pgdir, address, end, gfp_mask, IO); if (result) return result; address = (address + PGDIR_SIZE) & PGDIR_MASK; @@ -267,7 +277,7 @@ return 0; } -static int swap_out_process(struct task_struct * p, int gfp_mask) +static int swap_out_process(struct task_struct * p, int gfp_mask, int * IO) { unsigned long address; struct vm_area_struct* vma; @@ -286,7 +296,7 @@ address = vma->vm_start; for (;;) { - int result = swap_out_vma(p, vma, address, gfp_mask); + int result = swap_out_vma(p, vma, address, gfp_mask, IO); if (result) return result; vma = vma->vm_next; @@ -307,7 +317,7 @@ * N.B. This function returns only 0 or 1. Return values != 1 from * the lower level routines result in continued processing. */ -static int swap_out(unsigned int priority, int gfp_mask) +static int swap_out(unsigned int priority, int gfp_mask, int * IO) { struct task_struct * p, * pbest; int assign = 0, counter; @@ -327,7 +337,7 @@ * Think of swap_cnt as a "shadow rss" - it tells us which process * we want to page out (always try largest first). */ - counter = nr_tasks / (priority+1); + counter = nr_tasks / priority; if (counter < 1) counter = 1; @@ -361,7 +371,7 @@ goto out; } - if (swap_out_process(pbest, gfp_mask)) + if (swap_out_process(pbest, gfp_mask, IO)) return 1; } out: @@ -381,13 +391,14 @@ { int priority; int count = SWAP_CLUSTER_MAX; + int IO = 0; lock_kernel(); /* Always trim SLAB caches when memory gets low. */ kmem_cache_reap(gfp_mask); - priority = 6; + priority = 5; do { while (shrink_mmap(priority, gfp_mask)) { if (!--count) @@ -397,23 +408,26 @@ /* Try to get rid of some shared memory pages.. */ if (gfp_mask & __GFP_IO) { while (shm_swap(priority, gfp_mask)) { + IO = 1; if (!--count) goto done; } } /* Then, try to page stuff out.. 
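	 * (swap_out() reports through the IO flag whether it actually
	 *  queued any swap writes; together with the shm_swap() case
	 *  above, this lets do_try_to_free_pages() unplug the disk
	 *  queue once after the scan so the queued writes get started.)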
          */
-        while (swap_out(priority, gfp_mask)) {
+        while (swap_out(priority, gfp_mask, &IO)) {
             if (!--count)
                 goto done;
         }
 
         shrink_dcache_memory(priority, gfp_mask);
-    } while (--priority >= 0);
+    } while (--priority > 0);
 done:
     unlock_kernel();
 
+    if (IO)
+        run_task_queue(&tq_disk);
-    return priority >= 0;
+    return priority > 0;
 }
 
 /*
@@ -437,7 +451,7 @@
     printk ("Starting kswapd v%.*s\n", i, s);
 }
 
-static struct wait_queue * kswapd_wait = NULL;
+struct wait_queue * kswapd_wait;
 
 /*
  * The background pageout daemon, started as a kernel thread
@@ -485,8 +499,11 @@
          * the processes needing more memory will wake us
          * up on a more timely basis.
          */
-        interruptible_sleep_on_timeout(&kswapd_wait, HZ);
-        while (nr_free_pages < freepages.high)
+        interruptible_sleep_on(&kswapd_wait);
+
+        /* kswapd is critical to provide GFP_ATOMIC
+           allocations (not GFP_BIGMEM ones). */
+        while (nr_free_pages - nr_free_bigpages < freepages.high)
         {
             if (do_try_to_free_pages(GFP_KSWAPD))
             {
@@ -519,7 +536,6 @@
 {
     int retval = 1;
 
-    wake_up_interruptible(&kswapd_wait);
     if (gfp_mask & __GFP_WAIT)
         retval = do_try_to_free_pages(gfp_mask);
     return retval;
diff -urN 2.2.14/net/ipv4/tcp_input.c 2.2.14aa6/net/ipv4/tcp_input.c
--- 2.2.14/net/ipv4/tcp_input.c Fri Jan 7 18:19:25 2000
+++ 2.2.14aa6/net/ipv4/tcp_input.c Wed Feb 2 02:31:42 2000
@@ -96,6 +96,7 @@
  */
 static void tcp_delack_estimator(struct tcp_opt *tp)
 {
+    tcp_exit_quickack_mode(tp);
     if(tp->ato == 0) {
         tp->lrcvtime = tcp_time_stamp;
 
@@ -114,10 +115,7 @@
             if(m > tp->rto)
                 tp->ato = tp->rto;
             else {
-                /* This funny shift makes sure we
-                 * clear the "quick ack mode" bit.
-                 */
-                tp->ato = ((tp->ato << 1) >> 2) + m;
+                tp->ato = (tp->ato >> 1) + m;
             }
         }
     }
 }
diff -urN 2.2.14/net/ipv4/tcp_ipv4.c 2.2.14aa6/net/ipv4/tcp_ipv4.c
--- 2.2.14/net/ipv4/tcp_ipv4.c Fri Jan 7 18:19:25 2000
+++ 2.2.14aa6/net/ipv4/tcp_ipv4.c Wed Feb 2 02:31:42 2000
@@ -1394,6 +1394,7 @@
         newtp->snd_una = req->snt_isn + 1;
         newtp->srtt = 0;
         newtp->ato = 0;
+        tcp_enter_quickack_mode(newtp);
         newtp->snd_wl1 = req->rcv_isn;
         newtp->snd_wl2 = req->snt_isn;
 
@@ -1937,6 +1938,7 @@
     skb_queue_head_init(&tp->out_of_order_queue);
     tcp_init_xmit_timers(sk);
+    tcp_enter_quickack_mode(tp);
 
     tp->rto = TCP_TIMEOUT_INIT;        /*TCP_WRITE_TIME*/
     tp->mdev = TCP_TIMEOUT_INIT;
     tp->mss_clamp = ~0;
diff -urN 2.2.14/net/ipv4/tcp_output.c 2.2.14aa6/net/ipv4/tcp_output.c
--- 2.2.14/net/ipv4/tcp_output.c Fri Jan 7 18:19:25 2000
+++ 2.2.14aa6/net/ipv4/tcp_output.c Wed Feb 2 02:31:42 2000
@@ -752,12 +752,16 @@
         }
     } else {
         /* Socket is locked, keep trying until memory is available. */
-        do {
+        for (;;) {
             skb = sock_wmalloc(sk,
                                (MAX_HEADER + sk->prot->max_header),
                                1, GFP_KERNEL);
-        } while (skb == NULL);
+            if (skb)
+                break;
+            current->policy |= SCHED_YIELD;
+            schedule();
+        }
 
         /* Reserve space for headers and prepare control bits. */
         skb_reserve(skb, MAX_HEADER + sk->prot->max_header);
@@ -1004,9 +1008,20 @@
     unsigned long timeout;
 
     /* Stay within the limit we were given */
-    timeout = tp->ato;
+    timeout = tp->ato & ~(1<<31);
     if (timeout > max_timeout)
         timeout = max_timeout;
+    if (!timeout)
+    {
+        timeout = tp->rto;
+        if ((signed) timeout <= 0)
+        {
+            printk(KERN_ERR
+                   "tcp_send_delayed_ack: rto %ld!\n", timeout);
+            timeout = 1;
+        }
+        timeout = min(timeout, max_timeout);
+    }
     timeout += jiffies;
 
     /* Use new timeout only if there wasn't a older one earlier.
      */
diff -urN 2.2.14/net/ipv4/tcp_timer.c 2.2.14aa6/net/ipv4/tcp_timer.c
--- 2.2.14/net/ipv4/tcp_timer.c Fri Jan 7 18:19:25 2000
+++ 2.2.14aa6/net/ipv4/tcp_timer.c Wed Feb 2 02:31:42 2000
@@ -173,7 +173,21 @@
         if (!atomic_read(&sk->sock_readers))
             tcp_send_ack(sk);
         else
-            tcp_send_delayed_ack(&(sk->tp_pinfo.af_tcp), HZ/10);
+        {
+            struct tcp_opt * tp = &(sk->tp_pinfo.af_tcp);
+            int rto;
+
+            rto = tp->rto;
+            if (rto <= 0)
+            {
+                printk(KERN_ERR
+                       "tcp_delack_timer: rto %d!\n", rto);
+                rto = 1;
+            }
+            rto = min(rto, HZ/10);
+            tp->delack_timer.expires = rto + jiffies;
+            add_timer(&tp->delack_timer);
+        }
     }
 }
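
The delayed-ACK changes in tcp_output.c and tcp_timer.c above follow a single rule: tp->ato may legitimately be zero, and its top bit can carry the quick-ack flag, so the delayed-ACK timer is always armed with a positive timeout clamped by both the retransmit timeout and the caller's maximum instead of trusting ato as-is. The stand-alone C sketch below only mirrors that timeout selection for illustration; it assumes bit 31 is the flag bit, as suggested by the ~(1<<31) mask in the hunk above, and pick_delack_timeout is an illustrative name, not a symbol from this patch or the kernel.

#include <stdio.h>

/* Hypothetical user-space rendering of the timeout selection added to
 * tcp_send_delayed_ack() above; ato, rto and max_timeout are in jiffies. */
static unsigned long pick_delack_timeout(unsigned long ato, long rto,
                                         unsigned long max_timeout)
{
    unsigned long timeout = ato & ~(1UL << 31);   /* strip the quick-ack bit */

    if (timeout > max_timeout)
        timeout = max_timeout;
    if (!timeout) {
        /* No ATO estimate yet: fall back to the retransmit timeout,
         * guarding against a non-positive rto, then clamp again. */
        timeout = rto > 0 ? (unsigned long) rto : 1;
        if (timeout > max_timeout)
            timeout = max_timeout;
    }
    return timeout;
}

int main(void)
{
    /* A fresh connection in quick-ack mode: ato holds only the flag bit,
     * so the fallback path yields min(rto, max_timeout). */
    printf("%lu\n", pick_delack_timeout(1UL << 31, 300, 10));  /* prints 10 */
    printf("%lu\n", pick_delack_timeout(25, 300, 10));         /* prints 10 */
    printf("%lu\n", pick_delack_timeout(4, 300, 10));          /* prints 4  */
    return 0;
}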