diff -urN 2.2.15pre16/CREDITS 2.2.15pre16aa3/CREDITS --- 2.2.15pre16/CREDITS Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/CREDITS Thu Mar 30 16:00:57 2000 @@ -1339,6 +1339,13 @@ D: XF86_8514 D: cfdisk (curses based disk partitioning program) +N: Heinz Mauelshagen +E: mge@EZ-Darmstadt.Telekom.de +D: Logical Volume Manager +S: Bartningstr. 12 +S: 64289 Darmstadt +S: Germany + N: Mike McLagan E: mike.mclagan@linux.org W: http://www.invlogic.com/~mmclagan diff -urN 2.2.15pre16/Documentation/Configure.help 2.2.15pre16aa3/Documentation/Configure.help --- 2.2.15pre16/Documentation/Configure.help Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/Documentation/Configure.help Thu Mar 30 16:00:57 2000 @@ -168,6 +168,11 @@ on the Alpha. The only time you would ever not say Y is to say M in order to debug the code. Say Y unless you know what you are doing. +Big memory support +CONFIG_BIGMEM + This option is required if you want to utilize physical memory which + is not covered by the kernel virtual address space (> 1GB). + Normal PC floppy disk support CONFIG_BLK_DEV_FD If you want to use the floppy disk drive(s) of your PC under Linux, @@ -938,6 +943,30 @@ called on26.o. You must also have a high-level driver for the type of device that you want to support. +Logical Volume Manager (LVM) support +CONFIG_BLK_DEV_LVM + This driver lets you combine several hard disks, hard disk partitions, + multiple devices or even loop devices (for evaluation purposes) into + a volume group. Imagine a volume group as a kind of virtual disk. + Logical volumes, which can be thought of as virtual partitions, + can be created in the volume group. You can resize volume groups and + logical volumes after creation, to match new capacity needs. + Logical volumes are accessed as block devices named + /dev/VolumeGroupName/LogicalVolumeName. + + For details see /usr/src/linux/Documentation/LVM-HOWTO. + + To get the newest software see <http://linux.msede.com/lvm>. + +Logical Volume Manager proc filesystem information +CONFIG_LVM_PROC_FS + If you say Y here, you are able to access overall Logical Volume Manager, + Volume Group, Logical and Physical Volume information in /proc/lvm. + + To use this option, make sure that the "proc filesystem support" + (CONFIG_PROC_FS) is enabled too. + + Multiple devices driver support CONFIG_BLK_DEV_MD This driver lets you combine several hard disk partitions into one @@ -9352,6 +9381,20 @@ If you think you have a use for such a device (such as periodic data sampling), then say Y here, and read Documentation/rtc.txt for details. + For DEC Alpha users it is highly recommended to say Y here; if you + don't need all the features, you can choose the lightweight version + afterwards. + +Use only lightweight version (no interrupts) +CONFIG_RTC_LIGHT + This option turns off extended features of the RTC driver that deal + with interrupts (periodic signals and alarm). If you only need this + driver to read and set your system hardware clock, say Y here. + If you are on DEC Alpha, enabling this option will allow the kernel + to receive system clock interrupts in the standard, traditional + manner (that is, from the RTC device). The fully featured RTC driver + would move the clock signal source to the PIT (Programmable + Interrupt Timer), like on a PC.
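The CONFIG_BIGMEM help text above is terse, so it is worth spelling out what the option means for kernel code: pages above the direct-mapped region can no longer be dereferenced directly and must first be mapped through a fixmap window. The ptrace.c changes later in this patch do exactly that; here is a minimal sketch of the pattern, using the kmap()/kunmap() interface this patch introduces (the helper name read_long is invented for illustration):

    /* Sketch only: read one word from a page that may live in BIGMEM.
     * "page" is the kernel address as ptrace.c computes it (page frame
     * plus byte offset); kmap() remaps it through a fixmap slot when
     * the page is above the direct-mapped limit. */
    static unsigned long read_long(unsigned long page)
    {
        unsigned long vaddr, retval;

        vaddr = kmap(page, KM_READ);    /* map into the KM_READ window */
        retval = *(unsigned long *) vaddr;
        kunmap(vaddr, KM_READ);         /* release the window */
        return retval;
    }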
Tadpole ANA H8 Support CONFIG_H8 diff -urN 2.2.15pre16/Documentation/LVM-HOWTO 2.2.15pre16aa3/Documentation/LVM-HOWTO --- 2.2.15pre16/Documentation/LVM-HOWTO Thu Jan 1 01:00:00 1970 +++ 2.2.15pre16aa3/Documentation/LVM-HOWTO Thu Mar 30 16:00:57 2000 @@ -0,0 +1,118 @@ +Heinz Mauelshagen's LVM (Logical Volume Manager) howto. 01/28/1999 + + +Abstract: +--------- +The LVM adds a kind of virtual disks and virtual partitions functionality +to the Linux operating system. + +It achieves this by adding an additional layer between the physical peripherals +and the I/O interface in the kernel. + +This allows the concatenation of several disk partitions or total disks +(so-called physical volumes or PVs) or even multiple devices +to form a storage pool (so-called Volume Group or VG) with +allocation units called physical extents (PEs). +You can think of the volume group as a virtual disk. +Please see the scenario below. + +Some or all PEs of this VG can then be allocated to so-called Logical Volumes +or LVs in units called logical extents or LEs. +Each LE is mapped to a corresponding PE. +LEs and PEs are equal in size. +Logical volumes are a kind of virtual partition. + + +The LVs can be used through device special files similar to the known +/dev/sd[a-z]* or /dev/hd[a-z]* named /dev/VolumeGroupName/LogicalVolumeName. + +But going beyond this, you are able to extend or reduce +VGs _AND_ LVs at runtime! + +So... +If for example the capacity of an LV gets too small and your VG containing +this LV is full, you could add another PV to that VG and simply extend +the LV afterwards. +If you reduce or delete an LV you can use the freed capacity for different +LVs in the same VG. + + +The above scenario looks like this: + + /------------------------------------------\ | /--PV2---\ VG 1 /--PVn---\ | | |-VGDA---| |-VGDA-- | | | |PE1PE2..| |PE1PE2..| | | | | ...... | | | | | | | | | | | /-----------------------\ | | | | \-------LV 1------------/ | | | | ..PEn| | ..PEn| | | \--------/ \--------/ | \------------------------------------------/ + +PV 1 could be /dev/sdc1 sized 3GB +PV n could be /dev/sde1 sized 4GB +VG 1 could be test_vg +LV 1 could be /dev/test_vg/test_lv +VGDA is the volume group descriptor area holding the LVM metadata +PE1 up to PEn is the number of physical extents on each disk (partition) + + + +For installation steps see INSTALL; use insmod(1)/modprobe(1) or kmod/kerneld(8) +to load the logical volume manager module if you did not build it +into the kernel. + + +Configuration steps for getting the above scenario: + +1. Set the partition system id to 0xFE on /dev/sdc1 and /dev/sde1. + +2. do a "pvcreate /dev/sd[ce]1" + For testing purposes you can use more than one partition on a disk. + You should not do this in production because in the case of + a striped LV you'll have a performance breakdown. + +3. do a "vgcreate test_vg /dev/sd[ce]1" to create the new VG named "test_vg" + which has the total capacity of both partitions. + vgcreate also activates the new volume group (transfers the metadata into + the LVM driver in the kernel) so that LVs can be created in the next step. + +4. do a "lvcreate -L1500 -ntest_lv test_vg" to get a 1500MB linear LV named + "test_lv" and its block device special file "/dev/test_vg/test_lv". + + Or do a "lvcreate -i2 -I4 -l100 -nanother_test_lv test_vg" to get a 100 LE + large logical volume with 2 stripes and stripesize 4 KB. + +5. For example generate a filesystem in one LV with + "mke2fs /dev/test_vg/test_lv" and mount it. + +6.
extend /dev/test_vg/test_lv to 1600MB with relative size by + "lvextend -L+100 /dev/test_vg/test_lv" + or with absolute size by + "lvextend -L1600 /dev/test_vg/test_lv" + +7. reduce /dev/test_vg/test_lv to 900 logical extents with relative extents by + "lvreduce -l-700 /dev/test_vg/test_lv" + or with absolute extents by + "lvreduce -l900 /dev/test_vg/test_lv" + +8. rename a VG by deactivating it with + "vgchange -an test_vg" # only VGs with _no_ open LVs can be deactivated! + "vgrename test_vg whatever" + and reactivate it again by + "vgchange -ay whatever" + +9. rename an LV after closing it by + "lvchange -an /dev/whatever/test_lv" # only closed LVs can be deactivated + "lvrename /dev/whatever/test_lv /dev/whatever/whatvolume" + or by + "lvrename whatever test_lv whatvolume" + and reactivate it again by + "lvchange -ay /dev/whatever/whatvolume" + +10. if you have Ted Ts'o's resize2fs program, you can resize the + ext2 type filesystems contained in logical volumes without destroying + the data by + "e2fsadm -L+100 /dev/test_vg/another_test_lv" diff -urN 2.2.15pre16/MAINTAINERS 2.2.15pre16aa3/MAINTAINERS --- 2.2.15pre16/MAINTAINERS Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/MAINTAINERS Thu Mar 30 16:00:57 2000 @@ -531,6 +531,13 @@ W: http://people.redhat.com/zab/maestro/ S: Supported +LOGICAL VOLUME MANAGER +P: Heinz Mauelshagen +M: linux-LVM@EZ-Darmstadt.Telekom.de +L: linux-LVM@msede.com +W: http://linux.msede.com/lvm +S: Maintained + M68K P: Jes Sorensen M: Jes.Sorensen@cern.ch diff -urN 2.2.15pre16/Makefile 2.2.15pre16aa3/Makefile --- 2.2.15pre16/Makefile Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/Makefile Thu Mar 30 16:00:57 2000 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 2 SUBLEVEL = 15 -EXTRAVERSION = pre16 +EXTRAVERSION = pre16aa3 ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) @@ -294,13 +294,15 @@ echo \#define LINUX_COMPILE_DOMAIN ; \ fi >> .ver @echo \#define LINUX_COMPILER \"`$(CC) $(CFLAGS) -v 2>&1 | tail -1`\" >> .ver - @mv -f .ver $@ + @cp .ver $@ + @rm .ver include/linux/version.h: ./Makefile @echo \#define UTS_RELEASE \"$(KERNELRELEASE)\" > .ver @echo \#define LINUX_VERSION_CODE `expr $(VERSION) \\* 65536 + $(PATCHLEVEL) \\* 256 + $(SUBLEVEL)` >> .ver @echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))' >>.ver - @mv -f .ver $@ + @cp .ver $@ + @rm .ver init/version.o: init/version.c include/linux/compile.h include/config/MARKER $(CC) $(CFLAGS) -DUTS_MACHINE='"$(ARCH)"' -c -o init/version.o init/version.c diff -urN 2.2.15pre16/arch/alpha/config.in 2.2.15pre16aa3/arch/alpha/config.in --- 2.2.15pre16/arch/alpha/config.in Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/alpha/config.in Thu Mar 30 16:00:57 2000 @@ -21,6 +21,7 @@ mainmenu_option next_comment comment 'General setup' +bool 'BIGMEM support' CONFIG_BIGMEM choice 'Alpha system type' \ "Generic CONFIG_ALPHA_GENERIC \ Alcor/Alpha-XLT CONFIG_ALPHA_ALCOR \ diff -urN 2.2.15pre16/arch/alpha/defconfig 2.2.15pre16aa3/arch/alpha/defconfig --- 2.2.15pre16/arch/alpha/defconfig Wed Jan 5 14:16:51 2000 +++ 2.2.15pre16aa3/arch/alpha/defconfig Thu Mar 30 16:00:57 2000 @@ -255,7 +255,8 @@ # CONFIG_QIC02_TAPE is not set # CONFIG_WATCHDOG is not set # CONFIG_NVRAM is not set -# CONFIG_RTC is not set +CONFIG_RTC=y +CONFIG_RTC_LIGHT=y # # Video For Linux diff -urN 2.2.15pre16/arch/alpha/kernel/alpha_ksyms.c 2.2.15pre16aa3/arch/alpha/kernel/alpha_ksyms.c --- 2.2.15pre16/arch/alpha/kernel/alpha_ksyms.c Fri Jan 7 18:19:06 2000 +++
2.2.15pre16aa3/arch/alpha/kernel/alpha_ksyms.c Thu Mar 30 16:00:56 2000 @@ -37,6 +37,7 @@ extern struct hwrpb_struct *hwrpb; extern void dump_thread(struct pt_regs *, struct user *); extern int dump_fpu(struct pt_regs *, elf_fpregset_t *); +extern spinlock_t rtc_lock; /* these are C runtime functions with special calling conventions: */ extern void __divl (void); @@ -48,6 +49,8 @@ extern void __divqu (void); extern void __remqu (void); +EXPORT_SYMBOL(init_mm); + EXPORT_SYMBOL(alpha_mv); EXPORT_SYMBOL(enable_irq); EXPORT_SYMBOL(disable_irq); @@ -160,6 +163,7 @@ EXPORT_SYMBOL(flush_tlb_mm); EXPORT_SYMBOL(flush_tlb_page); EXPORT_SYMBOL(flush_tlb_range); +EXPORT_SYMBOL(smp_imb); EXPORT_SYMBOL(cpu_data); EXPORT_SYMBOL(cpu_number_map); EXPORT_SYMBOL(global_bh_lock); @@ -184,6 +188,8 @@ EXPORT_SYMBOL(local_bh_count); EXPORT_SYMBOL(local_irq_count); #endif /* __SMP__ */ + +EXPORT_SYMBOL(rtc_lock); /* * The following are special because they're not called diff -urN 2.2.15pre16/arch/alpha/kernel/irq.h 2.2.15pre16aa3/arch/alpha/kernel/irq.h --- 2.2.15pre16/arch/alpha/kernel/irq.h Wed Mar 29 14:59:28 2000 +++ 2.2.15pre16aa3/arch/alpha/kernel/irq.h Thu Mar 30 16:00:57 2000 @@ -44,7 +44,7 @@ } #define RTC_IRQ 8 -#ifdef CONFIG_RTC +#if defined(CONFIG_RTC) && !defined(CONFIG_RTC_LIGHT) #define TIMER_IRQ 0 /* timer is the pit */ #else #define TIMER_IRQ RTC_IRQ /* timer is the rtc */ diff -urN 2.2.15pre16/arch/alpha/kernel/process.c 2.2.15pre16aa3/arch/alpha/kernel/process.c --- 2.2.15pre16/arch/alpha/kernel/process.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/alpha/kernel/process.c Thu Mar 30 16:00:57 2000 @@ -30,7 +30,7 @@ #include #include -#ifdef CONFIG_RTC +#if defined(CONFIG_RTC) && !defined(CONFIG_RTC_LIGHT) #include #endif @@ -150,7 +150,7 @@ } #endif /* __SMP__ */ -#ifdef CONFIG_RTC +#if defined(CONFIG_RTC) && !defined(CONFIG_RTC_LIGHT) /* Reset rtc to defaults. */ { unsigned char control; diff -urN 2.2.15pre16/arch/alpha/kernel/setup.c 2.2.15pre16aa3/arch/alpha/kernel/setup.c --- 2.2.15pre16/arch/alpha/kernel/setup.c Wed Jan 5 14:16:51 2000 +++ 2.2.15pre16aa3/arch/alpha/kernel/setup.c Thu Mar 30 16:00:57 2000 @@ -25,8 +25,9 @@ #include #include #include +#include -#ifdef CONFIG_RTC +#if defined(CONFIG_RTC) && !defined(CONFIG_RTC_LIGHT) #include #endif #ifdef CONFIG_BLK_DEV_INITRD #include #endif @@ -277,8 +278,18 @@ if (initrd_end > *memory_end_p) { printk("initrd extends beyond end of memory " "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - initrd_end, (unsigned long) memory_end_p); + initrd_end, *memory_end_p); initrd_start = initrd_end = 0; + } else { + /* move initrd from the middle of the RAM to the + start of the RAM so we won't risk overwriting + initrd while allocating memory at boot time */ + memmove((char *) *memory_start_p, + (char *) initrd_start, INITRD_SIZE); + initrd_start = *memory_start_p; + initrd_end = initrd_start + INITRD_SIZE; + *memory_start_p = PAGE_ALIGN(initrd_end); + initrd_below_start_ok = 1; } } #endif @@ -292,7 +303,7 @@ /* ??? There is some circumstantial evidence that this needs to be done now rather than later in time_init, which would be more natural. Someone please explain or refute. */ -#if defined(CONFIG_RTC) +#if defined(CONFIG_RTC) && !defined(CONFIG_RTC_LIGHT) rtc_init_pit(); #else alpha_mv.init_pit(); @@ -352,16 +363,42 @@ high = tmp; } - /* Round it up to an even number of pages.
*/ - high = (high + PAGE_SIZE) & (PAGE_MASK*2); +#ifndef CONFIG_BIGMEM +#define MAX_MEMORY 0x80000000UL +#else +#define LOW_MEMORY 0x80000000UL +#define MAX_MEMORY (VMALLOC_START-PAGE_OFFSET) +#endif /* Enforce maximum of 2GB even if there is more, * but only if the platform (support) cannot handle it. */ - if (high > 0x80000000UL) { - printk("Cropping memory from %luMB to 2048MB\n", high >> 20); - high = 0x80000000UL; + if (high > MAX_MEMORY) { + printk("Cropping memory from %luMB to %luMB\n", + high>>20, MAX_MEMORY>>20); + high = MAX_MEMORY; + } + +#ifdef CONFIG_BIGMEM + bigmem_start = bigmem_end = high; + if (high > LOW_MEMORY) + { + high = bigmem_start = LOW_MEMORY; + printk(KERN_NOTICE "%luMB BIGMEM available\n", + (bigmem_end-bigmem_start)>>20); } +#ifdef BIGMEM_DEBUG + else + { + high -= high/4; + bigmem_start = high; + printk(KERN_NOTICE "emulating %luMB BIGMEM\n", + (bigmem_end-bigmem_start)>>20); + } +#endif + bigmem_start += PAGE_OFFSET; + bigmem_end += PAGE_OFFSET; +#endif return (unsigned long) __va(high); } diff -urN 2.2.15pre16/arch/alpha/kernel/smp.c 2.2.15pre16aa3/arch/alpha/kernel/smp.c --- 2.2.15pre16/arch/alpha/kernel/smp.c Wed Jan 5 14:16:51 2000 +++ 2.2.15pre16aa3/arch/alpha/kernel/smp.c Thu Mar 30 16:00:56 2000 @@ -693,7 +693,7 @@ return -EBUSY; while (*(void **)lock) - schedule(); + barrier(); goto again; } @@ -836,6 +836,22 @@ } return 0; +} + +static void +ipi_imb(void *ignored) +{ + imb(); +} + +void +smp_imb(void) +{ + /* Must wait for other processors to flush their icache before continuing. */ + if (smp_call_function(ipi_imb, NULL, 1, 1)) + printk(KERN_CRIT "smp_imb: timed out\n"); + + imb(); } static void diff -urN 2.2.15pre16/arch/alpha/kernel/sys_nautilus.c 2.2.15pre16aa3/arch/alpha/kernel/sys_nautilus.c --- 2.2.15pre16/arch/alpha/kernel/sys_nautilus.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/alpha/kernel/sys_nautilus.c Thu Mar 30 16:00:57 2000 @@ -89,7 +89,7 @@ nautilus_kill_arch (int mode, char *restart_cmd) { -#ifdef CONFIG_RTC +#if defined(CONFIG_RTC) && !defined(CONFIG_RTC_LIGHT) /* Reset rtc to defaults. */ { unsigned char control; diff -urN 2.2.15pre16/arch/alpha/kernel/time.c 2.2.15pre16aa3/arch/alpha/kernel/time.c --- 2.2.15pre16/arch/alpha/kernel/time.c Sun Jan 2 18:26:32 2000 +++ 2.2.15pre16aa3/arch/alpha/kernel/time.c Thu Mar 30 16:00:57 2000 @@ -47,6 +47,7 @@ static int set_rtc_mmss(unsigned long); +spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; /* * Shift amount by which scaled_ticks_per_cycle is scaled. Shifting @@ -173,7 +174,7 @@ * drivers depend on them being initialized (e.g., joystick driver).
*/ -#ifdef CONFIG_RTC +#if defined(CONFIG_RTC) && !defined(CONFIG_RTC_LIGHT) void rtc_init_pit (void) { @@ -326,6 +327,20 @@ irq_handler = timer_interrupt; if (request_irq(TIMER_IRQ, irq_handler, 0, "timer", NULL)) panic("Could not allocate timer IRQ!"); + do_get_fast_time = do_gettimeofday; +} + +static inline void +timeval_normalize(struct timeval * tv) +{ + time_t __sec; + + __sec = tv->tv_usec / 1000000; + if (__sec) + { + tv->tv_usec %= 1000000; + tv->tv_sec += __sec; + } } /* @@ -376,13 +391,11 @@ #endif usec += delta_usec; - if (usec >= 1000000) { - sec += 1; - usec -= 1000000; - } tv->tv_sec = sec; tv->tv_usec = usec; + + timeval_normalize(tv); } void @@ -443,6 +456,8 @@ int real_seconds, real_minutes, cmos_minutes; unsigned char save_control, save_freq_select; + /* irqs are locally disabled here */ + spin_lock(&rtc_lock); /* Tell the clock it's being set */ save_control = CMOS_READ(RTC_CONTROL); CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); @@ -492,6 +507,7 @@ */ CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + spin_unlock(&rtc_lock); return retval; } diff -urN 2.2.15pre16/arch/alpha/mm/fault.c 2.2.15pre16aa3/arch/alpha/mm/fault.c --- 2.2.15pre16/arch/alpha/mm/fault.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/alpha/mm/fault.c Thu Mar 30 16:00:57 2000 @@ -102,7 +102,7 @@ goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; - if (expand_stack(vma, address)) + if (expand_stack(vma, address, NULL)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so diff -urN 2.2.15pre16/arch/alpha/mm/init.c 2.2.15pre16aa3/arch/alpha/mm/init.c --- 2.2.15pre16/arch/alpha/mm/init.c Wed Jan 5 14:16:51 2000 +++ 2.2.15pre16aa3/arch/alpha/mm/init.c Thu Mar 30 16:00:57 2000 @@ -18,6 +18,7 @@ #ifdef CONFIG_BLK_DEV_INITRD #include #endif +#include #include #include @@ -30,6 +31,11 @@ extern void die_if_kernel(char *,struct pt_regs *,long); extern void show_net_buffers(void); +static unsigned long totalram_pages, totalbig_pages; + +#ifdef CONFIG_BIGMEM +unsigned long bigmem_start, bigmem_end; +#endif struct thread_struct original_pcb; #ifndef __SMP__ @@ -196,7 +202,11 @@ struct thread_struct *original_pcb_ptr; /* initialize mem_map[] */ +#ifndef CONFIG_BIGMEM start_mem = free_area_init(start_mem, end_mem); +#else + start_mem = free_area_init(start_mem, bigmem_end); +#endif /* find free clusters, update mem_map[] accordingly */ memdesc = (struct memdesc_struct *) @@ -304,9 +314,20 @@ mem_init(unsigned long start_mem, unsigned long end_mem) { unsigned long tmp; + unsigned long reservedpages = 0; +#ifdef CONFIG_BIGMEM + bigmem_start = PAGE_ALIGN(bigmem_start); + bigmem_end &= PAGE_MASK; +#endif end_mem &= PAGE_MASK; +#ifndef CONFIG_BIGMEM max_mapnr = num_physpages = MAP_NR(end_mem); +#else + max_mapnr = num_physpages = MAP_NR(bigmem_end); + /* cache the bigmem_mapnr */ + bigmem_mapnr = MAP_NR(bigmem_start); +#endif high_memory = (void *) end_mem; start_mem = PAGE_ALIGN(start_mem); @@ -323,17 +344,33 @@ if (tmp >= MAX_DMA_ADDRESS) clear_bit(PG_DMA, &mem_map[MAP_NR(tmp)].flags); if (PageReserved(mem_map+MAP_NR(tmp))) + { + reservedpages++; continue; + } atomic_set(&mem_map[MAP_NR(tmp)].count, 1); #ifdef CONFIG_BLK_DEV_INITRD - if (initrd_start && tmp >= initrd_start && tmp < initrd_end) + if (initrd_start && tmp >= (initrd_start & PAGE_MASK) && tmp < initrd_end) continue; #endif kill_page(tmp); free_page(tmp); } - tmp = nr_free_pages << (PAGE_SHIFT - 10); +#ifdef CONFIG_BIGMEM + for (tmp = bigmem_start; tmp < bigmem_end; tmp +=
PAGE_SIZE) + { + clear_bit(PG_reserved, &mem_map[MAP_NR(tmp)].flags); + set_bit(PG_BIGMEM, &mem_map[MAP_NR(tmp)].flags); + atomic_set(&mem_map[MAP_NR(tmp)].count, 1); + kill_page(tmp); + free_page(tmp); + totalbig_pages++; + } +#endif + tmp = (unsigned long) nr_free_pages << (PAGE_SHIFT - 10); printk("Memory: %luk available\n", tmp); + + totalram_pages = max_mapnr - reservedpages; return; } @@ -357,22 +394,11 @@ void si_meminfo(struct sysinfo *val) { - int i; - - i = max_mapnr; - val->totalram = 0; + val->totalram = totalram_pages << PAGE_SHIFT; val->sharedram = 0; val->freeram = ((unsigned long)nr_free_pages) << PAGE_SHIFT; val->bufferram = buffermem; - while (i-- > 0) { - if (PageReserved(mem_map+i)) - continue; - val->totalram++; - if (!atomic_read(&mem_map[i].count)) - continue; - val->sharedram += atomic_read(&mem_map[i].count) - 1; - } - val->totalram <<= PAGE_SHIFT; - val->sharedram <<= PAGE_SHIFT; + val->totalbig = totalbig_pages << PAGE_SHIFT; + val->freebig = (unsigned long) nr_free_bigpages << PAGE_SHIFT; return; } diff -urN 2.2.15pre16/arch/alpha/vmlinux.lds 2.2.15pre16aa3/arch/alpha/vmlinux.lds --- 2.2.15pre16/arch/alpha/vmlinux.lds Mon Jan 17 16:44:33 2000 +++ 2.2.15pre16aa3/arch/alpha/vmlinux.lds Thu Mar 30 16:00:56 2000 @@ -39,9 +39,11 @@ .got : { *(.got) } .sdata : { *(.sdata) } _edata = .; - _bss = .; + + __bss_start = .; .sbss : { *(.sbss) *(.scommon) } .bss : { *(.bss) *(COMMON) } + __bss_stop = .; _end = .; .mdebug 0 : { *(.mdebug) } diff -urN 2.2.15pre16/arch/i386/config.in 2.2.15pre16aa3/arch/i386/config.in --- 2.2.15pre16/arch/i386/config.in Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/i386/config.in Thu Mar 30 16:00:57 2000 @@ -54,6 +54,7 @@ mainmenu_option next_comment comment 'General setup' +bool 'BIGMEM support' CONFIG_BIGMEM bool 'Networking support' CONFIG_NET bool 'PCI support' CONFIG_PCI if [ "$CONFIG_PCI" = "y" ]; then diff -urN 2.2.15pre16/arch/i386/kernel/entry.S 2.2.15pre16aa3/arch/i386/kernel/entry.S --- 2.2.15pre16/arch/i386/kernel/entry.S Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/i386/kernel/entry.S Thu Mar 30 16:00:57 2000 @@ -564,6 +564,14 @@ .long SYMBOL_NAME(sys_ni_syscall) /* streams1 */ .long SYMBOL_NAME(sys_ni_syscall) /* streams2 */ .long SYMBOL_NAME(sys_vfork) /* 190 */ + .long SYMBOL_NAME(sys_ni_syscall) + .long SYMBOL_NAME(sys_mmap2) + .long SYMBOL_NAME(sys_truncate64) + .long SYMBOL_NAME(sys_ftruncate64) + .long SYMBOL_NAME(sys_stat64) /* 195 */ + .long SYMBOL_NAME(sys_lstat64) + .long SYMBOL_NAME(sys_fstat64) + /* * NOTE!! This doesn't have to be exact - we just have @@ -571,6 +579,6 @@ * entries. Don't panic if you notice that this hasn't * been shrunk every time we add a new system call. */ - .rept NR_syscalls-190 + .rept NR_syscalls-197 .long SYMBOL_NAME(sys_ni_syscall) .endr diff -urN 2.2.15pre16/arch/i386/kernel/head.S 2.2.15pre16aa3/arch/i386/kernel/head.S --- 2.2.15pre16/arch/i386/kernel/head.S Mon Jan 17 16:44:33 2000 +++ 2.2.15pre16aa3/arch/i386/kernel/head.S Thu Mar 30 16:00:56 2000 @@ -83,22 +83,12 @@ #ifdef __SMP__ orw %bx,%bx - jz 1f /* Initial CPU cleans BSS */ + jz 1f /* Initial CPU sets up the system */ pushl $0 popfl jmp checkCPUtype 1: #endif __SMP__ -/* - * Clear BSS first so that there are no surprises... - */ - xorl %eax,%eax - movl $ SYMBOL_NAME(__bss_start),%edi - movl $ SYMBOL_NAME(_end),%ecx - subl %edi,%ecx - cld - rep - stosb /* * start system 32-bit setup. We need to re-do some of the things done * in 16-bit mode for the "real" operations.
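The entry.S hunk above wires the new system calls into slots 191-197, with sys_mmap2 landing at 192. Unlike old_mmap it takes six register arguments, and its last argument counts pages rather than bytes, which is what lets a 32-bit process map file offsets beyond 4GB. A hypothetical userspace probe of the new entry point (raw syscall(), number taken from the table above; the file name is arbitrary):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    #define __NR_mmap2 192  /* slot assigned in the entry.S table above */

    int main(void)
    {
        int fd = open("/etc/hosts", O_RDONLY);
        /* the last argument is the file offset in PAGE_SIZE units */
        long addr = syscall(__NR_mmap2, 0, 4096, PROT_READ,
                            MAP_PRIVATE, fd, 0);

        if (addr == -1)
            perror("mmap2");
        else
            printf("mapped at %p\n", (void *) addr);
        return 0;
    }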
diff -urN 2.2.15pre16/arch/i386/kernel/i386_ksyms.c 2.2.15pre16aa3/arch/i386/kernel/i386_ksyms.c --- 2.2.15pre16/arch/i386/kernel/i386_ksyms.c Mon Jan 17 16:44:33 2000 +++ 2.2.15pre16aa3/arch/i386/kernel/i386_ksyms.c Thu Mar 30 16:00:56 2000 @@ -20,6 +20,7 @@ extern void dump_thread(struct pt_regs *, struct user *); extern int dump_fpu(elf_fpregset_t *); +extern spinlock_t rtc_lock; #if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) extern struct drive_info_struct drive_info; @@ -119,3 +120,5 @@ #ifdef CONFIG_VT EXPORT_SYMBOL(screen_info); #endif + +EXPORT_SYMBOL(rtc_lock); diff -urN 2.2.15pre16/arch/i386/kernel/irq.c 2.2.15pre16aa3/arch/i386/kernel/irq.c --- 2.2.15pre16/arch/i386/kernel/irq.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/i386/kernel/irq.c Thu Mar 30 16:00:56 2000 @@ -971,8 +971,24 @@ unsigned int i; unsigned long delay; + /* + * something may have generated an irq long ago and we want to + * flush such a longstanding irq before considering it as spurious. + */ + spin_lock_irq(&irq_controller_lock); + for (i = NR_IRQS-1; i > 0; i--) + if (!irq_desc[i].action) + irq_desc[i].handler->startup(i); + spin_unlock_irq(&irq_controller_lock); + + /* Wait for longstanding interrupts to trigger. */ + for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) + /* about 20ms delay */ synchronize_irq(); + /* - * first, enable any unassigned irqs + * enable any unassigned irqs + * (we must startup again here because if a longstanding irq + * happened in the previous stage, it may have masked itself) */ spin_lock_irq(&irq_controller_lock); for (i = NR_IRQS-1; i > 0; i--) { diff -urN 2.2.15pre16/arch/i386/kernel/ptrace.c 2.2.15pre16aa3/arch/i386/kernel/ptrace.c --- 2.2.15pre16/arch/i386/kernel/ptrace.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/i386/kernel/ptrace.c Thu Mar 30 16:00:57 2000 @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -81,6 +82,7 @@ pmd_t * pgmiddle; pte_t * pgtable; unsigned long page; + unsigned long retval; int fault; repeat: @@ -126,7 +128,10 @@ if (MAP_NR(page) >= max_mapnr) return 0; page += addr & ~PAGE_MASK; - return *(unsigned long *) page; + page = kmap(page, KM_READ); + retval = *(unsigned long *) page; + kunmap(page, KM_READ); + return retval; } /* @@ -196,7 +201,13 @@ } /* this is a hack for non-kernel-mapped video buffers and similar */ if (MAP_NR(page) < max_mapnr) - *(unsigned long *) (page + (addr & ~PAGE_MASK)) = data; + { + unsigned long vaddr; + + vaddr = kmap(page, KM_WRITE); + *(unsigned long *) (vaddr + (addr & ~PAGE_MASK)) = data; + kunmap(vaddr, KM_WRITE); + } /* we're bypassing pagetables, so we have to set the dirty bit ourselves */ /* this should also re-instate whatever read-only mode there was before */ set_pte(pgtable, pte_mkdirty(mk_pte(page, vma->vm_page_prot))); diff -urN 2.2.15pre16/arch/i386/kernel/setup.c 2.2.15pre16aa3/arch/i386/kernel/setup.c --- 2.2.15pre16/arch/i386/kernel/setup.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/i386/kernel/setup.c Thu Mar 30 16:00:57 2000 @@ -23,6 +23,8 @@ * * Improved Intel cache detection. 
* Dave Jones , October 1999 + + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ /* @@ -48,6 +50,7 @@ #ifdef CONFIG_BLK_DEV_RAM #include #endif +#include #include #include #include @@ -377,12 +380,31 @@ #define VMALLOC_RESERVE (64 << 20) /* 64MB for vmalloc */ #define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)) +#ifdef CONFIG_BIGMEM + bigmem_start = bigmem_end = memory_end; +#endif if (memory_end > MAXMEM) { +#ifdef CONFIG_BIGMEM +#define MAXBIGMEM ((unsigned long)(~(VMALLOC_RESERVE-1))) + bigmem_start = MAXMEM; + bigmem_end = (memory_end < MAXBIGMEM) ? memory_end : MAXBIGMEM; +#endif memory_end = MAXMEM; +#ifdef CONFIG_BIGMEM + printk(KERN_NOTICE "%ldMB BIGMEM available.\n", + (bigmem_end-bigmem_start)>>20); +#else printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20); +#endif } +#if defined(CONFIG_BIGMEM) && defined(BIGMEM_DEBUG) + else { + memory_end -= memory_end/4; + bigmem_start = memory_end; + } +#endif memory_end += PAGE_OFFSET; *memory_start_p = memory_start; diff -urN 2.2.15pre16/arch/i386/kernel/smp.c 2.2.15pre16aa3/arch/i386/kernel/smp.c --- 2.2.15pre16/arch/i386/kernel/smp.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/i386/kernel/smp.c Thu Mar 30 16:00:57 2000 @@ -795,7 +795,6 @@ return memory_start; } -#ifdef CONFIG_X86_TSC /* * TSC synchronization. * @@ -995,8 +994,6 @@ } #undef NR_LOOPS -#endif - extern void calibrate_delay(void); void __init smp_callin(void) @@ -1083,12 +1080,11 @@ */ set_bit(cpuid, (unsigned long *)&cpu_callin_map[0]); -#ifdef CONFIG_X86_TSC /* * Synchronize the TSC with the BP */ - synchronize_tsc_ap (); -#endif + if (boot_cpu_data.x86_capability & X86_FEATURE_TSC) + synchronize_tsc_ap (); } int cpucount = 0; @@ -1624,13 +1620,11 @@ smp_done: -#ifdef CONFIG_X86_TSC /* * Synchronize the TSC with the AP */ - if (cpucount) + if (boot_cpu_data.x86_capability & X86_FEATURE_TSC && cpucount) synchronize_tsc_bp(); -#endif } /* diff -urN 2.2.15pre16/arch/i386/kernel/sys_i386.c 2.2.15pre16aa3/arch/i386/kernel/sys_i386.c --- 2.2.15pre16/arch/i386/kernel/sys_i386.c Mon Jan 17 16:44:33 2000 +++ 2.2.15pre16aa3/arch/i386/kernel/sys_i386.c Thu Mar 30 16:00:58 2000 @@ -41,6 +41,43 @@ return error; } +/* common code for old and new mmaps */ +static inline long do_mmap2( + unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +{ + int error = -EBADF; + struct file * file = NULL; + + flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + if (!(flags & MAP_ANONYMOUS)) { + file = fget(fd); + if (!file) + goto out; + } + + down(&current->mm->mmap_sem); + lock_kernel(); + + error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + + unlock_kernel(); + up(&current->mm->mmap_sem); + + if (file) + fput(file); +out: + return error; +} + +asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) +{ + return do_mmap2(addr, len, prot, flags, fd, pgoff); +} + /* * Perform the select(nd, in, out, ex, tv) and mmap() system * calls.
Linux/i386 didn't use to be able to handle more than @@ -59,30 +96,19 @@ asmlinkage int old_mmap(struct mmap_arg_struct *arg) { - int error = -EFAULT; - struct file * file = NULL; struct mmap_arg_struct a; + int err = -EFAULT; if (copy_from_user(&a, arg, sizeof(a))) - return -EFAULT; + goto out; - down(&current->mm->mmap_sem); - lock_kernel(); - if (!(a.flags & MAP_ANONYMOUS)) { - error = -EBADF; - file = fget(a.fd); - if (!file) - goto out; - } - a.flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); + err = -EINVAL; + if (a.offset & ~PAGE_MASK) + goto out; - error = do_mmap(file, a.addr, a.len, a.prot, a.flags, a.offset); - if (file) - fput(file); + err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); out: - unlock_kernel(); - up(&current->mm->mmap_sem); - return error; + return err; } extern asmlinkage int sys_select(int, fd_set *, fd_set *, fd_set *, struct timeval *); diff -urN 2.2.15pre16/arch/i386/kernel/time.c 2.2.15pre16aa3/arch/i386/kernel/time.c --- 2.2.15pre16/arch/i386/kernel/time.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/i386/kernel/time.c Thu Mar 30 16:00:57 2000 @@ -77,6 +77,9 @@ unsigned long fast_gettimeoffset_quotient=0; extern rwlock_t xtime_lock; +extern volatile unsigned long lost_ticks; + +spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; static inline unsigned long do_fast_gettimeoffset(void) { @@ -112,6 +115,8 @@ #ifndef CONFIG_X86_TSC +spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED; + /* This function must be called with interrupts disabled * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs * @@ -156,6 +161,8 @@ */ unsigned long jiffies_t; + /* gets called with irqs locally disabled */ + spin_lock(&i8253_lock); /* timer count may underflow right here */ outb_p(0x00, 0x43); /* latch the count ASAP */ @@ -214,6 +221,7 @@ } } else jiffies_p = jiffies_t; + spin_unlock(&i8253_lock); count_p = count; @@ -231,13 +239,26 @@ #endif +/* FIXME: should be inline but gcc is buggy and breaks */ +static void +timeval_normalize(struct timeval * tv) +{ + time_t __sec; + + __sec = tv->tv_usec / 1000000; + if (__sec) + { + tv->tv_usec %= 1000000; + tv->tv_sec += __sec; + } +} + /* * This version of gettimeofday has microsecond resolution * and better than microsecond precision on fast x86 machines with TSC. */ void do_gettimeofday(struct timeval *tv) { - extern volatile unsigned long lost_ticks; unsigned long flags; unsigned long usec, sec; @@ -252,13 +273,10 @@ usec += xtime.tv_usec; read_unlock_irqrestore(&xtime_lock, flags); - while (usec >= 1000000) { - usec -= 1000000; - sec++; - } - tv->tv_sec = sec; tv->tv_usec = usec; + + timeval_normalize(tv); } void do_settimeofday(struct timeval *tv) @@ -271,6 +289,7 @@ * would have done, and then undo it! */ tv->tv_usec -= do_gettimeoffset(); + tv->tv_usec -= lost_ticks * (1000000 / HZ); while (tv->tv_usec < 0) { tv->tv_usec += 1000000; @@ -301,6 +320,8 @@ int real_seconds, real_minutes, cmos_minutes; unsigned char save_control, save_freq_select; + /* gets called with irqs locally disabled */ + spin_lock(&rtc_lock); save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being set */ CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL); @@ -346,6 +367,7 @@ */ CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + spin_unlock(&rtc_lock); return retval; } @@ -447,10 +469,19 @@ rdtscl(last_tsc_low); +#if 0 /* + * SUBTLE: this is not necessary from here because it's implicit in the + * write xtime_lock.
+ */ + spin_lock(&i8253_lock); +#endif outb_p(0x00, 0x43); /* latch the count ASAP */ count = inb_p(0x40); /* read the latched count */ count |= inb(0x40) << 8; +#if 0 + spin_unlock(&i8253_lock); +#endif count = ((LATCH-1) - count) * TICK_SIZE; delay_at_last_interrupt = (count + LATCH/2) / LATCH; diff -urN 2.2.15pre16/arch/i386/mm/Makefile 2.2.15pre16aa3/arch/i386/mm/Makefile --- 2.2.15pre16/arch/i386/mm/Makefile Mon Jan 18 02:28:56 1999 +++ 2.2.15pre16aa3/arch/i386/mm/Makefile Thu Mar 30 16:00:57 2000 @@ -10,4 +10,8 @@ O_TARGET := mm.o O_OBJS := init.o fault.o ioremap.o extable.o +ifeq ($(CONFIG_BIGMEM),y) +O_OBJS += bigmem.o +endif + include $(TOPDIR)/Rules.make diff -urN 2.2.15pre16/arch/i386/mm/bigmem.c 2.2.15pre16aa3/arch/i386/mm/bigmem.c --- 2.2.15pre16/arch/i386/mm/bigmem.c Thu Jan 1 01:00:00 1970 +++ 2.2.15pre16aa3/arch/i386/mm/bigmem.c Thu Mar 30 16:00:57 2000 @@ -0,0 +1,35 @@ +/* + * BIGMEM IA32 code and variables. + * + * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de + * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de + */ + +#include +#include + +unsigned long bigmem_start, bigmem_end; + +/* NOTE: fixmap_init allocates all the fixmap pagetables contiguously in + physical space so we can cache the place of the first one and move + around without checking the pgd every time. */ +pte_t *kmap_pte; +pgprot_t kmap_prot; + +#define kmap_get_fixmap_pte(vaddr) \ + pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) + +void __init kmap_init(void) +{ + unsigned long kmap_vstart; + + /* cache the first kmap pte */ + kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); + kmap_pte = kmap_get_fixmap_pte(kmap_vstart); + + kmap_prot = PAGE_KERNEL; +#if 0 + if (boot_cpu_data.x86_capability & X86_FEATURE_PGE) + pgprot_val(kmap_prot) |= _PAGE_GLOBAL; +#endif +} diff -urN 2.2.15pre16/arch/i386/mm/fault.c 2.2.15pre16aa3/arch/i386/mm/fault.c --- 2.2.15pre16/arch/i386/mm/fault.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/i386/mm/fault.c Thu Mar 30 16:00:57 2000 @@ -29,13 +29,13 @@ */ int __verify_write(const void * addr, unsigned long size) { - struct vm_area_struct * vma; + struct vm_area_struct * vma, * prev_vma; unsigned long start = (unsigned long) addr; if (!size) return 1; - vma = find_vma(current->mm, start); + vma = find_vma_prev(current->mm, start, &prev_vma); if (!vma) goto bad_area; if (vma->vm_start > start) @@ -75,7 +75,7 @@ check_stack: if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; - if (expand_stack(vma, start) == 0) + if (expand_stack(vma, start, prev_vma) == 0) goto good_area; bad_area: @@ -112,7 +112,7 @@ { struct task_struct *tsk; struct mm_struct *mm; - struct vm_area_struct * vma; + struct vm_area_struct * vma, * prev_vma; unsigned long address; unsigned long page; unsigned long fixup; @@ -133,7 +133,7 @@ down(&mm->mmap_sem); - vma = find_vma(mm, address); + vma = find_vma_prev(mm, address, &prev_vma); if (!vma) goto bad_area; if (vma->vm_start <= address) @@ -150,7 +150,7 @@ if (address + 32 < regs->esp) goto bad_area; } - if (expand_stack(vma, address)) + if (expand_stack(vma, address, prev_vma)) goto bad_area; /* * Ok, we have a good vm_area for this memory access, so diff -urN 2.2.15pre16/arch/i386/mm/init.c 2.2.15pre16aa3/arch/i386/mm/init.c --- 2.2.15pre16/arch/i386/mm/init.c Sat Oct 23 15:31:08 1999 +++ 2.2.15pre16aa3/arch/i386/mm/init.c Thu Mar 30 16:00:57 2000 @@ -2,6 +2,8 @@ * linux/arch/i386/mm/init.c * * Copyright (C) 1995 Linus Torvalds + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ #include @@ -20,6 +22,7 @@
#ifdef CONFIG_BLK_DEV_INITRD #include #endif +#include #include #include @@ -28,6 +31,8 @@ #include #include +static int totalram_pages, totalbig_pages; + extern void show_net_buffers(void); extern unsigned long init_smp_mappings(unsigned long); @@ -148,6 +153,7 @@ { int i,free = 0,total = 0,reserved = 0; int shared = 0, cached = 0; + int bigmem = 0; printk("Mem-info:\n"); show_free_areas(); @@ -155,6 +161,8 @@ i = max_mapnr; while (i-- > 0) { total++; + if (PageBIGMEM(mem_map+i)) + bigmem++; if (PageReserved(mem_map+i)) reserved++; else if (PageSwapCache(mem_map+i)) @@ -165,6 +173,7 @@ shared += atomic_read(&mem_map[i].count) - 1; } printk("%d pages of RAM\n",total); + printk("%d pages of BIGMEM\n",bigmem); printk("%d reserved pages\n",reserved); printk("%d pages shared\n",shared); printk("%d pages swap cached\n",cached); @@ -344,7 +353,12 @@ #endif local_flush_tlb(); +#ifndef CONFIG_BIGMEM return free_area_init(start_mem, end_mem); +#else + kmap_init(); /* run after fixmap_init */ + return free_area_init(start_mem, bigmem_end + PAGE_OFFSET); +#endif } /* @@ -396,8 +410,18 @@ unsigned long tmp; end_mem &= PAGE_MASK; +#ifdef CONFIG_BIGMEM + bigmem_start = PAGE_ALIGN(bigmem_start); + bigmem_end &= PAGE_MASK; +#endif high_memory = (void *) end_mem; +#ifndef CONFIG_BIGMEM max_mapnr = num_physpages = MAP_NR(end_mem); +#else + max_mapnr = num_physpages = PHYSMAP_NR(bigmem_end); + /* cache the bigmem_mapnr */ + bigmem_mapnr = PHYSMAP_NR(bigmem_start); +#endif /* clear the zero-page */ memset(empty_zero_page, 0, PAGE_SIZE); @@ -452,16 +476,39 @@ #endif free_page(tmp); } - printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n", +#ifdef CONFIG_BIGMEM + for (tmp = bigmem_start; tmp < bigmem_end; tmp += PAGE_SIZE) { + /* + RMQUEUE_ORDER in page_alloc.c returns PAGE_OFFSET + tmp + which cannot be allowed to be 0 since the callers of + __get_free_pages treat 0 as an allocation failure. To + avoid this possibility, do not allow allocation of the + BIGMEM page which would map to 0. + + Leonard N. 
Zubkoff, 30 October 1999 + */ + if (tmp + PAGE_OFFSET != 0) { + clear_bit(PG_reserved, &mem_map[PHYSMAP_NR(tmp)].flags); + set_bit(PG_BIGMEM, &mem_map[PHYSMAP_NR(tmp)].flags); + atomic_set(&mem_map[PHYSMAP_NR(tmp)].count, 1); + free_page(tmp + PAGE_OFFSET); + totalbig_pages++; + } + } +#endif + printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %dk bigmem)\n", (unsigned long) nr_free_pages << (PAGE_SHIFT-10), max_mapnr << (PAGE_SHIFT-10), codepages << (PAGE_SHIFT-10), reservedpages << (PAGE_SHIFT-10), datapages << (PAGE_SHIFT-10), - initpages << (PAGE_SHIFT-10)); + initpages << (PAGE_SHIFT-10), + totalbig_pages << (PAGE_SHIFT-10)); if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); + + totalram_pages = max_mapnr - reservedpages; } void free_initmem(void) @@ -479,22 +526,11 @@ void si_meminfo(struct sysinfo *val) { - int i; - - i = max_mapnr; - val->totalram = 0; + val->totalram = totalram_pages << PAGE_SHIFT; val->sharedram = 0; val->freeram = nr_free_pages << PAGE_SHIFT; val->bufferram = buffermem; - while (i-- > 0) { - if (PageReserved(mem_map+i)) - continue; - val->totalram++; - if (!atomic_read(&mem_map[i].count)) - continue; - val->sharedram += atomic_read(&mem_map[i].count) - 1; - } - val->totalram <<= PAGE_SHIFT; - val->sharedram <<= PAGE_SHIFT; + val->totalbig = totalbig_pages << PAGE_SHIFT; + val->freebig = nr_free_bigpages << PAGE_SHIFT; return; } diff -urN 2.2.15pre16/arch/i386/vmlinux.lds.S 2.2.15pre16aa3/arch/i386/vmlinux.lds.S --- 2.2.15pre16/arch/i386/vmlinux.lds.S Mon Jan 17 16:44:33 2000 +++ 2.2.15pre16aa3/arch/i386/vmlinux.lds.S Thu Mar 30 16:00:56 2000 @@ -56,6 +56,7 @@ .bss : { *(.bss) } + __bss_stop = .; _end = . ; /* Stabs debugging sections. */ diff -urN 2.2.15pre16/arch/ppc/kernel/time.c 2.2.15pre16aa3/arch/ppc/kernel/time.c --- 2.2.15pre16/arch/ppc/kernel/time.c Mon Jan 17 16:44:35 2000 +++ 2.2.15pre16aa3/arch/ppc/kernel/time.c Thu Mar 30 16:00:56 2000 @@ -139,6 +139,19 @@ hardirq_exit(cpu); } +static inline void +timeval_normalize(struct timeval * tv) +{ + time_t __sec; + + __sec = tv->tv_usec / 1000000; + if (__sec) + { + tv->tv_usec %= 1000000; + tv->tv_sec += __sec; + } +} + /* * This version of gettimeofday has microsecond resolution. 
*/ @@ -153,10 +166,7 @@ #ifndef __SMP__ tv->tv_usec += (decrementer_count - get_dec()) * count_period_num / count_period_den; - if (tv->tv_usec >= 1000000) { - tv->tv_usec -= 1000000; - tv->tv_sec++; - } + timeval_normalize(tv); #endif restore_flags(flags); } diff -urN 2.2.15pre16/arch/ppc/mm/fault.c 2.2.15pre16aa3/arch/ppc/mm/fault.c --- 2.2.15pre16/arch/ppc/mm/fault.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/ppc/mm/fault.c Thu Mar 30 16:00:57 2000 @@ -58,7 +58,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { - struct vm_area_struct * vma; + struct vm_area_struct * vma, * prev_vma; struct mm_struct *mm = current->mm; int fault; @@ -89,14 +89,14 @@ } down(&mm->mmap_sem); - vma = find_vma(mm, address); + vma = find_vma_prev(mm, address, &prev_vma); if (!vma) goto bad_area; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) goto bad_area; - if (expand_stack(vma, address)) + if (expand_stack(vma, address, prev_vma)) goto bad_area; good_area: diff -urN 2.2.15pre16/arch/ppc/mm/init.c 2.2.15pre16aa3/arch/ppc/mm/init.c --- 2.2.15pre16/arch/ppc/mm/init.c Mon Jan 17 16:44:35 2000 +++ 2.2.15pre16aa3/arch/ppc/mm/init.c Thu Mar 30 16:00:56 2000 @@ -1315,6 +1315,26 @@ } #endif /* CONFIG_MBX */ #ifndef CONFIG_8xx +static void __init apply_ram_limit(struct mem_pieces * mp) +{ + int i; + + for (i = 0; i < mp->n_regions; i++) + { + if (mp->regions[i].address >= __max_memory) + { + mp->n_regions = i; + break; + } + if (mp->regions[i].address+mp->regions[i].size > __max_memory) + { + mp->regions[i].size = __max_memory - mp->regions[i].address; + mp->n_regions = i+1; + break; + } + } +} + /* * On systems with Open Firmware, collect information about * physical RAM and which pieces are already in use. @@ -1380,6 +1400,7 @@ if (boot_infos == 0) { /* record which bits the prom is using */ get_mem_prop("available", &phys_avail); + apply_ram_limit(&phys_avail); prom_mem = phys_mem; for (i = 0; i < phys_avail.n_regions; ++i) remove_mem_piece(&prom_mem, diff -urN 2.2.15pre16/arch/sparc/kernel/sys_sparc.c 2.2.15pre16aa3/arch/sparc/kernel/sys_sparc.c --- 2.2.15pre16/arch/sparc/kernel/sys_sparc.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/sparc/kernel/sys_sparc.c Thu Mar 30 16:00:58 2000 @@ -176,9 +176,9 @@ } /* Linux version of mmap */ -asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len, +asmlinkage unsigned long do_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, - unsigned long off) + unsigned long pgoff) { struct file * file = NULL; unsigned long retval = -EBADF; @@ -211,7 +211,7 @@ goto out_putf; flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - retval = do_mmap(file, addr, len, prot, flags, off); + retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); out_putf: if (file) @@ -220,6 +220,22 @@ unlock_kernel(); up(&current->mm->mmap_sem); return retval; +} + +asmlinkage unsigned long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, unsigned long fd, + unsigned long pgoff) +{ + /* Make sure the shift for mmap2 is constant (12), no matter what PAGE_SIZE + we have.
*/ + return do_mmap2(addr, len, prot, flags, fd, pgoff >> (PAGE_SHIFT - 12)); +} + +asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, unsigned long fd, + unsigned long off) +{ + return do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT); } /* we come to here via sys_nis_syscall so it can setup the regs argument */ diff -urN 2.2.15pre16/arch/sparc/kernel/systbls.S 2.2.15pre16aa3/arch/sparc/kernel/systbls.S --- 2.2.15pre16/arch/sparc/kernel/systbls.S Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/sparc/kernel/systbls.S Thu Mar 30 16:00:58 2000 @@ -29,12 +29,12 @@ /*40*/ .long sys_newlstat, sys_dup, sys_pipe, sys_times, sys_nis_syscall /*45*/ .long sys_umount, sys_setgid, sys_getgid, sys_signal, sys_geteuid /*50*/ .long sys_getegid, sys_acct, sys_nis_syscall, sys_nis_syscall, sys_ioctl -/*55*/ .long sys_reboot, sys_lfs_syscall, sys_symlink, sys_readlink, sys_execve -/*60*/ .long sys_umask, sys_chroot, sys_newfstat, sys_lfs_syscall, sys_getpagesize +/*55*/ .long sys_reboot, sys_mmap2, sys_symlink, sys_readlink, sys_execve +/*60*/ .long sys_umask, sys_chroot, sys_newfstat, sys_fstat64, sys_getpagesize /*65*/ .long sys_msync, sys_vfork, sys_pread, sys_pwrite, sys_nis_syscall /*70*/ .long sys_nis_syscall, sys_mmap, sys_nis_syscall, sys_munmap, sys_mprotect -/*75*/ .long sys_nis_syscall, sys_vhangup, sys_lfs_syscall, sys_nis_syscall, sys_getgroups -/*80*/ .long sys_setgroups, sys_getpgrp, sys_nis_syscall, sys_setitimer, sys_lfs_syscall +/*75*/ .long sys_nis_syscall, sys_vhangup, sys_truncate64, sys_nis_syscall, sys_getgroups +/*80*/ .long sys_setgroups, sys_getpgrp, sys_nis_syscall, sys_setitimer, sys_ftruncate64 /*85*/ .long sys_swapon, sys_getitimer, sys_nis_syscall, sys_sethostname, sys_nis_syscall /*90*/ .long sys_dup2, sys_nis_syscall, sys_fcntl, sys_select, sys_nis_syscall /*95*/ .long sys_fsync, sys_setpriority, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall @@ -44,8 +44,8 @@ /*115*/ .long sys_nis_syscall, sys_gettimeofday, sys_getrusage, sys_nis_syscall, sys_getcwd /*120*/ .long sys_readv, sys_writev, sys_settimeofday, sys_fchown, sys_fchmod /*125*/ .long sys_nis_syscall, sys_setreuid, sys_setregid, sys_rename, sys_truncate -/*130*/ .long sys_ftruncate, sys_flock, sys_lfs_syscall, sys_nis_syscall, sys_nis_syscall -/*135*/ .long sys_nis_syscall, sys_mkdir, sys_rmdir, sys_utimes, sys_lfs_syscall +/*130*/ .long sys_ftruncate, sys_flock, sys_lstat64, sys_nis_syscall, sys_nis_syscall +/*135*/ .long sys_nis_syscall, sys_mkdir, sys_rmdir, sys_utimes, sys_stat64 /*140*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_getrlimit /*145*/ .long sys_setrlimit, sys_nis_syscall, sys_prctl, sys_pciconfig_read, sys_pciconfig_write /*150*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_nis_syscall diff -urN 2.2.15pre16/arch/sparc64/kernel/sparc64_ksyms.c 2.2.15pre16aa3/arch/sparc64/kernel/sparc64_ksyms.c --- 2.2.15pre16/arch/sparc64/kernel/sparc64_ksyms.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/sparc64/kernel/sparc64_ksyms.c Thu Mar 30 16:00:58 2000 @@ -81,6 +81,7 @@ extern int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg); extern int sys32_ioctl(unsigned int fd, unsigned int cmd, u32 arg); extern int (*handle_mathemu)(struct pt_regs *, struct fpustate *); +extern long sparc32_open(const char * filename, int flags, int mode); extern void bcopy (const char *, char *, int); extern int __ashrdi3(int, int); @@ -267,6 +268,7 @@ EXPORT_SYMBOL(prom_cpu_nodes); 
EXPORT_SYMBOL(sys_ioctl); EXPORT_SYMBOL(sys32_ioctl); +EXPORT_SYMBOL(sparc32_open); EXPORT_SYMBOL(move_addr_to_kernel); EXPORT_SYMBOL(move_addr_to_user); #endif diff -urN 2.2.15pre16/arch/sparc64/kernel/sys32.S 2.2.15pre16aa3/arch/sparc64/kernel/sys32.S --- 2.2.15pre16/arch/sparc64/kernel/sys32.S Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/sparc64/kernel/sys32.S Thu Mar 30 16:00:58 2000 @@ -60,3 +60,12 @@ sethi %hi(sys_bdflush), %g1 jmpl %g1 + %lo(sys_bdflush), %g0 sra %o1, 0, %o1 + + .align 32 + .globl sys32_mmap2 +sys32_mmap2: + srl %o4, 0, %o4 + sethi %hi(sys_mmap), %g1 + srl %o5, 0, %o5 + jmpl %g1 + %lo(sys_mmap), %g0 + sllx %o5, 12, %o5 diff -urN 2.2.15pre16/arch/sparc64/kernel/sys_sparc32.c 2.2.15pre16aa3/arch/sparc64/kernel/sys_sparc32.c --- 2.2.15pre16/arch/sparc64/kernel/sys_sparc32.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/sparc64/kernel/sys_sparc32.c Thu Mar 30 16:00:58 2000 @@ -579,6 +579,30 @@ return err; } +static inline int get_flock64(struct flock *kfl, struct flock32_64 *ufl) +{ + int err; + + err = get_user(kfl->l_type, &ufl->l_type); + err |= __get_user(kfl->l_whence, &ufl->l_whence); + err |= __get_user(kfl->l_start, &ufl->l_start); + err |= __get_user(kfl->l_len, &ufl->l_len); + err |= __get_user(kfl->l_pid, &ufl->l_pid); + return err; +} + +static inline int put_flock64(struct flock *kfl, struct flock32_64 *ufl) +{ + int err; + + err = __put_user(kfl->l_type, &ufl->l_type); + err |= __put_user(kfl->l_whence, &ufl->l_whence); + err |= __put_user(kfl->l_start, &ufl->l_start); + err |= __put_user(kfl->l_len, &ufl->l_len); + err |= __put_user(kfl->l_pid, &ufl->l_pid); + return err; +} + extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg); asmlinkage long sys32_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) @@ -597,10 +621,31 @@ old_fs = get_fs(); set_fs (KERNEL_DS); ret = sys_fcntl(fd, cmd, (unsigned long)&f); set_fs (old_fs); + if (f.l_start >= 0x7fffffffUL || + f.l_len >= 0x7fffffffUL || + f.l_start + f.l_len >= 0x7fffffffUL) + return -EOVERFLOW; if(put_flock(&f, (struct flock32 *)arg)) return -EFAULT; return ret; } + case F_GETLK64: + case F_SETLK64: + case F_SETLKW64: + { + struct flock f; + mm_segment_t old_fs; + long ret; + + if(get_flock64(&f, (struct flock32_64 *)arg)) + return -EFAULT; + old_fs = get_fs(); set_fs (KERNEL_DS); + ret = sys_fcntl(fd, cmd + F_GETLK - F_GETLK64, (unsigned long)&f); + set_fs (old_fs); + if(put_flock64(&f, (struct flock32_64 *)arg)) + return -EFAULT; + return ret; + } default: return sys_fcntl(fd, cmd, (unsigned long)arg); } @@ -717,6 +762,25 @@ return ret; } +extern asmlinkage long sys_truncate(const char * path, unsigned long length); +extern asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); + +asmlinkage int sys32_truncate64(const char * path, unsigned long high, unsigned long low) +{ + if ((int)high < 0) + return -EINVAL; + else + return sys_truncate(path, (high << 32) | low); +} + +asmlinkage int sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low) +{ + if ((int)high < 0) + return -EINVAL; + else + return sys_ftruncate(fd, (high << 32) | low); +} + extern asmlinkage int sys_utime(char * filename, struct utimbuf * times); struct utimbuf32 { @@ -4018,4 +4082,37 @@ ret = -EFAULT; return ret; +} + +/* This is just a version for 32-bit applications which does + * not force O_LARGEFILE on. 
+ */ + +asmlinkage long sparc32_open(const char * filename, int flags, int mode) +{ + char * tmp; + int fd, error; + + tmp = getname(filename); + fd = PTR_ERR(tmp); + if (!IS_ERR(tmp)) { + lock_kernel(); + fd = get_unused_fd(); + if (fd >= 0) { + struct file * f = filp_open(tmp, flags, mode); + error = PTR_ERR(f); + if (IS_ERR(f)) + goto out_error; + fd_install(fd, f); + } +out: + unlock_kernel(); + putname(tmp); + } + return fd; + +out_error: + put_unused_fd(fd); + fd = error; + goto out; } diff -urN 2.2.15pre16/arch/sparc64/kernel/sys_sunos32.c 2.2.15pre16aa3/arch/sparc64/kernel/sys_sunos32.c --- 2.2.15pre16/arch/sparc64/kernel/sys_sunos32.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/sparc64/kernel/sys_sunos32.c Thu Mar 30 16:00:58 2000 @@ -1296,13 +1296,15 @@ return rval; } +extern asmlinkage long sparc32_open(const char * filename, int flags, int mode); + asmlinkage int sunos_open(u32 filename, int flags, int mode) { int ret; lock_kernel(); current->personality |= PER_BSD; - ret = sys_open ((char *)A(filename), flags, mode); + ret = sparc32_open ((char *)A(filename), flags, mode); unlock_kernel(); return ret; } diff -urN 2.2.15pre16/arch/sparc64/kernel/systbls.S 2.2.15pre16aa3/arch/sparc64/kernel/systbls.S --- 2.2.15pre16/arch/sparc64/kernel/systbls.S Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/arch/sparc64/kernel/systbls.S Thu Mar 30 16:00:58 2000 @@ -20,7 +20,7 @@ .globl sys_call_table32 sys_call_table32: /*0*/ .word sys_nis_syscall, sparc_exit, sys_fork, sys_read, sys_write -/*5*/ .word sys_open, sys_close, sys32_wait4, sys_creat, sys_link +/*5*/ .word sparc32_open, sys_close, sys32_wait4, sys_creat, sys_link /*10*/ .word sys_unlink, sunos_execv, sys_chdir, sys32_chown16, sys32_mknod /*15*/ .word sys32_chmod, sys32_lchown16, sparc_brk, sys_perfctr, sys32_lseek /*20*/ .word sys_getpid, sys_capget, sys_capset, sys_setuid, sys_getuid @@ -30,12 +30,12 @@ /*40*/ .word sys32_newlstat, sys_dup, sys_pipe, sys32_times, sys_nis_syscall .word sys_umount, sys_setgid, sys_getgid, sys_signal, sys_geteuid /*50*/ .word sys_getegid, sys_acct, sys_nis_syscall, sys_nis_syscall, sys32_ioctl - .word sys_reboot, sys_lfs_syscall, sys_symlink, sys_readlink, sys32_execve -/*60*/ .word sys_umask, sys_chroot, sys32_newfstat, sys_lfs_syscall, sys_getpagesize + .word sys_reboot, sys32_mmap2, sys_symlink, sys_readlink, sys32_execve +/*60*/ .word sys_umask, sys_chroot, sys32_newfstat, sys_fstat64, sys_getpagesize .word sys_msync, sys_vfork, sys32_pread, sys32_pwrite, sys_nis_syscall /*70*/ .word sys_nis_syscall, sys32_mmap, sys_nis_syscall, sys_munmap, sys_mprotect - .word sys_nis_syscall, sys_vhangup, sys_lfs_syscall, sys_nis_syscall, sys32_getgroups -/*80*/ .word sys32_setgroups, sys_getpgrp, sys_nis_syscall, sys32_setitimer, sys_lfs_syscall + .word sys_nis_syscall, sys_vhangup, sys32_truncate64, sys_nis_syscall, sys32_getgroups +/*80*/ .word sys32_setgroups, sys_getpgrp, sys_nis_syscall, sys32_setitimer, sys32_ftruncate64 .word sys_swapon, sys32_getitimer, sys_nis_syscall, sys_sethostname, sys_nis_syscall /*90*/ .word sys_dup2, sys_nis_syscall, sys32_fcntl, sys32_select, sys_nis_syscall .word sys_fsync, sys_setpriority, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall @@ -45,8 +45,8 @@ .word sys_nis_syscall, sys32_gettimeofday, sys32_getrusage, sys_nis_syscall, sys_getcwd /*120*/ .word sys32_readv, sys32_writev, sys32_settimeofday, sys32_fchown16, sys_fchmod .word sys_nis_syscall, sys32_setreuid, sys32_setregid, sys_rename, sys_truncate -/*130*/ .word sys_ftruncate, sys_flock, sys_lfs_syscall, 
sys_nis_syscall, sys_nis_syscall - .word sys_nis_syscall, sys_mkdir, sys_rmdir, sys32_utimes, sys_lfs_syscall +/*130*/ .word sys_ftruncate, sys_flock, sys_lstat64, sys_nis_syscall, sys_nis_syscall + .word sys_nis_syscall, sys_mkdir, sys_rmdir, sys32_utimes, sys_stat64 /*140*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys32_getrlimit .word sys32_setrlimit, sys_nis_syscall, sys32_prctl, sys32_pciconfig_read, sys32_pciconfig_write /*150*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_nis_syscall @@ -114,15 +114,15 @@ .word sys_quotactl, sys_nis_syscall, sys_mount, sys_ustat, sys_nis_syscall /*170*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_getdents .word sys_setsid, sys_fchdir, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall -/*180*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_sigpending, sys_query_module +/*180*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_query_module .word sys_setpgid, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_newuname /*190*/ .word sys_init_module, sys_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall - .word sys_nis_syscall, sys_nis_syscall, sys_getppid, sys_sigaction, sys_sgetmask -/*200*/ .word sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, sys_nis_syscall + .word sys_nis_syscall, sys_nis_syscall, sys_getppid, sys_nis_syscall, sys_sgetmask +/*200*/ .word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, sys_nis_syscall .word sys_nis_syscall, sys_socketcall, sys_syslog, sys_nis_syscall, sys_nis_syscall /*210*/ .word sys_idle, sys_nis_syscall, sys_waitpid, sys_swapoff, sys_sysinfo - .word sys_ipc, sys_sigreturn, sys_clone, sys_nis_syscall, sys_adjtimex -/*220*/ .word sys_sigprocmask, sys_create_module, sys_delete_module, sys_get_kernel_syms, sys_getpgid + .word sys_ipc, sys_nis_syscall, sys_clone, sys_nis_syscall, sys_adjtimex +/*220*/ .word sys_nis_syscall, sys_create_module, sys_delete_module, sys_get_kernel_syms, sys_getpgid .word sys_bdflush, sys_sysfs, sys_nis_syscall, sys_setfsuid, sys_setfsgid /*230*/ .word sys_select, sys_time, sys_nis_syscall, sys_stime, sys_nis_syscall .word sys_nis_syscall, sys_llseek, sys_mlock, sys_munlock, sys_mlockall diff -urN 2.2.15pre16/arch/sparc64/solaris/fs.c 2.2.15pre16aa3/arch/sparc64/solaris/fs.c --- 2.2.15pre16/arch/sparc64/solaris/fs.c Mon Jan 17 16:44:36 2000 +++ 2.2.15pre16aa3/arch/sparc64/solaris/fs.c Thu Mar 30 16:00:58 2000 @@ -572,20 +572,20 @@ return error; } +extern asmlinkage long sparc32_open(const char * filename, int flags, int mode); + asmlinkage int solaris_open(u32 filename, int flags, u32 mode) { - int (*sys_open)(const char *,int,int) = - (int (*)(const char *,int,int))SYS(open); int fl = flags & 0xf; -/* if (flags & 0x2000) - allow LFS */ + if (flags & 0x2000) fl |= O_LARGEFILE; if (flags & 0x8050) fl |= O_SYNC; if (flags & 0x80) fl |= O_NONBLOCK; if (flags & 0x100) fl |= O_CREAT; if (flags & 0x200) fl |= O_TRUNC; if (flags & 0x400) fl |= O_EXCL; if (flags & 0x800) fl |= O_NOCTTY; - return sys_open((const char *)A(filename), fl, mode); + return sparc32_open((const char *)A(filename), fl, mode); } #define SOL_F_SETLK 6 diff -urN 2.2.15pre16/drivers/block/Config.in 2.2.15pre16aa3/drivers/block/Config.in --- 2.2.15pre16/drivers/block/Config.in Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/drivers/block/Config.in Thu Mar 30 16:00:57 2000 @@ -96,6 +96,10 @@ comment 'Additional Block Devices' +tristate 'Logical volume manager (LVM) 
support' CONFIG_BLK_DEV_LVM N +if [ "$CONFIG_BLK_DEV_LVM" != "n" ]; then + bool ' LVM information in proc filesystem' CONFIG_LVM_PROC_FS Y +fi tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP if [ "$CONFIG_NET" = "y" ]; then tristate 'Network block device support' CONFIG_BLK_DEV_NBD diff -urN 2.2.15pre16/drivers/block/Makefile 2.2.15pre16aa3/drivers/block/Makefile --- 2.2.15pre16/drivers/block/Makefile Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/drivers/block/Makefile Thu Mar 30 16:00:57 2000 @@ -254,6 +254,14 @@ endif endif +ifeq ($(CONFIG_BLK_DEV_LVM),y) +L_OBJS += lvm.o lvm-snap.o +else + ifeq ($(CONFIG_BLK_DEV_LVM),m) + M_OBJS += lvm-mod.o + endif +endif + ifeq ($(CONFIG_BLK_DEV_MD),y) LX_OBJS += md.o @@ -312,3 +320,6 @@ ide-mod.o: ide.o $(IDE_OBJS) $(LD) $(LD_RFLAG) -r -o $@ ide.o $(IDE_OBJS) + +lvm-mod.o: lvm.o lvm-snap.o + $(LD) -r -o $@ lvm.o lvm-snap.o diff -urN 2.2.15pre16/drivers/block/README.lvm 2.2.15pre16aa3/drivers/block/README.lvm --- 2.2.15pre16/drivers/block/README.lvm Thu Jan 1 01:00:00 1970 +++ 2.2.15pre16aa3/drivers/block/README.lvm Thu Mar 30 16:00:57 2000 @@ -0,0 +1,8 @@ + +This is the Logical Volume Manager driver for Linux. + +Tools and a library that manage logical volumes can be found +at . + +There you can also obtain current driver versions. + diff -urN 2.2.15pre16/drivers/block/genhd.c 2.2.15pre16aa3/drivers/block/genhd.c --- 2.2.15pre16/drivers/block/genhd.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/drivers/block/genhd.c Thu Mar 30 16:00:57 2000 @@ -50,6 +50,11 @@ le32_to_cpu(__a); \ }) +#if defined CONFIG_BLK_DEV_LVM || defined CONFIG_BLK_DEV_LVM_MODULE +#include +void ( *lvm_hd_name_ptr) ( char *, int) = NULL; +#endif + struct gendisk *gendisk_head = NULL; static int current_minor = 0; @@ -104,6 +109,13 @@ * MD devices are named md0, md1, ... md15, fix it up here.
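The lvm_hd_name_ptr added to genhd.c above is a plain function-pointer hook: the core keeps a NULL-initialised pointer, the LVM driver fills it in at init time and clears it before unloading, and the disk-naming code calls through it only while it is non-NULL, so the core never has to link against LVM. A minimal stand-alone sketch of the same pattern, with hypothetical names rather than code from this patch:

#include <stdio.h>

/* core side: NULL until a driver registers its naming routine */
void (*example_name_hook)(char *buf, int minor) = NULL;

/* hypothetical driver side */
static void example_hd_name(char *buf, int minor)
{
	sprintf(buf, "example%d", minor);
}

int main(void)
{
	char buf[32] = "??";

	example_name_hook = example_hd_name;	/* what driver init does */
	if (example_name_hook != NULL)		/* the guard the core uses */
		example_name_hook(buf, 3);
	printf("%s\n", buf);
	example_name_hook = NULL;		/* what module cleanup does */
	return 0;
}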
*/ switch (hd->major) { +#if defined CONFIG_BLK_DEV_LVM || defined CONFIG_BLK_DEV_LVM_MODULE + case LVM_BLK_MAJOR: + *buf = 0; + if ( lvm_hd_name_ptr != NULL) + ( lvm_hd_name_ptr) ( buf, minor); + return buf; +#endif case IDE5_MAJOR: unit += 2; case IDE4_MAJOR: diff -urN 2.2.15pre16/drivers/block/ide.c 2.2.15pre16aa3/drivers/block/ide.c --- 2.2.15pre16/drivers/block/ide.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/drivers/block/ide.c Thu Mar 30 16:00:56 2000 @@ -2287,6 +2287,10 @@ drive->nice1 = (arg >> IDE_NICE_1) & 1; return 0; + case BLKELVGET: + case BLKELVSET: + return blkelv_ioctl(inode->i_rdev, cmd, arg); + RO_IOCTLS(inode->i_rdev, arg); default: diff -urN 2.2.15pre16/drivers/block/ll_rw_blk.c 2.2.15pre16aa3/drivers/block/ll_rw_blk.c --- 2.2.15pre16/drivers/block/ll_rw_blk.c Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/drivers/block/ll_rw_blk.c Thu Mar 30 16:00:57 2000 @@ -3,6 +3,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 1994, Karl Keyte: Added support for disk statistics + * Elevator latency, (C) 2000 Andrea Arcangeli SuSE */ /* @@ -20,10 +21,19 @@ #include #include +#include #include #include +#if defined CONFIG_BLK_DEV_LVM || defined CONFIG_BLK_DEV_LVM_MODULE +#include + /* function pointer to the LVM driver remapping function + which will be set up during driver/module init; necessary + to be able to load LVM as a module */ +int (*lvm_map_ptr) (struct buffer_head *, int) = NULL; +#endif + /* * The request-struct contains all necessary data * to load a nr of sectors into memory @@ -53,11 +63,11 @@ /* * used to wait on when there are no free requests */ -struct wait_queue * wait_for_request = NULL; +struct wait_queue * wait_for_request; /* This specifies how many sectors to read ahead on the disk. */ -int read_ahead[MAX_BLKDEV] = {0, }; +int read_ahead[MAX_BLKDEV]; /* blk_dev_struct is: * *request_fn * *current_request @@ -73,7 +83,7 @@ * * if (!blk_size[MAJOR]) then no minor size checking is done. */ -int * blk_size[MAX_BLKDEV] = { NULL, NULL, }; +int * blk_size[MAX_BLKDEV]; /* * blksize_size contains the size of all block-devices: * * blksize_size[MAJOR][MINOR] * * if (!blksize_size[MAJOR]) then 1024 bytes is assumed. */ -int * blksize_size[MAX_BLKDEV] = { NULL, NULL, }; +int * blksize_size[MAX_BLKDEV]; /* * hardsect_size contains the size of the hardware sector of a device. * (hardsect_size[MAJOR][MINOR]) * * if (!hardsect_size[MAJOR]) * then 512 bytes is assumed. * else * sector_size is hardsect_size[MAJOR][MINOR] * This is currently set by some scsi devices and read by the msdos fs driver. * Other uses may appear later. */ -int * hardsect_size[MAX_BLKDEV] = { NULL, NULL, }; +int * hardsect_size[MAX_BLKDEV]; /* * The following tunes the read-ahead algorithm in mm/filemap.c */ -int * max_readahead[MAX_BLKDEV] = { NULL, NULL, }; +int * max_readahead[MAX_BLKDEV]; /* * Max number of sectors per request */ -int * max_sectors[MAX_BLKDEV] = { NULL, NULL, }; +int * max_sectors[MAX_BLKDEV]; /* * Max number of segments per request */ -int * max_segments[MAX_BLKDEV] = { NULL, NULL, }; +int * max_segments[MAX_BLKDEV]; static inline int get_max_sectors(kdev_t dev) { @@ -142,6 +152,17 @@ return &blk_dev[major].current_request; } +static inline int get_request_latency(elevator_t * elevator, int rw) +{ + int latency; + + latency = elevator->read_latency; + if (rw != READ) + latency = elevator->write_latency; + + return latency; +} + /* * remove the plug and let it rip..
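get_request_latency() above gives every new request a starting budget taken from the per-queue elevator state, with separate values for reads and writes. In the code that follows, that budget lives in req->elevator_latency: it is decremented whenever another request is queued or merged in front of the owner, and a request whose budget has run out is considered starving and may no longer be passed. A toy model of just that rule (illustrative only, not the kernel's data structures):

#include <stdio.h>

struct toy_request {
	int elevator_latency;		/* remaining budget */
};

/* a queued request may be bypassed only while it has budget left */
static int may_bypass(const struct toy_request *queued)
{
	return queued->elevator_latency > 0;
}

int main(void)
{
	struct toy_request req = { 3 };	/* small budget for the demo */

	while (may_bypass(&req))
		req.elevator_latency--;	/* each bypass costs one unit */
	printf("starving now, no more bypasses allowed (budget=%d)\n",
	       req.elevator_latency);
	return 0;
}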
*/ @@ -291,6 +312,196 @@ printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n"); } +static int blkelvget_ioctl(elevator_t * elevator, blkelv_ioctl_arg_t * arg) +{ + int ret; + blkelv_ioctl_arg_t output; + + output.queue_ID = elevator; + output.read_latency = elevator->read_latency; + output.write_latency = elevator->write_latency; + output.max_bomb_segments = elevator->max_bomb_segments; + + ret = -EFAULT; + if (copy_to_user(arg, &output, sizeof(blkelv_ioctl_arg_t))) + goto out; + ret = 0; + out: + return ret; +} + +static int blkelvset_ioctl(elevator_t * elevator, const blkelv_ioctl_arg_t * arg) +{ + blkelv_ioctl_arg_t input; + int ret; + + ret = -EFAULT; + if (copy_from_user(&input, arg, sizeof(blkelv_ioctl_arg_t))) + goto out; + + ret = -EINVAL; + if (input.read_latency < 0) + goto out; + if (input.write_latency < 0) + goto out; + if (input.max_bomb_segments <= 0) + goto out; + + elevator->read_latency = input.read_latency; + elevator->write_latency = input.write_latency; + elevator->max_bomb_segments = input.max_bomb_segments; + + ret = 0; + out: + return ret; +} + +int blkelv_ioctl(kdev_t dev, unsigned long cmd, unsigned long arg) +{ + elevator_t * elevator = &blk_dev[MAJOR(dev)].elevator; + blkelv_ioctl_arg_t * __arg = (blkelv_ioctl_arg_t *) arg; + + switch (cmd) { + case BLKELVGET: + return blkelvget_ioctl(elevator, __arg); + case BLKELVSET: + return blkelvset_ioctl(elevator, __arg); + } +} + +static inline int seek_to_not_starving_chunk(struct request ** req, int * lat) +{ + struct request * tmp = *req; + int found = 0, pos = 0; + int last_pos = 0, __lat = *lat; + + do { + if (tmp->elevator_latency <= 0) + { + *req = tmp; + found = 1; + last_pos = pos; + if (last_pos >= __lat) + break; + } + pos += tmp->nr_segments; + } while ((tmp = tmp->next)); + *lat -= last_pos; + + return found; +} + +#define CASE_COALESCE_BUT_FIRST_REQUEST_MAYBE_BUSY \ + case IDE0_MAJOR: /* same as HD_MAJOR */ \ + case IDE1_MAJOR: \ + case FLOPPY_MAJOR: \ + case IDE2_MAJOR: \ + case IDE3_MAJOR: \ + case IDE4_MAJOR: \ + case IDE5_MAJOR: \ + case ACSI_MAJOR: \ + case MFM_ACORN_MAJOR: \ + case MDISK_MAJOR: \ + case DASD_MAJOR: +#define CASE_COALESCE_ALSO_FIRST_REQUEST \ + case SCSI_DISK0_MAJOR: \ + case SCSI_DISK1_MAJOR: \ + case SCSI_DISK2_MAJOR: \ + case SCSI_DISK3_MAJOR: \ + case SCSI_DISK4_MAJOR: \ + case SCSI_DISK5_MAJOR: \ + case SCSI_DISK6_MAJOR: \ + case SCSI_DISK7_MAJOR: \ + case SCSI_CDROM_MAJOR: \ + case DAC960_MAJOR+0: \ + case DAC960_MAJOR+1: \ + case DAC960_MAJOR+2: \ + case DAC960_MAJOR+3: \ + case DAC960_MAJOR+4: \ + case DAC960_MAJOR+5: \ + case DAC960_MAJOR+6: \ + case DAC960_MAJOR+7: \ + case COMPAQ_SMART2_MAJOR+0: \ + case COMPAQ_SMART2_MAJOR+1: \ + case COMPAQ_SMART2_MAJOR+2: \ + case COMPAQ_SMART2_MAJOR+3: \ + case COMPAQ_SMART2_MAJOR+4: \ + case COMPAQ_SMART2_MAJOR+5: \ + case COMPAQ_SMART2_MAJOR+6: \ + case COMPAQ_SMART2_MAJOR+7: + +#define elevator_starve_rest_of_queue(req) \ +do { \ + struct request * tmp = (req); \ + for ((tmp) = (tmp)->next; (tmp); (tmp) = (tmp)->next) \ + (tmp)->elevator_latency--; \ +} while (0) + +static inline void elevator_queue(struct request * req, + struct request * tmp, + int latency, + struct blk_dev_struct * dev, + struct request ** queue_head) +{ + struct request * __tmp; + int starving, __latency; + + starving = seek_to_not_starving_chunk(&tmp, &latency); + __tmp = tmp; + __latency = latency; + + for (;; tmp = tmp->next) + { + if ((latency -= tmp->nr_segments) <= 0) + { + tmp = __tmp; + latency = __latency - tmp->nr_segments; + + if (starving) + break; + + 
switch (MAJOR(req->rq_dev)) + { + CASE_COALESCE_BUT_FIRST_REQUEST_MAYBE_BUSY + if (tmp == dev->current_request) + default: + goto link; + CASE_COALESCE_ALSO_FIRST_REQUEST + } + + latency += tmp->nr_segments; + req->next = tmp; + *queue_head = req; + goto after_link; + } + + if (!tmp->next) + break; + + { + const int after_current = IN_ORDER(tmp,req); + const int before_next = IN_ORDER(req,tmp->next); + + if (!IN_ORDER(tmp,tmp->next)) { + if (after_current || before_next) + break; + } else { + if (after_current && before_next) + break; + } + } + } + + link: + req->next = tmp->next; + tmp->next = req; + + after_link: + req->elevator_latency = latency; + + elevator_starve_rest_of_queue(req); +} + /* * add-request adds a request to the linked list. * It disables interrupts (aquires the request spinlock) so that it can muck @@ -309,6 +520,7 @@ short disk_index; unsigned long flags; int queue_new_request = 0; + int latency; switch (major) { case DAC960_MAJOR+0: @@ -333,7 +545,7 @@ break; } - req->next = NULL; + latency = get_request_latency(&dev->elevator, req->cmd); /* * We use the goto to reduce locking complexity @@ -344,28 +556,17 @@ if (req->bh) mark_buffer_clean(req->bh); if (!(tmp = *current_request)) { + req->next = NULL; + req->elevator_latency = latency; *current_request = req; if (dev->current_request != &dev->plug) queue_new_request = 1; goto out; } - for ( ; tmp->next ; tmp = tmp->next) { - const int after_current = IN_ORDER(tmp,req); - const int before_next = IN_ORDER(req,tmp->next); - - if (!IN_ORDER(tmp,tmp->next)) { - if (after_current || before_next) - break; - } else { - if (after_current && before_next) - break; - } - } - req->next = tmp->next; - tmp->next = req; + elevator_queue(req, tmp, latency, dev, current_request); /* for SCSI devices, call request_fn unconditionally */ - if (scsi_blk_major(major) || + if ((0 && scsi_blk_major(major)) || (major >= DAC960_MAJOR+0 && major <= DAC960_MAJOR+7) || (major >= COMPAQ_SMART2_MAJOR+0 && major <= COMPAQ_SMART2_MAJOR+7)) @@ -399,6 +600,8 @@ total_segments--; if (total_segments > max_segments) return; + if (next->elevator_latency < req->elevator_latency) + req->elevator_latency = next->elevator_latency; req->bhtail->b_reqnext = next->bh; req->bhtail = next->bhtail; req->nr_sectors += next->nr_sectors; @@ -408,12 +611,28 @@ wake_up (&wait_for_request); } +#define read_pendings(req) \ +({ \ + int __ret = 0; \ + struct request * tmp = (req); \ + do { \ + if (tmp->cmd == READ) \ + { \ + __ret = 1; \ + break; \ + } \ + tmp = tmp->next; \ + } while (tmp); \ + __ret; \ +}) + void make_request(int major, int rw, struct buffer_head * bh) { unsigned int sector, count; - struct request * req; + struct request * req, * prev; int rw_ahead, max_req, max_sectors, max_segments; unsigned long flags; + int latency, starving; count = bh->b_size >> 9; sector = bh->b_rsector; @@ -490,6 +709,8 @@ max_sectors = get_max_sectors(bh->b_rdev); max_segments = get_max_segments(bh->b_rdev); + latency = get_request_latency(&blk_dev[major].elevator, rw); + /* * Now we acquire the request spinlock, we have to be mega careful * not to schedule or do something nonatomic @@ -499,20 +720,13 @@ if (!req) { /* MD and loop can't handle plugging without deadlocking */ if (major != MD_MAJOR && major != LOOP_MAJOR && +#if defined CONFIG_BLK_DEV_LVM || defined CONFIG_BLK_DEV_LVM_MODULE + major != LVM_BLK_MAJOR && +#endif major != DDV_MAJOR && major != NBD_MAJOR) plug_device(blk_dev + major); /* is atomic */ } else switch (major) { - case IDE0_MAJOR: /* same as HD_MAJOR */ - 
case IDE1_MAJOR: - case FLOPPY_MAJOR: - case IDE2_MAJOR: - case IDE3_MAJOR: - case IDE4_MAJOR: - case IDE5_MAJOR: - case ACSI_MAJOR: - case MFM_ACORN_MAJOR: - case MDISK_MAJOR: - case DASD_MAJOR: + CASE_COALESCE_BUT_FIRST_REQUEST_MAYBE_BUSY /* * The scsi disk and cdrom drivers completely remove the request * from the queue when they start processing an entry. For this * entry may be busy being processed and we thus can't change it. */ if (req == blk_dev[major].current_request) - req = req->next; - if (!req) - break; + { + if (!(req = req->next)) + break; + latency -= req->nr_segments; + } /* fall through */ + CASE_COALESCE_ALSO_FIRST_REQUEST - case SCSI_DISK0_MAJOR: - case SCSI_DISK1_MAJOR: - case SCSI_DISK2_MAJOR: - case SCSI_DISK3_MAJOR: - case SCSI_DISK4_MAJOR: - case SCSI_DISK5_MAJOR: - case SCSI_DISK6_MAJOR: - case SCSI_DISK7_MAJOR: - case SCSI_CDROM_MAJOR: - case DAC960_MAJOR+0: - case DAC960_MAJOR+1: - case DAC960_MAJOR+2: - case DAC960_MAJOR+3: - case DAC960_MAJOR+4: - case DAC960_MAJOR+5: - case DAC960_MAJOR+6: - case DAC960_MAJOR+7: - case COMPAQ_SMART2_MAJOR+0: - case COMPAQ_SMART2_MAJOR+1: - case COMPAQ_SMART2_MAJOR+2: - case COMPAQ_SMART2_MAJOR+3: - case COMPAQ_SMART2_MAJOR+4: - case COMPAQ_SMART2_MAJOR+5: - case COMPAQ_SMART2_MAJOR+6: - case COMPAQ_SMART2_MAJOR+7: + /* avoid write-bombs to not hurt the interactivity of reads */ + if (rw != READ && read_pendings(req)) + max_segments = blk_dev[major].elevator.max_bomb_segments; + starving = seek_to_not_starving_chunk(&req, &latency); + prev = NULL; do { if (req->sem) continue; @@ -565,24 +762,34 @@ continue; /* Can we add it to the end of this request? */ if (req->sector + req->nr_sectors == sector) { + if (latency - req->nr_segments < 0) + break; if (req->bhtail->b_data + req->bhtail->b_size != bh->b_data) { if (req->nr_segments < max_segments) req->nr_segments++; - else continue; + else break; } req->bhtail->b_reqnext = bh; req->bhtail = bh; req->nr_sectors += count; + + /* latency stuff */ + if ((latency -= req->nr_segments) < req->elevator_latency) + req->elevator_latency = latency; + elevator_starve_rest_of_queue(req); + /* Can we now merge this req with the next? */ attempt_merge(req, max_sectors, max_segments); /* or to the beginning? */ } else if (req->sector - count == sector) { + if (!prev && starving) + break; if (bh->b_data + bh->b_size != req->bh->b_data) { if (req->nr_segments < max_segments) req->nr_segments++; - else continue; + else break; } bh->b_reqnext = req->bh; req->bh = bh; @@ -590,6 +797,14 @@ req->current_nr_sectors = count; req->sector = sector; req->nr_sectors += count; + + /* latency stuff */ + if (latency < --req->elevator_latency) + req->elevator_latency = latency; + elevator_starve_rest_of_queue(req); + + if (prev) + attempt_merge(prev, max_sectors, max_segments); } else continue; @@ -597,7 +812,8 @@ spin_unlock_irqrestore(&io_request_lock,flags); return; - } while ((req = req->next) != NULL); + } while (prev = req, + (latency -= req->nr_segments) >= 0 && (req = req->next) != NULL); } /* find an unused request.
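The BLKELVGET/BLKELVSET ioctls wired into ide.c above are the user-space knobs for all of this. A sketch of a small tuning utility follows; it assumes the patched kernel headers export blkelv_ioctl_arg_t and the two ioctl numbers (the <linux/blkdev.h> include is a guess at where they end up):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/blkdev.h>	/* assumed location of blkelv_ioctl_arg_t */

int main(int argc, char **argv)
{
	blkelv_ioctl_arg_t elv;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s /dev/hda\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || ioctl(fd, BLKELVGET, &elv) < 0) {
		perror(argv[1]);
		return 1;
	}
	printf("read_latency %d write_latency %d max_bomb_segments %d\n",
	       elv.read_latency, elv.write_latency, elv.max_bomb_segments);

	elv.write_latency /= 2;		/* bias the queue a bit more toward reads */
	if (ioctl(fd, BLKELVSET, &elv) < 0) {
		perror("BLKELVSET");
		return 1;
	}
	close(fd);
	return 0;
}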
*/ @@ -623,7 +839,6 @@ req->sem = NULL; req->bh = bh; req->bhtail = bh; - req->next = NULL; add_request(major+blk_dev,req); return; @@ -675,13 +890,34 @@ correct_size, bh[i]->b_size); goto sorry; } - - /* Md remaps blocks now */ + /* LVM and MD remap blocks now */ +#if defined CONFIG_BLK_DEV_LVM || defined CONFIG_BLK_DEV_LVM_MODULE + major = MAJOR(bh[i]->b_dev); + if (major == LVM_BLK_MAJOR) { + if (lvm_map_ptr == NULL) { + printk(KERN_ERR + "Bad lvm_map_ptr in ll_rw_block\n"); + goto sorry; + } + if ((lvm_map_ptr) (bh[i], rw) != 0) { + printk(KERN_ERR + "Bad lvm_map in ll_rw_block\n"); + goto sorry; + } + /* remap major too ... */ + major = MAJOR(bh[i]->b_rdev); + } else { + bh[i]->b_rdev = bh[i]->b_dev; + bh[i]->b_rsector = bh[i]->b_blocknr * (bh[i]->b_size >> 9); + } +#else bh[i]->b_rdev = bh[i]->b_dev; bh[i]->b_rsector=bh[i]->b_blocknr*(bh[i]->b_size >> 9); +#endif #ifdef CONFIG_BLK_DEV_MD if (major==MD_MAJOR && - md_map (MINOR(bh[i]->b_dev), &bh[i]->b_rdev, + /* changed v to allow LVM to remap */ + md_map (MINOR(bh[i]->b_rdev), &bh[i]->b_rdev, &bh[i]->b_rsector, bh[i]->b_size >> 9)) { printk (KERN_ERR "Bad md_map in ll_rw_block\n"); @@ -700,8 +936,10 @@ if (bh[i]) { set_bit(BH_Req, &bh[i]->b_state); #ifdef CONFIG_BLK_DEV_MD - if (MAJOR(bh[i]->b_dev) == MD_MAJOR) { - md_make_request(MINOR (bh[i]->b_dev), rw, bh[i]); + /* changed v to allow LVM to remap */ + if (MAJOR(bh[i]->b_rdev) == MD_MAJOR) { + /* remap device for MD too v */ + md_make_request(MINOR (bh[i]->b_rdev), rw, bh[i]); continue; } #endif @@ -792,12 +1030,12 @@ dev->plug_tq.sync = 0; dev->plug_tq.routine = &unplug_device; dev->plug_tq.data = dev; + dev->elevator = ELEVATOR_DEFAULTS; } req = all_requests + NR_REQUEST; while (--req >= all_requests) { req->rq_status = RQ_INACTIVE; - req->next = NULL; } memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, sizeof(max_readahead)); @@ -884,6 +1122,9 @@ #ifdef CONFIG_SJCD sjcd_init(); #endif CONFIG_SJCD +#ifdef CONFIG_BLK_DEV_LVM + lvm_init(); +#endif #ifdef CONFIG_BLK_DEV_MD md_init(); #endif CONFIG_BLK_DEV_MD @@ -908,3 +1149,4 @@ EXPORT_SYMBOL(io_request_lock); EXPORT_SYMBOL(end_that_request_first); EXPORT_SYMBOL(end_that_request_last); +EXPORT_SYMBOL(blkelv_ioctl); diff -urN 2.2.15pre16/drivers/block/loop.c 2.2.15pre16aa3/drivers/block/loop.c --- 2.2.15pre16/drivers/block/loop.c Mon Jan 17 16:44:36 2000 +++ 2.2.15pre16aa3/drivers/block/loop.c Thu Mar 30 16:00:58 2000 @@ -143,12 +143,12 @@ int size; if (S_ISREG(lo->lo_dentry->d_inode->i_mode)) - size = (lo->lo_dentry->d_inode->i_size - lo->lo_offset) / BLOCK_SIZE; + size = (lo->lo_dentry->d_inode->i_size - lo->lo_offset) >> BLOCK_SIZE_BITS; else { kdev_t lodev = lo->lo_device; if (blk_size[MAJOR(lodev)]) size = blk_size[MAJOR(lodev)][MINOR(lodev)] - - lo->lo_offset / BLOCK_SIZE; + (lo->lo_offset >> BLOCK_SIZE_BITS); else size = MAX_DISK_SIZE; } diff -urN 2.2.15pre16/drivers/block/lvm-snap.c 2.2.15pre16aa3/drivers/block/lvm-snap.c --- 2.2.15pre16/drivers/block/lvm-snap.c Thu Jan 1 01:00:00 1970 +++ 2.2.15pre16aa3/drivers/block/lvm-snap.c Thu Mar 30 16:00:57 2000 @@ -0,0 +1,414 @@ +/* linux/drivers/block/lvm-snap.c + + Copyright (C) 2000 Andrea Arcangeli SuSE + + LVM snapshotting */ + +#include +#include +#include +#include +#include + + +extern const char *const lvm_name; +extern int lvm_blocksizes[]; + +void lvm_snapshot_release(lv_t *); + +#define hashfn(dev,block,mask,chunk_size) \ + ((HASHDEV(dev)^((block)/(chunk_size))) & (mask)) + +static inline lv_block_exception_t * +lvm_find_exception_table(kdev_t org_dev, unsigned 
long org_start, lv_t * lv) +{ + struct list_head * hash_table = lv->lv_snapshot_hash_table, * next; + unsigned long mask = lv->lv_snapshot_hash_mask; + int chunk_size = lv->lv_chunk_size; + lv_block_exception_t * ret; + int i = 0; + + hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)]; + ret = NULL; + for (next = hash_table->next; next != hash_table; next = next->next) + { + lv_block_exception_t * exception; + + exception = list_entry(next, lv_block_exception_t, hash); + if (exception->rsector_org == org_start && + exception->rdev_org == org_dev) + { + if (i) + { + /* fun, isn't it? :) */ + list_del(next); + list_add(next, hash_table); + } + ret = exception; + break; + } + i++; + } + return ret; +} + +static inline void lvm_hash_link(lv_block_exception_t * exception, + kdev_t org_dev, unsigned long org_start, + lv_t * lv) +{ + struct list_head * hash_table = lv->lv_snapshot_hash_table; + unsigned long mask = lv->lv_snapshot_hash_mask; + int chunk_size = lv->lv_chunk_size; + + hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)]; + list_add(&exception->hash, hash_table); +} + +int lvm_snapshot_remap_block(kdev_t * org_dev, unsigned long * org_sector, + unsigned long pe_start, lv_t * lv) +{ + int ret; + unsigned long pe_off, pe_adjustment, __org_start; + kdev_t __org_dev; + int chunk_size = lv->lv_chunk_size; + lv_block_exception_t * exception; + + pe_off = pe_start % chunk_size; + pe_adjustment = (*org_sector-pe_off) % chunk_size; + __org_start = *org_sector - pe_adjustment; + __org_dev = *org_dev; + + ret = 0; + exception = lvm_find_exception_table(__org_dev, __org_start, lv); + if (exception) + { + *org_dev = exception->rdev_new; + *org_sector = exception->rsector_new + pe_adjustment; + ret = 1; + } + return ret; +} + +static void lvm_drop_snapshot(lv_t * lv_snap, const char * reason) +{ + kdev_t last_dev; + int i; + + /* no exception storage space available for this snapshot + or error on this snapshot --> release it */ + invalidate_buffers(lv_snap->lv_dev); + + for (i = last_dev = 0; i < lv_snap->lv_remap_ptr; i++) { + if ( lv_snap->lv_block_exception[i].rdev_new != last_dev) { + last_dev = lv_snap->lv_block_exception[i].rdev_new; + invalidate_buffers(last_dev); + } + } + + lvm_snapshot_release(lv_snap); + + printk(KERN_INFO + "%s -- giving up on snapshotting %s on %s due to %s\n", + lvm_name, lv_snap->lv_snapshot_org->lv_name, lv_snap->lv_name, + reason); +} + +static inline void lvm_snapshot_prepare_blocks(unsigned long * blocks, + unsigned long start, + int nr_sectors, + int blocksize) +{ + int i, sectors_per_block, nr_blocks; + + sectors_per_block = blocksize >> 9; + nr_blocks = nr_sectors / sectors_per_block; + start /= sectors_per_block; + + for (i = 0; i < nr_blocks; i++) + blocks[i] = start++; +} + +static inline int get_blksize(kdev_t dev) +{ + int correct_size = BLOCK_SIZE, i, major; + + major = MAJOR(dev); + if (blksize_size[major]) + { + i = blksize_size[major][MINOR(dev)]; + if (i) + correct_size = i; + } + return correct_size; +} + +#ifdef DEBUG_SNAPSHOT +static inline void invalidate_snap_cache(unsigned long start, unsigned long nr, + kdev_t dev) +{ + struct buffer_head * bh; + int sectors_per_block, i, blksize, minor; + + minor = MINOR(dev); + blksize = lvm_blocksizes[minor]; + sectors_per_block = blksize >> 9; + nr /= sectors_per_block; + start /= sectors_per_block; + + for (i = 0; i < nr; i++) + { + bh = get_hash_table(dev, start++, blksize); + if (bh) + bforget(bh); + } +} +#endif + +/* + * copy on write handler for one snapshot logical
volume + * + * read the original blocks and store them on the new one(s). + * if no exception storage space is free any longer --> release the snapshot. + * + * this routine gets called for each _first_ write to a physical chunk. + */ +int lvm_snapshot_COW(kdev_t org_phys_dev, + unsigned long org_phys_sector, + unsigned long org_pe_start, + unsigned long org_virt_sector, + lv_t * lv_snap) +{ + const char * reason; + unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off; + int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size; + struct kiobuf * iobuf; + unsigned long blocks[KIO_MAX_SECTORS]; + int blksize_snap, blksize_org, min_blksize, max_blksize; + int max_sectors, nr_sectors; + + /* check if we are out of snapshot space */ + if (idx >= lv_snap->lv_remap_end) + goto fail_out_of_space; + + /* calculate physical boundaries of source chunk */ + pe_off = org_pe_start % chunk_size; + org_start = org_phys_sector - ((org_phys_sector-pe_off) % chunk_size); + virt_start = org_virt_sector - (org_phys_sector - org_start); + + /* calculate physical boundaries of destination chunk */ + snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; + snap_start = lv_snap->lv_block_exception[idx].rsector_new; + +#ifdef DEBUG_SNAPSHOT + printk(KERN_INFO + "%s -- COW: " + "org %02d:%02d faulting %lu start %lu, " + "snap %02d:%02d start %lu, " + "size %d, pe_start %lu pe_off %lu, virt_sec %lu\n", + lvm_name, + MAJOR(org_phys_dev), MINOR(org_phys_dev), org_phys_sector, + org_start, + MAJOR(snap_phys_dev), MINOR(snap_phys_dev), snap_start, + chunk_size, + org_pe_start, pe_off, + org_virt_sector); +#endif + + iobuf = lv_snap->lv_iobuf; + + blksize_org = get_blksize(org_phys_dev); + blksize_snap = get_blksize(snap_phys_dev); + max_blksize = max(blksize_org, blksize_snap); + min_blksize = min(blksize_org, blksize_snap); + max_sectors = KIO_MAX_SECTORS * (min_blksize>>9); + + if (chunk_size % (max_blksize>>9)) + goto fail_blksize; + + while (chunk_size) + { + nr_sectors = min(chunk_size, max_sectors); + chunk_size -= nr_sectors; + + iobuf->length = nr_sectors << 9; + + lvm_snapshot_prepare_blocks(blocks, org_start, + nr_sectors, blksize_org); + if (brw_kiovec(READ, 1, &iobuf, org_phys_dev, + blocks, blksize_org) != (nr_sectors<<9)) + goto fail_raw_read; + + lvm_snapshot_prepare_blocks(blocks, snap_start, + nr_sectors, blksize_snap); + if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, + blocks, blksize_snap) != (nr_sectors<<9)) + goto fail_raw_write; + } + +#ifdef DEBUG_SNAPSHOT + /* invalidate the logical snapshot buffer cache */ + invalidate_snap_cache(virt_start, lv_snap->lv_chunk_size, + lv_snap->lv_dev); +#endif + + /* the original chunk is now stored on the snapshot volume + so update the exception table */ + lv_snap->lv_block_exception[idx].rdev_org = org_phys_dev; + lv_snap->lv_block_exception[idx].rsector_org = org_start; + lvm_hash_link(lv_snap->lv_block_exception + idx, + org_phys_dev, org_start, lv_snap); + lv_snap->lv_remap_ptr = idx + 1; + return 0; + + /* slow path */ + out: + lvm_drop_snapshot(lv_snap, reason); + return 1; + + fail_out_of_space: + reason = "out of space"; + goto out; + fail_raw_read: + reason = "read error"; + goto out; + fail_raw_write: + reason = "write error"; + goto out; + fail_blksize: + reason = "blocksize error"; + goto out; +} + +static int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors) +{ + int bytes, nr_pages, err, i; + + bytes = sectors << 9; + nr_pages = (bytes + ~PAGE_MASK) >> PAGE_SHIFT; + err = expand_kiobuf(iobuf,
nr_pages); + if (err) + goto out; + + err = -ENOMEM; + iobuf->locked = 1; + iobuf->nr_pages = 0; + for (i = 0; i < nr_pages; i++) + { + struct page * page; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,27) + page = alloc_page(GFP_KERNEL); + if (!page) + goto out; +#else + { + unsigned long addr = __get_free_page(GFP_USER); + if (!addr) + goto out; + iobuf->pagelist[i] = addr; + page = mem_map + MAP_NR(addr); + } +#endif + + iobuf->maplist[i] = page; + /* the only point to lock the page here is to be allowed + to share unmap_kiobuf() in the fail-path */ +#ifndef LockPage +#define LockPage(map) set_bit(PG_locked, &(map)->flags) +#endif + LockPage(page); + iobuf->nr_pages++; + } + iobuf->offset = 0; + + err = 0; + out: + return err; +} + +static int calc_max_buckets(void) +{ + unsigned long mem; + + mem = num_physpages << PAGE_SHIFT; + mem /= 100; + mem *= 2; + mem /= sizeof(struct list_head); + + return mem; +} + +static int lvm_snapshot_alloc_hash_table(lv_t * lv) +{ + int err; + unsigned long buckets, max_buckets, size; + struct list_head * hash; + + buckets = lv->lv_remap_end; + max_buckets = calc_max_buckets(); + buckets = min(buckets, max_buckets); + while (buckets & (buckets-1)) + buckets &= (buckets-1); + + size = buckets * sizeof(struct list_head); + + err = -ENOMEM; + hash = vmalloc(size); + lv->lv_snapshot_hash_table = hash; + + if (!hash) + goto out; + + lv->lv_snapshot_hash_mask = buckets-1; + while (buckets--) + INIT_LIST_HEAD(hash+buckets); + err = 0; + out: + return err; +} + +int lvm_snapshot_alloc(lv_t * lv_snap) +{ + int err, blocksize, max_sectors; + + err = alloc_kiovec(1, &lv_snap->lv_iobuf); + if (err) + goto out; + + blocksize = lvm_blocksizes[MINOR(lv_snap->lv_dev)]; + max_sectors = KIO_MAX_SECTORS << (PAGE_SHIFT-9); + + err = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors); + if (err) + goto out_free_kiovec; + + err = lvm_snapshot_alloc_hash_table(lv_snap); + if (err) + goto out_free_kiovec; + out: + return err; + + out_free_kiovec: + unmap_kiobuf(lv_snap->lv_iobuf); + free_kiovec(1, &lv_snap->lv_iobuf); + goto out; +} + +void lvm_snapshot_release(lv_t * lv) +{ + if (lv->lv_block_exception) + { + vfree(lv->lv_block_exception); + lv->lv_block_exception = NULL; + } + if (lv->lv_snapshot_hash_table) + { + vfree(lv->lv_snapshot_hash_table); + lv->lv_snapshot_hash_table = NULL; + } + if (lv->lv_iobuf) + { + free_kiovec(1, &lv->lv_iobuf); + lv->lv_iobuf = NULL; + } +} diff -urN 2.2.15pre16/drivers/block/lvm.c 2.2.15pre16aa3/drivers/block/lvm.c --- 2.2.15pre16/drivers/block/lvm.c Thu Jan 1 01:00:00 1970 +++ 2.2.15pre16aa3/drivers/block/lvm.c Thu Mar 30 16:00:57 2000 @@ -0,0 +1,2576 @@ +/* + * kernel/lvm.c + * + * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Germany + * + * February-November 1997 + * April-May,July-August,November 1998 + * January-March,May,July,September,October 1999 + * + * + * LVM driver is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * LVM driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. 
If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +/* + * Changelog + * + * 09/11/1997 - added chr ioctls VG_STATUS_GET_COUNT + * and VG_STATUS_GET_NAMELIST + * 18/01/1998 - change lvm_chr_open/close lock handling + * 30/04/1998 - changed LV_STATUS ioctl to LV_STATUS_BYNAME and + * - added LV_STATUS_BYINDEX ioctl + * - used lvm_status_byname_req_t and + * lvm_status_byindex_req_t vars + * 04/05/1998 - added multiple device support + * 08/05/1998 - added support to set/clear extendable flag in volume group + * 09/05/1998 - changed output of lvm_proc_get_info() because of + * support for free (e.g. longer) logical volume names + * 12/05/1998 - added spin_locks (thanks to Pascal van Dam + * ) + * 25/05/1998 - fixed handling of locked PEs in lvm_map() and lvm_chr_ioctl() + * 26/05/1998 - reactivated verify_area by access_ok + * 07/06/1998 - used vmalloc/vfree instead of kmalloc/kfree to go + * beyond 128/256 KB max allocation limit per call + * - #ifdef blocked spin_lock calls to avoid compile errors + * with 2.0.x + * 11/06/1998 - another enhancement to spinlock code in lvm_chr_open() + * and use of LVM_VERSION_CODE instead of my own macros + * (thanks to Michael Marxmeier ) + * 07/07/1998 - added statistics in lvm_map() + * 08/07/1998 - saved statistics in do_lv_extend_reduce() + * 25/07/1998 - used __initfunc macro + * 02/08/1998 - changes for official char/block major numbers + * 07/08/1998 - avoided init_module() and cleanup_module() to be static + * 30/08/1998 - changed VG lv_open counter from sum of LV lv_open counters + * to sum of LVs open (no matter how often each is) + * 01/09/1998 - fixed lvm_gendisk.part[] index error + * 07/09/1998 - added copying of lv_current_pe-array + * in LV_STATUS_BYINDEX ioctl + * 17/11/1998 - added KERN_* levels to printk + * 13/01/1999 - fixed LV index bug in do_lv_create() which hit lvrename + * 07/02/1999 - fixed spinlock handling bug in case of LVM_RESET + * by moving spinlock code from lvm_chr_open() + * to lvm_chr_ioctl() + * - added LVM_LOCK_LVM ioctl to lvm_chr_ioctl() + * - allowed LVM_RESET and retrieval commands to go ahead; + * only other update ioctls are blocked now + * - fixed pv->pe to NULL for pv_status + * - using lv_req structure in lvm_chr_ioctl() now + * - fixed NULL ptr reference bug in do_lv_extend_reduce() + * caused by uncontiguous PV array in lvm_chr_ioctl(VG_REDUCE) + * 09/02/1999 - changed BLKRASET and BLKRAGET in lvm_chr_ioctl() to + * handle logical volume private read ahead sector + * - implemented LV read_ahead handling with lvm_blk_read() + * and lvm_blk_write() + * 10/02/1999 - implemented 2.[12].* support function lvm_hd_name() + * to be used in drivers/block/genhd.c by disk_name() + * 12/02/1999 - fixed index bug in lvm_blk_ioctl(), HDIO_GETGEO + * - enhanced gendisk insert/remove handling + * 16/02/1999 - changed to dynamic block minor number allocation to + * have as many as 99 volume groups with 256 logical volumes + * as the grand total; this allows having 1 volume group with + * up to 256 logical volumes in it + * 21/02/1999 - added LV open count information to proc filesystem + * - substituted redundant LVM_RESET code by calls + * to do_vg_remove() + * 22/02/1999 - used schedule_timeout() to be more responsive + * in case of do_vg_remove() with lots of logical volumes + * 19/03/1999 - fixed NULL pointer bug in module_init/lvm_init + * 17/05/1999 - used DECLARE_WAIT_QUEUE_HEAD macro (>2.3.0) + * - enhanced lvm_hd_name support + * 03/07/1999 -
avoided use of KERNEL_VERSION macro based ifdefs and + * memcpy_tofs/memcpy_fromfs macro redefinitions + * 06/07/1999 - corrected reads/writes statistic counter copy in case + * of striped logical volume + * 28/07/1999 - implemented snapshot logical volumes + * - lvm_chr_ioctl + * - LV_STATUS_BYINDEX + * - LV_STATUS_BYNAME + * - do_lv_create + * - do_lv_remove + * - lvm_map + * - new lvm_snapshot_remap_block + * - new lvm_snapshot_remap_new_block + * 08/10/1999 - implemented support for multiple snapshots per + * original logical volume + * 12/10/1999 - support for 2.3.19 + * 11/11/1999 - support for 2.3.28 + * 21/11/1999 - changed lvm_map() interface to buffer_head based + * 19/12/1999 - support for 2.3.33 + * 01/01/2000 - changed locking concept in lvm_map(), + * do_vg_create() and do_lv_remove() + * + */ + + +/* + * TODO + * + * - implement special handling of unavailable physical volumes + * + */ + +char *lvm_version = "LVM version 0.8e by Heinz Mauelshagen (4/1/2000)\n"; +char *lvm_short_version = "version 0.8e (4/1/2000)"; + +#define MAJOR_NR LVM_BLK_MAJOR +#define DEVICE_OFF(device) + +#include +#include + +#ifdef MODVERSIONS +# undef MODULE +# define MODULE +# include +#endif + +#ifdef MODULE +# include +#endif + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_KERNELD +#include +#endif + +#include +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 0) +#include +#endif + +#include +#include + +#define LVM_CORRECT_READ_AHEAD(a) \ +do { \ + if ((a) < LVM_MIN_READ_AHEAD) \ + (a) = LVM_MIN_READ_AHEAD; \ + if ((a) > LVM_MAX_READ_AHEAD) \ + (a) = LVM_MAX_READ_AHEAD; \ +} while(0) + +#define suser() ( current->uid == 0 && current->euid == 0) + + +/* + * External function prototypes + */ +#ifdef MODULE +int init_module ( void); +void cleanup_module ( void); +#else +extern int lvm_init ( void); +#endif + +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) +static void lvm_dummy_device_request ( request_queue_t*); +#else +static void lvm_dummy_device_request ( void); +#endif +static int lvm_blk_ioctl ( struct inode *, struct file *, uint, ulong); +static int lvm_blk_open ( struct inode *, struct file *); + +static int lvm_chr_open ( struct inode *, struct file *); + +static int lvm_chr_release ( struct inode *, struct file *); +static int lvm_blk_release ( struct inode *, struct file *); + +static int lvm_chr_ioctl ( struct inode *, struct file *, uint, ulong); + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) +static int lvm_proc_get_info ( char *, char **, off_t, int); +static int (*lvm_proc_get_info_ptr)(char *, char **, off_t, int) = + &lvm_proc_get_info; +#else +static int lvm_proc_get_info ( char *, char **, off_t, int, int); +#endif +#endif + +#ifdef LVM_HD_NAME +void lvm_hd_name ( char*, int); +#endif + +/* external snapshot calls */ +int lvm_snapshot_remap_block ( kdev_t*, ulong*, unsigned long, lv_t*); +int lvm_snapshot_COW(kdev_t, unsigned long, unsigned long, + unsigned long, lv_t *); +int lvm_snapshot_alloc(lv_t *); +void lvm_snapshot_release(lv_t *); + +/* End external function prototypes */ + + +/* + * Internal function prototypes + */ +static void lvm_init_vars ( void); +extern int (*lvm_map_ptr) ( struct buffer_head*, int); + + +#ifdef LVM_HD_NAME +extern void (*lvm_hd_name_ptr) ( char*, int); +#endif +static int lvm_map ( struct buffer_head*, int); +static int do_vg_create ( int, void *); +static int 
do_vg_remove ( int); +static int do_lv_create ( int, char *, lv_t *); +static int do_lv_remove ( int, char *, int); +static int do_lv_extend_reduce ( int, char *, lv_t *); +static void lvm_geninit ( struct gendisk *); +#ifdef LVM_GET_INODE + static struct inode *lvm_get_inode ( int); + void lvm_clear_inode ( struct inode *); +#endif +inline int lvm_strlen ( char *); +inline void lvm_memcpy ( char *, char *, int); +inline int lvm_strcmp ( char *, char *); +inline char *lvm_strrchr ( char *, char c); +/* END Internal function prototypes */ + + +/* volume group descriptor area pointers */ +static vg_t *vg[ABS_MAX_VG + 1]; +static pv_t *pvp = NULL; +static lv_t *lvp = NULL; +static pe_t *pep = NULL; +static pe_t *pep1 = NULL; + + +/* map from block minor number to VG and LV numbers */ +typedef struct { + int vg_number; + int lv_number; +} vg_lv_map_t; +static vg_lv_map_t vg_lv_map[ABS_MAX_LV]; + + +/* Request structures (lvm_chr_ioctl()) */ +static pv_change_req_t pv_change_req; +static pv_flush_req_t pv_flush_req; +static pv_status_req_t pv_status_req; +static pe_lock_req_t pe_lock_req; +static le_remap_req_t le_remap_req; +static lv_req_t lv_req; + +#ifdef LVM_TOTAL_RESET +static int lvm_reset_spindown = 0; +#endif + +static char pv_name[NAME_LEN]; +/* static char rootvg[NAME_LEN] = { 0, }; */ +static uint lv_open = 0; +const char *const lvm_name = LVM_NAME; +static int lock = 0; +static int loadtime = 0; +static uint vg_count = 0; +static long lvm_chr_open_count = 0; +static ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION; +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 0) +static DECLARE_WAIT_QUEUE_HEAD ( lvm_wait); +static DECLARE_WAIT_QUEUE_HEAD ( lvm_map_wait); +#else +struct wait_queue *lvm_wait = NULL; +struct wait_queue *lvm_map_wait = NULL; +#endif + +static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED; + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +#if LINUX_VERSION_CODE < KERNEL_VERSION ( 2, 3, 31) +static struct proc_dir_entry lvm_proc_entry = { + 0, 3, LVM_NAME, S_IFREG | S_IRUGO, + 1, 0, 0, 0, + NULL, + lvm_proc_get_info, + NULL, NULL, NULL, NULL, NULL, +}; +#endif +#endif + +static struct file_operations lvm_chr_fops = { + ioctl: lvm_chr_ioctl, + open: lvm_chr_open, + release: lvm_chr_release, +}; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 3, 38) +static struct file_operations lvm_blk_fops = { + read: block_read, + write: block_write, + ioctl: lvm_blk_ioctl, + open: lvm_blk_open, + release: lvm_blk_release, + fsync: block_fsync, +}; +#else +static struct block_device_operations lvm_blk_fops = +{ + open: lvm_blk_open, + release: lvm_blk_release, + ioctl: lvm_blk_ioctl, +}; +#endif + +/* gendisk structures */ +static struct hd_struct lvm_hd_struct[MAX_LV]; +int lvm_blocksizes[MAX_LV] = { 0, }; +static int lvm_size[MAX_LV] = { 0, }; +static struct gendisk lvm_gendisk = { + MAJOR_NR, /* major # */ + LVM_NAME, /* name of major */ + 0, /* number of times minor is shifted + to get real minor */ + 1, /* maximum partitions per device */ + MAX_LV, /* maximum number of real devices */ + lvm_geninit, /* initialization called before we + do other things */ + lvm_hd_struct, /* partition table */ + lvm_size, /* device size in blocks, copied + to block_size[] */ + MAX_LV, /* number of real devices */ + NULL, /* internal */ + NULL, /* pointer to next gendisk struct (internal) */ +}; + + +#ifdef MODULE +/* + * Module initialization... + */ +int init_module ( void) +#else +/* + * Driver initialization...
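vg_lv_map[] above is the bridge from a block device minor number back to the owning volume group and logical volume; lvm_init_vars() below fills the unused slots with the ABS_MAX_VG/-1 sentinels. The lookup it enables boils down to something like this (an illustrative helper, not a function from the patch; bounds checking of the minor is omitted):

static lv_t *minor_to_lv(int minor)
{
	int v = vg_lv_map[minor].vg_number;
	int l = vg_lv_map[minor].lv_number;

	if (v == ABS_MAX_VG || l < 0)	/* sentinel values: minor unused */
		return NULL;
	if (vg[v] == NULL)		/* VG not (or no longer) active */
		return NULL;
	return vg[v]->lv[l];
}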
+ */ +#ifdef __initfunc +__initfunc ( int lvm_init ( void)) +#else +int __init lvm_init ( void) +#endif +#endif /* #ifdef MODULE */ +{ + struct gendisk *gendisk_ptr = NULL; + + lvm_init_vars (); + + /* insert our gendisk at the corresponding major */ + lvm_geninit ( &lvm_gendisk); + if ( gendisk_head != NULL) { + gendisk_ptr = gendisk_head; + while ( gendisk_ptr->next != NULL && + gendisk_ptr->major > lvm_gendisk.major) { + gendisk_ptr = gendisk_ptr->next; + } + lvm_gendisk.next = gendisk_ptr->next; + gendisk_ptr->next = &lvm_gendisk; + } else { + gendisk_head = &lvm_gendisk; + lvm_gendisk.next = NULL; + } + + /* reference from drivers/block/ll_rw_blk.c */ + lvm_map_ptr = lvm_map; + +#ifdef LVM_HD_NAME + /* reference from drivers/block/genhd.c */ + lvm_hd_name_ptr = lvm_hd_name; +#endif + +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) + blk_init_queue ( BLK_DEFAULT_QUEUE ( MAJOR_NR), lvm_dummy_device_request); +#else + blk_dev[MAJOR_NR].request_fn = lvm_dummy_device_request; + blk_dev[MAJOR_NR].current_request = NULL; +#endif + + /* optional read root VGDA */ +/* + if ( *rootvg != 0) { + vg_read_with_pv_and_lv ( rootvg, &vg); + } +*/ + + if ( register_chrdev ( LVM_CHAR_MAJOR, lvm_name, &lvm_chr_fops) < 0) { + printk ( KERN_ERR "%s -- register_chrdev failed\n", lvm_name); + return -EIO; + } + if ( register_blkdev ( MAJOR_NR, lvm_name, &lvm_blk_fops) < 0) { + printk ( "%s -- register_blkdev failed\n", lvm_name); + if ( unregister_chrdev ( LVM_CHAR_MAJOR, lvm_name) < 0) + printk ( KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name); + return -EIO; + } + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 25) + create_proc_info_entry ( LVM_NAME, S_IFREG | S_IRUGO, + &proc_root, lvm_proc_get_info_ptr); +# else + proc_register ( &proc_root, &lvm_proc_entry); +# endif +#endif + + printk ( KERN_INFO + "%s%s -- " +#ifdef MODULE + "Module" +#else + "Driver" +#endif + " successfully initialized\n", + lvm_version, lvm_name); + + return 0; +} /* init_module () / lvm_init () */ + + +#ifdef MODULE +/* + * Module cleanup... 
+ */ +void cleanup_module ( void) { + struct gendisk *gendisk_ptr = NULL, *gendisk_ptr_prev = NULL; + + if ( unregister_chrdev ( LVM_CHAR_MAJOR, lvm_name) < 0) { + printk ( KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name); + } + if ( unregister_blkdev ( MAJOR_NR, lvm_name) < 0) { + printk ( KERN_ERR "%s -- unregister_blkdev failed\n", lvm_name); + } + +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) + blk_cleanup_queue ( BLK_DEFAULT_QUEUE ( MAJOR_NR)); +#else + blk_dev[MAJOR_NR].request_fn = NULL; + blk_dev[MAJOR_NR].current_request = NULL; +#endif + + gendisk_ptr = gendisk_ptr_prev = gendisk_head; + while ( gendisk_ptr != NULL) { + if ( gendisk_ptr == &lvm_gendisk) break; + gendisk_ptr_prev = gendisk_ptr; + gendisk_ptr = gendisk_ptr->next; + } + /* delete our gendisk from chain */ + if ( gendisk_ptr == &lvm_gendisk) gendisk_ptr_prev->next = gendisk_ptr->next; + + blk_size[MAJOR_NR] = NULL; + blksize_size[MAJOR_NR] = NULL; + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) + remove_proc_entry ( LVM_NAME, &proc_root); +# else + proc_unregister ( &proc_root, lvm_proc_entry.low_ino); +# endif +#endif + + /* reference from linux/drivers/block/ll_rw_blk.c */ + lvm_map_ptr = NULL; + +#ifdef LVM_HD_NAME + /* reference from linux/drivers/block/genhd.c */ + lvm_hd_name_ptr = NULL; +#endif + + printk ( KERN_INFO "%s -- Module successfully deactivated\n", lvm_name); + + return; +} /* void cleanup_module () */ +#endif /* #ifdef MODULE */ + + +/* + * support function to initialize lvm variables + */ +#ifdef __initfunc +__initfunc ( void lvm_init_vars ( void)) +#else +void __init lvm_init_vars ( void) +#endif +{ + int v; + + loadtime = CURRENT_TIME; + + lvm_lock = SPIN_LOCK_UNLOCKED; + + pe_lock_req.lock = UNLOCK_PE; + pe_lock_req.data.lv_dev = \ + pe_lock_req.data.pv_dev = \ + pe_lock_req.data.pv_offset = 0; + + /* Initialize VG pointers */ + for ( v = 0; v <= ABS_MAX_VG; v++) vg[v] = NULL; + + /* Initialize LV -> VG association */ + for ( v = 0; v < ABS_MAX_LV; v++) { + /* index ABS_MAX_VG never used for real VG */ + vg_lv_map[v].vg_number = ABS_MAX_VG; + vg_lv_map[v].lv_number = -1; + } + + return; +} /* lvm_init_vars () */ + + +/******************************************************************** + * + * Character device functions + * + ********************************************************************/ + +/* + * character device open routine + */ +static int lvm_chr_open ( struct inode *inode, + struct file *file) { + int minor = MINOR ( inode->i_rdev); + +#ifdef DEBUG + printk ( KERN_DEBUG + "%s -- lvm_chr_open MINOR: %d VG#: %d mode: 0x%X lock: %d\n", + lvm_name, minor, VG_CHR(minor), file->f_mode, lock); +#endif + + /* super user validation */ + if ( ! suser()) return -EACCES; + + /* Group special file open */ + if ( VG_CHR(minor) > MAX_VG) return -ENXIO; + +#ifdef MODULE + MOD_INC_USE_COUNT; +#endif + + lvm_chr_open_count++; + return 0; +} /* lvm_chr_open () */ + + +/* + * character device i/o-control routine + * + * Only one changing process can do ioctl at one time, others will block. 
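In practice the user-space tools serialize metadata updates through this lock: open the VG's character special file, take LVM_LOCK_LVM (the handler below sleeps while another pid holds the lock), issue the update ioctls, then simply close the descriptor, since lvm_chr_release() further below drops the lock owned by the calling pid. A sketch of that calling convention (device path and error handling are illustrative; the header name is an assumption):

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/lvm.h>	/* assumed header exporting the LVM ioctl numbers */

int locked_vg_update(const char *vg_char_dev)
{
	int fd = open(vg_char_dev, O_RDWR);

	if (fd < 0)
		return -1;
	if (ioctl(fd, LVM_LOCK_LVM, 0) < 0) {	/* blocks until the lock is free */
		close(fd);
		return -1;
	}
	/* ... VG_, LV_ and PV_ update ioctls go here ... */
	close(fd);	/* lvm_chr_release() releases the lock */
	return 0;
}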
+ * + */ +static int lvm_chr_ioctl ( struct inode *inode, struct file *file, + uint command, ulong a) { + int minor = MINOR ( inode->i_rdev); + int extendable; + ulong l, le, p, v; + ulong size; + void *arg = ( void*) a; +#ifdef LVM_GET_INODE + struct inode *inode_sav; +#endif + lv_status_byname_req_t lv_status_byname_req; + lv_status_byindex_req_t lv_status_byindex_req; + lv_t lv; + + /* otherwise cc will complain about unused variables */ + ( void) lvm_lock; + + +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_chr_ioctl: command: 0x%X MINOR: %d " + "VG#: %d mode: 0x%X\n", + lvm_name, command, minor, VG_CHR(minor), file->f_mode); +#endif + +#ifdef LVM_TOTAL_RESET + if ( lvm_reset_spindown > 0) return -EACCES; +#endif + + + /* Main command switch */ + switch ( command) { + /* lock the LVM */ + case LVM_LOCK_LVM: +lock_try_again: + spin_lock ( &lvm_lock); + if( lock != 0 && lock != current->pid ) { +#ifdef DEBUG_IOCTL + printk ( KERN_INFO "lvm_chr_ioctl: %s is locked by pid %d ...\n", + lvm_name, lock); +#endif + spin_unlock ( &lvm_lock); + interruptible_sleep_on ( &lvm_wait); + if ( current->sigpending != 0) return -EINTR; +#ifdef LVM_TOTAL_RESET + if ( lvm_reset_spindown > 0) return -EACCES; +#endif + goto lock_try_again; + } + lock = current->pid; + spin_unlock ( &lvm_lock); + return 0; + + + /* check lvm version to ensure driver/tools+lib interoperability */ + case LVM_GET_IOP_VERSION: + if ( copy_to_user ( arg, &lvm_iop_version, sizeof ( ushort)) != 0) + return -EFAULT; + return 0; + + +#ifdef LVM_TOTAL_RESET + /* lock reset function */ + case LVM_RESET: + lvm_reset_spindown = 1; + for ( v = 0; v < ABS_MAX_VG; v++) { + if ( vg[v] != NULL) { + do_vg_remove ( v); + } + } + +#ifdef MODULE + while ( GET_USE_COUNT ( &__this_module) < 1) + MOD_INC_USE_COUNT; + while ( GET_USE_COUNT ( &__this_module) > 1) + MOD_DEC_USE_COUNT; +#endif /* MODULE */ + lock = 0; /* release lock */ + wake_up_interruptible ( &lvm_wait); + return 0; +#endif /* LVM_TOTAL_RESET */ + + + /* lock/unlock i/o to a physical extent to move it to another + physical volume (move's done in user space's pvmove) */ + case PE_LOCK_UNLOCK: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &pe_lock_req, arg, sizeof ( pe_lock_req_t)) != 0) + return -EFAULT; + + switch ( pe_lock_req.lock) { + case LOCK_PE: + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + if ( vg[VG_CHR(minor)]->pv[p] != NULL && + pe_lock_req.data.pv_dev == + vg[VG_CHR(minor)]->pv[p]->pv_dev) + break; + } + + if ( p == vg[VG_CHR(minor)]->pv_max) return -ENXIO; + + pe_lock_req.lock = UNLOCK_PE; + fsync_dev ( pe_lock_req.data.lv_dev); + pe_lock_req.lock = LOCK_PE; + break; + + case UNLOCK_PE: + pe_lock_req.lock = UNLOCK_PE; + pe_lock_req.data.lv_dev = \ + pe_lock_req.data.pv_dev = \ + pe_lock_req.data.pv_offset = 0; + wake_up ( &lvm_map_wait); + break; + + default: + return -EINVAL; + } + + return 0; + + + /* remap a logical extent (after moving the physical extent) */ + case LE_REMAP: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &le_remap_req, arg, + sizeof ( le_remap_req_t)) != 0) + return -EFAULT; + + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->lv[l]->lv_name, + le_remap_req.lv_name) == 0) { + for ( le = 0; le < vg[VG_CHR(minor)]->lv[l]->lv_allocated_le; + le++) { + if ( vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev == + le_remap_req.old_dev && + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].pe == + le_remap_req.old_pe) { 
+ vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev = + le_remap_req.new_dev; + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].pe = + le_remap_req.new_pe; + return 0; + } + } + return -EINVAL; + } + } + + return -ENXIO; + + + /* create a VGDA */ + case VG_CREATE: + return do_vg_create ( minor, arg); + + + /* remove an inactive VGDA */ + case VG_REMOVE: + return do_vg_remove ( minor); + + + /* extend a volume group */ + case VG_EXTEND: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( vg[VG_CHR(minor)]->pv_cur < vg[VG_CHR(minor)]->pv_max) { + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + if ( vg[VG_CHR(minor)]->pv[p] == NULL) { + if ( ( vg[VG_CHR(minor)]->pv[p] = + kmalloc ( sizeof ( pv_t), GFP_USER)) == NULL) { + printk ( KERN_CRIT + "%s -- VG_EXTEND: kmalloc error PV at line %d\n", + lvm_name, __LINE__); + return -ENOMEM; + } + if ( copy_from_user ( vg[VG_CHR(minor)]->pv[p], arg, + sizeof ( pv_t)) != 0) + return -EFAULT; + + vg[VG_CHR(minor)]->pv[p]->pv_status = PV_ACTIVE; + /* We don't need the PE list + in kernel space like LVs pe_t list */ + vg[VG_CHR(minor)]->pv[p]->pe = NULL; + vg[VG_CHR(minor)]->pv_cur++; + vg[VG_CHR(minor)]->pv_act++; + vg[VG_CHR(minor)]->pe_total += + vg[VG_CHR(minor)]->pv[p]->pe_total; +#ifdef LVM_GET_INODE + /* insert a dummy inode for fs_may_mount */ + vg[VG_CHR(minor)]->pv[p]->inode = + lvm_get_inode ( vg[VG_CHR(minor)]->pv[p]->pv_dev); +#endif + return 0; + } + } + } + return -EPERM; + + + /* reduce a volume group */ + case VG_REDUCE: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( pv_name, arg, sizeof ( pv_name)) != 0) + return -EFAULT; + + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + if ( vg[VG_CHR(minor)]->pv[p] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->pv[p]->pv_name, + pv_name) == 0) { + if ( vg[VG_CHR(minor)]->pv[p]->lv_cur > 0) return -EPERM; + vg[VG_CHR(minor)]->pe_total -= + vg[VG_CHR(minor)]->pv[p]->pe_total; + vg[VG_CHR(minor)]->pv_cur--; + vg[VG_CHR(minor)]->pv_act--; +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG + "%s -- kfree %d\n", lvm_name, __LINE__); +#endif +#ifdef LVM_GET_INODE + lvm_clear_inode ( vg[VG_CHR(minor)]->pv[p]->inode); +#endif + kfree ( vg[VG_CHR(minor)]->pv[p]); + /* Make PV pointer array contiguous */ + for ( ; p < vg[VG_CHR(minor)]->pv_max-1; p++) + vg[VG_CHR(minor)]->pv[p] = vg[VG_CHR(minor)]->pv[p + 1]; + vg[VG_CHR(minor)]->pv[p + 1] = NULL; + return 0; + } + } + return -ENXIO; + + + /* set/clear extendability flag of volume group */ + case VG_SET_EXTENDABLE: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &extendable, arg, sizeof ( extendable)) != 0) + return -EFAULT; + + if ( extendable == VG_EXTENDABLE || + extendable == ~VG_EXTENDABLE) { + if ( extendable == VG_EXTENDABLE) + vg[VG_CHR(minor)]->vg_status |= VG_EXTENDABLE; + else + vg[VG_CHR(minor)]->vg_status &= ~VG_EXTENDABLE; + } else return -EINVAL; + return 0; + + + /* get volume group data (only the vg_t struct) */ + case VG_STATUS: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_to_user ( arg, vg[VG_CHR(minor)], sizeof ( vg_t)) != 0) + return -EFAULT; + + return 0; + + + /* get volume group count */ + case VG_STATUS_GET_COUNT: + if ( copy_to_user ( arg, &vg_count, sizeof ( vg_count)) != 0) + return -EFAULT; + + return 0; + + + /* get volume group count */ + case VG_STATUS_GET_NAMELIST: + for ( l = v = 0; v < ABS_MAX_VG; v++) { + if ( vg[v] != NULL) { + if ( copy_to_user ( arg + l++ * NAME_LEN, + vg[v]->vg_name, + NAME_LEN) != 0) + return -EFAULT; + } + } + return 0; + + + /* create, 
remove, extend or reduce a logical volume */ + case LV_CREATE: + case LV_REMOVE: + case LV_EXTEND: + case LV_REDUCE: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &lv_req, arg, sizeof ( lv_req)) != 0) + return -EFAULT; + + if ( command != LV_REMOVE) { + if ( copy_from_user ( &lv, lv_req.lv, sizeof ( lv_t)) != 0) + return -EFAULT; + } + + switch ( command) { + case LV_CREATE: + return do_lv_create ( minor, lv_req.lv_name, &lv); + + case LV_REMOVE: + return do_lv_remove ( minor, lv_req.lv_name, -1); + + case LV_EXTEND: + case LV_REDUCE: + return do_lv_extend_reduce ( minor, lv_req.lv_name, &lv); + } + + + /* get status of a logical volume by name */ + case LV_STATUS_BYNAME: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &lv_status_byname_req, arg, + sizeof ( lv_status_byname_req_t)) != 0) + return -EFAULT; + + if ( lv_status_byname_req.lv == NULL) return -EINVAL; + if ( copy_from_user ( &lv, lv_status_byname_req.lv, + sizeof ( lv_t)) != 0) + return -EFAULT; + + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->lv[l]->lv_name, + lv_status_byname_req.lv_name) == 0) { + if ( copy_to_user ( lv_status_byname_req.lv, + vg[VG_CHR(minor)]->lv[l], + sizeof ( lv_t)) != 0) + return -EFAULT; + + if ( lv.lv_current_pe != NULL) { + size = vg[VG_CHR(minor)]->lv[l]->lv_allocated_le * + sizeof ( pe_t); + if ( copy_to_user ( lv.lv_current_pe, + vg[VG_CHR(minor)]->lv[l]->lv_current_pe, + size) != 0) + return -EFAULT; + } + return 0; + } + } + return -ENXIO; + + + /* get status of a logical volume by index */ + case LV_STATUS_BYINDEX: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &lv_status_byindex_req, arg, + sizeof ( lv_status_byindex_req)) != 0) + return -EFAULT; + + if ( ( lvp = lv_status_byindex_req.lv) == NULL) return -EINVAL; + l = lv_status_byindex_req.lv_index; + if ( vg[VG_CHR(minor)]->lv[l] == NULL) return -ENXIO; + + if ( copy_from_user ( &lv, lvp, sizeof ( lv_t)) != 0) + return -EFAULT; + + if ( copy_to_user ( lvp, vg[VG_CHR(minor)]->lv[l], + sizeof ( lv_t)) != 0) + return -EFAULT; + + if ( lv.lv_current_pe != NULL) { + size = vg[VG_CHR(minor)]->lv[l]->lv_allocated_le * sizeof ( pe_t); + if ( copy_to_user ( lv.lv_current_pe, + vg[VG_CHR(minor)]->lv[l]->lv_current_pe, + size) != 0) + return -EFAULT; + } + return 0; + + + /* change a physical volume */ + case PV_CHANGE: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &pv_change_req, arg, + sizeof ( pv_change_req)) != 0) + return -EFAULT; + + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + if ( vg[VG_CHR(minor)]->pv[p] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->pv[p]->pv_name, + pv_change_req.pv_name) == 0) { +#ifdef LVM_GET_INODE + inode_sav = vg[VG_CHR(minor)]->pv[p]->inode; +#endif + if ( copy_from_user ( vg[VG_CHR(minor)]->pv[p], + pv_change_req.pv, + sizeof ( pv_t)) != 0) + return -EFAULT; + + /* We don't need the PE list + in kernel space as with LVs pe_t list */ + vg[VG_CHR(minor)]->pv[p]->pe = NULL; +#ifdef LVM_GET_INODE + vg[VG_CHR(minor)]->pv[p]->inode = inode_sav; +#endif + return 0; + } + } + return -ENXIO; + + + /* get physical volume data (pv_t structure only) */ + case PV_STATUS: + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + if ( copy_from_user ( &pv_status_req, arg, + sizeof ( pv_status_req)) != 0) + return -EFAULT; + + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + if ( vg[VG_CHR(minor)]->pv[p] != NULL) { + if ( lvm_strcmp ( 
vg[VG_CHR(minor)]->pv[p]->pv_name, + pv_status_req.pv_name) == 0) { + if ( copy_to_user ( pv_status_req.pv, + vg[VG_CHR(minor)]->pv[p], + sizeof ( pv_t)) != 0) + return -EFAULT; + return 0; + } + } + } + return -ENXIO; + + + /* physical volume buffer flush/invalidate */ + case PV_FLUSH: + if ( copy_from_user ( &pv_flush_req, arg, sizeof ( pv_flush_req)) != 0) + return -EFAULT; + + for ( v = 0; v < ABS_MAX_VG; v++) { + if ( vg[v] == NULL) continue; + for ( p = 0; p < vg[v]->pv_max; p++) { + if ( vg[v]->pv[p] != NULL && + lvm_strcmp ( vg[v]->pv[p]->pv_name, + pv_flush_req.pv_name) == 0) { + fsync_dev ( vg[v]->pv[p]->pv_dev); + invalidate_buffers ( vg[v]->pv[p]->pv_dev); + return 0; + } + } + } + return 0; + + + default: + printk ( KERN_WARNING + "%s -- lvm_chr_ioctl: unknown command %x\n", + lvm_name, command); + return -EINVAL; + } + + return 0; +} /* lvm_chr_ioctl */ + + +/* + * character device close routine + */ +static int lvm_chr_release ( struct inode *inode, struct file *file) +{ +#ifdef DEBUG + int minor = MINOR ( inode->i_rdev); + printk ( KERN_DEBUG + "%s -- lvm_chr_release VG#: %d\n", lvm_name, VG_CHR(minor)); +#endif + +#ifdef MODULE + if ( GET_USE_COUNT ( &__this_module) > 0) MOD_DEC_USE_COUNT; +#endif + +#ifdef LVM_TOTAL_RESET + if ( lvm_reset_spindown > 0) { + lvm_reset_spindown = 0; + lvm_chr_open_count = 1; + } +#endif + + if ( lvm_chr_open_count > 0) lvm_chr_open_count--; + if ( lock == current->pid) { + lock = 0; /* release lock */ + wake_up_interruptible ( &lvm_wait); + } + + return 0; +} /* lvm_chr_release () */ + + + +/******************************************************************** + * + * Block device functions + * + ********************************************************************/ + +/* + * block device open routine + */ +static int lvm_blk_open ( struct inode *inode, struct file *file) { + int minor = MINOR ( inode->i_rdev); + +#ifdef DEBUG_LVM_BLK_OPEN + printk ( KERN_DEBUG + "%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d mode: 0x%X\n", + lvm_name, minor, VG_BLK(minor), LV_BLK(minor), file->f_mode); +#endif + +#ifdef LVM_TOTAL_RESET + if ( lvm_reset_spindown > 0) return -EPERM; +#endif + + if ( vg[VG_BLK(minor)] != NULL && + ( vg[VG_BLK(minor)]->vg_status & VG_ACTIVE) && + vg[VG_BLK(minor)]->lv[LV_BLK(minor)] != NULL && + LV_BLK(minor) >= 0 && + LV_BLK(minor) < vg[VG_BLK(minor)]->lv_max) { + + /* Check parallel LV spindown (LV remove) */ + if ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_status & LV_SPINDOWN) + return -EPERM; + + /* Check inactive LV and open for read/write */ + if ( file->f_mode & O_RDWR) { + if ( ! ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_status & LV_ACTIVE)) + return -EPERM; + if ( ! 
( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_access & LV_WRITE)) + return -EACCES; + } + + if ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open == 0) + vg[VG_BLK(minor)]->lv_open++; + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open++; + +#ifdef MODULE + MOD_INC_USE_COUNT; +#endif + +#ifdef DEBUG_LVM_BLK_OPEN + printk ( KERN_DEBUG + "%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d size: %d\n", + lvm_name, minor, VG_BLK(minor), LV_BLK(minor), + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_size); +#endif + + return 0; + } + + return -ENXIO; +} /* lvm_blk_open () */ + + +/* + * block device i/o-control routine + */ +static int lvm_blk_ioctl (struct inode *inode, struct file *file, + uint command, ulong a) { + int minor = MINOR ( inode->i_rdev); + void *arg = ( void*) a; + struct hd_geometry *hd = ( struct hd_geometry *) a; + +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl MINOR: %d command: 0x%X arg: %X " + "VG#: %dl LV#: %d\n", + lvm_name, minor, command, ( ulong) arg, + VG_BLK(minor), LV_BLK(minor)); +#endif + + switch ( command) { + /* return device size */ + case BLKGETSIZE: +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- BLKGETSIZE: %u\n", + lvm_name, vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_size); +#endif + copy_to_user ( ( long*) arg, &vg[VG_BLK(minor)]->\ + lv[LV_BLK(minor)]->lv_size, + sizeof ( vg[VG_BLK(minor)]->\ + lv[LV_BLK(minor)]->lv_size)); + break; + + + /* flush buffer cache */ + case BLKFLSBUF: + /* super user validation */ + if ( ! suser ()) return -EACCES; + +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- BLKFLSBUF\n", lvm_name); +#endif + fsync_dev ( inode->i_rdev); + invalidate_buffers(inode->i_rdev); + break; + + + /* set read ahead for block device */ + case BLKRASET: + /* super user validation */ + if ( ! suser ()) return -EACCES; + +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- BLKRASET: %d sectors for %02X:%02X\n", + lvm_name, ( long) arg, MAJOR( inode->i_rdev), minor); +#endif + if ( ( long) arg < LVM_MIN_READ_AHEAD || + ( long) arg > LVM_MAX_READ_AHEAD) return -EINVAL; + read_ahead[MAJOR_NR] = + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_read_ahead = ( long) arg; + break; + + + /* get current read ahead setting */ + case BLKRAGET: +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- BLKRAGET\n", lvm_name); +#endif + copy_to_user ( ( long*) arg, + &vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_read_ahead, + sizeof ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->\ + lv_read_ahead)); + break; + + + /* get disk geometry */ + case HDIO_GETGEO: +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- HDIO_GETGEO\n", lvm_name); +#endif + if ( hd == NULL) return -EINVAL; + { + unsigned char heads = 64; + unsigned char sectors = 32; + long start = 0; + short cylinders = vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_size / + heads / sectors; + + if ( copy_to_user ( ( char*) &hd->heads, &heads, + sizeof ( heads)) != 0 || + copy_to_user ( ( char*) &hd->sectors, &sectors, + sizeof ( sectors)) != 0 || + copy_to_user ( ( short*) &hd->cylinders, + &cylinders, sizeof ( cylinders)) != 0 || + copy_to_user ( ( long*) &hd->start, &start, + sizeof ( start)) != 0) + return -EFAULT; + +#ifdef DEBUG_IOCTL + printk ( KERN_DEBUG + "%s -- lvm_blk_ioctl -- cylinders: %d\n", + lvm_name, vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->\ + lv_size / heads / sectors); +#endif + } + break; + + + /* set access flags of a logical volume */ + case LV_SET_ACCESS: + /* super user validation */ + if ( ! 
suser ()) return -EACCES; + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_access = ( ulong) arg; + break; + + + /* set status flags of a logical volume */ + case LV_SET_STATUS: + /* super user validation */ + if ( ! suser ()) return -EACCES; + if ( ! ( ( ulong) arg & LV_ACTIVE) && + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open > 1) return -EPERM; + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_status = ( ulong) arg; + break; + + + /* set allocation flags of a logical volume */ + case LV_SET_ALLOCATION: + /* super user validation */ + if ( ! suser ()) return -EACCES; + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_allocation = ( ulong) arg; + break; + + + default: + printk ( KERN_WARNING + "%s -- lvm_blk_ioctl: unknown command %d\n", + lvm_name, command); + return -EINVAL; + } + + return 0; +} /* lvm_blk_ioctl () */ + + +/* + * block device close routine + */ +static int lvm_blk_release ( struct inode *inode, struct file *file) +{ + int minor = MINOR ( inode->i_rdev); + +#ifdef DEBUG + printk ( KERN_DEBUG + "%s -- lvm_blk_release MINOR: %d VG#: %d LV#: %d\n", + lvm_name, minor, VG_BLK(minor), LV_BLK(minor)); +#endif + + sync_dev ( inode->i_rdev); + if ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open == 1) + vg[VG_BLK(minor)]->lv_open--; + vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_open--; + +#ifdef MODULE + MOD_DEC_USE_COUNT; +#endif + + return 0; +} /* lvm_blk_release () */ + + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +/* + * Support function /proc-Filesystem + */ +#define LVM_PROC_BUF ( i == 0 ? dummy_buf : &buf[sz]) + +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 25) +static int lvm_proc_get_info ( char *page, char **start, off_t pos, int count) +#else +static int lvm_proc_get_info ( char *page, char **start, off_t pos, + int count, int whence) +#endif +{ + int c, i, l, p, v, vg_counter, pv_counter, lv_counter, lv_open_counter, + lv_open_total, pe_t_bytes, lv_block_exception_t_bytes, seconds; + static off_t sz; + off_t sz_last; + char allocation_flag, inactive_flag, rw_flag, stripes_flag; + char *lv_name = NULL; + static char *buf = NULL; + static char dummy_buf[160]; /* sized for 2 lines */ + +#ifdef DEBUG_LVM_PROC_GET_INFO + printk ( KERN_DEBUG + "%s - lvm_proc_get_info CALLED pos: %lu count: %d whence: %d\n", + lvm_name, pos, count, whence); +#endif + + if ( pos == 0 || buf == NULL) { + sz_last = vg_counter = pv_counter = lv_counter = lv_open_counter = \ + lv_open_total = pe_t_bytes = lv_block_exception_t_bytes = 0; + + /* search for activity */ + for ( v = 0; v < ABS_MAX_VG; v++) { + if ( vg[v] != NULL) { + vg_counter++; + pv_counter += vg[v]->pv_cur; + lv_counter += vg[v]->lv_cur; + if ( vg[v]->lv_cur > 0) { + for ( l = 0; l < vg[v]->lv_max; l++) { + if ( vg[v]->lv[l] != NULL) { + pe_t_bytes += vg[v]->lv[l]->lv_allocated_le; + if ( vg[v]->lv[l]->lv_block_exception != NULL) { + lv_block_exception_t_bytes += + vg[v]->lv[l]->lv_remap_end; + } + if ( vg[v]->lv[l]->lv_open > 0) { + lv_open_counter++; + lv_open_total += vg[v]->lv[l]->lv_open; + } + } + } + } + } + } + pe_t_bytes *= sizeof ( pe_t); + lv_block_exception_t_bytes *= sizeof ( lv_block_exception_t); + + if ( buf != NULL) { +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG + "%s -- vfree %d\n", lvm_name, __LINE__); +#endif + vfree ( buf); + buf = NULL; + } + + /* 2 times: first to get size to allocate buffer, + 2nd to fill the vmalloced buffer */ + for ( i = 0; i < 2; i++) { + sz = 0; + sz += sprintf ( LVM_PROC_BUF, + "LVM " +#ifdef MODULE + "module" +#else + "driver" +#endif + " %s\n\n" + "Total: %d VG%s %d PV%s %d LV%s ", + 
lvm_short_version, + vg_counter, vg_counter == 1 ? "" : "s", + pv_counter, pv_counter == 1 ? "" : "s", + lv_counter, lv_counter == 1 ? "" : "s"); + sz += sprintf ( LVM_PROC_BUF, + "(%d LV%s open", + lv_open_counter, + lv_open_counter == 1 ? "" : "s"); + if ( lv_open_total > 0) sz += sprintf ( LVM_PROC_BUF, + " %d times)\n", + lv_open_total); + else sz += sprintf ( LVM_PROC_BUF, ")"); + sz += sprintf ( LVM_PROC_BUF, + "\nGlobal: %lu bytes vmalloced IOP version: %d ", + vg_counter * sizeof ( vg_t) + + pv_counter * sizeof ( pv_t) + + lv_counter * sizeof ( lv_t) + + pe_t_bytes + lv_block_exception_t_bytes + sz_last, + lvm_iop_version); + + seconds = CURRENT_TIME - loadtime; + if ( seconds < 0) loadtime = CURRENT_TIME + seconds; + if ( seconds / 86400 > 0) { + sz += sprintf ( LVM_PROC_BUF, "%d day%s ", + seconds / 86400, + seconds / 86400 == 0 || + seconds / 86400 > 1 ? "s": ""); + } + sz += sprintf ( LVM_PROC_BUF, "%d:%02d:%02d active\n", + ( seconds % 86400) / 3600, + ( seconds % 3600) / 60, + seconds % 60); + + if ( vg_counter > 0) { + for ( v = 0; v < ABS_MAX_VG; v++) { + /* volume group */ + if ( vg[v] != NULL) { + inactive_flag = ' '; + if ( ! ( vg[v]->vg_status & VG_ACTIVE)) + inactive_flag = 'I'; + sz += sprintf ( LVM_PROC_BUF, + "\nVG: %c%s [%d PV, %d LV/%d open] " + " PE Size: %d KB\n" + " Usage [KB/PE]: %d /%d total " + "%d /%d used %d /%d free", + inactive_flag, + vg[v]->vg_name, + vg[v]->pv_cur, + vg[v]->lv_cur, + vg[v]->lv_open, + vg[v]->pe_size >> 1, + vg[v]->pe_size * vg[v]->pe_total >> 1, + vg[v]->pe_total, + vg[v]->pe_allocated * vg[v]->pe_size >> 1, + vg[v]->pe_allocated, + ( vg[v]->pe_total - vg[v]->pe_allocated) * + vg[v]->pe_size >> 1, + vg[v]->pe_total - vg[v]->pe_allocated); + + /* physical volumes */ + sz += sprintf ( LVM_PROC_BUF, + "\n PV%s ", + vg[v]->pv_cur == 1 ? ": " : "s:"); + c = 0; + for ( p = 0; p < vg[v]->pv_max; p++) { + if ( vg[v]->pv[p] != NULL) { + inactive_flag = 'A'; + if ( ! ( vg[v]->pv[p]->pv_status & PV_ACTIVE)) + inactive_flag = 'I'; + allocation_flag = 'A'; + if ( ! ( vg[v]->pv[p]->pv_allocatable & PV_ALLOCATABLE)) + allocation_flag = 'N'; + sz += sprintf ( LVM_PROC_BUF, + "[%c%c] %-21s %8d /%-6d " + "%8d /%-6d %8d /%-6d", + inactive_flag, + allocation_flag, + vg[v]->pv[p]->pv_name, + vg[v]->pv[p]->pe_total * + vg[v]->pv[p]->pe_size >> 1, + vg[v]->pv[p]->pe_total, + vg[v]->pv[p]->pe_allocated * + vg[v]->pv[p]->pe_size >> 1, + vg[v]->pv[p]->pe_allocated, + ( vg[v]->pv[p]->pe_total - + vg[v]->pv[p]->pe_allocated) * + vg[v]->pv[p]->pe_size >> 1, + vg[v]->pv[p]->pe_total - + vg[v]->pv[p]->pe_allocated); + c++; + if ( c < vg[v]->pv_cur) sz += sprintf ( LVM_PROC_BUF, + "\n "); + } + } + + /* logical volumes */ + sz += sprintf ( LVM_PROC_BUF, + "\n LV%s ", + vg[v]->lv_cur == 1 ? ": " : "s:"); + c = 0; + for ( l = 0; l < vg[v]->lv_max; l++) { + if ( vg[v]->lv[l] != NULL) { + inactive_flag = 'A'; + if ( ! 
( vg[v]->lv[l]->lv_status & LV_ACTIVE)) + inactive_flag = 'I'; + rw_flag = 'R'; + if ( vg[v]->lv[l]->lv_access & LV_WRITE) rw_flag = 'W'; + allocation_flag = 'D'; + if ( vg[v]->lv[l]->lv_allocation & LV_CONTIGUOUS) + allocation_flag = 'C'; + stripes_flag = 'L'; + if ( vg[v]->lv[l]->lv_stripes > 1) stripes_flag = 'S'; + sz += sprintf ( LVM_PROC_BUF, + "[%c%c%c%c", + inactive_flag, + rw_flag, + allocation_flag, + stripes_flag); + if ( vg[v]->lv[l]->lv_stripes > 1) + sz += sprintf ( LVM_PROC_BUF, "%-2d", + vg[v]->lv[l]->lv_stripes); + else + sz += sprintf ( LVM_PROC_BUF, " "); + lv_name = lvm_strrchr ( vg[v]->lv[l]->lv_name, '/'); + if ( lv_name != NULL) lv_name++; + else lv_name = vg[v]->lv[l]->lv_name; + sz += sprintf ( LVM_PROC_BUF, "] %-25s", lv_name); + if ( lvm_strlen ( lv_name) > 25) + sz += sprintf ( LVM_PROC_BUF, + "\n "); + sz += sprintf ( LVM_PROC_BUF, "%9d /%-6d ", + vg[v]->lv[l]->lv_size >> 1, + vg[v]->lv[l]->lv_size / vg[v]->pe_size); + + if ( vg[v]->lv[l]->lv_open == 0) + sz += sprintf ( LVM_PROC_BUF, "close"); + else + sz += sprintf ( LVM_PROC_BUF, "%dx open", + vg[v]->lv[l]->lv_open); + c++; + if ( c < vg[v]->lv_cur) sz += sprintf ( LVM_PROC_BUF, + "\n "); + } + } + if ( vg[v]->lv_cur == 0) + sz += sprintf ( LVM_PROC_BUF, "none"); + sz += sprintf ( LVM_PROC_BUF, "\n"); + } + } + } + + if ( buf == NULL) { + if ( ( buf = vmalloc ( sz)) == NULL) { + sz = 0; + return sprintf ( page, "%s - vmalloc error at line %d\n", + lvm_name, __LINE__); + } + } + sz_last = sz; + } + } + + if ( pos > sz - 1) { + vfree ( buf); + buf = NULL; + return 0; + } + + *start = &buf[pos]; + if ( sz - pos < count) return sz - pos; + else return count; +} /* lvm_proc_get_info () */ +#endif /* #if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS */ + + +/* + * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c + * (see init_module/lvm_init) + */ +static int lvm_map ( struct buffer_head *bh, int rw) { + int minor = MINOR ( bh->b_dev); + int ret = 0; + ulong index; + ulong size = bh->b_size >> 9; + ulong rsector_tmp = bh->b_blocknr * size; + ulong rsector_sav; + kdev_t rdev_tmp = bh->b_dev; + kdev_t rdev_sav; + lv_t *lv = vg[VG_BLK(minor)]->lv[LV_BLK(minor)]; + unsigned long pe_start; + + + if ( ! ( lv->lv_status & LV_ACTIVE)) { + printk ( KERN_ALERT + "%s - lvm_map: ll_rw_blk for inactive LV %s\n", + lvm_name, lv->lv_name); + return -1; + } + +/* +if ( lv->lv_access & LV_SNAPSHOT) +printk ( "%s -- %02d:%02d block: %lu rw: %d\n", lvm_name, MAJOR ( bh->b_dev), MINOR ( bh->b_dev), bh->b_blocknr, rw); +*/ + + /* take care of snapshot chunk writes before + check for writable logical volume */ + if ( ( lv->lv_access & LV_SNAPSHOT) && + MAJOR ( bh->b_dev) != 0 && + MAJOR ( bh->b_dev) != MAJOR_NR && +#ifdef WRITEA + ( rw == WRITEA || rw == WRITE)) +#else + rw == WRITE) +#endif + { +/* +printk ( "%s -- doing snapshot write for %02d:%02d[%02d:%02d] b_blocknr: %lu b_rsector: %lu\n", lvm_name, MAJOR ( bh->b_dev), MINOR ( bh->b_dev), MAJOR ( bh->b_dev), MINOR ( bh->b_dev), bh->b_blocknr, bh->b_rsector); +*/ + return 0; + } + +#ifdef WRITEA + if ( ( rw == WRITE || rw == WRITEA) && +#else + if ( rw == WRITE && +#endif + ! 
( lv->lv_access & LV_WRITE)) { + printk ( KERN_CRIT + "%s - lvm_map: ll_rw_blk write for readonly LV %s\n", + lvm_name, lv->lv_name); + return -1; + } + + +#ifdef DEBUG_MAP + printk ( KERN_DEBUG + "%s - lvm_map minor:%d *rdev: %02d:%02d *rsector: %lu " + "size:%lu\n", + lvm_name, minor, + MAJOR ( rdev_tmp), + MINOR ( rdev_tmp), + rsector_tmp, size); +#endif + + if ( rsector_tmp + size > lv->lv_size) { + printk ( KERN_ALERT + "%s - lvm_map *rsector: %lu or size: %lu wrong for" + " minor: %2d\n", lvm_name, rsector_tmp, size, minor); + return -1; + } + + rsector_sav = rsector_tmp; + rdev_sav = rdev_tmp; + +lvm_second_remap: + /* linear mapping */ + if ( lv->lv_stripes < 2) { + index = rsector_tmp / vg[VG_BLK(minor)]->pe_size; /* get the index */ + pe_start = lv->lv_current_pe[index].pe; + rsector_tmp = lv->lv_current_pe[index].pe + + ( rsector_tmp % vg[VG_BLK(minor)]->pe_size); + rdev_tmp = lv->lv_current_pe[index].dev; + +#ifdef DEBUG_MAP + printk ( KERN_DEBUG + "lv_current_pe[%ld].pe: %d rdev: %02d:%02d rsector:%ld\n", + index, + lv->lv_current_pe[index].pe, + MAJOR ( rdev_tmp), + MINOR ( rdev_tmp), + rsector_tmp); +#endif + + /* striped mapping */ + } else { + ulong stripe_index; + ulong stripe_length; + + stripe_length = vg[VG_BLK(minor)]->pe_size * lv->lv_stripes; + stripe_index = ( rsector_tmp % stripe_length) / lv->lv_stripesize; + index = rsector_tmp / stripe_length + + ( stripe_index % lv->lv_stripes) * + ( lv->lv_allocated_le / lv->lv_stripes); + pe_start = lv->lv_current_pe[index].pe; + rsector_tmp = lv->lv_current_pe[index].pe + + ( rsector_tmp % stripe_length) - + ( stripe_index % lv->lv_stripes) * lv->lv_stripesize - + stripe_index / lv->lv_stripes * + ( lv->lv_stripes - 1) * lv->lv_stripesize; + rdev_tmp = lv->lv_current_pe[index].dev; + +#ifdef DEBUG_MAP + printk(KERN_DEBUG + "lv_current_pe[%ld].pe: %d rdev: %02d:%02d rsector:%ld\n" + "stripe_length: %ld stripe_index: %ld\n", + index, + lv->lv_current_pe[index].pe, + MAJOR ( rdev_tmp), + MINOR ( rdev_tmp), + rsector_tmp, + stripe_length, + stripe_index); +#endif + } + + /* handle physical extents on the move */ + if ( pe_lock_req.lock == LOCK_PE) { + if ( rdev_tmp == pe_lock_req.data.pv_dev && + rsector_tmp >= pe_lock_req.data.pv_offset && + rsector_tmp < ( pe_lock_req.data.pv_offset + + vg[VG_BLK(minor)]->pe_size)) { + sleep_on ( &lvm_map_wait); + rsector_tmp = rsector_sav; + rdev_tmp = rdev_sav; + goto lvm_second_remap; + } + } + + /* statistics */ +#ifdef WRITEA + if ( rw == WRITE || rw == WRITEA) +#else + if ( rw == WRITE) +#endif + lv->lv_current_pe[index].writes++; + else + lv->lv_current_pe[index].reads++; + + /* snapshot volume exception handling based on physical device addresses */ + if ( lv->lv_access & ( LV_SNAPSHOT | LV_SNAPSHOT_ORG)) { + /* original logical volume */ + if ( lv->lv_access & LV_SNAPSHOT_ORG) { +#ifdef WRITEA + if ( rw == WRITE || rw == WRITEA) +#else + if ( rw == WRITE) +#endif + { + lv_t *lv_ptr; + + /* start with the first snapshot and loop through all of them */ + for ( lv_ptr = lv->lv_snapshot_next; + lv_ptr != NULL; + lv_ptr = lv_ptr->lv_snapshot_next) { + down(&lv_ptr->lv_snapshot_sem); + /* is there still free exception storage for this snapshot? 
*/ + if ( lv_ptr->lv_block_exception != NULL) { + kdev_t __dev; + unsigned long __sector; + + __dev = rdev_tmp; + __sector = rsector_tmp; + if (!lvm_snapshot_remap_block(&rdev_tmp, + &rsector_tmp, + pe_start, + lv_ptr)) + /* create a new mapping */ + ret = lvm_snapshot_COW(rdev_tmp, + rsector_tmp, + pe_start, + rsector_sav, + lv_ptr); + rdev_tmp = __dev; + rsector_tmp = __sector; + } + up(&lv_ptr->lv_snapshot_sem); + } + } + } else { + /* remap snapshot logical volume */ + down(&lv->lv_snapshot_sem); + if ( lv->lv_block_exception != NULL) + lvm_snapshot_remap_block ( &rdev_tmp, &rsector_tmp, pe_start, lv); + up(&lv->lv_snapshot_sem); + } + } + + bh->b_rdev = rdev_tmp; + bh->b_rsector = rsector_tmp; + + return ret; +} /* lvm_map () */ + + +/* + * lvm_map snapshot logical volume support functions + */ + +/* + * end lvm_map snapshot logical volume support functions + */ + + +/* + * internal support functions + */ + +#ifdef LVM_HD_NAME +/* + * generate "hard disk" name + */ +void lvm_hd_name ( char *buf, int minor) { + int len = 0; + + if ( vg[VG_BLK(minor)] == NULL || + vg[VG_BLK(minor)]->lv[LV_BLK(minor)] == NULL) return; + len = lvm_strlen ( vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_name) - 5; + lvm_memcpy ( buf, &vg[VG_BLK(minor)]->lv[LV_BLK(minor)]->lv_name[5], len); + buf[len] = 0; + return; +} +#endif + + +/* + * this one never should be called... + */ +#if LINUX_VERSION_CODE > KERNEL_VERSION ( 2, 3, 30) +static void lvm_dummy_device_request ( request_queue_t *t) +#else +static void lvm_dummy_device_request ( void) +#endif +{ + printk ( KERN_EMERG + "%s -- oops, got lvm request for %02d:%02d [sector: %lu]\n", + lvm_name, + MAJOR ( CURRENT->rq_dev), + MINOR ( CURRENT->rq_dev), + CURRENT->sector); + return; +} + + +/* + * character device support function VGDA create + */ +int do_vg_create ( int minor, void *arg) { + int snaporg_minor = 0; + ulong l, p; + lv_t lv; + vg_t *vg_ptr; + + if ( vg[VG_CHR(minor)] != NULL) return -EPERM; + + if ( ( vg_ptr = kmalloc ( sizeof ( vg_t), GFP_USER)) == NULL) { + printk ( KERN_CRIT + "%s -- VG_CREATE: kmalloc error VG at line %d\n", + lvm_name, __LINE__); + return -ENOMEM; + } + + /* get the volume group structure */ + if ( copy_from_user ( vg_ptr, arg, sizeof ( vg_t)) != 0) { + kfree ( vg_ptr); + return -EFAULT; + } + + /* we are not that active so far... 
*/ + vg_ptr->vg_status &= ~VG_ACTIVE; + vg[VG_CHR(minor)] = vg_ptr; + + vg[VG_CHR(minor)]->pe_allocated = 0; + if ( vg[VG_CHR(minor)]->pv_max > ABS_MAX_PV) { + printk ( KERN_WARNING + "%s -- Can't activate VG: ABS_MAX_PV too small\n", + lvm_name); + kfree ( vg[VG_CHR(minor)]); + vg[VG_CHR(minor)] = NULL; + return -EPERM; + } + if ( vg[VG_CHR(minor)]->lv_max > ABS_MAX_LV) { + printk ( KERN_WARNING + "%s -- Can't activate VG: ABS_MAX_LV too small for %u\n", + lvm_name, vg[VG_CHR(minor)]->lv_max); + kfree ( vg[VG_CHR(minor)]); + vg[VG_CHR(minor)] = NULL; + return -EPERM; + } + + /* get the physical volume structures */ + vg[VG_CHR(minor)]->pv_act = vg[VG_CHR(minor)]->pv_cur = 0; + for ( p = 0; p < vg[VG_CHR(minor)]->pv_max; p++) { + /* user space address */ + if ( ( pvp = vg[VG_CHR(minor)]->pv[p]) != NULL) { + vg[VG_CHR(minor)]->pv[p] = kmalloc ( sizeof ( pv_t), GFP_USER); + if ( vg[VG_CHR(minor)]->pv[p] == NULL) { + printk ( KERN_CRIT + "%s -- VG_CREATE: kmalloc error PV at line %d\n", + lvm_name, __LINE__); + do_vg_remove ( minor); + return -ENOMEM; + } + if ( copy_from_user ( vg[VG_CHR(minor)]->pv[p], pvp, + sizeof ( pv_t)) != 0) { + do_vg_remove ( minor); + return -EFAULT; + } + + /* We don't need the PE list + in kernel space as with LVs pe_t list (see below) */ + vg[VG_CHR(minor)]->pv[p]->pe = NULL; + vg[VG_CHR(minor)]->pv[p]->pe_allocated = 0; + vg[VG_CHR(minor)]->pv[p]->pv_status = PV_ACTIVE; + vg[VG_CHR(minor)]->pv_act++; + vg[VG_CHR(minor)]->pv_cur++; + +#ifdef LVM_GET_INODE + /* insert a dummy inode for fs_may_mount */ + vg[VG_CHR(minor)]->pv[p]->inode = + lvm_get_inode ( vg[VG_CHR(minor)]->pv[p]->pv_dev); +#endif + } + } + + /* get the logical volume structures */ + vg[VG_CHR(minor)]->lv_cur = 0; + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + /* user space address */ + if ( ( lvp = vg[VG_CHR(minor)]->lv[l]) != NULL) { + if ( copy_from_user ( &lv, lvp, sizeof ( lv_t)) != 0) { + do_vg_remove ( minor); + return -EFAULT; + } + vg[VG_CHR(minor)]->lv[l] = NULL; + { + int err; + + err = do_lv_create(minor, lv.lv_name, &lv); + if (err) + { + do_vg_remove(minor); + return err; + } + } + } + } + + /* Second pass to correct snapshot logical volumes which were not + in place during the first pass above */ + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT) { + snaporg_minor = vg[VG_CHR(minor)]->lv[l]->lv_snapshot_minor; + if ( vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)] != NULL) { + /* get pointer to original logical volume */ + lv_t *lv_ptr = vg[VG_CHR(minor)]->lv[l]->lv_snapshot_org = + vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]; + + /* set necessary fields of original logical volume */ + lv_ptr->lv_access |= LV_SNAPSHOT_ORG; + lv_ptr->lv_snapshot_minor = 0; + lv_ptr->lv_snapshot_org = lv_ptr; + lv_ptr->lv_snapshot_prev = NULL; + + /* find last snapshot logical volume in the chain */ + while ( lv_ptr->lv_snapshot_next != NULL) + lv_ptr = lv_ptr->lv_snapshot_next; + + /* set back pointer to this last one in our new logical volume */ + vg[VG_CHR(minor)]->lv[l]->lv_snapshot_prev = lv_ptr; + + /* last logical volume now points to our new snapshot volume */ + lv_ptr->lv_snapshot_next = vg[VG_CHR(minor)]->lv[l]; + + /* now point to the new one */ + lv_ptr = lv_ptr->lv_snapshot_next; + + /* set necessary fields of new snapshot logical volume */ + lv_ptr->lv_snapshot_next = NULL; + lv_ptr->lv_current_pe = + vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_current_pe; + lv_ptr->lv_allocated_le = + 
vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_allocated_le; + lv_ptr->lv_current_le = + vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_current_le; + lv_ptr->lv_size = + vg[VG_CHR(minor)]->lv[LV_BLK(snaporg_minor)]->lv_size; + } + } + } + + vg_count++; + + /* let's go active */ + vg[VG_CHR(minor)]->vg_status |= VG_ACTIVE; + +#ifdef MODULE + MOD_INC_USE_COUNT; +#endif + return 0; +} /* do_vg_create () */ + + +/* + * character device support function VGDA remove + */ +static int do_vg_remove ( int minor) { + int i; + + if ( vg[VG_CHR(minor)] == NULL) return -ENXIO; + +#ifdef LVM_TOTAL_RESET + if ( vg[VG_CHR(minor)]->lv_open > 0 && lvm_reset_spindown == 0) +#else + if ( vg[VG_CHR(minor)]->lv_open > 0) +#endif + return -EPERM; + + /* let's go inactive */ + vg[VG_CHR(minor)]->vg_status &= ~VG_ACTIVE; + + /* free LVs */ + /* first free snapshot logical volumes */ + for ( i = 0; i < vg[VG_CHR(minor)]->lv_max; i++) { + if ( vg[VG_CHR(minor)]->lv[i] != NULL && + vg[VG_CHR(minor)]->lv[i]->lv_access & LV_SNAPSHOT) { + do_lv_remove ( minor, NULL, i); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout ( 1); + } + } + /* then free the rest */ + for ( i = 0; i < vg[VG_CHR(minor)]->lv_max; i++) { + if ( vg[VG_CHR(minor)]->lv[i] != NULL) { + do_lv_remove ( minor, NULL, i); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout ( 1); + } + } + + /* free PVs */ + for ( i = 0; i < vg[VG_CHR(minor)]->pv_max; i++) { + if ( vg[VG_CHR(minor)]->pv[i] != NULL) { +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG + "%s -- kfree %d\n", lvm_name, __LINE__); +#endif +#ifdef LVM_GET_INODE + lvm_clear_inode ( vg[VG_CHR(minor)]->pv[i]->inode); +#endif + kfree ( vg[VG_CHR(minor)]->pv[i]); + vg[VG_CHR(minor)]->pv[i] = NULL; + } + } + +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__); +#endif + kfree ( vg[VG_CHR(minor)]); + vg[VG_CHR(minor)] = NULL; + + vg_count--; + +#ifdef MODULE + MOD_DEC_USE_COUNT; +#endif + return 0; +} /* do_vg_remove () */ + + +/* + * character device support function logical volume create + */ +static int do_lv_create ( int minor, char *lv_name, lv_t *lv) { + int l, le, l_new, p, size; + ulong lv_status_save; + lv_block_exception_t *lvbe = lv->lv_block_exception; + lv_t *lv_ptr = NULL; + + if ( ( pep = lv->lv_current_pe) == NULL) return -EINVAL; + if ( lv->lv_chunk_size > LVM_SNAPSHOT_MAX_CHUNK) return -EINVAL; + + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->lv[l]->lv_name, lv_name) == 0) + return -EEXIST; + } + + /* in case of an lv_remove(), lv_create() pair; e.g. lvrename does this */ + l_new = -1; + if ( vg[VG_CHR(minor)]->lv[lv->lv_number] == NULL) l_new = lv->lv_number; + else { + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] == NULL) if ( l_new == -1) l_new = l; + } + } + if ( l_new == -1) return -EPERM; + l = l_new; + + if ( ( lv_ptr = kmalloc ( sizeof ( lv_t), GFP_USER)) == NULL) { + printk ( KERN_CRIT "%s -- LV_CREATE: kmalloc error LV at line %d\n", + lvm_name, __LINE__); + return -ENOMEM; + } + + /* copy preloaded LV */ + lvm_memcpy ( ( char*) lv_ptr, ( char *) lv, sizeof ( lv_t)); + + lv_status_save = lv_ptr->lv_status; + lv_ptr->lv_status &= ~LV_ACTIVE; + lv_ptr->lv_snapshot_org = \ + lv_ptr->lv_snapshot_prev = \ + lv_ptr->lv_snapshot_next = NULL; + lv_ptr->lv_block_exception = NULL; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 3, 4) + lv_ptr->lv_snapshot_sem = MUTEX; +#else + init_MUTEX(&lv_ptr->lv_snapshot_sem); +#endif + vg[VG_CHR(minor)]->lv[l] = lv_ptr; + + /* get the PE structures from user space if this + is no snapshot logical volume */ + if ( ! ( lv_ptr->lv_access & LV_SNAPSHOT)) { + size = lv_ptr->lv_allocated_le * sizeof ( pe_t); + if ( ( lv_ptr->lv_current_pe = vmalloc ( size)) == NULL) { + printk ( KERN_CRIT + "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d byte " + "at line %d\n", + lvm_name, size, __LINE__); +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG "%s -- vfree %d\n", lvm_name, __LINE__); +#endif + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -ENOMEM; + } + + if ( copy_from_user ( lv_ptr->lv_current_pe, pep, size)) { + vfree ( lv_ptr->lv_current_pe); + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -EFAULT; + } + + /* correct the PE count in PVs */ + for ( le = 0; le < lv_ptr->lv_allocated_le; le++) { + vg[VG_CHR(minor)]->pe_allocated++; + for ( p = 0; p < vg[VG_CHR(minor)]->pv_cur; p++) { + if ( vg[VG_CHR(minor)]->pv[p]->pv_dev == + lv_ptr->lv_current_pe[le].dev) + vg[VG_CHR(minor)]->pv[p]->pe_allocated++; + } + } + } else { + /* Get snapshot exception data and block list */ + if ( lvbe != NULL) { + lv_ptr->lv_snapshot_org = + vg[VG_CHR(minor)]->lv[LV_BLK(lv_ptr->lv_snapshot_minor)]; + if ( lv_ptr->lv_snapshot_org != NULL) { + size = lv_ptr->lv_remap_end * sizeof ( lv_block_exception_t); + if ( ( lv_ptr->lv_block_exception = vmalloc ( size)) == NULL) { + printk ( KERN_CRIT + "%s -- do_lv_create: vmalloc error LV_BLOCK_EXCEPTION " + "of %d byte at line %d\n", + lvm_name, size, __LINE__); +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG "%s -- vfree %d\n", lvm_name, __LINE__); +#endif + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -ENOMEM; + } + + if ( copy_from_user ( lv_ptr->lv_block_exception, lvbe, size)) { + vfree ( lv_ptr->lv_block_exception); + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -EFAULT; + } + + /* get pointer to original logical volume */ + lv_ptr = lv_ptr->lv_snapshot_org; + + lv_ptr->lv_snapshot_minor = 0; + lv_ptr->lv_snapshot_org = lv_ptr; + lv_ptr->lv_snapshot_prev = NULL; + /* walk through the snapshot list */ + while ( lv_ptr->lv_snapshot_next != NULL) + lv_ptr = lv_ptr->lv_snapshot_next; + /* now lv_ptr points to the last existing snapshot in the chain */ + vg[VG_CHR(minor)]->lv[l]->lv_snapshot_prev = lv_ptr; + /* our new one now points back to the previous last in the chain */ + lv_ptr = vg[VG_CHR(minor)]->lv[l]; + /* now lv_ptr points to our new last snapshot logical volume */ + lv_ptr->lv_snapshot_org = lv_ptr->lv_snapshot_prev->lv_snapshot_org; + lv_ptr->lv_snapshot_next = NULL; + 
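/* note: the snapshot deliberately shares the origin's PE map and
+ geometry below instead of allocating its own copies; do_lv_remove()
+ relies on this and skips the vfree() for snapshot volumes */ +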
lv_ptr->lv_current_pe = lv_ptr->lv_snapshot_org->lv_current_pe; + lv_ptr->lv_allocated_le = lv_ptr->lv_snapshot_org->lv_allocated_le; + lv_ptr->lv_current_le = lv_ptr->lv_snapshot_org->lv_current_le; + lv_ptr->lv_size = lv_ptr->lv_snapshot_org->lv_size; + lv_ptr->lv_stripes = lv_ptr->lv_snapshot_org->lv_stripes; + lv_ptr->lv_stripesize = lv_ptr->lv_snapshot_org->lv_stripesize; + { + int err; + + err = lvm_snapshot_alloc(lv_ptr); + if (err) + { + vfree(lv_ptr->lv_block_exception); + kfree(lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return err; + } + } + } else { + vfree ( lv_ptr->lv_block_exception); + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -EFAULT; + } + } else { + kfree ( vg[VG_CHR(minor)]->lv[l]); + vg[VG_CHR(minor)]->lv[l] = NULL; + return -EINVAL; + } + } /* if ( vg[VG_CHR(minor)]->lv[l]->lv_access & LV_SNAPSHOT) */ + + lv_ptr = vg[VG_CHR(minor)]->lv[l]; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size; + lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; + vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = vg[VG_CHR(minor)]->vg_number; + vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = lv_ptr->lv_number; + LVM_CORRECT_READ_AHEAD ( lv_ptr->lv_read_ahead); + read_ahead[MAJOR_NR] = lv_ptr->lv_read_ahead; + vg[VG_CHR(minor)]->lv_cur++; + lv_ptr->lv_status = lv_status_save; + + /* optionally add our new snapshot LV */ + if ( lv_ptr->lv_access & LV_SNAPSHOT) { + /* sync the original logical volume */ + fsync_dev ( lv_ptr->lv_snapshot_org->lv_dev); + /* put ourselves into the chain */ + lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr; + lv_ptr->lv_snapshot_org->lv_access |= LV_SNAPSHOT_ORG; + } + + return 0; +} /* do_lv_create () */ + + +/* + * character device support function logical volume remove + */ +static int do_lv_remove ( int minor, char *lv_name, int l) { + uint le, p; + lv_t *lv_ptr; + + if ( l == -1) { + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->lv[l]->lv_name, lv_name) == 0) { + break; + } + } + } + + lv_ptr = vg[VG_CHR(minor)]->lv[l]; + if ( l < vg[VG_CHR(minor)]->lv_max) { +#ifdef LVM_TOTAL_RESET + if ( lv_ptr->lv_open > 0 && lvm_reset_spindown == 0) +#else + if ( lv_ptr->lv_open > 0) +#endif + return -EBUSY; + + /* check for deletion of snapshot source while + snapshot volume still exists */ + if ( ( lv_ptr->lv_access & LV_SNAPSHOT_ORG) && + lv_ptr->lv_snapshot_next != NULL) + return -EPERM; + + lv_ptr->lv_status |= LV_SPINDOWN; + + /* sync the buffers */ + fsync_dev ( lv_ptr->lv_dev); + + lv_ptr->lv_status &= ~LV_ACTIVE; + + /* invalidate the buffers */ + invalidate_buffers ( lv_ptr->lv_dev); + + /* reset generic hd */ + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = -1; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = 0; + lvm_size[MINOR(lv_ptr->lv_dev)] = 0; + + /* reset VG/LV mapping */ + vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = ABS_MAX_VG; + vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = -1; + + /* correct the PE count in PVs if this is no snapshot logical volume */ + if ( ! 
( lv_ptr->lv_access & LV_SNAPSHOT)) { + /* only if this is no snapshot logical volume because we share + the lv_current_pe[] structs with the original logical volume */ + for ( le = 0; le < lv_ptr->lv_allocated_le; le++) { + vg[VG_CHR(minor)]->pe_allocated--; + for ( p = 0; p < vg[VG_CHR(minor)]->pv_cur; p++) { + if ( vg[VG_CHR(minor)]->pv[p]->pv_dev == + lv_ptr->lv_current_pe[le].dev) + vg[VG_CHR(minor)]->pv[p]->pe_allocated--; + } + } + vfree ( lv_ptr->lv_current_pe); + /* LV_SNAPSHOT */ + } else { +/* + if ( lv_ptr->lv_block_exception != NULL) { + int i; + kdev_t last_dev; + for ( i = last_dev = 0; i < lv_ptr->lv_remap_ptr; i++) { + if ( lv_ptr->lv_block_exception[i].rdev_new != last_dev) { + last_dev = lv_ptr->lv_block_exception[i].rdev_new; + invalidate_buffers ( last_dev); + current->state = TASK_INTERRUPTIBLE; + schedule_timeout ( 1); + } + } + } +*/ + /* remove this snapshot logical volume from the chain */ + lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next; + if ( lv_ptr->lv_snapshot_next != NULL) { + lv_ptr->lv_snapshot_next->lv_snapshot_prev = + lv_ptr->lv_snapshot_prev; + } + /* no more snapshots? */ + if ( lv_ptr->lv_snapshot_org->lv_snapshot_next == NULL) + lv_ptr->lv_snapshot_org->lv_access &= ~LV_SNAPSHOT_ORG; + lvm_snapshot_release(lv_ptr); + } + +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__); +#endif + kfree ( lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + vg[VG_CHR(minor)]->lv_cur--; + return 0; + } + + return -ENXIO; +} /* do_lv_remove () */ + + +/* + * character device support function logical volume extend / reduce + */ +static int do_lv_extend_reduce ( int minor, char *lv_name, lv_t *lv) { + int l, le, p, size, old_allocated_le; + uint32_t end, lv_status_save; + pe_t *pe; + + if ( ( pep = lv->lv_current_pe) == NULL) return -EINVAL; + + for ( l = 0; l < vg[VG_CHR(minor)]->lv_max; l++) { + if ( vg[VG_CHR(minor)]->lv[l] != NULL && + lvm_strcmp ( vg[VG_CHR(minor)]->lv[l]->lv_name, lv_name) == 0) + break; + } + if ( l == vg[VG_CHR(minor)]->lv_max) return -ENXIO; + + /* check for active snapshot */ + if ( lv->lv_access & ( LV_SNAPSHOT|LV_SNAPSHOT_ORG)) return -EPERM; + + if ( ( pe = vmalloc ( size = lv->lv_current_le * sizeof ( pe_t))) == NULL) { + printk ( KERN_CRIT + "%s -- do_lv_extend_reduce: vmalloc error LV_CURRENT_PE " + "of %d Byte at line %d\n", + lvm_name, size, __LINE__); + return -ENOMEM; + } + + /* get the PE structures from user space */ + if ( copy_from_user ( pe, pep, size)) { + vfree ( pe); + return -EFAULT; + } + +#ifdef DEBUG + printk ( KERN_DEBUG + "%s -- fsync_dev and " + "invalidate_buffers for %s [%s] in %s\n", + lvm_name, vg[VG_CHR(minor)]->lv[l]->lv_name, + kdevname ( vg[VG_CHR(minor)]->lv[l]->lv_dev), + vg[VG_CHR(minor)]->vg_name); +#endif + + vg[VG_CHR(minor)]->lv[l]->lv_status |= LV_SPINDOWN; + fsync_dev ( vg[VG_CHR(minor)]->lv[l]->lv_dev); + vg[VG_CHR(minor)]->lv[l]->lv_status &= ~LV_ACTIVE; + invalidate_buffers ( vg[VG_CHR(minor)]->lv[l]->lv_dev); + + /* reduce allocation counters on PV(s) */ + for ( le = 0; le < vg[VG_CHR(minor)]->lv[l]->lv_allocated_le; le++) { + vg[VG_CHR(minor)]->pe_allocated--; + for ( p = 0; p < vg[VG_CHR(minor)]->pv_cur; p++) { + if ( vg[VG_CHR(minor)]->pv[p]->pv_dev == + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev) { + vg[VG_CHR(minor)]->pv[p]->pe_allocated--; + break; + } + } + } + +#ifdef DEBUG_VFREE + printk ( KERN_DEBUG "%s -- vfree %d\n", lvm_name, __LINE__); +#endif + + /* save pointer to "old" lv/pe pointer array */ + pep1 = 
vg[VG_CHR(minor)]->lv[l]->lv_current_pe; + end = vg[VG_CHR(minor)]->lv[l]->lv_current_le; + + /* save open counter */ + lv_open = vg[VG_CHR(minor)]->lv[l]->lv_open; + + /* save # of old allocated logical extents */ + old_allocated_le = vg[VG_CHR(minor)]->lv[l]->lv_allocated_le; + + /* copy preloaded LV */ + lv_status_save = lv->lv_status; + lv->lv_status |= LV_SPINDOWN; + lv->lv_status &= ~LV_ACTIVE; + lvm_memcpy ( ( char*) vg[VG_CHR(minor)]->lv[l], ( char*) lv, sizeof ( lv_t)); + vg[VG_CHR(minor)]->lv[l]->lv_current_pe = pe; + vg[VG_CHR(minor)]->lv[l]->lv_open = lv_open; + + /* save available i/o statistics */ + /* linear logical volume */ + if ( vg[VG_CHR(minor)]->lv[l]->lv_stripes < 2) { + /* determine the last LE to be used */ + if ( end > vg[VG_CHR(minor)]->lv[l]->lv_current_le) + end = vg[VG_CHR(minor)]->lv[l]->lv_current_le; + for ( le = 0; le < end; le++) { + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].reads = pep1[le].reads; + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].writes = pep1[le].writes; + } + /* striped logical volume */ + } else { + uint i, j, source, dest, end, old_stripe_size, new_stripe_size; + + old_stripe_size = old_allocated_le / vg[VG_CHR(minor)]->lv[l]->lv_stripes; + new_stripe_size = vg[VG_CHR(minor)]->lv[l]->lv_allocated_le / + vg[VG_CHR(minor)]->lv[l]->lv_stripes; + end = old_stripe_size; + if ( end > new_stripe_size) end = new_stripe_size; + for ( i = source = dest = 0; + i < vg[VG_CHR(minor)]->lv[l]->lv_stripes; i++) { + for ( j = 0; j < end; j++) { + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[dest+j].reads = + pep1[source+j].reads; + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[dest+j].writes = + pep1[source+j].writes; + } + source += old_stripe_size; + dest += new_stripe_size; + } + } + vfree ( pep1); pep1 = NULL; + + + /* extend the PE count in PVs */ + for ( le = 0; le < vg[VG_CHR(minor)]->lv[l]->lv_allocated_le; le++) { + vg[VG_CHR(minor)]->pe_allocated++; + for ( p = 0; p < vg[VG_CHR(minor)]->pv_cur; p++) { + if ( vg[VG_CHR(minor)]->pv[p]->pv_dev == + vg[VG_CHR(minor)]->lv[l]->lv_current_pe[le].dev) { + vg[VG_CHR(minor)]->pv[p]->pe_allocated++; + break; + } + } + } + + lvm_gendisk.part[MINOR(vg[VG_CHR(minor)]->lv[l]->lv_dev)].start_sect = 0; + lvm_gendisk.part[MINOR(vg[VG_CHR(minor)]->lv[l]->lv_dev)].nr_sects = + vg[VG_CHR(minor)]->lv[l]->lv_size; + lvm_size[MINOR(vg[VG_CHR(minor)]->lv[l]->lv_dev)] = + vg[VG_CHR(minor)]->lv[l]->lv_size >> 1; + /* vg_lv_map array doesn't have to be changed here */ + + LVM_CORRECT_READ_AHEAD ( vg[VG_CHR(minor)]->lv[l]->lv_read_ahead); + read_ahead[MAJOR_NR] = vg[VG_CHR(minor)]->lv[l]->lv_read_ahead; + vg[VG_CHR(minor)]->lv[l]->lv_status = lv_status_save; + + return 0; +} /* do_lv_extend_reduce () */ + + +/* + * support function initialize gendisk variables + */ +#ifdef __initfunc +__initfunc ( void lvm_geninit ( struct gendisk *lvm_gdisk)) +#else +void __init lvm_geninit ( struct gendisk *lvm_gdisk) +#endif +{ + int i = 0; + +#ifdef DEBUG_GENDISK + printk ( KERN_DEBUG "%s -- lvm_gendisk\n", lvm_name); +#endif + + for ( i = 0; i < MAX_LV; i++) { + lvm_gendisk.part[i].start_sect = -1; /* avoid partition check */ + lvm_size[i] = lvm_gendisk.part[i].nr_sects = 0; + lvm_blocksizes[i] = BLOCK_SIZE; + } + + blksize_size[MAJOR_NR] = lvm_blocksizes; + blk_size[MAJOR_NR] = lvm_size; + + return; +} /* lvm_gen_init () */ + + +#ifdef LVM_GET_INODE +/* + * support function to get an empty inode + * + * Gets an empty inode to be inserted into the inode hash, + * so that a physical volume can't be mounted. 
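+ * The hashed dummy inode makes the device look in use to
+ * fs_may_mount(), which should keep mount(2) away from a PV while
+ * it belongs to an active VG.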
+ * This is analogous to drivers/block/md.c + * + * Is this the real thing? + * + */ +struct inode *lvm_get_inode ( int dev) { + struct inode *inode_this = NULL; + + /* Lock the device by inserting a dummy inode. */ + inode_this = get_empty_inode (); + inode_this->i_dev = dev; + insert_inode_hash ( inode_this); + return inode_this; +} + + +/* + * support function to clear an inode + * + */ +void lvm_clear_inode ( struct inode *inode) { +#ifdef I_FREEING + inode->i_state |= I_FREEING; +#endif + clear_inode ( inode); + return; +} +#endif /* #ifdef LVM_GET_INODE */ + + +/* my strlen */ +inline int lvm_strlen ( char *s1) { + int len = 0; + + while ( s1[len] != 0) len++; + return len; +} + + +/* my strcmp */ +inline int lvm_strcmp ( char *s1, char *s2) { + while ( *s1 != 0 && *s2 != 0) { + if ( *s1 != *s2) return -1; + s1++; s2++; + } + if ( *s1 == 0 && *s2 == 0) return 0; + return -1; +} + + +/* my strrchr */ +inline char *lvm_strrchr ( char *s1, char c) { + char *s2 = NULL; + + while ( *s1 != 0) { + if ( *s1 == c) s2 = s1; + s1++; + } + return s2; +} + + +/* my memcpy */ +inline void lvm_memcpy ( char *dest, char *source, int size) { + for ( ;size > 0; size--) *dest++ = *source++; +} diff -urN 2.2.15pre16/drivers/block/rd.c 2.2.15pre16aa3/drivers/block/rd.c --- 2.2.15pre16/drivers/block/rd.c Fri Jan 7 18:19:11 2000 +++ 2.2.15pre16aa3/drivers/block/rd.c Thu Mar 30 16:00:56 2000 @@ -177,7 +177,7 @@ if (CURRENT->cmd == READ) memset(CURRENT->buffer, 0, len); else - set_bit(BH_Protected, &CURRENT->bh->b_state); + mark_buffer_protected(CURRENT->bh); end_request(1); goto repeat; diff -urN 2.2.15pre16/drivers/char/Config.in 2.2.15pre16aa3/drivers/char/Config.in --- 2.2.15pre16/drivers/char/Config.in Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/drivers/char/Config.in Thu Mar 30 16:00:57 2000 @@ -116,6 +116,9 @@ tristate '/dev/nvram support' CONFIG_NVRAM bool 'Enhanced Real Time Clock Support' CONFIG_RTC +if [ "$CONFIG_RTC" = "y" -a "$ARCH" = "alpha" ]; then + bool ' Use only lightweight version (no interrupts)' CONFIG_RTC_LIGHT +fi if [ "$CONFIG_ALPHA_BOOK1" = "y" ]; then bool 'Tadpole ANA H8 Support' CONFIG_H8 fi diff -urN 2.2.15pre16/drivers/char/Makefile 2.2.15pre16aa3/drivers/char/Makefile --- 2.2.15pre16/drivers/char/Makefile Wed Mar 29 19:42:09 2000 +++ 2.2.15pre16aa3/drivers/char/Makefile Thu Mar 30 16:00:57 2000 @@ -20,7 +20,7 @@ L_TARGET := char.a M_OBJS := -L_OBJS := tty_io.o n_tty.o tty_ioctl.o mem.o random.o +L_OBJS := tty_io.o n_tty.o tty_ioctl.o mem.o random.o raw.o LX_OBJS := pty.o misc.o ifdef CONFIG_VT diff -urN 2.2.15pre16/drivers/char/mem.c 2.2.15pre16aa3/drivers/char/mem.c --- 2.2.15pre16/drivers/char/mem.c Wed Jan 5 14:16:52 2000 +++ 2.2.15pre16aa3/drivers/char/mem.c Thu Mar 30 16:00:57 2000 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -620,6 +621,7 @@ if (register_chrdev(MEM_MAJOR,"mem",&memory_fops)) printk("unable to get major %d for memory devs\n", MEM_MAJOR); rand_initialize(); + raw_init(); #ifdef CONFIG_USB #ifdef CONFIG_USB_UHCI uhci_init(); diff -urN 2.2.15pre16/drivers/char/raw.c 2.2.15pre16aa3/drivers/char/raw.c --- 2.2.15pre16/drivers/char/raw.c Thu Jan 1 01:00:00 1970 +++ 2.2.15pre16aa3/drivers/char/raw.c Thu Mar 30 16:00:57 2000 @@ -0,0 +1,384 @@ +/* + * linux/drivers/char/raw.c + * + * Front-end raw character devices. These can be bound to any block + * devices to provide genuine Unix raw character device semantics. + * + * We reserve minor number 0 for a control interface. 
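+ * A user-space binding sketch (illustrative only: it assumes a
+ * /dev/rawctl node exists for minor 0, and error handling is
+ * omitted; the raw_config_request fields are the ones this driver
+ * reads below):
+ *
+ *     struct raw_config_request rq;
+ *     int fd = open("/dev/rawctl", O_RDWR);
+ *     rq.raw_minor = 1;
+ *     rq.block_major = 8;
+ *     rq.block_minor = 1;
+ *     ioctl(fd, RAW_SETBIND, &rq);    bind /dev/raw1 to 8:1
+ *     close(fd);
+ *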
ioctl()s on this + * device are used to bind the other minor numbers to block devices. + */ + +#include +#include +#include +#include +#include +#include + +#define dprintk(x...) + +static kdev_t raw_device_bindings[256] = {}; +static int raw_device_inuse[256] = {}; +static int raw_device_sector_size[256] = {}; +static int raw_device_sector_bits[256] = {}; + +extern struct file_operations * get_blkfops(unsigned int major); + +static ssize_t rw_raw_dev(int rw, struct file *, char *, size_t, loff_t *); + +ssize_t raw_read(struct file *, char *, size_t, loff_t *); +ssize_t raw_write(struct file *, const char *, size_t, loff_t *); +int raw_open(struct inode *, struct file *); +int raw_release(struct inode *, struct file *); +int raw_ctl_ioctl(struct inode *, struct file *, unsigned int, unsigned long); + + +static struct file_operations raw_fops = { + NULL, /* llseek */ + raw_read, /* read */ + raw_write, /* write */ + NULL, /* readdir */ + NULL, /* poll */ + NULL, /* ioctl */ + NULL, /* mmap */ + raw_open, /* open */ + NULL, /* flush */ + raw_release, /* release */ + NULL /* fsync */ +}; + +static struct file_operations raw_ctl_fops = { + NULL, /* llseek */ + NULL, /* read */ + NULL, /* write */ + NULL, /* readdir */ + NULL, /* poll */ + raw_ctl_ioctl, /* ioctl */ + NULL, /* mmap */ + raw_open, /* open */ + NULL, /* flush */ + NULL, /* no special release code */ + NULL /* fsync */ +}; + + + +void __init raw_init(void) +{ + register_chrdev(RAW_MAJOR, "raw", &raw_fops); +} + + +/* + * The raw IO open and release code needs to fake appropriate + * open/release calls to the underlying block devices. + */ + +static int bdev_open(kdev_t dev, int mode) +{ + int err = 0; + struct file dummy_file = {}; + struct dentry dummy_dentry = {}; + struct inode * inode = get_empty_inode(); + + if (!inode) + return -ENOMEM; + + dummy_file.f_op = get_blkfops(MAJOR(dev)); + if (!dummy_file.f_op) { + err = -ENODEV; + goto done; + } + + if (dummy_file.f_op->open) { + inode->i_rdev = dev; + dummy_dentry.d_inode = inode; + dummy_file.f_dentry = &dummy_dentry; + dummy_file.f_mode = mode; + err = dummy_file.f_op->open(inode, &dummy_file); + } + + done: + iput(inode); + return err; +} + +static int bdev_close(kdev_t dev) +{ + int err; + struct inode * inode = get_empty_inode(); + + if (!inode) + return -ENOMEM; + + inode->i_rdev = dev; + err = blkdev_release(inode); + iput(inode); + return err; +} + + + +/* + * Open/close code for raw IO. + */ + +int raw_open(struct inode *inode, struct file *filp) +{ + int minor; + kdev_t bdev; + int err; + int sector_size; + int sector_bits; + + minor = MINOR(inode->i_rdev); + + /* + * Is it the control device? + */ + + if (minor == 0) { + filp->f_op = &raw_ctl_fops; + return 0; + } + + /* + * No, it is a normal raw device. All we need to do on open is + * to check that the device is bound, and force the underlying + * block device to a sector-size blocksize. + */ + + bdev = raw_device_bindings[minor]; + if (bdev == NODEV) + return -ENODEV; + + err = bdev_open(bdev, filp->f_mode); + if (err) + return err; + + /* + * Don't change the blocksize if we already have users using + * this device + */ + + if (raw_device_inuse[minor]++) + return 0; + + /* + * Don't interfere with mounted devices: we cannot safely set + * the blocksize on a device which is already mounted. 
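+ * For a mounted device we therefore keep the filesystem's current
+ * soft blocksize; only an unmounted device is forced down to its
+ * hardware sector size.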
+ */ + + sector_size = 512; + if (lookup_vfsmnt(bdev) != NULL) { + if (blksize_size[MAJOR(bdev)]) + sector_size = blksize_size[MAJOR(bdev)][MINOR(bdev)]; + } else { + if (hardsect_size[MAJOR(bdev)]) + sector_size = hardsect_size[MAJOR(bdev)][MINOR(bdev)]; + } + + set_blocksize(bdev, sector_size); + raw_device_sector_size[minor] = sector_size; + + for (sector_bits = 0; !(sector_size & 1); ) + sector_size>>=1, sector_bits++; + raw_device_sector_bits[minor] = sector_bits; + + return 0; +} + +int raw_release(struct inode *inode, struct file *filp) +{ + int minor; + kdev_t bdev; + + minor = MINOR(inode->i_rdev); + bdev = raw_device_bindings[minor]; + bdev_close(bdev); + raw_device_inuse[minor]--; + return 0; +} + + + +/* + * Deal with ioctls against the raw-device control interface, to bind + * and unbind other raw devices. + */ + +int raw_ctl_ioctl(struct inode *inode, + struct file *flip, + unsigned int command, + unsigned long arg) +{ + struct raw_config_request rq; + int err = 0; + int minor; + + switch (command) { + case RAW_SETBIND: + case RAW_GETBIND: + + /* First, find out which raw minor we want */ + + err = copy_from_user(&rq, (void *) arg, sizeof(rq)); + if (err) + break; + + minor = rq.raw_minor; + if (minor == 0 || minor > MINORMASK) { + err = -EINVAL; + break; + } + + if (command == RAW_SETBIND) { + /* + * For now, we don't need to check that the underlying + * block device is present or not: we can do that when + * the raw device is opened. Just check that the + * major/minor numbers make sense. + */ + + if (rq.block_major == NODEV || + rq.block_major > MAX_BLKDEV || + rq.block_minor > MINORMASK) { + err = -EINVAL; + break; + } + + if (raw_device_inuse[minor]) { + err = -EBUSY; + break; + } + raw_device_bindings[minor] = + MKDEV(rq.block_major, rq.block_minor); + } else { + rq.block_major = MAJOR(raw_device_bindings[minor]); + rq.block_minor = MINOR(raw_device_bindings[minor]); + err = copy_to_user((void *) arg, &rq, sizeof(rq)); + } + break; + + default: + err = -EINVAL; + } + + return err; +} + + + +ssize_t raw_read(struct file *filp, char * buf, + size_t size, loff_t *offp) +{ + return rw_raw_dev(READ, filp, buf, size, offp); +} + +ssize_t raw_write(struct file *filp, const char *buf, + size_t size, loff_t *offp) +{ + return rw_raw_dev(WRITE, filp, (char *) buf, size, offp); +} + +#define SECTOR_BITS 9 +#define SECTOR_SIZE (1U << SECTOR_BITS) +#define SECTOR_MASK (SECTOR_SIZE - 1) + +ssize_t rw_raw_dev(int rw, struct file *filp, char *buf, + size_t size, loff_t *offp) +{ + struct kiobuf * iobuf; + int err; + unsigned long blocknr, blocks; + unsigned long b[KIO_MAX_SECTORS]; + size_t transferred; + int iosize; + int i; + int minor; + kdev_t dev; + unsigned long limit; + + int sector_size, sector_bits, sector_mask; + int max_sectors; + + /* + * First, a few checks on device size limits + */ + + minor = MINOR(filp->f_dentry->d_inode->i_rdev); + dev = raw_device_bindings[minor]; + sector_size = raw_device_sector_size[minor]; + sector_bits = raw_device_sector_bits[minor]; + sector_mask = sector_size- 1; + max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9); + + if (blk_size[MAJOR(dev)]) + limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits; + else + limit = INT_MAX; + dprintk ("rw_raw_dev: dev %d:%d (+%d)\n", + MAJOR(dev), MINOR(dev), limit); + + if ((*offp & sector_mask) || (size & sector_mask)) + return -EINVAL; + if ((*offp >> sector_bits) > limit) + return 0; + + /* + * We'll just use one kiobuf + */ + + err = alloc_kiovec(1, &iobuf); + if 
(err) + return err; + + /* + * Split the IO into KIO_MAX_SECTORS chunks, mapping and + * unmapping the single kiobuf as we go to perform each chunk of + * IO. + */ + + transferred = 0; + blocknr = *offp >> sector_bits; + while (size > 0) { + blocks = size >> sector_bits; + if (blocks > max_sectors) + blocks = max_sectors; + if (blocks > limit - blocknr) + blocks = limit - blocknr; + if (!blocks) + break; + + iosize = blocks << sector_bits; + + err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize); + if (err) + break; + + for (i=0; i < blocks; i++) + b[i] = blocknr++; + + err = brw_kiovec(rw, 1, &iobuf, dev, b, sector_size); + + if (err >= 0) { + transferred += err; + size -= err; + buf += err; + } + + unmap_kiobuf(iobuf); + + if (err != iosize) + break; + } + + free_kiovec(1, &iobuf); + + if (transferred) { + *offp += transferred; + return transferred; + } + + return err; +} diff -urN 2.2.15pre16/drivers/char/rtc.c 2.2.15pre16aa3/drivers/char/rtc.c --- 2.2.15pre16/drivers/char/rtc.c Sun Jan 2 18:26:37 2000 +++ 2.2.15pre16aa3/drivers/char/rtc.c Thu Mar 30 16:00:57 2000 @@ -113,13 +113,19 @@ unsigned char days_in_mo[] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; +extern spinlock_t rtc_lock; + /* * A very tiny interrupt handler. It runs with SA_INTERRUPT set, - * so that there is no possibility of conflicting with the - * set_rtc_mmss() call that happens during some timer interrupts. + * but there is a possibility of conflicting with the set_rtc_mmss() + * call (the rtc irq and the timer irq can easily run at the same + * time in two different CPUs). So we need to serialize + * accesses to the chip with the rtc_lock spinlock that each + * architecture should implement in the timer code. * (See ./arch/XXXX/kernel/time.c for the set_rtc_mmss() function.) */ +#ifndef CONFIG_RTC_LIGHT static void rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) { /* @@ -131,12 +137,16 @@ rtc_irq_data += 0x100; rtc_irq_data &= ~0xff; + /* runs with irq locally disabled (see SA_INTERRUPT flag). */ + spin_lock(&rtc_lock); rtc_irq_data |= (CMOS_READ(RTC_INTR_FLAGS) & 0xF0); + spin_unlock(&rtc_lock); wake_up_interruptible(&rtc_wait); if (rtc_status & RTC_TIMER_ON) + mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); } +#endif /* * Now all the various file operations that we export. @@ -150,6 +160,9 @@ static ssize_t rtc_read(struct file *file, char *buf, size_t count, loff_t *ppos) { +#ifdef CONFIG_RTC_LIGHT + return -EIO; +#else struct wait_queue wait = { current, NULL }; unsigned long data; ssize_t retval; @@ -181,6 +194,7 @@ remove_wait_queue(&rtc_wait, &wait); return retval; +#endif } static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, @@ -191,6 +205,7 @@ struct rtc_time wtime; switch (cmd) { +#ifndef CONFIG_RTC_LIGHT case RTC_AIE_OFF: /* Mask alarm int. enab. 
bit */ { mask_rtc_irq_bit(RTC_AIE); @@ -238,6 +253,7 @@ set_rtc_irq_bit(RTC_UIE); return 0; } +#endif case RTC_ALM_READ: /* Read the present alarm time */ { /* @@ -276,8 +292,7 @@ if (sec >= 60) sec = 0xff; - save_flags(flags); - cli(); + spin_lock_irqsave(&rtc_lock, flags); if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { @@ -288,7 +303,7 @@ CMOS_WRITE(hrs, RTC_HOURS_ALARM); CMOS_WRITE(min, RTC_MINUTES_ALARM); CMOS_WRITE(sec, RTC_SECONDS_ALARM); - restore_flags(flags); + spin_unlock_irqrestore(&rtc_lock, flags); return 0; } @@ -336,12 +351,11 @@ if ((yrs -= epoch) > 255) /* They are unsigned */ return -EINVAL; - save_flags(flags); - cli(); + spin_lock_irqsave(&rtc_lock, flags); if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { if (yrs > 169) { - restore_flags(flags); + spin_unlock_irqrestore(&rtc_lock, flags); return -EINVAL; } if (yrs >= 100) @@ -370,13 +384,14 @@ CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - restore_flags(flags); + spin_unlock_irqrestore(&rtc_lock, flags); return 0; } case RTC_IRQP_READ: /* Read the periodic IRQ rate. */ { return put_user(rtc_freq, (unsigned long *)arg); } +#ifndef CONFIG_RTC_LIGHT case RTC_IRQP_SET: /* Set periodic IRQ rate. */ { int tmp = 0; @@ -405,14 +420,14 @@ rtc_freq = arg; - save_flags(flags); - cli(); + spin_lock_irqsave(&rtc_lock, flags); val = CMOS_READ(RTC_FREQ_SELECT) & 0xf0; val |= (16 - tmp); CMOS_WRITE(val, RTC_FREQ_SELECT); - restore_flags(flags); + spin_unlock_irqrestore(&rtc_lock, flags); return 0; } +#endif #ifdef __alpha__ case RTC_EPOCH_READ: /* Read the epoch. */ { @@ -462,18 +477,18 @@ * in use, and clear the data. */ +#ifndef CONFIG_RTC_LIGHT unsigned char tmp; unsigned long flags; - save_flags(flags); - cli(); + spin_lock_irqsave(&rtc_lock, flags); tmp = CMOS_READ(RTC_CONTROL); tmp &= ~RTC_PIE; tmp &= ~RTC_AIE; tmp &= ~RTC_UIE; CMOS_WRITE(tmp, RTC_CONTROL); CMOS_READ(RTC_INTR_FLAGS); - restore_flags(flags); + spin_unlock_irqrestore(&rtc_lock, flags); if (rtc_status & RTC_TIMER_ON) { rtc_status &= ~RTC_TIMER_ON; @@ -481,10 +496,12 @@ } rtc_irq_data = 0; +#endif rtc_status &= ~RTC_IS_OPEN; return 0; } +#ifndef CONFIG_RTC_LIGHT static unsigned int rtc_poll(struct file *file, poll_table *wait) { poll_wait(file, &rtc_wait, wait); @@ -492,6 +509,7 @@ return POLLIN | POLLRDNORM; return 0; } +#endif /* * The various file operations we support. @@ -502,7 +520,11 @@ rtc_read, NULL, /* No write */ NULL, /* No readdir */ +#ifdef CONFIG_RTC_LIGHT + NULL, +#else rtc_poll, +#endif rtc_ioctl, NULL, /* No mmap */ rtc_open, @@ -526,12 +548,14 @@ char *guess = NULL; #endif printk(KERN_INFO "Real Time Clock Driver v%s\n", RTC_VERSION); +#ifndef CONFIG_RTC_LIGHT if(request_irq(RTC_IRQ, rtc_interrupt, SA_INTERRUPT, "rtc", NULL)) { /* Yeah right, seeing as irq 8 doesn't even hit the bus. */ printk(KERN_ERR "rtc: IRQ %d is not free.\n", RTC_IRQ); return -EIO; } +#endif misc_register(&rtc_dev); /* Check region? Naaah! Just snarf it up. */ request_region(RTC_PORT(0), RTC_IO_EXTENT, "rtc"); @@ -546,11 +570,10 @@ while (jiffies - uip_watchdog < 2*HZ/100) barrier(); - save_flags(flags); - cli(); + spin_lock_irqsave(&rtc_lock, flags); year = CMOS_READ(RTC_YEAR); ctrl = CMOS_READ(RTC_CONTROL); - restore_flags(flags); + spin_unlock_irqrestore(&rtc_lock, flags); if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) BCD_TO_BIN(year); /* This should never happen... 
*/ @@ -565,14 +588,15 @@ if (guess) printk("rtc: %s epoch (%lu) detected\n", guess, epoch); #endif +#ifndef CONFIG_RTC_LIGHT init_timer(&rtc_irq_timer); rtc_irq_timer.function = rtc_dropped_irq; rtc_wait = NULL; - save_flags(flags); - cli(); + spin_lock_irqsave(&rtc_lock, flags); /* Initialize periodic freq. to CMOS reset default, which is 1024Hz */ CMOS_WRITE(((CMOS_READ(RTC_FREQ_SELECT) & 0xF0) | 0x06), RTC_FREQ_SELECT); - restore_flags(flags); + spin_unlock_irqrestore(&rtc_lock, flags); +#endif rtc_freq = 1024; return 0; } @@ -589,6 +613,7 @@ * for something that requires a steady > 1KHz signal anyways.) */ +#ifndef CONFIG_RTC_LIGHT void rtc_dropped_irq(unsigned long data) { unsigned long flags; @@ -596,13 +621,13 @@ printk(KERN_INFO "rtc: lost some interrupts at %ldHz.\n", rtc_freq); mod_timer(&rtc_irq_timer, jiffies + HZ/rtc_freq + 2*HZ/100); - save_flags(flags); - cli(); + spin_lock_irqsave(&rtc_lock, flags); rtc_irq_data += ((rtc_freq/HZ)<<8); rtc_irq_data &= ~0xff; rtc_irq_data |= (CMOS_READ(RTC_INTR_FLAGS) & 0xF0); /* restart */ - restore_flags(flags); + spin_unlock_irqrestore(&rtc_lock, flags); } +#endif /* * Info exported via "/proc/rtc". @@ -615,11 +640,10 @@ unsigned char batt, ctrl; unsigned long flags; - save_flags(flags); - cli(); + spin_lock_irqsave(&rtc_lock, flags); batt = CMOS_READ(RTC_VALID) & RTC_VRT; ctrl = CMOS_READ(RTC_CONTROL); - restore_flags(flags); + spin_unlock_irqrestore(&rtc_lock, flags); p = buf; @@ -690,10 +714,9 @@ unsigned long flags; unsigned char uip; - save_flags(flags); - cli(); + spin_lock_irqsave(&rtc_lock, flags); uip = (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP); - restore_flags(flags); + spin_unlock_irqrestore(&rtc_lock, flags); return uip; } @@ -723,8 +746,7 @@ * RTC has RTC_DAY_OF_WEEK, we ignore it, as it is only updated * by the RTC when initially set to a non-zero value. */ - save_flags(flags); - cli(); + spin_lock_irqsave(&rtc_lock, flags); rtc_tm->tm_sec = CMOS_READ(RTC_SECONDS); rtc_tm->tm_min = CMOS_READ(RTC_MINUTES); rtc_tm->tm_hour = CMOS_READ(RTC_HOURS); @@ -732,7 +754,7 @@ rtc_tm->tm_mon = CMOS_READ(RTC_MONTH); rtc_tm->tm_year = CMOS_READ(RTC_YEAR); ctrl = CMOS_READ(RTC_CONTROL); - restore_flags(flags); + spin_unlock_irqrestore(&rtc_lock, flags); if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { @@ -763,13 +785,12 @@ * Only the values that we read from the RTC are set. That * means only tm_hour, tm_min, and tm_sec. */ - save_flags(flags); - cli(); + spin_lock_irqsave(&rtc_lock, flags); alm_tm->tm_sec = CMOS_READ(RTC_SECONDS_ALARM); alm_tm->tm_min = CMOS_READ(RTC_MINUTES_ALARM); alm_tm->tm_hour = CMOS_READ(RTC_HOURS_ALARM); ctrl = CMOS_READ(RTC_CONTROL); - restore_flags(flags); + spin_unlock_irqrestore(&rtc_lock, flags); if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { @@ -789,18 +810,18 @@ * meddles with the interrupt enable/disable bits. 
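+ * (With CONFIG_RTC_LIGHT defined these helpers are compiled out
+ * together with the interrupt handling that uses them.)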
*/ +#ifndef CONFIG_RTC_LIGHT void mask_rtc_irq_bit(unsigned char bit) { unsigned char val; unsigned long flags; - save_flags(flags); - cli(); + spin_lock_irqsave(&rtc_lock, flags); val = CMOS_READ(RTC_CONTROL); val &= ~bit; CMOS_WRITE(val, RTC_CONTROL); CMOS_READ(RTC_INTR_FLAGS); - restore_flags(flags); + spin_unlock_irqrestore(&rtc_lock, flags); rtc_irq_data = 0; } @@ -809,12 +830,12 @@ unsigned char val; unsigned long flags; - save_flags(flags); - cli(); + spin_lock_irqsave(&rtc_lock, flags); val = CMOS_READ(RTC_CONTROL); val |= bit; CMOS_WRITE(val, RTC_CONTROL); CMOS_READ(RTC_INTR_FLAGS); rtc_irq_data = 0; - restore_flags(flags); + spin_unlock_irqrestore(&rtc_lock, flags); } +#endif diff -urN 2.2.15pre16/drivers/scsi/sd_ioctl.c 2.2.15pre16aa3/drivers/scsi/sd_ioctl.c --- 2.2.15pre16/drivers/scsi/sd_ioctl.c Mon Jan 17 16:44:40 2000 +++ 2.2.15pre16aa3/drivers/scsi/sd_ioctl.c Thu Mar 30 16:00:56 2000 @@ -113,6 +113,10 @@ return put_user(blksize_size[MAJOR(dev)][MINOR(dev)&0x0F], (int *)arg); + case BLKELVGET: + case BLKELVSET: + return blkelv_ioctl(inode->i_rdev, cmd, arg); + RO_IOCTLS(dev, arg); default: diff -urN 2.2.15pre16/fs/Makefile 2.2.15pre16aa3/fs/Makefile --- 2.2.15pre16/fs/Makefile Thu Aug 26 14:20:19 1999 +++ 2.2.15pre16aa3/fs/Makefile Thu Mar 30 16:00:57 2000 @@ -13,7 +13,7 @@ O_OBJS = open.o read_write.o devices.o file_table.o buffer.o \ super.o block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o fifo.o locks.o filesystems.o \ - dcache.o inode.o attr.o bad_inode.o file.o $(BINFMTS) + dcache.o inode.o attr.o bad_inode.o file.o iobuf.o $(BINFMTS) MOD_LIST_NAME := FS_MODULES ALL_SUB_DIRS = coda minix ext2 fat msdos vfat proc isofs nfs umsdos ntfs \ diff -urN 2.2.15pre16/fs/adfs/inode.c 2.2.15pre16aa3/fs/adfs/inode.c --- 2.2.15pre16/fs/adfs/inode.c Mon Jan 17 16:44:41 2000 +++ 2.2.15pre16aa3/fs/adfs/inode.c Thu Mar 30 16:00:58 2000 @@ -181,7 +181,7 @@ inode->i_nlink = 2; inode->i_size = ADFS_NEWDIR_SIZE; inode->i_blksize = PAGE_SIZE; - inode->i_blocks = inode->i_size / sb->s_blocksize; + inode->i_blocks = inode->i_size >> sb->s_blocksize_bits; inode->i_mtime = inode->i_atime = inode->i_ctime = 0; diff -urN 2.2.15pre16/fs/affs/file.c 2.2.15pre16aa3/fs/affs/file.c --- 2.2.15pre16/fs/affs/file.c Mon Jan 17 16:44:41 2000 +++ 2.2.15pre16aa3/fs/affs/file.c Thu Mar 30 16:00:58 2000 @@ -580,17 +580,17 @@ affs_file_write(struct file *filp, const char *buf, size_t count, loff_t *ppos) { struct inode *inode = filp->f_dentry->d_inode; - off_t pos; + loff_t pos; ssize_t written; ssize_t c; - ssize_t blocksize; + ssize_t blocksize, blockshift; struct buffer_head *bh; char *p; if (!count) return 0; - pr_debug("AFFS: file_write(ino=%lu,pos=%lu,count=%d)\n",inode->i_ino, - (unsigned long)*ppos,count); + pr_debug("AFFS: file_write(ino=%lu,pos=%Lu,count=%d)\n",inode->i_ino, + *ppos,count); if (!inode) { affs_error(inode->i_sb,"file_write","Inode = NULL"); @@ -609,16 +609,22 @@ else pos = *ppos; written = 0; - blocksize = AFFS_I2BSIZE(inode); + blocksize = AFFS_I2BSIZE(inode); + blockshift = AFFS_I2BITS(inode); + + if (pos >= 0x7fffffff) /* Max size: 2G-1 */ + return -EFBIG; + if ((pos + count) > 0x7fffffff) + count = 0x7fffffff - pos; while (written < count) { - bh = affs_getblock(inode,pos / blocksize); + bh = affs_getblock(inode, pos >> blockshift); if (!bh) { if (!written) written = -ENOSPC; break; } - c = blocksize - (pos % blocksize); + c = blocksize - (pos & (blocksize -1)); if (c > count - written) c = count - written; if (c != blocksize && !buffer_uptodate(bh)) { @@ 
-631,7 +637,7 @@ break; } } - p = (pos % blocksize) + bh->b_data; + p = (pos & (blocksize -1)) + bh->b_data; c -= copy_from_user(p,buf,c); if (!c) { affs_brelse(bh); @@ -662,7 +668,7 @@ off_t pos; ssize_t written; ssize_t c; - ssize_t blocksize; + ssize_t blocksize, blockshift; struct buffer_head *bh; char *p; @@ -690,15 +696,16 @@ bh = NULL; blocksize = AFFS_I2BSIZE(inode) - 24; + blockshift = AFFS_I2BITS(inode); written = 0; while (written < count) { - bh = affs_getblock(inode,pos / blocksize); + bh = affs_getblock(inode,(u_long)pos / blocksize); if (!bh) { if (!written) written = -ENOSPC; break; } - c = blocksize - (pos % blocksize); + c = blocksize - ((u_long)pos % blocksize); if (c > count - written) c = count - written; if (c != blocksize && !buffer_uptodate(bh)) { @@ -711,7 +718,7 @@ break; } } - p = (pos % blocksize) + bh->b_data + 24; + p = ((u_long)pos % blocksize) + bh->b_data + 24; c -= copy_from_user(p,buf,c); if (!c) { affs_brelse(bh); @@ -780,10 +787,10 @@ int rem; int ext; - pr_debug("AFFS: truncate(inode=%ld,size=%lu)\n",inode->i_ino,inode->i_size); + pr_debug("AFFS: truncate(inode=%ld,size=%Lu)\n",inode->i_ino,inode->i_size); net_blocksize = blocksize - ((inode->i_sb->u.affs_sb.s_flags & SF_OFS) ? 24 : 0); - first = (inode->i_size + net_blocksize - 1) / net_blocksize; + first = (u_long)(inode->i_size + net_blocksize - 1) / net_blocksize; if (inode->u.affs_i.i_lastblock < first - 1) { /* There has to be at least one new block to be allocated */ if (!inode->u.affs_i.i_ec && alloc_ext_cache(inode)) { @@ -793,9 +800,9 @@ bh = affs_getblock(inode,first - 1); if (!bh) { affs_warning(inode->i_sb,"truncate","Cannot extend file"); - inode->i_size = net_blocksize * (inode->u.affs_i.i_lastblock + 1); + inode->i_size = (inode->u.affs_i.i_lastblock + 1) * net_blocksize; } else if (inode->i_sb->u.affs_sb.s_flags & SF_OFS) { - rem = inode->i_size % net_blocksize; + rem = ((u_long)inode->i_size) % net_blocksize; DATA_FRONT(bh)->data_size = cpu_to_be32(rem ? rem : net_blocksize); affs_fix_checksum(blocksize,bh->b_data,5); mark_buffer_dirty(bh,0); @@ -862,7 +869,7 @@ affs_free_block(inode->i_sb,ekey); ekey = key; } - block = ((inode->i_size + net_blocksize - 1) / net_blocksize) - 1; + block = (((u_long)inode->i_size + net_blocksize - 1) / net_blocksize) - 1; inode->u.affs_i.i_lastblock = block; /* If the file is not truncated to a block boundary, @@ -870,7 +877,7 @@ * so it cannot become accessible again.
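The shift/mask rewrites in the affs hunks above exist because the file position becomes a 64-bit loff_t in these paths: a plain '/' or '%' on a 64-bit quantity makes gcc emit calls to the libgcc helpers __divdi3/__moddi3, which the kernel does not link. The cheap replacements are only an identity when the divisor is a power of two (the raw affs block size); for the OFS payload size of blocksize-24 the hunks keep a true division/modulo on a 32-bit cast instead. A self-contained illustration (values made up):

	#include <stdio.h>

	int main(void)
	{
		long long pos = 5368711234LL;	/* a file position past 4GB */
		int blocksize = 512;		/* power of two */
		int blockshift = 9;		/* log2(blocksize) */

		unsigned long block  = pos >> blockshift;	/* pos / blocksize, no __divdi3 */
		unsigned long offset = pos & (blocksize - 1);	/* pos % blocksize, power-of-2 only */

		printf("block %lu, offset %lu\n", block, offset);
		return 0;
	}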
*/ - rem = inode->i_size % net_blocksize; + rem = (u_long)inode->i_size % net_blocksize; if (rem) { if ((inode->i_sb->u.affs_sb.s_flags & SF_OFS)) rem += 24; diff -urN 2.2.15pre16/fs/affs/inode.c 2.2.15pre16aa3/fs/affs/inode.c --- 2.2.15pre16/fs/affs/inode.c Mon Jan 17 16:44:41 2000 +++ 2.2.15pre16aa3/fs/affs/inode.c Thu Mar 30 16:00:58 2000 @@ -146,7 +146,7 @@ block = AFFS_I2BSIZE(inode) - 24; else block = AFFS_I2BSIZE(inode); - inode->u.affs_i.i_lastblock = ((inode->i_size + block - 1) / block) - 1; + inode->u.affs_i.i_lastblock = (((u_long)inode->i_size + block - 1) / block) - 1; break; case ST_SOFTLINK: inode->i_mode |= S_IFLNK; diff -urN 2.2.15pre16/fs/buffer.c 2.2.15pre16aa3/fs/buffer.c --- 2.2.15pre16/fs/buffer.c Wed Mar 29 19:42:10 2000 +++ 2.2.15pre16aa3/fs/buffer.c Thu Mar 30 16:00:58 2000 @@ -27,6 +27,8 @@ /* invalidate_buffers/set_blocksize/sync_dev race conditions and fs corruption fixes, 1999, Andrea Arcangeli */ +/* async buffer flushing, 1999 Andrea Arcangeli */ + #include #include #include @@ -39,6 +41,8 @@ #include #include #include +#include +#include #include #include @@ -79,6 +83,7 @@ static int nr_buffers = 0; static int nr_buffers_type[NR_LIST] = {0,}; +static unsigned long size_buffers_type[NR_LIST]; static int nr_buffer_heads = 0; static int nr_unused_buffer_heads = 0; static int nr_hashed_buffers = 0; @@ -139,13 +144,14 @@ bh->b_count++; wait.task = tsk; add_wait_queue(&bh->b_wait, &wait); -repeat: - tsk->state = TASK_UNINTERRUPTIBLE; - run_task_queue(&tq_disk); - if (buffer_locked(bh)) { + do { + run_task_queue(&tq_disk); + tsk->state = TASK_UNINTERRUPTIBLE; + mb(); + if (!buffer_locked(bh)) + break; schedule(); - goto repeat; - } + } while (buffer_locked(bh)); tsk->state = TASK_RUNNING; remove_wait_queue(&bh->b_wait, &wait); bh->b_count--; @@ -470,6 +476,7 @@ return; } nr_buffers_type[bh->b_list]--; + size_buffers_type[bh->b_list] -= bh->b_size; remove_from_hash_queue(bh); remove_from_lru_list(bh); } @@ -519,6 +526,7 @@ (*bhp)->b_prev_free = bh; nr_buffers_type[bh->b_list]++; + size_buffers_type[bh->b_list] += bh->b_size; /* Put the buffer in new hash-queue if it has a device.
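The wait_on_buffer() rewrite a few hunks above is the classic lost-wakeup-safe sleep loop: publish TASK_UNINTERRUPTIBLE first, then (after a barrier) test the condition, and only schedule() if it still holds. If the unlock and wake_up() race in between, the task state has already been reset to TASK_RUNNING and schedule() returns immediately instead of sleeping forever. The generic shape of the pattern, with condition() standing in for the buffer_locked() test and wq/wait set up as in the hunk:

	add_wait_queue(&wq, &wait);
	for (;;) {
		current->state = TASK_UNINTERRUPTIBLE;	/* publish intent to sleep... */
		mb();					/* ...before testing, so no wakeup is lost */
		if (condition())
			break;
		schedule();
	}
	current->state = TASK_RUNNING;
	remove_wait_queue(&wq, &wait);

Note the hunk also re-runs run_task_queue(&tq_disk) on every iteration, so queued disk requests keep getting kicked while we wait.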
*/ bh->b_next = NULL; @@ -679,21 +687,26 @@ bhnext = bh->b_next_free; if (bh->b_dev != dev || bh->b_size == size) continue; - if (buffer_dirty(bh)) - printk(KERN_ERR "set_blocksize: dev %s buffer_dirty %lu size %lu\n", kdevname(dev), bh->b_blocknr, bh->b_size); if (buffer_locked(bh)) { slept = 1; wait_on_buffer(bh); } + if (buffer_dirty(bh)) + printk(KERN_WARNING "set_blocksize: dev %s buffer_dirty %lu size %lu\n", kdevname(dev), bh->b_blocknr, bh->b_size); if (!bh->b_count) put_last_free(bh); else - printk(KERN_ERR + { + mark_buffer_clean(bh); + clear_bit(BH_Uptodate, &bh->b_state); + clear_bit(BH_Req, &bh->b_state); + printk(KERN_WARNING "set_blocksize: " - "b_count %d, dev %s, block %lu!\n", + "b_count %d, dev %s, block %lu, from %p\n", bh->b_count, bdevname(bh->b_dev), - bh->b_blocknr); + bh->b_blocknr, __builtin_return_address(0)); + } if (slept) goto again; } @@ -708,6 +721,7 @@ if (!grow_buffers(size)) { wakeup_bdflush(1); current->policy |= SCHED_YIELD; + current->state = TASK_RUNNING; schedule(); } } @@ -806,6 +820,46 @@ insert_into_queues(bh); } +/* -1 -> no need to flush + 0 -> async flush + 1 -> sync flush (wait for I/O completion) */ +static int balance_dirty_state(kdev_t dev) +{ + unsigned long dirty, tot, hard_dirty_limit, soft_dirty_limit; + + dirty = size_buffers_type[BUF_DIRTY] >> PAGE_SHIFT; + tot = (buffermem >> PAGE_SHIFT) + nr_free_pages - nr_free_bigpages; + tot -= size_buffers_type[BUF_PROTECTED] >> PAGE_SHIFT; + + dirty *= 200; + soft_dirty_limit = tot * bdf_prm.b_un.nfract; + hard_dirty_limit = soft_dirty_limit * 2; + + if (dirty > soft_dirty_limit) + { + if (dirty > hard_dirty_limit) + return 1; + return 0; + } + return -1; +} + +/* + * if a new dirty buffer is created we need to balance bdflush. + * + * in the future we might want to make bdflush aware of different + * pressures on different devices - thus the (currently unused) + * 'dev' parameter. + */ +void balance_dirty(kdev_t dev) +{ + int state = balance_dirty_state(dev); + + if (state < 0) + return; + wakeup_bdflush(state); +} + /* * A buffer may need to be moved from one buffer list to another * (e.g. in case it is not shared any more). Handle this. @@ -818,7 +872,9 @@ printk("Attempt to refile free buffer\n"); return; } - if (buffer_dirty(buf)) + if (buffer_protected(buf)) + dispose = BUF_PROTECTED; + else if (buffer_dirty(buf)) dispose = BUF_DIRTY; else if (buffer_locked(buf)) dispose = BUF_LOCKED; @@ -827,13 +883,7 @@ if(dispose != buf->b_list) { file_buffer(buf, dispose); if(dispose == BUF_DIRTY) { - int too_many = (nr_buffers * bdf_prm.b_un.nfract/100); - - /* This buffer is dirty, maybe we need to start flushing. - * If too high a percentage of the buffers are dirty... - */ - if (nr_buffers_type[BUF_DIRTY] > too_many) - wakeup_bdflush(1); + balance_dirty(buf->b_dev); /* If this is a loop device, and * more than half of the buffers are dirty... @@ -1158,7 +1208,7 @@ #endif } if (test_and_clear_bit(PG_swap_unlock_after, &page->flags)) - swap_after_unlock_page(page->offset); + swap_after_unlock_page(pgoff2ulong(page->index)); if (test_and_clear_bit(PG_free_after, &page->flags)) __free_page(page); } @@ -1251,6 +1301,225 @@ return; } + +/* + * For brw_kiovec: submit a set of buffer_head temporary IOs and wait + * for them to complete. Clean up the buffer_heads afterwards. + */ + +#define dprintk(x...)
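The new balance_dirty_state() above weighs dirty buffer pages against the total reclaimable pages, scaled so that bdf_prm.b_un.nfract keeps its old percentage meaning. With the stock nfract of 40 (an assumption; the default is set elsewhere in buffer.c), dirty*200 > tot*nfract reduces to "more than 20% dirty" for the soft limit and 40% for the hard limit. A stand-alone rendering of the arithmetic:

	#include <stdio.h>

	/* mirrors balance_dirty_state(), with nfract fixed at 40 */
	static int balance_demo(unsigned long dirty, unsigned long tot)
	{
		unsigned long soft = tot * 40;		/* bdf_prm.b_un.nfract */
		unsigned long hard = soft * 2;

		dirty *= 200;
		if (dirty > soft)
			return dirty > hard ? 1 : 0;	/* 1: sync flush, 0: async */
		return -1;				/* below the soft limit */
	}

	int main(void)
	{
		/* with 8000 reclaimable pages: flushing starts above 1600
		   dirty pages (20%), writers throttle above 3200 (40%) */
		printf("%d %d %d\n", balance_demo(1000, 8000),
		       balance_demo(2000, 8000), balance_demo(4000, 8000));
		return 0;	/* prints: -1 0 1 */
	}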
+ +static int do_kio(int rw, int nr, struct buffer_head *bh[], int size) +{ + int iosize; + int i; + int err; + struct buffer_head *tmp; + + dprintk ("do_kio start\n"); + + ll_rw_block(rw, nr, bh); + iosize = err = 0; + + for (i = nr; --i >= 0; ) { + tmp = bh[i]; + wait_on_buffer(tmp); + if (!buffer_uptodate(tmp)) { + err = -EIO; + /* We are waiting on bh'es in reverse order so + clearing iosize on error calculates the + amount of IO before the first error. */ + iosize = 0; + } + + free_async_buffers(tmp); + iosize += size; + } + + dprintk ("do_kio end %d %d\n", iosize, err); + + if (iosize) + return iosize; + else + return err; +} + +/* + * Clean up the bounce buffers potentially used by brw_kiovec. All of + * the kiovec's bounce buffers must be cleared of temporarily allocated + * bounce pages, but only READ pages for which IO completed successfully + * can actually be transferred back to user space. + */ + +void cleanup_bounce_buffers(int rw, int nr, struct kiobuf *iovec[], + int transferred) +{ + int i; + for (i = 0; i < nr; i++) { + struct kiobuf *iobuf = iovec[i]; + if (iobuf->bounced) { + if (transferred > 0 && !(rw & WRITE)) + kiobuf_copy_bounce(iobuf, COPY_FROM_BOUNCE, + transferred); + + clear_kiobuf_bounce_pages(iobuf); + } + transferred -= iobuf->length; + } +} + +/* + * Start I/O on a physical range of kernel memory, defined by a vector + * of kiobuf structs (much like a user-space iovec list). + * + * The kiobuf must already be locked for IO. IO is submitted + * asynchronously: you need to check page->locked, page->uptodate, and + * maybe wait on page->wait. + * + * It is up to the caller to make sure that there are enough blocks + * passed in to completely map the iobufs to disk. + */ + +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], + kdev_t dev, unsigned long b[], int size) +{ + int err; + int length; + int transferred; + int i; + int bufind; + int pageind; + int bhind; + int offset; + unsigned long blocknr; + struct kiobuf * iobuf = NULL; + unsigned long page; + unsigned long bounce; + struct page * map; + struct buffer_head *tmp, *bh[KIO_MAX_SECTORS]; + + /* + * First, do some alignment and validity checks + */ + for (i = 0; i < nr; i++) { + iobuf = iovec[i]; + if ((iobuf->offset & (size-1)) || + (iobuf->length & (size-1))) + return -EINVAL; + if (!iobuf->locked) + panic("brw_kiovec: iobuf not locked for I/O"); + if (!iobuf->nr_pages) + panic("brw_kiovec: iobuf not initialised"); + } + + /* DEBUG */ +#if 0 + return iobuf->length; +#endif + dprintk ("brw_kiovec: start\n"); + + /* + * OK to walk down the iovec doing page IO on each page we find.
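For orientation, this is how a caller is expected to drive the interface before the main loop below runs. A hedged sketch only, not code from the patch: how the page lists are populated (for instance from user pages) is outside this diff, so that step is left as a comment, and error handling is trimmed.

	/* hypothetical caller: read nblocks blocks of 'size' bytes from 'dev'
	   into memory already described by a kiobuf */
	int read_blocks_kio(kdev_t dev, unsigned long *b, int nblocks, int size)
	{
		struct kiobuf *iobuf;
		int err, transferred;

		err = alloc_kiovec(1, &iobuf);
		if (err)
			return err;

		/* ... fill iobuf->pagelist[]/maplist[], iobuf->nr_pages,
		   iobuf->offset, iobuf->length = nblocks * size, and set
		   iobuf->locked = 1, as brw_kiovec() requires ... */

		transferred = brw_kiovec(READ, 1, &iobuf, dev, b, size);

		free_kiovec(1, &iobuf);
		return transferred;	/* bytes transferred, or negative error */
	}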
+ */ + bufind = bhind = transferred = err = 0; + for (i = 0; i < nr; i++) { + iobuf = iovec[i]; + err = setup_kiobuf_bounce_pages(iobuf, GFP_USER); + if (err) + goto finished; + if (rw & WRITE) + kiobuf_copy_bounce(iobuf, COPY_TO_BOUNCE, -1); + + offset = iobuf->offset; + length = iobuf->length; + dprintk ("iobuf %d %d %d\n", offset, length, size); + + for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { + map = iobuf->maplist[pageind]; + bounce = iobuf->bouncelist[pageind]; + + if (bounce) + page = bounce; + else + page = iobuf->pagelist[pageind]; + + while (length > 0) { + blocknr = b[bufind++]; + tmp = get_unused_buffer_head(0); + if (!tmp) { + err = -ENOMEM; + goto error; + } + + tmp->b_dev = B_FREE; + tmp->b_size = size; + tmp->b_data = (char *) (page + offset); + tmp->b_this_page = tmp; + + init_buffer(tmp, dev, blocknr, + end_buffer_io_sync, NULL); + if (rw == WRITE) { + set_bit(BH_Uptodate, &tmp->b_state); + set_bit(BH_Dirty, &tmp->b_state); + } + + dprintk ("buffer %d (%d) at %p\n", + bhind, tmp->b_blocknr, tmp->b_data); + bh[bhind++] = tmp; + length -= size; + offset += size; + + /* + * Start the IO if we have got too much or if + * this is the end of the last iobuf + */ + if (bhind >= KIO_MAX_SECTORS) { + err = do_kio(rw, bhind, bh, size); + if (err >= 0) + transferred += err; + else + goto finished; + bhind = 0; + } + + if (offset >= PAGE_SIZE) { + offset = 0; + break; + } + } /* End of block loop */ + } /* End of page loop */ + } /* End of iovec loop */ + + /* Is there any IO still left to submit? */ + if (bhind) { + err = do_kio(rw, bhind, bh, size); + if (err >= 0) + transferred += err; + else + goto finished; + } + + finished: + dprintk ("brw_kiovec: end (%d, %d)\n", transferred, err); + + cleanup_bounce_buffers(rw, nr, iovec, transferred); + + if (transferred) + return transferred; + return err; + + error: + /* We got an error allocating the bh'es. Just free the current + buffer_heads and exit. */ + for (i = bhind; --i >= 0; ) { + free_async_buffers(bh[i]); + } + + clear_kiobuf_bounce_pages(iobuf); + + goto finished; +} + /* * Start I/O on a page. * This function expects the page to be locked and may return before I/O is complete. @@ -1385,15 +1654,46 @@ set_bit(PG_locked, &page->flags); set_bit(PG_free_after, &page->flags); + /* Blocks within a page */ i = PAGE_SIZE >> inode->i_sb->s_blocksize_bits; - block = page->offset >> inode->i_sb->s_blocksize_bits; - p = nr; - do { - *p = inode->i_op->bmap(inode, block); - i--; - block++; - p++; - } while (i > 0); + + block = pgoff2ulong(page->index); + /* Already scaled by PAGE_SHIFT, which must be the same or larger + than the block-size shift of any filesystem on this system -- + that is, on i386 with 4k pages one can't use 8k (primitive) + blocks on the filesystems... */ + + if (i > 0) { + /* Filesystem blocksize is the same as, or smaller than, + the CPU page size; we can easily process this.. */ + + if (i > 1) + block *= i; + /* Scale by FS blocks per page, presuming FS-blocks are smaller + than the processor page... */ + + p = nr; + do { + *p = inode->i_op->bmap(inode, block); + i--; + block++; + p++; + } while (i > 0); + } else { + /* Filesystem blocksize is larger than CPU page size, + but if the underlying storage system block size is + smaller than CPU page size, all is well, else we + are in deep trouble -- for direct paging in at least.. */ + /* Nobody needs such monstrous fs block sizes ? + Well, it is the only way to get files in terabyte + range.. Nobody needs them ? You are in for a surprise..
+ However EXT2 (at least) needs access to internal + blocks and there it needs allocations of 8k/16k (or + whatever the block size is) for internal uses.. + Fixing this function alone isn't enough, although + perhaps fairly trivial.. */ + /* FIXME: WRITE THE CODE HERE !!! */ + } /* IO start */ brw_page(READ, page, inode->i_dev, nr, inode->i_sb->s_blocksize, 1); @@ -1477,7 +1777,8 @@ if (!buffer_busy(p)) continue; - wakeup_bdflush(0); + if (buffer_dirty(bh)) + wakeup_bdflush(0); return 0; } while (tmp != bh); @@ -1508,7 +1809,7 @@ int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0; int protected = 0; int nlist; - static char *buf_types[NR_LIST] = {"CLEAN","LOCKED","DIRTY"}; + static char *buf_types[NR_LIST] = {"CLEAN","LOCKED","DIRTY","PROTECTED",}; printk("Buffer memory: %8ldkB\n",buffermem>>10); printk("Buffer heads: %6d\n",nr_buffer_heads); @@ -1532,7 +1833,7 @@ used++, lastused = found; bh = bh->b_next_free; } while (bh != lru_list[nlist]); - printk("%8s: %d buffers, %d used (last=%d), " + printk("%9s: %d buffers, %d used (last=%d), " "%d locked, %d protected, %d dirty\n", buf_types[nlist], found, used, lastused, locked, protected, dirty); @@ -1704,7 +2005,6 @@ if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount); printk("Wrote %d/%d buffers\n", nwritten, ndirty); #endif - run_task_queue(&tq_disk); return 0; } @@ -1877,7 +2177,8 @@ /* If there are still a lot of dirty buffers around, skip the sleep and flush some more */ - if(ndirty == 0 || nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) { + if (!ndirty || balance_dirty_state(NODEV) < 0) + { spin_lock_irq(¤t->sigmask_lock); flush_signals(current); spin_unlock_irq(¤t->sigmask_lock); @@ -1901,13 +2202,18 @@ tsk->session = 1; tsk->pgrp = 1; strcpy(tsk->comm, "kupdate"); + + /* sigstop and sigcont will stop and wakeup kupdate */ + spin_lock_irq(&tsk->sigmask_lock); sigfillset(&tsk->blocked); - /* sigcont will wakeup kupdate after setting interval to 0 */ sigdelset(&tsk->blocked, SIGCONT); + sigdelset(&tsk->blocked, SIGSTOP); + spin_unlock_irq(&tsk->sigmask_lock); lock_kernel(); for (;;) { + /* update interval */ interval = bdf_prm.b_un.interval; if (interval) { @@ -1916,8 +2222,24 @@ } else { + stop_kupdate: tsk->state = TASK_STOPPED; schedule(); /* wait for SIGCONT */ + } + /* check for sigstop */ + if (signal_pending(tsk)) + { + int stopped = 0; + spin_lock_irq(&tsk->sigmask_lock); + if (sigismember(&tsk->signal, SIGSTOP)) + { + sigdelset(&tsk->signal, SIGSTOP); + stopped = 1; + } + recalc_sigpending(tsk); + spin_unlock_irq(&tsk->sigmask_lock); + if (stopped) + goto stop_kupdate; } #ifdef DEBUG printk("kupdate() activated...\n"); diff -urN 2.2.15pre16/fs/coda/file.c 2.2.15pre16aa3/fs/coda/file.c --- 2.2.15pre16/fs/coda/file.c Mon Jan 17 16:44:41 2000 +++ 2.2.15pre16aa3/fs/coda/file.c Thu Mar 30 16:00:58 2000 @@ -99,7 +99,7 @@ &cont_file, &cont_dentry); CDEBUG(D_INODE, "coda ino: %ld, cached ino %ld, page offset: %lx\n", - coda_inode->i_ino, cii->c_ovp->i_ino, page->offset); + coda_inode->i_ino, cii->c_ovp->i_ino, pgoff2ulong(page->index)); generic_readpage(&cont_file, page); EXIT; diff -urN 2.2.15pre16/fs/dcache.c 2.2.15pre16aa3/fs/dcache.c --- 2.2.15pre16/fs/dcache.c Wed Jan 5 14:16:55 2000 +++ 2.2.15pre16aa3/fs/dcache.c Thu Mar 30 16:00:56 2000 @@ -476,7 +476,7 @@ { if (gfp_mask & __GFP_IO) { int count = 0; - if (priority) + if (priority > 1) count = dentry_stat.nr_unused / priority; prune_dcache(count, -1); } diff -urN 2.2.15pre16/fs/dquot.c 2.2.15pre16aa3/fs/dquot.c --- 
2.2.15pre16/fs/dquot.c Wed Mar 29 19:42:10 2000 +++ 2.2.15pre16aa3/fs/dquot.c Thu Mar 30 16:00:56 2000 @@ -570,7 +570,7 @@ */ if (prune_dcache(0, 128)) { - free_inode_memory(10); + free_inode_memory(); goto repeat; } diff -urN 2.2.15pre16/fs/ext2/file.c 2.2.15pre16aa3/fs/ext2/file.c --- 2.2.15pre16/fs/ext2/file.c Mon Jan 17 16:44:42 2000 +++ 2.2.15pre16aa3/fs/ext2/file.c Thu Mar 30 16:00:58 2000 @@ -39,10 +39,6 @@ static long long ext2_file_lseek(struct file *, long long, int); static ssize_t ext2_file_write (struct file *, const char *, size_t, loff_t *); static int ext2_release_file (struct inode *, struct file *); -#if BITS_PER_LONG < 64 -static int ext2_open_file (struct inode *, struct file *); - -#else #define EXT2_MAX_SIZE(bits) \ (((EXT2_NDIR_BLOCKS + (1LL << (bits - 2)) + \ @@ -55,8 +51,6 @@ EXT2_MAX_SIZE(10), EXT2_MAX_SIZE(11), EXT2_MAX_SIZE(12), EXT2_MAX_SIZE(13) }; -#endif - /* * We have mostly NULL's here: the current defaults are ok for * the ext2 filesystem. @@ -69,11 +63,7 @@ NULL, /* poll - default */ ext2_ioctl, /* ioctl */ generic_file_mmap, /* mmap */ -#if BITS_PER_LONG == 64 NULL, /* no special open is needed */ -#else - ext2_open_file, -#endif NULL, /* flush */ ext2_release_file, /* release */ ext2_sync_file, /* fsync */ @@ -121,12 +111,8 @@ offset += file->f_pos; } if (((unsigned long long) offset >> 32) != 0) { -#if BITS_PER_LONG < 64 - return -EINVAL; -#else if (offset > ext2_max_sizes[EXT2_BLOCK_SIZE_BITS(inode->i_sb)]) return -EINVAL; -#endif } if (offset != file->f_pos) { file->f_pos = offset; @@ -155,7 +141,7 @@ size_t count, loff_t *ppos) { struct inode * inode = filp->f_dentry->d_inode; - off_t pos; + loff_t pos; long block; int offset; int written, c; @@ -202,24 +188,18 @@ /* Check for overflow.. */ -#if BITS_PER_LONG < 64 - /* If the fd's pos is already greater than or equal to the file - * descriptor's offset maximum, then we need to return EFBIG for - * any non-zero count (and we already tested for zero above). */ - if (((unsigned) pos) >= 0x7FFFFFFFUL) - return -EFBIG; - - /* If we are about to overflow the maximum file size, we also - * need to return the error, but only if no bytes can be written - * successfully. */ - if (((unsigned) pos + count) > 0x7FFFFFFFUL) { - count = 0x7FFFFFFFL - pos; - if (((signed) count) < 0) + /* L-F-S spec 2.2.1.27: */ + if (!(filp->f_flags & O_LARGEFILE)) { + if (pos >= 0x7ffffffeULL) /* pos@2G forbidden */ return -EFBIG; + + if (pos + count >= 0x7fffffffULL) + /* Write only until end of allowed region */ + count = 0x7fffffffULL - pos; } -#else + { - off_t max = ext2_max_sizes[EXT2_BLOCK_SIZE_BITS(sb)]; + loff_t max = ext2_max_sizes[EXT2_BLOCK_SIZE_BITS(sb)]; if (pos >= max) return -EFBIG; @@ -239,7 +219,6 @@ mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); } } -#endif /* From SUS: We must generate a SIGXFSZ for file size overflow * only if no bytes were actually written to the file. --sct */ @@ -382,15 +361,3 @@ return 0; } -#if BITS_PER_LONG < 64 -/* - * Called when an inode is about to be open. - * We use this to disallow opening RW large files on 32bit systems. 
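The "L-F-S spec 2.2.1.27" checks that now guard ext2_file_write above (and, further down, the minix and ncpfs write paths) all encode the same rule: without O_LARGEFILE a write may not put the file offset at or past 2GB-1, so the request either fails with EFBIG or is shortened to end at the boundary. The rule as a stand-alone helper, hypothetical and for clarity only; the patch open-codes it in each filesystem:

	/* returns -EFBIG, or 0 with *count possibly shortened */
	static int lfs_clamp(unsigned int f_flags, loff_t pos, size_t *count)
	{
		if (!(f_flags & O_LARGEFILE)) {
			if (pos >= 0x7ffffffeULL)	/* pos at 2G-2: nothing may be written */
				return -EFBIG;
			if (pos + *count >= 0x7fffffffULL)
				*count = 0x7fffffffULL - pos;	/* stop at the 2G-1 mark */
		}
		return 0;
	}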
- */ -static int ext2_open_file (struct inode * inode, struct file * filp) -{ - if (inode->u.ext2_i.i_high_size && (filp->f_mode & FMODE_WRITE)) - return -EFBIG; - return 0; -} -#endif diff -urN 2.2.15pre16/fs/ext2/inode.c 2.2.15pre16aa3/fs/ext2/inode.c --- 2.2.15pre16/fs/ext2/inode.c Mon Jan 17 16:44:42 2000 +++ 2.2.15pre16aa3/fs/ext2/inode.c Thu Mar 30 16:00:58 2000 @@ -533,15 +533,8 @@ inode->u.ext2_i.i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); else { inode->u.ext2_i.i_dir_acl = 0; - inode->u.ext2_i.i_high_size = - le32_to_cpu(raw_inode->i_size_high); -#if BITS_PER_LONG < 64 - if (raw_inode->i_size_high) - inode->i_size = (__u32)-1; -#else - inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) - << 32; -#endif + inode->i_size = ((__u64)(inode->i_size & 0xFFFFFFFFUL)) | + (((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32); } inode->u.ext2_i.i_version = le32_to_cpu(raw_inode->i_version); inode->i_generation = inode->u.ext2_i.i_version; @@ -667,12 +660,7 @@ if (S_ISDIR(inode->i_mode)) raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext2_i.i_dir_acl); else { -#if BITS_PER_LONG < 64 - raw_inode->i_size_high = - cpu_to_le32(inode->u.ext2_i.i_high_size); -#else raw_inode->i_size_high = cpu_to_le32(inode->i_size >> 32); -#endif } raw_inode->i_version = cpu_to_le32(inode->u.ext2_i.i_version); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) @@ -728,21 +716,18 @@ } if (iattr->ia_valid & ATTR_SIZE) { - off_t size = iattr->ia_size; + loff_t size = iattr->ia_size; unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur; if (size < 0) return -EINVAL; -#if BITS_PER_LONG == 64 if (size > ext2_max_sizes[EXT2_BLOCK_SIZE_BITS(inode->i_sb)]) return -EFBIG; -#endif if (limit < RLIM_INFINITY && size > limit) { send_sig(SIGXFSZ, current, 0); return -EFBIG; } -#if BITS_PER_LONG == 64 if (size >> 33) { struct super_block *sb = inode->i_sb; struct ext2_super_block *es = sb->u.ext2_sb.s_es; @@ -755,7 +740,6 @@ mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); } } -#endif } retval = inode_change_ok(inode, iattr); diff -urN 2.2.15pre16/fs/ext2/truncate.c 2.2.15pre16aa3/fs/ext2/truncate.c --- 2.2.15pre16/fs/ext2/truncate.c Mon Jan 17 16:44:42 2000 +++ 2.2.15pre16aa3/fs/ext2/truncate.c Thu Mar 30 16:00:58 2000 @@ -53,9 +53,10 @@ * Currently we always hold the inode semaphore during truncate, so * there's no need to test for changes during the operation. */ -#define DIRECT_BLOCK(inode) \ - ((inode->i_size + inode->i_sb->s_blocksize - 1) / \ - inode->i_sb->s_blocksize) +#define DIRECT_BLOCK(inode) \ + ((long) \ + ((inode->i_size + inode->i_sb->s_blocksize - 1) >> \ + inode->i_sb->s_blocksize_bits)) #define INDIRECT_BLOCK(inode,offset) ((int)DIRECT_BLOCK(inode) - offset) #define DINDIRECT_BLOCK(inode,offset) \ (INDIRECT_BLOCK(inode,offset) / addr_per_block) diff -urN 2.2.15pre16/fs/fat/file.c 2.2.15pre16aa3/fs/fat/file.c --- 2.2.15pre16/fs/fat/file.c Mon Jan 17 16:44:42 2000 +++ 2.2.15pre16aa3/fs/fat/file.c Thu Mar 30 16:00:58 2000 @@ -227,7 +227,7 @@ Each time we process one block in bhlist, we replace it by a new prefetch block if needed. 
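The ext2_read_inode hunk above can now assemble the full 64-bit size unconditionally: the low 32 bits are already in inode->i_size, the high 32 bits come from the on-disk i_size_high word. Worked through with made-up numbers:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long i_size = 0x00000400ULL;	/* low word: 1024 */
		unsigned int i_size_high = 2;			/* high word from the raw inode */

		i_size = (i_size & 0xFFFFFFFFULL) |
			 ((unsigned long long)i_size_high << 32);
		printf("%llu\n", i_size);	/* 2*2^32 + 1024 = 8589935616 */
		return 0;
	}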
*/ - PRINTK (("#### ino %ld pos %ld size %ld count %d\n",inode->i_ino,*ppos,inode->i_size,count)); + PRINTK (("#### ino %ld pos %ld size %ld count %d\n",inode->i_ino,*ppos,(u_long)inode->i_size,count)); { /* We must prefetch complete block, so we must @@ -253,7 +253,7 @@ } pre.nolist = 0; PRINTK (("count %d ahead %d nblist %d\n",count,read_ahead[MAJOR(inode->i_dev)],pre.nblist)); - while ((left_in_file = inode->i_size - *ppos) > 0 + while ((left_in_file = (u_long)inode->i_size - *ppos) > 0 && buf < end){ struct buffer_head *bh = pre.bhlist[pre.nolist]; char *data; @@ -451,7 +451,7 @@ void fat_truncate(struct inode *inode) { - int cluster; + int cluster_bytes, cluster_shift; /* Why no return value? Surely the disk could fail... */ if (IS_IMMUTABLE(inode)) @@ -460,8 +460,10 @@ printk("FAT: fat_truncate called though fs is read-only, uhh...\n"); return /* -EROFS */; } - cluster = SECTOR_SIZE*MSDOS_SB(inode->i_sb)->cluster_size; - (void) fat_free(inode,(inode->i_size+(cluster-1))/cluster); + cluster_bytes = SECTOR_SIZE * MSDOS_SB(inode->i_sb)->cluster_size; + cluster_shift = fslog2(cluster_bytes); + (void) fat_free(inode, + (inode->i_size+(cluster_bytes-1)) >> cluster_shift); MSDOS_I(inode)->i_attrs |= ATTR_ARCH; mark_inode_dirty(inode); } diff -urN 2.2.15pre16/fs/fat/inode.c 2.2.15pre16aa3/fs/fat/inode.c --- 2.2.15pre16/fs/fat/inode.c Mon Jan 17 16:44:42 2000 +++ 2.2.15pre16aa3/fs/fat/inode.c Thu Mar 30 16:00:58 2000 @@ -392,8 +392,9 @@ sizeof(struct msdos_dir_entry); } inode->i_blksize = MSDOS_SB(sb)->cluster_size* SECTOR_SIZE; - inode->i_blocks = (inode->i_size+inode->i_blksize-1)/ - inode->i_blksize*MSDOS_SB(sb)->cluster_size; + inode->i_blocks = (((inode->i_size+inode->i_blksize-1) >> + fslog2(inode->i_blksize)) * + MSDOS_SB(sb)->cluster_size); MSDOS_I(inode)->i_logstart = 0; MSDOS_I(inode)->i_attrs = 0; @@ -823,8 +824,9 @@ MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; /* this is as close to the truth as we can get ... */ inode->i_blksize = MSDOS_SB(sb)->cluster_size*SECTOR_SIZE; - inode->i_blocks = (inode->i_size+inode->i_blksize-1)/ - inode->i_blksize*MSDOS_SB(sb)->cluster_size; + inode->i_blocks = (((inode->i_size+inode->i_blksize-1) >> + fslog2(inode->i_blksize)) * + MSDOS_SB(sb)->cluster_size); inode->i_mtime = inode->i_atime = date_dos2unix(CF_LE_W(de->time),CF_LE_W(de->date)); inode->i_ctime = diff -urN 2.2.15pre16/fs/fcntl.c 2.2.15pre16aa3/fs/fcntl.c --- 2.2.15pre16/fs/fcntl.c Mon Jan 17 16:44:43 2000 +++ 2.2.15pre16aa3/fs/fcntl.c Thu Mar 30 16:00:58 2000 @@ -182,6 +182,19 @@ case F_SETLKW: err = fcntl_setlk(fd, cmd, (struct flock *) arg); break; + +#if BITS_PER_LONG == 32 + case F_GETLK64: + err = fcntl_getlk64(fd, (struct flock64 *) arg); + break; + case F_SETLK64: + err = fcntl_setlk64(fd, cmd, (struct flock64 *) arg); + break; + case F_SETLKW64: + err = fcntl_setlk64(fd, cmd, (struct flock64 *) arg); + break; +#endif + case F_GETOWN: /* * XXX If f_owner is a process group, the diff -urN 2.2.15pre16/fs/inode.c 2.2.15pre16aa3/fs/inode.c --- 2.2.15pre16/fs/inode.c Fri Jan 7 18:19:18 2000 +++ 2.2.15pre16aa3/fs/inode.c Thu Mar 30 16:00:56 2000 @@ -435,7 +435,7 @@ * This is the externally visible routine for * inode memory management. 
*/ -void free_inode_memory(int goal) +void free_inode_memory(void) { spin_lock(&inode_lock); free_inodes(); diff -urN 2.2.15pre16/fs/iobuf.c 2.2.15pre16aa3/fs/iobuf.c --- 2.2.15pre16/fs/iobuf.c Thu Jan 1 01:00:00 1970 +++ 2.2.15pre16aa3/fs/iobuf.c Thu Mar 30 16:00:57 2000 @@ -0,0 +1,236 @@ +/* + * iobuf.c + * + * Keep track of the general-purpose IO-buffer structures used to track + * abstract kernel-space io buffers. + * + */ + +#include +#include +#include +#include + +static kmem_cache_t *kiobuf_cachep; + +void __init kiobuf_init(void) +{ + kiobuf_cachep = kmem_cache_create("kiobuf", + sizeof(struct kiobuf), + 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if(!kiobuf_cachep) + panic("Cannot create kernel iobuf cache\n"); +} + + +int alloc_kiovec(int nr, struct kiobuf **bufp) +{ + int i; + struct kiobuf *iobuf; + + for (i = 0; i < nr; i++) { + iobuf = kmem_cache_alloc(kiobuf_cachep, SLAB_KERNEL); + if (!iobuf) { + free_kiovec(i, bufp); + return -ENOMEM; + } + + memset(iobuf, 0, sizeof(*iobuf)); + iobuf->array_len = KIO_STATIC_PAGES; + iobuf->pagelist = iobuf->page_array; + iobuf->maplist = iobuf->map_array; + iobuf->bouncelist = iobuf->bounce_array; + *bufp++ = iobuf; + } + + return 0; +} + +void clear_kiobuf_bounce_pages(struct kiobuf *iobuf) +{ + int i; + + if (!iobuf->bounced) + return; + + for (i = 0; i < iobuf->nr_pages; i++) { + unsigned long page = iobuf->bouncelist[i]; + if (page) + free_page(page); + } + iobuf->bounced = 0; +} + +void free_kiovec(int nr, struct kiobuf **bufp) +{ + struct kiobuf *iobuf; + int i; + + for (i = 0; i < nr; i++) { + iobuf = bufp[i]; + clear_kiobuf_bounce_pages(iobuf); + if (iobuf->array_len > KIO_STATIC_PAGES) { + kfree (iobuf->pagelist); + } + kmem_cache_free(kiobuf_cachep, bufp[i]); + } +} + +int expand_kiobuf(struct kiobuf *iobuf, int wanted) +{ + unsigned long * pagelist, * bouncelist; + struct page ** maplist; + + if (iobuf->array_len >= wanted) + return 0; + + /* + * kmalloc enough space for the page, map and bounce lists all + * at once. + */ + pagelist = (unsigned long *) + kmalloc(3 * wanted * sizeof(unsigned long), GFP_KERNEL); + if (!pagelist) + return -ENOMEM; + + /* Did it grow while we waited? */ + if (iobuf->array_len >= wanted) { + kfree(pagelist); + return 0; + } + + maplist = (struct page **) (pagelist + wanted); + bouncelist = pagelist + 2 * wanted; + + memcpy (pagelist, iobuf->pagelist, + iobuf->array_len * sizeof(unsigned long)); + memcpy (maplist, iobuf->maplist, + iobuf->array_len * sizeof(struct page **)); + memcpy (bouncelist, iobuf->bouncelist, + iobuf->array_len * sizeof(unsigned long)); + + if (iobuf->array_len > KIO_STATIC_PAGES) + kfree (iobuf->pagelist); + + iobuf->pagelist = pagelist; + iobuf->maplist = maplist; + iobuf->bouncelist = bouncelist; + iobuf->array_len = wanted; + return 0; +} + + +/* + * Test whether a given page from the bounce buffer matches the given + * gfp_mask. Return true if a bounce buffer is required for this + * page. + */ + +static inline int test_bounce_page(unsigned long page, + struct page * map, + int gfp_mask) +{ + /* Unmapped pages from PCI memory or BIGMEM pages always need a + * bounce buffer unless the caller is prepared to accept + * GFP_BIGMEM pages. 
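Spelled out, the decision test_bounce_page() makes just below (PageBIGMEM, PageDMA and the __GFP_BIGMEM/__GFP_DMA bits come from the bigmem support this patch depends on):

	page being examined              caller's gfp_mask        bounce needed?
	unmapped, or a BIGMEM page       has __GFP_BIGMEM         no
	unmapped, or a BIGMEM page       lacks __GFP_BIGMEM       yes
	ISA-DMA-capable page             anything                 no
	any other page                   has __GFP_DMA            yes
	any other page                   lacks __GFP_DMA          no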
*/ + + if (!map || PageBIGMEM(map) ) + /* Careful, the following must return the right value + * even if CONFIG_BIGMEM is not set */ + return !(gfp_mask & __GFP_BIGMEM); + + /* A DMA-able page never needs a bounce buffer */ + if (PageDMA(map)) + return 0; + + /* Otherwise it is a non-ISA-DMA-capable page and needs bounce + * buffers if GFP_DMA is requested */ + return gfp_mask & __GFP_DMA; +} + +int setup_kiobuf_bounce_pages(struct kiobuf *iobuf, int gfp_mask) +{ + int i; + + clear_kiobuf_bounce_pages(iobuf); + + for (i = 0; i < iobuf->nr_pages; i++) { + struct page *map = iobuf->maplist[i]; + unsigned long page = iobuf->pagelist[i]; + unsigned long bounce_page; + + if (!test_bounce_page(page, map, gfp_mask)) { + iobuf->bouncelist[i] = 0; + continue; + } + + bounce_page = __get_free_page(gfp_mask); + if (!bounce_page) + goto error; + + iobuf->bouncelist[i] = bounce_page; + iobuf->bounced = 1; + } + return 0; + + error: + clear_kiobuf_bounce_pages(iobuf); + return -ENOMEM; +} + +/* + * Copy a bounce buffer. For completion of partially-failed read IOs, + * we need to be able to place an upper limit on the data successfully + * transferred from bounce buffers to the user's own buffers. + */ + +void kiobuf_copy_bounce(struct kiobuf *iobuf, int direction, int max) +{ + int i; + int offset, length; + + if (!iobuf->bounced) + return; + + offset = iobuf->offset; + length = iobuf->length; + if (max >= 0 && length > max) + length = max; + + i = 0; + + if (offset > PAGE_SIZE) { + i = (offset >> PAGE_SHIFT); + offset &= ~PAGE_MASK; + } + + for (; i < iobuf->nr_pages && length > 0; i++) { + unsigned long page = iobuf->pagelist[i]; + unsigned long bounce_page = iobuf->bouncelist[i]; + unsigned long kin, kout; + int pagelen = length; + + if (bounce_page) { + if (pagelen > PAGE_SIZE) + pagelen = PAGE_SIZE; + + if (direction == COPY_TO_BOUNCE) { + kin = kmap(page, KM_READ); + kout = kmap(bounce_page, KM_WRITE); + } else { + kin = kmap(bounce_page, KM_READ); + kout = kmap(page, KM_WRITE); + } + + memcpy((char *) (kout+offset), + (char *) (kin+offset), + pagelen); + kunmap(kout, KM_WRITE); + kunmap(kin, KM_READ); + } + + length -= pagelen; + offset = 0; + } +} diff -urN 2.2.15pre16/fs/isofs/inode.c 2.2.15pre16aa3/fs/isofs/inode.c --- 2.2.15pre16/fs/isofs/inode.c Wed Mar 29 19:42:10 2000 +++ 2.2.15pre16aa3/fs/isofs/inode.c Thu Mar 30 16:00:58 2000 @@ -877,7 +877,8 @@ int isofs_bmap(struct inode * inode,int block) { - off_t b_off, offset, size; + loff_t b_off; + unsigned offset, size; struct inode *ino; unsigned int firstext; unsigned long nextino; @@ -888,7 +889,7 @@ return 0; } - b_off = block << ISOFS_BUFFER_BITS(inode); + b_off = (loff_t)block << ISOFS_BUFFER_BITS(inode); /* * If we are beyond the end of this file, don't give out any @@ -896,7 +897,7 @@ */ if( b_off > inode->i_size ) { - off_t max_legal_read_offset; + loff_t max_legal_read_offset; /* * If we are *way* beyond the end of the file, print a message. @@ -907,20 +908,21 @@ * I/O errors. 
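The (loff_t) cast added to isofs_bmap() above matters because the shift would otherwise be performed in the 32-bit type of 'block' and wrap long before the wider b_off could help; signed overflow is formally undefined behaviour besides. With 2048-byte ISO blocks (an 11-bit shift):

	#include <stdio.h>

	int main(void)
	{
		int block = 3000000;			/* roughly 5.7GB into a large image */
		long long wrong = block << 11;		/* shifted in 32 bits: wraps (UB) */
		long long right = (long long)block << 11;	/* 6144000000, past 4GB */

		printf("wrong=%lld right=%lld\n", wrong, right);
		return 0;
	}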
*/ max_legal_read_offset = (inode->i_size + PAGE_SIZE - 1) - & ~(PAGE_SIZE - 1); + & ~(loff_t)(PAGE_SIZE - 1); if( b_off >= max_legal_read_offset ) { printk("_isofs_bmap: block>= EOF(%d, %ld)\n", block, - inode->i_size); + (u_long)((inode->i_size >> ISOFS_BUFFER_BITS(inode)) + + ((inode->i_size & ((1 << ISOFS_BUFFER_BITS(inode))-1)) != 0))); } return 0; } offset = 0; firstext = inode->u.isofs_i.i_first_extent; - size = inode->u.isofs_i.i_section_size; - nextino = inode->u.isofs_i.i_next_section_ino; + size = inode->u.isofs_i.i_section_size; + nextino = inode->u.isofs_i.i_next_section_ino; #ifdef DEBUG printk("first inode: inode=%x nextino=%x firstext=%u size=%lu\n", inode->i_ino, nextino, firstext, size); @@ -1159,7 +1161,7 @@ #ifdef DEBUG printk("Get inode %x: %d %d: %d\n",inode->i_ino, block, - ((int)pnt) & 0x3ff, inode->i_size); + ((int)pnt) & 0x3ff, (u_long)inode->i_size); #endif inode->i_mtime = inode->i_atime = inode->i_ctime = diff -urN 2.2.15pre16/fs/lockd/svclock.c 2.2.15pre16aa3/fs/lockd/svclock.c --- 2.2.15pre16/fs/lockd/svclock.c Mon Jan 17 16:44:42 2000 +++ 2.2.15pre16aa3/fs/lockd/svclock.c Thu Mar 30 16:00:58 2000 @@ -94,13 +94,15 @@ struct file_lock *fl; dprintk("lockd: nlmsvc_lookup_block f=%p pd=%d %ld-%ld ty=%d\n", - file, lock->fl.fl_pid, lock->fl.fl_start, - lock->fl.fl_end, lock->fl.fl_type); + file, lock->fl.fl_pid, + (u_long)lock->fl.fl_start, (u_long)lock->fl.fl_end, + lock->fl.fl_type); for (head = &nlm_blocked; (block = *head); head = &block->b_next) { fl = &block->b_call.a_args.lock.fl; dprintk(" check f=%p pd=%d %ld-%ld ty=%d\n", - block->b_file, fl->fl_pid, fl->fl_start, - fl->fl_end, fl->fl_type); + block->b_file, fl->fl_pid, + (u_long)fl->fl_start, (u_long)fl->fl_end, + fl->fl_type); if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) { if (remove) *head = block->b_next; @@ -286,8 +288,8 @@ file->f_file.f_dentry->d_inode->i_dev, file->f_file.f_dentry->d_inode->i_ino, lock->fl.fl_type, lock->fl.fl_pid, - lock->fl.fl_start, - lock->fl.fl_end, + (u_long)lock->fl.fl_start, + (u_long)lock->fl.fl_end, wait); /* Lock file against concurrent access */ @@ -359,12 +361,13 @@ file->f_file.f_dentry->d_inode->i_dev, file->f_file.f_dentry->d_inode->i_ino, lock->fl.fl_type, - lock->fl.fl_start, - lock->fl.fl_end); + (u_long)lock->fl.fl_start, + (u_long)lock->fl.fl_end); if ((fl = posix_test_lock(&file->f_file, &lock->fl)) != NULL) { dprintk("lockd: conflicting lock(ty=%d, %ld-%ld)\n", - fl->fl_type, fl->fl_start, fl->fl_end); + fl->fl_type, + (u_long)fl->fl_start, (u_long)fl->fl_end); conflock->caller = "somehost"; /* FIXME */ conflock->oh.len = 0; /* don't return OH info */ conflock->fl = *fl; @@ -390,8 +393,8 @@ file->f_file.f_dentry->d_inode->i_dev, file->f_file.f_dentry->d_inode->i_ino, lock->fl.fl_pid, - lock->fl.fl_start, - lock->fl.fl_end); + (u_long)lock->fl.fl_start, + (u_long)lock->fl.fl_end); /* First, cancel any lock that might be there */ nlmsvc_cancel_blocked(file, lock); @@ -418,8 +421,8 @@ file->f_file.f_dentry->d_inode->i_dev, file->f_file.f_dentry->d_inode->i_ino, lock->fl.fl_pid, - lock->fl.fl_start, - lock->fl.fl_end); + (u_long)lock->fl.fl_start, + (u_long)lock->fl.fl_end); down(&file->f_sema); if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL) diff -urN 2.2.15pre16/fs/lockd/xdr.c 2.2.15pre16aa3/fs/lockd/xdr.c --- 2.2.15pre16/fs/lockd/xdr.c Mon Jan 17 16:44:42 2000 +++ 2.2.15pre16aa3/fs/lockd/xdr.c Thu Mar 30 16:00:58 2000 @@ -142,7 +142,7 @@ fl->fl_pid = ntohl(*p++); fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; /* as good as 
anything else */ - fl->fl_start = ntohl(*p++); + fl->fl_start = (u_long)ntohl(*p++); len = ntohl(*p++); if (len == 0 || (fl->fl_end = fl->fl_start + len - 1) < 0) fl->fl_end = NLM_OFFSET_MAX; @@ -163,11 +163,11 @@ return NULL; *p++ = htonl(fl->fl_pid); - *p++ = htonl(lock->fl.fl_start); + *p++ = htonl((u_long)lock->fl.fl_start); if (lock->fl.fl_end == NLM_OFFSET_MAX) *p++ = xdr_zero; else - *p++ = htonl(lock->fl.fl_end - lock->fl.fl_start + 1); + *p++ = htonl((u_long)(lock->fl.fl_end - lock->fl.fl_start + 1)); return p; } @@ -192,11 +192,11 @@ if (!(p = xdr_encode_netobj(p, &resp->lock.oh))) return 0; - *p++ = htonl(fl->fl_start); + *p++ = htonl((u_long)fl->fl_start); if (fl->fl_end == NLM_OFFSET_MAX) *p++ = xdr_zero; else - *p++ = htonl(fl->fl_end - fl->fl_start + 1); + *p++ = htonl((u_long)(fl->fl_end - fl->fl_start + 1)); } return p; @@ -425,7 +425,7 @@ fl->fl_flags = FL_POSIX; fl->fl_type = excl? F_WRLCK : F_RDLCK; - fl->fl_start = ntohl(*p++); + fl->fl_start = (u_long)ntohl(*p++); len = ntohl(*p++); if (len == 0 || (fl->fl_end = fl->fl_start + len - 1) < 0) fl->fl_end = NLM_OFFSET_MAX; diff -urN 2.2.15pre16/fs/locks.c 2.2.15pre16aa3/fs/locks.c --- 2.2.15pre16/fs/locks.c Mon Jan 17 16:44:43 2000 +++ 2.2.15pre16aa3/fs/locks.c Thu Mar 30 16:00:58 2000 @@ -111,12 +111,12 @@ #include -#define OFFSET_MAX ((off_t)LONG_MAX) /* FIXME: move elsewhere? */ +#define OFFSET_MAX ((loff_t)((~0ULL)>>1)) /* FIXME: move elsewhere? */ static int flock_make_lock(struct file *filp, struct file_lock *fl, unsigned int cmd); static int posix_make_lock(struct file *filp, struct file_lock *fl, - struct flock *l); + struct flock64 *l); static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl); static int posix_locks_conflict(struct file_lock *caller_fl, @@ -195,7 +195,7 @@ if (waiter->fl_prevblock) { printk(KERN_ERR "locks_insert_block: remove duplicated lock " - "(pid=%d %ld-%ld type=%d)\n", + "(pid=%d %Ld-%Ld type=%d)\n", waiter->fl_pid, waiter->fl_start, waiter->fl_end, waiter->fl_type); locks_delete_block(waiter->fl_prevblock, waiter); @@ -323,11 +323,15 @@ { struct file *filp; struct file_lock *fl,file_lock; - struct flock flock; - int error; + int error = -EFAULT; + struct flock64 flock; - error = -EFAULT; - if (copy_from_user(&flock, l, sizeof(flock))) + if (verify_area(VERIFY_READ, l, sizeof(*l)) + || __get_user(flock.l_type, &l->l_type) + || __get_user(flock.l_whence, &l->l_whence) + || __get_user(flock.l_start, &l->l_start) + || __get_user(flock.l_len, &l->l_len) + || __get_user(flock.l_pid, &l->l_pid)) goto out; error = -EINVAL; if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) @@ -349,11 +353,7 @@ error = filp->f_op->lock(filp, F_GETLK, &file_lock); if (error < 0) goto out_putf; - else if (error == LOCK_USE_CLNT) - /* Bypass for NFS with no locking - 2.0.36 compat */ - fl = posix_test_lock(filp, &file_lock); - else - fl = (file_lock.fl_type == F_UNLCK ? 
NULL : &file_lock); + fl = &file_lock; } else { fl = posix_test_lock(filp, &file_lock); } @@ -367,9 +367,24 @@ flock.l_whence = 0; flock.l_type = fl->fl_type; } - error = -EFAULT; - if (!copy_to_user(l, &flock, sizeof(flock))) - error = 0; + + /* Convert to 32-bit offsets (at 32-bit systems) */ + + if (!off_t_presentable(flock.l_start) || + !off_t_presentable(flock.l_len) || + !off_t_presentable(flock.l_start + flock.l_len)) { + error = -EOVERFLOW; + goto out_putf; + } + + error = 0; + if (verify_area(VERIFY_WRITE, l, sizeof(*l)) + || __put_user(flock.l_type, &l->l_type) + || __put_user(flock.l_whence, &l->l_whence) + || __put_user(flock.l_start, &l->l_start) + || __put_user(flock.l_len, &l->l_len) + || __put_user(flock.l_pid, &l->l_pid)) + error = -EFAULT; out_putf: fput(filp); @@ -384,7 +399,7 @@ { struct file *filp; struct file_lock file_lock; - struct flock flock; + struct flock64 flock; struct dentry * dentry; struct inode *inode; int error; @@ -393,7 +408,12 @@ * This might block, so we do it before checking the inode. */ error = -EFAULT; - if (copy_from_user(&flock, l, sizeof(flock))) + if (verify_area(VERIFY_READ, l, sizeof(*l)) + || __get_user(flock.l_type, &l->l_type) + || __get_user(flock.l_whence, &l->l_whence) + || __get_user(flock.l_start, &l->l_start) + || __get_user(flock.l_len, &l->l_len) + || __get_user(flock.l_pid, &l->l_pid)) goto out; /* Get arguments and validate them ... @@ -475,6 +495,152 @@ return error; } +#if BITS_PER_LONG == 32 +/* Report the first existing lock that would conflict with l. + * This implements the F_GETLK command of fcntl(). + */ +int fcntl_getlk64(unsigned int fd, struct flock64 *l) +{ + struct file *filp; + struct file_lock *fl,file_lock; + struct flock64 flock; + int error; + + error = -EFAULT; + if (copy_from_user(&flock, l, sizeof(flock))) + goto out; + error = -EINVAL; + if ((flock.l_type != F_RDLCK) && (flock.l_type != F_WRLCK)) + goto out; + + error = -EBADF; + filp = fget(fd); + if (!filp) + goto out; + + error = -EINVAL; + if (!filp->f_dentry || !filp->f_dentry->d_inode) + goto out_putf; + + if (!posix_make_lock(filp, &file_lock, &flock)) + goto out_putf; + + if (filp->f_op->lock) { + error = filp->f_op->lock(filp, F_GETLK, &file_lock); + if (error < 0) + goto out_putf; + else if (error == LOCK_USE_CLNT) + /* Bypass for NFS with no locking - 2.0.36 compat */ + fl = posix_test_lock(filp, &file_lock); + else + fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); + } else { + fl = posix_test_lock(filp, &file_lock); + } + + flock.l_type = F_UNLCK; + if (fl != NULL) { + flock.l_pid = fl->fl_pid; + flock.l_start = fl->fl_start; + flock.l_len = fl->fl_end == OFFSET_MAX ? 0 : + fl->fl_end - fl->fl_start + 1; + flock.l_whence = 0; + flock.l_type = fl->fl_type; + } + error = -EFAULT; + if (!copy_to_user(l, &flock, sizeof(flock))) + error = 0; + +out_putf: + fput(filp); +out: + return error; +} + +/* Apply the lock described by l to an open file descriptor. + * This implements both the F_SETLK and F_SETLKW commands of fcntl(). + */ +int fcntl_setlk64(unsigned int fd, unsigned int cmd, struct flock64 *l) +{ + struct file *filp; + struct file_lock file_lock; + struct flock64 flock; + struct dentry * dentry; + struct inode *inode; + int error; + + /* + * This might block, so we do it before checking the inode. + */ + error = -EFAULT; + if (copy_from_user(&flock, l, sizeof(flock))) + goto out; + + /* Get arguments and validate them ... 
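fcntl_getlk() above may have to report a lock held by a 64-bit-aware process back through the legacy 32-bit struct flock, and per LFS it must fail with EOVERFLOW rather than silently truncate; that is what the off_t_presentable() tests guard. The helper itself is not part of this excerpt; a plausible form, assuming a 32-bit off_t:

	static inline int off_t_presentable(loff_t v)
	{
		/* representable as a non-negative 31-bit off_t */
		return v >= 0 && v <= 0x7fffffffLL;
	}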
+ */ + + error = -EBADF; + filp = fget(fd); + if (!filp) + goto out; + + error = -EINVAL; + if (!(dentry = filp->f_dentry)) + goto out_putf; + if (!(inode = dentry->d_inode)) + goto out_putf; + + /* Don't allow mandatory locks on files that may be memory mapped + * and shared. + */ + if (IS_MANDLOCK(inode) && + (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID && + inode->i_mmap) { + struct vm_area_struct *vma = inode->i_mmap; + error = -EAGAIN; + do { + if (vma->vm_flags & VM_MAYSHARE) + goto out_putf; + } while ((vma = vma->vm_next_share) != NULL); + } + + error = -EINVAL; + if (!posix_make_lock(filp, &file_lock, &flock)) + goto out_putf; + + error = -EBADF; + switch (flock.l_type) { + case F_RDLCK: + if (!(filp->f_mode & FMODE_READ)) + goto out_putf; + break; + case F_WRLCK: + if (!(filp->f_mode & FMODE_WRITE)) + goto out_putf; + break; + case F_UNLCK: + break; + case F_SHLCK: + case F_EXLCK: + default: + error = -EINVAL; + goto out_putf; + } + + if (filp->f_op->lock != NULL) { + error = filp->f_op->lock(filp, cmd, &file_lock); + if (error < 0) + goto out_putf; + } + error = posix_lock_file(filp, &file_lock, cmd == F_SETLKW); + +out_putf: + fput(filp); +out: + return error; +} +#endif /* BITS_PER_LONG == 32 */ + /* * This function is called when the file is being removed * from the task's fd array. @@ -654,9 +820,9 @@ * style lock. */ static int posix_make_lock(struct file *filp, struct file_lock *fl, - struct flock *l) + struct flock64 *l) { - off_t start; + loff_t start; memset(fl, 0, sizeof(*fl)); @@ -1209,8 +1375,9 @@ p += sprintf(p, "%s ", (fl->fl_type == F_RDLCK) ? "READ " : "WRITE"); p += sprintf(p, "%d %s:%ld %ld %ld ", fl->fl_pid, - kdevname(inode->i_dev), inode->i_ino, fl->fl_start, - fl->fl_end); + kdevname(inode->i_dev), inode->i_ino, + (u_long)fl->fl_start, + (u_long)fl->fl_end); sprintf(p, "%08lx %08lx %08lx %08lx %08lx\n", (long)fl, (long)fl->fl_prevlink, (long)fl->fl_nextlink, (long)fl->fl_next, (long)fl->fl_nextblock); @@ -1271,6 +1438,3 @@ *start = buffer; return (q - buffer); } - - - diff -urN 2.2.15pre16/fs/minix/file.c 2.2.15pre16aa3/fs/minix/file.c --- 2.2.15pre16/fs/minix/file.c Mon Jan 17 16:44:42 2000 +++ 2.2.15pre16aa3/fs/minix/file.c Thu Mar 30 16:00:58 2000 @@ -70,7 +70,7 @@ size_t count, loff_t *ppos) { struct inode * inode = filp->f_dentry->d_inode; - off_t pos; + loff_t pos; ssize_t written, c; struct buffer_head * bh; char * p; @@ -87,6 +87,24 @@ pos = inode->i_size; else pos = *ppos; + + /* L-F-S spec 2.2.1.27: */ + if (!(filp->f_flags & O_LARGEFILE)) { + if (pos >= 0x7ffffffeULL) /* pos@2G forbidden */ + return -EFBIG; + + if (pos + count >= 0x7fffffffULL) + /* Write only until end of allowed region */ + count = 0x7fffffffULL - pos; + } + /* MINIX i-node file-size can't exceed 4G-1 */ + /* With 1k blocks and triple indirection MINIX can have files + up to 16 GB in size -- filesystem maximum is then 4G*1k = 4T */ + if (pos >= 0xffffffffULL) + return -EFBIG; /* Absolutely too much! */ + if ((pos + count) >= 0x100000000ULL) /* too much to write! */ + count = 0xffffffffULL - pos; + written = 0; while (written < count) { bh = minix_getblk(inode,pos/BLOCK_SIZE,1); diff -urN 2.2.15pre16/fs/ncpfs/file.c 2.2.15pre16aa3/fs/ncpfs/file.c --- 2.2.15pre16/fs/ncpfs/file.c Wed Mar 29 19:42:10 2000 +++ 2.2.15pre16aa3/fs/ncpfs/file.c Thu Mar 30 16:00:58 2000 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "ncplib_kernel.h" @@ -161,7 +162,7 @@ /* First read in as much as possible for each bufsize. 
*/ while (already_read < count) { int read_this_time; - size_t to_read = min(bufsize - (pos % bufsize), + size_t to_read = min(bufsize - (pos & (bufsize-1)), count - already_read); error = ncp_read_bounce(NCP_SERVER(inode), @@ -201,7 +202,7 @@ struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; size_t already_written = 0; - off_t pos; + loff_t pos; size_t bufsize; int errno; void* bouncebuffer; @@ -238,12 +239,18 @@ already_written = 0; + /* Maximum file size: 2G-1 */ + if (pos >= 0x7fffffffULL) + return -EFBIG; + if ((pos + count) >= 0x7fffffffULL) + count = 0x7fffffffULL - pos; + bouncebuffer = kmalloc(bufsize, GFP_NFS); if (!bouncebuffer) return -EIO; /* -ENOMEM */ while (already_written < count) { int written_this_time; - size_t to_write = min(bufsize - (pos % bufsize), + size_t to_write = min(bufsize - (pos & (bufsize-1)), count - already_written); if (copy_from_user(bouncebuffer, buf, to_write)) { diff -urN 2.2.15pre16/fs/ncpfs/inode.c 2.2.15pre16aa3/fs/ncpfs/inode.c --- 2.2.15pre16/fs/ncpfs/inode.c Wed Mar 29 19:42:10 2000 +++ 2.2.15pre16aa3/fs/ncpfs/inode.c Thu Mar 30 16:00:58 2000 @@ -132,7 +132,7 @@ } inode->i_blocks = 0; if ((inode->i_size)&&(inode->i_blksize)) { - inode->i_blocks = (inode->i_size-1)/(inode->i_blksize)+1; + inode->i_blocks = ((inode->i_size-1) >> fslog2(inode->i_blksize)) +1; } inode->i_mtime = ncp_date_dos2unix(le16_to_cpu(nwi->modifyTime), @@ -203,8 +203,7 @@ inode->i_blocks = 0; if ((inode->i_blksize != 0) && (inode->i_size != 0)) { - inode->i_blocks = - (inode->i_size - 1) / inode->i_blksize + 1; + inode->i_blocks = ((inode->i_size - 1) >> fslog2(inode->i_blksize)) + 1; } inode->i_mtime = ncp_date_dos2unix(le16_to_cpu(nwi->modifyTime), diff -urN 2.2.15pre16/fs/nfs/file.c 2.2.15pre16aa3/fs/nfs/file.c --- 2.2.15pre16/fs/nfs/file.c Mon Jan 17 16:44:42 2000 +++ 2.2.15pre16aa3/fs/nfs/file.c Thu Mar 30 16:00:58 2000 @@ -114,6 +114,11 @@ dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long) *ppos); + /* Unconditionally allow only up to 2G files */ + /* FIXME: NFSv3 could allow 64-bit file offsets! */ + if (*ppos >= 0x7ffffffeULL) + return -EOVERFLOW; + result = nfs_revalidate_inode(NFS_DSERVER(dentry), dentry); if (!result) result = generic_file_read(file, buf, count, ppos); @@ -178,6 +183,13 @@ if (result) goto out; + /* Unconditionally allow only up to 2G files */ + /* FIXME: NFSv3 could allow 64-bit file offsets! 
*/ + if (*ppos >= 0x7ffffffeULL) { + result = -EOVERFLOW; + goto out; + } + result = count; if (!count) goto out; @@ -203,7 +215,7 @@ dprintk("NFS: nfs_lock(f=%4x/%ld, t=%x, fl=%x, r=%ld:%ld)\n", inode->i_dev, inode->i_ino, fl->fl_type, fl->fl_flags, - fl->fl_start, fl->fl_end); + (u_long)fl->fl_start, (u_long)fl->fl_end); if (!inode) return -EINVAL; diff -urN 2.2.15pre16/fs/nfs/inode.c 2.2.15pre16aa3/fs/nfs/inode.c --- 2.2.15pre16/fs/nfs/inode.c Wed Mar 29 19:42:10 2000 +++ 2.2.15pre16aa3/fs/nfs/inode.c Thu Mar 30 16:00:58 2000 @@ -700,7 +700,7 @@ */ if (sattr.size != (u32) -1) { if (sattr.size != fattr.size) - printk("nfs_notify_change: sattr=%d, fattr=%d??\n", + printk("nfs_notify_change: sattr=%Ld, fattr=%Ld??\n", sattr.size, fattr.size); inode->i_size = sattr.size; inode->i_mtime = fattr.mtime.seconds; diff -urN 2.2.15pre16/fs/nfs/proc.c 2.2.15pre16aa3/fs/nfs/proc.c --- 2.2.15pre16/fs/nfs/proc.c Mon Jan 17 16:44:42 2000 +++ 2.2.15pre16aa3/fs/nfs/proc.c Thu Mar 30 16:00:58 2000 @@ -110,14 +110,14 @@ int nfs_proc_read(struct nfs_server *server, struct nfs_fh *fhandle, int swap, - unsigned long offset, unsigned int count, - void *buffer, struct nfs_fattr *fattr) + loff_t offset, unsigned int count, + void *buffer, struct nfs_fattr *fattr) { struct nfs_readargs arg = { fhandle, offset, count, buffer }; struct nfs_readres res = { fattr, count }; int status; - dprintk("NFS call read %d @ %ld\n", count, offset); + dprintk("NFS call read %d @ %Ld\n", count, offset); status = rpc_call(server->client, NFSPROC_READ, &arg, &res, swap? NFS_RPC_SWAPFLAGS : 0); dprintk("NFS reply read: %d\n", status); @@ -126,13 +126,13 @@ int nfs_proc_write(struct nfs_server *server, struct nfs_fh *fhandle, int swap, - unsigned long offset, unsigned int count, - const void *buffer, struct nfs_fattr *fattr) + loff_t offset, unsigned int count, + const void *buffer, struct nfs_fattr *fattr) { struct nfs_writeargs arg = { fhandle, offset, count, buffer }; int status; - dprintk("NFS call write %d @ %ld\n", count, offset); + dprintk("NFS call write %d @ %Ld\n", count, offset); status = rpc_call(server->client, NFSPROC_WRITE, &arg, fattr, swap? (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS) : 0); dprintk("NFS reply read: %d\n", status); diff -urN 2.2.15pre16/fs/nfs/read.c 2.2.15pre16aa3/fs/nfs/read.c --- 2.2.15pre16/fs/nfs/read.c Wed Mar 29 19:42:10 2000 +++ 2.2.15pre16aa3/fs/nfs/read.c Thu Mar 30 16:00:58 2000 @@ -52,7 +52,7 @@ */ static inline void nfs_readreq_setup(struct nfs_rreq *req, struct nfs_fh *fh, - unsigned long offset, void *buffer, unsigned int rsize) + loff_t offset, void *buffer, unsigned int rsize) { req->ra_args.fh = fh; req->ra_args.offset = offset; @@ -70,12 +70,12 @@ nfs_readpage_sync(struct dentry *dentry, struct inode *inode, struct page *page) { struct nfs_rreq rqst; - unsigned long offset = page->offset; - char *buffer = (char *) page_address(page); - int rsize = NFS_SERVER(inode)->rsize; - int result, refresh = 0; - int count = PAGE_SIZE; - int flags = IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0; + loff_t offset = pgoff2loff(page->index); + char *buffer = (char *) page_address(page); + int rsize = NFS_SERVER(inode)->rsize; + int result = 0, refresh = 0; + int count = PAGE_SIZE; + int flags = IS_SWAPFILE(inode)? 
NFS_RPC_SWAPFLAGS : 0; dprintk("NFS: nfs_readpage_sync(%p)\n", page); clear_bit(PG_error, &page->flags); @@ -84,13 +84,28 @@ if (count < rsize) rsize = count; - dprintk("NFS: nfs_proc_read(%s, (%s/%s), %ld, %d, %p)\n", + dprintk("NFS: nfs_proc_read(%s, (%s/%s), %Ld, %d, %p)\n", NFS_SERVER(inode)->hostname, dentry->d_parent->d_name.name, dentry->d_name.name, offset, rsize, buffer); + /* FIXME: NFSv3 could allow 64-bit offsets! ... */ + + if (offset > 0x7ffffffeULL) { + if (result) + break; + result = -EOVERFLOW; + goto io_error; + } + if ((offset + rsize) > 0x7fffffffULL) /* 2G-1 */ + rsize = 0x7fffffffULL - offset; + + /* ... END FIXME! */ + /* Set up arguments and perform rpc call */ - nfs_readreq_setup(&rqst, NFS_FH(dentry), offset, buffer, rsize); + nfs_readreq_setup(&rqst, NFS_FH(dentry), offset, + buffer, rsize); + result = rpc_call(NFS_CLIENT(inode), NFSPROC_READ, &rqst.ra_args, &rqst.ra_res, flags); @@ -173,8 +188,16 @@ unsigned long address = page_address(page); struct nfs_rreq *req; int result = -1, flags; + loff_t loffset = pgoff2loff(page->index); dprintk("NFS: nfs_readpage_async(%p)\n", page); + + /* FIXME: NFSv3 allows 64-bit offsets.. */ + if ((loffset + PAGE_SIZE) >= 0x7fffffffULL) { + dprintk("NFS: Async read beyond 2G-1 marker!\n"); + return -EOVERFLOW; + } + if (NFS_CONGESTED(inode)) goto out_defer; @@ -186,8 +209,9 @@ /* Initialize request */ /* N.B. Will the dentry remain valid for life of request? */ - nfs_readreq_setup(req, NFS_FH(dentry), page->offset, - (void *) address, PAGE_SIZE); + nfs_readreq_setup(req, NFS_FH(dentry), loffset, + (void *) address, PAGE_SIZE); + req->ra_inode = inode; req->ra_page = page; /* count has been incremented by caller */ @@ -230,8 +254,8 @@ struct inode *inode = dentry->d_inode; int error; - dprintk("NFS: nfs_readpage (%p %ld@%ld)\n", - page, PAGE_SIZE, page->offset); + dprintk("NFS: nfs_readpage (%p %ld@%Ld)\n", + page, PAGE_SIZE, pgoff2loff(page->index)); atomic_inc(&page->count); set_bit(PG_locked, &page->flags); diff -urN 2.2.15pre16/fs/nfs/write.c 2.2.15pre16aa3/fs/nfs/write.c --- 2.2.15pre16/fs/nfs/write.c Mon Jan 17 16:44:42 2000 +++ 2.2.15pre16aa3/fs/nfs/write.c Thu Mar 30 16:00:58 2000 @@ -86,26 +86,43 @@ */ static int nfs_writepage_sync(struct dentry *dentry, struct inode *inode, - struct page *page, unsigned long offset, unsigned int count) + struct page *page, unsigned int offset, unsigned int count) { unsigned int wsize = NFS_SERVER(inode)->wsize; int result, refresh = 0, written = 0; u8 *buffer; struct nfs_fattr fattr; + loff_t loffset = pgoff2loff(page->index)+offset; - dprintk("NFS: nfs_writepage_sync(%s/%s %d@%ld)\n", + dprintk("NFS: nfs_writepage_sync(%s/%s %d@%Ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - count, page->offset + offset); + count, loffset); buffer = (u8 *) page_address(page) + offset; - offset += page->offset; + + /* FIXME: NFSv3 !!! */ +#if 1 + if (loffset >= 0x7ffffffeULL) + return -EFBIG; + if (loffset + count >= 0x7fffffffULL) { + /* At MOST this much! 
*/ + count = 0x7fffffffULL - loffset; + } +#else + if (S_ISREG(inode->i_flags) && + !(dentry->d_file->f_flags & O_LARGEFILE) && + (loffset >= 0x7ffffffeULL || + loffset + count >= 0x7fffffffULL) { + /* Writing beyond LargeFile maximums without O_LARGEFILE */ + } +#endif do { if (count < wsize && !IS_SWAPFILE(inode)) wsize = count; result = nfs_proc_write(NFS_DSERVER(dentry), NFS_FH(dentry), - IS_SWAPFILE(inode), offset, wsize, + IS_SWAPFILE(inode), loffset, wsize, buffer, &fattr); if (result < 0) { @@ -118,15 +135,15 @@ wsize, result); refresh = 1; buffer += wsize; - offset += wsize; + loffset += wsize; written += wsize; count -= wsize; /* * If we've extended the file, update the inode * now so we don't invalidate the cache. */ - if (offset > inode->i_size) - inode->i_size = offset; + if (loffset > inode->i_size) + inode->i_size = loffset; } while (count); io_error: @@ -271,7 +288,7 @@ dprintk("NFS: create_write_request(%s/%s, %ld+%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - page->offset + offset, bytes); + (u_long)pgoff2loff(page->index) + offset, bytes); /* FIXME: Enforce hard limit on number of concurrent writes? */ wreq = (struct nfs_wreq *) kmalloc(sizeof(*wreq), GFP_KERNEL); @@ -417,7 +434,7 @@ dprintk("NFS: nfs_updatepage(%s/%s %d@%ld, sync=%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - count, page->offset+offset, sync); + count, (u_long)pgoff2loff(page->index)+offset, sync); /* * Try to find a corresponding request on the writeback queue. @@ -603,7 +620,7 @@ /* Setup the task struct for a writeback call */ req->wb_flags |= NFS_WRITE_INPROGRESS; req->wb_args.fh = NFS_FH(dentry); - req->wb_args.offset = page->offset + req->wb_offset; + req->wb_args.offset = pgoff2loff(page->index) + req->wb_offset; req->wb_args.count = req->wb_bytes; req->wb_args.buffer = (void *) (page_address(page) + req->wb_offset); diff -urN 2.2.15pre16/fs/nfsd/vfs.c 2.2.15pre16aa3/fs/nfsd/vfs.c --- 2.2.15pre16/fs/nfsd/vfs.c Mon Jan 17 16:44:42 2000 +++ 2.2.15pre16aa3/fs/nfsd/vfs.c Thu Mar 30 16:00:58 2000 @@ -503,8 +503,9 @@ /* Write back readahead params */ if (ra != NULL) { dprintk("nfsd: raparms %ld %ld %ld %ld %ld\n", - file.f_reada, file.f_ramax, file.f_raend, - file.f_ralen, file.f_rawin); + (u_long)file.f_reada, (u_long)file.f_ramax, + (u_long)file.f_raend, (u_long)file.f_ralen, + (u_long)file.f_rawin); ra->p_reada = file.f_reada; ra->p_ramax = file.f_ramax; ra->p_raend = file.f_raend; diff -urN 2.2.15pre16/fs/ntfs/fs.c 2.2.15pre16aa3/fs/ntfs/fs.c --- 2.2.15pre16/fs/ntfs/fs.c Mon Jan 17 16:44:42 2000 +++ 2.2.15pre16aa3/fs/ntfs/fs.c Thu Mar 30 16:00:58 2000 @@ -818,6 +818,7 @@ struct statfs fs; struct inode *mft; ntfs_volume *vol; + ntfs_u64 size; int error; ntfs_debug(DEBUG_OTHER, "ntfs_statfs\n"); @@ -826,16 +827,17 @@ fs.f_type=NTFS_SUPER_MAGIC; fs.f_bsize=vol->clustersize; - error = ntfs_get_volumesize( NTFS_SB2VOL( sb ), &fs.f_blocks ); + error = ntfs_get_volumesize( NTFS_SB2VOL( sb ), &size ); if( error ) return -error; + fs.f_blocks = size; fs.f_bfree=ntfs_get_free_cluster_count(vol->bitmap); fs.f_bavail=fs.f_bfree; /* Number of files is limited by free space only, so we lie here */ fs.f_ffree=0; mft=iget(sb,FILE_MFT); - fs.f_files=mft->i_size/vol->mft_recordsize; + fs.f_files = (long)mft->i_size / vol->mft_recordsize; iput(mft); /* should be read from volume */ diff -urN 2.2.15pre16/fs/ntfs/super.c 2.2.15pre16aa3/fs/ntfs/super.c --- 2.2.15pre16/fs/ntfs/super.c Mon Jan 17 16:44:42 2000 +++ 2.2.15pre16aa3/fs/ntfs/super.c Thu Mar 30 16:00:58 2000 @@ -253,7 +253,7 @@ * 
Writes the volume size into vol_size. Returns 0 if successful * or error. */ -int ntfs_get_volumesize(ntfs_volume *vol, long *vol_size ) +int ntfs_get_volumesize(ntfs_volume *vol, ntfs_u64 *vol_size ) { ntfs_io io; ntfs_u64 size; @@ -274,9 +274,7 @@ ntfs_getput_clusters(vol,0,0,&io); size=NTFS_GETU64(cluster0+0x28); ntfs_free(cluster0); - /* FIXME: more than 2**32 cluster */ - /* FIXME: gcc will emit udivdi3 if we don't truncate it */ - *vol_size = ((unsigned long)size)/vol->clusterfactor; + *vol_size = size; return 0; } diff -urN 2.2.15pre16/fs/ntfs/super.h 2.2.15pre16aa3/fs/ntfs/super.h --- 2.2.15pre16/fs/ntfs/super.h Mon Jan 17 16:44:42 2000 +++ 2.2.15pre16aa3/fs/ntfs/super.h Thu Mar 30 16:00:58 2000 @@ -10,7 +10,7 @@ #define ALLOC_REQUIRE_SIZE 2 int ntfs_get_free_cluster_count(ntfs_inode *bitmap); -int ntfs_get_volumesize(ntfs_volume *vol, long *vol_size ); +int ntfs_get_volumesize(ntfs_volume *vol, ntfs_u64 *vol_size ); int ntfs_init_volume(ntfs_volume *vol,char *boot); int ntfs_load_special_files(ntfs_volume *vol); int ntfs_release_volume(ntfs_volume *vol); diff -urN 2.2.15pre16/fs/open.c 2.2.15pre16aa3/fs/open.c --- 2.2.15pre16/fs/open.c Mon Jan 17 16:44:43 2000 +++ 2.2.15pre16aa3/fs/open.c Thu Mar 30 16:00:58 2000 @@ -12,7 +12,7 @@ #include -asmlinkage int sys_statfs(const char * path, struct statfs * buf) +asmlinkage long sys_statfs(const char * path, struct statfs * buf) { struct dentry * dentry; int error; @@ -34,7 +34,7 @@ return error; } -asmlinkage int sys_fstatfs(unsigned int fd, struct statfs * buf) +asmlinkage long sys_fstatfs(unsigned int fd, struct statfs * buf) { struct file * file; struct inode * inode; @@ -63,15 +63,16 @@ return error; } -int do_truncate(struct dentry *dentry, unsigned long length) +int do_truncate(struct dentry *dentry, loff_t length) { struct inode *inode = dentry->d_inode; int error; struct iattr newattrs; - /* Not pretty: "inode->i_size" shouldn't really be "off_t". But it is. */ - if ((off_t) length < 0) - return -EINVAL; + /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ + error = -EINVAL; + if (length < 0) + goto out; down(&inode->i_sem); newattrs.ia_size = length; @@ -84,15 +85,20 @@ inode->i_op->truncate(inode); } up(&inode->i_sem); +out: return error; } -asmlinkage int sys_truncate(const char * path, unsigned long length) +static inline long do_sys_truncate(const char * path, loff_t length) { struct dentry * dentry; struct inode * inode; int error; + error = -EINVAL; + if (length < 0) + goto out_nolock; + lock_kernel(); dentry = namei(path); @@ -133,10 +139,16 @@ dput(dentry); out: unlock_kernel(); +out_nolock: return error; } -asmlinkage int sys_ftruncate(unsigned int fd, unsigned long length) +asmlinkage long sys_truncate(const char * path, unsigned long length) +{ + return do_sys_truncate(path, length); +} + +static inline long do_sys_ftruncate(unsigned int fd, loff_t length) { struct inode * inode; struct dentry *dentry; @@ -171,6 +183,24 @@ return error; } +asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length) +{ + return do_sys_ftruncate(fd, length); +} + +/* LFS versions of truncate are only needed on 32 bit machines */ +#if BITS_PER_LONG == 32 +asmlinkage long sys_truncate64(const char * path, loff_t length) +{ + return do_sys_truncate(path, length); +} + +asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) +{ + return do_sys_ftruncate(fd, length); +} +#endif + #ifndef __alpha__ /* @@ -184,7 +214,7 @@ * must be owner or have write permission. 
* Else, update from *times, must be owner or super user. */ -asmlinkage int sys_utime(char * filename, struct utimbuf * times) +asmlinkage long sys_utime(char * filename, struct utimbuf * times) { int error; struct dentry * dentry; @@ -232,7 +262,7 @@ * must be owner or have write permission. * Else, update from *times, must be owner or super user. */ -asmlinkage int sys_utimes(char * filename, struct timeval * utimes) +asmlinkage long sys_utimes(char * filename, struct timeval * utimes) { int error; struct dentry * dentry; @@ -278,7 +308,7 @@ * We do this by temporarily clearing all FS-related capabilities and * switching the fsuid/fsgid around to the real ones. */ -asmlinkage int sys_access(const char * filename, int mode) +asmlinkage long sys_access(const char * filename, int mode) { struct dentry * dentry; int old_fsuid, old_fsgid; @@ -319,7 +349,7 @@ return res; } -asmlinkage int sys_chdir(const char * filename) +asmlinkage long sys_chdir(const char * filename) { int error; struct inode *inode; @@ -354,7 +384,7 @@ return error; } -asmlinkage int sys_fchdir(unsigned int fd) +asmlinkage long sys_fchdir(unsigned int fd) { struct file *file; struct dentry *dentry; @@ -391,7 +421,7 @@ return error; } -asmlinkage int sys_chroot(const char * filename) +asmlinkage long sys_chroot(const char * filename) { int error; struct inode *inode; @@ -431,7 +461,7 @@ return error; } -asmlinkage int sys_fchmod(unsigned int fd, mode_t mode) +asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) { struct inode * inode; struct dentry * dentry; @@ -469,7 +499,7 @@ return err; } -asmlinkage int sys_chmod(const char * filename, mode_t mode) +asmlinkage long sys_chmod(const char * filename, mode_t mode) { struct dentry * dentry; struct inode * inode; @@ -565,7 +595,7 @@ return error; } -asmlinkage int sys_chown(const char * filename, uid_t user, gid_t group) +asmlinkage long sys_chown(const char * filename, uid_t user, gid_t group) { struct dentry * dentry; int error; @@ -582,7 +612,7 @@ return error; } -asmlinkage int sys_lchown(const char * filename, uid_t user, gid_t group) +asmlinkage long sys_lchown(const char * filename, uid_t user, gid_t group) { struct dentry * dentry; int error; @@ -600,7 +630,7 @@ } -asmlinkage int sys_fchown(unsigned int fd, uid_t user, gid_t group) +asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) { struct dentry * dentry; struct file * file; @@ -760,6 +790,9 @@ char * tmp; int fd, error; +#if BITS_PER_LONG != 32 + flags |= O_LARGEFILE; +#endif tmp = getname(filename); fd = PTR_ERR(tmp); if (!IS_ERR(tmp)) { @@ -790,7 +823,7 @@ * For backward compatibility? Maybe this should be moved * into arch/i386 instead? */ -asmlinkage int sys_creat(const char * pathname, int mode) +asmlinkage long sys_creat(const char * pathname, int mode) { return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); } @@ -863,7 +896,7 @@ * This routine simulates a hangup on the tty, to arrange that users * are given clean terminals at login time. */ -asmlinkage int sys_vhangup(void) +asmlinkage long sys_vhangup(void) { int ret = -EPERM; diff -urN 2.2.15pre16/fs/proc/array.c 2.2.15pre16aa3/fs/proc/array.c --- 2.2.15pre16/fs/proc/array.c Wed Mar 29 19:42:10 2000 +++ 2.2.15pre16aa3/fs/proc/array.c Thu Mar 30 16:00:58 2000 @@ -42,6 +42,8 @@ * Alan Cox : security fixes. 
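
A note on the sys_open() hunk above: on architectures where BITS_PER_LONG is not 32, every descriptor now carries O_LARGEFILE implicitly, so the 2G-1 EOVERFLOW/EFBIG guards added throughout this patch can only fire for 32-bit userland that opened the file without O_LARGEFILE. Distilled into a sketch (illustrative only, not patch code):

	/* Sketch: the effective-flags rule introduced by the sys_open()
	 * change above.  64-bit kernels always set O_LARGEFILE; 32-bit
	 * kernels honour whatever userland passed in. */
	static inline int lfs_effective_flags(int flags)
	{
	#if BITS_PER_LONG != 32
		flags |= O_LARGEFILE;
	#endif
		return flags;
	}
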
* * + * Gerhard Wichert <Gerhard.Wichert@pdb.siemens.de> : added BIGMEM support + * Siemens AG */ #include @@ -387,6 +389,8 @@ "MemShared: %8lu kB\n" "Buffers: %8lu kB\n" "Cached: %8lu kB\n" + "BigTotal: %8lu kB\n" + "BigFree: %8lu kB\n" "SwapTotal: %8lu kB\n" "SwapFree: %8lu kB\n", i.totalram >> 10, @@ -394,6 +398,8 @@ i.sharedram >> 10, i.bufferram >> 10, page_cache_size << (PAGE_SHIFT - 10), + i.totalbig >> 10, + i.freebig >> 10, i.totalswap >> 10, i.freeswap >> 10); } @@ -449,6 +455,8 @@ return pte_page(pte) + (ptr & ~PAGE_MASK); } +#include <linux/bigmem.h> + static int get_array(struct task_struct *p, unsigned long start, unsigned long end, char * buffer) { unsigned long addr; @@ -461,6 +469,7 @@ addr = get_phys_addr(p, start); if (!addr) return result; + addr = kmap(addr, KM_READ); do { c = *(char *) addr; if (!c) @@ -468,12 +477,19 @@ if (size < PAGE_SIZE) buffer[size++] = c; else + { + kunmap(addr, KM_READ); return result; + } addr++; start++; if (!c && start >= end) + { + kunmap(addr, KM_READ); return result; + } } while (addr & ~PAGE_MASK); + kunmap(addr-1, KM_READ); } return result; } @@ -1139,11 +1155,11 @@ * + (index into the line) */ /* for systems with sizeof(void*) == 4: */ -#define MAPS_LINE_FORMAT4 "%08lx-%08lx %s %08lx %s %lu" -#define MAPS_LINE_MAX4 49 /* sum of 8 1 8 1 4 1 8 1 5 1 10 1 */ +#define MAPS_LINE_FORMAT4 "%08lx-%08lx %s %016Lx %s %lu" +#define MAPS_LINE_MAX4 57 /* sum of 8 1 8 1 4 1 16 1 5 1 10 1 */ /* for systems with sizeof(void*) == 8: */ -#define MAPS_LINE_FORMAT8 "%016lx-%016lx %s %016lx %s %lu" +#define MAPS_LINE_FORMAT8 "%016lx-%016lx %s %016Lx %s %lu" #define MAPS_LINE_MAX8 73 /* sum of 16 1 16 1 4 1 16 1 5 1 10 1 */ #define MAPS_LINE_MAX MAPS_LINE_MAX8 diff -urN 2.2.15pre16/fs/proc/fd.c 2.2.15pre16aa3/fs/proc/fd.c --- 2.2.15pre16/fs/proc/fd.c Sun Oct 31 23:31:32 1999 +++ 2.2.15pre16aa3/fs/proc/fd.c Thu Mar 30 16:00:56 2000 @@ -87,7 +87,6 @@ fd = 0; len = dentry->d_name.len; name = dentry->d_name.name; - if (len > 1 && *name == '0') goto out; while (len-- > 0) { c = *name - '0'; name++; diff -urN 2.2.15pre16/fs/proc/mem.c 2.2.15pre16aa3/fs/proc/mem.c --- 2.2.15pre16/fs/proc/mem.c Wed Jan 5 14:16:55 2000 +++ 2.2.15pre16aa3/fs/proc/mem.c Thu Mar 30 16:00:57 2000 @@ -10,6 +10,7 @@ #include #include #include +#include <linux/bigmem.h> #include #include @@ -120,7 +121,9 @@ i = PAGE_SIZE-(addr & ~PAGE_MASK); if (i > scount) i = scount; + page = (char *) kmap((unsigned long) page, KM_READ); copy_to_user(tmp, page, i); + kunmap((unsigned long) page, KM_READ); addr += i; tmp += i; scount -= i; @@ -177,7 +180,9 @@ i = PAGE_SIZE-(addr & ~PAGE_MASK); if (i > count) i = count; + page = (unsigned long) kmap((unsigned long) page, KM_WRITE); copy_from_user(page, tmp, i); + kunmap((unsigned long) page, KM_WRITE); addr += i; tmp += i; count -= i; diff -urN 2.2.15pre16/fs/proc/root.c 2.2.15pre16aa3/fs/proc/root.c --- 2.2.15pre16/fs/proc/root.c Sun Oct 31 23:31:32 1999 +++ 2.2.15pre16aa3/fs/proc/root.c Thu Mar 30 16:00:56 2000 @@ -845,7 +845,6 @@ } pid *= 10; pid += c; - if (!pid) break; if (pid & 0xffff0000) { pid = 0; break; diff -urN 2.2.15pre16/fs/read_write.c 2.2.15pre16aa3/fs/read_write.c --- 2.2.15pre16/fs/read_write.c Mon Jan 17 16:44:43 2000 +++ 2.2.15pre16aa3/fs/read_write.c Thu Mar 30 16:00:58 2000 @@ -39,6 +39,10 @@ static inline loff_t llseek(struct file *file, loff_t offset, int origin) { loff_t (*fn)(struct file *, loff_t, int); + umode_t mode = file->f_dentry->d_inode->i_mode; + + if (S_ISFIFO(mode) || S_ISSOCK(mode)) + return -ESPIPE; fn = default_llseek; if (file->f_op && file->f_op->llseek) @@ -48,7 +52,7 @@ 
asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin) { - off_t retval; + off_t retval, oldpos; struct file * file; struct dentry * dentry; struct inode * inode; @@ -62,9 +66,19 @@ if (!(dentry = file->f_dentry) || !(inode = dentry->d_inode)) goto out_putf; + oldpos = file->f_pos; retval = -EINVAL; if (origin <= 2) retval = llseek(file, offset, origin); + + /* Demand L-F-S compliance only from normal files, + thus raw devices can do whatever they please.. */ + if (retval >= 0 && S_ISREG(inode->i_mode) && + !(file->f_flags & O_LARGEFILE) && + file->f_pos >= 0x7ffffffeULL) { + file->f_pos = oldpos; + retval = -EOVERFLOW; + } out_putf: fput(file); bad: @@ -81,7 +95,7 @@ struct file * file; struct dentry * dentry; struct inode * inode; - loff_t offset; + loff_t offset, oldpos; lock_kernel(); retval = -EBADF; @@ -96,6 +110,7 @@ if (origin > 2) goto out_putf; + oldpos = file->f_pos; offset = llseek(file, ((loff_t) offset_high << 32) | offset_low, origin); @@ -105,6 +120,14 @@ if (!copy_to_user(result, &offset, sizeof(offset))) retval = 0; } + if (!(file->f_flags & O_LARGEFILE) && S_ISREG(inode->i_mode) && + file->f_pos >= 0x7ffffffeULL) { + /* The target position isn't presentable without + O_LARGEFILE flag being set --> yield error, and + restore the file position. */ + file->f_pos = oldpos; + retval = -EOVERFLOW; + } out_putf: fput(file); bad: @@ -325,6 +348,7 @@ ssize_t ret; struct file * file; ssize_t (*read)(struct file *, char *, size_t, loff_t *); + struct inode * inode; lock_kernel(); @@ -332,10 +356,30 @@ file = fget(fd); if (!file) goto bad_file; + + inode = file->f_dentry->d_inode; + if (!(file->f_mode & FMODE_READ)) goto out; - ret = locks_verify_area(FLOCK_VERIFY_READ, file->f_dentry->d_inode, - file, pos, count); + + /* Start position must be non-negative! */ + if (pos < 0) { + ret = -EINVAL; + goto out; + } + /* Read starting from beyond the end of file ? */ + if (inode->i_size <= pos) { + ret = -EOVERFLOW; + goto out; + } + + if (!(file->f_flags & O_LARGEFILE) && S_ISREG(inode->i_mode) && + file->f_pos >= 0x7ffffffeULL) { + ret = -EOVERFLOW; + goto out; + } + + ret = locks_verify_area(FLOCK_VERIFY_READ, inode, file, pos, count); if (ret) goto out; ret = -EINVAL; @@ -357,6 +401,7 @@ ssize_t ret; struct file * file; ssize_t (*write)(struct file *, const char *, size_t, loff_t *); + struct inode * inode; lock_kernel(); @@ -366,8 +411,21 @@ goto bad_file; if (!(file->f_mode & FMODE_WRITE)) goto out; - ret = locks_verify_area(FLOCK_VERIFY_WRITE, file->f_dentry->d_inode, - file, pos, count); + /* Start position must be non-negative! 
*/ + if (pos < 0) { + ret = -EINVAL; + goto out; + } + + inode = file->f_dentry->d_inode; + + if (!(file->f_flags & O_LARGEFILE) && S_ISREG(inode->i_mode) && + file->f_pos >= 0x7ffffffeULL) { + ret = -EOVERFLOW; + goto out; + } + + ret = locks_verify_area(FLOCK_VERIFY_WRITE, inode, file, pos, count); if (ret) goto out; ret = -EINVAL; @@ -376,9 +434,9 @@ if (pos < 0) goto out; - down(&file->f_dentry->d_inode->i_sem); + down(&inode->i_sem); ret = write(file, buf, count, &pos); - up(&file->f_dentry->d_inode->i_sem); + up(&inode->i_sem); out: fput(file); diff -urN 2.2.15pre16/fs/romfs/inode.c 2.2.15pre16aa3/fs/romfs/inode.c --- 2.2.15pre16/fs/romfs/inode.c Wed Mar 29 19:42:10 2000 +++ 2.2.15pre16aa3/fs/romfs/inode.c Thu Mar 30 16:00:58 2000 @@ -396,7 +396,7 @@ buf = page_address(page); clear_bit(PG_uptodate, &page->flags); clear_bit(PG_error, &page->flags); - offset = page->offset; + offset = pgoff2loff(page->index); if (offset < inode->i_size) { avail = inode->i_size-offset; readlen = min(avail, PAGE_SIZE); diff -urN 2.2.15pre16/fs/smbfs/cache.c 2.2.15pre16aa3/fs/smbfs/cache.c --- 2.2.15pre16/fs/smbfs/cache.c Mon Jan 17 16:44:43 2000 +++ 2.2.15pre16aa3/fs/smbfs/cache.c Thu Mar 30 16:00:58 2000 @@ -41,14 +41,13 @@ printk("smb_get_dircache: finding cache for %s/%s\n", dentry->d_parent->d_name.name, dentry->d_name.name); #endif - cachep = (struct cache_head *) get_cached_page(inode, 0, 1); + cachep = (struct cache_head *) get_cached_page(inode,ulong2pgoff(0),1); if (!cachep) goto out; if (cachep->valid) { struct cache_index * index = cachep->index; struct cache_block * block; - unsigned long offset; int i; cachep->valid = 0; @@ -62,9 +61,10 @@ printk("smb_get_dircache: cache %s/%s has existing block!\n", dentry->d_parent->d_name.name, dentry->d_name.name); #endif - offset = PAGE_SIZE + (i << PAGE_SHIFT); - block = (struct cache_block *) get_cached_page(inode, - offset, 0); + /* byte_offset = PAGE_SIZE + (i << PAGE_SHIFT); */ + /* --> page_offset = 1 + i */ + block = (struct cache_block *) + get_cached_page(inode, ulong2pgoff(i+1), 0); if (!block) goto out; index->block = block; @@ -135,7 +135,7 @@ struct inode * inode = get_cache_inode(cachep); struct cache_index * index; struct cache_block * block; - unsigned long page_off; + pgoff_t page_off; unsigned int nent, offset, len = entry->len; unsigned int needed = len + sizeof(struct cache_entry); @@ -191,7 +191,8 @@ */ get_block: cachep->pages++; - page_off = PAGE_SIZE + (cachep->idx << PAGE_SHIFT); + /* page_byte_off = PAGE_SIZE + (cachep->idx << PAGE_SHIFT); */ + page_off = ulong2pgoff(1 + cachep->idx); block = (struct cache_block *) get_cached_page(inode, page_off, 1); if (block) { @@ -199,7 +200,7 @@ index->space = PAGE_SIZE; #ifdef SMBFS_DEBUG_VERBOSE printk("smb_add_to_cache: inode=%p, pages=%d, block at %ld\n", -inode, cachep->pages, page_off); +inode, cachep->pages, (u_long)pgoff2loff(page_off)); #endif goto add_entry; } diff -urN 2.2.15pre16/fs/smbfs/file.c 2.2.15pre16aa3/fs/smbfs/file.c --- 2.2.15pre16/fs/smbfs/file.c Wed Mar 29 19:42:10 2000 +++ 2.2.15pre16aa3/fs/smbfs/file.c Thu Mar 30 16:00:58 2000 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -56,7 +57,7 @@ smb_readpage_sync(struct dentry *dentry, struct page *page) { char *buffer = (char *) page_address(page); - unsigned long offset = page->offset; + loff_t loffset = pgoff2loff(page->index); int rsize = smb_get_rsize(server_from_dentry(dentry)); int count = PAGE_SIZE; int result; @@ -64,8 +65,8 @@ clear_bit(PG_error, &page->flags); #ifdef 
SMBFS_DEBUG_VERBOSE -printk("smb_readpage_sync: file %s/%s, count=%d@%ld, rsize=%d\n", -dentry->d_parent->d_name.name, dentry->d_name.name, count, offset, rsize); +printk("smb_readpage_sync: file %s/%s, count=%d@%Ld, rsize=%d\n", +dentry->d_parent->d_name.name, dentry->d_name.name, count, loffset, rsize); #endif result = smb_open(dentry, SMB_O_RDONLY); if (result < 0) @@ -81,12 +82,12 @@ if (count < rsize) rsize = count; - result = smb_proc_read(dentry, offset, rsize, buffer); + result = smb_proc_read(dentry, loffset, rsize, buffer); if (result < 0) goto io_error; count -= result; - offset += result; + loffset += result; buffer += result; dentry->d_inode->i_atime = CURRENT_TIME; if (result < rsize) @@ -126,25 +127,40 @@ * Offset is the data offset within the page. */ static int -smb_writepage_sync(struct dentry *dentry, struct page *page, +smb_writepage_sync(struct file *file, struct page *page, unsigned long offset, unsigned int count) { + struct dentry * dentry = file->f_dentry; struct inode *inode = dentry->d_inode; u8 *buffer = (u8 *) page_address(page) + offset; int wsize = smb_get_wsize(server_from_dentry(dentry)); int result, written = 0; + loff_t loffset = pgoff2loff(page->index) + offset; - offset += page->offset; #ifdef SMBFS_DEBUG_VERBOSE printk("smb_writepage_sync: file %s/%s, count=%d@%ld, wsize=%d\n", -dentry->d_parent->d_name.name, dentry->d_name.name, count, offset, wsize); + dentry->d_parent->d_name.name, dentry->d_name.name, count, + loffset, wsize); #endif + if (!(file->f_flags & O_LARGEFILE) && + loffset >= 0x7ffffffeULL) + return -EFBIG; + + if (!(file->f_flags & O_LARGEFILE) && + loffset + count >= 0x7fffffffULL) + count = LONG_MAX - loffset; + + if (loffset >= 0xffffffffULL) /* 4G-1 ??? Or 2G-1 ??? */ + return -EFBIG; + if ((loffset + count) >= 0xffffffffULL) + count = 0xffffffffULL - loffset; + do { if (count < wsize) wsize = count; - result = smb_proc_write(dentry, offset, wsize, buffer); + result = smb_proc_write(dentry, loffset, wsize, buffer); if (result < 0) break; /* N.B. what if result < wsize?? */ @@ -152,29 +168,27 @@ if (result < wsize) printk("smb_writepage_sync: short write, wsize=%d, result=%d\n", wsize, result); #endif - buffer += wsize; - offset += wsize; + buffer += wsize; + loffset += wsize; written += wsize; - count -= wsize; + count -= wsize; /* * Update the inode now rather than waiting for a refresh. */ inode->i_mtime = inode->i_atime = CURRENT_TIME; - if (offset > inode->i_size) - inode->i_size = offset; + if (loffset > inode->i_size) + inode->i_size = loffset; inode->u.smbfs_i.cache_valid |= SMB_F_LOCALWRITE; } while (count); return written ? written : result; } /* - * Write a page to the server. This will be used for NFS swapping only - * (for now), and we currently do this synchronously only. + * Write a page to the server. 
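
The double clamp in smb_writepage_sync() above deserves a gloss: the first pair of checks enforces the L-F-S rule for descriptors opened without O_LARGEFILE (writes must stop at 2G-1), while the second pair enforces the SMB wire format, whose core READ/WRITE offsets are 32-bit fields (hence the "4G-1 ??? Or 2G-1 ???" doubt in the comment). A distilled sketch, not patch code:

	/* Sketch of the clamping logic in smb_writepage_sync() above. */
	static unsigned int smb_clamp_count(loff_t pos, unsigned int count,
					    int largefile)
	{
		if (!largefile && pos + count >= 0x7fffffffULL)
			count = 0x7fffffffULL - pos;	/* L-F-S: stop at 2G-1 */
		if (pos + count >= 0xffffffffULL)
			count = 0xffffffffULL - pos;	/* SMB offsets are 32-bit */
		return count;
	}
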
*/ static int smb_writepage(struct file *file, struct page *page) { - struct dentry *dentry = file->f_dentry; int result; #ifdef SMBFS_PARANOIA @@ -183,7 +197,7 @@ #endif set_bit(PG_locked, &page->flags); atomic_inc(&page->count); - result = smb_writepage_sync(dentry, page, 0, PAGE_SIZE); + result = smb_writepage_sync(file, page, 0, PAGE_SIZE); smb_unlock_page(page); free_page(page_address(page)); return result; @@ -194,11 +208,11 @@ { struct dentry *dentry = file->f_dentry; - pr_debug("SMBFS: smb_updatepage(%s/%s %d@%ld, sync=%d)\n", + pr_debug("SMBFS: smb_updatepage(%s/%s %d@%Ld, sync=%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - count, page->offset+offset, sync); + count, pgoff2loff(page->index)+offset, sync); - return smb_writepage_sync(dentry, page, offset, count); + return smb_writepage_sync(file, page, offset, count); } static ssize_t @@ -208,9 +222,9 @@ ssize_t status; #ifdef SMBFS_DEBUG_VERBOSE -printk("smb_file_read: file %s/%s, count=%lu@%lu\n", -dentry->d_parent->d_name.name, dentry->d_name.name, -(unsigned long) count, (unsigned long) *ppos); +printk("smb_file_read: file %s/%s, count=%lu@%Lu\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (unsigned long) count, *ppos); #endif status = smb_revalidate_inode(dentry); @@ -241,7 +255,8 @@ #ifdef SMBFS_DEBUG_VERBOSE printk("smb_file_mmap: file %s/%s, address %lu - %lu\n", -dentry->d_parent->d_name.name, dentry->d_name.name, vma->vm_start, vma->vm_end); + dentry->d_parent->d_name.name, dentry->d_name.name, + vma->vm_start, vma->vm_end); #endif status = smb_revalidate_inode(dentry); @@ -249,7 +264,7 @@ { #ifdef SMBFS_PARANOIA printk("smb_file_mmap: %s/%s validation failed, error=%d\n", -dentry->d_parent->d_name.name, dentry->d_name.name, status); + dentry->d_parent->d_name.name, dentry->d_name.name, status); #endif goto out; } @@ -268,9 +283,9 @@ ssize_t result; #ifdef SMBFS_DEBUG_VERBOSE -printk("smb_file_write: file %s/%s, count=%lu@%lu, pages=%ld\n", -dentry->d_parent->d_name.name, dentry->d_name.name, -(unsigned long) count, (unsigned long) *ppos, dentry->d_inode->i_nrpages); +printk("smb_file_write: file %s/%s, count=%lu@%Lu, pages=%ld\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (unsigned long) count, *ppos, dentry->d_inode->i_nrpages); #endif result = smb_revalidate_inode(dentry); @@ -278,7 +293,7 @@ { #ifdef SMBFS_PARANOIA printk("smb_file_write: %s/%s validation failed, error=%d\n", -dentry->d_parent->d_name.name, dentry->d_name.name, result); + dentry->d_parent->d_name.name, dentry->d_name.name, result); #endif goto out; } @@ -291,9 +306,9 @@ { result = generic_file_write(file, buf, count, ppos); #ifdef SMBFS_DEBUG_VERBOSE -printk("smb_file_write: pos=%ld, size=%ld, mtime=%ld, atime=%ld\n", -(long) file->f_pos, dentry->d_inode->i_size, dentry->d_inode->i_mtime, -dentry->d_inode->i_atime); +printk("smb_file_write: pos=%Ld, size=%ld, mtime=%ld, atime=%ld\n", + file->f_pos, dentry->d_inode->i_size, dentry->d_inode->i_mtime, + dentry->d_inode->i_atime); #endif } out: diff -urN 2.2.15pre16/fs/smbfs/proc.c 2.2.15pre16aa3/fs/smbfs/proc.c --- 2.2.15pre16/fs/smbfs/proc.c Mon Jan 17 16:44:43 2000 +++ 2.2.15pre16aa3/fs/smbfs/proc.c Thu Mar 30 16:00:58 2000 @@ -1008,13 +1008,16 @@ file-id would not be valid after a reconnection. 
*/ int -smb_proc_read(struct dentry *dentry, off_t offset, int count, char *data) +smb_proc_read(struct dentry *dentry, loff_t offset, int count, char *data) { struct smb_sb_info *server = server_from_dentry(dentry); __u16 returned_count, data_len; char *buf; int result; + if (offset > 0xffffffff) + return -EIO; + smb_lock_server(server); smb_setup_header(server, SMBread, 5, 0); buf = server->packet; @@ -1050,14 +1053,17 @@ } int -smb_proc_write(struct dentry *dentry, off_t offset, int count, const char *data) +smb_proc_write(struct dentry *dentry, loff_t offset, int count, const char *data) { struct smb_sb_info *server = server_from_dentry(dentry); int result; __u8 *p; + if (offset > 0xffffffff) + return -EIO; + #if SMBFS_DEBUG_VERBOSE -printk("smb_proc_write: file %s/%s, count=%d@%ld, packet_size=%d\n", +printk("smb_proc_write: file %s/%s, count=%d@%Ld, packet_size=%d\n", DENTRY_PATH(dentry), count, offset, server->packet_size); #endif smb_lock_server(server); @@ -1805,7 +1811,7 @@ fattr->f_mtime = date_dos2unix(server, date, time); #ifdef SMBFS_DEBUG_VERBOSE printk("smb_proc_getattr_ff: name=%s, date=%x, time=%x, mtime=%ld\n", -mask, date, time, fattr->f_mtime); + mask, date, time, fattr->f_mtime); #endif fattr->f_size = DVAL(resp_data, 12); /* ULONG allocation size */ @@ -2063,7 +2069,7 @@ WSET(server->packet, smb_vwv6, time); #ifdef SMBFS_DEBUG_TIMESTAMP printk("smb_proc_setattr_ext: date=%d, time=%d, mtime=%ld\n", -date, time, fattr->f_mtime); + date, time, fattr->f_mtime); #endif result = smb_request_ok(server, SMBsetattrE, 0, 0); diff -urN 2.2.15pre16/fs/stat.c 2.2.15pre16aa3/fs/stat.c --- 2.2.15pre16/fs/stat.c Mon Jan 17 16:44:43 2000 +++ 2.2.15pre16aa3/fs/stat.c Thu Mar 30 16:00:58 2000 @@ -280,3 +280,124 @@ unlock_kernel(); return error; } + + +/* ---------- LFS-64 ----------- */ +#if !defined(__alpha__) + +static long cp_new_stat64(struct inode * inode, struct stat64 * statbuf) +{ + struct stat64 tmp; + unsigned int blocks, indirect; + + memset(&tmp, 0, sizeof(tmp)); + tmp.st_dev = kdev_t_to_nr(inode->i_dev); + tmp.st_ino = inode->i_ino; + tmp.st_mode = inode->i_mode; + tmp.st_nlink = inode->i_nlink; + tmp.st_uid = inode->i_uid; + tmp.st_gid = inode->i_gid; + tmp.st_rdev = kdev_t_to_nr(inode->i_rdev); + tmp.st_atime = inode->i_atime; + tmp.st_mtime = inode->i_mtime; + tmp.st_ctime = inode->i_ctime; + tmp.st_size = inode->i_size; +/* + * st_blocks and st_blksize are approximated with a simple algorithm if + * they aren't supported directly by the filesystem. The minix and msdos + * filesystems don't keep track of blocks, so they would either have to + * be counted explicitly (by delving into the file itself), or by using + * this simple algorithm to get a reasonable (although not 100% accurate) + * value. + */ + +/* + * Use minix fs values for the number of direct and indirect blocks. The + * count is now exact for the minix fs except that it counts zero blocks. + * Everything is in units of BLOCK_SIZE until the assignment to + * tmp.st_blksize. 
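
To make the estimate described in the comment above concrete: with BLOCK_SIZE = 1024 and I_B = 512, a 10MB file yields 10240 data blocks; (10240 - 7 + 511)/512 = 20 single-indirect blocks; (20 - 1 + 511)/512 = 1 double-indirect block; 10261 blocks in total, reported as st_blocks = (1024/512) * 10261 = 20522 half-kilobyte units. (Worked example by the numbers, not part of the patch.)
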
+ */ +#define D_B 7 +#define I_B (BLOCK_SIZE / sizeof(unsigned short)) + + if (!inode->i_blksize) { + blocks = (tmp.st_size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS; + if (blocks > D_B) { + indirect = (blocks - D_B + I_B - 1) / I_B; + blocks += indirect; + if (indirect > 1) { + indirect = (indirect - 1 + I_B - 1) / I_B; + blocks += indirect; + if (indirect > 1) + blocks++; + } + } + tmp.st_blocks = (BLOCK_SIZE / 512) * blocks; + tmp.st_blksize = BLOCK_SIZE; + } else { + tmp.st_blocks = inode->i_blocks; + tmp.st_blksize = inode->i_blksize; + } + return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; +} + +asmlinkage long sys_stat64(char * filename, struct stat64 * statbuf, long flags) +{ + struct dentry * dentry; + int error; + + lock_kernel(); + dentry = namei(filename); + + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + error = do_revalidate(dentry); + if (!error) + error = cp_new_stat64(dentry->d_inode, statbuf); + + dput(dentry); + } + unlock_kernel(); + return error; +} + +asmlinkage long sys_lstat64(char * filename, struct stat64 * statbuf, long flags) +{ + struct dentry * dentry; + int error; + + lock_kernel(); + dentry = lnamei(filename); + + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + error = do_revalidate(dentry); + if (!error) + error = cp_new_stat64(dentry->d_inode, statbuf); + + dput(dentry); + } + unlock_kernel(); + return error; +} + +asmlinkage long sys_fstat64(unsigned long fd, struct stat64 * statbuf, long flags) +{ + struct file * f; + int err = -EBADF; + + lock_kernel(); + f = fget(fd); + if (f) { + struct dentry * dentry = f->f_dentry; + + err = do_revalidate(dentry); + if (!err) + err = cp_new_stat64(dentry->d_inode, statbuf); + fput(f); + } + unlock_kernel(); + return err; +} + +#endif /* LFS-64 */ diff -urN 2.2.15pre16/fs/sysv/file.c 2.2.15pre16aa3/fs/sysv/file.c --- 2.2.15pre16/fs/sysv/file.c Mon Jan 17 16:44:43 2000 +++ 2.2.15pre16aa3/fs/sysv/file.c Thu Mar 30 16:00:58 2000 @@ -207,7 +207,7 @@ { struct inode * inode = filp->f_dentry->d_inode; struct super_block * sb = inode->i_sb; - off_t pos; + loff_t pos; ssize_t written, c; struct buffer_head * bh; char * p; @@ -232,6 +232,21 @@ else pos = *ppos; written = 0; + + /* L-F-S spec 2.2.1.27: */ + if (!(filp->f_flags & O_LARGEFILE)) { + if (pos >= 0x7ffffffeULL) /* pos@2G forbidden */ + return -EFBIG; + + if (pos + count >= 0x7fffffffULL) + /* Write only until end of allowed region */ + count = 0x7fffffffULL - pos; + } + if (pos >= 0xffffffffULL) + return -EFBIG; /* Only up to 4G-1! */ + if ((pos + count) > 0xffffffffULL) + count = 0xffffffffULL - pos; + while (written<count) { bh = sysv_getblk (inode, pos >> sb->sv_block_size_bits, 1); if (!bh) { diff -urN 2.2.15pre16/fs/ufs/balloc.c 2.2.15pre16aa3/fs/ufs/balloc.c --- 2.2.15pre16/fs/ufs/balloc.c Mon Jan 17 16:44:43 2000 +++ 2.2.15pre16aa3/fs/ufs/balloc.c Thu Mar 30 16:00:58 2000 @@ -660,9 +660,9 @@ struct ufs_sb_private_info * uspi; struct ufs_super_block_first * usb1; struct ufs_cylinder_group * ucg; - unsigned start, length, location, result; - unsigned possition, fragsize, blockmap, mask; - unsigned swab; + unsigned int start, length, location, result; + unsigned int possition, fragsize, blockmap, mask; + unsigned int swab; UFSD(("ENTER, cg %u, goal %u, count %u\n", ucpi->c_cgx, goal, count)) @@ -676,7 +676,7 @@ else start = ucpi->c_frotor >> 3; - length = howmany(uspi->s_fpg, 8) - start; + length = ((uspi->s_fpg + 7) >> 3) - start; location = ubh_scanc(UCPI_UBH, ucpi->c_freeoff + start, length, (uspi->s_fpb == 8) ? 
ufs_fragtable_8fpb : ufs_fragtable_other, 1 << (count - 1 + (uspi->s_fpb & 7))); diff -urN 2.2.15pre16/fs/ufs/dir.c 2.2.15pre16aa3/fs/ufs/dir.c --- 2.2.15pre16/fs/ufs/dir.c Wed Mar 29 19:42:10 2000 +++ 2.2.15pre16aa3/fs/ufs/dir.c Thu Mar 30 16:00:58 2000 @@ -15,6 +15,7 @@ #include #include +#include #include "swab.h" #include "util.h" @@ -170,7 +171,7 @@ error_msg = "inode out of bounds"; if (error_msg != NULL) - ufs_error (sb, function, "bad entry in directory #%lu, size %lu: %s - " + ufs_error (sb, function, "bad entry in directory #%lu, size %Lu: %s - " "offset=%lu, inode=%lu, reclen=%d, namlen=%d", dir->i_ino, dir->i_size, error_msg, offset, (unsigned long) SWAB32(de->d_ino), diff -urN 2.2.15pre16/fs/ufs/file.c 2.2.15pre16aa3/fs/ufs/file.c --- 2.2.15pre16/fs/ufs/file.c Mon Jan 17 16:44:43 2000 +++ 2.2.15pre16aa3/fs/ufs/file.c Thu Mar 30 16:00:58 2000 @@ -140,7 +140,7 @@ loff_t *ppos ) { struct inode * inode = filp->f_dentry->d_inode; - __u32 pos; + loff_t pos; long block; int offset; int written, c; @@ -177,11 +177,14 @@ return -EINVAL; } - /* Check for overflow.. */ - if (pos > (__u32) (pos + count)) { - count = ~pos; /* == 0xFFFFFFFF - pos */ - if (!count) + /* L-F-S spec 2.2.1.27: */ + if (!(filp->f_flags & O_LARGEFILE)) { + if (pos >= 0x7ffffffeULL) /* pos@2G forbidden */ return -EFBIG; + + if (pos + count >= 0x7fffffffULL) + /* Write only until end of allowed region */ + count = 0x7fffffffULL - pos; } /* diff -urN 2.2.15pre16/fs/ufs/inode.c 2.2.15pre16aa3/fs/ufs/inode.c --- 2.2.15pre16/fs/ufs/inode.c Mon Jan 17 16:44:43 2000 +++ 2.2.15pre16aa3/fs/ufs/inode.c Thu Mar 30 16:00:58 2000 @@ -54,7 +54,7 @@ { unsigned swab = inode->i_sb->u.ufs_sb.s_swab; printk("ino %lu mode 0%6.6o nlink %d uid %d uid32 %u" - " gid %d gid32 %u size %lu blocks %lu\n", + " gid %d gid32 %u size %Lu blocks %lu\n", inode->i_ino, inode->i_mode, inode->i_nlink, inode->i_uid, inode->u.ufs_i.i_uid, inode->i_gid, inode->u.ufs_i.i_gid, inode->i_size, inode->i_blocks); @@ -213,13 +213,14 @@ if (!create) return NULL; limit = current->rlim[RLIMIT_FSIZE].rlim_cur; - if (limit < RLIM_INFINITY) { + if (limit != RLIM_INFINITY) { limit >>= sb->s_blocksize_bits; if (new_fragment >= limit) { send_sig(SIGXFSZ, current, 0); return NULL; } } + lastblock = ufs_fragstoblks (lastfrag); lastblockoff = ufs_fragnum (lastfrag); /* @@ -321,7 +322,8 @@ brelse (result); goto repeat; } - if (!create || new_fragment >= (current->rlim[RLIMIT_FSIZE].rlim_cur >> sb->s_blocksize)) { + if (!create || (current->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY && + new_fragment >= (current->rlim[RLIMIT_FSIZE].rlim_cur >> sb->s_blocksize))) { brelse (bh); *err = -EFBIG; return NULL; @@ -496,13 +498,10 @@ } /* - * Linux i_size can be 32 on some architectures. We will mark - * big files as read only and let user access first 32 bits. + * Linux i_size used to be 32 bits on some architectures. + * These days we allow access to the entire file as is.. 
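
The pattern of replacing howmany() with explicit shifts, here and in the ufs hunks that follow, is not a style change: with inode->i_size now a full 64-bit loff_t, a plain division would make gcc emit a call to the libgcc helper __udivdi3, which the kernel does not link against (the same problem the removed ntfs FIXME above noted). Fragment and block sizes are powers of two, so the shift is exact. As a sketch (names follow the ufs usage; not patch code):

	/* Sketch: 64-bit-safe round-up without a 64-bit division.
	 * Assumes fsize == 1 << fshift, which holds for ufs fragments. */
	static inline unsigned long frags_for(loff_t size, unsigned int fsize,
					      unsigned int fshift)
	{
		return (unsigned long)((size + fsize - 1) >> fshift);
	}
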
*/ - inode->u.ufs_i.i_size = SWAB64(ufs_inode->ui_size); - inode->i_size = (off_t) inode->u.ufs_i.i_size; - if (sizeof(off_t) == 4 && (inode->u.ufs_i.i_size >> 32)) - inode->i_size = (__u32)-1; + inode->i_size = SWAB64(ufs_inode->ui_size); inode->i_atime = SWAB32(ufs_inode->ui_atime.tv_sec); inode->i_ctime = SWAB32(ufs_inode->ui_ctime.tv_sec); @@ -515,7 +514,7 @@ inode->u.ufs_i.i_gen = SWAB32(ufs_inode->ui_gen); inode->u.ufs_i.i_shadow = SWAB32(ufs_inode->ui_u3.ui_sun.ui_shadow); inode->u.ufs_i.i_oeftflag = SWAB32(ufs_inode->ui_u3.ui_sun.ui_oeftflag); - inode->u.ufs_i.i_lastfrag = howmany (inode->i_size, uspi->s_fsize); + inode->u.ufs_i.i_lastfrag = (inode->i_size + uspi->s_fsize -1) >> uspi->s_fshift; if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) inode->i_rdev = to_kdev_t(SWAB32(ufs_inode->ui_u2.ui_addr.ui_db[0])); diff -urN 2.2.15pre16/fs/ufs/super.c 2.2.15pre16aa3/fs/ufs/super.c --- 2.2.15pre16/fs/ufs/super.c Mon Jan 17 16:44:43 2000 +++ 2.2.15pre16aa3/fs/ufs/super.c Thu Mar 30 16:00:58 2000 @@ -328,7 +328,7 @@ * on the device. */ size = uspi->s_cssize; - blks = howmany(size, uspi->s_fsize); + blks = (size + uspi->s_fsize-1) >> uspi->s_fshift; base = space = kmalloc(size, GFP_KERNEL); if (!base) goto failed; @@ -405,7 +405,7 @@ uspi = sb->u.ufs_sb.s_uspi; size = uspi->s_cssize; - blks = howmany(size, uspi->s_fsize); + blks = (size + uspi->s_fsize-1) >> uspi->s_fshift; base = space = (char*) sb->u.ufs_sb.s_csp[0]; for (i = 0; i < blks; i += uspi->s_fpb) { size = uspi->s_bsize; diff -urN 2.2.15pre16/fs/ufs/truncate.c 2.2.15pre16aa3/fs/ufs/truncate.c --- 2.2.15pre16/fs/ufs/truncate.c Mon Jan 17 16:44:43 2000 +++ 2.2.15pre16aa3/fs/ufs/truncate.c Thu Mar 30 16:00:58 2000 @@ -59,8 +59,8 @@ * Linus */ -#define DIRECT_BLOCK howmany (inode->i_size, uspi->s_bsize) -#define DIRECT_FRAGMENT howmany (inode->i_size, uspi->s_fsize) +#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize -1) >> uspi->s_bshift) +#define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize -1) >> uspi->s_fshift) static int ufs_trunc_direct (struct inode * inode) { @@ -194,7 +194,7 @@ } -static int ufs_trunc_indirect (struct inode * inode, unsigned offset, u32 * p) +static int ufs_trunc_indirect (struct inode * inode, u_long offset, u32 * p) { struct super_block * sb; struct ufs_sb_private_info * uspi; @@ -297,7 +297,7 @@ struct super_block * sb; struct ufs_sb_private_info * uspi; struct ufs_buffer_head * dind_bh; - unsigned i, tmp, dindirect_block; + unsigned int i, tmp, dindirect_block; u32 * dind; int retry = 0; unsigned swab; @@ -308,8 +308,8 @@ swab = sb->u.ufs_sb.s_swab; uspi = sb->u.ufs_sb.s_uspi; - dindirect_block = (DIRECT_BLOCK > offset) - ? ((DIRECT_BLOCK - offset) / uspi->s_apb) : 0; + dindirect_block = ((DIRECT_BLOCK > offset) ? + ((DIRECT_BLOCK - offset) >> uspi->s_apbshift) : 0); retry = 0; tmp = SWAB32(*p); @@ -379,7 +379,7 @@ retry = 0; tindirect_block = (DIRECT_BLOCK > (UFS_NDADDR + uspi->s_apb + uspi->s_2apb)) - ? ((DIRECT_BLOCK - UFS_NDADDR - uspi->s_apb - uspi->s_2apb) / uspi->s_2apb) : 0; + ? 
((DIRECT_BLOCK - UFS_NDADDR - uspi->s_apb - uspi->s_2apb) >> uspi->s_2apbshift) : 0; p = inode->u.ufs_i.i_u1.i_data + UFS_TIND_BLOCK; if (!(tmp = SWAB32(*p))) return 0; @@ -467,7 +467,8 @@ } } inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->u.ufs_i.i_lastfrag = howmany (inode->i_size, uspi->s_fsize); + inode->u.ufs_i.i_lastfrag = + (inode->i_size + uspi->s_fsize -1) >> uspi->s_fshift; mark_inode_dirty(inode); UFSD(("EXIT\n")) } diff -urN 2.2.15pre16/fs/ufs/util.h 2.2.15pre16aa3/fs/ufs/util.h --- 2.2.15pre16/fs/ufs/util.h Fri Feb 25 17:20:34 2000 +++ 2.2.15pre16aa3/fs/ufs/util.h Thu Mar 30 16:00:58 2000 @@ -14,7 +14,6 @@ * some useful macros */ #define in_range(b,first,len) ((b)>=(first)&&(b)<(first)+(len)) -#define howmany(x,y) (((x)+(y)-1)/(y)) #define min(x,y) ((x)<(y)?(x):(y)) #define max(x,y) ((x)>(y)?(x):(y)) diff -urN 2.2.15pre16/include/asm-alpha/bigmem.h 2.2.15pre16aa3/include/asm-alpha/bigmem.h --- 2.2.15pre16/include/asm-alpha/bigmem.h Thu Jan 1 01:00:00 1970 +++ 2.2.15pre16aa3/include/asm-alpha/bigmem.h Thu Mar 30 16:00:57 2000 @@ -0,0 +1,27 @@ +/* + * linux/include/asm-alpha/bigmem.h + * + * On alpha we can address all the VM with a flat mapping. We need + * to differentiate BIGMEM memory only because the default PCI DMA window + * is currently limited to 2g. Thus kmap/kunmap are noops here. + * + * With bigmem support the alpha now is capable of allocating up to + * 2048Giga of memory. + * + * Copyright (C) 2000 Andrea Arcangeli , SuSE GmbH + */ + +#ifndef _ASM_BIGMEM_H +#define _ASM_BIGMEM_H + +#include + +#undef BIGMEM_DEBUG /* undef for production */ + +/* declarations for bigmem.c */ +extern unsigned long bigmem_start, bigmem_end; + +#define kmap(kaddr, type) kaddr +#define kunmap(vaddr, type) do { } while (0) + +#endif /* _ASM_BIGMEM_H */ diff -urN 2.2.15pre16/include/asm-alpha/fcntl.h 2.2.15pre16aa3/include/asm-alpha/fcntl.h --- 2.2.15pre16/include/asm-alpha/fcntl.h Mon Jan 17 16:44:43 2000 +++ 2.2.15pre16aa3/include/asm-alpha/fcntl.h Thu Mar 30 16:00:58 2000 @@ -20,6 +20,7 @@ #define O_DIRECT 040000 /* direct disk access - should check with OSF/1 */ #define O_DIRECTORY 0100000 /* must be a directory */ #define O_NOFOLLOW 0200000 /* don't follow links */ +#define O_LARGEFILE 0400000 /* will be set by the kernel on every open */ #define F_DUPFD 0 /* dup */ #define F_GETFD 1 /* get f_flags */ @@ -61,5 +62,9 @@ __kernel_off_t l_len; __kernel_pid_t l_pid; }; + +#ifdef __KERNEL__ +#define flock64 flock +#endif #endif diff -urN 2.2.15pre16/include/asm-alpha/pgtable.h 2.2.15pre16aa3/include/asm-alpha/pgtable.h --- 2.2.15pre16/include/asm-alpha/pgtable.h Wed Mar 29 19:42:15 2000 +++ 2.2.15pre16aa3/include/asm-alpha/pgtable.h Thu Mar 30 16:00:56 2000 @@ -23,7 +23,17 @@ #define flush_cache_range(mm, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr) do { } while (0) #define flush_page_to_ram(page) do { } while (0) -#define flush_icache_range(start, end) do { } while (0) +/* + * The icache is not coherent with the dcache on alpha, thus before + * running self modified code like kernel modules we must always run + * an imb(). 
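
For reference, imb() is a one-instruction PAL call on the alpha; the usual asm/system.h definition looks roughly like the sketch below (quoted from memory, check the real header), and the new smp_imb() has to run it on every CPU because each processor's icache is separately incoherent.

	/* Sketch of the UP primitive; PAL_imb is the instruction-memory
	 * barrier PALcode entry. */
	#define imb() \
		__asm__ __volatile__ ("call_pal %0 #imb" : : "i" (PAL_imb) : "memory")
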
+ */ +#ifndef __SMP__ +#define flush_icache_range(start, end) imb() +#else +#define flush_icache_range(start, end) smp_imb() +extern void smp_imb(void); +#endif #define flush_dcache_page(page) do { } while (0) /* diff -urN 2.2.15pre16/include/asm-alpha/smplock.h 2.2.15pre16aa3/include/asm-alpha/smplock.h --- 2.2.15pre16/include/asm-alpha/smplock.h Thu Mar 30 01:40:52 2000 +++ 2.2.15pre16aa3/include/asm-alpha/smplock.h Thu Mar 30 16:00:57 2000 @@ -17,8 +17,6 @@ { if (task->lock_depth >= 0) spin_unlock(&kernel_flag); - release_irqlock(cpu); - __sti(); } /* diff -urN 2.2.15pre16/include/asm-arm/fcntl.h 2.2.15pre16aa3/include/asm-arm/fcntl.h --- 2.2.15pre16/include/asm-arm/fcntl.h Mon Jan 17 16:44:44 2000 +++ 2.2.15pre16aa3/include/asm-arm/fcntl.h Thu Mar 30 16:00:58 2000 @@ -18,6 +18,8 @@ #define FASYNC 020000 /* fcntl, for BSD compatibility */ #define O_DIRECTORY 040000 /* must be a directory */ #define O_NOFOLLOW 0100000 /* don't follow links */ +#define O_DIRECT 0200000 /* direct disk access hint - currently ignored */ +#define O_LARGEFILE 0400000 #define F_DUPFD 0 /* dup */ #define F_GETFD 1 /* get f_flags */ @@ -33,6 +35,10 @@ #define F_SETSIG 10 /* for sockets. */ #define F_GETSIG 11 /* for sockets. */ +#define F_GETLK64 12 /* using 'struct flock64' */ +#define F_SETLK64 13 +#define F_SETLKW64 14 + /* for F_[GET|SET]FL */ #define FD_CLOEXEC 1 /* actually anything with low bit set goes */ @@ -58,6 +64,14 @@ off_t l_start; off_t l_len; pid_t l_pid; +}; + +struct flock64 { + short l_type; + short l_whence; + loff_t l_start; + loff_t l_len; + pid_t l_pid; }; #endif diff -urN 2.2.15pre16/include/asm-arm/smplock.h 2.2.15pre16aa3/include/asm-arm/smplock.h --- 2.2.15pre16/include/asm-arm/smplock.h Mon Jan 17 16:44:44 2000 +++ 2.2.15pre16aa3/include/asm-arm/smplock.h Thu Mar 30 16:00:57 2000 @@ -15,8 +15,6 @@ do { \ if (task->lock_depth >= 0) \ spin_unlock(&kernel_flag); \ - release_irqlock(cpu); \ - __sti(); \ } while (0) /* diff -urN 2.2.15pre16/include/asm-arm/stat.h 2.2.15pre16aa3/include/asm-arm/stat.h --- 2.2.15pre16/include/asm-arm/stat.h Mon Jan 17 16:44:44 2000 +++ 2.2.15pre16aa3/include/asm-arm/stat.h Thu Mar 30 16:00:58 2000 @@ -38,4 +38,5 @@ unsigned long __unused5; }; +/* Someone please add a glibc/arm compatible stat64 struct here. 
*/ #endif diff -urN 2.2.15pre16/include/asm-arm/unistd.h 2.2.15pre16aa3/include/asm-arm/unistd.h --- 2.2.15pre16/include/asm-arm/unistd.h Mon Jan 17 16:44:44 2000 +++ 2.2.15pre16aa3/include/asm-arm/unistd.h Thu Mar 30 16:00:58 2000 @@ -198,6 +198,13 @@ /* 188 reserved */ /* 189 reserved */ #define __NR_vfork (__NR_SYSCALL_BASE+190) +/* #define __NR_getrlimit (__NR_SYSCALL_BASE+191) */ +#define __NR_mmap2 (__NR_SYSCALL_BASE+192) +#define __NR_truncate64 (__NR_SYSCALL_BASE+193) +#define __NR_ftruncate64 (__NR_SYSCALL_BASE+194) +#define __NR_stat64 (__NR_SYSCALL_BASE+195) +#define __NR_lstat64 (__NR_SYSCALL_BASE+196) +#define __NR_fstat64 (__NR_SYSCALL_BASE+197) #define __sys2(x) #x #define __sys1(x) __sys2(x) diff -urN 2.2.15pre16/include/asm-generic/smplock.h 2.2.15pre16aa3/include/asm-generic/smplock.h --- 2.2.15pre16/include/asm-generic/smplock.h Mon Jan 17 16:44:44 2000 +++ 2.2.15pre16aa3/include/asm-generic/smplock.h Thu Mar 30 16:00:57 2000 @@ -15,8 +15,6 @@ do { \ if (task->lock_depth >= 0) \ spin_unlock(&kernel_flag); \ - release_irqlock(cpu); \ - __sti(); \ } while (0) /* diff -urN 2.2.15pre16/include/asm-i386/bigmem.h 2.2.15pre16aa3/include/asm-i386/bigmem.h --- 2.2.15pre16/include/asm-i386/bigmem.h Thu Jan 1 01:00:00 1970 +++ 2.2.15pre16aa3/include/asm-i386/bigmem.h Thu Mar 30 16:00:57 2000 @@ -0,0 +1,69 @@ +/* + * bigmem.h: virtual kernel memory mappings for big memory + * + * Used in CONFIG_BIGMEM systems for memory pages which are not + * addressable by direct kernel virtual addresses. + * + * Copyright (C) 1999 Gerhard Wichert, Siemens AG + * Gerhard.Wichert@pdb.siemens.de + */ + +#ifndef _ASM_BIGMEM_H +#define _ASM_BIGMEM_H + +#include <linux/init.h> + +#undef BIGMEM_DEBUG /* undef for production */ + +/* declarations for bigmem.c */ +extern unsigned long bigmem_start, bigmem_end; +extern int nr_free_bigpages; + +extern pte_t *kmap_pte; +extern pgprot_t kmap_prot; + +extern void kmap_init(void) __init; + +/* kmap helper functions necessary to access the bigmem pages in kernel */ +#include +#include + +extern inline unsigned long kmap(unsigned long kaddr, enum km_type type) +{ + if (__pa(kaddr) < bigmem_start) + return kaddr; + { + enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id(); + unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN+idx); + +#ifdef BIGMEM_DEBUG + if (!pte_none(*(kmap_pte-idx))) + { + __label__ here; + here: + printk(KERN_ERR "not null pte on CPU %d from %p\n", + smp_processor_id(), &&here); + } +#endif + set_pte(kmap_pte-idx, mk_pte(kaddr & PAGE_MASK, kmap_prot)); + __flush_tlb_one(vaddr); + + return vaddr | (kaddr & ~PAGE_MASK); + } +} + +extern inline void kunmap(unsigned long vaddr, enum km_type type) +{ +#ifdef BIGMEM_DEBUG + enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id(); + if ((vaddr & PAGE_MASK) == __fix_to_virt(FIX_KMAP_BEGIN+idx)) + { + /* force other mappings to Oops if they'll try to access + this pte without first remapping it */ + pte_clear(kmap_pte-idx); + __flush_tlb_one(vaddr); + } +#endif +} + +#endif /* _ASM_BIGMEM_H */ diff -urN 2.2.15pre16/include/asm-i386/fcntl.h 2.2.15pre16aa3/include/asm-i386/fcntl.h --- 2.2.15pre16/include/asm-i386/fcntl.h Mon Jan 17 16:44:44 2000 +++ 2.2.15pre16aa3/include/asm-i386/fcntl.h Thu Mar 30 16:00:58 2000 @@ -35,6 +35,10 @@ #define F_SETSIG 10 /* for sockets. */ #define F_GETSIG 11 /* for sockets. 
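
Callers bracket every access with the kmap/kunmap pair just defined, passing the same km_type both times, as the fs/proc hunks earlier in this patch do. In distilled form (sketch only, not patch code):

	/* Sketch: copying out of a possibly-BIGMEM page.  kaddr is the
	 * page's kernel virtual address; for pages below bigmem_start
	 * kmap() degenerates to the identity mapping. */
	static void copy_from_page(void *dst, unsigned long kaddr, size_t len)
	{
		unsigned long vaddr = kmap(kaddr, KM_READ);
		memcpy(dst, (void *) vaddr, len);
		kunmap(vaddr, KM_READ);
	}
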
*/ +#define F_GETLK64 12 /* using 'struct flock64' */ +#define F_SETLK64 13 +#define F_SETLKW64 14 + /* for F_[GET|SET]FL */ #define FD_CLOEXEC 1 /* actually anything with low bit set goes */ @@ -60,6 +64,14 @@ off_t l_start; off_t l_len; pid_t l_pid; +}; + +struct flock64 { + short l_type; + short l_whence; + loff_t l_start; + loff_t l_len; + pid_t l_pid; }; #endif diff -urN 2.2.15pre16/include/asm-i386/fixmap.h 2.2.15pre16aa3/include/asm-i386/fixmap.h --- 2.2.15pre16/include/asm-i386/fixmap.h Wed Mar 29 02:14:00 2000 +++ 2.2.15pre16aa3/include/asm-i386/fixmap.h Thu Mar 30 16:00:57 2000 @@ -6,6 +6,8 @@ * for more details. * * Copyright (C) 1998 Ingo Molnar + * + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ #ifndef _ASM_FIXMAP_H @@ -14,6 +16,10 @@ #include #include #include +#ifdef CONFIG_BIGMEM +#include +#include +#endif /* * Here we define all the compile-time 'special' virtual @@ -55,6 +61,10 @@ FIX_CO_APIC, /* Cobalt APIC Redirection Table */ FIX_LI_PCIA, /* Lithium PCI Bridge A */ FIX_LI_PCIB, /* Lithium PCI Bridge B */ +#endif +#ifdef CONFIG_BIGMEM + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, #endif __end_of_fixed_addresses }; diff -urN 2.2.15pre16/include/asm-i386/io.h 2.2.15pre16aa3/include/asm-i386/io.h --- 2.2.15pre16/include/asm-i386/io.h Wed Mar 29 02:14:03 2000 +++ 2.2.15pre16aa3/include/asm-i386/io.h Thu Mar 30 16:00:57 2000 @@ -27,6 +27,7 @@ /* * Bit simplified and optimized by Jan Hubicka + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. */ #ifdef SLOW_IO_BY_JUMPING @@ -109,12 +110,20 @@ */ extern inline unsigned long virt_to_phys(volatile void * address) { +#ifdef CONFIG_BIGMEM + return __pa(address); +#else return __io_phys(address); +#endif } extern inline void * phys_to_virt(unsigned long address) { +#ifdef CONFIG_BIGMEM + return __va(address); +#else return __io_virt(address); +#endif } extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); diff -urN 2.2.15pre16/include/asm-i386/kmap_types.h 2.2.15pre16aa3/include/asm-i386/kmap_types.h --- 2.2.15pre16/include/asm-i386/kmap_types.h Thu Jan 1 01:00:00 1970 +++ 2.2.15pre16aa3/include/asm-i386/kmap_types.h Thu Mar 30 16:00:57 2000 @@ -0,0 +1,10 @@ +#ifndef _ASM_KMAP_TYPES_H +#define _ASM_KMAP_TYPES_H + +enum km_type { + KM_READ, + KM_WRITE, + KM_TYPE_NR, +}; + +#endif diff -urN 2.2.15pre16/include/asm-i386/page.h 2.2.15pre16aa3/include/asm-i386/page.h --- 2.2.15pre16/include/asm-i386/page.h Wed Mar 29 02:14:00 2000 +++ 2.2.15pre16aa3/include/asm-i386/page.h Thu Mar 30 16:00:57 2000 @@ -88,6 +88,7 @@ #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) #define MAP_NR(addr) (__pa(addr) >> PAGE_SHIFT) +#define PHYSMAP_NR(addr) ((unsigned long)(addr) >> PAGE_SHIFT) #endif /* __KERNEL__ */ diff -urN 2.2.15pre16/include/asm-i386/smplock.h 2.2.15pre16aa3/include/asm-i386/smplock.h --- 2.2.15pre16/include/asm-i386/smplock.h Thu Mar 30 01:41:26 2000 +++ 2.2.15pre16aa3/include/asm-i386/smplock.h Thu Mar 30 16:00:57 2000 @@ -15,8 +15,6 @@ do { \ if (task->lock_depth >= 0) \ spin_unlock(&kernel_flag); \ - release_irqlock(cpu); \ - __sti(); \ } while (0) /* diff -urN 2.2.15pre16/include/asm-i386/stat.h 2.2.15pre16aa3/include/asm-i386/stat.h --- 2.2.15pre16/include/asm-i386/stat.h Mon Jan 17 16:44:44 2000 +++ 2.2.15pre16aa3/include/asm-i386/stat.h Thu Mar 30 16:00:58 2000 @@ -38,4 +38,40 @@ unsigned long __unused5; }; +/* 
This matches struct stat64 in glibc2.1, hence the absolutely + * insane amounts of padding around dev_t's. + */ +struct stat64 { + unsigned short st_dev; + unsigned char __pad0[10]; + + unsigned long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + + unsigned long st_uid; + unsigned long st_gid; + + unsigned short st_rdev; + unsigned char __pad3[10]; + + long long st_size; + unsigned long st_blksize; + + unsigned long st_blocks; /* Number of 512-byte blocks allocated. */ + unsigned long __pad4; /* future possible st_blocks high bits */ + + unsigned long st_atime; + unsigned long __pad5; + + unsigned long st_mtime; + unsigned long __pad6; + + unsigned long st_ctime; + unsigned long __pad7; /* will be high 32 bits of ctime someday */ + + unsigned long __unused1; + unsigned long __unused2; +}; + #endif diff -urN 2.2.15pre16/include/asm-i386/unistd.h 2.2.15pre16aa3/include/asm-i386/unistd.h --- 2.2.15pre16/include/asm-i386/unistd.h Mon Jan 17 16:44:44 2000 +++ 2.2.15pre16aa3/include/asm-i386/unistd.h Thu Mar 30 16:00:58 2000 @@ -80,7 +80,7 @@ #define __NR_sigpending 73 #define __NR_sethostname 74 #define __NR_setrlimit 75 -#define __NR_getrlimit 76 +#define __NR_getrlimit 76 /* Back compatible 2Gig limited rlimit */ #define __NR_getrusage 77 #define __NR_gettimeofday 78 #define __NR_settimeofday 79 @@ -195,8 +195,15 @@ #define __NR_getpmsg 188 /* some people actually want streams */ #define __NR_putpmsg 189 /* some people actually want streams */ #define __NR_vfork 190 +/* #define __NR_ugetrlimit 191 SuS compliant getrlimit */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 -/* user-visible error numbers are in the range -1 - -122: see <asm-i386/errno.h> */ +/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */ #define __syscall_return(type, res) \ do { \ @@ -269,6 +276,19 @@ : "=a" (__res) \ : "0" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \ "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5))); \ +__syscall_return(type,__res); \ +} + +#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ +type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5,type6 arg6) \ +{ \ +long __res; \ +__asm__ volatile ("push %%ebp ; movl %%eax,%%ebp ; movl %1,%%eax ; int $0x80 ; pop %%ebp" \ + : "=a" (__res) \ + : "i" (__NR_##name),"b" ((long)(arg1)),"c" ((long)(arg2)), \ + "d" ((long)(arg3)),"S" ((long)(arg4)),"D" ((long)(arg5)), \ + "0" ((long)(arg6))); \ __syscall_return(type,__res); \ } diff -urN 2.2.15pre16/include/asm-m68k/fcntl.h 2.2.15pre16aa3/include/asm-m68k/fcntl.h --- 2.2.15pre16/include/asm-m68k/fcntl.h Mon Jan 17 16:44:44 2000 +++ 2.2.15pre16aa3/include/asm-m68k/fcntl.h Thu Mar 30 16:00:58 2000 @@ -33,6 +33,10 @@ #define F_SETSIG 10 /* for sockets. */ #define F_GETSIG 11 /* for sockets. 
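
The _syscall6() stub added above exists chiefly for mmap2, the first six-argument syscall on i386; a hypothetical instantiation (not in the patch) reads:

	/* Hypothetical use of _syscall6(): generates a mmap2() wrapper,
	 * whose last argument is a page offset rather than a byte offset. */
	_syscall6(long, mmap2, unsigned long, addr, unsigned long, len,
		  unsigned long, prot, unsigned long, flags,
		  unsigned long, fd, unsigned long, pgoff)
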
*/ +#define F_GETLK64 12 /* using 'struct flock64' */ +#define F_SETLK64 13 +#define F_SETLKW64 14 + /* for F_[GET|SET]FL */ #define FD_CLOEXEC 1 /* actually anything with low bit set goes */ @@ -58,6 +62,14 @@ off_t l_start; off_t l_len; pid_t l_pid; +}; + +struct flock64 { + short l_type; + short l_whence; + loff_t l_start; + loff_t l_len; + pid_t l_pid; }; #endif /* _M68K_FCNTL_H */ diff -urN 2.2.15pre16/include/asm-m68k/smplock.h 2.2.15pre16aa3/include/asm-m68k/smplock.h --- 2.2.15pre16/include/asm-m68k/smplock.h Mon Jan 17 16:44:45 2000 +++ 2.2.15pre16aa3/include/asm-m68k/smplock.h Thu Mar 30 16:00:57 2000 @@ -15,8 +15,6 @@ do { \ if (task->lock_depth >= 0) \ spin_unlock(&kernel_flag); \ - release_irqlock(cpu); \ - __sti(); \ } while (0) /* diff -urN 2.2.15pre16/include/asm-m68k/stat.h 2.2.15pre16aa3/include/asm-m68k/stat.h --- 2.2.15pre16/include/asm-m68k/stat.h Mon Jan 17 16:44:45 2000 +++ 2.2.15pre16aa3/include/asm-m68k/stat.h Thu Mar 30 16:00:58 2000 @@ -38,4 +38,8 @@ unsigned long __unused5; }; +/* stat64 struct goes here -- someone please make + * it mesh with whatever glibc does in userland on + * m68k's. + */ #endif /* _M68K_STAT_H */ diff -urN 2.2.15pre16/include/asm-m68k/unistd.h 2.2.15pre16aa3/include/asm-m68k/unistd.h --- 2.2.15pre16/include/asm-m68k/unistd.h Mon Jan 17 16:44:45 2000 +++ 2.2.15pre16aa3/include/asm-m68k/unistd.h Thu Mar 30 16:00:58 2000 @@ -80,7 +80,7 @@ #define __NR_sigpending 73 #define __NR_sethostname 74 #define __NR_setrlimit 75 -#define __NR_getrlimit 76 +#define __NR_getrlimit 76 #define __NR_getrusage 77 #define __NR_gettimeofday 78 #define __NR_settimeofday 79 @@ -194,6 +194,13 @@ #define __NR_getpmsg 188 /* some people actually want streams */ #define __NR_putpmsg 189 /* some people actually want streams */ #define __NR_vfork 190 +/* #define __NR_getrlimit 191 */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 /* user-visible error numbers are in the range -1 - -122: see */ diff -urN 2.2.15pre16/include/asm-mips/fcntl.h 2.2.15pre16aa3/include/asm-mips/fcntl.h --- 2.2.15pre16/include/asm-mips/fcntl.h Mon Jan 17 16:44:45 2000 +++ 2.2.15pre16aa3/include/asm-mips/fcntl.h Thu Mar 30 16:00:58 2000 @@ -44,6 +44,10 @@ #define F_SETSIG 10 /* for sockets. */ #define F_GETSIG 11 /* for sockets. 
*/ +#define F_GETLK64 40 /* using 'struct flock64' */ +#define F_SETLK64 41 +#define F_SETLKW64 42 + /* for F_[GET|SET]FL */ #define FD_CLOEXEC 1 /* actually anything with low bit set goes */ @@ -72,5 +76,13 @@ __kernel_pid_t l_pid; long pad[4]; /* ZZZZZZZZZZZZZZZZZZZZZZZZZZ */ } flock_t; + +typedef struct flock64 { + short l_type; + short l_whence; + loff_t l_start; + loff_t l_len; + pid_t l_pid; +} flock64_t; #endif /* __ASM_MIPS_FCNTL_H */ diff -urN 2.2.15pre16/include/asm-mips/smplock.h 2.2.15pre16aa3/include/asm-mips/smplock.h --- 2.2.15pre16/include/asm-mips/smplock.h Mon Jan 17 16:44:45 2000 +++ 2.2.15pre16aa3/include/asm-mips/smplock.h Thu Mar 30 16:00:57 2000 @@ -18,8 +18,6 @@ do { \ if (task->lock_depth >= 0) \ spin_unlock(&kernel_flag); \ - release_irqlock(cpu); \ - __sti(); \ } while (0) /* diff -urN 2.2.15pre16/include/asm-ppc/fcntl.h 2.2.15pre16aa3/include/asm-ppc/fcntl.h --- 2.2.15pre16/include/asm-ppc/fcntl.h Mon Jan 17 16:44:45 2000 +++ 2.2.15pre16aa3/include/asm-ppc/fcntl.h Thu Mar 30 16:00:58 2000 @@ -18,6 +18,8 @@ #define FASYNC 020000 /* fcntl, for BSD compatibility */ #define O_DIRECTORY 040000 /* must be a directory */ #define O_NOFOLLOW 0100000 /* don't follow links */ +#define O_LARGEFILE 0200000 +#define O_DIRECT 0400000 /* direct disk access hint - currently ignored */ #define F_DUPFD 0 /* dup */ #define F_GETFD 1 /* get f_flags */ @@ -33,6 +35,10 @@ #define F_SETSIG 10 /* for sockets. */ #define F_GETSIG 11 /* for sockets. */ +#define F_GETLK64 12 /* using 'struct flock64' */ +#define F_SETLK64 13 +#define F_SETLKW64 14 + /* for F_[GET|SET]FL */ #define FD_CLOEXEC 1 /* actually anything with low bit set goes */ @@ -64,6 +70,14 @@ off_t l_start; off_t l_len; pid_t l_pid; +}; + +struct flock64 { + short l_type; + short l_whence; + loff_t l_start; + loff_t l_len; + pid_t l_pid; }; #endif diff -urN 2.2.15pre16/include/asm-ppc/smplock.h 2.2.15pre16aa3/include/asm-ppc/smplock.h --- 2.2.15pre16/include/asm-ppc/smplock.h Mon Jan 17 16:44:45 2000 +++ 2.2.15pre16aa3/include/asm-ppc/smplock.h Thu Mar 30 16:00:57 2000 @@ -15,8 +15,6 @@ do { \ if (task->lock_depth >= 0) \ spin_unlock(&kernel_flag); \ - release_irqlock(cpu); \ - __sti(); \ } while (0) /* diff -urN 2.2.15pre16/include/asm-s390/smplock.h 2.2.15pre16aa3/include/asm-s390/smplock.h --- 2.2.15pre16/include/asm-s390/smplock.h Mon Jan 17 16:44:45 2000 +++ 2.2.15pre16aa3/include/asm-s390/smplock.h Thu Mar 30 16:00:57 2000 @@ -18,8 +18,6 @@ do { \ if (task->lock_depth >= 0) \ spin_unlock(&kernel_flag); \ - release_irqlock(cpu); \ - __sti(); \ } while (0) /* diff -urN 2.2.15pre16/include/asm-sparc/fcntl.h 2.2.15pre16aa3/include/asm-sparc/fcntl.h --- 2.2.15pre16/include/asm-sparc/fcntl.h Mon Jan 17 16:44:46 2000 +++ 2.2.15pre16aa3/include/asm-sparc/fcntl.h Thu Mar 30 16:00:58 2000 @@ -19,6 +19,7 @@ #define O_NOCTTY 0x8000 /* not fcntl */ #define O_DIRECTORY 0x10000 /* must be a directory */ #define O_NOFOLLOW 0x20000 /* don't follow links */ +#define O_LARGEFILE 0x40000 /* LFS */ #define F_DUPFD 0 /* dup */ #define F_GETFD 1 /* get f_flags */ @@ -32,6 +33,9 @@ #define F_SETLKW 9 #define F_SETSIG 10 /* for sockets. */ #define F_GETSIG 11 /* for sockets. 
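
The flock64/F_*LK64 trio being added to each architecture's fcntl.h here is what lets 32-bit userland lock byte ranges past the old off_t limit. A hypothetical caller (sketch; assumes a libc that exposes the new constants):

	/* Sketch, not patch code: write-lock 4K at the 3G mark, which the
	 * 32-bit struct flock cannot express. */
	static int lock_beyond_2g(int fd)
	{
		struct flock64 fl;

		fl.l_type   = F_WRLCK;
		fl.l_whence = SEEK_SET;
		fl.l_start  = 3ULL << 30;	/* 3G: needs a loff_t */
		fl.l_len    = 4096;
		return fcntl(fd, F_SETLK64, &fl);
	}
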
*/ +#define F_GETLK64 12 +#define F_SETLK64 13 +#define F_SETLKW64 14 /* for F_[GET|SET]FL */ #define FD_CLOEXEC 1 /* actually anything with low bit set goes */ @@ -57,6 +61,15 @@ short l_whence; off_t l_start; off_t l_len; + pid_t l_pid; + short __unused; +}; + +struct flock64 { + short l_type; + short l_whence; + loff_t l_start; + loff_t l_len; pid_t l_pid; short __unused; }; diff -urN 2.2.15pre16/include/asm-sparc/smplock.h 2.2.15pre16aa3/include/asm-sparc/smplock.h --- 2.2.15pre16/include/asm-sparc/smplock.h Mon Jan 17 16:44:46 2000 +++ 2.2.15pre16aa3/include/asm-sparc/smplock.h Thu Mar 30 16:00:57 2000 @@ -15,8 +15,6 @@ do { \ if (task->lock_depth >= 0) \ spin_unlock(&kernel_flag); \ - release_irqlock(cpu); \ - __sti(); \ } while (0) /* diff -urN 2.2.15pre16/include/asm-sparc/stat.h 2.2.15pre16aa3/include/asm-sparc/stat.h --- 2.2.15pre16/include/asm-sparc/stat.h Mon Jan 17 16:44:46 2000 +++ 2.2.15pre16aa3/include/asm-sparc/stat.h Thu Mar 30 16:00:58 2000 @@ -1,4 +1,4 @@ -/* $Id: stat.h,v 1.9 1998/07/26 05:24:39 davem Exp $ */ +/* $Id: stat.h,v 1.10 1999/12/21 14:09:41 jj Exp $ */ #ifndef _SPARC_STAT_H #define _SPARC_STAT_H @@ -36,6 +36,42 @@ off_t st_blksize; off_t st_blocks; unsigned long __unused4[2]; +}; + +struct stat64 { + unsigned char __pad0[6]; + unsigned short st_dev; + unsigned char __pad1[4]; + + unsigned int st_ino; + unsigned int st_mode; + unsigned int st_nlink; + + unsigned int st_uid; + unsigned int st_gid; + + unsigned char __pad2[6]; + unsigned short st_rdev; + + unsigned char __pad3[8]; + + long long st_size; + unsigned int st_blksize; + + unsigned char __pad4[8]; + unsigned int st_blocks; + + unsigned int st_atime; + unsigned int __unused1; + + unsigned int st_mtime; + unsigned int __unused2; + + unsigned int st_ctime; + unsigned int __unused3; + + unsigned int __unused4; + unsigned int __unused5; }; #endif diff -urN 2.2.15pre16/include/asm-sparc/unistd.h 2.2.15pre16aa3/include/asm-sparc/unistd.h --- 2.2.15pre16/include/asm-sparc/unistd.h Mon Jan 17 16:44:46 2000 +++ 2.2.15pre16aa3/include/asm-sparc/unistd.h Thu Mar 30 16:00:58 2000 @@ -71,14 +71,14 @@ /* #define __NR_mctl 53 SunOS specific */ #define __NR_ioctl 54 /* Common */ #define __NR_reboot 55 /* Common */ -/* #define __NR_ni_syscall 56 ENOSYS under SunOS */ +#define __NR_mmap2 56 /* Linux sparc32 Specific */ #define __NR_symlink 57 /* Common */ #define __NR_readlink 58 /* Common */ #define __NR_execve 59 /* Common */ #define __NR_umask 60 /* Common */ #define __NR_chroot 61 /* Common */ #define __NR_fstat 62 /* Common */ -/* #define __NR_ni_syscall 63 ENOSYS under SunOS */ +#define __NR_fstat64 63 /* Linux sparc32 Specific */ #define __NR_getpagesize 64 /* Common */ #define __NR_msync 65 /* Common in newer 1.3.x revs... */ #define __NR_vfork 66 /* Common */ @@ -92,14 +92,14 @@ #define __NR_mprotect 74 /* Common */ /* #define __NR_madvise 75 SunOS Specific */ #define __NR_vhangup 76 /* Common */ -/* #define __NR_ni_syscall 77 ENOSYS under SunOS */ +#define __NR_truncate64 77 /* Linux sparc32 Specific */ /* #define __NR_mincore 78 SunOS Specific */ #define __NR_getgroups 79 /* Common */ #define __NR_setgroups 80 /* Common */ #define __NR_getpgrp 81 /* Common */ /* #define __NR_setpgrp 82 setpgid, same difference... 
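(The new sparc32 entries above reuse slots that were ENOSYS under SunOS, so the SunOS-compatible numbering is preserved. __NR_mmap2 is the interesting one: it takes its file offset in pages rather than bytes, so one 32-bit argument can address offsets up to 2^(32+PAGE_SHIFT). A sketch of the effect, assuming glibc's mmap64() wrapper, which converts the byte offset into a page count before trapping in:

        #define _LARGEFILE64_SOURCE
        #include <sys/mman.h>

        /* Map one page at byte offset 6 GB of a large file.  6 GB does
         * not fit in an unsigned long on a 32-bit ABI, but 6 GB divided
         * by the page size does -- which is all mmap2 needs. */
        void *map_high(int fd)
        {
                return mmap64(0, 4096, PROT_READ, MAP_SHARED, fd,
                              (off64_t) 6 << 30);
        }

)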
*/ #define __NR_setitimer 83 /* Common */ -/* #define __NR_ni_syscall 84 ENOSYS under SunOS */ +#define __NR_ftruncate64 84 /* Linux sparc32 Specific */ #define __NR_swapon 85 /* Common */ #define __NR_getitimer 86 /* Common */ /* #define __NR_gethostname 87 SunOS Specific */ @@ -147,14 +147,14 @@ #define __NR_truncate 129 /* Common */ #define __NR_ftruncate 130 /* Common */ #define __NR_flock 131 /* Common */ -/* #define __NR_ni_syscall 132 ENOSYS under SunOS */ +#define __NR_lstat64 132 /* Linux sparc32 Specific */ #define __NR_sendto 133 /* Common */ #define __NR_shutdown 134 /* Common */ #define __NR_socketpair 135 /* Common */ #define __NR_mkdir 136 /* Common */ #define __NR_rmdir 137 /* Common */ #define __NR_utimes 138 /* SunOS Specific */ -/* #define __NR_ni_syscall 139 ENOSYS under SunOS */ +#define __NR_stat64 139 /* Linux sparc32 Specific */ /* #define __NR_adjtime 140 SunOS Specific */ #define __NR_getpeername 141 /* Common */ /* #define __NR_gethostid 142 SunOS Specific */ diff -urN 2.2.15pre16/include/asm-sparc64/fcntl.h 2.2.15pre16aa3/include/asm-sparc64/fcntl.h --- 2.2.15pre16/include/asm-sparc64/fcntl.h Mon Jan 17 16:44:46 2000 +++ 2.2.15pre16aa3/include/asm-sparc64/fcntl.h Thu Mar 30 16:00:58 2000 @@ -19,6 +19,7 @@ #define O_NOCTTY 0x8000 /* not fcntl */ #define O_DIRECTORY 0x10000 /* must be a directory */ #define O_NOFOLLOW 0x20000 /* don't follow links */ +#define O_LARGEFILE 0x40000 #define F_DUPFD 0 /* dup */ #define F_GETFD 1 /* get f_flags */ @@ -32,6 +33,11 @@ #define F_SETLKW 9 #define F_SETSIG 10 /* for sockets. */ #define F_GETSIG 11 /* for sockets. */ +#ifdef __KERNEL__ +#define F_GETLK64 12 +#define F_SETLK64 13 +#define F_SETLKW64 14 +#endif /* for F_[GET|SET]FL */ #define FD_CLOEXEC 1 /* actually anything with low bit set goes */ @@ -58,7 +64,6 @@ off_t l_start; off_t l_len; pid_t l_pid; - short __unused; }; #ifdef __KERNEL__ @@ -70,6 +75,17 @@ __kernel_pid_t32 l_pid; short __unused; }; + +struct flock32_64 { + short l_type; + short l_whence; + __kernel_loff_t32 l_start; + __kernel_loff_t32 l_len; + __kernel_pid_t32 l_pid; + short __unused; +}; + +#define flock64 flock #endif #endif /* !(_SPARC64_FCNTL_H) */ diff -urN 2.2.15pre16/include/asm-sparc64/smplock.h 2.2.15pre16aa3/include/asm-sparc64/smplock.h --- 2.2.15pre16/include/asm-sparc64/smplock.h Mon Jan 17 16:44:46 2000 +++ 2.2.15pre16aa3/include/asm-sparc64/smplock.h Thu Mar 30 16:00:57 2000 @@ -16,8 +16,6 @@ do { \ if (task->lock_depth >= 0) \ spin_unlock(&kernel_flag); \ - release_irqlock(cpu); \ - __sti(); \ } while (0) /* diff -urN 2.2.15pre16/include/asm-sparc64/stat.h 2.2.15pre16aa3/include/asm-sparc64/stat.h --- 2.2.15pre16/include/asm-sparc64/stat.h Mon Jan 17 16:44:46 2000 +++ 2.2.15pre16aa3/include/asm-sparc64/stat.h Thu Mar 30 16:00:58 2000 @@ -1,4 +1,4 @@ -/* $Id: stat.h,v 1.5 1998/07/26 05:24:41 davem Exp $ */ +/* $Id: stat.h,v 1.6 1999/12/21 14:09:48 jj Exp $ */ #ifndef _SPARC64_STAT_H #define _SPARC64_STAT_H @@ -41,5 +41,46 @@ off_t st_blocks; unsigned long __unused4[2]; }; + +#ifdef __KERNEL__ +/* This is sparc32 stat64 structure. 
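(The sparc64 kernel keeps a verbatim copy of the sparc32 stat64 layout -- reproduced just below under __KERNEL__ -- so the compat entry points can fill it for 32-bit callers. From the 32-bit side the point is simply that st_size is a long long. A hedged user-space sketch, assuming glibc exposes the LFS interface:

        #define _LARGEFILE64_SOURCE
        #include <sys/stat.h>
        #include <stdio.h>

        /* With the stat64 syscall a 32-bit process can finally see
         * file sizes past 2 GB. */
        void show_size(const char *path)
        {
                struct stat64 st;

                if (stat64(path, &st) == 0)
                        printf("%s: %lld bytes\n", path,
                               (long long) st.st_size);
        }

)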
*/ + +struct stat64 { + unsigned char __pad0[6]; + unsigned short st_dev; + unsigned char __pad1[4]; + + unsigned int st_ino; + unsigned int st_mode; + unsigned int st_nlink; + + unsigned int st_uid; + unsigned int st_gid; + + unsigned char __pad2[6]; + unsigned short st_rdev; + + unsigned char __pad3[8]; + + long long st_size; + unsigned int st_blksize; + + unsigned char __pad4[8]; + unsigned int st_blocks; + + unsigned int st_atime; + unsigned int __unused1; + + unsigned int st_mtime; + unsigned int __unused2; + + unsigned int st_ctime; + unsigned int __unused3; + + unsigned int __unused4; + unsigned int __unused5; +}; + +#endif #endif diff -urN 2.2.15pre16/include/asm-sparc64/unistd.h 2.2.15pre16aa3/include/asm-sparc64/unistd.h --- 2.2.15pre16/include/asm-sparc64/unistd.h Mon Jan 17 16:44:46 2000 +++ 2.2.15pre16aa3/include/asm-sparc64/unistd.h Thu Mar 30 16:00:58 2000 @@ -71,14 +71,14 @@ /* #define __NR_mctl 53 SunOS specific */ #define __NR_ioctl 54 /* Common */ #define __NR_reboot 55 /* Common */ -/* #define __NR_ni_syscall 56 ENOSYS under SunOS */ +/* #define __NR_mmap2 56 Linux sparc32 Specific */ #define __NR_symlink 57 /* Common */ #define __NR_readlink 58 /* Common */ #define __NR_execve 59 /* Common */ #define __NR_umask 60 /* Common */ #define __NR_chroot 61 /* Common */ #define __NR_fstat 62 /* Common */ -/* #define __NR_ni_syscall 63 ENOSYS under SunOS */ +/* #define __NR_fstat64 63 Linux sparc32 Specific */ #define __NR_getpagesize 64 /* Common */ #define __NR_msync 65 /* Common in newer 1.3.x revs... */ #define __NR_vfork 66 /* Common */ @@ -92,14 +92,14 @@ #define __NR_mprotect 74 /* Common */ /* #define __NR_madvise 75 SunOS Specific */ #define __NR_vhangup 76 /* Common */ -/* #define __NR_ni_syscall 77 ENOSYS under SunOS */ +/* #define __NR_truncate64 77 Linux sparc32 Specific */ /* #define __NR_mincore 78 SunOS Specific */ #define __NR_getgroups 79 /* Common */ #define __NR_setgroups 80 /* Common */ #define __NR_getpgrp 81 /* Common */ /* #define __NR_setpgrp 82 setpgid, same difference... 
*/ #define __NR_setitimer 83 /* Common */ -/* #define __NR_ni_syscall 84 ENOSYS under SunOS */ +/* #define __NR_ftruncate64 84 Linux sparc32 Specific */ #define __NR_swapon 85 /* Common */ #define __NR_getitimer 86 /* Common */ /* #define __NR_gethostname 87 SunOS Specific */ @@ -147,19 +147,19 @@ #define __NR_truncate 129 /* Common */ #define __NR_ftruncate 130 /* Common */ #define __NR_flock 131 /* Common */ -/* #define __NR_ni_syscall 132 ENOSYS under SunOS */ +/* #define __NR_lstat64 132 Linux sparc32 Specific */ #define __NR_sendto 133 /* Common */ #define __NR_shutdown 134 /* Common */ #define __NR_socketpair 135 /* Common */ #define __NR_mkdir 136 /* Common */ #define __NR_rmdir 137 /* Common */ #define __NR_utimes 138 /* SunOS Specific */ -/* #define __NR_ni_syscall 139 ENOSYS under SunOS */ +/* #define __NR_stat64 139 Linux sparc32 Specific */ /* #define __NR_adjtime 140 SunOS Specific */ #define __NR_getpeername 141 /* Common */ /* #define __NR_gethostid 142 SunOS Specific */ /* #define __NR_ni_syscall 143 ENOSYS under SunOS */ -#define __NR_getrlimit 144 /* Common */ +#define __NR_getrlimit 144 /* Common */ #define __NR_setrlimit 145 /* Common */ /* #define __NR_killpg 146 SunOS Specific */ #define __NR_prctl 147 /* ENOSYS under SunOS */ diff -urN 2.2.15pre16/include/linux/bigmem.h 2.2.15pre16aa3/include/linux/bigmem.h --- 2.2.15pre16/include/linux/bigmem.h Thu Jan 1 01:00:00 1970 +++ 2.2.15pre16aa3/include/linux/bigmem.h Thu Mar 30 16:00:57 2000 @@ -0,0 +1,50 @@ +#ifndef _LINUX_BIGMEM_H +#define _LINUX_BIGMEM_H + +#include + +#ifdef CONFIG_BIGMEM + +#include + +/* declarations for linux/mm/bigmem.c */ +extern unsigned long bigmem_mapnr; +extern int nr_free_bigpages; + +extern struct page * prepare_bigmem_swapout(struct page *); +extern struct page * replace_with_bigmem(struct page *); +extern unsigned long prepare_bigmem_shm_swapin(unsigned long); + +#else /* CONFIG_BIGMEM */ + +#define prepare_bigmem_swapout(page) page +#define replace_with_bigmem(page) page +#define prepare_bigmem_shm_swapin(page) page +#define kmap(kaddr, type) kaddr +#define kunmap(vaddr, type) do { } while (0) +#define nr_free_bigpages 0 + +#endif /* CONFIG_BIGMEM */ + +/* when CONFIG_BIGMEM is not set these will be plain clear/copy_page */ +extern inline void clear_bigpage(unsigned long kaddr) +{ + unsigned long vaddr; + + vaddr = kmap(kaddr, KM_WRITE); + clear_page(vaddr); + kunmap(vaddr, KM_WRITE); +} + +extern inline void copy_bigpage(unsigned long to, unsigned long from) +{ + unsigned long vfrom, vto; + + vfrom = kmap(from, KM_READ); + vto = kmap(to, KM_WRITE); + copy_page(vto, vfrom); + kunmap(vfrom, KM_READ); + kunmap(vto, KM_WRITE); +} + +#endif /* _LINUX_BIGMEM_H */ diff -urN 2.2.15pre16/include/linux/blkdev.h 2.2.15pre16aa3/include/linux/blkdev.h --- 2.2.15pre16/include/linux/blkdev.h Wed Mar 29 14:57:46 2000 +++ 2.2.15pre16aa3/include/linux/blkdev.h Thu Mar 30 16:00:56 2000 @@ -32,11 +32,38 @@ struct buffer_head * bh; struct buffer_head * bhtail; struct request * next; + int elevator_latency; }; typedef void (request_fn_proc) (void); typedef struct request ** (queue_proc) (kdev_t dev); +typedef struct elevator_s +{ + int read_latency; + int write_latency; + int max_bomb_segments; +} elevator_t; + +#define ELEVATOR_DEFAULTS \ +((elevator_t) { \ + 128, /* read_latency */ \ + 8192, /* write_latency */ \ + 4, /* max_bomb_segments */ \ + }) + +extern int blkelv_ioctl(kdev_t, unsigned long, unsigned long); + +typedef struct blkelv_ioctl_arg_s { + void * queue_ID; + int read_latency; + int 
write_latency; + int max_bomb_segments; +} blkelv_ioctl_arg_t; + +#define BLKELVGET _IO(0x12,106) +#define BLKELVSET _IO(0x12,107) + struct blk_dev_struct { request_fn_proc *request_fn; /* @@ -47,6 +74,8 @@ struct request *current_request; struct request plug; struct tq_struct plug_tq; + + elevator_t elevator; }; struct sec_size { diff -urN 2.2.15pre16/include/linux/dcache.h 2.2.15pre16aa3/include/linux/dcache.h --- 2.2.15pre16/include/linux/dcache.h Fri Jan 7 18:19:21 2000 +++ 2.2.15pre16aa3/include/linux/dcache.h Thu Mar 30 16:00:56 2000 @@ -143,7 +143,7 @@ /* dcache memory management */ extern void shrink_dcache_memory(int, unsigned int); extern void check_dcache_memory(void); -extern void free_inode_memory(int); /* defined in fs/inode.c */ +extern void free_inode_memory(void); /* defined in fs/inode.c */ /* only used at mount-time */ extern struct dentry * d_alloc_root(struct inode * root_inode, struct dentry * old_root); diff -urN 2.2.15pre16/include/linux/ext2_fs_i.h 2.2.15pre16aa3/include/linux/ext2_fs_i.h --- 2.2.15pre16/include/linux/ext2_fs_i.h Tue Feb 1 18:24:19 2000 +++ 2.2.15pre16aa3/include/linux/ext2_fs_i.h Thu Mar 30 16:00:58 2000 @@ -35,7 +35,6 @@ __u32 i_next_alloc_goal; __u32 i_prealloc_block; __u32 i_prealloc_count; - __u32 i_high_size; int i_new_inode:1; /* Is a freshly allocated inode */ }; diff -urN 2.2.15pre16/include/linux/fs.h 2.2.15pre16aa3/include/linux/fs.h --- 2.2.15pre16/include/linux/fs.h Wed Mar 29 14:57:44 2000 +++ 2.2.15pre16aa3/include/linux/fs.h Thu Mar 30 16:00:58 2000 @@ -154,6 +154,10 @@ #define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */ #define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */ #define BLKSSZGET _IO(0x12,104) /* get block device sector size */ +#if 0 +#define BLKELVGET _IO(0x12,106) +#define BLKELVSET _IO(0x12,107) +#endif #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ @@ -256,6 +260,25 @@ #define buffer_page(bh) (mem_map + MAP_NR((bh)->b_data)) #define touch_buffer(bh) set_bit(PG_referenced, &buffer_page(bh)->flags) +/* log of base-2 for filesystem uses, in case their super-blocks + don't have the shift counts readily calculated.. -- presuming + the divisors in question are power-of-two values! 
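(Back to the elevator interface in blkdev.h above: BLKELVGET/BLKELVSET export the per-queue latency bounds to user space. A sketch of a tuning utility -- the device path and values are illustrative only, and the struct layout is copied from the patch above:

        #include <fcntl.h>
        #include <sys/ioctl.h>

        #define BLKELVGET _IO(0x12,106)         /* as defined above */
        #define BLKELVSET _IO(0x12,107)

        typedef struct blkelv_ioctl_arg_s {     /* layout from blkdev.h */
                void *queue_ID;
                int read_latency;
                int write_latency;
                int max_bomb_segments;
        } blkelv_ioctl_arg_t;

        int main(void)
        {
                blkelv_ioctl_arg_t arg;
                int fd = open("/dev/hda", O_RDONLY);    /* illustrative */

                if (fd < 0 || ioctl(fd, BLKELVGET, &arg) < 0)
                        return 1;
                arg.read_latency = 64;  /* let reads pass queued writes sooner */
                return ioctl(fd, BLKELVSET, &arg) < 0;
        }

)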
*/ +static int fslog2(unsigned long val) __attribute__ ((const)); +static __inline__ int fslog2(unsigned long val) +{ + int i; + for (i = 0; val != 0; ++i, val >>= 1) { + if (val & 1) return i; + } + return 0; +} + +static int off_t_presentable(loff_t) __attribute((const)); +static __inline__ int off_t_presentable(loff_t loff) +{ + return ((unsigned long long)loff < (long)(~0UL >> 1)); +} + #include #include #include @@ -306,7 +329,7 @@ umode_t ia_mode; uid_t ia_uid; gid_t ia_gid; - off_t ia_size; + loff_t ia_size; time_t ia_atime; time_t ia_mtime; time_t ia_ctime; @@ -341,7 +364,7 @@ uid_t i_uid; gid_t i_gid; kdev_t i_rdev; - off_t i_size; + loff_t i_size; time_t i_atime; time_t i_mtime; time_t i_ctime; @@ -418,7 +441,7 @@ mode_t f_mode; loff_t f_pos; unsigned int f_count, f_flags; - unsigned long f_reada, f_ramax, f_raend, f_ralen, f_rawin; + loff_t f_reada, f_ramax, f_raend, f_ralen, f_rawin; struct fown_struct f_owner; unsigned int f_uid, f_gid; int f_error; @@ -458,8 +481,8 @@ struct file *fl_file; unsigned char fl_flags; unsigned char fl_type; - off_t fl_start; - off_t fl_end; + loff_t fl_start; + loff_t fl_end; void (*fl_notify)(struct file_lock *); /* unblock callback */ @@ -475,6 +498,9 @@ extern int fcntl_getlk(unsigned int fd, struct flock *l); extern int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l); +extern int fcntl_getlk64(unsigned int fd, struct flock64 *l); +extern int fcntl_setlk64(unsigned int fd, unsigned int cmd, struct flock64 *l); + /* fs/locks.c */ extern void locks_remove_posix(struct file *, fl_owner_t id); extern void locks_remove_flock(struct file *); @@ -696,7 +722,7 @@ asmlinkage int sys_open(const char *, int, int); asmlinkage int sys_close(unsigned int); /* yes, it's really unsigned */ -extern int do_truncate(struct dentry *, unsigned long); +extern int do_truncate(struct dentry *, loff_t); extern int get_unused_fd(void); extern void put_unused_fd(unsigned int); @@ -759,9 +785,18 @@ #define BUF_CLEAN 0 #define BUF_LOCKED 1 /* Buffers scheduled for write */ #define BUF_DIRTY 2 /* Dirty buffers, not yet scheduled for write */ -#define NR_LIST 3 +#define BUF_PROTECTED 3 /* Ramdisk persistent storage */ +#define NR_LIST 4 void mark_buffer_uptodate(struct buffer_head * bh, int on); + +extern inline void mark_buffer_protected(struct buffer_head * bh) +{ + if (!test_and_set_bit(BH_Protected, &bh->b_state)) { + if (bh->b_list != BUF_PROTECTED) + refile_buffer(bh); + } +} extern inline void mark_buffer_clean(struct buffer_head * bh) { diff -urN 2.2.15pre16/include/linux/iobuf.h 2.2.15pre16aa3/include/linux/iobuf.h --- 2.2.15pre16/include/linux/iobuf.h Thu Jan 1 01:00:00 1970 +++ 2.2.15pre16aa3/include/linux/iobuf.h Thu Mar 30 16:00:57 2000 @@ -0,0 +1,82 @@ +/* + * iobuf.h + * + * Defines the structures used to track abstract kernel-space io buffers. + * + */ + +#ifndef __LINUX_IOBUF_H +#define __LINUX_IOBUF_H + +#include +#include + +/* + * The kiobuf structure describes a physical set of pages reserved + * locked for IO. The reference counts on each page will have been + * incremented, and the flags field will indicate whether or not we have + * pre-locked all of the pages for IO. + * + * kiobufs may be passed in arrays to form a kiovec, but we must + * preserve the property that no page is present more than once over the + * entire iovec. 
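(The kiobuf comment above states the contract; the declarations follow on the next lines. A condensed kernel-side sketch of the intended call sequence for zero-copy device I/O into a user buffer, based only on those prototypes -- error paths, size limits and the bounce-page case for BIGMEM are elided:

        /* Sketch only: read `len' bytes from consecutive blocks of `dev'
         * straight into the user buffer at `uaddr', with no intermediate
         * copy through the page cache. */
        int kiovec_read_sketch(kdev_t dev, unsigned long first_block,
                               int blocksize, unsigned long uaddr, size_t len)
        {
                struct kiobuf *iobuf;
                unsigned long blocks[KIO_MAX_SECTORS];
                int i, err;

                err = alloc_kiovec(1, &iobuf);
                if (err)
                        return err;
                /* Fault in and pin the user pages; fills iobuf->maplist. */
                err = map_user_kiobuf(READ, iobuf, uaddr, len);
                if (!err) {
                        for (i = 0; i < len / blocksize; i++)
                                blocks[i] = first_block + i;
                        err = brw_kiovec(READ, 1, &iobuf, dev,
                                         blocks, blocksize);
                        unmap_kiobuf(iobuf);
                }
                free_kiovec(1, &iobuf);
                return err;
        }

)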
+ */ + +#define KIO_MAX_ATOMIC_IO 64 /* in kb */ +#define KIO_MAX_ATOMIC_BYTES (64 * 1024) +#define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10)) +#define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2) + +struct kiobuf +{ + int nr_pages; /* Pages actually referenced */ + int array_len; /* Space in the allocated lists */ + int offset; /* Offset to start of valid data */ + int length; /* Number of valid bytes of data */ + + /* Keep separate track of the physical addresses and page + * structs involved. If we do IO to a memory-mapped device + * region, there won't necessarily be page structs defined for + * every address. */ + + unsigned long * pagelist; + struct page ** maplist; + unsigned long * bouncelist; + + unsigned int locked : 1; /* If set, pages have been locked */ + unsigned int bounced : 1; /* If set, bounce pages are set up */ + + /* Always embed enough struct pages for 64k of IO */ + unsigned long page_array[KIO_STATIC_PAGES]; + struct page * map_array[KIO_STATIC_PAGES]; + unsigned long bounce_array[KIO_STATIC_PAGES]; +}; + + +/* mm/memory.c */ + +int map_user_kiobuf(int rw, struct kiobuf *, unsigned long va, size_t len); +void unmap_kiobuf(struct kiobuf *iobuf); + +/* fs/iobuf.c */ + +void __init kiobuf_init(void); +int alloc_kiovec(int nr, struct kiobuf **); +void free_kiovec(int nr, struct kiobuf **); +int expand_kiobuf(struct kiobuf *, int); +int setup_kiobuf_bounce_pages(struct kiobuf *, int gfp_mask); +void clear_kiobuf_bounce_pages(struct kiobuf *); +void kiobuf_copy_bounce(struct kiobuf *, int direction, int max); + +/* Direction codes for kiobuf_copy_bounce: */ +enum { + COPY_TO_BOUNCE, + COPY_FROM_BOUNCE +}; + +/* fs/buffer.c */ + +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], + kdev_t dev, unsigned long b[], int size); + +#endif /* __LINUX_IOBUF_H */ diff -urN 2.2.15pre16/include/linux/kernel.h 2.2.15pre16aa3/include/linux/kernel.h --- 2.2.15pre16/include/linux/kernel.h Thu Feb 3 17:56:05 2000 +++ 2.2.15pre16aa3/include/linux/kernel.h Thu Mar 30 16:00:57 2000 @@ -90,7 +90,9 @@ unsigned long totalswap; /* Total swap space size */ unsigned long freeswap; /* swap space still available */ unsigned short procs; /* Number of current processes */ - char _f[22]; /* Pads structure to 64 bytes */ + unsigned long totalbig; /* Total big memory size */ + unsigned long freebig; /* Available big memory size */ + char _f[20-2*sizeof(long)]; /* Padding: libc5 uses this.. */ }; #endif diff -urN 2.2.15pre16/include/linux/lvm.h 2.2.15pre16aa3/include/linux/lvm.h --- 2.2.15pre16/include/linux/lvm.h Thu Jan 1 01:00:00 1970 +++ 2.2.15pre16aa3/include/linux/lvm.h Thu Mar 30 16:00:57 2000 @@ -0,0 +1,827 @@ +/* + * kernel/lvm.h + * + * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Germany + * + * February-November 1997 + * May-July 1998 + * January-March,July,September,October,December 1999 + * January 2000 + * + * lvm is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * lvm is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING.
If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +/* + * Changelog + * + * 10/10/1997 - beginning of new structure creation + * 12/05/1998 - incorporated structures from lvm_v1.h and deleted lvm_v1.h + * 07/06/1998 - avoided LVM_KMALLOC_MAX define by using vmalloc/vfree + * instead of kmalloc/kfree + * 01/07/1998 - fixed wrong LVM_MAX_SIZE + * 07/07/1998 - extended pe_t structure by ios member (for statistics) + * 02/08/1998 - changes for official char/block major numbers + * 07/08/1998 - avoided init_module() and cleanup_module() to be static + * 29/08/1998 - separated core and disk structure type definitions + * 01/09/1998 - merged kernel integration version (mike) + * 20/01/1999 - added LVM_PE_DISK_OFFSET macro for use in + * vg_read_with_pv_and_lv(), pv_move_pe(), pv_show_pe_text()... + * 18/02/1999 - added definition of time_disk_t structure; + * keeps time stamps on disk for nonatomic writes (future) + * 15/03/1999 - corrected LV() and VG() macro definition to use argument + * instead of minor + * 03/07/1999 - define for genhd.c name handling + * 23/07/1999 - implemented snapshot part + * 08/12/1999 - changed LVM_LV_SIZE_MAX macro to reflect current 1TB limit + * 01/01/2000 - extended lv_v2 core structure by wait_queue member + * + */ + + +#ifndef _LVM_H_INCLUDE +#define _LVM_H_INCLUDE + +#define _LVM_H_VERSION "LVM 0.8 (1/1/2000)" + +/* + * preprocessor definitions + */ +/* if you like emergency reset code in the driver */ +#define LVM_TOTAL_RESET + +#define LVM_GET_INODE +#define LVM_HD_NAME + +/* lots of debugging output (see driver source) +#define DEBUG_LVM_GET_INFO +#define DEBUG +#define DEBUG_MAP +#define DEBUG_MAP_SIZE +#define DEBUG_IOCTL +#define DEBUG_READ +#define DEBUG_GENDISK +#define DEBUG_VG_CREATE +#define DEBUG_LVM_BLK_OPEN +#define DEBUG_VFREE +#define DEBUG_SNAPSHOT +*/ +/* + * end of preprocessor definitions + */ + +#ifndef LINUX_VERSION_CODE +# include + /* for 2.0.x series */ +# ifndef KERNEL_VERSION +# define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) +# endif +#endif + +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION ( 2, 3 ,0) +# include +#else +# include +#endif + +/* leave this for now until major.h is updated (mike) */ +#ifndef LVM_BLK_MAJOR +# define LVM_BLK_MAJOR 58 +#endif +#ifndef LVM_CHAR_MAJOR +# define LVM_CHAR_MAJOR 109 +#endif + +#if !defined ( LVM_BLK_MAJOR) || !defined ( LVM_CHAR_MAJOR) + #error Bad include/linux/major.h - LVM MAJOR undefined +#endif + + +#if LINUX_VERSION_CODE < KERNEL_VERSION ( 2, 1 ,0) +# ifndef uint8_t +# define uint8_t __u8 +# endif +# ifndef uint16_t +# define uint16_t __u16 +# endif +# ifndef uint32_t +# define uint32_t __u32 +# endif +# ifndef uint64_t +# define uint64_t __u64 +# endif +#endif + +#define LVM_STRUCT_VERSION 1 /* structure version */ + +#ifndef min +#define min(a,b) (((a)<(b))?(a):(b)) +#endif +#ifndef max +#define max(a,b) (((a)>(b))?(a):(b)) +#endif + +/* set the default structure version */ +#if ( LVM_STRUCT_VERSION == 1) +# define pv_t pv_v1_t +# define lv_t lv_v2_t +# define vg_t vg_v1_t +# define pv_disk_t pv_disk_v1_t +# define lv_disk_t lv_disk_v1_t +# define vg_disk_t vg_disk_v1_t +# define lv_exception_t lv_v2_exception_t +#endif + + +/* + * i/o protocol version + * + * defined here for the driver and defined separately in the + * user land LVM parts + * + */ +#define LVM_DRIVER_IOP_VERSION 6 + +#define LVM_NAME "lvm" + +/* + * VG/LV indexing macros + */ +/* character minor maps
directly to volume group */ +#define VG_CHR(a) ( a) + +/* block minor indexes into a volume group/logical volume indirection table */ +#define VG_BLK(a) ( vg_lv_map[a].vg_number) +#define LV_BLK(a) ( vg_lv_map[a].lv_number) + +/* + * absolute limits for VGs, PVs per VG and LVs per VG + */ +#define ABS_MAX_VG 99 +#define ABS_MAX_PV 256 +#define ABS_MAX_LV 256 /* caused by 8 bit minor */ + +#define MAX_VG ABS_MAX_VG +#define MAX_LV ABS_MAX_LV +#define MAX_PV ABS_MAX_PV + +#if ( MAX_VG > ABS_MAX_VG) +# undef MAX_VG +# define MAX_VG ABS_MAX_VG +#endif + +#if ( MAX_LV > ABS_MAX_LV) +# undef MAX_LV +# define MAX_LV ABS_MAX_LV +#endif + + +/* + * VGDA: default disk spaces and offsets + * + * there's space after the structures for later extensions. + * + * offset what size + * --------------- ---------------------------------- ------------ + * 0 physical volume structure ~500 byte + * + * 1K volume group structure ~200 byte + * + * 5K time stamp structure ~ + * + * 6K namelist of physical volumes 128 byte each + * + * 6k + n * 128byte n logical volume structures ~300 byte each + * + * + m * 328byte m physical extent alloc. structs 4 byte each + * + * End of disk - first physical extent typical 4 megabyte + * PE total * + * PE size + * + * + */ + +/* DONT TOUCH THESE !!! */ +/* base of PV structure in disk partition */ +#define LVM_PV_DISK_BASE 0L + +/* size reserved for PV structure on disk */ +#define LVM_PV_DISK_SIZE 1024L + +/* base of VG structure in disk partition */ +#define LVM_VG_DISK_BASE LVM_PV_DISK_SIZE + +/* size reserved for VG structure */ +#define LVM_VG_DISK_SIZE ( 9 * 512L) + +/* size reserved for timekeeping */ +#define LVM_TIMESTAMP_DISK_BASE ( LVM_VG_DISK_BASE + LVM_VG_DISK_SIZE) +#define LVM_TIMESTAMP_DISK_SIZE 512L /* reserved for timekeeping */ + +/* name list of physical volumes on disk */ +#define LVM_PV_NAMELIST_DISK_BASE ( LVM_TIMESTAMP_DISK_BASE + \ + LVM_TIMESTAMP_DISK_SIZE) + +/* now for the dynamically calculated parts of the VGDA */ +#define LVM_LV_DISK_OFFSET(a, b) ( (a)->lv_on_disk.base + sizeof ( lv_t) * b) +#define LVM_DISK_SIZE(pv) ( (pv)->pe_on_disk.base + \ + (pv)->pe_on_disk.size) +#define LVM_PE_DISK_OFFSET(pe, pv) ( pe * pv->pe_size + \ + ( LVM_DISK_SIZE ( pv) / SECTOR_SIZE)) +#define LVM_PE_ON_DISK_BASE(pv) \ + { int rest; \ + pv->pe_on_disk.base = pv->lv_on_disk.base + pv->lv_on_disk.size; \ + if ( ( rest = pv->pe_on_disk.base % SECTOR_SIZE) != 0) \ + pv->pe_on_disk.base += ( SECTOR_SIZE - rest); \ + } +/* END default disk spaces and offsets for PVs */ + + +/* + * LVM_PE_T_MAX corresponds to: + * + * 8KB PE size can map a ~512 MB logical volume at the cost of 1MB memory, + * + * 128MB PE size can map a 8TB logical volume at the same cost of memory. + * + * Default PE size of 4 MB gives a maximum logical volume size of 256 GB. + * + * Maximum PE size of 16GB gives a maximum logical volume size of 1024 TB. + * + * AFAIK, the actual kernels limit this to 1 TB. + * + * Should be a sufficient spectrum ;*) + */ + +/* This is the usable size of disk_pe_t.le_num !!! v v */ +#define LVM_PE_T_MAX ( ( 1 << ( sizeof ( uint16_t) * 8)) - 2) + +#define LVM_LV_SIZE_MAX(a) ( ( long long) LVM_PE_T_MAX * (a)->pe_size > ( long long) 2*1024*1024*1024 ? 
( long long) 2*1024*1024*1024 : ( long long) LVM_PE_T_MAX * (a)->pe_size) +#define LVM_MIN_PE_SIZE ( 8L * 2) /* 8 KB in sectors */ +#define LVM_MAX_PE_SIZE ( 16L * 1024L * 1024L * 2) /* 16GB in sectors */ +#define LVM_DEFAULT_PE_SIZE ( 4096L * 2) /* 4 MB in sectors */ +#define LVM_DEFAULT_STRIPE_SIZE 16L /* 16 KB */ +#define LVM_MIN_STRIPE_SIZE 2L /* 1 KB in sectors */ +#define LVM_MAX_STRIPE_SIZE ( 512L * 2) /* 512 KB in sectors */ +#define LVM_MAX_STRIPES 128 /* max # of stripes */ +#define LVM_MAX_SIZE ( 1024LU * 1024 * 1024 * 2) /* 1TB[sectors] */ +#define LVM_MAX_MIRRORS 2 /* future use */ +#define LVM_MIN_READ_AHEAD 0 /* minimum read ahead sectors */ +#define LVM_MAX_READ_AHEAD 256 /* maximum read ahead sectors */ +#define LVM_DEF_READ_AHEAD ((LVM_MAX_READ_AHEAD-LVM_MIN_READ_AHEAD)/2 + LVM_MIN_READ_AHEAD) +#define LVM_MAX_LV_IO_TIMEOUT 60 /* seconds I/O timeout (future use) */ +#define LVM_PARTITION 0xfe /* LVM partition id */ +#define LVM_NEW_PARTITION 0x8e /* new LVM partition id (10/09/1999) */ +#define LVM_PE_SIZE_PV_SIZE_REL 5 /* max relation PV size and PE size */ + +#define LVM_SNAPSHOT_MAX_CHUNK 256 /* 256 KB */ +#define LVM_SNAPSHOT_DEF_CHUNK 64 /* 64 KB */ +#define LVM_SNAPSHOT_MIN_CHUNK 1 /* 1 KB */ + +#define UNDEF -1 +#define FALSE 0 +#define TRUE 1 + + +/* + * ioctls + */ +/* volume group */ +#define VG_CREATE _IOW ( 0xfe, 0x00, 1) +#define VG_REMOVE _IOW ( 0xfe, 0x01, 1) + +#define VG_EXTEND _IOW ( 0xfe, 0x03, 1) +#define VG_REDUCE _IOW ( 0xfe, 0x04, 1) + +#define VG_STATUS _IOWR ( 0xfe, 0x05, 1) +#define VG_STATUS_GET_COUNT _IOWR ( 0xfe, 0x06, 1) +#define VG_STATUS_GET_NAMELIST _IOWR ( 0xfe, 0x07, 1) + +#define VG_SET_EXTENDABLE _IOW ( 0xfe, 0x08, 1) + + +/* logical volume */ +#define LV_CREATE _IOW ( 0xfe, 0x20, 1) +#define LV_REMOVE _IOW ( 0xfe, 0x21, 1) + +#define LV_ACTIVATE _IO ( 0xfe, 0x22) +#define LV_DEACTIVATE _IO ( 0xfe, 0x23) + +#define LV_EXTEND _IOW ( 0xfe, 0x24, 1) +#define LV_REDUCE _IOW ( 0xfe, 0x25, 1) + +#define LV_STATUS_BYNAME _IOWR ( 0xfe, 0x26, 1) +#define LV_STATUS_BYINDEX _IOWR ( 0xfe, 0x27, 1) + +#define LV_SET_ACCESS _IOW ( 0xfe, 0x28, 1) +#define LV_SET_ALLOCATION _IOW ( 0xfe, 0x29, 1) +#define LV_SET_STATUS _IOW ( 0xfe, 0x2a, 1) + +#define LE_REMAP _IOW ( 0xfe, 0x2b, 1) + + +/* physical volume */ +#define PV_STATUS _IOWR ( 0xfe, 0x40, 1) +#define PV_CHANGE _IOWR ( 0xfe, 0x41, 1) +#define PV_FLUSH _IOW ( 0xfe, 0x42, 1) + +/* physical extent */ +#define PE_LOCK_UNLOCK _IOW ( 0xfe, 0x50, 1) + +/* i/o protocol version */ +#define LVM_GET_IOP_VERSION _IOR ( 0xfe, 0x98, 1) + +#ifdef LVM_TOTAL_RESET +/* special reset function for testing purposes */ +#define LVM_RESET _IO ( 0xfe, 0x99) +#endif + +/* lock the logical volume manager */ +#define LVM_LOCK_LVM _IO ( 0xfe, 0x100) +/* END ioctls */ + + +/* + * Status flags + */ +/* volume group */ +#define VG_ACTIVE 0x01 /* vg_status */ +#define VG_EXPORTED 0x02 /* " */ +#define VG_EXTENDABLE 0x04 /* " */ + +#define VG_READ 0x01 /* vg_access */ +#define VG_WRITE 0x02 /* " */ + +/* logical volume */ +#define LV_ACTIVE 0x01 /* lv_status */ +#define LV_SPINDOWN 0x02 /* " */ + +#define LV_READ 0x01 /* lv_access */ +#define LV_WRITE 0x02 /* " */ +#define LV_SNAPSHOT 0x04 /* " */ +#define LV_SNAPSHOT_ORG 0x08 /* " */ + +#define LV_BADBLOCK_ON 0x01 /* lv_badblock */ + +#define LV_STRICT 0x01 /* lv_allocation */ +#define LV_CONTIGUOUS 0x02 /* " */ + +/* physical volume */ +#define PV_ACTIVE 0x01 /* pv_status */ +#define PV_ALLOCATABLE 0x02 /* pv_allocatable */ + + +/* + * Structure definitions core/disk follow + 
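(Spelling out the arithmetic behind the LVM_PE_T_MAX comment above: le_num is 16 bits with one value reserved, so an LV holds at most 65534 extents, and LVM_LV_SIZE_MAX additionally clamps the result to 2^31 sectors, i.e. the 1 TB kernel limit noted in the changelog. A quick stand-alone check:

        #include <stdio.h>

        int main(void)
        {
                unsigned long long max_pe = (1ULL << 16) - 2;   /* 65534 */
                unsigned long long pe_def = 4ULL << 20;         /* 4 MB default PE */
                unsigned long long pe_max = 16ULL << 30;        /* 16 GB max PE */

                printf("default: ~%llu GB per LV\n",
                       max_pe * pe_def >> 30);                  /* ~256 */
                printf("maximum: ~%llu TB per LV\n",
                       max_pe * pe_max >> 40);                  /* ~1024 */
                return 0;
        }

)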
* + * conditional conversion takes place on big endian architectures + * in functions * pv_copy_*(), vg_copy_*() and lv_copy_*() + * + */ + +#define NAME_LEN 128 /* don't change!!! */ +#define UUID_LEN 16 /* don't change!!! */ + +/* remap physical sector/rdev pairs */ +typedef struct { + struct list_head hash; + ulong rsector_org; + kdev_t rdev_org; + ulong rsector_new; + kdev_t rdev_new; +} lv_block_exception_t; + + +/* disk stored pe information */ +typedef struct { + uint16_t lv_num; + uint16_t le_num; +} disk_pe_t; + +/* disk stored PV, VG, LV and PE size and offset information */ +typedef struct { + uint32_t base; + uint32_t size; +} lvm_disk_data_t; + + +/* + * Structure Physical Volume (PV) Version 1 + */ + +/* core */ +typedef struct { + uint8_t id[2]; /* Identifier */ + uint16_t version; /* HM lvm version */ + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_namelist_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + uint8_t pv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ + kdev_t pv_dev; + uint32_t pv_number; + uint32_t pv_status; + uint32_t pv_allocatable; + uint32_t pv_size; /* HM */ + uint32_t lv_cur; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; + uint32_t pe_stale; /* for future use */ + disk_pe_t *pe; /* HM */ + struct inode *inode; /* HM */ +} pv_v1_t; + +/* disk */ +typedef struct { + uint8_t id[2]; /* Identifier */ + uint16_t version; /* HM lvm version */ + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_namelist_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + uint8_t pv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ + uint32_t pv_major; + uint32_t pv_number; + uint32_t pv_status; + uint32_t pv_allocatable; + uint32_t pv_size; /* HM */ + uint32_t lv_cur; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; + uint32_t dummy1; + uint32_t dummy2; + uint32_t dummy3; +} pv_disk_v1_t; + + +/* + * Structure Physical Volume (PV) Version 2 (future!) 
+ */ + +typedef struct { + uint8_t id[2]; /* Identifier */ + uint16_t version; /* HM lvm version */ + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_uuid_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + uint8_t pv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ + kdev_t pv_dev; + uint32_t pv_number; + uint32_t pv_status; + uint32_t pv_allocatable; + uint32_t pv_size; /* HM */ + uint32_t lv_cur; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; + uint32_t pe_stale; /* for future use */ + disk_pe_t *pe; /* HM */ + struct inode *inode; /* HM */ + /* delta to version 1 starts here */ + uint8_t pv_uuid[UUID_LEN]; + uint32_t pv_atime; /* PV access time */ + uint32_t pv_ctime; /* PV creation time */ + uint32_t pv_mtime; /* PV modification time */ +} pv_v2_t; + + +/* + * Structures for Logical Volume (LV) + */ + +/* core PE information */ +typedef struct { + kdev_t dev; + uint32_t pe; /* to be changed if > 2TB */ + uint32_t reads; + uint32_t writes; +} pe_t; + +typedef struct { + uint8_t lv_name[NAME_LEN]; + kdev_t old_dev; + kdev_t new_dev; + ulong old_pe; + ulong new_pe; +} le_remap_req_t; + + + +/* + * Structure Logical Volume (LV) Version 1 + */ + +/* core */ +typedef struct { + uint8_t lv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint32_t lv_access; + uint32_t lv_status; + uint32_t lv_open; /* HM */ + kdev_t lv_dev; /* HM */ + uint32_t lv_number; /* HM */ + uint32_t lv_mirror_copies; /* for future use */ + uint32_t lv_recovery; /* " */ + uint32_t lv_schedule; /* " */ + uint32_t lv_size; + pe_t *lv_current_pe; /* HM */ + uint32_t lv_current_le; /* for future use */ + uint32_t lv_allocated_le; + uint32_t lv_stripes; + uint32_t lv_stripesize; + uint32_t lv_badblock; /* for future use */ + uint32_t lv_allocation; + uint32_t lv_io_timeout; /* for future use */ + uint32_t lv_read_ahead; +} lv_v1_t; + +/* disk */ +typedef struct { + uint8_t lv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint32_t lv_access; + uint32_t lv_status; + uint32_t lv_open; /* HM */ + uint32_t lv_dev; /* HM */ + uint32_t lv_number; /* HM */ + uint32_t lv_mirror_copies; /* for future use */ + uint32_t lv_recovery; /* " */ + uint32_t lv_schedule; /* " */ + uint32_t lv_size; + uint32_t dummy; + uint32_t lv_current_le; /* for future use */ + uint32_t lv_allocated_le; + uint32_t lv_stripes; + uint32_t lv_stripesize; + uint32_t lv_badblock; /* for future use */ + uint32_t lv_allocation; + uint32_t lv_io_timeout; /* for future use */ + uint32_t lv_read_ahead; /* HM, for future use */ +} lv_disk_v1_t; + + +/* + * Structure Logical Volume (LV) Version 2 + */ + +/* core */ +typedef struct lv_v2 { + uint8_t lv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint32_t lv_access; + uint32_t lv_status; + uint32_t lv_open; /* HM */ + kdev_t lv_dev; /* HM */ + uint32_t lv_number; /* HM */ + uint32_t lv_mirror_copies; /* for future use */ + uint32_t lv_recovery; /* " */ + uint32_t lv_schedule; /* " */ + uint32_t lv_size; + pe_t *lv_current_pe; /* HM */ + uint32_t lv_current_le; /* for future use */ + uint32_t lv_allocated_le; + uint32_t lv_stripes; + uint32_t lv_stripesize; + uint32_t lv_badblock; /* for future use */ + uint32_t lv_allocation; + uint32_t lv_io_timeout; /* for future use */ + uint32_t lv_read_ahead; + /* delta to version 1 starts here */ + struct lv_v2 *lv_snapshot_org; + struct lv_v2 *lv_snapshot_prev; + struct lv_v2 *lv_snapshot_next; + lv_block_exception_t *lv_block_exception; + uint8_t 
__unused2; + uint32_t lv_remap_ptr; + uint32_t lv_remap_end; + uint32_t lv_chunk_size; + uint32_t lv_snapshot_minor; + struct kiobuf * lv_iobuf; + struct semaphore lv_snapshot_sem; + struct list_head * lv_snapshot_hash_table; + unsigned long lv_snapshot_hash_mask; +} lv_v2_t; + +/* disk */ +typedef struct { + uint8_t lv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint32_t lv_access; + uint32_t lv_status; + uint32_t lv_open; /* HM */ + uint32_t lv_dev; /* HM */ + uint32_t lv_number; /* HM */ + uint32_t lv_mirror_copies; /* for future use */ + uint32_t lv_recovery; /* " */ + uint32_t lv_schedule; /* " */ + uint32_t lv_size; + uint32_t dummy; + uint32_t lv_current_le; /* for future use */ + uint32_t lv_allocated_le; + uint32_t lv_stripes; + uint32_t lv_stripesize; + uint32_t lv_badblock; /* for future use */ + uint32_t lv_allocation; + uint32_t lv_io_timeout; /* for future use */ + uint32_t lv_read_ahead; /* HM, for future use */ +} lv_disk_v2_t; + + +/* + * Structure Volume Group (VG) Version 1 + */ + +typedef struct { + uint8_t vg_name[NAME_LEN]; /* volume group name */ + uint32_t vg_number; /* volume group number */ + uint32_t vg_access; /* read/write */ + uint32_t vg_status; /* active or not */ + uint32_t lv_max; /* maximum logical volumes */ + uint32_t lv_cur; /* current logical volumes */ + uint32_t lv_open; /* open logical volumes */ + uint32_t pv_max; /* maximum physical volumes */ + uint32_t pv_cur; /* current physical volumes FU */ + uint32_t pv_act; /* active physical volumes */ + uint32_t dummy; /* was obsolete max_pe_per_pv */ + uint32_t vgda; /* volume group descriptor arrays FU */ + uint32_t pe_size; /* physical extent size in sectors */ + uint32_t pe_total; /* total of physical extents */ + uint32_t pe_allocated; /* allocated physical extents */ + uint32_t pvg_total; /* physical volume groups FU */ + struct proc_dir_entry *proc; + pv_t *pv[ABS_MAX_PV+1]; /* physical volume struct pointers */ + lv_t *lv[ABS_MAX_LV+1]; /* logical volume struct pointers */ +} vg_v1_t; + +typedef struct { + uint8_t vg_name[NAME_LEN]; /* volume group name */ + uint32_t vg_number; /* volume group number */ + uint32_t vg_access; /* read/write */ + uint32_t vg_status; /* active or not */ + uint32_t lv_max; /* maximum logical volumes */ + uint32_t lv_cur; /* current logical volumes */ + uint32_t lv_open; /* open logical volumes */ + uint32_t pv_max; /* maximum physical volumes */ + uint32_t pv_cur; /* current physical volumes FU */ + uint32_t pv_act; /* active physical volumes */ + uint32_t dummy; + uint32_t vgda; /* volume group descriptor arrays FU */ + uint32_t pe_size; /* physical extent size in sectors */ + uint32_t pe_total; /* total of physical extents */ + uint32_t pe_allocated; /* allocated physical extents */ + uint32_t pvg_total; /* physical volume groups FU */ +} vg_disk_v1_t; + +/* + * Structure Volume Group (VG) Version 2 + */ + +typedef struct { + uint8_t vg_name[NAME_LEN]; /* volume group name */ + uint32_t vg_number; /* volume group number */ + uint32_t vg_access; /* read/write */ + uint32_t vg_status; /* active or not */ + uint32_t lv_max; /* maximum logical volumes */ + uint32_t lv_cur; /* current logical volumes */ + uint32_t lv_open; /* open logical volumes */ + uint32_t pv_max; /* maximum physical volumes */ + uint32_t pv_cur; /* current physical volumes FU */ + uint32_t pv_act; /* future: active physical volumes */ + uint32_t max_pe_per_pv; /* OBSOLETE maximum PE/PV */ + uint32_t vgda; /* volume group descriptor arrays FU */ + uint32_t pe_size; /* physical extent size in 
sectors */ + uint32_t pe_total; /* total of physical extents */ + uint32_t pe_allocated; /* allocated physical extents */ + uint32_t pvg_total; /* physical volume groups FU */ + struct proc_dir_entry *proc; + pv_t *pv[ABS_MAX_PV+1]; /* physical volume struct pointers */ + lv_t *lv[ABS_MAX_LV+1]; /* logical volume struct pointers */ + /* delta to version 1 starts here */ + uint8_t vg_uuid[UUID_LEN]; /* volume group UUID */ + time_t vg_atime; /* VG access time */ + time_t vg_ctime; /* VG creation time */ + time_t vg_mtime; /* VG modification time */ +} vg_v2_t; + + +/* + * Timekeeping structure on disk (0.7 feature) + * + * Holds several timestamps for start/stop time of non + * atomic VGDA disk i/o operations + * + */ + +typedef struct { + uint32_t seconds; /* seconds since the epoch */ + uint32_t jiffies; /* micro timer */ +} lvm_time_t; + +#define TIMESTAMP_ID_SIZE 2 +typedef struct { + uint8_t id[TIMESTAMP_ID_SIZE]; /* Identifier */ + lvm_time_t pv_vg_lv_pe_io_begin; + lvm_time_t pv_vg_lv_pe_io_end; + lvm_time_t pv_io_begin; + lvm_time_t pv_io_end; + lvm_time_t vg_io_begin; + lvm_time_t vg_io_end; + lvm_time_t lv_io_begin; + lvm_time_t lv_io_end; + lvm_time_t pe_io_begin; + lvm_time_t pe_io_end; + lvm_time_t pe_move_io_begin; + lvm_time_t pe_move_io_end; + uint8_t dummy[LVM_TIMESTAMP_DISK_SIZE - + TIMESTAMP_ID_SIZE - + 12 * sizeof(lvm_time_t)]; + /* ATTENTION ^^ */ +} timestamp_disk_t; + +/* same on disk and in core so far */ +typedef timestamp_disk_t timestamp_t; + +/* function identifiers for timestamp actions */ +typedef enum { PV_VG_LV_PE_IO_BEGIN, + PV_VG_LV_PE_IO_END, + PV_IO_BEGIN, + PV_IO_END, + VG_IO_BEGIN, + VG_IO_END, + LV_IO_BEGIN, + LV_IO_END, + PE_IO_BEGIN, + PE_IO_END, + PE_MOVE_IO_BEGIN, + PE_MOVE_IO_END} ts_fct_id_t; + + +/* + * Request structures for ioctls + */ + +/* Request structure PV_STATUS */ +typedef struct { + char pv_name[NAME_LEN]; + pv_t *pv; +} pv_status_req_t, pv_change_req_t; + +/* Request structure PV_FLUSH */ +typedef struct { + char pv_name[NAME_LEN]; +} pv_flush_req_t; + + +/* Request structure PE_MOVE */ +typedef struct { + enum { LOCK_PE, UNLOCK_PE} lock; + struct { + kdev_t lv_dev; + kdev_t pv_dev; + uint32_t pv_offset; + } data; +} pe_lock_req_t; + + +/* Request structure LV_STATUS_BYNAME */ +typedef struct { + char lv_name[NAME_LEN]; + lv_t *lv; +} lv_status_byname_req_t, lv_req_t; + +/* Request structure LV_STATUS_BYINDEX */ +typedef struct { + ulong lv_index; + lv_t *lv; +} lv_status_byindex_req_t; + +#endif /* #ifndef _LVM_H_INCLUDE */ diff -urN 2.2.15pre16/include/linux/major.h 2.2.15pre16aa3/include/linux/major.h --- 2.2.15pre16/include/linux/major.h Wed Mar 29 19:42:15 2000 +++ 2.2.15pre16aa3/include/linux/major.h Thu Mar 30 16:00:57 2000 @@ -115,6 +115,8 @@ #define AURORA_MAJOR 79 +#define RAW_MAJOR 162 + #define UNIX98_PTY_MASTER_MAJOR 128 #define UNIX98_PTY_MAJOR_COUNT 8 #define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) diff -urN 2.2.15pre16/include/linux/mm.h 2.2.15pre16aa3/include/linux/mm.h --- 2.2.15pre16/include/linux/mm.h Wed Mar 29 19:42:15 2000 +++ 2.2.15pre16aa3/include/linux/mm.h Thu Mar 30 16:00:58 2000 @@ -54,7 +54,7 @@ struct vm_area_struct **vm_pprev_share; struct vm_operations_struct * vm_ops; - unsigned long vm_offset; + loff_t vm_offset; struct file * vm_file; unsigned long vm_pte; /* shared mem */ }; @@ -106,9 +106,46 @@ unsigned long (*wppage)(struct vm_area_struct * area, unsigned long address, unsigned long page); int (*swapout)(struct vm_area_struct *, struct page *); - pte_t 
(*swapin)(struct vm_area_struct *, unsigned long, unsigned long); + pte_t (*swapin)(struct vm_area_struct *, loff_t, unsigned long); }; + +/* + * pgoff_t type -- a complex one, and its simple alternate. + * The complex one has type that compiler can trap at compile + * time, but the simple one does simpler code (?) + */ + +#if 0 +typedef struct pgoff_t { + unsigned long pgoff; +} pgoff_t; + +#define pgoff2ulong(pgof) ((pgof).pgoff) +extern __inline__ pgoff_t ulong2pgoff(unsigned long ul) { + pgoff_t up; + up.pgoff = ul; + return up; +} + +#define pgoff2loff(pgof) (((loff_t)(pgof).pgoff) << PAGE_SHIFT) +#define loff2pgoff(loff) ulong2pgoff((loff) >> PAGE_SHIFT) + +#else /* Integer scalars -- simpler code.. */ + +typedef unsigned long pgoff_t; + +#define pgoff2ulong(pgof) (pgof) +#define ulong2pgoff(pgof) (pgof) + +#define pgoff2loff(pgof) (((loff_t)(pgof)) << PAGE_SHIFT) +#define loff2pgoff(loff) ulong2pgoff((loff) >> PAGE_SHIFT) + +#endif + +#define PAGE_MASK_loff ((loff_t)(long)(PAGE_MASK)) + + /* * Try to keep the most commonly accessed fields in single cache lines * here (16 bytes or greater). This ordering should be particularly @@ -117,12 +154,13 @@ * The first line is data used in page cache lookup, the second line * is used for linear searches (eg. clock algorithm scans). */ + typedef struct page { /* these must be first (free area handling) */ struct page *next; struct page *prev; + pgoff_t index; struct inode *inode; - unsigned long offset; struct page *next_hash; atomic_t count; unsigned long flags; /* atomic flags, some possibly updated asynchronously */ @@ -144,6 +182,7 @@ #define PG_Slab 9 #define PG_swap_cache 10 #define PG_skip 11 +#define PG_BIGMEM 12 #define PG_reserved 31 /* Make it prettier to test the above... */ @@ -175,6 +214,11 @@ (test_and_clear_bit(PG_dirty, &(page)->flags)) #define PageTestandClearSwapCache(page) \ (test_and_clear_bit(PG_swap_cache, &(page)->flags)) +#ifdef CONFIG_BIGMEM +#define PageBIGMEM(page) (test_bit(PG_BIGMEM, &(page)->flags)) +#else +#define PageBIGMEM(page) 0 /* needed to optimize away at compile time */ +#endif /* * Various page->flags bits: @@ -291,7 +335,7 @@ extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot); -extern void vmtruncate(struct inode * inode, unsigned long offset); +extern void vmtruncate(struct inode * inode, loff_t offset); extern int handle_mm_fault(struct task_struct *tsk,struct vm_area_struct *vma, unsigned long address, int write_access); extern int make_pages_present(unsigned long addr, unsigned long end); @@ -311,16 +355,31 @@ extern void exit_mmap(struct mm_struct *); extern unsigned long get_unmapped_area(unsigned long, unsigned long); -extern unsigned long do_mmap(struct file *, unsigned long, unsigned long, - unsigned long, unsigned long, unsigned long); +extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long pgoff); + +extern inline unsigned long do_mmap(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long offset) +{ + unsigned long ret = -EINVAL; + if ((offset + PAGE_ALIGN(len)) < offset) + goto out; + if (!(offset & ~PAGE_MASK)) + ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); +out: + return ret; +} + extern int do_munmap(unsigned long, size_t); /* filemap.c */ extern void 
remove_inode_page(struct page *); extern unsigned long page_unuse(struct page *); extern int shrink_mmap(int, int); -extern void truncate_inode_pages(struct inode *, unsigned long); -extern unsigned long get_cached_page(struct inode *, unsigned long, int); +extern void truncate_inode_pages(struct inode *, loff_t); +extern unsigned long get_cached_page(struct inode *, pgoff_t, int); extern void put_cached_page(unsigned long); /* @@ -332,11 +391,17 @@ #define __GFP_HIGH 0x08 #define __GFP_IO 0x10 #define __GFP_SWAP 0x20 +#ifdef CONFIG_BIGMEM +#define __GFP_BIGMEM 0x40 +#else +#define __GFP_BIGMEM 0x0 /* noop */ +#endif #define __GFP_DMA 0x80 #define GFP_BUFFER (__GFP_MED | __GFP_WAIT) #define GFP_ATOMIC (__GFP_HIGH) +#define GFP_BIGUSER (__GFP_LOW | __GFP_WAIT | __GFP_IO | __GFP_BIGMEM) #define GFP_USER (__GFP_LOW | __GFP_WAIT | __GFP_IO) #define GFP_KERNEL (__GFP_MED | __GFP_WAIT | __GFP_IO) #define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO) @@ -347,13 +412,23 @@ #define GFP_DMA __GFP_DMA +/* Flag - indicates that the buffer can be taken from big memory which is not + directly addressable by the kernel */ + +#define GFP_BIGMEM __GFP_BIGMEM + +extern int heap_stack_gap; + /* vma is the first one with address < vma->vm_end, * and even address < vma->vm_start. Have to extend vma. */ -static inline int expand_stack(struct vm_area_struct * vma, unsigned long address) +static inline int expand_stack(struct vm_area_struct * vma, unsigned long address, + struct vm_area_struct * prev_vma) { unsigned long grow; address &= PAGE_MASK; + if (prev_vma && prev_vma->vm_end + (heap_stack_gap << PAGE_SHIFT) > address) + return -ENOMEM; grow = vma->vm_start - address; if ((vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur) || diff -urN 2.2.15pre16/include/linux/nfs.h 2.2.15pre16aa3/include/linux/nfs.h --- 2.2.15pre16/include/linux/nfs.h Tue Feb 1 18:24:19 2000 +++ 2.2.15pre16aa3/include/linux/nfs.h Thu Mar 30 16:00:58 2000 @@ -111,7 +111,7 @@ __u32 nlink; __u32 uid; __u32 gid; - __u32 size; + __u64 size; __u32 blocksize; __u32 rdev; __u32 blocks; @@ -126,7 +126,7 @@ __u32 mode; __u32 uid; __u32 gid; - __u32 size; + __u64 size; struct nfs_time atime; struct nfs_time mtime; }; @@ -141,7 +141,7 @@ struct nfs_writeargs { struct nfs_fh * fh; - __u32 offset; + __u64 offset; __u32 count; const void * buffer; }; @@ -160,7 +160,7 @@ struct nfs_readargs { struct nfs_fh * fh; - __u32 offset; + __u64 offset; __u32 count; void * buffer; }; diff -urN 2.2.15pre16/include/linux/nfs_fs.h 2.2.15pre16aa3/include/linux/nfs_fs.h --- 2.2.15pre16/include/linux/nfs_fs.h Wed Mar 29 14:58:09 2000 +++ 2.2.15pre16aa3/include/linux/nfs_fs.h Thu Mar 30 16:00:58 2000 @@ -143,10 +143,10 @@ void **p0, char **string, unsigned int *len, unsigned int maxlen); extern int nfs_proc_read(struct nfs_server *server, struct nfs_fh *fhandle, - int swap, unsigned long offset, unsigned int count, + int swap, loff_t offset, unsigned int count, void *buffer, struct nfs_fattr *fattr); extern int nfs_proc_write(struct nfs_server *server, struct nfs_fh *fhandle, - int swap, unsigned long offset, unsigned int count, + int swap, loff_t offset, unsigned int count, const void *buffer, struct nfs_fattr *fattr); extern int nfs_proc_create(struct nfs_server *server, struct nfs_fh *dir, const char *name, struct nfs_sattr *sattr, diff -urN 2.2.15pre16/include/linux/pagemap.h 2.2.15pre16aa3/include/linux/pagemap.h --- 2.2.15pre16/include/linux/pagemap.h Wed Mar 29 14:57:46 2000 +++ 2.2.15pre16aa3/include/linux/pagemap.h Thu Mar 30 16:00:58 2000 @@ -28,6 
+28,7 @@ #define PAGE_CACHE_SHIFT PAGE_SHIFT #define PAGE_CACHE_SIZE PAGE_SIZE #define PAGE_CACHE_MASK PAGE_MASK +#define PAGE_CACHE_MASK_loff PAGE_MASK_loff #define page_cache_alloc() __get_free_page(GFP_USER) #define page_cache_free(x) free_page(x) @@ -54,10 +55,10 @@ * inode pointer and offsets are distributed (ie, we * roughly know which bits are "significant") */ -static inline unsigned long _page_hashfn(struct inode * inode, unsigned long offset) +static inline unsigned long _page_hashfn(struct inode * inode, pgoff_t index) { #define i (((unsigned long) inode)/(sizeof(struct inode) & ~ (sizeof(struct inode) - 1))) -#define o ((offset >> PAGE_SHIFT) + (offset & ~PAGE_MASK)) +#define o ((pgoff2ulong(index) >> PAGE_SHIFT) + (pgoff2ulong(index) & ~PAGE_MASK)) return ((i+o) & PAGE_HASH_MASK); #undef i #undef o @@ -65,7 +66,7 @@ #define page_hash(inode,offset) (page_hash_table+_page_hashfn(inode,offset)) -static inline struct page * __find_page(struct inode * inode, unsigned long offset, struct page *page) +static inline struct page * __find_page(struct inode * inode, pgoff_t index, struct page *page) { goto inside; for (;;) { @@ -75,7 +76,7 @@ goto not_found; if (page->inode != inode) continue; - if (page->offset == offset) + if (pgoff2ulong(page->index) == pgoff2ulong(index)) break; } /* Found the page. */ @@ -85,9 +86,9 @@ return page; } -static inline struct page *find_page(struct inode * inode, unsigned long offset) +static inline struct page *find_page(struct inode * inode, pgoff_t poffset) { - return __find_page(inode, offset, *page_hash(inode, offset)); + return __find_page(inode, poffset, *page_hash(inode, poffset)); } static inline void remove_page_from_hash_queue(struct page * page) @@ -110,9 +111,9 @@ page->pprev_hash = p; } -static inline void add_page_to_hash_queue(struct page * page, struct inode * inode, unsigned long offset) +static inline void add_page_to_hash_queue(struct page * page, struct inode * inode, pgoff_t poffset) { - __add_page_to_hash_queue(page, page_hash(inode,offset)); + __add_page_to_hash_queue(page, page_hash(inode,poffset)); } static inline void remove_page_from_inode_queue(struct page * page) @@ -150,7 +151,7 @@ __wait_on_page(page); } -extern void update_vm_cache_conditional(struct inode *, unsigned long, const char *, int, unsigned long); -extern void update_vm_cache(struct inode *, unsigned long, const char *, int); +extern void update_vm_cache_conditional(struct inode *, loff_t, const char *, int, unsigned long); +extern void update_vm_cache(struct inode *, loff_t, const char *, int); #endif diff -urN 2.2.15pre16/include/linux/raw.h 2.2.15pre16aa3/include/linux/raw.h --- 2.2.15pre16/include/linux/raw.h Thu Jan 1 01:00:00 1970 +++ 2.2.15pre16aa3/include/linux/raw.h Thu Mar 30 16:00:57 2000 @@ -0,0 +1,23 @@ +#ifndef __LINUX_RAW_H +#define __LINUX_RAW_H + +#include + +#define RAW_SETBIND _IO( 0xac, 0 ) +#define RAW_GETBIND _IO( 0xac, 1 ) + +struct raw_config_request +{ + int raw_minor; + __u64 block_major; + __u64 block_minor; +}; + +#ifdef __KERNEL__ + +/* drivers/char/raw.c */ +extern void raw_init(void); + +#endif /* __KERNEL__ */ + +#endif /* __LINUX_RAW_H */ diff -urN 2.2.15pre16/include/linux/sched.h 2.2.15pre16aa3/include/linux/sched.h --- 2.2.15pre16/include/linux/sched.h Wed Mar 29 19:42:15 2000 +++ 2.2.15pre16aa3/include/linux/sched.h Thu Mar 30 16:00:58 2000 @@ -292,6 +292,7 @@ unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap; int swappable:1; int trashing_mem:1; + int trashing_bigmem:1; /* process credentials */ uid_t 
uid,euid,suid,fsuid; gid_t gid,egid,sgid,fsgid; @@ -300,7 +301,7 @@ kernel_cap_t cap_effective, cap_inheritable, cap_permitted; struct user_struct *user; /* limits */ - struct rlimit rlim[RLIM_NLIMITS]; + struct rlimit rlim[RLIM_NLIMITS]; unsigned short used_math; char comm[16]; /* file system info */ @@ -382,7 +383,7 @@ /* utime */ {0,0,0,0},0, \ /* per CPU times */ {0, }, {0, }, \ /* flt */ 0,0,0,0,0,0, \ -/* swp */ 0,0, \ +/* swp */ 0,0,0, \ /* process credentials */ \ /* uid etc */ 0,0,0,0,0,0,0,0, \ /* suppl grps*/ 0, {0,}, \ diff -urN 2.2.15pre16/include/linux/shm.h 2.2.15pre16aa3/include/linux/shm.h --- 2.2.15pre16/include/linux/shm.h Tue Feb 1 18:24:19 2000 +++ 2.2.15pre16aa3/include/linux/shm.h Thu Mar 30 16:00:56 2000 @@ -7,7 +7,7 @@ struct shmid_ds { struct ipc_perm shm_perm; /* operation perms */ - int shm_segsz; /* size of segment (bytes) */ + unsigned int shm_segsz; /* size of segment (bytes) */ __kernel_time_t shm_atime; /* last attach time */ __kernel_time_t shm_dtime; /* last detach time */ __kernel_time_t shm_ctime; /* last change time */ @@ -68,7 +68,7 @@ #define SHM_DEST 01000 /* segment will be destroyed on last detach */ #define SHM_LOCKED 02000 /* segment will not be swapped */ -asmlinkage int sys_shmget (key_t key, int size, int flag); +asmlinkage int sys_shmget (key_t key, unsigned int size, int flag); asmlinkage int sys_shmat (int shmid, char *shmaddr, int shmflg, unsigned long *addr); asmlinkage int sys_shmdt (char *shmaddr); asmlinkage int sys_shmctl (int shmid, int cmd, struct shmid_ds *buf); diff -urN 2.2.15pre16/include/linux/smb_fs.h 2.2.15pre16aa3/include/linux/smb_fs.h --- 2.2.15pre16/include/linux/smb_fs.h Wed Mar 29 14:59:07 2000 +++ 2.2.15pre16aa3/include/linux/smb_fs.h Thu Mar 30 16:00:58 2000 @@ -128,8 +128,8 @@ void smb_close_dentry(struct dentry *); int smb_close_fileid(struct dentry *, __u16); int smb_open(struct dentry *, int); -int smb_proc_read(struct dentry *, off_t, int, char *); -int smb_proc_write(struct dentry *, off_t, int, const char *); +int smb_proc_read(struct dentry *, loff_t, int, char *); +int smb_proc_write(struct dentry *, loff_t, int, const char *); int smb_proc_create(struct dentry *, __u16, time_t, __u16 *); int smb_proc_mv(struct dentry *, struct dentry *); int smb_proc_mkdir(struct dentry *); diff -urN 2.2.15pre16/include/linux/swap.h 2.2.15pre16aa3/include/linux/swap.h --- 2.2.15pre16/include/linux/swap.h Wed Mar 29 14:57:45 2000 +++ 2.2.15pre16aa3/include/linux/swap.h Thu Mar 30 16:00:58 2000 @@ -114,7 +114,7 @@ extern unsigned int nr_swapfiles; extern struct swap_info_struct swap_info[]; void si_swapinfo(struct sysinfo *); -unsigned long get_swap_page(void); +extern unsigned long get_swap_page(void); extern void FASTCALL(swap_free(unsigned long)); struct swap_list_t { int head; /* head of priority-ordered swapfile list */ @@ -147,7 +147,7 @@ extern inline unsigned long in_swap_cache(struct page *page) { if (PageSwapCache(page)) - return page->offset; + return pgoff2ulong(page->index); return 0; } @@ -164,7 +164,7 @@ return 1; count = atomic_read(&page->count); if (PageSwapCache(page)) - count += swap_count(page->offset) - 2; + count += swap_count(pgoff2ulong(page->index)) - 2; if (PageFreeAfter(page)) count--; return count > 1; diff -urN 2.2.15pre16/include/linux/sysctl.h 2.2.15pre16aa3/include/linux/sysctl.h --- 2.2.15pre16/include/linux/sysctl.h Tue Feb 1 18:24:19 2000 +++ 2.2.15pre16aa3/include/linux/sysctl.h Thu Mar 30 16:00:57 2000 @@ -121,7 +121,8 @@ VM_PAGECACHE=7, /* struct: Set cache memory thresholds */ 
VM_PAGERDAEMON=8, /* struct: Control kswapd behaviour */ VM_PGT_CACHE=9, /* struct: Set page table cache parameters */ - VM_PAGE_CLUSTER=10 /* int: set number of pages to swap together */ + VM_PAGE_CLUSTER=10, /* int: set number of pages to swap together */ + VM_HEAP_STACK_GAP=11, /* int: page gap between heap and stack */ }; diff -urN 2.2.15pre16/include/linux/time.h 2.2.15pre16aa3/include/linux/time.h --- 2.2.15pre16/include/linux/time.h Tue Feb 1 18:24:19 2000 +++ 2.2.15pre16aa3/include/linux/time.h Thu Mar 30 16:00:56 2000 @@ -46,10 +46,53 @@ value->tv_sec = jiffies / HZ; } +static __inline__ int +timespec_before(struct timespec a, struct timespec b) +{ + if (a.tv_sec == b.tv_sec) + return a.tv_nsec < b.tv_nsec; + return a.tv_sec < b.tv_sec; +} + +/* computes `a - b' and writes the result in `result', assumes `a >= b' */ +static inline void +timespec_less(struct timespec a, struct timespec b, struct timespec * result) +{ + if (a.tv_nsec < b.tv_nsec) + { + a.tv_sec--; + a.tv_nsec += 1000000000; + } + + result->tv_sec = a.tv_sec - b.tv_sec; + result->tv_nsec = a.tv_nsec - b.tv_nsec; +} + struct timeval { time_t tv_sec; /* seconds */ suseconds_t tv_usec; /* microseconds */ }; + +/* computes `a - b' and writes the result in `result', assumes `a >= b' */ +static inline void +timeval_less(struct timeval a, struct timeval b, struct timeval * result) +{ + if (a.tv_usec < b.tv_usec) + { + a.tv_sec--; + a.tv_usec += 1000000; + } + + result->tv_sec = a.tv_sec - b.tv_sec; + result->tv_usec = a.tv_usec - b.tv_usec; +} + +static __inline__ void +timeval_to_timespec(struct timeval tv, struct timespec * ts) +{ + ts->tv_sec = tv.tv_sec; + ts->tv_nsec = (long) tv.tv_usec * 1000; +} struct timezone { int tz_minuteswest; /* minutes west of Greenwich */ diff -urN 2.2.15pre16/include/linux/ufs_fs_i.h 2.2.15pre16aa3/include/linux/ufs_fs_i.h --- 2.2.15pre16/include/linux/ufs_fs_i.h Tue Feb 1 18:24:19 2000 +++ 2.2.15pre16aa3/include/linux/ufs_fs_i.h Thu Mar 30 16:00:58 2000 @@ -18,7 +18,6 @@ __u32 i_data[15]; __u8 i_symlink[4*15]; } i_u1; - __u64 i_size; __u32 i_flags; __u32 i_gen; __u32 i_shadow; diff -urN 2.2.15pre16/init/main.c 2.2.15pre16aa3/init/main.c --- 2.2.15pre16/init/main.c Wed Mar 29 19:42:16 2000 +++ 2.2.15pre16aa3/init/main.c Thu Mar 30 16:00:57 2000 @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -1252,6 +1253,13 @@ extern void initialize_secondary(void); +static void __init clear_bss(void) +{ + extern char __bss_start[], __bss_stop[]; + memset(__bss_start, 0, + (unsigned long) __bss_stop - (unsigned long) __bss_start); +} + /* * Activate the first processor. */ @@ -1259,9 +1267,13 @@ asmlinkage void __init start_kernel(void) { char * command_line; - #ifdef __SMP__ static int boot_cpu = 1; +#endif + + clear_bss(); + +#ifdef __SMP__ /* "current" has been set up, we need to load it now */ if (!boot_cpu) initialize_secondary(); @@ -1323,6 +1335,7 @@ vma_init(); buffer_init(memory_end-memory_start); page_cache_init(memory_end-memory_start); + kiobuf_init(); signals_init(); inode_init(); file_table_init(); diff -urN 2.2.15pre16/ipc/shm.c 2.2.15pre16aa3/ipc/shm.c --- 2.2.15pre16/ipc/shm.c Wed Mar 29 19:42:16 2000 +++ 2.2.15pre16aa3/ipc/shm.c Thu Mar 30 16:00:57 2000 @@ -4,6 +4,7 @@ * Many improvements/fixes by Bruno Haible. * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994. * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
+ * BIGMEM support, Andrea Arcangeli */ #include @@ -12,6 +13,8 @@ #include #include #include +#include +#include #include #include @@ -19,7 +22,7 @@ extern int ipcperms (struct ipc_perm *ipcp, short shmflg); extern unsigned long get_swap_page (void); static int findkey (key_t key); -static int newseg (key_t key, int shmflg, int size); +static int newseg (key_t key, int shmflg, unsigned int size); static int shm_map (struct vm_area_struct *shmd); static void killseg (int id); static void shm_open (struct vm_area_struct *shmd); @@ -74,7 +77,7 @@ /* * allocate new shmid_kernel and pgtable. protected by shm_segs[id] = NOID. */ -static int newseg (key_t key, int shmflg, int size) +static int newseg (key_t key, int shmflg, unsigned int size) { struct shmid_kernel *shp; int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; @@ -135,9 +138,9 @@ return (unsigned int) shp->u.shm_perm.seq * SHMMNI + id; } -int shmmax = SHMMAX; +unsigned int shmmax = SHMMAX; -asmlinkage int sys_shmget (key_t key, int size, int shmflg) +asmlinkage int sys_shmget (key_t key, unsigned int size, int shmflg) { struct shmid_kernel *shp; int err, id = 0; @@ -644,21 +647,29 @@ pte = __pte(shp->shm_pages[idx]); if (!pte_present(pte)) { - unsigned long page = get_free_page(GFP_USER); + unsigned long page = __get_free_page(GFP_BIGUSER); if (!page) return -1; + clear_bigpage(page); pte = __pte(shp->shm_pages[idx]); if (pte_present(pte)) { free_page (page); /* doesn't sleep */ goto done; } if (!pte_none(pte)) { + struct page * page_map; + + page = prepare_bigmem_shm_swapin(page); + if (!page) + return -1; rw_swap_page_nocache(READ, pte_val(pte), (char *)page); pte = __pte(shp->shm_pages[idx]); if (pte_present(pte)) { free_page (page); /* doesn't sleep */ goto done; } + page_map = replace_with_bigmem(&mem_map[MAP_NR(page)]); + page = page_address(page_map); swap_free(pte_val(pte)); shm_swp--; } @@ -675,7 +686,7 @@ } /* - * Goes through counter = (shm_rss >> prio) present shm pages. + * Goes through counter = (shm_rss / prio) present shm pages. 
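
A side note on the shm_swap() budget change just above: with the old right shift the per-pass scan budget decayed exponentially in the priority, so at light memory pressure almost no shm pages were examined; the division makes it decay only linearly. A standalone comparison (numbers invented for illustration):

#include <stdio.h>

int main(void)
{
        unsigned long shm_rss = 1200;   /* assumed resident SHM pages */
        int prio;

        for (prio = 1; prio <= 6; prio++)
                printf("prio %d: shifted budget %4lu, divided budget %4lu\n",
                       prio, shm_rss >> prio, shm_rss / prio);
        return 0;
}
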
*/ static unsigned long swap_id = 0; /* currently being swapped */ static unsigned long swap_idx = 0; /* next to swap */ @@ -688,8 +699,9 @@ unsigned long id, idx; int loop = 0; int counter; + struct page * page_map; - counter = shm_rss >> prio; + counter = shm_rss / prio; if (!counter || !(swap_nr = get_swap_page())) return 0; @@ -716,7 +728,10 @@ page = __pte(shp->shm_pages[idx]); if (!pte_present(page)) goto check_table; - if ((gfp_mask & __GFP_DMA) && !PageDMA(&mem_map[MAP_NR(pte_page(page))])) + page_map = &mem_map[MAP_NR(pte_page(page))]; + if ((gfp_mask & __GFP_DMA) && !PageDMA(page_map)) + goto check_table; + if (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page_map)) goto check_table; swap_attempts++; @@ -725,11 +740,13 @@ swap_free (swap_nr); return 0; } - if (atomic_read(&mem_map[MAP_NR(pte_page(page))].count) != 1) + if (atomic_read(&page_map->count) != 1) + goto check_table; + if (!(page_map = prepare_bigmem_swapout(page_map))) goto check_table; shp->shm_pages[idx] = swap_nr; - rw_swap_page_nocache (WRITE, swap_nr, (char *) pte_page(page)); - free_page(pte_page(page)); + rw_swap_page_nocache (WRITE, swap_nr, (char *) page_address(page_map)); + __free_page(page_map); swap_successes++; shm_swp++; shm_rss--; diff -urN 2.2.15pre16/kernel/ksyms.c 2.2.15pre16aa3/kernel/ksyms.c --- 2.2.15pre16/kernel/ksyms.c Wed Mar 29 19:42:16 2000 +++ 2.2.15pre16aa3/kernel/ksyms.c Thu Mar 30 16:00:58 2000 @@ -39,6 +39,7 @@ #include #include #include +#include #if defined(CONFIG_PROC_FS) #include @@ -72,6 +73,13 @@ }; #endif +#ifdef CONFIG_BLK_DEV_LVM_MODULE + extern int (*lvm_map_ptr) ( int, kdev_t *, unsigned long *, + unsigned long, int); + extern void (*lvm_hd_name_ptr) ( char*, int); + EXPORT_SYMBOL(lvm_map_ptr); + EXPORT_SYMBOL(lvm_hd_name_ptr); +#endif #ifdef CONFIG_KMOD EXPORT_SYMBOL(request_module); @@ -83,7 +91,7 @@ EXPORT_SYMBOL(get_options); /* process memory management */ -EXPORT_SYMBOL(do_mmap); +EXPORT_SYMBOL(do_mmap_pgoff); EXPORT_SYMBOL(do_munmap); EXPORT_SYMBOL(exit_mm); EXPORT_SYMBOL(exit_files); @@ -107,6 +115,7 @@ EXPORT_SYMBOL(mem_map); EXPORT_SYMBOL(remap_page_range); EXPORT_SYMBOL(max_mapnr); +EXPORT_SYMBOL(num_physpages); EXPORT_SYMBOL(high_memory); EXPORT_SYMBOL(update_vm_cache); EXPORT_SYMBOL(update_vm_cache_conditional); @@ -239,6 +248,13 @@ EXPORT_SYMBOL(max_sectors); EXPORT_SYMBOL(max_segments); EXPORT_SYMBOL(max_readahead); + +/* rawio */ +EXPORT_SYMBOL(alloc_kiovec); +EXPORT_SYMBOL(expand_kiobuf); +EXPORT_SYMBOL(unmap_kiobuf); +EXPORT_SYMBOL(brw_kiovec); +EXPORT_SYMBOL(free_kiovec); /* tty routines */ EXPORT_SYMBOL(tty_hangup); diff -urN 2.2.15pre16/kernel/sched.c 2.2.15pre16aa3/kernel/sched.c --- 2.2.15pre16/kernel/sched.c Wed Jan 5 14:16:56 2000 +++ 2.2.15pre16aa3/kernel/sched.c Thu Mar 30 16:00:57 2000 @@ -212,101 +212,89 @@ } /* - * If there is a dependency between p1 and p2, - * don't be too eager to go into the slow schedule. - * In particular, if p1 and p2 both want the kernel - * lock, there is no point in trying to make them - * extremely parallel.. - * - * (No lock - lock_depth < 0) - * - * There are two additional metrics here: - * - * first, a 'cutoff' interval, currently 0-200 usecs on - * x86 CPUs, depending on the size of the 'SMP-local cache'. - * If the current process has longer average timeslices than - * this, then we utilize the idle CPU. - * - * second, if the wakeup comes from a process context, - * then the two processes are 'related'. 
(they form a - * 'gang') - * - * An idle CPU is almost always a bad thing, thus we skip - * the idle-CPU utilization only if both these conditions - * are true. (ie. a 'process-gang' rescheduling with rather - * high frequency should stay on the same CPU). - * - * [We can switch to something more finegrained in 2.3.] - * - * do not 'guess' if the to-be-scheduled task is RT. + * This is ugly, but reschedule_idle() is very timing-critical. + * We enter with the runqueue spinlock held, but we might end + * up unlocking it early, so the caller must not unlock the + * runqueue, it's always done by reschedule_idle(). */ -#define related(p1,p2) (((p1)->lock_depth >= 0) && (p2)->lock_depth >= 0) && \ - (((p2)->policy == SCHED_OTHER) && ((p1)->avg_slice < cacheflush_time)) - -static inline void reschedule_idle_slow(struct task_struct * p) +static inline void reschedule_idle(struct task_struct * p, unsigned long flags) { #ifdef __SMP__ -/* - * (see reschedule_idle() for an explanation first ...) - * - * Pass #2 - * - * We try to find another (idle) CPU for this woken-up process. - * - * On SMP, we mostly try to see if the CPU the task used - * to run on is idle.. but we will use another idle CPU too, - * at this point we already know that this CPU is not - * willing to reschedule in the near future. - * - * An idle CPU is definitely wasted, especially if this CPU is - * running long-timeslice processes. The following algorithm is - * pretty good at finding the best idle CPU to send this process - * to. - * - * [We can try to preempt low-priority processes on other CPUs in - * 2.3. Also we can try to use the avg_slice value to predict - * 'likely reschedule' events even on other CPUs.] - */ int this_cpu = smp_processor_id(), target_cpu; - struct task_struct *tsk, *target_tsk; - int cpu, best_cpu, weight, best_weight, i; - unsigned long flags; - - best_weight = 0; /* prevents negative weight */ - - spin_lock_irqsave(&runqueue_lock, flags); + struct task_struct *tsk; + int cpu, best_cpu, i; /* * shortcut if the woken up task's last CPU is * idle now. */ best_cpu = p->processor; - target_tsk = idle_task(best_cpu); - if (cpu_curr(best_cpu) == target_tsk) + tsk = idle_task(best_cpu); + if (cpu_curr(best_cpu) == tsk) goto send_now; - target_tsk = NULL; - for (i = 0; i < smp_num_cpus; i++) { + /* + * We know that the preferred CPU has a cache-affine current + * process, let's try to find a new idle CPU for the woken-up + * process: + */ + for (i = smp_num_cpus - 1; i >= 0; i--) { cpu = cpu_logical_map(i); + if (cpu == best_cpu) + continue; tsk = cpu_curr(cpu); - if (related(tsk, p)) - goto out_no_target; - weight = preemption_goodness(tsk, p, cpu); - if (weight > best_weight) { - best_weight = weight; - target_tsk = tsk; - } + /* + * We use the last available idle CPU. This creates + * a priority list between idle CPUs, but this is not + * a problem. + */ + if (tsk == idle_task(cpu)) + goto send_now; } /* - * found any suitable CPU? + * No CPU is idle, but maybe this process has enough priority + * to preempt its preferred CPU. */ - if (!target_tsk) - goto out_no_target; + tsk = cpu_curr(best_cpu); + if (preemption_goodness(tsk, p, best_cpu) > 0) + goto send_now; + + /* + * We will get here often - or in the high CPU contention + * case. No CPU is idle and this process is either lowprio or + * the preferred CPU is highprio. Try to preempt some other CPU + * only if it's RT or if it's interactive and the preferred + * cpu won't reschedule shortly.
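
The control flow above is easier to audit when flattened into its four fast paths: preferred CPU idle, any other CPU idle, preempt the preferred CPU, and (only for RT or interactive wakeups) a best-weight scan of the rest. A userspace model of that decision ladder (all state stubbed out; the names only mirror the patch, a sketch rather than the kernel code):

#include <stdio.h>

#define NCPUS 4
static int idle[NCPUS];        /* 1 if the cpu runs its idle task */
static int goodness[NCPUS];    /* stand-in for preemption_goodness() */

static int pick_target(int preferred, int rt_or_interactive)
{
        int cpu, best = -1, best_weight = 0;

        if (idle[preferred])                    /* 1: preferred CPU idle */
                return preferred;
        for (cpu = NCPUS - 1; cpu >= 0; cpu--)  /* 2: last idle CPU wins */
                if (cpu != preferred && idle[cpu])
                        return cpu;
        if (goodness[preferred] > 0)            /* 3: preempt preferred CPU */
                return preferred;
        if (rt_or_interactive) {                /* 4: costly best-weight scan */
                for (cpu = NCPUS - 1; cpu >= 0; cpu--) {
                        if (cpu == preferred)
                                continue;
                        if (goodness[cpu] > best_weight) {
                                best_weight = goodness[cpu];
                                best = cpu;
                        }
                }
        }
        return best;                            /* -1: nobody reschedules */
}

int main(void)
{
        idle[2] = 1;
        printf("target cpu: %d\n", pick_target(0, 0));  /* prints 2 */
        return 0;
}
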
+ */ + if ((p->avg_slice < cacheflush_time && cpu_curr(best_cpu)->avg_slice > cacheflush_time) || + ((p->policy & ~SCHED_YIELD) != SCHED_OTHER)) + { + int weight, best_weight = 0; + struct task_struct * best_tsk = NULL; + + for (i = smp_num_cpus - 1; i >= 0; i--) { + cpu = cpu_logical_map(i); + if (cpu == best_cpu) + continue; + tsk = cpu_curr(cpu); + weight = preemption_goodness(tsk, p, cpu); + if (weight > best_weight) { + best_weight = weight; + best_tsk = tsk; + } + } + + if ((tsk = best_tsk)) + goto send_now; + } + + spin_unlock_irqrestore(&runqueue_lock, flags); + return; send_now: - target_cpu = target_tsk->processor; - target_tsk->need_resched = 1; + target_cpu = tsk->processor; + tsk->need_resched = 1; spin_unlock_irqrestore(&runqueue_lock, flags); /* * the APIC stuff can go outside of the lock because @@ -315,9 +303,6 @@ if (target_cpu != this_cpu) smp_send_reschedule(target_cpu); return; -out_no_target: - spin_unlock_irqrestore(&runqueue_lock, flags); - return; #else /* UP */ int this_cpu = smp_processor_id(); struct task_struct *tsk; @@ -325,38 +310,10 @@ tsk = current; if (preemption_goodness(tsk, p, this_cpu) > 0) tsk->need_resched = 1; + spin_unlock_irqrestore(&runqueue_lock, flags); #endif } -static void reschedule_idle(struct task_struct * p) -{ -#ifdef __SMP__ - int cpu = smp_processor_id(); - /* - * ("wakeup()" should not be called before we've initialized - * SMP completely. - * Basically a not-yet initialized SMP subsystem can be - * considered as a not-yet working scheduler, simply dont use - * it before it's up and running ...) - * - * SMP rescheduling is done in 2 passes: - * - pass #1: faster: 'quick decisions' - * - pass #2: slower: 'lets try and find a suitable CPU' - */ - - /* - * Pass #1. (subtle. We might be in the middle of __switch_to, so - * to preserve scheduling atomicity we have to use cpu_curr) - */ - if ((p->processor == cpu) && related(cpu_curr(cpu), p)) - return; -#endif /* __SMP__ */ - /* - * Pass #2 - */ - reschedule_idle_slow(p); -} - /* * Careful! 
* @@ -453,9 +410,8 @@ if (p->next_run) goto out; add_to_runqueue(p); - spin_unlock_irqrestore(&runqueue_lock, flags); + reschedule_idle(p, flags); // spin_unlocks runqueue - reschedule_idle(p); return; out: spin_unlock_irqrestore(&runqueue_lock, flags); @@ -498,17 +454,19 @@ (struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5 }; +static struct timer_list ** run_timer_list_running; + #define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0])) static unsigned long timer_jiffies = 0; static inline void insert_timer(struct timer_list *timer, - struct timer_list **vec, int idx) + struct timer_list **vec) { - if ((timer->next = vec[idx])) - vec[idx]->prev = timer; - vec[idx] = timer; - timer->prev = (struct timer_list *)&vec[idx]; + if ((timer->next = *vec)) + (*vec)->prev = timer; + *vec = timer; + timer->prev = (struct timer_list *)vec; } static inline void internal_add_timer(struct timer_list *timer) @@ -518,31 +476,36 @@ */ unsigned long expires = timer->expires; unsigned long idx = expires - timer_jiffies; + struct timer_list ** vec; - if (idx < TVR_SIZE) { + if (run_timer_list_running) + vec = run_timer_list_running; + else if (idx < TVR_SIZE) { int i = expires & TVR_MASK; - insert_timer(timer, tv1.vec, i); + vec = tv1.vec + i; } else if (idx < 1 << (TVR_BITS + TVN_BITS)) { int i = (expires >> TVR_BITS) & TVN_MASK; - insert_timer(timer, tv2.vec, i); + vec = tv2.vec + i; } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) { int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK; - insert_timer(timer, tv3.vec, i); + vec = tv3.vec + i; } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) { int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK; - insert_timer(timer, tv4.vec, i); + vec = tv4.vec + i; } else if ((signed long) idx < 0) { /* can happen if you add a timer with expires == jiffies, * or you set a timer to go off in the past */ - insert_timer(timer, tv1.vec, tv1.index); + vec = tv1.vec + tv1.index; } else if (idx <= 0xffffffffUL) { int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK; - insert_timer(timer, tv5.vec, i); + vec = tv5.vec + i; } else { /* Can only get here on architectures with 64-bit jiffies */ timer->next = timer->prev = timer; + return; } + insert_timer(timer, vec); } spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED; @@ -661,8 +624,12 @@ { #ifdef __SMP__ if ((prev->state == TASK_RUNNING) && - (prev != idle_task(smp_processor_id()))) - reschedule_idle(prev); + (prev != idle_task(smp_processor_id()))) { + unsigned long flags; + + spin_lock_irqsave(&runqueue_lock, flags); + reschedule_idle(prev, flags); // spin_unlocks runqueue + } wmb(); prev->has_cpu = 0; #endif /* __SMP__ */ @@ -689,6 +656,7 @@ struct task_struct *prev, *next, *p; int this_cpu, c; + sti(); if (tq_scheduler) goto handle_tq_scheduler; tq_scheduler_back: @@ -1128,13 +1096,14 @@ { spin_lock_irq(&timerlist_lock); while ((long)(jiffies - timer_jiffies) >= 0) { - struct timer_list *timer; + struct timer_list *timer, * queued = NULL; if (!tv1.index) { int n = 1; do { cascade_timers(tvecs[n]); } while (tvecs[n]->index == 1 && ++n < NOOF_TVECS); } + run_timer_list_running = &queued; while ((timer = tv1.vec[tv1.index])) { void (*fn)(unsigned long) = timer->function; unsigned long data = timer->data; @@ -1144,8 +1113,15 @@ fn(data); spin_lock_irq(&timerlist_lock); } + run_timer_list_running = NULL; ++timer_jiffies; tv1.index = (tv1.index + 1) & TVR_MASK; + while (queued) + { + timer = queued; + queued = queued->next; + internal_add_timer(timer); + } } spin_unlock_irq(&timerlist_lock); } @@ -1904,6 +1880,7 @@ { struct 
timespec t; unsigned long expire; + struct timeval before, after; if(copy_from_user(&t, rqtp, sizeof(struct timespec))) return -EFAULT; @@ -1928,11 +1905,20 @@ expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); current->state = TASK_INTERRUPTIBLE; + get_fast_time(&before); expire = schedule_timeout(expire); + get_fast_time(&after); if (expire) { if (rmtp) { - jiffies_to_timespec(expire, &t); + struct timespec elapsed; + + timeval_less(after, before, &after); + timeval_to_timespec(after, &elapsed); + if (timespec_before(elapsed, t)) + timespec_less(t, elapsed, &t); + else + t.tv_nsec = t.tv_sec = 0; if (copy_to_user(rmtp, &t, sizeof(struct timespec))) return -EFAULT; } diff -urN 2.2.15pre16/kernel/sysctl.c 2.2.15pre16aa3/kernel/sysctl.c --- 2.2.15pre16/kernel/sysctl.c Wed Mar 29 19:42:16 2000 +++ 2.2.15pre16aa3/kernel/sysctl.c Thu Mar 30 16:00:57 2000 @@ -252,6 +252,8 @@ &pgt_cache_water, 2*sizeof(int), 0600, NULL, &proc_dointvec}, {VM_PAGE_CLUSTER, "page-cluster", &page_cluster, sizeof(int), 0600, NULL, &proc_dointvec}, + {VM_HEAP_STACK_GAP, "heap-stack-gap", + &heap_stack_gap, sizeof(int), 0644, NULL, &proc_dointvec}, {0} }; diff -urN 2.2.15pre16/lib/vsprintf.c 2.2.15pre16aa3/lib/vsprintf.c --- 2.2.15pre16/lib/vsprintf.c Mon Jan 17 16:44:50 2000 +++ 2.2.15pre16aa3/lib/vsprintf.c Thu Mar 30 16:00:58 2000 @@ -67,10 +67,106 @@ #define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ #define do_div(n,base) ({ \ -int __res; \ -__res = ((unsigned long) n) % (unsigned) base; \ -n = ((unsigned long) n) / (unsigned) base; \ -__res; }) + int __res; \ + __res = ((unsigned long) n) % (unsigned) base; \ + n = ((unsigned long) n) / (unsigned) base; \ + __res; }) + +#if BITS_PER_LONG < 64 + +/* Note: do_ldiv assumes that unsigned long long is a 64 bit long + * and unsigned long is at least a 32 bits long. + */ +#define do_ldiv(n, base) \ +({ \ + unsigned long long value = n; \ + unsigned long long leftover; \ + unsigned long temp; \ + unsigned long result_div1, result_div2, result_div3, result_mod; \ +\ + temp = value >> 32; \ + result_div1 = temp/(base); \ + result_mod = temp%(base); \ +\ + temp = (result_mod << 24) | ((value >> 8) & 0xFFFFFF); \ + result_div2 = temp/(base); \ + result_mod = temp%(base); \ +\ + temp = (result_mod << 8) | (value & 0xFF); \ + result_div3 = temp/(base); \ + result_mod = temp%(base);\ +\ + leftover = ((unsigned long long)result_div1 << 32) | \ + ((unsigned long long)result_div2 << 8) | (result_div3); \ +\ + n = leftover; \ + result_mod; \ +}) + + +static char * lnumber(char * str, long long num, int base, int size, + int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits="0123456789abcdef"; + int i; + + if (type & LARGE) + digits = "0123456789ABCDEF"; + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & ZEROPAD) ? 
'0' : ' '; sign = 0; if (type & SIGN) { if (num < 0) { sign = '-'; num = -num; size--; } else if (type & PLUS) { sign = '+'; size--; } else if (type & SPACE) { sign = ' '; size--; } } if (type & SPECIAL) { if (base == 16) size -= 2; } i = 0; if (num == 0) tmp[i++]='0'; else while (num != 0) tmp[i++] = digits[do_ldiv(num,base)]; if (i > precision) precision = i; size -= precision; if (!(type&(ZEROPAD+LEFT))) while(size-->0) *str++ = ' '; if (sign) *str++ = sign; if (type & SPECIAL) { if (base==16) { *str++ = '0'; *str++ = (type & LARGE) ? 'X' : 'x'; } } if (!(type & LEFT)) while (size-- > 0) *str++ = c; while (i < precision--) *str++ = '0'; while (i-- > 0) *str++ = tmp[i]; while (size-- > 0) *str++ = ' '; return str; +} +#endif static char * number(char * str, long num, int base, int size, int precision ,int type) @@ -207,7 +303,10 @@ /* get the conversion qualifier */ qualifier = -1; - if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') { + while (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') { - qualifier = *fmt; + if (*fmt == 'l' && qualifier == 'l') + qualifier = 'L'; + else + qualifier = *fmt; ++fmt; } @@ -290,7 +389,22 @@ --fmt; continue; } - if (qualifier == 'l') + if (qualifier == 'L') { + +#if BITS_PER_LONG < 64 + /* 64-bit printout in 32-bit systems !! + Needed at some point for 64-bit file offsets and + mmap() reporting functions. */ + + unsigned long long lnum; + lnum = va_arg(args, unsigned long long); + str = lnumber(str, lnum, base, field_width, + precision, flags); + continue; +#else + num = va_arg(args, unsigned long); /* 64-bit longs..*/ +#endif + } else if (qualifier == 'l') num = va_arg(args, unsigned long); else if (qualifier == 'h') { num = (unsigned short) va_arg(args, int); diff -urN 2.2.15pre16/mm/Makefile 2.2.15pre16aa3/mm/Makefile --- 2.2.15pre16/mm/Makefile Mon Jan 18 02:27:01 1999 +++ 2.2.15pre16aa3/mm/Makefile Thu Mar 30 16:00:57 2000 @@ -12,4 +12,8 @@ vmalloc.o slab.o \ swap.o vmscan.o page_io.o page_alloc.o swap_state.o swapfile.o +ifeq ($(CONFIG_BIGMEM),y) +O_OBJS += bigmem.o +endif + include $(TOPDIR)/Rules.make diff -urN 2.2.15pre16/mm/bigmem.c 2.2.15pre16aa3/mm/bigmem.c --- 2.2.15pre16/mm/bigmem.c Thu Jan 1 01:00:00 1970 +++ 2.2.15pre16aa3/mm/bigmem.c Thu Mar 30 16:00:58 2000 @@ -0,0 +1,87 @@ +/* + * BIGMEM common code and variables. + * + * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de + * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de + */ + +#include +#include +#include + +unsigned long bigmem_mapnr; +int nr_free_bigpages = 0; + +struct page * prepare_bigmem_swapout(struct page * page) +{ + /* a bigmem page can't be swapped out directly: + the b_data buffer addresses would break + the lowlevel device drivers. */ + if (PageBIGMEM(page)) + { + unsigned long regular_page; + unsigned long vaddr; + + regular_page = __get_free_page(GFP_ATOMIC); + if (!regular_page) + return NULL; + + vaddr = kmap(page_address(page), KM_READ); + copy_page(regular_page, vaddr); + kunmap(vaddr, KM_READ); + + /* ok, we can just forget about our bigmem page since + we stored its data into the new regular_page.
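
The do_ldiv() macro above deserves a correctness note: writing n = H*2^32 + M*2^8 + L, each stage divides a quantity of the form remainder*2^k + next-bits, so the partial remainders chain exactly as in long division and the final result_mod is n % base. Because a remainder is shifted left by up to 24 bits into a 32-bit temporary, the trick is only exact for base <= 256, which covers the octal/decimal/hex uses in lnumber(). A quick host-side check against native 64-bit division (with 'unsigned long' narrowed to 'unsigned int' so the test behaves the same on 64-bit hosts):

#include <assert.h>
#include <stdio.h>

#define do_ldiv(n, base) ({ \
        unsigned long long value = n; \
        unsigned int temp, result_mod; \
        unsigned int result_div1, result_div2, result_div3; \
        temp = value >> 32; \
        result_div1 = temp/(base); result_mod = temp%(base); \
        temp = (result_mod << 24) | ((value >> 8) & 0xFFFFFF); \
        result_div2 = temp/(base); result_mod = temp%(base); \
        temp = (result_mod << 8) | (value & 0xFF); \
        result_div3 = temp/(base); result_mod = temp%(base); \
        n = ((unsigned long long)result_div1 << 32) | \
            ((unsigned long long)result_div2 << 8) | result_div3; \
        result_mod; \
})

int main(void)
{
        unsigned long long v;

        for (v = 0xfffffff000ULL; v < 0xfffffff000ULL + 10000; v++) {
                unsigned long long n = v;
                unsigned int r = do_ldiv(n, 10);
                assert(n == v / 10 && r == v % 10);
        }
        printf("do_ldiv matches native division\n");
        return 0;
}
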
*/ + __free_page(page); + + page = MAP_NR(regular_page) + mem_map; + } + return page; +} + +struct page * replace_with_bigmem(struct page * page) +{ + if (!PageBIGMEM(page) && nr_free_bigpages) + { + unsigned long kaddr; + + kaddr = __get_free_page(GFP_ATOMIC|GFP_BIGMEM); + if (kaddr) + { + struct page * bigmem_page; + + bigmem_page = MAP_NR(kaddr) + mem_map; + if (PageBIGMEM(bigmem_page)) + { + unsigned long vaddr; + + vaddr = kmap(kaddr, KM_WRITE); + copy_page(vaddr, page_address(page)); + kunmap(vaddr, KM_WRITE); + + /* Preserve the caching of the swap_entry. */ + bigmem_page->index = page->index; + + /* We can just forget the old page since + we stored its data into the new + bigmem_page. */ + __free_page(page); + + page = bigmem_page; + } + } + } + return page; +} + +unsigned long prepare_bigmem_shm_swapin(unsigned long page) +{ + if (!PageBIGMEM(&mem_map[MAP_NR(page)])) + return page; + + free_page(page); + + /* no need to clear the page since it will be rewritten by the + swapin. */ + return __get_free_page(GFP_ATOMIC); +} diff -urN 2.2.15pre16/mm/filemap.c 2.2.15pre16aa3/mm/filemap.c --- 2.2.15pre16/mm/filemap.c Wed Mar 29 19:42:16 2000 +++ 2.2.15pre16aa3/mm/filemap.c Thu Mar 30 16:00:58 2000 @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -88,7 +89,7 @@ * Truncate the page cache at a set offset, removing the pages * that are beyond that offset (and zeroing out partial pages). */ -void truncate_inode_pages(struct inode * inode, unsigned long start) +void truncate_inode_pages(struct inode * inode, loff_t start) { struct page ** p; struct page * page; @@ -96,10 +97,10 @@ repeat: p = &inode->i_pages; while ((page = *p) != NULL) { - unsigned long offset = page->offset; + loff_t loffset = pgoff2loff(page->index); /* page wholly truncated - free it */ - if (offset >= start) { + if (loffset >= start) { if (PageLocked(page)) { wait_on_page(page); goto repeat; @@ -115,9 +116,10 @@ continue; } p = &page->next; - offset = start - offset; + loffset = start - loffset; /* partial truncate, clear end of page */ - if (offset < PAGE_CACHE_SIZE) { + if (loffset < PAGE_CACHE_SIZE) { + unsigned int offset = loffset; /* truncate ok */ unsigned long address = page_address(page); memset((void *) (offset + address), 0, PAGE_CACHE_SIZE - offset); flush_page_to_ram(address); @@ -138,11 +140,15 @@ int shrink_mmap(int priority, int gfp_mask) { static unsigned long clock = 0; +#ifndef CONFIG_BIGMEM unsigned long limit = num_physpages; +#else + unsigned long limit = bigmem_mapnr; +#endif struct page * page; int count; - count = limit >> priority; + count = limit / priority; page = mem_map + clock; do { @@ -154,7 +160,11 @@ */ page++; clock++; +#ifndef CONFIG_BIGMEM if (clock >= max_mapnr) { +#else + if (clock >= bigmem_mapnr) { +#endif clock = 0; page = mem_map; } @@ -168,6 +178,9 @@ if (atomic_read(&page->count) != 1) continue; + if (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page)) + continue; + referenced = test_and_clear_bit(PG_referenced, &page->flags); if (PageLocked(page)) @@ -184,7 +197,8 @@ * were to be marked referenced.. */ if (PageSwapCache(page)) { - if (referenced && swap_count(page->offset) != 1) + if (referenced && + swap_count(pgoff2ulong(page->index)) != 1) continue; delete_from_swap_cache(page); return 1; @@ -235,11 +249,12 @@ * memory maps.
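
Throughout this patch's page-cache changes, byte offsets (loff_t) and page indices (pgoff_t) are kept apart and converted only through loff2pgoff/pgoff2loff/pgoff2ulong/ulong2pgoff, with PAGE_CACHE_MASK_loff for masking 64-bit positions. Those definitions live in headers outside this excerpt; on a 32-bit configuration they presumably reduce to something like the following model (an assumption, shown only to make the conversions readable):

#include <stdio.h>

typedef long long loff_t;       /* 64-bit file offset (stand-in typedef) */
typedef unsigned long pgoff_t;  /* page-granular offset */

#define PAGE_CACHE_SHIFT 12
#define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)
#define PAGE_CACHE_MASK_loff (~((loff_t)PAGE_CACHE_SIZE - 1))

#define loff2pgoff(l)   ((pgoff_t)((l) >> PAGE_CACHE_SHIFT))
#define pgoff2loff(p)   ((loff_t)(p) << PAGE_CACHE_SHIFT)
#define pgoff2ulong(p)  ((unsigned long)(p))
#define ulong2pgoff(u)  ((pgoff_t)(u))

int main(void)
{
        loff_t pos = 0x180001234LL;     /* > 4GB: would overflow a 32-bit offset */

        printf("page index %lu, page start %lld\n",
               pgoff2ulong(loff2pgoff(pos)),
               (long long)(pos & PAGE_CACHE_MASK_loff));
        return 0;
}

Routing every comparison through pgoff2ulong() is presumably what lets pgoff_t later become a wider or opaque type without touching the call sites.
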
--sct */ -void update_vm_cache_conditional(struct inode * inode, unsigned long pos, const char * buf, int count, unsigned long source_address) +void update_vm_cache_conditional(struct inode * inode, loff_t pos, const char * buf, int count, unsigned long source_address) { unsigned long offset, len; + pgoff_t pgoff = loff2pgoff(pos); - offset = (pos & ~PAGE_CACHE_MASK); + offset = ((unsigned long)pos & ~PAGE_CACHE_MASK); pos = pos & PAGE_CACHE_MASK; len = PAGE_CACHE_SIZE - offset; do { @@ -247,7 +262,7 @@ if (len > count) len = count; - page = find_page(inode, pos); + page = find_page(inode, pgoff); if (page) { char *dest = (char*) (offset + page_address(page)); @@ -266,19 +281,20 @@ } while (count); } -void update_vm_cache(struct inode * inode, unsigned long pos, const char * buf, int count) +void update_vm_cache(struct inode * inode, loff_t pos, const char * buf, int count) { update_vm_cache_conditional(inode, pos, buf, count, 0); } static inline void add_to_page_cache(struct page * page, - struct inode * inode, unsigned long offset, - struct page **hash) + struct inode * inode, + pgoff_t pgoff, + struct page **hash) { atomic_inc(&page->count); page->flags = (page->flags & ~((1 << PG_uptodate) | (1 << PG_error))) | (1 << PG_referenced); - page->offset = offset; + page->index = pgoff; add_page_to_inode_queue(inode, page); __add_page_to_hash_queue(page, hash); } @@ -289,29 +305,32 @@ * this is all overlapped with the IO on the previous page finishing anyway) */ static unsigned long try_to_read_ahead(struct file * file, - unsigned long offset, unsigned long page_cache) + pgoff_t pgoff, unsigned long page_cache) { struct inode *inode = file->f_dentry->d_inode; - struct page * page; - struct page ** hash; + pgoff_t pg_size; + + /* Calculate file size in 'pages' -- if even one byte (according to + the 'i_size') exceeds the final page-size block, round up. */ + pg_size = loff2pgoff(inode->i_size+(PAGE_SIZE-1)); - offset &= PAGE_CACHE_MASK; - switch (page_cache) { - case 0: + if (!page_cache) { page_cache = page_cache_alloc(); if (!page_cache) - break; - default: - if (offset >= inode->i_size) - break; - hash = page_hash(inode, offset); - page = __find_page(inode, offset, *hash); + return 0; /* Can't allocate! */ + } + /* Ok, we have a page, make sure it is in the page cache */ + if (pgoff2ulong(pgoff) < pgoff2ulong(pg_size)) { + struct page * page; + struct page ** hash; + hash = page_hash(inode, pgoff); + page = __find_page(inode, pgoff, *hash); if (!page) { /* * Ok, add the new page to the hash-queues... 
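
One detail in try_to_read_ahead() above: the file size is converted to pages with the add-then-shift round-up idiom, so a file whose final page holds even one byte still counts that page. For instance:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
        unsigned long long i_size[] = { 0, 1, 4096, 4097, 8193 };
        int i;

        for (i = 0; i < 5; i++)
                printf("i_size %llu -> %llu pages\n", i_size[i],
                       (i_size[i] + PAGE_SIZE - 1) >> PAGE_SHIFT);
        return 0;       /* prints 0, 1, 1, 2, 3 pages */
}
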
*/ page = page_cache_entry(page_cache); - add_to_page_cache(page, inode, offset, hash); + add_to_page_cache(page, inode, pgoff, hash); inode->i_op->readpage(file, page); page_cache = 0; } @@ -334,13 +353,14 @@ wait.task = tsk; add_wait_queue(&page->wait, &wait); -repeat: - tsk->state = TASK_UNINTERRUPTIBLE; - run_task_queue(&tq_disk); - if (PageLocked(page)) { + do { + run_task_queue(&tq_disk); + tsk->state = TASK_UNINTERRUPTIBLE; + mb(); + if (!PageLocked(page)) + break; schedule(); - goto repeat; - } + } while (PageLocked(page)); tsk->state = TASK_RUNNING; remove_wait_queue(&page->wait, &wait); } @@ -365,11 +385,11 @@ #define PROFILE_MAXREADCOUNT 1000 -static unsigned long total_reada; -static unsigned long total_async; -static unsigned long total_ramax; -static unsigned long total_ralen; -static unsigned long total_rawin; +static u_long total_reada; +static u_long total_async; +static u_long total_ramax; +static u_long total_ralen; +static u_long total_rawin; static void profile_readahead(int async, struct file *filp) { @@ -477,13 +497,13 @@ static inline unsigned long generic_file_readahead(int reada_ok, struct file * filp, struct inode * inode, - unsigned long ppos, struct page * page, unsigned long page_cache) + loff_t ppos, struct page * page, unsigned long page_cache) { - unsigned long max_ahead, ahead; - unsigned long raend; + loff_t max_ahead, ahead; + loff_t raend; int max_readahead = get_max_readahead(inode); - raend = filp->f_raend & PAGE_CACHE_MASK; + raend = filp->f_raend & PAGE_CACHE_MASK_loff; max_ahead = 0; /* @@ -541,7 +561,7 @@ ahead = 0; while (ahead < max_ahead) { ahead += PAGE_CACHE_SIZE; - page_cache = try_to_read_ahead(filp, raend + ahead, + page_cache = try_to_read_ahead(filp, loff2pgoff(raend + ahead), page_cache); } /* @@ -607,14 +627,17 @@ { struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; - size_t pos, pgpos, page_cache; + size_t page_cache; + pgoff_t pgpos; + loff_t pos, posp; int reada_ok; int max_readahead = get_max_readahead(inode); page_cache = 0; pos = *ppos; - pgpos = pos & PAGE_CACHE_MASK; + posp = pos & PAGE_CACHE_MASK_loff; + pgpos = loff2pgoff(pos); /* * If the current position is outside the previous read-ahead window, * we reset the current read-ahead context and set read ahead max to zero @@ -622,7 +645,7 @@ * otherwise, we assume that the file accesses are sequential enough to * continue read-ahead. */ - if (pgpos > filp->f_raend || pgpos + filp->f_rawin < filp->f_raend) { + if (posp > filp->f_raend || posp + filp->f_rawin < filp->f_raend) { reada_ok = 0; filp->f_raend = 0; filp->f_ralen = 0; @@ -638,12 +661,12 @@ * Then, at least MIN_READAHEAD if read ahead is ok, * and at most MAX_READAHEAD in all cases. */ - if (pos + desc->count <= (PAGE_CACHE_SIZE >> 1)) { + if (pos + desc->count <= (loff_t)(PAGE_CACHE_SIZE >> 1)) { filp->f_ramax = 0; } else { - unsigned long needed; + loff_t needed; - needed = ((pos + desc->count) & PAGE_CACHE_MASK) - pgpos; + needed = ((pos + desc->count) & PAGE_CACHE_MASK) - posp; if (filp->f_ramax < needed) filp->f_ramax = needed; @@ -656,6 +679,7 @@ for (;;) { struct page *page, **hash; + pgoff_t pgoff; if (pos >= inode->i_size) break; @@ -663,8 +687,9 @@ /* * Try to find the data in the page cache.. 
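
The rewritten ___wait_on_page() above, and the swap_lockmap wait in mm/page_io.c further down, both replace sleep_on()-style waiting with the same open-coded loop: set the task state first, issue a memory barrier, then re-test the condition, so a wakeup arriving between the test and schedule() cannot be lost. The skeleton of the pattern (a sketch with an invented queue and condition; the real loops test PageLocked or a lock bitmap):

/* 2.2-style wait loop that closes the lost-wakeup window (sketch) */
struct wait_queue wait = { current, NULL };

add_wait_queue(&queue, &wait);
for (;;) {
        current->state = TASK_UNINTERRUPTIBLE;
        mb();                        /* order the state write vs. the test */
        if (condition)               /* e.g. !PageLocked(page) */
                break;
        run_task_queue(&tq_disk);    /* make sure pending I/O is submitted */
        schedule();
}
current->state = TASK_RUNNING;
remove_wait_queue(&queue, &wait);
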
*/ - hash = page_hash(inode, pos & PAGE_CACHE_MASK); - page = __find_page(inode, pos & PAGE_CACHE_MASK, *hash); + pgoff = loff2pgoff(pos); + hash = page_hash(inode, pgoff); + page = __find_page(inode, pgoff, *hash); if (!page) goto no_cached_page; @@ -677,7 +702,7 @@ * the page has been rewritten. */ if (PageUptodate(page) || PageLocked(page)) - page_cache = generic_file_readahead(reada_ok, filp, inode, pos & PAGE_CACHE_MASK, page, page_cache); + page_cache = generic_file_readahead(reada_ok, filp, inode, pos & PAGE_CACHE_MASK_loff, page, page_cache); else if (reada_ok && filp->f_ramax > MIN_READAHEAD) filp->f_ramax = MIN_READAHEAD; @@ -695,8 +720,8 @@ unsigned long offset, nr; offset = pos & ~PAGE_CACHE_MASK; - nr = PAGE_CACHE_SIZE - offset; - if (nr > inode->i_size - pos) + nr = PAGE_CACHE_SIZE - offset; /* small value */ + if ((loff_t)nr > (inode->i_size - pos)) nr = inode->i_size - pos; /* @@ -736,7 +761,7 @@ */ page = page_cache_entry(page_cache); page_cache = 0; - add_to_page_cache(page, inode, pos & PAGE_CACHE_MASK, hash); + add_to_page_cache(page, inode, pgoff, hash); /* * Error handling is tricky. If we get a read error, @@ -817,10 +842,26 @@ ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *ppos) { ssize_t retval; + struct inode *inode = filp->f_dentry->d_inode; retval = -EFAULT; if (access_ok(VERIFY_WRITE, buf, count)) { retval = 0; + + /* L-F-S spec 2.2.1.25: */ + if (count && !(filp->f_flags & O_LARGEFILE) && + S_ISREG(inode->i_mode) && + (*ppos < inode->i_size) && + (*ppos >= 0x7ffffffeULL)) /* pos@2G forbidden */ + return -EOVERFLOW; + if (count && !(filp->f_flags & O_LARGEFILE) && + S_ISREG(inode->i_mode) && + (*ppos < inode->i_size) && + (*ppos + count >= 0x7fffffffULL)) { + /* Read only until end of allowed region */ + count = LONG_MAX - *ppos; + } + if (count) { read_descriptor_t desc; @@ -962,20 +1003,25 @@ struct file * file = area->vm_file; struct dentry * dentry = file->f_dentry; struct inode * inode = dentry->d_inode; - unsigned long offset, reada, i; + loff_t offset; + pgoff_t pgoff, reada; + int i; struct page * page, **hash; unsigned long old_page, new_page; new_page = 0; - offset = (address & PAGE_MASK) - area->vm_start + area->vm_offset; + offset = ((loff_t)((address & PAGE_MASK) - area->vm_start) + + area->vm_offset); + if (offset >= inode->i_size && (area->vm_flags & VM_SHARED) && area->vm_mm == current->mm) goto no_page; /* * Do we have something in the page cache already? */ - hash = page_hash(inode, offset); - page = __find_page(inode, offset, *hash); + pgoff = loff2pgoff(offset); + hash = page_hash(inode, pgoff); + page = __find_page(inode, pgoff, *hash); if (!page) goto no_cached_page; @@ -1026,11 +1072,12 @@ /* * Try to read in an entire cluster at once. */ - reada = offset; - reada >>= PAGE_CACHE_SHIFT + page_cluster; - reada <<= PAGE_CACHE_SHIFT + page_cluster; + reada = loff2pgoff(offset); + /* Mask lowest 'page_cluster' worth of the lowest bits */ + reada = ulong2pgoff(pgoff2ulong(reada) & ((~(0UL)) << page_cluster)); - for (i = 1 << page_cluster; i > 0; --i, reada += PAGE_CACHE_SIZE) + for (i = 1 << page_cluster; i > 0; + --i, reada = ulong2pgoff(pgoff2ulong(reada)+1)) new_page = try_to_read_ahead(file, reada, new_page); if (!new_page) @@ -1044,7 +1091,7 @@ * cache.. The page we just got may be useful if we * can't share, so don't get rid of it here. 
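
These generic_file_read() checks encode the LFS rule (section 2.2.1.25, as the comment cites) that a process which did not open with O_LARGEFILE must never observe a file position at or beyond 2^31-1: a read starting at or after 2G-2 fails with EOVERFLOW, and a read that would cross the boundary is shortened so it ends at LONG_MAX. Numerically, with the constants from the patch:

#include <stdio.h>

int main(void)
{
        unsigned long long ppos = 0x7ffffff0ULL;  /* position just below 2G */
        unsigned long long count = 100;

        if (ppos >= 0x7ffffffeULL)
                printf("EOVERFLOW\n");
        else if (ppos + count >= 0x7fffffffULL) {
                count = 0x7fffffffULL - ppos;     /* LONG_MAX on the 32-bit target */
                printf("read clamped to %llu bytes\n", count);  /* 15 bytes */
        }
        return 0;
}
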
*/ - page = find_page(inode, offset); + page = find_page(inode, pgoff); if (page) goto found_page; @@ -1053,7 +1100,7 @@ */ page = page_cache_entry(new_page); new_page = 0; - add_to_page_cache(page, inode, offset, hash); + add_to_page_cache(page, inode, pgoff, hash); if (inode->i_op->readpage(file, page) != 0) goto failure; @@ -1102,10 +1149,10 @@ * if the disk is full. */ static inline int do_write_page(struct inode * inode, struct file * file, - const char * page, unsigned long offset) + const char * page, loff_t offset) { int retval; - unsigned long size; + loff_t size; loff_t loff = offset; mm_segment_t old_fs; @@ -1129,7 +1176,7 @@ } static int filemap_write_page(struct vm_area_struct * vma, - unsigned long offset, + loff_t offset, unsigned long page, int wait) { @@ -1175,7 +1222,7 @@ */ int filemap_swapout(struct vm_area_struct * vma, struct page * page) { - return filemap_write_page(vma, page->offset, page_address(page), 0); + return filemap_write_page(vma, pgoff2loff(page->index), page_address(page), 0); } static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma, @@ -1474,7 +1521,7 @@ { struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; - unsigned long pos = *ppos; + loff_t pos = *ppos; unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur; struct page *page, **hash; unsigned long page_cache = 0; @@ -1500,7 +1547,7 @@ * Check whether we've reached the file size limit. */ status = -EFBIG; - if (pos >= limit) { + if (limit != RLIM_INFINITY && pos >= limit) { send_sig(SIGXFSZ, current, 0); goto out; } @@ -1510,21 +1557,21 @@ * Check whether to truncate the write, * and send the signal if we do. */ - if (count > limit - pos) { + if (limit != RLIM_INFINITY && count > limit - pos) { send_sig(SIGXFSZ, current, 0); count = limit - pos; } while (count) { - unsigned long bytes, pgpos, offset; + unsigned long bytes, offset; + pgoff_t pgpos = loff2pgoff(pos); char * dest; /* * Try to find the page in the cache. If it isn't there, * allocate a free page. */ - offset = (pos & ~PAGE_CACHE_MASK); - pgpos = pos & PAGE_CACHE_MASK; + offset = ((unsigned long)pos & ~PAGE_CACHE_MASK); bytes = PAGE_CACHE_SIZE - offset; if (bytes > count) bytes = count; @@ -1596,15 +1643,14 @@ * Note: we don't have to worry about races here, as the caller * is holding the inode semaphore. */ -unsigned long get_cached_page(struct inode * inode, unsigned long offset, - int new) +unsigned long get_cached_page(struct inode * inode, pgoff_t pgoff, int new) { struct page * page; struct page ** hash; unsigned long page_cache = 0; - hash = page_hash(inode, offset); - page = __find_page(inode, offset, *hash); + hash = page_hash(inode, pgoff); + page = __find_page(inode, pgoff, *hash); if (!page) { if (!new) goto out; @@ -1613,7 +1659,7 @@ goto out; clear_page(page_cache); page = page_cache_entry(page_cache); - add_to_page_cache(page, inode, offset, hash); + add_to_page_cache(page, inode, pgoff, hash); } if (atomic_read(&page->count) != 2) printk(KERN_ERR "get_cached_page: page count=%d\n", diff -urN 2.2.15pre16/mm/memory.c 2.2.15pre16aa3/mm/memory.c --- 2.2.15pre16/mm/memory.c Wed Jan 5 14:16:56 2000 +++ 2.2.15pre16aa3/mm/memory.c Thu Mar 30 16:00:58 2000 @@ -31,12 +31,18 @@ /* * 05.04.94 - Multi-page memory management added for v1.1. 
* Idea by Alex Bligh (alex@cconcepts.co.uk) + * + * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG + * (Gerhard.Wichert@pdb.siemens.de) */ #include #include #include #include +#include +#include +#include #include #include @@ -53,10 +59,10 @@ static inline void copy_cow_page(unsigned long from, unsigned long to) { if (from == ZERO_PAGE(to)) { - clear_page(to); + clear_bigpage(to); return; } - copy_page(to, from); + copy_bigpage(to, from); } mem_map_t * mem_map = NULL; @@ -397,6 +403,183 @@ } } + +/* + * Do a quick page-table lookup for a single page. + */ +static unsigned long get_page(unsigned long address) +{ + pgd_t *pgd; + pmd_t *pmd; + + pgd = pgd_offset(current->mm, address); + pmd = pmd_offset(pgd, address); + if (pmd) { + pte_t * pte = pte_offset(pmd, address); + if (pte && pte_present(*pte)) { + return pte_page(*pte); + } + } + + printk(KERN_ERR "Missing page in lock_down_page\n"); + return 0; +} + +/* + * Given a physical address, is there a useful struct page pointing to it? + */ + +static struct page * get_page_map(unsigned long page) +{ + struct page *map; + + if (MAP_NR(page) >= max_mapnr) + return 0; + if (page == ZERO_PAGE(page)) + return 0; + map = mem_map + MAP_NR(page); + if (PageReserved(map)) + return 0; + return map; +} + +/* + * Force in an entire range of pages from the current process's user VA, + * and pin and lock the pages for IO. + */ + +#define dprintk(x...) +int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) +{ + unsigned long ptr, end; + int err; + struct mm_struct * mm; + struct vm_area_struct * vma = 0; + unsigned long page; + struct page * map; + int doublepage = 0; + int repeat = 0; + int i; + + /* Make sure the iobuf is not already mapped somewhere. */ + if (iobuf->nr_pages) + return -EINVAL; + + mm = current->mm; + dprintk ("map_user_kiobuf: begin\n"); + + ptr = va & PAGE_MASK; + end = (va + len + PAGE_SIZE - 1) & PAGE_MASK; + err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT); + if (err) + return err; + + repeat: + down(&mm->mmap_sem); + + err = -EFAULT; + iobuf->locked = 1; + iobuf->offset = va & ~PAGE_MASK; + iobuf->length = len; + + i = 0; + + /* + * First of all, try to fault in all of the necessary pages + */ + while (ptr < end) { + if (!vma || ptr >= vma->vm_end) { + vma = find_vma(current->mm, ptr); + if (!vma) + goto out_unlock; + } + if (!handle_mm_fault(current, vma, ptr, (rw==READ))) + goto out_unlock; + page = get_page(ptr); + if (!page) { + printk (KERN_ERR "Missing page in map_user_kiobuf\n"); + goto out_unlock; + } + map = get_page_map(page); + if (map) { + if (PageLocked(map)) + goto retry; + atomic_inc(&map->count); + set_bit(PG_locked, &map->flags); + } + dprintk ("Installing page %p %p: %d\n", (void *)page, map, i); + iobuf->pagelist[i] = page; + iobuf->maplist[i] = map; + iobuf->nr_pages = ++i; + + ptr += PAGE_SIZE; + } + + up(&mm->mmap_sem); + dprintk ("map_user_kiobuf: end OK\n"); + return 0; + + out_unlock: + up(&mm->mmap_sem); + unmap_kiobuf(iobuf); + dprintk ("map_user_kiobuf: end %d\n", err); + return err; + + retry: + + /* + * Undo the locking so far, wait on the page we got to, and try again. + */ + unmap_kiobuf(iobuf); + up(&mm->mmap_sem); + + /* + * Did the release also unlock the page we got stuck on? + */ + if (!PageLocked(map)) { + /* If so, we may well have the page mapped twice in the + * IO address range. Bad news. Of course, it _might_ + * just be a coincidence, but if it happens more than + * once, chances are we have a double-mapped page. 
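
map_user_kiobuf() above is the pinning half of the raw-I/O machinery whose symbols (alloc_kiovec, brw_kiovec, unmap_kiobuf, free_kiovec) are exported from kernel/ksyms.c earlier in the patch. A driver would use it roughly as below; error paths are trimmed and the brw_kiovec() argument list is inferred from contemporary raw-I/O patches rather than from this excerpt, so treat the whole thing as a sketch:

/* sketch: pin a user buffer and do block I/O straight into it */
static int raw_read_sketch(kdev_t dev, unsigned long user_va, size_t len,
                           unsigned long first_block, int blocksize)
{
        struct kiobuf *iobuf;
        unsigned long blocks[64];       /* assume len/blocksize <= 64 here */
        int err, i, nblocks = len / blocksize;

        err = alloc_kiovec(1, &iobuf);
        if (err)
                return err;
        err = map_user_kiobuf(READ, iobuf, user_va, len); /* fault in + lock */
        if (!err) {
                for (i = 0; i < nblocks; i++)
                        blocks[i] = first_block + i;
                /* signature assumed, not shown in this excerpt */
                err = brw_kiovec(READ, 1, &iobuf, dev, blocks, blocksize);
                unmap_kiobuf(iobuf);    /* unlock and release the pages */
        }
        free_kiovec(1, &iobuf);
        return err;
}
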
*/ + if (++doublepage >= 3) { + return -EINVAL; + } + } + + /* + * Try again... + */ + wait_on_page(map); + if (++repeat < 16) + goto repeat; + return -EAGAIN; +} + + +/* + * Unmap all of the pages referenced by a kiobuf. We release the pages, + * and unlock them if they were locked. + */ + +void unmap_kiobuf (struct kiobuf *iobuf) +{ + int i; + struct page *map; + + for (i = 0; i < iobuf->nr_pages; i++) { + map = iobuf->maplist[i]; + + if (map && iobuf->locked) { + clear_bit(PG_locked, &map->flags); + wake_up(&map->wait); + __free_page(map); + } + } + + iobuf->nr_pages = 0; + iobuf->locked = 0; +} + static inline void zeromap_pte_range(pte_t * pte, unsigned long address, unsigned long size, pgprot_t prot) { @@ -613,7 +796,7 @@ struct page * page_map; pte = *page_table; - new_page = __get_free_page(GFP_USER); + new_page = __get_free_page(GFP_BIGUSER); /* Did swap_out() unmapped the protected page while we slept? */ if (pte_val(*page_table) != pte_val(pte)) goto end_wp_page; @@ -639,7 +822,7 @@ case 2: if (!PageSwapCache(page_map)) break; - if (swap_count(page_map->offset) != 1) + if (swap_count(pgoff2ulong(page_map->index)) != 1) break; delete_from_swap_cache(page_map); /* FallThrough */ @@ -730,7 +913,7 @@ * between the file and the memory map for a potential last * incomplete page. Ugly, but necessary. */ -void vmtruncate(struct inode * inode, unsigned long offset) +void vmtruncate(struct inode * inode, loff_t offset) { struct vm_area_struct * mpnt; @@ -807,10 +990,10 @@ { pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); if (write_access) { - unsigned long page = __get_free_page(GFP_USER); + unsigned long page = __get_free_page(GFP_BIGUSER); if (!page) return -1; - clear_page(page); + clear_bigpage(page); entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); vma->vm_mm->rss++; tsk->min_flt++; diff -urN 2.2.15pre16/mm/mmap.c 2.2.15pre16aa3/mm/mmap.c --- 2.2.15pre16/mm/mmap.c Mon Jan 17 16:44:50 2000 +++ 2.2.15pre16aa3/mm/mmap.c Thu Mar 30 16:00:58 2000 @@ -40,6 +40,7 @@ kmem_cache_t *vm_area_cachep; int sysctl_overcommit_memory; +int heap_stack_gap = 128; /* Check that a process has enough memory to allocate a * new virtual mapping. @@ -66,7 +67,6 @@ free += page_cache_size; free += nr_free_pages; free += nr_swap_pages; - free -= (page_cache.min_percent + buffer_mem.min_percent + 2)*num_physpages/100; return free > pages; } @@ -169,11 +169,12 @@ #undef _trans } -unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, unsigned long off) +unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, unsigned long pg_off) { struct mm_struct * mm = current->mm; struct vm_area_struct * vma; + loff_t off = (loff_t)pg_off << PAGE_SHIFT; int error; if (file && (!file->f_op || !file->f_op->mmap)) @@ -367,9 +368,14 @@ for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) { /* At this point: (!vmm || addr < vmm->vm_end). */ + unsigned long __heap_stack_gap = 0; if (TASK_SIZE - len < addr) return 0; - if (!vmm || addr + len <= vmm->vm_start) + if (!vmm) + return addr; + if (vmm->vm_flags & VM_GROWSDOWN) + __heap_stack_gap = heap_stack_gap << PAGE_SHIFT; + if (addr + len + __heap_stack_gap <= vmm->vm_start) return addr; addr = vmm->vm_end; } @@ -832,7 +838,8 @@ * the offsets must be contiguous.. 
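
The get_unmapped_area() hunk above is the entire heap-stack-gap feature: a candidate mapping is rejected unless it ends at least heap_stack_gap pages below the start of a VM_GROWSDOWN (stack) vma, so an expanding stack can no longer grow flush against the heap. With the default of 128 pages and 4 kB pages that is a 512 kB cushion, tunable through the vm.heap-stack-gap sysctl wired up in kernel/sysctl.c earlier in the patch. Reduced to its arithmetic:

#include <stdio.h>

#define PAGE_SHIFT 12
static int heap_stack_gap = 128;        /* pages, as in the patch */

/* would a mapping [addr, addr+len) be allowed below a stack at stack_start? */
static int fits_below_stack(unsigned long addr, unsigned long len,
                            unsigned long stack_start)
{
        unsigned long gap = (unsigned long)heap_stack_gap << PAGE_SHIFT;
        return addr + len + gap <= stack_start;
}

int main(void)
{
        unsigned long stack = 0xbfff0000UL;

        /* 256 pages below the stack: fits; 64 pages below: rejected */
        printf("%d\n", fits_below_stack(stack - (256UL << PAGE_SHIFT), 4096, stack));
        printf("%d\n", fits_below_stack(stack - (64UL << PAGE_SHIFT), 4096, stack));
        return 0;
}
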
*/ if ((mpnt->vm_file != NULL) || (mpnt->vm_flags & VM_SHM)) { - unsigned long off = prev->vm_offset+prev->vm_end-prev->vm_start; + loff_t off = (prev->vm_offset + + (loff_t)(prev->vm_end - prev->vm_start)); if (off != mpnt->vm_offset) continue; } diff -urN 2.2.15pre16/mm/mremap.c 2.2.15pre16aa3/mm/mremap.c --- 2.2.15pre16/mm/mremap.c Mon Jan 17 16:44:50 2000 +++ 2.2.15pre16aa3/mm/mremap.c Thu Mar 30 16:00:56 2000 @@ -127,7 +127,7 @@ new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); if (new_vma) { - unsigned long new_addr = get_unmapped_area(addr, new_len); + unsigned long new_addr = get_unmapped_area(0, new_len); if (new_addr && !move_page_tables(current->mm, new_addr, addr, old_len)) { *new_vma = *vma; diff -urN 2.2.15pre16/mm/page_alloc.c 2.2.15pre16aa3/mm/page_alloc.c --- 2.2.15pre16/mm/page_alloc.c Wed Mar 29 19:42:16 2000 +++ 2.2.15pre16aa3/mm/page_alloc.c Thu Mar 30 16:00:57 2000 @@ -3,6 +3,7 @@ * * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds * Swap reorganised 29.12.95, Stephen Tweedie + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ #include @@ -13,6 +14,7 @@ #include #include #include +#include /* export bigmem vars */ #include #include /* for copy_to/from_user */ @@ -35,7 +37,11 @@ #else #define NR_MEM_LISTS 10 #endif +#ifndef CONFIG_BIGMEM #define NR_MEM_TYPES 2 /* GFP_DMA vs not for now. */ +#else +#define NR_MEM_TYPES 3 +#endif /* The start of this MUST match the start of "struct page" */ struct free_area_struct { @@ -104,6 +110,13 @@ #define list(x) (mem_map+(x)) +#ifdef CONFIG_BIGMEM + if (map_nr >= bigmem_mapnr) + { + area = free_area[2] + order; + nr_free_bigpages -= mask; + } +#endif map_nr &= mask; nr_free_pages -= mask; while (mask + (1 << (NR_MEM_LISTS-1))) { @@ -148,6 +161,17 @@ #define MARK_USED(index, order, area) \ change_bit((index) >> (1+(order)), (area)->map) #define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT)) +#ifdef CONFIG_BIGMEM +#define UPDATE_NR_FREE_BIGPAGES(map_nr, order) \ + do \ + { \ + if ((map_nr) >= bigmem_mapnr) \ + nr_free_bigpages -= 1 << (order); \ + } \ + while (0) +#else +#define UPDATE_NR_FREE_BIGPAGES(map_nr, order) do { } while (0) +#endif #define RMQUEUE_TYPE(order, type) \ do { struct free_area_struct * area = free_area[type]+order; \ unsigned long new_order = order; \ @@ -158,6 +182,7 @@ map_nr = ret - mem_map; \ MARK_USED(map_nr, new_order, area); \ nr_free_pages -= 1 << order; \ + UPDATE_NR_FREE_BIGPAGES(map_nr, order); \ area->count--; \ EXPAND(ret, map_nr, order, new_order, area); \ spin_unlock_irqrestore(&page_alloc_lock, flags); \ @@ -206,6 +231,7 @@ int freed; extern struct wait_queue * kswapd_wait; +#ifndef CONFIG_BIGMEM if (nr_free_pages >= freepages.high) { /* share RO cachelines in fast path */ @@ -222,6 +248,47 @@ } current->trashing_mem = 1; +#else + if (gfp_mask & __GFP_BIGMEM) + { + if (nr_free_pages >= freepages.high) + { + /* share RO cachelines in fast path */ + if (current->trashing_bigmem) + current->trashing_bigmem = 0; + goto ok_to_allocate; + } + else + { + if (nr_free_pages < freepages.low) + wake_up_interruptible(&kswapd_wait); + if (nr_free_pages > freepages.min && !current->trashing_bigmem) + goto ok_to_allocate; + } + + current->trashing_bigmem = 1; + } + else + { + if (nr_free_pages-nr_free_bigpages >= freepages.high) + { + /* share RO cachelines in fast path */ + if (current->trashing_mem) + current->trashing_mem = 0; + goto ok_to_allocate; + } + else + { + if (nr_free_pages-nr_free_bigpages < freepages.low) + wake_up_interruptible(&kswapd_wait); + if 
(nr_free_pages-nr_free_bigpages > freepages.min && !current->trashing_mem) + goto ok_to_allocate; + } + + current->trashing_mem = 1; + } +#endif + current->flags |= PF_MEMALLOC; freed = try_to_free_pages(gfp_mask); current->flags &= ~PF_MEMALLOC; @@ -233,7 +300,13 @@ spin_lock_irqsave(&page_alloc_lock, flags); /* if it's not a dma request, try non-dma first */ if (!(gfp_mask & __GFP_DMA)) + { +#ifdef CONFIG_BIGMEM + if (gfp_mask & __GFP_BIGMEM) + RMQUEUE_TYPE(order, 2); +#endif RMQUEUE_TYPE(order, 0); + } RMQUEUE_TYPE(order, 1); spin_unlock_irqrestore(&page_alloc_lock, flags); @@ -252,7 +325,9 @@ unsigned type; spin_lock_irqsave(&page_alloc_lock, flags); - printk("Free pages: %6dkB\n ( ",nr_free_pages<<(PAGE_SHIFT-10)); + printk("Free pages: %6dkB (%6dkB BigMem)\n ( ", + nr_free_pages<<(PAGE_SHIFT-10), + nr_free_bigpages<<(PAGE_SHIFT-10)); printk("Free: %d (%d %d %d)\n", nr_free_pages, freepages.min, @@ -260,7 +335,19 @@ freepages.high); for (type = 0; type < NR_MEM_TYPES; type++) { unsigned long total = 0; +#ifdef CONFIG_BIGMEM + switch (type) + { + case 0: + case 1: +#endif printk("%sDMA: ", type ? "" : "Non"); +#ifdef CONFIG_BIGMEM + break; + case 2: + printk("BIGMEM: "); + } +#endif for (order=0 ; order < NR_MEM_LISTS; order++) { unsigned long nr = free_area[type][order].count; @@ -412,6 +499,8 @@ * this process. */ delete_from_swap_cache(page_map); + page_map = replace_with_bigmem(page_map); + page = page_address(page_map); set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)))); return 1; } diff -urN 2.2.15pre16/mm/page_io.c 2.2.15pre16aa3/mm/page_io.c --- 2.2.15pre16/mm/page_io.c Mon Jan 17 16:44:50 2000 +++ 2.2.15pre16aa3/mm/page_io.c Thu Mar 30 16:00:58 2000 @@ -86,9 +86,22 @@ if (PageSwapCache(page)) { /* Make sure we are the only process doing I/O with this swap page. */ - while (test_and_set_bit(offset,p->swap_lockmap)) { - run_task_queue(&tq_disk); - sleep_on(&lock_queue); + if (test_and_set_bit(offset, p->swap_lockmap)) + { + struct wait_queue __wait; + + __wait.task = current; + add_wait_queue(&lock_queue, &__wait); + for (;;) { + current->state = TASK_UNINTERRUPTIBLE; + mb(); + if (!test_and_set_bit(offset, p->swap_lockmap)) + break; + run_task_queue(&tq_disk); + schedule(); + } + current->state = TASK_RUNNING; + remove_wait_queue(&lock_queue, &__wait); } /* @@ -99,7 +112,7 @@ * as if it were: we are not allowed to manipulate the inode * hashing for locked pages. 
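
The __get_free_pages() watermark logic above now keys off two counters: a __GFP_BIGMEM allocation is judged against all free pages, while a regular allocation only counts the pages it can actually use, i.e. nr_free_pages - nr_free_bigpages. kswapd's wakeup condition in mm/vmscan.c below applies the same difference. Roughly (a toy model of the accounting, not the allocator itself):

#include <stdio.h>

static int nr_free_pages = 600, nr_free_bigpages = 450;
static int freepages_low = 256;

static int should_wake_kswapd(int wants_bigmem)
{
        int usable = wants_bigmem ? nr_free_pages
                                  : nr_free_pages - nr_free_bigpages;
        return usable < freepages_low;
}

int main(void)
{
        /* plenty of memory overall, but almost all of it is BIGMEM: */
        printf("bigmem alloc wakes kswapd: %d\n", should_wake_kswapd(1)); /* 0 */
        printf("normal alloc wakes kswapd: %d\n", should_wake_kswapd(0)); /* 1 */
        return 0;
}
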
*/ - if (page->offset != entry) { + if (pgoff2ulong(page->index) != entry) { printk ("swap entry mismatch"); return; } @@ -252,8 +265,8 @@ printk("VM: swap page is not in swap cache\n"); return; } - if (page->offset != entry) { - printk ("swap entry mismatch"); + if (pgoff2ulong(page->index) != entry) { + printk ("VM: swap entry mismatch"); return; } rw_swap_page_base(rw, entry, page, wait); @@ -278,12 +291,12 @@ printk ("VM: read_swap_page: page already in page cache!\n"); return; } - page->inode = &swapper_inode; - page->offset = entry; + page->inode = &swapper_inode; + page->index = ulong2pgoff(entry); atomic_inc(&page->count); /* Protect from shrink_mmap() */ rw_swap_page(rw, entry, buffer, 1); atomic_dec(&page->count); - page->inode = 0; + page->inode = 0; clear_bit(PG_swap_cache, &page->flags); } diff -urN 2.2.15pre16/mm/swap.c 2.2.15pre16aa3/mm/swap.c --- 2.2.15pre16/mm/swap.c Mon Jan 18 02:27:01 1999 +++ 2.2.15pre16aa3/mm/swap.c Thu Mar 30 16:00:56 2000 @@ -47,13 +47,13 @@ atomic_t nr_async_pages = ATOMIC_INIT(0); buffer_mem_t buffer_mem = { - 2, /* minimum percent buffer */ + 0, /* minimum percent buffer */ 10, /* borrow percent buffer */ 60 /* maximum percent buffer */ }; buffer_mem_t page_cache = { - 2, /* minimum percent page cache */ + 0, /* minimum percent page cache */ 15, /* borrow percent page cache */ 75 /* maximum */ }; diff -urN 2.2.15pre16/mm/swap_state.c 2.2.15pre16aa3/mm/swap_state.c --- 2.2.15pre16/mm/swap_state.c Mon Jan 17 16:44:50 2000 +++ 2.2.15pre16aa3/mm/swap_state.c Thu Mar 30 16:00:58 2000 @@ -54,7 +54,7 @@ if (PageTestandSetSwapCache(page)) { printk(KERN_ERR "swap_cache: replacing non-empty entry %08lx " "on page %08lx\n", - page->offset, page_address(page)); + pgoff2ulong(page->index), page_address(page)); return 0; } if (page->inode) { @@ -64,8 +64,8 @@ } atomic_inc(&page->count); page->inode = &swapper_inode; - page->offset = entry; - add_page_to_hash_queue(page, &swapper_inode, entry); + page->index = ulong2pgoff(entry); + add_page_to_hash_queue(page, &swapper_inode, ulong2pgoff(entry)); add_page_to_inode_queue(&swapper_inode, page); return 1; } @@ -203,7 +203,7 @@ */ void delete_from_swap_cache(struct page *page) { - long entry = page->offset; + long entry = pgoff2ulong(page->index); #ifdef SWAP_CACHE_INFO swap_cache_del_total++; @@ -251,7 +251,7 @@ swap_cache_find_total++; #endif while (1) { - found = find_page(&swapper_inode, entry); + found = find_page(&swapper_inode, ulong2pgoff(entry)); if (!found) return 0; if (found->inode != &swapper_inode || !PageSwapCache(found)) diff -urN 2.2.15pre16/mm/vmalloc.c 2.2.15pre16aa3/mm/vmalloc.c --- 2.2.15pre16/mm/vmalloc.c Tue Jul 13 00:33:04 1999 +++ 2.2.15pre16aa3/mm/vmalloc.c Thu Mar 30 16:00:57 2000 @@ -2,6 +2,7 @@ * linux/mm/vmalloc.c * * Copyright (C) 1993 Linus Torvalds + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ #include @@ -94,7 +95,7 @@ unsigned long page; if (!pte_none(*pte)) printk("alloc_area_pte: page already exists\n"); - page = __get_free_page(GFP_KERNEL); + page = __get_free_page(GFP_KERNEL|GFP_BIGMEM); if (!page) return -ENOMEM; set_pte(pte, mk_pte(page, PAGE_KERNEL)); diff -urN 2.2.15pre16/mm/vmscan.c 2.2.15pre16aa3/mm/vmscan.c --- 2.2.15pre16/mm/vmscan.c Wed Mar 29 19:42:16 2000 +++ 2.2.15pre16aa3/mm/vmscan.c Thu Mar 30 16:00:58 2000 @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -60,7 +61,8 @@ if (PageReserved(page_map) || PageLocked(page_map) - || ((gfp_mask & __GFP_DMA) && !PageDMA(page_map))) + || ((gfp_mask & __GFP_DMA) && 
!PageDMA(page_map)) + || (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page_map))) return 0; /* @@ -72,7 +74,7 @@ * memory, and we should just continue our scan. */ if (PageSwapCache(page_map)) { - entry = page_map->offset; + entry = pgoff2ulong(page_map->index); swap_duplicate(entry); set_pte(page_table, __pte(entry)); drop_pte: @@ -151,6 +153,9 @@ if (!entry) return 0; /* No swap space left */ + if (!(page_map = prepare_bigmem_swapout(page_map))) + goto out_swap_free; + vma->vm_mm->rss--; tsk->nswap++; set_pte(page_table, __pte(entry)); @@ -162,10 +167,14 @@ set_bit(PG_locked, &page_map->flags); /* OK, do a physical asynchronous write to swap. */ - rw_swap_page(WRITE, entry, (char *) page, 0); + rw_swap_page(WRITE, entry, (char *) page_address(page_map), 0); __free_page(page_map); return 1; + + out_swap_free: + swap_free(entry); + return 0; } /* @@ -327,7 +336,7 @@ * Think of swap_cnt as a "shadow rss" - it tells us which process * we want to page out (always try largest first). */ - counter = nr_tasks / (priority+1); + counter = nr_tasks / priority; if (counter < 1) counter = 1; @@ -387,7 +396,7 @@ /* Always trim SLAB caches when memory gets low. */ kmem_cache_reap(gfp_mask); - priority = 6; + priority = 5; do { while (shrink_mmap(priority, gfp_mask)) { if (!--count) @@ -409,11 +418,11 @@ } shrink_dcache_memory(priority, gfp_mask); - } while (--priority >= 0); + } while (--priority > 0); done: unlock_kernel(); - return priority >= 0; + return priority > 0; } /* @@ -486,7 +495,10 @@ * up on a more timely basis. */ interruptible_sleep_on(&kswapd_wait); - while (nr_free_pages < freepages.high) + + /* kswapd is critical to provide GFP_ATOMIC + allocations (not GFP_BIGMEM ones). */ + while (nr_free_pages - nr_free_bigpages < freepages.high) { if (do_try_to_free_pages(GFP_KSWAPD)) { diff -urN 2.2.15pre16/net/ipv4/tcp_input.c 2.2.15pre16aa3/net/ipv4/tcp_input.c --- 2.2.15pre16/net/ipv4/tcp_input.c Wed Mar 29 19:42:16 2000 +++ 2.2.15pre16aa3/net/ipv4/tcp_input.c Thu Mar 30 16:00:56 2000 @@ -96,6 +96,7 @@ */ static void tcp_delack_estimator(struct tcp_opt *tp) { + tcp_exit_quickack_mode(tp); if(tp->ato == 0) { tp->lrcvtime = tcp_time_stamp; @@ -114,10 +115,7 @@ if(m > tp->rto) tp->ato = tp->rto; else { - /* This funny shift makes sure we - * clear the "quick ack mode" bit. 
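
The tcp_delack_estimator() change in the surrounding hunk is subtle: the old update was an average that doubled as a flag, because the left-then-right shift pair also cleared a "quick ack mode" bit smuggled into ato. The patch tracks quick-ack state explicitly instead (tcp_enter/exit_quickack_mode, defined outside this excerpt), leaving a plain smoothed update. Exactly where the old flag bit lived is not shown here; assuming the top bit purely for illustration, the two updates agree except for dropping the flag:

#include <stdio.h>

int main(void)
{
        unsigned int flag = 1u << 31;   /* assumed flag position */
        unsigned int ato = 40u | flag, m = 30u;

        /* old: the shift pair drops the flag as a side effect */
        printf("old: %u\n", ((ato << 1) >> 2) + m);        /* 50, flag gone */
        /* new: plain smoothing; quick-ack state lives elsewhere */
        printf("new: %u\n", ((ato & ~flag) >> 1) + m);     /* 50 as well */
        return 0;
}
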
- */ - tp->ato = ((tp->ato << 1) >> 2) + m; + tp->ato = (tp->ato >> 1) + m; } } } diff -urN 2.2.15pre16/net/ipv4/tcp_ipv4.c 2.2.15pre16aa3/net/ipv4/tcp_ipv4.c --- 2.2.15pre16/net/ipv4/tcp_ipv4.c Fri Jan 7 18:19:25 2000 +++ 2.2.15pre16aa3/net/ipv4/tcp_ipv4.c Thu Mar 30 16:00:56 2000 @@ -1394,6 +1394,7 @@ newtp->snd_una = req->snt_isn + 1; newtp->srtt = 0; newtp->ato = 0; + tcp_enter_quickack_mode(newtp); newtp->snd_wl1 = req->rcv_isn; newtp->snd_wl2 = req->snt_isn; @@ -1937,6 +1938,7 @@ skb_queue_head_init(&tp->out_of_order_queue); tcp_init_xmit_timers(sk); + tcp_enter_quickack_mode(tp); tp->rto = TCP_TIMEOUT_INIT; /*TCP_WRITE_TIME*/ tp->mdev = TCP_TIMEOUT_INIT; tp->mss_clamp = ~0; diff -urN 2.2.15pre16/net/ipv4/tcp_output.c 2.2.15pre16aa3/net/ipv4/tcp_output.c --- 2.2.15pre16/net/ipv4/tcp_output.c Wed Mar 29 19:42:16 2000 +++ 2.2.15pre16aa3/net/ipv4/tcp_output.c Thu Mar 30 16:00:56 2000 @@ -1012,6 +1012,17 @@ timeout = (tp->ato << 1) >> 1; if (timeout > max_timeout) timeout = max_timeout; + if (!timeout) + { + timeout = tp->rto; + if ((signed) timeout <= 0) + { + printk(KERN_ERR + "tcp_send_delayed_ack: rto %ld!\n", timeout); + timeout = 1; + } + timeout = min(timeout, max_timeout); + } timeout += jiffies; /* Use new timeout only if there wasn't a older one earlier. */ diff -urN 2.2.15pre16/net/ipv4/tcp_timer.c 2.2.15pre16aa3/net/ipv4/tcp_timer.c --- 2.2.15pre16/net/ipv4/tcp_timer.c Wed Mar 29 19:42:16 2000 +++ 2.2.15pre16aa3/net/ipv4/tcp_timer.c Thu Mar 30 16:00:56 2000 @@ -194,7 +194,21 @@ if (!atomic_read(&sk->sock_readers)) tcp_send_ack(sk); else - tcp_send_delayed_ack(&(sk->tp_pinfo.af_tcp), HZ/10); + { + struct tcp_opt * tp = &(sk->tp_pinfo.af_tcp); + int rto; + + rto = tp->rto; + if (rto <= 0) + { + printk(KERN_ERR + "tcp_delack_timer: rto %d!\n", rto); + rto = 1; + } + rto = min(rto, HZ/10); + tp->delack_timer.expires = rto + jiffies; + add_timer(&tp->delack_timer); + } } }
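
Finally, both tcp_send_delayed_ack() and tcp_delack_timer() above guard against a degenerate delay: with quick-ack mode no longer encoded in ato, the computed timeout can now come out zero, and a non-positive rto would arm the ACK timer in the past. The clamping that the two hunks implement is simply this (a standalone restatement, not the kernel code):

#include <stdio.h>

/* clamp a delayed-ACK delay into (0, max_timeout], as the hunks above do */
static long delack_delay(long timeout, long rto, long max_timeout)
{
        if (!timeout) {
                timeout = rto > 0 ? rto : 1;    /* fall back to rto, never zero */
                if (timeout > max_timeout)
                        timeout = max_timeout;
        }
        return timeout;
}

int main(void)
{
        const long HZ = 100;

        printf("%ld\n", delack_delay(0, 75, HZ / 2));   /* 50: rto clamped */
        printf("%ld\n", delack_delay(0, -3, HZ / 2));   /* 1: bogus rto */
        printf("%ld\n", delack_delay(7, 75, HZ / 2));   /* 7: left alone */
        return 0;
}
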