diff -u --recursive --new-file v2.4.0/linux/Documentation/Changes linux/Documentation/Changes --- v2.4.0/linux/Documentation/Changes Mon Jan 1 10:00:04 2001 +++ linux/Documentation/Changes Mon Jan 8 15:18:32 2001 @@ -56,7 +56,7 @@ o e2fsprogs 1.19 # tune2fs --version o pcmcia-cs 3.1.21 # cardmgr -V o PPP 2.4.0 # pppd --version -o isdn4k-utils 3.1beta7 # isdnctrl 2>&1|grep version +o isdn4k-utils 3.1pre1 # isdnctrl 2>&1|grep version Kernel compilation ================== diff -u --recursive --new-file v2.4.0/linux/Documentation/Configure.help linux/Documentation/Configure.help --- v2.4.0/linux/Documentation/Configure.help Thu Jan 4 13:00:55 2001 +++ linux/Documentation/Configure.help Mon Jan 15 12:42:32 2001 @@ -10745,6 +10745,46 @@ called minix.o. Note that the file system of your root partition (the one containing the directory /) cannot be compiled as a module. +Reiserfs support +CONFIG_REISERFS_FS + + Stores not just filenames but the files themselves in a balanced + tree. Uses journaling. + + Balanced trees are more efficient than traditional + filesystem architectural foundations. + + You can use reiserfs in all cases where you use the ext2fs file + system, and you will gain in speed and disk space. It has fewer + worst case performance situations than other file systems + because balanced trees are hardier creatures than other algorithms + are (if that is not technical enough, read www.namesys.com....:-) ) + + It is more easily extended to have features currently found in + database and keyword search systems than block allocation based + filesystems are. The next version will be so extended, and will + support plugins consistent with our motto ``It takes more than a + license to make source code open.'' + + Read www.namesys.com to learn more about reiserfs. + + Sponsored by Threshold Networks, Emusic.com, and Bigstorage.com. + + If you like it, you can pay us to add new features to it that you + need, buy a support contract, or pay us to port it to another OS. + +Enable Reiserfs consistency checks +CONFIG_REISERFS_CHECK + If you set this to yes, then ReiserFS will perform every check it + can possibly imagine of its internal consistency throughout its + operation. It will also go substantially slower. More than once we + have forgotten that this was on, and then gone despondent over the + latest benchmarks.:-) Use of this option allows our team to go all + out in checking for consistency when debugging without fear of its + effect on end users. If you are on the verge of sending in a bug + report, say yes and you might get a useful error message. Almost + everyone should say no. + Second extended fs support CONFIG_EXT2_FS This is the de facto standard Linux file system (method to organize @@ -14618,6 +14658,14 @@ CONFIG_ISDN_PPP_VJ This enables Van Jacobson header compression for synchronous PPP. Say Y if the other end of the connection supports it. + +CONFIG_ISDN_PPP_BSDCOMP + Support for the BSD-Compress compression method for PPP, which uses + the LZW compression method to compress each PPP packet before it is + sent over the wire. The machine at the other end of the PPP link + (usually your ISP) has to support the BSD-Compress compression + method as well for this to be useful. Even if they don't support it, + it is safe to say Y here. 
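The Reiserfs help text above says the file system can be used anywhere ext2 is used. Below is a minimal user-space sketch of doing exactly that through the mount(2) system call; it is not part of this patch, it assumes a kernel built with CONFIG_REISERFS_FS (or the reiserfs module loaded), and the device and mount-point names (/dev/hda3, /mnt/data) are placeholders.

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* /dev/hda3 must already hold a reiserfs file system and
	 * /mnt/data must exist; both names are hypothetical. */
	if (mount("/dev/hda3", "/mnt/data", "reiserfs", 0, NULL) != 0) {
		perror("mount reiserfs");
		return 1;
	}
	printf("reiserfs volume mounted\n");
	return 0;
}

Run as root; mounting a file system type the running kernel does not support fails with ENODEV.
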
Support audio via ISDN CONFIG_ISDN_AUDIO diff -u --recursive --new-file v2.4.0/linux/Makefile linux/Makefile --- v2.4.0/linux/Makefile Thu Jan 4 13:48:13 2001 +++ linux/Makefile Mon Jan 15 17:23:48 2001 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 -SUBLEVEL = 0 -EXTRAVERSION = +SUBLEVEL = 1 +EXTRAVERSION =-pre7 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) @@ -457,9 +457,8 @@ depend dep: dep-files -# make checkconfig: Prune 'scripts' directory to avoid "false positives". checkconfig: - find * -name '*.[hcS]' -type f -print | grep -v scripts/ | sort | xargs $(PERL) -w scripts/checkconfig.pl + find * -name '*.[hcS]' -type f -print | sort | xargs $(PERL) -w scripts/checkconfig.pl checkhelp: find * -name [cC]onfig.in -print | sort | xargs $(PERL) -w scripts/checkhelp.pl diff -u --recursive --new-file v2.4.0/linux/arch/i386/Makefile linux/arch/i386/Makefile --- v2.4.0/linux/arch/i386/Makefile Fri Dec 29 14:07:19 2000 +++ linux/arch/i386/Makefile Wed Jan 10 15:06:14 2001 @@ -50,7 +50,7 @@ CFLAGS += -march=i686 endif -ifdef CONFIG_M686FXSR +ifdef CONFIG_MPENTIUMIII CFLAGS += -march=i686 endif diff -u --recursive --new-file v2.4.0/linux/arch/i386/config.in linux/arch/i386/config.in --- v2.4.0/linux/arch/i386/config.in Fri Dec 29 14:35:47 2000 +++ linux/arch/i386/config.in Mon Jan 8 13:27:56 2001 @@ -33,7 +33,7 @@ Pentium-Classic CONFIG_M586TSC \ Pentium-MMX CONFIG_M586MMX \ Pentium-Pro/Celeron/Pentium-II CONFIG_M686 \ - Pentium-III CONFIG_M686FXSR \ + Pentium-III CONFIG_MPENTIUMIII \ Pentium-4 CONFIG_MPENTIUM4 \ K6/K6-II/K6-III CONFIG_MK6 \ Athlon/K7 CONFIG_MK7 \ @@ -45,8 +45,6 @@ # Define implied options from the CPU selection here # -unset CONFIG_X86_FXSR - if [ "$CONFIG_M386" = "y" ]; then define_bool CONFIG_X86_CMPXCHG n define_int CONFIG_X86_L1_CACHE_SHIFT 4 @@ -87,14 +85,12 @@ define_bool CONFIG_X86_PGE y define_bool CONFIG_X86_USE_PPRO_CHECKSUM y fi -if [ "$CONFIG_M686FXSR" = "y" ]; then +if [ "$CONFIG_MPENTIUMIII" = "y" ]; then define_int CONFIG_X86_L1_CACHE_SHIFT 5 define_bool CONFIG_X86_TSC y define_bool CONFIG_X86_GOOD_APIC y define_bool CONFIG_X86_PGE y define_bool CONFIG_X86_USE_PPRO_CHECKSUM y - define_bool CONFIG_X86_FXSR y - define_bool CONFIG_X86_XMM y fi if [ "$CONFIG_MPENTIUM4" = "y" ]; then define_int CONFIG_X86_L1_CACHE_SHIFT 7 @@ -102,8 +98,6 @@ define_bool CONFIG_X86_GOOD_APIC y define_bool CONFIG_X86_PGE y define_bool CONFIG_X86_USE_PPRO_CHECKSUM y - define_bool CONFIG_X86_FXSR y - define_bool CONFIG_X86_XMM y fi if [ "$CONFIG_MK6" = "y" ]; then define_int CONFIG_X86_L1_CACHE_SHIFT 5 @@ -158,9 +152,7 @@ define_bool CONFIG_X86_PAE y fi -if [ "$CONFIG_X86_FXSR" != "y" ]; then - bool 'Math emulation' CONFIG_MATH_EMULATION -fi +bool 'Math emulation' CONFIG_MATH_EMULATION bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR bool 'Symmetric multi-processing support' CONFIG_SMP if [ "$CONFIG_SMP" != "y" ]; then diff -u --recursive --new-file v2.4.0/linux/arch/i386/defconfig linux/arch/i386/defconfig --- v2.4.0/linux/arch/i386/defconfig Sun Dec 31 09:17:18 2000 +++ linux/arch/i386/defconfig Mon Jan 15 12:49:47 2001 @@ -27,7 +27,7 @@ # CONFIG_M586TSC is not set # CONFIG_M586MMX is not set # CONFIG_M686 is not set -CONFIG_M686FXSR=y +CONFIG_MPENTIUMIII=y # CONFIG_MPENTIUM4 is not set # CONFIG_MK6 is not set # CONFIG_MK7 is not set @@ -45,8 +45,6 @@ CONFIG_X86_GOOD_APIC=y CONFIG_X86_PGE=y CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_FXSR=y -CONFIG_X86_XMM=y # CONFIG_TOSHIBA is not set # CONFIG_MICROCODE is not set # CONFIG_X86_MSR is not set @@ -54,6 +52,7 @@ 
CONFIG_NOHIGHMEM=y # CONFIG_HIGHMEM4G is not set # CONFIG_HIGHMEM64G is not set +# CONFIG_MATH_EMULATION is not set # CONFIG_MTRR is not set CONFIG_SMP=y CONFIG_HAVE_DEC_LOCK=y @@ -538,6 +537,8 @@ # CONFIG_QUOTA is not set # CONFIG_AUTOFS_FS is not set CONFIG_AUTOFS4_FS=y +# CONFIG_REISERFS_FS is not set +# CONFIG_REISERFS_CHECK is not set # CONFIG_ADFS_FS is not set # CONFIG_ADFS_FS_RW is not set # CONFIG_AFFS_FS is not set diff -u --recursive --new-file v2.4.0/linux/arch/i386/kernel/i387.c linux/arch/i386/kernel/i387.c --- v2.4.0/linux/arch/i386/kernel/i387.c Fri Nov 3 09:47:48 2000 +++ linux/arch/i386/kernel/i387.c Thu Jan 11 17:12:18 2001 @@ -18,14 +18,6 @@ #include #include -#if defined(CONFIG_X86_FXSR) -#define HAVE_FXSR 1 -#elif defined(CONFIG_X86_RUNTIME_FXSR) -#define HAVE_FXSR (cpu_has_fxsr) -#else -#define HAVE_FXSR 0 -#endif - #ifdef CONFIG_MATH_EMULATION #define HAVE_HWFP (boot_cpu_data.hard_math) #else @@ -35,13 +27,13 @@ /* * The _current_ task is using the FPU for the first time * so initialize it and set the mxcsr to its default - * value at reset if we support FXSR and then + * value at reset if we support XMM instructions and then * remeber the current task has used the FPU. */ void init_fpu(void) { __asm__("fninit"); - if ( HAVE_FXSR ) + if ( cpu_has_xmm ) load_mxcsr(0x1f80); current->used_math = 1; @@ -51,9 +43,9 @@ * FPU lazy state save handling. */ -void save_init_fpu( struct task_struct *tsk ) +static inline void __save_init_fpu( struct task_struct *tsk ) { - if ( HAVE_FXSR ) { + if ( cpu_has_fxsr ) { asm volatile( "fxsave %0 ; fnclex" : "=m" (tsk->thread.i387.fxsave) ); } else { @@ -61,12 +53,28 @@ : "=m" (tsk->thread.i387.fsave) ); } tsk->flags &= ~PF_USEDFPU; +} + +void save_init_fpu( struct task_struct *tsk ) +{ + __save_init_fpu(tsk); stts(); } +void kernel_fpu_begin(void) +{ + struct task_struct *tsk = current; + + if (tsk->flags & PF_USEDFPU) { + __save_init_fpu(tsk); + return; + } + clts(); +} + void restore_fpu( struct task_struct *tsk ) { - if ( HAVE_FXSR ) { + if ( cpu_has_fxsr ) { asm volatile( "fxrstor %0" : : "m" (tsk->thread.i387.fxsave) ); } else { @@ -144,7 +152,7 @@ unsigned short get_fpu_cwd( struct task_struct *tsk ) { - if ( HAVE_FXSR ) { + if ( cpu_has_fxsr ) { return tsk->thread.i387.fxsave.cwd; } else { return (unsigned short)tsk->thread.i387.fsave.cwd; @@ -153,7 +161,7 @@ unsigned short get_fpu_swd( struct task_struct *tsk ) { - if ( HAVE_FXSR ) { + if ( cpu_has_fxsr ) { return tsk->thread.i387.fxsave.swd; } else { return (unsigned short)tsk->thread.i387.fsave.swd; @@ -162,7 +170,7 @@ unsigned short get_fpu_twd( struct task_struct *tsk ) { - if ( HAVE_FXSR ) { + if ( cpu_has_fxsr ) { return tsk->thread.i387.fxsave.twd; } else { return (unsigned short)tsk->thread.i387.fsave.twd; @@ -171,7 +179,7 @@ unsigned short get_fpu_mxcsr( struct task_struct *tsk ) { - if ( HAVE_FXSR ) { + if ( cpu_has_fxsr ) { return tsk->thread.i387.fxsave.mxcsr; } else { return 0x1f80; @@ -180,7 +188,7 @@ void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd ) { - if ( HAVE_FXSR ) { + if ( cpu_has_fxsr ) { tsk->thread.i387.fxsave.cwd = cwd; } else { tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000); @@ -189,7 +197,7 @@ void set_fpu_swd( struct task_struct *tsk, unsigned short swd ) { - if ( HAVE_FXSR ) { + if ( cpu_has_fxsr ) { tsk->thread.i387.fxsave.swd = swd; } else { tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000); @@ -198,7 +206,7 @@ void set_fpu_twd( struct task_struct *tsk, unsigned short twd ) { - if ( HAVE_FXSR ) { + if ( cpu_has_fxsr ) { 
tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd); } else { tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000); @@ -207,7 +215,7 @@ void set_fpu_mxcsr( struct task_struct *tsk, unsigned short mxcsr ) { - if ( HAVE_FXSR ) { + if ( cpu_has_xmm ) { tsk->thread.i387.fxsave.mxcsr = mxcsr; } } @@ -321,7 +329,7 @@ current->used_math = 0; if ( HAVE_HWFP ) { - if ( HAVE_FXSR ) { + if ( cpu_has_fxsr ) { return save_i387_fxsave( buf ); } else { return save_i387_fsave( buf ); @@ -354,7 +362,7 @@ int err; if ( HAVE_HWFP ) { - if ( HAVE_FXSR ) { + if ( cpu_has_fxsr ) { err = restore_i387_fxsave( buf ); } else { err = restore_i387_fsave( buf ); @@ -387,7 +395,7 @@ int get_fpregs( struct user_i387_struct *buf, struct task_struct *tsk ) { if ( HAVE_HWFP ) { - if ( HAVE_FXSR ) { + if ( cpu_has_fxsr ) { return get_fpregs_fxsave( buf, tsk ); } else { return get_fpregs_fsave( buf, tsk ); @@ -415,7 +423,7 @@ int set_fpregs( struct task_struct *tsk, struct user_i387_struct *buf ) { if ( HAVE_HWFP ) { - if ( HAVE_FXSR ) { + if ( cpu_has_fxsr ) { return set_fpregs_fxsave( tsk, buf ); } else { return set_fpregs_fsave( tsk, buf ); @@ -428,9 +436,10 @@ int get_fpxregs( struct user_fxsr_struct *buf, struct task_struct *tsk ) { - if ( HAVE_FXSR ) { - __copy_to_user( (void *)buf, &tsk->thread.i387.fxsave, - sizeof(struct user_fxsr_struct) ); + if ( cpu_has_fxsr ) { + if (__copy_to_user( (void *)buf, &tsk->thread.i387.fxsave, + sizeof(struct user_fxsr_struct) )) + return -EFAULT; return 0; } else { return -EIO; @@ -439,7 +448,7 @@ int set_fpxregs( struct task_struct *tsk, struct user_fxsr_struct *buf ) { - if ( HAVE_FXSR ) { + if ( cpu_has_fxsr ) { __copy_from_user( &tsk->thread.i387.fxsave, (void *)buf, sizeof(struct user_fxsr_struct) ); /* mxcsr bit 6 and 31-16 must be zero for security reasons */ @@ -485,7 +494,7 @@ fpvalid = tsk->used_math; if ( fpvalid ) { unlazy_fpu( tsk ); - if ( HAVE_FXSR ) { + if ( cpu_has_fxsr ) { copy_fpu_fxsave( tsk, fpu ); } else { copy_fpu_fsave( tsk, fpu ); @@ -500,7 +509,7 @@ int fpvalid; struct task_struct *tsk = current; - fpvalid = tsk->used_math && HAVE_FXSR; + fpvalid = tsk->used_math && cpu_has_fxsr; if ( fpvalid ) { unlazy_fpu( tsk ); memcpy( fpu, &tsk->thread.i387.fxsave, diff -u --recursive --new-file v2.4.0/linux/arch/i386/kernel/setup.c linux/arch/i386/kernel/setup.c --- v2.4.0/linux/arch/i386/kernel/setup.c Sun Dec 31 10:26:18 2000 +++ linux/arch/i386/kernel/setup.c Mon Jan 15 12:39:32 2001 @@ -147,6 +147,7 @@ extern unsigned long cpu_khz; static int disable_x86_serial_nr __initdata = 1; +static int disable_x86_fxsr __initdata = 0; /* * This is set up by the setup-routine at boot-time @@ -518,7 +519,7 @@ e820.nr_map = 0; add_memory_region(0, LOWMEMSIZE(), E820_RAM); - add_memory_region(HIGH_MEMORY, (mem_size << 10) - HIGH_MEMORY, E820_RAM); + add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM); } printk("BIOS-provided physical RAM map:\n"); print_memory_map(who); @@ -1796,6 +1797,13 @@ } __setup("serialnumber", x86_serial_nr_setup); +int __init x86_fxsr_setup(char * s) +{ + disable_x86_fxsr = 1; + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + /* Standard macro to see if a specific flag is changeable */ static inline int flag_is_changeable_p(u32 flag) @@ -2000,10 +2008,16 @@ */ /* TSC disabled? */ -#ifdef CONFIG_TSC +#ifndef CONFIG_X86_TSC if ( tsc_disable ) clear_bit(X86_FEATURE_TSC, &c->x86_capability); #endif + + /* FXSR disabled? 
*/ + if (disable_x86_fxsr) { + clear_bit(X86_FEATURE_FXSR, &c->x86_capability); + clear_bit(X86_FEATURE_XMM, &c->x86_capability); + } /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); diff -u --recursive --new-file v2.4.0/linux/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c --- v2.4.0/linux/arch/i386/kernel/traps.c Wed Jan 3 20:45:26 2001 +++ linux/arch/i386/kernel/traps.c Mon Jan 15 16:54:20 2001 @@ -23,6 +23,7 @@ #include #include #include +#include #ifdef CONFIG_MCA #include diff -u --recursive --new-file v2.4.0/linux/arch/i386/lib/mmx.c linux/arch/i386/lib/mmx.c --- v2.4.0/linux/arch/i386/lib/mmx.c Wed Nov 8 17:09:49 2000 +++ linux/arch/i386/lib/mmx.c Thu Jan 11 17:42:24 2001 @@ -2,6 +2,8 @@ #include #include +#include + /* * MMX 3DNow! library helper functions * @@ -26,13 +28,7 @@ void *p=to; int i= len >> 6; /* len/64 */ - if (!(current->flags & PF_USEDFPU)) - clts(); - else - { - __asm__ __volatile__ ( " fnsave %0; fwait\n"::"m"(current->thread.i387)); - current->flags &= ~PF_USEDFPU; - } + kernel_fpu_begin(); __asm__ __volatile__ ( "1: prefetch (%0)\n" /* This set is 28 bytes */ @@ -88,20 +84,15 @@ * Now do the tail of the block */ __memcpy(to, from, len&63); - stts(); + kernel_fpu_end(); return p; } static void fast_clear_page(void *page) { int i; - if (!(current->flags & PF_USEDFPU)) - clts(); - else - { - __asm__ __volatile__ ( " fnsave %0; fwait\n"::"m"(current->thread.i387)); - current->flags &= ~PF_USEDFPU; - } + + kernel_fpu_begin(); __asm__ __volatile__ ( " pxor %%mm0, %%mm0\n" : : @@ -127,19 +118,14 @@ __asm__ __volatile__ ( " sfence \n" : : ); - stts(); + kernel_fpu_end(); } static void fast_copy_page(void *to, void *from) { int i; - if (!(current->flags & PF_USEDFPU)) - clts(); - else - { - __asm__ __volatile__ ( " fnsave %0; fwait\n"::"m"(current->thread.i387)); - current->flags &= ~PF_USEDFPU; - } + + kernel_fpu_begin(); /* maybe the prefetch stuff can go before the expensive fnsave... * but that is for later. 
-AV @@ -199,7 +185,7 @@ __asm__ __volatile__ ( " sfence \n" : : ); - stts(); + kernel_fpu_end(); } /* diff -u --recursive --new-file v2.4.0/linux/arch/i386/mm/init.c linux/arch/i386/mm/init.c --- v2.4.0/linux/arch/i386/mm/init.c Tue Nov 28 22:43:39 2000 +++ linux/arch/i386/mm/init.c Mon Jan 15 11:06:55 2001 @@ -317,7 +317,7 @@ pgd_t *pgd, *pgd_base; int i, j, k; pmd_t *pmd; - pte_t *pte; + pte_t *pte, *pte_base; /* * This can be zero as well - no problem, in that case we exit @@ -366,11 +366,7 @@ continue; } - pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte))); - - if (pte != pte_offset(pmd, 0)) - BUG(); + pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); for (k = 0; k < PTRS_PER_PTE; pte++, k++) { vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE; @@ -378,6 +374,10 @@ break; *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL); } + set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base))); + if (pte_base != pte_offset(pmd, 0)) + BUG(); + } } diff -u --recursive --new-file v2.4.0/linux/drivers/block/DAC960.c linux/drivers/block/DAC960.c --- v2.4.0/linux/drivers/block/DAC960.c Thu Dec 7 17:08:24 2000 +++ linux/drivers/block/DAC960.c Mon Jan 15 13:08:15 2001 @@ -1820,7 +1820,6 @@ Request->nr_segments < Controller->DriverScatterGatherLimit) { Request->nr_segments++; - RequestQueue->elevator.nr_segments++; return true; } return false; @@ -1844,7 +1843,6 @@ Request->nr_segments < Controller->DriverScatterGatherLimit) { Request->nr_segments++; - RequestQueue->elevator.nr_segments++; return true; } return false; @@ -1874,7 +1872,6 @@ if (TotalSegments > MaxSegments || TotalSegments > Controller->DriverScatterGatherLimit) return false; - RequestQueue->elevator.nr_segments -= SameSegment; Request->nr_segments = TotalSegments; return true; } diff -u --recursive --new-file v2.4.0/linux/drivers/block/elevator.c linux/drivers/block/elevator.c --- v2.4.0/linux/drivers/block/elevator.c Tue Dec 5 15:05:26 2000 +++ linux/drivers/block/elevator.c Mon Jan 15 13:08:15 2001 @@ -24,125 +24,115 @@ #include #include #include +#include #include -/* - * Order ascending, but only allow a request to be skipped a certain - * number of times - */ -void elevator_linus(struct request *req, elevator_t *elevator, - struct list_head *real_head, - struct list_head *head, int orig_latency) -{ - struct list_head *entry = real_head; - struct request *tmp; - - req->elevator_sequence = orig_latency; - - while ((entry = entry->prev) != head) { - tmp = blkdev_entry_to_request(entry); - if (IN_ORDER(tmp, req)) - break; - if (!tmp->elevator_sequence) - break; - tmp->elevator_sequence--; - } - list_add(&req->queue, entry); -} - int elevator_linus_merge(request_queue_t *q, struct request **req, + struct list_head * head, struct buffer_head *bh, int rw, - int *max_sectors, int *max_segments) + int max_sectors, int max_segments) { - struct list_head *entry, *head = &q->queue_head; + struct list_head *entry = &q->queue_head; unsigned int count = bh->b_size >> 9, ret = ELEVATOR_NO_MERGE; - entry = head; - if (q->head_active && !q->plugged) - head = head->next; - while ((entry = entry->prev) != head) { - struct request *__rq = *req = blkdev_entry_to_request(entry); + struct request *__rq = blkdev_entry_to_request(entry); + + /* + * simply "aging" of requests in queue + */ + if (__rq->elevator_sequence-- <= 0) { + *req = __rq; + break; + } + if (__rq->sem) continue; if (__rq->cmd != rw) continue; - if (__rq->nr_sectors + count > *max_sectors) - continue; if (__rq->rq_dev != bh->b_rdev) continue; 
+ if (__rq->nr_sectors + count > max_sectors) + continue; + if (__rq->elevator_sequence < count) + break; if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { ret = ELEVATOR_BACK_MERGE; + *req = __rq; break; - } - if (!__rq->elevator_sequence) - break; - if (__rq->sector - count == bh->b_rsector) { - __rq->elevator_sequence--; + } else if (__rq->sector - count == bh->b_rsector) { ret = ELEVATOR_FRONT_MERGE; + __rq->elevator_sequence -= count; + *req = __rq; break; - } + } else if (!*req && BHRQ_IN_ORDER(bh, __rq)) + *req = __rq; } + return ret; +} + +void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int count) +{ + struct list_head *entry = &req->queue, *head = &q->queue_head; + /* * second pass scan of requests that got passed over, if any */ - if (ret != ELEVATOR_NO_MERGE && *req) { - while ((entry = entry->next) != &q->queue_head) { - struct request *tmp = blkdev_entry_to_request(entry); - tmp->elevator_sequence--; - } + while ((entry = entry->next) != head) { + struct request *tmp = blkdev_entry_to_request(entry); + tmp->elevator_sequence -= count; } - - return ret; } -/* - * No request sorting, just add it to the back of the list - */ -void elevator_noop(struct request *req, elevator_t *elevator, - struct list_head *real_head, struct list_head *head, - int orig_latency) +void elevator_linus_merge_req(struct request *req, struct request *next) { - list_add_tail(&req->queue, real_head); + if (next->elevator_sequence < req->elevator_sequence) + req->elevator_sequence = next->elevator_sequence; } /* - * See if we can find a request that is buffer can be coalesced with. + * See if we can find a request that this buffer can be coalesced with. */ int elevator_noop_merge(request_queue_t *q, struct request **req, + struct list_head * head, struct buffer_head *bh, int rw, - int *max_sectors, int *max_segments) + int max_sectors, int max_segments) { - struct list_head *entry, *head = &q->queue_head; + struct list_head *entry; unsigned int count = bh->b_size >> 9; - if (q->head_active && !q->plugged) - head = head->next; + if (list_empty(&q->queue_head)) + return ELEVATOR_NO_MERGE; - entry = head; + entry = &q->queue_head; while ((entry = entry->prev) != head) { - struct request *__rq = *req = blkdev_entry_to_request(entry); - if (__rq->sem) - continue; + struct request *__rq = blkdev_entry_to_request(entry); + if (__rq->cmd != rw) continue; - if (__rq->nr_sectors + count > *max_sectors) - continue; if (__rq->rq_dev != bh->b_rdev) continue; - if (__rq->sector + __rq->nr_sectors == bh->b_rsector) + if (__rq->nr_sectors + count > max_sectors) + continue; + if (__rq->sem) + continue; + if (__rq->sector + __rq->nr_sectors == bh->b_rsector) { + *req = __rq; return ELEVATOR_BACK_MERGE; - if (__rq->sector - count == bh->b_rsector) + } else if (__rq->sector - count == bh->b_rsector) { + *req = __rq; return ELEVATOR_FRONT_MERGE; + } } + + *req = blkdev_entry_to_request(q->queue_head.prev); return ELEVATOR_NO_MERGE; } -/* - * The noop "elevator" does not do any accounting - */ -void elevator_noop_dequeue(struct request *req) {} +void elevator_noop_merge_cleanup(request_queue_t *q, struct request *req, int count) {} + +void elevator_noop_merge_req(struct request *req, struct request *next) {} int blkelvget_ioctl(elevator_t * elevator, blkelv_ioctl_arg_t * arg) { diff -u --recursive --new-file v2.4.0/linux/drivers/block/ll_rw_blk.c linux/drivers/block/ll_rw_blk.c --- v2.4.0/linux/drivers/block/ll_rw_blk.c Sun Dec 31 11:16:58 2000 +++ linux/drivers/block/ll_rw_blk.c Mon Jan 15 
16:52:57 2001 @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -118,6 +119,19 @@ */ int * max_sectors[MAX_BLKDEV]; +/* + * queued sectors for all devices, used to make sure we don't fill all + * of memory with locked buffers + */ +atomic_t queued_sectors; + +/* + * high and low watermark for above + */ +static int high_queued_sectors, low_queued_sectors; +static int batch_requests, queue_nr_requests; +static DECLARE_WAIT_QUEUE_HEAD(blk_buffers_wait); + static inline int get_max_sectors(kdev_t dev) { if (!max_sectors[MAJOR(dev)]) @@ -125,7 +139,7 @@ return max_sectors[MAJOR(dev)][MINOR(dev)]; } -static inline request_queue_t *__blk_get_queue(kdev_t dev) +inline request_queue_t *__blk_get_queue(kdev_t dev) { struct blk_dev_struct *bdev = blk_dev + MAJOR(dev); @@ -153,17 +167,14 @@ static int __blk_cleanup_queue(struct list_head *head) { - struct list_head *entry; struct request *rq; int i = 0; if (list_empty(head)) return 0; - entry = head->next; do { - rq = list_entry(entry, struct request, table); - entry = entry->next; + rq = list_entry(head->next, struct request, table); list_del(&rq->table); kmem_cache_free(request_cachep, rq); i++; @@ -188,10 +199,12 @@ **/ void blk_cleanup_queue(request_queue_t * q) { - int count = QUEUE_NR_REQUESTS; + int count = queue_nr_requests; count -= __blk_cleanup_queue(&q->request_freelist[READ]); count -= __blk_cleanup_queue(&q->request_freelist[WRITE]); + count -= __blk_cleanup_queue(&q->pending_freelist[READ]); + count -= __blk_cleanup_queue(&q->pending_freelist[WRITE]); if (count) printk("blk_cleanup_queue: leaked requests (%d)\n", count); @@ -290,7 +303,6 @@ { if (req->nr_segments < max_segments) { req->nr_segments++; - q->elevator.nr_segments++; return 1; } return 0; @@ -327,7 +339,6 @@ if (total_segments > max_segments) return 0; - q->elevator.nr_segments -= same_segment; req->nr_segments = total_segments; return 1; } @@ -364,7 +375,7 @@ } } -static void generic_unplug_device(void *data) +void generic_unplug_device(void *data) { request_queue_t *q = (request_queue_t *) data; unsigned long flags; @@ -379,19 +390,24 @@ struct request *rq; int i; + INIT_LIST_HEAD(&q->request_freelist[READ]); + INIT_LIST_HEAD(&q->request_freelist[WRITE]); + INIT_LIST_HEAD(&q->pending_freelist[READ]); + INIT_LIST_HEAD(&q->pending_freelist[WRITE]); + q->pending_free[READ] = q->pending_free[WRITE] = 0; + /* - * Divide requests in half between read and write. This used to - * be a 2/3 advantage for reads, but now reads can steal from - * the write free list. + * Divide requests in half between read and write */ - for (i = 0; i < QUEUE_NR_REQUESTS; i++) { + for (i = 0; i < queue_nr_requests; i++) { rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL); + memset(rq, 0, sizeof(struct request)); rq->rq_status = RQ_INACTIVE; list_add(&rq->table, &q->request_freelist[i & 1]); } init_waitqueue_head(&q->wait_for_request); - spin_lock_init(&q->request_lock); + spin_lock_init(&q->queue_lock); } static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh); @@ -426,14 +442,12 @@ * blk_queue_headactive(). * * Note: - * blk_init_queue() must be paired with a blk_cleanup-queue() call + * blk_init_queue() must be paired with a blk_cleanup_queue() call * when the block device is deactivated (such as at module unload). 
**/ void blk_init_queue(request_queue_t * q, request_fn_proc * rfn) { INIT_LIST_HEAD(&q->queue_head); - INIT_LIST_HEAD(&q->request_freelist[READ]); - INIT_LIST_HEAD(&q->request_freelist[WRITE]); elevator_init(&q->elevator, ELEVATOR_LINUS); blk_init_free_list(q); q->request_fn = rfn; @@ -455,7 +469,6 @@ q->head_active = 1; } - #define blkdev_free_rq(list) list_entry((list)->next, struct request, table); /* * Get a free request. io_request_lock must be held and interrupts @@ -463,37 +476,16 @@ */ static inline struct request *get_request(request_queue_t *q, int rw) { - struct list_head *list = &q->request_freelist[rw]; - struct request *rq; - - /* - * Reads get preferential treatment and are allowed to steal - * from the write free list if necessary. - */ - if (!list_empty(list)) { - rq = blkdev_free_rq(list); - goto got_rq; - } + struct request *rq = NULL; - /* - * if the WRITE list is non-empty, we know that rw is READ - * and that the READ list is empty. allow reads to 'steal' - * from the WRITE list. - */ - if (!list_empty(&q->request_freelist[WRITE])) { - list = &q->request_freelist[WRITE]; - rq = blkdev_free_rq(list); - goto got_rq; + if (!list_empty(&q->request_freelist[rw])) { + rq = blkdev_free_rq(&q->request_freelist[rw]); + list_del(&rq->table); + rq->rq_status = RQ_ACTIVE; + rq->special = NULL; + rq->q = q; } - return NULL; - -got_rq: - list_del(&rq->table); - rq->free_list = list; - rq->rq_status = RQ_ACTIVE; - rq->special = NULL; - rq->q = q; return rq; } @@ -581,25 +573,29 @@ /* * add-request adds a request to the linked list. - * It disables interrupts (acquires the request spinlock) so that it can muck - * with the request-lists in peace. Thus it should be called with no spinlocks - * held. + * io_request_lock is held and interrupts disabled, as we muck with the + * request queue list. * * By this point, req->cmd is always either READ/WRITE, never READA, * which is important for drive_stat_acct() above. */ - static inline void add_request(request_queue_t * q, struct request * req, - struct list_head *head, int lat) + struct list_head *insert_here) { int major; drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1); + if (!q->plugged && q->head_active && insert_here == &q->queue_head) { + spin_unlock_irq(&io_request_lock); + BUG(); + } + /* - * let selected elevator insert the request + * elevator indicated where it wants this request to be + * inserted at elevator_merge time */ - q->elevator.elevator_fn(req, &q->elevator, &q->queue_head, head, lat); + list_add(&req->queue, insert_here); /* * FIXME(eric) I don't understand why there is a need for this @@ -617,20 +613,55 @@ (q->request_fn)(q); } +void inline blk_refill_freelist(request_queue_t *q, int rw) +{ + if (q->pending_free[rw]) { + list_splice(&q->pending_freelist[rw], &q->request_freelist[rw]); + INIT_LIST_HEAD(&q->pending_freelist[rw]); + q->pending_free[rw] = 0; + } +} + /* * Must be called with io_request_lock held and interrupts disabled */ void inline blkdev_release_request(struct request *req) { + request_queue_t *q = req->q; + int rw = req->cmd; + req->rq_status = RQ_INACTIVE; + req->q = NULL; /* - * Request may not have originated from ll_rw_blk + * Request may not have originated from ll_rw_blk. 
if not, + * asumme it has free buffers and check waiters */ - if (req->free_list) { - list_add(&req->table, req->free_list); - req->free_list = NULL; - wake_up(&req->q->wait_for_request); + if (q) { + /* + * we've released enough buffers to start I/O again + */ + if (waitqueue_active(&blk_buffers_wait) + && atomic_read(&queued_sectors) < low_queued_sectors) + wake_up(&blk_buffers_wait); + + if (!list_empty(&q->request_freelist[rw])) { + blk_refill_freelist(q, rw); + list_add(&req->table, &q->request_freelist[rw]); + return; + } + + /* + * free list is empty, add to pending free list and + * batch wakeups + */ + list_add(&req->table, &q->pending_freelist[rw]); + + if (++q->pending_free[rw] >= batch_requests) { + int wake_up = q->pending_free[rw]; + blk_refill_freelist(q, rw); + wake_up_nr(&q->wait_for_request, wake_up); + } } } @@ -658,9 +689,10 @@ * will have been updated to the appropriate number, * and we shouldn't do it here too. */ - if(!(q->merge_requests_fn)(q, req, next, max_segments)) + if (!q->merge_requests_fn(q, req, next, max_segments)) return; + q->elevator.elevator_merge_req_fn(req, next); req->bhtail->b_reqnext = next->bh; req->bhtail = next->bhtail; req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors; @@ -699,7 +731,7 @@ int max_segments = MAX_SEGMENTS; struct request * req = NULL, *freereq = NULL; int rw_ahead, max_sectors, el_ret; - struct list_head *head; + struct list_head *head, *insert_here; int latency; elevator_t *elevator = &q->elevator; @@ -713,6 +745,7 @@ rw = READ; /* drop into READ */ case READ: case WRITE: + latency = elevator_request_latency(elevator, rw); break; default: BUG(); @@ -741,38 +774,33 @@ */ max_sectors = get_max_sectors(bh->b_rdev); - latency = elevator_request_latency(elevator, rw); - +again: + head = &q->queue_head; /* * Now we acquire the request spinlock, we have to be mega careful * not to schedule or do something nonatomic */ -again: spin_lock_irq(&io_request_lock); - /* - * skip first entry, for devices with active queue head - */ - head = &q->queue_head; - if (q->head_active && !q->plugged) - head = head->next; - + insert_here = head->prev; if (list_empty(head)) { q->plug_device_fn(q, bh->b_rdev); /* is atomic */ goto get_rq; - } + } else if (q->head_active && !q->plugged) + head = head->next; - el_ret = elevator->elevator_merge_fn(q, &req, bh, rw, - &max_sectors, &max_segments); + el_ret = elevator->elevator_merge_fn(q, &req, head, bh, rw, + max_sectors, max_segments); switch (el_ret) { case ELEVATOR_BACK_MERGE: if (!q->back_merge_fn(q, req, bh, max_segments)) break; + elevator->elevator_merge_cleanup_fn(q, req, count); req->bhtail->b_reqnext = bh; req->bhtail = bh; req->nr_sectors = req->hard_nr_sectors += count; - req->e = elevator; + blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); attempt_back_merge(q, req, max_sectors, max_segments); goto out; @@ -780,20 +808,29 @@ case ELEVATOR_FRONT_MERGE: if (!q->front_merge_fn(q, req, bh, max_segments)) break; + elevator->elevator_merge_cleanup_fn(q, req, count); bh->b_reqnext = req->bh; req->bh = bh; req->buffer = bh->b_data; req->current_nr_sectors = count; req->sector = req->hard_sector = sector; req->nr_sectors = req->hard_nr_sectors += count; - req->e = elevator; + blk_started_io(count); drive_stat_acct(req->rq_dev, req->cmd, count, 0); attempt_front_merge(q, head, req, max_sectors, max_segments); goto out; + /* * elevator says don't/can't merge. get new request */ case ELEVATOR_NO_MERGE: + /* + * use elevator hints as to where to insert the + * request. 
if no hints, just add it to the back + * of the queue + */ + if (req) + insert_here = &req->queue; break; default: @@ -802,10 +839,9 @@ } /* - * Grab a free request from the freelist. Read first try their - * own queue - if that is empty, we steal from the write list. - * Writes must block if the write list is empty, and read aheads - * are not crucial. + * Grab a free request from the freelist - if that is empty, check + * if we are doing read ahead and abort instead of blocking for + * a free slot. */ get_rq: if (freereq) { @@ -821,6 +857,7 @@ } /* fill up the request-info, and add it to the queue */ + req->elevator_sequence = latency; req->cmd = rw; req->errors = 0; req->hard_sector = req->sector = sector; @@ -833,13 +870,13 @@ req->bh = bh; req->bhtail = bh; req->rq_dev = bh->b_rdev; - req->e = elevator; - add_request(q, req, head, latency); + blk_started_io(count); + add_request(q, req, insert_here); out: - if (!q->plugged) - (q->request_fn)(q); if (freereq) blkdev_release_request(freereq); + if (!q->plugged) + q->request_fn(q); spin_unlock_irq(&io_request_lock); return 0; end_io: @@ -886,13 +923,13 @@ int major = MAJOR(bh->b_rdev); request_queue_t *q; - if (!bh->b_end_io) BUG(); + if (!bh->b_end_io) + BUG(); + if (blk_size[major]) { unsigned long maxsector = (blk_size[major][MINOR(bh->b_rdev)] << 1) + 1; - unsigned int sector, count; - - count = bh->b_size >> 9; - sector = bh->b_rsector; + unsigned long sector = bh->b_rsector; + unsigned int count = bh->b_size >> 9; if (maxsector < count || maxsector - count < sector) { bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped); @@ -903,7 +940,7 @@ when mounting a device. */ printk(KERN_INFO "attempt to access beyond end of device\n"); - printk(KERN_INFO "%s: rw=%d, want=%d, limit=%d\n", + printk(KERN_INFO "%s: rw=%d, want=%ld, limit=%d\n", kdevname(bh->b_rdev), rw, (sector + count)>>1, blk_size[major][MINOR(bh->b_rdev)]); @@ -930,15 +967,13 @@ buffer_IO_error(bh); break; } - - } - while (q->make_request_fn(q, rw, bh)); + } while (q->make_request_fn(q, rw, bh)); } /** * submit_bh: submit a buffer_head to the block device later for I/O - * @rw: whether to %READ or %WRITE, or mayve to %READA (read ahead) + * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) * @bh: The &struct buffer_head which describes the I/O * * submit_bh() is very similar in purpose to generic_make_request(), and @@ -961,7 +996,7 @@ * further remap this. */ bh->b_rdev = bh->b_dev; - bh->b_rsector = bh->b_blocknr * (bh->b_size>>9); + bh->b_rsector = bh->b_blocknr * (bh->b_size >> 9); generic_make_request(rw, bh); @@ -1021,6 +1056,9 @@ int correct_size; int i; + if (!nr) + return; + major = MAJOR(bhs[0]->b_dev); /* Determine correct block size for this device. */ @@ -1033,9 +1071,8 @@ /* Verify requested block sizes. */ for (i = 0; i < nr; i++) { - struct buffer_head *bh; - bh = bhs[i]; - if (bh->b_size != correct_size) { + struct buffer_head *bh = bhs[i]; + if (bh->b_size % correct_size) { printk(KERN_NOTICE "ll_rw_block: device %s: " "only %d-char blocks implemented (%u)\n", kdevname(bhs[0]->b_dev), @@ -1051,8 +1088,17 @@ } for (i = 0; i < nr; i++) { - struct buffer_head *bh; - bh = bhs[i]; + struct buffer_head *bh = bhs[i]; + + /* + * don't lock any more buffers if we are above the high + * water mark. instead start I/O on the queued stuff. 
+ */ + if (atomic_read(&queued_sectors) >= high_queued_sectors) { + run_task_queue(&tq_disk); + wait_event(blk_buffers_wait, + atomic_read(&queued_sectors) < low_queued_sectors); + } /* Only one thread can actually submit the I/O. */ if (test_and_set_bit(BH_Lock, &bh->b_state)) @@ -1115,6 +1161,7 @@ if ((bh = req->bh) != NULL) { nsect = bh->b_size >> 9; + blk_finished_io(nsect); req->bh = bh->b_reqnext; bh->b_reqnext = NULL; bh->b_end_io(bh, uptodate); @@ -1138,19 +1185,18 @@ void end_that_request_last(struct request *req) { - if (req->e) { - printk("end_that_request_last called with non-dequeued req\n"); - BUG(); - } if (req->sem != NULL) up(req->sem); blkdev_release_request(req); } +#define MB(kb) ((kb) << 10) + int __init blk_dev_init(void) { struct blk_dev_struct *dev; + int total_ram; request_cachep = kmem_cache_create("blkdev_requests", sizeof(struct request), @@ -1165,6 +1211,51 @@ memset(ro_bits,0,sizeof(ro_bits)); memset(max_readahead, 0, sizeof(max_readahead)); memset(max_sectors, 0, sizeof(max_sectors)); + + atomic_set(&queued_sectors, 0); + total_ram = nr_free_pages() << (PAGE_SHIFT - 10); + + /* + * Try to keep 128MB max hysteris. If not possible, + * use half of RAM + */ + high_queued_sectors = (total_ram * 2) / 3; + low_queued_sectors = high_queued_sectors - MB(128); + if (low_queued_sectors < 0) + low_queued_sectors = total_ram / 2; + + /* + * for big RAM machines (>= 384MB), use more for I/O + */ + if (total_ram >= MB(384)) { + high_queued_sectors = (total_ram * 4) / 5; + low_queued_sectors = high_queued_sectors - MB(128); + } + + /* + * make it sectors (512b) + */ + high_queued_sectors <<= 1; + low_queued_sectors <<= 1; + + /* + * Scale free request slots per queue too + */ + total_ram = (total_ram + MB(32) - 1) & ~(MB(32) - 1); + if ((queue_nr_requests = total_ram >> 9) > QUEUE_NR_REQUESTS) + queue_nr_requests = QUEUE_NR_REQUESTS; + + /* + * adjust batch frees according to queue length, with upper limit + */ + if ((batch_requests = queue_nr_requests >> 3) > 32) + batch_requests = 32; + + printk("block: queued sectors max/low %dkB/%dkB, %d slots per queue\n", + high_queued_sectors / 2, + low_queued_sectors / 2, + queue_nr_requests); + #ifdef CONFIG_AMIGA_Z2RAM z2_init(); #endif @@ -1279,9 +1370,12 @@ EXPORT_SYMBOL(end_that_request_last); EXPORT_SYMBOL(blk_init_queue); EXPORT_SYMBOL(blk_get_queue); +EXPORT_SYMBOL(__blk_get_queue); EXPORT_SYMBOL(blk_cleanup_queue); EXPORT_SYMBOL(blk_queue_headactive); EXPORT_SYMBOL(blk_queue_pluggable); EXPORT_SYMBOL(blk_queue_make_request); EXPORT_SYMBOL(generic_make_request); EXPORT_SYMBOL(blkdev_release_request); +EXPORT_SYMBOL(generic_unplug_device); +EXPORT_SYMBOL(queued_sectors); diff -u --recursive --new-file v2.4.0/linux/drivers/block/paride/pd.c linux/drivers/block/paride/pd.c --- v2.4.0/linux/drivers/block/paride/pd.c Thu Oct 26 23:35:47 2000 +++ linux/drivers/block/paride/pd.c Mon Jan 15 13:08:15 2001 @@ -392,7 +392,6 @@ if (req->nr_segments < max_segments) { req->nr_segments++; - q->elevator.nr_segments++; return 1; } return 0; @@ -432,7 +431,6 @@ if (total_segments > max_segments) return 0; - q->elevator.nr_segments -= same_segment; req->nr_segments = total_segments; return 1; } diff -u --recursive --new-file v2.4.0/linux/drivers/block/paride/pf.c linux/drivers/block/paride/pf.c --- v2.4.0/linux/drivers/block/paride/pf.c Thu Oct 26 23:35:47 2000 +++ linux/drivers/block/paride/pf.c Mon Jan 15 13:08:15 2001 @@ -346,7 +346,6 @@ if (req->nr_segments < max_segments) { req->nr_segments++; - q->elevator.nr_segments++; return 1; } 
return 0; @@ -386,7 +385,6 @@ if (total_segments > max_segments) return 0; - q->elevator.nr_segments -= same_segment; req->nr_segments = total_segments; return 1; } diff -u --recursive --new-file v2.4.0/linux/drivers/char/drm/Config.in linux/drivers/char/drm/Config.in --- v2.4.0/linux/drivers/char/drm/Config.in Tue Aug 8 09:27:33 2000 +++ linux/drivers/char/drm/Config.in Mon Jan 15 11:08:13 2001 @@ -9,7 +9,7 @@ if [ "$CONFIG_DRM" != "n" ]; then tristate ' 3dfx Banshee/Voodoo3+' CONFIG_DRM_TDFX tristate ' 3dlabs GMX 2000' CONFIG_DRM_GAMMA - tristate ' ATI Rage 128' CONFIG_DRM_R128 + dep_tristate ' ATI Rage 128' CONFIG_DRM_R128 $CONFIG_AGP dep_tristate ' Intel I810' CONFIG_DRM_I810 $CONFIG_AGP dep_tristate ' Matrox g200/g400' CONFIG_DRM_MGA $CONFIG_AGP fi diff -u --recursive --new-file v2.4.0/linux/drivers/i2o/i2o_block.c linux/drivers/i2o/i2o_block.c --- v2.4.0/linux/drivers/i2o/i2o_block.c Wed Nov 8 17:09:50 2000 +++ linux/drivers/i2o/i2o_block.c Mon Jan 15 13:08:15 2001 @@ -392,7 +392,6 @@ if (req->nr_segments < max_segments) { req->nr_segments++; - q->elevator.nr_segments++; return 1; } return 0; @@ -436,7 +435,6 @@ if (total_segments > max_segments) return 0; - q->elevator.nr_segments -= same_segment; req->nr_segments = total_segments; return 1; } diff -u --recursive --new-file v2.4.0/linux/drivers/ide/ide-dma.c linux/drivers/ide/ide-dma.c --- v2.4.0/linux/drivers/ide/ide-dma.c Tue Jan 2 16:58:45 2001 +++ linux/drivers/ide/ide-dma.c Mon Jan 15 13:08:15 2001 @@ -226,6 +226,9 @@ unsigned char *virt_addr = bh->b_data; unsigned int size = bh->b_size; + if (nents >= PRD_ENTRIES) + return 0; + while ((bh = bh->b_reqnext) != NULL) { if ((virt_addr + size) != (unsigned char *) bh->b_data) break; @@ -259,6 +262,9 @@ HWIF(drive)->sg_nents = i = ide_build_sglist(HWIF(drive), HWGROUP(drive)->rq); + if (!i) + return 0; + sg = HWIF(drive)->sg_table; while (i && sg_dma_len(sg)) { u32 cur_addr; @@ -274,7 +280,7 @@ */ while (cur_len) { - if (++count >= PRD_ENTRIES) { + if (count++ >= PRD_ENTRIES) { printk("%s: DMA table too small\n", drive->name); pci_unmap_sg(HWIF(drive)->pci_dev, HWIF(drive)->sg_table, diff -u --recursive --new-file v2.4.0/linux/drivers/ide/ide-probe.c linux/drivers/ide/ide-probe.c --- v2.4.0/linux/drivers/ide/ide-probe.c Thu Oct 26 23:35:48 2000 +++ linux/drivers/ide/ide-probe.c Mon Jan 15 13:08:15 2001 @@ -134,7 +134,7 @@ break; } #endif - printk ("CDROM"); + printk ("CD/DVD-ROM"); break; case ide_tape: printk ("TAPE"); @@ -761,9 +761,10 @@ for (unit = 0; unit < minors; ++unit) { *bs++ = BLOCK_SIZE; #ifdef CONFIG_BLK_DEV_PDC4030 - *max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : MAX_SECTORS); + *max_sect++ = ((hwif->chipset == ide_pdc4030) ? 127 : 256); #else - *max_sect++ = MAX_SECTORS; + /* IDE can do up to 128K per request. 
*/ + *max_sect++ = 256; #endif *max_ra++ = MAX_READAHEAD; } diff -u --recursive --new-file v2.4.0/linux/drivers/isdn/hisax/Makefile linux/drivers/isdn/hisax/Makefile --- v2.4.0/linux/drivers/isdn/hisax/Makefile Fri Dec 29 14:40:54 2000 +++ linux/drivers/isdn/hisax/Makefile Mon Jan 8 15:06:01 2001 @@ -34,8 +34,8 @@ hisax-objs-$(CONFIG_HISAX_ASUSCOM) += asuscom.o isac.o arcofi.o hscx.o hisax-objs-$(CONFIG_HISAX_TELEINT) += teleint.o isac.o arcofi.o hfc_2bs0.o hisax-objs-$(CONFIG_HISAX_SEDLBAUER) += sedlbauer.o isac.o arcofi.o hscx.o isar.o -hisax-objs-$(CONFIG_HISAX_SPORTSTER) += sportster.o isac.o arcofi.o hfc_2bs0.o -hisax-objs-$(CONFIG_HISAX_MIC) += mic.o isac.o arcofi.o hfc_2bs0.o +hisax-objs-$(CONFIG_HISAX_SPORTSTER) += sportster.o isac.o arcofi.o hscx.o +hisax-objs-$(CONFIG_HISAX_MIC) += mic.o isac.o arcofi.o hscx.o hisax-objs-$(CONFIG_HISAX_NETJET) += nj_s.o netjet.o isac.o arcofi.o hisax-objs-$(CONFIG_HISAX_NETJET_U) += nj_u.o netjet.o icc.o hisax-objs-$(CONFIG_HISAX_HFCS) += hfcscard.o hfc_2bds0.o diff -u --recursive --new-file v2.4.0/linux/drivers/isdn/hisax/isdnl3.c linux/drivers/isdn/hisax/isdnl3.c --- v2.4.0/linux/drivers/isdn/hisax/isdnl3.c Tue Nov 28 21:43:13 2000 +++ linux/drivers/isdn/hisax/isdnl3.c Mon Jan 8 15:19:34 2001 @@ -566,7 +566,7 @@ } else { struct sk_buff *skb = arg; - skb_queue_head(&st->l3.squeue, skb); + skb_queue_tail(&st->l3.squeue, skb); FsmEvent(&st->l3.l3m, EV_ESTABLISH_REQ, NULL); } break; diff -u --recursive --new-file v2.4.0/linux/drivers/isdn/hisax/md5sums.asc linux/drivers/isdn/hisax/md5sums.asc --- v2.4.0/linux/drivers/isdn/hisax/md5sums.asc Thu Jan 4 13:20:17 2001 +++ linux/drivers/isdn/hisax/md5sums.asc Wed Jan 10 14:12:53 2001 @@ -10,7 +10,7 @@ ca7bd9bac39203f3074f3f093948cc3c isac.c a2ad619fd404b3149099a2984de9d23c isdnl1.c d2a78e407f3d94876deac160c6f9aae6 isdnl2.c -a109841c2e75b11fc8ef2c8718e24c3e isdnl3.c +e7932ca7ae39c497c17f13a2e1434fcd isdnl3.c afb5f2f4ac296d6de45c856993b161e1 tei.c 00023e2a482cb86a26ea870577ade5d6 callc.c a1834e9b2ec068440cff2e899eff4710 cert.c @@ -25,9 +25,9 @@ Version: 2.6.3i Charset: noconv -iQCVAwUBOlMTgDpxHvX/mS9tAQFSbgP/W9y6tnnWHTRLGqyr3EY1OHZiQXERkAAu -hp+Y8PIoX1GgAh4yZ7xhYwUsk6y0z5USdGuhC9ZHh+oZd57lPsJMnhkEZR5BVsYT -r7jHwelP527+QCLkVUCHIVIWUW0ANzeZBhDV2vefkFb+gWLiZsBhaHssbcKGsMNG -Ak4xS1ByqsM= -=lsIJ +iQCVAwUBOlxeLTpxHvX/mS9tAQH6RwP8DhyvqAnXFV6WIGi16iQ3vKikkPoqnDQs +GEn5uCW0dPYKlwthD2Grj/JbMYZhOmCFuDxF7ufJnjTSDe/D8XNe2wngxzAiwcIe +WjCrT8X95cuP3HZHscbFTEinVV0GAnoI0ZEgs5eBDhVHDqILLYMaTFBQaRH3jgXc +i5VH88jPfUM= +=qc+J -----END PGP SIGNATURE----- diff -u --recursive --new-file v2.4.0/linux/drivers/isdn/isdn_common.c linux/drivers/isdn/isdn_common.c --- v2.4.0/linux/drivers/isdn/isdn_common.c Tue Jan 2 16:45:38 2001 +++ linux/drivers/isdn/isdn_common.c Mon Jan 8 15:06:01 2001 @@ -1512,7 +1512,7 @@ int i; if ((ret = verify_area(VERIFY_READ, (void *) arg, - (ISDN_MODEM_NUMREG + ISDN_MSNLEN) + (ISDN_MODEM_NUMREG + ISDN_MSNLEN + ISDN_LMSNLEN) * ISDN_MAX_CHANNELS))) return ret; @@ -1521,6 +1521,9 @@ ISDN_MODEM_NUMREG)) return -EFAULT; p += ISDN_MODEM_NUMREG; + if (copy_from_user(dev->mdm.info[i].emu.plmsn, p, ISDN_LMSNLEN)) + return -EFAULT; + p += ISDN_LMSNLEN; if (copy_from_user(dev->mdm.info[i].emu.pmsn, p, ISDN_MSNLEN)) return -EFAULT; p += ISDN_MSNLEN; diff -u --recursive --new-file v2.4.0/linux/drivers/isdn/isdn_net.c linux/drivers/isdn/isdn_net.c --- v2.4.0/linux/drivers/isdn/isdn_net.c Fri Dec 29 14:07:22 2000 +++ linux/drivers/isdn/isdn_net.c Mon Jan 8 15:06:01 2001 @@ -2325,6 +2325,7 @@ memset(netdev, 0, 
sizeof(isdn_net_dev)); if (!(netdev->local = (isdn_net_local *) kmalloc(sizeof(isdn_net_local), GFP_KERNEL))) { printk(KERN_WARNING "isdn_net: Could not allocate device locals\n"); + kfree(netdev); return NULL; } memset(netdev->local, 0, sizeof(isdn_net_local)); diff -u --recursive --new-file v2.4.0/linux/drivers/isdn/isdn_ppp.c linux/drivers/isdn/isdn_ppp.c --- v2.4.0/linux/drivers/isdn/isdn_ppp.c Tue Nov 28 21:43:13 2000 +++ linux/drivers/isdn/isdn_ppp.c Mon Jan 8 15:20:19 2001 @@ -1131,9 +1131,9 @@ proto = PPP_IPX; /* untested */ break; default: - dev_kfree_skb(skb); printk(KERN_ERR "isdn_ppp: skipped unsupported protocol: %#x.\n", skb->protocol); + dev_kfree_skb(skb); return 0; } diff -u --recursive --new-file v2.4.0/linux/drivers/isdn/isdn_v110.c linux/drivers/isdn/isdn_v110.c --- v2.4.0/linux/drivers/isdn/isdn_v110.c Sun Aug 6 12:43:42 2000 +++ linux/drivers/isdn/isdn_v110.c Mon Jan 15 15:31:18 2001 @@ -102,7 +102,7 @@ int i; isdn_v110_stream *v; - if ((v = kmalloc(sizeof(isdn_v110_stream), GFP_KERNEL)) == NULL) + if ((v = kmalloc(sizeof(isdn_v110_stream), GFP_ATOMIC)) == NULL) return NULL; memset(v, 0, sizeof(isdn_v110_stream)); v->key = key; @@ -134,7 +134,7 @@ v->b = 0; v->skbres = hdrlen; v->maxsize = maxsize - hdrlen; - if ((v->encodebuf = kmalloc(maxsize, GFP_KERNEL)) == NULL) { + if ((v->encodebuf = kmalloc(maxsize, GFP_ATOMIC)) == NULL) { kfree(v); return NULL; } diff -u --recursive --new-file v2.4.0/linux/drivers/net/3c59x.c linux/drivers/net/3c59x.c --- v2.4.0/linux/drivers/net/3c59x.c Tue Nov 14 11:34:25 2000 +++ linux/drivers/net/3c59x.c Sat Jan 6 09:27:42 2001 @@ -118,6 +118,14 @@ LK1.1.11 13 Nov 2000 andrewm - Dump MOD_INC/DEC_USE_COUNT, use SET_MODULE_OWNER + LK1.1.12 1 Jan 2001 andrewm + - Call pci_enable_device before we request our IRQ (Tobias Ringstrom) + - Add 3c590 PCI latency timer hack to vortex_probe1 (from 0.99Ra) + - Added extended wait_for_completion for the 3c905CX. + - Look for an MII on PHY index 24 first (3c905CX oddity). + - Add HAS_NWAY to 3cSOHO100-TX (Brett Frankenberger) + - Don't free skbs we don't own on oom path in vortex_open(). + - See http://www.uow.edu.au/~andrewm/linux/#3c59x-2.3 for more details. - Also see Documentation/networking/vortex.txt */ @@ -203,7 +211,7 @@ #include static char version[] __devinitdata = -"3c59x.c:LK1.1.11 13 Nov 2000 Donald Becker and others. http://www.scyld.com/network/vortex.html " "$Revision: 1.102.2.46 $\n"; +"3c59x.c:LK1.1.12 06 Jan 2000 Donald Becker and others. 
http://www.scyld.com/network/vortex.html " "$Revision: 1.102.2.46 $\n"; MODULE_AUTHOR("Donald Becker "); MODULE_DESCRIPTION("3Com 3c59x/3c90x/3c575 series Vortex/Boomerang/Cyclone driver"); @@ -424,7 +432,7 @@ PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE, 128, }, {"3cSOHO100-TX Hurricane", - PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE, 128, }, + PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY, 128, }, {"3c555 Laptop Hurricane", PCI_USES_IO|PCI_USES_MASTER, IS_CYCLONE|EEPROM_8BIT, 128, }, {"3c556 Laptop Tornado", @@ -843,10 +851,15 @@ { int rc; - rc = vortex_probe1 (pdev, pci_resource_start (pdev, 0), pdev->irq, - ent->driver_data, vortex_cards_found); - if (rc == 0) - vortex_cards_found++; + /* wake up and enable device */ + if (pci_enable_device (pdev)) { + rc = -EIO; + } else { + rc = vortex_probe1 (pdev, pci_resource_start (pdev, 0), pdev->irq, + ent->driver_data, vortex_cards_found); + if (rc == 0) + vortex_cards_found++; + } return rc; } @@ -863,7 +876,7 @@ struct vortex_private *vp; int option; unsigned int eeprom[0x40], checksum = 0; /* EEPROM contents */ - int i; + int i, step; struct net_device *dev; static int printed_version; int retval; @@ -889,7 +902,6 @@ vci->name, ioaddr); - /* private struct aligned and zeroed by init_etherdev */ vp = dev->priv; dev->base_addr = ioaddr; dev->irq = irq; @@ -908,19 +920,29 @@ if (pdev) { /* EISA resources already marked, so only PCI needs to do this here */ /* Ignore return value, because Cardbus drivers already allocate for us */ - if (request_region(ioaddr, vci->io_size, dev->name) != NULL) { + if (request_region(ioaddr, vci->io_size, dev->name) != NULL) vp->must_free_region = 1; - } - - /* wake up and enable device */ - if (pci_enable_device (pdev)) { - retval = -EIO; - goto free_region; - } /* enable bus-mastering if necessary */ if (vci->flags & PCI_USES_MASTER) pci_set_master (pdev); + + if (vci->drv_flags & IS_VORTEX) { + u8 pci_latency; + u8 new_latency = 248; + + /* Check the PCI latency value. On the 3c590 series the latency timer + must be set to the maximum value to avoid data corruption that occurs + when the timer expires during a transfer. This bug exists the Vortex + chip only. 
*/ + pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &pci_latency); + if (pci_latency < new_latency) { + printk(KERN_INFO "%s: Overriding PCI latency" + " timer (CFLT) setting of %d, new value is %d.\n", + dev->name, pci_latency, new_latency); + pci_write_config_byte(pdev, PCI_LATENCY_TIMER, new_latency); + } + } } spin_lock_init(&vp->lock); @@ -1025,6 +1047,13 @@ dev->irq); #endif + EL3WINDOW(4); + step = (inb(ioaddr + Wn4_NetDiag) & 0x1e) >> 1; + printk(KERN_INFO " product code '%c%c' rev %02x.%d date %02d-" + "%02d-%02d\n", eeprom[6]&0xff, eeprom[6]>>8, eeprom[0x14], + step, (eeprom[4]>>5) & 15, eeprom[4] & 31, eeprom[4]>>9); + + if (pdev && vci->drv_flags & HAS_CB_FNS) { unsigned long fn_st_addr; /* Cardbus function status space */ unsigned short n; @@ -1089,8 +1118,19 @@ mii_preamble_required++; mii_preamble_required++; mdio_read(dev, 24, 1); - for (phy = 1; phy <= 32 && phy_idx < sizeof(vp->phys); phy++) { - int mii_status, phyx = phy & 0x1f; + for (phy = 0; phy < 32 && phy_idx < 1; phy++) { + int mii_status, phyx; + + /* + * For the 3c905CX we look at index 24 first, because it bogusly + * reports an external PHY at all indices + */ + if (phy == 0) + phyx = 24; + else if (phy <= 24) + phyx = phy - 1; + else + phyx = phy; mii_status = mdio_read(dev, phyx, 1); if (mii_status && mii_status != 0xffff) { vp->phys[phy_idx++] = phyx; @@ -1135,12 +1175,13 @@ dev->set_multicast_list = set_rx_mode; dev->tx_timeout = vortex_tx_timeout; dev->watchdog_timeo = (watchdog * HZ) / 1000; - +// publish_netdev(dev); return 0; free_region: if (vp->must_free_region) release_region(ioaddr, vci->io_size); +// withdraw_netdev(dev); unregister_netdev(dev); kfree (dev); printk(KERN_ERR PFX "vortex_probe1 fails. Returns %d\n", retval); @@ -1150,13 +1191,23 @@ static void wait_for_completion(struct net_device *dev, int cmd) { - int i = 4000; + int i; outw(cmd, dev->base_addr + EL3_CMD); - while (--i > 0) { + for (i = 0; i < 2000; i++) { if (!(inw(dev->base_addr + EL3_STATUS) & CmdInProgress)) return; } + + /* OK, that didn't work. Do it the slow way. One second */ + for (i = 0; i < 100000; i++) { + if (!(inw(dev->base_addr + EL3_STATUS) & CmdInProgress)) { + printk(KERN_INFO "%s: command 0x%04x took %d usecs! Please tell andrewm@uow.edu.au\n", + dev->name, cmd, i * 10); + return; + } + udelay(10); + } printk(KERN_ERR "%s: command 0x%04x did not complete! Status=0x%x\n", dev->name, cmd, inw(dev->base_addr + EL3_STATUS)); } @@ -1331,6 +1382,7 @@ set_rx_mode(dev); outw(StatsEnable, ioaddr + EL3_CMD); /* Turn on statistics. */ +// wait_for_completion(dev, SetTxStart|0x07ff); outw(RxEnable, ioaddr + EL3_CMD); /* Enable the receiver. */ outw(TxEnable, ioaddr + EL3_CMD); /* Enable transmitter. */ /* Allow status bits to be seen. */ @@ -1384,7 +1436,8 @@ } if (i != RX_RING_SIZE) { int j; - for (j = 0; j < RX_RING_SIZE; j++) { + printk(KERN_EMERG "%s: no memory for rx ring\n", dev->name); + for (j = 0; j < i; j++) { if (vp->rx_skbuff[j]) { dev_kfree_skb(vp->rx_skbuff[j]); vp->rx_skbuff[j] = 0; @@ -1532,7 +1585,10 @@ printk(KERN_ERR "%s: transmit timed out, tx_status %2.2x status %4.4x.\n", dev->name, inb(ioaddr + TxStatus), inw(ioaddr + EL3_STATUS)); - + EL3WINDOW(4); + printk(KERN_ERR " diagnostics: net %04x media %04x dma %8.8x.\n", + inw(ioaddr + Wn4_NetDiag), inw(ioaddr + Wn4_Media), + inl(ioaddr + PktStatus)); /* Slight code bloat to be user friendly. 
*/ if ((inb(ioaddr + TxStatus) & 0x88) == 0x88) printk(KERN_ERR "%s: Transmitter encountered 16 collisions --" @@ -1663,6 +1719,12 @@ dev->name, fifo_diag); /* Adapter failure requires Tx/Rx reset and reinit. */ if (vp->full_bus_master_tx) { + int bus_status = inl(ioaddr + PktStatus); + /* 0x80000000 PCI master abort. */ + /* 0x40000000 PCI target abort. */ + if (vortex_debug) + printk(KERN_ERR "%s: PCI bus error, bus status %8.8x\n", dev->name, bus_status); + /* In this case, blow the card away */ vortex_down(dev); wait_for_completion(dev, TotalReset | 0xff); diff -u --recursive --new-file v2.4.0/linux/drivers/net/Makefile linux/drivers/net/Makefile --- v2.4.0/linux/drivers/net/Makefile Thu Jan 4 13:00:55 2001 +++ linux/drivers/net/Makefile Sat Jan 6 19:45:14 2001 @@ -26,7 +26,7 @@ obj-$(CONFIG_ISDN) += slhc.o endif -subdir-$(CONFIG_PCMCIA) += pcmcia +subdir-$(CONFIG_NET_PCMCIA) += pcmcia subdir-$(CONFIG_TULIP) += tulip subdir-$(CONFIG_IRDA) += irda subdir-$(CONFIG_TR) += tokenring diff -u --recursive --new-file v2.4.0/linux/drivers/net/depca.c linux/drivers/net/depca.c --- v2.4.0/linux/drivers/net/depca.c Mon Oct 23 15:51:36 2000 +++ linux/drivers/net/depca.c Mon Jan 8 09:09:36 2001 @@ -1817,7 +1817,9 @@ ManCode[5]='\0'; for (i=0;(*signatures[i] != '\0') && (*name == '\0');i++) { - if (strstr(ManCode, signatures[i]) != NULL) { + const char * volatile lhs = ManCode; + const char * volatile rhs = signatures[i]; /* egcs-1.1.2 bug */ + if (strstr(lhs, rhs) != NULL) { strcpy(name,ManCode); status = 1; } diff -u --recursive --new-file v2.4.0/linux/drivers/net/dmfe.c linux/drivers/net/dmfe.c --- v2.4.0/linux/drivers/net/dmfe.c Tue Dec 5 12:29:38 2000 +++ linux/drivers/net/dmfe.c Mon Jan 8 09:09:36 2001 @@ -1596,10 +1596,10 @@ break; } - rc = pci_register_driver(&dmfe_driver); + rc = pci_module_init(&dmfe_driver); if (rc < 0) return rc; - if (rc > 0) { + if (rc >= 0) { printk (KERN_INFO "Davicom DM91xx net driver loaded, version " DMFE_VERSION "\n"); return 0; diff -u --recursive --new-file v2.4.0/linux/drivers/net/ppp_async.c linux/drivers/net/ppp_async.c --- v2.4.0/linux/drivers/net/ppp_async.c Fri Apr 21 13:31:10 2000 +++ linux/drivers/net/ppp_async.c Mon Jan 15 11:04:57 2001 @@ -33,13 +33,6 @@ #include #include -#ifndef spin_trylock_bh -#define spin_trylock_bh(lock) ({ int __r; local_bh_disable(); \ - __r = spin_trylock(lock); \ - if (!__r) local_bh_enable(); \ - __r; }) -#endif - #define PPP_VERSION "2.4.1" #define OBUFSIZE 256 @@ -76,6 +69,7 @@ /* Bit numbers in xmit_flags */ #define XMIT_WAKEUP 0 #define XMIT_FULL 1 +#define XMIT_BUSY 2 /* State bits */ #define SC_TOSS 0x20000000 @@ -181,18 +175,14 @@ } /* - * Read does nothing. + * Read does nothing - no data is ever available this way. + * Pppd reads and writes packets via /dev/ppp instead. 
*/ static ssize_t ppp_asynctty_read(struct tty_struct *tty, struct file *file, unsigned char *buf, size_t count) { - /* For now, do the same as the old 2.3.x code useta */ - struct asyncppp *ap = tty->disc_data; - - if (ap == 0) - return -ENXIO; - return ppp_channel_read(&ap->chan, file, buf, count); + return -EAGAIN; } /* @@ -203,12 +193,7 @@ ppp_asynctty_write(struct tty_struct *tty, struct file *file, const unsigned char *buf, size_t count) { - /* For now, do the same as the old 2.3.x code useta */ - struct asyncppp *ap = tty->disc_data; - - if (ap == 0) - return -ENXIO; - return ppp_channel_write(&ap->chan, buf, count); + return -EAGAIN; } static int @@ -259,25 +244,6 @@ err = 0; break; -/* - * For now, do the same as the old 2.3 driver useta - */ - case PPPIOCGFLAGS: - case PPPIOCSFLAGS: - case PPPIOCGASYNCMAP: - case PPPIOCSASYNCMAP: - case PPPIOCGRASYNCMAP: - case PPPIOCSRASYNCMAP: - case PPPIOCGXASYNCMAP: - case PPPIOCSXASYNCMAP: - case PPPIOCGMRU: - case PPPIOCSMRU: - err = -EPERM; - if (!capable(CAP_NET_ADMIN)) - break; - err = ppp_async_ioctl(&ap->chan, cmd, arg); - break; - case PPPIOCATTACH: case PPPIOCDETACH: err = ppp_channel_ioctl(&ap->chan, cmd, arg); @@ -294,18 +260,7 @@ static unsigned int ppp_asynctty_poll(struct tty_struct *tty, struct file *file, poll_table *wait) { - unsigned int mask; - struct asyncppp *ap = tty->disc_data; - - mask = POLLOUT | POLLWRNORM; -/* - * For now, do the same as the old 2.3 driver useta - */ - if (ap != 0) - mask |= ppp_channel_poll(&ap->chan, file, wait); - if (test_bit(TTY_OTHER_CLOSED, &tty->flags) || tty_hung_up_p(file)) - mask |= POLLHUP; - return mask; + return 0; } static int @@ -637,8 +592,18 @@ int tty_stuffed = 0; set_bit(XMIT_WAKEUP, &ap->xmit_flags); - if (!spin_trylock_bh(&ap->xmit_lock)) + /* + * We can get called recursively here if the tty write + * function calls our wakeup function. This can happen + * for example on a pty with both the master and slave + * set to PPP line discipline. + * We use the XMIT_BUSY bit to detect this and get out, + * leaving the XMIT_WAKEUP bit set to tell the other + * instance that it may now be able to write more now. + */ + if (test_and_set_bit(XMIT_BUSY, &ap->xmit_flags)) return 0; + spin_lock_bh(&ap->xmit_lock); for (;;) { if (test_and_clear_bit(XMIT_WAKEUP, &ap->xmit_flags)) tty_stuffed = 0; @@ -653,7 +618,7 @@ tty_stuffed = 1; continue; } - if (ap->optr == ap->olim && ap->tpkt != 0) { + if (ap->optr >= ap->olim && ap->tpkt != 0) { if (ppp_async_encode(ap)) { /* finished processing ap->tpkt */ clear_bit(XMIT_FULL, &ap->xmit_flags); @@ -661,17 +626,29 @@ } continue; } - /* haven't made any progress */ - spin_unlock_bh(&ap->xmit_lock); + /* + * We haven't made any progress this time around. + * Clear XMIT_BUSY to let other callers in, but + * after doing so we have to check if anyone set + * XMIT_WAKEUP since we last checked it. If they + * did, we should try again to set XMIT_BUSY and go + * around again in case XMIT_BUSY was still set when + * the other caller tried. + */ + clear_bit(XMIT_BUSY, &ap->xmit_flags); + /* any more work to do? 
if not, exit the loop */ if (!(test_bit(XMIT_WAKEUP, &ap->xmit_flags) || (!tty_stuffed && ap->tpkt != 0))) break; - if (!spin_trylock_bh(&ap->xmit_lock)) + /* more work to do, see if we can do it now */ + if (test_and_set_bit(XMIT_BUSY, &ap->xmit_flags)) break; } + spin_unlock_bh(&ap->xmit_lock); return done; flush: + clear_bit(XMIT_BUSY, &ap->xmit_flags); if (ap->tpkt != 0) { kfree_skb(ap->tpkt); ap->tpkt = 0; diff -u --recursive --new-file v2.4.0/linux/drivers/s390/block/dasd.c linux/drivers/s390/block/dasd.c --- v2.4.0/linux/drivers/s390/block/dasd.c Thu Oct 26 23:35:48 2000 +++ linux/drivers/s390/block/dasd.c Mon Jan 15 13:08:15 2001 @@ -952,7 +952,6 @@ go = 1; while (go && !list_empty(&queue->queue_head)) { req = blkdev_entry_next_request(&queue->queue_head); - req = blkdev_entry_next_request(&queue->queue_head); di = DEVICE_NR (req->rq_dev); dasd_debug ((unsigned long) req); /* req */ dasd_debug (0xc4d90000 + /* DR## */ diff -u --recursive --new-file v2.4.0/linux/drivers/scsi/constants.c linux/drivers/scsi/constants.c --- v2.4.0/linux/drivers/scsi/constants.c Mon Mar 13 22:15:03 2000 +++ linux/drivers/scsi/constants.c Mon Jan 15 13:08:15 2001 @@ -776,7 +776,7 @@ printk("%s%s: sns = %2x %2x\n", devclass, kdevname(dev), sense_buffer[0], sense_buffer[2]); - printk("Non-extended sense class %d code 0x%0x ", sense_class, code); + printk("Non-extended sense class %d code 0x%0x\n", sense_class, code); s = 4; } diff -u --recursive --new-file v2.4.0/linux/drivers/scsi/megaraid.c linux/drivers/scsi/megaraid.c --- v2.4.0/linux/drivers/scsi/megaraid.c Wed Dec 6 12:06:18 2000 +++ linux/drivers/scsi/megaraid.c Tue Jan 9 10:40:43 2001 @@ -149,7 +149,6 @@ #include #ifdef MODULE -#include #include char kernel_version[] = UTS_RELEASE; diff -u --recursive --new-file v2.4.0/linux/drivers/scsi/ppa.c linux/drivers/scsi/ppa.c --- v2.4.0/linux/drivers/scsi/ppa.c Thu Jan 4 13:00:55 2001 +++ linux/drivers/scsi/ppa.c Tue Jan 9 10:40:03 2001 @@ -222,8 +222,8 @@ printk(" supported by the imm (ZIP Plus) driver. If the\n"); printk(" cable is marked with \"AutoDetect\", this is what has\n"); printk(" happened.\n"); - return 0; spin_lock_irq(&io_request_lock); + return 0; } try_again = 1; goto retry_entry; diff -u --recursive --new-file v2.4.0/linux/drivers/scsi/scsi_lib.c linux/drivers/scsi/scsi_lib.c --- v2.4.0/linux/drivers/scsi/scsi_lib.c Sun Sep 17 10:09:29 2000 +++ linux/drivers/scsi/scsi_lib.c Mon Jan 15 16:52:57 2001 @@ -50,6 +50,50 @@ * This entire source file deals with the new queueing code. */ +/* + * Function: __scsi_insert_special() + * + * Purpose: worker for scsi_insert_special_*() + * + * Arguments: q - request queue where request should be inserted + * rq - request to be inserted + * data - private data + * at_head - insert request at head or tail of queue + * + * Lock status: Assumed that io_request_lock is not held upon entry. + * + * Returns: Nothing + */ +static void __scsi_insert_special(request_queue_t *q, struct request *rq, + void *data, int at_head) +{ + unsigned long flags; + + ASSERT_LOCK(&io_request_lock, 0); + + rq->cmd = SPECIAL; + rq->special = data; + rq->q = NULL; + rq->nr_segments = 0; + rq->elevator_sequence = 0; + + /* + * We have the option of inserting the head or the tail of the queue. + * Typically we use the tail for new ioctls and so forth. We use the + * head of the queue for things like a QUEUE_FULL message from a + * device, or a host that is unable to accept a particular command. 
+ */ + spin_lock_irqsave(&io_request_lock, flags); + + if (at_head) + list_add(&rq->queue, &q->queue_head); + else + list_add_tail(&rq->queue, &q->queue_head); + + q->request_fn(q); + spin_unlock_irqrestore(&io_request_lock, flags); +} + /* * Function: scsi_insert_special_cmd() @@ -73,52 +117,9 @@ */ int scsi_insert_special_cmd(Scsi_Cmnd * SCpnt, int at_head) { - unsigned long flags; - request_queue_t *q; - - ASSERT_LOCK(&io_request_lock, 0); - - /* - * The SCpnt already contains a request structure - we will doctor the - * thing up with the appropriate values and use that in the actual - * request queue. - */ - q = &SCpnt->device->request_queue; - SCpnt->request.cmd = SPECIAL; - SCpnt->request.special = (void *) SCpnt; - SCpnt->request.q = NULL; - SCpnt->request.free_list = NULL; - SCpnt->request.nr_segments = 0; - - /* - * We have the option of inserting the head or the tail of the queue. - * Typically we use the tail for new ioctls and so forth. We use the - * head of the queue for things like a QUEUE_FULL message from a - * device, or a host that is unable to accept a particular command. - */ - spin_lock_irqsave(&io_request_lock, flags); - - if (at_head) { - list_add(&SCpnt->request.queue, &q->queue_head); - } else { - /* - * FIXME(eric) - we always insert at the tail of the - * list. Otherwise ioctl commands would always take - * precedence over normal I/O. An ioctl on a busy - * disk might be delayed indefinitely because the - * request might not float high enough in the queue - * to be scheduled. - */ - list_add_tail(&SCpnt->request.queue, &q->queue_head); - } + request_queue_t *q = &SCpnt->device->request_queue; - /* - * Now hit the requeue function for the queue. If the host is - * already busy, so be it - we have nothing special to do. If - * the host can queue it, then send it off. - */ - q->request_fn(q); - spin_unlock_irqrestore(&io_request_lock, flags); + __scsi_insert_special(q, &SCpnt->request, SCpnt, at_head); return 0; } @@ -144,51 +145,9 @@ */ int scsi_insert_special_req(Scsi_Request * SRpnt, int at_head) { - unsigned long flags; - request_queue_t *q; - - ASSERT_LOCK(&io_request_lock, 0); - - /* - * The SCpnt already contains a request structure - we will doctor the - * thing up with the appropriate values and use that in the actual - * request queue. - */ - q = &SRpnt->sr_device->request_queue; - SRpnt->sr_request.cmd = SPECIAL; - SRpnt->sr_request.special = (void *) SRpnt; - SRpnt->sr_request.q = NULL; - SRpnt->sr_request.nr_segments = 0; - - /* - * We have the option of inserting the head or the tail of the queue. - * Typically we use the tail for new ioctls and so forth. We use the - * head of the queue for things like a QUEUE_FULL message from a - * device, or a host that is unable to accept a particular command. - */ - spin_lock_irqsave(&io_request_lock, flags); + request_queue_t *q = &SRpnt->sr_device->request_queue; - if (at_head) { - list_add(&SRpnt->sr_request.queue, &q->queue_head); - } else { - /* - * FIXME(eric) - we always insert at the tail of the - * list. Otherwise ioctl commands would always take - * precedence over normal I/O. An ioctl on a busy - * disk might be delayed indefinitely because the - * request might not float high enough in the queue - * to be scheduled. - */ - list_add_tail(&SRpnt->sr_request.queue, &q->queue_head); - } - - /* - * Now hit the requeue function for the queue. If the host is - * already busy, so be it - we have nothing special to do. If - * the host can queue it, then send it off. 
- */ - q->request_fn(q); - spin_unlock_irqrestore(&io_request_lock, flags); + __scsi_insert_special(q, &SRpnt->sr_request, SRpnt, at_head); return 0; } @@ -403,6 +362,7 @@ struct request *req; struct buffer_head *bh; Scsi_Device * SDpnt; + int nsect; ASSERT_LOCK(&io_request_lock, 0); @@ -414,11 +374,13 @@ } do { if ((bh = req->bh) != NULL) { + nsect = bh->b_size >> 9; + blk_finished_io(nsect); req->bh = bh->b_reqnext; - req->nr_sectors -= bh->b_size >> 9; - req->sector += bh->b_size >> 9; + req->nr_sectors -= nsect; + req->sector += nsect; bh->b_reqnext = NULL; - sectors -= bh->b_size >> 9; + sectors -= nsect; bh->b_end_io(bh, uptodate); if ((bh = req->bh) != NULL) { req->current_nr_sectors = bh->b_size >> 9; @@ -863,17 +825,6 @@ SHpnt = SDpnt->host; /* - * If the host for this device is in error recovery mode, don't - * do anything at all here. When the host leaves error recovery - * mode, it will automatically restart things and start queueing - * commands again. Same goes if the queue is actually plugged, - * if the device itself is blocked, or if the host is fully - * occupied. - */ - if (SHpnt->in_recovery || q->plugged) - return; - - /* * To start with, we keep looping until the queue is empty, or until * the host is no longer able to accept any more requests. */ @@ -896,10 +847,11 @@ || (SHpnt->host_blocked) || (SHpnt->host_self_blocked)) { /* - * If we are unable to process any commands at all for this - * device, then we consider it to be starved. What this means - * is that there are no outstanding commands for this device - * and hence we need a little help getting it started again + * If we are unable to process any commands at all for + * this device, then we consider it to be starved. + * What this means is that there are no outstanding + * commands for this device and hence we need a + * little help getting it started again * once the host isn't quite so busy. */ if (SDpnt->device_busy == 0) { @@ -1000,8 +952,8 @@ } /* * If so, we are ready to do something. Bump the count - * while the queue is locked and then break out of the loop. - * Otherwise loop around and try another request. + * while the queue is locked and then break out of the + * loop. Otherwise loop around and try another request. */ if (!SCpnt) { break; @@ -1029,8 +981,9 @@ memcpy(&SCpnt->request, req, sizeof(struct request)); /* - * We have copied the data out of the request block - it is now in - * a field in SCpnt. Release the request block. + * We have copied the data out of the request block - + * it is now in a field in SCpnt. Release the request + * block. */ blkdev_release_request(req); } @@ -1047,12 +1000,14 @@ /* * This will do a couple of things: * 1) Fill in the actual SCSI command. - * 2) Fill in any other upper-level specific fields (timeout). + * 2) Fill in any other upper-level specific fields + * (timeout). * - * If this returns 0, it means that the request failed (reading - * past end of disk, reading offline device, etc). This won't - * actually talk to the device, but some kinds of consistency - * checking may cause the request to be rejected immediately. + * If this returns 0, it means that the request failed + * (reading past end of disk, reading offline device, + * etc). This won't actually talk to the device, but + * some kinds of consistency checking may cause the + * request to be rejected immediately. */ if (STpnt == NULL) { STpnt = scsi_get_request_dev(req); @@ -1103,8 +1058,8 @@ scsi_dispatch_cmd(SCpnt); /* - * Now we need to grab the lock again. 
We are about to mess with - * the request queue and try to find another command. + * Now we need to grab the lock again. We are about to mess + * with the request queue and try to find another command. */ spin_lock_irq(&io_request_lock); } diff -u --recursive --new-file v2.4.0/linux/drivers/scsi/scsi_merge.c linux/drivers/scsi/scsi_merge.c --- v2.4.0/linux/drivers/scsi/scsi_merge.c Thu Oct 12 11:16:26 2000 +++ linux/drivers/scsi/scsi_merge.c Mon Jan 15 13:08:15 2001 @@ -324,7 +324,6 @@ req->nr_segments >= SHpnt->sg_tablesize) return 0; req->nr_segments++; - q->elevator.nr_segments++; return 1; } @@ -341,11 +340,8 @@ if (req->nr_hw_segments >= SHpnt->sg_tablesize || req->nr_segments >= SHpnt->sg_tablesize) return 0; - if (req->nr_segments >= max_segments) - return 0; req->nr_hw_segments++; req->nr_segments++; - q->elevator.nr_segments++; return 1; } #else @@ -361,7 +357,6 @@ * counter. */ req->nr_segments++; - q->elevator.nr_segments++; return 1; } else { return 0; @@ -417,8 +412,10 @@ SDpnt = (Scsi_Device *) q->queuedata; SHpnt = SDpnt->host; +#ifdef DMA_CHUNK_SIZE if (max_segments > 64) max_segments = 64; +#endif if (use_clustering) { /* @@ -471,8 +468,10 @@ SDpnt = (Scsi_Device *) q->queuedata; SHpnt = SDpnt->host; +#ifdef DMA_CHUNK_SIZE if (max_segments > 64) max_segments = 64; +#endif if (use_clustering) { /* @@ -601,10 +600,10 @@ SDpnt = (Scsi_Device *) q->queuedata; SHpnt = SDpnt->host; +#ifdef DMA_CHUNK_SIZE if (max_segments > 64) max_segments = 64; -#ifdef DMA_CHUNK_SIZE /* If it would not fit into prepared memory space for sg chain, * then don't allow the merge. */ @@ -664,7 +663,6 @@ * This one is OK. Let it go. */ req->nr_segments += next->nr_segments - 1; - q->elevator.nr_segments--; #ifdef DMA_CHUNK_SIZE req->nr_hw_segments += next->nr_hw_segments - 1; #endif diff -u --recursive --new-file v2.4.0/linux/drivers/scsi/sg.c linux/drivers/scsi/sg.c --- v2.4.0/linux/drivers/scsi/sg.c Thu Jan 4 12:50:17 2001 +++ linux/drivers/scsi/sg.c Mon Jan 15 13:08:15 2001 @@ -694,6 +694,7 @@ (void *)SRpnt->sr_buffer, hp->dxfer_len, sg_cmd_done_bh, timeout, SG_DEFAULT_RETRIES); /* dxfer_len overwrites SRpnt->sr_bufflen, hence need for b_malloc_len */ + generic_unplug_device(&SRpnt->sr_device->request_queue); return 0; } diff -u --recursive --new-file v2.4.0/linux/drivers/scsi/sr.c linux/drivers/scsi/sr.c --- v2.4.0/linux/drivers/scsi/sr.c Fri Dec 29 14:07:22 2000 +++ linux/drivers/scsi/sr.c Mon Jan 15 13:08:15 2001 @@ -671,12 +671,14 @@ cmd[3] = cmd[5] = 0; rc = sr_do_ioctl(i, cmd, buffer, 128, 1, SCSI_DATA_READ, NULL); - if (-EINVAL == rc) { - /* failed, drive has'nt this mode page */ + if (rc) { + /* failed, drive doesn't have capabilities mode page */ scsi_CDs[i].cdi.speed = 1; - /* disable speed select, drive probably can't do this either */ - scsi_CDs[i].cdi.mask |= CDC_SELECT_SPEED; + scsi_CDs[i].cdi.mask |= (CDC_CD_R | CDC_CD_RW | CDC_DVD_R | + CDC_DVD | CDC_DVD_RAM | + CDC_SELECT_DISC | CDC_SELECT_SPEED); scsi_free(buffer, 512); + printk("sr%i: scsi-1 drive\n"); return; } n = buffer[3] + 4; diff -u --recursive --new-file v2.4.0/linux/fs/Config.in linux/fs/Config.in --- v2.4.0/linux/fs/Config.in Thu Nov 9 16:04:42 2000 +++ linux/fs/Config.in Mon Jan 15 12:42:32 2001 @@ -8,6 +8,8 @@ tristate 'Kernel automounter support' CONFIG_AUTOFS_FS tristate 'Kernel automounter version 4 support (also supports v3)' CONFIG_AUTOFS4_FS +dep_tristate 'Reiserfs support' CONFIG_REISERFS_FS $CONFIG_EXPERIMENTAL +dep_mbool ' Have reiserfs do extra internal checking' CONFIG_REISERFS_CHECK $CONFIG_REISERFS_FS 
$CONFIG_EXPERIMENTAL dep_tristate 'ADFS file system support' CONFIG_ADFS_FS $CONFIG_EXPERIMENTAL dep_mbool ' ADFS write support (DANGEROUS)' CONFIG_ADFS_FS_RW $CONFIG_ADFS_FS $CONFIG_EXPERIMENTAL diff -u --recursive --new-file v2.4.0/linux/fs/Makefile linux/fs/Makefile --- v2.4.0/linux/fs/Makefile Fri Dec 29 14:07:23 2000 +++ linux/fs/Makefile Mon Jan 15 12:42:32 2001 @@ -58,6 +58,7 @@ subdir-$(CONFIG_AUTOFS_FS) += autofs subdir-$(CONFIG_AUTOFS4_FS) += autofs4 subdir-$(CONFIG_ADFS_FS) += adfs +subdir-$(CONFIG_REISERFS_FS) += reiserfs subdir-$(CONFIG_DEVPTS_FS) += devpts subdir-$(CONFIG_SUN_OPENPROMFS) += openpromfs diff -u --recursive --new-file v2.4.0/linux/fs/buffer.c linux/fs/buffer.c --- v2.4.0/linux/fs/buffer.c Wed Jan 3 20:45:26 2001 +++ linux/fs/buffer.c Mon Jan 15 12:42:32 2001 @@ -834,6 +834,10 @@ return; } +void set_buffer_async_io(struct buffer_head *bh) { + bh->b_end_io = end_buffer_io_async ; +} + /* * Synchronise all the inode's dirty buffers to the disk. * @@ -1151,7 +1155,7 @@ /* grab the lru lock here to block bdflush. */ spin_lock(&lru_list_lock); write_lock(&hash_table_lock); - if (!atomic_dec_and_test(&buf->b_count) || buffer_locked(buf)) + if (!atomic_dec_and_test(&buf->b_count) || buffer_locked(buf) || buffer_protected(buf)) goto in_use; __hash_unlink(buf); remove_inode_queue(buf); @@ -2411,6 +2415,7 @@ loop = 1; goto cleaned_buffers_try_again; } + wakeup_bdflush(0); } return 0; } diff -u --recursive --new-file v2.4.0/linux/fs/exec.c linux/fs/exec.c --- v2.4.0/linux/fs/exec.c Wed Jan 3 20:45:26 2001 +++ linux/fs/exec.c Mon Jan 8 13:31:56 2001 @@ -407,6 +407,7 @@ /* Add it to the list of mm's */ spin_lock(&mmlist_lock); list_add(&mm->mmlist, &init_mm.mmlist); + mmlist_nr++; spin_unlock(&mmlist_lock); task_lock(current); diff -u --recursive --new-file v2.4.0/linux/fs/inode.c linux/fs/inode.c --- v2.4.0/linux/fs/inode.c Fri Dec 29 15:35:42 2000 +++ linux/fs/inode.c Mon Jan 15 18:20:14 2001 @@ -136,6 +136,16 @@ struct super_block * sb = inode->i_sb; if (sb) { + /* Don't do this for I_DIRTY_PAGES - that doesn't actually dirty the inode itself */ + if (flags & (I_DIRTY | I_DIRTY_SYNC)) { + if (sb->s_op && sb->s_op->dirty_inode) + sb->s_op->dirty_inode(inode); + } + + /* avoid the locking if we can */ + if ((inode->i_state & flags) == flags) + return; + spin_lock(&inode_lock); if ((inode->i_state & flags) != flags) { inode->i_state |= flags; @@ -676,7 +686,17 @@ spin_unlock(&inode_lock); clean_inode(inode); - sb->s_op->read_inode(inode); + + /* reiserfs specific hack right here. We don't + ** want this to last, and are looking for VFS changes + ** that will allow us to get rid of it. + ** -- mason@suse.com + */ + if (sb->s_op->read_inode2) { + sb->s_op->read_inode2(inode, opaque) ; + } else { + sb->s_op->read_inode(inode); + } /* * This is special! 
We do not need the spinlock diff -u --recursive --new-file v2.4.0/linux/fs/nfs/flushd.c linux/fs/nfs/flushd.c --- v2.4.0/linux/fs/nfs/flushd.c Wed Jun 21 07:25:17 2000 +++ linux/fs/nfs/flushd.c Wed Jan 10 14:18:29 2001 @@ -71,18 +71,17 @@ int status = 0; dprintk("NFS: writecache_init\n"); + + /* Create the RPC task */ + if (!(task = rpc_new_task(server->client, NULL, RPC_TASK_ASYNC))) + return -ENOMEM; + spin_lock(&nfs_flushd_lock); cache = server->rw_requests; if (cache->task) goto out_unlock; - /* Create the RPC task */ - status = -ENOMEM; - task = rpc_new_task(server->client, NULL, RPC_TASK_ASYNC); - if (!task) - goto out_unlock; - task->tk_calldata = server; cache->task = task; @@ -99,6 +98,7 @@ return 0; out_unlock: spin_unlock(&nfs_flushd_lock); + rpc_release_task(task); return status; } @@ -195,7 +195,9 @@ if (*q) { *q = inode->u.nfs_i.hash_next; NFS_FLAGS(inode) &= ~NFS_INO_FLUSH; + spin_unlock(&nfs_flushd_lock); iput(inode); + return; } out: spin_unlock(&nfs_flushd_lock); diff -u --recursive --new-file v2.4.0/linux/fs/proc/kcore.c linux/fs/proc/kcore.c --- v2.4.0/linux/fs/proc/kcore.c Thu Sep 7 08:43:49 2000 +++ linux/fs/proc/kcore.c Mon Jan 15 16:54:20 2001 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff -u --recursive --new-file v2.4.0/linux/fs/ramfs/inode.c linux/fs/ramfs/inode.c --- v2.4.0/linux/fs/ramfs/inode.c Fri Dec 29 19:26:31 2000 +++ linux/fs/ramfs/inode.c Fri Jan 5 23:06:19 2001 @@ -81,6 +81,7 @@ static int ramfs_writepage(struct page *page) { SetPageDirty(page); + UnlockPage(page); return 0; } diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/Makefile linux/fs/reiserfs/Makefile --- v2.4.0/linux/fs/reiserfs/Makefile Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/Makefile Mon Jan 15 12:42:32 2001 @@ -0,0 +1,20 @@ +# +# Makefile for the linux reiser-filesystem routines. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... + +O_TARGET := reiserfs.o +obj-y := bitmap.o do_balan.o namei.o inode.o file.o dir.o fix_node.o super.o prints.o objectid.o \ +lbalance.o ibalance.o stree.o hashes.o buffer2.o tail_conversion.o journal.o resize.o tail_conversion.o version.o item_ops.o ioctl.o + +obj-m := $(O_TARGET) + +include $(TOPDIR)/Rules.make + +TAGS: + etags *.c + diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/README linux/fs/reiserfs/README --- v2.4.0/linux/fs/reiserfs/README Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/README Mon Jan 15 12:42:32 2001 @@ -0,0 +1,157 @@ +[LICENSING] + +ReiserFS is hereby licensed under the GNU General +Public License version 2. + +Source code files that contain the phrase "licensing governed by +reiserfs/README" are "governed files" throughout this file. Governed +files are licensed under the GPL. The portions of them owned by Hans +Reiser, or authorized to be licensed by him, have been in the past, +and likely will be in the future, licensed to other parties under +other licenses. If you add your code to governed files, and don't +want it to be owned by Hans Reiser, put your copyright label on that +code so the poor blight and his customers can keep things straight. 
+All portions of governed files not labeled otherwise are owned by Hans +Reiser, and by adding your code to it, widely distributing it to +others or sending us a patch, and leaving the sentence in stating that +licensing is governed by the statement in this file, you accept this. +It will be a kindness if you identify whether Hans Reiser is allowed +to license code labeled as owned by you on your behalf other than +under the GPL, because he wants to know if it is okay to do so and put +a check in the mail to you (for non-trivial improvements) when he +makes his next sale. He makes no guarantees as to the amount if any, +though he feels motivated to motivate contributors, and you can surely +discuss this with him before or after contributing. You have the +right to decline to allow him to license your code contribution other +than under the GPL. + +Further licensing options are available for commercial and/or other +interests directly from Hans Reiser: hans@reiser.to. If you interpret +the GPL as not allowing those additional licensing options, you read +it wrongly, and Richard Stallman agrees with me, when carefully read +you can see that those restrictions on additional terms do not apply +to the owner of the copyright, and my interpretation of this shall +govern for this license. + +Finally, nothing in this license shall be interpreted to allow you to +fail to fairly credit me, or to remove my credits, without my +permission, unless you are an end user not redistributing to others. +If you have doubts about how to properly do that, or about what is +fair, ask. (Last I spoke with him Richard was contemplating how best +to address the fair crediting issue in the next GPL version.) + +[END LICENSING] + +Reiserfs is a file system based on balanced tree algorithms, which is +described at http://devlinux.com/namesys. + +Stop reading here. Go there, then return. + +Send bug reports to yura@namesys.botik.ru. + +mkreiserfs and other utilities are in reiserfs/utils, or wherever your +Linux provider put them. There is some disagreement about how useful +it is for users to get their fsck and mkreiserfs out of sync with the +version of reiserfs that is in their kernel, with many important +distributors wanting them out of sync.:-) Please try to remember to +recompile and reinstall fsck and mkreiserfs with every update of +reiserfs, this is a common source of confusion. Note that some of the +utilities cannot be compiled without accessing the balancing code +which is in the kernel code, and relocating the utilities may require +you to specify where that code can be found. + +Yes, if you update your reiserfs kernel module you do have to +recompile your kernel, most of the time. The errors you get will be +quite cryptic if your forget to do so. + +Real users, as opposed to folks who want to hack and then understand +what went wrong, will want REISERFS_CHECK off. + +Hideous Commercial Pitch: Spread your development costs across other OS +vendors. Select from the best in the world, not the best in your +building, by buying from third party OS component suppliers. Leverage +the software component development power of the internet. Be the most +aggressive in taking advantage of the commercial possibilities of +decentralized internet development, and add value through your branded +integration that you sell as an operating system. Let your competitors +be the ones to compete against the entire internet by themselves. Be +hip, get with the new economic trend, before your competitors do. 
Send +email to hans@reiser.to. + +To understand the code, after reading the website, start reading the +code by reading reiserfs_fs.h first. + +Hans Reiser was the project initiator, primary architect, source of all +funding for the first 5.5 years, and one of the programmers. He owns +the copyright. + +Vladimir Saveljev was one of the programmers, and he worked long hours +writing the cleanest code. He always made the effort to be the best he +could be, and to make his code the best that it could be. What resulted +was quite remarkable. I don't think that money can ever motivate someone +to work the way he did, he is one of the most selfless men I know. + +Yura helps with benchmarking, coding hashes, and block pre-allocation +code. + +Anatoly Pinchuk is a former member of our team who worked closely with +Vladimir throughout the project's development. He wrote a quite +substantial portion of the total code. He realized that there was a +space problem with packing tails of files for files larger than a node +that start on a node aligned boundary (there are reasons to want to node +align files), and he invented and implemented indirect items and +unformatted nodes as the solution. + +Konstantin Shvachko, with the help of the Russian version of a VC, +tried to put me in a position where I was forced into giving control +of the project to him. (Fortunately, as the person paying the money +for all salaries from my dayjob I owned all copyrights, and you can't +really force takeovers of sole proprietorships.) This was something +curious, because he never really understood the value of our project, +why we should do what we do, or why innovation was possible in +general, but he was sure that he ought to be controlling it. Every +innovation had to be forced past him while he was with us. He added +two years to the time required to complete reiserfs, and was a net +loss for me. Mikhail Gilula was a brilliant innovator who also left +in a destructive way that erased the value of his contributions, and +that he was shown much generosity just makes it more painful. + +Grigory Zaigralin was an extremely effective system administrator for +our group. + +Igor Krasheninnikov was wonderful at hardware procurement, repair, and +network installation. + +Jeremy Fitzhardinge wrote the teahash.c code, and he gives credit to a +textbook he got the algorithm from in the code. Note that his analysis +of how we could use the hashing code in making 32 bit NFS cookies work +was probably more important than the actual algorithm. Colin Plumb also +contributed to it. + +Chris Mason dived right into our code, and in just a few months produced +the journaling code that dramatically increased the value of ReiserFS. +He is just an amazing programmer. + +Igor Zagorovsky is writing much of the new item handler and extent code +for our next major release. + +Alexander Zarochentcev (sometimes known as zam, or sasha), wrote the +resizer, and is hard at work on implementing allocate on flush. SGI +implemented allocate on flush before us for XFS, and generously took +the time to convince me we should do it also. They are great people, +and a great company. + +Yuri Shevchuk and Nikita Danilov are doing squid cache optimization. + +Vitaly Fertman is doing fsck. + +SuSE, IntegratedLinux.com, Ecila, MP3.com, bigstorage.com, and the +Alpha PC Company made it possible for me to not have a day job +anymore, and to dramatically increase our staffing. 
Ecila funded +hypertext feature development, MP3.com funded journaling, SuSE funded +core development, IntegratedLinux.com funded squid web cache +appliances, bigstorage.com funded HSM, and the alpha PC company funded +the alpha port. Many of these tasks were helped by sponsors other +than the ones just named. SuSE has helped in much more than just +funding.... + diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/bitmap.c linux/fs/reiserfs/bitmap.c --- v2.4.0/linux/fs/reiserfs/bitmap.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/bitmap.c Mon Jan 15 15:31:19 2001 @@ -0,0 +1,679 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +#else + +#include "nokernel.h" + +#endif + + +#ifdef CONFIG_REISERFS_CHECK + +/* this is a safety check to make sure +** blocks are reused properly. used for debugging only. +** +** this checks, that block can be reused, and it has correct state +** (free or busy) +*/ +int is_reusable (struct super_block * s, unsigned long block, int bit_value) +{ + int i, j; + + if (block == 0 || block >= SB_BLOCK_COUNT (s)) { + reiserfs_warning ("vs-4010: is_reusable: block number is out of range %lu (%u)\n", + block, SB_BLOCK_COUNT (s)); + return 0; + } + + /* it can't be one of the bitmap blocks */ + for (i = 0; i < SB_BMAP_NR (s); i ++) + if (block == SB_AP_BITMAP (s)[i]->b_blocknr) { + reiserfs_warning ("vs: 4020: is_reusable: " + "bitmap block %lu(%u) can't be freed or reused\n", + block, SB_BMAP_NR (s)); + return 0; + } + + i = block / (s->s_blocksize << 3); + if (i >= SB_BMAP_NR (s)) { + reiserfs_warning ("vs-4030: is_reusable: there is no so many bitmap blocks: " + "block=%lu, bitmap_nr=%d\n", block, i); + return 0; + } + + j = block % (s->s_blocksize << 3); + if ((bit_value == 0 && + reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i]->b_data)) || + (bit_value == 1 && + reiserfs_test_le_bit(j, SB_AP_BITMAP (s)[i]->b_data) == 0)) { + reiserfs_warning ("vs-4040: is_reusable: corresponding bit of block %lu does not " + "match required value (i==%d, j==%d) test_bit==%d\n", + block, i, j, reiserfs_test_le_bit (j, SB_AP_BITMAP (s)[i]->b_data)); + return 0; + } + + if (bit_value == 0 && block == SB_ROOT_BLOCK (s)) { + reiserfs_warning ("vs-4050: is_reusable: this is root block (%u), " + "it must be busy", SB_ROOT_BLOCK (s)); + return 0; + } + + return 1; +} + + + + +#endif /* CONFIG_REISERFS_CHECK */ + +#if 0 +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ +int is_used (struct super_block * s, unsigned long block) +{ + int i, j; + + i = block / (s->s_blocksize << 3); + j = block % (s->s_blocksize << 3); + if (reiserfs_test_le_bit(j, SB_AP_BITMAP (s)[i]->b_data)) + return 1; + return 0; + +} +/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%*/ +#endif + + +/* get address of corresponding bit (bitmap block number and offset in it) */ +static inline void get_bit_address (struct super_block * s, unsigned long block, int * bmap_nr, int * offset) +{ + /* It is in the bitmap block number equal to the block number divided by the number of + bits in a block. */ + *bmap_nr = block / (s->s_blocksize << 3); + /* Within that bitmap block it is located at bit offset *offset. */ + *offset = block % (s->s_blocksize << 3); + return; +} + + +/* There would be a modest performance benefit if we write a version + to free a list of blocks at once. -Hans */ + /* I wonder if it would be less modest + now that we use journaling. 
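get_bit_address() above reduces a block number to a (bitmap block, bit offset) pair: a bitmap block of s_blocksize bytes covers s_blocksize << 3 blocks, so the quotient selects the bitmap block and the remainder the bit within it. A self-contained sketch of that arithmetic, assuming a fixed 4096-byte block size purely for the example:

#include <stdio.h>

#define BLOCKSIZE 4096			/* example value; reiserfs uses s->s_blocksize */
#define BITS_PER_BITMAP_BLOCK (BLOCKSIZE << 3)

/* Map a filesystem block number onto the bitmap block that tracks it. */
static void get_bit_address(unsigned long block, int *bmap_nr, int *offset)
{
	*bmap_nr = block / BITS_PER_BITMAP_BLOCK;	/* which bitmap block */
	*offset  = block % BITS_PER_BITMAP_BLOCK;	/* which bit inside it */
}

int main(void)
{
	int nr, off;

	get_bit_address(100000, &nr, &off);
	/* 100000 / 32768 = 3, 100000 % 32768 = 1696 */
	printf("block 100000 -> bitmap %d, bit %d\n", nr, off);
	return 0;
}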
-Hans */ +void reiserfs_free_block (struct reiserfs_transaction_handle *th, unsigned long block) +{ + struct super_block * s = th->t_super; + struct reiserfs_super_block * rs; + struct buffer_head * sbh; + struct buffer_head ** apbh; + int nr, offset; + +#ifdef CONFIG_REISERFS_CHECK + if (!s) + reiserfs_panic (s, "vs-4060: reiserfs_free_block: trying to free block on nonexistent device"); + + if (is_reusable (s, block, 1) == 0) + reiserfs_panic (s, "vs-4070: reiserfs_free_block: can not free such block"); +#endif + + rs = SB_DISK_SUPER_BLOCK (s); + sbh = SB_BUFFER_WITH_SB (s); + apbh = SB_AP_BITMAP (s); + + get_bit_address (s, block, &nr, &offset); + + /* mark it before we clear it, just in case */ + journal_mark_freed(th, s, block) ; + + reiserfs_prepare_for_journal(s, apbh[nr], 1 ) ; + + /* clear bit for the given block in bit map */ + if (!reiserfs_test_and_clear_le_bit (offset, apbh[nr]->b_data)) { + reiserfs_warning ("vs-4080: reiserfs_free_block: " + "free_block (%04x:%lu)[dev:blocknr]: bit already cleared\n", + s->s_dev, block); + } + journal_mark_dirty (th, s, apbh[nr]); + + reiserfs_prepare_for_journal(s, sbh, 1) ; + /* update super block */ + rs->s_free_blocks = cpu_to_le32 (le32_to_cpu (rs->s_free_blocks) + 1); + + journal_mark_dirty (th, s, sbh); + s->s_dirt = 1; +} + + + +/* beginning from offset-th bit in bmap_nr-th bitmap block, + find_forward finds the closest zero bit. It returns 1 and zero + bit address (bitmap, offset) if zero bit found or 0 if there is no + zero bit in the forward direction */ +/* The function is NOT SCHEDULE-SAFE! */ +static int find_forward (struct super_block * s, int * bmap_nr, int * offset, int for_unformatted) +{ + int i, j; + struct buffer_head * bh; + unsigned long block_to_try = 0; + unsigned long next_block_to_try = 0 ; + + for (i = *bmap_nr; i < SB_BMAP_NR (s); i ++, *offset = 0) { + /* get corresponding bitmap block */ + bh = SB_AP_BITMAP (s)[i]; + if (buffer_locked (bh)) { + __wait_on_buffer (bh); + } +retry: + j = reiserfs_find_next_zero_le_bit ((unsigned long *)bh->b_data, + s->s_blocksize << 3, *offset); + + /* wow, this really needs to be redone. We can't allocate a block if + ** it is in the journal somehow. reiserfs_in_journal makes a suggestion + ** for a good block if the one you ask for is in the journal. Note, + ** reiserfs_in_journal might reject the block it suggests. The big + ** gain from the suggestion is when a big file has been deleted, and + ** many blocks show free in the real bitmap, but are all not free + ** in the journal list bitmaps. + ** + ** this whole system sucks. The bitmaps should reflect exactly what + ** can and can't be allocated, and the journal should update them as + ** it goes. TODO. + */ + if (j < (s->s_blocksize << 3)) { + block_to_try = (i * (s->s_blocksize << 3)) + j; + + /* the block is not in the journal, we can proceed */ + if (!(reiserfs_in_journal(s, s->s_dev, block_to_try, s->s_blocksize, for_unformatted, &next_block_to_try))) { + *bmap_nr = i; + *offset = j; + return 1; + } + /* the block is in the journal */ + else if ((j+1) < (s->s_blocksize << 3)) { /* try again */ + /* reiserfs_in_journal suggested a new block to try */ + if (next_block_to_try > 0) { + int new_i ; + get_bit_address (s, next_block_to_try, &new_i, offset); + + /* block is not in this bitmap. reset i and continue + ** we only reset i if new_i is in a later bitmap. 
+ */ + if (new_i > i) { + i = (new_i - 1 ); /* i gets incremented by the for loop */ + continue ; + } + } else { + /* no suggestion was made, just try the next block */ + *offset = j+1 ; + } + goto retry ; + } + } + } + /* zero bit not found */ + return 0; +} + +/* return 0 if no free blocks, else return 1 */ +/* The function is NOT SCHEDULE-SAFE! +** because the bitmap block we want to change could be locked, and on its +** way to the disk when we want to read it, and because of the +** flush_async_commits. Per bitmap block locks won't help much, and +** really aren't needed, as we retry later on if we try to set the bit +** and it is already set. +*/ +static int find_zero_bit_in_bitmap (struct super_block * s, + unsigned long search_start, + int * bmap_nr, int * offset, + int for_unformatted) +{ + int retry_count = 0 ; + /* get bit location (bitmap number and bit offset) of search_start block */ + get_bit_address (s, search_start, bmap_nr, offset); + + /* note that we search forward in the bitmap, benchmarks have shown that it is better to allocate in increasing + sequence, which is probably due to the disk spinning in the forward direction.. */ + if (find_forward (s, bmap_nr, offset, for_unformatted) == 0) { + /* there wasn't a free block with number greater than our + starting point, so we are going to go to the beginning of the disk */ + +retry: + search_start = 0; /* caller will reset search_start for itself also. */ + get_bit_address (s, search_start, bmap_nr, offset); + if (find_forward (s, bmap_nr,offset,for_unformatted) == 0) { + if (for_unformatted) { /* why only unformatted nodes? -Hans */ + if (retry_count == 0) { + /* we've got a chance that flushing async commits will free up + ** some space. Sync then retry + */ + flush_async_commits(s) ; + retry_count++ ; + goto retry ; + } else if (retry_count > 0) { + /* nothing more we can do. Make the others wait, flush + ** all log blocks to disk, and flush to their home locations. + ** this will free up any blocks held by the journal + */ + SB_JOURNAL(s)->j_must_wait = 1 ; + } + } + return 0; + } + } + return 1; +} + +/* get amount_needed free block numbers from scanning the bitmap of + free/used blocks. + + Optimize layout by trying to find them starting from search_start + and moving in increasing blocknr direction. (This was found to be + faster than using a bi-directional elevator_direction, in part + because of disk spin direction, in part because by the time one + reaches the end of the disk the beginning of the disk is the least + congested). + + search_start is the block number of the left + semantic neighbor of the node we create. + + return CARRY_ON if everything is ok + return NO_DISK_SPACE if out of disk space + return NO_MORE_UNUSED_CONTIGUOUS_BLOCKS if the block we found is not contiguous to the last one + + return block numbers found, in the array free_blocknrs. assumes + that any non-zero entries already present in the array are valid. + This feature is perhaps convenient coding when one might not have + used all blocknrs from the last time one called this function, or + perhaps it is an archaism from the days of schedule tracking, one + of us ought to reread the code that calls this, and analyze whether + it is still the right way to code it. + + spare space is used only when priority is set to 1. reiserfsck has + its own reiserfs_new_blocknrs, which can use reserved space + + exactly what reserved space? the SPARE_SPACE? if so, please comment reiserfs.h. 
+ + Give example of who uses spare space, and say that it is a deadlock + avoidance mechanism. -Hans */ + +/* This function is NOT SCHEDULE-SAFE! */ + +static int do_reiserfs_new_blocknrs (struct reiserfs_transaction_handle *th, + unsigned long * free_blocknrs, + unsigned long search_start, + int amount_needed, int priority, + int for_unformatted, + int for_prealloc) +{ + struct super_block * s = th->t_super; + int i, j; + unsigned long * block_list_start = free_blocknrs; + int init_amount_needed = amount_needed; + unsigned long new_block = 0 ; + + if (SB_FREE_BLOCKS (s) < SPARE_SPACE && !priority) + /* we can answer NO_DISK_SPACE being asked for new block with + priority 0 */ + return NO_DISK_SPACE; + +#ifdef CONFIG_REISERFS_CHECK + if (!s) + reiserfs_panic (s, "vs-4090: reiserfs_new_blocknrs: trying to get new block from nonexistent device"); + + if (search_start == MAX_B_NUM) + reiserfs_panic (s, "vs-4100: reiserfs_new_blocknrs: we are optimizing location based on " + "the bogus location of a temp buffer (%lu).", search_start); + + if (amount_needed < 1 || amount_needed > 2) + reiserfs_panic (s, "vs-4110: reiserfs_new_blocknrs: amount_needed parameter incorrect (%d)", amount_needed); +#endif /* CONFIG_REISERFS_CHECK */ + + /* We continue the while loop if another process snatches our found + * free block from us after we find it but before we successfully + * mark it as in use, or if we need to use sync to free up some + * blocks on the preserve list. */ + + while (amount_needed--) { + /* skip over any blocknrs already gotten last time. */ + if (*(free_blocknrs) != 0) { +#ifdef CONFIG_REISERFS_CHECK + if (is_reusable (s, *free_blocknrs, 1) == 0) + reiserfs_panic(s, "vs-4120: reiserfs_new_blocknrs: bad blocknr on free_blocknrs list"); +#endif /* CONFIG_REISERFS_CHECK */ + free_blocknrs++; + continue; + } + /* look for zero bits in bitmap */ + if (find_zero_bit_in_bitmap(s,search_start, &i, &j,for_unformatted) == 0) { + if (find_zero_bit_in_bitmap(s,search_start,&i,&j, for_unformatted) == 0) { + /* recode without the goto and without + the if. It will require a + duplicate for. This is worth the + code clarity. Your way was + admirable, and just a bit too + clever in saving instructions.:-) + I'd say create a new function, but + that would slow things also, yes? + -Hans */ +free_and_return: + for ( ; block_list_start != free_blocknrs; block_list_start++) { + reiserfs_free_block (th, *block_list_start); + *block_list_start = 0; + } + if (for_prealloc) + return NO_MORE_UNUSED_CONTIGUOUS_BLOCKS; + else + return NO_DISK_SPACE; + } + } + + /* i and j now contain the results of the search. i = bitmap block + number containing free block, j = offset in this block. we + compute the blocknr which is our result, store it in + free_blocknrs, and increment the pointer so that on the next + loop we will insert into the next location in the array. Also + in preparation for the next loop, search_start is changed so + that the next search will not rescan the same range but will + start where this search finished. Note that while it is + possible that schedule has occurred and blocks have been freed + in that range, it is perhaps more important that the blocks + returned be near each other than that they be near their other + neighbors, and it also simplifies and speeds the code this way. */ + + /* journal: we need to make sure the block we are giving out is not + ** a log block, horrible things would happen there. 
+ */ + new_block = (i * (s->s_blocksize << 3)) + j; + if (for_prealloc && (new_block - 1) != search_start) { + /* preallocated blocks must be contiguous, bail if we didnt find one. + ** this is not a bug. We want to do the check here, before the + ** bitmap block is prepared, and before we set the bit and log the + ** bitmap. + ** + ** If we do the check after this function returns, we have to + ** call reiserfs_free_block for new_block, which would be pure + ** overhead. + ** + ** for_prealloc should only be set if the caller can deal with the + ** NO_MORE_UNUSED_CONTIGUOUS_BLOCKS return value. This can be + ** returned before the disk is actually full + */ + goto free_and_return ; + } + search_start = new_block ; + if (search_start >= reiserfs_get_journal_block(s) && + search_start < (reiserfs_get_journal_block(s) + JOURNAL_BLOCK_COUNT)) { + reiserfs_warning("vs-4130: reiserfs_new_blocknrs: trying to allocate log block %lu\n", + search_start) ; + search_start++ ; + amount_needed++ ; + continue ; + } + + + reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[i], 1) ; + +#ifdef CONFIG_REISERFS_CHECK + if (buffer_locked (SB_AP_BITMAP (s)[i]) || is_reusable (s, search_start, 0) == 0) + reiserfs_panic (s, "vs-4140: reiserfs_new_blocknrs: bitmap block is locked or bad block number found"); +#endif + + /* if this bit was already set, we've scheduled, and someone else + ** has allocated it. loop around and try again + */ + if (reiserfs_test_and_set_le_bit (j, SB_AP_BITMAP (s)[i]->b_data)) { + reiserfs_warning("vs-4150: reiserfs_new_blocknrs, block not free"); + reiserfs_restore_prepared_buffer(s, SB_AP_BITMAP(s)[i]) ; + amount_needed++ ; + continue ; + } + journal_mark_dirty (th, s, SB_AP_BITMAP (s)[i]); + *free_blocknrs = search_start ; + free_blocknrs ++; + } + + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + /* update free block count in super block */ + s->u.reiserfs_sb.s_rs->s_free_blocks = cpu_to_le32 (SB_FREE_BLOCKS (s) - init_amount_needed); + journal_mark_dirty (th, s, SB_BUFFER_WITH_SB (s)); + s->s_dirt = 1; + + return CARRY_ON; +} + +// this is called only by get_empty_nodes with for_preserve_list==0 +int reiserfs_new_blocknrs (struct reiserfs_transaction_handle *th, unsigned long * free_blocknrs, + unsigned long search_start, int amount_needed) { + return do_reiserfs_new_blocknrs(th, free_blocknrs, search_start, amount_needed, 0/*for_preserve_list-priority*/, 0/*for_formatted*/, 0/*for_prealloc */) ; +} + + +// called by get_new_buffer and by reiserfs_get_block with amount_needed == 1 and for_preserve_list == 0 +int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle *th, unsigned long * free_blocknrs, + unsigned long search_start) { + return do_reiserfs_new_blocknrs(th, free_blocknrs, search_start, + 1/*amount_needed*/, + 0/*for_preserve_list-priority*/, + 1/*for formatted*/, + 0/*for prealloc */) ; +} + +#ifdef REISERFS_PREALLOCATE + +/* +** We pre-allocate 8 blocks. Pre-allocation is used for files > 16 KB only. +** This lowers fragmentation on large files by grabbing a contiguous set of +** blocks at once. It also limits the number of times the bitmap block is +** logged by making X number of allocation changes in a single transaction. +** +** We are using a border to divide the disk into two parts. The first part +** is used for tree blocks, which have a very high turnover rate (they +** are constantly allocated then freed) +** +** The second part of the disk is for the unformatted nodes of larger files. 
+** Putting them away from the tree blocks lowers fragmentation, and makes +** it easier to group files together. There are a number of different +** allocation schemes being tried right now, each is documented below. +** +** A great deal of the allocator's speed comes because reiserfs_get_block +** sends us the block number of the last unformatted node in the file. Once +** a given block is allocated past the border, we don't collide with the +** blocks near the search_start again. +** +*/ +int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th, + struct inode * p_s_inode, + unsigned long * free_blocknrs, + unsigned long search_start) +{ + int ret=0, blks_gotten=0; + unsigned long border = 0; + unsigned long bstart = 0; + unsigned long hash_in, hash_out; + int allocated[PREALLOCATION_SIZE]; + int blks; + + if (!reiserfs_no_border(th->t_super)) { + /* we default to having the border at the 10% mark of the disk. This + ** is an arbitrary decision and it needs tuning. It also needs a limit + ** to prevent it from taking too much space on huge drives. + */ + bstart = (SB_BLOCK_COUNT(th->t_super) / 10); + } + if (!reiserfs_no_unhashed_relocation(th->t_super)) { + /* this is a very simple first attempt at preventing too much grouping + ** around the border value. Since k_dir_id is never larger than the + ** highest allocated oid, it is far from perfect, and files will tend + ** to be grouped towards the start of the border + */ + border = (INODE_PKEY(p_s_inode)->k_dir_id) % (SB_BLOCK_COUNT(th->t_super) - bstart - 1) ; + } else { + /* why would we want to delcare a local variable to this if statement + ** name border????? -chris + ** unsigned long border = 0; + */ + if (!reiserfs_hashed_relocation(th->t_super)) { + hash_in = (INODE_PKEY(p_s_inode))->k_dir_id; + /* I wonder if the CPU cost of the + hash will obscure the layout + effect? Of course, whether that + effect is good or bad we don't + know.... :-) */ + + hash_out = keyed_hash(((char *) (&hash_in)), 4); + border = hash_out % (SB_BLOCK_COUNT(th->t_super) - bstart - 1) ; + } + } + border += bstart ; + allocated[0] = 0 ; /* important. Allows a check later on to see if at + * least one block was allocated. This prevents false + * no disk space returns + */ + + if ( (p_s_inode->i_size < 4 * 4096) || + !(S_ISREG(p_s_inode->i_mode)) ) + { + if ( search_start < border + || ( + /* allow us to test whether it is a + good idea to prevent files from + getting too far away from their + packing locality by some unexpected + means. This might be poor code for + directories whose files total + larger than 1/10th of the disk, and + it might be good code for + suffering from old insertions when the disk + was almost full. */ + /* changed from !reiserfs_test3(th->t_super), which doesn't + ** seem like a good idea. Think about adding blocks to + ** a large file. If you've allocated 10% of the disk + ** in contiguous blocks, you start over at the border value + ** for every new allocation. This throws away all the + ** information sent in about the last block that was allocated + ** in the file. Not a good general case at all. 
+ ** -chris + */ + reiserfs_test4(th->t_super) && + (search_start > border + (SB_BLOCK_COUNT(th->t_super) / 10)) + ) + ) + search_start=border; + + ret = do_reiserfs_new_blocknrs(th, free_blocknrs, search_start, + 1/*amount_needed*/, + 0/*use reserved blocks for root */, + 1/*for_formatted*/, + 0/*for prealloc */) ; + return ret; + } + + /* take a block off the prealloc list and return it -Hans */ + if (p_s_inode->u.reiserfs_i.i_prealloc_count > 0) { + p_s_inode->u.reiserfs_i.i_prealloc_count--; + *free_blocknrs = p_s_inode->u.reiserfs_i.i_prealloc_block++; + return ret; + } + + /* else get a new preallocation for the file */ + reiserfs_discard_prealloc (th, p_s_inode); + /* this uses the last preallocated block as the search_start. discard + ** prealloc does not zero out this number. + */ + if (search_start <= p_s_inode->u.reiserfs_i.i_prealloc_block) { + search_start = p_s_inode->u.reiserfs_i.i_prealloc_block; + } + + /* doing the compare again forces search_start to be >= the border, + ** even if the file already had prealloction done. This seems extra, + ** and should probably be removed + */ + if ( search_start < border ) search_start=border; + + *free_blocknrs = 0; + blks = PREALLOCATION_SIZE-1; + for (blks_gotten=0; blks_gotten 0)/*must_be_contiguous*/) ; + /* if we didn't find a block this time, adjust blks to reflect + ** the actual number of blocks allocated + */ + if (ret != CARRY_ON) { + blks = blks_gotten > 0 ? (blks_gotten - 1) : 0 ; + break ; + } + allocated[blks_gotten]= *free_blocknrs; +#ifdef CONFIG_REISERFS_CHECK + if ( (blks_gotten>0) && (allocated[blks_gotten] - allocated[blks_gotten-1]) != 1 ) { + /* this should be caught by new_blocknrs now, checking code */ + reiserfs_warning("yura-1, reiserfs_new_unf_blocknrs2: pre-allocated not contiguous set of blocks!\n") ; + reiserfs_free_block(th, allocated[blks_gotten]); + blks = blks_gotten-1; + break; + } +#endif + if (blks_gotten==0) { + p_s_inode->u.reiserfs_i.i_prealloc_block = *free_blocknrs; + } + search_start = *free_blocknrs; + *free_blocknrs = 0; + } + p_s_inode->u.reiserfs_i.i_prealloc_count = blks; + *free_blocknrs = p_s_inode->u.reiserfs_i.i_prealloc_block; + p_s_inode->u.reiserfs_i.i_prealloc_block++; + + /* we did actually manage to get 1 block */ + if (ret != CARRY_ON && allocated[0] > 0) { + return CARRY_ON ; + } + /* NO_MORE_UNUSED_CONTIGUOUS_BLOCKS should only mean something to + ** the preallocation code. The rest of the filesystem asks for a block + ** and should either get it, or know the disk is full. The code + ** above should never allow ret == NO_MORE_UNUSED_CONTIGUOUS_BLOCK, + ** as it doesn't send for_prealloc = 1 to do_reiserfs_new_blocknrs + ** unless it has already successfully allocated at least one block. + ** Just in case, we translate into a return value the rest of the + ** filesystem can understand. + ** + ** It is an error to change this without making the + ** rest of the filesystem understand NO_MORE_UNUSED_CONTIGUOUS_BLOCKS + ** If you consider it a bug to return NO_DISK_SPACE here, fix the rest + ** of the fs first. + */ + if (ret == NO_MORE_UNUSED_CONTIGUOUS_BLOCKS) { +#ifdef CONFIG_REISERFS_CHECK + reiserfs_warning("reiser-2015: this shouldn't happen, may cause false out of disk space error"); +#endif + return NO_DISK_SPACE; + } + return ret; +} + +// +// a portion of this function, was derived from minix or ext2's +// analog. You should be able to tell which portion by looking at the +// ext2 code and comparing. 
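The preallocation code above hands blocks out of a small per-inode window: i_prealloc_block is the next reserved block number, i_prealloc_count the number still unused, and whatever remains in the window is eventually given back, which is what reiserfs_discard_prealloc() below does. A stripped-down sketch of that consume/discard bookkeeping; free_block() is a hypothetical stand-in for reiserfs_free_block() and the block numbers are made up.

#include <stdio.h>

struct prealloc {
	unsigned long next_block;	/* like i_prealloc_block */
	int count;			/* like i_prealloc_count */
};

/* Hypothetical stand-in for reiserfs_free_block(). */
static void free_block(unsigned long block)
{
	printf("freeing unused preallocated block %lu\n", block);
}

/* Take one block from the window, or report that it is empty so the
 * caller knows to allocate a fresh contiguous window. */
static int take_block(struct prealloc *p, unsigned long *block)
{
	if (p->count <= 0)
		return 0;
	p->count--;
	*block = p->next_block++;
	return 1;
}

/* Return whatever is left of the window to the free pool. */
static void discard(struct prealloc *p)
{
	while (p->count-- > 0)
		free_block(p->next_block++);
	p->count = 0;
}

int main(void)
{
	struct prealloc p = { 5000, 8 };
	unsigned long blk;

	take_block(&p, &blk);		/* uses block 5000, 7 left */
	take_block(&p, &blk);		/* uses block 5001, 6 left */
	discard(&p);			/* frees 5002..5007 */
	return 0;
}

Grabbing the window as one contiguous run is what lowers fragmentation for large files, as the comments above explain.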
+ +void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th, + struct inode * inode) +{ + if (inode->u.reiserfs_i.i_prealloc_count > 0) { + while (inode->u.reiserfs_i.i_prealloc_count--) { + reiserfs_free_block(th,inode->u.reiserfs_i.i_prealloc_block); + inode->u.reiserfs_i.i_prealloc_block++; + } + } + inode->u.reiserfs_i.i_prealloc_count = 0; +} +#endif diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/buffer2.c linux/fs/reiserfs/buffer2.c --- v2.4.0/linux/fs/reiserfs/buffer2.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/buffer2.c Mon Jan 15 15:31:19 2001 @@ -0,0 +1,358 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + + +/* + * Contains code from + * + * linux/include/linux/lock.h and linux/fs/buffer.c /linux/fs/minix/fsync.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +#else + +#include "nokernel.h" + +#endif + + +/* + * wait_buffer_until_released + * reiserfs_bread + * reiserfs_getblk + * get_new_buffer + */ + + + +/* when we allocate a new block (get_new_buffer, get_empty_nodes) and + get buffer for it, it is possible that it is held by someone else + or even by this process. In this function we wait until all other + holders release buffer. To make sure, that current process does not + hold we did free all buffers in tree balance structure + (get_empty_nodes and get_nodes_for_preserving) or in path structure + only (get_new_buffer) just before calling this */ +void wait_buffer_until_released (struct buffer_head * bh) +{ + int repeat_counter = 0; + + while (atomic_read (&(bh->b_count)) > 1) { + + if ( !(++repeat_counter % 30000000) ) { + reiserfs_warning ("vs-3050: wait_buffer_until_released: nobody releases buffer (%b). Still waiting (%d) %cJDIRTY %cJWAIT\n", + bh, repeat_counter, buffer_journaled(bh) ? ' ' : '!', + buffer_journal_dirty(bh) ? ' ' : '!'); + } + run_task_queue(&tq_disk); + current->policy |= SCHED_YIELD; + /*current->counter = 0;*/ + schedule(); + } + if (repeat_counter > 30000000) { + reiserfs_warning("vs-3051: done waiting, ignore vs-3050 messages for (%b)\n", bh) ; + } +} + +/* + * reiserfs_bread() reads a specified block and returns the buffer that contains + * it. It returns NULL if the block was unreadable. + */ +/* It first tries to find the block in cache, and if it cannot do so + then it creates a new buffer and schedules I/O to read the + block. */ +/* The function is NOT SCHEDULE-SAFE! */ + +struct buffer_head * reiserfs_bread (kdev_t n_dev, int n_block, int n_size) +{ + return bread (n_dev, n_block, n_size); +} + +/* This function looks for a buffer which contains a given block. If + the block is in cache it returns it, otherwise it returns a new + buffer which is not uptodate. This is called by reiserfs_bread and + other functions. Note that get_new_buffer ought to be called this + and this ought to be called get_new_buffer, since this doesn't + actually get the block off of the disk. */ +/* The function is NOT SCHEDULE-SAFE! */ + +struct buffer_head * reiserfs_getblk (kdev_t n_dev, int n_block, int n_size) +{ + return getblk (n_dev, n_block, n_size); +} + +#ifdef NEW_GET_NEW_BUFFER + +/* returns one buffer with a blocknr near blocknr. 
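wait_buffer_until_released() above is a plain polling wait: while someone else still holds a reference to the buffer, kick the disk task queue, yield the CPU, and try again, warning if the wait drags on implausibly long. A user-space analog of that loop; buffer_busy() is a hypothetical stand-in for the b_count > 1 test and gives up after a few polls so the sketch terminates.

#include <stdio.h>
#include <sched.h>

/* Hypothetical stand-in for "someone else still holds this buffer". */
static int buffer_busy(void)
{
	static int polls;
	return ++polls < 1000;
}

static void wait_until_released(void)
{
	unsigned long repeat = 0;

	while (buffer_busy()) {
		if (!(++repeat % 30000000))
			fprintf(stderr, "still waiting after %lu polls\n", repeat);
		sched_yield();		/* the kernel version yields and calls schedule() */
	}
}

int main(void)
{
	wait_until_released();
	printf("buffer released\n");
	return 0;
}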
*/ +static int get_new_buffer_near_blocknr( + struct super_block * p_s_sb, + int blocknr, + struct buffer_head ** pp_s_new_bh, + struct path * p_s_path + ) { + unsigned long n_new_blocknumber = 0; + int n_ret_value, + n_repeat = CARRY_ON; + +#ifdef CONFIG_REISERFS_CHECK + int repeat_counter = 0; + + if (!blocknr) + printk ("blocknr passed to get_new_buffer_near_blocknr was 0"); +#endif + + + if ( (n_ret_value = reiserfs_new_blocknrs (p_s_sb, &n_new_blocknumber, + blocknr, 1)) == NO_DISK_SPACE ) + return NO_DISK_SPACE; + + *pp_s_new_bh = reiserfs_getblk(p_s_sb->s_dev, n_new_blocknumber, p_s_sb->s_blocksize); + if ( buffer_uptodate(*pp_s_new_bh) ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( buffer_dirty(*pp_s_new_bh) || (*pp_s_new_bh)->b_dev == NODEV ) { + reiserfs_panic(p_s_sb, "PAP-14080: get_new_buffer: invalid uptodate buffer %b for the new block", *pp_s_new_bh); + } +#endif + + /* Free path buffers to prevent deadlock. */ + /* It is possible that this process has the buffer, which this function is getting, already in + its path, and is responsible for double incrementing the value of b_count. If we recalculate + the path after schedule we can avoid risking an endless loop. This problematic situation is + possible in a multiple processing environment. Suppose process 1 has acquired a path P; then + process 2 balanced and remove block A from the tree. Process 1 continues and runs + get_new_buffer, that returns buffer with block A. If node A was on the path P, then it will + have b_count == 2. If we now will simply wait in while ( (*pp_s_new_bh)->b_count > 1 ) we get + into an endless loop, as nobody will release this buffer and the current process holds buffer + twice. That is why we do decrement_counters_in_path(p_s_path) before waiting until b_count + becomes 1. (it there were other processes holding node A, then eventually we will get a + moment, when all of them released a buffer). */ + if ( atomic_read (&((*pp_s_new_bh)->b_count)) > 1 ) { + decrement_counters_in_path(p_s_path); + n_ret_value |= SCHEDULE_OCCURRED; + } + + while ( atomic_read (&((*pp_s_new_bh)->b_count)) > 1 ) { + +#ifdef REISERFS_INFO + printk("get_new_buffer() calls schedule to decrement b_count\n"); +#endif + +#ifdef CONFIG_REISERFS_CHECK + if ( ! (++repeat_counter % 10000) ) + printk("get_new_buffer(%u): counter(%d) too big", current->pid, repeat_counter); +#endif + + current->counter = 0; + schedule(); + } + +#ifdef CONFIG_REISERFS_CHECK + if ( buffer_dirty(*pp_s_new_bh) || (*pp_s_new_bh)->b_dev == NODEV ) { + print_buffer_head(*pp_s_new_bh,"get_new_buffer"); + reiserfs_panic(p_s_sb, "PAP-14090: get_new_buffer: invalid uptodate buffer %b for the new block(case 2)", *pp_s_new_bh); + } +#endif + + } + else { + ; + +#ifdef CONFIG_REISERFS_CHECK + if (atomic_read (&((*pp_s_new_bh)->b_count)) != 1) { + reiserfs_panic(p_s_sb,"PAP-14100: get_new_buffer: not uptodate buffer %b for the new block has b_count more than one", + *pp_s_new_bh); + } +#endif + + } + return (n_ret_value | n_repeat); +} + + +/* returns the block number of the last unformatted node, assumes p_s_key_to_search.k_offset is a byte in the tail of + the file, Useful for when you want to append to a file, and convert a direct item into an unformatted node near the + last unformatted node of the file. Putting the unformatted node near the direct item is potentially very bad to do. + If there is no unformatted node in the file, then we return the block number of the direct item. */ +/* The function is NOT SCHEDULE-SAFE! 
*/ +inline int get_last_unformatted_node_blocknr_of_file( struct key * p_s_key_to_search, struct super_block * p_s_sb, + struct buffer_head * p_s_bh + struct path * p_unf_search_path, struct inode * p_s_inode) + +{ + struct key unf_key_to_search; + struct item_head * p_s_ih; + int n_pos_in_item; + struct buffer_head * p_indirect_item_bh; + + copy_key(&unf_key_to_search,p_s_key_to_search); + unf_key_to_search.k_uniqueness = TYPE_INDIRECT; + unf_key_to_search.k_offset = p_s_inode->u.reiserfs_i.i_first_direct_byte - 1; + + /* p_s_key_to_search->k_offset - MAX_ITEM_LEN(p_s_sb->s_blocksize); */ + if (search_for_position_by_key (p_s_sb, &unf_key_to_search, p_unf_search_path, &n_pos_in_item) == POSITION_FOUND) + { + p_s_ih = B_N_PITEM_HEAD(p_indirect_item_bh = PATH_PLAST_BUFFER(p_unf_search_path), PATH_LAST_POSITION(p_unf_search_path)); + return (B_I_POS_UNFM_POINTER(p_indirect_item_bh, p_s_ih, n_pos_in_item)); + } + /* else */ + printk("reiser-1800: search for unformatted node failed, p_s_key_to_search->k_offset = %u, unf_key_to_search.k_offset = %u, MAX_ITEM_LEN(p_s_sb->s_blocksize) = %ld, debug this\n", p_s_key_to_search->k_offset, unf_key_to_search.k_offset, MAX_ITEM_LEN(p_s_sb->s_blocksize) ); + print_buffer_head(PATH_PLAST_BUFFER(p_unf_search_path), "the buffer holding the item before the key we failed to find"); + print_block_head(PATH_PLAST_BUFFER(p_unf_search_path), "the block head"); + return 0; /* keeps the compiler quiet */ +} + + + /* hasn't been out of disk space tested */ +/* The function is NOT SCHEDULE-SAFE! */ +static int get_buffer_near_last_unf ( struct super_block * p_s_sb, struct key * p_s_key_to_search, + struct inode * p_s_inode, struct buffer_head * p_s_bh, + struct buffer_head ** pp_s_un_bh, struct path * p_s_search_path) +{ + int unf_blocknr = 0, /* blocknr from which we start search for a free block for an unformatted node, if 0 + then we didn't find an unformatted node though we might have found a file hole */ + n_repeat = CARRY_ON; + struct key unf_key_to_search; + struct path unf_search_path; + + copy_key(&unf_key_to_search,p_s_key_to_search); + unf_key_to_search.k_uniqueness = TYPE_INDIRECT; + + if ( + (p_s_inode->u.reiserfs_i.i_first_direct_byte > 4095) /* i_first_direct_byte gets used for all sorts of + crap other than what the name indicates, thus + testing to see if it is 0 is not enough */ + && (p_s_inode->u.reiserfs_i.i_first_direct_byte < MAX_KEY_OFFSET) /* if there is no direct item then + i_first_direct_byte = MAX_KEY_OFFSET */ + ) + { + /* actually, we don't want the last unformatted node, we want the last unformatted node + which is before the current file offset */ + unf_key_to_search.k_offset = ((p_s_inode->u.reiserfs_i.i_first_direct_byte -1) < unf_key_to_search.k_offset) ? p_s_inode->u.reiserfs_i.i_first_direct_byte -1 : unf_key_to_search.k_offset; + + while (unf_key_to_search.k_offset > -1) + { + /* This is our poorly documented way of initializing paths. -Hans */ + init_path (&unf_search_path); + /* get the blocknr from which we start the search for a free block. 
*/ + unf_blocknr = get_last_unformatted_node_blocknr_of_file( p_s_key_to_search, /* assumes this points to the file tail */ + p_s_sb, /* lets us figure out the block size */ + p_s_bh, /* if there is no unformatted node in the file, + then it returns p_s_bh->b_blocknr */ + &unf_search_path, + p_s_inode + ); +/* printk("in while loop: unf_blocknr = %d, *pp_s_un_bh = %p\n", unf_blocknr, *pp_s_un_bh); */ + if (unf_blocknr) + break; + else /* release the path and search again, this could be really slow for huge + holes.....better to spend the coding time adding compression though.... -Hans */ + { + /* Vladimir, is it a problem that I don't brelse these buffers ?-Hans */ + decrement_counters_in_path(&unf_search_path); + unf_key_to_search.k_offset -= 4096; + } + } + if (unf_blocknr) { + n_repeat |= get_new_buffer_near_blocknr(p_s_sb, unf_blocknr, pp_s_un_bh, p_s_search_path); + } + else { /* all unformatted nodes are holes */ + n_repeat |= get_new_buffer_near_blocknr(p_s_sb, p_s_bh->b_blocknr, pp_s_un_bh, p_s_search_path); + } + } + else { /* file has no unformatted nodes */ + n_repeat |= get_new_buffer_near_blocknr(p_s_sb, p_s_bh->b_blocknr, pp_s_un_bh, p_s_search_path); +/* printk("in else: unf_blocknr = %d, *pp_s_un_bh = %p\n", unf_blocknr, *pp_s_un_bh); */ +/* print_path (0, p_s_search_path); */ + } + + return n_repeat; +} + +#endif /* NEW_GET_NEW_BUFFER */ + + +#ifdef OLD_GET_NEW_BUFFER + +/* The function is NOT SCHEDULE-SAFE! */ +int get_new_buffer( + struct reiserfs_transaction_handle *th, + struct buffer_head * p_s_bh, + struct buffer_head ** pp_s_new_bh, + struct path * p_s_path + ) { + unsigned long n_new_blocknumber = 0; + int n_repeat; + struct super_block * p_s_sb = th->t_super; + + if ( (n_repeat = reiserfs_new_unf_blocknrs (th, &n_new_blocknumber, p_s_bh->b_blocknr)) == NO_DISK_SPACE ) + return NO_DISK_SPACE; + + *pp_s_new_bh = reiserfs_getblk(p_s_sb->s_dev, n_new_blocknumber, p_s_sb->s_blocksize); + if (atomic_read (&(*pp_s_new_bh)->b_count) > 1) { + /* Free path buffers to prevent deadlock which can occur in the + situation like : this process holds p_s_path; Block + (*pp_s_new_bh)->b_blocknr is on the path p_s_path, but it is + not necessary, that *pp_s_new_bh is in the tree; process 2 + could remove it from the tree and freed block + (*pp_s_new_bh)->b_blocknr. Reiserfs_new_blocknrs in above + returns block (*pp_s_new_bh)->b_blocknr. Reiserfs_getblk gets + buffer for it, and it has b_count > 1. If we now will simply + wait in while ( (*pp_s_new_bh)->b_count > 1 ) we get into an + endless loop, as nobody will release this buffer and the + current process holds buffer twice. That is why we do + decrement_counters_in_path(p_s_path) before waiting until + b_count becomes 1. (it there were other processes holding node + pp_s_new_bh, then eventually we will get a moment, when all of + them released a buffer). */ + decrement_counters_in_path(p_s_path); + wait_buffer_until_released (*pp_s_new_bh); + n_repeat |= SCHEDULE_OCCURRED; + } + +#ifdef CONFIG_REISERFS_CHECK + if ( atomic_read (&((*pp_s_new_bh)->b_count)) != 1 || buffer_dirty (*pp_s_new_bh)) { + reiserfs_panic(p_s_sb,"PAP-14100: get_new_buffer: not free or dirty buffer %b for the new block", + *pp_s_new_bh); + } +#endif + + return n_repeat; +} + +#endif /* OLD_GET_NEW_BUFFER */ + + +#ifdef GET_MANY_BLOCKNRS + /* code not yet functional */ +get_next_blocknr ( + unsigned long * p_blocknr_array, /* we get a whole bunch of blocknrs all at once for + the write. This is better than getting them one at + a time. 
*/ + unsigned long ** p_blocknr_index, /* pointer to current offset into the array. */ + unsigned long blocknr_array_length +) +{ + unsigned long return_value; + + if (*p_blocknr_index < p_blocknr_array + blocknr_array_length) { + return_value = **p_blocknr_index; + **p_blocknr_index = 0; + *p_blocknr_index++; + return (return_value); + } + else + { + kfree (p_blocknr_array); + } +} +#endif /* GET_MANY_BLOCKNRS */ + diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/dir.c linux/fs/reiserfs/dir.c --- v2.4.0/linux/fs/reiserfs/dir.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/dir.c Mon Jan 15 15:31:19 2001 @@ -0,0 +1,249 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include +#include +#include +#include + +#else + +#include "nokernel.h" + +#endif + +extern struct key MIN_KEY; + +static int reiserfs_readdir (struct file *, void *, filldir_t); +int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, int datasync) ; + +struct file_operations reiserfs_dir_operations = { + read: generic_read_dir, + readdir: reiserfs_readdir, + fsync: reiserfs_dir_fsync, +}; + +/* + * directories can handle most operations... + */ +struct inode_operations reiserfs_dir_inode_operations = { + //&reiserfs_dir_operations, /* default_file_ops */ + create: reiserfs_create, + lookup: reiserfs_lookup, + link: reiserfs_link, + unlink: reiserfs_unlink, + symlink: reiserfs_symlink, + mkdir: reiserfs_mkdir, + rmdir: reiserfs_rmdir, + mknod: reiserfs_mknod, + rename: reiserfs_rename, +}; + +int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry, int datasync) { + int ret = 0 ; + int windex ; + struct reiserfs_transaction_handle th ; + + journal_begin(&th, dentry->d_inode->i_sb, 1) ; + windex = push_journal_writer("dir_fsync") ; + reiserfs_prepare_for_journal(th.t_super, SB_BUFFER_WITH_SB(th.t_super), 1) ; + journal_mark_dirty(&th, dentry->d_inode->i_sb, SB_BUFFER_WITH_SB (dentry->d_inode->i_sb)) ; + pop_journal_writer(windex) ; + journal_end_sync(&th, dentry->d_inode->i_sb, 1) ; + + return ret ; +} + + +#define store_ih(where,what) copy_item_head (where, what) + +// +static int reiserfs_readdir (struct file * filp, void * dirent, filldir_t filldir) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ + INITIALIZE_PATH (path_to_entry); + struct buffer_head * bh; + int item_num, entry_num; + struct key * rkey; + struct item_head * ih, tmp_ih; + int search_res; + char * local_buf; + loff_t next_pos; + char small_buf[32] ; /* avoid kmalloc if we can */ + struct reiserfs_dir_entry de; + + + reiserfs_check_lock_depth("readdir") ; + + /* form key for search the next directory entry using f_pos field of + file structure */ + make_cpu_key (&pos_key, inode, (filp->f_pos) ? 
(filp->f_pos) : DOT_OFFSET, + TYPE_DIRENTRY, 3); + next_pos = cpu_key_k_offset (&pos_key); + + /* reiserfs_warning ("reiserfs_readdir 1: f_pos = %Ld\n", filp->f_pos);*/ + + while (1) { + research: + /* search the directory item, containing entry with specified key */ + search_res = search_by_entry_key (inode->i_sb, &pos_key, &path_to_entry, &de); + if (search_res == IO_ERROR) { + // FIXME: we could just skip part of directory which could + // not be read + return -EIO; + } + entry_num = de.de_entry_num; + bh = de.de_bh; + item_num = de.de_item_num; + ih = de.de_ih; + store_ih (&tmp_ih, ih); + +#ifdef CONFIG_REISERFS_CHECK + /* we must have found item, that is item of this directory, */ + if (COMP_SHORT_KEYS (&(ih->ih_key), &pos_key)) + reiserfs_panic (inode->i_sb, "vs-9000: reiserfs_readdir: " + "found item %h does not match to dir we readdir %k", + ih, &pos_key); + + if (item_num > B_NR_ITEMS (bh) - 1) + reiserfs_panic (inode->i_sb, "vs-9005: reiserfs_readdir: " + "item_num == %d, item amount == %d", + item_num, B_NR_ITEMS (bh)); + + /* and entry must be not more than number of entries in the item */ + if (I_ENTRY_COUNT (ih) < entry_num) + reiserfs_panic (inode->i_sb, "vs-9010: reiserfs_readdir: " + "entry number is too big %d (%d)", + entry_num, I_ENTRY_COUNT (ih)); +#endif /* CONFIG_REISERFS_CHECK */ + + if (search_res == POSITION_FOUND || entry_num < I_ENTRY_COUNT (ih)) { + /* go through all entries in the directory item beginning from the entry, that has been found */ + struct reiserfs_de_head * deh = B_I_DEH (bh, ih) + entry_num; + + for (; entry_num < I_ENTRY_COUNT (ih); entry_num ++, deh ++) { + int d_reclen; + char * d_name; + off_t d_off; + ino_t d_ino; + + if (!de_visible (deh)) + /* it is hidden entry */ + continue; + d_reclen = entry_length (bh, ih, entry_num); + d_name = B_I_DEH_ENTRY_FILE_NAME (bh, ih, deh); + if (!d_name[d_reclen - 1]) + d_reclen = strlen (d_name); + + if (d_reclen > REISERFS_MAX_NAME_LEN(inode->i_sb->s_blocksize)){ + /* too big to send back to VFS */ + continue ; + } + d_off = deh_offset (deh); + filp->f_pos = d_off ; + d_ino = deh_objectid (deh); + if (d_reclen <= 32) { + local_buf = small_buf ; + } else { + local_buf = kmalloc(d_reclen, GFP_BUFFER) ; + if (!local_buf) { + pathrelse (&path_to_entry); + return -ENOMEM ; + } + if (item_moved (&tmp_ih, &path_to_entry)) { + kfree(local_buf) ; + goto research; + } + } + // Note, that we copy name to user space via temporary + // buffer (local_buf) because filldir will block if + // user space buffer is swapped out. At that time + // entry can move to somewhere else + memcpy (local_buf, d_name, d_reclen); + if (filldir (dirent, d_name, d_reclen, d_off, d_ino, + DT_UNKNOWN) < 0) { + if (local_buf != small_buf) { + kfree(local_buf) ; + } + goto end; + } + if (local_buf != small_buf) { + kfree(local_buf) ; + } + + // next entry should be looked for with such offset + next_pos = deh_offset (deh) + 1; + + if (item_moved (&tmp_ih, &path_to_entry)) { + reiserfs_warning ("vs-9020: reiserfs_readdir " + "things are moving under hands. Researching..\n"); + goto research; + } + } /* for */ + } + + if (item_num != B_NR_ITEMS (bh) - 1) + // end of directory has been reached + goto end; + + /* item we went through is last item of node. Using right + delimiting key check is it directory end */ + rkey = get_rkey (&path_to_entry, inode->i_sb); + if (! comp_le_keys (rkey, &MIN_KEY)) { +#ifdef CONFIG_REISERFS_CHECK + reiserfs_warning ("vs-9025: reiserfs_readdir:" + "get_rkey failed. 
Researching..\n"); +#endif + /* set pos_key to key, that is the smallest and greater + that key of the last entry in the item */ + set_cpu_key_k_offset (&pos_key, next_pos); + continue; + } + + if ( COMP_SHORT_KEYS (rkey, &pos_key)) { + // end of directory has been reached + goto end; + } + + /* directory continues in the right neighboring block */ + set_cpu_key_k_offset (&pos_key, le_key_k_offset (ITEM_VERSION_1, rkey)); + + } /* while */ + + + end: + // FIXME: ext2_readdir does not reset f_pos + filp->f_pos = next_pos; + pathrelse (&path_to_entry); + reiserfs_check_path(&path_to_entry) ; + return 0; +} + + + + + + + + + + + + + + + + + + + + + diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/do_balan.c linux/fs/reiserfs/do_balan.c --- v2.4.0/linux/fs/reiserfs/do_balan.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/do_balan.c Mon Jan 15 15:31:19 2001 @@ -0,0 +1,2043 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + +/* Now we have all buffers that must be used in balancing of the tree */ +/* Further calculations can not cause schedule(), and thus the buffer */ +/* tree will be stable until the balancing will be finished */ +/* balance the tree according to the analysis made before, */ +/* and using buffers obtained after all above. */ + + +/** + ** balance_leaf_when_delete + ** balance_leaf + ** do_balance + ** + **/ + +#ifdef __KERNEL__ + +#include +#include +#include +#include + +#else + +#include "nokernel.h" + +#endif + + +#ifdef CONFIG_REISERFS_CHECK + +struct tree_balance * cur_tb = NULL; /* detects whether more than one + copy of tb exists as a means + of checking whether schedule + is interrupting do_balance */ +#endif + + +inline void do_balance_mark_leaf_dirty (struct tree_balance * tb, + struct buffer_head * bh, int flag) +{ + if (reiserfs_dont_log(tb->tb_sb)) { + if (!test_and_set_bit(BH_Dirty, &bh->b_state)) { + __mark_buffer_dirty(bh) ; + tb->need_balance_dirty = 1; + } + } else { + int windex = push_journal_writer("do_balance") ; + journal_mark_dirty(tb->transaction_handle, tb->transaction_handle->t_super, bh) ; + pop_journal_writer(windex) ; + } +} + +#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty +#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty + + +/* summary: + if deleting something ( tb->insert_size[0] < 0 ) + return(balance_leaf_when_delete()); (flag d handled here) + else + if lnum is larger than 0 we put items into the left node + if rnum is larger than 0 we put items into the right node + if snum1 is larger than 0 we put items into the new node s1 + if snum2 is larger than 0 we put items into the new node s2 +Note that all *num* count new items being created. + +It would be easier to read balance_leaf() if each of these summary +lines was a separate procedure rather than being inlined. I think +that there are many passages here and in balance_leaf_when_delete() in +which two calls to one procedure can replace two passages, and it +might save cache space and improve software maintenance costs to do so. + +Vladimir made the perceptive comment that we should offload most of +the decision making in this function into fix_nodes/check_balance, and +then create some sort of structure in tb that says what actions should +be performed by do_balance. 
+ +-Hans */ + + + +/* Balance leaf node in case of delete or cut: insert_size[0] < 0 + * + * lnum, rnum can have values >= -1 + * -1 means that the neighbor must be joined with S + * 0 means that nothing should be done with the neighbor + * >0 means to shift entirely or partly the specified number of items to the neighbor + */ +static int balance_leaf_when_delete (struct tree_balance * tb, int flag) +{ + struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + int item_pos = PATH_LAST_POSITION (tb->tb_path); + int pos_in_item = tb->tb_path->pos_in_item; + struct buffer_info bi; + int n; + struct item_head * ih; + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->FR[0] && B_LEVEL (tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1) + reiserfs_panic (tb->tb_sb, + "vs- 12000: balance_leaf_when_delete:level: wrong FR %z\n", tb->FR[0]); + if ( tb->blknum[0] > 1 ) + reiserfs_panic (tb->tb_sb, + "PAP-12005: balance_leaf_when_delete: " + "tb->blknum == %d, can not be > 1", tb->blknum[0]); + + if ( ! tb->blknum[0] && ! PATH_H_PPARENT(tb->tb_path, 0)) + reiserfs_panic (tb->tb_sb, "PAP-12010: balance_leaf_when_delete: tree can not be empty"); +#endif + + ih = B_N_PITEM_HEAD (tbS0, item_pos); + + /* Delete or truncate the item */ + + switch (flag) { + case M_DELETE: /* delete item in S[0] */ + +#ifdef CONFIG_REISERFS_CHECK + if (le16_to_cpu (ih->ih_item_len) + IH_SIZE != -tb->insert_size [0]) + reiserfs_panic (tb->tb_sb, "vs-12013: balance_leaf_when_delete: " + "mode Delete, insert size %d, ih to be deleted %h", ih); + +#if 0 /* rigth delim key not supported */ + if ( ! item_pos && (! tb->L[0] || COMP_KEYS(B_PRIGHT_DELIM_KEY(tb->L[0]), B_N_PKEY(tbS0, 0))) ) { + print_cur_tb ("12015"); + reiserfs_panic (tb->tb_sb, "PAP-12015: balance_leaf_when_delete: L0's rkey does not match to 1st key of S0: " + "rkey in L %k, first key in S0 %k, rkey in CFL %k", + tb->L[0] ? B_PRIGHT_DELIM_KEY(tb->L[0]) : 0, + B_N_PKEY(tbS0, 0), + tb->CFL[0] ? B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]) : 0); + } +#endif + +#endif + + bi.tb = tb; + bi.bi_bh = tbS0; + bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); + bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); + leaf_delete_items (&bi, 0, item_pos, 1, -1); + + if ( ! item_pos && tb->CFL[0] ) { + if ( B_NR_ITEMS(tbS0) ) { + replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0); +#if 0 /* right delim key support */ + copy_key(B_PRIGHT_DELIM_KEY(tb->L[0]), B_N_PKEY(tbS0, 0)); + reiserfs_mark_buffer_dirty (tb->L[0], 0); +#endif + } + else { + if ( ! PATH_H_POSITION (tb->tb_path, 1) ) + replace_key(tb, tb->CFL[0],tb->lkey[0],PATH_H_PPARENT(tb->tb_path, 0),0); +#if 0 /* right delim key support */ + copy_key(B_PRIGHT_DELIM_KEY(tb->L[0]), B_PRIGHT_DELIM_KEY(tbS0)); + reiserfs_mark_buffer_dirty (tb->L[0], 0); +#endif + } + } + +#ifdef CONFIG_REISERFS_CHECK +#if 0 + if (! item_pos && (!tb->CFL[0] || !tb->L[0])) +#endif + if (! item_pos && !tb->CFL[0]) + reiserfs_panic (tb->tb_sb, "PAP-12020: balance_leaf_when_delete: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], tb->L[0]); +#endif + + break; + + case M_CUT: { /* cut item in S[0] */ + bi.tb = tb; + bi.bi_bh = tbS0; + bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); + bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); + if (is_direntry_le_ih (ih)) { + +#ifdef CONFIG_REISERFS_CHECK +#if 0 /* right delim key support */ + if ( ! item_pos && ! pos_in_item && (! 
tb->L[0] || COMP_KEYS(B_PRIGHT_DELIM_KEY(tb->L[0]), + B_N_PKEY(tbS0, 0))) ) + reiserfs_panic(tb->tb_sb, "PAP-12025: balance_leaf_when_delete: illegal right delimiting key"); +#endif +#endif + + /* UFS unlink semantics are such that you can only delete one directory entry at a time. */ + /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */ + tb->insert_size[0] = -1; + leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]); + +#ifdef CONFIG_REISERFS_CHECK + if (! item_pos && ! pos_in_item && ! tb->CFL[0]) + reiserfs_panic (tb->tb_sb, "PAP-12030: balance_leaf_when_delete: can not change delimiting key. CFL[0]=%p", tb->CFL[0]); +#endif /* CONFIG_REISERFS_CHECK */ + + if ( ! item_pos && ! pos_in_item && tb->CFL[0] ) { + replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0); +#if 0/* right delim key support */ + copy_key(B_PRIGHT_DELIM_KEY(tb->L[0]), B_N_PKEY(tbS0, 0)); + reiserfs_mark_buffer_dirty (tb->L[0], 0); +#endif + } + } else { + leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]); + +#ifdef CONFIG_REISERFS_CHECK + if (! ih->ih_item_len) + reiserfs_panic (tb->tb_sb, "PAP-12035: balance_leaf_when_delete: cut must leave non-zero dynamic length of item"); +#endif /* CONFIG_REISERFS_CHECK */ + } + break; + } + + default: + print_cur_tb ("12040"); + reiserfs_panic (tb->tb_sb, "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)", + (flag == M_PASTE) ? "PASTE" : ((flag == M_INSERT) ? "INSERT" : "UNKNOWN"), flag); + } + + /* the rule is that no shifting occurs unless by shifting a node can be freed */ + n = B_NR_ITEMS(tbS0); + if ( tb->lnum[0] ) /* L[0] takes part in balancing */ + { + if ( tb->lnum[0] == -1 ) /* L[0] must be joined with S[0] */ + { + if ( tb->rnum[0] == -1 ) /* R[0] must be also joined with S[0] */ + { + if ( tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0) ) + { + /* all contents of all the 3 buffers will be in L[0] */ + if ( PATH_H_POSITION (tb->tb_path, 1) == 0 && 1 < B_NR_ITEMS(tb->FR[0]) ) + replace_key(tb, tb->CFL[0],tb->lkey[0],tb->FR[0],1); + + /* update right_delimiting_key field */ +#if 0 + copy_key (B_PRIGHT_DELIM_KEY (tb->L[0]), B_PRIGHT_DELIM_KEY (tb->R[0])); +#endif + leaf_move_items (LEAF_FROM_S_TO_L, tb, n, -1, 0); + leaf_move_items (LEAF_FROM_R_TO_L, tb, B_NR_ITEMS(tb->R[0]), -1, 0); + +#if 0/*preserve list*/ + preserve_invalidate(tb, tbS0, tb->L[0]); + preserve_invalidate(tb, tb->R[0], tb->L[0]); +#endif + reiserfs_invalidate_buffer (tb, tbS0); + reiserfs_invalidate_buffer (tb, tb->R[0]); + + return 0; + } + /* all contents of all the 3 buffers will be in R[0] */ + leaf_move_items (LEAF_FROM_S_TO_R, tb, n, -1, 0); + leaf_move_items (LEAF_FROM_L_TO_R, tb, B_NR_ITEMS(tb->L[0]), -1, 0); + + /* right_delimiting_key is correct in R[0] */ + replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); + +#if 0 + /* mark tb->R[0] as suspected recipient */ + preserve_invalidate(tb,tbS0, tb->R[0]); + preserve_invalidate(tb,tb->L[0], tb->R[0]); +#endif + reiserfs_invalidate_buffer (tb, tbS0); + reiserfs_invalidate_buffer (tb, tb->L[0]); + + return -1; + } + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->rnum[0] != 0 ) + reiserfs_panic (tb->tb_sb, "PAP-12045: balance_leaf_when_delete: " + "rnum must be 0 (%d)", tb->rnum[0]); +#endif /* CONFIG_REISERFS_CHECK */ + + /* all contents of L[0] and S[0] will be in L[0] */ + leaf_shift_left(tb, n, -1); + +#if 0/*preserve list*/ + preserve_invalidate(tb, tbS0, tb->L[0]); /* preserved, shifting */ +#endif + reiserfs_invalidate_buffer (tb, tbS0); + + return 0; + } + /* a 
part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */ + +#ifdef CONFIG_REISERFS_CHECK + if (( tb->lnum[0] + tb->rnum[0] < n ) || ( tb->lnum[0] + tb->rnum[0] > n+1 )) + reiserfs_panic (tb->tb_sb, "PAP-12050: balance_leaf_when_delete: " + "rnum(%d) and lnum(%d) and item number in S[0] are not consistent", + tb->rnum[0], tb->lnum[0], n); + + if (( tb->lnum[0] + tb->rnum[0] == n ) && (tb->lbytes != -1 || tb->rbytes != -1)) + reiserfs_panic (tb->tb_sb, "PAP-12055: balance_leaf_when_delete: " + "bad rbytes (%d)/lbytes (%d) parameters when items are not split", + tb->rbytes, tb->lbytes); + if (( tb->lnum[0] + tb->rnum[0] == n + 1 ) && (tb->lbytes < 1 || tb->rbytes != -1)) + reiserfs_panic (tb->tb_sb, "PAP-12060: balance_leaf_when_delete: " + "bad rbytes (%d)/lbytes (%d) parameters when items are split", + tb->rbytes, tb->lbytes); +#endif + + leaf_shift_left (tb, tb->lnum[0], tb->lbytes); + leaf_shift_right(tb, tb->rnum[0], tb->rbytes); + +#if 0/*preserve list*/ + preserve_invalidate (tb, tbS0, tb->L[0]); + mark_suspected_recipient (tb->tb_sb, tb->R[0]); +#endif + reiserfs_invalidate_buffer (tb, tbS0); + + return 0; + } + + if ( tb->rnum[0] == -1 ) { + /* all contents of R[0] and S[0] will be in R[0] */ + leaf_shift_right(tb, n, -1); +#if 0/*preserve list*/ + preserve_invalidate(tb, tbS0, tb->R[0]); +#endif + reiserfs_invalidate_buffer (tb, tbS0); + return 0; + } + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->rnum[0] ) + reiserfs_panic (tb->tb_sb, "PAP-12065: balance_leaf_when_delete: " + "bad rnum parameter must be 0 (%d)", tb->rnum[0]); +#endif + + return 0; +} + + +static int balance_leaf (struct tree_balance * tb, + struct item_head * ih, /* item header of inserted item (this is on little endian) */ + const char * body, /* body of inserted item or bytes to paste */ + int flag, /* i - insert, d - delete, c - cut, p - paste + (see comment to do_balance) */ + struct item_head * insert_key, /* in our processing of one level we sometimes determine what + must be inserted into the next higher level. 
This insertion + consists of a key or two keys and their corresponding + pointers */ + struct buffer_head ** insert_ptr /* inserted node-ptrs for the next level */ + ) +{ + struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#if 0/*preserve list*/ + struct buffer_head * tbF0 = PATH_H_PPARENT (tb->tb_path, 0); + int S0_b_item_order = PATH_H_B_ITEM_ORDER (tb->tb_path, 0); +#endif + int item_pos = PATH_LAST_POSITION (tb->tb_path); /* index into the array of item headers in S[0] + of the affected item */ + struct buffer_info bi; + struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */ + int snum[2]; /* number of items that will be placed + into S_new (includes partially shifted + items) */ + int sbytes[2]; /* if an item is partially shifted into S_new then + if it is a directory item + it is the number of entries from the item that are shifted into S_new + else + it is the number of bytes from the item that are shifted into S_new + */ + int n, i; + int ret_val; + int pos_in_item; + int zeros_num; + +#if 0 + if (tb->insert_size [0] % 4) { + reiserfs_panic (tb->tb_sb, "balance_leaf: wrong insert_size %d", + tb->insert_size [0]); + } +#endif + /* Make balance in case insert_size[0] < 0 */ + if ( tb->insert_size[0] < 0 ) + return balance_leaf_when_delete (tb, flag); + + zeros_num = 0; + if (flag == M_INSERT && body == 0) + zeros_num = le16_to_cpu (ih->ih_item_len); + + pos_in_item = tb->tb_path->pos_in_item; + /* for indirect item pos_in_item is measured in unformatted node + pointers. Recalculate to bytes */ + if (flag != M_INSERT && is_indirect_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) + pos_in_item *= UNFM_P_SIZE; + + if ( tb->lnum[0] > 0 ) { + /* Shift lnum[0] items from S[0] to the left neighbor L[0] */ + if ( item_pos < tb->lnum[0] ) { + /* new item or it part falls to L[0], shift it too */ + n = B_NR_ITEMS(tb->L[0]); + + switch (flag) { + case M_INSERT: /* insert item into L[0] */ + + if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) { + /* part of new item falls into L[0] */ + int new_item_len; + int version; + +#ifdef CONFIG_REISERFS_CHECK + if (!is_direct_le_ih (ih)) + reiserfs_panic (tb->tb_sb, "PAP-12075: balance_leaf: " + "only direct inserted item can be broken. %h", ih); +#endif + ret_val = leaf_shift_left (tb, tb->lnum[0]-1, -1); + /* when reading the if conditions preceding the subsequent preserve_shifted + lines understand that their goal is to determine if all that we are + shifting is the new data being added */ +#if 0/*preserve list*/ + if (tb->lnum[0] - 1 > 0) { + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->L[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + } +#endif + + /* Calculate item length to insert to S[0] */ + new_item_len = le16_to_cpu (ih->ih_item_len) - tb->lbytes; + /* Calculate and check item length to insert to L[0] */ + ih->ih_item_len -= new_item_len; + +#ifdef CONFIG_REISERFS_CHECK + if ( (int)(ih->ih_item_len) <= 0 ) + reiserfs_panic(tb->tb_sb, "PAP-12080: balance_leaf: " + "there is nothing to insert into L[0]: ih_item_len=%d", + (int)ih->ih_item_len); +#endif + + /* Insert new item into L[0] */ + bi.tb = tb; + bi.bi_bh = tb->L[0]; + bi.bi_parent = tb->FL[0]; + bi.bi_position = get_left_neighbor_position (tb, 0); + leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, + zeros_num > ih->ih_item_len ? 
ih->ih_item_len : zeros_num); + + version = ih_version (ih); + + /* Calculate key component, item length and body to insert into S[0] */ + set_le_key_k_offset (ih_version (ih), &(ih->ih_key), + le_key_k_offset (ih_version (ih), &(ih->ih_key)) + tb->lbytes); + ih->ih_item_len = cpu_to_le16 (new_item_len); + if ( tb->lbytes > zeros_num ) { + body += (tb->lbytes - zeros_num); + zeros_num = 0; + } + else + zeros_num -= tb->lbytes; + +#ifdef CONFIG_REISERFS_CHECK + if ( (int)(ih->ih_item_len) <= 0 ) + reiserfs_panic(tb->tb_sb, "PAP-12085: balance_leaf: " + "there is nothing to insert into S[0]: ih_item_len=%d", + (int)ih->ih_item_len); +#endif + } else { + /* new item in whole falls into L[0] */ + /* Shift lnum[0]-1 items to L[0] */ + ret_val = leaf_shift_left(tb, tb->lnum[0]-1, tb->lbytes); +#if 0/*preserve list*/ + if (tb->lnum[0] > 1) { + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->L[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + } +#endif + /* Insert new item into L[0] */ + bi.tb = tb; + bi.bi_bh = tb->L[0]; + bi.bi_parent = tb->FL[0]; + bi.bi_position = get_left_neighbor_position (tb, 0); + leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, zeros_num); +#if 0/*preserve list*/ + if (tb->preserve_mode == PRESERVE_INDIRECT_TO_DIRECT){ + mark_suspected_recipient (tb->tb_sb, bi.bi_bh); + } +#endif + tb->insert_size[0] = 0; + zeros_num = 0; + } + break; + + case M_PASTE: /* append item in L[0] */ + + if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) { + /* we must shift the part of the appended item */ + if ( is_direntry_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) { + +#ifdef CONFIG_REISERFS_CHECK + if ( zeros_num ) + reiserfs_panic(tb->tb_sb, "PAP-12090: balance_leaf: illegal parameter in case of a directory"); +#endif + + /* directory item */ + if ( tb->lbytes > pos_in_item ) { + /* new directory entry falls into L[0] */ + struct item_head * pasted; + int l_pos_in_item = pos_in_item; + + /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */ + ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->L[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + if ( ret_val && ! item_pos ) { + pasted = B_N_PITEM_HEAD(tb->L[0],B_NR_ITEMS(tb->L[0])-1); + l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes-1); + } + + /* Append given directory entry to directory item */ + bi.tb = tb; + bi.bi_bh = tb->L[0]; + bi.bi_parent = tb->FL[0]; + bi.bi_position = get_left_neighbor_position (tb, 0); + leaf_paste_in_buffer (&bi, n + item_pos - ret_val, l_pos_in_item, + tb->insert_size[0], body, zeros_num); + + /* previous string prepared space for pasting new entry, following string pastes this entry */ + + /* when we have merge directory item, pos_in_item has been changed too */ + + /* paste new directory entry. 1 is entry number */ + leaf_paste_entries (bi.bi_bh, n + item_pos - ret_val, l_pos_in_item, 1, + (struct reiserfs_de_head *)body, + body + DEH_SIZE, tb->insert_size[0] + ); + tb->insert_size[0] = 0; + } else { + /* new directory item doesn't fall into L[0] */ + /* Shift lnum[0]-1 items in whole. 
Shift lbytes directory entries from directory item number lnum[0] */ + leaf_shift_left (tb, tb->lnum[0], tb->lbytes); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->L[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + } + /* Calculate new position to append in item body */ + pos_in_item -= tb->lbytes; + } + else { + /* regular object */ + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->lbytes <= 0 ) + reiserfs_panic(tb->tb_sb, "PAP-12095: balance_leaf: " + "there is nothing to shift to L[0]. lbytes=%d", + tb->lbytes); + if ( pos_in_item != B_N_PITEM_HEAD(tbS0, item_pos)->ih_item_len ) + reiserfs_panic(tb->tb_sb, "PAP-12100: balance_leaf: " + "incorrect position to paste: item_len=%d, pos_in_item=%d", + B_N_PITEM_HEAD(tbS0,item_pos)->ih_item_len, pos_in_item); +#endif + + if ( tb->lbytes >= pos_in_item ) { + /* appended item will be in L[0] in whole */ + int l_n; + + /* this bytes number must be appended to the last item of L[h] */ + l_n = tb->lbytes - pos_in_item; + + /* Calculate new insert_size[0] */ + tb->insert_size[0] -= l_n; + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->insert_size[0] <= 0 ) + reiserfs_panic(tb->tb_sb, "PAP-12105: balance_leaf: " + "there is nothing to paste into L[0]. insert_size=%d", + tb->insert_size[0]); +#endif + + ret_val = leaf_shift_left(tb,tb->lnum[0], + B_N_PITEM_HEAD(tbS0,item_pos)->ih_item_len); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->L[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + /* Append to body of item in L[0] */ + bi.tb = tb; + bi.bi_bh = tb->L[0]; + bi.bi_parent = tb->FL[0]; + bi.bi_position = get_left_neighbor_position (tb, 0); + leaf_paste_in_buffer( + &bi,n + item_pos - ret_val, + B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)->ih_item_len, + l_n,body, zeros_num > l_n ? 
l_n : zeros_num + ); + +#ifdef CONFIG_REISERFS_CHECK + if (l_n && is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], + n + item_pos - ret_val))) + reiserfs_panic(tb->tb_sb, "PAP-12110: balance_leaf: " + "pasting more than 1 unformatted node pointer into indirect item"); +#endif + + /* 0-th item in S0 can be only of DIRECT type when l_n != 0*/ + { + int version; + + version = le16_to_cpu (B_N_PITEM_HEAD (tbS0, 0)->ih_version); + set_le_key_k_offset (version, B_N_PKEY (tbS0, 0), + le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + l_n); + set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]), + le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + l_n); + } +#if 0 + set_le_key_k_offset (B_PRIGHT_DELIM_KEY(tb->L[0]), le_key_k_offset (B_PRIGHT_DELIM_KEY(tb->L[0])) + l_n); +#endif + /* k_offset (B_N_PKEY (tbS0, 0)) += l_n; + k_offset (B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) += l_n; + k_offset (B_PRIGHT_DELIM_KEY(tb->L[0])) += l_n;*/ + +#ifdef NO_CONFIG_REISERFS_CHECK /* journal victim */ + if (!buffer_dirty (tbS0) || !buffer_dirty (tb->CFL[0]) || !buffer_dirty (tb->L[0])) + reiserfs_panic(tb->tb_sb, "PAP-12115: balance_leaf: L, CLF and S must be dirty already"); +#endif + + /* Calculate new body, position in item and insert_size[0] */ + if ( l_n > zeros_num ) { + body += (l_n - zeros_num); + zeros_num = 0; + } + else + zeros_num -= l_n; + pos_in_item = 0; + +#ifdef CONFIG_REISERFS_CHECK + if (comp_short_le_keys (B_N_PKEY(tbS0,0), + B_N_PKEY(tb->L[0],B_NR_ITEMS(tb->L[0])-1)) || + !op_is_left_mergeable (B_N_PKEY (tbS0, 0), tbS0->b_size) || + !op_is_left_mergeable(B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]), tbS0->b_size)) + reiserfs_panic (tb->tb_sb, "PAP-12120: balance_leaf: " + "item must be merge-able with left neighboring item"); +#endif + + } + else /* only part of the appended item will be in L[0] */ + { + /* Calculate position in item for append in S[0] */ + pos_in_item -= tb->lbytes; + +#ifdef CONFIG_REISERFS_CHECK + if ( pos_in_item <= 0 ) + reiserfs_panic(tb->tb_sb, "PAP-12125: balance_leaf: " + "no place for paste. pos_in_item=%d", pos_in_item); +#endif + + /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ + leaf_shift_left(tb,tb->lnum[0],tb->lbytes); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->L[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + } + } + } + else /* appended item will be in L[0] in whole */ + { + struct item_head * pasted; + +#ifdef REISERFS_FSCK + if ( ! item_pos && is_left_mergeable (tb->tb_sb, tb->tb_path) == 1 ) +#else + if ( ! item_pos && op_is_left_mergeable (B_N_PKEY (tbS0, 0), tbS0->b_size) ) +#endif + { /* if we paste into first item of S[0] and it is left mergable */ + /* then increment pos_in_item by the size of the last item in L[0] */ + pasted = B_N_PITEM_HEAD(tb->L[0],n-1); + if ( is_direntry_le_ih (pasted) ) + pos_in_item += le16_to_cpu (pasted->u.ih_entry_count); + else + pos_in_item += le16_to_cpu (pasted->ih_item_len); + } + + /* Shift lnum[0] - 1 items in whole. 
Shift lbytes - 1 byte from item number lnum[0] */ + ret_val = leaf_shift_left(tb,tb->lnum[0],tb->lbytes); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->L[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + /* Append to body of item in L[0] */ + bi.tb = tb; + bi.bi_bh = tb->L[0]; + bi.bi_parent = tb->FL[0]; + bi.bi_position = get_left_neighbor_position (tb, 0); + leaf_paste_in_buffer (&bi, n + item_pos - ret_val, pos_in_item, tb->insert_size[0], + body, zeros_num); + + /* if appended item is directory, paste entry */ + pasted = B_N_PITEM_HEAD (tb->L[0], n + item_pos - ret_val); + if (is_direntry_le_ih (pasted)) + leaf_paste_entries ( + bi.bi_bh, n + item_pos - ret_val, pos_in_item, 1, + (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0] + ); + /* if appended item is indirect item, put unformatted node into un list */ + if (is_indirect_le_ih (pasted)) + set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); + tb->insert_size[0] = 0; + zeros_num = 0; + } + break; + default: /* cases d and t */ + reiserfs_panic (tb->tb_sb, "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)", + (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); + } + } else { + /* new item doesn't fall into L[0] */ + leaf_shift_left(tb,tb->lnum[0],tb->lbytes); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->L[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + } + } /* tb->lnum[0] > 0 */ + + /* Calculate new item position */ + item_pos -= ( tb->lnum[0] - (( tb->lbytes != -1 ) ? 1 : 0)); + + if ( tb->rnum[0] > 0 ) { + /* shift rnum[0] items from S[0] to the right neighbor R[0] */ + n = B_NR_ITEMS(tbS0); + switch ( flag ) { + + case M_INSERT: /* insert item */ + if ( n - tb->rnum[0] < item_pos ) + { /* new item or its part falls to R[0] */ + if ( item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1 ) + { /* part of new item falls into R[0] */ + int old_key_comp, old_len, r_zeros_number; + const char * r_body; + int version; + loff_t offset; + +#ifdef CONFIG_REISERFS_CHECK + if ( !is_direct_le_ih (ih) ) + reiserfs_panic(tb->tb_sb, "PAP-12135: balance_leaf: " + "only direct item can be split. 
(%h)", ih); +#endif + + leaf_shift_right(tb,tb->rnum[0]-1,-1); +#if 0/*preserve list*/ + if (tb->rnum[0]>1) { + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->R[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + } +#endif + + version = le16_to_cpu (ih->ih_version); + /* Remember key component and item length */ + old_key_comp = le_key_k_offset (version, &(ih->ih_key)); + old_len = le16_to_cpu (ih->ih_item_len); + + /* Calculate key component and item length to insert into R[0] */ + offset = le_key_k_offset (version, &(ih->ih_key)) + (old_len - tb->rbytes); + set_le_key_k_offset (version, &(ih->ih_key), offset); + ih->ih_item_len = cpu_to_le16 (tb->rbytes); + /* Insert part of the item into R[0] */ + bi.tb = tb; + bi.bi_bh = tb->R[0]; + bi.bi_parent = tb->FR[0]; + bi.bi_position = get_right_neighbor_position (tb, 0); + if ( offset - old_key_comp > zeros_num ) { + r_zeros_number = 0; + r_body = body + offset - old_key_comp - zeros_num; + } + else { + r_body = body; + r_zeros_number = zeros_num - (offset - old_key_comp); + zeros_num -= r_zeros_number; + } + + leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number); + + /* Replace right delimiting key by first key in R[0] */ + replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); + + /* Calculate key component and item length to insert into S[0] */ + set_le_key_k_offset (version, &(ih->ih_key), old_key_comp); + ih->ih_item_len = cpu_to_le16 (old_len - tb->rbytes); + + tb->insert_size[0] -= tb->rbytes; + + } + else /* whole new item falls into R[0] */ + { + /* Shift rnum[0]-1 items to R[0] */ + ret_val = leaf_shift_right(tb,tb->rnum[0]-1,tb->rbytes); +#if 0/*preserve list*/ + if (tb->rnum[0]>1) { + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->R[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + } +#endif + /* Insert new item into R[0] */ + bi.tb = tb; + bi.bi_bh = tb->R[0]; + bi.bi_parent = tb->FR[0]; + bi.bi_position = get_right_neighbor_position (tb, 0); + leaf_insert_into_buf (&bi, item_pos - n + tb->rnum[0] - 1, ih, body, zeros_num); +#if 0/*preserve list*/ + if (tb->preserve_mode == PRESERVE_INDIRECT_TO_DIRECT){ + mark_suspected_recipient (tb->tb_sb, bi.bi_bh); + } +#endif + + /* If we insert new item in the begin of R[0] change the right delimiting key */ + if ( item_pos - n + tb->rnum[0] - 1 == 0 ) { + replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); + +#if 0 + /* update right delimiting key */ + copy_key(B_PRIGHT_DELIM_KEY(tbS0), &(ih->ih_key)); + reiserfs_mark_buffer_dirty (tbS0, 0); +#endif + } + zeros_num = tb->insert_size[0] = 0; + } + } + else /* new item or part of it doesn't fall into R[0] */ + { + leaf_shift_right(tb,tb->rnum[0],tb->rbytes); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->R[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + } + break; + + case M_PASTE: /* append item */ + + if ( n - tb->rnum[0] <= item_pos ) /* pasted item or part of it falls to R[0] */ + { + if ( item_pos == n - tb->rnum[0] && tb->rbytes != -1 ) + { /* we must shift the part of the appended item */ + if ( is_direntry_le_ih (B_N_PITEM_HEAD(tbS0, item_pos))) + { /* we append to directory item */ + int entry_count; + +#ifdef CONFIG_REISERFS_CHECK + if ( zeros_num ) + reiserfs_panic(tb->tb_sb, "PAP-12145: balance_leaf: illegal parametr in case of a directory"); +#endif + + entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD(tbS0, item_pos)); + if ( entry_count - tb->rbytes < pos_in_item ) + /* new directory entry 
falls into R[0] */ + { + int paste_entry_position; + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->rbytes - 1 >= entry_count || ! tb->insert_size[0] ) + reiserfs_panic(tb->tb_sb, "PAP-12150: balance_leaf: " + "no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d", + tb->rbytes, entry_count); +#endif + + /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */ + leaf_shift_right(tb,tb->rnum[0],tb->rbytes - 1); +#if 0/*preserve list*/ + /* if we are shifting more than just the new entry */ + if (tb->rbytes > 1 || tb->rnum[0] > 1) { + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->R[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + } +#endif + /* Paste given directory entry to directory item */ + paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1; + bi.tb = tb; + bi.bi_bh = tb->R[0]; + bi.bi_parent = tb->FR[0]; + bi.bi_position = get_right_neighbor_position (tb, 0); + leaf_paste_in_buffer (&bi, 0, paste_entry_position, + tb->insert_size[0],body,zeros_num); + /* paste entry */ + leaf_paste_entries ( + bi.bi_bh, 0, paste_entry_position, 1, (struct reiserfs_de_head *)body, + body + DEH_SIZE, tb->insert_size[0] + ); + + if ( paste_entry_position == 0 ) { + /* change delimiting keys */ + replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); +#if 0 + copy_key(B_PRIGHT_DELIM_KEY(tbS0), B_N_PKEY(tb->R[0], 0)); + reiserfs_mark_buffer_dirty (tbS0, 0); +#endif + } + + tb->insert_size[0] = 0; + pos_in_item++; + } + else /* new directory entry doesn't fall into R[0] */ + { + leaf_shift_right(tb,tb->rnum[0],tb->rbytes); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->R[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + } + } + else /* regular object */ + { + int n_shift, n_rem, r_zeros_number; + const char * r_body; + + /* Calculate number of bytes which must be shifted from appended item */ + if ( (n_shift = tb->rbytes - tb->insert_size[0]) < 0 ) + n_shift = 0; + +#ifdef CONFIG_REISERFS_CHECK + if (pos_in_item != B_N_PITEM_HEAD (tbS0, item_pos)->ih_item_len) + reiserfs_panic(tb->tb_sb,"PAP-12155: balance_leaf: invalid position to paste. 
ih_item_len=%d, pos_in_item=%d", + pos_in_item, B_N_PITEM_HEAD(tbS0,item_pos)->ih_item_len); +#endif + + leaf_shift_right(tb,tb->rnum[0],n_shift); +#if 0/*preserve list*/ + /* if we are shifting an old part from the appended item or more than the appended item is going into R */ + if (n_shift || tb->rnum[0] > 1) { + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->R[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + } +#endif + /* Calculate number of bytes which must remain in body after appending to R[0] */ + if ( (n_rem = tb->insert_size[0] - tb->rbytes) < 0 ) + n_rem = 0; + + { + int version; + + version = ih_version (B_N_PITEM_HEAD (tb->R[0],0)); + set_le_key_k_offset (version, B_N_PKEY(tb->R[0],0), + le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + n_rem); + set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0]), + le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + n_rem); + } +/* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem; + k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/ + do_balance_mark_internal_dirty (tb, tb->CFR[0], 0); + +#if 0 + set_le_key_k_offset (B_PRIGHT_DELIM_KEY(tbS0), le_key_k_offset (B_PRIGHT_DELIM_KEY(tbS0)) + n_rem); +/* k_offset (B_PRIGHT_DELIM_KEY(tbS0)) += n_rem;*/ + reiserfs_mark_buffer_dirty (tbS0, 0); +#endif + /* Append part of body into R[0] */ + bi.tb = tb; + bi.bi_bh = tb->R[0]; + bi.bi_parent = tb->FR[0]; + bi.bi_position = get_right_neighbor_position (tb, 0); + if ( n_rem > zeros_num ) { + r_zeros_number = 0; + r_body = body + n_rem - zeros_num; + } + else { + r_body = body; + r_zeros_number = zeros_num - n_rem; + zeros_num -= r_zeros_number; + } + + leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeros_number); + + if (is_indirect_le_ih (B_N_PITEM_HEAD(tb->R[0],0))) { + +#ifdef CONFIG_REISERFS_CHECK + if (n_rem) + reiserfs_panic(tb->tb_sb, "PAP-12160: balance_leaf: paste more than one unformatted node pointer"); +#endif + + set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), ((struct unfm_nodeinfo*)body)->unfm_freespace); + } + + tb->insert_size[0] = n_rem; + if ( ! n_rem ) + pos_in_item ++; + } + } + else /* pasted item in whole falls into R[0] */ + { + struct item_head * pasted; + + ret_val = leaf_shift_right(tb,tb->rnum[0],tb->rbytes); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->R[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + /* append item in R[0] */ + if ( pos_in_item >= 0 ) { + bi.tb = tb; + bi.bi_bh = tb->R[0]; + bi.bi_parent = tb->FR[0]; + bi.bi_position = get_right_neighbor_position (tb, 0); + leaf_paste_in_buffer(&bi,item_pos - n + tb->rnum[0], pos_in_item, + tb->insert_size[0],body, zeros_num); + } + + /* paste new entry, if item is directory item */ + pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]); + if (is_direntry_le_ih (pasted) && pos_in_item >= 0 ) { + leaf_paste_entries ( + bi.bi_bh, item_pos - n + tb->rnum[0], pos_in_item, 1, + (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0] + ); + if ( ! 
pos_in_item ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( item_pos - n + tb->rnum[0] ) + reiserfs_panic (tb->tb_sb, "PAP-12165: balance_leaf: " + "directory item must be first item of node when pasting is in 0th position"); +#endif + + /* update delimiting keys */ + replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0); +#if 0 + copy_key(B_PRIGHT_DELIM_KEY(tbS0),B_N_PKEY(tb->R[0], 0)); + reiserfs_mark_buffer_dirty (tbS0, 0); +#endif + } + } + + if (is_indirect_le_ih (pasted)) + set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); + zeros_num = tb->insert_size[0] = 0; + } + } + else /* new item doesn't fall into R[0] */ + { + leaf_shift_right(tb,tb->rnum[0],tb->rbytes); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, tb->R[0]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + } + break; + default: /* cases d and t */ + reiserfs_panic (tb->tb_sb, "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)", + (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); + } + + } /* tb->rnum[0] > 0 */ + + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->blknum[0] > 3 ) + reiserfs_panic (tb->tb_sb, "PAP-12180: balance_leaf: blknum can not be %d. It must be <= 3", tb->blknum[0]); + + if ( tb->blknum[0] < 0 ) + reiserfs_panic (tb->tb_sb, "PAP-12185: balance_leaf: blknum can not be %d. It must be >= 0", tb->blknum[0]); +#endif + + /* if while adding to a node we discover that it is possible to split + it in two, and merge the left part into the left neighbor and the + right part into the right neighbor, eliminating the node */ + if ( tb->blknum[0] == 0 ) { /* node S[0] is empty now */ + +#ifdef CONFIG_REISERFS_CHECK + if ( ! tb->lnum[0] || ! tb->rnum[0] ) + reiserfs_panic(tb->tb_sb, "PAP-12190: balance_leaf: lnum and rnum must not be zero"); +#if 0 + if (COMP_KEYS (B_N_PKEY(tb->R[0], 0), B_PRIGHT_DELIM_KEY(tbS0))) + reiserfs_panic (tb->tb_sb, "vs-12192: balance_leaf: S[0] is being removed from the tree, it has incorrect right delimiting key"); +#endif +#endif + +#if 0 + /* if insertion was done before 0-th position in R[0], right + delimiting key of the tb->L[0]'s and left delimiting key are + not set correctly */ + if (tb->L[0]) { + copy_key(B_PRIGHT_DELIM_KEY(tb->L[0]), B_PRIGHT_DELIM_KEY(tbS0)); + reiserfs_mark_buffer_dirty (tb->L[0], 0); + } + + if (tb->CFL[0]) { + copy_key (B_N_PDELIM_KEY (tb->CFL[0], tb->lkey[0]), B_PRIGHT_DELIM_KEY(tbS0)); + reiserfs_mark_buffer_dirty (tb->CFL[0], 0); + } +#endif + + /* if insertion was done before 0-th position in R[0], right + delimiting key of the tb->L[0]'s and left delimiting key are + not set correctly */ + if (tb->CFL[0]) { + if (!tb->CFR[0]) + reiserfs_panic (tb->tb_sb, "vs-12195: balance_leaf: CFR not initialized"); + copy_key (B_N_PDELIM_KEY (tb->CFL[0], tb->lkey[0]), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0])); + do_balance_mark_internal_dirty (tb, tb->CFL[0], 0); + } + + reiserfs_invalidate_buffer(tb,tbS0); + return 0; + } + + + /* Fill new nodes that appear in place of S[0] */ + + /* I am told that this copying is because we need an array to enable + the looping code. -Hans */ + snum[0] = tb->s1num, + snum[1] = tb->s2num; + sbytes[0] = tb->s1bytes; + sbytes[1] = tb->s2bytes; + for( i = tb->blknum[0] - 2; i >= 0; i-- ) { + +#ifdef CONFIG_REISERFS_CHECK + if (!snum[i]) + reiserfs_panic(tb->tb_sb,"PAP-12200: balance_leaf: snum[%d] == %d. 
Must be > 0", i, snum[i]); +#endif /* CONFIG_REISERFS_CHECK */ + + /* here we shift from S to S_new nodes */ + + S_new[i] = get_FEB(tb); + + /* initialized block type and tree level */ + B_BLK_HEAD(S_new[i])->blk_level = cpu_to_le16 (DISK_LEAF_NODE_LEVEL); + + + n = B_NR_ITEMS(tbS0); + + switch (flag) { + case M_INSERT: /* insert item */ + + if ( n - snum[i] < item_pos ) + { /* new item or it's part falls to first new node S_new[i]*/ + if ( item_pos == n - snum[i] + 1 && sbytes[i] != -1 ) + { /* part of new item falls into S_new[i] */ + int old_key_comp, old_len, r_zeros_number; + const char * r_body; + int version; + +#ifdef CONFIG_REISERFS_CHECK + if ( !is_direct_le_ih(ih) ) + /* The items which can be inserted are: + Stat_data item, direct item, indirect item and directory item which consist of only two entries "." and "..". + These items must not be broken except for a direct one. */ + reiserfs_panic(tb->tb_sb, "PAP-12205: balance_leaf: " + "non-direct item can not be broken when inserting"); +#endif + + /* Move snum[i]-1 items from S[0] to S_new[i] */ + leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, -1, S_new[i]); +#if 0/*preserve list*/ + if (snum[i] > 1 ) { + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, S_new[i]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + } +#endif + /* Remember key component and item length */ + version = ih_version (ih); + old_key_comp = le_key_k_offset (version, &(ih->ih_key)); + old_len = le16_to_cpu (ih->ih_item_len); + + /* Calculate key component and item length to insert into S_new[i] */ + set_le_key_k_offset (version, &(ih->ih_key), + le_key_k_offset (version, &(ih->ih_key)) + (old_len - sbytes[i])); + + ih->ih_item_len = cpu_to_le16 (sbytes[i]); + + /* Insert part of the item into S_new[i] before 0-th item */ + bi.tb = tb; + bi.bi_bh = S_new[i]; + bi.bi_parent = 0; + bi.bi_position = 0; + + if ( le_key_k_offset (version, &(ih->ih_key)) - old_key_comp > zeros_num ) { + r_zeros_number = 0; + r_body = body + (le_key_k_offset (version, &(ih->ih_key)) - old_key_comp) - zeros_num; + } + else { + r_body = body; + r_zeros_number = zeros_num - (le_key_k_offset (version, &(ih->ih_key)) - old_key_comp); + zeros_num -= r_zeros_number; + } + + leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number); + + /* Calculate key component and item length to insert into S[i] */ + set_le_key_k_offset (version, &(ih->ih_key), old_key_comp); + ih->ih_item_len = cpu_to_le16 (old_len - sbytes[i]); + tb->insert_size[0] -= sbytes[i]; + } + else /* whole new item falls into S_new[i] */ + { + /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */ + leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, sbytes[i], S_new[i]); + + /* Insert new item into S_new[i] */ + bi.tb = tb; + bi.bi_bh = S_new[i]; + bi.bi_parent = 0; + bi.bi_position = 0; + leaf_insert_into_buf (&bi, item_pos - n + snum[i] - 1, ih, body, zeros_num); +#if 0/*preserve list*/ + if (tb->preserve_mode == PRESERVE_INDIRECT_TO_DIRECT){ + mark_suspected_recipient (tb->tb_sb, bi.bi_bh); + } +#endif + + zeros_num = tb->insert_size[0] = 0; + } + } + + else /* new item or it part don't falls into S_new[i] */ + { + leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, S_new[i]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + } + break; + + case M_PASTE: /* append item */ + + if ( n - snum[i] <= item_pos ) /* pasted item or part if it falls to 
S_new[i] */ + { + if ( item_pos == n - snum[i] && sbytes[i] != -1 ) + { /* we must shift part of the appended item */ + struct item_head * aux_ih; + +#ifdef CONFIG_REISERFS_CHECK + if ( ih ) + reiserfs_panic (tb->tb_sb, "PAP-12210: balance_leaf: ih must be 0"); +#endif /* CONFIG_REISERFS_CHECK */ + + if ( is_direntry_le_ih (aux_ih = B_N_PITEM_HEAD(tbS0,item_pos))) { + /* we append to directory item */ + + int entry_count; + + entry_count = le16_to_cpu (aux_ih->u.ih_entry_count); + + if ( entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count ) { + /* new directory entry falls into S_new[i] */ + +#ifdef CONFIG_REISERFS_CHECK + if ( ! tb->insert_size[0] ) + reiserfs_panic (tb->tb_sb, "PAP-12215: balance_leaif: insert_size is already 0"); + if ( sbytes[i] - 1 >= entry_count ) + reiserfs_panic (tb->tb_sb, "PAP-12220: balance_leaf: " + "there are no so much entries (%d), only %d", + sbytes[i] - 1, entry_count); +#endif + + /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */ + leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i]-1, S_new[i]); +#if 0/*preserve list*/ + /* if more than the affected item is shifted, or if more than + one entry (from the affected item) is shifted */ + if (snum[i] > 1 || sbytes[i] > 1) { + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, S_new[i]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); + } +#endif + /* Paste given directory entry to directory item */ + bi.tb = tb; + bi.bi_bh = S_new[i]; + bi.bi_parent = 0; + bi.bi_position = 0; + leaf_paste_in_buffer (&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, + tb->insert_size[0], body,zeros_num); + /* paste new directory entry */ + leaf_paste_entries ( + bi.bi_bh, 0, pos_in_item - entry_count + sbytes[i] - 1, + 1, (struct reiserfs_de_head *)body, body + DEH_SIZE, + tb->insert_size[0] + ); + tb->insert_size[0] = 0; + pos_in_item++; + } else { /* new directory entry doesn't fall into S_new[i] */ + leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]); + } + } + else /* regular object */ + { + int n_shift, n_rem, r_zeros_number; + const char * r_body; + +#ifdef CONFIG_REISERFS_CHECK + if ( pos_in_item != B_N_PITEM_HEAD(tbS0,item_pos)->ih_item_len || + tb->insert_size[0] <= 0 ) + reiserfs_panic (tb->tb_sb, "PAP-12225: balance_leaf: item too short or insert_size <= 0"); +#endif + + /* Calculate number of bytes which must be shifted from appended item */ + n_shift = sbytes[i] - tb->insert_size[0]; + if ( n_shift < 0 ) + n_shift = 0; + leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]); + + /* Calculate number of bytes which must remain in body after append to S_new[i] */ + n_rem = tb->insert_size[0] - sbytes[i]; + if ( n_rem < 0 ) + n_rem = 0; + /* Append part of body into S_new[0] */ + bi.tb = tb; + bi.bi_bh = S_new[i]; + bi.bi_parent = 0; + bi.bi_position = 0; + + if ( n_rem > zeros_num ) { + r_zeros_number = 0; + r_body = body + n_rem - zeros_num; + } + else { + r_body = body; + r_zeros_number = zeros_num - n_rem; + zeros_num -= r_zeros_number; + } + + leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0]-n_rem, r_body,r_zeros_number); + { + struct item_head * tmp; + + tmp = B_N_PITEM_HEAD(S_new[i],0); + if (is_indirect_le_ih (tmp)) { + if (n_rem) + reiserfs_panic (tb->tb_sb, "PAP-12230: balance_leaf: invalid action with indirect item"); + set_ih_free_space (tmp, ((struct unfm_nodeinfo*)body)->unfm_freespace); + } + set_le_key_k_offset (ih_version (tmp), &tmp->ih_key, + 
le_key_k_offset (ih_version (tmp), &tmp->ih_key) + n_rem); + } + + tb->insert_size[0] = n_rem; + if ( ! n_rem ) + pos_in_item++; + } + } + else + /* item falls wholly into S_new[i] */ + { + int ret_val; + struct item_head * pasted; + +#ifdef CONFIG_REISERFS_CHECK + struct item_head * ih = B_N_PITEM_HEAD(tbS0,item_pos); + + if ( ! is_direntry_le_ih(ih) && (pos_in_item != ih->ih_item_len || + tb->insert_size[0] <= 0) ) + reiserfs_panic (tb->tb_sb, "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len"); +#endif /* CONFIG_REISERFS_CHECK */ + + ret_val = leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]); +#if 0/*preserve list*/ + /* we must preserve that which we are pasting onto the end of and shifting */ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, S_new[i]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + +#ifdef CONFIG_REISERFS_CHECK + if ( ret_val ) + reiserfs_panic (tb->tb_sb, "PAP-12240: balance_leaf: " + "unexpected value returned by leaf_move_items (%d)", + ret_val); +#endif /* CONFIG_REISERFS_CHECK */ + + /* paste into item */ + bi.tb = tb; + bi.bi_bh = S_new[i]; + bi.bi_parent = 0; + bi.bi_position = 0; + leaf_paste_in_buffer(&bi, item_pos - n + snum[i], pos_in_item, tb->insert_size[0], body, zeros_num); + + pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]); + if (is_direntry_le_ih (pasted)) + { + leaf_paste_entries ( + bi.bi_bh, item_pos - n + snum[i], pos_in_item, 1, + (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0] + ); + } + + /* if we paste to indirect item update ih_free_space */ + if (is_indirect_le_ih (pasted)) + set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); + zeros_num = tb->insert_size[0] = 0; + } + } + + else /* pasted item doesn't fall into S_new[i] */ + { + leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]); +#if 0/*preserve list*/ + preserve_shifted(tb, &(PATH_PLAST_BUFFER (tb->tb_path)), tbF0, S0_b_item_order, S_new[i]); + tbS0 = PATH_PLAST_BUFFER (tb->tb_path); +#endif + } + break; + default: /* cases d and t */ + reiserfs_panic (tb->tb_sb, "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)", + (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? 
"CUT" : "UNKNOWN"), flag); + } + + memcpy (insert_key + i,B_N_PKEY(S_new[i],0),KEY_SIZE); + insert_ptr[i] = S_new[i]; + +#ifdef CONFIG_REISERFS_CHECK + if (atomic_read (&(S_new[i]->b_count)) != 1) { + if (atomic_read(&(S_new[i]->b_count)) != 2 || + !(buffer_journaled(S_new[i]) || buffer_journal_dirty(S_new[i]))) { + reiserfs_panic (tb->tb_sb, "PAP-12247: balance_leaf: S_new[%d] : (%b)\n", i, S_new[i]); + } + } +#endif + +#if 0 + /* update right_delimiting_key fields */ + copy_key (B_PRIGHT_DELIM_KEY (S_new[i]), B_PRIGHT_DELIM_KEY (tbS0)); + copy_key (B_PRIGHT_DELIM_KEY (tbS0), B_N_PKEY (S_new[i], 0)); + reiserfs_mark_buffer_dirty (tbS0, 0); +#endif + + } + + /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the + affected item which remains in S */ + if ( 0 <= item_pos && item_pos < tb->s0num ) + { /* if we must insert or append into buffer S[0] */ + + switch (flag) + { + case M_INSERT: /* insert item into S[0] */ + bi.tb = tb; + bi.bi_bh = tbS0; + bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); + bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); + leaf_insert_into_buf (&bi, item_pos, ih, body, zeros_num); +#if 0/*preserve list*/ + if (tb->preserve_mode == PRESERVE_INDIRECT_TO_DIRECT){ + mark_suspected_recipient (tb->tb_sb, bi.bi_bh); + } +#endif + + /* If we insert the first key change the delimiting key */ + if( item_pos == 0 ) { + if (tb->CFL[0]) /* can be 0 in reiserfsck */ + replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0); + +#if 0 /* right delim key support */ +#ifdef CONFIG_REISERFS_CHECK + if ( ! tb->CFL[0] || ! tb->L[0] || (B_NR_ITEMS (tbS0) > 1 && + COMP_KEYS(B_PRIGHT_DELIM_KEY(tb->L[0]), B_N_PKEY(tbS0, 1))) ) + reiserfs_panic(tb->tb_sb, "PAP-12250: balance_leaf: invalid right delimiting key"); + if (!buffer_dirty (tb->L[0]) && !(buffer_journaled(tb->L[0]) || + buffer_journal_dirty(tb->L[0]))) + reiserfs_panic (tb->tb_sb, "PAP-12255: balance_leaf: tb->L[0] must be dirty"); +#endif + if (tb->L[0]) /* can be 0 in reiserfsck */ + copy_key (B_PRIGHT_DELIM_KEY (tb->L[0]), &(ih->ih_key)); +#endif /* right delim key support */ + } + break; + + case M_PASTE: { /* append item in S[0] */ + struct item_head * pasted; + + pasted = B_N_PITEM_HEAD (tbS0, item_pos); + /* when directory, may be new entry already pasted */ + if (is_direntry_le_ih (pasted)) { + if ( pos_in_item >= 0 && pos_in_item <= le16_to_cpu (pasted->u.ih_entry_count) ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( ! tb->insert_size[0] ) + reiserfs_panic (tb->tb_sb, "PAP-12260: balance_leaf: insert_size is 0 already"); +#endif /* CONFIG_REISERFS_CHECK */ + + /* prepare space */ + bi.tb = tb; + bi.bi_bh = tbS0; + bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); + bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); + leaf_paste_in_buffer(&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num); + + +#ifdef CONFIG_REISERFS_CHECK +#if 0 + if ( ! item_pos && ! pos_in_item && (! tb->L[0] || COMP_KEYS(B_PRIGHT_DELIM_KEY(tb->L[0]), + B_N_PKEY(tbS0, 0))) ) + reiserfs_panic(tb->tb_sb, "PAP-12265: balance_leaf: invalid right delimiting key"); +#endif +#endif + + /* paste entry */ + leaf_paste_entries ( + bi.bi_bh, item_pos, pos_in_item, 1, (struct reiserfs_de_head *)body, + body + DEH_SIZE, tb->insert_size[0] + ); + if ( ! item_pos && ! 
pos_in_item ) { + +#ifdef CONFIG_REISERFS_CHECK + if (!tb->CFL[0] || !tb->L[0]) + reiserfs_panic (tb->tb_sb, "PAP-12270: balance_leaf: CFL[0]/L[0] must be specified"); +#endif /* CONFIG_REISERFS_CHECK */ + + if (tb->CFL[0]) { + replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0); + +#if 0 + /* update right delimiting key */ + copy_key (B_PRIGHT_DELIM_KEY (tb->L[0]), B_N_PKEY(tbS0, 0)); + /* probably not needed as something has been shifted to tb->L[0] already */ + reiserfs_mark_buffer_dirty (tb->L[0], 0); +#endif + } + } + tb->insert_size[0] = 0; + } + } else { /* regular object */ + if ( pos_in_item == pasted->ih_item_len ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->insert_size[0] <= 0 ) + reiserfs_panic (tb->tb_sb, + "PAP-12275: balance_leaf: insert size must not be %d", tb->insert_size[0]); +#endif /* CONFIG_REISERFS_CHECK */ + bi.tb = tb; + bi.bi_bh = tbS0; + bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0); + bi.bi_position = PATH_H_POSITION (tb->tb_path, 1); + leaf_paste_in_buffer (&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num); + + if (is_indirect_le_ih (pasted)) { + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->insert_size[0] != UNFM_P_SIZE ) + reiserfs_panic (tb->tb_sb, + "PAP-12280: balance_leaf: insert_size for indirect item must be %d, not %d", + UNFM_P_SIZE, tb->insert_size[0]); +#endif /* CONFIG_REISERFS_CHECK */ + + set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); + } + tb->insert_size[0] = 0; + } + +#ifdef CONFIG_REISERFS_CHECK + else { + if ( tb->insert_size[0] ) { + print_cur_tb ("12285"); + reiserfs_panic (tb->tb_sb, "PAP-12285: balance_leaf: insert_size must be 0 (%d)", tb->insert_size[0]); + } + } +#endif /* CONFIG_REISERFS_CHECK */ + + } + } /* case M_PASTE: */ + } + } + +#ifdef CONFIG_REISERFS_CHECK + if ( flag == M_PASTE && tb->insert_size[0] ) { + print_cur_tb ("12290"); + reiserfs_panic (tb->tb_sb, "PAP-12290: balance_leaf: insert_size is still not 0 (%d)", tb->insert_size[0]); + } +#endif /* CONFIG_REISERFS_CHECK */ + + return 0; +} /* Leaf level of the tree is balanced (end of balance_leaf) */ + + + +/* Make empty node */ +void make_empty_node (struct buffer_info * bi) +{ + struct block_head * blkh; + +#ifdef CONFIG_REISERFS_CHECK + if (bi->bi_bh == NULL) + reiserfs_panic (0, "PAP-12295: make_empty_node: pointer to the buffer is NULL"); +#endif + + (blkh = B_BLK_HEAD(bi->bi_bh))->blk_nr_item = cpu_to_le16 (0); + blkh->blk_free_space = cpu_to_le16 (MAX_CHILD_SIZE(bi->bi_bh)); + + if (bi->bi_parent) + B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size = 0; +} + + +/* Get first empty buffer */ +struct buffer_head * get_FEB (struct tree_balance * tb) +{ + int i; + struct buffer_head * first_b; + struct buffer_info bi; + + for (i = 0; i < MAX_FEB_SIZE; i ++) + if (tb->FEB[i] != 0) + break; + + if (i == MAX_FEB_SIZE) + reiserfs_panic(tb->tb_sb, "vs-12300: get_FEB: FEB list is empty"); + + bi.tb = tb; + bi.bi_bh = first_b = tb->FEB[i]; + bi.bi_parent = 0; + bi.bi_position = 0; + make_empty_node (&bi); + set_bit(BH_Uptodate, &first_b->b_state); + tb->FEB[i] = 0; + tb->used[i] = first_b; + +#ifdef REISERFS_FSCK + mark_block_formatted (first_b->b_blocknr); +#endif + + return(first_b); +} + + +/* This is now used because reiserfs_free_block has to be able to +** schedule. 
+*/ +static void store_thrown (struct tree_balance * tb, struct buffer_head * bh) +{ + int i; + + if (buffer_dirty (bh)) + printk ("store_thrown deals with dirty buffer\n"); + for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i ++) + if (!tb->thrown[i]) { + tb->thrown[i] = bh; + atomic_inc(&bh->b_count) ; /* decremented in free_thrown */ + return; + } + reiserfs_warning ("store_thrown: too many thrown buffers\n"); +} + +static void free_thrown(struct tree_balance *tb) { + int i ; + unsigned long blocknr ; + for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i++) { + if (tb->thrown[i]) { + blocknr = tb->thrown[i]->b_blocknr ; + if (buffer_dirty (tb->thrown[i])) + printk ("free_thrown deals with dirty buffer %ld\n", blocknr); + brelse(tb->thrown[i]) ; /* incremented in store_thrown */ + reiserfs_free_block (tb->transaction_handle, blocknr); + } + } +} + +void reiserfs_invalidate_buffer (struct tree_balance * tb, struct buffer_head * bh) +{ + B_BLK_HEAD (bh)->blk_level = cpu_to_le16 (FREE_LEVEL)/*0*/; + B_BLK_HEAD (bh)->blk_nr_item = cpu_to_le16 (0); + mark_buffer_clean (bh); + /* reiserfs_free_block is no longer schedule safe + reiserfs_free_block (tb->transaction_handle, tb->tb_sb, bh->b_blocknr); + */ + + store_thrown (tb, bh); +#if 0 +#ifdef REISERFS_FSCK + { + struct buffer_head * to_be_forgotten; + + to_be_forgotten = find_buffer (bh->b_dev, bh->b_blocknr, bh->b_size); + if (to_be_forgotten) { + to_be_forgotten->b_count ++; + bforget (to_be_forgotten); + } + unmark_block_formatted (bh->b_blocknr); + } +#endif +#endif +} + +/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/ +void replace_key (struct tree_balance * tb, struct buffer_head * dest, int n_dest, + struct buffer_head * src, int n_src) +{ + +#ifdef CONFIG_REISERFS_CHECK + if (dest == NULL || src == NULL) + reiserfs_panic (0, "vs-12305: replace_key: sourse or destination buffer is 0 (src=%p, dest=%p)", src, dest); + + if ( ! B_IS_KEYS_LEVEL (dest) ) + reiserfs_panic (0, "vs-12310: replace_key: invalid level (%z) for destination buffer. 
dest must be leaf", + dest); + + if (n_dest < 0 || n_src < 0) + reiserfs_panic (0, "vs-12315: replace_key: src(%d) or dest(%d) key number less than 0", n_src, n_dest); + + if (n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src)) + reiserfs_panic (0, "vs-12320: replace_key: src(%d(%d)) or dest(%d(%d)) key number is too big", + n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest)); +#endif /* CONFIG_REISERFS_CHECK */ + + if (B_IS_ITEMS_LEVEL (src)) + /* source buffer contains leaf node */ + memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PITEM_HEAD(src,n_src), KEY_SIZE); + else + memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PDELIM_KEY(src,n_src), KEY_SIZE); + + do_balance_mark_internal_dirty (tb, dest, 0); +} + + +int get_left_neighbor_position ( + struct tree_balance * tb, + int h + ) +{ + int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1); + +#ifdef CONFIG_REISERFS_CHECK + if (PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FL[h] == 0) + reiserfs_panic (tb->tb_sb, "vs-12325: get_left_neighbor_position: FL[%d](%p) or F[%d](%p) does not exist", + h, tb->FL[h], h, PATH_H_PPARENT (tb->tb_path, h)); +#endif + + if (Sh_position == 0) + return B_NR_ITEMS (tb->FL[h]); + else + return Sh_position - 1; +} + + +int get_right_neighbor_position (struct tree_balance * tb, int h) +{ + int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1); + +#ifdef CONFIG_REISERFS_CHECK + if (PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FR[h] == 0) + reiserfs_panic (tb->tb_sb, "vs-12330: get_right_neighbor_position: F[%d](%p) or FR[%d](%p) does not exist", + h, PATH_H_PPARENT (tb->tb_path, h), h, tb->FR[h]); +#endif + + if (Sh_position == B_NR_ITEMS (PATH_H_PPARENT (tb->tb_path, h))) + return 0; + else + return Sh_position + 1; +} + + +#ifdef CONFIG_REISERFS_CHECK + +int is_reusable (struct super_block * s, unsigned long block, int bit_value); +static void check_internal_node (struct super_block * s, struct buffer_head * bh, char * mes) +{ + struct disk_child * dc; + int i; + + if (!bh) + reiserfs_panic (s, "PAP-12336: check_internal_node: bh == 0"); + + if (!bh || !B_IS_IN_TREE (bh)) + return; + + if (!buffer_dirty (bh) && + !(buffer_journaled(bh) || buffer_journal_dirty(bh))) { + reiserfs_panic (s, "PAP-12337: check_internal_node: buffer (%b) must be dirty", bh); + } + + dc = B_N_CHILD (bh, 0); + + for (i = 0; i <= B_NR_ITEMS (bh); i ++, dc ++) { + if (!is_reusable (s, dc->dc_block_number, 1) ) { + print_cur_tb (mes); + reiserfs_panic (s, "PAP-12338: check_internal_node: invalid child pointer %y in %b", dc, bh); + } + } +} + + +static int locked_or_not_in_tree (struct buffer_head * bh, char * which) +{ + if ( buffer_locked (bh) || !B_IS_IN_TREE (bh) ) { + reiserfs_warning ("vs-12339: locked_or_not_in_tree: %s (%b)\n", which, bh); + return 1; + } + return 0; +} + + +static int check_before_balancing (struct tree_balance * tb) +{ + int retval = 0; + + if ( cur_tb ) { + reiserfs_panic (tb->tb_sb, "vs-12335: check_before_balancing: " + "suspect that schedule occurred based on cur_tb not being null at this point in code. " + "do_balance cannot properly handle schedule occuring while it runs."); + } + + /* double check that buffers that we will modify are unlocked. (fix_nodes should already have + prepped all of these for us). 
*/ + if ( tb->lnum[0] ) { + retval |= locked_or_not_in_tree (tb->L[0], "L[0]"); + retval |= locked_or_not_in_tree (tb->FL[0], "FL[0]"); + retval |= locked_or_not_in_tree (tb->CFL[0], "CFL[0]"); + check_leaf (tb->L[0]); + } + if ( tb->rnum[0] ) { + retval |= locked_or_not_in_tree (tb->R[0], "R[0]"); + retval |= locked_or_not_in_tree (tb->FR[0], "FR[0]"); + retval |= locked_or_not_in_tree (tb->CFR[0], "CFR[0]"); + check_leaf (tb->R[0]); + } + retval |= locked_or_not_in_tree (PATH_PLAST_BUFFER (tb->tb_path), "S[0]"); + check_leaf (PATH_PLAST_BUFFER (tb->tb_path)); + + return retval; +} + + +void check_after_balance_leaf (struct tree_balance * tb) +{ + if (tb->lnum[0]) { + if (B_FREE_SPACE (tb->L[0]) != + MAX_CHILD_SIZE (tb->L[0]) - B_N_CHILD (tb->FL[0], get_left_neighbor_position (tb, 0))->dc_size) { + print_cur_tb ("12221"); + reiserfs_panic (tb->tb_sb, "PAP-12355: check_after_balance_leaf: shift to left was incorrect"); + } + } + if (tb->rnum[0]) { + if (B_FREE_SPACE (tb->R[0]) != + MAX_CHILD_SIZE (tb->R[0]) - B_N_CHILD (tb->FR[0], get_right_neighbor_position (tb, 0))->dc_size) { + print_cur_tb ("12222"); + reiserfs_panic (tb->tb_sb, "PAP-12360: check_after_balance_leaf: shift to right was incorrect"); + } + } + if (PATH_H_PBUFFER(tb->tb_path,1) && (B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) != + (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) - + B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1), + PATH_H_POSITION (tb->tb_path, 1))->dc_size))) { + print_cur_tb ("12223"); + reiserfs_panic (tb->tb_sb, "PAP-12365: check_after_balance_leaf: S is incorrect"); + } +} + + +void check_leaf_level (struct tree_balance * tb) +{ + check_leaf (tb->L[0]); + check_leaf (tb->R[0]); + check_leaf (PATH_PLAST_BUFFER (tb->tb_path)); +} + +void check_internal_levels (struct tree_balance * tb) +{ + int h; + + /* check all internal nodes */ + for (h = 1; tb->insert_size[h]; h ++) { + check_internal_node (tb->tb_sb, PATH_H_PBUFFER (tb->tb_path, h), "BAD BUFFER ON PATH"); + if (tb->lnum[h]) + check_internal_node (tb->tb_sb, tb->L[h], "BAD L"); + if (tb->rnum[h]) + check_internal_node (tb->tb_sb, tb->R[h], "BAD R"); + } + +} + +#endif + + + + + + +/* Now we have all of the buffers that must be used in balancing of + the tree. We rely on the assumption that schedule() will not occur + while do_balance works. ( Only interrupt handlers are acceptable.) + We balance the tree according to the analysis made before this, + using buffers already obtained. For SMP support it will someday be + necessary to add ordered locking of tb. */ + +/* Some interesting rules of balancing: + + we delete a maximum of two nodes per level per balancing: we never + delete R, when we delete two of three nodes L, S, R then we move + them into R. + + we only delete L if we are deleting two nodes, if we delete only + one node we delete S + + if we shift leaves then we shift as much as we can: this is a + deliberate policy of extremism in node packing which results in + higher average utilization after repeated random balance operations + at the cost of more memory copies and more balancing as a result of + small insertions to full nodes. + + if we shift internal nodes we try to evenly balance the node + utilization, with consequent less balancing at the cost of lower + utilization. + + one could argue that the policy for directories in leaves should be + that of internal nodes, but we will wait until another day to + evaluate this.... It would be nice to someday measure and prove + these assumptions as to what is optimal.... 
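The shift policy described in the comment above can be condensed into a small stand-alone sketch. This is an illustration only, not part of the patch: the function names and the simplified item-size array are invented here. The greedy leaf side corresponds to what check_left()/check_right() compute, and the evened-out internal side to the to_l/to_r calculations in the SET_PAR_SHIFT_LEFT/RIGHT macros later in this patch.

/* illustration: pack a leaf neighbor as full as possible */
static int greedy_leaf_shift (int *item_size, int nr_items, int neighbor_free)
{
	int shifted = 0;

	/* leaves: shift every whole item that still fits; higher average
	   utilization at the cost of more copying and more balancing */
	while (shifted < nr_items && item_size[shifted] <= neighbor_free) {
		neighbor_free -= item_size[shifted];
		shifted ++;
	}
	return shifted;
}

/* illustration: internal nodes are only evened out, not packed */
static int even_internal_shift (int items_in_S, int items_in_neighbor)
{
	int diff = items_in_S - items_in_neighbor;

	return (diff > 0) ? (diff / 2) : 0;
}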
+ +*/ + +static inline void do_balance_starts (struct tree_balance *tb) +{ + /* use print_cur_tb() to see initial state of struct + tree_balance */ + + /* store_print_tb (tb); */ + +#ifdef CONFIG_REISERFS_CHECK + + /* do not delete, just comment it out */ +/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb, + "check");*/ + + if (check_before_balancing (tb)) + reiserfs_panic (tb->tb_sb, "PAP-12340: do_balance: locked buffers in TB"); + +#ifndef __KERNEL__ + if ( atomic_read(&(PATH_PLAST_BUFFER(tb->tb_path)->b_count)) > 1 || (tb->L[0] && atomic_read(&(tb->L[0]->b_count)) > 1) || + (tb->R[0] && atomic_read(&(tb->R[0]->b_count)) > 1) ) { + print_cur_tb ("first three parameters are invalid"); + reiserfs_panic (tb->tb_sb, "PAP-12345: do_balance: counter too big"); + } +#endif /* !__KERNEL__ */ + cur_tb = tb; + +#endif /* CONFIG_REISERFS_CHECK */ +} + + +static inline void do_balance_completed (struct tree_balance * tb) +{ + +#ifdef CONFIG_REISERFS_CHECK + check_leaf_level (tb); + check_internal_levels (tb); + cur_tb = NULL; +#endif + + /* reiserfs_free_block is no longer schedule safe. So, we need to + ** put the buffers we want freed on the thrown list during do_balance, + ** and then free them now + */ + + tb->tb_sb->u.reiserfs_sb.s_do_balance ++; + + + /* release all nodes hold to perform the balancing */ + unfix_nodes(tb); + + free_thrown(tb) ; +} + + + + + +void do_balance (struct tree_balance * tb, /* tree_balance structure */ + struct item_head * ih, /* item header of inserted item */ + const char * body, /* body of inserted item or bytes to paste */ + int flag) /* i - insert, d - delete + c - cut, p - paste + + Cut means delete part of an item + (includes removing an entry from a + directory). + + Delete means delete whole item. + + Insert means add a new item into the + tree. + + Paste means to append to the end of an + existing file or to insert a directory + entry. */ +{ + int child_pos, /* position of a child node in its parent */ + h; /* level of the tree being processed */ + struct item_head insert_key[2]; /* in our processing of one level + we sometimes determine what + must be inserted into the next + higher level. This insertion + consists of a key or two keys + and their corresponding + pointers */ + struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next + level */ + + tb->tb_mode = flag; + tb->need_balance_dirty = 0; + + if (FILESYSTEM_CHANGED_TB(tb)) { + reiserfs_panic(tb->tb_sb, "clm-6000: do_balance, fs generation has changed\n") ; + } + /* if we have no real work to do */ + if ( ! 
tb->insert_size[0] ) { + reiserfs_warning ("PAP-12350: do_balance: insert_size == 0, mode == %c", + flag); + unfix_nodes(tb); + return; + } + + atomic_inc (&(fs_generation (tb->tb_sb))); + do_balance_starts (tb); + +#ifdef REISERFS_FSCK + if (flag == M_INTERNAL) { + insert_ptr[0] = (struct buffer_head *)body; + /* we must prepare insert_key */ + + if (PATH_H_B_ITEM_ORDER (tb->tb_path, 0)/*LAST_POSITION (tb->tb_path)*//*item_pos*/ == -1) { + /* get delimiting key from buffer in tree */ + copy_key (&insert_key[0].ih_key, B_N_PKEY (PATH_PLAST_BUFFER (tb->tb_path), 0)); + /*insert_ptr[0]->b_item_order = 0;*/ + } else { + /* get delimiting key from new buffer */ + copy_key (&insert_key[0].ih_key, B_N_PKEY((struct buffer_head *)body,0)); + /*insert_ptr[0]->b_item_order = item_pos;*/ + } + + /* and insert_ptr instead of balance_leaf */ + child_pos = PATH_H_B_ITEM_ORDER (tb->tb_path, 0)/*item_pos*/; + } else +#endif + + /* balance leaf returns 0 except if combining L R and S into + one node. see balance_internal() for explanation of this + line of code.*/ + child_pos = PATH_H_B_ITEM_ORDER (tb->tb_path, 0) + + balance_leaf (tb, ih, body, flag, insert_key, insert_ptr); + +#ifdef CONFIG_REISERFS_CHECK + check_after_balance_leaf (tb); +#endif + + /* Balance internal level of the tree. */ + for ( h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++ ) + child_pos = balance_internal (tb, h, child_pos, insert_key, insert_ptr); + + + do_balance_completed (tb); + +} diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/file.c linux/fs/reiserfs/file.c --- v2.4.0/linux/fs/reiserfs/file.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/file.c Mon Jan 15 13:23:01 2001 @@ -0,0 +1,124 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + + +#ifdef __KERNEL__ + +#include +#include +#include + +#else + +#include "nokernel.h" + +#endif + +/* +** We pack the tails of files on file close, not at the time they are written. +** This implies an unnecessary copy of the tail and an unnecessary indirect item +** insertion/balancing, for files that are written in one write. +** It avoids unnecessary tail packings (balances) for files that are written in +** multiple writes and are small enough to have tails. +** +** file_release is called by the VFS layer when the file is closed. If +** this is the last open file descriptor, and the file +** small enough to have a tail, and the tail is currently in an +** unformatted node, the tail is converted back into a direct item. +** +** We use reiserfs_truncate_file to pack the tail, since it already has +** all the conditions coded. 
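The close-time decision described above amounts to a three-part test. The struct and helper below are a sketch invented for illustration (the real code uses inode->i_count, i_pack_on_close and tail_has_to_be_packed(), as in reiserfs_file_release() that follows): only the last closer repacks, and only when an unpacked tail is actually there.

struct toy_inode {
	int open_count;       /* file descriptors still referencing the inode */
	int pack_on_close;    /* earlier writes left the tail unpacked */
	int tail_needs_pack;  /* file is small enough to keep its tail direct */
};

static int should_pack_tail (struct toy_inode *inode)
{
	/* do the (comparatively expensive) indirect-to-direct conversion
	   only on the last close, and only when there is a tail to repack */
	return inode->open_count <= 1 &&
	       inode->pack_on_close &&
	       inode->tail_needs_pack;
}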
+*/ +static int reiserfs_file_release (struct inode * inode, struct file * filp) +{ + + struct reiserfs_transaction_handle th ; + int windex ; + + if (!S_ISREG (inode->i_mode)) + BUG (); + + /* fast out for when nothing needs to be done */ + if ((atomic_read(&inode->i_count) > 1 || + !inode->u.reiserfs_i.i_pack_on_close || + !tail_has_to_be_packed(inode)) && + inode->u.reiserfs_i.i_prealloc_count <= 0) { + return 0; + } + + lock_kernel() ; + down (&inode->i_sem); + journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3) ; + +#ifdef REISERFS_PREALLOCATE + reiserfs_discard_prealloc (&th, inode); +#endif + journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3) ; + + if (atomic_read(&inode->i_count) <= 1 && + inode->u.reiserfs_i.i_pack_on_close && + tail_has_to_be_packed (inode)) { + /* if regular file is released by last holder and it has been + appended (we append by unformatted node only) or its direct + item(s) had to be converted, then it may have to be + indirect2direct converted */ + windex = push_journal_writer("file_release") ; + reiserfs_truncate_file(inode, 0) ; + pop_journal_writer(windex) ; + } + up (&inode->i_sem); + unlock_kernel() ; + return 0; +} + +static void reiserfs_vfs_truncate_file(struct inode *inode) { + reiserfs_truncate_file(inode, 1) ; +} + +/* Sync a reiserfs file. */ +static int reiserfs_sync_file( + struct file * p_s_filp, + struct dentry * p_s_dentry, + int datasync + ) { + struct inode * p_s_inode = p_s_dentry->d_inode; + struct reiserfs_transaction_handle th ; + int n_err = 0; + int windex ; + int jbegin_count = 1 ; + + lock_kernel() ; + + if (!S_ISREG(p_s_inode->i_mode)) + BUG (); + + n_err = fsync_inode_buffers(p_s_inode) ; + /* commit the current transaction to flush any metadata + ** changes. sys_fsync takes care of flushing the dirty pages for us + */ + journal_begin(&th, p_s_inode->i_sb, jbegin_count) ; + windex = push_journal_writer("sync_file") ; + reiserfs_update_sd(&th, p_s_inode); + pop_journal_writer(windex) ; + journal_end_sync(&th, p_s_inode->i_sb,jbegin_count) ; + unlock_kernel() ; + return ( n_err < 0 ) ? 
-EIO : 0; +} + + +struct file_operations reiserfs_file_operations = { + read: generic_file_read, + write: generic_file_write, + ioctl: reiserfs_ioctl, + mmap: generic_file_mmap, + release: reiserfs_file_release, + fsync: reiserfs_sync_file, +}; + + +struct inode_operations reiserfs_file_inode_operations = { + truncate: reiserfs_vfs_truncate_file, +}; + + diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/fix_node.c linux/fs/reiserfs/fix_node.c --- v2.4.0/linux/fs/reiserfs/fix_node.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/fix_node.c Mon Jan 15 15:31:19 2001 @@ -0,0 +1,2908 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + +/** + ** old_item_num + ** old_entry_num + ** set_entry_sizes + ** create_virtual_node + ** check_left + ** check_right + ** directory_part_size + ** get_num_ver + ** set_parameters + ** is_leaf_removable + ** are_leaves_removable + ** get_empty_nodes + ** get_lfree + ** get_rfree + ** is_left_neighbor_in_cache + ** decrement_key + ** get_far_parent + ** get_parents + ** can_node_be_removed + ** ip_check_balance + ** dc_check_balance_internal + ** dc_check_balance_leaf + ** dc_check_balance + ** check_balance + ** get_direct_parent + ** get_neighbors + ** fix_nodes + ** + ** + **/ + + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +#else + +#include "nokernel.h" + +#endif + + + +/* To make any changes in the tree we find a node, that contains item + to be changed/deleted or position in the node we insert a new item + to. We call this node S. To do balancing we need to decide what we + will shift to left/right neighbor, or to a new node, where new item + will be etc. To make this analysis simpler we build virtual + node. Virtual node is an array of items, that will replace items of + node S. (For instance if we are going to delete an item, virtual + node does not contain it). Virtual node keeps information about + item sizes and types, mergeability of first and last items, sizes + of all entries in directory item. We use this array of items when + calculating what we can shift to neighbors and how many nodes we + have to have if we do not any shiftings, if we shift to left/right + neighbor or to both. */ + + +/* taking item number in virtual node, returns number of item, that it has in source buffer */ +static inline int old_item_num (int new_num, int affected_item_num, int mode) +{ + if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) + return new_num; + + if (mode == M_INSERT) { + +#ifdef CONFIG_REISERFS_CHECK + if (new_num == 0) + reiserfs_panic (0,"vs-8005: old_item_num: for INSERT mode and item number of inserted item"); +#endif + + return new_num - 1; + } + +#ifdef CONFIG_REISERFS_CHECK + if (mode != M_DELETE) + reiserfs_panic (0, "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'", mode); +#endif + + /* delete mode */ + return new_num + 1; +} + +static void create_virtual_node (struct tree_balance * tb, int h) +{ + struct item_head * ih; + struct virtual_node * vn = tb->tb_vn; + int new_num; + struct buffer_head * Sh; /* this comes from tb->S[h] */ + + Sh = PATH_H_PBUFFER (tb->tb_path, h); + + /* size of changed node */ + vn->vn_size = MAX_CHILD_SIZE (Sh) - B_FREE_SPACE (Sh) + tb->insert_size[h]; + + /* for internal nodes array if virtual items is not created */ + if (h) { + vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE); + return; + } + + /* number of items in virtual node */ + vn->vn_nr_item = B_NR_ITEMS (Sh) + ((vn->vn_mode == M_INSERT)? 
1 : 0) - ((vn->vn_mode == M_DELETE)? 1 : 0); + + /* first virtual item */ + vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1); + memset (vn->vn_vi, 0, vn->vn_nr_item * sizeof (struct virtual_item)); + vn->vn_free_ptr += vn->vn_nr_item * sizeof (struct virtual_item); + + + /* first item in the node */ + ih = B_N_PITEM_HEAD (Sh, 0); + + /* define the mergeability for 0-th item (if it is not being deleted) */ +#ifdef REISERFS_FSCK + if (is_left_mergeable (tb->tb_sb, tb->tb_path) == 1 && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num)) +#else + if (op_is_left_mergeable (&(ih->ih_key), Sh->b_size) && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num)) +#endif + vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE; + + /* go through all items those remain in the virtual node (except for the new (inserted) one) */ + for (new_num = 0; new_num < vn->vn_nr_item; new_num ++) { + int j; + struct virtual_item * vi = vn->vn_vi + new_num; + int is_affected = ((new_num != vn->vn_affected_item_num) ? 0 : 1); + + + if (is_affected && vn->vn_mode == M_INSERT) + continue; + + /* get item number in source node */ + j = old_item_num (new_num, vn->vn_affected_item_num, vn->vn_mode); + + vi->vi_item_len += ih[j].ih_item_len + IH_SIZE; + vi->vi_ih = ih + j; + vi->vi_item = B_I_PITEM (Sh, ih + j); + vi->vi_uarea = vn->vn_free_ptr; + + // FIXME: there is no check, that item operation did not + // consume too much memory + vn->vn_free_ptr += op_create_vi (vn, vi, is_affected, tb->insert_size [0]); + if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr) + reiserfs_panic (tb->tb_sb, "vs-8030: create_virtual_node: " + "virtual node space consumed"); + + if (!is_affected) + /* this is not being changed */ + continue; + + if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) { + vn->vn_vi[new_num].vi_item_len += tb->insert_size[0]; + vi->vi_new_data = vn->vn_data; // pointer to data which is going to be pasted + } + } + + + /* virtual inserted item is not defined yet */ + if (vn->vn_mode == M_INSERT) { + struct virtual_item * vi = vn->vn_vi + vn->vn_affected_item_num; + +#ifdef CONFIG_REISERFS_CHECK + if (vn->vn_ins_ih == 0) + reiserfs_panic (0, "vs-8040: create_virtual_node: item header of inserted item is not specified"); +#endif + + vi->vi_item_len = tb->insert_size[0]; + vi->vi_ih = vn->vn_ins_ih; + vi->vi_item = vn->vn_data; + vi->vi_uarea = vn->vn_free_ptr; + + op_create_vi (vn, vi, 0/*not pasted or cut*/, tb->insert_size [0]); +#if 0 + switch (type/*le_key_k_type (ih_version (vn->vn_ins_ih), &(vn->vn_ins_ih->ih_key))*/) { + case TYPE_STAT_DATA: + vn->vn_vi[vn->vn_affected_item_num].vi_type |= VI_TYPE_STAT_DATA; + break; + case TYPE_DIRECT: + vn->vn_vi[vn->vn_affected_item_num].vi_type |= VI_TYPE_DIRECT; + break; + case TYPE_INDIRECT: + vn->vn_vi[vn->vn_affected_item_num].vi_type |= VI_TYPE_INDIRECT; + break; + default: + /* inseted item is directory (it must be item with "." 
and "..") */ + vn->vn_vi[vn->vn_affected_item_num].vi_type |= + (VI_TYPE_DIRECTORY | VI_TYPE_FIRST_DIRECTORY_ITEM | VI_TYPE_INSERTED_DIRECTORY_ITEM); + + /* this directory item can not be split, so do not set sizes of entries */ + break; + } +#endif + } + + /* set right merge flag we take right delimiting key and check whether it is a mergeable item */ + if (tb->CFR[0]) { + struct key * key; + + key = B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]); +#ifdef REISERFS_FSCK + if (is_right_mergeable (tb->tb_sb, tb->tb_path) == 1 && (vn->vn_mode != M_DELETE || + vn->vn_affected_item_num != B_NR_ITEMS (Sh) - 1)) +#else + if (op_is_left_mergeable (key, Sh->b_size) && (vn->vn_mode != M_DELETE || + vn->vn_affected_item_num != B_NR_ITEMS (Sh) - 1)) +#endif + vn->vn_vi[vn->vn_nr_item-1].vi_type |= VI_TYPE_RIGHT_MERGEABLE; + +#ifdef CONFIG_REISERFS_CHECK + if (op_is_left_mergeable (key, Sh->b_size) && + !(vn->vn_mode != M_DELETE || vn->vn_affected_item_num != B_NR_ITEMS (Sh) - 1) ) { + /* we delete last item and it could be merged with right neighbor's first item */ + if (!(B_NR_ITEMS (Sh) == 1 && is_direntry_le_ih (B_N_PITEM_HEAD (Sh, 0)) && + I_ENTRY_COUNT (B_N_PITEM_HEAD (Sh, 0)) == 1)) { + /* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */ + print_block (Sh, 0, -1, -1); + reiserfs_panic (tb->tb_sb, "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c", + key, vn->vn_affected_item_num, vn->vn_mode, M_DELETE); + } else + /* we can delete directory item, that has only one directory entry in it */ + ; + } +#endif + + } +} + + +/* using virtual node check, how many items can be shifted to left + neighbor */ +static void check_left (struct tree_balance * tb, int h, int cur_free) +{ + int i; + struct virtual_node * vn = tb->tb_vn; + struct virtual_item * vi; + int d_size, ih_size; + +#ifdef CONFIG_REISERFS_CHECK + if (cur_free < 0) + reiserfs_panic (0, "vs-8050: check_left: cur_free (%d) < 0", cur_free); +#endif + + /* internal level */ + if (h > 0) { + tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE); + return; + } + + /* leaf level */ + + if (!cur_free || !vn->vn_nr_item) { + /* no free space or nothing to move */ + tb->lnum[h] = 0; + tb->lbytes = -1; + return; + } + +#ifdef CONFIG_REISERFS_CHECK + if (!PATH_H_PPARENT (tb->tb_path, 0)) + reiserfs_panic (0, "vs-8055: check_left: parent does not exist or invalid"); +#endif + + vi = vn->vn_vi; + if ((unsigned int)cur_free >= (vn->vn_size - ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? 
IH_SIZE : 0))) { + /* all contents of S[0] fits into L[0] */ + +#ifdef CONFIG_REISERFS_CHECK + if (vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE) + reiserfs_panic (0, "vs-8055: check_left: invalid mode or balance condition failed"); +#endif + + tb->lnum[0] = vn->vn_nr_item; + tb->lbytes = -1; + return; + } + + + d_size = 0, ih_size = IH_SIZE; + + /* first item may be merge with last item in left neighbor */ + if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE) + d_size = -((int)IH_SIZE), ih_size = 0; + + tb->lnum[0] = 0; + for (i = 0; i < vn->vn_nr_item; i ++, ih_size = IH_SIZE, d_size = 0, vi ++) { + d_size += vi->vi_item_len; + if (cur_free >= d_size) { + /* the item can be shifted entirely */ + cur_free -= d_size; + tb->lnum[0] ++; + continue; + } + + /* the item cannot be shifted entirely, try to split it */ + /* check whether L[0] can hold ih and at least one byte of the item body */ + if (cur_free <= ih_size) { + /* cannot shift even a part of the current item */ + tb->lbytes = -1; + return; + } + cur_free -= ih_size; + + tb->lbytes = op_check_left (vi, cur_free, 0, 0); + if (tb->lbytes != -1) + /* count partially shifted item */ + tb->lnum[0] ++; + + break; + } + + return; +} + + +/* using virtual node check, how many items can be shifted to right + neighbor */ +static void check_right (struct tree_balance * tb, int h, int cur_free) +{ + int i; + struct virtual_node * vn = tb->tb_vn; + struct virtual_item * vi; + int d_size, ih_size; + +#ifdef CONFIG_REISERFS_CHECK + if (cur_free < 0) + reiserfs_panic (tb->tb_sb, "vs-8070: check_right: cur_free < 0"); +#endif + + /* internal level */ + if (h > 0) { + tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE); + return; + } + + /* leaf level */ + + if (!cur_free || !vn->vn_nr_item) { + /* no free space */ + tb->rnum[h] = 0; + tb->rbytes = -1; + return; + } + +#ifdef CONFIG_REISERFS_CHECK + if (!PATH_H_PPARENT (tb->tb_path, 0)) + reiserfs_panic (tb->tb_sb, "vs-8075: check_right: parent does not exist or invalid"); +#endif + + vi = vn->vn_vi + vn->vn_nr_item - 1; + if ((unsigned int)cur_free >= (vn->vn_size - ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? 
IH_SIZE : 0))) { + /* all contents of S[0] fits into R[0] */ + +#ifdef CONFIG_REISERFS_CHECK + if (vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE) + reiserfs_panic (tb->tb_sb, "vs-8080: check_right: invalid mode or balance condition failed"); +#endif + + tb->rnum[h] = vn->vn_nr_item; + tb->rbytes = -1; + return; + } + + d_size = 0, ih_size = IH_SIZE; + + /* last item may be merge with first item in right neighbor */ + if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) + d_size = -(int)IH_SIZE, ih_size = 0; + + tb->rnum[0] = 0; + for (i = vn->vn_nr_item - 1; i >= 0; i --, d_size = 0, ih_size = IH_SIZE, vi --) { + d_size += vi->vi_item_len; + if (cur_free >= d_size) { + /* the item can be shifted entirely */ + cur_free -= d_size; + tb->rnum[0] ++; + continue; + } + + /* check whether R[0] can hold ih and at least one byte of the item body */ + if ( cur_free <= ih_size ) { /* cannot shift even a part of the current item */ + tb->rbytes = -1; + return; + } + + /* R[0] can hold the header of the item and at least one byte of its body */ + cur_free -= ih_size; /* cur_free is still > 0 */ + + tb->rbytes = op_check_right (vi, cur_free); + if (tb->rbytes != -1) + /* count partially shifted item */ + tb->rnum[0] ++; + + break; + } + + return; +} + + +/* + * from - number of items, which are shifted to left neighbor entirely + * to - number of item, which are shifted to right neighbor entirely + * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor + * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */ +static int get_num_ver (int mode, struct tree_balance * tb, int h, + int from, int from_bytes, + int to, int to_bytes, + short * snum012, int flow + ) +{ + int i; + int cur_free; + // int bytes; + int units; + struct virtual_node * vn = tb->tb_vn; + // struct virtual_item * vi; + + int total_node_size, max_node_size, current_item_size; + int needed_nodes; + int start_item, /* position of item we start filling node from */ + end_item, /* position of item we finish filling node by */ + start_bytes,/* number of first bytes (entries for directory) of start_item-th item + we do not include into node that is being filled */ + end_bytes; /* number of last bytes (entries for directory) of end_item-th item + we do node include into node that is being filled */ + int split_item_positions[2]; /* these are positions in virtual item of + items, that are split between S[0] and + S1new and S1new and S2new */ + + split_item_positions[0] = -1; + split_item_positions[1] = -1; + +#ifdef CONFIG_REISERFS_CHECK + /* We only create additional nodes if we are in insert or paste mode + or we are in replace mode at the internal level. If h is 0 and + the mode is M_REPLACE then in fix_nodes we change the mode to + paste or insert before we get here in the code. 
*/ + if ( tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE)) + reiserfs_panic (0, "vs-8100: get_num_ver: insert_size < 0 in overflow"); +#endif + + max_node_size = MAX_CHILD_SIZE (PATH_H_PBUFFER (tb->tb_path, h)); + + /* snum012 [0-2] - number of items, that lay + to S[0], first new node and second new node */ + snum012[3] = -1; /* s1bytes */ + snum012[4] = -1; /* s2bytes */ + + /* internal level */ + if (h > 0) { + i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE); + if (i == max_node_size) + return 1; + return (i / max_node_size + 1); + } + + /* leaf level */ + needed_nodes = 1; + total_node_size = 0; + cur_free = max_node_size; + + // start from 'from'-th item + start_item = from; + // skip its first 'start_bytes' units + start_bytes = ((from_bytes != -1) ? from_bytes : 0); + + // last included item is the 'end_item'-th one + end_item = vn->vn_nr_item - to - 1; + // do not count last 'end_bytes' units of 'end_item'-th item + end_bytes = (to_bytes != -1) ? to_bytes : 0; + + /* go through all item begining from the start_item-th item and ending by + the end_item-th item. Do not count first 'start_bytes' units of + 'start_item'-th item and last 'end_bytes' of 'end_item'-th item */ + + for (i = start_item; i <= end_item; i ++) { + struct virtual_item * vi = vn->vn_vi + i; + int skip_from_end = ((i == end_item) ? end_bytes : 0); + +#ifdef CONFIG_REISERFS_CHECK + if (needed_nodes > 3) { + reiserfs_panic (tb->tb_sb, "vs-8105: get_num_ver: too many nodes are needed"); + } +#endif + + /* get size of current item */ + current_item_size = vi->vi_item_len; + + /* do not take in calculation head part (from_bytes) of from-th item */ + current_item_size -= op_part_size (vi, 0/*from start*/, start_bytes); + + /* do not take in calculation tail part of last item */ + current_item_size -= op_part_size (vi, 1/*from end*/, skip_from_end); + + /* if item fits into current node entierly */ + if (total_node_size + current_item_size <= max_node_size) { + snum012[needed_nodes - 1] ++; + total_node_size += current_item_size; + start_bytes = 0; + continue; + } + + if (current_item_size > max_node_size) { + /* virtual item length is longer, than max size of item in + a node. It is impossible for direct item */ +#ifdef CONFIG_REISERFS_CHECK + if (is_direct_le_ih (vi->vi_ih)) + reiserfs_panic (tb->tb_sb, "vs-8110: get_num_ver: " + "direct item length is %d. 
It can not be longer than %d", + current_item_size, max_node_size); +#endif + /* we will try to split it */ + flow = 1; + } + + if (!flow) { + /* as we do not split items, take new node and continue */ + needed_nodes ++; i --; total_node_size = 0; + continue; + } + + // calculate number of item units which fit into node being + // filled + { + int free_space; + + free_space = max_node_size - total_node_size - IH_SIZE; + units = op_check_left (vi, free_space, start_bytes, skip_from_end); + if (units == -1) { + /* nothing fits into current node, take new node and continue */ + needed_nodes ++, i--, total_node_size = 0; + continue; + } + } + + /* something fits into the current node */ + //if (snum012[3] != -1 || needed_nodes != 1) + // reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required"); + //snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units; + start_bytes += units; + snum012[needed_nodes - 1 + 3] = units; + + if (needed_nodes > 2) + reiserfs_warning ("vs-8111: get_num_ver: split_item_position is out of boundary\n"); + snum012[needed_nodes - 1] ++; + split_item_positions[needed_nodes - 1] = i; + needed_nodes ++; + /* continue from the same item with start_bytes != -1 */ + start_item = i; + i --; + total_node_size = 0; + } + + // sum012[4] (if it is not -1) contains number of units of which + // are to be in S1new, snum012[3] - to be in S0. They are supposed + // to be S1bytes and S2bytes correspondingly, so recalculate + if (snum012[4] > 0) { + int split_item_num; + int bytes_to_r, bytes_to_l; + int bytes_to_S1new; + + split_item_num = split_item_positions[1]; + bytes_to_l = ((from == split_item_num && from_bytes != -1) ? from_bytes : 0); + bytes_to_r = ((end_item == split_item_num && end_bytes != -1) ? end_bytes : 0); + bytes_to_S1new = ((split_item_positions[0] == split_item_positions[1]) ? snum012[3] : 0); + + // s2bytes + snum012[4] = op_unit_num (&vn->vn_vi[split_item_num]) - snum012[4] - bytes_to_r - bytes_to_l - bytes_to_S1new; + + if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY) + reiserfs_warning ("vs-8115: get_num_ver: not directory item\n"); + } + + /* now we know S2bytes, calculate S1bytes */ + if (snum012[3] > 0) { + int split_item_num; + int bytes_to_r, bytes_to_l; + int bytes_to_S2new; + + split_item_num = split_item_positions[0]; + bytes_to_l = ((from == split_item_num && from_bytes != -1) ? from_bytes : 0); + bytes_to_r = ((end_item == split_item_num && end_bytes != -1) ? end_bytes : 0); + bytes_to_S2new = ((split_item_positions[0] == split_item_positions[1] && snum012[4] != -1) ? snum012[4] : 0); + + // s1bytes + snum012[3] = op_unit_num (&vn->vn_vi[split_item_num]) - snum012[3] - bytes_to_r - bytes_to_l - bytes_to_S2new; + } + + return needed_nodes; +} + + +#ifdef CONFIG_REISERFS_CHECK +extern struct tree_balance * cur_tb; +#endif + + +/* Set parameters for balancing. + * Performs write of results of analysis of balancing into structure tb, + * where it will later be used by the functions that actually do the balancing. + * Parameters: + * tb tree_balance structure; + * h current level of the node; + * lnum number of items from S[h] that must be shifted to L[h]; + * rnum number of items from S[h] that must be shifted to R[h]; + * blk_num number of blocks that S[h] will be splitted into; + * s012 number of items that fall into splitted nodes. 
+ * lbytes number of bytes which flow to the left neighbor from the item that is not + * not shifted entirely + * rbytes number of bytes which flow to the right neighbor from the item that is not + * not shifted entirely + * s1bytes number of bytes which flow to the first new node when S[0] splits (this number is contained in s012 array) + */ + +static void set_parameters (struct tree_balance * tb, int h, int lnum, + int rnum, int blk_num, short * s012, int lb, int rb) +{ + + tb->lnum[h] = lnum; + tb->rnum[h] = rnum; + tb->blknum[h] = blk_num; + + if (h == 0) + { /* only for leaf level */ + if (s012 != NULL) + { + tb->s0num = * s012 ++, + tb->s1num = * s012 ++, + tb->s2num = * s012 ++; + tb->s1bytes = * s012 ++; + tb->s2bytes = * s012; + } + tb->lbytes = lb; + tb->rbytes = rb; + } +} + + + +/* check, does node disappear if we shift tb->lnum[0] items to left + neighbor and tb->rnum[0] to the right one. */ +static int is_leaf_removable (struct tree_balance * tb) +{ + struct virtual_node * vn = tb->tb_vn; + int to_left, to_right; + int size; + int remain_items; + + /* number of items, that will be shifted to left (right) neighbor + entirely */ + to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); + to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0); + remain_items = vn->vn_nr_item; + + /* how many items remain in S[0] after shiftings to neighbors */ + remain_items -= (to_left + to_right); + + if (remain_items < 1) { + /* all content of node can be shifted to neighbors */ + set_parameters (tb, 0, to_left, vn->vn_nr_item - to_left, 0, NULL, -1, -1); + return 1; + } + + if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1) + /* S[0] is not removable */ + return 0; + + /* check, whether we can divide 1 remaining item between neighbors */ + + /* get size of remaining item (in item units) */ + size = op_unit_num (&(vn->vn_vi[to_left])); + + if (tb->lbytes + tb->rbytes >= size) { + set_parameters (tb, 0, to_left + 1, to_right + 1, 0, NULL, tb->lbytes, -1); + return 1; + } + + return 0; +} + + +/* check whether L, S, R can be joined in one node */ +static int are_leaves_removable (struct tree_balance * tb, int lfree, int rfree) +{ + struct virtual_node * vn = tb->tb_vn; + int ih_size; + struct buffer_head *S0; + + S0 = PATH_H_PBUFFER (tb->tb_path, 0); + + ih_size = 0; + if (vn->vn_nr_item) { + if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE) + ih_size += IH_SIZE; + + if (vn->vn_vi[vn->vn_nr_item-1].vi_type & VI_TYPE_RIGHT_MERGEABLE) + ih_size += IH_SIZE; + } else { + /* there was only one item and it will be deleted */ + struct item_head * ih; + +#ifdef CONFIG_REISERFS_CHECK + if (B_NR_ITEMS (S0) != 1) + reiserfs_panic (0, "vs-8125: are_leaves_removable: item number must be 1: it is %d", B_NR_ITEMS(S0)); +#endif + + ih = B_N_PITEM_HEAD (S0, 0); + if (tb->CFR[0] && !comp_short_le_keys (&(ih->ih_key), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]))) + if (is_direntry_le_ih (ih)) { +#ifndef REISERFS_FSCK + + /* Directory must be in correct state here: that is + somewhere at the left side should exist first directory + item. But the item being deleted can not be that first + one because its right neighbor is item of the same + directory. (But first item always gets deleted in last + turn). 
So, neighbors of deleted item can be merged, so + we can save ih_size */ + ih_size = IH_SIZE; + +#ifdef CONFIG_REISERFS_CHECK + /* we might check that left neighbor exists and is of the + same directory */ + if (le_key_k_offset (ih_version (ih), &(ih->ih_key)) == DOT_OFFSET) + reiserfs_panic (tb->tb_sb, "vs-8130: are_leaves_removable: " + "first directory item can not be removed until directory is not empty"); +#endif + + +#else /* REISERFS_FSCK */ + + /* we can delete any directory item in fsck (if it is unreachable) */ + if (ih->ih_key.k_offset != DOT_OFFSET) { + /* must get left neighbor here to make sure, that left + neighbor is of the same directory */ + struct buffer_head * left; + + left = get_left_neighbor (tb->tb_sb, tb->tb_path); + if (left) { + struct item_head * last; + + if (B_NR_ITEMS (left) == 0) + reiserfs_panic (tb->tb_sb, "vs-8135: are_leaves_removable: " + "empty node in the tree"); + last = B_N_PITEM_HEAD (left, B_NR_ITEMS (left) - 1); + if (!comp_short_keys (&last->ih_key, &ih->ih_key)) + ih_size = IH_SIZE; + brelse (left); + } + } +#endif + } + + } + + if (MAX_CHILD_SIZE (S0) + vn->vn_size <= rfree + lfree + ih_size) { + set_parameters (tb, 0, -1, -1, -1, NULL, -1, -1); + return 1; + } + return 0; + +} + + + +/* when we do not split item, lnum and rnum are numbers of entire items */ +#define SET_PAR_SHIFT_LEFT \ +if (h)\ +{\ + int to_l;\ + \ + to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\ + (MAX_NR_KEY(Sh) + 1 - lpar);\ + \ + set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\ +}\ +else \ +{\ + if (lset==LEFT_SHIFT_FLOW)\ + set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\ + tb->lbytes, -1);\ + else\ + set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\ + -1, -1);\ +} + + +#define SET_PAR_SHIFT_RIGHT \ +if (h)\ +{\ + int to_r;\ + \ + to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\ + \ + set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\ +}\ +else \ +{\ + if (rset==RIGHT_SHIFT_FLOW)\ + set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\ + -1, tb->rbytes);\ + else\ + set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\ + -1, -1);\ +} + + +void free_buffers_in_tb ( + struct tree_balance * p_s_tb + ) { + int n_counter; + + decrement_counters_in_path(p_s_tb->tb_path); + + for ( n_counter = 0; n_counter < MAX_HEIGHT; n_counter++ ) { + decrement_bcount(p_s_tb->L[n_counter]); + p_s_tb->L[n_counter] = NULL; + decrement_bcount(p_s_tb->R[n_counter]); + p_s_tb->R[n_counter] = NULL; + decrement_bcount(p_s_tb->FL[n_counter]); + p_s_tb->FL[n_counter] = NULL; + decrement_bcount(p_s_tb->FR[n_counter]); + p_s_tb->FR[n_counter] = NULL; + decrement_bcount(p_s_tb->CFL[n_counter]); + p_s_tb->CFL[n_counter] = NULL; + decrement_bcount(p_s_tb->CFR[n_counter]); + p_s_tb->CFR[n_counter] = NULL; + } +} + + +/* Get new buffers for storing new nodes that are created while balancing. + * Returns: SCHEDULE_OCCURED - schedule occured while the function worked; + * CARRY_ON - schedule didn't occur while the function worked; + * NO_DISK_SPACE - no disk space. + */ +/* The function is NOT SCHEDULE-SAFE! 
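The free-block accounting that get_empty_nodes() performs just below reduces to a short piece of arithmetic. The helper here is an invented illustration (the real code works on tb->cur_blknum, tb->blknum[] and the presence of S[h]): blocks already acquired for the levels below h are subtracted first, and only the shortfall at this level is then allocated.

static int blocks_to_allocate (int cur_blknum, int *blknum, int h, int have_Sh)
{
	int level;
	int free_blocks = cur_blknum;	/* empty blocks acquired so far */
	int needed;

	/* subtract what the levels below h already consumed; blknum[]
	   counts S[level] itself, hence the "- 1" */
	for (level = 0; level < h; level ++)
		free_blocks -= blknum[level] ? (blknum[level] - 1) : 0;

	/* no S[h] means a new root is being added: exactly one block */
	needed = have_Sh ? (blknum[h] - 1) : 1;

	return (needed > free_blocks) ? (needed - free_blocks) : 0;
}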
*/ +static int get_empty_nodes( + struct tree_balance * p_s_tb, + int n_h + ) { + struct buffer_head * p_s_new_bh, + * p_s_Sh = PATH_H_PBUFFER (p_s_tb->tb_path, n_h); + unsigned long * p_n_blocknr, + a_n_blocknrs[MAX_AMOUNT_NEEDED] = {0, }; + int n_counter, + n_number_of_freeblk, + n_amount_needed,/* number of needed empty blocks */ + n_retval = CARRY_ON; + struct super_block * p_s_sb = p_s_tb->tb_sb; + + +#ifdef REISERFS_FSCK + if (n_h == 0 && p_s_tb->insert_size[n_h] == 0x7fff) + return CARRY_ON; +#endif + + /* number_of_freeblk is the number of empty blocks which have been + acquired for use by the balancing algorithm minus the number of + empty blocks used in the previous levels of the analysis, + number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs + after empty blocks are acquired, and the balancing analysis is + then restarted, amount_needed is the number needed by this level + (n_h) of the balancing analysis. + + Note that for systems with many processes writing, it would be + more layout optimal to calculate the total number needed by all + levels and then to run reiserfs_new_blocks to get all of them at once. */ + + /* Initiate number_of_freeblk to the amount acquired prior to the restart of + the analysis or 0 if not restarted, then subtract the amount needed + by all of the levels of the tree below n_h. */ + /* blknum includes S[n_h], so we subtract 1 in this calculation */ + for ( n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum; n_counter < n_h; n_counter++ ) + n_number_of_freeblk -= ( p_s_tb->blknum[n_counter] ) ? (p_s_tb->blknum[n_counter] - 1) : 0; + + /* Allocate missing empty blocks. */ + /* if p_s_Sh == 0 then we are getting a new root */ + n_amount_needed = ( p_s_Sh ) ? (p_s_tb->blknum[n_h] - 1) : 1; + /* Amount_needed = the amount that we need more than the amount that we have. */ + if ( n_amount_needed > n_number_of_freeblk ) + n_amount_needed -= n_number_of_freeblk; + else /* If we have enough already then there is nothing to do. */ + return CARRY_ON; + + if ( reiserfs_new_blocknrs (p_s_tb->transaction_handle, a_n_blocknrs, + PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_blocknr, n_amount_needed) == NO_DISK_SPACE ) + return NO_DISK_SPACE; + + /* for each blocknumber we just got, get a buffer and stick it on FEB */ + for ( p_n_blocknr = a_n_blocknrs, n_counter = 0; n_counter < n_amount_needed; + p_n_blocknr++, n_counter++ ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( ! 
*p_n_blocknr ) + reiserfs_panic(p_s_sb, "PAP-8135: get_empty_nodes: reiserfs_new_blocknrs failed when got new blocks"); +#endif + + p_s_new_bh = reiserfs_getblk(p_s_sb->s_dev, *p_n_blocknr, p_s_sb->s_blocksize); + if (atomic_read (&(p_s_new_bh->b_count)) > 1) { +/*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&*/ +/* + reiserfs_warning ("waiting for buffer %b, iput inode pid = %d, this pid %d, mode %c, %h\n", + p_s_new_bh, put_inode_pid, current->pid, p_s_tb->tb_vn->vn_mode, p_s_tb->tb_vn->vn_ins_ih); + print_tb (0, 0, 0, p_s_tb, "tb"); +*/ +/*&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&*/ + if (atomic_read(&(p_s_new_bh->b_count)) > 2 || + !(buffer_journaled(p_s_new_bh) || buffer_journal_dirty(p_s_new_bh))) { + n_retval = REPEAT_SEARCH ; + free_buffers_in_tb (p_s_tb); + wait_buffer_until_released (p_s_new_bh); + } + } +#ifdef CONFIG_REISERFS_CHECK + if (atomic_read (&(p_s_new_bh->b_count)) != 1 || buffer_dirty (p_s_new_bh)) { + if (atomic_read(&(p_s_new_bh->b_count)) > 2 || + !(buffer_journaled(p_s_new_bh) || buffer_journal_dirty(p_s_new_bh))) { + reiserfs_panic(p_s_sb,"PAP-8140: get_empty_nodes: not free or dirty buffer %b for the new block", + p_s_new_bh); + } + } +#endif + + /* Put empty buffers into the array. */ + if (p_s_tb->FEB[p_s_tb->cur_blknum]) + BUG(); + + p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh; + } + + if ( n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB (p_s_tb) ) + n_retval = REPEAT_SEARCH ; + + return n_retval; +} + + +/* Get free space of the left neighbor, which is stored in the parent + * node of the left neighbor. */ +static int get_lfree (struct tree_balance * tb, int h) +{ + struct buffer_head * l, * f; + int order; + + if ((f = PATH_H_PPARENT (tb->tb_path, h)) == 0 || (l = tb->FL[h]) == 0) + return 0; + + if (f == l) + order = PATH_H_B_ITEM_ORDER (tb->tb_path, h) - 1; + else { + order = B_NR_ITEMS (l); + f = l; + } + + return (MAX_CHILD_SIZE(f) - le16_to_cpu (B_N_CHILD(f,order)->dc_size)); +} + + +/* Get free space of the right neighbor, + * which is stored in the parent node of the right neighbor. + */ +static int get_rfree (struct tree_balance * tb, int h) +{ + struct buffer_head * r, * f; + int order; + + if ((f = PATH_H_PPARENT (tb->tb_path, h)) == 0 || (r = tb->FR[h]) == 0) + return 0; + + if (f == r) + order = PATH_H_B_ITEM_ORDER (tb->tb_path, h) + 1; + else { + order = 0; + f = r; + } + + return (MAX_CHILD_SIZE(f) - B_N_CHILD(f,order)->dc_size); + +} + + +/* Check whether left neighbor is in memory. */ +static int is_left_neighbor_in_cache( + struct tree_balance * p_s_tb, + int n_h + ) { + struct buffer_head * p_s_father, * left; + struct super_block * p_s_sb = p_s_tb->tb_sb; + unsigned long n_left_neighbor_blocknr; + int n_left_neighbor_position; + + if ( ! p_s_tb->FL[n_h] ) /* Father of the left neighbor does not exist. */ + return 0; + + /* Calculate father of the node to be balanced. */ + p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1); + +#ifdef CONFIG_REISERFS_CHECK + if ( ! p_s_father || ! B_IS_IN_TREE (p_s_father) || ! B_IS_IN_TREE (p_s_tb->FL[n_h]) || + ! buffer_uptodate (p_s_father) || ! buffer_uptodate (p_s_tb->FL[n_h]) ) { + reiserfs_panic (p_s_sb, "vs-8165: is_left_neighbor_in_cache: F[h] (%b) or FL[h] (%b) is invalid", + p_s_father, p_s_tb->FL[n_h]); + } +#endif + + + /* Get position of the pointer to the left neighbor into the left father. */ + n_left_neighbor_position = ( p_s_father == p_s_tb->FL[n_h] ) ? + p_s_tb->lkey[n_h] : B_NR_ITEMS (p_s_tb->FL[n_h]); + /* Get left neighbor block number. 
*/ + n_left_neighbor_blocknr = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position); + /* Look for the left neighbor in the cache. */ + if ( (left = get_hash_table(p_s_sb->s_dev, n_left_neighbor_blocknr, p_s_sb->s_blocksize)) ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( buffer_uptodate (left) && ! B_IS_IN_TREE(left) ) { + reiserfs_panic(p_s_sb, "vs-8170: is_left_neighbor_in_cache: left neighbor (%b %z) is not in the tree", + left, left); + } +#endif + atomic_dec (&(left->b_count)); + return 1; + } + + return 0; +} + + +#define LEFT_PARENTS 'l' +#define RIGHT_PARENTS 'r' + + +static void decrement_key (struct cpu_key * p_s_key) +{ + // call item specific function for this key + item_ops[cpu_key_k_type (p_s_key)]->decrement_key (p_s_key); + + +#if 0 /* this works wrong when key is key of second part of tail: it + sets key to be of indirect type. It looks like it makes no + harm but it is unclear */ + + unsigned long * p_n_key_field = (unsigned long *)p_s_key + REISERFS_FULL_KEY_LEN - 1; + int n_counter; + + for( n_counter = 0; n_counter < REISERFS_FULL_KEY_LEN; n_counter++, p_n_key_field-- ) { + if ( *p_n_key_field ) { + (*p_n_key_field)--; + break; + } + } +#ifdef CONFIG_REISERFS_CHECK + if ( n_counter == REISERFS_FULL_KEY_LEN ) + reiserfs_panic(NULL, "PAP-8175: decrement_key: zero key"); +#endif + +#endif /*0*/ + +} + + + + +/* Calculate far left/right parent of the left/right neighbor of the current node, that + * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h]. + * Calculate left/right common parent of the current node and L[h]/R[h]. + * Calculate left/right delimiting key position. + * Returns: PATH_INCORRECT - path in the tree is not correct; + SCHEDULE_OCCURRED - schedule occured while the function worked; + * CARRY_ON - schedule didn't occur while the function worked; + */ +static int get_far_parent (struct tree_balance * p_s_tb, + int n_h, + struct buffer_head ** pp_s_father, + struct buffer_head ** pp_s_com_father, + char c_lr_par) +{ + struct buffer_head * p_s_parent; + INITIALIZE_PATH (s_path_to_neighbor_father); + struct path * p_s_path = p_s_tb->tb_path; + struct cpu_key s_lr_father_key; + int n_counter, + n_position = MAX_INT, + n_first_last_position = 0, + n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h); + + /* Starting from F[n_h] go upwards in the tree, and look for the common + ancestor of F[n_h], and its neighbor l/r, that should be obtained. */ + + n_counter = n_path_offset; + +#ifdef CONFIG_REISERFS_CHECK + if ( n_counter < FIRST_PATH_ELEMENT_OFFSET ) + reiserfs_panic(p_s_tb->tb_sb, "PAP-8180: get_far_parent: invalid path length"); +#endif + + + for ( ; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter-- ) { + /* Check whether parent of the current buffer in the path is really parent in the tree. */ + if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1)) ) + return REPEAT_SEARCH; + /* Check whether position in the parent is correct. */ + if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_counter - 1)) > B_NR_ITEMS(p_s_parent) ) + return REPEAT_SEARCH; + /* Check whether parent at the path really points to the child. */ + if ( B_N_CHILD_NUM(p_s_parent, n_position) != + PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr ) + return REPEAT_SEARCH; + /* Return delimiting key if position in the parent is not equal to first/last one. 
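+       In that case p_s_parent is the common ancestor of S[n_h] and the
+       needed neighbor, so an extra reference is taken to it and the climb
+       up the path stops here.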
*/ + if ( c_lr_par == RIGHT_PARENTS ) + n_first_last_position = B_NR_ITEMS (p_s_parent); + if ( n_position != n_first_last_position ) { + *pp_s_com_father = p_s_parent; + atomic_inc (&((*pp_s_com_father)->b_count)); + /*(*pp_s_com_father = p_s_parent)->b_count++;*/ + break; + } + } + + /* if we are in the root of the tree, then there is no common father */ + if ( n_counter == FIRST_PATH_ELEMENT_OFFSET ) { + /* Check whether first buffer in the path is the root of the tree. */ + if ( PATH_OFFSET_PBUFFER(p_s_tb->tb_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == + SB_ROOT_BLOCK (p_s_tb->tb_sb) ) { + *pp_s_father = *pp_s_com_father = NULL; + return CARRY_ON; + } + return REPEAT_SEARCH; + } + +#ifdef CONFIG_REISERFS_CHECK + if ( B_LEVEL (*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL ) { + reiserfs_panic(p_s_tb->tb_sb, "PAP-8185: get_far_parent: (%b %z) level too small", *pp_s_com_father, *pp_s_com_father); + } +#endif + + /* Check whether the common parent is locked. */ + + if ( buffer_locked (*pp_s_com_father) ) { + __wait_on_buffer(*pp_s_com_father); + if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { + decrement_bcount(*pp_s_com_father); + return REPEAT_SEARCH; + } + } + + /* So, we got common parent of the current node and its left/right neighbor. + Now we are geting the parent of the left/right neighbor. */ + + /* Form key to get parent of the left/right neighbor. */ + le_key2cpu_key (&s_lr_father_key, B_N_PDELIM_KEY(*pp_s_com_father, ( c_lr_par == LEFT_PARENTS ) ? + (p_s_tb->lkey[n_h - 1] = n_position - 1) : (p_s_tb->rkey[n_h - 1] = n_position))); + + + if ( c_lr_par == LEFT_PARENTS ) + decrement_key(&s_lr_father_key); + + if (search_by_key(p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, n_h + 1) == IO_ERROR) + // path is released + return IO_ERROR; + + if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { + decrement_counters_in_path(&s_path_to_neighbor_father); + decrement_bcount(*pp_s_com_father); + return REPEAT_SEARCH; + } + + *pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father); + +#ifdef CONFIG_REISERFS_CHECK + if ( B_LEVEL (*pp_s_father) != n_h + 1 ) { + reiserfs_panic(p_s_tb->tb_sb, "PAP-8190: get_far_parent: (%b %z) level too small", *pp_s_father, *pp_s_father); + } + + if ( s_path_to_neighbor_father.path_length < FIRST_PATH_ELEMENT_OFFSET ) + reiserfs_panic(0, "PAP-8192: get_far_parent: path length is too small"); + +#endif + + s_path_to_neighbor_father.path_length--; + decrement_counters_in_path(&s_path_to_neighbor_father); + return CARRY_ON; +} + + +/* Get parents of neighbors of node in the path(S[n_path_offset]) and common parents of + * S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset], + * FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset]. + * Calculate numbers of left and right delimiting keys position: lkey[n_path_offset], rkey[n_path_offset]. + * Returns: SCHEDULE_OCCURRED - schedule occured while the function worked; + * CARRY_ON - schedule didn't occur while the function worked; + */ +static int get_parents (struct tree_balance * p_s_tb, int n_h) +{ + struct path * p_s_path = p_s_tb->tb_path; + int n_position, + n_ret_value, + n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); + struct buffer_head * p_s_curf, + * p_s_curcf; + + /* Current node is the root of the tree or will be root of the tree */ + if ( n_path_offset <= FIRST_PATH_ELEMENT_OFFSET ) { + /* The root can not have parents. + Release nodes which previously were obtained as parents of the current node neighbors. 
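+       All four slots (FL, CFL, FR and CFR) are dropped and cleared below,
+       since a root has neither neighbors nor a father.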
*/ + decrement_bcount(p_s_tb->FL[n_h]); + decrement_bcount(p_s_tb->CFL[n_h]); + decrement_bcount(p_s_tb->FR[n_h]); + decrement_bcount(p_s_tb->CFR[n_h]); + p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] = p_s_tb->CFR[n_h] = NULL; + return CARRY_ON; + } + + /* Get parent FL[n_path_offset] of L[n_path_offset]. */ + if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1)) ) { + /* Current node is not the first child of its parent. */ + /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2;*/ + p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1); + atomic_inc (&(p_s_curf->b_count)); + atomic_inc (&(p_s_curf->b_count)); + p_s_tb->lkey[n_h] = n_position - 1; + } + else { + /* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node. + Calculate current common parent of L[n_path_offset] and the current node. Note that + CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset]. + Calculate lkey[n_path_offset]. */ + if ( (n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf, + &p_s_curcf, LEFT_PARENTS)) != CARRY_ON ) + return n_ret_value; + } + + decrement_bcount(p_s_tb->FL[n_h]); + p_s_tb->FL[n_h] = p_s_curf; /* New initialization of FL[n_h]. */ + decrement_bcount(p_s_tb->CFL[n_h]); + p_s_tb->CFL[n_h] = p_s_curcf; /* New initialization of CFL[n_h]. */ + +#ifdef CONFIG_REISERFS_CHECK + if ((p_s_curf && !B_IS_IN_TREE (p_s_curf)) || (p_s_curcf && !B_IS_IN_TREE (p_s_curcf))) { + reiserfs_panic (p_s_tb->tb_sb, "PAP-8195: get_parents: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf); + } +#endif + +/* Get parent FR[n_h] of R[n_h]. */ + +/* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */ + if ( n_position == B_NR_ITEMS (PATH_H_PBUFFER(p_s_path, n_h + 1)) ) { +/* Calculate current parent of R[n_h], which is the right neighbor of F[n_h]. + Calculate current common parent of R[n_h] and current node. Note that CFR[n_h] + not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */ + if ( (n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf, RIGHT_PARENTS)) != CARRY_ON ) + return n_ret_value; + } + else { +/* Current node is not the last child of its parent F[n_h]. */ + /*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2;*/ + p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1); + atomic_inc (&(p_s_curf->b_count)); + atomic_inc (&(p_s_curf->b_count)); + p_s_tb->rkey[n_h] = n_position; + } + + decrement_bcount(p_s_tb->FR[n_h]); + p_s_tb->FR[n_h] = p_s_curf; /* New initialization of FR[n_path_offset]. */ + + decrement_bcount(p_s_tb->CFR[n_h]); + p_s_tb->CFR[n_h] = p_s_curcf; /* New initialization of CFR[n_path_offset]. 
*/
+
+#ifdef CONFIG_REISERFS_CHECK
+#if 0
+    if (n_h == 0 && p_s_tb->CFR[n_h] && COMP_KEYS (B_PRIGHT_DELIM_KEY (PATH_H_PBUFFER(p_s_path, n_h)),
+						    B_N_PDELIM_KEY (p_s_tb->CFR[n_h], p_s_tb->rkey[n_h]))) {
+	reiserfs_panic (p_s_tb->tb_sb, "PAP-8200: get_parents: rdkey in S0 %k, rdkey in CFR0 %k do not match",
+			B_PRIGHT_DELIM_KEY (PATH_H_PBUFFER(p_s_path, n_h)), B_N_PDELIM_KEY (p_s_tb->CFR[n_h], p_s_tb->rkey[n_h]));
+    }
+#endif
+    if ((p_s_curf && !B_IS_IN_TREE (p_s_curf)) || (p_s_curcf && !B_IS_IN_TREE (p_s_curcf))) {
+	reiserfs_panic (p_s_tb->tb_sb, "PAP-8205: get_parents: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf);
+    }
+#endif
+
+    return CARRY_ON;
+}
+
+
+/* it is possible to remove a node as a result of shifting to its
+   neighbors even when we insert or paste an item. */
+static inline int can_node_be_removed (int mode, int lfree, int sfree, int rfree, struct tree_balance * tb, int h)
+{
+    struct buffer_head * Sh = PATH_H_PBUFFER (tb->tb_path, h);
+    int levbytes = tb->insert_size[h];
+    struct item_head * ih;
+    struct key * r_key = NULL;
+
+    ih = B_N_PITEM_HEAD (Sh, 0);
+    if ( tb->CFR[h] )
+	r_key = B_N_PDELIM_KEY(tb->CFR[h],tb->rkey[h]);
+
+    if (
+	lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
+	/* shifting may merge items which might save space */
+#ifdef REISERFS_FSCK
+	- (( ! h && is_left_mergeable (tb->tb_sb, tb->tb_path) == 1 ) ? IH_SIZE : 0)
+	- (( ! h && r_ih && is_right_mergeable (tb->tb_sb, tb->tb_path) == 1 ) ? IH_SIZE : 0)
+#else
+	- (( ! h && op_is_left_mergeable (&(ih->ih_key), Sh->b_size) ) ? IH_SIZE : 0)
+	- (( ! h && r_key && op_is_left_mergeable (r_key, Sh->b_size) ) ? IH_SIZE : 0)
+#endif
+	+ (( h ) ? KEY_SIZE : 0))
+    {
+	/* node cannot be removed */
+	if (sfree >= levbytes ) { /* new item fits into node S[h] without any shifting */
+	    if ( ! h )
+		tb->s0num = B_NR_ITEMS(Sh) + ((mode == M_INSERT ) ? 1 : 0);
+	    set_parameters (tb, h, 0, 0, 1, NULL, -1, -1);
+	    return NO_BALANCING_NEEDED;
+	}
+    }
+    return !NO_BALANCING_NEEDED;
+}
+
+
+
+/* Check whether current node S[h] is balanced when increasing its size by
+ * Inserting or Pasting.
+ * Calculate parameters for balancing for current level h.
+ * Parameters:
+ *	tb	tree_balance structure;
+ *	h	current level of the node;
+ *	inum	item number in S[h];
+ *	mode	i - insert, p - paste;
+ * Returns:	 1 - schedule occurred;
+ *	         0 - balancing for higher levels needed;
+ *	        -1 - no balancing for higher levels needed;
+ *	        -2 - no disk space.
+ */
+/* ip means Inserting or Pasting */
+static int ip_check_balance (struct tree_balance * tb, int h)
+{
+    struct virtual_node * vn = tb->tb_vn;
+    int levbytes,  /* Number of bytes that must be inserted into (value
+		      is negative if bytes are deleted) buffer which
+		      contains node being balanced.  The mnemonic is
+		      that the attempted change in node space used level
+		      is levbytes bytes. */
+	n_ret_value;
+
+    int lfree, sfree, rfree /* free space in L, S and R */;
+
+    /* nver is short for number of vertices, and lnver is the number if
+       we shift to the left, rnver is the number if we shift to the
+       right, and lrnver is the number if we shift in both directions.
+       The goal is to minimize first the number of vertices, and second,
+       the number of vertices whose contents are changed by shifting,
+       and third the number of uncached vertices whose contents are
+       changed by shifting and must be read from disk.
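+       (Illustration with made-up counts: if shifting only left or only
+       right would each leave 2 nodes but shifting in both directions leaves
+       1, the two-way shift wins on the first criterion; when shifting does
+       not reduce the number of nodes at all the code below does not shift,
+       and a left/right tie is broken in favor of the neighbor that is
+       already in cache.)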
*/ + int nver, lnver, rnver, lrnver; + + /* used at leaf level only, S0 = S[0] is the node being balanced, + sInum [ I = 0,1,2 ] is the number of items that will + remain in node SI after balancing. S1 and S2 are new + nodes that might be created. */ + + /* we perform 8 calls to get_num_ver(). For each call we calculate five parameters. + where 4th parameter is s1bytes and 5th - s2bytes + */ + short snum012[40] = {0,}; /* s0num, s1num, s2num for 8 cases + 0,1 - do not shift and do not shift but bottle + 2 - shift only whole item to left + 3 - shift to left and bottle as much as possible + 4,5 - shift to right (whole items and as much as possible + 6,7 - shift to both directions (whole items and as much as possible) + */ + + /* Sh is the node whose balance is currently being checked */ + struct buffer_head * Sh; + +#ifdef REISERFS_FSCK + /* special mode for insert pointer to the most low internal node */ + if (h == 0 && vn->vn_mode == M_INTERNAL) { + /* blk_num == 2 is to get pointer inserted to the next level */ + set_parameters (tb, h, 0, 0, 2, NULL, -1, -1); + return 0; + } +#endif + + Sh = PATH_H_PBUFFER (tb->tb_path, h); + levbytes = tb->insert_size[h]; + + /* Calculate balance parameters for creating new root. */ + if ( ! Sh ) { + if ( ! h ) + reiserfs_panic (tb->tb_sb, "vs-8210: ip_check_balance: S[0] can not be 0"); + switch ( n_ret_value = get_empty_nodes (tb, h) ) { + case CARRY_ON: + set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ + + case NO_DISK_SPACE: + case REPEAT_SEARCH: + return n_ret_value; + default: + reiserfs_panic(tb->tb_sb, "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes"); + } + } + + if ( (n_ret_value = get_parents (tb, h)) != CARRY_ON ) /* get parents of S[h] neighbors. */ + return n_ret_value; + + sfree = B_FREE_SPACE (Sh); + + /* get free space of neighbors */ + rfree = get_rfree (tb, h); + lfree = get_lfree (tb, h); + + if (can_node_be_removed (vn->vn_mode, lfree, sfree, rfree, tb, h) == NO_BALANCING_NEEDED) + /* and new item fits into node S[h] without any shifting */ + return NO_BALANCING_NEEDED; + + create_virtual_node (tb, h); + + /* + determine maximal number of items we can shift to the left neighbor (in tb structure) + and the maximal number of bytes that can flow to the left neighbor + from the left most liquid item that cannot be shifted from S[0] entirely (returned value) + */ + check_left (tb, h, lfree); + + /* + determine maximal number of items we can shift to the right neighbor (in tb structure) + and the maximal number of bytes that can flow to the right neighbor + from the right most liquid item that cannot be shifted from S[0] entirely (returned value) + */ + check_right (tb, h, rfree); + + + /* all contents of internal node S[h] can be moved into its + neighbors, S[h] will be removed after balancing */ + if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) { + int to_r; + + /* Since we are working on internal nodes, and our internal + nodes have fixed size entries, then we can balance by the + number of items rather than the space they consume. In this + routine we set the left node equal to the right node, + allowing a difference of less than or equal to 1 child + pointer. 
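+	   Worked example (made-up numbers): with MAX_NR_KEY(Sh) == 5,
+	   tb->lnum[h] == tb->rnum[h] == 4 and vn->vn_nr_item == 6 the formula
+	   below gives to_r = (10 + 2 - 4 - 4 + 6 + 1)/2 - (5 + 1 - 4) = 3,
+	   so 3 children go to R[h] and vn->vn_nr_item + 1 - to_r = 4 go to
+	   L[h], leaving the two neighbors within one child pointer of each
+	   other.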
*/ + to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 - + (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); + set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1); + return CARRY_ON; + } + +#ifdef CONFIG_REISERFS_CHECK + /* this checks balance condition, that any two neighboring nodes can not fit in one node */ + if ( h && ( tb->lnum[h] >= vn->vn_nr_item + 1 || tb->rnum[h] >= vn->vn_nr_item + 1) ) + reiserfs_panic (tb->tb_sb, "vs-8220: ip_check_balance: tree is not balanced on internal level"); + + if ( ! h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) || + (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1)) )) + reiserfs_panic(tb->tb_sb, "vs-8225: ip_check_balance: tree is not balanced on leaf level"); +#endif + + /* all contents of S[0] can be moved into its neighbors + S[0] will be removed after balancing. */ + if (!h && is_leaf_removable (tb)) + return CARRY_ON; + + + /* why do we perform this check here rather than earlier?? + Answer: we can win 1 node in some cases above. Moreover we + checked it above, when we checked, that S[0] is not removable + in principle */ + if (sfree >= levbytes) { /* new item fits into node S[h] without any shifting */ + if ( ! h ) + tb->s0num = vn->vn_nr_item; + set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; + } + + + { + int lpar, rpar, nset, lset, rset, lrset; + /* + * regular overflowing of the node + */ + + /* get_num_ver works in 2 modes (FLOW & NO_FLOW) + lpar, rpar - number of items we can shift to left/right neighbor (including splitting item) + nset, lset, rset, lrset - shows, whether flowing items give better packing + */ +#define FLOW 1 +#define NO_FLOW 0 /* do not any splitting */ + + /* we choose one the following */ +#define NOTHING_SHIFT_NO_FLOW 0 +#define NOTHING_SHIFT_FLOW 5 +#define LEFT_SHIFT_NO_FLOW 10 +#define LEFT_SHIFT_FLOW 15 +#define RIGHT_SHIFT_NO_FLOW 20 +#define RIGHT_SHIFT_FLOW 25 +#define LR_SHIFT_NO_FLOW 30 +#define LR_SHIFT_FLOW 35 + + + lpar = tb->lnum[h]; + rpar = tb->rnum[h]; + + + /* calculate number of blocks S[h] must be split into when + nothing is shifted to the neighbors, + as well as number of items in each part of the split node (s012 numbers), + and number of bytes (s1bytes) of the shared drop which flow to S1 if any */ + nset = NOTHING_SHIFT_NO_FLOW; + nver = get_num_ver (vn->vn_mode, tb, h, + 0, -1, h?vn->vn_nr_item:0, -1, + snum012, NO_FLOW); + + if (!h) + { + int nver1; + + /* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */ + nver1 = get_num_ver (vn->vn_mode, tb, h, + 0, -1, 0, -1, + snum012 + NOTHING_SHIFT_FLOW, FLOW); + if (nver > nver1) + nset = NOTHING_SHIFT_FLOW, nver = nver1; + } + + + /* calculate number of blocks S[h] must be split into when + l_shift_num first items and l_shift_bytes of the right most + liquid item to be shifted are shifted to the left neighbor, + as well as number of items in each part of the splitted node (s012 numbers), + and number of bytes (s1bytes) of the shared drop which flow to S1 if any + */ + lset = LEFT_SHIFT_NO_FLOW; + lnver = get_num_ver (vn->vn_mode, tb, h, + lpar - (( h || tb->lbytes == -1 ) ? 0 : 1), -1, h ? vn->vn_nr_item:0, -1, + snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW); + if (!h) + { + int lnver1; + + lnver1 = get_num_ver (vn->vn_mode, tb, h, + lpar - ((tb->lbytes != -1) ? 
1 : 0), tb->lbytes, 0, -1,
+			      snum012 + LEFT_SHIFT_FLOW, FLOW);
+	if (lnver > lnver1)
+	    lset = LEFT_SHIFT_FLOW, lnver = lnver1;
+    }
+
+
+    /* calculate number of blocks S[h] must be split into when
+       r_shift_num first items and r_shift_bytes of the left most
+       liquid item to be shifted are shifted to the right neighbor,
+       as well as number of items in each part of the split node (s012 numbers),
+       and number of bytes (s1bytes) of the shared drop which flow to S1 if any
+    */
+    rset = RIGHT_SHIFT_NO_FLOW;
+    rnver = get_num_ver (vn->vn_mode, tb, h,
+			 0, -1, h ? (vn->vn_nr_item-rpar) : (rpar - (( tb->rbytes != -1 ) ? 1 : 0)), -1,
+			 snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW);
+    if (!h)
+    {
+	int rnver1;
+
+	rnver1 = get_num_ver (vn->vn_mode, tb, h,
+			      0, -1, (rpar - ((tb->rbytes != -1) ? 1 : 0)), tb->rbytes,
+			      snum012 + RIGHT_SHIFT_FLOW, FLOW);
+
+	if (rnver > rnver1)
+	    rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
+    }
+
+
+    /* calculate number of blocks S[h] must be split into when
+       items are shifted in both directions,
+       as well as number of items in each part of the split node (s012 numbers),
+       and number of bytes (s1bytes) of the shared drop which flow to S1 if any
+    */
+    lrset = LR_SHIFT_NO_FLOW;
+    lrnver = get_num_ver (vn->vn_mode, tb, h,
+			  lpar - ((h || tb->lbytes == -1) ? 0 : 1), -1, h ? (vn->vn_nr_item-rpar):(rpar - ((tb->rbytes != -1) ? 1 : 0)), -1,
+			  snum012 + LR_SHIFT_NO_FLOW, NO_FLOW);
+    if (!h)
+    {
+	int lrnver1;
+
+	lrnver1 = get_num_ver (vn->vn_mode, tb, h,
+			       lpar - ((tb->lbytes != -1) ? 1 : 0), tb->lbytes, (rpar - ((tb->rbytes != -1) ? 1 : 0)), tb->rbytes,
+			       snum012 + LR_SHIFT_FLOW, FLOW);
+	if (lrnver > lrnver1)
+	    lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
+    }
+
+
+
+    /* Our general shifting strategy is:
+       1) to minimize the number of new nodes;
+       2) to minimize the number of neighbors involved in shifting;
+       3) to minimize the number of disk reads; */
+
+    /* we can win TWO or ONE nodes by shifting in both directions */
+    if (lrnver < lnver && lrnver < rnver)
+    {
+#ifdef CONFIG_REISERFS_CHECK
+	if (h && (tb->lnum[h] != 1 || tb->rnum[h] != 1 || lrnver != 1 || rnver != 2 || lnver != 2 || h != 1))
+	    reiserfs_panic (0, "vs-8230: check_balance: bad h");
+#endif
+	if (lrset == LR_SHIFT_FLOW)
+	    set_parameters (tb, h, tb->lnum[h], tb->rnum[h], lrnver, snum012 + lrset,
+			    tb->lbytes, tb->rbytes);
+	else
+	    set_parameters (tb, h, tb->lnum[h] - ((tb->lbytes == -1) ? 0 : 1),
+			    tb->rnum[h] - ((tb->rbytes == -1) ?
0 : 1), lrnver, snum012 + lrset, -1, -1); + + return CARRY_ON; + } + + /* if shifting doesn't lead to better packing then don't shift */ + if (nver == lrnver) + { + set_parameters (tb, h, 0, 0, nver, snum012 + nset, -1, -1); + return CARRY_ON; + } + + + /* now we know that for better packing shifting in only one + direction either to the left or to the right is required */ + + /* if shifting to the left is better than shifting to the right */ + if (lnver < rnver) + { + SET_PAR_SHIFT_LEFT; + return CARRY_ON; + } + + /* if shifting to the right is better than shifting to the left */ + if (lnver > rnver) + { + SET_PAR_SHIFT_RIGHT; + return CARRY_ON; + } + + + /* now shifting in either direction gives the same number + of nodes and we can make use of the cached neighbors */ + if (is_left_neighbor_in_cache (tb,h)) + { + SET_PAR_SHIFT_LEFT; + return CARRY_ON; + } + + /* shift to the right independently on whether the right neighbor in cache or not */ + SET_PAR_SHIFT_RIGHT; + return CARRY_ON; + } +} + + +/* Check whether current node S[h] is balanced when Decreasing its size by + * Deleting or Cutting for INTERNAL node of S+tree. + * Calculate parameters for balancing for current level h. + * Parameters: + * tb tree_balance structure; + * h current level of the node; + * inum item number in S[h]; + * mode i - insert, p - paste; + * Returns: 1 - schedule occured; + * 0 - balancing for higher levels needed; + * -1 - no balancing for higher levels needed; + * -2 - no disk space. + * + * Note: Items of internal nodes have fixed size, so the balance condition for + * the internal part of S+tree is as for the B-trees. + */ +static int dc_check_balance_internal (struct tree_balance * tb, int h) +{ + struct virtual_node * vn = tb->tb_vn; + + /* Sh is the node whose balance is currently being checked, + and Fh is its father. */ + struct buffer_head * Sh, * Fh; + int maxsize, + n_ret_value; + int lfree, rfree /* free space in L and R */; + + Sh = PATH_H_PBUFFER (tb->tb_path, h); + Fh = PATH_H_PPARENT (tb->tb_path, h); + + maxsize = MAX_CHILD_SIZE(Sh); + +/* using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */ +/* new_nr_item = number of items node would have if operation is */ +/* performed without balancing (new_nr_item); */ + create_virtual_node (tb, h); + + if ( ! Fh ) + { /* S[h] is the root. */ + if ( vn->vn_nr_item > 0 ) + { + set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; /* no balancing for higher levels needed */ + } + /* new_nr_item == 0. + * Current root will be deleted resulting in + * decrementing the tree height. */ + set_parameters (tb, h, 0, 0, 0, NULL, -1, -1); + return CARRY_ON; + } + + if ( (n_ret_value = get_parents(tb,h)) != CARRY_ON ) + return n_ret_value; + + + /* get free space of neighbors */ + rfree = get_rfree (tb, h); + lfree = get_lfree (tb, h); + + /* determine maximal number of items we can fit into neighbors */ + check_left (tb, h, lfree); + check_right (tb, h, rfree); + + + if ( vn->vn_nr_item >= MIN_NR_KEY(Sh) ) + { /* Balance condition for the internal node is valid. + * In this case we balance only if it leads to better packing. */ + if ( vn->vn_nr_item == MIN_NR_KEY(Sh) ) + { /* Here we join S[h] with one of its neighbors, + * which is impossible with greater values of new_nr_item. */ + if ( tb->lnum[h] >= vn->vn_nr_item + 1 ) + { + /* All contents of S[h] can be moved to L[h]. */ + int n; + int order_L; + + order_L = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==0) ? 
B_NR_ITEMS(tb->FL[h]) : n - 1; + n = B_N_CHILD(tb->FL[h],order_L)->dc_size / (DC_SIZE + KEY_SIZE); + set_parameters (tb, h, -n-1, 0, 0, NULL, -1, -1); + return CARRY_ON; + } + + if ( tb->rnum[h] >= vn->vn_nr_item + 1 ) + { + /* All contents of S[h] can be moved to R[h]. */ + int n; + int order_R; + + order_R = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==B_NR_ITEMS(Fh)) ? 0 : n + 1; + n = B_N_CHILD(tb->FR[h],order_R)->dc_size / (DC_SIZE + KEY_SIZE); + set_parameters (tb, h, 0, -n-1, 0, NULL, -1, -1); + return CARRY_ON; + } + } + + if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) + { + /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ + int to_r; + + to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 - + (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); + set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1); + return CARRY_ON; + } + + /* Balancing does not lead to better packing. */ + set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; + } + + /* Current node contain insufficient number of items. Balancing is required. */ + /* Check whether we can merge S[h] with left neighbor. */ + if (tb->lnum[h] >= vn->vn_nr_item + 1) + if (is_left_neighbor_in_cache (tb,h) || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) + { + int n; + int order_L; + + order_L = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==0) ? B_NR_ITEMS(tb->FL[h]) : n - 1; + n = B_N_CHILD(tb->FL[h],order_L)->dc_size / (DC_SIZE + KEY_SIZE); + set_parameters (tb, h, -n-1, 0, 0, NULL, -1, -1); + return CARRY_ON; + } + + /* Check whether we can merge S[h] with right neighbor. */ + if (tb->rnum[h] >= vn->vn_nr_item + 1) + { + int n; + int order_R; + + order_R = ((n=PATH_H_B_ITEM_ORDER(tb->tb_path, h))==B_NR_ITEMS(Fh)) ? 0 : (n + 1); + n = B_N_CHILD(tb->FR[h],order_R)->dc_size / (DC_SIZE + KEY_SIZE); + set_parameters (tb, h, 0, -n-1, 0, NULL, -1, -1); + return CARRY_ON; + } + + /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ + if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) + { + int to_r; + + to_r = ((MAX_NR_KEY(Sh)<<1)+2-tb->lnum[h]-tb->rnum[h]+vn->vn_nr_item+1)/2 - + (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); + set_parameters (tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, -1, -1); + return CARRY_ON; + } + + /* For internal nodes try to borrow item from a neighbor */ +#ifdef CONFIG_REISERFS_CHECK + if (!tb->FL[h] && !tb->FR[h]) + reiserfs_panic (0, "vs-8235: dc_check_balance_internal: trying to borrow for root"); +#endif + + /* Borrow one or two items from caching neighbor */ + if (is_left_neighbor_in_cache (tb,h) || !tb->FR[h]) + { + int from_l; + + from_l = (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item + 1) / 2 - (vn->vn_nr_item + 1); + set_parameters (tb, h, -from_l, 0, 1, NULL, -1, -1); + return CARRY_ON; + } + + set_parameters (tb, h, 0, -((MAX_NR_KEY(Sh)+1-tb->rnum[h]+vn->vn_nr_item+1)/2-(vn->vn_nr_item+1)), 1, + NULL, -1, -1); + return CARRY_ON; +} + + +/* Check whether current node S[h] is balanced when Decreasing its size by + * Deleting or Truncating for LEAF node of S+tree. + * Calculate parameters for balancing for current level h. + * Parameters: + * tb tree_balance structure; + * h current level of the node; + * inum item number in S[h]; + * mode i - insert, p - paste; + * Returns: 1 - schedule occured; + * 0 - balancing for higher levels needed; + * -1 - no balancing for higher levels needed; + * -2 - no disk space. 
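+ *
+ * The checks below either find that S[0] (after the delete or cut) can be
+ * emptied into its neighbors, that it can be merged entirely with L[0] or
+ * with R[0], or that no balancing is needed at all.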
+ */ +static int dc_check_balance_leaf (struct tree_balance * tb, int h) +{ + struct virtual_node * vn = tb->tb_vn; + + /* Number of bytes that must be deleted from + (value is negative if bytes are deleted) buffer which + contains node being balanced. The mnemonic is that the + attempted change in node space used level is levbytes bytes. */ + int levbytes; + /* the maximal item size */ + int maxsize, + n_ret_value; + /* S0 is the node whose balance is currently being checked, + and F0 is its father. */ + struct buffer_head * S0, * F0; + int lfree, rfree /* free space in L and R */; + + S0 = PATH_H_PBUFFER (tb->tb_path, 0); + F0 = PATH_H_PPARENT (tb->tb_path, 0); + + levbytes = tb->insert_size[h]; + + maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */ + + if ( ! F0 ) + { /* S[0] is the root now. */ + +#ifdef CONFIG_REISERFS_CHECK + if ( -levbytes >= maxsize - B_FREE_SPACE (S0) ) + reiserfs_panic (tb->tb_sb, "vs-8240: dc_check_balance_leaf: attempt to create empty buffer tree"); +#endif + + set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; + } + + if ( (n_ret_value = get_parents(tb,h)) != CARRY_ON ) + return n_ret_value; + + /* get free space of neighbors */ + rfree = get_rfree (tb, h); + lfree = get_lfree (tb, h); + + create_virtual_node (tb, h); + + /* if 3 leaves can be merge to one, set parameters and return */ + if (are_leaves_removable (tb, lfree, rfree)) + return CARRY_ON; + + /* determine maximal number of items we can shift to the left/right neighbor + and the maximal number of bytes that can flow to the left/right neighbor + from the left/right most liquid item that cannot be shifted from S[0] entirely + */ + check_left (tb, h, lfree); + check_right (tb, h, rfree); + + /* check whether we can merge S with left neighbor. */ + if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1) + if (is_left_neighbor_in_cache (tb,h) || + ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */ + !tb->FR[h]) { + +#ifdef CONFIG_REISERFS_CHECK + if (!tb->FL[h]) + reiserfs_panic (0, "vs-8245: dc_check_balance_leaf: FL[h] must exist"); +#endif + + /* set parameter to merge S[0] with its left neighbor */ + set_parameters (tb, h, -1, 0, 0, NULL, -1, -1); + return CARRY_ON; + } + + /* check whether we can merge S[0] with right neighbor. */ + if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) { + set_parameters (tb, h, 0, -1, 0, NULL, -1, -1); + return CARRY_ON; + } + + /* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */ + if (is_leaf_removable (tb)) + return CARRY_ON; + + /* Balancing is not required. */ + tb->s0num = vn->vn_nr_item; + set_parameters (tb, h, 0, 0, 1, NULL, -1, -1); + return NO_BALANCING_NEEDED; +} + + + +/* Check whether current node S[h] is balanced when Decreasing its size by + * Deleting or Cutting. + * Calculate parameters for balancing for current level h. + * Parameters: + * tb tree_balance structure; + * h current level of the node; + * inum item number in S[h]; + * mode d - delete, c - cut. + * Returns: 1 - schedule occured; + * 0 - balancing for higher levels needed; + * -1 - no balancing for higher levels needed; + * -2 - no disk space. + */ +static int dc_check_balance (struct tree_balance * tb, int h) +{ + +#ifdef CONFIG_REISERFS_CHECK + if ( ! 
(PATH_H_PBUFFER (tb->tb_path, h)) ) + reiserfs_panic(tb->tb_sb, "vs-8250: dc_check_balance: S is not initialized"); +#endif + + if ( h ) + return dc_check_balance_internal (tb, h); + else + return dc_check_balance_leaf (tb, h); +} + + + +/* Check whether current node S[h] is balanced. + * Calculate parameters for balancing for current level h. + * Parameters: + * + * tb tree_balance structure: + * + * tb is a large structure that must be read about in the header file + * at the same time as this procedure if the reader is to successfully + * understand this procedure + * + * h current level of the node; + * inum item number in S[h]; + * mode i - insert, p - paste, d - delete, c - cut. + * Returns: 1 - schedule occured; + * 0 - balancing for higher levels needed; + * -1 - no balancing for higher levels needed; + * -2 - no disk space. + */ +static int check_balance (int mode, + struct tree_balance * tb, + int h, + int inum, + int pos_in_item, + struct item_head * ins_ih, + const void * data + ) +{ + struct virtual_node * vn; + + vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf); + vn->vn_free_ptr = (char *)(tb->tb_vn + 1); + vn->vn_mode = mode; + vn->vn_affected_item_num = inum; + vn->vn_pos_in_item = pos_in_item; + vn->vn_ins_ih = ins_ih; + vn->vn_data = data; + +#ifdef CONFIG_REISERFS_CHECK + if (mode == M_INSERT && !vn->vn_ins_ih) + reiserfs_panic (0, "vs-8255: check_balance: ins_ih can not be 0 in insert mode"); +#endif + + if ( tb->insert_size[h] > 0 ) + /* Calculate balance parameters when size of node is increasing. */ + return ip_check_balance (tb, h); + + /* Calculate balance parameters when size of node is decreasing. */ + return dc_check_balance (tb, h); +} + + + +/* Check whether parent at the path is the really parent of the current node.*/ +static int get_direct_parent( + struct tree_balance * p_s_tb, + int n_h + ) { + struct buffer_head * p_s_bh; + struct path * p_s_path = p_s_tb->tb_path; + int n_position, + n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h); + + /* We are in the root or in the new root. */ + if ( n_path_offset <= FIRST_PATH_ELEMENT_OFFSET ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1 ) + reiserfs_panic(p_s_tb->tb_sb, "PAP-8260: get_direct_parent: illegal offset in the path"); +#endif + + if ( PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == + SB_ROOT_BLOCK (p_s_tb->tb_sb) ) { + /* Root is not changed. */ + PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL; + PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0; + return CARRY_ON; + } + return REPEAT_SEARCH; /* Root is changed and we must recalculate the path. */ + } + + if ( ! B_IS_IN_TREE(p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1)) ) + return REPEAT_SEARCH; /* Parent in the path is not in the tree. */ + + if ( (n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1)) > B_NR_ITEMS(p_s_bh) ) + return REPEAT_SEARCH; + + if ( B_N_CHILD_NUM(p_s_bh, n_position) != PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr ) + /* Parent in the path is not parent of the current node in the tree. */ + return REPEAT_SEARCH; + + if ( buffer_locked(p_s_bh) ) { + __wait_on_buffer(p_s_bh); + if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) + return REPEAT_SEARCH; + } + + return CARRY_ON; /* Parent in the path is unlocked and really parent of the current node. */ +} + + +/* Using lnum[n_h] and rnum[n_h] we should determine what neighbors + * of S[n_h] we + * need in order to balance S[n_h], and get them if necessary. 
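+ * The neighbors are read with reiserfs_bread() and attached to tb->L[n_h]
+ * and tb->R[n_h], replacing whatever was cached there before; if the file
+ * system changed while a neighbor was being read, the search is repeated.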
+ * Returns: SCHEDULE_OCCURRED - schedule occured while the function worked; + * CARRY_ON - schedule didn't occur while the function worked; + */ +static int get_neighbors( + struct tree_balance * p_s_tb, + int n_h + ) { + int n_child_position, + n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1); + unsigned long n_son_number; + struct super_block * p_s_sb = p_s_tb->tb_sb; + struct buffer_head * p_s_bh; + + + if ( p_s_tb->lnum[n_h] ) { + /* We need left neighbor to balance S[n_h]. */ + p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); + +#ifdef CONFIG_REISERFS_CHECK + if ( p_s_bh == p_s_tb->FL[n_h] && ! PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset) ) + reiserfs_panic (p_s_tb->tb_sb, "PAP-8270: get_neighbors: invalid position in the parent"); +#endif + + n_child_position = ( p_s_bh == p_s_tb->FL[n_h] ) ? p_s_tb->lkey[n_h] : B_NR_ITEMS (p_s_tb->FL[n_h]); + n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position); + p_s_bh = reiserfs_bread(p_s_sb->s_dev, n_son_number, p_s_sb->s_blocksize); + if (!p_s_bh) + return IO_ERROR; + if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { + decrement_bcount(p_s_bh); + return REPEAT_SEARCH; + } + +#ifdef CONFIG_REISERFS_CHECK + if ( ! B_IS_IN_TREE(p_s_tb->FL[n_h]) || n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) || + B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) != p_s_bh->b_blocknr ) + reiserfs_panic (p_s_tb->tb_sb, "PAP-8275: get_neighbors: invalid parent"); + if ( ! B_IS_IN_TREE(p_s_bh) ) + reiserfs_panic (p_s_tb->tb_sb, "PAP-8280: get_neighbors: invalid child"); + + if (! n_h && B_FREE_SPACE (p_s_bh) != MAX_CHILD_SIZE (p_s_bh) - B_N_CHILD (p_s_tb->FL[0],n_child_position)->dc_size) + reiserfs_panic (p_s_tb->tb_sb, "PAP-8290: get_neighbors: invalid child size of left neighbor"); +#endif + + decrement_bcount(p_s_tb->L[n_h]); + p_s_tb->L[n_h] = p_s_bh; + } + + + if ( p_s_tb->rnum[n_h] ) { /* We need right neighbor to balance S[n_path_offset]. */ + p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset); + +#ifdef CONFIG_REISERFS_CHECK + if ( p_s_bh == p_s_tb->FR[n_h] && PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset) >= B_NR_ITEMS(p_s_bh) ) + reiserfs_panic (p_s_tb->tb_sb, "PAP-8295: get_neighbors: invalid position in the parent"); +#endif + + n_child_position = ( p_s_bh == p_s_tb->FR[n_h] ) ? p_s_tb->rkey[n_h] + 1 : 0; + n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position); + p_s_bh = reiserfs_bread(p_s_sb->s_dev, n_son_number, p_s_sb->s_blocksize); + if (!p_s_bh) + return IO_ERROR; + if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { + decrement_bcount(p_s_bh); + return REPEAT_SEARCH; + } + decrement_bcount(p_s_tb->R[n_h]); + p_s_tb->R[n_h] = p_s_bh; + +#ifdef CONFIG_REISERFS_CHECK + if (! 
n_h && B_FREE_SPACE (p_s_bh) != MAX_CHILD_SIZE (p_s_bh) - B_N_CHILD (p_s_tb->FR[0],n_child_position)->dc_size) { + reiserfs_panic (p_s_tb->tb_sb, "PAP-8300: get_neighbors: invalid child size of right neighbor (%d != %d - %d)", + B_FREE_SPACE (p_s_bh), MAX_CHILD_SIZE (p_s_bh), B_N_CHILD (p_s_tb->FR[0],n_child_position)->dc_size); + } +#endif + + } + return CARRY_ON; +} + + +void * reiserfs_kmalloc (size_t size, int flags, struct super_block * s) +{ + void * vp; + static size_t malloced; + + + vp = kmalloc (size, flags); + if (vp) { + s->u.reiserfs_sb.s_kmallocs += size; + if (s->u.reiserfs_sb.s_kmallocs > malloced + 200000) { + reiserfs_warning ("vs-8301: reiserfs_kmalloc: allocated memory %d\n", s->u.reiserfs_sb.s_kmallocs); + malloced = s->u.reiserfs_sb.s_kmallocs; + } + } +/*printk ("malloc : size %d, allocated %d\n", size, s->u.reiserfs_sb.s_kmallocs);*/ + return vp; +} + +void reiserfs_kfree (const void * vp, size_t size, struct super_block * s) +{ + kfree (vp); + + s->u.reiserfs_sb.s_kmallocs -= size; + if (s->u.reiserfs_sb.s_kmallocs < 0) + reiserfs_warning ("vs-8302: reiserfs_kfree: allocated memory %d\n", s->u.reiserfs_sb.s_kmallocs); + +} + + +static int get_virtual_node_size (struct super_block * sb, struct buffer_head * bh) +{ + // int size = sizeof (struct virtual_item); /* for new item in case of insert */ + // int i, nr_items; + // struct item_head * ih; + + // this is enough for _ALL_ currently possible cases. In 4 k block + // one may put < 170 empty items. Each virtual item eats 12 + // byte. The biggest direntry item may have < 256 entries. Each + // entry would eat 2 byte of virtual node space + return sb->s_blocksize; + +#if 0 + size = sizeof (struct virtual_node) + sizeof (struct virtual_item); + ih = B_N_PITEM_HEAD (bh, 0); + nr_items = B_NR_ITEMS (bh); + for (i = 0; i < nr_items; i ++, ih ++) { + /* each item occupies some space in virtual node */ + size += sizeof (struct virtual_item); + if (is_direntry_le_ih (ih)) + /* each entry and new one occupeis 2 byte in the virtual node */ + size += (le16_to_cpu (ih->u.ih_entry_count) + 1) * sizeof (__u16); + } + + /* 1 bit for each bitmap block to note whether bitmap block was + dirtied in the operation */ + /* size += (SB_BMAP_NR (sb) * 2 / 8 + 4);*/ + return size; +#endif +} + + + +/* maybe we should fail balancing we are going to perform when kmalloc + fails several times. But now it will loop until kmalloc gets + required memory */ +static int get_mem_for_virtual_node (struct tree_balance * tb) +{ + int check_fs = 0; + int size; + char * buf; + + size = get_virtual_node_size (tb->tb_sb, PATH_PLAST_BUFFER (tb->tb_path)); + + if (size > tb->vn_buf_size) { + /* we have to allocate more memory for virtual node */ + if (tb->vn_buf) { + /* free memory allocated before */ + reiserfs_kfree (tb->vn_buf, tb->vn_buf_size, tb->tb_sb); + /* this is not needed if kfree is atomic */ + check_fs = 1; + } + + /* virtual node requires now more memory */ + tb->vn_buf_size = size; + + /* get memory for virtual item */ + buf = reiserfs_kmalloc(size, GFP_ATOMIC, tb->tb_sb); + if ( ! buf ) { + /* getting memory with GFP_KERNEL priority may involve + balancing now (due to indirect_to_direct conversion on + dcache shrinking). So, release path and collected + resourses here */ + free_buffers_in_tb (tb); + buf = reiserfs_kmalloc(size, GFP_BUFFER, tb->tb_sb); + if ( !buf ) { +#ifdef CONFIG_REISERFS_CHECK + reiserfs_warning ("vs-8345: get_mem_for_virtual_node: " + "kmalloc failed. 
reiserfs kmalloced %d bytes\n", + tb->tb_sb->u.reiserfs_sb.s_kmallocs); +#endif + tb->vn_buf_size = 0; + } + tb->vn_buf = buf; + schedule() ; + return REPEAT_SEARCH; + } + + tb->vn_buf = buf; + } + + if ( check_fs && FILESYSTEM_CHANGED_TB (tb) ) + return REPEAT_SEARCH; + + return CARRY_ON; +} + + +#ifdef CONFIG_REISERFS_CHECK +static void tb_buffer_sanity_check (struct super_block * p_s_sb, + struct buffer_head * p_s_bh, + const char *descr, int level) { + if (p_s_bh) { + if (atomic_read (&(p_s_bh->b_count)) <= 0) { + + reiserfs_panic (p_s_sb, "tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n", descr, level, p_s_bh); + } + + if ( ! buffer_uptodate (p_s_bh) ) { + reiserfs_panic (p_s_sb, "tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n", descr, level, p_s_bh); + } + + if ( ! B_IS_IN_TREE (p_s_bh) ) { + reiserfs_panic (p_s_sb, "tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n", descr, level, p_s_bh); + } + + if (p_s_bh->b_dev != p_s_sb->s_dev || + p_s_bh->b_size != p_s_sb->s_blocksize || + p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) { + reiserfs_panic (p_s_sb, "tb_buffer_sanity_check(): check failed for buffer %s[%d] (%b)\n", descr, level, p_s_bh); + } + } +} +#endif + +static void clear_all_dirty_bits(struct super_block *s, + struct buffer_head *bh) { + reiserfs_prepare_for_journal(s, bh, 0) ; +} + +static int wait_tb_buffers_until_unlocked (struct tree_balance * p_s_tb) +{ + struct buffer_head * locked; +#ifdef CONFIG_REISERFS_CHECK + int repeat_counter = 0; +#endif + int i; + + do { + + locked = NULL; + + for ( i = p_s_tb->tb_path->path_length; !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i-- ) { + if ( PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i) ) { + /* if I understand correctly, we can only be sure the last buffer + ** in the path is in the tree --clm + */ +#ifdef CONFIG_REISERFS_CHECK + if (PATH_PLAST_BUFFER(p_s_tb->tb_path) == + PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) { + tb_buffer_sanity_check (p_s_tb->tb_sb, + PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i), + "S", + p_s_tb->tb_path->path_length - i); + } +#endif + clear_all_dirty_bits(p_s_tb->tb_sb, + PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i)) ; + + if ( buffer_locked (PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i)) ) + locked = PATH_OFFSET_PBUFFER (p_s_tb->tb_path, i); + } + } + + for ( i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i]; i++ ) { + + if (p_s_tb->lnum[i] ) { + + if ( p_s_tb->L[i] ) { +#ifdef CONFIG_REISERFS_CHECK + tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->L[i], "L", i); +#endif + clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->L[i]) ; + if ( buffer_locked (p_s_tb->L[i]) ) + locked = p_s_tb->L[i]; + } + + if ( !locked && p_s_tb->FL[i] ) { +#ifdef CONFIG_REISERFS_CHECK + tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->FL[i], "FL", i); +#endif + clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FL[i]) ; + if ( buffer_locked (p_s_tb->FL[i]) ) + locked = p_s_tb->FL[i]; + } + + if ( !locked && p_s_tb->CFL[i] ) { +#ifdef CONFIG_REISERFS_CHECK + tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->CFL[i], "CFL", i); +#endif + clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->CFL[i]) ; + if ( buffer_locked (p_s_tb->CFL[i]) ) + locked = p_s_tb->CFL[i]; + } + + } + + if ( !locked && (p_s_tb->rnum[i]) ) { + + if ( p_s_tb->R[i] ) { +#ifdef CONFIG_REISERFS_CHECK + tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->R[i], "R", i); +#endif + clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->R[i]) ; + if ( buffer_locked (p_s_tb->R[i]) ) + locked = p_s_tb->R[i]; + } + + + if ( !locked && p_s_tb->FR[i] ) 
{ +#ifdef CONFIG_REISERFS_CHECK + tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->FR[i], "FR", i); +#endif + clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FR[i]) ; + if ( buffer_locked (p_s_tb->FR[i]) ) + locked = p_s_tb->FR[i]; + } + + if ( !locked && p_s_tb->CFR[i] ) { +#ifdef CONFIG_REISERFS_CHECK + tb_buffer_sanity_check (p_s_tb->tb_sb, p_s_tb->CFR[i], "CFR", i); +#endif + clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->CFR[i]) ; + if ( buffer_locked (p_s_tb->CFR[i]) ) + locked = p_s_tb->CFR[i]; + } + } + } + /* as far as I can tell, this is not required. The FEB list seems + ** to be full of newly allocated nodes, which will never be locked, + ** dirty, or anything else. + ** To be safe, I'm putting in the checks and waits in. For the moment, + ** they are needed to keep the code in journal.c from complaining + ** about the buffer. That code is inside CONFIG_REISERFS_CHECK as well. + ** --clm + */ + for ( i = 0; !locked && i < MAX_FEB_SIZE; i++ ) { + if ( p_s_tb->FEB[i] ) { + clear_all_dirty_bits(p_s_tb->tb_sb, p_s_tb->FEB[i]) ; + if (buffer_locked(p_s_tb->FEB[i])) { + locked = p_s_tb->FEB[i] ; + } + } + } + + if (locked) { +#ifdef CONFIG_REISERFS_CHECK + repeat_counter++; + if ( (repeat_counter % 10000) == 0) { + reiserfs_warning ("wait_tb_buffers_until_released(): too many iterations waiting for buffer to unlock (%b)\n", locked); + + /* Don't loop forever. Try to recover from possible error. */ + + return ( FILESYSTEM_CHANGED_TB (p_s_tb) ) ? REPEAT_SEARCH : CARRY_ON; + } +#endif + __wait_on_buffer (locked); + if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) { + return REPEAT_SEARCH; + } + } + + } while (locked); + + return CARRY_ON; +} + + +/* Prepare for balancing, that is + * get all necessary parents, and neighbors; + * analyze what and where should be moved; + * get sufficient number of new nodes; + * Balancing will start only after all resources will be collected at a time. + * + * When ported to SMP kernels, only at the last moment after all needed nodes + * are collected in cache, will the resources be locked using the usual + * textbook ordered lock acquisition algorithms. Note that ensuring that + * this code neither write locks what it does not need to write lock nor locks out of order + * will be a pain in the butt that could have been avoided. Grumble grumble. -Hans + * + * fix is meant in the sense of render unchanging + * + * Latency might be improved by first gathering a list of what buffers are needed + * and then getting as many of them in parallel as possible? 
-Hans + * + * Parameters: + * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append) + * tb tree_balance structure; + * inum item number in S[h]; + * pos_in_item - comment this if you can + * ins_ih & ins_sd are used when inserting + * Returns: 1 - schedule occurred while the function worked; + * 0 - schedule didn't occur while the function worked; + * -1 - if no_disk_space + */ + + +int fix_nodes (int n_op_mode, + struct tree_balance * p_s_tb, + struct item_head * p_s_ins_ih, // item head of item being inserted + const void * data // inserted item or data to be pasted + ) { + int n_ret_value, + n_h, + n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path); + int n_pos_in_item; + + /* we set wait_tb_buffers_run when we have to restore any dirty bits cleared + ** during wait_tb_buffers_run + */ + int wait_tb_buffers_run = 0 ; + int windex ; + struct buffer_head * p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path); + + n_pos_in_item = p_s_tb->tb_path->pos_in_item; + + + p_s_tb->fs_gen = get_generation (p_s_tb->tb_sb); + + /* we prepare and log the super here so it will already be in the + ** transaction when do_balance needs to change it. + ** This way do_balance won't have to schedule when trying to prepare + ** the super for logging + */ + reiserfs_prepare_for_journal(p_s_tb->tb_sb, + SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1) ; + journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb, + SB_BUFFER_WITH_SB(p_s_tb->tb_sb)) ; + if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) + return REPEAT_SEARCH; + + /* if it possible in indirect_to_direct conversion */ + if (buffer_locked (p_s_tbS0)) { + __wait_on_buffer (p_s_tbS0); + if ( FILESYSTEM_CHANGED_TB (p_s_tb) ) + return REPEAT_SEARCH; + } + +#ifndef __KERNEL__ + if ( atomic_read (&(p_s_tbS0->b_count)) > 1 || + (p_s_tb->L[0] && atomic_read (&(p_s_tb->L[0]->b_count)) > 1) || + (p_s_tb->R[0] && atomic_read (&(p_s_tb->R[0]->b_count)) > 1) ) { + printk ("mode=%c, insert_size=%d\n", n_op_mode, p_s_tb->insert_size[0]); + print_cur_tb ("first three parameters are invalid"); + reiserfs_panic (p_s_tb->tb_sb, "PAP-8310: fix_nodes: all buffers must be hold once in one thread processing"); + } +#endif + +#ifdef CONFIG_REISERFS_CHECK + if ( cur_tb ) { + print_cur_tb ("fix_nodes"); + reiserfs_panic(p_s_tb->tb_sb,"PAP-8305: fix_nodes: there is pending do_balance"); + } + + if (!buffer_uptodate (p_s_tbS0) || !B_IS_IN_TREE (p_s_tbS0)) { + reiserfs_panic (p_s_tb->tb_sb, "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate " + "at the beginning of fix_nodes or not in tree (mode %c)", p_s_tbS0, p_s_tbS0, n_op_mode); + } + + // FIXME: new items have to be of 8 byte multiples. Including new + // directory items those look like old ones + /* + if (p_s_tb->insert_size[0] % 8) + reiserfs_panic (p_s_tb->tb_sb, "vs-: fix_nodes: incorrect insert_size %d, " + "mode %c", + p_s_tb->insert_size[0], n_op_mode); + */ + + /* Check parameters. 
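+     (the mode switch below, like the sanity checks above it, is only
+     compiled in when CONFIG_REISERFS_CHECK is defined).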
*/ + switch (n_op_mode) { +#ifdef REISERFS_FSCK + case M_INTERNAL: + break; + case M_INSERT: + if ( n_item_num < 0 || n_item_num > B_NR_ITEMS(p_s_tbS0) ) + reiserfs_panic(p_s_tb->tb_sb,"PAP-8325: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert", + n_item_num, B_NR_ITEMS(p_s_tbS0)); +#else + case M_INSERT: + if ( n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0) ) + reiserfs_panic(p_s_tb->tb_sb,"PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert", + n_item_num, B_NR_ITEMS(p_s_tbS0)); +#endif + break; + case M_PASTE: + case M_DELETE: + case M_CUT: + if ( n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0) ) { + print_block (p_s_tbS0, 0, -1, -1); + printk("mode = %c insert_size = %d\n", n_op_mode, p_s_tb->insert_size[0]); + reiserfs_panic(p_s_tb->tb_sb,"PAP-8335: fix_nodes: Incorrect item number(%d)", n_item_num); + } + break; + default: + reiserfs_panic(p_s_tb->tb_sb,"PAP-8340: fix_nodes: Incorrect mode of operation"); + } +#endif + + if (get_mem_for_virtual_node (p_s_tb) == REPEAT_SEARCH) + // FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat + return REPEAT_SEARCH; + + + /* Starting from the leaf level; for all levels n_h of the tree. */ + for ( n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++ ) { + if ( (n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON ) { + goto repeat; + return n_ret_value; + } + + if ( (n_ret_value = check_balance (n_op_mode, p_s_tb, n_h, n_item_num, + n_pos_in_item, p_s_ins_ih, data)) != CARRY_ON ) { + if ( n_ret_value == NO_BALANCING_NEEDED ) { + /* No balancing for higher levels needed. */ + if ( (n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON ) { + goto repeat; + return n_ret_value; + } + if ( n_h != MAX_HEIGHT - 1 ) + p_s_tb->insert_size[n_h + 1] = 0; + /* ok, analysis and resource gathering are complete */ + break; + } + goto repeat; + return n_ret_value; + } + + if ( (n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON ) { + goto repeat; + return n_ret_value; + } + + if ( (n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON ) { + goto repeat; + return n_ret_value; /* No disk space, or schedule occurred and + analysis may be invalid and needs to be redone. */ + } + + if ( ! PATH_H_PBUFFER(p_s_tb->tb_path, n_h) ) { + /* We have a positive insert size but no nodes exist on this + level, this means that we are creating a new root. */ + +#ifdef CONFIG_REISERFS_CHECK + if ( p_s_tb->blknum[n_h] != 1 ) + reiserfs_panic(p_s_tb->tb_sb,"PAP-8350: fix_nodes: creating new empty root"); +#endif /* CONFIG_REISERFS_CHECK */ + + if ( n_h < MAX_HEIGHT - 1 ) + p_s_tb->insert_size[n_h + 1] = 0; + } + else + if ( ! PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1) ) { + if ( p_s_tb->blknum[n_h] > 1 ) { + /* The tree needs to be grown, so this node S[n_h] + which is the root node is split into two nodes, + and a new node (S[n_h+1]) will be created to + become the root node. 
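+	   The new root will have to hold blknum[n_h] child pointers and
+	   blknum[n_h] - 1 delimiting keys, which is where the insert size
+	   (DC_SIZE + KEY_SIZE) * (blknum[n_h] - 1) + DC_SIZE set below comes
+	   from; splitting the old root in two, for example, costs one key
+	   plus two pointers at the new level.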
*/ + +#ifdef CONFIG_REISERFS_CHECK + if ( n_h == MAX_HEIGHT - 1 ) + reiserfs_panic(p_s_tb->tb_sb, "PAP-8355: fix_nodes: attempt to create too high of a tree"); +#endif /* CONFIG_REISERFS_CHECK */ + + p_s_tb->insert_size[n_h + 1] = (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) + DC_SIZE; + } + else + if ( n_h < MAX_HEIGHT - 1 ) + p_s_tb->insert_size[n_h + 1] = 0; + } + else + p_s_tb->insert_size[n_h + 1] = (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1); + } + + + windex = push_journal_writer("fix_nodes") ; + if ((n_ret_value = wait_tb_buffers_until_unlocked (p_s_tb)) == CARRY_ON) { + pop_journal_writer(windex) ; + if (FILESYSTEM_CHANGED_TB(p_s_tb)) { + wait_tb_buffers_run = 1 ; + n_ret_value = REPEAT_SEARCH ; + goto repeat; + } else { + return CARRY_ON; + } + } else { + wait_tb_buffers_run = 1 ; + pop_journal_writer(windex) ; + goto repeat; + } + + repeat: + // fix_nodes was unable to perform its calculation due to + // filesystem got changed under us, lack of free disk space or i/o + // failure. If the first is the case - the search will be + // repeated. For now - free all resources acquired so far except + // for the new allocated nodes + { + int i; + + /* Release path buffers. */ + if (wait_tb_buffers_run) { + pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path) ; + } else { + pathrelse (p_s_tb->tb_path); + } + /* brelse all resources collected for balancing */ + for ( i = 0; i < MAX_HEIGHT; i++ ) { + if (wait_tb_buffers_run) { + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->L[i]); + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->R[i]); + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->FL[i]); + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->FR[i]); + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->CFL[i]); + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, p_s_tb->CFR[i]); + } + + brelse (p_s_tb->L[i]);p_s_tb->L[i] = 0; + brelse (p_s_tb->R[i]);p_s_tb->R[i] = 0; + brelse (p_s_tb->FL[i]);p_s_tb->FL[i] = 0; + brelse (p_s_tb->FR[i]);p_s_tb->FR[i] = 0; + brelse (p_s_tb->CFL[i]);p_s_tb->CFL[i] = 0; + brelse (p_s_tb->CFR[i]);p_s_tb->CFR[i] = 0; + } + + if (wait_tb_buffers_run) { + for ( i = 0; i < MAX_FEB_SIZE; i++ ) { + if ( p_s_tb->FEB[i] ) { + reiserfs_restore_prepared_buffer(p_s_tb->tb_sb, + p_s_tb->FEB[i]) ; + } + } + } + return n_ret_value; + } + +} + + +/* Anatoly will probably forgive me renaming p_s_tb to tb. I just + wanted to make lines shorter */ +void unfix_nodes (struct tree_balance * tb) +{ + int i; + +#ifdef CONFIG_REISERFS_CHECK + if ( ! tb->vn_buf ) + reiserfs_panic (tb->tb_sb, + "PAP-16050: unfix_nodes: pointer to the virtual node is NULL"); +#endif + + /* Release path buffers. 
*/ + pathrelse_and_restore (tb->tb_sb, tb->tb_path); + + /* brelse all resources collected for balancing */ + for ( i = 0; i < MAX_HEIGHT; i++ ) { + reiserfs_restore_prepared_buffer (tb->tb_sb, tb->L[i]); + reiserfs_restore_prepared_buffer (tb->tb_sb, tb->R[i]); + reiserfs_restore_prepared_buffer (tb->tb_sb, tb->FL[i]); + reiserfs_restore_prepared_buffer (tb->tb_sb, tb->FR[i]); + reiserfs_restore_prepared_buffer (tb->tb_sb, tb->CFL[i]); + reiserfs_restore_prepared_buffer (tb->tb_sb, tb->CFR[i]); + + brelse (tb->L[i]); + brelse (tb->R[i]); + brelse (tb->FL[i]); + brelse (tb->FR[i]); + brelse (tb->CFL[i]); + brelse (tb->CFR[i]); + } + + /* deal with list of allocated (used and unused) nodes */ + for ( i = 0; i < MAX_FEB_SIZE; i++ ) { + if ( tb->FEB[i] ) { + unsigned long blocknr = tb->FEB[i]->b_blocknr ; + /* de-allocated block which was not used by balancing and + bforget about buffer for it */ + brelse (tb->FEB[i]); + reiserfs_free_block (tb->transaction_handle, blocknr); + } + if (tb->used[i]) { + /* release used as new nodes including a new root */ + brelse (tb->used[i]); + } + } + +#if 0 /* shouldn't this be in CONFIG_REISERFS_CHECK??? */ + /* make sure, that all we have released got really freed */ + for (i = 0; i < sizeof (tb->thrown) / sizeof (tb->thrown[0]); i ++) + if (tb->thrown[i]) { + if (atomic_read (&(tb->thrown[i]->b_count))) { + /* the log will have the count at one and the buffers marked */ + if (atomic_read(&(tb->thrown[i]->b_count)) > 1 || + !(buffer_journaled(tb->thrown[i]) || + buffer_journal_dirty(tb->thrown[i]))) { + foo_print (tb->thrown[i], tb->tb_sb); + printk ("unfix_nodes: Waiting...(block %lu, count %d)\n", + tb->thrown[i]->b_blocknr, + atomic_read (&(tb->thrown[i]->b_count))); + wait_buffer_until_released (tb->thrown[i]); + printk ("unfix_nodes: Done (block %lu, count %d)\n", + tb->thrown[i]->b_blocknr, + atomic_read (&(tb->thrown[i]->b_count))); + } + } + } +#endif /* 0 */ + reiserfs_kfree (tb->vn_buf, tb->vn_buf_size, tb->tb_sb); + +} + + + +#ifndef REISERFS_FSCK + +// is_left_mergeable is now one of the item methods + +#else + +// this works only in fsck + +int are_items_mergeable (struct item_head * left, struct item_head * right, int bsize) +{ + if (comp_keys (&left->ih_key, &right->ih_key) != -1) { + reiserfs_panic (0, "vs-16070: are_items_mergeable: left %k, right %k", &(left->ih_key), &(right->ih_key)); + } + + if (comp_short_keys (&left->ih_key, &right->ih_key)) + return 0; + + if (I_IS_DIRECTORY_ITEM (left)) { + return 1; + } + + if ((I_IS_DIRECT_ITEM (left) && I_IS_DIRECT_ITEM (right)) || + (I_IS_INDIRECT_ITEM (left) && I_IS_INDIRECT_ITEM (right))) + return (left->ih_key.k_offset + I_BYTES_NUMBER (left, bsize) == right->ih_key.k_offset) ? 
1 : 0; + + return 0; +} + +/* get left neighbor of the leaf node */ +static struct buffer_head * get_left_neighbor (struct super_block * s, struct path * path) +{ + struct key key; + INITIALIZE_PATH (path_to_left_neighbor); + struct buffer_head * bh; + + copy_key (&key, B_N_PKEY (PATH_PLAST_BUFFER (path), 0)); + decrement_key (&key); + +/* init_path (&path_to_left_neighbor);*/ + search_by_key (s, &key, &path_to_left_neighbor, DISK_LEAF_NODE_LEVEL, READ_BLOCKS); + // FIXME: fsck is to handle I/O failures somehow as well + if (PATH_LAST_POSITION (&path_to_left_neighbor) == 0) { + pathrelse (&path_to_left_neighbor); + return 0; + } + bh = PATH_PLAST_BUFFER (&path_to_left_neighbor); + bh->b_count ++; + pathrelse (&path_to_left_neighbor); + return bh; +} + +extern struct key MIN_KEY; +static struct buffer_head * get_right_neighbor (struct super_block * s, struct path * path) +{ + struct key key; + struct key * rkey; + INITIALIZE_PATH (path_to_right_neighbor); + struct buffer_head * bh; + + rkey = get_rkey (path, s); + if (comp_keys (rkey, &MIN_KEY) == 0) + reiserfs_panic (s, "vs-16080: get_right_neighbor: get_rkey returned min key (path has changed)"); + copy_key (&key, rkey); + + + /*init_path (&path_to_right_neighbor);*/ + search_by_key (s, &key, &path_to_right_neighbor, DISK_LEAF_NODE_LEVEL, READ_BLOCKS); + if (PATH_PLAST_BUFFER (&path_to_right_neighbor) == PATH_PLAST_BUFFER (path)) { + pathrelse (&path_to_right_neighbor); + return 0; + } + bh = PATH_PLAST_BUFFER (&path_to_right_neighbor); + bh->b_count ++; + pathrelse (&path_to_right_neighbor); + return bh; +} + + +int is_left_mergeable (struct super_block * s, struct path * path) +{ + struct item_head * right; + struct buffer_head * bh; + int retval; + + right = B_N_PITEM_HEAD (PATH_PLAST_BUFFER (path), 0); + + bh = get_left_neighbor (s, path); + if (bh == 0) { + return 0; + } + retval = are_items_mergeable (B_N_PITEM_HEAD (bh, B_NR_ITEMS (bh) - 1), right, bh->b_size); + brelse (bh); + return retval; +} + + +int is_right_mergeable (struct super_block * s, struct path * path) +{ + struct item_head * left; + struct buffer_head * bh; + int retval; + + left = B_N_PITEM_HEAD (PATH_PLAST_BUFFER (path), B_NR_ITEMS (PATH_PLAST_BUFFER (path)) - 1); + + bh = get_right_neighbor (s, path); + if (bh == 0) { + return 0; + } + retval = are_items_mergeable (left, B_N_PITEM_HEAD (bh, 0), bh->b_size); + brelse (bh); + return retval; +} + +#endif /* REISERFS_FSCK */ + + + + + diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/hashes.c linux/fs/reiserfs/hashes.c --- v2.4.0/linux/fs/reiserfs/hashes.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/hashes.c Mon Jan 15 12:42:32 2001 @@ -0,0 +1,226 @@ + +/* + * Keyed 32-bit hash function using TEA in a Davis-Meyer function + * H0 = Key + * Hi = E Mi(Hi-1) + Hi-1 + * + * (see Applied Cryptography, 2nd edition, p448). + * + * Jeremy Fitzhardinge 1998 + * + * Jeremy has agreed to the contents of reiserfs/README. 
-Hans + * Yura's function is added (04/07/2000) + */ + +// +// keyed_hash +// yura_hash +// r5_hash +// + +#include + + + +#define DELTA 0x9E3779B9 +#define FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */ +#define PARTROUNDS 6 /* 6 gets complete mixing */ + +#ifndef __KERNEL__ +typedef __u32 u32; +#endif + +/* a, b, c, d - data; h0, h1 - accumulated hash */ +#define TEACORE(rounds) \ + do { \ + u32 sum = 0; \ + int n = rounds; \ + u32 b0, b1; \ + \ + b0 = h0; \ + b1 = h1; \ + \ + do \ + { \ + sum += DELTA; \ + b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); \ + b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); \ + } while(--n); \ + \ + h0 += b0; \ + h1 += b1; \ + } while(0) + + +u32 keyed_hash(const char *msg, int len) +{ + u32 k[] = { 0x9464a485, 0x542e1a94, 0x3e846bff, 0xb75bcfc3}; + + u32 h0 = k[0], h1 = k[1]; + u32 a, b, c, d; + u32 pad; + int i; + + + // assert(len >= 0 && len < 256); + + pad = (u32)len | ((u32)len << 8); + pad |= pad << 16; + + while(len >= 16) + { + a = (u32)msg[ 0] | + (u32)msg[ 1] << 8 | + (u32)msg[ 2] << 16| + (u32)msg[ 3] << 24; + b = (u32)msg[ 4] | + (u32)msg[ 5] << 8 | + (u32)msg[ 6] << 16| + (u32)msg[ 7] << 24; + c = (u32)msg[ 8] | + (u32)msg[ 9] << 8 | + (u32)msg[10] << 16| + (u32)msg[11] << 24; + d = (u32)msg[12] | + (u32)msg[13] << 8 | + (u32)msg[14] << 16| + (u32)msg[15] << 24; + + TEACORE(PARTROUNDS); + + len -= 16; + msg += 16; + } + + if (len >= 12) + { + //assert(len < 16); + if (len >= 16) + *(int *)0 = 0; + + a = (u32)msg[ 0] | + (u32)msg[ 1] << 8 | + (u32)msg[ 2] << 16| + (u32)msg[ 3] << 24; + b = (u32)msg[ 4] | + (u32)msg[ 5] << 8 | + (u32)msg[ 6] << 16| + (u32)msg[ 7] << 24; + c = (u32)msg[ 8] | + (u32)msg[ 9] << 8 | + (u32)msg[10] << 16| + (u32)msg[11] << 24; + + d = pad; + for(i = 12; i < len; i++) + { + d <<= 8; + d |= msg[i]; + } + } + else if (len >= 8) + { + //assert(len < 12); + if (len >= 12) + *(int *)0 = 0; + a = (u32)msg[ 0] | + (u32)msg[ 1] << 8 | + (u32)msg[ 2] << 16| + (u32)msg[ 3] << 24; + b = (u32)msg[ 4] | + (u32)msg[ 5] << 8 | + (u32)msg[ 6] << 16| + (u32)msg[ 7] << 24; + + c = d = pad; + for(i = 8; i < len; i++) + { + c <<= 8; + c |= msg[i]; + } + } + else if (len >= 4) + { + //assert(len < 8); + if (len >= 8) + *(int *)0 = 0; + a = (u32)msg[ 0] | + (u32)msg[ 1] << 8 | + (u32)msg[ 2] << 16| + (u32)msg[ 3] << 24; + + b = c = d = pad; + for(i = 4; i < len; i++) + { + b <<= 8; + b |= msg[i]; + } + } + else + { + //assert(len < 4); + if (len >= 4) + *(int *)0 = 0; + a = b = c = d = pad; + for(i = 0; i < len; i++) + { + a <<= 8; + a |= msg[i]; + } + } + + TEACORE(FULLROUNDS); + +/* return 0;*/ + return h0^h1; +} + +/* What follows in this file is copyright 2000 by Hans Reiser, and the + * licensing of what follows is governed by reiserfs/README */ + +u32 yura_hash (const char *msg, int len) +{ + int j, pow; + u32 a, c; + int i; + + for (pow=1,i=1; i < len; i++) pow = pow * 10; + + if (len == 1) + a = msg[0]-48; + else + a = (msg[0] - 48) * pow; + + for (i=1; i < len; i++) { + c = msg[i] - 48; + for (pow=1,j=i; j < len-1; j++) pow = pow * 10; + a = a + c * pow; + } + + for (; i < 40; i++) { + c = '0' - 48; + for (pow=1,j=i; j < len-1; j++) pow = pow * 10; + a = a + c * pow; + } + + for (; i < 256; i++) { + c = i; + for (pow=1,j=i; j < len-1; j++) pow = pow * 10; + a = a + c * pow; + } + + a = a << 7; + return a; +} + +u32 r5_hash (const char *msg, int len) +{ + u32 a=0; + while(*msg) { + a += *msg << 4; + a += *msg >> 4; + a *= 11; + msg++; + } + return a; +} diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/ibalance.c 
linux/fs/reiserfs/ibalance.c --- v2.4.0/linux/fs/reiserfs/ibalance.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/ibalance.c Mon Jan 15 15:31:19 2001 @@ -0,0 +1,1140 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +#else + +#include "nokernel.h" + +#endif + + +/* this is one and only function that is used outside (do_balance.c) */ +int balance_internal ( + struct tree_balance * , + int, + int, + struct item_head * , + struct buffer_head ** + ); + +/* modes of internal_shift_left, internal_shift_right and internal_insert_childs */ +#define INTERNAL_SHIFT_FROM_S_TO_L 0 +#define INTERNAL_SHIFT_FROM_R_TO_S 1 +#define INTERNAL_SHIFT_FROM_L_TO_S 2 +#define INTERNAL_SHIFT_FROM_S_TO_R 3 +#define INTERNAL_INSERT_TO_S 4 +#define INTERNAL_INSERT_TO_L 5 +#define INTERNAL_INSERT_TO_R 6 + +static void internal_define_dest_src_infos ( + int shift_mode, + struct tree_balance * tb, + int h, + struct buffer_info * dest_bi, + struct buffer_info * src_bi, + int * d_key, + struct buffer_head ** cf + ) +{ +#ifdef CONFIG_REISERFS_CHECK + memset (dest_bi, 0, sizeof (struct buffer_info)); + memset (src_bi, 0, sizeof (struct buffer_info)); +#endif + /* define dest, src, dest parent, dest position */ + switch (shift_mode) { + case INTERNAL_SHIFT_FROM_S_TO_L: /* used in internal_shift_left */ + src_bi->tb = tb; + src_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); + src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); + src_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); + dest_bi->tb = tb; + dest_bi->bi_bh = tb->L[h]; + dest_bi->bi_parent = tb->FL[h]; + dest_bi->bi_position = get_left_neighbor_position (tb, h); + *d_key = tb->lkey[h]; + *cf = tb->CFL[h]; + break; + case INTERNAL_SHIFT_FROM_L_TO_S: + src_bi->tb = tb; + src_bi->bi_bh = tb->L[h]; + src_bi->bi_parent = tb->FL[h]; + src_bi->bi_position = get_left_neighbor_position (tb, h); + dest_bi->tb = tb; + dest_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); + dest_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); + dest_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); /* dest position is analog of dest->b_item_order */ + *d_key = tb->lkey[h]; + *cf = tb->CFL[h]; + break; + + case INTERNAL_SHIFT_FROM_R_TO_S: /* used in internal_shift_left */ + src_bi->tb = tb; + src_bi->bi_bh = tb->R[h]; + src_bi->bi_parent = tb->FR[h]; + src_bi->bi_position = get_right_neighbor_position (tb, h); + dest_bi->tb = tb; + dest_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); + dest_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); + dest_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); + *d_key = tb->rkey[h]; + *cf = tb->CFR[h]; + break; + + case INTERNAL_SHIFT_FROM_S_TO_R: + src_bi->tb = tb; + src_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); + src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); + src_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); + dest_bi->tb = tb; + dest_bi->bi_bh = tb->R[h]; + dest_bi->bi_parent = tb->FR[h]; + dest_bi->bi_position = get_right_neighbor_position (tb, h); + *d_key = tb->rkey[h]; + *cf = tb->CFR[h]; + break; + + case INTERNAL_INSERT_TO_L: + dest_bi->tb = tb; + dest_bi->bi_bh = tb->L[h]; + dest_bi->bi_parent = tb->FL[h]; + dest_bi->bi_position = get_left_neighbor_position (tb, h); + break; + + case INTERNAL_INSERT_TO_S: + dest_bi->tb = tb; + dest_bi->bi_bh = PATH_H_PBUFFER (tb->tb_path, h); + dest_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, h); + dest_bi->bi_position = PATH_H_POSITION (tb->tb_path, h + 1); + break; + + 
case INTERNAL_INSERT_TO_R: + dest_bi->tb = tb; + dest_bi->bi_bh = tb->R[h]; + dest_bi->bi_parent = tb->FR[h]; + dest_bi->bi_position = get_right_neighbor_position (tb, h); + break; + + default: + reiserfs_panic (tb->tb_sb, "internal_define_dest_src_infos", "shift type is unknown (%d)", shift_mode); + } +} + + + +/* Insert count node pointers into buffer cur before position to + 1. + * Insert count items into buffer cur before position to. + * Items and node pointers are specified by inserted and bh respectively. + */ +static void internal_insert_childs (struct buffer_info * cur_bi, + int to, int count, + struct item_head * inserted, + struct buffer_head ** bh + ) +{ + struct buffer_head * cur = cur_bi->bi_bh; + struct block_head * blkh; + int nr; + struct key * ih; + struct disk_child new_dc[2]; + struct disk_child * dc; + int i; + + if (count <= 0) + return; + + nr = le16_to_cpu ((blkh = B_BLK_HEAD(cur))->blk_nr_item); + +#ifdef CONFIG_REISERFS_CHECK + if (count > 2) + reiserfs_panic (0, "internal_insert_childs", "too many children (%d) are to be inserted", count); + if (B_FREE_SPACE (cur) < count * (KEY_SIZE + DC_SIZE)) + reiserfs_panic (0, "internal_insert_childs", "no enough free space (%d), needed %d bytes", + B_FREE_SPACE (cur), count * (KEY_SIZE + DC_SIZE)); +#endif /* CONFIG_REISERFS_CHECK */ + + /* prepare space for count disk_child */ + dc = B_N_CHILD(cur,to+1); + + memmove (dc + count, dc, (nr+1-(to+1)) * DC_SIZE); + + /* copy to_be_insert disk children */ + for (i = 0; i < count; i ++) { + new_dc[i].dc_size = + cpu_to_le16 (MAX_CHILD_SIZE(bh[i]) - B_FREE_SPACE (bh[i])); + new_dc[i].dc_block_number = cpu_to_le32 (bh[i]->b_blocknr); + } + memcpy (dc, new_dc, DC_SIZE * count); + + + /* prepare space for count items */ + ih = B_N_PDELIM_KEY (cur, ((to == -1) ? 0 : to)); + + memmove (ih + count, ih, (nr - to) * KEY_SIZE + (nr + 1 + count) * DC_SIZE); + + /* copy item headers (keys) */ + memcpy (ih, inserted, KEY_SIZE); + if ( count > 1 ) + memcpy (ih + 1, inserted + 1, KEY_SIZE); + + /* sizes, item number */ + blkh->blk_nr_item = cpu_to_le16 (le16_to_cpu (blkh->blk_nr_item) + count); + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) - count * (DC_SIZE + KEY_SIZE)); + + do_balance_mark_internal_dirty (cur_bi->tb, cur,0); + + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + check_internal (cur); + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + + if (cur_bi->bi_parent) { + B_N_CHILD (cur_bi->bi_parent,cur_bi->bi_position)->dc_size += count * (DC_SIZE + KEY_SIZE); + do_balance_mark_internal_dirty(cur_bi->tb, cur_bi->bi_parent, 0); + + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + check_internal (cur_bi->bi_parent); + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + } + +} + + +/* Delete del_num items and node pointers from buffer cur starting from * + * the first_i'th item and first_p'th pointers respectively. 
*/ +static void internal_delete_pointers_items ( + struct buffer_info * cur_bi, + int first_p, + int first_i, + int del_num + ) +{ + struct buffer_head * cur = cur_bi->bi_bh; + int nr; + struct block_head * blkh; + struct key * key; + struct disk_child * dc; + +#ifdef CONFIG_REISERFS_CHECK + if (cur == NULL) + reiserfs_panic (0, "internal_delete_pointers_items1: buffer is 0"); + + if (del_num < 0) + reiserfs_panic (0, "internal_delete_pointers_items2", + "negative number of items (%d) can not be deleted", del_num); + + if (first_p < 0 || first_p + del_num > B_NR_ITEMS (cur) + 1 || first_i < 0) + reiserfs_panic (0, "internal_delete_pointers_items3", + "first pointer order (%d) < 0 or " + "no so many pointers (%d), only (%d) or " + "first key order %d < 0", first_p, + first_p + del_num, B_NR_ITEMS (cur) + 1, first_i); +#endif /* CONFIG_REISERFS_CHECK */ + if ( del_num == 0 ) + return; + + nr = le16_to_cpu ((blkh = B_BLK_HEAD(cur))->blk_nr_item); + + if ( first_p == 0 && del_num == nr + 1 ) { +#ifdef CONFIG_REISERFS_CHECK + if ( first_i != 0 ) + reiserfs_panic (0, "internal_delete_pointers_items5", + "first deleted key must have order 0, not %d", first_i); +#endif /* CONFIG_REISERFS_CHECK */ + make_empty_node (cur_bi); + return; + } + +#ifdef CONFIG_REISERFS_CHECK + if (first_i + del_num > B_NR_ITEMS (cur)) { + printk("first_i = %d del_num = %d\n",first_i,del_num); + reiserfs_panic (0, "internal_delete_pointers_items4: :" + "no so many keys (%d) in the node (%b)(%z)", first_i + del_num, cur, cur); + } +#endif /* CONFIG_REISERFS_CHECK */ + + + /* deleting */ + dc = B_N_CHILD (cur, first_p); + + memmove (dc, dc + del_num, (nr + 1 - first_p - del_num) * DC_SIZE); + key = B_N_PDELIM_KEY (cur, first_i); + memmove (key, key + del_num, (nr - first_i - del_num) * KEY_SIZE + (nr + 1 - del_num) * DC_SIZE); + + + /* sizes, item number */ + blkh->blk_nr_item = cpu_to_le16 (le16_to_cpu (blkh->blk_nr_item) - del_num); + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) + del_num * (KEY_SIZE + DC_SIZE)); + + do_balance_mark_internal_dirty (cur_bi->tb, cur, 0); + /*&&&&&&&&&&&&&&&&&&&&&&&*/ + check_internal (cur); + /*&&&&&&&&&&&&&&&&&&&&&&&*/ + + if (cur_bi->bi_parent) { + B_N_CHILD (cur_bi->bi_parent, cur_bi->bi_position)->dc_size -= del_num * (KEY_SIZE + DC_SIZE); + do_balance_mark_internal_dirty (cur_bi->tb, cur_bi->bi_parent,0); + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + check_internal (cur_bi->bi_parent); + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + } +} + + +/* delete n node pointers and items starting from given position */ +static void internal_delete_childs (struct buffer_info * cur_bi, + int from, int n) +{ + int i_from; + + i_from = (from == 0) ? from : from - 1; + + /* delete n pointers starting from `from' position in CUR; + delete n keys starting from 'i_from' position in CUR; + */ + internal_delete_pointers_items (cur_bi, from, i_from, n); +} + + +/* copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest +* last_first == FIRST_TO_LAST means, that we copy first items from src to tail of dest + * last_first == LAST_TO_FIRST means, that we copy last items from src to head of dest + */ +static void internal_copy_pointers_items ( + struct buffer_info * dest_bi, + struct buffer_head * src, + int last_first, int cpy_num + ) +{ + /* ATTENTION! 
Number of node pointers in DEST is equal to number of items in DEST * + * as delimiting key have already inserted to buffer dest.*/ + struct buffer_head * dest = dest_bi->bi_bh; + int nr_dest, nr_src; + int dest_order, src_order; + struct block_head * blkh; + struct key * key; + struct disk_child * dc; + + nr_src = B_NR_ITEMS (src); + +#ifdef CONFIG_REISERFS_CHECK + if ( dest == NULL || src == NULL ) + reiserfs_panic (0, "internal_copy_pointers_items", "src (%p) or dest (%p) buffer is 0", src, dest); + + if (last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST) + reiserfs_panic (0, "internal_copy_pointers_items", + "invalid last_first parameter (%d)", last_first); + + if ( nr_src < cpy_num - 1 ) + reiserfs_panic (0, "internal_copy_pointers_items", "no so many items (%d) in src (%d)", cpy_num, nr_src); + + if ( cpy_num < 0 ) + reiserfs_panic (0, "internal_copy_pointers_items", "cpy_num less than 0 (%d)", cpy_num); + + if (cpy_num - 1 + B_NR_ITEMS(dest) > (int)MAX_NR_KEY(dest)) + reiserfs_panic (0, "internal_copy_pointers_items", + "cpy_num (%d) + item number in dest (%d) can not be more than MAX_NR_KEY(%d)", + cpy_num, B_NR_ITEMS(dest), MAX_NR_KEY(dest)); +#endif + + if ( cpy_num == 0 ) + return; + + /* coping */ + nr_dest = le16_to_cpu ((blkh = B_BLK_HEAD(dest))->blk_nr_item); + + /*dest_order = (last_first == LAST_TO_FIRST) ? 0 : nr_dest;*/ + /*src_order = (last_first == LAST_TO_FIRST) ? (nr_src - cpy_num + 1) : 0;*/ + (last_first == LAST_TO_FIRST) ? (dest_order = 0, src_order = nr_src - cpy_num + 1) : + (dest_order = nr_dest, src_order = 0); + + /* prepare space for cpy_num pointers */ + dc = B_N_CHILD (dest, dest_order); + + memmove (dc + cpy_num, dc, (nr_dest - dest_order) * DC_SIZE); + + /* insert pointers */ + memcpy (dc, B_N_CHILD (src, src_order), DC_SIZE * cpy_num); + + + /* prepare space for cpy_num - 1 item headers */ + key = B_N_PDELIM_KEY(dest, dest_order); + memmove (key + cpy_num - 1, key, + KEY_SIZE * (nr_dest - dest_order) + DC_SIZE * (nr_dest + cpy_num)); + + + /* insert headers */ + memcpy (key, B_N_PDELIM_KEY (src, src_order), KEY_SIZE * (cpy_num - 1)); + + /* sizes, item number */ + blkh->blk_nr_item = cpu_to_le16 (le16_to_cpu (blkh->blk_nr_item) + (cpy_num - 1)); + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) - (KEY_SIZE * (cpy_num - 1) + DC_SIZE * cpy_num)); + + do_balance_mark_internal_dirty (dest_bi->tb, dest, 0); + + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + check_internal (dest); + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + + if (dest_bi->bi_parent) { + B_N_CHILD(dest_bi->bi_parent,dest_bi->bi_position)->dc_size += + KEY_SIZE * (cpy_num - 1) + DC_SIZE * cpy_num; + + do_balance_mark_internal_dirty (dest_bi->tb, dest_bi->bi_parent,0); + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + check_internal (dest_bi->bi_parent); + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + } + +} + + +/* Copy cpy_num node pointers and cpy_num - 1 items from buffer src to buffer dest. + * Delete cpy_num - del_par items and node pointers from buffer src. + * last_first == FIRST_TO_LAST means, that we copy/delete first items from src. + * last_first == LAST_TO_FIRST means, that we copy/delete last items from src. 
+ */ +static void internal_move_pointers_items (struct buffer_info * dest_bi, + struct buffer_info * src_bi, + int last_first, int cpy_num, int del_par) +{ + int first_pointer; + int first_item; + + internal_copy_pointers_items (dest_bi, src_bi->bi_bh, last_first, cpy_num); + + if (last_first == FIRST_TO_LAST) { /* shift_left occurs */ + first_pointer = 0; + first_item = 0; + /* delete cpy_num - del_par pointers and keys starting for pointers with first_pointer, + for key - with first_item */ + internal_delete_pointers_items (src_bi, first_pointer, first_item, cpy_num - del_par); + } else { /* shift_right occurs */ + int i, j; + + i = ( cpy_num - del_par == ( j = B_NR_ITEMS(src_bi->bi_bh)) + 1 ) ? 0 : j - cpy_num + del_par; + + internal_delete_pointers_items (src_bi, j + 1 - cpy_num + del_par, i, cpy_num - del_par); + } +} + +/* Insert n_src'th key of buffer src before n_dest'th key of buffer dest. */ +static void internal_insert_key (struct buffer_info * dest_bi, + int dest_position_before, /* insert key before key with n_dest number */ + struct buffer_head * src, + int src_position) +{ + struct buffer_head * dest = dest_bi->bi_bh; + int nr; + struct block_head * blkh; + struct key * key; + +#ifdef CONFIG_REISERFS_CHECK + if (dest == NULL || src == NULL) + reiserfs_panic (0, "internal_insert_key", "sourse(%p) or dest(%p) buffer is 0", src, dest); + + if (dest_position_before < 0 || src_position < 0) + reiserfs_panic (0, "internal_insert_key", "source(%d) or dest(%d) key number less than 0", + src_position, dest_position_before); + + if (dest_position_before > B_NR_ITEMS (dest) || src_position >= B_NR_ITEMS(src)) + reiserfs_panic (0, "internal_insert_key", + "invalid position in dest (%d (key number %d)) or in src (%d (key number %d))", + dest_position_before, B_NR_ITEMS (dest), src_position, B_NR_ITEMS(src)); + + if (B_FREE_SPACE (dest) < KEY_SIZE) + reiserfs_panic (0, "internal_insert_key", + "no enough free space (%d) in dest buffer", B_FREE_SPACE (dest)); +#endif + + nr = le16_to_cpu ((blkh=B_BLK_HEAD(dest))->blk_nr_item); + + /* prepare space for inserting key */ + key = B_N_PDELIM_KEY (dest, dest_position_before); + memmove (key + 1, key, (nr - dest_position_before) * KEY_SIZE + (nr + 1) * DC_SIZE); + + /* insert key */ + memcpy (key, B_N_PDELIM_KEY(src, src_position), KEY_SIZE); + + /* Change dirt, free space, item number fields. */ + blkh->blk_nr_item = cpu_to_le16 (le16_to_cpu (blkh->blk_nr_item) + 1); + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) - KEY_SIZE); + + do_balance_mark_internal_dirty (dest_bi->tb, dest, 0); + + if (dest_bi->bi_parent) { + B_N_CHILD(dest_bi->bi_parent,dest_bi->bi_position)->dc_size += KEY_SIZE; + do_balance_mark_internal_dirty (dest_bi->tb, dest_bi->bi_parent,0); + } +} + + + +/* Insert d_key'th (delimiting) key from buffer cfl to tail of dest. + * Copy pointer_amount node pointers and pointer_amount - 1 items from buffer src to buffer dest. + * Replace d_key'th key in buffer cfl. + * Delete pointer_amount items and node pointers from buffer src. 
+ */ +/* this can be invoked both to shift from S to L and from R to S */ +static void internal_shift_left ( + int mode, /* INTERNAL_FROM_S_TO_L | INTERNAL_FROM_R_TO_S */ + struct tree_balance * tb, + int h, + int pointer_amount + ) +{ + struct buffer_info dest_bi, src_bi; + struct buffer_head * cf; + int d_key_position; + + internal_define_dest_src_infos (mode, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); + + /*printk("pointer_amount = %d\n",pointer_amount);*/ + + if (pointer_amount) { + /* insert delimiting key from common father of dest and src to node dest into position B_NR_ITEM(dest) */ + internal_insert_key (&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, d_key_position); + + if (B_NR_ITEMS(src_bi.bi_bh) == pointer_amount - 1) { + if (src_bi.bi_position/*src->b_item_order*/ == 0) + replace_key (tb, cf, d_key_position, src_bi.bi_parent/*src->b_parent*/, 0); + } else + replace_key (tb, cf, d_key_position, src_bi.bi_bh, pointer_amount - 1); + } + /* last parameter is del_parameter */ + internal_move_pointers_items (&dest_bi, &src_bi, FIRST_TO_LAST, pointer_amount, 0); + +} + +/* Insert delimiting key to L[h]. + * Copy n node pointers and n - 1 items from buffer S[h] to L[h]. + * Delete n - 1 items and node pointers from buffer S[h]. + */ +/* it always shifts from S[h] to L[h] */ +static void internal_shift1_left ( + struct tree_balance * tb, + int h, + int pointer_amount + ) +{ + struct buffer_info dest_bi, src_bi; + struct buffer_head * cf; + int d_key_position; + + internal_define_dest_src_infos (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); + + if ( pointer_amount > 0 ) /* insert lkey[h]-th key from CFL[h] to left neighbor L[h] */ + internal_insert_key (&dest_bi, B_NR_ITEMS(dest_bi.bi_bh), cf, d_key_position); + /* internal_insert_key (tb->L[h], B_NR_ITEM(tb->L[h]), tb->CFL[h], tb->lkey[h]);*/ + + /* last parameter is del_parameter */ + internal_move_pointers_items (&dest_bi, &src_bi, FIRST_TO_LAST, pointer_amount, 1); + /* internal_move_pointers_items (tb->L[h], tb->S[h], FIRST_TO_LAST, pointer_amount, 1);*/ +} + + +/* Insert d_key'th (delimiting) key from buffer cfr to head of dest. + * Copy n node pointers and n - 1 items from buffer src to buffer dest. + * Replace d_key'th key in buffer cfr. + * Delete n items and node pointers from buffer src. 
+ */ +static void internal_shift_right ( + int mode, /* INTERNAL_FROM_S_TO_R | INTERNAL_FROM_L_TO_S */ + struct tree_balance * tb, + int h, + int pointer_amount + ) +{ + struct buffer_info dest_bi, src_bi; + struct buffer_head * cf; + int d_key_position; + int nr; + + + internal_define_dest_src_infos (mode, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); + + nr = B_NR_ITEMS (src_bi.bi_bh); + + if (pointer_amount > 0) { + /* insert delimiting key from common father of dest and src to dest node into position 0 */ + internal_insert_key (&dest_bi, 0, cf, d_key_position); + if (nr == pointer_amount - 1) { +#ifdef CONFIG_REISERFS_CHECK + if ( src_bi.bi_bh != PATH_H_PBUFFER (tb->tb_path, h)/*tb->S[h]*/ || dest_bi.bi_bh != tb->R[h]) + reiserfs_panic (tb->tb_sb, "internal_shift_right", "src (%p) must be == tb->S[h](%p) when it disappears", + src_bi.bi_bh, PATH_H_PBUFFER (tb->tb_path, h)); +#endif + /* when S[h] disappers replace left delemiting key as well */ + if (tb->CFL[h]) + replace_key (tb, cf, d_key_position, tb->CFL[h], tb->lkey[h]); + } else + replace_key (tb, cf, d_key_position, src_bi.bi_bh, nr - pointer_amount); + } + + /* last parameter is del_parameter */ + internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, pointer_amount, 0); +} + +/* Insert delimiting key to R[h]. + * Copy n node pointers and n - 1 items from buffer S[h] to R[h]. + * Delete n - 1 items and node pointers from buffer S[h]. + */ +/* it always shift from S[h] to R[h] */ +static void internal_shift1_right ( + struct tree_balance * tb, + int h, + int pointer_amount + ) +{ + struct buffer_info dest_bi, src_bi; + struct buffer_head * cf; + int d_key_position; + + internal_define_dest_src_infos (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, &dest_bi, &src_bi, &d_key_position, &cf); + + if (pointer_amount > 0) /* insert rkey from CFR[h] to right neighbor R[h] */ + internal_insert_key (&dest_bi, 0, cf, d_key_position); + /* internal_insert_key (tb->R[h], 0, tb->CFR[h], tb->rkey[h]);*/ + + /* last parameter is del_parameter */ + internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, pointer_amount, 1); + /* internal_move_pointers_items (tb->R[h], tb->S[h], LAST_TO_FIRST, pointer_amount, 1);*/ +} + + +/* Delete insert_num node pointers together with their left items + * and balance current node.*/ +static void balance_internal_when_delete (struct tree_balance * tb, + int h, int child_pos) +{ + int insert_num; + int n; + struct buffer_head * tbSh = PATH_H_PBUFFER (tb->tb_path, h); + struct buffer_info bi; + + insert_num = tb->insert_size[h] / ((int)(DC_SIZE + KEY_SIZE)); + + /* delete child-node-pointer(s) together with their left item(s) */ + bi.tb = tb; + bi.bi_bh = tbSh; + bi.bi_parent = PATH_H_PPARENT (tb->tb_path, h); + bi.bi_position = PATH_H_POSITION (tb->tb_path, h + 1); + + internal_delete_childs (&bi, child_pos, -insert_num); + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->blknum[h] > 1 ) + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", "tb->blknum[%d]=%d when insert_size < 0", + h, tb->blknum[h]); +#endif /* CONFIG_REISERFS_CHECK */ + + n = B_NR_ITEMS(tbSh); + + if ( tb->lnum[h] == 0 && tb->rnum[h] == 0 ) { + if ( tb->blknum[h] == 0 ) { + /* node S[h] (root of the tree) is empty now */ + struct buffer_head *new_root; + +#ifdef CONFIG_REISERFS_CHECK + if (n || B_FREE_SPACE (tbSh) != MAX_CHILD_SIZE(tbSh) - DC_SIZE) + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", "buffer must have only 0 keys (%d)", + n); + + if (bi.bi_parent) + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", 
"root has parent (%p)", bi.bi_parent); +#endif /* CONFIG_REISERFS_CHECK */ + + /* choose a new root */ + if ( ! tb->L[h-1] || ! B_NR_ITEMS(tb->L[h-1]) ) + new_root = tb->R[h-1]; + else + new_root = tb->L[h-1]; + /* switch super block's tree root block number to the new value */ + tb->tb_sb->u.reiserfs_sb.s_rs->s_root_block = cpu_to_le32 (new_root->b_blocknr); + //tb->tb_sb->u.reiserfs_sb.s_rs->s_tree_height --; + tb->tb_sb->u.reiserfs_sb.s_rs->s_tree_height = cpu_to_le16 (SB_TREE_HEIGHT (tb->tb_sb) - 1); + + do_balance_mark_sb_dirty (tb, tb->tb_sb->u.reiserfs_sb.s_sbh, 1); + /*&&&&&&&&&&&&&&&&&&&&&&*/ + if (h > 1) + /* use check_internal if new root is an internal node */ + check_internal (new_root); + /*&&&&&&&&&&&&&&&&&&&&&&*/ + tb->tb_sb->s_dirt = 1; + + /* do what is needed for buffer thrown from tree */ + reiserfs_invalidate_buffer(tb, tbSh); + return; + } + return; + } + + if ( tb->L[h] && tb->lnum[h] == -B_NR_ITEMS(tb->L[h]) - 1 ) { /* join S[h] with L[h] */ + +#ifdef CONFIG_REISERFS_CHECK + if ( tb->rnum[h] != 0 ) + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", "invalid tb->rnum[%d]==%d when joining S[h] with L[h]", + h, tb->rnum[h]); +#endif /* CONFIG_REISERFS_CHECK */ + + internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, n + 1); + reiserfs_invalidate_buffer(tb, tbSh); + + return; + } + + if ( tb->R[h] && tb->rnum[h] == -B_NR_ITEMS(tb->R[h]) - 1 ) { /* join S[h] with R[h] */ +#ifdef CONFIG_REISERFS_CHECK + if ( tb->lnum[h] != 0 ) + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", "invalid tb->lnum[%d]==%d when joining S[h] with R[h]", + h, tb->lnum[h]); +#endif /* CONFIG_REISERFS_CHECK */ + + internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, n + 1); + + reiserfs_invalidate_buffer(tb,tbSh); + return; + } + + if ( tb->lnum[h] < 0 ) { /* borrow from left neighbor L[h] */ +#ifdef CONFIG_REISERFS_CHECK + if ( tb->rnum[h] != 0 ) + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", "invalid tb->rnum[%d]==%d when borrow from L[h]", + h, tb->rnum[h]); +#endif /* CONFIG_REISERFS_CHECK */ + /*internal_shift_right (tb, h, tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], -tb->lnum[h]);*/ + internal_shift_right (INTERNAL_SHIFT_FROM_L_TO_S, tb, h, -tb->lnum[h]); + return; + } + + if ( tb->rnum[h] < 0 ) { /* borrow from right neighbor R[h] */ +#ifdef CONFIG_REISERFS_CHECK + if ( tb->lnum[h] != 0 ) + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", "invalid tb->lnum[%d]==%d when borrow from R[h]", + h, tb->lnum[h]); +#endif /* CONFIG_REISERFS_CHECK */ + internal_shift_left (INTERNAL_SHIFT_FROM_R_TO_S, tb, h, -tb->rnum[h]);/*tb->S[h], tb->CFR[h], tb->rkey[h], tb->R[h], -tb->rnum[h]);*/ + return; + } + + if ( tb->lnum[h] > 0 ) { /* split S[h] into two parts and put them into neighbors */ +#ifdef CONFIG_REISERFS_CHECK + if ( tb->rnum[h] == 0 || tb->lnum[h] + tb->rnum[h] != n + 1 ) + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", + "invalid tb->lnum[%d]==%d or tb->rnum[%d]==%d when S[h](item number == %d) is split between them", + h, tb->lnum[h], h, tb->rnum[h], n); +#endif /* CONFIG_REISERFS_CHECK */ + + internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]);/*tb->L[h], tb->CFL[h], tb->lkey[h], tb->S[h], tb->lnum[h]);*/ + internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, tb->rnum[h]); + + reiserfs_invalidate_buffer (tb, tbSh); + + return; + } + reiserfs_panic (tb->tb_sb, "balance_internal_when_delete", "unexpected tb->lnum[%d]==%d or tb->rnum[%d]==%d", + h, tb->lnum[h], h, tb->rnum[h]); +} + + +/* Replace delimiting 
key of buffers L[h] and S[h] by the given key.*/ +void replace_lkey ( + struct tree_balance * tb, + int h, + struct item_head * key + ) +{ +#ifdef CONFIG_REISERFS_CHECK + if (tb->L[h] == NULL || tb->CFL[h] == NULL) + reiserfs_panic (tb->tb_sb, "replace_lkey: 12255: " + "L[h](%p) and CFL[h](%p) must exist in replace_lkey", tb->L[h], tb->CFL[h]); +#endif + + if (B_NR_ITEMS(PATH_H_PBUFFER(tb->tb_path, h)) == 0) + return; + + memcpy (B_N_PDELIM_KEY(tb->CFL[h],tb->lkey[h]), key, KEY_SIZE); + + do_balance_mark_internal_dirty (tb, tb->CFL[h],0); +} + + +/* Replace delimiting key of buffers S[h] and R[h] by the given key.*/ +void replace_rkey ( + struct tree_balance * tb, + int h, + struct item_head * key + ) +{ +#ifdef CONFIG_REISERFS_CHECK + if (tb->R[h] == NULL || tb->CFR[h] == NULL) + reiserfs_panic (tb->tb_sb, "replace_rkey: 12260: " + "R[h](%p) and CFR[h](%p) must exist in replace_rkey", tb->R[h], tb->CFR[h]); + + if (B_NR_ITEMS(tb->R[h]) == 0) + reiserfs_panic (tb->tb_sb, "replace_rkey: 12265: " + "R[h] can not be empty if it exists (item number=%d)", B_NR_ITEMS(tb->R[h])); +#endif + + memcpy (B_N_PDELIM_KEY(tb->CFR[h],tb->rkey[h]), key, KEY_SIZE); + + do_balance_mark_internal_dirty (tb, tb->CFR[h], 0); +} + + +int balance_internal (struct tree_balance * tb, /* tree_balance structure */ + int h, /* level of the tree */ + int child_pos, + struct item_head * insert_key, /* key for insertion on higher level */ + struct buffer_head ** insert_ptr /* node for insertion on higher level*/ + ) + /* if inserting/pasting + { + child_pos is the position of the node-pointer in S[h] that * + pointed to S[h-1] before balancing of the h-1 level; * + this means that new pointers and items must be inserted AFTER * + child_pos + } + else + { + it is the position of the leftmost pointer that must be deleted (together with + its corresponding key to the left of the pointer) + as a result of the previous level's balancing. + } +*/ +{ + struct buffer_head * tbSh = PATH_H_PBUFFER (tb->tb_path, h); + struct buffer_info bi; + int order; /* we return this: it is 0 if there is no S[h], else it is tb->S[h]->b_item_order */ + int insert_num, n, k; + struct buffer_head * S_new; + struct item_head new_insert_key; + struct buffer_head * new_insert_ptr = NULL; + struct item_head * new_insert_key_addr = insert_key; + +#ifdef CONFIG_REISERFS_CHECK + if ( h < 1 ) + reiserfs_panic (tb->tb_sb, "balance_internal", "h (%d) can not be < 1 on internal level", h); +#endif /* CONFIG_REISERFS_CHECK */ + + order = ( tbSh ) ? PATH_H_POSITION (tb->tb_path, h + 1)/*tb->S[h]->b_item_order*/ : 0; + + /* Using insert_size[h] calculate the number insert_num of items + that must be inserted to or deleted from S[h]. */ + insert_num = tb->insert_size[h]/((int)(KEY_SIZE + DC_SIZE)); + + /* Check whether insert_num is proper **/ +#ifdef CONFIG_REISERFS_CHECK + if ( insert_num < -2 || insert_num > 2 ) + reiserfs_panic (tb->tb_sb, "balance_internal", + "incorrect number of items inserted to the internal node (%d)", insert_num); + + if ( h > 1 && (insert_num > 1 || insert_num < -1) ) + reiserfs_panic (tb->tb_sb, "balance_internal", + "incorrect number of items (%d) inserted to the internal node on a level (h=%d) higher than last internal level", + insert_num, h); +#endif /* CONFIG_REISERFS_CHECK */ + + /* Make balance in case insert_num < 0 */ + if ( insert_num < 0 ) { + balance_internal_when_delete (tb, h, child_pos); + return order; + } + + k = 0; + if ( tb->lnum[h] > 0 ) { + /* shift lnum[h] items from S[h] to the left neighbor L[h]. 
+ check how many of new items fall into L[h] or CFL[h] after + shifting */ + n = B_NR_ITEMS (tb->L[h]); /* number of items in L[h] */ + if ( tb->lnum[h] <= child_pos ) { + /* new items don't fall into L[h] or CFL[h] */ + internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h]); + /*internal_shift_left (tb->L[h],tb->CFL[h],tb->lkey[h],tbSh,tb->lnum[h]);*/ + child_pos -= tb->lnum[h]; + } else if ( tb->lnum[h] > child_pos + insert_num ) { + /* all new items fall into L[h] */ + internal_shift_left (INTERNAL_SHIFT_FROM_S_TO_L, tb, h, tb->lnum[h] - insert_num); + /* internal_shift_left(tb->L[h],tb->CFL[h],tb->lkey[h],tbSh, + tb->lnum[h]-insert_num); + */ + /* insert insert_num keys and node-pointers into L[h] */ + bi.tb = tb; + bi.bi_bh = tb->L[h]; + bi.bi_parent = tb->FL[h]; + bi.bi_position = get_left_neighbor_position (tb, h); + internal_insert_childs (&bi,/*tb->L[h], tb->S[h-1]->b_next*/ n + child_pos + 1, + insert_num,insert_key,insert_ptr); + + insert_num = 0; + } else { + struct disk_child * dc; + + /* some items fall into L[h] or CFL[h], but some don't fall */ + internal_shift1_left(tb,h,child_pos+1); + /* calculate number of new items that fall into L[h] */ + k = tb->lnum[h] - child_pos - 1; + bi.tb = tb; + bi.bi_bh = tb->L[h]; + bi.bi_parent = tb->FL[h]; + bi.bi_position = get_left_neighbor_position (tb, h); + internal_insert_childs (&bi,/*tb->L[h], tb->S[h-1]->b_next,*/ n + child_pos + 1,k, + insert_key,insert_ptr); + + replace_lkey(tb,h,insert_key + k); + + /* replace the first node-ptr in S[h] by node-ptr to insert_ptr[k] */ + dc = B_N_CHILD(tbSh, 0); + dc->dc_size = cpu_to_le16 (MAX_CHILD_SIZE(insert_ptr[k]) - B_FREE_SPACE (insert_ptr[k])); + dc->dc_block_number = cpu_to_le32 (insert_ptr[k]->b_blocknr); + + do_balance_mark_internal_dirty (tb, tbSh, 0); + + k++; + insert_key += k; + insert_ptr += k; + insert_num -= k; + child_pos = 0; + } + } /* tb->lnum[h] > 0 */ + + if ( tb->rnum[h] > 0 ) { + /*shift rnum[h] items from S[h] to the right neighbor R[h]*/ + /* check how many of new items fall into R or CFR after shifting */ + n = B_NR_ITEMS (tbSh); /* number of items in S[h] */ + if ( n - tb->rnum[h] >= child_pos ) + /* new items fall into S[h] */ + /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h],tb->rnum[h]);*/ + internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, tb->rnum[h]); + else + if ( n + insert_num - tb->rnum[h] < child_pos ) + { + /* all new items fall into R[h] */ + /*internal_shift_right(tb,h,tbSh,tb->CFR[h],tb->rkey[h],tb->R[h], + tb->rnum[h] - insert_num);*/ + internal_shift_right (INTERNAL_SHIFT_FROM_S_TO_R, tb, h, tb->rnum[h] - insert_num); + + /* insert insert_num keys and node-pointers into R[h] */ + bi.tb = tb; + bi.bi_bh = tb->R[h]; + bi.bi_parent = tb->FR[h]; + bi.bi_position = get_right_neighbor_position (tb, h); + internal_insert_childs (&bi, /*tb->R[h],tb->S[h-1]->b_next*/ child_pos - n - insert_num + tb->rnum[h] - 1, + insert_num,insert_key,insert_ptr); + insert_num = 0; + } + else + { + struct disk_child * dc; + + /* one of the items falls into CFR[h] */ + internal_shift1_right(tb,h,n - child_pos + 1); + /* calculate number of new items that fall into R[h] */ + k = tb->rnum[h] - n + child_pos - 1; + bi.tb = tb; + bi.bi_bh = tb->R[h]; + bi.bi_parent = tb->FR[h]; + bi.bi_position = get_right_neighbor_position (tb, h); + internal_insert_childs (&bi, /*tb->R[h], tb->R[h]->b_child,*/ 0, k, insert_key + 1, insert_ptr + 1); + + replace_rkey(tb,h,insert_key + insert_num - k - 1); + + /* replace the first node-ptr in R[h] by node-ptr 
insert_ptr[insert_num-k-1]*/ + dc = B_N_CHILD(tb->R[h], 0); + dc->dc_size = + cpu_to_le16 (MAX_CHILD_SIZE(insert_ptr[insert_num-k-1]) - + B_FREE_SPACE (insert_ptr[insert_num-k-1])); + dc->dc_block_number = cpu_to_le32 (insert_ptr[insert_num-k-1]->b_blocknr); + + do_balance_mark_internal_dirty (tb, tb->R[h],0); + + insert_num -= (k + 1); + } + } + + /** Fill new node that appears instead of S[h] **/ +#ifdef CONFIG_REISERFS_CHECK + if ( tb->blknum[h] > 2 ) + reiserfs_panic(0, "balance_internal", "blknum can not be > 2 for internal level"); + if ( tb->blknum[h] < 0 ) + reiserfs_panic(0, "balance_internal", "blknum can not be < 0"); +#endif /* CONFIG_REISERFS_CHECK */ + + if ( ! tb->blknum[h] ) + { /* node S[h] is empty now */ +#ifdef CONFIG_REISERFS_CHECK + if ( ! tbSh ) + reiserfs_panic(0,"balance_internal", "S[h] is equal NULL"); +#endif /* CONFIG_REISERFS_CHECK */ + + /* do what is needed for buffer thrown from tree */ + reiserfs_invalidate_buffer(tb,tbSh); + return order; + } + + if ( ! tbSh ) { + /* create new root */ + struct disk_child * dc; + struct buffer_head * tbSh_1 = PATH_H_PBUFFER (tb->tb_path, h - 1); + + + if ( tb->blknum[h] != 1 ) + reiserfs_panic(0, "balance_internal", "One new node required for creating the new root"); + /* S[h] = empty buffer from the list FEB. */ + tbSh = get_FEB (tb); + B_BLK_HEAD(tbSh)->blk_level = cpu_to_le16 (h + 1); + + /* Put the unique node-pointer to S[h] that points to S[h-1]. */ + + dc = B_N_CHILD(tbSh, 0); + dc->dc_block_number = cpu_to_le32 (tbSh_1->b_blocknr); + dc->dc_size = cpu_to_le16 (MAX_CHILD_SIZE (tbSh_1) - B_FREE_SPACE (tbSh_1)); + + tb->insert_size[h] -= DC_SIZE; + B_BLK_HEAD(tbSh)->blk_free_space = cpu_to_le16 (B_FREE_SPACE (tbSh) - DC_SIZE); + + do_balance_mark_internal_dirty (tb, tbSh, 0); + + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + check_internal (tbSh); + /*&&&&&&&&&&&&&&&&&&&&&&&&*/ + + /* put new root into path structure */ + PATH_OFFSET_PBUFFER(tb->tb_path, ILLEGAL_PATH_ELEMENT_OFFSET) = tbSh; + + /* Change root in structure super block. 
*/ + tb->tb_sb->u.reiserfs_sb.s_rs->s_root_block = cpu_to_le32 (tbSh->b_blocknr); + tb->tb_sb->u.reiserfs_sb.s_rs->s_tree_height = cpu_to_le16 (SB_TREE_HEIGHT (tb->tb_sb) + 1); + do_balance_mark_sb_dirty (tb, tb->tb_sb->u.reiserfs_sb.s_sbh, 1); + tb->tb_sb->s_dirt = 1; + } + + if ( tb->blknum[h] == 2 ) { + int snum; + struct buffer_info dest_bi, src_bi; + + + /* S_new = free buffer from list FEB */ + S_new = get_FEB(tb); + + B_BLK_HEAD(S_new)->blk_level = cpu_to_le16 (h + 1); + + dest_bi.tb = tb; + dest_bi.bi_bh = S_new; + dest_bi.bi_parent = 0; + dest_bi.bi_position = 0; + src_bi.tb = tb; + src_bi.bi_bh = tbSh; + src_bi.bi_parent = PATH_H_PPARENT (tb->tb_path, h); + src_bi.bi_position = PATH_H_POSITION (tb->tb_path, h + 1); + + n = B_NR_ITEMS (tbSh); /* number of items in S[h] */ + snum = (insert_num + n + 1)/2; + if ( n - snum >= child_pos ) { + /* new items don't fall into S_new */ + /* store the delimiting key for the next level */ + /* new_insert_key = (n - snum)'th key in S[h] */ + memcpy (&new_insert_key,B_N_PDELIM_KEY(tbSh,n - snum), + KEY_SIZE); + /* last parameter is del_par */ + internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, snum, 0); + /* internal_move_pointers_items(S_new, tbSh, LAST_TO_FIRST, snum, 0);*/ + } else if ( n + insert_num - snum < child_pos ) { + /* all new items fall into S_new */ + /* store the delimiting key for the next level */ + /* new_insert_key = (n + insert_item - snum)'th key in S[h] */ + memcpy(&new_insert_key,B_N_PDELIM_KEY(tbSh,n + insert_num - snum), + KEY_SIZE); + /* last parameter is del_par */ + internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, snum - insert_num, 0); + /* internal_move_pointers_items(S_new,tbSh,1,snum - insert_num,0);*/ + + /* insert insert_num keys and node-pointers into S_new */ + internal_insert_childs (&dest_bi, /*S_new,tb->S[h-1]->b_next,*/child_pos - n - insert_num + snum - 1, + insert_num,insert_key,insert_ptr); + + insert_num = 0; + } else { + struct disk_child * dc; + + /* some items fall into S_new, but some don't fall */ + /* last parameter is del_par */ + internal_move_pointers_items (&dest_bi, &src_bi, LAST_TO_FIRST, n - child_pos + 1, 1); + /* internal_move_pointers_items(S_new,tbSh,1,n - child_pos + 1,1);*/ + /* calculate number of new items that fall into S_new */ + k = snum - n + child_pos - 1; + + internal_insert_childs (&dest_bi, /*S_new,*/ 0, k, insert_key + 1, insert_ptr+1); + + /* new_insert_key = insert_key[insert_num - k - 1] */ + memcpy(&new_insert_key,insert_key + insert_num - k - 1, + KEY_SIZE); + /* replace first node-ptr in S_new by node-ptr to insert_ptr[insert_num-k-1] */ + + dc = B_N_CHILD(S_new,0); + dc->dc_size = cpu_to_le16 (MAX_CHILD_SIZE(insert_ptr[insert_num-k-1]) - + B_FREE_SPACE(insert_ptr[insert_num-k-1])); + dc->dc_block_number = cpu_to_le32 (insert_ptr[insert_num-k-1]->b_blocknr); + + do_balance_mark_internal_dirty (tb, S_new,0); + + insert_num -= (k + 1); + } + /* new_insert_ptr = node_pointer to S_new */ + new_insert_ptr = S_new; + +#ifdef CONFIG_REISERFS_CHECK + if ( buffer_locked(S_new) || atomic_read (&(S_new->b_count)) != 1) + if (buffer_locked(S_new) || atomic_read(&(S_new->b_count)) > 2 || + !(buffer_journaled(S_new) || buffer_journal_dirty(S_new))) { + reiserfs_panic (tb->tb_sb, "cm-00001: balance_internal: bad S_new (%b)", S_new); + } +#endif /* CONFIG_REISERFS_CHECK */ + + // S_new is released in unfix_nodes + } + + n = B_NR_ITEMS (tbSh); /*number of items in S[h] */ + +#ifdef REISERFS_FSCK + if ( -1 <= child_pos && child_pos <= n && insert_num > 0 
) { +#else + if ( 0 <= child_pos && child_pos <= n && insert_num > 0 ) { +#endif + bi.tb = tb; + bi.bi_bh = tbSh; + bi.bi_parent = PATH_H_PPARENT (tb->tb_path, h); + bi.bi_position = PATH_H_POSITION (tb->tb_path, h + 1); +#ifdef REISERFS_FSCK + if (child_pos == -1) { + /* this is a little different from original do_balance: + here we insert the minimal keys in the tree, that has never happened when file system works */ + if (tb->CFL[h-1] || insert_num != 1 || h != 1) + die ("balance_internal: invalid child_pos"); +/* insert_child (tb->S[h], tb->S[h-1], child_pos, insert_num, B_N_ITEM_HEAD(tb->S[0],0), insert_ptr);*/ + internal_insert_childs (&bi, child_pos, insert_num, B_N_PITEM_HEAD (PATH_PLAST_BUFFER (tb->tb_path), 0), insert_ptr); + } else +#endif + internal_insert_childs ( + &bi,/*tbSh,*/ + /* ( tb->S[h-1]->b_parent == tb->S[h] ) ? tb->S[h-1]->b_next : tb->S[h]->b_child->b_next,*/ + child_pos,insert_num,insert_key,insert_ptr + ); + } + + + memcpy (new_insert_key_addr,&new_insert_key,KEY_SIZE); + insert_ptr[0] = new_insert_ptr; + + return order; + } + + + diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/inode.c linux/fs/reiserfs/inode.c --- v2.4.0/linux/fs/reiserfs/inode.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/inode.c Mon Jan 15 15:31:19 2001 @@ -0,0 +1,1879 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include +#include + +#else + +#include "nokernel.h" + +#endif + +/* args for the create parameter of reiserfs_get_block */ +#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ +#define GET_BLOCK_CREATE 1 /* add anything you need to find block */ +#define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */ +#define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ + +// +// initially this function was derived from minix or ext2's analog and +// evolved as the prototype did +// +void reiserfs_delete_inode (struct inode * inode) +{ + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2; + int windex ; + struct reiserfs_transaction_handle th ; + + + lock_kernel() ; + + /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. 
*/ + if (INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ + down (&inode->i_sem); + + journal_begin(&th, inode->i_sb, jbegin_count) ; + windex = push_journal_writer("delete_inode") ; + + reiserfs_delete_object (&th, inode); + reiserfs_remove_page_from_flush_list(&th, inode) ; + pop_journal_writer(windex) ; + reiserfs_release_objectid (&th, inode->i_ino); + + journal_end(&th, inode->i_sb, jbegin_count) ; + + up (&inode->i_sem); + } else { + /* no object items are in the tree */ + ; + } + clear_inode (inode); /* note this must go after the journal_end to prevent deadlock */ + unlock_kernel() ; +} + +static void _make_cpu_key (struct cpu_key * key, int version, __u32 dirid, __u32 objectid, + loff_t offset, int type, int length) +{ + key->version = version; + + key->on_disk_key.k_dir_id = dirid; + key->on_disk_key.k_objectid = objectid; + set_cpu_key_k_offset (key, offset); + set_cpu_key_k_type (key, type); + key->key_length = length; +} + + +/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set + offset and type of key */ +void make_cpu_key (struct cpu_key * key, const struct inode * inode, loff_t offset, + int type, int length) +{ + _make_cpu_key (key, inode_items_version (inode), le32_to_cpu (INODE_PKEY (inode)->k_dir_id), + le32_to_cpu (INODE_PKEY (inode)->k_objectid), + offset, type, length); +} + + +// +// when key is 0, do not set version and short key +// +inline void make_le_item_head (struct item_head * ih, struct cpu_key * key, int version, + loff_t offset, int type, int length, int entry_count/*or ih_free_space*/) +{ + if (key) { + ih->ih_key.k_dir_id = cpu_to_le32 (key->on_disk_key.k_dir_id); + ih->ih_key.k_objectid = cpu_to_le32 (key->on_disk_key.k_objectid); + } + ih->ih_version = cpu_to_le16 (version); + set_le_ih_k_offset (ih, offset); + set_le_ih_k_type (ih, type); + ih->ih_item_len = cpu_to_le16 (length); + /* set_ih_free_space (ih, 0);*/ + // for directory items it is entry count, for directs and stat + // datas - 0xffff, for indirects - 0 + ih->u.ih_entry_count = cpu_to_le16 (entry_count); +} + + +// +// FIXME: we might cache recently accessed indirect item (or at least +// first 15 pointers just like ext2 does + +// Ugh. Not too eager for that.... +// I cut the code until such time as I see a convincing argument (benchmark). +// I don't want a bloated inode struct..., and I don't like code complexity.... + +/* cutting the code is fine, since it really isn't in use yet and is easy +** to add back in. But, Vladimir has a really good idea here. Think +** about what happens for reading a file. For each page, +** The VFS layer calls reiserfs_readpage, who searches the tree to find +** an indirect item. This indirect item has X number of pointers, where +** X is a big number if we've done the block allocation right. But, +** we only use one or two of these pointers during each call to readpage, +** needlessly researching again later on. +** +** The size of the cache could be dynamic based on the size of the file. +** +** I'd also like to see us cache the location the stat data item, since +** we are needlessly researching for that frequently. 
+** +** --chris +*/ + +/* people who call journal_begin with a page locked must call this +** BEFORE calling journal_begin +*/ +static int prevent_flush_page_lock(struct page *page, + struct inode *inode) { + struct reiserfs_page_list *pl ; + struct super_block *s = inode->i_sb ; + /* we don't care if the inode has a stale pointer from an old + ** transaction + */ + if(!page || inode->u.reiserfs_i.i_conversion_trans_id != SB_JOURNAL(s)->j_trans_id) { + return 0 ; + } + pl = inode->u.reiserfs_i.i_converted_page ; + if (pl && pl->page == page) { + pl->do_not_lock = 1 ; + } + /* this last part is really important. The address space operations have + ** the page locked before they call the journal functions. So it is possible + ** for one process to be waiting in flush_pages_before_commit for a + ** page, then for the process with the page locked to call journal_begin. + ** + ** We'll deadlock because the process flushing pages will never notice + ** the process with the page locked has called prevent_flush_page_lock. + ** So, we wake up the page waiters, even though the page is still locked. + ** The process waiting in flush_pages_before_commit must check the + ** pl->do_not_lock flag, and stop trying to lock the page. + */ + wake_up(&page->wait) ; + return 0 ; + +} +/* people who call journal_end with a page locked must call this +** AFTER calling journal_end +*/ +static int allow_flush_page_lock(struct page *page, + struct inode *inode) { + + struct reiserfs_page_list *pl ; + struct super_block *s = inode->i_sb ; + /* we don't care if the inode has a stale pointer from an old + ** transaction + */ + if(!page || inode->u.reiserfs_i.i_conversion_trans_id != SB_JOURNAL(s)->j_trans_id) { + return 0 ; + } + pl = inode->u.reiserfs_i.i_converted_page ; + if (pl && pl->page == page) { + pl->do_not_lock = 0 ; + } + return 0 ; + +} + +/* If this page has a file tail in it, and +** it was read in by get_block_create_0, the page data is valid, +** but tail is still sitting in a direct item, and we can't write to +** it. So, look through this page, and check all the mapped buffers +** to make sure they have valid block numbers. Any that don't need +** to be unmapped, so that block_prepare_write will correctly call +** reiserfs_get_block to convert the tail into an unformatted node +*/ +static inline void fix_tail_page_for_writing(struct page *page) { + struct buffer_head *head, *next, *bh ; + + if (page && page->buffers) { + head = page->buffers ; + bh = head ; + do { + next = bh->b_this_page ; + if (buffer_mapped(bh) && bh->b_blocknr == 0) { + reiserfs_unmap_buffer(bh) ; + } + bh = next ; + } while (bh != head) ; + } +} + + + + +/* we need to allocate a block for new unformatted node. Try to figure out + what point in bitmap reiserfs_new_blocknrs should start from. 
*/
+static b_blocknr_t find_tag (struct buffer_head * bh, struct item_head * ih,
+                             __u32 * item, int pos_in_item)
+{
+    if (!is_indirect_le_ih (ih))
+        /* something more complicated could be here */
+        return bh->b_blocknr;
+
+    /* for indirect item: go to the left and look for the first non-hole entry
+       in the indirect item */
+    if (pos_in_item == I_UNFM_NUM (ih))
+        pos_in_item --;
+    while (pos_in_item >= 0) {
+        if (item [pos_in_item])
+            return item [pos_in_item];
+        pos_in_item --;
+    }
+    return bh->b_blocknr;
+}
+
+
+/* reiserfs_get_block does not need to allocate a block only if the
+   allocation has been done already or a non-hole position has been found
+   in the indirect item */
+static inline int allocation_needed (int retval, b_blocknr_t allocated,
+                                     struct item_head * ih,
+                                     __u32 * item, int pos_in_item)
+{
+    if (allocated)
+        return 0;
+    if (retval == POSITION_FOUND && is_indirect_le_ih (ih) && item[pos_in_item])
+        return 0;
+    return 1;
+}
+
+static inline int indirect_item_found (int retval, struct item_head * ih)
+{
+    return (retval == POSITION_FOUND) && is_indirect_le_ih (ih);
+}
+
+
+static inline void set_block_dev_mapped (struct buffer_head * bh,
+                                         b_blocknr_t block, struct inode * inode)
+{
+    bh->b_dev = inode->i_dev;
+    bh->b_blocknr = block;
+    bh->b_state |= (1UL << BH_Mapped);
+}
+
+
+//
+// files created by the old (version 1) format cannot be larger than 2 GB
+//
+int file_capable (struct inode * inode, long block)
+{
+    if (inode_items_version (inode) != ITEM_VERSION_1 || // it is a new-format file
+        block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old-format file, but 'block' is within 2 GB
+        return 1;
+
+    return 0;
+}
+
+/*static*/ void restart_transaction(struct reiserfs_transaction_handle *th,
+                                struct inode *inode, struct path *path) {
+    struct super_block *s = th->t_super ;
+    int len = th->t_blocks_allocated ;
+
+    pathrelse(path) ;
+    reiserfs_update_sd(th, inode) ;
+    journal_end(th, s, len) ;
+    journal_begin(th, s, len) ;
+}
+
+// _get_block_create_0 is called by reiserfs_get_block when create == 0.
+// It maps the 'block'-th logical block of the file into bh_result.  If the
+// block is stored in an unformatted node, that node's block number is placed
+// in bh_result.  If the block is stored in a direct item (a file tail), there
+// is no disk block to map: when called from bmap the function returns
+// -ENOENT, otherwise it copies the tail bytes from the direct item(s) into
+// the page behind bh_result and leaves b_blocknr == 0.
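/*
 * A stand-alone sketch of the offset arithmetic that _get_block_create_0 and
 * reiserfs_get_block rely on.  The 4096-byte block size below is an
 * assumption made for this example only; it is not taken from the patch.
 */
#include <stdio.h>

#define EX_BLOCKSIZE       4096L
#define EX_BLOCKSIZE_BITS  12

int main(void)
{
    long block = 3;             /* some logical block of a file            */
    long long byte = 10000;     /* some byte offset, e.g. a byte in a tail */

    /* item keys use 1-based byte offsets, so the key for the 'block'-th
       logical block of a file points at byte block * blocksize + 1 */
    long long key_offset = (long long) block * EX_BLOCKSIZE + 1;

    /* start of the block that contains 'byte', as computed when a direct
       item (tail) is converted: ((offset - 1) & ~(blocksize - 1)) + 1 */
    long long tail_offset = ((byte - 1) & ~(EX_BLOCKSIZE - 1)) + 1;

    /* old-format (ITEM_VERSION_1) files are limited to 2 GB, which is what
       file_capable() checks: block < 1 << (31 - blocksize_bits) */
    long max_old_block = 1L << (31 - EX_BLOCKSIZE_BITS);

    printf("key offset of block %ld: %lld\n", block, key_offset);
    printf("block-aligned offset of byte %lld: %lld\n", byte, tail_offset);
    printf("last block addressable by an old-format file: %ld\n",
           max_old_block - 1);
    return 0;
}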
+ +static int _get_block_create_0 (struct inode * inode, long block, + struct buffer_head * bh_result, + int args) +{ + INITIALIZE_PATH (path); + struct cpu_key key; + struct buffer_head * bh; + struct item_head * ih, tmp_ih; + int fs_gen ; + int blocknr; + char * p = NULL; + int chars; + int ret ; + unsigned long offset ; + + // prepare the key to look for the 'block'-th block of file + make_cpu_key (&key, inode, + (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3); + +research: + if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) { + pathrelse (&path); + if (p) + kunmap(bh_result->b_page) ; + if ((args & GET_BLOCK_NO_HOLE)) { + return -ENOENT ; + } + return 0 ; + } + + // + bh = get_bh (&path); + ih = get_ih (&path); + if (is_indirect_le_ih (ih)) { + __u32 * ind_item = (__u32 *)B_I_PITEM (bh, ih); + + /* FIXME: here we could cache indirect item or part of it in + the inode to avoid search_by_key in case of subsequent + access to file */ + blocknr = le32_to_cpu (ind_item [path.pos_in_item]); + ret = 0 ; + if (blocknr) { + bh_result->b_dev = inode->i_dev; + bh_result->b_blocknr = blocknr; + bh_result->b_state |= (1UL << BH_Mapped); + } else if ((args & GET_BLOCK_NO_HOLE)) { + ret = -ENOENT ; + } + pathrelse (&path); + if (p) + kunmap(bh_result->b_page) ; + return ret ; + } + + // requested data are in direct item(s) + if (!(args & GET_BLOCK_READ_DIRECT)) { + // we are called by bmap. FIXME: we can not map block of file + // when it is stored in direct item(s) + pathrelse (&path); + if (p) + kunmap(bh_result->b_page) ; + return -ENOENT; + } + + // read file tail into part of page + offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1) ; + fs_gen = get_generation(inode->i_sb) ; + copy_item_head (&tmp_ih, ih); + + /* we only want to kmap if we are reading the tail into the page. + ** this is not the common case, so we don't kmap until we are + ** sure we need to. But, this means the item might move if + ** kmap schedules + */ + p = (char *)kmap(bh_result->b_page) ; + if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { + goto research; + } + p += offset ; + memset (p, 0, inode->i_sb->s_blocksize); + do { + if (!is_direct_le_ih (ih)) { + BUG (); + } + chars = le16_to_cpu (ih->ih_item_len) - path.pos_in_item; + memcpy (p, B_I_PITEM (bh, ih) + path.pos_in_item, chars); + p += chars; + + if (PATH_LAST_POSITION (&path) != (B_NR_ITEMS (bh) - 1)) + // we done, if read direct item is not the last item of + // node FIXME: we could try to check right delimiting key + // to see whether direct item continues in the right + // neighbor or rely on i_size + break; + + // update key to look for the next piece + set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars); + if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) + // we read something from tail, even if now we got IO_ERROR + break; + bh = get_bh (&path); + ih = get_ih (&path); + } while (1); + + pathrelse (&path); + + // FIXME: b_blocknr == 0 here. but b_data contains correct data + // from tail. ll_rw_block will skip uptodate buffers + bh_result->b_blocknr = 0 ; + bh_result->b_dev = inode->i_dev; + mark_buffer_uptodate (bh_result, 1); + bh_result->b_state |= (1UL << BH_Mapped); + kunmap(bh_result->b_page) ; + + return 0; +} + + +// this is called to create file map. 
So, _get_block_create_0 will not +// read direct item +int reiserfs_bmap (struct inode * inode, long block, + struct buffer_head * bh_result, int create) +{ + if (!file_capable (inode, block)) + return -EFBIG; + + lock_kernel() ; + /* do not read the direct item */ + _get_block_create_0 (inode, block, bh_result, 0) ; + unlock_kernel() ; + return 0; +} + +/* special version of get_block that is only used by grab_tail_page right +** now. It is sent to block_prepare_write, and when you try to get a +** block past the end of the file (or a block from a hole) it returns +** -ENOENT instead of a valid buffer. block_prepare_write expects to +** be able to do i/o on the buffers returned, unless an error value +** is also returned. +** +** So, this allows block_prepare_write to be used for reading a single block +** in a page. Where it does not produce a valid page for holes, or past the +** end of the file. This turns out to be exactly what we need for reading +** tails for conversion. +** +** The point of the wrapper is forcing a certain value for create, even +** though the VFS layer is calling this function with create==1. If you +** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block, +** don't use this function. +*/ +static int reiserfs_get_block_create_0 (struct inode * inode, long block, + struct buffer_head * bh_result, int create) { + return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ; +} + +/* +** helper function for when reiserfs_get_block is called for a hole +** but the file tail is still in a direct item +** bh_result is the buffer head for the hole +** tail_offset is the offset of the start of the tail in the file +** +** This calls prepare_write, which will start a new transaction +** you should not be in a transaction, or have any paths held when you +** call this. +*/ +static int convert_tail_for_hole(struct inode *inode, + struct buffer_head *bh_result, + loff_t tail_offset) { + unsigned long index ; + unsigned long tail_end ; + unsigned long tail_start ; + struct page * tail_page ; + struct page * hole_page = bh_result->b_page ; + int retval = 0 ; + + if ((tail_offset & (bh_result->b_size - 1)) != 1) + return -EIO ; + + /* always try to read until the end of the block */ + tail_start = tail_offset & (PAGE_CACHE_SIZE - 1) ; + tail_end = (tail_start | (bh_result->b_size - 1)) + 1 ; + + index = tail_offset >> PAGE_CACHE_SHIFT ; + if (index != hole_page->index) { + tail_page = grab_cache_page(inode->i_mapping, index) ; + retval = PTR_ERR(tail_page) ; + if (IS_ERR(tail_page)) { + goto out ; + } + } else { + tail_page = hole_page ; + } + + /* we don't have to make sure the conversion did not happen while + ** we were locking the page because anyone that could convert + ** must first take i_sem. + ** + ** We must fix the tail page for writing because it might have buffers + ** that are mapped, but have a block number of 0. This indicates tail + ** data that has been read directly into the page, and block_prepare_write + ** won't trigger a get_block in this case. 
+ */ + fix_tail_page_for_writing(tail_page) ; + retval = block_prepare_write(tail_page, tail_start, tail_end, + reiserfs_get_block) ; + if (retval) + goto unlock ; + + /* tail conversion might change the data in the page */ + flush_dcache_page(tail_page) ; + + retval = generic_commit_write(NULL, tail_page, tail_start, tail_end) ; + +unlock: + if (tail_page != hole_page) { + UnlockPage(tail_page) ; + page_cache_release(tail_page) ; + } +out: + return retval ; +} + +// +// initially this function was derived from ext2's analog and evolved +// as the prototype did. You'll need to look at the ext2 version to +// determine which parts are derivative, if any, understanding that +// there are only so many ways to code to a given interface. +// +int reiserfs_get_block (struct inode * inode, long block, + struct buffer_head * bh_result, int create) +{ + int repeat, retval; + unsigned long tag; + b_blocknr_t allocated_block_nr = 0;// b_blocknr_t is unsigned long + INITIALIZE_PATH(path); + int pos_in_item; + struct cpu_key key; + struct buffer_head * bh, * unbh = 0; + struct item_head * ih, tmp_ih; + __u32 * item; + int done; + int fs_gen; + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 ; + int version; + int transaction_started = 0 ; + loff_t new_offset = (block << inode->i_sb->s_blocksize_bits) + 1 ; + + /* bad.... */ + lock_kernel() ; + th.t_trans_id = 0 ; + version = inode_items_version (inode); + + if (!file_capable (inode, block)) { + unlock_kernel() ; + return -EFBIG; + } + + /* if !create, we aren't changing the FS, so we don't need to + ** log anything, so we don't need to start a transaction + */ + if (!(create & GET_BLOCK_CREATE)) { + int ret ; + /* find number of block-th logical block of the file */ + ret = _get_block_create_0 (inode, block, bh_result, + create | GET_BLOCK_READ_DIRECT) ; + unlock_kernel() ; + return ret; + } + + if (block < 0) { + unlock_kernel(); + return -EIO; + } + + prevent_flush_page_lock(bh_result->b_page, inode) ; + inode->u.reiserfs_i.i_pack_on_close = 1 ; + + windex = push_journal_writer("reiserfs_get_block") ; + + /* set the key of the first byte in the 'block'-th block of file */ + make_cpu_key (&key, inode, + (loff_t)block * inode->i_sb->s_blocksize + 1, // k_offset + TYPE_ANY, 3/*key length*/); + if ((new_offset + inode->i_sb->s_blocksize) >= inode->i_size) { + journal_begin(&th, inode->i_sb, jbegin_count) ; + transaction_started = 1 ; + } + research: + + retval = search_for_position_by_key (inode->i_sb, &key, &path); + if (retval == IO_ERROR) { + retval = -EIO; + goto failure; + } + + bh = get_bh (&path); + ih = get_ih (&path); + item = get_item (&path); + pos_in_item = path.pos_in_item; + + fs_gen = get_generation (inode->i_sb); + copy_item_head (&tmp_ih, ih); + + if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) { + /* we have to allocate block for the unformatted node */ + tag = find_tag (bh, ih, item, pos_in_item); + if (!transaction_started) { + pathrelse(&path) ; + journal_begin(&th, inode->i_sb, jbegin_count) ; + transaction_started = 1 ; + goto research ; + } + +#ifdef REISERFS_PREALLOCATE + repeat = reiserfs_new_unf_blocknrs2 (&th, inode, &allocated_block_nr, tag); +#else + repeat = reiserfs_new_unf_blocknrs (&th, &allocated_block_nr, tag); +#endif + + if (repeat == NO_DISK_SPACE) { + /* restart the transaction to give the journal a chance to free + ** some blocks. 
releases the path, so we have to go back to + ** research if we succeed on the second try + */ + restart_transaction(&th, inode, &path) ; +#ifdef REISERFS_PREALLOCATE + repeat = reiserfs_new_unf_blocknrs2 (&th, inode, &allocated_block_nr, tag); +#else + repeat = reiserfs_new_unf_blocknrs (&th, &allocated_block_nr, tag); +#endif + + if (repeat != NO_DISK_SPACE) { + goto research ; + } + retval = -ENOSPC; + goto failure; + } + + if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { + goto research; + } + } + + if (indirect_item_found (retval, ih)) { + /* 'block'-th block is in the file already (there is + corresponding cell in some indirect item). But it may be + zero unformatted node pointer (hole) */ + if (!item[pos_in_item]) { + /* use allocated block to plug the hole */ + reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; + if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { + reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; + goto research; + } + bh_result->b_state |= (1UL << BH_New); + item[pos_in_item] = cpu_to_le32 (allocated_block_nr); + journal_mark_dirty (&th, inode->i_sb, bh); + inode->i_blocks += (inode->i_sb->s_blocksize / 512) ; + reiserfs_update_sd(&th, inode) ; + } + set_block_dev_mapped(bh_result, le32_to_cpu (item[pos_in_item]), inode); + pathrelse (&path); +#ifdef REISERFS_CHECK + pop_journal_writer(windex) ; +#endif /* REISERFS_CHECK */ + if (transaction_started) + journal_end(&th, inode->i_sb, jbegin_count) ; + + allow_flush_page_lock(bh_result->b_page, inode) ; + unlock_kernel() ; + + /* the item was found, so new blocks were not added to the file + ** there is no need to make sure the inode is updated with this + ** transaction + */ + return 0; + } + + if (!transaction_started) { + /* if we don't pathrelse, we could vs-3050 on the buffer if + ** someone is waiting for it (they can't finish until the buffer + ** is released, we can start a new transaction until they finish) + */ + pathrelse(&path) ; + journal_begin(&th, inode->i_sb, jbegin_count) ; + transaction_started = 1 ; + goto research; + } + + /* desired position is not found or is in the direct item. We have + to append file with holes up to 'block'-th block converting + direct items to indirect one if necessary */ + done = 0; + do { + if (is_statdata_le_ih (ih)) { + __u32 unp = 0; + struct cpu_key tmp_key; + + /* indirect item has to be inserted */ + make_le_item_head (&tmp_ih, &key, version, 1, TYPE_INDIRECT, + UNFM_P_SIZE, 0/* free_space */); + + if (cpu_key_k_offset (&key) == 1) { + /* we are going to add 'block'-th block to the file. 
Use + allocated block for that */ + unp = cpu_to_le32 (allocated_block_nr); + set_block_dev_mapped (bh_result, allocated_block_nr, inode); + bh_result->b_state |= (1UL << BH_New); + done = 1; + } + tmp_key = key; // ;) + set_cpu_key_k_offset (&tmp_key, 1); + PATH_LAST_POSITION(&path) ++; + + retval = reiserfs_insert_item (&th, &path, &tmp_key, &tmp_ih, (char *)&unp); + if (retval) { + reiserfs_free_block (&th, allocated_block_nr); + +#ifdef REISERFS_PREALLOCATE + reiserfs_discard_prealloc (&th, inode); +#endif + goto failure; // retval == -ENOSPC or -EIO or -EEXIST + } + if (unp) + inode->i_blocks += inode->i_sb->s_blocksize / 512; + //mark_tail_converted (inode); + } else if (is_direct_le_ih (ih)) { + /* direct item has to be converted */ + loff_t tail_offset; + + tail_offset = ((le_ih_k_offset (ih) - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1; + if (tail_offset == cpu_key_k_offset (&key)) { + /* direct item we just found fits into block we have + to map. Convert it into unformatted node: use + bh_result for the conversion */ + set_block_dev_mapped (bh_result, allocated_block_nr, inode); + unbh = bh_result; + done = 1; + } else { + /* we have to padd file tail stored in direct item(s) + up to block size and convert it to unformatted + node. FIXME: this should also get into page cache */ + + pathrelse(&path) ; + journal_end(&th, inode->i_sb, jbegin_count) ; + transaction_started = 0 ; + + retval = convert_tail_for_hole(inode, bh_result, tail_offset) ; + if (retval) { + printk("clm-6004: convert tail failed inode %lu, error %d\n", inode->i_ino, retval) ; + if (allocated_block_nr) + reiserfs_free_block (&th, allocated_block_nr); + goto failure ; + } + goto research ; + } + retval = direct2indirect (&th, inode, &path, unbh, tail_offset); + /* it is important the mark_buffer_uptodate is done after + ** the direct2indirect. The buffer might contain valid + ** data newer than the data on disk (read by readpage, changed, + ** and then sent here by writepage). direct2indirect needs + ** to know if unbh was already up to date, so it can decide + ** if the data in unbh needs to be replaced with data from + ** the disk + */ + mark_buffer_uptodate (unbh, 1); + if (retval) { + reiserfs_free_block (&th, allocated_block_nr); + +#ifdef REISERFS_PREALLOCATE + reiserfs_discard_prealloc (&th, inode); +#endif + goto failure; + } + /* we've converted the tail, so we must + ** flush unbh before the transaction commits + */ + reiserfs_add_page_to_flush_list(&th, inode, unbh) ; + + //inode->i_blocks += inode->i_sb->s_blocksize / 512; + //mark_tail_converted (inode); + } else { + /* append indirect item with holes if needed, when appending + pointer to 'block'-th block use block, which is already + allocated */ + struct cpu_key tmp_key; + struct unfm_nodeinfo un = {0, 0}; + +#ifdef CONFIG_REISERFS_CHECK + if (pos_in_item != le16_to_cpu (ih->ih_item_len) / UNFM_P_SIZE) + reiserfs_panic (inode->i_sb, "vs-: reiserfs_get_block: " + "invalid position for append"); +#endif + /* indirect item has to be appended, set up key of that position */ + make_cpu_key (&tmp_key, inode, + le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize), + //pos_in_item * inode->i_sb->s_blocksize, + TYPE_INDIRECT, 3);// key type is unimportant + + if (cpu_key_k_offset (&tmp_key) == cpu_key_k_offset (&key)) { + /* we are going to add target block to the file. 
Use allocated + block for that */ + un.unfm_nodenum = cpu_to_le32 (allocated_block_nr); + set_block_dev_mapped (bh_result, allocated_block_nr, inode); + bh_result->b_state |= (1UL << BH_New); + done = 1; + } else { + /* paste hole to the indirect item */ + } + retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)&un, UNFM_P_SIZE); + if (retval) { + reiserfs_free_block (&th, allocated_block_nr); + +#ifdef REISERFS_PREALLOCATE + reiserfs_discard_prealloc (&th, inode); +#endif + goto failure; + } + if (un.unfm_nodenum) + inode->i_blocks += inode->i_sb->s_blocksize / 512; + //mark_tail_converted (inode); + } + + if (done == 1) + break; + + /* this loop could log more blocks than we had originally asked + ** for. So, we have to allow the transaction to end if it is + ** too big or too full. Update the inode so things are + ** consistent if we crash before the function returns + ** + ** release the path so that anybody waiting on the path before + ** ending their transaction will be able to continue. + */ + if (journal_transaction_should_end(&th, th.t_blocks_allocated)) { + restart_transaction(&th, inode, &path) ; + } + /* inserting indirect pointers for a hole can take a + ** long time. reschedule if needed + */ + if (current->need_resched) + schedule() ; + + retval = search_for_position_by_key (inode->i_sb, &key, &path); + if (retval == IO_ERROR) { + retval = -EIO; + goto failure; + } + if (retval == POSITION_FOUND) { + reiserfs_warning ("vs-: reiserfs_get_block: " + "%k should not be found", &key); + retval = -EEXIST; + pathrelse(&path) ; + goto failure; + } + bh = get_bh (&path); + ih = get_ih (&path); + item = get_item (&path); + pos_in_item = path.pos_in_item; + } while (1); + + + retval = 0; + reiserfs_check_path(&path) ; + + failure: + if (transaction_started) { + reiserfs_update_sd(&th, inode) ; + journal_end(&th, inode->i_sb, jbegin_count) ; + } + pop_journal_writer(windex) ; + allow_flush_page_lock(bh_result->b_page, inode) ; + unlock_kernel() ; + reiserfs_check_path(&path) ; + return retval; +} + + +// +// BAD: new directories have stat data of new type and all other items +// of old type. Version stored in the inode says about body items, so +// in update_stat_data we can not rely on inode, but have to check +// item version directly +// + +// called by read_inode +static void init_inode (struct inode * inode, struct path * path) +{ + struct buffer_head * bh; + struct item_head * ih; + __u32 rdev; + //int version = ITEM_VERSION_1; + + bh = PATH_PLAST_BUFFER (path); + ih = PATH_PITEM_HEAD (path); + + + copy_key (INODE_PKEY (inode), &(ih->ih_key)); + inode->i_generation = INODE_PKEY (inode)->k_dir_id; + inode->i_blksize = PAGE_SIZE; + + if (stat_data_v1 (ih)) { + struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih); + unsigned long blocks; + + inode_items_version (inode) = ITEM_VERSION_1; + inode->i_mode = le16_to_cpu (sd->sd_mode); + inode->i_nlink = le16_to_cpu (sd->sd_nlink); + inode->i_uid = le16_to_cpu (sd->sd_uid); + inode->i_gid = le16_to_cpu (sd->sd_gid); + inode->i_size = le32_to_cpu (sd->sd_size); + inode->i_atime = le32_to_cpu (sd->sd_atime); + inode->i_mtime = le32_to_cpu (sd->sd_mtime); + inode->i_ctime = le32_to_cpu (sd->sd_ctime); + + inode->i_blocks = le32_to_cpu (sd->u.sd_blocks); + blocks = (inode->i_size + 511) >> 9; + blocks = _ROUND_UP (blocks, inode->i_blksize >> 9); + if (inode->i_blocks > blocks) { + // there was a bug in <=3.5.23 when i_blocks could take negative + // values. 
Starting from 3.5.17 this value could even be stored in + // stat data. For such files we set i_blocks based on file + // size. Just 2 notes: this can be wrong for sparce files. On-disk value will be + // only updated if file's inode will ever change + inode->i_blocks = blocks; + } + + rdev = le32_to_cpu (sd->u.sd_rdev); + inode->u.reiserfs_i.i_first_direct_byte = le32_to_cpu (sd->sd_first_direct_byte); + } else { + // new stat data found, but object may have old items + // (directories and symlinks) + struct stat_data * sd = (struct stat_data *)B_I_PITEM (bh, ih); + + /* both old and new directories have old keys */ + //version = (S_ISDIR (sd->sd_mode) ? ITEM_VERSION_1 : ITEM_VERSION_2); + if (S_ISDIR (sd->sd_mode) || S_ISLNK (sd->sd_mode)) + inode_items_version (inode) = ITEM_VERSION_1; + else + inode_items_version (inode) = ITEM_VERSION_2; + inode->i_mode = le16_to_cpu (sd->sd_mode); + inode->i_nlink = le32_to_cpu (sd->sd_nlink); + inode->i_uid = le32_to_cpu (sd->sd_uid); + inode->i_size = le64_to_cpu (sd->sd_size); + inode->i_gid = le32_to_cpu (sd->sd_gid); + inode->i_mtime = le32_to_cpu (sd->sd_mtime); + inode->i_atime = le32_to_cpu (sd->sd_atime); + inode->i_ctime = le32_to_cpu (sd->sd_ctime); + inode->i_blocks = le32_to_cpu (sd->sd_blocks); + rdev = le32_to_cpu (sd->u.sd_rdev); + } + + /* nopack = 0, by default */ + inode->u.reiserfs_i.nopack = 0; + + pathrelse (path); + if (S_ISREG (inode->i_mode)) { + inode->i_op = &reiserfs_file_inode_operations; + inode->i_fop = &reiserfs_file_operations; + inode->i_mapping->a_ops = &reiserfs_address_space_operations ; + } else if (S_ISDIR (inode->i_mode)) { + inode->i_op = &reiserfs_dir_inode_operations; + inode->i_fop = &reiserfs_dir_operations; + } else if (S_ISLNK (inode->i_mode)) { + inode->i_op = &page_symlink_inode_operations; + inode->i_mapping->a_ops = &reiserfs_address_space_operations; + } else { + inode->i_blocks = 0; + init_special_inode(inode, inode->i_mode, rdev) ; + } +} + + +// update new stat data with inode fields +static void inode2sd (void * sd, struct inode * inode) +{ + struct stat_data * sd_v2 = (struct stat_data *)sd; + + sd_v2->sd_mode = cpu_to_le16 (inode->i_mode); + sd_v2->sd_nlink = cpu_to_le16 (inode->i_nlink); + sd_v2->sd_uid = cpu_to_le32 (inode->i_uid); + sd_v2->sd_size = cpu_to_le64 (inode->i_size); + sd_v2->sd_gid = cpu_to_le32 (inode->i_gid); + sd_v2->sd_mtime = cpu_to_le32 (inode->i_mtime); + sd_v2->sd_atime = cpu_to_le32 (inode->i_atime); + sd_v2->sd_ctime = cpu_to_le32 (inode->i_ctime); + sd_v2->sd_blocks = cpu_to_le32 (inode->i_blocks); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + sd_v2->u.sd_rdev = cpu_to_le32 (inode->i_rdev); +} + + +// used to copy inode's fields to old stat data +static void inode2sd_v1 (void * sd, struct inode * inode) +{ + struct stat_data_v1 * sd_v1 = (struct stat_data_v1 *)sd; + + sd_v1->sd_mode = cpu_to_le16 (inode->i_mode); + sd_v1->sd_uid = cpu_to_le16 (inode->i_uid); + sd_v1->sd_gid = cpu_to_le16 (inode->i_gid); + sd_v1->sd_nlink = cpu_to_le16 (inode->i_nlink); + sd_v1->sd_size = cpu_to_le32 (inode->i_size); + sd_v1->sd_atime = cpu_to_le32 (inode->i_atime); + sd_v1->sd_ctime = cpu_to_le32 (inode->i_ctime); + sd_v1->sd_mtime = cpu_to_le32 (inode->i_mtime); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + sd_v1->u.sd_rdev = cpu_to_le32 (inode->i_rdev); + else + sd_v1->u.sd_blocks = cpu_to_le32 (inode->i_blocks); + + // Sigh. 
i_first_direct_byte is back + sd_v1->sd_first_direct_byte = cpu_to_le32 (inode->u.reiserfs_i.i_first_direct_byte); +} + + +/* NOTE, you must prepare the buffer head before sending it here, +** and then log it after the call +*/ +static void update_stat_data (struct path * path, struct inode * inode) +{ + struct buffer_head * bh; + struct item_head * ih; + + bh = PATH_PLAST_BUFFER (path); + ih = PATH_PITEM_HEAD (path); + + if (!is_statdata_le_ih (ih)) + reiserfs_panic (inode->i_sb, "vs-13065: update_stat_data: key %k, found item %h", + INODE_PKEY (inode), ih); + + if (stat_data_v1 (ih)) { + // path points to old stat data + inode2sd_v1 (B_I_PITEM (bh, ih), inode); + } else { + inode2sd (B_I_PITEM (bh, ih), inode); + } + + return; +} + + +void reiserfs_update_sd (struct reiserfs_transaction_handle *th, + struct inode * inode) +{ + struct cpu_key key; + INITIALIZE_PATH(path); + struct buffer_head *bh ; + int fs_gen ; + struct item_head *ih, tmp_ih ; + int retval; + + make_cpu_key (&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);//key type is unimportant + + for(;;) { + int pos; + /* look for the object's stat data */ + retval = search_item (inode->i_sb, &key, &path); + if (retval == IO_ERROR) { + reiserfs_warning ("vs-13050: reiserfs_update_sd: " + "i/o failure occurred trying to update %K stat data", + &key); + return; + } + if (retval == ITEM_NOT_FOUND) { + pos = PATH_LAST_POSITION (&path); + pathrelse(&path) ; + if (inode->i_nlink == 0) { + /*printk ("vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found\n");*/ + return; + } + reiserfs_warning ("vs-13060: reiserfs_update_sd: " + "stat data of object %k (nlink == %d) not found (pos %d)\n", + INODE_PKEY (inode), inode->i_nlink, pos); + reiserfs_check_path(&path) ; + return; + } + + /* sigh, prepare_for_journal might schedule. When it schedules the + ** FS might change. We have to detect that, and loop back to the + ** search if the stat data item has moved + */ + bh = get_bh(&path) ; + ih = get_ih(&path) ; + copy_item_head (&tmp_ih, ih); + fs_gen = get_generation (inode->i_sb); + reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; + if (fs_changed (fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) { + reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; + continue ; /* Stat_data item has been moved after scheduling. 
*/ + } + break; + } + update_stat_data (&path, inode); + journal_mark_dirty(th, th->t_super, bh) ; + pathrelse (&path); + return; +} + +void reiserfs_read_inode(struct inode *inode) { + make_bad_inode(inode) ; +} + + +// +// initially this function was derived from minix or ext2's analog and +// evolved as the prototype did +// + +/* looks for stat data in the tree, and fills up the fields of in-core + inode stat data fields */ +void reiserfs_read_inode2 (struct inode * inode, void *p) +{ + INITIALIZE_PATH (path_to_sd); + struct cpu_key key; + struct reiserfs_iget4_args *args = (struct reiserfs_iget4_args *)p ; + unsigned long dirino; + int retval; + + if (!p) { + make_bad_inode(inode) ; + return; + } + + dirino = args->objectid ; + + /* set version 1, version 2 could be used too, because stat data + key is the same in both versions */ + key.version = ITEM_VERSION_1; + key.on_disk_key.k_dir_id = dirino; + key.on_disk_key.k_objectid = inode->i_ino; + key.on_disk_key.u.k_offset_v1.k_offset = SD_OFFSET; + key.on_disk_key.u.k_offset_v1.k_uniqueness = SD_UNIQUENESS; + + /* look for the object's stat data */ + retval = search_item (inode->i_sb, &key, &path_to_sd); + if (retval == IO_ERROR) { + reiserfs_warning ("vs-13070: reiserfs_read_inode2: " + "i/o failure occurred trying to find stat data of %K\n", + &key); + make_bad_inode(inode) ; + return; + } + if (retval != ITEM_FOUND) { + reiserfs_warning ("vs-13042: reiserfs_read_inode2: %K not found\n", &key); + pathrelse (&path_to_sd); + make_bad_inode(inode) ; + return; + } + + init_inode (inode, &path_to_sd); + reiserfs_check_path(&path_to_sd) ; /* init inode should be relsing */ + +} + + +struct inode * reiserfs_iget (struct super_block * s, struct cpu_key * key) +{ + struct inode * inode; + struct reiserfs_iget4_args args ; + + args.objectid = key->on_disk_key.k_dir_id ; + inode = iget4 (s, key->on_disk_key.k_objectid, 0, (void *)(&args)); + if (!inode) + return inode ; + + // if (comp_short_keys (INODE_PKEY (inode), key)) { + if (is_bad_inode (inode)) { + reiserfs_warning ("vs-13048: reiserfs_iget: " + "bad_inode. Stat data of (%lu %lu) not found\n", + key->on_disk_key.k_dir_id, key->on_disk_key.k_objectid); + iput (inode); + inode = 0; + } + return inode; +} + + +// +// initially this function was derived from minix or ext2's analog and +// evolved as the prototype did +// +/* looks for stat data, then copies fields to it, marks the buffer + containing stat data as dirty */ +/* reiserfs inodes are never really dirty, since the dirty inode call +** always logs them. This call allows the VFS inode marking routines +** to properly mark inodes for datasync and such, but only actually +** does something when called for a synchronous update. 
+*/ +void reiserfs_write_inode (struct inode * inode, int do_sync) { + struct reiserfs_transaction_handle th ; + int jbegin_count = 1 ; + + if (inode->i_sb->s_flags & MS_RDONLY) { + reiserfs_warning("clm-6005: writing inode %lu on readonly FS\n", + inode->i_ino) ; + return ; + } + if (do_sync) { + lock_kernel() ; + journal_begin(&th, inode->i_sb, jbegin_count) ; + reiserfs_update_sd (&th, inode); + journal_end_sync(&th, inode->i_sb, jbegin_count) ; + unlock_kernel() ; + } +} + +void reiserfs_dirty_inode (struct inode * inode) { + struct reiserfs_transaction_handle th ; + + if (inode->i_sb->s_flags & MS_RDONLY) { + reiserfs_warning("clm-6006: writing inode %lu on readonly FS\n", + inode->i_ino) ; + return ; + } + lock_kernel() ; + journal_begin(&th, inode->i_sb, 1) ; + reiserfs_update_sd (&th, inode); + journal_end(&th, inode->i_sb, 1) ; + unlock_kernel() ; +} + + +/* FIXME: no need any more. right? */ +int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode) +{ + int err = 0; + + reiserfs_update_sd (th, inode); + return err; +} + + +/* stat data of new object is inserted already, this inserts the item + containing "." and ".." entries */ +static int reiserfs_new_directory (struct reiserfs_transaction_handle *th, + struct item_head * ih, struct path * path, const struct inode * dir) +{ + struct super_block * sb = th->t_super; + char empty_dir [EMPTY_DIR_SIZE]; + char * body = empty_dir; + struct cpu_key key; + int retval; + + _make_cpu_key (&key, ITEM_VERSION_1, le32_to_cpu (ih->ih_key.k_dir_id), + le32_to_cpu (ih->ih_key.k_objectid), DOT_OFFSET, TYPE_DIRENTRY, 3/*key length*/); + + /* compose item head for new item. Directories consist of items of + old type (ITEM_VERSION_1). Do not set key (second arg is 0), it + is done by reiserfs_new_inode */ + if (old_format_only (sb)) { + make_le_item_head (ih, 0, ITEM_VERSION_1, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2); + + make_empty_dir_item_v1 (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid, + le32_to_cpu (INODE_PKEY (dir)->k_dir_id), + le32_to_cpu (INODE_PKEY (dir)->k_objectid)); + } else { + make_le_item_head (ih, 0, ITEM_VERSION_1, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2); + + make_empty_dir_item (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid, + le32_to_cpu (INODE_PKEY (dir)->k_dir_id), + le32_to_cpu (INODE_PKEY (dir)->k_objectid)); + } + + /* look for place in the tree for new item */ + retval = search_item (sb, &key, path); + if (retval == IO_ERROR) { + reiserfs_warning ("vs-13080: reiserfs_new_directory: " + "i/o failure occured creating new directory\n"); + return -EIO; + } + if (retval == ITEM_FOUND) { + pathrelse (path); + reiserfs_warning ("vs-13070: reiserfs_new_directory: " + "object with this key exists (%k)", &(ih->ih_key)); + return -EEXIST; + } + + /* insert item, that is empty directory item */ + return reiserfs_insert_item (th, path, &key, ih, body); +} + + +/* stat data of object has been inserted, this inserts the item + containing the body of symlink */ +static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th, + struct item_head * ih, + struct path * path, const char * symname, int item_len) +{ + struct super_block * sb = th->t_super; + struct cpu_key key; + int retval; + + _make_cpu_key (&key, ITEM_VERSION_1, + le32_to_cpu (ih->ih_key.k_dir_id), + le32_to_cpu (ih->ih_key.k_objectid), + 1, TYPE_DIRECT, 3/*key length*/); + + make_le_item_head (ih, 0, ITEM_VERSION_1, 1, TYPE_DIRECT, item_len, 0/*free_space*/); + + /* look for place in the tree for new item */ + 
retval = search_item (sb, &key, path);
+    if (retval == IO_ERROR) {
+        reiserfs_warning ("vs-13080: reiserfs_new_symlink: "
+                          "i/o failure occurred creating new symlink\n");
+        return -EIO;
+    }
+    if (retval == ITEM_FOUND) {
+        pathrelse (path);
+        reiserfs_warning ("vs-13080: reiserfs_new_symlink: "
+                          "object with this key exists (%k)", &(ih->ih_key));
+        return -EEXIST;
+    }
+
+    /* insert item, that is the body of the symlink */
+    return reiserfs_insert_item (th, path, &key, ih, symname);
+}
+
+
+/* inserts the stat data into the tree, and then calls
+   reiserfs_new_directory (to insert ".", ".." item if new object is
+   directory) or reiserfs_new_symlink (to insert symlink body if new
+   object is symlink) or nothing (if new object is regular file) */
+struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th,
+                                   const struct inode * dir, int mode,
+                                   const char * symname,
+                                   int i_size, /* 0 for regular, EMPTY_DIR_SIZE for dirs,
+                                                  strlen (symname) for symlinks */
+                                   struct dentry *dentry, struct inode *inode, int * err)
+{
+    struct super_block * sb;
+    INITIALIZE_PATH (path_to_key);
+    struct cpu_key key;
+    struct item_head ih;
+    struct stat_data sd;
+    int retval;
+
+    if (!dir || !dir->i_nlink) {
+        *err = -EPERM;
+        iput(inode) ;
+        return NULL;
+    }
+
+    sb = dir->i_sb;
+    inode->i_sb = sb;
+    inode->i_flags = 0;//inode->i_sb->s_flags;
+
+    /* item head of new item */
+    ih.ih_key.k_dir_id = INODE_PKEY (dir)->k_objectid;
+    ih.ih_key.k_objectid = cpu_to_le32 (reiserfs_get_unused_objectid (th));
+    if (!ih.ih_key.k_objectid) {
+        iput(inode) ;
+        *err = -ENOMEM;
+        return NULL;
+    }
+    if (old_format_only (sb))
+        make_le_item_head (&ih, 0, ITEM_VERSION_1, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
+    else
+        make_le_item_head (&ih, 0, ITEM_VERSION_2, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
+
+
+    /* key to search for correct place for new stat data */
+    _make_cpu_key (&key, ITEM_VERSION_2, le32_to_cpu (ih.ih_key.k_dir_id),
+                   le32_to_cpu (ih.ih_key.k_objectid), SD_OFFSET, TYPE_STAT_DATA, 3/*key length*/);
+
+    /* find the proper place for inserting the stat data */
+    retval = search_item (sb, &key, &path_to_key);
+    if (retval == IO_ERROR) {
+        iput (inode);
+        *err = -EIO;
+        return NULL;
+    }
+    if (retval == ITEM_FOUND) {
+        pathrelse (&path_to_key);
+        iput (inode);
+        *err = -EEXIST;
+        return NULL;
+    }
+
+    /* fill stat data */
+    inode->i_mode = mode;
+    inode->i_nlink = (S_ISDIR (mode) ? 2 : 1);
+    inode->i_uid = current->fsuid;
+    if (dir->i_mode & S_ISGID) {
+        inode->i_gid = dir->i_gid;
+        if (S_ISDIR(mode))
+            inode->i_mode |= S_ISGID;
+    } else
+        inode->i_gid = current->fsgid;
+
+    inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+    inode->i_size = i_size;
+    inode->i_blocks = (inode->i_size + 511) >> 9;
+    inode->u.reiserfs_i.i_first_direct_byte = S_ISLNK(mode) ?
1 : + U32_MAX/*NO_BYTES_IN_DIRECT_ITEM*/; + + if (old_format_only (sb)) + inode2sd_v1 (&sd, inode); + else + inode2sd (&sd, inode); + + // these do not go to on-disk stat data + inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid); + inode->i_blksize = PAGE_SIZE; + inode->i_dev = sb->s_dev; + + // store in in-core inode the key of stat data and version all + // object items will have (directory items will have old offset + // format, other new objects will consist of new items) + memcpy (INODE_PKEY (inode), &(ih.ih_key), KEY_SIZE); + if (old_format_only (sb) || S_ISDIR(mode) || S_ISLNK(mode)) + inode_items_version (inode) = ITEM_VERSION_1; + else + inode_items_version (inode) = ITEM_VERSION_2; + + /* insert the stat data into the tree */ + retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, (char *)(&sd)); + if (retval) { + iput (inode); + *err = retval; + reiserfs_check_path(&path_to_key) ; + return NULL; + } + + if (S_ISDIR(mode)) { + /* insert item with "." and ".." */ + retval = reiserfs_new_directory (th, &ih, &path_to_key, dir); + } + + if (S_ISLNK(mode)) { + /* insert body of symlink */ + if (!old_format_only (sb)) + i_size = ROUND_UP(i_size); + retval = reiserfs_new_symlink (th, &ih, &path_to_key, symname, i_size); + } + if (retval) { + inode->i_nlink = 0; + iput (inode); + *err = retval; + reiserfs_check_path(&path_to_key) ; + return NULL; + } + + /* not a perfect generation count, as object ids can be reused, but this + ** is as good as reiserfs can do right now + */ + inode->i_generation = INODE_PKEY (inode)->k_dir_id; + insert_inode_hash (inode); + // we do not mark inode dirty: on disk content matches to the + // in-core one + reiserfs_check_path(&path_to_key) ; + + return inode; +} + +/* +** finds the tail page in the page cache, +** reads the last block in. +** +** On success, page_result is set to a locked, pinned page, and bh_result +** is set to an up to date buffer for the last block in the file. returns 0. +** +** tail conversion is not done, so bh_result might not be valid for writing +** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before +** trying to write the block. +** +** on failure, nonzero is returned, page_result and bh_result are untouched. +*/ +static int grab_tail_page(struct inode *p_s_inode, + struct page **page_result, + struct buffer_head **bh_result) { + + /* we want the page with the last byte in the file, + ** not the page that will hold the next byte for appending + */ + unsigned long index = (p_s_inode->i_size-1) >> PAGE_CACHE_SHIFT ; + unsigned long pos = 0 ; + unsigned long start = 0 ; + unsigned long blocksize = p_s_inode->i_sb->s_blocksize ; + unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1) ; + struct buffer_head *bh ; + struct buffer_head *head ; + struct page * page ; + int error ; + + /* we know that we are only called with inode->i_size > 0. + ** we also know that a file tail can never be as big as a block + ** If i_size % blocksize == 0, our file is currently block aligned + ** and it won't need converting or zeroing after a truncate. 
+ */ + if ((offset & (blocksize - 1)) == 0) { + return -ENOENT ; + } + page = grab_cache_page(p_s_inode->i_mapping, index) ; + error = PTR_ERR(page) ; + if (IS_ERR(page)) { + goto out ; + } + /* start within the page of the last block in the file */ + start = (offset / blocksize) * blocksize ; + + error = block_prepare_write(page, start, offset, + reiserfs_get_block_create_0) ; + if (error) + goto unlock ; + + kunmap(page) ; /* mapped by block_prepare_write */ + + head = page->buffers ; + bh = head; + do { + if (pos >= start) { + break ; + } + bh = bh->b_this_page ; + pos += blocksize ; + } while(bh != head) ; + + if (!buffer_uptodate(bh)) { + /* note, this should never happen, prepare_write should + ** be taking care of this for us. If the buffer isn't up to date, + ** I've screwed up the code to find the buffer, or the code to + ** call prepare_write + */ + reiserfs_warning("clm-6000: error reading block %lu on dev %s\n", + bh->b_blocknr, kdevname(bh->b_dev)) ; + error = -EIO ; + goto unlock ; + } + *bh_result = bh ; + *page_result = page ; + +out: + return error ; + +unlock: + UnlockPage(page) ; + page_cache_release(page) ; + return error ; +} + +/* +** vfs version of truncate file. Must NOT be called with +** a transaction already started. +** +** some code taken from block_truncate_page +*/ +void reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) { + struct reiserfs_transaction_handle th ; + int windex ; + + /* we want the offset for the first byte after the end of the file */ + unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1) ; + unsigned blocksize = p_s_inode->i_sb->s_blocksize ; + unsigned length ; + struct page *page = NULL ; + int error ; + struct buffer_head *bh = NULL ; + + if (p_s_inode->i_size > 0) { + if ((error = grab_tail_page(p_s_inode, &page, &bh))) { + // -ENOENT means we truncated past the end of the file, + // and get_block_create_0 could not find a block to read in, + // which is ok. + if (error != -ENOENT) + reiserfs_warning("clm-6001: grab_tail_page failed %d\n", error); + page = NULL ; + bh = NULL ; + } + } + + /* so, if page != NULL, we have a buffer head for the offset at + ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0, + ** then we have an unformatted node. Otherwise, we have a direct item, + ** and no zeroing is required. We zero after the truncate, because the + ** truncate might pack the item anyway (it will unmap bh if it packs). 
+ */ + prevent_flush_page_lock(page, p_s_inode) ; + journal_begin(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 ) ; + windex = push_journal_writer("reiserfs_vfs_truncate_file") ; + reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ; + pop_journal_writer(windex) ; + journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 ) ; + allow_flush_page_lock(page, p_s_inode) ; + + if (page && buffer_mapped(bh) && bh->b_blocknr != 0) { + length = offset & (blocksize - 1) ; + /* if we are not on a block boundary */ + if (length) { + length = blocksize - length ; + memset((char *)kmap(page) + offset, 0, length) ; + flush_dcache_page(page) ; + kunmap(page) ; + mark_buffer_dirty(bh) ; + } + } + + if (page) { + UnlockPage(page) ; + page_cache_release(page) ; + } + return ; +} + +static int map_block_for_writepage(struct inode *inode, + struct buffer_head *bh_result, + unsigned long block) { + struct reiserfs_transaction_handle th ; + int fs_gen ; + struct item_head tmp_ih ; + struct item_head *ih ; + struct buffer_head *bh ; + __u32 *item ; + struct cpu_key key ; + INITIALIZE_PATH(path) ; + int pos_in_item ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT ; + loff_t byte_offset = (block << inode->i_sb->s_blocksize_bits) + 1 ; + int retval ; + int use_get_block = 0 ; + int bytes_copied = 0 ; + int copy_size ; + +start_over: + lock_kernel() ; + prevent_flush_page_lock(bh_result->b_page, inode) ; + journal_begin(&th, inode->i_sb, jbegin_count) ; + + make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ; + +research: + retval = search_for_position_by_key(inode->i_sb, &key, &path) ; + if (retval != POSITION_FOUND) { + use_get_block = 1; + goto out ; + } + + bh = get_bh(&path) ; + ih = get_ih(&path) ; + item = get_item(&path) ; + pos_in_item = path.pos_in_item ; + + /* we've found an unformatted node */ + if (indirect_item_found(retval, ih)) { + if (bytes_copied > 0) { + reiserfs_warning("clm-6002: bytes_copied %d\n", bytes_copied) ; + } + if (!item[pos_in_item]) { + /* crap, we are writing to a hole */ + use_get_block = 1; + goto out ; + } + set_block_dev_mapped(bh_result, le32_to_cpu(item[pos_in_item]), inode); + } else if (is_direct_le_ih(ih)) { + char *p ; + p = page_address(bh_result->b_page) ; + p += (byte_offset -1) & (PAGE_CACHE_SIZE - 1) ; + copy_size = le16_to_cpu(ih->ih_item_len) - pos_in_item ; + + fs_gen = get_generation(inode->i_sb) ; + copy_item_head(&tmp_ih, ih) ; + reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; + if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { + reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; + goto research; + } + + memcpy( B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, copy_size) ; + + journal_mark_dirty(&th, inode->i_sb, bh) ; + bytes_copied += copy_size ; + set_block_dev_mapped(bh_result, 0, inode); + + /* are there still bytes left? */ + if (bytes_copied < bh_result->b_size && + (byte_offset + bytes_copied) < inode->i_size) { + set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + copy_size) ; + goto research ; + } + } else { + reiserfs_warning("clm-6003: bad item inode %lu, device %s\n", inode->i_ino, kdevname(inode->i_sb->s_dev)) ; + retval = -EIO ; + goto out ; + } + retval = 0 ; + +out: + pathrelse(&path) ; + journal_end(&th, inode->i_sb, jbegin_count) ; + allow_flush_page_lock(bh_result->b_page, inode) ; + unlock_kernel() ; + + /* this is where we fill in holes in the file. 
*/ + if (use_get_block) { + kmap(bh_result->b_page) ; + retval = reiserfs_get_block(inode, block, bh_result, 1) ; + kunmap(bh_result->b_page) ; + if (!retval) { + if (!buffer_mapped(bh_result) || bh_result->b_blocknr == 0) { + /* get_block failed to find a mapped unformatted node. */ + use_get_block = 0 ; + goto start_over ; + } + } + } + return retval ; +} + +/* helper func to get a buffer head ready for writepage to send to +** ll_rw_block +*/ +static inline void submit_bh_for_writepage(struct buffer_head **bhp, int nr) { + struct buffer_head *bh ; + int i; + for(i = 0 ; i < nr ; i++) { + bh = bhp[i] ; + lock_buffer(bh) ; + atomic_inc(&bh->b_count) ; /* async end_io handler decs this */ + set_buffer_async_io(bh) ; + /* submit_bh doesn't care if the buffer is dirty, but nobody + ** later on in the call chain will be cleaning it. So, we + ** clean the buffer here, it still gets written either way. + */ + clear_bit(BH_Dirty, &bh->b_state) ; + set_bit(BH_Uptodate, &bh->b_state) ; + submit_bh(WRITE, bh) ; + } +} + +static int reiserfs_write_full_page(struct page *page) { + struct inode *inode = page->mapping->host ; + unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ; + unsigned last_offset = PAGE_CACHE_SIZE; + int error = 0; + unsigned long block ; + unsigned cur_offset = 0 ; + struct buffer_head *head, *bh ; + int partial = 0 ; + struct buffer_head *arr[PAGE_CACHE_SIZE/512] ; + int nr = 0 ; + + if (!page->buffers) { + block_prepare_write(page, 0, 0, NULL) ; + kunmap(page) ; + } + /* last page in the file, zero out any contents past the + ** last byte in the file + */ + if (page->index >= end_index) { + last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ; + /* no file contents in this page */ + if (page->index >= end_index + 1 || !last_offset) { + error = -EIO ; + goto fail ; + } + memset((char *)kmap(page)+last_offset, 0, PAGE_CACHE_SIZE-last_offset) ; + flush_dcache_page(page) ; + kunmap(page) ; + } + head = page->buffers ; + bh = head ; + block = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits) ; + do { + /* if this offset in the page is outside the file */ + if (cur_offset >= last_offset) { + if (!buffer_uptodate(bh)) + partial = 1 ; + } else { + /* fast path, buffer mapped to an unformatted node */ + if (buffer_mapped(bh) && bh->b_blocknr != 0) { + arr[nr++] = bh ; + } else { + /* buffer not mapped yet, or points to a direct item. + ** search and dirty or log + */ + if ((error = map_block_for_writepage(inode, bh, block))) { + goto fail ; + } + /* map_block_for_writepage either found an unformatted node + ** and mapped it for us, or it found a direct item + ** and logged the changes. + */ + if (buffer_mapped(bh) && bh->b_blocknr != 0) { + arr[nr++] = bh ; + } + } + } + bh = bh->b_this_page ; + cur_offset += bh->b_size ; + block++ ; + } while(bh != head) ; + + /* if this page only had a direct item, it is very possible for + ** nr == 0 without there being any kind of error. 
+ */ + if (nr) { + submit_bh_for_writepage(arr, nr) ; + } else { + UnlockPage(page) ; + } + if (!partial) + SetPageUptodate(page) ; + + return 0 ; + +fail: + if (nr) { + submit_bh_for_writepage(arr, nr) ; + } else { + UnlockPage(page) ; + } + ClearPageUptodate(page) ; + return error ; +} + +// +// this is exactly what 2.3.99-pre9's ext2_readpage is +// +static int reiserfs_readpage (struct file *f, struct page * page) +{ + return block_read_full_page (page, reiserfs_get_block); +} + + +// +// modified from ext2_writepage is +// +static int reiserfs_writepage (struct page * page) +{ + struct inode *inode = page->mapping->host ; + reiserfs_wait_on_write_block(inode->i_sb) ; + return reiserfs_write_full_page(page) ; +} + + +// +// from ext2_prepare_write, but modified +// +int reiserfs_prepare_write(struct file *f, struct page *page, unsigned from, unsigned to) { + struct inode *inode = page->mapping->host ; + reiserfs_wait_on_write_block(inode->i_sb) ; + fix_tail_page_for_writing(page) ; + return block_prepare_write(page, from, to, reiserfs_get_block) ; +} + + +// +// this is exactly what 2.3.99-pre9's ext2_bmap is +// +static int reiserfs_aop_bmap(struct address_space *as, long block) { + return generic_block_bmap(as, block, reiserfs_bmap) ; +} + + +static int reiserfs_commit_write(struct file *f, struct page *page, + unsigned from, unsigned to) { + struct inode *inode = page->mapping->host ; + int ret ; + struct reiserfs_transaction_handle th ; + + reiserfs_wait_on_write_block(inode->i_sb) ; + prevent_flush_page_lock(page, inode) ; + ret = generic_commit_write(f, page, from, to) ; + /* we test for O_SYNC here so we can commit the transaction + ** for any packed tails the file might have had + */ + if (f->f_flags & O_SYNC) { + journal_begin(&th, inode->i_sb, 1) ; + reiserfs_prepare_for_journal(inode->i_sb, + SB_BUFFER_WITH_SB(inode->i_sb), 1) ; + journal_mark_dirty(&th, inode->i_sb, SB_BUFFER_WITH_SB(inode->i_sb)) ; + journal_end_sync(&th, inode->i_sb, 1) ; + } + allow_flush_page_lock(page, inode) ; + return ret ; +} + +struct address_space_operations reiserfs_address_space_operations = { + writepage: reiserfs_writepage, + readpage: reiserfs_readpage, + sync_page: block_sync_page, + prepare_write: reiserfs_prepare_write, + commit_write: reiserfs_commit_write, + bmap: reiserfs_aop_bmap +} ; diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/ioctl.c linux/fs/reiserfs/ioctl.c --- v2.4.0/linux/fs/reiserfs/ioctl.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/ioctl.c Mon Jan 15 12:42:32 2001 @@ -0,0 +1,101 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include +#include + +#else + +#include "nokernel.h" + +#endif + +/* +** reiserfs_ioctl - handler for ioctl for inode +** supported commands: +** 1) REISERFS_IOC_UNPACK - try to unpack tail from direct item into indirect +** and prevent packing file (argument arg has to be non-zero) +** 2) That's all for a while ... +*/ +int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case REISERFS_IOC_UNPACK: + if (arg) + return reiserfs_unpack (inode, filp); + + default: + return -ENOTTY; + } +} + +/* +** reiserfs_unpack +** Function try to convert tail from direct item into indirect. 
+** It set up nopack attribute in the inode.u.reiserfs_i.nopack +*/ +int reiserfs_unpack (struct inode * inode, struct file * filp) +{ + int retval = 0; + int index ; + struct page *page ; + unsigned long write_from ; + unsigned long blocksize = inode->i_sb->s_blocksize ; + + if (inode->i_size == 0) { + return -EINVAL ; + } + /* ioctl already done */ + if (inode->u.reiserfs_i.nopack) { + return 0 ; + } + lock_kernel(); + + /* we need to make sure nobody is changing the file size beneath + ** us + */ + down(&inode->i_sem) ; + + write_from = inode->i_size & (blocksize - 1) ; + /* if we are on a block boundary, we are already unpacked. */ + if ( write_from == 0) { + inode->u.reiserfs_i.nopack = 1; + goto out ; + } + + /* we unpack by finding the page with the tail, and calling + ** reiserfs_prepare_write on that page. This will force a + ** reiserfs_get_block to unpack the tail for us. + */ + index = inode->i_size >> PAGE_CACHE_SHIFT ; + page = grab_cache_page(inode->i_mapping, index) ; + retval = PTR_ERR(page) ; + if (IS_ERR(page)) { + goto out ; + } + retval = reiserfs_prepare_write(NULL, page, write_from, blocksize) ; + if (retval) + goto out_unlock ; + + /* conversion can change page contents, must flush */ + flush_dcache_page(page) ; + inode->u.reiserfs_i.nopack = 1; + kunmap(page) ; /* mapped by prepare_write */ + +out_unlock: + UnlockPage(page) ; + page_cache_release(page) ; + +out: + up(&inode->i_sem) ; + unlock_kernel(); + return retval; +} diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/item_ops.c linux/fs/reiserfs/item_ops.c --- v2.4.0/linux/fs/reiserfs/item_ops.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/item_ops.c Mon Jan 15 12:42:32 2001 @@ -0,0 +1,718 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + +#ifdef __KERNEL__ + +#include +#include + +#else + +#include "nokernel.h" + +#endif + + +// this contains item handlers for old item types: sd, direct, +// indirect, directory + +/* and where are the comments? how about saying where we can find an + explanation of each item handler method? -Hans */ + +////////////////////////////////////////////////////////////////////////////// +// stat data functions +// +static int sd_bytes_number (struct item_head * ih, int block_size) +{ + return 0; +} + +static void sd_decrement_key (struct cpu_key * key) +{ + key->on_disk_key.k_objectid --; + set_cpu_key_k_type (key, TYPE_ANY); + set_cpu_key_k_offset(key, (loff_t)(-1)); +} + +static int sd_is_left_mergeable (struct key * key, unsigned long bsize) +{ + return 0; +} + + + +static char * print_time (time_t t) +{ + static char timebuf[256]; + +#ifndef __KERNEL__ +// struct tm *loctime; +// loctime = localtime (&t); + sprintf (timebuf, "%s", asctime (localtime (&t))); + timebuf[strlen (timebuf) - 1] = 0; +#else + sprintf (timebuf, "%ld", t); +#endif + return timebuf; +} + + +static void sd_print_item (struct item_head * ih, char * item) +{ + printk ("\tmode | size | nlinks | first direct | mtime\n"); + if (stat_data_v1 (ih)) { + struct stat_data_v1 * sd = (struct stat_data_v1 *)item; + + printk ("\t0%-6o | %6u | %2u | %d | %s\n", sd->sd_mode, sd->sd_size, + sd->sd_nlink, sd->sd_first_direct_byte, print_time (sd->sd_mtime)); + } else { + struct stat_data * sd = (struct stat_data *)item; + + printk ("\t0%-6o | %6Lu | %2u | %d | %s\n", sd->sd_mode, (unsigned long long)(sd->sd_size), + sd->sd_nlink, sd->u.sd_rdev, print_time (sd->sd_mtime)); + } +} + +static void sd_check_item (struct item_head * ih, char * item) +{ + // FIXME: type something here! 
+} + + +static int sd_create_vi (struct virtual_node * vn, + struct virtual_item * vi, + int is_affected, + int insert_size) +{ + vi->vi_index = TYPE_STAT_DATA; + //vi->vi_type |= VI_TYPE_STAT_DATA;// not needed? + return 0; +} + + +static int sd_check_left (struct virtual_item * vi, int free, + int start_skip, int end_skip) +{ + if (start_skip || end_skip) + BUG (); + return -1; +} + + +static int sd_check_right (struct virtual_item * vi, int free) +{ + return -1; +} + +static int sd_part_size (struct virtual_item * vi, int first, int count) +{ + if (count) + BUG (); + return 0; +} + +static int sd_unit_num (struct virtual_item * vi) +{ + return vi->vi_item_len - IH_SIZE; +} + + +static void sd_print_vi (struct virtual_item * vi) +{ + reiserfs_warning ("STATDATA, index %d, type 0x%x, %h\n", + vi->vi_index, vi->vi_type, vi->vi_ih); +} + +struct item_operations stat_data_ops = { + sd_bytes_number, + sd_decrement_key, + sd_is_left_mergeable, + sd_print_item, + sd_check_item, + + sd_create_vi, + sd_check_left, + sd_check_right, + sd_part_size, + sd_unit_num, + sd_print_vi +}; + + + +////////////////////////////////////////////////////////////////////////////// +// direct item functions +// +static int direct_bytes_number (struct item_head * ih, int block_size) +{ + return le16_to_cpu (ih->ih_item_len); +} + + +// FIXME: this should probably switch to indirect as well +static void direct_decrement_key (struct cpu_key * key) +{ + cpu_key_k_offset_dec (key); + if (cpu_key_k_offset (key) == 0) + set_cpu_key_k_type (key, TYPE_STAT_DATA); +} + + +static int direct_is_left_mergeable (struct key * key, unsigned long bsize) +{ + int version = le_key_version (key); + return ((le_key_k_offset (version, key) & (bsize - 1)) != 1); +} + + +static void direct_print_item (struct item_head * ih, char * item) +{ + int j = 0; + +// return; + printk ("\""); + while (j < ih->ih_item_len) + printk ("%c", item[j++]); + printk ("\"\n"); +} + + +static void direct_check_item (struct item_head * ih, char * item) +{ + // FIXME: type something here! 
+} + + +static int direct_create_vi (struct virtual_node * vn, + struct virtual_item * vi, + int is_affected, + int insert_size) +{ + vi->vi_index = TYPE_DIRECT; + //vi->vi_type |= VI_TYPE_DIRECT; + return 0; +} + +static int direct_check_left (struct virtual_item * vi, int free, + int start_skip, int end_skip) +{ + int bytes; + + bytes = free - free % 8; + return bytes ?: -1; +} + + +static int direct_check_right (struct virtual_item * vi, int free) +{ + return direct_check_left (vi, free, 0, 0); +} + +static int direct_part_size (struct virtual_item * vi, int first, int count) +{ + return count; +} + + +static int direct_unit_num (struct virtual_item * vi) +{ + return vi->vi_item_len - IH_SIZE; +} + + +static void direct_print_vi (struct virtual_item * vi) +{ + reiserfs_warning ("DIRECT, index %d, type 0x%x, %h\n", + vi->vi_index, vi->vi_type, vi->vi_ih); +} + +struct item_operations direct_ops = { + direct_bytes_number, + direct_decrement_key, + direct_is_left_mergeable, + direct_print_item, + direct_check_item, + + direct_create_vi, + direct_check_left, + direct_check_right, + direct_part_size, + direct_unit_num, + direct_print_vi +}; + + + +////////////////////////////////////////////////////////////////////////////// +// indirect item functions +// + +static int indirect_bytes_number (struct item_head * ih, int block_size) +{ + return le16_to_cpu (ih->ih_item_len) / UNFM_P_SIZE * block_size; //- get_ih_free_space (ih); +} + + +// decrease offset, if it becomes 0, change type to stat data +static void indirect_decrement_key (struct cpu_key * key) +{ + cpu_key_k_offset_dec (key); + if (cpu_key_k_offset (key) == 0) + set_cpu_key_k_type (key, TYPE_STAT_DATA); +} + + +// if it is not first item of the body, then it is mergeable +static int indirect_is_left_mergeable (struct key * key, unsigned long bsize) +{ + int version = le_key_version (key); + return (le_key_k_offset (version, key) != 1); +} + + +// printing of indirect item +static void start_new_sequence (__u32 * start, int * len, __u32 new) +{ + *start = new; + *len = 1; +} + + +static int sequence_finished (__u32 start, int * len, __u32 new) +{ + if (start == INT_MAX) + return 1; + + if (start == 0 && new == 0) { + (*len) ++; + return 0; + } + if (start != 0 && (start + *len) == new) { + (*len) ++; + return 0; + } + return 1; +} + +static void print_sequence (__u32 start, int len) +{ + if (start == INT_MAX) + return; + + if (len == 1) + printk (" %d", start); + else + printk (" %d(%d)", start, len); +} + + +static void indirect_print_item (struct item_head * ih, char * item) +{ + int j; + __u32 * unp, prev = INT_MAX; + int num; + + unp = (__u32 *)item; + + if (ih->ih_item_len % UNFM_P_SIZE) + printk ("indirect_print_item: invalid item len"); + + printk ("%d pointers\n[ ", (int)I_UNFM_NUM (ih)); + for (j = 0; j < I_UNFM_NUM (ih); j ++) { + if (sequence_finished (prev, &num, unp[j])) { + print_sequence (prev, num); + start_new_sequence (&prev, &num, unp[j]); + } + } + print_sequence (prev, num); + printk ("]\n"); +} + +static void indirect_check_item (struct item_head * ih, char * item) +{ + // FIXME: type something here! 
+} + + +static int indirect_create_vi (struct virtual_node * vn, + struct virtual_item * vi, + int is_affected, + int insert_size) +{ + vi->vi_index = TYPE_INDIRECT; + //vi->vi_type |= VI_TYPE_INDIRECT; + return 0; +} + +static int indirect_check_left (struct virtual_item * vi, int free, + int start_skip, int end_skip) +{ + int bytes; + + bytes = free - free % UNFM_P_SIZE; + return bytes ?: -1; +} + + +static int indirect_check_right (struct virtual_item * vi, int free) +{ + return indirect_check_left (vi, free, 0, 0); +} + + + +// return size in bytes of 'units' units. If first == 0 - calculate from the head (left), otherwise - from tail (right) +static int indirect_part_size (struct virtual_item * vi, int first, int units) +{ + // unit of indirect item is byte (yet) + return units; +} + +static int indirect_unit_num (struct virtual_item * vi) +{ + // unit of indirect item is byte (yet) + return vi->vi_item_len - IH_SIZE; +} + +static void indirect_print_vi (struct virtual_item * vi) +{ + reiserfs_warning ("INDIRECT, index %d, type 0x%x, %h\n", + vi->vi_index, vi->vi_type, vi->vi_ih); +} + +struct item_operations indirect_ops = { + indirect_bytes_number, + indirect_decrement_key, + indirect_is_left_mergeable, + indirect_print_item, + indirect_check_item, + + indirect_create_vi, + indirect_check_left, + indirect_check_right, + indirect_part_size, + indirect_unit_num, + indirect_print_vi +}; + + +////////////////////////////////////////////////////////////////////////////// +// direntry functions +// + + +static int direntry_bytes_number (struct item_head * ih, int block_size) +{ + reiserfs_warning ("vs-16090: direntry_bytes_number: " + "bytes number is asked for direntry"); + return 0; +} + +static void direntry_decrement_key (struct cpu_key * key) +{ + cpu_key_k_offset_dec (key); + if (cpu_key_k_offset (key) == 0) + set_cpu_key_k_type (key, TYPE_STAT_DATA); +} + + +static int direntry_is_left_mergeable (struct key * key, unsigned long bsize) +{ + if (le32_to_cpu (key->u.k_offset_v1.k_offset) == DOT_OFFSET) + return 0; + return 1; + +} + + +static void direntry_print_item (struct item_head * ih, char * item) +{ + int i; + int namelen; + struct reiserfs_de_head * deh; + char * name; + static char namebuf [80]; + + + printk ("\n # %-15s%-30s%-15s%-15s%-15s\n", "Name", "Key of pointed object", "Hash", "Gen number", "Status"); + + deh = (struct reiserfs_de_head *)item; + + for (i = 0; i < I_ENTRY_COUNT (ih); i ++, deh ++) { + namelen = (i ? ((deh - 1)->deh_location) : ih->ih_item_len) - deh->deh_location; + name = item + deh->deh_location; + if (name[namelen-1] == 0) + namelen = strlen (name); + namebuf[0] = '"'; + if (namelen > sizeof (namebuf) - 3) { + strncpy (namebuf + 1, name, sizeof (namebuf) - 3); + namebuf[sizeof (namebuf) - 2] = '"'; + namebuf[sizeof (namebuf) - 1] = 0; + } else { + memcpy (namebuf + 1, name, namelen); + namebuf[namelen + 1] = '"'; + namebuf[namelen + 2] = 0; + } + + printk ("%d: %-15s%-15d%-15d%-15Ld%-15Ld(%s)\n", + i, namebuf, + deh->deh_dir_id, deh->deh_objectid, + GET_HASH_VALUE (deh_offset (deh)), GET_GENERATION_NUMBER ((deh_offset (deh))), + (de_hidden (deh)) ? "HIDDEN" : "VISIBLE"); + } +} + + +static void direntry_check_item (struct item_head * ih, char * item) +{ + int i; + struct reiserfs_de_head * deh; + + // FIXME: type something here! 
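+  /* the loop below only walks the entry headers; no per-entry checks
+  ** are performed yet (the loop body is empty). */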
+ deh = (struct reiserfs_de_head *)item; + for (i = 0; i < I_ENTRY_COUNT (ih); i ++, deh ++) { + ; + } +} + + + +#define DIRENTRY_VI_FIRST_DIRENTRY_ITEM 1 + +struct direntry_uarea { + int flags; + short entry_count; + short entry_sizes[1]; +}; + + +/* + * function returns old entry number in directory item in real node + * using new entry number in virtual item in virtual node */ +static inline int old_entry_num (int is_affected, int virtual_entry_num, int pos_in_item, int mode) +{ + if ( mode == M_INSERT || mode == M_DELETE) + return virtual_entry_num; + + if (!is_affected) + /* cut or paste is applied to another item */ + return virtual_entry_num; + + if (virtual_entry_num < pos_in_item) + return virtual_entry_num; + + if (mode == M_CUT) + return virtual_entry_num + 1; + +#ifdef CONFIG_REISERFS_CHECK + if (mode != M_PASTE || virtual_entry_num == 0) + reiserfs_panic (0, "vs-8015: old_entry_num: mode must be M_PASTE (mode = \'%c\'", mode); +#endif + + return virtual_entry_num - 1; +} + + + + +/* Create an array of sizes of directory entries for virtual + item. Return space used by an item. FIXME: no control over + consuming of space used by this item handler */ +static int direntry_create_vi (struct virtual_node * vn, + struct virtual_item * vi, + int is_affected, + int insert_size) +{ + struct direntry_uarea * dir_u = vi->vi_uarea; + int i, j; + int size = sizeof (struct direntry_uarea); + struct reiserfs_de_head * deh; + + vi->vi_index = TYPE_DIRENTRY; + + if (!(vi->vi_ih) || !vi->vi_item) + BUG (); + + + dir_u->flags = 0; + if (le_ih_k_offset (vi->vi_ih) == DOT_OFFSET) + dir_u->flags |= DIRENTRY_VI_FIRST_DIRENTRY_ITEM; + + deh = (struct reiserfs_de_head *)(vi->vi_item); + + + /* virtual directory item have this amount of entry after */ + dir_u->entry_count = ih_entry_count (vi->vi_ih) + + ((is_affected) ? ((vn->vn_mode == M_CUT) ? -1 : + (vn->vn_mode == M_PASTE ? 1 : 0)) : 0); + + for (i = 0; i < dir_u->entry_count; i ++) { + j = old_entry_num (is_affected, i, vn->vn_pos_in_item, vn->vn_mode); + dir_u->entry_sizes[i] = (j ? le16_to_cpu (deh[j - 1].deh_location) : le16_to_cpu (vi->vi_ih->ih_item_len)) - + le16_to_cpu (deh[j].deh_location) + DEH_SIZE; + } + + size += (dir_u->entry_count * sizeof (short)); + + /* set size of pasted entry */ + if (is_affected && vn->vn_mode == M_PASTE) + dir_u->entry_sizes[vn->vn_pos_in_item] = insert_size; + + +#ifdef CONFIG_REISERFS_CHECK + /* compare total size of entries with item length */ + { + int k, l; + + l = 0; + for (k = 0; k < dir_u->entry_count; k ++) + l += dir_u->entry_sizes[k]; + + if (l + IH_SIZE != vi->vi_item_len + + ((is_affected && (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT)) ? 
insert_size : 0) ) { + reiserfs_panic (0, "vs-8025: set_entry_sizes: (mode==%c, insert_size==%d), invalid length of directory item", + vn->vn_mode, insert_size); + } + } +#endif + + return size; + + +} + + +// +// return number of entries which may fit into specified amount of +// free space, or -1 if free space is not enough even for 1 entry +// +static int direntry_check_left (struct virtual_item * vi, int free, + int start_skip, int end_skip) +{ + int i; + int entries = 0; + struct direntry_uarea * dir_u = vi->vi_uarea; + + for (i = start_skip; i < dir_u->entry_count - end_skip; i ++) { + if (dir_u->entry_sizes[i] > free) + /* i-th entry doesn't fit into the remaining free space */ + break; + + free -= dir_u->entry_sizes[i]; + entries ++; + } + + if (entries == dir_u->entry_count) { + printk ("free spze %d, entry_count %d\n", free, dir_u->entry_count); + BUG (); + } + + /* "." and ".." can not be separated from each other */ + if (start_skip == 0 && (dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) && entries < 2) + entries = 0; + + return entries ?: -1; +} + + +static int direntry_check_right (struct virtual_item * vi, int free) +{ + int i; + int entries = 0; + struct direntry_uarea * dir_u = vi->vi_uarea; + + for (i = dir_u->entry_count - 1; i >= 0; i --) { + if (dir_u->entry_sizes[i] > free) + /* i-th entry doesn't fit into the remaining free space */ + break; + + free -= dir_u->entry_sizes[i]; + entries ++; + } + if (entries == dir_u->entry_count) + BUG (); + + /* "." and ".." can not be separated from each other */ + if ((dir_u->flags & DIRENTRY_VI_FIRST_DIRENTRY_ITEM) && entries > dir_u->entry_count - 2) + entries = dir_u->entry_count - 2; + + return entries ?: -1; +} + + +/* sum of entry sizes between from-th and to-th entries including both edges */ +static int direntry_part_size (struct virtual_item * vi, int first, int count) +{ + int i, retval; + int from, to; + struct direntry_uarea * dir_u = vi->vi_uarea; + + retval = 0; + if (first == 0) + from = 0; + else + from = dir_u->entry_count - count; + to = from + count - 1; + + for (i = from; i <= to; i ++) + retval += dir_u->entry_sizes[i]; + + return retval; +} + +static int direntry_unit_num (struct virtual_item * vi) +{ + struct direntry_uarea * dir_u = vi->vi_uarea; + + return dir_u->entry_count; +} + + + +static void direntry_print_vi (struct virtual_item * vi) +{ + int i; + struct direntry_uarea * dir_u = vi->vi_uarea; + + reiserfs_warning ("DIRENTRY, index %d, type 0x%x, %h, flags 0x%x\n", + vi->vi_index, vi->vi_type, vi->vi_ih, dir_u->flags); + printk ("%d entries: ", dir_u->entry_count); + for (i = 0; i < dir_u->entry_count; i ++) + printk ("%d ", dir_u->entry_sizes[i]); + printk ("\n"); +} + +struct item_operations direntry_ops = { + direntry_bytes_number, + direntry_decrement_key, + direntry_is_left_mergeable, + direntry_print_item, + direntry_check_item, + + direntry_create_vi, + direntry_check_left, + direntry_check_right, + direntry_part_size, + direntry_unit_num, + direntry_print_vi +}; + + +////////////////////////////////////////////////////////////////////////////// +// +// +#if ! 
(TYPE_STAT_DATA == 0 && TYPE_INDIRECT == 1 && TYPE_DIRECT == 2 && TYPE_DIRENTRY == 3)
+   do not compile
+#endif
+
+struct item_operations * item_ops [4] = {
+  &stat_data_ops,
+  &indirect_ops,
+  &direct_ops,
+  &direntry_ops
+};
+
+
+
+
diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/journal.c linux/fs/reiserfs/journal.c
--- v2.4.0/linux/fs/reiserfs/journal.c Wed Dec 31 16:00:00 1969
+++ linux/fs/reiserfs/journal.c Mon Jan 15 15:31:19 2001
@@ -0,0 +1,3215 @@
+/*
+** Write ahead logging implementation copyright Chris Mason 2000
+**
+** The background commits make this code very interrelated, and
+** overly complex.  I need to rethink things a bit....The major players:
+**
+** journal_begin -- call with the number of blocks you expect to log.
+**                  If the current transaction is too
+**                  old, it will block until the current transaction is
+**                  finished, and then start a new one.
+**                  Usually, your transaction will get joined in with
+**                  previous ones for speed.
+**
+** journal_join  -- same as journal_begin, but won't block on the current
+**                  transaction regardless of age.  Don't ever call
+**                  this.  Ever.  There are only two places it should be
+**                  called from, and they are both inside this file.
+**
+** journal_mark_dirty -- adds blocks into this transaction.  clears any flags
+**                       that might make them get sent to disk
+**                       and then marks them BH_JDirty.  Puts the buffer head
+**                       into the current transaction hash.
+**
+** journal_end -- if the current transaction is batchable, it does nothing
+**                otherwise, it could do an async/synchronous commit, or
+**                a full flush of all log and real blocks in the
+**                transaction.
+**
+** flush_old_commits -- if the current transaction is too old, it is ended and
+**                      commit blocks are sent to disk.  Forces commit blocks
+**                      to disk for all backgrounded commits that have been
+**                      around too long.
+**                   -- Note, if you call this as an immediate flush from
+**                      within kupdate, it will ignore the immediate flag
+**
+** The commit thread -- a writer process for async commits.  It allows
+**                      a process to request a log flush on a task queue.
+**                      the commit will happen once the commit thread wakes up.
+**                      The benefit here is the writer (with whatever
+**                      related locks it has) doesn't have to wait for the
+**                      log blocks to hit disk if it doesn't want to.
+*/
+
+#ifdef __KERNEL__
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#else
+
+#include "nokernel.h"
+
+#endif
+
+
+/* the number of mounted filesystems.  This is used to decide when to
+** start and kill the commit thread
+*/
+static int reiserfs_mounted_fs_count = 0 ;
+
+/* wake this up when you add something to the commit thread task queue */
+DECLARE_WAIT_QUEUE_HEAD(reiserfs_commit_thread_wait) ;
+
+/* wait on this if you need to be sure your task queue entries have been run */
+static DECLARE_WAIT_QUEUE_HEAD(reiserfs_commit_thread_done) ;
+DECLARE_TASK_QUEUE(reiserfs_commit_thread_tq) ;
+
+#define JOURNAL_TRANS_HALF 1018   /* must be correct to keep the desc and commit structs at 4k */
+
+/* cnode stat bits.  Move these into reiserfs_fs.h */
+
+#define BLOCK_FREED 2    /* this block was freed, and can't be written.
*/ +#define BLOCK_FREED_HOLDER 3 /* this block was freed during this transaction, and can't be written */ + +#define BLOCK_NEEDS_FLUSH 4 /* used in flush_journal_list */ + +/* flags for do_journal_end */ +#define FLUSH_ALL 1 /* flush commit and real blocks */ +#define COMMIT_NOW 2 /* end and commit this transaction */ +#define WAIT 4 /* wait for the log blocks to hit the disk*/ + +/* state bits for the journal */ +#define WRITERS_BLOCKED 1 /* set when new writers not allowed */ + +static int do_journal_end(struct reiserfs_transaction_handle *,struct super_block *,unsigned long nblocks,int flags) ; +static int flush_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ; +static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) ; +static int can_dirty(struct reiserfs_journal_cnode *cn) ; + +static void init_journal_hash(struct super_block *p_s_sb) { + memset(SB_JOURNAL(p_s_sb)->j_hash_table, 0, JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ; +} + +/* +** clears BH_Dirty and sticks the buffer on the clean list. Called because I can't allow refile_buffer to +** make schedule happen after I've freed a block. Look at remove_from_transaction and journal_mark_freed for +** more details. +*/ +static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) { + if (bh) { + clear_bit(BH_Dirty, &bh->b_state) ; +#if 0 + if (bh->b_list != BUF_CLEAN) { + reiserfs_file_buffer(bh, BUF_CLEAN) ; + } +#endif + } + return 0 ; +} + +static struct reiserfs_bitmap_node * +allocate_bitmap_node(struct super_block *p_s_sb) { + struct reiserfs_bitmap_node *bn ; + static int id = 0 ; + + bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_BUFFER) ; + if (!bn) { + return NULL ; + } + bn->data = kmalloc(p_s_sb->s_blocksize, GFP_BUFFER) ; + if (!bn->data) { + kfree(bn) ; + return NULL ; + } + bn->id = id++ ; + memset(bn->data, 0, p_s_sb->s_blocksize) ; + INIT_LIST_HEAD(&bn->list) ; + return bn ; +} + +static struct reiserfs_bitmap_node * +get_bitmap_node(struct super_block *p_s_sb) { + struct reiserfs_bitmap_node *bn = NULL; + struct list_head *entry = SB_JOURNAL(p_s_sb)->j_bitmap_nodes.next ; + + SB_JOURNAL(p_s_sb)->j_used_bitmap_nodes++ ; +repeat: + + if(entry != &SB_JOURNAL(p_s_sb)->j_bitmap_nodes) { + bn = list_entry(entry, struct reiserfs_bitmap_node, list) ; + list_del(entry) ; + memset(bn->data, 0, p_s_sb->s_blocksize) ; + SB_JOURNAL(p_s_sb)->j_free_bitmap_nodes-- ; + return bn ; + } + bn = allocate_bitmap_node(p_s_sb) ; + if (!bn) { + current->policy = SCHED_YIELD ; + schedule() ; + goto repeat ; + } + return bn ; +} +static inline void free_bitmap_node(struct super_block *p_s_sb, + struct reiserfs_bitmap_node *bn) { + SB_JOURNAL(p_s_sb)->j_used_bitmap_nodes-- ; + if (SB_JOURNAL(p_s_sb)->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) { + kfree(bn->data) ; + kfree(bn) ; + } else { + list_add(&bn->list, &SB_JOURNAL(p_s_sb)->j_bitmap_nodes) ; + SB_JOURNAL(p_s_sb)->j_free_bitmap_nodes++ ; + } +} + +static void allocate_bitmap_nodes(struct super_block *p_s_sb) { + int i ; + struct reiserfs_bitmap_node *bn = NULL ; + for (i = 0 ; i < REISERFS_MIN_BITMAP_NODES ; i++) { + bn = allocate_bitmap_node(p_s_sb) ; + if (bn) { + list_add(&bn->list, &SB_JOURNAL(p_s_sb)->j_bitmap_nodes) ; + SB_JOURNAL(p_s_sb)->j_free_bitmap_nodes++ ; + } else { + break ; // this is ok, we'll try again when more are needed + } + } +} + +static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block, + struct reiserfs_list_bitmap *jb) { + int 
bmap_nr = block / (p_s_sb->s_blocksize << 3) ; + int bit_nr = block % (p_s_sb->s_blocksize << 3) ; + + if (!jb->bitmaps[bmap_nr]) { + jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb) ; + } + set_bit(bit_nr, jb->bitmaps[bmap_nr]->data) ; + return 0 ; +} + +static void cleanup_bitmap_list(struct super_block *p_s_sb, + struct reiserfs_list_bitmap *jb) { + int i; + for (i = 0 ; i < SB_BMAP_NR(p_s_sb) ; i++) { + if (jb->bitmaps[i]) { + free_bitmap_node(p_s_sb, jb->bitmaps[i]) ; + jb->bitmaps[i] = NULL ; + } + } +} + +/* +** only call this on FS unmount. +*/ +static int free_list_bitmaps(struct super_block *p_s_sb, + struct reiserfs_list_bitmap *jb_array) { + int i ; + struct reiserfs_list_bitmap *jb ; + for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { + jb = jb_array + i ; + jb->journal_list = NULL ; + cleanup_bitmap_list(p_s_sb, jb) ; + vfree(jb->bitmaps) ; + jb->bitmaps = NULL ; + } + return 0; +} + +static int free_bitmap_nodes(struct super_block *p_s_sb) { + struct list_head *next = SB_JOURNAL(p_s_sb)->j_bitmap_nodes.next ; + struct reiserfs_bitmap_node *bn ; + + while(next != &SB_JOURNAL(p_s_sb)->j_bitmap_nodes) { + bn = list_entry(next, struct reiserfs_bitmap_node, list) ; + list_del(next) ; + kfree(bn->data) ; + kfree(bn) ; + next = SB_JOURNAL(p_s_sb)->j_bitmap_nodes.next ; + SB_JOURNAL(p_s_sb)->j_free_bitmap_nodes-- ; + } + + return 0 ; +} + +/* +** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps. +** jb_array is the array to be filled in. +*/ +int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb, + struct reiserfs_list_bitmap *jb_array, + int bmap_nr) { + int i ; + int failed = 0 ; + struct reiserfs_list_bitmap *jb ; + int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *) ; + + for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { + jb = jb_array + i ; + jb->journal_list = NULL ; + jb->bitmaps = vmalloc( mem ) ; + if (!jb->bitmaps) { + reiserfs_warning("clm-2000, unable to allocate bitmaps for journal lists\n") ; + failed = 1; + break ; + } + memset(jb->bitmaps, 0, mem) ; + } + if (failed) { + free_list_bitmaps(p_s_sb, jb_array) ; + return -1 ; + } + return 0 ; +} + +/* +** find an available list bitmap. If you can't find one, flush a commit list +** and try again +*/ +static struct reiserfs_list_bitmap * +get_list_bitmap(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) { + int i,j ; + struct reiserfs_list_bitmap *jb = NULL ; + + for (j = 0 ; j < (JOURNAL_NUM_BITMAPS * 3) ; j++) { + i = SB_JOURNAL(p_s_sb)->j_list_bitmap_index ; + SB_JOURNAL(p_s_sb)->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS ; + jb = SB_JOURNAL(p_s_sb)->j_list_bitmap + i ; + if (SB_JOURNAL(p_s_sb)->j_list_bitmap[i].journal_list) { + flush_commit_list(p_s_sb, SB_JOURNAL(p_s_sb)->j_list_bitmap[i].journal_list, 1) ; + if (!SB_JOURNAL(p_s_sb)->j_list_bitmap[i].journal_list) { + break ; + } + } else { + break ; + } + } + if (jb->journal_list) { /* double check to make sure if flushed correctly */ + return NULL ; + } + jb->journal_list = jl ; + return jb ; +} + +/* +** allocates a new chunk of X nodes, and links them all together as a list. 
+** Uses the cnode->next and cnode->prev pointers +** returns NULL on failure +*/ +static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) { + struct reiserfs_journal_cnode *head ; + int i ; + if (num_cnodes <= 0) { + return NULL ; + } + head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)) ; + if (!head) { + return NULL ; + } + memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode)) ; + head[0].prev = NULL ; + head[0].next = head + 1 ; + for (i = 1 ; i < num_cnodes; i++) { + head[i].prev = head + (i - 1) ; + head[i].next = head + (i + 1) ; /* if last one, overwrite it after the if */ + } + head[num_cnodes -1].next = NULL ; + return head ; +} + +/* +** pulls a cnode off the free list, or returns NULL on failure +*/ +static struct reiserfs_journal_cnode *get_cnode(struct super_block *p_s_sb) { + struct reiserfs_journal_cnode *cn ; + + reiserfs_check_lock_depth("get_cnode") ; + + if (SB_JOURNAL(p_s_sb)->j_cnode_free <= 0) { + return NULL ; + } + SB_JOURNAL(p_s_sb)->j_cnode_used++ ; + SB_JOURNAL(p_s_sb)->j_cnode_free-- ; + cn = SB_JOURNAL(p_s_sb)->j_cnode_free_list ; + if (!cn) { + return cn ; + } + if (cn->next) { + cn->next->prev = NULL ; + } + SB_JOURNAL(p_s_sb)->j_cnode_free_list = cn->next ; + memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; + return cn ; +} + +/* +** returns a cnode to the free list +*/ +static void free_cnode(struct super_block *p_s_sb, struct reiserfs_journal_cnode *cn) { + + reiserfs_check_lock_depth("free_cnode") ; + + SB_JOURNAL(p_s_sb)->j_cnode_used-- ; + SB_JOURNAL(p_s_sb)->j_cnode_free++ ; + /* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */ + cn->next = SB_JOURNAL(p_s_sb)->j_cnode_free_list ; + if (SB_JOURNAL(p_s_sb)->j_cnode_free_list) { + SB_JOURNAL(p_s_sb)->j_cnode_free_list->prev = cn ; + } + cn->prev = NULL ; /* not needed with the memset, but I might kill the memset, and forget to do this */ + SB_JOURNAL(p_s_sb)->j_cnode_free_list = cn ; +} + +static int clear_prepared_bits(struct buffer_head *bh) { + clear_bit(BH_JPrepared, &bh->b_state) ; + return 0 ; +} + +/* buffer is in current transaction */ +inline int buffer_journaled(struct buffer_head *bh) { + if (bh) + return test_bit(BH_JDirty, &bh->b_state) ; + else + return 0 ; +} + +/* disk block was taken off free list before being in a finished transation, or written to disk +** journal_new blocks can be reused immediately, for any purpose +*/ +inline int buffer_journal_new(struct buffer_head *bh) { + if (bh) + return test_bit(BH_JNew, &bh->b_state) ; + else + return 0 ; +} + +inline int mark_buffer_journal_new(struct buffer_head *bh) { + if (bh) { + set_bit(BH_JNew, &bh->b_state) ; + } + return 0 ; +} + +inline int mark_buffer_not_journaled(struct buffer_head *bh) { + if (bh) + clear_bit(BH_JDirty, &bh->b_state) ; + return 0 ; +} + +/* utility function to force a BUG if it is called without the big +** kernel lock held. 
caller is the string printed just before calling BUG() +*/ +void reiserfs_check_lock_depth(char *caller) { +#ifdef CONFIG_SMP + if (current->lock_depth < 0) { + printk("%s called without kernel lock held\n", caller) ; + show_reiserfs_locks() ; + BUG() ; + } +#else + ; +#endif +} + +/* return a cnode with same dev, block number and size in table, or null if not found */ +static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct reiserfs_journal_cnode **table, + kdev_t dev,long bl,int size) { + struct reiserfs_journal_cnode *cn ; + cn = journal_hash(table, dev, bl) ; + while(cn) { + if ((cn->blocknr == bl) && (cn->dev == dev)) + return cn ; + cn = cn->hnext ; + } + return (struct reiserfs_journal_cnode *)0 ; +} + +/* returns a cnode with same size, block number and dev as bh in the current transaction hash. NULL if not found */ +static inline struct reiserfs_journal_cnode *get_journal_hash(struct super_block *p_s_sb, struct buffer_head *bh) { + struct reiserfs_journal_cnode *cn ; + if (bh) { + cn = get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_hash_table, bh->b_dev, bh->b_blocknr, bh->b_size) ; + } + else { + return (struct reiserfs_journal_cnode *)0 ; + } + return cn ; +} + +/* once upon a time, the journal would deadlock. a lot. Now, when +** CONFIG_REISERFS_CHECK is defined, anytime someone enters a +** transaction, it pushes itself into this ugly static list, and pops +** itself off before calling journal_end. I made a SysRq key to dump +** the list, and tell me what the writers are when I'm deadlocked. */ + + /* are you depending on the compiler + to optimize this function away + everywhere it is called? It is not + obvious how this works, but I + suppose debugging code need not be + clear. -Hans */ +static char *journal_writers[512] ; +int push_journal_writer(char *s) { +#ifdef CONFIG_REISERFS_CHECK + int i ; + for (i = 0 ; i < 512 ; i++) { + if (!journal_writers[i]) { + journal_writers[i] = s ; + return i ; + } + } + return -1 ; +#else + return 0 ; +#endif +} +int pop_journal_writer(int index) { +#ifdef CONFIG_REISERFS_CHECK + if (index >= 0) { + journal_writers[index] = NULL ; + } +#endif + return 0 ; +} + +int dump_journal_writers(void) { + int i ; + for (i = 0 ; i < 512 ; i++) { + if (journal_writers[i]) { + printk("%d: %s\n", i, journal_writers[i]) ; + } + } + return 0 ; +} + +/* +** this actually means 'can this block be reallocated yet?'. If you set search_all, a block can only be allocated +** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever +** being overwritten by a replay after crashing. +** +** If you don't set search_all, a block can only be allocated if it is not in the current transaction. Since deleting +** a block removes it from the current transaction, this case should never happen. If you don't set search_all, make +** sure you never write the block without logging it. +** +** next_zero_bit is a suggestion about the next block to try for find_forward. +** when bl is rejected because it is set in a journal list bitmap, we search +** for the next zero bit in the bitmap that rejected bl. Then, we return that +** through next_zero_bit for find_forward to try. 
+** +** Just because we return something in next_zero_bit does not mean we won't +** reject it on the next call to reiserfs_in_journal +** +*/ +int reiserfs_in_journal(struct super_block *p_s_sb, kdev_t dev, + unsigned long bl, int size, int search_all, + unsigned long *next_zero_bit) { + struct reiserfs_journal_cnode *cn ; + struct reiserfs_list_bitmap *jb ; + int i ; + int bmap_nr = bl / (p_s_sb->s_blocksize << 3) ; + int bit_nr = bl % (p_s_sb->s_blocksize << 3) ; + int tmp_bit ; + + *next_zero_bit = 0 ; /* always start this at zero. */ + + /* we aren't logging all blocks are safe for reuse */ + if (reiserfs_dont_log(p_s_sb)) { + return 0 ; + } + + /* If we aren't doing a search_all, this is a metablock, and it will be logged before use. + ** if we crash before the transaction that freed it commits, this transaction won't + ** have committed either, and the block will never be written + */ + if (search_all) { + for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { + jb = SB_JOURNAL(p_s_sb)->j_list_bitmap + i ; + if (jb->journal_list && jb->bitmaps[bmap_nr] && + test_bit(bit_nr, jb->bitmaps[bmap_nr]->data)) { + tmp_bit = find_next_zero_bit((unsigned long *) + (jb->bitmaps[bmap_nr]->data), + p_s_sb->s_blocksize << 3, bit_nr+1) ; + *next_zero_bit = bmap_nr * (p_s_sb->s_blocksize << 3) + tmp_bit ; + return 1 ; + } + } + } + + /* is it in any old transactions? */ + if (search_all && (cn = get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_list_hash_table, dev,bl,size))) { + return 1; + } + + /* is it in the current transaction. This should never happen */ + if ((cn = get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_hash_table, dev,bl,size))) { + return 1; + } + + /* safe for reuse */ + return 0 ; +} + +/* insert cn into table +*/ +inline void insert_journal_hash(struct reiserfs_journal_cnode **table, struct reiserfs_journal_cnode *cn) { + struct reiserfs_journal_cnode *cn_orig ; + + cn_orig = journal_hash(table, cn->dev, cn->blocknr) ; + cn->hnext = cn_orig ; + cn->hprev = NULL ; + if (cn_orig) { + cn_orig->hprev = cn ; + } + journal_hash(table, cn->dev, cn->blocknr) = cn ; +} + +/* lock the current transaction */ +inline static void lock_journal(struct super_block *p_s_sb) { + while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_wlock)) > 0) { + sleep_on(&(SB_JOURNAL(p_s_sb)->j_wait)) ; + } + atomic_set(&(SB_JOURNAL(p_s_sb)->j_wlock), 1) ; +} + +/* unlock the current transaction */ +inline static void unlock_journal(struct super_block *p_s_sb) { + atomic_dec(&(SB_JOURNAL(p_s_sb)->j_wlock)) ; + wake_up(&(SB_JOURNAL(p_s_sb)->j_wait)) ; +} + +/* +** this used to be much more involved, and I'm keeping it just in case things get ugly again. +** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a +** transaction. +*/ +static void cleanup_freed_for_journal_list(struct super_block *p_s_sb, struct reiserfs_journal_list *jl) { + + struct reiserfs_list_bitmap *jb = jl->j_list_bitmap ; + if (jb) { + cleanup_bitmap_list(p_s_sb, jb) ; + } + jl->j_list_bitmap->journal_list = NULL ; + jl->j_list_bitmap = NULL ; +} + +/* +** if this journal list still has commit blocks unflushed, send them to disk. 
+**
+** log areas must be flushed in order (transaction 2 can't commit before transaction 1)
+** Before the commit block can be written, every other log block must be safely on disk
+**
+*/
+static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) {
+  int i, count ;
+  int index = 0 ;
+  int bn ;
+  int retry_count = 0 ;
+  int orig_commit_left = 0 ;
+  struct buffer_head *tbh = NULL ;
+  struct reiserfs_journal_list *other_jl ;
+
+  reiserfs_check_lock_depth("flush_commit_list") ;
+
+  if (atomic_read(&jl->j_older_commits_done)) {
+    return 0 ;
+  }
+
+  /* before we can put our commit blocks on disk, we have to make sure everyone older than
+  ** us is on disk too
+  */
+  if (jl->j_len <= 0) {
+    return 0 ;
+  }
+  if (flushall) {
+    /* we _must_ make sure the transactions are committed in order.  Start with the
+    ** index after this one, wrap all the way around
+    */
+    index = (jl - SB_JOURNAL_LIST(s)) + 1 ;
+    for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) {
+      other_jl = SB_JOURNAL_LIST(s) + ( (index + i) % JOURNAL_LIST_COUNT) ;
+      if (other_jl && other_jl != jl && other_jl->j_len > 0 && other_jl->j_trans_id > 0 &&
+          other_jl->j_trans_id <= jl->j_trans_id && (atomic_read(&(jl->j_older_commits_done)) == 0)) {
+        flush_commit_list(s, other_jl, 0) ;
+      }
+    }
+  }
+
+  count = 0 ;
+  /* don't flush the commit list for the current transaction */
+  if (jl == ((SB_JOURNAL_LIST(s) + SB_JOURNAL_LIST_INDEX(s)))) {
+    return 0 ;
+  }
+
+  /* make sure nobody is trying to flush this one at the same time */
+  if (atomic_read(&(jl->j_commit_flushing))) {
+    sleep_on(&(jl->j_commit_wait)) ;
+    if (flushall) {
+      atomic_set(&(jl->j_older_commits_done), 1) ;
+    }
+    return 0 ;
+  }
+
+  /* this commit is done, exit */
+  if (atomic_read(&(jl->j_commit_left)) <= 0) {
+    if (flushall) {
+      atomic_set(&(jl->j_older_commits_done), 1) ;
+    }
+    return 0 ;
+  }
+  /* keeps others from flushing while we are flushing */
+  atomic_set(&(jl->j_commit_flushing), 1) ;
+
+
+  if (jl->j_len > JOURNAL_TRANS_MAX) {
+    reiserfs_panic(s, "journal-512: flush_commit_list: length is %lu, list number %d\n", jl->j_len, jl - SB_JOURNAL_LIST(s)) ;
+    return 0 ;
+  }
+
+  orig_commit_left = atomic_read(&(jl->j_commit_left)) ;
+
+  /* start by checking all the commit blocks in this transaction.
+  ** Add anyone not on disk into tbh.
Stop checking once commit_left <= 1, because that means we + ** only have the commit block left + */ +retry: + count = 0 ; + for (i = 0 ; atomic_read(&(jl->j_commit_left)) > 1 && i < (jl->j_len + 1) ; i++) { /* everything but commit_bh */ + bn = reiserfs_get_journal_block(s) + (jl->j_start+i) % JOURNAL_BLOCK_COUNT; + tbh = get_hash_table(s->s_dev, bn, s->s_blocksize) ; + +/* kill this sanity check */ +if (count > (orig_commit_left + 2)) { +reiserfs_panic(s, "journal-539: flush_commit_list: BAD count(%d) > orig_commit_left(%d)!\n", count, orig_commit_left) ; +} + if (tbh) { + if (buffer_locked(tbh)) { /* wait on it, redo it just to make sure */ + wait_on_buffer(tbh) ; + if (!buffer_uptodate(tbh)) { + reiserfs_panic(s, "journal-584, buffer write failed\n") ; + } + } + if (buffer_dirty(tbh)) { + printk("journal-569: flush_commit_list, block already dirty!\n") ; + } else { + mark_buffer_dirty(tbh) ; + } + ll_rw_block(WRITE, 1, &tbh) ; + count++ ; + atomic_dec(&(tbh->b_count)) ; /* once for our get_hash */ + } + } + + /* wait on everyone in tbh before writing commit block*/ + if (count > 0) { + for (i = 0 ; atomic_read(&(jl->j_commit_left)) > 1 && + i < (jl->j_len + 1) ; i++) { /* everything but commit_bh */ + bn = reiserfs_get_journal_block(s) + (jl->j_start + i) % JOURNAL_BLOCK_COUNT ; + tbh = get_hash_table(s->s_dev, bn, s->s_blocksize) ; + + wait_on_buffer(tbh) ; + if (!buffer_uptodate(tbh)) { + reiserfs_panic(s, "journal-601, buffer write failed\n") ; + } + atomic_dec(&(tbh->b_count)) ; /* once for our get_hash */ + bforget(tbh) ; /* once due to original getblk in do_journal_end */ + atomic_dec(&(jl->j_commit_left)) ; + } + } + + if (atomic_read(&(jl->j_commit_left)) != 1) { /* just the commit_bh left, flush it without calling getblk for everyone */ + if (retry_count < 2) { + printk("journal-582: flush_commit_list, not all log blocks on disk yet, trying again\n") ; + retry_count++ ; + goto retry; + } + reiserfs_panic(s, "journal-563: flush_commit_list: BAD, j_commit_left is %lu, should be 1\n", + atomic_read(&(jl->j_commit_left))); + } + + mark_buffer_dirty(jl->j_commit_bh) ; + ll_rw_block(WRITE, 1, &(jl->j_commit_bh)) ; + wait_on_buffer(jl->j_commit_bh) ; + if (!buffer_uptodate(jl->j_commit_bh)) { + reiserfs_panic(s, "journal-615: buffer write failed\n") ; + } + atomic_dec(&(jl->j_commit_left)) ; + bforget(jl->j_commit_bh) ; + + /* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */ + cleanup_freed_for_journal_list(s, jl) ; + + if (flushall) { + atomic_set(&(jl->j_older_commits_done), 1) ; + } + atomic_set(&(jl->j_commit_flushing), 0) ; + wake_up(&(jl->j_commit_wait)) ; + return 0 ; +} + +/* +** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or +** returns NULL if it can't find anything +*/ +static struct reiserfs_journal_list *find_newer_jl_for_cn(struct reiserfs_journal_cnode *cn) { + kdev_t dev = cn->dev; + unsigned long blocknr = cn->blocknr ; + + cn = cn->hprev ; + while(cn) { + if (cn->dev == dev && cn->blocknr == blocknr && cn->jlist) { + return cn->jlist ; + } + cn = cn->hprev ; + } + return NULL ; +} + + +/* +** once all the real blocks have been flushed, it is safe to remove them from the +** journal list for this transaction. Aside from freeing the cnode, this also allows the +** block to be reallocated for data blocks if it had been deleted. 
+*/ +static void remove_all_from_journal_list(struct super_block *p_s_sb, struct reiserfs_journal_list *jl, int debug) { + struct buffer_head fake_bh ; + struct reiserfs_journal_cnode *cn, *last ; + cn = jl->j_realblock ; + + /* which is better, to lock once around the whole loop, or + ** to lock for each call to remove_from_journal_list? + */ + while(cn) { + if (cn->blocknr != 0) { + if (debug) { + printk("block %lu, bh is %d, state %d\n", cn->blocknr, cn->bh ? 1: 0, + cn->state) ; + } + fake_bh.b_blocknr = cn->blocknr ; + fake_bh.b_dev = cn->dev ; + cn->state = 0 ; + remove_from_journal_list(p_s_sb, jl, &fake_bh, 1) ; + } + last = cn ; + cn = cn->next ; + free_cnode(p_s_sb, last) ; + } + jl->j_realblock = NULL ; +} + +/* +** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block. +** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start +** releasing blocks in this transaction for reuse as data blocks. +** called by flush_journal_list, before it calls remove_all_from_journal_list +** +*/ +static int update_journal_header_block(struct super_block *p_s_sb, unsigned long offset, unsigned long trans_id) { + struct reiserfs_journal_header *jh ; + if (trans_id >= SB_JOURNAL(p_s_sb)->j_last_flush_trans_id) { + if (buffer_locked((SB_JOURNAL(p_s_sb)->j_header_bh))) { + wait_on_buffer((SB_JOURNAL(p_s_sb)->j_header_bh)) ; + if (!buffer_uptodate(SB_JOURNAL(p_s_sb)->j_header_bh)) { + reiserfs_panic(p_s_sb, "journal-699: buffer write failed\n") ; + } + } + SB_JOURNAL(p_s_sb)->j_last_flush_trans_id = trans_id ; + SB_JOURNAL(p_s_sb)->j_first_unflushed_offset = offset ; + jh = (struct reiserfs_journal_header *)(SB_JOURNAL(p_s_sb)->j_header_bh->b_data) ; + jh->j_last_flush_trans_id = cpu_to_le32(trans_id) ; + jh->j_first_unflushed_offset = cpu_to_le32(offset) ; + jh->j_mount_id = cpu_to_le32(SB_JOURNAL(p_s_sb)->j_mount_id) ; + set_bit(BH_Dirty, &(SB_JOURNAL(p_s_sb)->j_header_bh->b_state)) ; + ll_rw_block(WRITE, 1, &(SB_JOURNAL(p_s_sb)->j_header_bh)) ; + wait_on_buffer((SB_JOURNAL(p_s_sb)->j_header_bh)) ; + if (!buffer_uptodate(SB_JOURNAL(p_s_sb)->j_header_bh)) { + reiserfs_panic(p_s_sb, "journal-712: buffer write failed\n") ; + } + } + return 0 ; +} + +/* +** flush any and all journal lists older than you are +** can only be called from flush_journal_list +*/ +static int flush_older_journal_lists(struct super_block *p_s_sb, struct reiserfs_journal_list *jl, unsigned long trans_id) { + int i, index ; + struct reiserfs_journal_list *other_jl ; + + index = jl - SB_JOURNAL_LIST(p_s_sb) ; + for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) { + other_jl = SB_JOURNAL_LIST(p_s_sb) + ((index + i) % JOURNAL_LIST_COUNT) ; + if (other_jl && other_jl->j_len > 0 && + other_jl->j_trans_id > 0 && + other_jl->j_trans_id < trans_id && + other_jl != jl) { + /* do not flush all */ + flush_journal_list(p_s_sb, other_jl, 0) ; + } + } + return 0 ; +} + +static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) { + if (buffer_journaled(bh)) { + reiserfs_warning("clm-2084: pinned buffer %u:%s sent to disk\n", + bh->b_blocknr, kdevname(bh->b_dev)) ; + } + mark_buffer_uptodate(bh, uptodate) ; + unlock_buffer(bh) ; +} +static void submit_logged_buffer(struct buffer_head *bh) { + lock_buffer(bh) ; + bh->b_end_io = reiserfs_end_buffer_io_sync ; + mark_buffer_notjournal_new(bh) ; + clear_bit(BH_Dirty, &bh->b_state) ; + submit_bh(WRITE, bh) ; +} + +/* flush a journal list, both commit and real blocks +** +** 
always set flushall to 1, unless you are calling from inside +** flush_journal_list +** +** IMPORTANT. This can only be called while there are no journal writers, +** and the journal is locked. That means it can only be called from +** do_journal_end, or by journal_release +*/ +static int flush_journal_list(struct super_block *s, + struct reiserfs_journal_list *jl, int flushall) { + struct reiserfs_journal_list *pjl ; + struct reiserfs_journal_cnode *cn, *last ; + int count ; + int was_jwait = 0 ; + int was_dirty = 0 ; + struct buffer_head *saved_bh ; + unsigned long j_len_saved = jl->j_len ; + + if (j_len_saved <= 0) { + return 0 ; + } + + if (atomic_read(&SB_JOURNAL(s)->j_wcount) != 0) { + reiserfs_warning("clm-2048: flush_journal_list called with wcount %d\n", + atomic_read(&SB_JOURNAL(s)->j_wcount)) ; + } + /* if someone is getting the commit list, we must wait for them */ + while (atomic_read(&(jl->j_commit_flushing))) { + sleep_on(&(jl->j_commit_wait)) ; + } + /* if someone is flushing this list, we must wait for them */ + while (atomic_read(&(jl->j_flushing))) { + sleep_on(&(jl->j_flush_wait)) ; + } + + /* this list is now ours, we can change anything we want */ + atomic_set(&(jl->j_flushing), 1) ; + + count = 0 ; + if (j_len_saved > JOURNAL_TRANS_MAX) { + reiserfs_panic(s, "journal-715: flush_journal_list, length is %lu, list number %d\n", j_len_saved, jl - SB_JOURNAL_LIST(s)) ; + atomic_dec(&(jl->j_flushing)) ; + return 0 ; + } + + /* if all the work is already done, get out of here */ + if (atomic_read(&(jl->j_nonzerolen)) <= 0 && + atomic_read(&(jl->j_commit_left)) <= 0) { + goto flush_older_and_return ; + } + + /* start by putting the commit list on disk. This will also flush + ** the commit lists of any olders transactions + */ + flush_commit_list(s, jl, 1) ; + + /* are we done now? */ + if (atomic_read(&(jl->j_nonzerolen)) <= 0 && + atomic_read(&(jl->j_commit_left)) <= 0) { + goto flush_older_and_return ; + } + + /* loop through each cnode, see if we need to write it, + ** or wait on a more recent transaction, or just ignore it + */ + if (atomic_read(&(SB_JOURNAL(s)->j_wcount)) != 0) { + reiserfs_panic(s, "journal-844: panic journal list is flushing, wcount is not 0\n") ; + } + cn = jl->j_realblock ; + while(cn) { + was_jwait = 0 ; + was_dirty = 0 ; + saved_bh = NULL ; + /* blocknr of 0 is no longer in the hash, ignore it */ + if (cn->blocknr == 0) { + goto free_cnode ; + } + pjl = find_newer_jl_for_cn(cn) ; + /* the order is important here. We check pjl to make sure we + ** don't clear BH_JDirty_wait if we aren't the one writing this + ** block to disk + */ + if (!pjl && cn->bh) { + saved_bh = cn->bh ; + + /* we do this to make sure nobody releases the buffer while + ** we are working with it + */ + atomic_inc(&(saved_bh->b_count)) ; + + if (buffer_journal_dirty(saved_bh)) { + was_jwait = 1 ; + mark_buffer_notjournal_dirty(saved_bh) ; + /* brelse the inc from journal_mark_dirty */ + atomic_dec(&(saved_bh->b_count)) ; + } + if (can_dirty(cn)) { + was_dirty = 1 ; + } + } + + /* if someone has this block in a newer transaction, just make + ** sure they are commited, and don't try writing it to disk + */ + if (pjl) { + flush_commit_list(s, pjl, 1) ; + goto free_cnode ; + } + + /* bh == NULL when the block got to disk on its own, OR, + ** the block got freed in a future transaction + */ + if (saved_bh == NULL) { + goto free_cnode ; + } + + /* this should never happen. 
kupdate_one_transaction has this list + ** locked while it works, so we should never see a buffer here that + ** is not marked JDirty_wait + */ + if ((!was_jwait) && !buffer_locked(saved_bh)) { +printk("journal-813: BAD! buffer %lu %cdirty %cjwait, not in a newer tranasction\n", saved_bh->b_blocknr, + was_dirty ? ' ' : '!', was_jwait ? ' ' : '!') ; + } + /* kupdate_one_transaction waits on the buffers it is writing, so we + ** should never see locked buffers here + */ + if (buffer_locked(saved_bh)) { + printk("clm-2083: locked buffer %lu in flush_journal_list\n", + saved_bh->b_blocknr) ; + wait_on_buffer(saved_bh) ; + if (!buffer_uptodate(saved_bh)) { + reiserfs_panic(s, "journal-923: buffer write failed\n") ; + } + } + if (was_dirty) { + /* we inc again because saved_bh gets decremented at free_cnode */ + atomic_inc(&(saved_bh->b_count)) ; + set_bit(BLOCK_NEEDS_FLUSH, &cn->state) ; + submit_logged_buffer(saved_bh) ; + count++ ; + } else { + printk("clm-2082: Unable to flush buffer %lu in flush_journal_list\n", + saved_bh->b_blocknr) ; + } +free_cnode: + last = cn ; + cn = cn->next ; + if (saved_bh) { + /* we incremented this to keep others from taking the buffer head away */ + atomic_dec(&(saved_bh->b_count)); + if (atomic_read(&(saved_bh->b_count)) < 0) { + printk("journal-945: saved_bh->b_count < 0") ; + } + } + } + if (count > 0) { + cn = jl->j_realblock ; + while(cn) { + if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { + if (!cn->bh) { + reiserfs_panic(s, "journal-1011: cn->bh is NULL\n") ; + } + wait_on_buffer(cn->bh) ; + if (!cn->bh) { + reiserfs_panic(s, "journal-1012: cn->bh is NULL\n") ; + } + if (!buffer_uptodate(cn->bh)) { + reiserfs_panic(s, "journal-949: buffer write failed\n") ; + } + refile_buffer(cn->bh) ; + brelse(cn->bh) ; + } + cn = cn->next ; + } + } + +flush_older_and_return: + /* before we can update the journal header block, we _must_ flush all + ** real blocks from all older transactions to disk. This is because + ** once the header block is updated, this transaction will not be + ** replayed after a crash + */ + if (flushall) { + flush_older_journal_lists(s, jl, jl->j_trans_id) ; + } + + /* before we can remove everything from the hash tables for this + ** transaction, we must make sure it can never be replayed + ** + ** since we are only called from do_journal_end, we know for sure there + ** are no allocations going on while we are flushing journal lists. 
So, + ** we only need to update the journal header block for the last list + ** being flushed + */ + if (flushall) { + update_journal_header_block(s, (jl->j_start + jl->j_len + 2) % JOURNAL_BLOCK_COUNT, jl->j_trans_id) ; + } + remove_all_from_journal_list(s, jl, 0) ; + jl->j_len = 0 ; + atomic_set(&(jl->j_nonzerolen), 0) ; + jl->j_start = 0 ; + jl->j_realblock = NULL ; + jl->j_commit_bh = NULL ; + jl->j_trans_id = 0 ; + atomic_dec(&(jl->j_flushing)) ; + wake_up(&(jl->j_flush_wait)) ; + return 0 ; +} + + +static int kupdate_one_transaction(struct super_block *s, + struct reiserfs_journal_list *jl) +{ + struct reiserfs_journal_list *pjl ; /* previous list for this cn */ + struct reiserfs_journal_cnode *cn, *walk_cn ; + unsigned long blocknr ; + int run = 0 ; + int orig_trans_id = jl->j_trans_id ; + struct buffer_head *saved_bh ; + int ret = 0 ; + + /* if someone is getting the commit list, we must wait for them */ + while (atomic_read(&(jl->j_commit_flushing))) { + sleep_on(&(jl->j_commit_wait)) ; + } + /* if someone is flushing this list, we must wait for them */ + while (atomic_read(&(jl->j_flushing))) { + sleep_on(&(jl->j_flush_wait)) ; + } + /* was it flushed while we slept? */ + if (jl->j_len <= 0 || jl->j_trans_id != orig_trans_id) { + return 0 ; + } + + /* this list is now ours, we can change anything we want */ + atomic_set(&(jl->j_flushing), 1) ; + +loop_start: + cn = jl->j_realblock ; + while(cn) { + saved_bh = NULL ; + /* if the blocknr == 0, this has been cleared from the hash, + ** skip it + */ + if (cn->blocknr == 0) { + goto next ; + } + /* look for a more recent transaction that logged this + ** buffer. Only the most recent transaction with a buffer in + ** it is allowed to send that buffer to disk + */ + pjl = find_newer_jl_for_cn(cn) ; + if (run == 0 && !pjl && cn->bh && buffer_journal_dirty(cn->bh) && + can_dirty(cn)) + { + if (!test_bit(BH_JPrepared, &cn->bh->b_state)) { + set_bit(BLOCK_NEEDS_FLUSH, &cn->state) ; + submit_logged_buffer(cn->bh) ; + } else { + /* someone else is using this buffer. We can't + ** send it to disk right now because they might + ** be changing/logging it. + */ + ret = 1 ; + } + } else if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) { + clear_bit(BLOCK_NEEDS_FLUSH, &cn->state) ; + if (!pjl && cn->bh) { + wait_on_buffer(cn->bh) ; + } + /* check again, someone could have logged while we scheduled */ + pjl = find_newer_jl_for_cn(cn) ; + + /* before the JDirty_wait bit is set, the + ** buffer is added to the hash list. So, if we are + ** run in the middle of a do_journal_end, we will notice + ** if this buffer was logged and added from the latest + ** transaction. In this case, we don't want to decrement + ** b_count + */ + if (!pjl && cn->bh && buffer_journal_dirty(cn->bh)) { + blocknr = cn->blocknr ; + walk_cn = cn ; + saved_bh= cn->bh ; + /* update all older transactions to show this block + ** was flushed + */ + mark_buffer_notjournal_dirty(cn->bh) ; + while(walk_cn) { + if (walk_cn->bh && walk_cn->blocknr == blocknr && + walk_cn->dev == cn->dev) { + if (walk_cn->jlist) { + atomic_dec(&(walk_cn->jlist->j_nonzerolen)) ; + } + walk_cn->bh = NULL ; + } + walk_cn = walk_cn->hnext ; + } + if (atomic_read(&saved_bh->b_count) < 1) { + reiserfs_warning("clm-2081: bad count on %lu\n", + saved_bh->b_blocknr) ; + } + brelse(saved_bh) ; + } + } + /* + ** if the more recent transaction is committed to the log, + ** this buffer can be considered flushed. Decrement our + ** counters to reflect one less buffer that needs writing. 
+ ** + ** note, this relies on all of the above code being + ** schedule free once pjl comes back non-null. + */ + if (pjl && cn->bh && atomic_read(&pjl->j_commit_left) == 0) { + atomic_dec(&cn->jlist->j_nonzerolen) ; + cn->bh = NULL ; + } +next: + cn = cn->next ; + } + /* the first run through the loop sends all the dirty buffers to + ** ll_rw_block. + ** the second run through the loop does all the accounting + */ + if (run++ == 0) { + goto loop_start ; + } + + atomic_set(&(jl->j_flushing), 0) ; + wake_up(&(jl->j_flush_wait)) ; + return ret ; +} +/* since we never give dirty buffers to bdflush/kupdate, we have to +** flush them ourselves. This runs through the journal lists, finds +** old metadata in need of flushing and sends it to disk. +** this does not end transactions, commit anything, or free +** cnodes. +** +** returns the highest transaction id that was flushed last time +*/ +static unsigned long reiserfs_journal_kupdate(struct super_block *s) { + struct reiserfs_journal_list *jl ; + int i ; + int start ; + time_t age ; + int ret = 0 ; + + start = SB_JOURNAL_LIST_INDEX(s) ; + + /* safety check to prevent flush attempts during a mount */ + if (start < 0) { + return 0 ; + } + i = (start + 1) % JOURNAL_LIST_COUNT ; + while(i != start) { + jl = SB_JOURNAL_LIST(s) + i ; + age = CURRENT_TIME - jl->j_timestamp ; + if (jl->j_len > 0 && // age >= (JOURNAL_MAX_COMMIT_AGE * 2) && + atomic_read(&(jl->j_nonzerolen)) > 0 && + atomic_read(&(jl->j_commit_left)) == 0) { + + if (jl->j_trans_id == SB_JOURNAL(s)->j_trans_id) { + break ; + } + /* if ret was already 1, we want to preserve that */ + ret |= kupdate_one_transaction(s, jl) ; + } + if (atomic_read(&(jl->j_nonzerolen)) > 0) { + ret |= 1 ; + } + i = (i + 1) % JOURNAL_LIST_COUNT ; + } + return ret ; +} + +/* +** removes any nodes in table with name block and dev as bh. +** only touchs the hnext and hprev pointers. +*/ +void remove_journal_hash(struct reiserfs_journal_cnode **table, struct reiserfs_journal_list *jl,struct buffer_head *bh, + int remove_freed){ + struct reiserfs_journal_cnode *cur ; + struct reiserfs_journal_cnode **head ; + + if (!bh) + return ; + + head= &(journal_hash(table, bh->b_dev, bh->b_blocknr)) ; + if (!head) { + return ; + } + cur = *head ; + while(cur) { + if (cur->blocknr == bh->b_blocknr && cur->dev == bh->b_dev && (jl == NULL || jl == cur->jlist) && + (!test_bit(BLOCK_FREED, &cur->state) || remove_freed)) { + if (cur->hnext) { + cur->hnext->hprev = cur->hprev ; + } + if (cur->hprev) { + cur->hprev->hnext = cur->hnext ; + } else { + *head = cur->hnext ; + } + cur->blocknr = 0 ; + cur->dev = 0 ; + cur->state = 0 ; + if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */ + atomic_dec(&(cur->jlist->j_nonzerolen)) ; + cur->bh = NULL ; + cur->jlist = NULL ; + } + cur = cur->hnext ; + } +} + +static void free_journal_ram(struct super_block *p_s_sb) { + vfree(SB_JOURNAL(p_s_sb)->j_cnode_free_orig) ; + free_list_bitmaps(p_s_sb, SB_JOURNAL(p_s_sb)->j_list_bitmap) ; + free_bitmap_nodes(p_s_sb) ; /* must be after free_list_bitmaps */ + if (SB_JOURNAL(p_s_sb)->j_header_bh) { + brelse(SB_JOURNAL(p_s_sb)->j_header_bh) ; + } + vfree(SB_JOURNAL(p_s_sb)) ; +} + +/* +** call on unmount. Only set error to 1 if you haven't made your way out +** of read_super() yet. Any other caller must keep error at 0. 
+*/ +static int do_journal_release(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, int error) { + struct reiserfs_transaction_handle myth ; + + /* we only want to flush out transactions if we were called with error == 0 + */ + if (!error && !(p_s_sb->s_flags & MS_RDONLY)) { + /* end the current trans */ + do_journal_end(th, p_s_sb,10, FLUSH_ALL) ; + + /* make sure something gets logged to force our way into the flush code */ + journal_join(&myth, p_s_sb, 1) ; + reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; + journal_mark_dirty(&myth, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; + do_journal_end(&myth, p_s_sb,1, FLUSH_ALL) ; + } + + /* we decrement before we wake up, because the commit thread dies off + ** when it has been woken up and the count is <= 0 + */ + reiserfs_mounted_fs_count-- ; + wake_up(&reiserfs_commit_thread_wait) ; + sleep_on(&reiserfs_commit_thread_done) ; + + free_journal_ram(p_s_sb) ; + + return 0 ; +} + +/* +** call on unmount. flush all journal trans, release all alloc'd ram +*/ +int journal_release(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb) { + return do_journal_release(th, p_s_sb, 0) ; +} +/* +** only call from an error condition inside reiserfs_read_super! +*/ +int journal_release_error(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb) { + return do_journal_release(th, p_s_sb, 1) ; +} + +/* compares description block with commit block. returns 1 if they differ, 0 if they are the same */ +static int journal_compare_desc_commit(struct super_block *p_s_sb, struct reiserfs_journal_desc *desc, + struct reiserfs_journal_commit *commit) { + if (le32_to_cpu(commit->j_trans_id) != le32_to_cpu(desc->j_trans_id) || + le32_to_cpu(commit->j_len) != le32_to_cpu(desc->j_len) || + le32_to_cpu(commit->j_len) > JOURNAL_TRANS_MAX || + le32_to_cpu(commit->j_len) <= 0 + ) { + return 1 ; + } + return 0 ; +} +/* returns 0 if it did not find a description block +** returns -1 if it found a corrupt commit block +** returns 1 if both desc and commit were valid +*/ +static int journal_transaction_is_valid(struct super_block *p_s_sb, struct buffer_head *d_bh, unsigned long *oldest_invalid_trans_id, unsigned long *newest_mount_id) { + struct reiserfs_journal_desc *desc ; + struct reiserfs_journal_commit *commit ; + struct buffer_head *c_bh ; + unsigned long offset ; + + desc = (struct reiserfs_journal_desc *)d_bh->b_data ; + if (le32_to_cpu(desc->j_len) > 0 && !memcmp(desc->j_magic, JOURNAL_DESC_MAGIC, 8)) { + if (oldest_invalid_trans_id && *oldest_invalid_trans_id && le32_to_cpu(desc->j_trans_id) > *oldest_invalid_trans_id) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-986: transaction " + "is valid returning because trans_id %d is greater than " + "oldest_invalid %lu\n", le32_to_cpu(desc->j_trans_id), + *oldest_invalid_trans_id); + return 0 ; + } + if (newest_mount_id && *newest_mount_id > le32_to_cpu(desc->j_mount_id)) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1087: transaction " + "is valid returning because mount_id %d is less than " + "newest_mount_id %lu\n", desc->j_mount_id, + *newest_mount_id) ; + return -1 ; + } + offset = d_bh->b_blocknr - reiserfs_get_journal_block(p_s_sb) ; + + /* ok, we have a journal description block, lets see if the transaction was valid */ + c_bh = bread(p_s_sb->s_dev, reiserfs_get_journal_block(p_s_sb) + ((offset + le32_to_cpu(desc->j_len) + 1) % JOURNAL_BLOCK_COUNT), + p_s_sb->s_blocksize) ; + if (!c_bh) + return 0 ; + commit = (struct 
reiserfs_journal_commit *)c_bh->b_data ; + if (journal_compare_desc_commit(p_s_sb, desc, commit)) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, + "journal_transaction_is_valid, commit offset %ld had bad " + "time %d or length %d\n", + c_bh->b_blocknr - reiserfs_get_journal_block(p_s_sb), + le32_to_cpu(commit->j_trans_id), + le32_to_cpu(commit->j_len)); + brelse(c_bh) ; + if (oldest_invalid_trans_id) + *oldest_invalid_trans_id = le32_to_cpu(desc->j_trans_id) ; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1004: " + "transaction_is_valid setting oldest invalid trans_id " + "to %d\n", le32_to_cpu(desc->j_trans_id)) ; + return -1; + } + brelse(c_bh) ; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1006: found valid " + "transaction start offset %lu, len %d id %d\n", + d_bh->b_blocknr - reiserfs_get_journal_block(p_s_sb), + le32_to_cpu(desc->j_len), le32_to_cpu(desc->j_trans_id)) ; + return 1 ; + } else { + return 0 ; + } +} + +static void brelse_array(struct buffer_head **heads, int num) { + int i ; + for (i = 0 ; i < num ; i++) { + brelse(heads[i]) ; + } +} + +/* +** given the start, and values for the oldest acceptable transactions, +** this either reads in a replays a transaction, or returns because the transaction +** is invalid, or too old. +*/ +static int journal_read_transaction(struct super_block *p_s_sb, unsigned long cur_dblock, unsigned long oldest_start, + unsigned long oldest_trans_id, unsigned long newest_mount_id) { + struct reiserfs_journal_desc *desc ; + struct reiserfs_journal_commit *commit ; + unsigned long trans_id = 0 ; + struct buffer_head *c_bh ; + struct buffer_head *d_bh ; + struct buffer_head **log_blocks = NULL ; + struct buffer_head **real_blocks = NULL ; + unsigned long trans_offset ; + int i; + + d_bh = bread(p_s_sb->s_dev, cur_dblock, p_s_sb->s_blocksize) ; + if (!d_bh) + return 1 ; + desc = (struct reiserfs_journal_desc *)d_bh->b_data ; + trans_offset = d_bh->b_blocknr - reiserfs_get_journal_block(p_s_sb) ; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: " + "journal_read_transaction, offset %lu, len %d mount_id %d\n", + d_bh->b_blocknr - reiserfs_get_journal_block(p_s_sb), + le32_to_cpu(desc->j_len), le32_to_cpu(desc->j_mount_id)) ; + if (le32_to_cpu(desc->j_trans_id) < oldest_trans_id) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: " + "journal_read_trans skipping because %lu is too old\n", + cur_dblock - reiserfs_get_journal_block(p_s_sb)) ; + brelse(d_bh) ; + return 1 ; + } + if (le32_to_cpu(desc->j_mount_id) != newest_mount_id) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: " + "journal_read_trans skipping because %d is != " + "newest_mount_id %lu\n", le32_to_cpu(desc->j_mount_id), + newest_mount_id) ; + brelse(d_bh) ; + return 1 ; + } + c_bh = bread(p_s_sb->s_dev, reiserfs_get_journal_block(p_s_sb) + ((trans_offset + le32_to_cpu(desc->j_len) + 1) % JOURNAL_BLOCK_COUNT), + p_s_sb->s_blocksize) ; + if (!c_bh) { + brelse(d_bh) ; + return 1 ; + } + commit = (struct reiserfs_journal_commit *)c_bh->b_data ; + if (journal_compare_desc_commit(p_s_sb, desc, commit)) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal_read_transaction, " + "commit offset %ld had bad time %d or length %d\n", + c_bh->b_blocknr - reiserfs_get_journal_block(p_s_sb), + le32_to_cpu(commit->j_trans_id), le32_to_cpu(commit->j_len)); + brelse(c_bh) ; + brelse(d_bh) ; + return 1; + } + trans_id = le32_to_cpu(desc->j_trans_id) ; + /* now we know we've got a good transaction, and it was inside the valid time ranges */ + 
log_blocks = kmalloc(le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *), GFP_BUFFER) ; + real_blocks = kmalloc(le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *), GFP_BUFFER) ; + if (!log_blocks || !real_blocks) { + brelse(c_bh) ; + brelse(d_bh) ; + kfree(log_blocks) ; + kfree(real_blocks) ; + reiserfs_warning("journal-1169: kmalloc failed, unable to mount FS\n") ; + return -1 ; + } + /* get all the buffer heads */ + for(i = 0 ; i < le32_to_cpu(desc->j_len) ; i++) { + log_blocks[i] = getblk(p_s_sb->s_dev, reiserfs_get_journal_block(p_s_sb) + (trans_offset + 1 + i) % JOURNAL_BLOCK_COUNT, p_s_sb->s_blocksize); + if (i < JOURNAL_TRANS_HALF) { + real_blocks[i] = getblk(p_s_sb->s_dev, le32_to_cpu(desc->j_realblock[i]), p_s_sb->s_blocksize) ; + } else { + real_blocks[i] = getblk(p_s_sb->s_dev, le32_to_cpu(commit->j_realblock[i - JOURNAL_TRANS_HALF]), p_s_sb->s_blocksize) ; + } + if (real_blocks[i]->b_blocknr >= reiserfs_get_journal_block(p_s_sb) && + real_blocks[i]->b_blocknr < (reiserfs_get_journal_block(p_s_sb)+JOURNAL_BLOCK_COUNT)) { + reiserfs_warning("journal-1204: REPLAY FAILURE fsck required! Trying to replay onto a log block\n") ; + brelse_array(log_blocks, i) ; + brelse_array(real_blocks, i) ; + brelse(c_bh) ; + brelse(d_bh) ; + kfree(log_blocks) ; + kfree(real_blocks) ; + return -1 ; + } + } + /* read in the log blocks, memcpy to the corresponding real block */ + ll_rw_block(READ, le32_to_cpu(desc->j_len), log_blocks) ; + for (i = 0 ; i < le32_to_cpu(desc->j_len) ; i++) { + wait_on_buffer(log_blocks[i]) ; + if (!buffer_uptodate(log_blocks[i])) { + reiserfs_warning("journal-1212: REPLAY FAILURE fsck required! buffer write failed\n") ; + brelse_array(log_blocks + i, le32_to_cpu(desc->j_len) - i) ; + brelse_array(real_blocks, le32_to_cpu(desc->j_len)) ; + brelse(c_bh) ; + brelse(d_bh) ; + kfree(log_blocks) ; + kfree(real_blocks) ; + return -1 ; + } + memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data, real_blocks[i]->b_size) ; + mark_buffer_uptodate(real_blocks[i], 1) ; + brelse(log_blocks[i]) ; + } + /* flush out the real blocks */ + for (i = 0 ; i < le32_to_cpu(desc->j_len) ; i++) { + set_bit(BH_Dirty, &(real_blocks[i]->b_state)) ; + ll_rw_block(WRITE, 1, real_blocks + i) ; + } + for (i = 0 ; i < le32_to_cpu(desc->j_len) ; i++) { + wait_on_buffer(real_blocks[i]) ; + if (!buffer_uptodate(real_blocks[i])) { + reiserfs_warning("journal-1226: REPLAY FAILURE, fsck required! buffer write failed\n") ; + brelse_array(real_blocks + i, le32_to_cpu(desc->j_len) - i) ; + brelse(c_bh) ; + brelse(d_bh) ; + kfree(log_blocks) ; + kfree(real_blocks) ; + return -1 ; + } + brelse(real_blocks[i]) ; + } + cur_dblock = reiserfs_get_journal_block(p_s_sb) + ((trans_offset + le32_to_cpu(desc->j_len) + 2) % JOURNAL_BLOCK_COUNT) ; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1095: setting journal " + "start to offset %ld\n", + cur_dblock - reiserfs_get_journal_block(p_s_sb)) ; + + /* init starting values for the first transaction, in case this is the last transaction to be replayed. */ + SB_JOURNAL(p_s_sb)->j_start = cur_dblock - reiserfs_get_journal_block(p_s_sb) ; + SB_JOURNAL(p_s_sb)->j_last_flush_trans_id = trans_id ; + SB_JOURNAL(p_s_sb)->j_trans_id = trans_id + 1; + brelse(c_bh) ; + brelse(d_bh) ; + kfree(log_blocks) ; + kfree(real_blocks) ; + return 0 ; +} + +/* +** read and replay the log +** on a clean unmount, the journal header's next unflushed pointer will be to an invalid +** transaction. 
This tests that before finding all the transactions in the log, whic makes normal mount times fast. +** +** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid. +** +** On exit, it sets things up so the first transaction will work correctly. +*/ +static int journal_read(struct super_block *p_s_sb) { + struct reiserfs_journal_desc *desc ; + unsigned long last_flush_trans_id = 0 ; + unsigned long oldest_trans_id = 0; + unsigned long oldest_invalid_trans_id = 0 ; + time_t start ; + unsigned long last_flush_start = 0; + unsigned long oldest_start = 0; + unsigned long cur_dblock = 0 ; + unsigned long newest_mount_id = 9 ; + struct buffer_head *d_bh ; + struct reiserfs_journal_header *jh ; + int valid_journal_header = 0 ; + int replay_count = 0 ; + int continue_replay = 1 ; + int ret ; + + cur_dblock = reiserfs_get_journal_block(p_s_sb) ; + printk("reiserfs: checking transaction log (device %s) ...\n", + kdevname(p_s_sb->s_dev)) ; + start = CURRENT_TIME ; + + /* step 1, read in the journal header block. Check the transaction it says + ** is the first unflushed, and if that transaction is not valid, + ** replay is done + */ + SB_JOURNAL(p_s_sb)->j_header_bh = bread(p_s_sb->s_dev, + reiserfs_get_journal_block(p_s_sb) + + JOURNAL_BLOCK_COUNT, + p_s_sb->s_blocksize) ; + if (!SB_JOURNAL(p_s_sb)->j_header_bh) { + return 1 ; + } + jh = (struct reiserfs_journal_header *)(SB_JOURNAL(p_s_sb)->j_header_bh->b_data) ; + if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 && + le32_to_cpu(jh->j_first_unflushed_offset) < JOURNAL_BLOCK_COUNT && + le32_to_cpu(jh->j_last_flush_trans_id) > 0) { + last_flush_start = reiserfs_get_journal_block(p_s_sb) + + le32_to_cpu(jh->j_first_unflushed_offset) ; + last_flush_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) ; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1153: found in " + "header: first_unflushed_offset %d, last_flushed_trans_id " + "%lu\n", le32_to_cpu(jh->j_first_unflushed_offset), + last_flush_trans_id) ; + valid_journal_header = 1 ; + + /* now, we try to read the first unflushed offset. If it is not valid, + ** there is nothing more we can do, and it makes no sense to read + ** through the whole log. + */ + d_bh = bread(p_s_sb->s_dev, reiserfs_get_journal_block(p_s_sb) + le32_to_cpu(jh->j_first_unflushed_offset), p_s_sb->s_blocksize) ; + ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL) ; + if (!ret) { + continue_replay = 0 ; + } + brelse(d_bh) ; + } + + if (continue_replay && is_read_only(p_s_sb->s_dev)) { + printk("clm-2076: device is readonly, unable to replay log\n") ; + brelse(SB_JOURNAL(p_s_sb)->j_header_bh) ; + SB_JOURNAL(p_s_sb)->j_header_bh = NULL ; + return -1 ; + } + if (continue_replay && (p_s_sb->s_flags & MS_RDONLY)) { + printk("Warning, log replay starting on readonly filesystem\n") ; + } + + /* ok, there are transactions that need to be replayed. start with the first log block, find + ** all the valid transactions, and pick out the oldest. 
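+** a transaction on disk is a description block at offset j_start, followed by
+** j_len log blocks holding copies of the real buffers, followed by a commit
+** block at (j_start + j_len + 1) % JOURNAL_BLOCK_COUNT.  That is why the scan
+** below jumps desc->j_len + 2 blocks past each valid transaction it finds, and
+** only one block past anything invalid.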
+ */ + while(continue_replay && cur_dblock < (reiserfs_get_journal_block(p_s_sb) + JOURNAL_BLOCK_COUNT)) { + d_bh = bread(p_s_sb->s_dev, cur_dblock, p_s_sb->s_blocksize) ; + ret = journal_transaction_is_valid(p_s_sb, d_bh, &oldest_invalid_trans_id, &newest_mount_id) ; + if (ret == 1) { + desc = (struct reiserfs_journal_desc *)d_bh->b_data ; + if (oldest_start == 0) { /* init all oldest_ values */ + oldest_trans_id = le32_to_cpu(desc->j_trans_id) ; + oldest_start = d_bh->b_blocknr ; + newest_mount_id = le32_to_cpu(desc->j_mount_id) ; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1179: Setting " + "oldest_start to offset %lu, trans_id %lu\n", + oldest_start - reiserfs_get_journal_block(p_s_sb), + oldest_trans_id) ; + } else if (oldest_trans_id > le32_to_cpu(desc->j_trans_id)) { + /* one we just read was older */ + oldest_trans_id = le32_to_cpu(desc->j_trans_id) ; + oldest_start = d_bh->b_blocknr ; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1180: Resetting " + "oldest_start to offset %lu, trans_id %lu\n", + oldest_start - reiserfs_get_journal_block(p_s_sb), + oldest_trans_id) ; + } + if (newest_mount_id < le32_to_cpu(desc->j_mount_id)) { + newest_mount_id = le32_to_cpu(desc->j_mount_id) ; + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " + "newest_mount_id to %d\n", le32_to_cpu(desc->j_mount_id)); + } + cur_dblock += le32_to_cpu(desc->j_len) + 2 ; + } + else { + cur_dblock++ ; + } + brelse(d_bh) ; + } + /* step three, starting at the oldest transaction, replay */ + if (last_flush_start > 0) { + oldest_start = last_flush_start ; + oldest_trans_id = last_flush_trans_id ; + } + cur_dblock = oldest_start ; + if (oldest_trans_id) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1206: Starting replay " + "from offset %lu, trans_id %lu\n", + cur_dblock - reiserfs_get_journal_block(p_s_sb), + oldest_trans_id) ; + + } + replay_count = 0 ; + while(continue_replay && oldest_trans_id > 0) { + ret = journal_read_transaction(p_s_sb, cur_dblock, oldest_start, oldest_trans_id, newest_mount_id) ; + if (ret < 0) { + return ret ; + } else if (ret != 0) { + break ; + } + cur_dblock = reiserfs_get_journal_block(p_s_sb) + SB_JOURNAL(p_s_sb)->j_start ; + replay_count++ ; + } + + if (oldest_trans_id == 0) { + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1225: No valid " + "transactions found\n") ; + } + /* j_start does not get set correctly if we don't replay any transactions. 
+ ** if we had a valid journal_header, set j_start to the first unflushed transaction value, + ** copy the trans_id from the header + */ + if (valid_journal_header && replay_count == 0) { + SB_JOURNAL(p_s_sb)->j_start = le32_to_cpu(jh->j_first_unflushed_offset) ; + SB_JOURNAL(p_s_sb)->j_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1; + SB_JOURNAL(p_s_sb)->j_last_flush_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) ; + SB_JOURNAL(p_s_sb)->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1; + } else { + SB_JOURNAL(p_s_sb)->j_mount_id = newest_mount_id + 1 ; + } + reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting " + "newest_mount_id to %lu\n", SB_JOURNAL(p_s_sb)->j_mount_id) ; + SB_JOURNAL(p_s_sb)->j_first_unflushed_offset = SB_JOURNAL(p_s_sb)->j_start ; + if (replay_count > 0) { + printk("reiserfs: replayed %d transactions in %lu seconds\n", replay_count, + CURRENT_TIME - start) ; + } + if (!is_read_only(p_s_sb->s_dev)) { + update_journal_header_block(p_s_sb, SB_JOURNAL(p_s_sb)->j_start, + SB_JOURNAL(p_s_sb)->j_last_flush_trans_id) ; + } + return 0 ; +} + + +struct reiserfs_journal_commit_task { + struct super_block *p_s_sb ; + int jindex ; + int wake_on_finish ; /* if this is one, we wake the task_done queue, if it + ** is zero, we free the whole struct on finish + */ + struct reiserfs_journal_commit_task *self ; + struct wait_queue *task_done ; + struct tq_struct task ; +} ; + +static void reiserfs_journal_commit_task_func(struct reiserfs_journal_commit_task *ct) { + + struct reiserfs_journal_list *jl ; + jl = SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex ; + + flush_commit_list(ct->p_s_sb, SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex, 1) ; + if (jl->j_len > 0 && atomic_read(&(jl->j_nonzerolen)) > 0 && + atomic_read(&(jl->j_commit_left)) == 0) { + kupdate_one_transaction(ct->p_s_sb, jl) ; + } + kfree(ct->self) ; +} + +static void setup_commit_task_arg(struct reiserfs_journal_commit_task *ct, + struct super_block *p_s_sb, + int jindex) { + if (!ct) { + reiserfs_panic(NULL, "journal-1360: setup_commit_task_arg called with NULL struct\n") ; + } + ct->p_s_sb = p_s_sb ; + ct->jindex = jindex ; + ct->task_done = NULL ; + INIT_LIST_HEAD(&ct->task.list) ; + ct->task.sync = 0 ; + ct->task.routine = (void *)(void *)reiserfs_journal_commit_task_func ; + ct->self = ct ; + ct->task.data = (void *)ct ; +} + +static void commit_flush_async(struct super_block *p_s_sb, int jindex) { + struct reiserfs_journal_commit_task *ct ; + /* using GFP_BUFFER, GFP_KERNEL could try to flush inodes, which will try + ** to start/join a transaction, which will deadlock + */ + ct = kmalloc(sizeof(struct reiserfs_journal_commit_task), GFP_BUFFER) ; + if (ct) { + setup_commit_task_arg(ct, p_s_sb, jindex) ; + queue_task(&(ct->task), &reiserfs_commit_thread_tq); + wake_up(&reiserfs_commit_thread_wait) ; + } else { +#ifdef CONFIG_REISERFS_CHECK + reiserfs_warning("journal-1540: kmalloc failed, doing sync commit\n") ; +#endif + flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1) ; + } +} + +/* +** this is the commit thread. It is started with kernel_thread on +** FS mount, and journal_release() waits for it to exit. +** +** It could do a periodic commit, but there is a lot code for that +** elsewhere right now, and I only wanted to implement this little +** piece for starters. +** +** All we do here is sleep on the j_commit_thread_wait wait queue, and +** then run the per filesystem commit task queue when we wakeup. 
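+**
+** the thread stays alive for as long as reiserfs_mounted_fs_count is positive.
+** do_journal_release decrements that count, wakes reiserfs_commit_thread_wait,
+** and then sleeps on reiserfs_commit_thread_done until the loop below sees the
+** count go to zero or below and exits.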
+*/
+static int reiserfs_journal_commit_thread(void *nullp) {
+ exit_files(current);
+ exit_mm(current);
+
+ spin_lock_irq(&current->sigmask_lock);
+ sigfillset(&current->blocked);
+ recalc_sigpending(current);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ current->session = 1;
+ current->pgrp = 1;
+ sprintf(current->comm, "kreiserfsd") ;
+ lock_kernel() ;
+ while(1) {
+
+ while(TQ_ACTIVE(reiserfs_commit_thread_tq)) {
+ run_task_queue(&reiserfs_commit_thread_tq) ;
+ }
+
+ /* if there aren't any more filesystems left, break */
+ if (reiserfs_mounted_fs_count <= 0) {
+ run_task_queue(&reiserfs_commit_thread_tq) ;
+ break ;
+ }
+ wake_up(&reiserfs_commit_thread_done) ;
+ interruptible_sleep_on_timeout(&reiserfs_commit_thread_wait, 5) ;
+ }
+ unlock_kernel() ;
+ wake_up(&reiserfs_commit_thread_done) ;
+ return 0 ;
+}
+
+static void journal_list_init(struct super_block *p_s_sb) {
+ int i ;
+ for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) {
+ init_waitqueue_head(&(SB_JOURNAL_LIST(p_s_sb)[i].j_commit_wait)) ;
+ init_waitqueue_head(&(SB_JOURNAL_LIST(p_s_sb)[i].j_flush_wait)) ;
+ }
+}
+
+/*
+** must be called once on fs mount. calls journal_read for you
+*/
+int journal_init(struct super_block *p_s_sb) {
+ int num_cnodes = JOURNAL_BLOCK_COUNT * 2 ;
+
+ if (sizeof(struct reiserfs_journal_commit) != 4096 ||
+ sizeof(struct reiserfs_journal_desc) != 4096
+ ) {
+ printk("journal-1249: commit or desc struct not 4096 %Zd %Zd\n", sizeof(struct reiserfs_journal_commit),
+ sizeof(struct reiserfs_journal_desc)) ;
+ return 1 ;
+ }
+ /* sanity check to make sure they don't overflow the journal */
+ if (JOURNAL_BLOCK_COUNT > reiserfs_get_journal_orig_size(p_s_sb)) {
+ printk("journal-1393: current JOURNAL_BLOCK_COUNT (%d) is too big. This FS was created with a journal size of %lu blocks\n",
+ JOURNAL_BLOCK_COUNT, reiserfs_get_journal_orig_size(p_s_sb)) ;
+ return 1 ;
+ }
+ SB_JOURNAL(p_s_sb) = vmalloc(sizeof (struct reiserfs_journal)) ;
+
+ if (!SB_JOURNAL(p_s_sb)) {
+ printk("journal-1256: unable to get memory for journal structure\n") ;
+ return 1 ;
+ }
+ memset(SB_JOURNAL(p_s_sb), 0, sizeof(struct reiserfs_journal)) ;
+
+ SB_JOURNAL(p_s_sb)->j_list_bitmap_index = 0 ;
+ SB_JOURNAL_LIST_INDEX(p_s_sb) = -10000 ; /* make sure flush_old_commits does not try to flush a list while replay is on */
+
+ /* clear out the journal list array */
+ memset(SB_JOURNAL_LIST(p_s_sb), 0, sizeof(struct reiserfs_journal_list) * JOURNAL_LIST_COUNT) ;
+ journal_list_init(p_s_sb) ;
+
+ memset(SB_JOURNAL(p_s_sb)->j_list_hash_table, 0, JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ;
+ memset(journal_writers, 0, sizeof(char *) * 512) ; /* debug code */
+
+ INIT_LIST_HEAD(&SB_JOURNAL(p_s_sb)->j_bitmap_nodes) ;
+ reiserfs_allocate_list_bitmaps(p_s_sb, SB_JOURNAL(p_s_sb)->j_list_bitmap,
+ SB_BMAP_NR(p_s_sb)) ;
+ allocate_bitmap_nodes(p_s_sb) ;
+
+ SB_JOURNAL(p_s_sb)->j_start = 0 ;
+ SB_JOURNAL(p_s_sb)->j_len = 0 ;
+ SB_JOURNAL(p_s_sb)->j_len_alloc = 0 ;
+ atomic_set(&(SB_JOURNAL(p_s_sb)->j_wcount), 0) ;
+ SB_JOURNAL(p_s_sb)->j_bcount = 0 ;
+ SB_JOURNAL(p_s_sb)->j_trans_start_time = 0 ;
+ SB_JOURNAL(p_s_sb)->j_last = NULL ;
+ SB_JOURNAL(p_s_sb)->j_first = NULL ;
+ init_waitqueue_head(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ;
+ init_waitqueue_head(&(SB_JOURNAL(p_s_sb)->j_wait)) ;
+
+ SB_JOURNAL(p_s_sb)->j_trans_id = 10 ;
+ SB_JOURNAL(p_s_sb)->j_mount_id = 10 ;
+ SB_JOURNAL(p_s_sb)->j_state = 0 ;
+ atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ;
+ atomic_set(&(SB_JOURNAL(p_s_sb)->j_wlock), 0) ;
+ SB_JOURNAL(p_s_sb)->j_cnode_free_list =
allocate_cnodes(num_cnodes) ; + SB_JOURNAL(p_s_sb)->j_cnode_free_orig = SB_JOURNAL(p_s_sb)->j_cnode_free_list ; + SB_JOURNAL(p_s_sb)->j_cnode_free = SB_JOURNAL(p_s_sb)->j_cnode_free_list ? num_cnodes : 0 ; + SB_JOURNAL(p_s_sb)->j_cnode_used = 0 ; + SB_JOURNAL(p_s_sb)->j_must_wait = 0 ; + init_journal_hash(p_s_sb) ; + SB_JOURNAL_LIST(p_s_sb)[0].j_list_bitmap = get_list_bitmap(p_s_sb, SB_JOURNAL_LIST(p_s_sb)) ; + if (!(SB_JOURNAL_LIST(p_s_sb)[0].j_list_bitmap)) { + reiserfs_warning("journal-2005, get_list_bitmap failed for journal list 0\n") ; + return 1 ; + } + if (journal_read(p_s_sb) < 0) { + reiserfs_warning("Replay Failure, unable to mount\n") ; + free_journal_ram(p_s_sb) ; + return 1 ; + } + SB_JOURNAL_LIST_INDEX(p_s_sb) = 0 ; /* once the read is done, we can set this where it belongs */ + + if (reiserfs_dont_log (p_s_sb)) + return 0; + + reiserfs_mounted_fs_count++ ; + if (reiserfs_mounted_fs_count <= 1) { + kernel_thread((void *)(void *)reiserfs_journal_commit_thread, NULL, + CLONE_FS | CLONE_FILES | CLONE_VM) ; + } + return 0 ; +} + +/* +** test for a polite end of the current transaction. Used by file_write, and should +** be used by delete to make sure they don't write more than can fit inside a single +** transaction +*/ +int journal_transaction_should_end(struct reiserfs_transaction_handle *th, int new_alloc) { + time_t now = CURRENT_TIME ; + if (reiserfs_dont_log(th->t_super)) + return 0 ; + if ( SB_JOURNAL(th->t_super)->j_must_wait > 0 || + (SB_JOURNAL(th->t_super)->j_len_alloc + new_alloc) >= JOURNAL_MAX_BATCH || + atomic_read(&(SB_JOURNAL(th->t_super)->j_jlock)) || + (now - SB_JOURNAL(th->t_super)->j_trans_start_time) > JOURNAL_MAX_TRANS_AGE || + SB_JOURNAL(th->t_super)->j_cnode_free < (JOURNAL_TRANS_MAX * 3)) { + return 1 ; + } + return 0 ; +} + +/* this must be called inside a transaction, and requires the +** kernel_lock to be held +*/ +void reiserfs_block_writes(struct reiserfs_transaction_handle *th) { + struct super_block *s = th->t_super ; + SB_JOURNAL(s)->j_must_wait = 1 ; + set_bit(WRITERS_BLOCKED, &SB_JOURNAL(s)->j_state) ; + return ; +} + +/* this must be called without a transaction started, and does not +** require BKL +*/ +void reiserfs_allow_writes(struct super_block *s) { + clear_bit(WRITERS_BLOCKED, &SB_JOURNAL(s)->j_state) ; + wake_up(&SB_JOURNAL(s)->j_join_wait) ; +} + +/* this must be called without a transaction started, and does not +** require BKL +*/ +void reiserfs_wait_on_write_block(struct super_block *s) { + wait_event(SB_JOURNAL(s)->j_join_wait, + !test_bit(WRITERS_BLOCKED, &SB_JOURNAL(s)->j_state)) ; +} + +/* join == true if you must join an existing transaction. +** join == false if you can deal with waiting for others to finish +** +** this will block until the transaction is joinable. send the number of blocks you +** expect to use in nblocks. 
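+**
+** a typical caller (just a sketch, using only the entry points defined in this
+** file) looks like:
+**
+**   struct reiserfs_transaction_handle th ;
+**   journal_begin(&th, p_s_sb, nblocks) ;
+**   reiserfs_prepare_for_journal(p_s_sb, bh, 1) ;
+**   ... modify bh ...
+**   journal_mark_dirty(&th, p_s_sb, bh) ;
+**   journal_end(&th, p_s_sb, nblocks) ;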
+*/ +static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb,unsigned long nblocks,int join) { + time_t now = CURRENT_TIME ; + int old_trans_id ; + + reiserfs_check_lock_depth("journal_begin") ; +#ifdef CONFIG_REISERFS_CHECK + if (p_s_sb->s_flags & MS_RDONLY) { + printk("clm-2078: calling journal_begin on readonly FS\n") ; + BUG() ; + } +#endif + + if (reiserfs_dont_log(p_s_sb)) { + th->t_super = p_s_sb ; /* others will check this for the don't log flag */ + return 0 ; + } + +relock: + lock_journal(p_s_sb) ; + + if (test_bit(WRITERS_BLOCKED, &SB_JOURNAL(p_s_sb)->j_state)) { + unlock_journal(p_s_sb) ; + reiserfs_wait_on_write_block(p_s_sb) ; + goto relock ; + } + + /* if there is no room in the journal OR + ** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning + ** we don't sleep if there aren't other writers + */ + + + if ( (!join && SB_JOURNAL(p_s_sb)->j_must_wait > 0) || + ( !join && (SB_JOURNAL(p_s_sb)->j_len_alloc + nblocks + 2) >= JOURNAL_MAX_BATCH) || + (!join && atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) > 0 && SB_JOURNAL(p_s_sb)->j_trans_start_time > 0 && + (now - SB_JOURNAL(p_s_sb)->j_trans_start_time) > JOURNAL_MAX_TRANS_AGE) || + (!join && atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) ) || + (!join && SB_JOURNAL(p_s_sb)->j_cnode_free < (JOURNAL_TRANS_MAX * 3))) { + + unlock_journal(p_s_sb) ; /* allow others to finish this transaction */ + + /* if writer count is 0, we can just force this transaction to end, and start + ** a new one afterwards. + */ + if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0) { + struct reiserfs_transaction_handle myth ; + journal_join(&myth, p_s_sb, 1) ; + reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; + journal_mark_dirty(&myth, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; + do_journal_end(&myth, p_s_sb,1,COMMIT_NOW) ; + } else { + /* but if the writer count isn't zero, we have to wait for the current writers to finish. + ** They won't batch on transaction end once we set j_jlock + */ + atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 1) ; + old_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ; + while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) && + SB_JOURNAL(p_s_sb)->j_trans_id == old_trans_id) { + sleep_on(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; + } + } + lock_journal(p_s_sb) ; /* relock to continue */ + } + + if (SB_JOURNAL(p_s_sb)->j_trans_start_time == 0) { /* we are the first writer, set trans_id */ + SB_JOURNAL(p_s_sb)->j_trans_start_time = now ; + } + atomic_inc(&(SB_JOURNAL(p_s_sb)->j_wcount)) ; + SB_JOURNAL(p_s_sb)->j_len_alloc += nblocks ; + th->t_blocks_logged = 0 ; + th->t_blocks_allocated = nblocks ; + th->t_super = p_s_sb ; + th->t_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ; + th->t_caller = "Unknown" ; + unlock_journal(p_s_sb) ; + p_s_sb->s_dirt = 1; + return 0 ; +} + + +int journal_join(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { + return do_journal_begin_r(th, p_s_sb, nblocks, 1) ; +} + +int journal_begin(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, unsigned long nblocks) { + return do_journal_begin_r(th, p_s_sb, nblocks, 0) ; +} + +/* not used at all */ +int journal_prepare(struct super_block * p_s_sb, struct buffer_head *bh) { + return 0 ; +} + +/* +** puts bh into the current transaction. If it was already there, reorders removes the +** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order). 
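+** (the hash in question is SB_JOURNAL(p_s_sb)->j_hash_table, keyed on device
+** and block number, which is how later operations on the same block find the
+** cnode added here.)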
+** +** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the +** transaction is committed. +** +** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len. +*/ +int journal_mark_dirty(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, struct buffer_head *bh) { + struct reiserfs_journal_cnode *cn = NULL; + int count_already_incd = 0 ; + int prepared = 0 ; + + if (reiserfs_dont_log(th->t_super)) { + mark_buffer_dirty(bh) ; + return 0 ; + } + + if (th->t_trans_id != SB_JOURNAL(p_s_sb)->j_trans_id) { + reiserfs_panic(th->t_super, "journal-1577: handle trans id %d != current trans id %d\n", + th->t_trans_id, SB_JOURNAL(p_s_sb)->j_trans_id); + } + p_s_sb->s_dirt = 1 ; + + prepared = test_and_clear_bit(BH_JPrepared, &bh->b_state) ; + /* already in this transaction, we are done */ + if (buffer_journaled(bh)) { + return 0 ; + } + + /* this must be turned into a panic instead of a warning. We can't allow + ** a dirty or journal_dirty or locked buffer to be logged, as some changes + ** could get to disk too early. NOT GOOD. + */ + if (!prepared || buffer_locked(bh)) { + printk("journal-1777: buffer %lu bad state %cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT\n", bh->b_blocknr, prepared ? ' ' : '!', + buffer_locked(bh) ? ' ' : '!', + buffer_dirty(bh) ? ' ' : '!', + buffer_journal_dirty(bh) ? ' ' : '!') ; + show_reiserfs_locks() ; + } + count_already_incd = clear_prepared_bits(bh) ; + + if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0) { + printk("journal-1409: journal_mark_dirty returning because j_wcount was %d\n", atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount))) ; + return 1 ; + } + /* this error means I've screwed up, and we've overflowed the transaction. + ** Nothing can be done here, except make the FS readonly or panic. + */ + if (SB_JOURNAL(p_s_sb)->j_len >= JOURNAL_TRANS_MAX) { + reiserfs_panic(th->t_super, "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n", SB_JOURNAL(p_s_sb)->j_len) ; + } + + if (buffer_journal_dirty(bh)) { + count_already_incd = 1 ; + mark_buffer_notjournal_dirty(bh) ; + } + + if (buffer_dirty(bh)) { + clear_bit(BH_Dirty, &bh->b_state) ; + } + + if (buffer_journaled(bh)) { /* must double check after getting lock */ + goto done ; + } + + if (SB_JOURNAL(p_s_sb)->j_len > SB_JOURNAL(p_s_sb)->j_len_alloc) { + SB_JOURNAL(p_s_sb)->j_len_alloc = SB_JOURNAL(p_s_sb)->j_len + JOURNAL_PER_BALANCE_CNT ; + } + + set_bit(BH_JDirty, &bh->b_state) ; + + /* now put this guy on the end */ + if (!cn) { + cn = get_cnode(p_s_sb) ; + if (!cn) { + reiserfs_panic(p_s_sb, "get_cnode failed!\n"); + } + + if (th->t_blocks_logged == th->t_blocks_allocated) { + th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT ; + SB_JOURNAL(p_s_sb)->j_len_alloc += JOURNAL_PER_BALANCE_CNT ; + } + th->t_blocks_logged++ ; + SB_JOURNAL(p_s_sb)->j_len++ ; + + cn->bh = bh ; + cn->blocknr = bh->b_blocknr ; + cn->dev = bh->b_dev ; + cn->jlist = NULL ; + insert_journal_hash(SB_JOURNAL(p_s_sb)->j_hash_table, cn) ; + if (!count_already_incd) { + atomic_inc(&(bh->b_count)) ; + } + } + cn->next = NULL ; + cn->prev = SB_JOURNAL(p_s_sb)->j_last ; + cn->bh = bh ; + if (SB_JOURNAL(p_s_sb)->j_last) { + SB_JOURNAL(p_s_sb)->j_last->next = cn ; + SB_JOURNAL(p_s_sb)->j_last = cn ; + } else { + SB_JOURNAL(p_s_sb)->j_first = cn ; + SB_JOURNAL(p_s_sb)->j_last = cn ; + } +done: + return 0 ; +} + +/* +** if buffer already in current transaction, do a journal_mark_dirty +** otherwise, just mark it dirty and move on. 
Used for writes to meta blocks +** that don't need journaling +*/ +int journal_mark_dirty_nolog(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, struct buffer_head *bh) { + if (reiserfs_dont_log(th->t_super) || buffer_journaled(bh) || + buffer_journal_dirty(bh)) { + return journal_mark_dirty(th, p_s_sb, bh) ; + } + if (get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_list_hash_table, bh->b_dev,bh->b_blocknr,bh->b_size)) { + return journal_mark_dirty(th, p_s_sb, bh) ; + } + mark_buffer_dirty(bh) ; + return 0 ; +} + +int journal_end(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { + return do_journal_end(th, p_s_sb, nblocks, 0) ; +} + +/* removes from the current transaction, relsing and descrementing any counters. +** also files the removed buffer directly onto the clean list +** +** called by journal_mark_freed when a block has been deleted +** +** returns 1 if it cleaned and relsed the buffer. 0 otherwise +*/ +int remove_from_transaction(struct super_block *p_s_sb, unsigned long blocknr, int already_cleaned) { + struct buffer_head *bh ; + struct reiserfs_journal_cnode *cn ; + int ret = 0; + + cn = get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_hash_table, p_s_sb->s_dev, blocknr, p_s_sb->s_blocksize) ; + if (!cn || !cn->bh) { + return ret ; + } + bh = cn->bh ; + if (cn->prev) { + cn->prev->next = cn->next ; + } + if (cn->next) { + cn->next->prev = cn->prev ; + } + if (cn == SB_JOURNAL(p_s_sb)->j_first) { + SB_JOURNAL(p_s_sb)->j_first = cn->next ; + } + if (cn == SB_JOURNAL(p_s_sb)->j_last) { + SB_JOURNAL(p_s_sb)->j_last = cn->prev ; + } + remove_journal_hash(SB_JOURNAL(p_s_sb)->j_hash_table, NULL, bh, 0) ; + mark_buffer_not_journaled(bh) ; /* don't log this one */ + + if (!already_cleaned) { + mark_buffer_notjournal_dirty(bh) ; + atomic_dec(&(bh->b_count)) ; + if (atomic_read(&(bh->b_count)) < 0) { + printk("journal-1752: remove from trans, b_count < 0\n") ; + } + if (!buffer_locked(bh)) reiserfs_clean_and_file_buffer(bh) ; + ret = 1 ; + } + SB_JOURNAL(p_s_sb)->j_len-- ; + SB_JOURNAL(p_s_sb)->j_len_alloc-- ; + free_cnode(p_s_sb, cn) ; + return ret ; +} + +/* removes from a specific journal list hash */ +int remove_from_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, struct buffer_head *bh, int remove_freed) { + remove_journal_hash(SB_JOURNAL(s)->j_list_hash_table, jl, bh, remove_freed) ; + return 0 ; +} + +/* +** for any cnode in a journal list, it can only be dirtied of all the +** transactions that include it are commited to disk. +** this checks through each transaction, and returns 1 if you are allowed to dirty, +** and 0 if you aren't +** +** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log +** blocks for a given transaction on disk +** +*/ +static int can_dirty(struct reiserfs_journal_cnode *cn) { + kdev_t dev = cn->dev ; + unsigned long blocknr = cn->blocknr ; + struct reiserfs_journal_cnode *cur = cn->hprev ; + int can_dirty = 1 ; + + /* first test hprev. These are all newer than cn, so any node here + ** with the name block number and dev means this node can't be sent + ** to disk right now. + */ + while(cur && can_dirty) { + if (cur->jlist && cur->bh && cur->blocknr && cur->dev == dev && + cur->blocknr == blocknr) { + can_dirty = 0 ; + } + cur = cur->hprev ; + } + /* then test hnext. These are all older than cn. 
As long as they + ** are committed to the log, it is safe to write cn to disk + */ + cur = cn->hnext ; + while(cur && can_dirty) { + if (cur->jlist && cur->jlist->j_len > 0 && + atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh && + cur->blocknr && cur->dev == dev && cur->blocknr == blocknr) { + can_dirty = 0 ; + } + cur = cur->hnext ; + } + return can_dirty ; +} + +/* syncs the commit blocks, but does not force the real buffers to disk +** will wait until the current transaction is done/commited before returning +*/ +int journal_end_sync(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long nblocks) { + return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW | WAIT) ; +} + +#ifdef __KERNEL__ +int show_reiserfs_locks(void) { + + dump_journal_writers() ; +#if 0 /* debugging code for when we are compiled static don't delete */ + p_s_sb = sb_entry(super_blocks.next); + while (p_s_sb != sb_entry(&super_blocks)) { + if (reiserfs_is_super(p_s_sb)) { +printk("journal lock is %d, join lock is %d, writers %d must wait is %d\n", + atomic_read(&(SB_JOURNAL(p_s_sb)->j_wlock)), + atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)), + atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)), + SB_JOURNAL(p_s_sb)->j_must_wait) ; + printk("used cnodes %d, free cnodes %d\n", SB_JOURNAL(p_s_sb)->j_cnode_used, SB_JOURNAL(p_s_sb)->j_cnode_free) ; + } + p_s_sb = sb_entry(p_s_sb->s_list.next); + } +#endif + return 0 ; +} +#endif + +/* +** used to get memory back from async commits that are floating around +** and to reclaim any blocks deleted but unusable because their commits +** haven't hit disk yet. called from bitmap.c +** +** if it starts flushing things, it ors SCHEDULE_OCCURRED into repeat. +** note, this is just if schedule has a chance of occuring. I need to +** change flush_commit_lists to have a repeat parameter too. +** +*/ +void flush_async_commits(struct super_block *p_s_sb) { + int i ; + + for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) { + if (i != SB_JOURNAL_LIST_INDEX(p_s_sb)) { + flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + i, 1) ; + } + } +} + +/* +** flushes any old transactions to disk +** ends the current transaction if it is too old +** +** also calls flush_journal_list with old_only == 1, which allows me to reclaim +** memory and such from the journal lists whose real blocks are all on disk. +** +** called by sync_dev_journal from buffer.c +*/ +int flush_old_commits(struct super_block *p_s_sb, int immediate) { + int i ; + int count = 0; + int start ; + time_t now ; + int keep_dirty = 0 ; + struct reiserfs_transaction_handle th ; + + start = SB_JOURNAL_LIST_INDEX(p_s_sb) ; + now = CURRENT_TIME ; + + /* safety check so we don't flush while we are replaying the log during mount */ + if (SB_JOURNAL_LIST_INDEX(p_s_sb) < 0) { + return 0 ; + } + if (!strcmp(current->comm, "kupdate")) { + immediate = 0 ; + keep_dirty = 1 ; + } + /* starting with oldest, loop until we get to the start */ + i = (SB_JOURNAL_LIST_INDEX(p_s_sb) + 1) % JOURNAL_LIST_COUNT ; + while(i != start) { + if (SB_JOURNAL_LIST(p_s_sb)[i].j_len > 0 && ((now - SB_JOURNAL_LIST(p_s_sb)[i].j_timestamp) > JOURNAL_MAX_COMMIT_AGE || + immediate)) { + /* we have to check again to be sure the current transaction did not change */ + if (i != SB_JOURNAL_LIST_INDEX(p_s_sb)) { + flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + i, 1) ; + } + } + i = (i + 1) % JOURNAL_LIST_COUNT ; + count++ ; + } + /* now, check the current transaction. 
If there are no writers, and it is too old, finish it, and + ** force the commit blocks to disk + */ + if (!immediate && atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0 && + SB_JOURNAL(p_s_sb)->j_trans_start_time > 0 && + SB_JOURNAL(p_s_sb)->j_len > 0 && + (now - SB_JOURNAL(p_s_sb)->j_trans_start_time) > JOURNAL_MAX_TRANS_AGE) { + journal_join(&th, p_s_sb, 1) ; + reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; + journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; + do_journal_end(&th, p_s_sb,1, COMMIT_NOW) ; + keep_dirty = 0 ; + } else if (immediate) { /* belongs above, but I wanted this to be very explicit as a special case. If they say to + flush, we must be sure old transactions hit the disk too. */ + journal_join(&th, p_s_sb, 1) ; + reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; + journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; + do_journal_end(&th, p_s_sb,1, COMMIT_NOW | WAIT) ; + } + keep_dirty |= reiserfs_journal_kupdate(p_s_sb) ; + return keep_dirty ; +} + +/* +** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit +** +** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all +** the writers are done. By the time it wakes up, the transaction it was called has already ended, so it just +** flushes the commit list and returns 0. +** +** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait. +** +** Note, we can't allow the journal_end to proceed while there are still writers in the log. +*/ +static int check_journal_end(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, + unsigned long nblocks, int flags) { + + time_t now ; + int flush = flags & FLUSH_ALL ; + int commit_now = flags & COMMIT_NOW ; + int wait_on_commit = flags & WAIT ; + + if (th->t_trans_id != SB_JOURNAL(p_s_sb)->j_trans_id) { + reiserfs_panic(th->t_super, "journal-1577: handle trans id %d != current trans id %d\n", + th->t_trans_id, SB_JOURNAL(p_s_sb)->j_trans_id); + } + + SB_JOURNAL(p_s_sb)->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged) ; + if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) > 0) { /* <= 0 is allowed. unmounting might not call begin */ + atomic_dec(&(SB_JOURNAL(p_s_sb)->j_wcount)) ; + } + + /* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released + ** will be dealt with by next transaction that actually writes something, but should be taken + ** care of in this trans + */ + if (SB_JOURNAL(p_s_sb)->j_len == 0) { + int wcount = atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) ; + unlock_journal(p_s_sb) ; + if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) > 0 && wcount <= 0) { + atomic_dec(&(SB_JOURNAL(p_s_sb)->j_jlock)) ; + wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; + } + return 0 ; + } + /* if wcount > 0, and we are called to with flush or commit_now, + ** we wait on j_join_wait. We will wake up when the last writer has + ** finished the transaction, and started it on its way to the disk. + ** Then, we flush the commit or journal list, and just return 0 + ** because the rest of journal end was already done for this transaction. 
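+ ** If neither flush nor commit_now was asked for, we just unlock and return 0,
+ ** leaving the blocks in the still running transaction; that is the batching
+ ** this function implements.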
+ */ + if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) > 0) { + if (flush || commit_now) { + int orig_jindex = SB_JOURNAL_LIST_INDEX(p_s_sb) ; + atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 1) ; + if (flush) { + SB_JOURNAL(p_s_sb)->j_next_full_flush = 1 ; + } + unlock_journal(p_s_sb) ; + /* sleep while the current transaction is still j_jlocked */ + while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) && + SB_JOURNAL(p_s_sb)->j_trans_id == th->t_trans_id) { + sleep_on(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; + } + if (commit_now) { + if (wait_on_commit) { + flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ; + } else { + commit_flush_async(p_s_sb, orig_jindex) ; + } + } + return 0 ; + } + unlock_journal(p_s_sb) ; + return 0 ; + } + + /* deal with old transactions where we are the last writers */ + now = CURRENT_TIME ; + if ((now - SB_JOURNAL(p_s_sb)->j_trans_start_time) > JOURNAL_MAX_TRANS_AGE) { + commit_now = 1 ; + SB_JOURNAL(p_s_sb)->j_next_async_flush = 1 ; + } + /* don't batch when someone is waiting on j_join_wait */ + /* don't batch when syncing the commit or flushing the whole trans */ + if (!(SB_JOURNAL(p_s_sb)->j_must_wait > 0) && !(atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock))) && !flush && !commit_now && + (SB_JOURNAL(p_s_sb)->j_len < JOURNAL_MAX_BATCH) && + SB_JOURNAL(p_s_sb)->j_len_alloc < JOURNAL_MAX_BATCH && SB_JOURNAL(p_s_sb)->j_cnode_free > (JOURNAL_TRANS_MAX * 3)) { + SB_JOURNAL(p_s_sb)->j_bcount++ ; + unlock_journal(p_s_sb) ; + return 0 ; + } + + if (SB_JOURNAL(p_s_sb)->j_start > JOURNAL_BLOCK_COUNT) { + reiserfs_panic(p_s_sb, "journal-003: journal_end: j_start (%d) is too high\n", SB_JOURNAL(p_s_sb)->j_start) ; + } + return 1 ; +} + +/* +** Does all the work that makes deleting blocks safe. +** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on. +** +** otherwise: +** set a bit for the block in the journal bitmap. That will prevent it from being allocated for unformatted nodes +** before this transaction has finished. +** +** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers. That will prevent any old transactions with +** this block from trying to flush to the real location. Since we aren't removing the cnode from the journal_list_hash, +** the block can't be reallocated yet. +** +** Then remove it from the current transaction, decrementing any counters and filing it on the clean list. 
+*/ +int journal_mark_freed(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, unsigned long blocknr) { + struct reiserfs_journal_cnode *cn = NULL ; + struct buffer_head *bh = NULL ; + struct reiserfs_list_bitmap *jb = NULL ; + int cleaned = 0 ; + + if (reiserfs_dont_log(th->t_super)) { + bh = get_hash_table(p_s_sb->s_dev, blocknr, p_s_sb->s_blocksize) ; + if (bh && buffer_dirty (bh)) { + printk ("journal_mark_freed(dont_log): dirty buffer on hash list: %lx %ld\n", bh->b_state, blocknr); + BUG (); + } + brelse (bh); + return 0 ; + } + bh = get_hash_table(p_s_sb->s_dev, blocknr, p_s_sb->s_blocksize) ; + /* if it is journal new, we just remove it from this transaction */ + if (bh && buffer_journal_new(bh)) { + clear_prepared_bits(bh) ; + cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned) ; + } else { + /* set the bit for this block in the journal bitmap for this transaction */ + jb = SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_list_bitmap ; + if (!jb) { + reiserfs_panic(p_s_sb, "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n") ; + } + set_bit_in_list_bitmap(p_s_sb, blocknr, jb) ; + + /* Note, the entire while loop is not allowed to schedule. */ + + if (bh) { + clear_prepared_bits(bh) ; + } + cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned) ; + + /* find all older transactions with this block, make sure they don't try to write it out */ + cn = get_journal_hash_dev(SB_JOURNAL(p_s_sb)->j_list_hash_table, p_s_sb->s_dev, blocknr, p_s_sb->s_blocksize) ; + while (cn) { + if (p_s_sb->s_dev == cn->dev && blocknr == cn->blocknr) { + set_bit(BLOCK_FREED, &cn->state) ; + if (cn->bh) { + if (!cleaned) { + /* remove_from_transaction will brelse the buffer if it was + ** in the current trans + */ + mark_buffer_notjournal_dirty(cn->bh) ; + if (!buffer_locked(cn->bh)) { + reiserfs_clean_and_file_buffer(cn->bh) ; + } + cleaned = 1 ; + atomic_dec(&(cn->bh->b_count)) ; + if (atomic_read(&(cn->bh->b_count)) < 0) { + printk("journal-2138: cn->bh->b_count < 0\n") ; + } + } + if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */ + atomic_dec(&(cn->jlist->j_nonzerolen)) ; + } + cn->bh = NULL ; + } + } + cn = cn->hnext ; + } + } + + if (bh) { + atomic_dec(&(bh->b_count)) ; /* get_hash incs this */ + if (atomic_read(&(bh->b_count)) < 0) { + printk("journal-2165: bh->b_count < 0\n") ; + } + } + return 0 ; +} + +void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb, + struct buffer_head *bh) { + if (reiserfs_dont_log (p_s_sb)) + return; + + if (!bh) { + return ; + } + clear_bit(BH_JPrepared, &bh->b_state) ; +} + +extern struct tree_balance *cur_tb ; +/* +** before we can change a metadata block, we have to make sure it won't +** be written to disk while we are altering it. So, we must: +** clean it +** wait on it. +** +*/ +void reiserfs_prepare_for_journal(struct super_block *p_s_sb, + struct buffer_head *bh, int wait) { + int retry_count = 0 ; + + if (reiserfs_dont_log (p_s_sb)) + return; + + while(!test_bit(BH_JPrepared, &bh->b_state) || + (wait && buffer_locked(bh))) { + if (buffer_journaled(bh)) { + set_bit(BH_JPrepared, &bh->b_state) ; + return ; + } + set_bit(BH_JPrepared, &bh->b_state) ; + if (wait) { +#ifdef CONFIG_REISERFS_CHECK + if (buffer_locked(bh) && cur_tb != NULL) { + printk("reiserfs_prepare_for_journal, waiting while do_balance was running\n") ; + BUG() ; + } +#endif + wait_on_buffer(bh) ; + } + retry_count++ ; + } +} + +/* + * Wait for a page to get unlocked. 
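+ * (it also gives up as soon as pl->do_not_lock is set, so we never wait on a
+ * page that the calling function itself is holding locked.)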
+ * + * This must be called with the caller "holding" the page, + * ie with increased "page->count" so that the page won't + * go away during the wait.. + */ +static void ___reiserfs_wait_on_page(struct reiserfs_page_list *pl) +{ + struct task_struct *tsk = current; + struct page *page = pl->page ; + DECLARE_WAITQUEUE(wait, tsk); + + add_wait_queue(&page->wait, &wait); + do { + block_sync_page(page); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (!PageLocked(page) || pl->do_not_lock) + break; + schedule(); + } while (PageLocked(page)); + tsk->state = TASK_RUNNING; + remove_wait_queue(&page->wait, &wait); +} + +/* + * Get an exclusive lock on the page.. + * but, every time you get woken up, check the page to make sure + * someone hasn't called a journal_begin with it locked. + * + * the page should always be locked when this returns + * + * returns 0 if you've got the page locked + * returns 1 if it returns because someone else has called journal_begin + * with the page locked + * this is only useful to the code that flushes pages before a + * commit. Do not export this hack. Ever. + */ +static int reiserfs_try_lock_page(struct reiserfs_page_list *pl) +{ + struct page *page = pl->page ; + while (TryLockPage(page)) { + if (pl->do_not_lock) { + /* the page is locked, but we cannot have it */ + return 1 ; + } + ___reiserfs_wait_on_page(pl); + } + /* we have the page locked */ + return 0 ; +} + + +/* +** This can only be called from do_journal_end. +** it runs through the list things that need flushing before the +** transaction can commit, and writes each of them to disk +** +*/ + +static void flush_pages_before_commit(struct reiserfs_transaction_handle *th, + struct super_block *p_s_sb) { + struct reiserfs_page_list *pl = SB_JOURNAL(p_s_sb)->j_flush_pages ; + struct reiserfs_page_list *pl_tmp ; + struct buffer_head *bh, *head ; + int count = 0 ; + + /* first write each dirty unlocked buffer in the list */ + + while(pl) { + /* ugly. journal_end can be called from get_block, which has a + ** page locked. So, we have to check to see if pl->page is the page + ** currently locked by the calling function, and if so, skip the + ** lock + */ + if (reiserfs_try_lock_page(pl)) { + goto setup_next ; + } + if (!PageLocked(pl->page)) { + BUG() ; + } + if (pl->page->buffers) { + head = pl->page->buffers ; + bh = head ; + do { + if (bh->b_blocknr == pl->blocknr && buffer_dirty(bh) && + !buffer_locked(bh) && buffer_uptodate(bh) ) { + ll_rw_block(WRITE, 1, &bh) ; + } + bh = bh->b_this_page ; + } while (bh != head) ; + } + if (!pl->do_not_lock) { + UnlockPage(pl->page) ; + } +setup_next: + pl = pl->next ; + } + + /* now wait on them */ + + pl = SB_JOURNAL(p_s_sb)->j_flush_pages ; + while(pl) { + if (reiserfs_try_lock_page(pl)) { + goto remove_page ; + } + if (!PageLocked(pl->page)) { + BUG() ; + } + if (pl->page->buffers) { + head = pl->page->buffers ; + bh = head ; + do { + if (bh->b_blocknr == pl->blocknr) { + count++ ; + wait_on_buffer(bh) ; + if (!buffer_uptodate(bh)) { + reiserfs_panic(p_s_sb, "journal-2443: flush_pages_before_commit, error writing block %lu\n", bh->b_blocknr) ; + } + } + bh = bh->b_this_page ; + } while (bh != head) ; + } + if (!pl->do_not_lock) { + UnlockPage(pl->page) ; + } +remove_page: + /* we've waited on the I/O, we can remove the page from the + ** list, and free our pointer struct to it. 
+ */ + if (pl->prev) { + pl->prev->next = pl->next ; + } + if (pl->next) { + pl->next->prev = pl->prev ; + } + put_page(pl->page) ; + pl_tmp = pl ; + pl = pl->next ; + reiserfs_kfree(pl_tmp, sizeof(struct reiserfs_page_list), p_s_sb) ; + } + SB_JOURNAL(p_s_sb)->j_flush_pages = NULL ; +} + +/* +** called when a indirect item is converted back into a tail. +** +** The reiserfs part of the inode stores enough information to find +** our page_list struct in the flush list. We remove it from the list +** and free the struct. +** +** Note, it is possible for this to happen: +** +** reiserfs_add_page_to_flush_list(inode) +** transaction ends, list is flushed +** reiserfs_remove_page_from_flush_list(inode) +** +** This would be bad because the page_list pointer in the inode is not +** updated when the list is flushed, so we can't know if the pointer is +** valid. So, in the inode, we also store the transaction id when the +** page was added. If we are trying to remove something from an old +** transaction, we just clear out the pointer in the inode and return. +** +** Normal case is to use the reiserfs_page_list pointer in the inode to +** find and remove the page from the flush list. +*/ +int reiserfs_remove_page_from_flush_list(struct reiserfs_transaction_handle *th, + struct inode *inode) { + struct reiserfs_page_list *pl ; + + /* was this conversion done in a previous transaction? If so, return */ + if (inode->u.reiserfs_i.i_conversion_trans_id < th->t_trans_id) { + inode->u.reiserfs_i.i_converted_page = NULL ; + inode->u.reiserfs_i.i_conversion_trans_id = 0 ; + return 0 ; + } + + /* remove the page_list struct from the list, release our hold on the + ** page, and free the page_list struct + */ + pl = inode->u.reiserfs_i.i_converted_page ; + if (pl) { + if (pl->next) { + pl->next->prev = pl->prev ; + } + if (pl->prev) { + pl->prev->next = pl->next ; + } + if (SB_JOURNAL(inode->i_sb)->j_flush_pages == pl) { + SB_JOURNAL(inode->i_sb)->j_flush_pages = pl->next ; + } + put_page(pl->page) ; + reiserfs_kfree(pl, sizeof(struct reiserfs_page_list), inode->i_sb) ; + inode->u.reiserfs_i.i_converted_page = NULL ; + inode->u.reiserfs_i.i_conversion_trans_id = 0 ; + } + return 0 ; +} + +/* +** Called after a direct to indirect transaction. The unformatted node +** must be flushed to disk before the transaction commits, otherwise, we +** risk losing the data from the direct item. This adds the page +** containing the unformatted node to a list of pages that need flushing. +** +** it calls get_page(page), so the page won't disappear until we've +** flushed or removed it from our list. +** +** pointers to the reiserfs_page_list struct are stored in the inode, +** so this page can be quickly removed from the list after the tail is +** converted back into a direct item. +** +** If we fail to find the memory for the reiserfs_page_list struct, we +** just sync the page now. Not good, but safe. +** +** since this must be called with the page locked, we always set +** the do_not_lock field in the page_list struct we allocate +** +*/ +int reiserfs_add_page_to_flush_list(struct reiserfs_transaction_handle *th, + struct inode *inode, + struct buffer_head *bh) { + struct reiserfs_page_list *new_pl ; + +/* debugging use ONLY. Do not define this on data you care about. 
*/ +#ifdef REISERFS_NO_FLUSH_AFTER_CONVERT + return 0 ; +#endif + + get_page(bh->b_page) ; + new_pl = reiserfs_kmalloc(sizeof(struct reiserfs_page_list), GFP_BUFFER, + inode->i_sb) ; + if (!new_pl) { + put_page(bh->b_page) ; + reiserfs_warning("journal-2480: forced to flush page, out of memory\n") ; + ll_rw_block(WRITE, 1, &bh) ; + wait_on_buffer(bh) ; + if (!buffer_uptodate(bh)) { + reiserfs_panic(inode->i_sb, "journal-2484: error writing buffer %lu to disk\n", bh->b_blocknr) ; + } + inode->u.reiserfs_i.i_converted_page = NULL ; + return 0 ; + } + + new_pl->page = bh->b_page ; + new_pl->do_not_lock = 1 ; + new_pl->blocknr = bh->b_blocknr ; + new_pl->next = SB_JOURNAL(inode->i_sb)->j_flush_pages; + if (new_pl->next) { + new_pl->next->prev = new_pl ; + } + new_pl->prev = NULL ; + SB_JOURNAL(inode->i_sb)->j_flush_pages = new_pl ; + + /* if we have numbers from an old transaction, zero the converted + ** page, it has already been flushed and freed + */ + if (inode->u.reiserfs_i.i_conversion_trans_id && + inode->u.reiserfs_i.i_conversion_trans_id < th->t_trans_id) { + inode->u.reiserfs_i.i_converted_page = NULL ; + } + if (inode->u.reiserfs_i.i_converted_page) { + reiserfs_panic(inode->i_sb, "journal-2501: inode already had a converted page\n") ; + } + inode->u.reiserfs_i.i_converted_page = new_pl ; + inode->u.reiserfs_i.i_conversion_trans_id = th->t_trans_id ; + return 0 ; +} + +/* +** long and ugly. If flush, will not return until all commit +** blocks and all real buffers in the trans are on disk. +** If no_async, won't return until all commit blocks are on disk. +** +** keep reading, there are comments as you go along +*/ +static int do_journal_end(struct reiserfs_transaction_handle *th, struct super_block * p_s_sb, unsigned long nblocks, + int flags) { + struct reiserfs_journal_cnode *cn, *next, *jl_cn; + struct reiserfs_journal_cnode *last_cn = NULL; + struct reiserfs_journal_desc *desc ; + struct reiserfs_journal_commit *commit ; + struct buffer_head *c_bh ; /* commit bh */ + struct buffer_head *d_bh ; /* desc bh */ + int cur_write_start = 0 ; /* start index of current log write */ + int cur_blocks_left = 0 ; /* number of journal blocks left to write */ + int old_start ; + int i ; + int jindex ; + int orig_jindex ; + int flush = flags & FLUSH_ALL ; + int commit_now = flags & COMMIT_NOW ; + int wait_on_commit = flags & WAIT ; + struct reiserfs_super_block *rs ; + + if (reiserfs_dont_log(th->t_super)) { + return 0 ; + } + + lock_journal(p_s_sb) ; + if (SB_JOURNAL(p_s_sb)->j_next_full_flush) { + flags |= FLUSH_ALL ; + flush = 1 ; + } + if (SB_JOURNAL(p_s_sb)->j_next_async_flush) { + flags |= COMMIT_NOW ; + commit_now = 1 ; + } + + /* check_journal_end locks the journal, and unlocks if it does not return 1 + ** it tells us if we should continue with the journal_end, or just return + */ + if (!check_journal_end(th, p_s_sb, nblocks, flags)) { + return 0 ; + } + + /* check_journal_end might set these, check again */ + if (SB_JOURNAL(p_s_sb)->j_next_full_flush) { + flush = 1 ; + } + if (SB_JOURNAL(p_s_sb)->j_next_async_flush) { + commit_now = 1 ; + } + /* + ** j must wait means we have to flush the log blocks, and the real blocks for + ** this transaction + */ + if (SB_JOURNAL(p_s_sb)->j_must_wait > 0) { + flush = 1 ; + } + + rs = SB_DISK_SUPER_BLOCK(p_s_sb) ; + /* setup description block */ + d_bh = getblk(p_s_sb->s_dev, reiserfs_get_journal_block(p_s_sb) + SB_JOURNAL(p_s_sb)->j_start, p_s_sb->s_blocksize) ; + mark_buffer_uptodate(d_bh, 1) ; + desc = (struct reiserfs_journal_desc 
*)(d_bh)->b_data ; + memset(desc, 0, sizeof(struct reiserfs_journal_desc)) ; + memcpy(desc->j_magic, JOURNAL_DESC_MAGIC, 8) ; + desc->j_trans_id = cpu_to_le32(SB_JOURNAL(p_s_sb)->j_trans_id) ; + + /* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */ + c_bh = getblk(p_s_sb->s_dev, reiserfs_get_journal_block(p_s_sb) + + ((SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL(p_s_sb)->j_len + 1) % JOURNAL_BLOCK_COUNT), + p_s_sb->s_blocksize) ; + commit = (struct reiserfs_journal_commit *)c_bh->b_data ; + memset(commit, 0, sizeof(struct reiserfs_journal_commit)) ; + commit->j_trans_id = cpu_to_le32(SB_JOURNAL(p_s_sb)->j_trans_id) ; + mark_buffer_uptodate(c_bh, 1) ; + + /* init this journal list */ + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_older_commits_done), 0) ; + SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ; + SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_timestamp = SB_JOURNAL(p_s_sb)->j_trans_start_time ; + SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_bh = c_bh ; + SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_start = SB_JOURNAL(p_s_sb)->j_start ; + SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_len = SB_JOURNAL(p_s_sb)->j_len ; + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_nonzerolen), SB_JOURNAL(p_s_sb)->j_len) ; + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_left), SB_JOURNAL(p_s_sb)->j_len + 2); + SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_realblock = NULL ; + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_flushing), 1) ; + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_flushing), 1) ; + + /* which is faster, locking/unlocking at the start and end of the for + ** or locking once per iteration around the insert_journal_hash? + ** eitherway, we are write locking insert_journal_hash. The ENTIRE FOR + ** LOOP MUST not cause schedule to occur. 
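+ ** Note the i-- in the else branch of the loop below: a buffer that has lost
+ ** its JDirty bit gets no entry, which keeps the realblock index arrays dense.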
+ */ + + /* for each real block, add it to the journal list hash, + ** copy into real block index array in the commit or desc block + */ + for (i = 0, cn = SB_JOURNAL(p_s_sb)->j_first ; cn ; cn = cn->next, i++) { + if (test_bit(BH_JDirty, &cn->bh->b_state) ) { + jl_cn = get_cnode(p_s_sb) ; + if (!jl_cn) { + reiserfs_panic(p_s_sb, "journal-1676, get_cnode returned NULL\n") ; + } + if (i == 0) { + SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_realblock = jl_cn ; + } + jl_cn->prev = last_cn ; + jl_cn->next = NULL ; + if (last_cn) { + last_cn->next = jl_cn ; + } + last_cn = jl_cn ; + if (cn->bh->b_blocknr >= reiserfs_get_journal_block(p_s_sb) && + cn->bh->b_blocknr < (reiserfs_get_journal_block(p_s_sb) + JOURNAL_BLOCK_COUNT)) { + reiserfs_panic(p_s_sb, "journal-2332: Trying to log block %lu, which is a log block\n", cn->bh->b_blocknr) ; + } + jl_cn->blocknr = cn->bh->b_blocknr ; + jl_cn->state = 0 ; + jl_cn->dev = cn->bh->b_dev ; + jl_cn->bh = cn->bh ; + jl_cn->jlist = SB_JOURNAL_LIST(p_s_sb) + SB_JOURNAL_LIST_INDEX(p_s_sb) ; + insert_journal_hash(SB_JOURNAL(p_s_sb)->j_list_hash_table, jl_cn) ; + if (i < JOURNAL_TRANS_HALF) { + desc->j_realblock[i] = cpu_to_le32(cn->bh->b_blocknr) ; + } else { + commit->j_realblock[i - JOURNAL_TRANS_HALF] = cpu_to_le32(cn->bh->b_blocknr) ; + } + } else { + i-- ; + } + } + + desc->j_len = cpu_to_le32(SB_JOURNAL(p_s_sb)->j_len) ; + desc->j_mount_id = cpu_to_le32(SB_JOURNAL(p_s_sb)->j_mount_id) ; + desc->j_trans_id = cpu_to_le32(SB_JOURNAL(p_s_sb)->j_trans_id) ; + commit->j_len = cpu_to_le32(SB_JOURNAL(p_s_sb)->j_len) ; + + /* special check in case all buffers in the journal were marked for not logging */ + if (SB_JOURNAL(p_s_sb)->j_len == 0) { + brelse(d_bh) ; + brelse(c_bh) ; + unlock_journal(p_s_sb) ; +printk("journal-2020: do_journal_end: BAD desc->j_len is ZERO\n") ; + atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ; + wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; + return 0 ; + } + + /* first data block is j_start + 1, so add one to cur_write_start wherever you use it */ + cur_write_start = SB_JOURNAL(p_s_sb)->j_start ; + cur_blocks_left = SB_JOURNAL(p_s_sb)->j_len ; + cn = SB_JOURNAL(p_s_sb)->j_first ; + jindex = 1 ; /* start at one so we don't get the desc again */ + while(cur_blocks_left > 0) { + /* copy all the real blocks into log area. dirty log blocks */ + if (test_bit(BH_JDirty, &cn->bh->b_state)) { + struct buffer_head *tmp_bh ; + tmp_bh = getblk(p_s_sb->s_dev, reiserfs_get_journal_block(p_s_sb) + + ((cur_write_start + jindex) % JOURNAL_BLOCK_COUNT), + p_s_sb->s_blocksize) ; + mark_buffer_uptodate(tmp_bh, 1) ; + memcpy(tmp_bh->b_data, cn->bh->b_data, cn->bh->b_size) ; + jindex++ ; + } else { + /* JDirty cleared sometime during transaction. don't log this one */ + printk("journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!\n") ; + } + cn = cn->next ; + cur_blocks_left-- ; + } + + /* we are done with both the c_bh and d_bh, but + ** c_bh must be written after all other commit blocks, + ** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1. + */ + + /* now loop through and mark all buffers from this transaction as JDirty_wait + ** clear the JDirty bit, clear BH_JNew too. 
+ ** if they weren't JDirty, they weren't logged, just relse them and move on + */ + cn = SB_JOURNAL(p_s_sb)->j_first ; + while(cn) { + clear_bit(BH_JNew, &(cn->bh->b_state)) ; + if (test_bit(BH_JDirty, &(cn->bh->b_state))) { + set_bit(BH_JDirty_wait, &(cn->bh->b_state)) ; + clear_bit(BH_JDirty, &(cn->bh->b_state)) ; + } else { + brelse(cn->bh) ; + } + next = cn->next ; + free_cnode(p_s_sb, cn) ; + cn = next ; + } + + /* unlock the journal list for committing and flushing */ + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_flushing), 0) ; + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_flushing), 0) ; + + orig_jindex = SB_JOURNAL_LIST_INDEX(p_s_sb) ; + jindex = (SB_JOURNAL_LIST_INDEX(p_s_sb) + 1) % JOURNAL_LIST_COUNT ; + SB_JOURNAL_LIST_INDEX(p_s_sb) = jindex ; + + /* make sure to flush any data converted from direct items to + ** indirect items before allowing the commit blocks to reach the + ** disk + */ + flush_pages_before_commit(th, p_s_sb) ; + + /* honor the flush and async wishes from the caller */ + if (flush) { + + flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ; + flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex , 1) ; + } else if (commit_now) { + if (wait_on_commit) { + flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ; + } else { + commit_flush_async(p_s_sb, orig_jindex) ; + } + } + + /* reset journal values for the next transaction */ + old_start = SB_JOURNAL(p_s_sb)->j_start ; + SB_JOURNAL(p_s_sb)->j_start = (SB_JOURNAL(p_s_sb)->j_start + SB_JOURNAL(p_s_sb)->j_len + 2) % JOURNAL_BLOCK_COUNT; + atomic_set(&(SB_JOURNAL(p_s_sb)->j_wcount), 0) ; + SB_JOURNAL(p_s_sb)->j_bcount = 0 ; + SB_JOURNAL(p_s_sb)->j_last = NULL ; + SB_JOURNAL(p_s_sb)->j_first = NULL ; + SB_JOURNAL(p_s_sb)->j_len = 0 ; + SB_JOURNAL(p_s_sb)->j_trans_start_time = 0 ; + SB_JOURNAL(p_s_sb)->j_trans_id++ ; + SB_JOURNAL(p_s_sb)->j_must_wait = 0 ; + SB_JOURNAL(p_s_sb)->j_len_alloc = 0 ; + SB_JOURNAL(p_s_sb)->j_next_full_flush = 0 ; + SB_JOURNAL(p_s_sb)->j_next_async_flush = 0 ; + init_journal_hash(p_s_sb) ; + + /* if the next transaction has any chance of wrapping, flush + ** transactions that might get overwritten. If any journal lists are very + ** old flush them as well. 
+ */ + for (i = 0 ; i < JOURNAL_LIST_COUNT ; i++) { + jindex = i ; + if (SB_JOURNAL_LIST(p_s_sb)[jindex].j_len > 0 && SB_JOURNAL(p_s_sb)->j_start <= SB_JOURNAL_LIST(p_s_sb)[jindex].j_start) { + if ((SB_JOURNAL(p_s_sb)->j_start + JOURNAL_TRANS_MAX + 1) >= SB_JOURNAL_LIST(p_s_sb)[jindex].j_start) { + flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1) ; + } + } else if (SB_JOURNAL_LIST(p_s_sb)[jindex].j_len > 0 && + (SB_JOURNAL(p_s_sb)->j_start + JOURNAL_TRANS_MAX + 1) > JOURNAL_BLOCK_COUNT) { + if (((SB_JOURNAL(p_s_sb)->j_start + JOURNAL_TRANS_MAX + 1) % JOURNAL_BLOCK_COUNT) >= + SB_JOURNAL_LIST(p_s_sb)[jindex].j_start) { + flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1 ) ; + } + } + /* this check should always be run, to send old lists to disk */ + if (SB_JOURNAL_LIST(p_s_sb)[jindex].j_len > 0 && + SB_JOURNAL_LIST(p_s_sb)[jindex].j_timestamp < + (CURRENT_TIME - (JOURNAL_MAX_TRANS_AGE * 4))) { + flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + jindex, 1 ) ; + } + } + + /* if the next journal_list is still in use, flush it */ + if (SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_len != 0) { + flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + SB_JOURNAL_LIST_INDEX(p_s_sb), 1) ; + } + + /* we don't want anyone flushing the new transaction's list */ + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_commit_flushing), 1) ; + atomic_set(&(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_flushing), 1) ; + SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_list_bitmap = get_list_bitmap(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + + SB_JOURNAL_LIST_INDEX(p_s_sb)) ; + + if (!(SB_JOURNAL_LIST(p_s_sb)[SB_JOURNAL_LIST_INDEX(p_s_sb)].j_list_bitmap)) { + reiserfs_panic(p_s_sb, "journal-1996: do_journal_end, could not get a list bitmap\n") ; + } + unlock_journal(p_s_sb) ; + atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ; + /* wake up any body waiting to join. 
*/ + wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; + return 0 ; +} + + + diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/lbalance.c linux/fs/reiserfs/lbalance.c --- v2.4.0/linux/fs/reiserfs/lbalance.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/lbalance.c Mon Jan 15 15:31:19 2001 @@ -0,0 +1,1326 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +#else + +#include "nokernel.h" + +#endif + +/* these are used in do_balance.c */ + +/* leaf_move_items + leaf_shift_left + leaf_shift_right + leaf_delete_items + leaf_insert_into_buf + leaf_paste_in_buffer + leaf_cut_from_buffer + leaf_paste_entries + */ + + +/* copy copy_count entries from source directory item to dest buffer (creating new item if needed) */ +static void leaf_copy_dir_entries (struct buffer_info * dest_bi, struct buffer_head * source, + int last_first, int item_num, int from, int copy_count) +{ + struct buffer_head * dest = dest_bi->bi_bh; + int item_num_in_dest; /* either the number of target item, + or if we must create a new item, + the number of the item we will + create it next to */ + struct item_head * ih; + struct reiserfs_de_head * deh; + int copy_records_len; /* length of all records in item to be copied */ + char * records; + + ih = B_N_PITEM_HEAD (source, item_num); + +#ifdef CONFIG_REISERFS_CHECK + if (!is_direntry_le_ih (ih)) + reiserfs_panic(0, "vs-10000: leaf_copy_dir_entries: item must be directory item"); +#endif + + /* length of all record to be copied and first byte of the last of them */ + deh = B_I_DEH (source, ih); + if (copy_count) { + copy_records_len = (from ? deh[from - 1].deh_location : ih->ih_item_len) - + deh[from + copy_count - 1].deh_location; + records = source->b_data + ih->ih_item_location + deh[from + copy_count - 1].deh_location; + } else { + copy_records_len = 0; + records = 0; + } + + /* when copy last to first, dest buffer can contain 0 items */ + item_num_in_dest = (last_first == LAST_TO_FIRST) ? (( B_NR_ITEMS(dest) ) ? 
0 : -1) : (B_NR_ITEMS(dest) - 1); + + /* if there are no items in dest or the first/last item in dest is not item of the same directory */ + if ( (item_num_in_dest == - 1) || +#ifdef REISERFS_FSCK + (last_first == FIRST_TO_LAST && are_items_mergeable (B_N_PITEM_HEAD (dest, item_num_in_dest), ih, dest->b_size) == 0) || + (last_first == LAST_TO_FIRST && are_items_mergeable (ih, B_N_PITEM_HEAD (dest, item_num_in_dest), dest->b_size) == 0)) { +#else + (last_first == FIRST_TO_LAST && le_key_k_offset (ih_version (ih), &(ih->ih_key)) == DOT_OFFSET) || + (last_first == LAST_TO_FIRST && comp_short_le_keys/*COMP_SHORT_KEYS*/ (&ih->ih_key, B_N_PKEY (dest, item_num_in_dest)))) { +#endif + /* create new item in dest */ + struct item_head new_ih; + + /* form item header */ + memcpy (&new_ih.ih_key, &ih->ih_key, KEY_SIZE); + new_ih.ih_version = cpu_to_le16 (ITEM_VERSION_1); + /* calculate item len */ + new_ih.ih_item_len = cpu_to_le16 (DEH_SIZE * copy_count + copy_records_len); + I_ENTRY_COUNT(&new_ih) = 0; + + if (last_first == LAST_TO_FIRST) { + /* form key by the following way */ + if (from < I_ENTRY_COUNT(ih)) { + set_le_ih_k_offset (&new_ih, cpu_to_le32 (le32_to_cpu (deh[from].deh_offset))); + /*memcpy (&new_ih.ih_key.k_offset, &deh[from].deh_offset, SHORT_KEY_SIZE);*/ + } else { + /* no entries will be copied to this item in this function */ + set_le_ih_k_offset (&new_ih, cpu_to_le32 (U32_MAX)); + /* this item is not yet valid, but we want I_IS_DIRECTORY_ITEM to return 1 for it, so we -1 */ + } + set_le_key_k_type (ITEM_VERSION_1, &(new_ih.ih_key), TYPE_DIRENTRY); + } + + /* insert item into dest buffer */ + leaf_insert_into_buf (dest_bi, (last_first == LAST_TO_FIRST) ? 0 : B_NR_ITEMS(dest), &new_ih, NULL, 0); + } else { + /* prepare space for entries */ + leaf_paste_in_buffer (dest_bi, (last_first==FIRST_TO_LAST) ? (B_NR_ITEMS(dest) - 1) : 0, MAX_US_INT, + DEH_SIZE * copy_count + copy_records_len, records, 0 + ); + } + + item_num_in_dest = (last_first == FIRST_TO_LAST) ? (B_NR_ITEMS(dest)-1) : 0; + + leaf_paste_entries (dest_bi->bi_bh, item_num_in_dest, + (last_first == FIRST_TO_LAST) ? I_ENTRY_COUNT(B_N_PITEM_HEAD (dest, item_num_in_dest)) : 0, + copy_count, deh + from, records, + DEH_SIZE * copy_count + copy_records_len + ); +} + + +/* Copy the first (if last_first == FIRST_TO_LAST) or last (last_first == LAST_TO_FIRST) item or + part of it or nothing (see the return 0 below) from SOURCE to the end + (if last_first) or beginning (!last_first) of the DEST */ +/* returns 1 if anything was copied, else 0 */ +static int leaf_copy_boundary_item (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, + int bytes_or_entries) +{ + struct buffer_head * dest = dest_bi->bi_bh; + int dest_nr_item, src_nr_item; /* number of items in the source and destination buffers */ + struct item_head * ih; + struct item_head * dih; + + dest_nr_item = B_NR_ITEMS(dest); + + if ( last_first == FIRST_TO_LAST ) { + /* if ( DEST is empty or first item of SOURCE and last item of DEST are the items of different objects + or of different types ) then there is no need to treat this item differently from the other items + that we copy, so we return */ + ih = B_N_PITEM_HEAD (src, 0); + dih = B_N_PITEM_HEAD (dest, dest_nr_item - 1); +#ifdef REISERFS_FSCK + if (!dest_nr_item || (are_items_mergeable (dih, ih, src->b_size) == 0)) +#else + if (!dest_nr_item || (!op_is_left_mergeable (&(ih->ih_key), src->b_size))) +#endif + /* there is nothing to merge */ + return 0; + +#ifdef CONFIG_REISERFS_CHECK + if ( ! 
ih->ih_item_len ) + reiserfs_panic (0, "vs-10010: leaf_copy_boundary_item: item can not have empty dynamic length"); +#endif + + if ( is_direntry_le_ih (ih) ) { + if ( bytes_or_entries == -1 ) + /* copy all entries to dest */ + bytes_or_entries = le16_to_cpu (ih->u.ih_entry_count); + leaf_copy_dir_entries (dest_bi, src, FIRST_TO_LAST, 0, 0, bytes_or_entries); + return 1; + } + + /* copy part of the body of the first item of SOURCE to the end of the body of the last item of the DEST + part defined by 'bytes_or_entries'; if bytes_or_entries == -1 copy whole body; don't create new item header + */ + if ( bytes_or_entries == -1 ) + bytes_or_entries = le16_to_cpu (ih->ih_item_len); + +#ifdef CONFIG_REISERFS_CHECK + else { + if (bytes_or_entries == le16_to_cpu (ih->ih_item_len) && is_indirect_le_ih(ih)) + if (get_ih_free_space (ih)) + reiserfs_panic (0, "vs-10020: leaf_copy_boundary_item: " + "last unformatted node must be filled entirely (%h)", + ih); + } +#endif + + /* merge first item (or its part) of src buffer with the last + item of dest buffer. Both are of the same file */ + leaf_paste_in_buffer (dest_bi, + dest_nr_item - 1, dih->ih_item_len, bytes_or_entries, B_I_PITEM(src,ih), 0 + ); + + if (is_indirect_le_ih (dih)) { +#ifdef CONFIG_REISERFS_CHECK + if (get_ih_free_space (dih)) + reiserfs_panic (0, "vs-10030: leaf_copy_boundary_item: " + "merge to left: last unformatted node of non-last indirect item %h must have zerto free space", + ih); +#endif + if (bytes_or_entries == le16_to_cpu (ih->ih_item_len)) + set_ih_free_space (dih, get_ih_free_space (ih)); + } + + return 1; + } + + + /* copy boundary item to right (last_first == LAST_TO_FIRST) */ + + /* ( DEST is empty or last item of SOURCE and first item of DEST + are the items of different object or of different types ) + */ + src_nr_item = B_NR_ITEMS (src); + ih = B_N_PITEM_HEAD (src, src_nr_item - 1); + dih = B_N_PITEM_HEAD (dest, 0); + +#ifdef REISERFS_FSCK + if (!dest_nr_item || are_items_mergeable (ih, dih, src->b_size) == 0) +#else + if (!dest_nr_item || !op_is_left_mergeable (&(dih->ih_key), src->b_size)) +#endif + return 0; + + if ( is_direntry_le_ih (ih)) { + if ( bytes_or_entries == -1 ) + /* bytes_or_entries = entries number in last item body of SOURCE */ + bytes_or_entries = le16_to_cpu (ih->u.ih_entry_count); + + leaf_copy_dir_entries (dest_bi, src, LAST_TO_FIRST, src_nr_item - 1, le16_to_cpu (ih->u.ih_entry_count) - bytes_or_entries, bytes_or_entries); + return 1; + } + + /* copy part of the body of the last item of SOURCE to the begin of the body of the first item of the DEST; + part defined by 'bytes_or_entries'; if byte_or_entriess == -1 copy whole body; change first item key of the DEST; + don't create new item header + */ + +#ifdef CONFIG_REISERFS_CHECK + if (is_indirect_le_ih(ih) && get_ih_free_space (ih)) + reiserfs_panic (0, "vs-10040: leaf_copy_boundary_item: " + "merge to right: last unformatted node of non-last indirect item must be filled entirely (%h)", + ih); +#endif + + if ( bytes_or_entries == -1 ) { + /* bytes_or_entries = length of last item body of SOURCE */ + bytes_or_entries = ih->ih_item_len; + +#ifdef CONFIG_REISERFS_CHECK + if (le_ih_k_offset (dih) != le_ih_k_offset (ih) + op_bytes_number (ih, src->b_size)) + reiserfs_panic (0, "vs-10050: leaf_copy_boundary_item: items %h and %h do not match", ih, dih); +#endif + + /* change first item key of the DEST */ + set_le_ih_k_offset (dih, le_ih_k_offset (ih)); + + /* item becomes non-mergeable */ + /* or mergeable if left item was */ + set_le_ih_k_type (dih, 
le_ih_k_type (ih)); + } else { + /* merge to right only part of item */ +#ifdef CONFIG_REISERFS_CHECK + if ( le16_to_cpu (ih->ih_item_len) <= bytes_or_entries ) + reiserfs_panic (0, "vs-10060: leaf_copy_boundary_item: no so much bytes %lu (needed %lu)", + ih->ih_item_len, bytes_or_entries); +#endif + + /* change first item key of the DEST */ + if ( is_direct_le_ih (dih) ) { +#ifdef CONFIG_REISERFS_CHECK + if (le_ih_k_offset (dih) <= (unsigned long)bytes_or_entries) + reiserfs_panic (0, "vs-10070: leaf_copy_boundary_item: dih %h, bytes_or_entries(%d)", + dih, bytes_or_entries); +#endif + set_le_ih_k_offset (dih, le_ih_k_offset (dih) - bytes_or_entries); + } else { +#ifdef CONFIG_REISERFS_CHECK + if (le_ih_k_offset (dih) <= (bytes_or_entries / UNFM_P_SIZE) * dest->b_size ) + reiserfs_panic (0, "vs-10080: leaf_copy_boundary_item: dih %h, bytes_or_entries(%d)", + dih, (bytes_or_entries/UNFM_P_SIZE)*dest->b_size); +#endif + set_le_ih_k_offset (dih, le_ih_k_offset (dih) - ((bytes_or_entries / UNFM_P_SIZE) * dest->b_size)); + } + } + + leaf_paste_in_buffer (dest_bi, 0, 0, bytes_or_entries, B_I_PITEM(src,ih) + ih->ih_item_len - bytes_or_entries, 0); + return 1; +} + + +/* copy cpy_mun items from buffer src to buffer dest + * last_first == FIRST_TO_LAST means, that we copy cpy_num items beginning from first-th item in src to tail of dest + * last_first == LAST_TO_FIRST means, that we copy cpy_num items beginning from first-th item in src to head of dest + */ +static void leaf_copy_items_entirely (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, + int first, int cpy_num) +{ + struct buffer_head * dest; + int nr; + int dest_before; + int last_loc, last_inserted_loc, location; + int i, j; + struct block_head * blkh; + struct item_head * ih; + +#ifdef CONFIG_REISERFS_CHECK + if (last_first != LAST_TO_FIRST && last_first != FIRST_TO_LAST) + reiserfs_panic (0, "vs-10090: leaf_copy_items_entirely: bad last_first parameter %d", last_first); + + if (B_NR_ITEMS (src) - first < cpy_num) + reiserfs_panic (0, "vs-10100: leaf_copy_items_entirely: too few items in source %d, required %d from %d", + B_NR_ITEMS(src), cpy_num, first); + + if (cpy_num < 0) + reiserfs_panic (0, "vs-10110: leaf_copy_items_entirely: can not copy negative amount of items"); + + if ( ! dest_bi ) + reiserfs_panic (0, "vs-10120: leaf_copy_items_entirely: can not copy negative amount of items"); +#endif + + dest = dest_bi->bi_bh; + +#ifdef CONFIG_REISERFS_CHECK + if ( ! dest ) + reiserfs_panic (0, "vs-10130: leaf_copy_items_entirely: can not copy negative amount of items"); +#endif + + if (cpy_num == 0) + return; + + nr = le16_to_cpu ((blkh = B_BLK_HEAD(dest))->blk_nr_item); + + /* we will insert items before 0-th or nr-th item in dest buffer. It depends of last_first parameter */ + dest_before = (last_first == LAST_TO_FIRST) ? 
0 : nr; + + /* location of head of first new item */ + ih = B_N_PITEM_HEAD (dest, dest_before); + +#ifdef CONFIG_REISERFS_CHECK + if (le16_to_cpu (blkh->blk_free_space) < cpy_num * IH_SIZE) { + reiserfs_panic (0, "vs-10140: leaf_copy_items_entirely: " + "not enough free space for headers %d (needed %d)", + B_FREE_SPACE (dest), cpy_num * IH_SIZE); + } +#endif + + /* prepare space for headers */ + memmove (ih + cpy_num, ih, (nr-dest_before) * IH_SIZE); + + /* copy item headers */ + memcpy (ih, B_N_PITEM_HEAD (src, first), cpy_num * IH_SIZE); + + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) - IH_SIZE * cpy_num); + + /* location of unmovable item */ + j = location = (dest_before == 0) ? dest->b_size : (ih-1)->ih_item_location; + for (i = dest_before; i < nr + cpy_num; i ++) + ih[i-dest_before].ih_item_location = + (location -= ih[i-dest_before].ih_item_len); + + /* prepare space for items */ + last_loc = ih[nr+cpy_num-1-dest_before].ih_item_location; + last_inserted_loc = ih[cpy_num-1].ih_item_location; + + /* check free space */ +#ifdef CONFIG_REISERFS_CHECK + if (le16_to_cpu (blkh->blk_free_space) < j - last_inserted_loc) { + reiserfs_panic (0, "vs-10150: leaf_copy_items_entirely: not enough free space for items %d (needed %d)", + le16_to_cpu (blkh->blk_free_space), j - last_inserted_loc); + } +#endif + + memmove (dest->b_data + last_loc, + dest->b_data + last_loc + j - last_inserted_loc, + last_inserted_loc - last_loc); + + /* copy items */ + memcpy (dest->b_data + last_inserted_loc, B_N_PITEM(src,(first + cpy_num - 1)), + j - last_inserted_loc); + + /* sizes, item number */ + blkh->blk_nr_item = cpu_to_le16 (le16_to_cpu (blkh->blk_nr_item) + cpy_num); + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) - (j - last_inserted_loc)); + + do_balance_mark_leaf_dirty (dest_bi->tb, dest, 0); + + if (dest_bi->bi_parent) { +#ifdef CONFIG_REISERFS_CHECK + if (B_N_CHILD (dest_bi->bi_parent, dest_bi->bi_position)->dc_block_number != dest->b_blocknr) { + reiserfs_panic (0, "vs-10160: leaf_copy_items_entirely: " + "block number in bh does not match to field in disk_child structure %lu and %lu", + dest->b_blocknr, B_N_CHILD (dest_bi->bi_parent, dest_bi->bi_position)->dc_block_number); + } +#endif + B_N_CHILD (dest_bi->bi_parent, dest_bi->bi_position)->dc_size += + j - last_inserted_loc + IH_SIZE * cpy_num; + + do_balance_mark_internal_dirty (dest_bi->tb, dest_bi->bi_parent, 0); + } +} + + +/* This function splits the (liquid) item into two items (useful when + shifting part of an item into another node.) 
*/ +static void leaf_item_bottle (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, + int item_num, int cpy_bytes) +{ + struct buffer_head * dest = dest_bi->bi_bh; + struct item_head * ih; + +#ifdef CONFIG_REISERFS_CHECK + if ( cpy_bytes == -1 ) + reiserfs_panic (0, "vs-10170: leaf_item_bottle: bytes == - 1 means: do not split item"); +#endif + + if ( last_first == FIRST_TO_LAST ) { + /* if ( if item in position item_num in buffer SOURCE is directory item ) */ + if (is_direntry_le_ih (ih = B_N_PITEM_HEAD(src,item_num))) + leaf_copy_dir_entries (dest_bi, src, FIRST_TO_LAST, item_num, 0, cpy_bytes); + else { + struct item_head n_ih; + + /* copy part of the body of the item number 'item_num' of SOURCE to the end of the DEST + part defined by 'cpy_bytes'; create new item header; change old item_header (????); + n_ih = new item_header; + */ + memcpy (&n_ih, ih, IH_SIZE); + n_ih.ih_item_len = cpu_to_le16 (cpy_bytes); + if (is_indirect_le_ih (ih)) { +#ifdef CONFIG_REISERFS_CHECK + if (cpy_bytes == le16_to_cpu (ih->ih_item_len) && get_ih_free_space (ih)) + reiserfs_panic (0, "vs-10180: leaf_item_bottle: " + "when whole indirect item is bottle to left neighbor, it must have free_space==0 (not %lu)", + get_ih_free_space (ih)); +#endif + set_ih_free_space (&n_ih, 0); + } + +#ifdef CONFIG_REISERFS_CHECK + if (op_is_left_mergeable (&(ih->ih_key), src->b_size)) + reiserfs_panic (0, "vs-10190: leaf_item_bottle: bad mergeability of item %h", ih); +#endif + n_ih.ih_version = ih->ih_version;; + leaf_insert_into_buf (dest_bi, B_NR_ITEMS(dest), &n_ih, B_N_PITEM (src, item_num), 0); + } + } else { + /* if ( if item in position item_num in buffer SOURCE is directory item ) */ + if (is_direntry_le_ih(ih = B_N_PITEM_HEAD (src, item_num))) + leaf_copy_dir_entries (dest_bi, src, LAST_TO_FIRST, item_num, I_ENTRY_COUNT(ih) - cpy_bytes, cpy_bytes); + else { + struct item_head n_ih; + + /* copy part of the body of the item number 'item_num' of SOURCE to the begin of the DEST + part defined by 'cpy_bytes'; create new item header; + n_ih = new item_header; + */ + memcpy (&n_ih, ih, SHORT_KEY_SIZE); + n_ih.ih_version = cpu_to_le16 (ih_version (ih)); + if (is_direct_le_ih (ih)) { + set_le_ih_k_offset (&n_ih, le_ih_k_offset (ih) + le16_to_cpu (ih->ih_item_len) - cpy_bytes); + set_le_ih_k_type (&n_ih, TYPE_DIRECT); + set_ih_free_space (&n_ih, MAX_US_INT); + } else { + /* indirect item */ +#ifdef CONFIG_REISERFS_CHECK + if (!cpy_bytes && get_ih_free_space (ih)) + reiserfs_panic (0, "vs-10200: leaf_item_bottle: ih->ih_free_space must be 0 when indirect item will be appended"); +#endif + set_le_ih_k_offset (&n_ih, le_ih_k_offset (ih) + (le16_to_cpu (ih->ih_item_len) - cpy_bytes) / UNFM_P_SIZE * dest->b_size); + set_le_ih_k_type (&n_ih, TYPE_INDIRECT); + set_ih_free_space (&n_ih, get_ih_free_space (ih)); + } + + /* set item length */ + n_ih.ih_item_len = cpu_to_le16 (cpy_bytes); + n_ih.ih_version = cpu_to_le16 (le16_to_cpu (ih->ih_version)); + leaf_insert_into_buf (dest_bi, 0, &n_ih, B_N_PITEM(src,item_num) + le16_to_cpu (ih->ih_item_len) - cpy_bytes, 0); + } + } +} + + +/* If cpy_bytes equals minus one than copy cpy_num whole items from SOURCE to DEST. + If cpy_bytes not equal to minus one than copy cpy_num-1 whole items from SOURCE to DEST. + From last item copy cpy_num bytes for regular item and cpy_num directory entries for + directory item. 
*/
+static int leaf_copy_items (struct buffer_info * dest_bi, struct buffer_head * src, int last_first, int cpy_num,
+                            int cpy_bytes)
+{
+    struct buffer_head * dest;
+    int pos, i, src_nr_item, bytes;
+
+    dest = dest_bi->bi_bh;
+#ifdef CONFIG_REISERFS_CHECK
+    if (!dest || !src)
+        reiserfs_panic (0, "vs-10210: leaf_copy_items: !dest || !src");
+
+    if ( last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST )
+        reiserfs_panic (0, "vs-10220: leaf_copy_items: last_first != FIRST_TO_LAST && last_first != LAST_TO_FIRST");
+
+    if ( B_NR_ITEMS(src) < cpy_num )
+        reiserfs_panic (0, "vs-10230: leaf_copy_items: Not enough items: %d, required %d", B_NR_ITEMS(src), cpy_num);
+
+    if ( cpy_num < 0 )
+        reiserfs_panic (0, "vs-10240: leaf_copy_items: cpy_num < 0 (%d)", cpy_num);
+#endif
+
+    if ( cpy_num == 0 )
+        return 0;
+
+    if ( last_first == FIRST_TO_LAST ) {
+        /* copy items to left */
+        pos = 0;
+        if ( cpy_num == 1 )
+            bytes = cpy_bytes;
+        else
+            bytes = -1;
+
+        /* copy the first item, or part of it, or nothing, to the end of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,0,bytes)) */
+        i = leaf_copy_boundary_item (dest_bi, src, FIRST_TO_LAST, bytes);
+        cpy_num -= i;
+        if ( cpy_num == 0 )
+            return i;
+        pos += i;
+        if ( cpy_bytes == -1 )
+            /* copy first cpy_num items starting from position 'pos' of SOURCE to end of DEST */
+            leaf_copy_items_entirely (dest_bi, src, FIRST_TO_LAST, pos, cpy_num);
+        else {
+            /* copy first cpy_num-1 items starting from position 'pos-1' of the SOURCE to the end of the DEST */
+            leaf_copy_items_entirely (dest_bi, src, FIRST_TO_LAST, pos, cpy_num-1);
+
+            /* copy part of the item which number is cpy_num+pos-1 to the end of the DEST */
+            leaf_item_bottle (dest_bi, src, FIRST_TO_LAST, cpy_num+pos-1, cpy_bytes);
+        }
+    } else {
+        /* copy items to right */
+        src_nr_item = B_NR_ITEMS (src);
+        if ( cpy_num == 1 )
+            bytes = cpy_bytes;
+        else
+            bytes = -1;
+
+        /* copy the last item, or part of it, or nothing, to the beginning of the DEST (i = leaf_copy_boundary_item(DEST,SOURCE,1,bytes)); */
+        i = leaf_copy_boundary_item (dest_bi, src, LAST_TO_FIRST, bytes);
+
+        cpy_num -= i;
+        if ( cpy_num == 0 )
+            return i;
+
+        pos = src_nr_item - cpy_num - i;
+        if ( cpy_bytes == -1 ) {
+            /* starting from position 'pos' copy last cpy_num items of SOURCE to beginning of DEST */
+            leaf_copy_items_entirely (dest_bi, src, LAST_TO_FIRST, pos, cpy_num);
+        } else {
+            /* copy last cpy_num-1 items starting from position 'pos+1' of the SOURCE to the beginning of the DEST; */
+            leaf_copy_items_entirely (dest_bi, src, LAST_TO_FIRST, pos+1, cpy_num-1);
+
+            /* copy part of the item which number is pos to the beginning of the DEST */
+            leaf_item_bottle (dest_bi, src, LAST_TO_FIRST, pos, cpy_bytes);
+        }
+    }
+    return i;
+}
+
+
+/* there are types of copying: from S[0] to L[0],
+   from S[0] to R[0],
+   from R[0] to L[0].
for each of these we have to define parent and + positions of destination and source buffers */ +static void leaf_define_dest_src_infos (int shift_mode, struct tree_balance * tb, struct buffer_info * dest_bi, + struct buffer_info * src_bi, int * first_last, + struct buffer_head * Snew) +{ +#ifdef CONFIG_REISERFS_CHECK + memset (dest_bi, 0, sizeof (struct buffer_info)); + memset (src_bi, 0, sizeof (struct buffer_info)); +#endif + + /* define dest, src, dest parent, dest position */ + switch (shift_mode) { + case LEAF_FROM_S_TO_L: /* it is used in leaf_shift_left */ + src_bi->tb = tb; + src_bi->bi_bh = PATH_PLAST_BUFFER (tb->tb_path); + src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, 0); + src_bi->bi_position = PATH_H_B_ITEM_ORDER (tb->tb_path, 0); /* src->b_item_order */ + dest_bi->tb = tb; + dest_bi->bi_bh = tb->L[0]; + dest_bi->bi_parent = tb->FL[0]; + dest_bi->bi_position = get_left_neighbor_position (tb, 0); + *first_last = FIRST_TO_LAST; + break; + + case LEAF_FROM_S_TO_R: /* it is used in leaf_shift_right */ + src_bi->tb = tb; + src_bi->bi_bh = PATH_PLAST_BUFFER (tb->tb_path); + src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, 0); + src_bi->bi_position = PATH_H_B_ITEM_ORDER (tb->tb_path, 0); + dest_bi->tb = tb; + dest_bi->bi_bh = tb->R[0]; + dest_bi->bi_parent = tb->FR[0]; + dest_bi->bi_position = get_right_neighbor_position (tb, 0); + *first_last = LAST_TO_FIRST; + break; + + case LEAF_FROM_R_TO_L: /* it is used in balance_leaf_when_delete */ + src_bi->tb = tb; + src_bi->bi_bh = tb->R[0]; + src_bi->bi_parent = tb->FR[0]; + src_bi->bi_position = get_right_neighbor_position (tb, 0); + dest_bi->tb = tb; + dest_bi->bi_bh = tb->L[0]; + dest_bi->bi_parent = tb->FL[0]; + dest_bi->bi_position = get_left_neighbor_position (tb, 0); + *first_last = FIRST_TO_LAST; + break; + + case LEAF_FROM_L_TO_R: /* it is used in balance_leaf_when_delete */ + src_bi->tb = tb; + src_bi->bi_bh = tb->L[0]; + src_bi->bi_parent = tb->FL[0]; + src_bi->bi_position = get_left_neighbor_position (tb, 0); + dest_bi->tb = tb; + dest_bi->bi_bh = tb->R[0]; + dest_bi->bi_parent = tb->FR[0]; + dest_bi->bi_position = get_right_neighbor_position (tb, 0); + *first_last = LAST_TO_FIRST; + break; + + case LEAF_FROM_S_TO_SNEW: + src_bi->tb = tb; + src_bi->bi_bh = PATH_PLAST_BUFFER (tb->tb_path); + src_bi->bi_parent = PATH_H_PPARENT (tb->tb_path, 0); + src_bi->bi_position = PATH_H_B_ITEM_ORDER (tb->tb_path, 0); + dest_bi->tb = tb; + dest_bi->bi_bh = Snew; + dest_bi->bi_parent = 0; + dest_bi->bi_position = 0; + *first_last = LAST_TO_FIRST; + break; + + default: + reiserfs_panic (0, "vs-10250: leaf_define_dest_src_infos: shift type is unknown (%d)", shift_mode); + } +#ifdef CONFIG_REISERFS_CHECK + if (src_bi->bi_bh == 0 || dest_bi->bi_bh == 0) { + reiserfs_panic (0, "vs-10260: leaf_define_dest_src_etc: mode==%d, source (%p) or dest (%p) buffer is initialized incorrectly", + shift_mode, src_bi->bi_bh, dest_bi->bi_bh); + } +#endif +} + + + + +/* copy mov_num items and mov_bytes of the (mov_num-1)th item to + neighbor. Delete them from source */ +int leaf_move_items (int shift_mode, struct tree_balance * tb, int mov_num, int mov_bytes, struct buffer_head * Snew) +{ + int ret_value; + struct buffer_info dest_bi, src_bi; + int first_last; + + leaf_define_dest_src_infos (shift_mode, tb, &dest_bi, &src_bi, &first_last, Snew); + + ret_value = leaf_copy_items (&dest_bi, src_bi.bi_bh, first_last, mov_num, mov_bytes); + + leaf_delete_items (&src_bi, first_last, (first_last == FIRST_TO_LAST) ? 
0 : (B_NR_ITEMS(src_bi.bi_bh) - mov_num), mov_num, mov_bytes); + + + return ret_value; +} + + +/* Shift shift_num items (and shift_bytes of last shifted item if shift_bytes != -1) + from S[0] to L[0] and replace the delimiting key */ +int leaf_shift_left (struct tree_balance * tb, int shift_num, int shift_bytes) +{ + struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path); + int i; + + /* move shift_num (and shift_bytes bytes) items from S[0] to left neighbor L[0] */ + i = leaf_move_items (LEAF_FROM_S_TO_L, tb, shift_num, shift_bytes, 0); + + if ( shift_num ) { + if (B_NR_ITEMS (S0) == 0) { /* number of items in S[0] == 0 */ + +#ifdef CONFIG_REISERFS_CHECK + if ( shift_bytes != -1 ) + reiserfs_panic (tb->tb_sb, "vs-10270: leaf_shift_left: S0 is empty now, but shift_bytes != -1 (%d)", shift_bytes); + + if (tb->tb_mode == M_PASTE || tb->tb_mode == M_INSERT) { + print_cur_tb ("vs-10275"); + reiserfs_panic (tb->tb_sb, "vs-10275: leaf_shift_left: balance condition corrupted (%c)", tb->tb_mode); + } +#endif + + if (PATH_H_POSITION (tb->tb_path, 1) == 0) + replace_key (tb, tb->CFL[0], tb->lkey[0], PATH_H_PPARENT (tb->tb_path, 0), 0); + +#if 0 + /* change right_delimiting_key field in L0's block header */ + copy_key (B_PRIGHT_DELIM_KEY(tb->L[0]), B_PRIGHT_DELIM_KEY (S0)); +#endif + } else { + /* replace lkey in CFL[0] by 0-th key from S[0]; */ + replace_key (tb, tb->CFL[0], tb->lkey[0], S0, 0); + +#if 0 + /* change right_delimiting_key field in L0's block header */ + copy_key (B_PRIGHT_DELIM_KEY(tb->L[0]), B_N_PKEY (S0, 0)); +#endif +#ifdef CONFIG_REISERFS_CHECK + if (shift_bytes != -1 && !(is_direntry_le_ih (B_N_PITEM_HEAD (S0, 0)) + && !I_ENTRY_COUNT (B_N_PITEM_HEAD (S0, 0)))) { + if (!op_is_left_mergeable (B_N_PKEY (S0, 0), S0->b_size)) { + reiserfs_panic (tb->tb_sb, "vs-10280: leaf_shift_left: item must be mergeable"); + } + } +#endif + } + } + + return i; +} + + + + + +/* CLEANING STOPPED HERE */ + + + + +/* Shift shift_num (shift_bytes) items from S[0] to the right neighbor, and replace the delimiting key */ +int leaf_shift_right( + struct tree_balance * tb, + int shift_num, + int shift_bytes + ) +{ + // struct buffer_head * S0 = PATH_PLAST_BUFFER (tb->tb_path); + int ret_value; + + /* move shift_num (and shift_bytes) items from S[0] to right neighbor R[0] */ + ret_value = leaf_move_items (LEAF_FROM_S_TO_R, tb, shift_num, shift_bytes, 0); + + /* replace rkey in CFR[0] by the 0-th key from R[0] */ + if (shift_num) { + replace_key (tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); + +#if 0 + /* change right_delimiting_key field in S0's block header */ + copy_key (B_PRIGHT_DELIM_KEY(S0), B_N_PKEY (tb->R[0], 0)); +#endif + } + + return ret_value; +} + + + +static void leaf_delete_items_entirely (struct buffer_info * bi, + int first, int del_num); +/* If del_bytes == -1, starting from position 'first' delete del_num items in whole in buffer CUR. + If not. + If last_first == 0. Starting from position 'first' delete del_num-1 items in whole. Delete part of body of + the first item. Part defined by del_bytes. Don't delete first item header + If last_first == 1. Starting from position 'first+1' delete del_num-1 items in whole. Delete part of body of + the last item . Part defined by del_bytes. Don't delete last item header. 
+*/
+void leaf_delete_items (struct buffer_info * cur_bi, int last_first,
+                        int first, int del_num, int del_bytes)
+{
+    struct buffer_head * bh;
+    int item_amount = B_NR_ITEMS (bh = cur_bi->bi_bh);
+
+#ifdef CONFIG_REISERFS_CHECK
+    if ( !bh )
+        reiserfs_panic (0, "leaf_delete_items: 10155: bh is not defined");
+
+    if ( del_num < 0 )
+        reiserfs_panic (0, "leaf_delete_items: 10160: del_num can not be < 0. del_num==%d", del_num);
+
+    if ( first < 0 || first + del_num > item_amount )
+        reiserfs_panic (0, "leaf_delete_items: 10165: invalid number of first item to be deleted (%d) or "
+                        "not enough items (%d) to delete (only %d)", first, first + del_num, item_amount);
+#endif
+
+    if ( del_num == 0 )
+        return;
+
+    if ( first == 0 && del_num == item_amount && del_bytes == -1 ) {
+        make_empty_node (cur_bi);
+        do_balance_mark_leaf_dirty (cur_bi->tb, bh, 0);
+        return;
+    }
+
+    if ( del_bytes == -1 )
+        /* delete del_num items beginning from item in position first */
+        leaf_delete_items_entirely (cur_bi, first, del_num);
+    else {
+        if ( last_first == FIRST_TO_LAST ) {
+            /* delete del_num-1 items beginning from item in position first */
+            leaf_delete_items_entirely (cur_bi, first, del_num-1);
+
+            /* delete the part of the first item of the bh
+               do not delete the item header
+            */
+            leaf_cut_from_buffer (cur_bi, 0, 0, del_bytes);
+        } else {
+            struct item_head * ih;
+            int len;
+
+            /* delete del_num-1 items beginning from item in position first+1 */
+            leaf_delete_items_entirely (cur_bi, first+1, del_num-1);
+
+            if (is_direntry_le_ih (ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh)-1))) /* the last item is a directory item */
+                /* len = number of directory entries in this item */
+                len = le16_to_cpu (ih->u.ih_entry_count);
+            else
+                /* len = body length of item */
+                len = le16_to_cpu (ih->ih_item_len);
+
+            /* delete the part of the last item of the bh
+               do not delete the item header
+            */
+            leaf_cut_from_buffer (cur_bi, B_NR_ITEMS(bh)-1, len - del_bytes, del_bytes);
+        }
+    }
+}
+
+
+/* insert item into the leaf node at position 'before' */
+void leaf_insert_into_buf (struct buffer_info * bi, int before,
+                           struct item_head * inserted_item_ih,
+                           const char * inserted_item_body,
+                           int zeros_number)
+{
+    struct buffer_head * bh = bi->bi_bh;
+    int nr;
+    struct block_head * blkh;
+    struct item_head * ih;
+    int i;
+    int last_loc, unmoved_loc;
+    char * to;
+
+
+    nr = le16_to_cpu ((blkh = B_BLK_HEAD (bh))->blk_nr_item);
+
+#ifdef CONFIG_REISERFS_CHECK
+    /* check free space */
+    if (le16_to_cpu (blkh->blk_free_space) < le16_to_cpu (inserted_item_ih->ih_item_len) + IH_SIZE)
+        reiserfs_panic (0, "leaf_insert_into_buf: 10170: "
+                        "not enough free space in block %z, new item %h",
+                        bh, inserted_item_ih);
+    if (zeros_number > inserted_item_ih->ih_item_len)
+        reiserfs_panic (0, "vs-10172: leaf_insert_into_buf: "
+                        "zeros number == %d, item length == %d", zeros_number, inserted_item_ih->ih_item_len);
+#endif /* CONFIG_REISERFS_CHECK */
+
+
+    /* get the item before which the new item must be inserted */
+    ih = B_N_PITEM_HEAD (bh, before);
+
+    /* prepare space for the body of the new item */
+    last_loc = nr ? ih[nr - before - 1].ih_item_location : bh->b_size;
+    unmoved_loc = before ?
(ih-1)->ih_item_location : bh->b_size; + + memmove (bh->b_data + last_loc - inserted_item_ih->ih_item_len, + bh->b_data + last_loc, unmoved_loc - last_loc); + + to = bh->b_data + unmoved_loc - inserted_item_ih->ih_item_len; + memset (to, 0, zeros_number); + to += zeros_number; + + /* copy body to prepared space */ + if (inserted_item_body) + memmove (to, inserted_item_body, inserted_item_ih->ih_item_len - zeros_number); + else + memset(to, '\0', inserted_item_ih->ih_item_len - zeros_number); + + /* insert item header */ + memmove (ih + 1, ih, IH_SIZE * (nr - before)); + memmove (ih, inserted_item_ih, IH_SIZE); + + /* change locations */ + for (i = before; i < nr + 1; i ++) + ih[i-before].ih_item_location = + (unmoved_loc -= ih[i-before].ih_item_len); + + /* sizes, free space, item number */ + blkh->blk_nr_item = cpu_to_le16 (le16_to_cpu (blkh->blk_nr_item) + 1); + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) - + (IH_SIZE + inserted_item_ih->ih_item_len)); + + do_balance_mark_leaf_dirty (bi->tb, bh, 1); + + if (bi->bi_parent) { + B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size += (IH_SIZE + inserted_item_ih->ih_item_len); + do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0); + } +} + + +/* paste paste_size bytes to affected_item_num-th item. + When item is a directory, this only prepare space for new entries */ +void leaf_paste_in_buffer (struct buffer_info * bi, int affected_item_num, + int pos_in_item, int paste_size, + const char * body, + int zeros_number) +{ + struct buffer_head * bh = bi->bi_bh; + int nr; + struct block_head * blkh; + struct item_head * ih; + int i; + int last_loc, unmoved_loc; + + + nr = le16_to_cpu ((blkh = B_BLK_HEAD(bh))->blk_nr_item); + +#ifdef CONFIG_REISERFS_CHECK + /* check free space */ + if (le16_to_cpu (blkh->blk_free_space) < paste_size) + reiserfs_panic (0, "leaf_paste_in_buffer: 10175: not enough free space: needed %d, available %d", + paste_size, le16_to_cpu (blkh->blk_free_space)); + if (zeros_number > paste_size) { + print_cur_tb ("10177"); + reiserfs_panic (0, "vs-10177: leaf_paste_in_buffer: zero number == %d, paste_size == %d", + zeros_number, paste_size); + } +#endif /* CONFIG_REISERFS_CHECK */ + + + /* item to be appended */ + ih = B_N_PITEM_HEAD(bh, affected_item_num); + + last_loc = ih[nr - affected_item_num - 1].ih_item_location; + unmoved_loc = affected_item_num ? 
(ih-1)->ih_item_location : bh->b_size; + + /* prepare space */ + memmove (bh->b_data + last_loc - paste_size, bh->b_data + last_loc, + unmoved_loc - last_loc); + + + /* change locations */ + for (i = affected_item_num; i < nr; i ++) + ih[i-affected_item_num].ih_item_location -= paste_size; + + if ( body ) { + if (!is_direntry_le_ih (ih)) { + if (!pos_in_item) { + /* shift data to right */ + memmove (bh->b_data + ih->ih_item_location + paste_size, + bh->b_data + ih->ih_item_location, ih->ih_item_len); + /* paste data in the head of item */ + memset (bh->b_data + ih->ih_item_location, 0, zeros_number); + memcpy (bh->b_data + ih->ih_item_location + zeros_number, body, paste_size - zeros_number); + } else { + memset (bh->b_data + unmoved_loc - paste_size, 0, zeros_number); + memcpy (bh->b_data + unmoved_loc - paste_size + zeros_number, body, paste_size - zeros_number); + } + } + } + else + memset(bh->b_data + unmoved_loc - paste_size,'\0',paste_size); + + ih->ih_item_len += paste_size; + + /* change free space */ + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) - paste_size); + + do_balance_mark_leaf_dirty (bi->tb, bh, 0); + + if (bi->bi_parent) { + B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size += paste_size; + do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0); + } +} + + +/* cuts DEL_COUNT entries beginning from FROM-th entry. Directory item + does not have free space, so it moves DEHs and remaining records as + necessary. Return value is size of removed part of directory item + in bytes. */ +static int leaf_cut_entries ( + struct buffer_head * bh, + struct item_head * ih, + int from, + int del_count + ) +{ + char * item; + struct reiserfs_de_head * deh; + int prev_record_offset; /* offset of record, that is (from-1)th */ + char * prev_record; /* */ + int cut_records_len; /* length of all removed records */ + int i; + + +#ifdef CONFIG_REISERFS_CHECK + /* make sure, that item is directory and there are enough entries to + remove */ + if (!is_direntry_le_ih (ih)) + reiserfs_panic (0, "leaf_cut_entries: 10180: item is not directory item"); + + if (I_ENTRY_COUNT(ih) < from + del_count) + reiserfs_panic (0, "leaf_cut_entries: 10185: item contains not enough entries: entry_cout = %d, from = %d, to delete = %d", + I_ENTRY_COUNT(ih), from, del_count); +#endif + + if (del_count == 0) + return 0; + + /* first byte of item */ + item = bh->b_data + ih->ih_item_location; + + /* entry head array */ + deh = B_I_DEH (bh, ih); + + /* first byte of remaining entries, those are BEFORE cut entries + (prev_record) and length of all removed records (cut_records_len) */ + prev_record_offset = (from ? 
deh[from - 1].deh_location : ih->ih_item_len); + cut_records_len = prev_record_offset/*from_record*/ - deh[from + del_count - 1].deh_location; + prev_record = item + prev_record_offset; + + + /* adjust locations of remaining entries */ + for (i = I_ENTRY_COUNT(ih) - 1; i > from + del_count - 1; i --) + deh[i].deh_location -= (DEH_SIZE * del_count); + + for (i = 0; i < from; i ++) + deh[i].deh_location -= DEH_SIZE * del_count + cut_records_len; + + I_ENTRY_COUNT(ih) -= del_count; + + /* shift entry head array and entries those are AFTER removed entries */ + memmove ((char *)(deh + from), + deh + from + del_count, + prev_record - cut_records_len - (char *)(deh + from + del_count)); + + /* shift records, those are BEFORE removed entries */ + memmove (prev_record - cut_records_len - DEH_SIZE * del_count, + prev_record, item + ih->ih_item_len - prev_record); + + return DEH_SIZE * del_count + cut_records_len; +} + + +/* when cut item is part of regular file + pos_in_item - first byte that must be cut + cut_size - number of bytes to be cut beginning from pos_in_item + + when cut item is part of directory + pos_in_item - number of first deleted entry + cut_size - count of deleted entries + */ +void leaf_cut_from_buffer (struct buffer_info * bi, int cut_item_num, + int pos_in_item, int cut_size) +{ + int nr; + struct buffer_head * bh = bi->bi_bh; + struct block_head * blkh; + struct item_head * ih; + int last_loc, unmoved_loc; + int i; + + nr = le16_to_cpu ((blkh = B_BLK_HEAD (bh))->blk_nr_item); + + /* item head of truncated item */ + ih = B_N_PITEM_HEAD (bh, cut_item_num); + + if (is_direntry_le_ih (ih)) { + /* first cut entry ()*/ + cut_size = leaf_cut_entries (bh, ih, pos_in_item, cut_size); + if (pos_in_item == 0) { + /* change key */ +#ifdef CONFIG_REISERFS_CHECK + if (cut_item_num) + reiserfs_panic (0, "leaf_cut_from_buffer: 10190: " + "when 0-th enrty of item is cut, that item must be first in the node, not %d-th", cut_item_num); +#endif + /* change item key by key of first entry in the item */ + set_le_ih_k_offset (ih, le32_to_cpu (B_I_DEH (bh, ih)->deh_offset)); + /*memcpy (&ih->ih_key.k_offset, &(B_I_DEH (bh, ih)->deh_offset), SHORT_KEY_SIZE);*/ + } + } else { + /* item is direct or indirect */ +#ifdef CONFIG_REISERFS_CHECK + if (is_statdata_le_ih (ih)) + reiserfs_panic (0, "leaf_cut_from_buffer: 10195: item is stat data"); + + if (pos_in_item && pos_in_item + cut_size != le16_to_cpu (ih->ih_item_len) ) + reiserfs_panic (0, "cut_from_buf: 10200: invalid offset (%lu) or trunc_size (%lu) or ih_item_len (%lu)", + pos_in_item, cut_size, le16_to_cpu (ih->ih_item_len)); +#endif + + /* shift item body to left if cut is from the head of item */ + if (pos_in_item == 0) { + memmove (bh->b_data + le16_to_cpu (ih->ih_item_location), bh->b_data + le16_to_cpu (ih->ih_item_location) + cut_size, + le16_to_cpu (ih->ih_item_len) - cut_size); + + /* change key of item */ + if (is_direct_le_ih (ih)) + set_le_ih_k_offset (ih, le_ih_k_offset (ih) + cut_size); + else { + set_le_ih_k_offset (ih, le_ih_k_offset (ih) + (cut_size / UNFM_P_SIZE) * bh->b_size); +#ifdef CONFIG_REISERFS_CHECK + if ( le16_to_cpu (ih->ih_item_len) == cut_size && get_ih_free_space (ih) ) + reiserfs_panic (0, "leaf_cut_from_buf: 10205: invalid ih_free_space (%h)", ih); +#endif + } + } + } + + + /* location of the last item */ + last_loc = le16_to_cpu (ih[nr - cut_item_num - 1].ih_item_location); + + /* location of the item, which is remaining at the same place */ + unmoved_loc = cut_item_num ? 
le16_to_cpu ((ih-1)->ih_item_location) : bh->b_size; + + + /* shift */ + memmove (bh->b_data + last_loc + cut_size, bh->b_data + last_loc, + unmoved_loc - last_loc - cut_size); + + /* change item length */ +/* ih->ih_item_len -= cut_size;*/ + ih->ih_item_len = cpu_to_le16 (le16_to_cpu (ih->ih_item_len) - cut_size); + + if (is_indirect_le_ih (ih)) { + if (pos_in_item) + set_ih_free_space (ih, 0); + } + + /* change locations */ + for (i = cut_item_num; i < nr; i ++) +/* ih[i-cut_item_num].ih_item_location += cut_size;*/ + ih[i-cut_item_num].ih_item_location = + cpu_to_le16 (le16_to_cpu (ih[i-cut_item_num].ih_item_location) + cut_size); + + /* size, free space */ + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) + cut_size); + + do_balance_mark_leaf_dirty (bi->tb, bh, 0); + + if (bi->bi_parent) { + B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size -= cut_size; + do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0); + } +} + + +/* delete del_num items from buffer starting from the first'th item */ +static void leaf_delete_items_entirely (struct buffer_info * bi, + int first, int del_num) +{ + struct buffer_head * bh = bi->bi_bh; + int nr; + int i, j; + int last_loc, last_removed_loc; + struct block_head * blkh; + struct item_head * ih; + +#ifdef CONFIG_REISERFS_CHECK + if (bh == NULL) + reiserfs_panic (0, "leaf_delete_items_entirely: 10210: buffer is 0"); + + if (del_num < 0) + reiserfs_panic (0, "leaf_delete_items_entirely: 10215: del_num less than 0 (%d)", del_num); +#endif /* CONFIG_REISERFS_CHECK */ + + if (del_num == 0) + return; + + nr = le16_to_cpu ((blkh = B_BLK_HEAD(bh))->blk_nr_item); + +#ifdef CONFIG_REISERFS_CHECK + if (first < 0 || first + del_num > nr) + reiserfs_panic (0, "leaf_delete_items_entirely: 10220: first=%d, number=%d, there is %d items", first, del_num, nr); +#endif /* CONFIG_REISERFS_CHECK */ + + if (first == 0 && del_num == nr) { + /* this does not work */ + make_empty_node (bi); + + do_balance_mark_leaf_dirty (bi->tb, bh, 0); + return; + } + + ih = B_N_PITEM_HEAD (bh, first); + + /* location of unmovable item */ + j = (first == 0) ? 
bh->b_size : (ih-1)->ih_item_location; + + /* delete items */ + last_loc = ih[nr-1-first].ih_item_location; + last_removed_loc = ih[del_num-1].ih_item_location; + + memmove (bh->b_data + last_loc + j - last_removed_loc, + bh->b_data + last_loc, last_removed_loc - last_loc); + + /* delete item headers */ + memmove (ih, ih + del_num, (nr - first - del_num) * IH_SIZE); + + /* change item location */ + for (i = first; i < nr - del_num; i ++) + ih[i-first].ih_item_location += j - last_removed_loc; + + /* sizes, item number */ + blkh->blk_nr_item = cpu_to_le16 (le16_to_cpu (blkh->blk_nr_item) - del_num); + blkh->blk_free_space = cpu_to_le16 (le16_to_cpu (blkh->blk_free_space) + (j - last_removed_loc + IH_SIZE * del_num)); + + do_balance_mark_leaf_dirty (bi->tb, bh, 0); + + if (bi->bi_parent) { + B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size -= j - last_removed_loc + IH_SIZE * del_num; + do_balance_mark_internal_dirty (bi->tb, bi->bi_parent, 0); + } +} + + + + + +/* paste new_entry_count entries (new_dehs, records) into position before to item_num-th item */ +void leaf_paste_entries ( + struct buffer_head * bh, + int item_num, + int before, + int new_entry_count, + struct reiserfs_de_head * new_dehs, + const char * records, + int paste_size + ) +{ + struct item_head * ih; + char * item; + struct reiserfs_de_head * deh; + char * insert_point; + int i, old_entry_num; + + if (new_entry_count == 0) + return; + + ih = B_N_PITEM_HEAD(bh, item_num); + +#ifdef CONFIG_REISERFS_CHECK + /* make sure, that item is directory, and there are enough records in it */ + if (!is_direntry_le_ih (ih)) + reiserfs_panic (0, "leaf_paste_entries: 10225: item is not directory item"); + + if (I_ENTRY_COUNT (ih) < before) + reiserfs_panic (0, "leaf_paste_entries: 10230: there are no entry we paste entries before. entry_count = %d, before = %d", + I_ENTRY_COUNT (ih), before); +#endif + + + /* first byte of dest item */ + item = bh->b_data + ih->ih_item_location; + + /* entry head array */ + deh = B_I_DEH (bh, ih); + + /* new records will be pasted at this point */ + insert_point = item + (before ? 
deh[before - 1].deh_location : (ih->ih_item_len - paste_size)); + + /* adjust locations of records that will be AFTER new records */ + for (i = I_ENTRY_COUNT(ih) - 1; i >= before; i --) + deh[i].deh_location += DEH_SIZE * new_entry_count; + + /* adjust locations of records that will be BEFORE new records */ + for (i = 0; i < before; i ++) + deh[i].deh_location += paste_size; + + old_entry_num = I_ENTRY_COUNT(ih); + I_ENTRY_COUNT(ih) += new_entry_count; + + /* prepare space for pasted records */ + memmove (insert_point + paste_size, insert_point, item + (ih->ih_item_len - paste_size) - insert_point); + + /* copy new records */ + memcpy (insert_point + DEH_SIZE * new_entry_count, records, + paste_size - DEH_SIZE * new_entry_count); + + /* prepare space for new entry heads */ + deh += before; + memmove ((char *)(deh + new_entry_count), deh, insert_point - (char *)deh); + + /* copy new entry heads */ + deh = (struct reiserfs_de_head *)((char *)deh); + memcpy (deh, new_dehs, DEH_SIZE * new_entry_count); + + /* set locations of new records */ + for (i = 0; i < new_entry_count; i ++) + deh[i].deh_location += + (- new_dehs[new_entry_count - 1].deh_location + insert_point + DEH_SIZE * new_entry_count - item); + + + /* change item key if neccessary (when we paste before 0-th entry */ + if (!before) + { +#ifdef CONFIG_REISERFS_CHECK +/* + if ( old_entry_num && COMP_SHORT_KEYS ((unsigned long *)&ih->ih_key.k_offset, + &(new_dehs->deh_offset)) <= 0) + reiserfs_panic (0, "leaf_paste_entries: 10235: new key must be less, that old key"); +*/ +#endif + set_le_ih_k_offset (ih, le32_to_cpu (new_dehs->deh_offset)); +/* memcpy (&ih->ih_key.k_offset, + &new_dehs->deh_offset, SHORT_KEY_SIZE);*/ + } + +#ifdef CONFIG_REISERFS_CHECK + { + int prev, next; + /* check record locations */ + deh = B_I_DEH (bh, ih); + for (i = 0; i < I_ENTRY_COUNT(ih); i ++) { + next = (i < I_ENTRY_COUNT(ih) - 1) ? deh[i + 1].deh_location : 0; + prev = (i != 0) ? deh[i - 1].deh_location : 0; + + if (prev && prev <= deh[i].deh_location) + reiserfs_warning ("vs-10240: leaf_paste_entries: directory item (%h) corrupted (prev %a, cur(%d) %a)\n", + ih, deh + i - 1, i, deh + i); + if (next && next >= deh[i].deh_location) + reiserfs_warning ("vs-10250: leaf_paste_entries: directory item (%h) corrupted (cur(%d) %a, next %a)\n", + ih, i, deh + i, deh + i + 1); + } + } +#endif + +} + + + diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/namei.c linux/fs/reiserfs/namei.c --- v2.4.0/linux/fs/reiserfs/namei.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/namei.c Mon Jan 15 15:31:19 2001 @@ -0,0 +1,1221 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + +#ifdef __KERNEL__ + +#include +#include +#include +#include + +#else + +#include "nokernel.h" + +#endif + + /* there should be an overview right + here, as there should be in every + conceptual grouping of code. This + should be combined with dir.c and + called dir.c (naming will become + too large to be called one file in + a few years), stop senselessly + imitating the incoherent + structuring of code used by other + filesystems. */ + +#define INC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) { i->i_nlink++; if (i->i_nlink >= REISERFS_LINK_MAX) i->i_nlink=1; } +#define DEC_DIR_INODE_NLINK(i) if (i->i_nlink != 1) i->i_nlink--; + +// directory item contains array of entry headers. 
This performs +// binary search through that array +static int bin_search_in_dir_item (struct reiserfs_dir_entry * de, loff_t off) +{ + struct item_head * ih = de->de_ih; + struct reiserfs_de_head * deh = de->de_deh; + int rbound, lbound, j; + + lbound = 0; + rbound = I_ENTRY_COUNT (ih) - 1; + + for (j = (rbound + lbound) / 2; lbound <= rbound; j = (rbound + lbound) / 2) { + if (off < deh_offset (deh + j)) { + rbound = j - 1; + continue; + } + if (off > deh_offset (deh + j)) { + lbound = j + 1; + continue; + } + // this is not name found, but matched third key component + de->de_entry_num = j; + return NAME_FOUND; + } + + de->de_entry_num = lbound; + return NAME_NOT_FOUND; +} + + +// comment? maybe something like set de to point to what the path points to? +static inline void set_de_item_location (struct reiserfs_dir_entry * de, struct path * path) +{ + de->de_bh = get_bh (path); + de->de_ih = get_ih (path); + de->de_deh = B_I_DEH (de->de_bh, de->de_ih); + de->de_item_num = PATH_LAST_POSITION (path); +} + + +// de_bh, de_ih, de_deh (points to first element of array), de_item_num is set +inline void set_de_name_and_namelen (struct reiserfs_dir_entry * de) +{ + struct reiserfs_de_head * deh = de->de_deh + de->de_entry_num; + + if (de->de_entry_num >= ih_entry_count (de->de_ih)) + BUG (); + + de->de_entrylen = entry_length (de->de_bh, de->de_ih, de->de_entry_num); + de->de_namelen = de->de_entrylen - (de_with_sd (deh) ? SD_SIZE : 0); + de->de_name = B_I_PITEM (de->de_bh, de->de_ih) + le16_to_cpu (deh->deh_location); + if (de->de_name[de->de_namelen - 1] == 0) + de->de_namelen = strlen (de->de_name); +} + + +// what entry points to +static inline void set_de_object_key (struct reiserfs_dir_entry * de) +{ + if (de->de_entry_num >= ih_entry_count (de->de_ih)) + BUG (); + de->de_dir_id = le32_to_cpu (de->de_deh[de->de_entry_num].deh_dir_id); + de->de_objectid = le32_to_cpu (de->de_deh[de->de_entry_num].deh_objectid); +} + + +static inline void store_de_entry_key (struct reiserfs_dir_entry * de) +{ + struct reiserfs_de_head * deh = de->de_deh + de->de_entry_num; + + if (de->de_entry_num >= ih_entry_count (de->de_ih)) + BUG (); + + /* store key of the found entry */ + de->de_entry_key.version = ITEM_VERSION_1; + de->de_entry_key.on_disk_key.k_dir_id = le32_to_cpu (de->de_ih->ih_key.k_dir_id); + de->de_entry_key.on_disk_key.k_objectid = le32_to_cpu (de->de_ih->ih_key.k_objectid); + set_cpu_key_k_offset (&(de->de_entry_key), deh_offset (deh)); + set_cpu_key_k_type (&(de->de_entry_key), TYPE_DIRENTRY); +} + + +/* We assign a key to each directory item, and place multiple entries +in a single directory item. A directory item has a key equal to the +key of the first directory entry in it. + +This function first calls search_by_key, then, if item whose first +entry matches is not found it looks for the entry inside directory +item found by search_by_key. Fills the path to the entry, and to the +entry position in the item + +*/ + +/* The function is NOT SCHEDULE-SAFE! 
*/ +int search_by_entry_key (struct super_block * sb, struct cpu_key * key, + struct path * path, struct reiserfs_dir_entry * de) +{ + int retval; + + retval = search_item (sb, key, path); + switch (retval) { + case ITEM_NOT_FOUND: + if (!PATH_LAST_POSITION (path)) { + reiserfs_warning ("vs-7000: search_by_entry_key: search_by_key returned item position == 0"); + pathrelse(path) ; + return IO_ERROR ; + } + PATH_LAST_POSITION (path) --; + + case ITEM_FOUND: + break; + + case IO_ERROR: + return retval; + + default: + pathrelse (path); + reiserfs_warning ("vs-7002: search_by_entry_key: no path to here"); + return IO_ERROR; + } + + set_de_item_location (de, path); + +#ifdef CONFIG_REISERFS_CHECK + if (!is_direntry_le_ih (de->de_ih) || + COMP_SHORT_KEYS (&(de->de_ih->ih_key), key)) { + print_block (de->de_bh, 0, -1, -1); + reiserfs_panic (sb, "vs-7005: search_by_entry_key: found item %h is not directory item or " + "does not belong to the same directory as key %k", de->de_ih, key); + } +#endif /* CONFIG_REISERFS_CHECK */ + + /* binary search in directory item by third componen t of the + key. sets de->de_entry_num of de */ + retval = bin_search_in_dir_item (de, cpu_key_k_offset (key)); + path->pos_in_item = de->de_entry_num; + if (retval != NAME_NOT_FOUND) { + // ugly, but rename needs de_bh, de_deh, de_name, de_namelen, de_objectid set + set_de_name_and_namelen (de); + set_de_object_key (de); + } + return retval; +} + + + +/* Keyed 32-bit hash function using TEA in a Davis-Meyer function */ + +/* The third component is hashed, and you can choose from more than + one hash function. Per directory hashes are not yet implemented + but are thought about. This function should be moved to hashes.c + Jedi, please do so. -Hans */ + +static __u32 get_third_component (struct super_block * s, + const char * name, int len) +{ + __u32 res; + + if (!len || (len == 1 && name[0] == '.')) + return DOT_OFFSET; + if (len == 2 && name[0] == '.' && name[1] == '.') + return DOT_DOT_OFFSET; + + res = s->u.reiserfs_sb.s_hash_function (name, len); + + // take bits from 7-th to 30-th including both bounds + res = GET_HASH_VALUE(res); + if (res == 0) + // needed to have no names before "." and ".." those have hash + // value == 0 and generation conters 1 and 2 accordingly + res = 128; + return res + MAX_GENERATION_NUMBER; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +static int reiserfs_match (struct reiserfs_dir_entry * de, + const char * name, int namelen) +{ + int retval = NAME_NOT_FOUND; + + if ((namelen == de->de_namelen) && + !memcmp(de->de_name, name, de->de_namelen)) + retval = (de_visible (de->de_deh + de->de_entry_num) ? 
NAME_FOUND : NAME_FOUND_INVISIBLE); + + return retval; +} + + +/* de's de_bh, de_ih, de_deh, de_item_num, de_entry_num are set already */ + + /* used when hash collisions exist */ + + +static int linear_search_in_dir_item (struct cpu_key * key, struct reiserfs_dir_entry * de, + const char * name, int namelen) +{ + struct reiserfs_de_head * deh = de->de_deh; + int retval; + int i; + + i = de->de_entry_num; + + if (i == I_ENTRY_COUNT (de->de_ih) || + GET_HASH_VALUE (deh_offset (deh + i)) != GET_HASH_VALUE (cpu_key_k_offset (key))) { + i --; + } + +#ifdef CONFIG_REISERFS_CHECK + if (de->de_deh != B_I_DEH (de->de_bh, de->de_ih)) + reiserfs_panic (0, "vs-7010: linear_search_in_dir_item: array of entry headers not found"); +#endif /* CONFIG_REISERFS_CHECK */ + + deh += i; + + for (; i >= 0; i --, deh --) { + if (GET_HASH_VALUE (deh_offset (deh)) != + GET_HASH_VALUE (cpu_key_k_offset (key))) { + // hash value does not match, no need to check whole name + return NAME_NOT_FOUND; + } + + /* mark, that this generation number is used */ + if (de->de_gen_number_bit_string) + set_bit (GET_GENERATION_NUMBER (deh_offset (deh)), de->de_gen_number_bit_string); + + // calculate pointer to name and namelen + de->de_entry_num = i; + set_de_name_and_namelen (de); + + if ((retval = reiserfs_match (de, name, namelen)) != NAME_NOT_FOUND) { + // de's de_name, de_namelen, de_recordlen are set. Fill the rest: + + // key of pointed object + set_de_object_key (de); + + store_de_entry_key (de); + + // retval can be NAME_FOUND or NAME_FOUND_INVISIBLE + return retval; + } + } + + if (GET_GENERATION_NUMBER (le_ih_k_offset (de->de_ih)) == 0) + /* we have reached left most entry in the node. In common we + have to go to the left neighbor, but if generation counter + is 0 already, we know for sure, that there is no name with + the same hash value */ + // FIXME: this work correctly only because hash value can not + // be 0. Btw, in case of Yura's hash it is probably possible, + // so, this is a bug + return NAME_NOT_FOUND; + +#ifdef CONFIG_REISERFS_CHECK + if (de->de_item_num) + reiserfs_panic (0, "vs-7015: linear_search_in_dir_item: " + "two diritems of the same directory in one node?"); +#endif /* CONFIG_REISERFS_CHECK */ + + return GOTO_PREVIOUS_ITEM; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +// may return NAME_FOUND, NAME_FOUND_INVISIBLE, NAME_NOT_FOUND +// FIXME: should add something like IOERROR +static int reiserfs_find_entry (struct inode * dir, const char * name, int namelen, + struct path * path_to_entry, struct reiserfs_dir_entry * de) +{ + struct cpu_key key_to_search; + int retval; + + + if (namelen > REISERFS_MAX_NAME_LEN (dir->i_sb->s_blocksize)) + return NAME_NOT_FOUND; + + /* we will search for this key in the tree */ + make_cpu_key (&key_to_search, dir, + get_third_component (dir->i_sb, name, namelen), TYPE_DIRENTRY, 3); + + while (1) { + retval = search_by_entry_key (dir->i_sb, &key_to_search, path_to_entry, de); + if (retval == IO_ERROR) + // FIXME: still has to be dealt with + + /* I want you to conform to our error + printing standard. How many times + do I have to ask? 
-Hans */ + + BUG (); + + /* compare names for all entries having given hash value */ + retval = linear_search_in_dir_item (&key_to_search, de, name, namelen); + if (retval != GOTO_PREVIOUS_ITEM) { + /* there is no need to scan directory anymore. Given entry found or does not exist */ + path_to_entry->pos_in_item = de->de_entry_num; + return retval; + } + + /* there is left neighboring item of this directory and given entry can be there */ + set_cpu_key_k_offset (&key_to_search, le_ih_k_offset (de->de_ih) - 1); + pathrelse (path_to_entry); + + } /* while (1) */ +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +struct dentry * reiserfs_lookup (struct inode * dir, struct dentry * dentry) +{ + int retval; + struct inode * inode = 0; + struct reiserfs_dir_entry de; + INITIALIZE_PATH (path_to_entry); + + reiserfs_check_lock_depth("lookup") ; + + if (dentry->d_name.len > REISERFS_MAX_NAME_LEN (dir->i_sb->s_blocksize)) + return ERR_PTR(-ENAMETOOLONG); + + de.de_gen_number_bit_string = 0; + retval = reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path_to_entry, &de); + pathrelse (&path_to_entry); + if (retval == NAME_FOUND) { + inode = reiserfs_iget (dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); + if (!inode) { + return ERR_PTR(-EACCES); + } + } + + d_add(dentry, inode); + return NULL; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// + +/* add entry to the directory (entry can be hidden). + +insert definition of when hidden directories are used here -Hans + + Does not mark dir inode dirty, do it after successesfull call to it */ + +static int reiserfs_add_entry (struct reiserfs_transaction_handle *th, struct inode * dir, + const char * name, int namelen, struct inode * inode, + int visible) +{ + struct cpu_key entry_key; + struct reiserfs_de_head * deh; + INITIALIZE_PATH (path); + struct reiserfs_dir_entry de; + int bit_string [MAX_GENERATION_NUMBER / (sizeof(int) * 8) + 1]; + int gen_number; + char small_buf[32+DEH_SIZE] ; /* 48 bytes now and we avoid kmalloc + if we create file with short name */ + char * buffer; + int buflen, paste_size; + int retval; + + + /* cannot allow items to be added into a busy deleted directory */ + if (!namelen) + return -EINVAL; + + if (namelen > REISERFS_MAX_NAME_LEN (dir->i_sb->s_blocksize)) + return -ENAMETOOLONG; + + /* each entry has unique key. compose it */ + make_cpu_key (&entry_key, dir, + get_third_component (dir->i_sb, name, namelen), TYPE_DIRENTRY, 3); + + /* get memory for composing the entry */ + buflen = DEH_SIZE + ROUND_UP (namelen); + if (buflen > sizeof (small_buf)) { + buffer = reiserfs_kmalloc (buflen, GFP_BUFFER, dir->i_sb); + if (buffer == 0) + return -ENOMEM; + } else + buffer = small_buf; + + paste_size = (old_format_only (dir->i_sb)) ? 
(DEH_SIZE + namelen) : buflen; + + /* fill buffer : directory entry head, name[, dir objectid | , stat data | ,stat data, dir objectid ] */ + deh = (struct reiserfs_de_head *)buffer; + deh->deh_location = 0; + deh->deh_offset = cpu_to_le32 (cpu_key_k_offset (&entry_key)); + deh->deh_state = 0; + /* put key (ino analog) to de */ + deh->deh_dir_id = INODE_PKEY (inode)->k_dir_id; + deh->deh_objectid = INODE_PKEY (inode)->k_objectid; + + /* copy name */ + memcpy ((char *)(deh + 1), name, namelen); + /* padd by 0s to the 4 byte boundary */ + padd_item ((char *)(deh + 1), ROUND_UP (namelen), namelen); + + /* entry is ready to be pasted into tree, set 'visibility' and 'stat data in entry' attributes */ + mark_de_without_sd (deh); + visible ? mark_de_visible (deh) : mark_de_hidden (deh); + + /* find the proper place for the new entry */ + memset (bit_string, 0, sizeof (bit_string)); + de.de_gen_number_bit_string = (char *)bit_string; + if (reiserfs_find_entry (dir, name, namelen, &path, &de) == NAME_FOUND) { + if (buffer != small_buf) + reiserfs_kfree (buffer, buflen, dir->i_sb); + pathrelse (&path); + return -EEXIST; + } + + if (find_first_nonzero_bit (bit_string, MAX_GENERATION_NUMBER + 1) < MAX_GENERATION_NUMBER + 1) { + /* there are few names with given hash value */ + gen_number = find_first_zero_bit (bit_string, MAX_GENERATION_NUMBER + 1); + if (gen_number > MAX_GENERATION_NUMBER) { + /* there is no free generation number */ + reiserfs_warning ("reiserfs_add_entry: Congratulations! we have got hash function screwed up\n"); + if (buffer != small_buf) + reiserfs_kfree (buffer, buflen, dir->i_sb); + pathrelse (&path); + return -EHASHCOLLISION;//EBADSLT + } + /* adjust offset of directory enrty */ + deh->deh_offset = cpu_to_le32 (SET_GENERATION_NUMBER (deh_offset (deh), gen_number)); + set_cpu_key_k_offset (&entry_key, le32_to_cpu (deh->deh_offset)); + + /* find place for new entry */ + if (search_by_entry_key (dir->i_sb, &entry_key, &path, &de) == NAME_FOUND) { + reiserfs_warning ("vs-7032: reiserfs_add_entry: " + "entry with this key (%k) already exists", &entry_key); + if (buffer != small_buf) + reiserfs_kfree (buffer, buflen, dir->i_sb); + pathrelse (&path); + return -EHASHCOLLISION; + } + } else { + deh->deh_offset = cpu_to_le32 (SET_GENERATION_NUMBER (le32_to_cpu (deh->deh_offset), 0)); + set_cpu_key_k_offset (&entry_key, le32_to_cpu (deh->deh_offset)); + } + + /* perform the insertion of the entry that we have prepared */ + retval = reiserfs_paste_into_item (th, &path, &entry_key, buffer, paste_size); + if (buffer != small_buf) + reiserfs_kfree (buffer, buflen, dir->i_sb); + if (retval) { + reiserfs_check_path(&path) ; + return retval; + } + + dir->i_size += paste_size; + dir->i_blocks = ((dir->i_size + 511) >> 9); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + if (!S_ISDIR (inode->i_mode) && visible) + // reiserfs_mkdir or reiserfs_rename will do that by itself + reiserfs_update_sd (th, dir); + + reiserfs_check_path(&path) ; + return 0; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. 
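
reiserfs_add_entry() above resolves hash collisions by packing a small generation number into the low bits of the entry offset: reiserfs_find_entry() records every generation already used for that hash value in a bit string, the first free one is then chosen with find_first_zero_bit(), and -EHASHCOLLISION is returned when all of them are taken. A rough standalone sketch of that composition, assuming the split described in get_third_component() (generation number in the low 7 bits, hash value in bits 7..30); the macro definitions below are illustrative, not copies of the patch:

#include <stdio.h>
#include <stdint.h>

#define MAX_GENERATION_NUMBER  127                         /* illustrative */
#define GET_HASH_VALUE(off)    ((off) & ~(uint32_t)MAX_GENERATION_NUMBER & 0x7fffffffu)

/* Compose the directory-entry offset for a name whose hash produced
 * 'hash_off', given a bitmap of generation numbers already used for that
 * hash value.  Returns -1 when all 128 generations are taken (the case
 * reiserfs_add_entry() reports as a hash collision). */
static long compose_entry_offset(uint32_t hash_off, const unsigned char *used)
{
    int gen;

    for (gen = 0; gen <= MAX_GENERATION_NUMBER; gen++)
        if (!(used[gen / 8] & (1u << (gen % 8))))
            return (long)(GET_HASH_VALUE(hash_off) | (uint32_t)gen);

    return -1;   /* no free generation number for this hash value */
}

int main(void)
{
    unsigned char used[16] = { 0x07 };       /* generations 0, 1, 2 taken */
    uint32_t hash_off = 0x12345680;          /* some hash value, generation 0 */

    printf("new offset: 0x%lx\n", compose_entry_offset(hash_off, used));
    return 0;
}
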
+// +int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode) +{ + int retval; + struct inode * inode; + int windex ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 ; + struct reiserfs_transaction_handle th ; + + + inode = get_empty_inode() ; + if (!inode) { + return -ENOMEM ; + } + journal_begin(&th, dir->i_sb, jbegin_count) ; + th.t_caller = "create" ; + windex = push_journal_writer("reiserfs_create") ; + inode = reiserfs_new_inode (&th, dir, mode, 0, 0/*i_size*/, dentry, inode, &retval); + if (!inode) { + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return retval; + } + + inode->i_op = &reiserfs_file_inode_operations; + inode->i_fop = &reiserfs_file_operations; + inode->i_mapping->a_ops = &reiserfs_address_space_operations ; + + retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, + inode, 1/*visible*/); + if (retval) { + inode->i_nlink--; + reiserfs_update_sd (&th, inode); + pop_journal_writer(windex) ; + // FIXME: should we put iput here and have stat data deleted + // in the same transactioin + journal_end(&th, dir->i_sb, jbegin_count) ; + iput (inode); + return retval; + } + + d_instantiate(dentry, inode); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return 0; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode, int rdev) +{ + int retval; + struct inode * inode; + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + + inode = get_empty_inode() ; + if (!inode) { + return -ENOMEM ; + } + journal_begin(&th, dir->i_sb, jbegin_count) ; + windex = push_journal_writer("reiserfs_mknod") ; + + inode = reiserfs_new_inode (&th, dir, mode, 0, 0/*i_size*/, dentry, inode, &retval); + if (!inode) { + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return retval; + } + + init_special_inode(inode, mode, rdev) ; + + //FIXME: needed for block and char devices only + reiserfs_update_sd (&th, inode); + + retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, + inode, 1/*visible*/); + if (retval) { + inode->i_nlink--; + reiserfs_update_sd (&th, inode); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + iput (inode); + return retval; + } + + d_instantiate(dentry, inode); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return 0; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. 
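
reiserfs_create() and reiserfs_mknod() above show the transaction bracket that the namespace operations in this file share: journal_begin()/push_journal_writer() open the transaction, the inode and its directory entry are created inside it, and on failure the half-created object is made consistent (link count dropped, stat data updated) before journal_end(), with iput() only after the transaction is closed. A compressed sketch of that control flow with stand-in functions (the *_stub() names and error values are placeholders, not the kernel API):

#include <stdio.h>

static int  journal_begin_stub(void)  { puts("journal_begin");  return 0; }
static void journal_end_stub(void)    { puts("journal_end"); }
static int  new_inode_stub(void)      { return 1; }                 /* ok      */
static int  add_entry_stub(void)      { return -17; }               /* -EEXIST */
static void update_sd_stub(void)      { puts("update stat data in this txn"); }
static void iput_stub(void)           { puts("iput after journal_end"); }

static int create_like_operation(void)
{
    int err;

    if (journal_begin_stub())
        return -5;                       /* illustrative I/O error */

    if (!new_inode_stub()) {
        journal_end_stub();
        return -12;                      /* -ENOMEM */
    }

    err = add_entry_stub();
    if (err) {
        update_sd_stub();                /* make the orphan consistent in the txn */
        journal_end_stub();
        iput_stub();                     /* release the inode after the txn closes */
        return err;
    }

    journal_end_stub();                  /* success: entry and inode commit together */
    return 0;
}

int main(void)
{
    printf("result: %d\n", create_like_operation());
    return 0;
}
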
+// +int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode) +{ + int retval; + struct inode * inode; + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + + inode = get_empty_inode() ; + if (!inode) { + return -ENOMEM ; + } + journal_begin(&th, dir->i_sb, jbegin_count) ; + windex = push_journal_writer("reiserfs_mkdir") ; + + /* inc the link count now, so another writer doesn't overflow it while + ** we sleep later on. + */ + INC_DIR_INODE_NLINK(dir) + + mode = S_IFDIR | mode; + inode = reiserfs_new_inode (&th, dir, mode, 0/*symlink*/, + old_format_only (dir->i_sb) ? EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE, + dentry, inode, &retval); + if (!inode) { + pop_journal_writer(windex) ; + dir->i_nlink-- ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return retval; + } + + inode->i_op = &reiserfs_dir_inode_operations; + inode->i_fop = &reiserfs_dir_operations; + + // note, _this_ add_entry will not update dir's stat data + retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, + inode, 1/*visible*/); + if (retval) { + inode->i_nlink = 0; + DEC_DIR_INODE_NLINK(dir); + reiserfs_update_sd (&th, inode); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + iput (inode); + return retval; + } + + // the above add_entry did not update dir's stat data + reiserfs_update_sd (&th, dir); + + d_instantiate(dentry, inode); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return 0; +} + +static inline int reiserfs_empty_dir(struct inode *inode) { + /* we can cheat because an old format dir cannot have + ** EMPTY_DIR_SIZE, and a new format dir cannot have + ** EMPTY_DIR_SIZE_V1. So, if the inode is either size, + ** regardless of disk format version, the directory is empty. + */ + if (inode->i_size != EMPTY_DIR_SIZE && + inode->i_size != EMPTY_DIR_SIZE_V1) { + return 0 ; + } + return 1 ; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. 
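
reiserfs_mkdir() above bumps the parent's link count through INC_DIR_INODE_NLINK(), defined near the top of namei.c: once a directory would overflow REISERFS_LINK_MAX the count is pinned at 1, and the matching DEC macro then leaves it alone, so an untracked link count is never corrupted by later mkdir/rmdir pairs. A small standalone rendering of the two macros (the REISERFS_LINK_MAX value here is illustrative):

#include <stdio.h>

#define REISERFS_LINK_MAX 0xffff   /* illustrative limit only */

/* Mirror of INC_DIR_INODE_NLINK / DEC_DIR_INODE_NLINK: nlink == 1 means
 * "link count no longer tracked for this directory". */
static void dir_nlink_inc(unsigned int *nlink)
{
    if (*nlink != 1) {
        (*nlink)++;
        if (*nlink >= REISERFS_LINK_MAX)
            *nlink = 1;                 /* would overflow: stop tracking */
    }
}

static void dir_nlink_dec(unsigned int *nlink)
{
    if (*nlink != 1)
        (*nlink)--;
}

int main(void)
{
    unsigned int nlink = REISERFS_LINK_MAX - 1;

    dir_nlink_inc(&nlink);   /* overflow -> pinned at 1 */
    dir_nlink_dec(&nlink);   /* stays at 1: count is no longer meaningful */
    printf("nlink = %u\n", nlink);
    return 0;
}
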
+// +int reiserfs_rmdir (struct inode * dir, struct dentry *dentry) +{ + int retval; + struct inode * inode; + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + INITIALIZE_PATH (path); + struct reiserfs_dir_entry de; + + + journal_begin(&th, dir->i_sb, jbegin_count) ; + windex = push_journal_writer("reiserfs_rmdir") ; + + de.de_gen_number_bit_string = 0; + if (reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path, &de) == NAME_NOT_FOUND) { + retval = -ENOENT; + goto end_rmdir; + } + inode = dentry->d_inode; + + if (de.de_objectid != inode->i_ino) { + // FIXME: compare key of an object and a key found in the + // entry + retval = -EIO; + goto end_rmdir; + } + if (!reiserfs_empty_dir(inode)) { + retval = -ENOTEMPTY; + goto end_rmdir; + } + + /* cut entry from dir directory */ + retval = reiserfs_cut_from_item (&th, &path, &(de.de_entry_key), dir, + NULL, /* page */ + 0/*new file size - not used here*/); + if (retval < 0) + goto end_rmdir; + + if ( inode->i_nlink != 2 && inode->i_nlink != 1 ) + printk ("reiserfs_rmdir: empty directory has nlink != 2 (%d)\n", inode->i_nlink); + + inode->i_nlink = 0; + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + reiserfs_update_sd (&th, inode); + + DEC_DIR_INODE_NLINK(dir) + dir->i_size -= (DEH_SIZE + de.de_entrylen); + dir->i_blocks = ((dir->i_size + 511) >> 9); + reiserfs_update_sd (&th, dir); + + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + reiserfs_check_path(&path) ; + return 0; + + end_rmdir: + /* we must release path, because we did not call + reiserfs_cut_from_item, or reiserfs_cut_from_item does not + release path if operation was not complete */ + pathrelse (&path); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return retval; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. 
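
After reiserfs_rmdir() above cuts the victim's entry out of the parent item, the parent's i_size shrinks by the entry head plus the stored (padded) name, and i_blocks is recomputed in 512-byte units; reiserfs_unlink() and reiserfs_add_entry() do the same bookkeeping in the opposite direction. A minimal sketch of that arithmetic (the DEH_SIZE value below is illustrative):

#include <stdio.h>

#define DEH_SIZE 16   /* illustrative size of struct reiserfs_de_head */

/* dir->i_size -= DEH_SIZE + entrylen;
 * dir->i_blocks = ((dir->i_size + 511) >> 9);  -- as in the patch */
static void shrink_parent(unsigned long *i_size, unsigned long *i_blocks,
                          unsigned int entrylen)
{
    *i_size  -= DEH_SIZE + entrylen;
    *i_blocks = (*i_size + 511) >> 9;
}

int main(void)
{
    unsigned long size = 1024, blocks = 2;

    shrink_parent(&size, &blocks, 12);   /* remove a 12-byte (padded) name */
    printf("i_size=%lu i_blocks=%lu\n", size, blocks);
    return 0;
}
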
+// +int reiserfs_unlink (struct inode * dir, struct dentry *dentry) +{ + int retval; + struct inode * inode; + struct reiserfs_dir_entry de; + INITIALIZE_PATH (path); + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + + journal_begin(&th, dir->i_sb, jbegin_count) ; + windex = push_journal_writer("reiserfs_unlink") ; + + de.de_gen_number_bit_string = 0; + if (reiserfs_find_entry (dir, dentry->d_name.name, dentry->d_name.len, &path, &de) == NAME_NOT_FOUND) { + retval = -ENOENT; + goto end_unlink; + } + inode = dentry->d_inode; + + if (de.de_objectid != inode->i_ino) { + // FIXME: compare key of an object and a key found in the + // entry + retval = -EIO; + goto end_unlink; + } + + if (!inode->i_nlink) { + printk("reiserfs_unlink: deleting nonexistent file (%s:%lu), %d\n", + kdevname(inode->i_dev), inode->i_ino, inode->i_nlink); + inode->i_nlink = 1; + } + + retval = reiserfs_cut_from_item (&th, &path, &(de.de_entry_key), dir, NULL, 0); + if (retval < 0) + goto end_unlink; + + inode->i_nlink--; + inode->i_ctime = CURRENT_TIME; + reiserfs_update_sd (&th, inode); + + dir->i_size -= (de.de_entrylen + DEH_SIZE); + dir->i_blocks = ((dir->i_size + 511) >> 9); + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + reiserfs_update_sd (&th, dir); + + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + reiserfs_check_path(&path) ; + return 0; + + end_unlink: + pathrelse (&path); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + reiserfs_check_path(&path) ; + return retval; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. 
+// +int reiserfs_symlink (struct inode * dir, struct dentry * dentry, const char * symname) +{ + int retval; + struct inode * inode; + char * name; + int item_len; + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + + + inode = get_empty_inode() ; + if (!inode) { + return -ENOMEM ; + } + + item_len = ROUND_UP (strlen (symname)); + if (item_len > MAX_ITEM_LEN (dir->i_sb->s_blocksize)) { + iput(inode) ; + return -ENAMETOOLONG; + } + + name = kmalloc (item_len, GFP_BUFFER); + if (!name) { + iput(inode) ; + return -ENOMEM; + } + memcpy (name, symname, strlen (symname)); + padd_item (name, item_len, strlen (symname)); + + journal_begin(&th, dir->i_sb, jbegin_count) ; + windex = push_journal_writer("reiserfs_symlink") ; + + inode = reiserfs_new_inode (&th, dir, S_IFLNK | S_IRWXUGO, name, strlen (symname), dentry, + inode, &retval); + kfree (name); + if (inode == 0) { /* reiserfs_new_inode iputs for us */ + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return retval; + } + + inode->i_op = &page_symlink_inode_operations; + inode->i_mapping->a_ops = &reiserfs_address_space_operations; + + // must be sure this inode is written with this transaction + // + //reiserfs_update_sd (&th, inode, READ_BLOCKS); + + retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, + inode, 1/*visible*/); + if (retval) { + inode->i_nlink--; + reiserfs_update_sd (&th, inode); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + iput (inode); + return retval; + } + + d_instantiate(dentry, inode); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return 0; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. 
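
reiserfs_symlink() above, like reiserfs_add_entry(), stores names rounded up and zero-filled to a 4-byte boundary (the "padd by 0s to the 4 byte boundary" step performed by ROUND_UP and padd_item in the patch). A standalone sketch of that padding, with round_up4() and pad_name() as illustrative stand-ins for the patch's helpers:

#include <stdio.h>
#include <string.h>

/* Round a name length up to the next 4-byte boundary. */
static size_t round_up4(size_t len)
{
    return (len + 3) & ~(size_t)3;
}

/* Copy a name into an item buffer and zero the padding bytes. */
static void pad_name(char *dst, const char *name)
{
    size_t len = strlen(name);
    size_t padded = round_up4(len);

    memcpy(dst, name, len);
    memset(dst + len, 0, padded - len);
}

int main(void)
{
    char buf[16];

    pad_name(buf, "abcde");                              /* 5 bytes -> 8 */
    printf("padded length: %zu\n", round_up4(strlen("abcde")));
    return 0;
}
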
+// +int reiserfs_link (struct dentry * old_dentry, struct inode * dir, struct dentry * dentry) +{ + int retval; + struct inode *inode = old_dentry->d_inode; + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + + + if (S_ISDIR(inode->i_mode)) + return -EPERM; + + if (inode->i_nlink >= REISERFS_LINK_MAX) { + //FIXME: sd_nlink is 32 bit for new files + return -EMLINK; + } + + journal_begin(&th, dir->i_sb, jbegin_count) ; + windex = push_journal_writer("reiserfs_link") ; + + /* create new entry */ + retval = reiserfs_add_entry (&th, dir, dentry->d_name.name, dentry->d_name.len, + inode, 1/*visible*/); + if (retval) { + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return retval; + } + + inode->i_nlink++; + inode->i_ctime = CURRENT_TIME; + reiserfs_update_sd (&th, inode); + + atomic_inc(&inode->i_count) ; + d_instantiate(dentry, inode); + pop_journal_writer(windex) ; + journal_end(&th, dir->i_sb, jbegin_count) ; + return 0; +} + + +// de contains information pointing to an entry which +static int de_still_valid (const char * name, int len, struct reiserfs_dir_entry * de) +{ + struct reiserfs_dir_entry tmp = *de; + + // recalculate pointer to name and name length + set_de_name_and_namelen (&tmp); + // FIXME: could check more + if (tmp.de_namelen != len || memcmp (name, de->de_name, len)) + return 0; + return 1; +} + + +static int entry_points_to_object (const char * name, int len, struct reiserfs_dir_entry * de, struct inode * inode) +{ + if (!de_still_valid (name, len, de)) + return 0; + + if (inode) { + if (!de_visible (de->de_deh + de->de_entry_num)) + reiserfs_panic (0, "vs-7042: entry_points_to_object: entry must be visible"); + return (de->de_objectid == inode->i_ino) ? 1 : 0; + } + + /* this must be added hidden entry */ + if (de_visible (de->de_deh + de->de_entry_num)) + reiserfs_panic (0, "vs-7043: entry_points_to_object: entry must be visible"); + + return 1; +} + + +/* sets key of objectid the entry has to point to */ +static void set_ino_in_dir_entry (struct reiserfs_dir_entry * de, struct key * key) +{ + de->de_deh[de->de_entry_num].deh_dir_id = key->k_dir_id; + de->de_deh[de->de_entry_num].deh_objectid = key->k_objectid; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// + +/* + * process, that is going to call fix_nodes/do_balance must hold only + * one path. 
If it holds 2 or more, it can get into endless waiting in + * get_empty_nodes or its clones + */ +int reiserfs_rename (struct inode * old_dir, struct dentry *old_dentry, + struct inode * new_dir, struct dentry *new_dentry) +{ + int retval; + INITIALIZE_PATH (old_entry_path); + INITIALIZE_PATH (new_entry_path); + INITIALIZE_PATH (dot_dot_entry_path); + struct item_head new_entry_ih, old_entry_ih ; + struct reiserfs_dir_entry old_de, new_de, dot_dot_de; + struct inode * old_inode, * new_inode; + int windex ; + struct reiserfs_transaction_handle th ; + int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + + + old_inode = old_dentry->d_inode; + new_inode = new_dentry->d_inode; + + // make sure, that oldname still exists and points to an object we + // are going to rename + old_de.de_gen_number_bit_string = 0; + retval = reiserfs_find_entry (old_dir, old_dentry->d_name.name, old_dentry->d_name.len, + &old_entry_path, &old_de); + pathrelse (&old_entry_path); + if (retval != NAME_FOUND || old_de.de_objectid != old_inode->i_ino) { + // FIXME: IO error is possible here + return -ENOENT; + } + + if (S_ISDIR(old_inode->i_mode)) { + // make sure, that directory being renamed has correct ".." + // and that its new parent directory has not too many links + // already + + if (new_inode) { + if (!reiserfs_empty_dir(new_inode)) { + return -ENOTEMPTY; + } + } + + /* directory is renamed, its parent directory will be changed, + ** so find ".." entry + */ + dot_dot_de.de_gen_number_bit_string = 0; + retval = reiserfs_find_entry (old_inode, "..", 2, &dot_dot_entry_path, &dot_dot_de); + pathrelse (&dot_dot_entry_path); + if (retval != NAME_FOUND) + return -EIO; + + /* inode number of .. must equal old_dir->i_ino */ + if (dot_dot_de.de_objectid != old_dir->i_ino) + return -EIO; + } + + journal_begin(&th, old_dir->i_sb, jbegin_count) ; + windex = push_journal_writer("reiserfs_rename") ; + + /* add new entry (or find the existing one) */ + retval = reiserfs_add_entry (&th, new_dir, new_dentry->d_name.name, new_dentry->d_name.len, + old_inode, 0); + if (retval == -EEXIST) { + // FIXME: is it possible, that new_inode == 0 here? If yes, it + // is not clear how does ext2 handle that + if (!new_inode) { + printk ("reiserfs_rename: new entry is found, new inode == 0\n"); + BUG (); + } + } else if (retval) { + pop_journal_writer(windex) ; + journal_end(&th, old_dir->i_sb, jbegin_count) ; + return retval; + } + + + while (1) { + // look for old name using corresponding entry key (found by reiserfs_find_entry) + if (search_by_entry_key (new_dir->i_sb, &old_de.de_entry_key, &old_entry_path, &old_de) != NAME_FOUND) + BUG (); + + copy_item_head(&old_entry_ih, get_ih(&old_entry_path)) ; + + // look for new name by reiserfs_find_entry + new_de.de_gen_number_bit_string = 0; + retval = reiserfs_find_entry (new_dir, new_dentry->d_name.name, new_dentry->d_name.len, + &new_entry_path, &new_de); + if (retval != NAME_FOUND_INVISIBLE && retval != NAME_FOUND) + BUG (); + + copy_item_head(&new_entry_ih, get_ih(&new_entry_path)) ; + + reiserfs_prepare_for_journal(old_inode->i_sb, new_de.de_bh, 1) ; + + if (S_ISDIR(old_inode->i_mode)) { + if (search_by_entry_key (new_dir->i_sb, &dot_dot_de.de_entry_key, &dot_dot_entry_path, &dot_dot_de) != NAME_FOUND) + BUG (); + // node containing ".." gets into transaction + reiserfs_prepare_for_journal(old_inode->i_sb, dot_dot_de.de_bh, 1) ; + } + /* we should check seals here, not do + this stuff, yes? Then, having + gathered everything into RAM we + should lock the buffers, yes? -Hans */ + /* probably. 
our rename needs to hold more + ** than one path at once. The seals would + ** have to be written to deal with multi-path + ** issues -chris + */ + /* sanity checking before doing the rename - avoid races many + ** of the above checks could have scheduled. We have to be + ** sure our items haven't been shifted by another process. + */ + if (!entry_points_to_object(new_dentry->d_name.name, + new_dentry->d_name.len, + &new_de, new_inode) || + item_moved(&new_entry_ih, &new_entry_path) || + item_moved(&old_entry_ih, &old_entry_path) || + !entry_points_to_object (old_dentry->d_name.name, + old_dentry->d_name.len, + &old_de, old_inode)) { + reiserfs_restore_prepared_buffer (old_inode->i_sb, new_de.de_bh); + if (S_ISDIR(old_inode->i_mode)) + reiserfs_restore_prepared_buffer (old_inode->i_sb, dot_dot_de.de_bh); +#if 0 + // FIXME: do we need this? shouldn't we simply continue? + run_task_queue(&tq_disk); + current->policy |= SCHED_YIELD; + /*current->counter = 0;*/ + schedule(); +#endif + continue; + } + +#ifdef CONFIG_REISERFS_CHECK + if (S_ISDIR(old_inode->i_mode) && + (!entry_points_to_object ("..", 2, &dot_dot_de, old_dir) || + !reiserfs_buffer_prepared(dot_dot_de.de_bh))) { + // this should be not changed + BUG (); + } +#endif + + break; + } + + /* ok, all the changes can be done in one fell swoop when we + have claimed all the buffers needed.*/ + + mark_de_visible (new_de.de_deh + new_de.de_entry_num); + set_ino_in_dir_entry (&new_de, INODE_PKEY (old_inode)); + journal_mark_dirty (&th, old_dir->i_sb, new_de.de_bh); + + mark_de_hidden (old_de.de_deh + old_de.de_entry_num); + old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; + new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME; + + if (new_inode) { + // adjust link number of the victim + if (S_ISDIR(new_inode->i_mode)) { + DEC_DIR_INODE_NLINK(new_inode) + } else { + new_inode->i_nlink--; + } + new_inode->i_ctime = CURRENT_TIME; + } + + if (S_ISDIR(old_inode->i_mode)) { + //if (dot_dot_de.de_bh) { + // adjust ".." of renamed directory + set_ino_in_dir_entry (&dot_dot_de, INODE_PKEY (new_dir)); + journal_mark_dirty (&th, new_dir->i_sb, dot_dot_de.de_bh); + + DEC_DIR_INODE_NLINK(old_dir) + if (new_inode) { + if (S_ISDIR(new_inode->i_mode)) { + DEC_DIR_INODE_NLINK(new_inode) + } else { + new_inode->i_nlink--; + } + } else { + INC_DIR_INODE_NLINK(new_dir) + } + } + + // looks like in 2.3.99pre3 brelse is atomic. so we can use pathrelse + pathrelse (&new_entry_path); + pathrelse (&dot_dot_entry_path); + + // FIXME: this reiserfs_cut_from_item's return value may screw up + // anybody, but it will panic if will not be able to find the + // entry. This needs one more clean up + if (reiserfs_cut_from_item (&th, &old_entry_path, &(old_de.de_entry_key), old_dir, NULL, 0) < 0) + reiserfs_warning ("vs-: reiserfs_rename: coudl not cut old name. 
Fsck later?\n"); + + old_dir->i_size -= DEH_SIZE + old_de.de_entrylen; + old_dir->i_blocks = ((old_dir->i_size + 511) >> 9); + + reiserfs_update_sd (&th, old_dir); + reiserfs_update_sd (&th, new_dir); + if (new_inode) + reiserfs_update_sd (&th, new_inode); + + pop_journal_writer(windex) ; + journal_end(&th, old_dir->i_sb, jbegin_count) ; + return 0; +} + diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/objectid.c linux/fs/reiserfs/objectid.c --- v2.4.0/linux/fs/reiserfs/objectid.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/objectid.c Mon Jan 15 15:31:19 2001 @@ -0,0 +1,211 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +#else + +#include "nokernel.h" + +#endif + + +// find where objectid map starts +#define objectid_map(s,rs) (old_format_only (s) ? \ + (__u32 *)((struct reiserfs_super_block_v1 *)rs + 1) :\ + (__u32 *)(rs + 1)) + + +#ifdef CONFIG_REISERFS_CHECK + +static void check_objectid_map (struct super_block * s, __u32 * map) +{ + if (le32_to_cpu (map[0]) != 1) + reiserfs_panic (s, "vs-15010: check_objectid_map: map corrupted"); + + // FIXME: add something else here +} + +#endif + + +/* When we allocate objectids we allocate the first unused objectid. + Each sequence of objectids in use (the odd sequences) is followed + by a sequence of objectids not in use (the even sequences). We + only need to record the last objectid in each of these sequences + (both the odd and even sequences) in order to fully define the + boundaries of the sequences. A consequence of allocating the first + objectid not in use is that under most conditions this scheme is + extremely compact. The exception is immediately after a sequence + of operations which deletes a large number of objects of + non-sequential objectids, and even then it will become compact + again as soon as more objects are created. Note that many + interesting optimizations of layout could result from complicating + objectid assignment, but we have deferred making them for now. */ + + +/* get unique object identifier */ +__u32 reiserfs_get_unused_objectid (struct reiserfs_transaction_handle *th) +{ + struct super_block * s = th->t_super; + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); + __u32 * map = objectid_map (s, rs); + __u32 unused_objectid; + + +#ifdef CONFIG_REISERFS_CHECK + check_objectid_map (s, map); +#endif + + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + /* comment needed -Hans */ + unused_objectid = le32_to_cpu (map[1]); + if (unused_objectid == U32_MAX) { + printk ("REISERFS: get_objectid: no more object ids\n"); + reiserfs_restore_prepared_buffer(s, SB_BUFFER_WITH_SB(s)) ; + return 0; + } + + /* This incrementation allocates the first unused objectid. That + is to say, the first entry on the objectid map is the first + unused objectid, and by incrementing it we use it. See below + where we check to see if we eliminated a sequence of unused + objectids.... */ + map[1] = cpu_to_le32 (unused_objectid + 1); + + /* Now we check to see if we eliminated the last remaining member of + the first even sequence (and can eliminate the sequence by + eliminating its last objectid from oids), and can collapse the + first two odd sequences into one sequence. If so, then the net + result is to eliminate a pair of objectids from oids. We do this + by shifting the entire map to the left. 
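
The allocator described in this comment keeps only the boundaries of the alternating used/free objectid runs, so map[1] is always the first unused objectid; taking it may exhaust the first free run, at which point the first two used runs merge and a pair of boundaries is dropped from the map, which is the memmove performed just below. A small in-memory model of that step (struct oid_map and oid_alloc are illustrative; the real code edits the little-endian map stored after the super block):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct oid_map {
    uint32_t map[32];
    int      cursize;
};

static uint32_t oid_alloc(struct oid_map *m)
{
    uint32_t id = m->map[1];            /* first unused objectid */

    m->map[1]++;                        /* ...is now in use */

    /* First free run exhausted: drop a pair of boundaries. */
    if (m->cursize > 2 && m->map[1] == m->map[2]) {
        memmove(m->map + 1, m->map + 3,
                (m->cursize - 3) * sizeof(uint32_t));
        m->cursize -= 2;
    }
    return id;
}

int main(void)
{
    /* ids 1..8 in use, 9 free, 10..19 in use, everything above free */
    struct oid_map m = { { 1, 9, 10, 20 }, 4 };

    printf("allocated %u\n", oid_alloc(&m));             /* takes id 9 */
    printf("first unused now %u, cursize %d\n",          /* runs merged: */
           m.map[1], m.cursize);                         /* {1, 20}, size 2 */
    return 0;
}
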
*/ + if (le16_to_cpu (rs->s_oid_cursize) > 2 && map[1] == map[2]) { + memmove (map + 1, map + 3, (le16_to_cpu (rs->s_oid_cursize) - 3) * sizeof(__u32)); + //rs->s_oid_cursize -= 2; + rs->s_oid_cursize = cpu_to_le16 (le16_to_cpu (rs->s_oid_cursize) - 2); + } + + journal_mark_dirty(th, s, SB_BUFFER_WITH_SB (s)); + s->s_dirt = 1; + return unused_objectid; +} + + +/* makes object identifier unused */ +void reiserfs_release_objectid (struct reiserfs_transaction_handle *th, + __u32 objectid_to_release) +{ + struct super_block * s = th->t_super; + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); + __u32 * map = objectid_map (s, rs); + int i = 0; + + //return; +#ifdef CONFIG_REISERFS_CHECK + check_objectid_map (s, map); +#endif + + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + journal_mark_dirty(th, s, SB_BUFFER_WITH_SB (s)); + s->s_dirt = 1; + + + /* start at the beginning of the objectid map (i = 0) and go to + the end of it (i = disk_sb->s_oid_cursize). Linear search is + what we use, though it is possible that binary search would be + more efficient after performing lots of deletions (which is + when oids is large.) We only check even i's. */ + while (i < le16_to_cpu (rs->s_oid_cursize)) { + if (objectid_to_release == le32_to_cpu (map[i])) { + /* This incrementation unallocates the objectid. */ + //map[i]++; + map[i] = cpu_to_le32 (le32_to_cpu (map[i]) + 1); + + /* Did we unallocate the last member of an odd sequence, and can shrink oids? */ + if (map[i] == map[i+1]) { + /* shrink objectid map */ + memmove (map + i, map + i + 2, + (le16_to_cpu (rs->s_oid_cursize) - i - 2) * sizeof (__u32)); + //disk_sb->s_oid_cursize -= 2; + rs->s_oid_cursize = cpu_to_le16 (le16_to_cpu (rs->s_oid_cursize) - 2); + +#ifdef CONFIG_REISERFS_CHECK + if (le16_to_cpu (rs->s_oid_cursize) < 2 || + le16_to_cpu (rs->s_oid_cursize) > le16_to_cpu (rs->s_oid_maxsize)) + reiserfs_panic (s, "vs-15005: reiserfs_release_objectid: " + "objectid map corrupted cur_size == %d (max == %d)", + le16_to_cpu (rs->s_oid_cursize), le16_to_cpu (rs->s_oid_maxsize)); +#endif + } + return; + } + + if (objectid_to_release > le32_to_cpu (map[i]) && + objectid_to_release < le32_to_cpu (map[i + 1])) { + /* size of objectid map is not changed */ + if (objectid_to_release + 1 == le32_to_cpu (map[i + 1])) { + //objectid_map[i+1]--; + map[i + 1] = cpu_to_le32 (le32_to_cpu (map[i + 1]) - 1); + return; + } + + if (rs->s_oid_cursize == rs->s_oid_maxsize) + /* objectid map must be expanded, but there is no space */ + return; + + /* expand the objectid map*/ + memmove (map + i + 3, map + i + 1, + (le16_to_cpu (rs->s_oid_cursize) - i - 1) * sizeof(__u32)); + map[i + 1] = cpu_to_le32 (objectid_to_release); + map[i + 2] = cpu_to_le32 (objectid_to_release + 1); + rs->s_oid_cursize = cpu_to_le16 (le16_to_cpu (rs->s_oid_cursize) + 2); + return; + } + i += 2; + } + + reiserfs_warning ("vs-15010: reiserfs_release_objectid: tried to free free object id (%lu)", + objectid_to_release); +} + + +int reiserfs_convert_objectid_map_v1(struct super_block *s) { + struct reiserfs_super_block *disk_sb = SB_DISK_SUPER_BLOCK (s); + int cur_size = le16_to_cpu(disk_sb->s_oid_cursize) ; + int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2 ; + int old_max = le16_to_cpu(disk_sb->s_oid_maxsize) ; + struct reiserfs_super_block_v1 *disk_sb_v1 ; + __u32 *objectid_map, *new_objectid_map ; + int i ; + + disk_sb_v1=(struct reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data); + objectid_map = (__u32 *)(disk_sb_v1 + 1) ; + new_objectid_map = 
(__u32 *)(disk_sb + 1) ; + + if (cur_size > new_size) { + /* mark everyone used that was listed as free at the end of the objectid + ** map + */ + objectid_map[new_size - 1] = objectid_map[cur_size - 1] ; + disk_sb->s_oid_cursize = cpu_to_le16(new_size) ; + } + /* move the smaller objectid map past the end of the new super */ + for (i = new_size - 1 ; i >= 0 ; i--) { + objectid_map[i + (old_max - new_size)] = objectid_map[i] ; + } + + + /* set the max size so we don't overflow later */ + disk_sb->s_oid_maxsize = cpu_to_le16(new_size) ; + + /* finally, zero out the unused chunk of the new super */ + memset(disk_sb->s_unused, 0, sizeof(disk_sb->s_unused)) ; + return 0 ; +} + diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/prints.c linux/fs/reiserfs/prints.c --- v2.4.0/linux/fs/reiserfs/prints.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/prints.c Mon Jan 15 15:31:19 2001 @@ -0,0 +1,881 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +#else + +#include "nokernel.h" +#include + +#endif + +#include + +static char error_buf[1024]; +static char fmt_buf[1024]; +static char off_buf[80]; + + +static char * cpu_offset (struct cpu_key * key) +{ + if (cpu_key_k_type(key) == TYPE_DIRENTRY) + sprintf (off_buf, "%Lu(%Lu)", + (unsigned long long)GET_HASH_VALUE (cpu_key_k_offset (key)), + (unsigned long long)GET_GENERATION_NUMBER (cpu_key_k_offset (key))); + else + sprintf (off_buf, "0x%Lx", (unsigned long long)cpu_key_k_offset (key)); + return off_buf; +} + + +static char * le_offset (struct key * key) +{ + int version; + + version = le_key_version (key); + if (le_key_k_type (version, key) == TYPE_DIRENTRY) + sprintf (off_buf, "%Lu(%Lu)", + (unsigned long long)GET_HASH_VALUE (le_key_k_offset (version, key)), + (unsigned long long)GET_GENERATION_NUMBER (le_key_k_offset (version, key))); + else + sprintf (off_buf, "0x%Lx", (unsigned long long)le_key_k_offset (version, key)); + return off_buf; +} + + +static char * cpu_type (struct cpu_key * key) +{ + if (cpu_key_k_type (key) == TYPE_STAT_DATA) + return "SD"; + if (cpu_key_k_type (key) == TYPE_DIRENTRY) + return "DIR"; + if (cpu_key_k_type (key) == TYPE_DIRECT) + return "DIRECT"; + if (cpu_key_k_type (key) == TYPE_INDIRECT) + return "IND"; + return "UNKNOWN"; +} + + +static char * le_type (struct key * key) +{ + int version; + + version = le_key_version (key); + + if (le_key_k_type (version, key) == TYPE_STAT_DATA) + return "SD"; + if (le_key_k_type (version, key) == TYPE_DIRENTRY) + return "DIR"; + if (le_key_k_type (version, key) == TYPE_DIRECT) + return "DIRECT"; + if (le_key_k_type (version, key) == TYPE_INDIRECT) + return "IND"; + return "UNKNOWN"; +} + + +/* %k */ +static void sprintf_le_key (char * buf, struct key * key) +{ + if (key) + sprintf (buf, "[%d %d %s %s]", le32_to_cpu (key->k_dir_id), + le32_to_cpu (key->k_objectid), le_offset (key), le_type (key)); + else + sprintf (buf, "[NULL]"); +} + + +/* %K */ +static void sprintf_cpu_key (char * buf, struct cpu_key * key) +{ + if (key) + sprintf (buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id, + key->on_disk_key.k_objectid, cpu_offset (key), cpu_type (key)); + else + sprintf (buf, "[NULL]"); +} + + +static void sprintf_item_head (char * buf, struct item_head * ih) +{ + if (ih) { + sprintf (buf, "%s", (ih_version (ih) == ITEM_VERSION_2) ? 
"*NEW* " : "*OLD*"); + sprintf_le_key (buf + strlen (buf), &(ih->ih_key)); + sprintf (buf + strlen (buf), ", item_len %d, item_location %d, " + "free_space(entry_count) %d", + ih->ih_item_len, ih->ih_item_location, ih_free_space (ih)); + } else + sprintf (buf, "[NULL]"); +} + + +static void sprintf_direntry (char * buf, struct reiserfs_dir_entry * de) +{ + char name[20]; + + memcpy (name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen); + name [de->de_namelen > 19 ? 19 : de->de_namelen] = 0; + sprintf (buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid); +} + + +static void sprintf_block_head (char * buf, struct buffer_head * bh) +{ + sprintf (buf, "level=%d, nr_items=%d, free_space=%d rdkey ", + B_LEVEL (bh), B_NR_ITEMS (bh), B_FREE_SPACE (bh)); +#if 0 + if (B_LEVEL (bh) == DISK_LEAF_NODE_LEVEL) + sprintf_le_key (buf + strlen (buf), B_PRIGHT_DELIM_KEY (bh)); +#endif +} + + +static void sprintf_buffer_head (char * buf, struct buffer_head * bh) +{ + sprintf (buf, "dev %s, size %d, blocknr %ld, count %d, list %d, state 0x%lx, page %p, (%s, %s, %s)", + kdevname (bh->b_dev), bh->b_size, bh->b_blocknr, atomic_read (&(bh->b_count)), bh->b_list, + bh->b_state, bh->b_page, + buffer_uptodate (bh) ? "UPTODATE" : "!UPTODATE", + buffer_dirty (bh) ? "DIRTY" : "CLEAN", + buffer_locked (bh) ? "LOCKED" : "UNLOCKED"); +} + + +static void sprintf_disk_child (char * buf, struct disk_child * dc) +{ + sprintf (buf, "[dc_number=%d, dc_size=%u]", dc->dc_block_number, dc->dc_size); +} + + +static char * is_there_reiserfs_struct (char * fmt, int * what, int * skip) +{ + char * k = fmt; + + *skip = 0; + + while (1) { + k = strstr (k, "%"); + if (!k) + break; + if (k && (k[1] == 'k' || k[1] == 'K' || k[1] == 'h' || k[1] == 't' || + k[1] == 'z' || k[1] == 'b' || k[1] == 'y')) { + *what = k[1]; + break; + } + (*skip) ++; + k ++; + } + return k; +} + + +/* debugging reiserfs we used to print out a lot of different + variables, like keys, item headers, buffer heads etc. Values of + most fields matter. So it took a long time just to write + appropriative printk. With this reiserfs_warning you can use format + specification for complex structures like you used to do with + printfs for integers, doubles and pointers. 
For instance, to print + out key structure you have to write just: + reiserfs_warning ("bad key %k", key); + instead of + printk ("bad key %lu %lu %lu %lu", key->k_dir_id, key->k_objectid, + key->k_offset, key->k_uniqueness); +*/ + +#define do_reiserfs_warning \ +{\ + char * fmt1 = fmt_buf;\ + va_list args;\ + int i, j;\ + char * k;\ + char * p = error_buf;\ + int what, skip;\ +\ + strcpy (fmt1, fmt);\ + va_start(args, fmt);\ +\ + while (1) {\ + k = is_there_reiserfs_struct (fmt1, &what, &skip);\ + if (k != 0) {\ + *k = 0;\ + p += vsprintf (p, fmt1, args);\ +\ + for (i = 0; i < skip; i ++)\ + j = va_arg (args, int);\ +\ + switch (what) {\ + case 'k':\ + sprintf_le_key (p, va_arg(args, struct key *));\ + break;\ + case 'K':\ + sprintf_cpu_key (p, va_arg(args, struct cpu_key *));\ + break;\ + case 'h':\ + sprintf_item_head (p, va_arg(args, struct item_head *));\ + break;\ + case 't':\ + sprintf_direntry (p, va_arg(args, struct reiserfs_dir_entry *));\ + break;\ + case 'y':\ + sprintf_disk_child (p, va_arg(args, struct disk_child *));\ + break;\ + case 'z':\ + sprintf_block_head (p, va_arg(args, struct buffer_head *));\ + break;\ + case 'b':\ + sprintf_buffer_head (p, va_arg(args, struct buffer_head *));\ + break;\ + }\ + p += strlen (p);\ + fmt1 = k + 2;\ + } else {\ + i = vsprintf (p, fmt1, args);\ + break;\ + }\ + }\ +\ + va_end(args);\ +} + + +/* in addition to usual conversion specifiers this accepts reiserfs + specific conversion specifiers: + %k to print little endian key, + %K to print cpu key, + %h to print item_head, + %t to print directory entry + %z to print block head (arg must be struct buffer_head * + %b to print buffer_head +*/ +void reiserfs_warning (const char * fmt, ...) +{ + do_reiserfs_warning; + /* console_print (error_buf); */ + printk ("%s", error_buf); +} + +void reiserfs_debug (struct super_block *s, int level, const char * fmt, ...) +{ +#ifdef CONFIG_REISERFS_CHECK + do_reiserfs_warning; + printk ("%s", error_buf); +#else + ; +#endif +} + +/* The format: + + maintainer-errorid: [function-name:] message + + where errorid is unique to the maintainer and function-name is + optional, is recommended, so that anyone can easily find the bug + with a simple grep for the short to type string + maintainer-errorid. Don't bother with reusing errorids, there are + lots of numbers out there. + + Example: + + reiserfs_panic( + p_sb, "reiser-29: reiserfs_new_blocknrs: " + "one of search_start or rn(%d) is equal to MAX_B_NUM," + "which means that we are optimizing location based on the bogus location of a temp buffer (%p).", + rn, bh + ); + + Regular panic()s sometimes clear the screen before the message can + be read, thus the need for the while loop. + + Numbering scheme for panic used by Vladimir and Anatoly( Hans completely ignores this scheme, and considers it + pointless complexity): + + panics in reiserfs_fs.h have numbers from 1000 to 1999 + super.c 2000 to 2999 + preserve.c 3000 to 3999 + bitmap.c 4000 to 4999 + stree.c 5000 to 5999 + prints.c 6000 to 6999 + namei.c 7000 to 7999 + fix_nodes.c 8000 to 8999 + dir.c 9000 to 9999 + lbalance.c 10000 to 10999 + ibalance.c 11000 to 11999 not ready + do_balan.c 12000 to 12999 + inode.c 13000 to 13999 + file.c 14000 to 14999 + objectid.c 15000 - 15999 + buffer.c 16000 - 16999 + symlink.c 17000 - 17999 + + . */ + + +#ifdef CONFIG_REISERFS_CHECK +extern struct tree_balance * cur_tb; +#endif + +void reiserfs_panic (struct super_block * sb, const char * fmt, ...) 
+{ +#ifdef __KERNEL__ + show_reiserfs_locks() ; +#endif + do_reiserfs_warning; + printk ("%s", error_buf); + BUG (); + // console_print (error_buf); + // for (;;); + +#ifdef __KERNEL__ + + /* comment before release */ + //for (;;); + +#if 0 /* this is not needed, the state is ignored */ + if (sb && !(sb->s_flags & MS_RDONLY)) { + sb->u.reiserfs_sb.s_mount_state |= REISERFS_ERROR_FS; + sb->u.reiserfs_sb.s_rs->s_state = REISERFS_ERROR_FS; + + mark_buffer_dirty(sb->u.reiserfs_sb.s_sbh) ; + sb->s_dirt = 1; + } +#endif + + /* this is to prevent panic from syncing this filesystem */ + if (sb && sb->s_lock) + sb->s_lock=0; + if (sb) + sb->s_flags |= MS_RDONLY; + + panic ("REISERFS: panic (device %s): %s\n", + sb ? kdevname(sb->s_dev) : "sb == 0", error_buf); +#else + exit (0); +#endif +} + + +void print_virtual_node (struct virtual_node * vn) +{ + int i; + struct virtual_item * vi; + + printk ("VIRTUAL NODE CONTAINS %d items, has size %d,%s,%s, ITEM_POS=%d POS_IN_ITEM=%d MODE=\'%c\'\n", + vn->vn_nr_item, vn->vn_size, + (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE )? "left mergeable" : "", + (vn->vn_vi[vn->vn_nr_item - 1].vi_type & VI_TYPE_RIGHT_MERGEABLE) ? "right mergeable" : "", + vn->vn_affected_item_num, vn->vn_pos_in_item, vn->vn_mode); + + vi = vn->vn_vi; + for (i = 0; i < vn->vn_nr_item; i ++, vi ++) + op_print_vi (vi); + +} + + +void print_path (struct tree_balance * tb, struct path * path) +{ + int h = 0; + struct buffer_head * bh; + + if (tb) { + while (tb->insert_size[h]) { + bh = PATH_H_PBUFFER (path, h); + printk ("block %lu (level=%d), position %d\n", bh ? bh->b_blocknr : 0, + bh ? B_LEVEL (bh) : 0, PATH_H_POSITION (path, h)); + h ++; + } + } else { + int offset = path->path_length; + struct buffer_head * bh; + printk ("Offset Bh (b_blocknr, b_count) Position Nr_item\n"); + while ( offset > ILLEGAL_PATH_ELEMENT_OFFSET ) { + bh = PATH_OFFSET_PBUFFER (path, offset); + printk ("%6d %10p (%9lu, %7d) %8d %7d\n", offset, + bh, bh ? bh->b_blocknr : 0, bh ? atomic_read (&(bh->b_count)) : 0, + PATH_OFFSET_POSITION (path, offset), bh ? B_NR_ITEMS (bh) : -1); + + offset --; + } + } + +} + + +/* this prints internal nodes (4 keys/items in line) (dc_number, + dc_size)[k_dirid, k_objectid, k_offset, k_uniqueness](dc_number, + dc_size)...*/ +static int print_internal (struct buffer_head * bh, int first, int last) +{ + struct key * key; + struct disk_child * dc; + int i; + int from, to; + + if (!B_IS_KEYS_LEVEL (bh)) + return 1; + + check_internal (bh); + + if (first == -1) { + from = 0; + to = B_NR_ITEMS (bh); + } else { + from = first; + to = last < B_NR_ITEMS (bh) ? 
last : B_NR_ITEMS (bh); + } + + reiserfs_warning ("INTERNAL NODE (%ld) contains %z\n", bh->b_blocknr, bh); + + dc = B_N_CHILD (bh, from); + reiserfs_warning ("PTR %d: %y ", from, dc); + + for (i = from, key = B_N_PDELIM_KEY (bh, from), dc ++; i < to; i ++, key ++, dc ++) { + reiserfs_warning ("KEY %d: %k PTR %d: %y ", i, key, i + 1, dc); + if (i && i % 4 == 0) + printk ("\n"); + } + printk ("\n"); + return 0; +} + + + + + +static int print_leaf (struct buffer_head * bh, int print_mode, int first, int last) +{ + struct block_head * blkh; + struct item_head * ih; + int i; + int from, to; + + if (!B_IS_ITEMS_LEVEL (bh)) + return 1; + + check_leaf (bh); + + blkh = B_BLK_HEAD (bh); + ih = B_N_PITEM_HEAD (bh,0); + + printk ("\n===================================================================\n"); + reiserfs_warning ("LEAF NODE (%ld) contains %z\n", bh->b_blocknr, bh); + + if (!(print_mode & PRINT_LEAF_ITEMS)) { + reiserfs_warning ("FIRST ITEM_KEY: %k, LAST ITEM KEY: %k\n", + &(ih->ih_key), &((ih + le16_to_cpu (blkh->blk_nr_item) - 1)->ih_key)); + return 0; + } + + if (first < 0 || first > le16_to_cpu (blkh->blk_nr_item) - 1) + from = 0; + else + from = first; + + if (last < 0 || last > le16_to_cpu (blkh->blk_nr_item)) + to = le16_to_cpu (blkh->blk_nr_item); + else + to = last; + + ih += from; + printk ("-------------------------------------------------------------------------------\n"); + printk ("|##| type | key | ilen | free_space | version | loc |\n"); + for (i = from; i < to; i++, ih ++) { + printk ("-------------------------------------------------------------------------------\n"); + reiserfs_warning ("|%2d| %h |\n", i, ih); + if (print_mode & PRINT_LEAF_ITEMS) + op_print_item (ih, B_I_PITEM (bh, ih)); + } + + printk ("===================================================================\n"); + + return 0; +} + +static char * reiserfs_version (char * buf) +{ + __u16 * pversion; + + pversion = (__u16 *)(buf) + 36; + if (*pversion == 0) + return "0"; + if (*pversion == 2) + return "2"; + return "Unknown"; +} + + +/* return 1 if this is not super block */ +static int print_super_block (struct buffer_head * bh) +{ + struct reiserfs_super_block * rs = (struct reiserfs_super_block *)(bh->b_data); + int skipped, data_blocks; + + + if (strncmp (rs->s_magic, REISERFS_SUPER_MAGIC_STRING, strlen ( REISERFS_SUPER_MAGIC_STRING)) && + strncmp (rs->s_magic, REISER2FS_SUPER_MAGIC_STRING, strlen ( REISER2FS_SUPER_MAGIC_STRING))) + return 1; + + printk ("%s\'s super block in block %ld\n======================\n", kdevname (bh->b_dev), bh->b_blocknr); + printk ("Reiserfs version %s\n", reiserfs_version (bh->b_data)); + printk ("Block count %u\n", le32_to_cpu (rs->s_block_count)); + printk ("Blocksize %d\n", le16_to_cpu (rs->s_blocksize)); + printk ("Free blocks %u\n", le32_to_cpu (rs->s_free_blocks)); + skipped = bh->b_blocknr; // FIXME: this would be confusing if + // someone stores reiserfs super block in some data block ;) + data_blocks = le32_to_cpu (rs->s_block_count) - skipped - 1 - + le16_to_cpu (rs->s_bmap_nr) - (le32_to_cpu (rs->s_orig_journal_size) + 1) - + le32_to_cpu (rs->s_free_blocks); + printk ("Busy blocks (skipped %d, bitmaps - %d, journal blocks - %d\n" + "1 super blocks, %d data blocks\n", + skipped, le16_to_cpu (rs->s_bmap_nr), + (le32_to_cpu (rs->s_orig_journal_size) + 1), data_blocks); + printk ("Root block %u\n", le32_to_cpu (rs->s_root_block)); + printk ("Journal block (first) %d\n", le32_to_cpu (rs->s_journal_block)); + printk ("Journal dev %d\n", le32_to_cpu (rs->s_journal_dev)); + 
printk ("Journal orig size %d\n", le32_to_cpu (rs->s_orig_journal_size)); + printk ("Filesystem state %s\n", + (le16_to_cpu (rs->s_state) == REISERFS_VALID_FS) ? "VALID" : "ERROR"); + printk ("Hash function \"%s\"\n", le16_to_cpu (rs->s_hash_function_code) == TEA_HASH ? "tea" : + ((le16_to_cpu (rs->s_hash_function_code) == YURA_HASH) ? "rupasov" : "unknown")); + +#if 0 + __u32 s_journal_trans_max ; /* max number of blocks in a transaction. */ + __u32 s_journal_block_count ; /* total size of the journal. can change over time */ + __u32 s_journal_max_batch ; /* max number of blocks to batch into a trans */ + __u32 s_journal_max_commit_age ; /* in seconds, how old can an async commit be */ + __u32 s_journal_max_trans_age ; /* in seconds, how old can a transaction be */ +#endif + printk ("Tree height %d\n", rs->s_tree_height); + return 0; +} + + +static int print_desc_block (struct buffer_head * bh) +{ + struct reiserfs_journal_desc * desc; + + desc = (struct reiserfs_journal_desc *)(bh->b_data); + if (memcmp(desc->j_magic, JOURNAL_DESC_MAGIC, 8)) + return 1; + + printk ("Desc block %lu (j_trans_id %d, j_mount_id %d, j_len %d)", + bh->b_blocknr, desc->j_trans_id, desc->j_mount_id, desc->j_len); + + return 0; +} + + +void print_block (struct buffer_head * bh, ...)//int print_mode, int first, int last) +{ + va_list args; + int mode, first, last; + + va_start (args, bh); + + if ( ! bh ) { + printk("print_block: buffer is NULL\n"); + return; + } + + mode = va_arg (args, int); + first = va_arg (args, int); + last = va_arg (args, int); + if (print_leaf (bh, mode, first, last)) + if (print_internal (bh, first, last)) + if (print_super_block (bh)) + if (print_desc_block (bh)) + printk ("Block %ld contains unformatted data\n", bh->b_blocknr); +} + + + +char print_tb_buf[2048]; + +/* this stores initial state of tree balance in the print_tb_buf */ +void store_print_tb (struct tree_balance * tb) +{ + int h = 0; + int i; + struct buffer_head * tbSh, * tbFh; + + if (!tb) + return; + + sprintf (print_tb_buf, "\n" + "BALANCING %d\n" + "MODE=%c, ITEM_POS=%d POS_IN_ITEM=%d\n" + "=====================================================================\n" + "* h * S * L * R * F * FL * FR * CFL * CFR *\n", + tb->tb_sb->u.reiserfs_sb.s_do_balance, + tb->tb_mode, PATH_LAST_POSITION (tb->tb_path), tb->tb_path->pos_in_item); + + for (h = 0; h < sizeof(tb->insert_size) / sizeof (tb->insert_size[0]); h ++) { + if (PATH_H_PATH_OFFSET (tb->tb_path, h) <= tb->tb_path->path_length && + PATH_H_PATH_OFFSET (tb->tb_path, h) > ILLEGAL_PATH_ELEMENT_OFFSET) { + tbSh = PATH_H_PBUFFER (tb->tb_path, h); + tbFh = PATH_H_PPARENT (tb->tb_path, h); + } else { + tbSh = 0; + tbFh = 0; + } + sprintf (print_tb_buf + strlen (print_tb_buf), + "* %d * %3ld(%2d) * %3ld(%2d) * %3ld(%2d) * %5ld * %5ld * %5ld * %5ld * %5ld *\n", + h, + (tbSh) ? (tbSh->b_blocknr):(-1), + (tbSh) ? atomic_read (&(tbSh->b_count)) : -1, + (tb->L[h]) ? (tb->L[h]->b_blocknr):(-1), + (tb->L[h]) ? atomic_read (&(tb->L[h]->b_count)) : -1, + (tb->R[h]) ? (tb->R[h]->b_blocknr):(-1), + (tb->R[h]) ? atomic_read (&(tb->R[h]->b_count)) : -1, + (tbFh) ? (tbFh->b_blocknr):(-1), + (tb->FL[h]) ? (tb->FL[h]->b_blocknr):(-1), + (tb->FR[h]) ? (tb->FR[h]->b_blocknr):(-1), + (tb->CFL[h]) ? (tb->CFL[h]->b_blocknr):(-1), + (tb->CFR[h]) ? 
(tb->CFR[h]->b_blocknr):(-1)); + } + + sprintf (print_tb_buf + strlen (print_tb_buf), + "=====================================================================\n" + "* h * size * ln * lb * rn * rb * blkn * s0 * s1 * s1b * s2 * s2b * curb * lk * rk *\n" + "* 0 * %4d * %2d * %2d * %2d * %2d * %4d * %2d * %2d * %3d * %2d * %3d * %4d * %2d * %2d *\n", + tb->insert_size[0], tb->lnum[0], tb->lbytes, tb->rnum[0],tb->rbytes, tb->blknum[0], + tb->s0num, tb->s1num,tb->s1bytes, tb->s2num, tb->s2bytes, tb->cur_blknum, tb->lkey[0], tb->rkey[0]); + + /* this prints balance parameters for non-leaf levels */ + h = 0; + do { + h++; + sprintf (print_tb_buf + strlen (print_tb_buf), + "* %d * %4d * %2d * * %2d * * %2d *\n", + h, tb->insert_size[h], tb->lnum[h], tb->rnum[h], tb->blknum[h]); + } while (tb->insert_size[h]); + + sprintf (print_tb_buf + strlen (print_tb_buf), + "=====================================================================\n" + "FEB list: "); + + /* print FEB list (list of buffers in form (bh (b_blocknr, b_count), that will be used for new nodes) */ + h = 0; + for (i = 0; i < sizeof (tb->FEB) / sizeof (tb->FEB[0]); i ++) + sprintf (print_tb_buf + strlen (print_tb_buf), + "%p (%lu %d)%s", tb->FEB[i], tb->FEB[i] ? tb->FEB[i]->b_blocknr : 0, + tb->FEB[i] ? atomic_read (&(tb->FEB[i]->b_count)) : 0, + (i == sizeof (tb->FEB) / sizeof (tb->FEB[0]) - 1) ? "\n" : ", "); + + sprintf (print_tb_buf + strlen (print_tb_buf), + "======================== the end ====================================\n"); +} + +void print_cur_tb (char * mes) +{ + printk ("%s\n%s", mes, print_tb_buf); +} + + +#ifndef __KERNEL__ + +void print_bmap_block (int i, char * data, int size, int silent) +{ + int j, k; + int bits = size * 8; + int zeros = 0, ones = 0; + + + if (test_bit (0, data)) { + /* first block addressed by this bitmap block is used */ + ones ++; + if (!silent) + printf ("Busy (%d-", i * bits); + for (j = 1; j < bits; j ++) { + while (test_bit (j, data)) { + ones ++; + if (j == bits - 1) { + if (!silent) + printf ("%d)\n", j + i * bits); + goto end; + } + j++; + } + if (!silent) + printf ("%d) Free(%d-", j - 1 + i * bits, j + i * bits); + + while (!test_bit (j, data)) { + zeros ++; + if (j == bits - 1) { + if (!silent) + printf ("%d)\n", j + i * bits); + goto end; + } + j++; + } + if (!silent) + printf ("%d) Busy(%d-", j - 1 + i * bits, j + i * bits); + + j --; + end: + } + } else { + /* first block addressed by this bitmap is free */ + zeros ++; + if (!silent) + printf ("Free (%d-", i * bits); + for (j = 1; j < bits; j ++) { + k = 0; + while (!test_bit (j, data)) { + k ++; + if (j == bits - 1) { + if (!silent) + printf ("%d)\n", j + i * bits); + zeros += k; + goto end2; + } + j++; + } + zeros += k; + if (!silent) + printf ("%d) Busy(%d-", j - 1 + i * bits, j + i * bits); + + k = 0; + while (test_bit (j, data)) { + ones ++; + if (j == bits - 1) { + if (!silent) + printf ("%d)\n", j + i * bits); + ones += k; + goto end2; + } + j++; + } + ones += k; + if (!silent) + printf ("%d) Busy(%d-", j - 1 + i * bits, j + i * bits); + + j --; + end2: + } + } + + printf ("used %d, free %d\n", ones, zeros); +} + + +/* if silent == 1, do not print details */ +void print_bmap (struct super_block * s, int silent) +{ + int bmapnr = SB_BMAP_NR (s); + int i; + + printf ("Bitmap blocks are:\n"); + for (i = 0; i < bmapnr; i ++) { + printf ("#%d: block %lu: ", i, SB_AP_BITMAP(s)[i]->b_blocknr); + print_bmap_block (i, SB_AP_BITMAP(s)[i]->b_data, s->s_blocksize, silent); + } + +} + + + + +void print_objectid_map (struct super_block * s) +{ 
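+	/* Descriptive note on the map this function dumps: the objectid map
+	** sits in the super block buffer immediately after
+	** struct reiserfs_super_block (omap = (unsigned long *)(rs + 1)).
+	** Its rs->s_oid_cursize entries (capacity rs->s_oid_maxsize) are read
+	** as alternating range boundaries: an even index starts a busy
+	** objectid range running up to the next entry minus one, an odd index
+	** starts a free range.  Like the rest of this #ifndef __KERNEL__
+	** section, the function is only built for the user-space debug tools.
+	*/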
+ int i; + struct reiserfs_super_block * rs; + unsigned long * omap; + + rs = SB_DISK_SUPER_BLOCK (s); + omap = (unsigned long *)(rs + 1); + printk ("Map of objectids\n"); + + for (i = 0; i < rs->s_oid_cursize; i ++) { + if (i % 2 == 0) + printk ("busy(%lu-%lu) ", omap[i], omap[i+1] - 1); + else + printk ("free(%lu-%lu) ", + omap[i], ((i+1) == rs->s_oid_cursize) ? -1 : omap[i+1] - 1); + } + printk ("\n"); + + printk ("Object id array has size %d (max %d):", rs->s_oid_cursize, + rs->s_oid_maxsize); + + for (i = 0; i < rs->s_oid_cursize; i ++) + printk ("%lu ", omap[i]); + printk ("\n"); + +} + +#endif /* #ifndef __KERNEL__ */ + + +static void check_leaf_block_head (struct buffer_head * bh) +{ + struct block_head * blkh; + + blkh = B_BLK_HEAD (bh); + if (le16_to_cpu (blkh->blk_nr_item) > (bh->b_size - BLKH_SIZE) / IH_SIZE) + reiserfs_panic (0, "vs-6010: check_leaf_block_head: invalid item number %z", bh); + if (le16_to_cpu (blkh->blk_free_space) > + bh->b_size - BLKH_SIZE - IH_SIZE * le16_to_cpu (blkh->blk_nr_item)) + reiserfs_panic (0, "vs-6020: check_leaf_block_head: invalid free space %z", bh); + +} + +static void check_internal_block_head (struct buffer_head * bh) +{ + struct block_head * blkh; + + blkh = B_BLK_HEAD (bh); + if (!(B_LEVEL (bh) > DISK_LEAF_NODE_LEVEL && B_LEVEL (bh) <= MAX_HEIGHT)) + reiserfs_panic (0, "vs-6025: check_internal_block_head: invalid level %z", bh); + + if (B_NR_ITEMS (bh) > (bh->b_size - BLKH_SIZE) / IH_SIZE) + reiserfs_panic (0, "vs-6030: check_internal_block_head: invalid item number %z", bh); + + if (B_FREE_SPACE (bh) != + bh->b_size - BLKH_SIZE - KEY_SIZE * B_NR_ITEMS (bh) - DC_SIZE * (B_NR_ITEMS (bh) + 1)) + reiserfs_panic (0, "vs-6040: check_internal_block_head: invalid free space %z", bh); + +} + + +void check_leaf (struct buffer_head * bh) +{ + int i; + struct item_head * ih; + + if (!bh) + return; + check_leaf_block_head (bh); + for (i = 0, ih = B_N_PITEM_HEAD (bh, 0); i < B_NR_ITEMS (bh); i ++, ih ++) + op_check_item (ih, B_I_PITEM (bh, ih)); +} + + +void check_internal (struct buffer_head * bh) +{ + if (!bh) + return; + check_internal_block_head (bh); +} + + +void print_statistics (struct super_block * s) +{ + + /* + printk ("reiserfs_put_super: session statistics: balances %d, fix_nodes %d, preserve list freeings %d, \ +bmap with search %d, without %d, dir2ind %d, ind2dir %d\n", + s->u.reiserfs_sb.s_do_balance, s->u.reiserfs_sb.s_fix_nodes, s->u.reiserfs_sb.s_preserve_list_freeings, + s->u.reiserfs_sb.s_bmaps, s->u.reiserfs_sb.s_bmaps_without_search, + s->u.reiserfs_sb.s_direct2indirect, s->u.reiserfs_sb.s_indirect2direct); + */ + +} diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/resize.c linux/fs/reiserfs/resize.c --- v2.4.0/linux/fs/reiserfs/resize.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/resize.c Mon Jan 15 12:42:32 2001 @@ -0,0 +1,168 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + +/* + * Written by Alexander Zarochentcev. + * + * The kernel part of the (on-line) reiserfs resizer. 
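+ *
+ * reiserfs_resize() below only grows a mounted filesystem; on-line
+ * shrinking is refused.  It checks that the new last block is readable,
+ * requires the current disk layout (distributed bitmaps, fs version
+ * >= 3.5.12), reallocates the journal list bitmaps and the array of
+ * bitmap buffers when the number of bitmap blocks grows, and then, in
+ * a single transaction, fixes up the last bitmap block of the old and
+ * new layouts and updates the block count, free block count and bitmap
+ * count in the super block.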
+ */ + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include +#include +#include + +#else + +#include "nokernel.h" + +#endif + +int reiserfs_resize (struct super_block * s, unsigned long block_count_new) +{ + struct reiserfs_super_block * sb; + struct buffer_head ** bitmap, * bh; + struct reiserfs_transaction_handle th; + unsigned int bmap_nr_new, bmap_nr; + unsigned int block_r_new, block_r; + + struct reiserfs_list_bitmap * jb; + struct reiserfs_list_bitmap jbitmap[JOURNAL_NUM_BITMAPS]; + + unsigned long int block_count, free_blocks; + int i; + int copy_size ; + + sb = SB_DISK_SUPER_BLOCK(s); + + if (SB_BLOCK_COUNT(s) >= block_count_new) { + printk("can\'t shrink filesystem on-line\n"); + return -EINVAL; + } + + /* check the device size */ + bh = bread(s->s_dev, block_count_new - 1, s->s_blocksize); + if (!bh) { + printk("reiserfs_resize: can\'t read last block\n"); + return -EINVAL; + } + bforget(bh); + + /* old disk layout detection; those partitions can be mounted, but + * cannot be resized */ + if (SB_BUFFER_WITH_SB(s)->b_blocknr * SB_BUFFER_WITH_SB(s)->b_size + != REISERFS_DISK_OFFSET_IN_BYTES ) { + printk("reiserfs_resize: unable to resize a reiserfs without distributed bitmap (fs version < 3.5.12)\n"); + return -ENOTSUPP; + } + + /* count used bits in last bitmap block */ + block_r = SB_BLOCK_COUNT(s) - + (SB_BMAP_NR(s) - 1) * s->s_blocksize * 8; + + /* count bitmap blocks in new fs */ + bmap_nr_new = block_count_new / ( s->s_blocksize * 8 ); + block_r_new = block_count_new - bmap_nr_new * s->s_blocksize * 8; + if (block_r_new) + bmap_nr_new++; + else + block_r_new = s->s_blocksize * 8; + + /* save old values */ + block_count = SB_BLOCK_COUNT(s); + bmap_nr = SB_BMAP_NR(s); + + /* resizing of reiserfs bitmaps (journal and real), if needed */ + if (bmap_nr_new > bmap_nr) { + /* reallocate journal bitmaps */ + if (reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) { + printk("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); + unlock_super(s) ; + return -ENOMEM ; + } + /* the new journal bitmaps are zero filled, now we copy in the bitmap + ** node pointers from the old journal bitmap structs, and then + ** transfer the new data structures into the journal struct. + ** + ** using the copy_size var below allows this code to work for + ** both shrinking and expanding the FS. + */ + copy_size = bmap_nr_new < bmap_nr ? 
bmap_nr_new : bmap_nr ; + copy_size = copy_size * sizeof(struct reiserfs_list_bitmap_node *) ; + for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) { + struct reiserfs_bitmap_node **node_tmp ; + jb = SB_JOURNAL(s)->j_list_bitmap + i ; + memcpy(jbitmap[i].bitmaps, jb->bitmaps, copy_size) ; + + /* just in case vfree schedules on us, copy the new + ** pointer into the journal struct before freeing the + ** old one + */ + node_tmp = jb->bitmaps ; + jb->bitmaps = jbitmap[i].bitmaps ; + vfree(node_tmp) ; + } + + /* allocate additional bitmap blocks, reallocate array of bitmap + * block pointers */ + bitmap = reiserfs_kmalloc(sizeof(struct buffer_head *) * bmap_nr_new, GFP_KERNEL, s); + if (!bitmap) { + printk("reiserfs_resize: unable to allocate memory.\n"); + return -ENOMEM; + } + for (i = 0; i < bmap_nr; i++) + bitmap[i] = SB_AP_BITMAP(s)[i]; + for (i = bmap_nr; i < bmap_nr_new; i++) { + bitmap[i] = reiserfs_getblk(s->s_dev, i * s->s_blocksize * 8, s->s_blocksize); + memset(bitmap[i]->b_data, 0, sb->s_blocksize); + reiserfs_test_and_set_le_bit(0, bitmap[i]->b_data); + + mark_buffer_dirty(bitmap[i]) ; + mark_buffer_uptodate(bitmap[i], 1); + ll_rw_block(WRITE, 1, bitmap + i); + wait_on_buffer(bitmap[i]); + } + /* free old bitmap blocks array */ + reiserfs_kfree(SB_AP_BITMAP(s), + sizeof(struct buffer_head *) * bmap_nr, s); + SB_AP_BITMAP(s) = bitmap; + } + + /* begin transaction */ + journal_begin(&th, s, 10); + + /* correct last bitmap blocks in old and new disk layout */ + reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr - 1], 1); + for (i = block_r; i < s->s_blocksize * 8; i++) + reiserfs_test_and_clear_le_bit(i, + SB_AP_BITMAP(s)[bmap_nr - 1]->b_data); + journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr - 1]); + + reiserfs_prepare_for_journal(s, SB_AP_BITMAP(s)[bmap_nr_new - 1], 1); + for (i = block_r_new; i < s->s_blocksize * 8; i++) + reiserfs_test_and_set_le_bit(i, + SB_AP_BITMAP(s)[bmap_nr_new - 1]->b_data); + journal_mark_dirty(&th, s, SB_AP_BITMAP(s)[bmap_nr_new - 1]); + + /* update super */ + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + free_blocks = SB_FREE_BLOCKS(s); + PUT_SB_FREE_BLOCKS(s, free_blocks + (block_count_new - block_count - (bmap_nr_new - bmap_nr))); + PUT_SB_BLOCK_COUNT(s, block_count_new); + PUT_SB_BMAP_NR(s, bmap_nr_new); + s->s_dirt = 1; + + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); + + SB_JOURNAL(s)->j_must_wait = 1; + journal_end(&th, s, 10); + + return 0; +} + diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/stree.c linux/fs/reiserfs/stree.c --- v2.4.0/linux/fs/reiserfs/stree.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/stree.c Mon Jan 15 15:31:19 2001 @@ -0,0 +1,2078 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + +/* + * Written by Anatoly P. 
Pinchuk pap@namesys.botik.ru + * Programm System Institute + * Pereslavl-Zalessky Russia + */ + +/* + * This file contains functions dealing with S+tree + * + * B_IS_IN_TREE + * copy_short_key + * copy_item_head + * comp_short_keys + * comp_keys + * comp_cpu_keys + * comp_short_le_keys + * comp_short_cpu_keys + * cpu_key2cpu_key + * le_key2cpu_key + * comp_le_keys + * bin_search + * get_lkey + * get_rkey + * key_in_buffer + * decrement_bcount + * decrement_counters_in_path + * reiserfs_check_path + * pathrelse_and_restore + * pathrelse + * search_by_key_reada + * search_by_key + * search_for_position_by_key + * comp_items + * prepare_for_direct_item + * prepare_for_direntry_item + * prepare_for_delete_or_cut + * calc_deleted_bytes_number + * init_tb_struct + * padd_item + * reiserfs_delete_item + * reiserfs_delete_solid_item + * reiserfs_delete_object + * maybe_indirect_to_direct + * indirect_to_direct_roll_back + * reiserfs_cut_from_item + * truncate_directory + * reiserfs_do_truncate + * reiserfs_paste_into_item + * reiserfs_insert_item + */ +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include +#include +#include + +#else + +#include "nokernel.h" + +#endif + + + +/* Does the buffer contain a disk block which is in the tree. */ +inline int B_IS_IN_TREE (struct buffer_head * p_s_bh) +{ + +#ifdef CONFIG_REISERFS_CHECK + + if ( B_LEVEL (p_s_bh) > MAX_HEIGHT ) { + reiserfs_panic(0, "PAP-1010: B_IS_IN_TREE: block (%b) has too big level (%z)", + p_s_bh, p_s_bh); + } +#endif + + return ( B_LEVEL (p_s_bh) != FREE_LEVEL ); +} + + + + +inline void copy_short_key (void * to, void * from) +{ + memcpy (to, from, SHORT_KEY_SIZE); +} + +// +// to gets item head in le form +// +inline void copy_item_head(void * p_v_to, void * p_v_from) +{ + memcpy (p_v_to, p_v_from, IH_SIZE); +} + + +/* k1 is pointer to on-disk structure which is stored in little-endian + form. k2 is pointer to cpu variable. For key of items of the same + object this returns 0. + Returns: -1 if key1 < key2 + 0 if key1 == key2 + 1 if key1 > key2 */ +inline int comp_short_keys (struct key * le_key, struct cpu_key * cpu_key) +{ + __u32 * p_s_le_u32, * p_s_cpu_u32; + int n_key_length = REISERFS_SHORT_KEY_LEN; + + p_s_le_u32 = (__u32 *)le_key; + p_s_cpu_u32 = (__u32 *)cpu_key; + for( ; n_key_length--; ++p_s_le_u32, ++p_s_cpu_u32 ) { + if ( le32_to_cpu (*p_s_le_u32) < *p_s_cpu_u32 ) + return -1; + if ( le32_to_cpu (*p_s_le_u32) > *p_s_cpu_u32 ) + return 1; + } + + return 0; +} + + +/* k1 is pointer to on-disk structure which is stored in little-endian + form. k2 is pointer to cpu variable. + Compare keys using all 4 key fields. 
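+   (comp_short_keys compares dir_id and objectid first; offset and then
+   type follow.  A cpu key with key_length == 3 skips the type
+   comparison -- type only matters while tail conversion is in
+   progress.)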
+ Returns: -1 if key1 < key2 0 + if key1 = key2 1 if key1 > key2 */ +inline int comp_keys (struct key * le_key, struct cpu_key * cpu_key) +{ + int retval; + + retval = comp_short_keys (le_key, cpu_key); + if (retval) + return retval; + if (le_key_k_offset (cpu_key->version, le_key) < cpu_key_k_offset (cpu_key)) + return -1; + if (le_key_k_offset (cpu_key->version, le_key) > cpu_key_k_offset (cpu_key)) + return 1; + + if (cpu_key->key_length == 3) + return 0; + + /* this part is needed only when tail conversion is in progress */ + if (le_key_k_type (cpu_key->version, le_key) < cpu_key_k_type (cpu_key)) + return -1; + + if (le_key_k_type (cpu_key->version, le_key) > cpu_key_k_type (cpu_key)) + return 1; + + return 0; +} + + +// +// FIXME: not used yet +// +inline int comp_cpu_keys (struct cpu_key * key1, struct cpu_key * key2) +{ + if (key1->on_disk_key.k_dir_id < key2->on_disk_key.k_dir_id) + return -1; + if (key1->on_disk_key.k_dir_id > key2->on_disk_key.k_dir_id) + return 1; + + if (key1->on_disk_key.k_objectid < key2->on_disk_key.k_objectid) + return -1; + if (key1->on_disk_key.k_objectid > key2->on_disk_key.k_objectid) + return 1; + + if (cpu_key_k_offset (key1) < cpu_key_k_offset (key2)) + return -1; + if (cpu_key_k_offset (key1) > cpu_key_k_offset (key2)) + return 1; + + reiserfs_warning ("comp_cpu_keys: type are compared for %k and %k\n", + key1, key2); + + if (cpu_key_k_type (key1) < cpu_key_k_type (key2)) + return -1; + if (cpu_key_k_type (key1) > cpu_key_k_type (key2)) + return 1; + return 0; +} + +inline int comp_short_le_keys (struct key * key1, struct key * key2) +{ + __u32 * p_s_1_u32, * p_s_2_u32; + int n_key_length = REISERFS_SHORT_KEY_LEN; + + p_s_1_u32 = (__u32 *)key1; + p_s_2_u32 = (__u32 *)key2; + for( ; n_key_length--; ++p_s_1_u32, ++p_s_2_u32 ) { + if ( le32_to_cpu (*p_s_1_u32) < le32_to_cpu (*p_s_2_u32) ) + return -1; + if ( le32_to_cpu (*p_s_1_u32) > le32_to_cpu (*p_s_2_u32) ) + return 1; + } + return 0; +} + +inline int comp_short_cpu_keys (struct cpu_key * key1, + struct cpu_key * key2) +{ + __u32 * p_s_1_u32, * p_s_2_u32; + int n_key_length = REISERFS_SHORT_KEY_LEN; + + p_s_1_u32 = (__u32 *)key1; + p_s_2_u32 = (__u32 *)key2; + + for( ; n_key_length--; ++p_s_1_u32, ++p_s_2_u32 ) { + if ( *p_s_1_u32 < *p_s_2_u32 ) + return -1; + if ( *p_s_1_u32 > *p_s_2_u32 ) + return 1; + } + return 0; +} + + + +inline void cpu_key2cpu_key (struct cpu_key * to, struct cpu_key * from) +{ + memcpy (to, from, sizeof (struct cpu_key)); +} + + +inline void le_key2cpu_key (struct cpu_key * to, struct key * from) +{ + to->on_disk_key.k_dir_id = le32_to_cpu (from->k_dir_id); + to->on_disk_key.k_objectid = le32_to_cpu (from->k_objectid); + + // find out version of the key + to->version = le_key_version (from); + if (to->version == ITEM_VERSION_1) { + to->on_disk_key.u.k_offset_v1.k_offset = le32_to_cpu (from->u.k_offset_v1.k_offset); + to->on_disk_key.u.k_offset_v1.k_uniqueness = le32_to_cpu (from->u.k_offset_v1.k_uniqueness); + } else { + to->on_disk_key.u.k_offset_v2.k_offset = le64_to_cpu (from->u.k_offset_v2.k_offset); + to->on_disk_key.u.k_offset_v2.k_type = le16_to_cpu (from->u.k_offset_v2.k_type); + } +} + + + +// this does not say which one is bigger, it only returns 1 if keys +// are not equal, 0 otherwise +inline int comp_le_keys (struct key * k1, struct key * k2) +{ + return memcmp (k1, k2, sizeof (struct key)); +} + +/************************************************************************** + * Binary search toolkit function * + * Search for an item in the array by the item key 
* + * Returns: 1 if found, 0 if not found; * + * *p_n_pos = number of the searched element if found, else the * + * number of the first element that is larger than p_v_key. * + **************************************************************************/ +/* For those not familiar with binary search: n_lbound is the leftmost item that it + could be, n_rbound the rightmost item that it could be. We examine the item + halfway between n_lbound and n_rbound, and that tells us either that we can increase + n_lbound, or decrease n_rbound, or that we have found it, or if n_lbound <= n_rbound that + there are no possible items, and we have not found it. With each examination we + cut the number of possible items it could be by one more than half rounded down, + or we find it. */ +inline int bin_search ( + void * p_v_key, /* Key to search for. */ + void * p_v_base, /* First item in the array. */ + int p_n_num, /* Number of items in the array. */ + int p_n_width, /* Item size in the array. + searched. Lest the reader be + confused, note that this is crafted + as a general function, and when it + is applied specifically to the array + of item headers in a node, p_n_width + is actually the item header size not + the item size. */ + int * p_n_pos /* Number of the searched for element. */ + ) { + int n_rbound, n_lbound, n_j; + + for ( n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0))/2; n_lbound <= n_rbound; n_j = (n_rbound + n_lbound)/2 ) + switch( COMP_KEYS((struct key *)((char * )p_v_base + n_j * p_n_width), (struct cpu_key *)p_v_key) ) { + case -1: n_lbound = n_j + 1; continue; + case 1: n_rbound = n_j - 1; continue; + case 0: *p_n_pos = n_j; return ITEM_FOUND; /* Key found in the array. */ + } + + /* bin_search did not find given key, it returns position of key, + that is minimal and greater than the given one. */ + *p_n_pos = n_lbound; + return ITEM_NOT_FOUND; +} + +#ifdef CONFIG_REISERFS_CHECK +extern struct tree_balance * cur_tb; +#endif + + + +/* Minimal possible key. It is never in the tree. */ +struct key MIN_KEY = {0, 0, {{0, 0},}}; + +/* Maximal possible key. It is never in the tree. */ +struct key MAX_KEY = {0xffffffff, 0xffffffff, {{0xffffffff, 0xffffffff},}}; + + +/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom + of the path, and going upwards. We must check the path's validity at each step. If the key is not in + the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this + case we return a special key, either MIN_KEY or MAX_KEY. */ +inline struct key * get_lkey ( + struct path * p_s_chk_path, + struct super_block * p_s_sb + ) { + int n_position, n_path_offset = p_s_chk_path->path_length; + struct buffer_head * p_s_parent; + +#ifdef CONFIG_REISERFS_CHECK + if ( n_path_offset < FIRST_PATH_ELEMENT_OFFSET ) + reiserfs_panic(p_s_sb,"PAP-5010: get_lkey: illegal offset in the path"); +#endif + + /* While not higher in path than first element. */ + while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) ) + reiserfs_panic(p_s_sb, "PAP-5020: get_lkey: parent is not uptodate"); +#endif + + /* Parent at the path is not in the tree now. */ + if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) ) + return &MAX_KEY; + /* Check whether position in the parent is correct. 
*/ + if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) ) + return &MAX_KEY; + /* Check whether parent at the path really points to the child. */ + if ( B_N_CHILD_NUM(p_s_parent, n_position) != + PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr ) + return &MAX_KEY; + /* Return delimiting key if position in the parent is not equal to zero. */ + if ( n_position ) + return B_N_PDELIM_KEY(p_s_parent, n_position - 1); + } + /* Return MIN_KEY if we are in the root of the buffer tree. */ + if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == + SB_ROOT_BLOCK (p_s_sb) ) + return &MIN_KEY; + return &MAX_KEY; +} + + +/* Get delimiting key of the buffer at the path and its right neighbor. */ +inline struct key * get_rkey ( + struct path * p_s_chk_path, + struct super_block * p_s_sb + ) { + int n_position, + n_path_offset = p_s_chk_path->path_length; + struct buffer_head * p_s_parent; + +#ifdef CONFIG_REISERFS_CHECK + if ( n_path_offset < FIRST_PATH_ELEMENT_OFFSET ) + reiserfs_panic(p_s_sb,"PAP-5030: get_rkey: illegal offset in the path"); +#endif + + while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) ) + reiserfs_panic(p_s_sb, "PAP-5040: get_rkey: parent is not uptodate"); +#endif + + /* Parent at the path is not in the tree now. */ + if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) ) + return &MIN_KEY; + /* Check whether position in the parrent is correct. */ + if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) ) + return &MIN_KEY; + /* Check whether parent at the path really points to the child. */ + if ( B_N_CHILD_NUM(p_s_parent, n_position) != + PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr ) + return &MIN_KEY; + /* Return delimiting key if position in the parent is not the last one. */ + if ( n_position != B_NR_ITEMS(p_s_parent) ) + return B_N_PDELIM_KEY(p_s_parent, n_position); + } + /* Return MAX_KEY if we are in the root of the buffer tree. */ + if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == + SB_ROOT_BLOCK (p_s_sb) ) + return &MAX_KEY; + return &MIN_KEY; +} + + +/* Check whether a key is contained in the tree rooted from a buffer at a path. */ +/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in + the path. These delimiting keys are stored at least one level above that buffer in the tree. If the + buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in + this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */ +static inline int key_in_buffer ( + struct path * p_s_chk_path, /* Path which should be checked. */ + struct cpu_key * p_s_key, /* Key which should be checked. */ + struct super_block * p_s_sb /* Super block pointer. */ + ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( ! 
p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET || + p_s_chk_path->path_length > MAX_HEIGHT ) + reiserfs_panic(p_s_sb, "PAP-5050: key_in_buffer: pointer to the key(%p) is NULL or illegal path length(%d)", + p_s_key, p_s_chk_path->path_length); + + if ( PATH_PLAST_BUFFER(p_s_chk_path)->b_dev == NODEV ) + reiserfs_panic(p_s_sb, "PAP-5060: key_in_buffer: device must not be NODEV"); +#endif + + if ( COMP_KEYS(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1 ) + /* left delimiting key is bigger, that the key we look for */ + return 0; + // if ( COMP_KEYS(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 ) + if ( COMP_KEYS(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1 ) + /* p_s_key must be less than right delimitiing key */ + return 0; + return 1; +} + + +inline void decrement_bcount( + struct buffer_head * p_s_bh + ) { + if ( p_s_bh ) { + if ( atomic_read (&(p_s_bh->b_count)) ) { + atomic_dec (&(p_s_bh->b_count)); + return; + } + reiserfs_panic(NULL, "PAP-5070: decrement_bcount: trying to free free buffer %b", p_s_bh); + } +} + + +/* Decrement b_count field of the all buffers in the path. */ +void decrement_counters_in_path ( + struct path * p_s_search_path + ) { + int n_path_offset = p_s_search_path->path_length; + +#ifdef CONFIG_REISERFS_CHECK + if ( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET || + n_path_offset > EXTENDED_MAX_HEIGHT - 1 ) + reiserfs_panic(NULL, "PAP-5080: decrement_counters_in_path: illegal path offset of %d", n_path_offset); +#endif + + while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) { + struct buffer_head * bh; + + bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--); + decrement_bcount (bh); + } + p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; +} + + +int reiserfs_check_path(struct path *p) { +#ifdef CONFIG_REISERFS_CHECK + if (p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET) { + reiserfs_warning("check_path, path not properly relsed\n") ; + BUG() ; + } +#endif + return 0 ; +} + + +/* Release all buffers in the path. Restore dirty bits clean +** when preparing the buffer for the log +** +** only called from fix_nodes() +*/ +void pathrelse_and_restore ( + struct super_block *s, + struct path * p_s_search_path + ) { + int n_path_offset = p_s_search_path->path_length; + +#ifdef CONFIG_REISERFS_CHECK + if ( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET ) + reiserfs_panic(NULL, "clm-4000: pathrelse: illegal path offset"); +#endif + + while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) { + reiserfs_restore_prepared_buffer(s, PATH_OFFSET_PBUFFER(p_s_search_path, + n_path_offset)); + brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--)); + } + p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; +} + +/* Release all buffers in the path. 
*/ +void pathrelse ( + struct path * p_s_search_path + ) { + int n_path_offset = p_s_search_path->path_length; + +#ifdef CONFIG_REISERFS_CHECK + if ( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET ) + reiserfs_panic(NULL, "PAP-5090: pathrelse: illegal path offset"); +#endif + + while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) + brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--)); + + p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; +} + + + +static int is_leaf (char * buf, int blocksize, struct buffer_head * bh) +{ + struct block_head * blkh; + struct item_head * ih; + int used_space; + int prev_location; + int i; + int nr; + + blkh = (struct block_head *)buf; + if (le16_to_cpu (blkh->blk_level) != DISK_LEAF_NODE_LEVEL) { + printk ("is_leaf: this should be caught earlier\n"); + return 0; + } + + nr = le16_to_cpu (blkh->blk_nr_item); + if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) { + /* item number is too big or too small */ + reiserfs_warning ("is_leaf: nr_item seems wrong: %z\n", bh); + return 0; + } + ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1; + used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location (ih)); + if (used_space != blocksize - le16_to_cpu (blkh->blk_free_space)) { + /* free space does not match to calculated amount of use space */ + reiserfs_warning ("is_leaf: free space seems wrong: %z\n", bh); + return 0; + } + + // FIXME: it is_leaf will hit performance too much - we may have + // return 1 here + + /* check tables of item heads */ + ih = (struct item_head *)(buf + BLKH_SIZE); + prev_location = blocksize; + for (i = 0; i < nr; i ++, ih ++) { + if (ih_location (ih) >= blocksize || ih_location (ih) < IH_SIZE * nr) { + reiserfs_warning ("is_leaf: item location seems wrong: %h\n", ih); + return 0; + } + if (ih_item_len (ih) < 1 || ih_item_len (ih) > MAX_ITEM_LEN (blocksize)) { + reiserfs_warning ("is_leaf: item length seems wrong: %h\n", ih); + return 0; + } + if (prev_location - ih_location (ih) != ih_item_len (ih)) { + reiserfs_warning ("is_leaf: item location seems wrong (second one): %h\n", ih); + return 0; + } + prev_location = ih_location (ih); + } + + // one may imagine much more checks + return 1; +} + + +/* returns 1 if buf looks like an internal node, 0 otherwise */ +static int is_internal (char * buf, int blocksize, struct buffer_head * bh) +{ + struct block_head * blkh; + int nr; + int used_space; + + blkh = (struct block_head *)buf; + if (le16_to_cpu (blkh->blk_level) <= DISK_LEAF_NODE_LEVEL || + le16_to_cpu (blkh->blk_level) > MAX_HEIGHT) { + /* this level is not possible for internal nodes */ + printk ("is_internal: this should be caught earlier\n"); + return 0; + } + + nr = le16_to_cpu (blkh->blk_nr_item); + if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) { + /* for internal which is not root we might check min number of keys */ + reiserfs_warning ("is_internal: number of key seems wrong: %z\n", bh); + return 0; + } + + used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1); + if (used_space != blocksize - le16_to_cpu (blkh->blk_free_space)) { + reiserfs_warning ("is_internal: free space seems wrong: %z\n", bh); + return 0; + } + + // one may imagine much more checks + return 1; +} + + +// make sure that bh contains formatted node of reiserfs tree of +// 'level'-th level +static int is_tree_node (struct buffer_head * bh, int level) +{ + if (B_LEVEL (bh) != level) { + printk ("is_tree_node: node level %d does not match to the expected one %d\n", + B_LEVEL (bh), level); + 
return 0; + } + if (level == DISK_LEAF_NODE_LEVEL) + return is_leaf (bh->b_data, bh->b_size, bh); + + return is_internal (bh->b_data, bh->b_size, bh); +} + + + +#ifdef SEARCH_BY_KEY_READA + +/* The function is NOT SCHEDULE-SAFE! */ +static void search_by_key_reada (struct super_block * s, int blocknr) +{ + struct buffer_head * bh; + + if (blocknr == 0) + return; + + bh = reiserfs_getblk (s->s_dev, blocknr, s->s_blocksize); + + if (!buffer_uptodate (bh)) { + ll_rw_block (READA, 1, &bh); + } + bh->b_count --; +} + +#endif + +/************************************************************************** + * Algorithm SearchByKey * + * look for item in the Disk S+Tree by its key * + * Input: p_s_sb - super block * + * p_s_key - pointer to the key to search * + * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR * + * p_s_search_path - path from the root to the needed leaf * + **************************************************************************/ + +/* This function fills up the path from the root to the leaf as it + descends the tree looking for the key. It uses reiserfs_bread to + try to find buffers in the cache given their block number. If it + does not find them in the cache it reads them from disk. For each + node search_by_key finds using reiserfs_bread it then uses + bin_search to look through that node. bin_search will find the + position of the block_number of the next node if it is looking + through an internal node. If it is looking through a leaf node + bin_search will find the position of the item which has key either + equal to given key, or which is the maximal key less than the given + key. search_by_key returns a path that must be checked for the + correctness of the top of the path but need not be checked for the + correctness of the bottom of the path */ +/* The function is NOT SCHEDULE-SAFE! */ +int search_by_key (struct super_block * p_s_sb, + struct cpu_key * p_s_key, /* Key to search. */ + struct path * p_s_search_path, /* This structure was + allocated and initialized + by the calling + function. It is filled up + by this function. */ + int n_stop_level /* How far down the tree to search. To + stop at leaf level - set to + DISK_LEAF_NODE_LEVEL */ + ) { + kdev_t n_dev = p_s_sb->s_dev; + int n_block_number = SB_ROOT_BLOCK (p_s_sb), + expected_level = SB_TREE_HEIGHT (p_s_sb), + n_block_size = p_s_sb->s_blocksize; + struct buffer_head * p_s_bh; + struct path_element * p_s_last_element; + int n_node_level, n_retval; + int right_neighbor_of_leaf_node; + int fs_gen; + +#ifdef CONFIG_REISERFS_CHECK + int n_repeat_counter = 0; +#endif + + /* As we add each node to a path we increase its count. This means that + we must be careful to release all nodes in a path before we either + discard the path struct or re-use the path struct, as we do here. */ + + decrement_counters_in_path(p_s_search_path); + + right_neighbor_of_leaf_node = 0; + + /* With each iteration of this loop we search through the items in the + current node, and calculate the next current node(next path element) + for the next iteration of this loop.. */ + while ( 1 ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( !(++n_repeat_counter % 50000) ) + reiserfs_warning ("PAP-5100: search_by_key: %s:" + "there were %d iterations of while loop " + "looking for key %K\n", + current->comm, n_repeat_counter, p_s_key); +#endif + + /* prep path to have another element added to it. 
*/ + p_s_last_element = PATH_OFFSET_PELEMENT(p_s_search_path, ++p_s_search_path->path_length); + fs_gen = get_generation (p_s_sb); + expected_level --; + +#ifdef SEARCH_BY_KEY_READA + /* schedule read of right neighbor */ + search_by_key_reada (p_s_sb, right_neighbor_of_leaf_node); +#endif + + /* Read the next tree node, and set the last element in the path to + have a pointer to it. */ + if ( ! (p_s_bh = p_s_last_element->pe_buffer = + reiserfs_bread(n_dev, n_block_number, n_block_size)) ) { + p_s_search_path->path_length --; + pathrelse(p_s_search_path); + return IO_ERROR; + } + + /* It is possible that schedule occured. We must check whether the key + to search is still in the tree rooted from the current buffer. If + not then repeat search from the root. */ + if ( fs_changed (fs_gen, p_s_sb) && + (!B_IS_IN_TREE (p_s_bh) || !key_in_buffer(p_s_search_path, p_s_key, p_s_sb)) ) { + decrement_counters_in_path(p_s_search_path); + + /* Get the root block number so that we can repeat the search + starting from the root. */ + n_block_number = SB_ROOT_BLOCK (p_s_sb); + expected_level = SB_TREE_HEIGHT (p_s_sb); + right_neighbor_of_leaf_node = 0; + + /* repeat search from the root */ + continue; + } + +#ifdef CONFIG_REISERFS_CHECK + + if ( ! key_in_buffer(p_s_search_path, p_s_key, p_s_sb) ) + reiserfs_panic(p_s_sb, "PAP-5130: search_by_key: key is not in the buffer"); + if ( cur_tb ) { + print_cur_tb ("5140"); + reiserfs_panic(p_s_sb, "PAP-5140: search_by_key: schedule occurred in do_balance!"); + } + +#endif + + // make sure, that the node contents look like a node of + // certain level + if (!is_tree_node (p_s_bh, expected_level)) { + reiserfs_warning ("vs-5150: search_by_key: " + "invalid format found in block %d. Fsck?\n", p_s_bh->b_blocknr); + pathrelse (p_s_search_path); + return IO_ERROR; + } + + /* ok, we have acquired next formatted node in the tree */ + n_node_level = B_LEVEL (p_s_bh); + +#ifdef CONFIG_REISERFS_CHECK + + if (n_node_level < n_stop_level) + reiserfs_panic (p_s_sb, "vs-5152: search_by_key: tree level is less than stop level (%d)", + n_node_level, n_stop_level); + +#endif + + n_retval = bin_search (p_s_key, B_N_PITEM_HEAD(p_s_bh, 0), B_NR_ITEMS(p_s_bh), + ( n_node_level == DISK_LEAF_NODE_LEVEL ) ? IH_SIZE : KEY_SIZE, &(p_s_last_element->pe_position)); + if (n_node_level == n_stop_level) { + return n_retval; + } + + /* we are not in the stop level */ + if (n_retval == ITEM_FOUND) + /* item has been found, so we choose the pointer which is to the right of the found one */ + p_s_last_element->pe_position++; + + /* if item was not found we choose the position which is to + the left of the found item. This requires no code, + bin_search did it already.*/ + + /* So we have chosen a position in the current node which is + an internal node. Now we calculate child block number by + position in the node. */ + n_block_number = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position); + +#ifdef SEARCH_BY_KEY_READA + /* if we are going to read leaf node, then calculate its right neighbor if possible */ + if (n_node_level == DISK_LEAF_NODE_LEVEL + 1 && p_s_last_element->pe_position < B_NR_ITEMS (p_s_bh)) + right_neighbor_of_leaf_node = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position + 1); +#endif + } +} + + +/* Form the path to an item and position in this item which contains + file byte defined by p_s_key. 
If there is no such item + corresponding to the key, we point the path to the item with + maximal key less than p_s_key, and *p_n_pos_in_item is set to one + past the last entry/byte in the item. If searching for entry in a + directory item, and it is not found, *p_n_pos_in_item is set to one + entry more than the entry with maximal key which is less than the + sought key. + + Note that if there is no entry in this same node which is one more, + then we point to an imaginary entry. for direct items, the + position is in units of bytes, for indirect items the position is + in units of blocknr entries, for directory items the position is in + units of directory entries. */ + +/* The function is NOT SCHEDULE-SAFE! */ +int search_for_position_by_key (struct super_block * p_s_sb, /* Pointer to the super block. */ + struct cpu_key * p_cpu_key, /* Key to search (cpu variable) */ + struct path * p_s_search_path /* Filled up by this function. */ + ) { + struct item_head * p_le_ih; /* pointer to on-disk structure */ + int n_blk_size; + loff_t item_offset, offset; + struct reiserfs_dir_entry de; + int retval; + + /* If searching for directory entry. */ + if ( is_direntry_cpu_key (p_cpu_key) ) + return search_by_entry_key (p_s_sb, p_cpu_key, p_s_search_path, &de); + + /* If not searching for directory entry. */ + + /* If item is found. */ + retval = search_item (p_s_sb, p_cpu_key, p_s_search_path); + if (retval == IO_ERROR) + return retval; + if ( retval == ITEM_FOUND ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( ! B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), + PATH_LAST_POSITION(p_s_search_path))->ih_item_len ) + reiserfs_panic(p_s_sb, "PAP-5165: search_for_position_by_key: item length equals zero"); +#endif + + pos_in_item(p_s_search_path) = 0; + return POSITION_FOUND; + } + +#ifdef CONFIG_REISERFS_CHECK + if ( ! PATH_LAST_POSITION(p_s_search_path) ) + reiserfs_panic(p_s_sb, "PAP-5170: search_for_position_by_key: position equals zero"); +#endif + + /* Item is not found. Set path to the previous item. */ + p_le_ih = B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), --PATH_LAST_POSITION(p_s_search_path)); + n_blk_size = p_s_sb->s_blocksize; + + if (comp_short_keys (&(p_le_ih->ih_key), p_cpu_key)) { + return FILE_NOT_FOUND; + } + +#if 0 +/*#ifdef CONFIG_REISERFS_CHECK*/ + + /* we expect to find stat data or item of the same type */ + if ( ! is_statdata_le_ih(p_le_ih) && ((is_indirect_cpu_key(p_cpu_key) && ! is_indirect_le_ih(p_le_ih)) || + (is_direct_cpu_key(p_cpu_key) && ! is_direct_le_ih(p_le_ih))) ) { + print_block (PATH_PLAST_BUFFER(p_s_search_path), PRINT_LEAF_ITEMS, + PATH_LAST_POSITION (p_s_search_path) - 2, + PATH_LAST_POSITION (p_s_search_path) + 2); + reiserfs_panic(p_s_sb, "PAP-5190: search_for_position_by_key: " + "found item %h type does not match to the expected one %k", + p_le_ih, p_cpu_key); + } +/*#endif*/ +#endif + + // FIXME: quite ugly this far + + item_offset = le_ih_k_offset (p_le_ih); + offset = cpu_key_k_offset (p_cpu_key); + + /* Needed byte is contained in the item pointed to by the path.*/ + if (item_offset <= offset && + item_offset + op_bytes_number (p_le_ih, n_blk_size) > offset) { + pos_in_item (p_s_search_path) = offset - item_offset; + if ( is_indirect_le_ih(p_le_ih) ) { + pos_in_item (p_s_search_path) /= n_blk_size; + } + return POSITION_FOUND; + } + + /* Needed byte is not contained in the item pointed to by the + path. Set pos_in_item out of the item. 
*/ + if ( is_indirect_le_ih (p_le_ih) ) + pos_in_item (p_s_search_path) = le16_to_cpu (p_le_ih->ih_item_len) / UNFM_P_SIZE; + else + pos_in_item (p_s_search_path) = le16_to_cpu (p_le_ih->ih_item_len); + + return POSITION_NOT_FOUND; +} + + +/* Compare given item and item pointed to by the path. */ +int comp_items (struct item_head * stored_ih, struct path * p_s_path) +{ + struct buffer_head * p_s_bh; + struct item_head * ih; + + /* Last buffer at the path is not in the tree. */ + if ( ! B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path)) ) + return 1; + + /* Last path position is invalid. */ + if ( PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh) ) + return 1; + + /* we need only to know, whether it is the same item */ + ih = get_ih (p_s_path); + return memcmp (stored_ih, ih, IH_SIZE); + +#if 0 + /* Get item at the path. */ + p_s_path_item = PATH_PITEM_HEAD(p_s_path); + /* Compare keys. */ + if ( COMP_KEYS(&(p_s_path_item->ih_key), &(p_cpu_ih->ih_key)) ) + return 1; + + /* Compare other items fields. */ + if ( le16_to_cpu (p_s_path_item->u.ih_entry_count) != p_cpu_ih->u.ih_entry_count || + le16_to_cpu (p_s_path_item->ih_item_len) != p_cpu_ih->ih_item_len || + le16_to_cpu ( p_s_path_item->ih_item_location) != p_cpu_ih->ih_item_location ) + return 1; + + /* Items are equal. */ + return 0; +#endif +} + + +/* unformatted nodes are not logged anymore, ever. This is safe +** now +*/ +#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1) + +// block can not be forgotten as it is in I/O or held by someone +#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh))) + + + +// prepare for delete or cut of direct item +static inline int prepare_for_direct_item (struct path * path, + struct item_head * le_ih, + struct inode * inode, + loff_t new_file_length, + int * cut_size) +{ + loff_t round_len; + + + if ( new_file_length == max_reiserfs_offset (inode) ) { + /* item has to be deleted */ + *cut_size = -(IH_SIZE + le16_to_cpu (le_ih->ih_item_len)); + return M_DELETE; + } + + // new file gets truncated + if (inode_items_version (inode) == ITEM_VERSION_2) { + // + round_len = ROUND_UP (new_file_length); + /* this was n_new_file_length < le_ih ... */ + if ( round_len < le_ih_k_offset (le_ih) ) { + *cut_size = -(IH_SIZE + le16_to_cpu (le_ih->ih_item_len)); + return M_DELETE; /* Delete this item. */ + } + /* Calculate first position and size for cutting from item. */ + pos_in_item (path) = round_len - (le_ih_k_offset (le_ih) - 1); + *cut_size = -(le16_to_cpu (le_ih->ih_item_len) - pos_in_item(path)); + + return M_CUT; /* Cut from this item. */ + } + + + // old file: items may have any length + + if ( new_file_length < le_ih_k_offset (le_ih) ) { + *cut_size = -(IH_SIZE + le16_to_cpu (le_ih->ih_item_len)); + return M_DELETE; /* Delete this item. */ + } + /* Calculate first position and size for cutting from item. */ + *cut_size = -(le16_to_cpu (le_ih->ih_item_len) - + (pos_in_item (path) = new_file_length + 1 - le_ih_k_offset (le_ih))); + return M_CUT; /* Cut from this item. 
*/ +} + + +static inline int prepare_for_direntry_item (struct path * path, + struct item_head * le_ih, + struct inode * inode, + loff_t new_file_length, + int * cut_size) +{ + if (le_ih_k_offset (le_ih) == DOT_OFFSET && + new_file_length == max_reiserfs_offset (inode)) { +#ifdef CONFIG_REISERFS_CHECK + if (ih_entry_count (le_ih) != 2) + reiserfs_panic(inode->i_sb,"PAP-5220: prepare_for_delete_or_cut: " + "incorrect empty directory item (%h)", le_ih); +#endif + *cut_size = -(IH_SIZE + le16_to_cpu (le_ih->ih_item_len)); + return M_DELETE; /* Delete the directory item containing "." and ".." entry. */ + } + + if ( ih_entry_count (le_ih) == 1 ) { + /* Delete the directory item such as there is one record only + in this item*/ + *cut_size = -(IH_SIZE + le16_to_cpu (le_ih->ih_item_len)); + return M_DELETE; + } + + /* Cut one record from the directory item. */ + *cut_size = -(DEH_SIZE + entry_length (get_bh (path), le_ih, pos_in_item (path))); + return M_CUT; +} + + +/* If the path points to a directory or direct item, calculate mode and the size cut, for balance. + If the path points to an indirect item, remove some number of its unformatted nodes. + In case of file truncate calculate whether this item must be deleted/truncated or last + unformatted node of this item will be converted to a direct item. + This function returns a determination of what balance mode the calling function should employ. */ +static char prepare_for_delete_or_cut( + struct reiserfs_transaction_handle *th, + struct inode * inode, + struct path * p_s_path, + struct cpu_key * p_s_item_key, + int * p_n_removed, /* Number of unformatted nodes which were removed + from end of the file. */ + int * p_n_cut_size, + unsigned long long n_new_file_length /* MAX_KEY_OFFSET in case of delete. */ + ) { + struct super_block * p_s_sb = inode->i_sb; + struct item_head * p_le_ih = PATH_PITEM_HEAD(p_s_path); + struct buffer_head * p_s_bh = PATH_PLAST_BUFFER(p_s_path); + +#ifdef CONFIG_REISERFS_CHECK + int n_repeat_counter = 0; +#endif + + /* Stat_data item. */ + if ( is_statdata_le_ih (p_le_ih) ) { + +#ifdef CONFIG_REISERFS_CHECK + if ( n_new_file_length != max_reiserfs_offset (inode) ) + reiserfs_panic(p_s_sb, "PAP-5210: prepare_for_delete_or_cut: mode must be M_DELETE"); +#endif + + *p_n_cut_size = -(IH_SIZE + le16_to_cpu (p_le_ih->ih_item_len)); + return M_DELETE; + } + + + /* Directory item. */ + if ( is_direntry_le_ih (p_le_ih) ) + return prepare_for_direntry_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size); + + /* Direct item. */ + if ( is_direct_le_ih (p_le_ih) ) + return prepare_for_direct_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size); + + + /* Case of an indirect item. */ + { + int n_unfm_number, /* Number of the item unformatted nodes. */ + n_counter, + n_retry, /* Set to one if there is unformatted node buffer in use. */ + n_blk_size; + __u32 * p_n_unfm_pointer; /* Pointer to the unformatted node number. */ + __u32 tmp; + struct item_head s_ih; /* Item header. */ + char c_mode; /* Returned mode of the balance. */ + struct buffer_head * p_s_un_bh; + int need_research; + + + n_blk_size = p_s_sb->s_blocksize; + + /* Search for the needed object indirect item until there are no unformatted nodes to be removed. */ + do { + need_research = 0; + p_s_bh = PATH_PLAST_BUFFER(p_s_path); + /* Copy indirect item header to a temp variable. */ + copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); + /* Calculate number of unformatted nodes in this item. 
*/ + n_unfm_number = I_UNFM_NUM(&s_ih); + +#ifdef CONFIG_REISERFS_CHECK + if ( ! is_indirect_le_ih(&s_ih) || ! n_unfm_number || + pos_in_item (p_s_path) + 1 != n_unfm_number ) { + printk("n_unfm_number = %d *p_n_pos_in_item = %d\n",n_unfm_number, pos_in_item (p_s_path)); + reiserfs_panic(p_s_sb, "PAP-5240: prepare_for_delete_or_cut: illegal item %h", &s_ih); + } +#endif + + /* Calculate balance mode and position in the item to remove unformatted nodes. */ + if ( n_new_file_length == max_reiserfs_offset (inode) ) {/* Case of delete. */ + pos_in_item (p_s_path) = 0; + *p_n_cut_size = -(IH_SIZE + le16_to_cpu (s_ih.ih_item_len)); + c_mode = M_DELETE; + } + else { /* Case of truncate. */ + if ( n_new_file_length < le_ih_k_offset (&s_ih) ) { + pos_in_item (p_s_path) = 0; + *p_n_cut_size = -(IH_SIZE + le16_to_cpu (s_ih.ih_item_len)); + c_mode = M_DELETE; /* Delete this item. */ + } + else { + /* indirect item must be truncated starting from *p_n_pos_in_item-th position */ + pos_in_item (p_s_path) = (n_new_file_length + n_blk_size - le_ih_k_offset (&s_ih) ) >> p_s_sb->s_blocksize_bits; + +#ifdef CONFIG_REISERFS_CHECK + if ( pos_in_item (p_s_path) > n_unfm_number ) + reiserfs_panic(p_s_sb, "PAP-5250: prepare_for_delete_or_cut: illegal position in the item"); +#endif + + /* Either convert last unformatted node of indirect item to direct item or increase + its free space. */ + if ( pos_in_item (p_s_path) == n_unfm_number ) { + *p_n_cut_size = 0; /* Nothing to cut. */ + return M_CONVERT; /* Maybe convert last unformatted node to the direct item. */ + } + /* Calculate size to cut. */ + *p_n_cut_size = -(s_ih.ih_item_len - pos_in_item (p_s_path) * UNFM_P_SIZE); + + c_mode = M_CUT; /* Cut from this indirect item. */ + } + } + +#ifdef CONFIG_REISERFS_CHECK + if ( n_unfm_number <= pos_in_item (p_s_path) ) + reiserfs_panic(p_s_sb, "PAP-5260: prepare_for_delete_or_cut: illegal position in the indirect item"); +#endif + + /* pointers to be cut */ + n_unfm_number -= pos_in_item (p_s_path); + /* Set pointer to the last unformatted node pointer that is to be cut. */ + p_n_unfm_pointer = (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1 - *p_n_removed; + + + /* We go through the unformatted nodes pointers of the indirect + item and look for the unformatted nodes in the cache. If we + found some of them we free it, zero corresponding indirect item + entry and log buffer containing that indirect item. For this we + need to prepare last path element for logging. If some + unformatted node has b_count > 1 we must not free this + unformatted node since it is in use. */ + reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1); + // note: path could be changed, first line in for loop takes care + // of it + + for ( n_retry = 0, n_counter = *p_n_removed; + n_counter < n_unfm_number; n_counter++, p_n_unfm_pointer-- ) { + + if (item_moved (&s_ih, p_s_path)) { + need_research = 1 ; + break; + } +#ifdef CONFIG_REISERFS_CHECK + if (p_n_unfm_pointer < (__u32 *)B_I_PITEM(p_s_bh, &s_ih) || + p_n_unfm_pointer > (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1) + reiserfs_panic (p_s_sb, "vs-5265: prepare_for_delete_or_cut: pointer out of range"); +#endif + + if ( ! *p_n_unfm_pointer ) { /* Hole, nothing to remove. */ + if ( ! n_retry ) + (*p_n_removed)++; + continue; + } + /* Search for the buffer in cache. 
*/ + p_s_un_bh = get_hash_table(p_s_sb->s_dev, *p_n_unfm_pointer, n_blk_size); + + if (p_s_un_bh && buffer_locked(p_s_un_bh)) { + __wait_on_buffer(p_s_un_bh) ; + if ( item_moved (&s_ih, p_s_path) ) { + need_research = 1; + brelse(p_s_un_bh) ; + break ; + } + } + if ( p_s_un_bh && block_in_use (p_s_un_bh)) { + /* Block is locked or held more than by one holder and by + journal. */ + +#ifndef __KERNEL__ + reiserfs_panic(p_s_sb, "PAP-5270: prepare_for_delete_or_cut: b_count != 1"); +#endif + +#ifdef CONFIG_REISERFS_CHECK + if (n_repeat_counter && (n_repeat_counter % 100000) == 0) { + printk("prepare_for_delete, waiting on buffer %lu, b_count %d, %s%cJDIRTY %cJDIRTY_WAIT\n", + p_s_un_bh->b_blocknr, atomic_read (&p_s_un_bh->b_count), + buffer_locked (p_s_un_bh) ? "locked, " : "", + buffer_journaled(p_s_un_bh) ? ' ' : '!', + buffer_journal_dirty(p_s_un_bh) ? ' ' : '!') ; + + } +#endif + n_retry = 1; + brelse (p_s_un_bh); + continue; + } + + if ( ! n_retry ) + (*p_n_removed)++; + +#ifdef CONFIG_REISERFS_CHECK + if ( p_s_un_bh && (*p_n_unfm_pointer != p_s_un_bh->b_blocknr )) + // note: minix_truncate allows that. As truncate is + // protected by down (inode->i_sem), two truncates can not + // co-exist + reiserfs_panic(p_s_sb, "PAP-5280: prepare_for_delete_or_cut: blocks numbers are different"); +#endif + + tmp = *p_n_unfm_pointer; + *p_n_unfm_pointer = 0; + journal_mark_dirty (th, p_s_sb, p_s_bh); + bforget (p_s_un_bh); + inode->i_blocks -= p_s_sb->s_blocksize / 512; + reiserfs_free_block(th, tmp); + if ( item_moved (&s_ih, p_s_path) ) { + need_research = 1; + break ; +#if 0 + reiserfs_prepare_for_journal(p_s_sb, + PATH_PLAST_BUFFER(p_s_path), + 1) ; + if ( comp_items(&s_ih, p_s_path) ) { + reiserfs_restore_prepared_buffer(p_s_sb, + PATH_PLAST_BUFFER(p_s_path)) ; + brelse(p_s_un_bh); + break; + } + *p_n_unfm_pointer = 0; + journal_mark_dirty (th,p_s_sb,PATH_PLAST_BUFFER(p_s_path)); + + reiserfs_free_block(th, p_s_sb, block_addr); + if (p_s_un_bh) { + mark_buffer_clean (p_s_un_bh); + brelse (p_s_un_bh); + } + if ( comp_items(&s_ih, p_s_path) ) { + break ; + } +#endif + } + + } + + /* a trick. If the buffer has been logged, this + ** will do nothing. If we've broken the loop without + ** logging it, it will restore the buffer + ** + */ + reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh); + + if ( n_retry ) { + /* There is block in use. Wait, they should release it soon */ + +#ifdef CONFIG_REISERFS_CHECK + if ( *p_n_removed >= n_unfm_number ) + reiserfs_panic(p_s_sb, "PAP-5290: prepare_for_delete_or_cut: illegal case"); + if ( !(++n_repeat_counter % 500000) ) { + reiserfs_warning("PAP-5300: prepare_for_delete_or_cut: (pid %u): " + "could not delete item %k in (%d) iterations. New file length %Lu. (inode %Ld), Still trying\n", + current->pid, p_s_item_key, n_repeat_counter, n_new_file_length, inode->i_size); + if (n_repeat_counter == 5000000) { + print_block (PATH_PLAST_BUFFER(p_s_path), 3, + PATH_LAST_POSITION (p_s_path) - 2, PATH_LAST_POSITION (p_s_path) + 2); + reiserfs_panic(p_s_sb, "PAP-5305: prepare_for_delete_or_cut: key %k, new_file_length %Ld", + p_s_item_key, n_new_file_length); + } + } +#endif + +#ifdef __KERNEL__ + run_task_queue(&tq_disk); + current->policy |= SCHED_YIELD; + schedule(); +#endif + } + /* This loop can be optimized. 
*/ + } while ( (*p_n_removed < n_unfm_number || need_research) && + search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND ); + +#ifdef CONFIG_REISERFS_CHECK + if ( *p_n_removed < n_unfm_number ) + reiserfs_panic(p_s_sb, "PAP-5310: prepare_for_delete_or_cut: indirect item is not found"); + + if (item_moved (&s_ih, p_s_path) ) { + printk("prepare_for_delete_or_cut: after while, comp failed, retry\n") ; + BUG (); + } +#endif + + if (c_mode == M_CUT) + pos_in_item (p_s_path) *= UNFM_P_SIZE; + return c_mode; + } +} + + +/* Calculate bytes number which will be deleted or cutted in the balance. */ +int calc_deleted_bytes_number( + struct tree_balance * p_s_tb, + char c_mode + ) { + int n_del_size; + struct item_head * p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path); + + if ( is_statdata_le_ih (p_le_ih) ) + return 0; + + if ( is_direntry_le_ih (p_le_ih) ) { + // return EMPTY_DIR_SIZE; /* We delete emty directoris only. */ + // we can't use EMPTY_DIR_SIZE, as old format dirs have a different + // empty size. ick. FIXME, is this right? + // + return le16_to_cpu(p_le_ih->ih_item_len) ; + } + n_del_size = ( c_mode == M_DELETE ) ? le16_to_cpu (p_le_ih->ih_item_len) : -p_s_tb->insert_size[0]; + + if ( is_indirect_le_ih (p_le_ih) ) + n_del_size = (n_del_size/UNFM_P_SIZE)* + (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size);// - get_ih_free_space (p_le_ih); + return n_del_size; +} + +static void init_tb_struct( + struct reiserfs_transaction_handle *th, + struct tree_balance * p_s_tb, + struct super_block * p_s_sb, + struct path * p_s_path, + int n_size + ) { + memset (p_s_tb,'\0',sizeof(struct tree_balance)); + p_s_tb->transaction_handle = th ; + p_s_tb->tb_sb = p_s_sb; + p_s_tb->tb_path = p_s_path; + PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL; + PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0; + p_s_tb->insert_size[0] = n_size; +} + + + +void padd_item (char * item, int total_length, int length) +{ + int i; + + for (i = total_length; i > length; ) + item [--i] = 0; +} + + +/* Delete object item. */ +int reiserfs_delete_item (struct reiserfs_transaction_handle *th, + struct path * p_s_path, /* Path to the deleted item. */ + struct cpu_key * p_s_item_key, /* Key to search for the deleted item. */ + struct inode * p_s_inode,/* inode is here just to update i_blocks */ + struct buffer_head * p_s_un_bh) /* NULL or unformatted node pointer. 
*/ +{ + struct super_block * p_s_sb = p_s_inode->i_sb; + struct tree_balance s_del_balance; + struct item_head s_ih; + int n_ret_value, + n_del_size, + n_removed; + +#ifdef CONFIG_REISERFS_CHECK + char c_mode; + int n_iter = 0; +#endif + + init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path, 0/*size is unknown*/); + + while ( 1 ) { + n_removed = 0; + +#ifdef CONFIG_REISERFS_CHECK + n_iter++; + c_mode = +#endif + prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed, &n_del_size, max_reiserfs_offset (p_s_inode)); + +#ifdef CONFIG_REISERFS_CHECK + if ( c_mode != M_DELETE ) + reiserfs_panic(p_s_sb, "PAP-5320: reiserfs_delete_item: mode must be M_DELETE"); +#endif + + copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); + s_del_balance.insert_size[0] = n_del_size; + + n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, 0); + if ( n_ret_value != REPEAT_SEARCH ) + break; + + // file system changed, repeat search + n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); + if (n_ret_value == IO_ERROR) + break; + if (n_ret_value == FILE_NOT_FOUND) { + reiserfs_warning ("vs-5340: reiserfs_delete_item: " + "no items of the file %K found\n", p_s_item_key); + break; + } + } /* while (1) */ + + if ( n_ret_value != CARRY_ON ) { + unfix_nodes(&s_del_balance); + return 0; + } + + // reiserfs_delete_item returns item length when success + n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE); + + if ( p_s_un_bh ) { + int off; + int block_off ; + char *data ; + + /* We are in direct2indirect conversion, so move tail contents + to the unformatted node */ + /* note, we do the copy before preparing the buffer because we + ** don't care about the contents of the unformatted node yet. + ** the only thing we really care about is the direct item's data + ** is in the unformatted node. + ** + ** Otherwise, we would have to call reiserfs_prepare_for_journal on + ** the unformatted node, which might schedule, meaning we'd have to + ** loop all the way back up to the start of the while loop. + ** + ** The unformatted node is prepared and logged after the do_balance. + ** + ** p_s_un_bh is from the page cache (all unformatted nodes are + ** from the page cache) and might be a highmem page. So, we + ** can't use p_s_un_bh->b_data. But, the page has already been + ** kmapped, so we can use page_address() + ** -clm + */ + + data = page_address(p_s_un_bh->b_page) ; + off = ((le_ih_k_offset (&s_ih) - 1) & (PAGE_CACHE_SIZE - 1)); + block_off = off & (p_s_un_bh->b_size - 1) ; + memcpy(data + off, + B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih), n_ret_value); + + /* clear out the rest of the block past the end of the file. */ + if (block_off + n_ret_value < p_s_un_bh->b_size) { + memset(data + off + n_ret_value, 0, + p_s_un_bh->b_size - block_off - n_ret_value) ; + } + } + + /* Perform balancing after all resources have been collected at once. */ + do_balance(&s_del_balance, NULL, NULL, M_DELETE); + + /* see comment above for why this is after the do_balance */ + if (p_s_un_bh) { + mark_buffer_dirty(p_s_un_bh) ; + } + + /* Return deleted body length */ + return n_ret_value; +} + + +/* Summary Of Mechanisms For Handling Collisions Between Processes: + + deletion of the body of the object is performed by iput(), with the + result that if multiple processes are operating on a file, the + deletion of the body of the file is deferred until the last process + that has an open inode performs its iput(). + + writes and truncates are protected from collisions by use of + semaphores. 
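+
+   within a single tree operation the path is revalidated after any
+   schedule: search_by_key() restarts from the root when fs_changed()
+   and key_in_buffer() show the tree moved under it, and the delete/cut
+   loops repeat the search whenever fix_nodes() returns REPEAT_SEARCH.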
+ + creates, linking, and mknod are protected from collisions with other + processes by making the reiserfs_add_entry() the last step in the + creation, and then rolling back all changes if there was a collision. + - Hans +*/ + + +/* this deletes item which never gets split */ +static void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th, + struct key * key) +{ + struct tree_balance tb; + INITIALIZE_PATH (path); + int item_len; + int tb_init = 0 ; + struct cpu_key cpu_key; + int retval; + + le_key2cpu_key (&cpu_key, key); + + while (1) { + retval = search_item (th->t_super, &cpu_key, &path); + if (retval == IO_ERROR) { + reiserfs_warning ("vs-: reiserfs_delete_solid_item: " + "i/o failure occured trying to delete %K\n", &cpu_key); + break; + } + if (retval != ITEM_FOUND) { + pathrelse (&path); + reiserfs_warning ("vs-: reiserfs_delete_solid_item: %k not found", + key); + break; + } + if (!tb_init) { + tb_init = 1 ; + item_len = le16_to_cpu (PATH_PITEM_HEAD (&path)->ih_item_len); + init_tb_struct (th, &tb, th->t_super, &path, - (IH_SIZE + item_len)); + } + + retval = fix_nodes (M_DELETE, &tb, NULL, 0); + if (retval == REPEAT_SEARCH) + continue; + + if (retval == CARRY_ON) { + do_balance (&tb, 0, 0, M_DELETE); + break; + } + + // IO_ERROR, NO_DISK_SPACE, etc + reiserfs_warning ("vs-: reiserfs_delete_solid_item: " + "could not delete %K due to fix_nodes failure\n", &cpu_key); + unfix_nodes (&tb); + break; + } + + reiserfs_check_path(&path) ; +} + + +void reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode * inode) +{ + inode->i_size = 0; + + /* for directory this deletes item containing "." and ".." */ + reiserfs_do_truncate (th, inode, NULL, 0/*no timestamp updates*/); + + /* delete stat data */ + /* this debug code needs to go away. Trying to find a truncate race + ** -- clm -- 4/1/2000 + */ +#if 0 + if (inode->i_nlink != 0) { + reiserfs_warning("clm-4001: deleting inode with link count==%d\n", inode->i_nlink) ; + } +#endif + reiserfs_delete_solid_item (th, INODE_PKEY (inode)); +} + + +static int maybe_indirect_to_direct (struct reiserfs_transaction_handle *th, + struct inode * p_s_inode, + struct page *page, + struct path * p_s_path, + struct cpu_key * p_s_item_key, + loff_t n_new_file_size, + char * p_c_mode + ) { + struct super_block * p_s_sb = p_s_inode->i_sb; + int n_block_size = p_s_sb->s_blocksize; + int cut_bytes; + + if (n_new_file_size != p_s_inode->i_size) + BUG (); + + /* the page being sent in could be NULL if there was an i/o error + ** reading in the last block. The user will hit problems trying to + ** read the file, but for now we just skip the indirect2direct + */ + if (atomic_read(&p_s_inode->i_count) > 1 || + !tail_has_to_be_packed (p_s_inode) || + !page || p_s_inode->u.reiserfs_i.nopack) { + // leave tail in an unformatted node + *p_c_mode = M_SKIP_BALANCING; + cut_bytes = n_block_size - (n_new_file_size & (n_block_size - 1)); + pathrelse(p_s_path); + return cut_bytes; + } + /* Permorm the conversion to a direct_item. */ + /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode);*/ + return indirect2direct (th, p_s_inode, page, p_s_path, p_s_item_key, n_new_file_size, p_c_mode); +} + + +/* we did indirect_to_direct conversion. And we have inserted direct + item successesfully, but there were no disk space to cut unfm + pointer being converted. 
Therefore we have to delete inserted + direct item(s) */ +static void indirect_to_direct_roll_back (struct reiserfs_transaction_handle *th, struct inode * inode, struct path * path) +{ + struct cpu_key tail_key; + int tail_len; + int removed; + + make_cpu_key (&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);// !!!! + tail_key.key_length = 4; + + tail_len = (cpu_key_k_offset (&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1; + while (tail_len) { + /* look for the last byte of the tail */ + if (search_for_position_by_key (inode->i_sb, &tail_key, path) == POSITION_NOT_FOUND) + reiserfs_panic (inode->i_sb, "vs-5615: indirect_to_direct_roll_back: found invalid item"); +#ifdef CONFIG_REISERFS_CHECK + if (path->pos_in_item != PATH_PITEM_HEAD (path)->ih_item_len - 1) + reiserfs_panic (inode->i_sb, "vs-5616: indirect_to_direct_roll_back: appended bytes found"); +#endif + PATH_LAST_POSITION (path) --; + + removed = reiserfs_delete_item (th, path, &tail_key, inode, 0/*unbh not needed*/); +#ifdef CONFIG_REISERFS_CHECK + if (removed <= 0 || removed > tail_len) + reiserfs_panic (inode->i_sb, "vs-5617: indirect_to_direct_roll_back: " + "there was tail %d bytes, removed item length %d bytes", + tail_len, removed); +#endif + tail_len -= removed; + set_cpu_key_k_offset (&tail_key, cpu_key_k_offset (&tail_key) - removed); + } + printk ("indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space\n"); + //mark_file_without_tail (inode); + mark_inode_dirty (inode); +} + + +/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */ +int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th, + struct path * p_s_path, + struct cpu_key * p_s_item_key, + struct inode * p_s_inode, + struct page *page, + loff_t n_new_file_size) +{ + struct super_block * p_s_sb = p_s_inode->i_sb; + /* Every function which is going to call do_balance must first + create a tree_balance structure. Then it must fill up this + structure by using the init_tb_struct and fix_nodes functions. + After that we can make tree balancing. */ + struct tree_balance s_cut_balance; + int n_cut_size = 0, /* Amount to be cut. */ + n_ret_value = CARRY_ON, + n_removed = 0, /* Number of the removed unformatted nodes. */ + n_is_inode_locked = 0; + char c_mode; /* Mode of the balance. */ + int retval2 = -1; + + + init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path, n_cut_size); + + + /* Repeat this loop until we either cut the item without needing + to balance, or we fix_nodes without schedule occuring */ + while ( 1 ) { + /* Determine the balance mode, position of the first byte to + be cut, and size to be cut. In case of the indirect item + free unformatted nodes which are pointed to by the cut + pointers. */ + + c_mode = prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed, + &n_cut_size, n_new_file_size); + if ( c_mode == M_CONVERT ) { + /* convert last unformatted node to direct item or leave + tail in the unformatted node */ +#ifdef CONFIG_REISERFS_CHECK + if ( n_ret_value != CARRY_ON ) + reiserfs_panic (p_s_sb, "PAP-5570: reiserfs_cut_from_item: can not convert twice"); +#endif + + n_ret_value = maybe_indirect_to_direct (th, p_s_inode, page, p_s_path, p_s_item_key, + n_new_file_size, &c_mode); + if ( c_mode == M_SKIP_BALANCING ) + /* tail has been left in the unformatted node */ + return n_ret_value; + + n_is_inode_locked = 1; + + /* removing of last unformatted node will change value we + have to return to truncate. 
Save it */ + retval2 = n_ret_value; + /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1));*/ + + /* So, we have performed the first part of the conversion: + inserting the new direct item. Now we are removing the + last unformatted node pointer. Set key to search for + it. */ + set_cpu_key_k_type (p_s_item_key, TYPE_INDIRECT); + p_s_item_key->key_length = 4; + n_new_file_size -= (n_new_file_size & (p_s_sb->s_blocksize - 1)); + set_cpu_key_k_offset (p_s_item_key, n_new_file_size + 1); + if ( search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND ){ + print_block (PATH_PLAST_BUFFER (p_s_path), 3, PATH_LAST_POSITION (p_s_path) - 1, PATH_LAST_POSITION (p_s_path) + 1); + reiserfs_panic(p_s_sb, "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%k)", p_s_item_key); + } + continue; + } + if (n_cut_size == 0) { + pathrelse (p_s_path); + return 0; + } + + s_cut_balance.insert_size[0] = n_cut_size; + + n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, 0); + if ( n_ret_value != REPEAT_SEARCH ) + break; + + n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path); + if (n_ret_value == POSITION_FOUND) + continue; + + reiserfs_warning ("PAP-5610: reiserfs_cut_from_item: item %K not found\n", p_s_item_key); + pathrelse (p_s_path); + return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT; + } /* while */ + + // check fix_nodes results (IO_ERROR or NO_DISK_SPACE) + if ( n_ret_value != CARRY_ON ) { + if ( n_is_inode_locked ) { + // FIXME: this seems to be not needed: we are always able + // to cut item + indirect_to_direct_roll_back (th, p_s_inode, p_s_path); + } + if (n_ret_value == NO_DISK_SPACE) + reiserfs_warning (""); + unfix_nodes (&s_cut_balance); + return -EIO; + } + + /* go ahead and perform balancing */ + +#ifdef CONFIG_REISERFS_CHECK + if ( c_mode == M_PASTE || c_mode == M_INSERT ) + reiserfs_panic (p_s_sb, "PAP-5640: reiserfs_cut_from_item: illegal mode"); +#endif + + /* Calculate number of bytes that need to be cut from the item. */ + if (retval2 == -1) + n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode); + else + n_ret_value = retval2; + + if ( c_mode == M_DELETE ) { + struct item_head * p_le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path); + + if ( is_direct_le_ih (p_le_ih) && (le_ih_k_offset (p_le_ih) & (p_s_sb->s_blocksize - 1)) == 1 ) { + /* we delete first part of tail which was stored in direct + item(s) */ + // FIXME: this is to keep 3.5 happy + p_s_inode->u.reiserfs_i.i_first_direct_byte = U32_MAX; + p_s_inode->i_blocks -= p_s_sb->s_blocksize / 512; + } + } + +#ifdef CONFIG_REISERFS_CHECK + if (n_is_inode_locked) { + struct item_head * le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path); + /* we are going to complete indirect2direct conversion. 
Make + sure, that we exactly remove last unformatted node pointer + of the item */ + if (!is_indirect_le_ih (le_ih)) + reiserfs_panic (p_s_sb, "vs-5652: reiserfs_cut_from_item: " + "item must be indirect %h", le_ih); + + if (c_mode == M_DELETE && le16_to_cpu (le_ih->ih_item_len) != UNFM_P_SIZE) + reiserfs_panic (p_s_sb, "vs-5653: reiserfs_cut_from_item: " + "completing indirect2direct conversion indirect item %h" + "being deleted must be of 4 byte long", le_ih); + + if (c_mode == M_CUT && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) { + reiserfs_panic (p_s_sb, "vs-5654: reiserfs_cut_from_item: " + "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)", + le_ih, s_cut_balance.insert_size[0]); + } + /* it would be useful to make sure, that right neighboring + item is direct item of this file */ + } +#endif + + do_balance(&s_cut_balance, NULL, NULL, c_mode); + if ( n_is_inode_locked ) { + /* we've converted from indirect to direct, we must remove + ** ourselves from the list of pages that need flushing before + ** this transaction can commit + */ + reiserfs_remove_page_from_flush_list(th, p_s_inode) ; + p_s_inode->u.reiserfs_i.i_pack_on_close = 0 ; + } + return n_ret_value; +} + + +static void truncate_directory (struct reiserfs_transaction_handle *th, struct inode * inode) +{ + if (inode->i_nlink) + reiserfs_warning ("vs-5655: truncate_directory: link count != 0"); + + set_le_key_k_offset (ITEM_VERSION_1, INODE_PKEY (inode), DOT_OFFSET); + set_le_key_k_type (ITEM_VERSION_1, INODE_PKEY (inode), TYPE_DIRENTRY); + reiserfs_delete_solid_item (th, INODE_PKEY (inode)); + + set_le_key_k_offset (ITEM_VERSION_1, INODE_PKEY (inode), SD_OFFSET); + set_le_key_k_type (ITEM_VERSION_1, INODE_PKEY (inode), TYPE_STAT_DATA); +} + + + + +/* Truncate file to the new size. Note, this must be called with a transaction + already started */ +void reiserfs_do_truncate (struct reiserfs_transaction_handle *th, + struct inode * p_s_inode, /* ->i_size contains new + size */ + struct page *page, /* up to date for last block */ + int update_timestamps /* when it is called by + file_release to convert + the tail - no timestamps + should be updated */ + ) { + INITIALIZE_PATH (s_search_path); /* Path to the current object item. */ + struct item_head * p_le_ih; /* Pointer to an item header. */ + struct cpu_key s_item_key; /* Key to search for a previous file item. */ + loff_t n_file_size, /* Old file size. */ + n_new_file_size;/* New file size. */ + int n_deleted; /* Number of deleted or truncated bytes. */ + int retval; + + if ( ! (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode) || S_ISLNK(p_s_inode->i_mode)) ) + return; + + if (S_ISDIR(p_s_inode->i_mode)) { + // deletion of directory - no need to update timestamps + truncate_directory (th, p_s_inode); + return; + } + + /* Get new file size. 
*/ + n_new_file_size = p_s_inode->i_size; + + // FIXME: note, that key type is unimportant here + make_cpu_key (&s_item_key, p_s_inode, max_reiserfs_offset (p_s_inode), TYPE_DIRECT, 3); + + retval = search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path); + if (retval == IO_ERROR) { + reiserfs_warning ("vs-5657: reiserfs_do_truncate: " + "i/o failure occured trying to truncate %K\n", &s_item_key); + return; + } + if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) { + reiserfs_warning ("PAP-5660: reiserfs_do_truncate: " + "wrong result %d of search for %K\n", retval, &s_item_key); + return; + } + + s_search_path.pos_in_item --; + + /* Get real file size (total length of all file items) */ + p_le_ih = PATH_PITEM_HEAD(&s_search_path); + if ( is_statdata_le_ih (p_le_ih) ) + n_file_size = 0; + else { + loff_t offset = le_ih_k_offset (p_le_ih); + int bytes = op_bytes_number (p_le_ih,p_s_inode->i_sb->s_blocksize); + + /* this may mismatch with real file size: if last direct item + had no padding zeros and last unformatted node had no free + space, this file would have this file size */ + n_file_size = offset + bytes - 1; + } + + if ( n_file_size == 0 || n_file_size < n_new_file_size ) { + pathrelse(&s_search_path); + return; + } + /* Update key to search for the last file item. */ + set_cpu_key_k_offset (&s_item_key, n_file_size); + + do { + /* Cut or delete file item. */ + n_deleted = reiserfs_cut_from_item(th, &s_search_path, &s_item_key, p_s_inode, page, n_new_file_size); + if (n_deleted < 0) { + reiserfs_warning ("vs-5665: reiserfs_truncate_file: cut_from_item failed"); + reiserfs_check_path(&s_search_path) ; + return; + } + +#ifdef CONFIG_REISERFS_CHECK + if ( n_deleted > n_file_size ){ + reiserfs_panic (p_s_inode->i_sb, "PAP-5670: reiserfs_truncate_file: " + "reiserfs_truncate_file returns too big number: deleted %d, file_size %lu, item_key %k", + n_deleted, n_file_size, &s_item_key); + } +#endif + + /* Change key to search the last file item. */ + n_file_size -= n_deleted; + + set_cpu_key_k_offset (&s_item_key, n_file_size); + + /* While there are bytes to truncate and previous file item is presented in the tree. */ + + /* + ** This loop could take a really long time, and could log + ** many more blocks than a transaction can hold. 
So, we do a polite + ** journal end here, and if the transaction needs ending, we make + ** sure the file is consistent before ending the current trans + ** and starting a new one + */ + if (journal_transaction_should_end(th, th->t_blocks_allocated)) { + int orig_len_alloc = th->t_blocks_allocated ; + decrement_counters_in_path(&s_search_path) ; + + if (update_timestamps) { + p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME; + // FIXME: sd gets wrong size here + } + reiserfs_update_sd(th, p_s_inode) ; + + journal_end(th, p_s_inode->i_sb, orig_len_alloc) ; + journal_begin(th, p_s_inode->i_sb, orig_len_alloc) ; + } + } while ( n_file_size > ROUND_UP (n_new_file_size) && + search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path) == POSITION_FOUND ) ; + +#ifdef CONFIG_REISERFS_CHECK + if ( n_file_size > ROUND_UP (n_new_file_size) ) + reiserfs_panic (p_s_inode->i_sb, "PAP-5680: reiserfs_truncate_file: " + "truncate did not finish: new_file_size %Ld, current %Ld, oid %d\n", + n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid); +#endif + + if (update_timestamps) { + // this is truncate, not file closing + p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME; + } + reiserfs_update_sd (th, p_s_inode); + + pathrelse(&s_search_path) ; +} + + +#ifdef CONFIG_REISERFS_CHECK +// this makes sure, that we __append__, not overwrite or add holes +static void check_research_for_paste (struct path * path, struct cpu_key * p_s_key) +{ + struct item_head * found_ih = get_ih (path); + + if (is_direct_le_ih (found_ih)) { + if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_bh (path)->b_size) != + cpu_key_k_offset (p_s_key) || + op_bytes_number (found_ih, get_bh (path)->b_size) != pos_in_item (path)) + reiserfs_panic (0, "PAP-5720: check_research_for_paste: " + "found direct item %h or position (%d) does not match to key %K", + found_ih, pos_in_item (path), p_s_key); + } + if (is_indirect_le_ih (found_ih)) { + if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_bh (path)->b_size) != cpu_key_k_offset (p_s_key) || + I_UNFM_NUM (found_ih) != pos_in_item (path) || + get_ih_free_space (found_ih) != 0) + reiserfs_panic (0, "PAP-5730: check_research_for_paste: " + "found indirect item (%h) or position (%d) does not match to key (%K)", + found_ih, pos_in_item (path), p_s_key); + } +} +#endif /* config reiserfs check */ + + +/* Paste bytes to the existing item. Returns bytes number pasted into the item. */ +int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th, + struct path * p_s_search_path, /* Path to the pasted item. */ + struct cpu_key * p_s_key, /* Key to search for the needed item.*/ + const char * p_c_body, /* Pointer to the bytes to paste. */ + int n_pasted_size) /* Size of pasted bytes. 
*/ +{ + struct tree_balance s_paste_balance; + int retval; + + init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size); + + while ( (retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, p_c_body)) == REPEAT_SEARCH ) { + /* file system changed while we were in the fix_nodes */ + retval = search_for_position_by_key (th->t_super, p_s_key, p_s_search_path); + if (retval == IO_ERROR) + return -EIO; + if (retval == POSITION_FOUND) { + reiserfs_warning ("PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists", p_s_key); + pathrelse (p_s_search_path); + return -EEXIST; + } + +#ifdef CONFIG_REISERFS_CHECK + check_research_for_paste (p_s_search_path, p_s_key); +#endif + } + + /* Perform balancing after all resources are collected by fix_nodes, and + accessing them will not risk triggering schedule. */ + if ( retval == CARRY_ON ) { + do_balance(&s_paste_balance, NULL/*ih*/, p_c_body, M_PASTE); + return 0; + } + + unfix_nodes(&s_paste_balance); + return (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; +} + + +/* Insert new item into the buffer at the path. */ +int reiserfs_insert_item(struct reiserfs_transaction_handle *th, + struct path * p_s_path, /* Path to the inserteded item. */ + struct cpu_key * key, + struct item_head * p_s_ih, /* Pointer to the item header to insert.*/ + const char * p_c_body) /* Pointer to the bytes to insert. */ +{ + struct tree_balance s_ins_balance; + int retval; + + init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + p_s_ih->ih_item_len); + + /* + if (p_c_body == 0) + n_zeros_num = p_s_ih->ih_item_len; + */ + // le_key2cpu_key (&key, &(p_s_ih->ih_key)); + + while ( (retval = fix_nodes(M_INSERT, &s_ins_balance, p_s_ih, p_c_body)) == REPEAT_SEARCH) { + /* file system changed while we were in the fix_nodes */ + retval = search_item (th->t_super, key, p_s_path); + if (retval == IO_ERROR) + return -EIO; + + if (retval == ITEM_FOUND) { + reiserfs_warning ("PAP-5760: reiserfs_insert_item: " + "key %K already exists in the tree\n", key); + pathrelse (p_s_path); + return -EEXIST; + } + } + + /* make balancing after all resources will be collected at a time */ + if ( retval == CARRY_ON ) { + do_balance (&s_ins_balance, p_s_ih, p_c_body, M_INSERT); + return 0; + } + + unfix_nodes(&s_ins_balance); + return (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO; +} + + + + diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/super.c linux/fs/reiserfs/super.c --- v2.4.0/linux/fs/reiserfs/super.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/super.c Mon Jan 15 15:31:19 2001 @@ -0,0 +1,879 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include +#include +#include +#include + +#else + +#include "nokernel.h" +#include // for simple_strtoul + +#endif + +#define SUPPORT_OLD_FORMAT + +#define REISERFS_OLD_BLOCKSIZE 4096 +#define REISERFS_SUPER_MAGIC_STRING_OFFSET_NJ 20 + + +#if 0 +// this one is not used currently +inline void reiserfs_mark_buffer_dirty (struct buffer_head * bh, int flag) +{ + mark_buffer_dirty (bh, flag); +} +#endif + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. 
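The tree-modifying entry points shown above (reiserfs_insert_item, reiserfs_paste_into_item, reiserfs_delete_item and reiserfs_cut_from_item) all follow the same discipline around fix_nodes() and do_balance(): collect every buffer the balance may touch, retry the search if the tree changed while collecting, and only balance on CARRY_ON. A minimal sketch of that pattern, illustrative rather than part of the patch, using only names that appear in the code above:

/* Sketch only: condenses the retry loop common to the stree.c entry points. */
static int example_tree_op(struct reiserfs_transaction_handle *th,
			   struct super_block *sb,
			   struct cpu_key *key,
			   struct path *path,
			   struct item_head *ih,	/* only M_INSERT passes an item head */
			   const char *body,
			   int mode, int size)
{
	struct tree_balance tb;
	int retval;

	init_tb_struct(th, &tb, sb, path, size);

	/* fix_nodes() gathers and locks every node the balance may need;
	** it can schedule, so the tree may change underneath it */
	while ((retval = fix_nodes(mode, &tb, ih, body)) == REPEAT_SEARCH) {
		retval = search_for_position_by_key(sb, key, path);
		if (retval == IO_ERROR)
			return -EIO;
		/* the real callers also handle ITEM_FOUND / POSITION_FOUND here */
	}

	if (retval == CARRY_ON) {
		/* every resource is pinned, so balancing cannot schedule */
		do_balance(&tb, ih, body, mode);
		return 0;
	}

	/* IO_ERROR, NO_DISK_SPACE, ...: release what fix_nodes() collected */
	unfix_nodes(&tb);
	return (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
}

reiserfs_cut_from_item() adds one twist to this shape: prepare_for_delete_or_cut() is re-run on every pass, because the mode itself can change between M_CUT, M_DELETE and M_CONVERT as the tail conversion proceeds.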
+// +void reiserfs_write_super (struct super_block * s) +{ + + int dirty = 0 ; + lock_kernel() ; + if (!(s->s_flags & MS_RDONLY)) { + dirty = flush_old_commits(s, 1) ; + } + s->s_dirt = dirty; + unlock_kernel() ; +} + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +void reiserfs_write_super_lockfs (struct super_block * s) +{ + + int dirty = 0 ; + struct reiserfs_transaction_handle th ; + lock_kernel() ; + if (!(s->s_flags & MS_RDONLY)) { + journal_begin(&th, s, 1) ; + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); + reiserfs_block_writes(&th) ; + journal_end(&th, s, 1) ; + } + s->s_dirt = dirty; + unlock_kernel() ; +} + +void reiserfs_unlockfs(struct super_block *s) { + reiserfs_allow_writes(s) ; +} + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +/* there should be no suspected recipients already. True and cautious + bitmaps should not differ. We only have to free preserve list and + write both bitmaps */ +void reiserfs_put_super (struct super_block * s) +{ + int i; + struct reiserfs_transaction_handle th ; + + /* change file system state to current state if it was mounted with read-write permissions */ + if (!(s->s_flags & MS_RDONLY)) { + journal_begin(&th, s, 10) ; + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + s->u.reiserfs_sb.s_rs->s_state = le16_to_cpu (s->u.reiserfs_sb.s_mount_state); + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); + } + + /* note, journal_release checks for readonly mount, and can decide not + ** to do a journal_end + */ + journal_release(&th, s) ; + + for (i = 0; i < SB_BMAP_NR (s); i ++) + brelse (SB_AP_BITMAP (s)[i]); + + reiserfs_kfree (SB_AP_BITMAP (s), sizeof (struct buffer_head *) * SB_BMAP_NR (s), s); + + brelse (SB_BUFFER_WITH_SB (s)); + + print_statistics (s); + + if (s->u.reiserfs_sb.s_kmallocs != 0) { + reiserfs_warning ("vs-2004: reiserfs_put_super: aloocated memory left %d\n", + s->u.reiserfs_sb.s_kmallocs); + } + + return; +} + +struct super_operations reiserfs_sops = +{ + read_inode: reiserfs_read_inode, + read_inode2: reiserfs_read_inode2, + write_inode: reiserfs_write_inode, + dirty_inode: reiserfs_dirty_inode, + delete_inode: reiserfs_delete_inode, + put_super: reiserfs_put_super, + write_super: reiserfs_write_super, + write_super_lockfs: reiserfs_write_super_lockfs, + unlockfs: reiserfs_unlockfs, + statfs: reiserfs_statfs, + remount_fs: reiserfs_remount, + +}; + +/* this was (ext2)parse_options */ +static int parse_options (char * options, unsigned long * mount_options, unsigned long * blocks) +{ + char * this_char; + char * value; + + *blocks = 0; + if (!options) + /* use default configuration: create tails, journaling on, no + conversion to newest format */ + return 1; + for (this_char = strtok (options, ","); this_char != NULL; this_char = strtok (NULL, ",")) { + if ((value = strchr (this_char, '=')) != NULL) + *value++ = 0; + if (!strcmp (this_char, "notail")) { + set_bit (NOTAIL, mount_options); + } else if (!strcmp (this_char, "conv")) { + // if 
this is set, we update super block such that + // the partition will not be mounable by 3.5.x anymore + set_bit (REISERFS_CONVERT, mount_options); + } else if (!strcmp (this_char, "noborder")) { + /* this is used for benchmarking + experimental variations, it is not + intended for users to use, only for + developers who want to casually + hack in something to test */ + set_bit (REISERFS_NO_BORDER, mount_options); + } else if (!strcmp (this_char, "no_unhashed_relocation")) { + set_bit (REISERFS_NO_UNHASHED_RELOCATION, mount_options); + } else if (!strcmp (this_char, "hashed_relocation")) { + set_bit (REISERFS_HASHED_RELOCATION, mount_options); + } else if (!strcmp (this_char, "test4")) { + set_bit (REISERFS_TEST4, mount_options); + } else if (!strcmp (this_char, "nolog")) { + reiserfs_warning("reiserfs: nolog mount option not supported yet\n"); + } else if (!strcmp (this_char, "replayonly")) { + set_bit (REPLAYONLY, mount_options); + } else if (!strcmp (this_char, "resize")) { + if (value && *value){ + *blocks = simple_strtoul (value, &value, 0); + } else { + printk("reiserfs: resize option requires a value\n"); + return 0; + } + } else if (!strcmp (this_char, "hash")) { + if (value && *value) { + /* if they specify any hash option, we force detection + ** to make sure they aren't using the wrong hash + */ + if (!strcmp(value, "rupasov")) { + set_bit (FORCE_RUPASOV_HASH, mount_options); + set_bit (FORCE_HASH_DETECT, mount_options); + } else if (!strcmp(value, "tea")) { + set_bit (FORCE_TEA_HASH, mount_options); + set_bit (FORCE_HASH_DETECT, mount_options); + } else if (!strcmp(value, "r5")) { + set_bit (FORCE_R5_HASH, mount_options); + set_bit (FORCE_HASH_DETECT, mount_options); + } else if (!strcmp(value, "detect")) { + set_bit (FORCE_HASH_DETECT, mount_options); + } else { + printk("reiserfs: invalid hash function specified\n") ; + return 0 ; + } + } else { + printk("reiserfs: hash option requires a value\n"); + return 0 ; + } + } else { + printk ("reiserfs: Unrecognized mount option %s\n", this_char); + return 0; + } + } + return 1; +} + + +int reiserfs_is_super(struct super_block *s) { + return (s->s_dev != 0 && s->s_op == &reiserfs_sops) ; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +int reiserfs_remount (struct super_block * s, int * flags, char * data) +{ + struct reiserfs_super_block * rs; + struct reiserfs_transaction_handle th ; + unsigned long blocks; + unsigned long mount_options; + + rs = SB_DISK_SUPER_BLOCK (s); + + if (!parse_options(data, &mount_options, &blocks)) + return 0; + + if(blocks) { + int rc = reiserfs_resize(s, blocks); + if (rc != 0) + return rc; + } + + if ((unsigned long)(*flags & MS_RDONLY) == (s->s_flags & MS_RDONLY)) { + /* there is nothing to do to remount read-only fs as read-only fs */ + return 0; + } + + if (*flags & MS_RDONLY) { + /* try to remount file system with read-only permissions */ + if (le16_to_cpu (rs->s_state) == REISERFS_VALID_FS || s->u.reiserfs_sb.s_mount_state != REISERFS_VALID_FS) { + return 0; + } + + journal_begin(&th, s, 10) ; + /* Mounting a rw partition read-only. 
*/ + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + rs->s_state = cpu_to_le16 (s->u.reiserfs_sb.s_mount_state); + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); + s->s_dirt = 0; + } else { + s->u.reiserfs_sb.s_mount_state = le16_to_cpu(rs->s_state) ; + s->s_flags &= ~MS_RDONLY ; /* now it is safe to call journal_begin */ + journal_begin(&th, s, 10) ; + + /* Mount a partition which is read-only, read-write */ + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + s->u.reiserfs_sb.s_mount_state = le16_to_cpu (rs->s_state); + s->s_flags &= ~MS_RDONLY; + rs->s_state = cpu_to_le16 (REISERFS_ERROR_FS); + /* mark_buffer_dirty (SB_BUFFER_WITH_SB (s), 1); */ + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); + s->s_dirt = 0; + s->u.reiserfs_sb.s_mount_state = REISERFS_VALID_FS ; + } + /* this will force a full flush of all journal lists */ + SB_JOURNAL(s)->j_must_wait = 1 ; + journal_end(&th, s, 10) ; + return 0; +} + + +static int read_bitmaps (struct super_block * s) +{ + int i, bmp, dl ; + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK(s); + + SB_AP_BITMAP (s) = reiserfs_kmalloc (sizeof (struct buffer_head *) * le16_to_cpu (rs->s_bmap_nr), GFP_BUFFER, s); + if (SB_AP_BITMAP (s) == 0) + return 1; + memset (SB_AP_BITMAP (s), 0, sizeof (struct buffer_head *) * le16_to_cpu (rs->s_bmap_nr)); + + /* reiserfs leaves the first 64k unused so that any partition + labeling scheme currently used will have enough space. Then we + need one block for the super. -Hans */ + bmp = (REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1; /* first of bitmap blocks */ + SB_AP_BITMAP (s)[0] = reiserfs_bread (s->s_dev, bmp, s->s_blocksize); + if(!SB_AP_BITMAP(s)[0]) + return 1; + for (i = 1, bmp = dl = rs->s_blocksize * 8; i < le16_to_cpu (rs->s_bmap_nr); i ++) { + SB_AP_BITMAP (s)[i] = reiserfs_bread (s->s_dev, bmp, s->s_blocksize); + if (!SB_AP_BITMAP (s)[i]) + return 1; + bmp += dl; + } + + return 0; +} + +static int read_old_bitmaps (struct super_block * s) +{ + int i ; + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK(s); + int bmp1 = (REISERFS_OLD_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1; /* first of bitmap blocks */ + + /* read true bitmap */ + SB_AP_BITMAP (s) = reiserfs_kmalloc (sizeof (struct buffer_head *) * le16_to_cpu (rs->s_bmap_nr), GFP_BUFFER, s); + if (SB_AP_BITMAP (s) == 0) + return 1; + + memset (SB_AP_BITMAP (s), 0, sizeof (struct buffer_head *) * le16_to_cpu (rs->s_bmap_nr)); + + for (i = 0; i < le16_to_cpu (rs->s_bmap_nr); i ++) { + SB_AP_BITMAP (s)[i] = reiserfs_bread (s->s_dev, bmp1 + i, s->s_blocksize); + if (!SB_AP_BITMAP (s)[i]) + return 1; + } + + return 0; +} + +void check_bitmap (struct super_block * s) +{ + int i = 0; + int free = 0; + char * buf; + + while (i < SB_BLOCK_COUNT (s)) { + buf = SB_AP_BITMAP (s)[i / (s->s_blocksize * 8)]->b_data; + if (!reiserfs_test_le_bit (i % (s->s_blocksize * 8), buf)) + free ++; + i ++; + } + + if (free != SB_FREE_BLOCKS (s)) + reiserfs_warning ("vs-4000: check_bitmap: %d free blocks, must be %d\n", + free, SB_FREE_BLOCKS (s)); +} + +#ifdef SUPPORT_OLD_FORMAT + +/* support old disk layout */ +static int read_old_super_block (struct super_block * s, int size) +{ + struct buffer_head * bh; + struct reiserfs_super_block * rs; + + printk("read_old_super_block: try to find super block in old location\n"); + /* there are only 4k-sized blocks in v3.5.10 */ + if (size != REISERFS_OLD_BLOCKSIZE) + set_blocksize(s->s_dev, REISERFS_OLD_BLOCKSIZE); + bh = bread (s->s_dev, + REISERFS_OLD_DISK_OFFSET_IN_BYTES / 
REISERFS_OLD_BLOCKSIZE, + REISERFS_OLD_BLOCKSIZE); + if (!bh) { + printk("read_old_super_block: unable to read superblock on dev %s\n", kdevname(s->s_dev)); + return 1; + } + + rs = (struct reiserfs_super_block *)bh->b_data; + if (strncmp (rs->s_magic, REISERFS_SUPER_MAGIC_STRING, strlen ( REISERFS_SUPER_MAGIC_STRING))) { + /* pre-journaling version check */ + if(!strncmp((char*)rs + REISERFS_SUPER_MAGIC_STRING_OFFSET_NJ, + REISERFS_SUPER_MAGIC_STRING, strlen(REISERFS_SUPER_MAGIC_STRING))) { + printk("read_old_super_blockr: a pre-journaling reiserfs filesystem isn't suitable there.\n"); + brelse(bh); + return 1; + } + + brelse (bh); + printk ("read_old_super_block: can't find a reiserfs filesystem on dev %s.\n", kdevname(s->s_dev)); + return 1; + } + + if(REISERFS_OLD_BLOCKSIZE != le16_to_cpu (rs->s_blocksize)) { + printk("read_old_super_block: blocksize mismatch, super block corrupted\n"); + brelse(bh); + return 1; + } + + s->s_blocksize = REISERFS_OLD_BLOCKSIZE; + s->s_blocksize_bits = 0; + while ((1 << s->s_blocksize_bits) != s->s_blocksize) + s->s_blocksize_bits ++; + + SB_BUFFER_WITH_SB (s) = bh; + SB_DISK_SUPER_BLOCK (s) = rs; + s->s_op = &reiserfs_sops; + return 0; +} +#endif + +// +// FIXME: mounting old filesystems we _must_ change magic string to +// make then unmountable by reiserfs of 3.5.x +// +static int read_super_block (struct super_block * s, int size) +{ + struct buffer_head * bh; + struct reiserfs_super_block * rs; + + bh = bread (s->s_dev, REISERFS_DISK_OFFSET_IN_BYTES / size, size); + if (!bh) { + printk("read_super_block: unable to read superblock on dev %s\n", kdevname(s->s_dev)); + return 1; + } + + rs = (struct reiserfs_super_block *)bh->b_data; + if (!is_reiserfs_magic_string (rs)) { + printk ("read_super_block: can't find a reiserfs filesystem on dev %s\n", + kdevname(s->s_dev)); + brelse (bh); + return 1; + } + + // + // ok, reiserfs signature (old or new) found in 64-th 1k block of + // the device + // + +#ifndef SUPPORT_OLD_FORMAT + // with SUPPORT_OLD_FORMAT undefined - detect old format by + // checking super block version + if (le16_to_cpu (rs->s_version) != REISERFS_VERSION_2) { + brelse (bh); + printk ("read_super_block: unsupported version (%d) of reiserfs found on dev %s\n", + le16_to_cpu (rs->s_version), kdevname(s->s_dev)); + return 1; + } +#endif + + s->s_blocksize = le16_to_cpu (rs->s_blocksize); + s->s_blocksize_bits = 0; + while ((1 << s->s_blocksize_bits) != s->s_blocksize) + s->s_blocksize_bits ++; + + brelse (bh); + + if (s->s_blocksize != size) + set_blocksize (s->s_dev, s->s_blocksize); + bh = reiserfs_bread (s->s_dev, REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize, s->s_blocksize); + if (!bh) { + printk("read_super_block: unable to read superblock on dev %s\n", kdevname(s->s_dev)); + return 1; + } + + rs = (struct reiserfs_super_block *)bh->b_data; + if (!is_reiserfs_magic_string (rs) || + le16_to_cpu (rs->s_blocksize) != s->s_blocksize) { + brelse (bh); + printk ("read_super_block: can't find a reiserfs filesystem on dev %s.\n", kdevname(s->s_dev)); + return 1; + } + /* must check to be sure we haven't pulled an old format super out + ** of the old format's log. This is a kludge of a check, but it + ** will work. If block we've just read in is inside the + ** journal for that super, it can't be valid. + */ + if (bh->b_blocknr >= le32_to_cpu(rs->s_journal_block) && + bh->b_blocknr < (le32_to_cpu(rs->s_journal_block) + JOURNAL_BLOCK_COUNT)) { + brelse(bh) ; + printk("super-459: read_super_block: super found at block %lu is within its own log. 
" + "It must not be of this format type.\n", bh->b_blocknr) ; + return 1 ; + } + SB_BUFFER_WITH_SB (s) = bh; + SB_DISK_SUPER_BLOCK (s) = rs; + s->s_op = &reiserfs_sops; + return 0; +} + +/* after journal replay, reread all bitmap and super blocks */ +static int reread_meta_blocks(struct super_block *s) { + int i ; + ll_rw_block(READ, 1, &(SB_BUFFER_WITH_SB(s))) ; + wait_on_buffer(SB_BUFFER_WITH_SB(s)) ; + if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { + printk("reread_meta_blocks, error reading the super\n") ; + return 1 ; + } + + for (i = 0; i < SB_BMAP_NR(s) ; i++) { + ll_rw_block(READ, 1, &(SB_AP_BITMAP(s)[i])) ; + wait_on_buffer(SB_AP_BITMAP(s)[i]) ; + if (!buffer_uptodate(SB_AP_BITMAP(s)[i])) { + printk("reread_meta_blocks, error reading bitmap block number %d at %ld\n", i, SB_AP_BITMAP(s)[i]->b_blocknr) ; + return 1 ; + } + } + return 0 ; + +} + + +///////////////////////////////////////////////////// +// hash detection stuff + + +// if root directory is empty - we set default - Yura's - hash and +// warn about it +// FIXME: we look for only one name in a directory. If tea and yura +// bith have the same value - we ask user to send report to the +// mailing list +__u32 find_hash_out (struct super_block * s) +{ + int retval; + struct inode * inode; + struct cpu_key key; + INITIALIZE_PATH (path); + struct reiserfs_dir_entry de; + __u32 hash = DEFAULT_HASH; + + inode = s->s_root->d_inode; + + while (1) { + make_cpu_key (&key, inode, ~0, TYPE_DIRENTRY, 3); + retval = search_by_entry_key (s, &key, &path, &de); + if (retval == IO_ERROR) { + pathrelse (&path); + return UNSET_HASH ; + } + if (retval == NAME_NOT_FOUND) + de.de_entry_num --; + set_de_name_and_namelen (&de); + if (le32_to_cpu (de.de_deh[de.de_entry_num].deh_offset) == DOT_DOT_OFFSET) { + /* allow override in this case */ + if (reiserfs_rupasov_hash(s)) { + hash = YURA_HASH ; + } + reiserfs_warning("reiserfs: FS seems to be empty, autodetect " + "is using the default hash\n"); + break; + } + if (GET_HASH_VALUE(yura_hash (de.de_name, de.de_namelen)) == + GET_HASH_VALUE(keyed_hash (de.de_name, de.de_namelen))) { + reiserfs_warning ("reiserfs: Could not detect hash function " + "please mount with -o hash={tea,rupasov,r5}\n") ; + hash = UNSET_HASH ; + break; + } + if (GET_HASH_VALUE(le32_to_cpu(de.de_deh[de.de_entry_num].deh_offset))== + GET_HASH_VALUE (yura_hash (de.de_name, de.de_namelen))) + hash = YURA_HASH; + else + hash = TEA_HASH; + break; + } + + pathrelse (&path); + return hash; +} + +// finds out which hash names are sorted with +static int what_hash (struct super_block * s) +{ + __u32 code; + + code = le32_to_cpu (s->u.reiserfs_sb.s_rs->s_hash_function_code); + + /* reiserfs_hash_detect() == true if any of the hash mount options + ** were used. 
We must check them to make sure the user isn't + ** using a bad hash value + */ + if (code == UNSET_HASH || reiserfs_hash_detect(s)) + code = find_hash_out (s); + + if (code != UNSET_HASH && reiserfs_hash_detect(s)) { + /* detection has found the hash, and we must check against the + ** mount options + */ + if (reiserfs_rupasov_hash(s) && code != YURA_HASH) { + printk("REISERFS: Error, tea hash detected, " + "unable to force rupasov hash\n") ; + code = UNSET_HASH ; + } else if (reiserfs_tea_hash(s) && code != TEA_HASH) { + printk("REISERFS: Error, rupasov hash detected, " + "unable to force tea hash\n") ; + code = UNSET_HASH ; + } else if (reiserfs_r5_hash(s) && code != R5_HASH) { + printk("REISERFS: Error, r5 hash detected, " + "unable to force r5 hash\n") ; + code = UNSET_HASH ; + } + } else { + /* find_hash_out was not called or could not determine the hash */ + if (reiserfs_rupasov_hash(s)) { + code = YURA_HASH ; + } else if (reiserfs_tea_hash(s)) { + code = TEA_HASH ; + } else if (reiserfs_r5_hash(s)) { + code = R5_HASH ; + } + } + + /* if we are mounted RW, and we have a new valid hash code, update + ** the super + */ + if (code != UNSET_HASH && + !(s->s_flags & MS_RDONLY) && + code != le32_to_cpu (s->u.reiserfs_sb.s_rs->s_hash_function_code)) { + s->u.reiserfs_sb.s_rs->s_hash_function_code = cpu_to_le32(code) ; + } + return code; +} + +// return pointer to appropriate function +static hashf_t hash_function (struct super_block * s) +{ + switch (what_hash (s)) { + case TEA_HASH: + reiserfs_warning ("Using tea hash to sort names\n"); + return keyed_hash; + case YURA_HASH: + reiserfs_warning ("Using rupasov hash to sort names\n"); + return yura_hash; + case R5_HASH: + reiserfs_warning ("Using r5 hash to sort names\n"); + return r5_hash; + } + return NULL; +} + +// this is used to set up correct value for old partitions +int function2code (hashf_t func) +{ + if (func == keyed_hash) + return TEA_HASH; + if (func == yura_hash) + return YURA_HASH; + if (func == r5_hash) + return R5_HASH; + + BUG() ; // should never happen + + return 0; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. 
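The hash chosen by what_hash() and hash_function() above is what orders directory entries: the per-name hash value becomes part of the entry's offset key component. A minimal sketch of that use, kept to names that appear in this file; example_name_hash itself is a hypothetical helper, not a function in this patch:

/* Sketch only: how the mount-time hash selection is applied to a name.
** s_hash_function is set from hash_function() at mount time (see
** reiserfs_read_super() below); GET_HASH_VALUE() is the same macro
** that find_hash_out() uses above. */
static __u32 example_name_hash(struct super_block *s, const char *name, int len)
{
	hashf_t h = s->u.reiserfs_sb.s_hash_function;	/* keyed_hash, yura_hash or r5_hash */

	return GET_HASH_VALUE(h(name, len));
}

Two different names can produce the same hash value; such entries are told apart by a generation counter folded into the offset, and the EHASHCOLLISION errno added later in this patch reports the case where that counter is exhausted (its comment there reads "Number of hash collisions exceeds maximum generation counter value").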
+// +struct super_block * reiserfs_read_super (struct super_block * s, void * data, int silent) +{ + int size; + struct inode *root_inode; + kdev_t dev = s->s_dev; + int j; + extern int *blksize_size[]; + struct reiserfs_transaction_handle th ; + int old_format = 0; + unsigned long blocks; + int jinit_done = 0 ; + struct reiserfs_iget4_args args ; + + + memset (&s->u.reiserfs_sb, 0, sizeof (struct reiserfs_sb_info)); + + if (parse_options ((char *) data, &(s->u.reiserfs_sb.s_mount_opt), &blocks) == 0) { + return NULL; + } + + if (blocks) { + printk("reserfs: resize option for remount only\n"); + return NULL; + } + + if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)] != 0) { + /* as blocksize is set for partition we use it */ + size = blksize_size[MAJOR(dev)][MINOR(dev)]; + } else { + size = BLOCK_SIZE; + set_blocksize (s->s_dev, BLOCK_SIZE); + } + + /* read block (64-th 1k block), which can contain reiserfs super block */ + if (read_super_block (s, size)) { +#ifdef SUPPORT_OLD_FORMAT + // try old format (undistributed bitmap, super block in 8-th 1k block of a device) + if(read_old_super_block(s,size)) + goto error; + else + old_format = 1; +#endif + goto error ; + } + + s->u.reiserfs_sb.s_mount_state = le16_to_cpu (SB_DISK_SUPER_BLOCK (s)->s_state); /* journal victim */ + s->u.reiserfs_sb.s_mount_state = REISERFS_VALID_FS ; + + if (old_format ? read_old_bitmaps(s) : read_bitmaps(s)) { + printk ("reiserfs_read_super: unable to read bitmap\n"); + goto error; + } +#ifdef CONFIG_REISERFS_CHECK + printk("reiserfs:warning: CONFIG_REISERFS_CHECK is set ON\n"); + printk("reiserfs:warning: - it is slow mode for debugging.\n"); +#endif + + // set_device_ro(s->s_dev, 1) ; + if (journal_init(s)) { + printk("reiserfs_read_super: unable to initialize journal space\n") ; + goto error ; + } else { + jinit_done = 1 ; /* once this is set, journal_release must be called + ** if we error out of the mount + */ + } + if (reread_meta_blocks(s)) { + printk("reiserfs_read_super: unable to reread meta blocks after journal init\n") ; + goto error ; + } + + if (replay_only (s)) + goto error; + + if (is_read_only(s->s_dev) && !(s->s_flags & MS_RDONLY)) { + printk("clm-7000: Detected readonly device, marking FS readonly\n") ; + s->s_flags |= MS_RDONLY ; + } + args.objectid = REISERFS_ROOT_PARENT_OBJECTID ; + root_inode = iget4 (s, REISERFS_ROOT_OBJECTID, 0, (void *)(&args)); + if (!root_inode) { + printk ("reiserfs_read_super: get root inode failed\n"); + goto error; + } + + s->s_root = d_alloc_root(root_inode); + if (!s->s_root) { + iput(root_inode); + goto error; + } + + // define and initialize hash function + s->u.reiserfs_sb.s_hash_function = hash_function (s); + if (s->u.reiserfs_sb.s_hash_function == NULL) { + dput(s->s_root) ; + s->s_root = NULL ; + goto error ; + } + + if (!(s->s_flags & MS_RDONLY)) { + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); + + journal_begin(&th, s, 1) ; + reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; + + rs->s_state = cpu_to_le16 (REISERFS_ERROR_FS); + + if (strncmp (rs->s_magic, REISER2FS_SUPER_MAGIC_STRING, + strlen ( REISER2FS_SUPER_MAGIC_STRING))) { + if (le16_to_cpu(rs->s_version) != 0) + BUG (); + // filesystem created under 3.5.x found + if (!old_format_only (s)) { + reiserfs_warning("reiserfs: converting 3.5.x filesystem to the new format\n") ; + // after this 3.5.x will not be able to mount this partition + memcpy (rs->s_magic, REISER2FS_SUPER_MAGIC_STRING, + sizeof (REISER2FS_SUPER_MAGIC_STRING)); + + 
reiserfs_convert_objectid_map_v1(s) ; + } else { + reiserfs_warning("reiserfs: using 3.5.x disk format\n") ; + } + } else { + // new format found + set_bit (REISERFS_CONVERT, &(s->u.reiserfs_sb.s_mount_opt)); + } + + // mark hash in super block: it could be unset. overwrite should be ok + rs->s_hash_function_code = cpu_to_le32 (function2code (s->u.reiserfs_sb.s_hash_function)); + + journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); + journal_end(&th, s, 1) ; + s->s_dirt = 0; + } else { + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); + if (strncmp (rs->s_magic, REISER2FS_SUPER_MAGIC_STRING, + strlen ( REISER2FS_SUPER_MAGIC_STRING))) { + reiserfs_warning("reiserfs: using 3.5.x disk format\n") ; + } + } + + init_waitqueue_head (&(s->u.reiserfs_sb.s_wait)); + + printk("%s\n", reiserfs_get_version_string()) ; + return s; + + error: + if (jinit_done) { /* kill the commit thread, free journal ram */ + journal_release_error(NULL, s) ; + } + if (SB_DISK_SUPER_BLOCK (s)) { + for (j = 0; j < SB_BMAP_NR (s); j ++) { + if (SB_AP_BITMAP (s)) + brelse (SB_AP_BITMAP (s)[j]); + } + if (SB_AP_BITMAP (s)) + reiserfs_kfree (SB_AP_BITMAP (s), sizeof (struct buffer_head *) * SB_BMAP_NR (s), s); + } + if (SB_BUFFER_WITH_SB (s)) + brelse(SB_BUFFER_WITH_SB (s)); + + return NULL; +} + + +// +// a portion of this function, particularly the VFS interface portion, +// was derived from minix or ext2's analog and evolved as the +// prototype did. You should be able to tell which portion by looking +// at the ext2 code and comparing. It's subfunctions contain no code +// used as a template unless they are so labeled. +// +int reiserfs_statfs (struct super_block * s, struct statfs * buf) +{ + struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); + + /* changed to accomodate gcc folks.*/ + buf->f_type = REISERFS_SUPER_MAGIC; + buf->f_bsize = le32_to_cpu (s->s_blocksize); + buf->f_blocks = le32_to_cpu (rs->s_block_count) - le16_to_cpu (rs->s_bmap_nr) - 1; + buf->f_bfree = le32_to_cpu (rs->s_free_blocks); + buf->f_bavail = buf->f_bfree; + buf->f_files = -1; + buf->f_ffree = -1; + buf->f_namelen = (REISERFS_MAX_NAME_LEN (s->s_blocksize)); + return 0; +} + +#ifdef __KERNEL__ + +static DECLARE_FSTYPE_DEV(reiserfs_fs_type,"reiserfs",reiserfs_read_super); + +// +// this is exactly what 2.3.99-pre9's init_ext2_fs is +// +static int __init init_reiserfs_fs (void) +{ + return register_filesystem(&reiserfs_fs_type); +} + +EXPORT_NO_SYMBOLS; + +// +// this is exactly what 2.3.99-pre9's init_ext2_fs is +// +static void __exit exit_reiserfs_fs(void) +{ + unregister_filesystem(&reiserfs_fs_type); +} + +module_init(init_reiserfs_fs) ; +module_exit(exit_reiserfs_fs) ; + +#endif + + + diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/tail_conversion.c linux/fs/reiserfs/tail_conversion.c --- v2.4.0/linux/fs/reiserfs/tail_conversion.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/tail_conversion.c Mon Jan 15 15:31:19 2001 @@ -0,0 +1,297 @@ +/* + * Copyright 1999 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +#else + +#include "nokernel.h" + +#endif + + +/* access to tail : when one is going to read tail it must make sure, that is not running. + direct2indirect and indirect2direct can not run concurrently */ + + +/* Converts direct items to an unformatted node. Panics if file has no + tail. 
-ENOSPC if no disk space for conversion */ +/* path points to first direct item of the file regarless of how many of + them are there */ +int direct2indirect (struct reiserfs_transaction_handle *th, struct inode * inode, + struct path * path, struct buffer_head * unbh, + loff_t tail_offset) +{ + struct super_block * sb = inode->i_sb; + struct buffer_head *up_to_date_bh ; + struct item_head * p_le_ih = PATH_PITEM_HEAD (path); + struct cpu_key end_key; /* Key to search for the last byte of the + converted item. */ + struct item_head ind_ih; /* new indirect item to be inserted or + key of unfm pointer to be pasted */ + int n_blk_size, + n_retval; /* returned value for reiserfs_insert_item and clones */ + struct unfm_nodeinfo unfm_ptr; /* Handle on an unformatted node + that will be inserted in the + tree. */ + + + sb->u.reiserfs_sb.s_direct2indirect ++; + + n_blk_size = sb->s_blocksize; + + /* and key to search for append or insert pointer to the new + unformatted node. */ + copy_item_head (&ind_ih, p_le_ih); + set_le_ih_k_offset (&ind_ih, tail_offset); + set_le_ih_k_type (&ind_ih, TYPE_INDIRECT); + + /* Set the key to search for the place for new unfm pointer */ + make_cpu_key (&end_key, inode, tail_offset, TYPE_INDIRECT, 4); + + // FIXME: we could avoid this + if ( search_for_position_by_key (sb, &end_key, path) == POSITION_FOUND ) + reiserfs_panic (sb, "PAP-14030: direct2indirect: " + "pasted or inserted byte exists in the tree"); + + p_le_ih = PATH_PITEM_HEAD (path); + + unfm_ptr.unfm_nodenum = cpu_to_le32 (unbh->b_blocknr); + unfm_ptr.unfm_freespace = 0; // ??? + + if ( is_statdata_le_ih (p_le_ih) ) { + /* Insert new indirect item. */ + set_ih_free_space (&ind_ih, 0); /* delete at nearest future */ + ind_ih.ih_item_len = cpu_to_le16 (UNFM_P_SIZE); + PATH_LAST_POSITION (path)++; + n_retval = reiserfs_insert_item (th, path, &end_key, &ind_ih, + (char *)&unfm_ptr); + } else { + /* Paste into last indirect item of an object. */ + n_retval = reiserfs_paste_into_item(th, path, &end_key, + (char *)&unfm_ptr, UNFM_P_SIZE); + } + if ( n_retval ) { + return n_retval; + } + + // note: from here there are two keys which have matching first + // three key components. They only differ by the fourth one. + + + /* Set the key to search for the direct items of the file */ + make_cpu_key (&end_key, inode, max_reiserfs_offset (inode), TYPE_DIRECT, 4); + + /* Move bytes from the direct items to the new unformatted node + and delete them. */ + while (1) { + int item_len, first_direct; + + /* end_key.k_offset is set so, that we will always have found + last item of the file */ + if ( search_for_position_by_key (sb, &end_key, path) == POSITION_FOUND ) + reiserfs_panic (sb, "PAP-14050: direct2indirect: " + "direct item (%k) not found", &end_key); + p_le_ih = PATH_PITEM_HEAD (path); +#ifdef CONFIG_REISERFS_CHECK + if (!is_direct_le_ih (p_le_ih)) + reiserfs_panic (sb, "vs-14055: direct2indirect: " + "direct item expected, found %h", p_le_ih); +#endif + if ((le_ih_k_offset (p_le_ih) & (n_blk_size - 1)) == 1) + first_direct = 1; + else + first_direct = 0; + item_len = le16_to_cpu (p_le_ih->ih_item_len); + + /* we only send the unbh pointer if the buffer is not up to date. 
+ ** this avoids overwriting good data from writepage() with old data + ** from the disk or buffer cache + */ + if (buffer_uptodate(unbh) || Page_Uptodate(unbh->b_page)) { + up_to_date_bh = NULL ; + } else { + up_to_date_bh = unbh ; + } + n_retval = reiserfs_delete_item (th, path, &end_key, inode, + up_to_date_bh) ; + + if (first_direct && item_len == n_retval) + // done: file does not have direct items anymore + break; + + } + + inode->u.reiserfs_i.i_first_direct_byte = U32_MAX; + + return 0; +} + + +/* stolen from fs/buffer.c */ +void reiserfs_unmap_buffer(struct buffer_head *bh) { + if (buffer_mapped(bh)) { + if (buffer_journaled(bh) || buffer_journal_dirty(bh)) { + BUG() ; + } + mark_buffer_clean(bh) ; + wait_on_buffer(bh) ; + // clear_bit(BH_Uptodate, &bh->b_state) ; + clear_bit(BH_Mapped, &bh->b_state) ; + clear_bit(BH_Req, &bh->b_state) ; + clear_bit(BH_New, &bh->b_state) ; + } +} + +static void +unmap_buffers(struct page *page, loff_t pos) { + struct buffer_head *bh ; + struct buffer_head *head ; + struct buffer_head *next ; + unsigned long tail_index ; + unsigned long cur_index ; + + if (page) { + if (page->buffers) { + tail_index = pos & (PAGE_CACHE_SIZE - 1) ; + cur_index = 0 ; + head = page->buffers ; + bh = head ; + do { + next = bh->b_this_page ; + + /* we want to unmap the buffers that contain the tail, and + ** all the buffers after it (since the tail must be at the + ** end of the file). We don't want to unmap file data + ** before the tail, since it might be dirty and waiting to + ** reach disk + */ + cur_index += bh->b_size ; + if (cur_index > tail_index) { + reiserfs_unmap_buffer(bh) ; + } + bh = next ; + } while (bh != head) ; + } + } +} + +/* this first locks inode (neither reads nor sync are permitted), + reads tail through page cache, insert direct item. When direct item + inserted successfully inode is left locked. Return value is always + what we expect from it (number of cut bytes). But when tail remains + in the unformatted node, we set mode to SKIP_BALANCING and unlock + inode */ +int indirect2direct (struct reiserfs_transaction_handle *th, + struct inode * p_s_inode, + struct page *page, + struct path * p_s_path, /* path to the indirect item. */ + struct cpu_key * p_s_item_key, /* Key to look for unformatted node pointer to be cut. */ + loff_t n_new_file_size, /* New file size. */ + char * p_c_mode) +{ + struct super_block * p_s_sb = p_s_inode->i_sb; + struct item_head s_ih; + unsigned long n_block_size = p_s_sb->s_blocksize; + char * tail; + int tail_len, round_tail_len; + loff_t pos, pos1; /* position of first byte of the tail */ + struct cpu_key key; + + p_s_sb->u.reiserfs_sb.s_indirect2direct ++; + + *p_c_mode = M_SKIP_BALANCING; + + /* store item head path points to. */ + copy_item_head (&s_ih, PATH_PITEM_HEAD(p_s_path)); + + tail_len = (n_new_file_size & (n_block_size - 1)); + if (!old_format_only (p_s_sb)) + round_tail_len = ROUND_UP (tail_len); + else + round_tail_len = tail_len; + + pos = le_ih_k_offset (&s_ih) - 1 + (le16_to_cpu (s_ih.ih_item_len) / UNFM_P_SIZE - 1) * p_s_sb->s_blocksize; + pos1 = pos; + + // we are protected by i_sem. 
The tail can not disapper, not + // append can be done either + // we are in truncate or packing tail in file_release + + tail = (char *)kmap(page) ; /* this can schedule */ + + if (path_changed (&s_ih, p_s_path)) { + /* re-search indirect item */ + if ( search_for_position_by_key (p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND ) + reiserfs_panic(p_s_sb, "PAP-5520: indirect2direct: " + "item to be converted %k does not exist", p_s_item_key); + copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path)); +#ifdef CONFIG_REISERFS_CHECK + pos = le_ih_k_offset (&s_ih) - 1 + + (le16_to_cpu (s_ih.ih_item_len) / UNFM_P_SIZE - 1) * p_s_sb->s_blocksize; + if (pos != pos1) + reiserfs_panic (p_s_sb, "vs-5530: indirect2direct: " + "tail position changed while we were reading it"); +#endif + } + + + /* Set direct item header to insert. */ + make_le_item_head (&s_ih, 0, inode_items_version (p_s_inode), pos1 + 1, + TYPE_DIRECT, round_tail_len, 0xffff/*ih_free_space*/); + + /* we want a pointer to the first byte of the tail in the page. + ** the page was locked and this part of the page was up to date when + ** indirect2direct was called, so we know the bytes are still valid + */ + tail = tail + (pos & (PAGE_CACHE_SIZE - 1)) ; + + PATH_LAST_POSITION(p_s_path)++; + + key = *p_s_item_key; + set_cpu_key_k_type (&key, TYPE_DIRECT); + key.key_length = 4; + /* Insert tail as new direct item in the tree */ + if ( reiserfs_insert_item(th, p_s_path, &key, &s_ih, + tail ? tail : NULL) < 0 ) { + /* No disk memory. So we can not convert last unformatted node + to the direct item. In this case we used to adjust + indirect items's ih_free_space. Now ih_free_space is not + used, it would be ideal to write zeros to corresponding + unformatted node. For now i_size is considered as guard for + going out of file size */ + kunmap(page) ; + return n_block_size - round_tail_len; + } + kunmap(page) ; + + /* this will invalidate all the buffers in the page after + ** pos1 + */ + unmap_buffers(page, pos1) ; + + // note: we have now the same as in above direct2indirect + // conversion: there are two keys which have matching first three + // key components. They only differ by the fouhth one. + + /* We have inserted new direct item and must remove last + unformatted node. */ + p_s_inode->i_blocks += (p_s_sb->s_blocksize / 512); + *p_c_mode = M_CUT; + + /* we store position of first direct item in the in-core inode */ + //mark_file_with_tail (p_s_inode, pos1 + 1); + p_s_inode->u.reiserfs_i.i_first_direct_byte = pos1 + 1; + + return n_block_size - round_tail_len; +} + + + diff -u --recursive --new-file v2.4.0/linux/fs/reiserfs/version.c linux/fs/reiserfs/version.c --- v2.4.0/linux/fs/reiserfs/version.c Wed Dec 31 16:00:00 1969 +++ linux/fs/reiserfs/version.c Mon Jan 15 12:42:32 2001 @@ -0,0 +1,7 @@ +/* + * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README + */ + +char *reiserfs_get_version_string(void) { + return "ReiserFS version 3.6.25" ; +} diff -u --recursive --new-file v2.4.0/linux/include/asm-alpha/errno.h linux/include/asm-alpha/errno.h --- v2.4.0/linux/include/asm-alpha/errno.h Wed Apr 16 14:15:00 1997 +++ linux/include/asm-alpha/errno.h Mon Jan 15 12:42:32 2001 @@ -139,4 +139,6 @@ #define ENOMEDIUM 129 /* No medium found */ #define EMEDIUMTYPE 130 /* Wrong medium type */ +#define EHASHCOLLISION 131 /* Number of hash collisons exceeds maximum generation counter value. 
*/ + #endif diff -u --recursive --new-file v2.4.0/linux/include/asm-i386/bugs.h linux/include/asm-i386/bugs.h --- v2.4.0/linux/include/asm-i386/bugs.h Thu Jan 4 14:50:45 2001 +++ linux/include/asm-i386/bugs.h Mon Jan 15 18:20:19 2001 @@ -76,26 +76,23 @@ } /* Enable FXSR and company _before_ testing for FP problems. */ -#if defined(CONFIG_X86_FXSR) || defined(CONFIG_X86_RUNTIME_FXSR) /* * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. */ - if (offsetof(struct task_struct, thread.i387.fxsave) & 15) - panic("Kernel compiled for PII/PIII+ with FXSR, data not 16-byte aligned!"); - + if (offsetof(struct task_struct, thread.i387.fxsave) & 15) { + extern void __buggy_fxsr_alignment(void); + __buggy_fxsr_alignment(); + } if (cpu_has_fxsr) { printk(KERN_INFO "Enabling fast FPU save and restore... "); set_in_cr4(X86_CR4_OSFXSR); printk("done.\n"); } -#endif -#ifdef CONFIG_X86_XMM if (cpu_has_xmm) { printk(KERN_INFO "Enabling unmasked SIMD FPU exception support... "); set_in_cr4(X86_CR4_OSXMMEXCPT); printk("done.\n"); } -#endif /* Test for the divl bug.. */ __asm__("fninit\n\t" @@ -202,14 +199,6 @@ && boot_cpu_data.x86_model == 2 && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!"); -#endif - -/* - * If we configured ourselves for FXSR, we'd better have it. - */ -#ifdef CONFIG_X86_FXSR - if (!cpu_has_fxsr) - panic("Kernel compiled for PII/PIII+, requires FXSR feature!"); #endif } diff -u --recursive --new-file v2.4.0/linux/include/asm-i386/errno.h linux/include/asm-i386/errno.h --- v2.4.0/linux/include/asm-i386/errno.h Mon Apr 14 16:28:18 1997 +++ linux/include/asm-i386/errno.h Mon Jan 15 12:42:32 2001 @@ -128,5 +128,6 @@ #define ENOMEDIUM 123 /* No medium found */ #define EMEDIUMTYPE 124 /* Wrong medium type */ +#define EHASHCOLLISION 125 /* Number of hash collisons exceeds maximum generation counter value. */ #endif diff -u --recursive --new-file v2.4.0/linux/include/asm-i386/i387.h linux/include/asm-i386/i387.h --- v2.4.0/linux/include/asm-i386/i387.h Thu Jan 4 14:52:01 2001 +++ linux/include/asm-i386/i387.h Mon Jan 15 17:26:26 2001 @@ -23,6 +23,10 @@ extern void save_init_fpu( struct task_struct *tsk ); extern void restore_fpu( struct task_struct *tsk ); +extern void kernel_fpu_begin(void); +#define kernel_fpu_end() stts() + + #define unlazy_fpu( tsk ) do { \ if ( tsk->flags & PF_USEDFPU ) \ save_init_fpu( tsk ); \ @@ -50,10 +54,8 @@ extern void set_fpu_mxcsr( struct task_struct *tsk, unsigned short mxcsr ); #define load_mxcsr( val ) do { \ - if ( cpu_has_xmm ) { \ - unsigned long __mxcsr = ((unsigned long)(val) & 0xffff); \ - asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \ - } \ + unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \ + asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \ } while (0) /* diff -u --recursive --new-file v2.4.0/linux/include/asm-i386/pgtable.h linux/include/asm-i386/pgtable.h --- v2.4.0/linux/include/asm-i386/pgtable.h Thu Jan 4 14:50:46 2001 +++ linux/include/asm-i386/pgtable.h Mon Jan 15 17:25:05 2001 @@ -140,7 +140,11 @@ #define VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \ ~(VMALLOC_OFFSET-1)) #define VMALLOC_VMADDR(x) ((unsigned long)(x)) -#define VMALLOC_END (FIXADDR_START) +#if CONFIG_HIGHMEM +# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) +#else +# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) +#endif /* * The 4MB page is guessing.. 
Detailed in the infamous "Chapter H" diff -u --recursive --new-file v2.4.0/linux/include/asm-i386/system.h linux/include/asm-i386/system.h --- v2.4.0/linux/include/asm-i386/system.h Thu Jan 4 14:50:46 2001 +++ linux/include/asm-i386/system.h Mon Jan 15 17:25:04 2001 @@ -267,15 +267,8 @@ * I expect future Intel CPU's to have a weaker ordering, * but I'd also expect them to finally get their act together * and add some real memory barriers if so. - * - * The Pentium III does add a real memory barrier with the - * sfence instruction, so we use that where appropriate. */ -#ifndef CONFIG_X86_XMM #define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") -#else -#define mb() __asm__ __volatile__ ("sfence": : :"memory") -#endif #define rmb() mb() #define wmb() __asm__ __volatile__ ("": : :"memory") diff -u --recursive --new-file v2.4.0/linux/include/linux/blk.h linux/include/linux/blk.h --- v2.4.0/linux/include/linux/blk.h Thu Jan 4 14:50:47 2001 +++ linux/include/linux/blk.h Mon Jan 15 17:25:39 2001 @@ -87,10 +87,6 @@ static inline void blkdev_dequeue_request(struct request * req) { - if (req->e) { - req->e->dequeue_fn(req); - req->e = NULL; - } list_del(&req->queue); } diff -u --recursive --new-file v2.4.0/linux/include/linux/blkdev.h linux/include/linux/blkdev.h --- v2.4.0/linux/include/linux/blkdev.h Thu Jan 4 14:50:47 2001 +++ linux/include/linux/blkdev.h Mon Jan 15 17:25:28 2001 @@ -23,8 +23,6 @@ int elevator_sequence; struct list_head table; - struct list_head *free_list; - volatile int rq_status; /* should split this into a few status bits */ #define RQ_INACTIVE (-1) #define RQ_ACTIVE 1 @@ -47,7 +45,6 @@ struct buffer_head * bh; struct buffer_head * bhtail; request_queue_t *q; - elevator_t *e; }; #include @@ -67,9 +64,10 @@ typedef void (unplug_device_fn) (void *q); /* - * Default nr free requests per queue + * Default nr free requests per queue, ll_rw_blk will scale it down + * according to available RAM at init time */ -#define QUEUE_NR_REQUESTS 256 +#define QUEUE_NR_REQUESTS 8192 struct request_queue { @@ -77,6 +75,8 @@ * the queue request freelist, one for reads and one for writes */ struct list_head request_freelist[2]; + struct list_head pending_freelist[2]; + int pending_free[2]; /* * Together with queue_head for cacheline sharing @@ -116,7 +116,7 @@ * Is meant to protect the queue in the future instead of * io_request_lock */ - spinlock_t request_lock; + spinlock_t queue_lock; /* * Tasks wait here for free request @@ -152,6 +152,7 @@ extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size); extern void generic_make_request(int rw, struct buffer_head * bh); extern request_queue_t *blk_get_queue(kdev_t dev); +extern inline request_queue_t *__blk_get_queue(kdev_t dev); extern void blkdev_release_request(struct request *); /* @@ -162,6 +163,7 @@ extern void blk_queue_headactive(request_queue_t *, int); extern void blk_queue_pluggable(request_queue_t *, plug_device_fn *); extern void blk_queue_make_request(request_queue_t *, make_request_fn *); +extern void generic_unplug_device(void *); extern int * blk_size[MAX_BLKDEV]; @@ -175,9 +177,10 @@ extern int * max_segments[MAX_BLKDEV]; -#define MAX_SECTORS 254 +extern atomic_t queued_sectors; -#define MAX_SEGMENTS MAX_SECTORS +#define MAX_SEGMENTS 128 +#define MAX_SECTORS (MAX_SEGMENTS*8) #define PageAlignSize(size) (((size) + PAGE_SIZE -1) & PAGE_MASK) @@ -203,5 +206,14 @@ return 512; } +#define blk_finished_io(nsects) \ + atomic_sub(nsects, 
&queued_sectors); \ + if (atomic_read(&queued_sectors) < 0) { \ + printk("block: queued_sectors < 0\n"); \ + atomic_set(&queued_sectors, 0); \ + } + +#define blk_started_io(nsects) \ + atomic_add(nsects, &queued_sectors); #endif diff -u --recursive --new-file v2.4.0/linux/include/linux/elevator.h linux/include/linux/elevator.h --- v2.4.0/linux/include/linux/elevator.h Tue Jul 18 21:43:10 2000 +++ linux/include/linux/elevator.h Mon Jan 15 13:08:15 2001 @@ -7,34 +7,32 @@ struct list_head *, struct list_head *, int); -typedef int (elevator_merge_fn) (request_queue_t *, struct request **, - struct buffer_head *, int, int *, int *); +typedef int (elevator_merge_fn) (request_queue_t *, struct request **, struct list_head *, + struct buffer_head *, int, int, int); -typedef void (elevator_dequeue_fn) (struct request *); +typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int); + +typedef void (elevator_merge_req_fn) (struct request *, struct request *); struct elevator_s { - int sequence; - int read_latency; int write_latency; - int max_bomb_segments; - unsigned int nr_segments; - int read_pendings; - - elevator_fn * elevator_fn; elevator_merge_fn *elevator_merge_fn; - elevator_dequeue_fn *dequeue_fn; + elevator_merge_cleanup_fn *elevator_merge_cleanup_fn; + elevator_merge_req_fn *elevator_merge_req_fn; unsigned int queue_ID; }; -void elevator_noop(struct request *, elevator_t *, struct list_head *, struct list_head *, int); -int elevator_noop_merge(request_queue_t *, struct request **, struct buffer_head *, int, int *, int *); -void elevator_noop_dequeue(struct request *); -void elevator_linus(struct request *, elevator_t *, struct list_head *, struct list_head *, int); -int elevator_linus_merge(request_queue_t *, struct request **, struct buffer_head *, int, int *, int *); +int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int, int); +void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int); +void elevator_noop_merge_req(struct request *, struct request *); + +int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct buffer_head *, int, int, int); +void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int); +void elevator_linus_merge_req(struct request *, struct request *); typedef struct blkelv_ioctl_arg_s { int queue_ID; @@ -69,6 +67,10 @@ (s1)->sector < (s2)->sector)) || \ (s1)->rq_dev < (s2)->rq_dev) +#define BHRQ_IN_ORDER(bh, rq) \ + (((bh)->b_rdev == (rq)->rq_dev && \ + (bh)->b_rsector < (rq)->sector)) + static inline int elevator_request_latency(elevator_t * elevator, int rw) { int latency; @@ -80,36 +82,24 @@ return latency; } -#define ELEVATOR_NOOP \ -((elevator_t) { \ - 0, /* sequence */ \ - \ - 0, /* read_latency */ \ - 0, /* write_latency */ \ - 0, /* max_bomb_segments */ \ - \ - 0, /* nr_segments */ \ - 0, /* read_pendings */ \ - \ - elevator_noop, /* elevator_fn */ \ - elevator_noop_merge, /* elevator_merge_fn */ \ - elevator_noop_dequeue, /* dequeue_fn */ \ +#define ELEVATOR_NOOP \ +((elevator_t) { \ + 0, /* read_latency */ \ + 0, /* write_latency */ \ + \ + elevator_noop_merge, /* elevator_merge_fn */ \ + elevator_noop_merge_cleanup, /* elevator_merge_cleanup_fn */ \ + elevator_noop_merge_req, /* elevator_merge_req_fn */ \ }) -#define ELEVATOR_LINUS \ -((elevator_t) { \ - 0, /* not used */ \ - \ - 1000000, /* read passovers */ \ - 2000000, /* write passovers */ \ - 0, /* max_bomb_segments */ \ - \ - 0, /* not used */ \ - 0, /* 
not used */ \ - \ - elevator_linus, /* elevator_fn */ \ - elevator_linus_merge, /* elevator_merge_fn */ \ - elevator_noop_dequeue, /* dequeue_fn */ \ +#define ELEVATOR_LINUS \ +((elevator_t) { \ + 8192, /* read passovers */ \ + 16384, /* write passovers */ \ + \ + elevator_linus_merge, /* elevator_merge_fn */ \ + elevator_linus_merge_cleanup, /* elevator_merge_cleanup_fn */ \ + elevator_linus_merge_req, /* elevator_merge_req_fn */ \ }) #endif diff -u --recursive --new-file v2.4.0/linux/include/linux/fs.h linux/include/linux/fs.h --- v2.4.0/linux/include/linux/fs.h Thu Jan 4 14:50:47 2001 +++ linux/include/linux/fs.h Mon Jan 15 17:25:05 2001 @@ -288,6 +288,7 @@ #include #include #include +#include #include #include #include @@ -450,6 +451,7 @@ struct hfs_inode_info hfs_i; struct adfs_inode_info adfs_i; struct qnx4_inode_info qnx4_i; + struct reiserfs_inode_info reiserfs_i; struct bfs_inode_info bfs_i; struct udf_inode_info udf_i; struct ncp_inode_info ncpfs_i; @@ -460,35 +462,6 @@ } u; }; -/* Inode state bits.. */ -#define I_DIRTY_SYNC 1 /* Not dirty enough for O_DATASYNC */ -#define I_DIRTY_DATASYNC 2 /* Data-related inode changes pending */ -#define I_DIRTY_PAGES 4 /* Data-related inode changes pending */ -#define I_LOCK 8 -#define I_FREEING 16 -#define I_CLEAR 32 - -#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) - -extern void __mark_inode_dirty(struct inode *, int); -static inline void mark_inode_dirty(struct inode *inode) -{ - if ((inode->i_state & I_DIRTY) != I_DIRTY) - __mark_inode_dirty(inode, I_DIRTY); -} - -static inline void mark_inode_dirty_sync(struct inode *inode) -{ - if (!(inode->i_state & I_DIRTY_SYNC)) - __mark_inode_dirty(inode, I_DIRTY_SYNC); -} - -static inline void mark_inode_dirty_pages(struct inode *inode) -{ - if (inode && !(inode->i_state & I_DIRTY_PAGES)) - __mark_inode_dirty(inode, I_DIRTY_PAGES); -} - struct fown_struct { int pid; /* pid or -pgrp where SIGIO should be sent */ uid_t uid, euid; /* uid/euid of process setting the owner */ @@ -654,6 +627,7 @@ #include #include #include +#include #include #include #include @@ -702,6 +676,7 @@ struct hfs_sb_info hfs_sb; struct adfs_sb_info adfs_sb; struct qnx4_sb_info qnx4_sb; + struct reiserfs_sb_info reiserfs_sb; struct bfs_sb_info bfs_sb; struct udf_sb_info udf_sb; struct ncp_sb_info ncpfs_sb; @@ -815,17 +790,54 @@ */ struct super_operations { void (*read_inode) (struct inode *); + + /* reiserfs kludge. reiserfs needs 64 bits of information to + ** find an inode. We are using the read_inode2 call to get + ** that information. We don't like this, and are waiting on some + ** VFS changes for the real solution. + ** iget4 calls read_inode2, iff it is defined + */ + void (*read_inode2) (struct inode *, void *) ; + void (*dirty_inode) (struct inode *); void (*write_inode) (struct inode *, int); void (*put_inode) (struct inode *); void (*delete_inode) (struct inode *); void (*put_super) (struct super_block *); void (*write_super) (struct super_block *); + void (*write_super_lockfs) (struct super_block *); + void (*unlockfs) (struct super_block *); int (*statfs) (struct super_block *, struct statfs *); int (*remount_fs) (struct super_block *, int *, char *); void (*clear_inode) (struct inode *); void (*umount_begin) (struct super_block *); }; +/* Inode state bits.. 
*/ +#define I_DIRTY_SYNC 1 /* Not dirty enough for O_DATASYNC */ +#define I_DIRTY_DATASYNC 2 /* Data-related inode changes pending */ +#define I_DIRTY_PAGES 4 /* Data-related inode changes pending */ +#define I_LOCK 8 +#define I_FREEING 16 +#define I_CLEAR 32 + +#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) + +extern void __mark_inode_dirty(struct inode *, int); +static inline void mark_inode_dirty(struct inode *inode) +{ + __mark_inode_dirty(inode, I_DIRTY); +} + +static inline void mark_inode_dirty_sync(struct inode *inode) +{ + __mark_inode_dirty(inode, I_DIRTY_SYNC); +} + +static inline void mark_inode_dirty_pages(struct inode *inode) +{ + __mark_inode_dirty(inode, I_DIRTY_PAGES); +} + struct dquot_operations { void (*initialize) (struct inode *, short); void (*drop) (struct inode *); @@ -987,6 +999,9 @@ extern int try_to_free_buffers(struct page *, int); extern void refile_buffer(struct buffer_head * buf); + +/* reiserfs_writepage needs this */ +extern void set_buffer_async_io(struct buffer_head *bh) ; #define BUF_CLEAN 0 #define BUF_LOCKED 1 /* Buffers scheduled for write */ diff -u --recursive --new-file v2.4.0/linux/include/linux/mm.h linux/include/linux/mm.h --- v2.4.0/linux/include/linux/mm.h Thu Jan 4 14:50:47 2001 +++ linux/include/linux/mm.h Mon Jan 15 17:25:05 2001 @@ -464,6 +464,7 @@ #else #define __GFP_HIGHMEM 0x0 /* noop */ #endif +#define __GFP_VM 0x20 #define GFP_BUFFER (__GFP_HIGH | __GFP_WAIT) diff -u --recursive --new-file v2.4.0/linux/include/linux/reiserfs_fs.h linux/include/linux/reiserfs_fs.h --- v2.4.0/linux/include/linux/reiserfs_fs.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/reiserfs_fs.h Mon Jan 15 13:23:01 2001 @@ -0,0 +1,2074 @@ +/* + * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for licensing and copyright details + */ + + /* this file has an amazingly stupid + name, yura please fix it to be + reiserfs.h, and merge all the rest + of our .h files that are in this + directory into it. */ + + +#ifndef _LINUX_REISER_FS_H +#define _LINUX_REISER_FS_H + + +#include +#ifdef __KERNEL__ +#include +#include +#endif + +/* + * include/linux/reiser_fs.h + * + * Reiser File System constants and structures + * + */ + +/* in reading the #defines, it may help to understand that they employ + the following abbreviations: + + B = Buffer + I = Item header + H = Height within the tree (should be changed to LEV) + N = Number of the item in the node + STAT = stat data + DEH = Directory Entry Header + EC = Entry Count + E = Entry number + UL = Unsigned Long + BLKH = BLocK Header + UNFM = UNForMatted node + DC = Disk Child + P = Path + + These #defines are named by concatenating these abbreviations, + where first comes the arguments, and last comes the return value, + of the macro. + +*/ + + /* Vladimir, what is the story with + new_get_new_buffer nowadays? I + want a complete explanation written + here. */ + +/* NEW_GET_NEW_BUFFER will try to allocate new blocks better */ +/*#define NEW_GET_NEW_BUFFER*/ +#define OLD_GET_NEW_BUFFER + + /* Vladimir, what about this one too? 
*/ +/* if this is undefined, all inode changes get into stat data immediately, if it can be found in RAM */ +#define DIRTY_LATER + +/* enable journalling */ +#define ENABLE_JOURNAL + +#ifdef __KERNEL__ + +/* #define REISERFS_CHECK */ + +#define REISERFS_PREALLOCATE +#endif +#define PREALLOCATION_SIZE 8 + +/* if this is undefined, all inode changes get into stat data + immediately, if it can be found in RAM */ +#define DIRTY_LATER + + +/*#define READ_LOCK_REISERFS*/ + + +/* n must be power of 2 */ +#define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) + +// to be ok for alpha and others we have to align structures to 8 byte +// boundary. +// FIXME: do not change 4 by anything else: there is code which relies on that + /* what 4? -Hans */ +#define ROUND_UP(x) _ROUND_UP(x,8LL) + +/* debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug +** messages. +*/ +#define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ + +/* + * Disk Data Structures + */ + +/***************************************************************************/ +/* SUPER BLOCK */ +/***************************************************************************/ + +/* + * Structure of super block on disk, a version of which in RAM is often accessed as s->u.reiserfs_sb.s_rs + * the version in RAM is part of a larger structure containing fields never written to disk. + */ + + /* used by gcc */ +#define REISERFS_SUPER_MAGIC 0x52654973 + /* used by file system utilities that + look at the superblock, etc. */ +#define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" +#define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" + +extern inline int is_reiserfs_magic_string (struct reiserfs_super_block * rs) +{ + return (!strncmp (rs->s_magic, REISERFS_SUPER_MAGIC_STRING, + strlen ( REISERFS_SUPER_MAGIC_STRING)) || + !strncmp (rs->s_magic, REISER2FS_SUPER_MAGIC_STRING, + strlen ( REISER2FS_SUPER_MAGIC_STRING))); +} + + /* ReiserFS leaves the first 64k unused, + so that partition labels have enough + space. If someone wants to write a + fancy bootloader that needs more than + 64k, let us know, and this will be + increased in size. This number must + be larger than than the largest block + size on any platform, or code will + break. -Hans */ +#define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) +#define REISERFS_FIRST_BLOCK unused_define + +/* the spot for the super in versions 3.5 - 3.5.10 (inclusive) */ +#define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024) + + +// reiserfs internal error code (used by search_by_key adn fix_nodes)) +#define CARRY_ON 0 +#define REPEAT_SEARCH -1 +#define IO_ERROR -2 +#define NO_DISK_SPACE -3 +#define NO_BALANCING_NEEDED (-4) +#define NO_MORE_UNUSED_CONTIGUOUS_BLOCKS (-5) + +//#define SCHEDULE_OCCURRED 1 +//#define PATH_INCORRECT 2 + +//#define NO_DISK_SPACE (-1) + + + +typedef unsigned long b_blocknr_t; +typedef __u32 unp_t; + + /* who is responsible for this + completely uncommented struct? */ +struct unfm_nodeinfo { + /* This is what? */ + unp_t unfm_nodenum; + /* now this I know what it is, and + most of the people on our project + know what it is, but I bet nobody + new I hire will have a clue. */ + unsigned short unfm_freespace; +}; + + +/* when reiserfs_file_write is called with a byte count >= MIN_PACK_ON_CLOSE, +** it sets the inode to pack on close, and when extending the file, will only +** use unformatted nodes. +** +** This is a big speed up for the journal, which is badly hurt by direct->indirect +** conversions (they must be logged). 
+*/ +#define MIN_PACK_ON_CLOSE 512 + +/* the defines below say, that if file size is >= + DIRECT_TAIL_SUPPRESSION_SIZE * blocksize, then if tail is longer + than MAX_BYTES_SUPPRESS_DIRECT_TAIL, it will be stored in + unformatted node */ +#define DIRECT_TAIL_SUPPRESSION_SIZE 1024 +#define MAX_BYTES_SUPPRESS_DIRECT_TAIL 1024 + +#if 0 + +// +#define mark_file_with_tail(inode,offset) \ +{\ +inode->u.reiserfs_i.i_has_tail = 1;\ +} + +#define mark_file_without_tail(inode) \ +{\ +inode->u.reiserfs_i.i_has_tail = 0;\ +} + +#endif + +// this says about version of all items (but stat data) the object +// consists of +#define inode_items_version(inode) ((inode)->u.reiserfs_i.i_version) + + +/* We store tail in unformatted node if it is too big to fit into a + formatted node or if DIRECT_TAIL_SUPPRESSION_SIZE, + MAX_BYTES_SUPPRESS_DIRECT_TAIL and file size say that. */ +/* #define STORE_TAIL_IN_UNFM(n_file_size,n_tail_size,n_block_size) \ */ +/* ( ((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \ */ +/* ( ( (n_file_size) >= (n_block_size) * DIRECT_TAIL_SUPPRESSION_SIZE ) && \ */ +/* ( (n_tail_size) >= MAX_BYTES_SUPPRESS_DIRECT_TAIL ) ) ) */ + + /* This is an aggressive tail suppression policy, I am hoping it + improves our benchmarks. The principle behind it is that + percentage space saving is what matters, not absolute space + saving. This is non-intuitive, but it helps to understand it if + you consider that the cost to access 4 blocks is not much more + than the cost to access 1 block, if you have to do a seek and + rotate. A tail risks a non-linear disk access that is + significant as a percentage of total time cost for a 4 block file + and saves an amount of space that is less significant as a + percentage of space, or so goes the hypothesis. -Hans */ +#define STORE_TAIL_IN_UNFM(n_file_size,n_tail_size,n_block_size) \ +(\ + (!(n_tail_size)) || \ + (((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \ + ( (n_file_size) >= (n_block_size) * 4 ) || \ + ( ( (n_file_size) >= (n_block_size) * 3 ) && \ + ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/4) ) || \ + ( ( (n_file_size) >= (n_block_size) * 2 ) && \ + ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/2) ) || \ + ( ( (n_file_size) >= (n_block_size) ) && \ + ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ +) + + +/* + * values for s_state field + */ +#define REISERFS_VALID_FS 1 +#define REISERFS_ERROR_FS 2 + + + +/***************************************************************************/ +/* KEY & ITEM HEAD */ +/***************************************************************************/ + +// +// we do support for old format of reiserfs: the problem is to +// distinuquish keys with 32 bit offset and keys with 60 bit ones. On +// leaf level we use ih_version of struct item_head (was +// ih_reserved). For all old items it is set to 0 +// (ITEM_VERSION_1). For new items it is ITEM_VERSION_2. On internal +// levels we have to know version of item key belongs to. 
+// +#define ITEM_VERSION_1 0 +#define ITEM_VERSION_2 1 + + +/* loff_t - long long */ + + +// +// directories use this key as well as old files +// +struct offset_v1 { + __u32 k_offset; + __u32 k_uniqueness; +} __attribute__ ((__packed__)); + +struct offset_v2 { + __u64 k_offset:60; + __u64 k_type: 4; +} __attribute__ ((__packed__)); + + + +/* Key of an item determines its location in the S+tree, and + is composed of 4 components */ +struct key { + __u32 k_dir_id; /* packing locality: by default parent + directory object id */ + __u32 k_objectid; /* object identifier */ + union { + struct offset_v1 k_offset_v1; + struct offset_v2 k_offset_v2; + } __attribute__ ((__packed__)) u; +} __attribute__ ((__packed__)); + + +struct cpu_key { + struct key on_disk_key; + int version; + int key_length; /* 3 in all cases but direct2indirect and + indirect2direct conversion */ +}; + + + + + + + + /* Our function for comparing keys can compare keys of different + lengths. It takes as a parameter the length of the keys it is to + compare. These defines are used in determining what is to be + passed to it as that parameter. */ +#define REISERFS_FULL_KEY_LEN 4 + +#define REISERFS_SHORT_KEY_LEN 2 + +/* The result of the key compare */ +#define FIRST_GREATER 1 +#define SECOND_GREATER -1 +#define KEYS_IDENTICAL 0 +#define KEY_FOUND 1 +#define KEY_NOT_FOUND 0 + + +#define KEY_SIZE (sizeof(struct key)) +#define SHORT_KEY_SIZE (sizeof (__u32) + sizeof (__u32)) + +/* return values for search_by_key and clones */ +#define ITEM_FOUND 1 +#define ITEM_NOT_FOUND 0 +#define ENTRY_FOUND 1 +#define ENTRY_NOT_FOUND 0 +#define DIRECTORY_NOT_FOUND -1 +#define REGULAR_FILE_FOUND -2 +#define DIRECTORY_FOUND -3 +#define BYTE_FOUND 1 +#define BYTE_NOT_FOUND 0 +#define FILE_NOT_FOUND -1 + +#define POSITION_FOUND 1 +#define POSITION_NOT_FOUND 0 + +// return values for reiserfs_find_entry and search_by_entry_key +#define NAME_FOUND 1 +#define NAME_NOT_FOUND 0 +#define GOTO_PREVIOUS_ITEM 2 +#define NAME_FOUND_INVISIBLE 3 + + + +/* Everything in the filesystem is stored as a set of items. The + item head contains the key of the item, its free space (for + indirect items) and specifies the location of the item itself + within the block. */ + +struct item_head +{ + struct key ih_key; /* Everything in the tree is found by searching for it based on its key.*/ + + /* This is bloat, this should be part + of the item not the item + header. -Hans */ + union { + __u16 ih_free_space_reserved; /* The free space in the last unformatted node of an indirect item if this + is an indirect item. This equals 0xFFFF iff this is a direct item or + stat data item. Note that the key, not this field, is used to determine + the item type, and thus which field this union contains. */ + __u16 ih_entry_count; /* Iff this is a directory item, this field equals the number of directory + entries in the directory item. */ + } __attribute__ ((__packed__)) u; + __u16 ih_item_len; /* total size of the item body */ + __u16 ih_item_location; /* an offset to the item body within the block */ + /* I thought we were going to use this + for having lots of item types? Why + don't you use this for item type + not item version. That is how you + talked me into this field a year + ago, remember? I am still not + convinced it needs to be 16 bits + (for at least many years), but at + least I can sympathize with that + hope. Change the name from version + to type, and tell people not to use + FFFF in case 16 bits is someday too + small and needs to be extended:-). 
*/ + __u16 ih_version; /* 0 for all old items, 2 for new + ones. Highest bit is set by fsck + temporary, cleaned after all done */ +} __attribute__ ((__packed__)); +/* size of item header */ +#define IH_SIZE (sizeof(struct item_head)) + +#define ih_free_space(ih) le16_to_cpu((ih)->u.ih_free_space_reserved) +#define ih_version(ih) le16_to_cpu((ih)->ih_version) +#define ih_entry_count(ih) le16_to_cpu((ih)->u.ih_entry_count) +#define ih_location(ih) le16_to_cpu((ih)->ih_item_location) +#define ih_item_len(ih) le16_to_cpu((ih)->ih_item_len) + +#define put_ih_free_space(ih, val) do { (ih)->u.ih_free_space_reserved = cpu_to_le16(val); } while(0) +#define put_ih_version(ih, val) do { (ih)->ih_version = cpu_to_le16(val); } while (0) +#define put_ih_entry_count(ih, val) do { (ih)->u.ih_entry_count = cpu_to_le16(val); } while (0) +#define put_ih_location(ih, val) do { (ih)->ih_item_location = cpu_to_le16(val); } while (0) +#define put_ih_item_len(ih, val) do { (ih)->ih_item_len = cpu_to_le16(val); } while (0) + + +// FIXME: now would that work for other than i386 archs +#define unreachable_item(ih) (ih->ih_version & (1 << 15)) + +#define get_ih_free_space(ih) (ih_version (ih) == ITEM_VERSION_2 ? 0 : ih_free_space (ih)) +#define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == ITEM_VERSION_2) ? 0 : (val))) + + +// +// there are 5 item types currently +// +#define TYPE_STAT_DATA 0 +#define TYPE_INDIRECT 1 +#define TYPE_DIRECT 2 +#define TYPE_DIRENTRY 3 +#define TYPE_ANY 15 // FIXME: comment is required + +// +// in old version uniqueness field shows key type +// +#define V1_SD_UNIQUENESS 0 +#define V1_INDIRECT_UNIQUENESS 0xfffffffe +#define V1_DIRECT_UNIQUENESS 0xffffffff +#define V1_DIRENTRY_UNIQUENESS 500 +#define V1_ANY_UNIQUENESS 555 // FIXME: comment is required + +// +// here are conversion routines +// +extern inline int uniqueness2type (__u32 uniqueness) +{ + switch (uniqueness) { + case V1_SD_UNIQUENESS: return TYPE_STAT_DATA; + case V1_INDIRECT_UNIQUENESS: return TYPE_INDIRECT; + case V1_DIRECT_UNIQUENESS: return TYPE_DIRECT; + case V1_DIRENTRY_UNIQUENESS: return TYPE_DIRENTRY; + } +/* + if (uniqueness != V1_ANY_UNIQUENESS) { + printk ("uniqueness %d\n", uniqueness); + BUG (); + } +*/ + return TYPE_ANY; +} + +extern inline __u32 type2uniqueness (int type) +{ + switch (type) { + case TYPE_STAT_DATA: return V1_SD_UNIQUENESS; + case TYPE_INDIRECT: return V1_INDIRECT_UNIQUENESS; + case TYPE_DIRECT: return V1_DIRECT_UNIQUENESS; + case TYPE_DIRENTRY: return V1_DIRENTRY_UNIQUENESS; + } + /* + if (type != TYPE_ANY) + BUG (); + */ + return V1_ANY_UNIQUENESS; +} + + +// +// key is pointer to on disk key which is stored in le, result is cpu, +// there is no way to get version of object from key, so, provide +// version to these defines +// +extern inline loff_t le_key_k_offset (int version, struct key * key) +{ + return (version == ITEM_VERSION_1) ? key->u.k_offset_v1.k_offset : + le64_to_cpu (key->u.k_offset_v2.k_offset); +} +extern inline loff_t le_ih_k_offset (struct item_head * ih) +{ + return le_key_k_offset (ih_version (ih), &(ih->ih_key)); +} + + +extern inline loff_t le_key_k_type (int version, struct key * key) +{ + return (version == ITEM_VERSION_1) ? 
uniqueness2type (key->u.k_offset_v1.k_uniqueness) : + le16_to_cpu (key->u.k_offset_v2.k_type); +} +extern inline loff_t le_ih_k_type (struct item_head * ih) +{ + return le_key_k_type (ih_version (ih), &(ih->ih_key)); +} + + +extern inline void set_le_key_k_offset (int version, struct key * key, loff_t offset) +{ + (version == ITEM_VERSION_1) ? (key->u.k_offset_v1.k_offset = offset) : + (key->u.k_offset_v2.k_offset = cpu_to_le64 (offset)); +} +extern inline void set_le_ih_k_offset (struct item_head * ih, loff_t offset) +{ + set_le_key_k_offset (ih_version (ih), &(ih->ih_key), offset); +} + + + +extern inline void set_le_key_k_type (int version, struct key * key, int type) +{ + (version == ITEM_VERSION_1) ? (key->u.k_offset_v1.k_uniqueness = type2uniqueness (type)) : + (key->u.k_offset_v2.k_type = cpu_to_le16 (type)); +} +extern inline void set_le_ih_k_type (struct item_head * ih, int type) +{ + set_le_key_k_type (ih_version (ih), &(ih->ih_key), type); +} + + +#define is_direntry_le_key(version,key) (le_key_k_type (version, key) == TYPE_DIRENTRY) +#define is_direct_le_key(version,key) (le_key_k_type (version, key) == TYPE_DIRECT) +#define is_indirect_le_key(version,key) (le_key_k_type (version, key) == TYPE_INDIRECT) +#define is_statdata_le_key(version,key) (le_key_k_type (version, key) == TYPE_STAT_DATA) + +// +// item header has version. +// +#define is_direntry_le_ih(ih) is_direntry_le_key (ih_version (ih), &((ih)->ih_key)) +#define is_direct_le_ih(ih) is_direct_le_key (ih_version (ih), &((ih)->ih_key)) +#define is_indirect_le_ih(ih) is_indirect_le_key (ih_version(ih), &((ih)->ih_key)) +#define is_statdata_le_ih(ih) is_statdata_le_key (ih_version (ih), &((ih)->ih_key)) + + + +// +// key is pointer to cpu key, result is cpu +// +extern inline loff_t cpu_key_k_offset (struct cpu_key * key) +{ + return (key->version == ITEM_VERSION_1) ? key->on_disk_key.u.k_offset_v1.k_offset : + key->on_disk_key.u.k_offset_v2.k_offset; +} + +extern inline loff_t cpu_key_k_type (struct cpu_key * key) +{ + return (key->version == ITEM_VERSION_1) ? uniqueness2type (key->on_disk_key.u.k_offset_v1.k_uniqueness) : + key->on_disk_key.u.k_offset_v2.k_type; +} + +extern inline void set_cpu_key_k_offset (struct cpu_key * key, loff_t offset) +{ + (key->version == ITEM_VERSION_1) ? (key->on_disk_key.u.k_offset_v1.k_offset = offset) : + (key->on_disk_key.u.k_offset_v2.k_offset = offset); +} + + +extern inline void set_cpu_key_k_type (struct cpu_key * key, int type) +{ + (key->version == ITEM_VERSION_1) ? (key->on_disk_key.u.k_offset_v1.k_uniqueness = type2uniqueness (type)) : + (key->on_disk_key.u.k_offset_v2.k_type = type); +} + +extern inline void cpu_key_k_offset_dec (struct cpu_key * key) +{ + if (key->version == ITEM_VERSION_1) + key->on_disk_key.u.k_offset_v1.k_offset --; + else + key->on_disk_key.u.k_offset_v2.k_offset --; +} + + +#define is_direntry_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRENTRY) +#define is_direct_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRECT) +#define is_indirect_cpu_key(key) (cpu_key_k_type (key) == TYPE_INDIRECT) +#define is_statdata_cpu_key(key) (cpu_key_k_type (key) == TYPE_STAT_DATA) + + +/* are these used ? */ +#define is_direntry_cpu_ih(ih) (is_direntry_cpu_key (&((ih)->ih_key))) +#define is_direct_cpu_ih(ih) (is_direct_cpu_key (&((ih)->ih_key))) +#define is_indirect_cpu_ih(ih) (is_indirect_cpu_key (&((ih)->ih_key))) +#define is_statdata_cpu_ih(ih) (is_statdata_cpu_key (&((ih)->ih_key))) + + + + + +#define I_K_KEY_IN_ITEM(p_s_ih, p_s_key, n_blocksize) \ + ( ! 
COMP_SHORT_KEYS(p_s_ih, p_s_key) && \ + I_OFF_BYTE_IN_ITEM(p_s_ih, k_offset (p_s_key), n_blocksize) ) + +/* maximal length of item */ +#define MAX_ITEM_LEN(block_size) (block_size - BLKH_SIZE - IH_SIZE) +#define MIN_ITEM_LEN 1 + + +/* object identifier for root dir */ +#define REISERFS_ROOT_OBJECTID 2 +#define REISERFS_ROOT_PARENT_OBJECTID 1 +extern struct key root_key; + + + + +/* + * Picture represents a leaf of the S+tree + * ______________________________________________________ + * | | Array of | | | + * |Block | Object-Item | F r e e | Objects- | + * | head | Headers | S p a c e | Items | + * |______|_______________|___________________|___________| + */ + +/* Header of a disk block. More precisely, header of a formatted leaf + or internal node, and not the header of an unformatted node. */ +struct block_head { + __u16 blk_level; /* Level of a block in the tree. */ + __u16 blk_nr_item; /* Number of keys/items in a block. */ + __u16 blk_free_space; /* Block free space in bytes. */ + __u16 blk_reserved; + /* dump this in v4/planA */ + struct key blk_right_delim_key; /* kept only for compatibility */ +}; + +#define BLKH_SIZE (sizeof(struct block_head)) + +/* + * values for blk_level field of the struct block_head + */ + +#define FREE_LEVEL 0 /* when node gets removed from the tree its + blk_level is set to FREE_LEVEL. It is then + used to see whether the node is still in the + tree */ + +#define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level.*/ + +/* Given the buffer head of a formatted node, resolve to the block head of that node. */ +#define B_BLK_HEAD(p_s_bh) ((struct block_head *)((p_s_bh)->b_data)) +/* Number of items that are in buffer. */ +#define B_NR_ITEMS(p_s_bh) (le16_to_cpu ( B_BLK_HEAD(p_s_bh)->blk_nr_item )) +#define B_LEVEL(bh) (le16_to_cpu ( B_BLK_HEAD(bh)->blk_level )) +#define B_FREE_SPACE(bh) (le16_to_cpu ( B_BLK_HEAD(bh)->blk_free_space )) + +#define PUT_B_NR_ITEMS(p_s_bh) do { B_BLK_HEAD(p_s_bh)->blk_nr_item = cpu_to_le16(val); } while (0) +#define PUT_B_LEVEL(bh, val) do { B_BLK_HEAD(bh)->blk_level = cpu_to_le16(val); } while (0) +#define PUT_B_FREE_SPACE(bh) do { B_BLK_HEAD(bh)->blk_free_space = cpu_to_le16(val); } while (0) + +/* Get right delimiting key. */ +#define B_PRIGHT_DELIM_KEY(p_s_bh) ( &(B_BLK_HEAD(p_s_bh)->blk_right_delim_key) ) + +/* Does the buffer contain a disk leaf. */ +#define B_IS_ITEMS_LEVEL(p_s_bh) ( B_BLK_HEAD(p_s_bh)->blk_level == DISK_LEAF_NODE_LEVEL ) + +/* Does the buffer contain a disk internal node */ +#define B_IS_KEYS_LEVEL(p_s_bh) ( B_BLK_HEAD(p_s_bh)->blk_level > DISK_LEAF_NODE_LEVEL &&\ + B_BLK_HEAD(p_s_bh)->blk_level <= MAX_HEIGHT ) + + + + +/***************************************************************************/ +/* STAT DATA */ +/***************************************************************************/ + + +// +// old stat data is 32 bytes long. 
We are going to distinguish new one by +// different size +// +struct stat_data_v1 +{ + __u16 sd_mode; /* file type, permissions */ + __u16 sd_nlink; /* number of hard links */ + __u16 sd_uid; /* owner */ + __u16 sd_gid; /* group */ + __u32 sd_size; /* file size */ + __u32 sd_atime; /* time of last access */ + __u32 sd_mtime; /* time file was last modified */ + __u32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ + union { + __u32 sd_rdev; + __u32 sd_blocks; /* number of blocks file uses */ + } __attribute__ ((__packed__)) u; + __u32 sd_first_direct_byte; /* first byte of file which is stored + in a direct item: except that if it + equals 1 it is a symlink and if it + equals ~(__u32)0 there is no + direct item. The existence of this + field really grates on me. Let's + replace it with a macro based on + sd_size and our tail suppression + policy. Someday. -Hans */ +} __attribute__ ((__packed__)); + +#define SD_V1_SIZE (sizeof(struct stat_data_v1)) + + +/* Stat Data on disk (reiserfs version of UFS disk inode minus the + address blocks) */ +struct stat_data { + __u16 sd_mode; /* file type, permissions */ + __u16 sd_reserved; + __u32 sd_nlink; /* number of hard links */ + __u64 sd_size; /* file size */ + __u32 sd_uid; /* owner */ + __u32 sd_gid; /* group */ + __u32 sd_atime; /* time of last access */ + __u32 sd_mtime; /* time file was last modified */ + __u32 sd_ctime; /* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */ + __u32 sd_blocks; + union { + __u32 sd_rdev; + //__u32 sd_first_direct_byte; + /* first byte of file which is stored in a + direct item: except that if it equals 1 + it is a symlink and if it equals + ~(__u32)0 there is no direct item. The + existence of this field really grates + on me. Let's replace it with a macro + based on sd_size and our tail + suppression policy? */ + } __attribute__ ((__packed__)) u; +} __attribute__ ((__packed__)); +// +// this is 40 bytes long +// +#define SD_SIZE (sizeof(struct stat_data)) + +#define stat_data_v1(ih) (ih_version (ih) == ITEM_VERSION_1) + + +/***************************************************************************/ +/* DIRECTORY STRUCTURE */ +/***************************************************************************/ +/* + Picture represents the structure of directory items + ________________________________________________ + | Array of | | | | | | + | directory |N-1| N-2 | .... | 1st |0th| + | entry headers | | | | | | + |_______________|___|_____|________|_______|___| + <---- directory entries ------> + + First directory item has k_offset component 1. We store "." and ".." + in one item, always, we never split "." and ".." into differing + items. This makes, among other things, the code for removing + directories simpler. */ +#define SD_OFFSET 0 +#define SD_UNIQUENESS 0 +#define DOT_OFFSET 1 +#define DOT_DOT_OFFSET 2 +#define DIRENTRY_UNIQUENESS 500 + +/* */ +#define FIRST_ITEM_OFFSET 1 + +/* + Q: How to get key of object pointed to by entry from entry? + + A: Each directory entry has its header. 
This header has deh_dir_id and deh_objectid fields, those are key + of object, entry points to */ + +/* NOT IMPLEMENTED: + Directory will someday contain stat data of object */ + + + +struct reiserfs_de_head +{ + __u32 deh_offset; /* third component of the directory entry key */ + __u32 deh_dir_id; /* objectid of the parent directory of the object, that is referenced + by directory entry */ + __u32 deh_objectid; /* objectid of the object, that is referenced by directory entry */ + __u16 deh_location; /* offset of name in the whole item */ + __u16 deh_state; /* whether 1) entry contains stat data (for future), and 2) whether + entry is hidden (unlinked) */ +} __attribute__ ((__packed__)); +#define DEH_SIZE sizeof(struct reiserfs_de_head) + +/* empty directory contains two entries "." and ".." and their headers */ +#define EMPTY_DIR_SIZE \ +(DEH_SIZE * 2 + ROUND_UP (strlen (".")) + ROUND_UP (strlen (".."))) + +/* old format directories have this size when empty */ +#define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3) + +#define DEH_Statdata 0 /* not used now */ +#define DEH_Visible 2 + +/* bitops which deals with unaligned addrs; + needed for alpha port. --zam */ +#ifdef __alpha__ +# define ADDR_UNALIGNED_BITS (5) +#endif + +#ifdef ADDR_UNALIGNED_BITS + +# define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1))) +# define unaligned_offset(addr) (((int)((long)(addr) & ((1 << ADDR_UNALIGNED_BITS) - 1))) << 3) + +# define set_bit_unaligned(nr, addr) set_bit((nr) + unaligned_offset(addr), aligned_address(addr)) +# define clear_bit_unaligned(nr, addr) clear_bit((nr) + unaligned_offset(addr), aligned_address(addr)) +# define test_bit_unaligned(nr, addr) test_bit((nr) + unaligned_offset(addr), aligned_address(addr)) + +#else + +# define set_bit_unaligned(nr, addr) set_bit(nr, addr) +# define clear_bit_unaligned(nr, addr) clear_bit(nr, addr) +# define test_bit_unaligned(nr, addr) test_bit(nr, addr) + +#endif + +#define deh_dir_id(deh) (__le32_to_cpu ((deh)->deh_dir_id)) +#define deh_objectid(deh) (__le32_to_cpu ((deh)->deh_objectid)) +#define deh_offset(deh) (__le32_to_cpu ((deh)->deh_offset)) + + +#define mark_de_with_sd(deh) set_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) +#define mark_de_without_sd(deh) clear_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) +#define mark_de_visible(deh) set_bit_unaligned (DEH_Visible, &((deh)->deh_state)) +#define mark_de_hidden(deh) clear_bit_unaligned (DEH_Visible, &((deh)->deh_state)) + +#define de_with_sd(deh) test_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) +#define de_visible(deh) test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) +#define de_hidden(deh) !test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) + +/* compose directory item containing "." and ".." entries (entries are + not aligned to 4 byte boundary) */ +extern inline void make_empty_dir_item_v1 (char * body, __u32 dirid, __u32 objid, + __u32 par_dirid, __u32 par_objid) +{ + struct reiserfs_de_head * deh; + + memset (body, 0, EMPTY_DIR_SIZE_V1); + deh = (struct reiserfs_de_head *)body; + + /* direntry header of "." */ + deh[0].deh_offset = cpu_to_le32 (DOT_OFFSET); + deh[0].deh_dir_id = cpu_to_le32 (dirid); + deh[0].deh_objectid = cpu_to_le32 (objid); + deh[0].deh_location = cpu_to_le16 (EMPTY_DIR_SIZE_V1 - strlen (".")); + deh[0].deh_state = 0; + mark_de_visible(&(deh[0])); + + /* direntry header of ".." */ + deh[1].deh_offset = cpu_to_le32 (DOT_DOT_OFFSET); + /* key of ".." 
for the root directory */ + deh[1].deh_dir_id = cpu_to_le32 (par_dirid); + deh[1].deh_objectid = cpu_to_le32 (par_objid); + deh[1].deh_location = cpu_to_le16 (le16_to_cpu (deh[0].deh_location) - strlen ("..")); + deh[1].deh_state = 0; + mark_de_visible(&(deh[1])); + + /* copy ".." and "." */ + memcpy (body + deh[0].deh_location, ".", 1); + memcpy (body + deh[1].deh_location, "..", 2); +} + +/* compose directory item containing "." and ".." entries */ +extern inline void make_empty_dir_item (char * body, __u32 dirid, __u32 objid, + __u32 par_dirid, __u32 par_objid) +{ + struct reiserfs_de_head * deh; + + memset (body, 0, EMPTY_DIR_SIZE); + deh = (struct reiserfs_de_head *)body; + + /* direntry header of "." */ + deh[0].deh_offset = cpu_to_le32 (DOT_OFFSET); + deh[0].deh_dir_id = cpu_to_le32 (dirid); + deh[0].deh_objectid = cpu_to_le32 (objid); + deh[0].deh_location = cpu_to_le16 (EMPTY_DIR_SIZE - ROUND_UP (strlen ("."))); + deh[0].deh_state = 0; + mark_de_visible(&(deh[0])); + + /* direntry header of ".." */ + deh[1].deh_offset = cpu_to_le32 (DOT_DOT_OFFSET); + /* key of ".." for the root directory */ + deh[1].deh_dir_id = cpu_to_le32 (par_dirid); + deh[1].deh_objectid = cpu_to_le32 (par_objid); + deh[1].deh_location = cpu_to_le16 (le16_to_cpu (deh[0].deh_location) - ROUND_UP (strlen (".."))); + deh[1].deh_state = 0; + mark_de_visible(&(deh[1])); + + /* copy ".." and "." */ + memcpy (body + deh[0].deh_location, ".", 1); + memcpy (body + deh[1].deh_location, "..", 2); +} + + +/* array of the entry headers */ + /* get item body */ +#define B_I_PITEM(bh,ih) ( (bh)->b_data + (ih)->ih_item_location ) +#define B_I_DEH(bh,ih) ((struct reiserfs_de_head *)(B_I_PITEM(bh,ih))) + +/* length of the directory entry in directory item. This define + calculates length of i-th directory entry using directory entry + locations from dir entry head. When it calculates length of 0-th + directory entry, it uses length of whole item in place of entry + location of the non-existent following entry in the calculation. + See picture above.*/ +/* +#define I_DEH_N_ENTRY_LENGTH(ih,deh,i) \ +((i) ? (((deh)-1)->deh_location - (deh)->deh_location) : ((ih)->ih_item_len) - (deh)->deh_location) +*/ +extern inline int entry_length (struct buffer_head * bh, struct item_head * ih, + int pos_in_item) +{ + struct reiserfs_de_head * deh; + + deh = B_I_DEH (bh, ih) + pos_in_item; + if (pos_in_item) + return (le16_to_cpu ((deh - 1)->deh_location) - le16_to_cpu (deh->deh_location)); + return (le16_to_cpu (ih->ih_item_len) - le16_to_cpu (deh->deh_location)); +} + + + +/* number of entries in the directory item, depends on ENTRY_COUNT being at the start of directory dynamic data. */ +#define I_ENTRY_COUNT(ih) ((ih)->u.ih_entry_count) + + +/* name by bh, ih and entry_num */ +#define B_I_E_NAME(bh,ih,entry_num) ((char *)(bh->b_data + ih->ih_item_location + (B_I_DEH(bh,ih)+(entry_num))->deh_location)) + +// two entries per block (at least) +//#define REISERFS_MAX_NAME_LEN(block_size) +//((block_size - BLKH_SIZE - IH_SIZE - DEH_SIZE * 2) / 2) + +// two entries per block (at least) +#define REISERFS_MAX_NAME_LEN(block_size) 255 + + + + +/* this structure is used for operations on directory entries. It is + not a disk structure. 
*/ +/* When reiserfs_find_entry or search_by_entry_key find directory + entry, they return filled reiserfs_dir_entry structure */ +struct reiserfs_dir_entry +{ + struct buffer_head * de_bh; + int de_item_num; + struct item_head * de_ih; + int de_entry_num; + struct reiserfs_de_head * de_deh; + int de_entrylen; + int de_namelen; + char * de_name; + char * de_gen_number_bit_string; + + __u32 de_dir_id; + __u32 de_objectid; + + struct cpu_key de_entry_key; +}; + +/* these defines are useful when a particular member of a reiserfs_dir_entry is needed */ + +/* pointer to file name, stored in entry */ +#define B_I_DEH_ENTRY_FILE_NAME(bh,ih,deh) (B_I_PITEM (bh, ih) + (deh)->deh_location) + +/* length of name */ +#define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \ +(I_DEH_N_ENTRY_LENGTH (ih, deh, entry_num) - (de_with_sd (deh) ? SD_SIZE : 0)) + + + +/* hash value occupies bits from 7 up to 30 */ +#define GET_HASH_VALUE(offset) ((offset) & 0x7fffff80LL) +/* generation number occupies 7 bits starting from 0 up to 6 */ +#define GET_GENERATION_NUMBER(offset) ((offset) & 0x7fLL) +#define MAX_GENERATION_NUMBER 127 + +#define SET_GENERATION_NUMBER(offset,gen_number) (GET_HASH_VALUE(offset)|(gen_number)) + + +/* + * Picture represents an internal node of the reiserfs tree + * ______________________________________________________ + * | | Array of | Array of | Free | + * |block | keys | pointers | space | + * | head | N | N+1 | | + * |______|_______________|___________________|___________| + */ + +/***************************************************************************/ +/* DISK CHILD */ +/***************************************************************************/ +/* Disk child pointer: The pointer from an internal node of the tree + to a node that is on disk. */ +struct disk_child { + __u32 dc_block_number; /* Disk child's block number. */ + __u16 dc_size; /* Disk child's used space. */ + __u16 dc_reserved; +}; + +#define DC_SIZE (sizeof(struct disk_child)) + +/* Get disk child by buffer header and position in the tree node. */ +#define B_N_CHILD(p_s_bh,n_pos) ((struct disk_child *)\ +((p_s_bh)->b_data+BLKH_SIZE+B_NR_ITEMS(p_s_bh)*KEY_SIZE+DC_SIZE*(n_pos))) + +/* Get disk child number by buffer header and position in the tree node. */ +#define B_N_CHILD_NUM(p_s_bh,n_pos) (le32_to_cpu (B_N_CHILD(p_s_bh,n_pos)->dc_block_number)) +#define PUT_B_N_CHILD_NUM(p_s_bh,n_pos, val) do { B_N_CHILD(p_s_bh,n_pos)->dc_block_number = cpu_to_le32(val); } while (0) + + /* maximal value of field child_size in structure disk_child */ + /* child size is the combined size of all items and their headers */ +#define MAX_CHILD_SIZE(bh) ((int)( (bh)->b_size - BLKH_SIZE )) + +/* amount of used space in buffer (not including block head) */ +#define B_CHILD_SIZE(cur) (MAX_CHILD_SIZE(cur)-(B_FREE_SPACE(cur))) + +/* max and min number of keys in internal node */ +#define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) ) +#define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2) + +/***************************************************************************/ +/* PATH STRUCTURES AND DEFINES */ +/***************************************************************************/ + + +/* Search_by_key fills up the path from the root to the leaf as it descends the tree looking for the + key. It uses reiserfs_bread to try to find buffers in the cache given their block number. If it + does not find them in the cache it reads them from disk. 
For each node search_by_key finds using + reiserfs_bread it then uses bin_search to look through that node. bin_search will find the + position of the block_number of the next node if it is looking through an internal node. If it + is looking through a leaf node bin_search will find the position of the item which has key either + equal to given key, or which is the maximal key less than the given key. */ + +struct path_element { + struct buffer_head * pe_buffer; /* Pointer to the buffer at the path in the tree. */ + int pe_position; /* Position in the tree node which is placed in the */ + /* buffer above. */ +}; + +#define MAX_HEIGHT 5 /* maximal height of a tree. don't change this without changing JOURNAL_PER_BALANCE_CNT */ +#define EXTENDED_MAX_HEIGHT 7 /* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */ +#define FIRST_PATH_ELEMENT_OFFSET 2 /* Must be equal to at least 2. */ + +#define ILLEGAL_PATH_ELEMENT_OFFSET 1 /* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */ +#define MAX_FEB_SIZE 6 /* this MUST be MAX_HEIGHT + 1. See about FEB below */ + + + +/* We need to keep track of who the ancestors of nodes are. When we + perform a search we record which nodes were visited while + descending the tree looking for the node we searched for. This list + of nodes is called the path. This information is used while + performing balancing. Note that this path information may become + invalid, and this means we must check it when using it to see if it + is still valid. You'll need to read search_by_key and the comments + in it, especially about decrement_counters_in_path(), to understand + this structure. + +Paths make the code so much harder to work with and debug.... An +enormous number of bugs are due to them, and trying to write or modify +code that uses them just makes my head hurt. They are based on an +excessive effort to avoid disturbing the precious VFS code.:-( The +gods only know how we are going to SMP the code that uses them. +znodes are the way! */ + + +struct path { + int path_length; /* Length of the array above. */ + struct path_element path_elements[EXTENDED_MAX_HEIGHT]; /* Array of the path elements. */ + int pos_in_item; +}; + +#define pos_in_item(path) ((path)->pos_in_item) + +#define INITIALIZE_PATH(var) \ +struct path var = {ILLEGAL_PATH_ELEMENT_OFFSET, } + +/* Get path element by path and path position. */ +#define PATH_OFFSET_PELEMENT(p_s_path,n_offset) ((p_s_path)->path_elements +(n_offset)) + +/* Get buffer header at the path by path and path position. */ +#define PATH_OFFSET_PBUFFER(p_s_path,n_offset) (PATH_OFFSET_PELEMENT(p_s_path,n_offset)->pe_buffer) + +/* Get position in the element at the path by path and path position. */ +#define PATH_OFFSET_POSITION(p_s_path,n_offset) (PATH_OFFSET_PELEMENT(p_s_path,n_offset)->pe_position) + + +#define PATH_PLAST_BUFFER(p_s_path) (PATH_OFFSET_PBUFFER((p_s_path), (p_s_path)->path_length)) + /* you know, to the person who didn't + write this the macro name does not + at first suggest what it does. + Maybe POSITION_FROM_PATH_END? Or + maybe we should just focus on + dumping paths... -Hans */ +#define PATH_LAST_POSITION(p_s_path) (PATH_OFFSET_POSITION((p_s_path), (p_s_path)->path_length)) + + +#define PATH_PITEM_HEAD(p_s_path) B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_path),PATH_LAST_POSITION(p_s_path)) + +/* in do_balance leaf has h == 0 in contrast with path structure, + where root has level == 0. 
That is why we need these defines */ +#define PATH_H_PBUFFER(p_s_path, h) PATH_OFFSET_PBUFFER (p_s_path, p_s_path->path_length - (h)) /* tb->S[h] */ +#define PATH_H_PPARENT(path, h) PATH_H_PBUFFER (path, (h) + 1) /* tb->F[h] or tb->S[0]->b_parent */ +#define PATH_H_POSITION(path, h) PATH_OFFSET_POSITION (path, path->path_length - (h)) +#define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) /* tb->S[h]->b_item_order */ + +#define PATH_H_PATH_OFFSET(p_s_path, n_h) ((p_s_path)->path_length - (n_h)) + +#define get_bh(path) PATH_PLAST_BUFFER(path) +#define get_ih(path) PATH_PITEM_HEAD(path) +#define get_item_pos(path) PATH_LAST_POSITION(path) +#define get_item(path) ((void *)B_N_PITEM(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION (path))) +#define item_moved(ih,path) comp_items(ih, path) +#define path_changed(ih,path) comp_items (ih, path) + + +/***************************************************************************/ +/* MISC */ +/***************************************************************************/ + +/* Size of pointer to the unformatted node. */ +#define UNFM_P_SIZE (sizeof(unp_t)) + +// in in-core inode key is stored on le form +#define INODE_PKEY(inode) ((struct key *)((inode)->u.reiserfs_i.i_key)) +//#define mark_tail_converted(inode) (atomic_set(&((inode)->u.reiserfs_i.i_converted),1)) +//#define unmark_tail_converted(inode) (atomic_set(&((inode)->u.reiserfs_i.i_converted), 0)) +//#define is_tail_converted(inode) (atomic_read(&((inode)->u.reiserfs_i.i_converted))) + + + +#define MAX_UL_INT 0xffffffff +#define MAX_INT 0x7ffffff +#define MAX_US_INT 0xffff + +///#define TOO_LONG_LENGTH (~0ULL) + +// reiserfs version 2 has max offset 60 bits. Version 1 - 32 bit offset +#define U32_MAX (~(__u32)0) +extern inline loff_t max_reiserfs_offset (struct inode * inode) +{ + if (inode_items_version (inode) == ITEM_VERSION_1) + return (loff_t)U32_MAX; + + return (loff_t)((~(__u64)0) >> 4); +} + + +/*#define MAX_KEY_UNIQUENESS MAX_UL_INT*/ +#define MAX_KEY_OBJECTID MAX_UL_INT + + +#define MAX_B_NUM MAX_UL_INT +#define MAX_FC_NUM MAX_US_INT + + +/* the purpose is to detect overflow of an unsigned short */ +#define REISERFS_LINK_MAX (MAX_US_INT - 1000) + + +/* The following defines are used in reiserfs_insert_item and reiserfs_append_item */ +#define REISERFS_KERNEL_MEM 0 /* reiserfs kernel memory mode */ +#define REISERFS_USER_MEM 1 /* reiserfs user memory mode */ + +#define fs_generation(s) ((s)->u.reiserfs_sb.s_generation_counter) +#define get_generation(s) atomic_read (&fs_generation(s)) +#define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen) +#define fs_changed(gen,s) (gen != get_generation (s)) + + +/***************************************************************************/ +/* FIXATE NODES */ +/***************************************************************************/ + +//#define VI_TYPE_STAT_DATA 1 +//#define VI_TYPE_DIRECT 2 +//#define VI_TYPE_INDIRECT 4 +//#define VI_TYPE_DIRECTORY 8 +//#define VI_TYPE_FIRST_DIRECTORY_ITEM 16 +//#define VI_TYPE_INSERTED_DIRECTORY_ITEM 32 + +#define VI_TYPE_LEFT_MERGEABLE 1 +#define VI_TYPE_RIGHT_MERGEABLE 2 + +/* To make any changes in the tree we always first find node, that + contains item to be changed/deleted or place to insert a new + item. We call this node S. To do balancing we need to decide what + we will shift to left/right neighbor, or to a new node, where new + item will be etc. To make this analysis simpler we build virtual + node. 
Virtual node is an array of items, that will replace items of + node S. (For instance if we are going to delete an item, virtual + node does not contain it). Virtual node keeps information about + item sizes and types, mergeability of first and last items, sizes + of all entries in directory item. We use this array of items when + calculating what we can shift to neighbors and how many nodes we + have to have if we do not any shiftings, if we shift to left/right + neighbor or to both. */ +struct virtual_item +{ + int vi_index; // index in the array of item operations + unsigned short vi_type; // left/right mergeability + unsigned short vi_item_len; /* length of item that it will have after balancing */ + struct item_head * vi_ih; + const char * vi_item; // body of item (old or new) + const void * vi_new_data; // 0 always but paste mode + void * vi_uarea; // item specific area +}; + + +struct virtual_node +{ + char * vn_free_ptr; /* this is a pointer to the free space in the buffer */ + unsigned short vn_nr_item; /* number of items in virtual node */ + short vn_size; /* size of node , that node would have if it has unlimited size and no balancing is performed */ + short vn_mode; /* mode of balancing (paste, insert, delete, cut) */ + short vn_affected_item_num; + short vn_pos_in_item; + struct item_head * vn_ins_ih; /* item header of inserted item, 0 for other modes */ + const void * vn_data; + struct virtual_item * vn_vi; /* array of items (including a new one, excluding item to be deleted) */ +}; + + +/***************************************************************************/ +/* TREE BALANCE */ +/***************************************************************************/ + +/* This temporary structure is used in tree balance algorithms, and + constructed as we go to the extent that its various parts are + needed. It contains arrays of nodes that can potentially be + involved in the balancing of node S, and parameters that define how + each of the nodes must be balanced. Note that in these algorithms + for balancing the worst case is to need to balance the current node + S and the left and right neighbors and all of their parents plus + create a new node. We implement S1 balancing for the leaf nodes + and S0 balancing for the internal nodes (S1 and S0 are defined in + our papers.)*/ + +#define MAX_FREE_BLOCK 7 /* size of the array of buffers to free at end of do_balance */ + +/* maximum number of FEB blocknrs on a single level */ +#define MAX_AMOUNT_NEEDED 2 + +/* someday somebody will prefix every field in this struct with tb_ */ +struct tree_balance +{ + int tb_mode; + int need_balance_dirty; + struct super_block * tb_sb; + struct reiserfs_transaction_handle *transaction_handle ; + struct path * tb_path; + struct buffer_head * L[MAX_HEIGHT]; /* array of left neighbors of nodes in the path */ + struct buffer_head * R[MAX_HEIGHT]; /* array of right neighbors of nodes in the path*/ + struct buffer_head * FL[MAX_HEIGHT]; /* array of fathers of the left neighbors */ + struct buffer_head * FR[MAX_HEIGHT]; /* array of fathers of the right neighbors */ + struct buffer_head * CFL[MAX_HEIGHT]; /* array of common parents of center node and its left neighbor */ + struct buffer_head * CFR[MAX_HEIGHT]; /* array of common parents of center node and its right neighbor */ + + struct buffer_head * FEB[MAX_FEB_SIZE]; /* array of empty buffers. Number of buffers in array equals + cur_blknum. 
*/ + struct buffer_head * used[MAX_FEB_SIZE]; + struct buffer_head * thrown[MAX_FEB_SIZE]; + int lnum[MAX_HEIGHT]; /* array of number of items which must be + shifted to the left in order to balance the + current node; for leaves includes item that + will be partially shifted; for internal + nodes, it is the number of child pointers + rather than items. It includes the new item + being created. The code sometimes subtracts + one to get the number of wholly shifted + items for other purposes. */ + int rnum[MAX_HEIGHT]; /* substitute right for left in comment above */ + int lkey[MAX_HEIGHT]; /* array indexed by height h mapping the key delimiting L[h] and + S[h] to its item number within the node CFL[h] */ + int rkey[MAX_HEIGHT]; /* substitute r for l in comment above */ + int insert_size[MAX_HEIGHT]; /* the number of bytes by we are trying to add or remove from + S[h]. A negative value means removing. */ + int blknum[MAX_HEIGHT]; /* number of nodes that will replace node S[h] after + balancing on the level h of the tree. If 0 then S is + being deleted, if 1 then S is remaining and no new nodes + are being created, if 2 or 3 then 1 or 2 new nodes is + being created */ + + /* fields that are used only for balancing leaves of the tree */ + int cur_blknum; /* number of empty blocks having been already allocated */ + int s0num; /* number of items that fall into left most node when S[0] splits */ + int s1num; /* number of items that fall into first new node when S[0] splits */ + int s2num; /* number of items that fall into second new node when S[0] splits */ + int lbytes; /* number of bytes which can flow to the left neighbor from the left */ + /* most liquid item that cannot be shifted from S[0] entirely */ + /* if -1 then nothing will be partially shifted */ + int rbytes; /* number of bytes which will flow to the right neighbor from the right */ + /* most liquid item that cannot be shifted from S[0] entirely */ + /* if -1 then nothing will be partially shifted */ + int s1bytes; /* number of bytes which flow to the first new node when S[0] splits */ + /* note: if S[0] splits into 3 nodes, then items do not need to be cut */ + int s2bytes; + struct buffer_head * buf_to_free[MAX_FREE_BLOCK]; /* buffers which are to be freed after do_balance finishes by unfix_nodes */ + char * vn_buf; /* kmalloced memory. Used to create + virtual node and keep map of + dirtied bitmap blocks */ + int vn_buf_size; /* size of the vn_buf */ + struct virtual_node * tb_vn; /* VN starts after bitmap of bitmap blocks */ + + int fs_gen; /* saved value of `reiserfs_generation' counter + see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */ +} ; + + +#if 0 + /* when balancing we potentially affect a 3 node wide column of nodes + in the tree (the top of the column may be tapered). C is the nodes + at the center of this column, and L and R are the nodes to the + left and right. 
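
/* A userspace sketch of the generation-counter pattern behind fs_generation(),
   get_generation(), fs_changed() and the fs_gen field saved in tree_balance
   above: remember the counter before a step that may block, bump it whenever
   the tree is rebalanced, and repeat the search if the two values no longer
   match.  Plain ints stand in for the kernel's atomic_t. */
#include <stdio.h>

static int s_generation_counter;        /* bumped by every rebalance        */

static void rebalance_tree(void)        /* stand-in for do_balance()        */
{
    s_generation_counter++;
}

int main(void)
{
    int fs_gen = s_generation_counter;  /* saved, as tree_balance.fs_gen is */

    rebalance_tree();                   /* someone else changed the tree    */

    if (fs_gen != s_generation_counter) /* fs_changed(fs_gen, s)            */
        printf("tree changed: repeat search_by_key() before balancing\n");
    else
        printf("cached path is still valid\n");
    return 0;
}
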
*/ + struct seal * L_path_seals[MAX_HEIGHT]; + struct seal * C_path_seals[MAX_HEIGHT]; + struct seal * R_path_seals[MAX_HEIGHT]; + char L_path_lock_types[MAX_HEIGHT]; /* 'r', 'w', or 'n' for read, write, or none */ + char C_path_lock_types[MAX_HEIGHT]; + char R_path_lock_types[MAX_HEIGHT]; + + + struct seal_list_elem * C_seal[MAX_HEIGHT]; /* array of seals on nodes in the path */ + struct seal_list_elem * L_seal[MAX_HEIGHT]; /* array of seals on left neighbors of nodes in the path */ + struct seal_list_elem * R_seal[MAX_HEIGHT]; /* array of seals on right neighbors of nodes in the path*/ + struct seal_list_elem * FL_seal[MAX_HEIGHT]; /* array of seals on fathers of the left neighbors */ + struct seal_list_elem * FR_seal[MAX_HEIGHT]; /* array of seals on fathers of the right neighbors */ + struct seal_list_elem * CFL_seal[MAX_HEIGHT]; /* array of seals on common parents of center node and its left neighbor */ + struct seal_list_elem * CFR_seal[MAX_HEIGHT]; /* array of seals on common parents of center node and its right neighbor */ + + struct char C_desired_lock_type[MAX_HEIGHT]; /* 'r', 'w', or 'n' for read, write, or none */ + struct char L_desired_lock_type[MAX_HEIGHT]; + struct char R_desired_lock_type[MAX_HEIGHT]; + struct char FL_desired_lock_type[MAX_HEIGHT]; + struct char FR_desired_lock_type[MAX_HEIGHT]; + struct char CFL_desired_lock_type[MAX_HEIGHT]; + struct char CFR_desired_lock_type[MAX_HEIGHT]; +#endif + + + + + +/* These are modes of balancing */ + +/* When inserting an item. */ +#define M_INSERT 'i' +/* When inserting into (directories only) or appending onto an already + existant item. */ +#define M_PASTE 'p' +/* When deleting an item. */ +#define M_DELETE 'd' +/* When truncating an item or removing an entry from a (directory) item. */ +#define M_CUT 'c' + +/* used when balancing on leaf level skipped (in reiserfsck) */ +#define M_INTERNAL 'n' + +/* When further balancing is not needed, then do_balance does not need + to be called. */ +#define M_SKIP_BALANCING 's' +#define M_CONVERT 'v' + +/* modes of leaf_move_items */ +#define LEAF_FROM_S_TO_L 0 +#define LEAF_FROM_S_TO_R 1 +#define LEAF_FROM_R_TO_L 2 +#define LEAF_FROM_L_TO_R 3 +#define LEAF_FROM_S_TO_SNEW 4 + +#define FIRST_TO_LAST 0 +#define LAST_TO_FIRST 1 + +/* used in do_balance for passing parent of node information that has + been gotten from tb struct */ +struct buffer_info { + struct tree_balance * tb; + struct buffer_head * bi_bh; + struct buffer_head * bi_parent; + int bi_position; +}; + + +/* there are 4 types of items: stat data, directory item, indirect, direct. ++-------------------+------------+--------------+------------+ +| | k_offset | k_uniqueness | mergeable? 
| ++-------------------+------------+--------------+------------+ +| stat data | 0 | 0 | no | ++-------------------+------------+--------------+------------+ +| 1st directory item| DOT_OFFSET |DIRENTRY_UNIQUENESS| no | +| non 1st directory | hash value | | yes | +| item | | | | ++-------------------+------------+--------------+------------+ +| indirect item | offset + 1 |TYPE_INDIRECT | if this is not the first indirect item of the object ++-------------------+------------+--------------+------------+ +| direct item | offset + 1 |TYPE_DIRECT | if not this is not the first direct item of the object ++-------------------+------------+--------------+------------+ +*/ + +struct item_operations { + int (*bytes_number) (struct item_head * ih, int block_size); + void (*decrement_key) (struct cpu_key *); + int (*is_left_mergeable) (struct key * ih, unsigned long bsize); + void (*print_item) (struct item_head *, char * item); + void (*check_item) (struct item_head *, char * item); + + int (*create_vi) (struct virtual_node * vn, struct virtual_item * vi, + int is_affected, int insert_size); + int (*check_left) (struct virtual_item * vi, int free, + int start_skip, int end_skip); + int (*check_right) (struct virtual_item * vi, int free); + int (*part_size) (struct virtual_item * vi, int from, int to); + int (*unit_num) (struct virtual_item * vi); + void (*print_vi) (struct virtual_item * vi); +}; + + +extern struct item_operations stat_data_ops, indirect_ops, direct_ops, + direntry_ops; +extern struct item_operations * item_ops [4]; + +#define op_bytes_number(ih,bsize) item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize) +#define op_is_left_mergeable(key,bsize) item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize) +#define op_print_item(ih,item) item_ops[le_ih_k_type (ih)]->print_item (ih, item) +#define op_check_item(ih,item) item_ops[le_ih_k_type (ih)]->check_item (ih, item) +#define op_create_vi(vn,vi,is_affected,insert_size) item_ops[le_ih_k_type ((vi)->vi_ih)]->create_vi (vn,vi,is_affected,insert_size) +#define op_check_left(vi,free,start_skip,end_skip) item_ops[(vi)->vi_index]->check_left (vi, free, start_skip, end_skip) +#define op_check_right(vi,free) item_ops[(vi)->vi_index]->check_right (vi, free) +#define op_part_size(vi,from,to) item_ops[(vi)->vi_index]->part_size (vi, from, to) +#define op_unit_num(vi) item_ops[(vi)->vi_index]->unit_num (vi) +#define op_print_vi(vi) item_ops[(vi)->vi_index]->print_vi (vi) + + + + + +#define COMP_KEYS comp_keys +#define COMP_SHORT_KEYS comp_short_keys +#define keys_of_same_object comp_short_keys + +/*#define COMP_KEYS(p_s_key1, p_s_key2) comp_keys((unsigned long *)(p_s_key1), (unsigned long *)(p_s_key2)) +#define COMP_SHORT_KEYS(p_s_key1, p_s_key2) comp_short_keys((unsigned long *)(p_s_key1), (unsigned long *)(p_s_key2))*/ + + +/* number of blocks pointed to by the indirect item */ +#define I_UNFM_NUM(p_s_ih) ( (p_s_ih)->ih_item_len / UNFM_P_SIZE ) + +/* the used space within the unformatted node corresponding to pos within the item pointed to by ih */ +#define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? 
(size) - (ih)->u.ih_free_space : (size)) + +/* number of bytes contained by the direct item or the unformatted nodes the indirect item points to */ + + +/* get the item header */ +#define B_N_PITEM_HEAD(bh,item_num) ( (struct item_head * )((bh)->b_data + BLKH_SIZE) + (item_num) ) + +/* get key */ +#define B_N_PDELIM_KEY(bh,item_num) ( (struct key * )((bh)->b_data + BLKH_SIZE) + (item_num) ) + +/* get the key */ +#define B_N_PKEY(bh,item_num) ( &(B_N_PITEM_HEAD(bh,item_num)->ih_key) ) + +/* get item body */ +#define B_N_PITEM(bh,item_num) ( (bh)->b_data + B_N_PITEM_HEAD((bh),(item_num))->ih_item_location) + +/* get the stat data by the buffer header and the item order */ +#define B_N_STAT_DATA(bh,nr) \ +( (struct stat_data *)((bh)->b_data+B_N_PITEM_HEAD((bh),(nr))->ih_item_location ) ) + + /* following defines use reiserfs buffer header and item header */ + +/* get stat-data */ +#define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + (ih)->ih_item_location) ) + +// this is 3976 for size==4096 +#define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE) + +/* indirect items consist of entries which contain blocknrs, pos + indicates which entry, and B_I_POS_UNFM_POINTER resolves to the + blocknr contained by the entry pos points to */ +#define B_I_POS_UNFM_POINTER(bh,ih,pos) (*(((unp_t *)B_I_PITEM(bh,ih)) + (pos))) +#define PUT_B_I_POS_UNFM_POINTER(bh,ih,pos, val) do {*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)) = cpu_to_le32(val); } while (0) + +/* Reiserfs buffer cache statistics. */ +#ifdef REISERFS_CACHE_STAT + struct reiserfs_cache_stat + { + int nr_reiserfs_ll_r_block; /* Number of block reads. */ + int nr_reiserfs_ll_w_block; /* Number of block writes. */ + int nr_reiserfs_schedule; /* Number of locked buffers waits. */ + unsigned long nr_reiserfs_bread; /* Number of calls to reiserfs_bread function */ + unsigned long nr_returns; /* Number of breads of buffers that were hoped to contain a key but did not after bread completed + (usually due to object shifting while bread was executing.) + In the code this manifests as the number + of times that the repeat variable is nonzero in search_by_key.*/ + unsigned long nr_fixed; /* number of calls of fix_nodes function */ + unsigned long nr_failed; /* number of calls of fix_nodes in which schedule occurred while the function worked */ + unsigned long nr_find1; /* How many times we access a child buffer using its direct pointer from an internal node.*/ + unsigned long nr_find2; /* Number of times there is neither a direct pointer to + nor any entry in the child list pointing to the buffer. */ + unsigned long nr_find3; /* When parent is locked (meaning that there are no direct pointers) + or parent is leaf and buffer to be found is an unformatted node. */ + } cache_stat; +#endif + +struct reiserfs_iget4_args { + __u32 objectid ; +} ; + +/***************************************************************************/ +/* FUNCTION DECLARATIONS */ +/***************************************************************************/ + +/*#ifdef __KERNEL__*/ + +/* journal.c see journal.c for all the comments here */ + +#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit structs at 4k */ + + +/* first block written in a commit. */ +struct reiserfs_journal_desc { + __u32 j_trans_id ; /* id of commit */ + __u32 j_len ; /* length of commit. 
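
/* A self-contained sketch of the leaf layout the B_N_PITEM_HEAD/B_N_PITEM
   macros above walk: a block head at the front of the buffer, an array of
   item heads right after it, and each item body found through its
   ih_item_location offset.  The trimmed structure and the 24-byte BLKH_SIZE
   are placeholders, not the on-disk definitions. */
#include <stdio.h>
#include <string.h>

#define BLK_SIZE  4096
#define BLKH_SIZE 24                        /* assumed block head size */

struct demo_item_head {                     /* trimmed stand-in        */
    unsigned short ih_item_len;
    unsigned short ih_item_location;        /* offset of body in block */
};

static struct demo_item_head *item_head(char *block, int n)
{
    /* mirrors B_N_PITEM_HEAD(bh, item_num) */
    return (struct demo_item_head *)(block + BLKH_SIZE) + n;
}

static char *item_body(char *block, int n)
{
    /* mirrors B_N_PITEM(bh, item_num) */
    return block + item_head(block, n)->ih_item_location;
}

int main(void)
{
    static char block[BLK_SIZE];
    struct demo_item_head *ih = item_head(block, 0);

    ih->ih_item_len = 5;
    ih->ih_item_location = BLK_SIZE - 8;    /* body stored near the end */
    memcpy(block + ih->ih_item_location, "body", 5);

    printf("item 0: len=%u body=\"%s\"\n",
           (unsigned)ih->ih_item_len, item_body(block, 0));
    return 0;
}
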
len +1 is the commit block */ + __u32 j_mount_id ; /* mount id of this trans*/ + __u32 j_realblock[JOURNAL_TRANS_HALF] ; /* real locations for each block */ + char j_magic[12] ; +} ; + +/* last block written in a commit */ +struct reiserfs_journal_commit { + __u32 j_trans_id ; /* must match j_trans_id from the desc block */ + __u32 j_len ; /* ditto */ + __u32 j_realblock[JOURNAL_TRANS_HALF] ; /* real locations for each block */ + char j_digest[16] ; /* md5 sum of all the blocks involved, including desc and commit. not used, kill it */ +} ; + +/* this header block gets written whenever a transaction is considered fully flushed, and is more recent than the +** last fully flushed transaction. fully flushed means all the log blocks and all the real blocks are on disk, +** and this transaction does not need to be replayed. +*/ +struct reiserfs_journal_header { + __u32 j_last_flush_trans_id ; /* id of last fully flushed transaction */ + __u32 j_first_unflushed_offset ; /* offset in the log of where to start replay after a crash */ + __u32 j_mount_id ; +} ; + +/* these are used to keep flush pages that contain converted direct items. +** if the page is not flushed before the transaction that converted it +** is committed, we risk losing data +** +** note, while a page is in this list, its counter is incremented. +*/ +struct reiserfs_page_list { + struct reiserfs_page_list *next ; + struct reiserfs_page_list *prev ; + struct page *page ; + unsigned long blocknr ; /* block number holding converted data */ + + /* if a transaction writer has the page locked the flush_page_list + ** function doesn't need to (and can't) get the lock while flushing + ** the page. do_not_lock needs to be set by anyone who calls journal_end + ** with a page lock held. They have to look in the inode and see + ** if the inode has the page they have locked in the flush list. + ** + ** this sucks. + */ + int do_not_lock ; +} ; + +extern task_queue reiserfs_commit_thread_tq ; +extern wait_queue_head_t reiserfs_commit_thread_wait ; + +/* biggest tunable defines are right here */ +#define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */ +#define JOURNAL_MAX_BATCH 900 /* max blocks to batch into one transaction, don't make this any bigger than 900 */ +#define JOURNAL_MAX_COMMIT_AGE 30 +#define JOURNAL_MAX_TRANS_AGE 30 +#define JOURNAL_PER_BALANCE_CNT 12 /* must be >= (5 + 2 * (MAX_HEIGHT-2) + 1) */ + +/* both of these can be as low as 1, or as high as you want. The min is the +** number of 4k bitmap nodes preallocated on mount. New nodes are allocated +** as needed, and released when transactions are committed. On release, if +** the current number of nodes is > max, the node is freed, otherwise, +** it is put on a free list for faster use later. +*/ +#define REISERFS_MIN_BITMAP_NODES 10 +#define REISERFS_MAX_BITMAP_NODES 100 + +#define JBH_HASH_SHIFT 13 /* these are based on journal hash size of 8192 */ +#define JBH_HASH_MASK 8191 + +/* After several hours of tedious analysis, the following hash + * function won. Do not mess with it... -DaveM + */ +#define _jhashfn(dev,block) \ + ((((dev)<<(JBH_HASH_SHIFT - 6)) ^ ((dev)<<(JBH_HASH_SHIFT - 9))) ^ \ + (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12)))) +#define journal_hash(t,dev,block) ((t)[_jhashfn((dev),(block)) & JBH_HASH_MASK]) + +/* finds n'th buffer with 0 being the start of this commit. Needs to go away, j_ap_blocks has changed +** since I created this. 
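
/* The journal hash can be exercised outside the kernel; the macro below is
   copied from the defines above, with plain unsigned ints standing in for
   kdev_t.  With JBH_HASH_SHIFT 13 it mixes shifted copies of the device and
   block numbers, and masking with JBH_HASH_MASK folds the result into one of
   the 8192 hash chains. */
#include <stdio.h>

#define JBH_HASH_SHIFT 13
#define JBH_HASH_MASK  8191

#define _jhashfn(dev,block) \
  ((((dev)<<(JBH_HASH_SHIFT - 6)) ^ ((dev)<<(JBH_HASH_SHIFT - 9))) ^ \
   (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12))))

int main(void)
{
    unsigned int dev = 0x0801;              /* example device number */
    unsigned int block;

    for (block = 100; block < 104; block++)
        printf("dev 0x%04x block %u -> chain %u\n",
               dev, block, _jhashfn(dev, block) & JBH_HASH_MASK);
    return 0;
}
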
One chunk of code in journal.c needs changing before deleting it +*/ +#define JOURNAL_BUFFER(j,n) ((j)->j_ap_blocks[((j)->j_start + (n)) % JOURNAL_BLOCK_COUNT]) + +void reiserfs_wait_on_write_block(struct super_block *s) ; +void reiserfs_block_writes(struct reiserfs_transaction_handle *th) ; +void reiserfs_allow_writes(struct super_block *s) ; +void reiserfs_check_lock_depth(char *caller) ; +void reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh, int wait) ; +void reiserfs_restore_prepared_buffer(struct super_block *, struct buffer_head *bh) ; +int journal_init(struct super_block *) ; +int journal_release(struct reiserfs_transaction_handle*, struct super_block *) ; +int journal_release_error(struct reiserfs_transaction_handle*, struct super_block *) ; +int journal_end(struct reiserfs_transaction_handle *, struct super_block *, unsigned long) ; +int journal_end_sync(struct reiserfs_transaction_handle *, struct super_block *, unsigned long) ; +int journal_mark_dirty_nolog(struct reiserfs_transaction_handle *, struct super_block *, struct buffer_head *bh) ; +int journal_mark_freed(struct reiserfs_transaction_handle *, struct super_block *, unsigned long blocknr) ; +int push_journal_writer(char *w) ; +int pop_journal_writer(int windex) ; +int journal_lock_dobalance(struct super_block *p_s_sb) ; +int journal_unlock_dobalance(struct super_block *p_s_sb) ; +int journal_transaction_should_end(struct reiserfs_transaction_handle *, int) ; +int reiserfs_in_journal(struct super_block *p_s_sb, kdev_t dev, unsigned long bl, int size, int searchall, unsigned long *next) ; +int journal_begin(struct reiserfs_transaction_handle *, struct super_block *p_s_sb, unsigned long) ; +int journal_join(struct reiserfs_transaction_handle *, struct super_block *p_s_sb, unsigned long) ; +struct super_block *reiserfs_get_super(kdev_t dev) ; +void flush_async_commits(struct super_block *p_s_sb) ; + +int remove_from_transaction(struct super_block *p_s_sb, unsigned long blocknr, int already_cleaned) ; +int remove_from_journal_list(struct super_block *s, struct reiserfs_journal_list *jl, struct buffer_head *bh, int remove_freed) ; + +int buffer_journaled(struct buffer_head *bh) ; +int mark_buffer_journal_new(struct buffer_head *bh) ; +int reiserfs_sync_all_buffers(kdev_t dev, int wait) ; +int reiserfs_sync_buffers(kdev_t dev, int wait) ; +int reiserfs_add_page_to_flush_list(struct reiserfs_transaction_handle *, + struct inode *, struct buffer_head *) ; +int reiserfs_remove_page_from_flush_list(struct reiserfs_transaction_handle *, + struct inode *) ; + +int reiserfs_allocate_list_bitmaps(struct super_block *s, struct reiserfs_list_bitmap *, int) ; + + /* why is this kerplunked right here? 
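
/* A quick check of the constraint quoted next to JOURNAL_PER_BALANCE_CNT
   above.  MAX_HEIGHT is defined earlier in reiserfs_fs.h, outside this hunk;
   the conventional value 5 is assumed here, and with it the lower bound
   5 + 2*(MAX_HEIGHT-2) + 1 evaluates to 12, exactly the value the define
   uses. */
#include <stdio.h>

#define MAX_HEIGHT               5      /* assumption, not from this hunk */
#define JOURNAL_PER_BALANCE_CNT 12

int main(void)
{
    int lower_bound = 5 + 2 * (MAX_HEIGHT - 2) + 1;

    printf("lower bound = %d, define = %d -> %s\n",
           lower_bound, JOURNAL_PER_BALANCE_CNT,
           JOURNAL_PER_BALANCE_CNT >= lower_bound ? "ok" : "too small");
    return 0;
}
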
*/ +static inline int reiserfs_buffer_prepared(struct buffer_head *bh) { + if (bh && test_bit(BH_JPrepared, &bh->b_state)) + return 1 ; + else + return 0 ; +} + +/* buffer was journaled, waiting to get to disk */ +static inline int buffer_journal_dirty(struct buffer_head *bh) { + if (bh) + return test_bit(BH_JDirty_wait, &bh->b_state) ; + else + return 0 ; +} +static inline int mark_buffer_notjournal_dirty(struct buffer_head *bh) { + if (bh) + clear_bit(BH_JDirty_wait, &bh->b_state) ; + return 0 ; +} +static inline int mark_buffer_notjournal_new(struct buffer_head *bh) { + if (bh) { + clear_bit(BH_JNew, &bh->b_state) ; + } + return 0 ; +} + +/* objectid.c */ +__u32 reiserfs_get_unused_objectid (struct reiserfs_transaction_handle *th); +void reiserfs_release_objectid (struct reiserfs_transaction_handle *th, __u32 objectid_to_release); +int reiserfs_convert_objectid_map_v1(struct super_block *) ; + +/* stree.c */ +int B_IS_IN_TREE(struct buffer_head *); +extern inline void copy_key (void * to, void * from); +extern inline void copy_short_key (void * to, void * from); +extern inline void copy_item_head(void * p_v_to, void * p_v_from); + +// first key is in cpu form, second - le +extern inline int comp_keys (struct key * le_key, struct cpu_key * cpu_key); +extern inline int comp_short_keys (struct key * le_key, struct cpu_key * cpu_key); +extern inline void le_key2cpu_key (struct cpu_key * to, struct key * from); + +// both are cpu keys +extern inline int comp_cpu_keys (struct cpu_key *, struct cpu_key *); +extern inline int comp_short_cpu_keys (struct cpu_key *, struct cpu_key *); +extern inline void cpu_key2cpu_key (struct cpu_key *, struct cpu_key *); + +// both are in le form +extern inline int comp_le_keys (struct key *, struct key *); +extern inline int comp_short_le_keys (struct key *, struct key *); + +// +// get key version from on disk key - kludge +// +extern inline int le_key_version (struct key * key) +{ + int type; + + type = le16_to_cpu (key->u.k_offset_v2.k_type); + if (type != TYPE_DIRECT && type != TYPE_INDIRECT && type != TYPE_DIRENTRY) + return ITEM_VERSION_1; + + return ITEM_VERSION_2; + +} + + +extern inline void copy_key (void * to, void * from) +{ + memcpy (to, from, KEY_SIZE); +} + + +int comp_items (struct item_head * p_s_ih, struct path * p_s_path); +struct key * get_rkey (struct path * p_s_chk_path, struct super_block * p_s_sb); +inline int bin_search (void * p_v_key, void * p_v_base, int p_n_num, int p_n_width, int * p_n_pos); +int search_by_key (struct super_block *, struct cpu_key *, struct path *, int); +#define search_item(s,key,path) search_by_key (s, key, path, DISK_LEAF_NODE_LEVEL) +int search_for_position_by_key (struct super_block * p_s_sb, struct cpu_key * p_s_cpu_key, struct path * p_s_search_path); +extern inline void decrement_bcount (struct buffer_head * p_s_bh); +void decrement_counters_in_path (struct path * p_s_search_path); +void pathrelse (struct path * p_s_search_path); +int reiserfs_check_path(struct path *p) ; +void pathrelse_and_restore (struct super_block *s, struct path * p_s_search_path); + +int reiserfs_insert_item (struct reiserfs_transaction_handle *th, + struct path * path, + struct cpu_key * key, + struct item_head * ih, const char * body); + +int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th, + struct path * path, + struct cpu_key * key, + const char * body, int paste_size); + +int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th, + struct path * path, + struct cpu_key * key, + struct inode * inode, + 
struct page *page, + loff_t new_file_size); + +int reiserfs_delete_item (struct reiserfs_transaction_handle *th, + struct path * path, + struct cpu_key * key, + struct inode * inode, + struct buffer_head * p_s_un_bh); + + +void reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode * p_s_inode); +void reiserfs_do_truncate (struct reiserfs_transaction_handle *th, + struct inode * p_s_inode, struct page *, + int update_timestamps); +// +//void lock_inode_to_convert (struct inode * p_s_inode); +//void unlock_inode_after_convert (struct inode * p_s_inode); +//void increment_i_read_sync_counter (struct inode * p_s_inode); +//void decrement_i_read_sync_counter (struct inode * p_s_inode); + + +#define block_size(inode) ((inode)->i_sb->s_blocksize) +#define file_size(inode) ((inode)->i_size) +#define tail_size(inode) (file_size (inode) & (block_size (inode) - 1)) + +#define tail_has_to_be_packed(inode) (!dont_have_tails ((inode)->i_sb) &&\ +!STORE_TAIL_IN_UNFM(file_size (inode), tail_size(inode), block_size (inode))) + +/* +int get_buffer_by_range (struct super_block * p_s_sb, struct key * p_s_range_begin, struct key * p_s_range_end, + struct buffer_head ** pp_s_buf, unsigned long * p_n_objectid); +int get_buffers_from_range (struct super_block * p_s_sb, struct key * p_s_range_start, struct key * p_s_range_end, + struct buffer_head ** p_s_range_buffers, + int n_max_nr_buffers_to_return); +*/ + +#ifndef REISERFS_FSCK + +//inline int is_left_mergeable (struct item_head * ih, unsigned long bsize); + +#else + +int is_left_mergeable (struct super_block * s, struct path * path); +int is_right_mergeable (struct super_block * s, struct path * path); +int are_items_mergeable (struct item_head * left, struct item_head * right, int bsize); + +#endif +void padd_item (char * item, int total_length, int length); + + +/* inode.c */ + +int reiserfs_prepare_write(struct file *, struct page *, unsigned, unsigned) ; +void reiserfs_truncate_file(struct inode *, int update_timestamps) ; +void make_cpu_key (struct cpu_key * cpu_key, const struct inode * inode, loff_t offset, + int type, int key_length); +void make_le_item_head (struct item_head * ih, struct cpu_key * key, int version, + loff_t offset, int type, int length, int entry_count); +/*void store_key (struct key * key); +void forget_key (struct key * key);*/ +int reiserfs_get_block (struct inode * inode, long block, + struct buffer_head * bh_result, int create); +struct inode * reiserfs_iget (struct super_block * s, struct cpu_key * key); +void reiserfs_read_inode (struct inode * inode) ; +void reiserfs_read_inode2(struct inode * inode, void *p) ; +void reiserfs_delete_inode (struct inode * inode); +extern int reiserfs_notify_change(struct dentry * dentry, struct iattr * attr); +void reiserfs_write_inode (struct inode * inode, int) ; + +/* we don't mark inodes dirty, we just log them */ +void reiserfs_dirty_inode (struct inode * inode) ; + +struct inode * reiserfs_new_inode (struct reiserfs_transaction_handle *th, const struct inode * dir, int mode, + const char * symname, int item_len, + struct dentry *dentry, struct inode *inode, int * err); +int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode); +void reiserfs_update_sd (struct reiserfs_transaction_handle *th, struct inode * inode); +int reiserfs_inode_setattr(struct dentry *, struct iattr * attr); + +/* namei.c */ +inline void set_de_name_and_namelen (struct reiserfs_dir_entry * de); +int search_by_entry_key (struct super_block * sb, struct cpu_key * 
key, struct path * path, + struct reiserfs_dir_entry * de); +struct dentry * reiserfs_lookup (struct inode * dir, struct dentry *dentry); +int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode); +int reiserfs_mknod (struct inode * dir_inode, struct dentry *dentry, int mode, int rdev); +int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode); +int reiserfs_rmdir (struct inode * dir, struct dentry *dentry); +int reiserfs_unlink (struct inode * dir, struct dentry *dentry); +int reiserfs_symlink (struct inode * dir, struct dentry *dentry, const char * symname); +int reiserfs_link (struct dentry * old_dentry, struct inode * dir, struct dentry *dentry); +int reiserfs_rename (struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir, struct dentry *new_dentry); + +/* super.c */ +inline void reiserfs_mark_buffer_dirty (struct buffer_head * bh, int flag); +inline void reiserfs_mark_buffer_clean (struct buffer_head * bh); +void reiserfs_panic (struct super_block * s, const char * fmt, ...); +void reiserfs_write_super (struct super_block * s); +void reiserfs_put_super (struct super_block * s); +int reiserfs_remount (struct super_block * s, int * flags, char * data); +/*int read_super_block (struct super_block * s, int size); +int read_bitmaps (struct super_block * s); +int read_old_bitmaps (struct super_block * s); +int read_old_super_block (struct super_block * s, int size);*/ +struct super_block * reiserfs_read_super (struct super_block * s, void * data, int silent); +int reiserfs_statfs (struct super_block * s, struct statfs * buf); + +/* dir.c */ +extern struct inode_operations reiserfs_dir_inode_operations; +extern struct file_operations reiserfs_dir_operations; + +/* tail_conversion.c */ +int direct2indirect (struct reiserfs_transaction_handle *, struct inode *, struct path *, struct buffer_head *, loff_t); +int indirect2direct (struct reiserfs_transaction_handle *, struct inode *, struct page *, struct path *, struct cpu_key *, loff_t, char *); +void reiserfs_unmap_buffer(struct buffer_head *) ; + + +/* file.c */ +extern struct inode_operations reiserfs_file_inode_operations; +extern struct file_operations reiserfs_file_operations; +extern struct address_space_operations reiserfs_address_space_operations ; +int get_new_buffer (struct reiserfs_transaction_handle *th, struct buffer_head *, + struct buffer_head **, struct path *); + + +/* buffer2.c */ +struct buffer_head * reiserfs_getblk (kdev_t n_dev, int n_block, int n_size); +void wait_buffer_until_released (struct buffer_head * bh); +struct buffer_head * reiserfs_bread (kdev_t n_dev, int n_block, int n_size); + + +/* fix_nodes.c */ +void * reiserfs_kmalloc (size_t size, int flags, struct super_block * s); +void reiserfs_kfree (const void * vp, size_t size, struct super_block * s); +int fix_nodes (int n_op_mode, struct tree_balance * p_s_tb, struct item_head * p_s_ins_ih, const void *); +void unfix_nodes (struct tree_balance *); +void free_buffers_in_tb (struct tree_balance * p_s_tb); + + +/* prints.c */ +void reiserfs_panic (struct super_block * s, const char * fmt, ...); +void reiserfs_warning (const char * fmt, ...); +void reiserfs_debug (struct super_block *s, int level, const char * fmt, ...); +void print_virtual_node (struct virtual_node * vn); +void print_indirect_item (struct buffer_head * bh, int item_num); +void store_print_tb (struct tree_balance * tb); +void print_cur_tb (char * mes); +void print_de (struct reiserfs_dir_entry * de); +void print_bi (struct buffer_info * bi, char * 
mes); +#define PRINT_LEAF_ITEMS 1 /* print all items */ +#define PRINT_DIRECTORY_ITEMS 2 /* print directory items */ +#define PRINT_DIRECT_ITEMS 4 /* print contents of direct items */ +void print_block (struct buffer_head * bh, ...); +void print_path (struct tree_balance * tb, struct path * path); +void print_bmap (struct super_block * s, int silent); +void print_bmap_block (int i, char * data, int size, int silent); +/*void print_super_block (struct super_block * s, char * mes);*/ +void print_objectid_map (struct super_block * s); +void print_block_head (struct buffer_head * bh, char * mes); +void check_leaf (struct buffer_head * bh); +void check_internal (struct buffer_head * bh); +void print_statistics (struct super_block * s); + +/* lbalance.c */ +int leaf_move_items (int shift_mode, struct tree_balance * tb, int mov_num, int mov_bytes, struct buffer_head * Snew); +int leaf_shift_left (struct tree_balance * tb, int shift_num, int shift_bytes); +int leaf_shift_right (struct tree_balance * tb, int shift_num, int shift_bytes); +void leaf_delete_items (struct buffer_info * cur_bi, int last_first, int first, int del_num, int del_bytes); +void leaf_insert_into_buf (struct buffer_info * bi, int before, + struct item_head * inserted_item_ih, const char * inserted_item_body, int zeros_number); +void leaf_paste_in_buffer (struct buffer_info * bi, int pasted_item_num, + int pos_in_item, int paste_size, const char * body, int zeros_number); +void leaf_cut_from_buffer (struct buffer_info * bi, int cut_item_num, int pos_in_item, + int cut_size); +void leaf_paste_entries (struct buffer_head * bh, int item_num, int before, + int new_entry_count, struct reiserfs_de_head * new_dehs, const char * records, int paste_size); +/* ibalance.c */ +int balance_internal (struct tree_balance * , int, int, struct item_head * , + struct buffer_head **); + +/* do_balance.c */ +inline void do_balance_mark_leaf_dirty (struct tree_balance * tb, + struct buffer_head * bh, int flag); +#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty +#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty + +void do_balance (struct tree_balance * tb, struct item_head * ih, + const char * body, int flag); +void reiserfs_invalidate_buffer (struct tree_balance * tb, struct buffer_head * bh); + +int get_left_neighbor_position (struct tree_balance * tb, int h); +int get_right_neighbor_position (struct tree_balance * tb, int h); +void replace_key (struct tree_balance * tb, struct buffer_head *, int, struct buffer_head *, int); +void replace_lkey (struct tree_balance *, int, struct item_head *); +void replace_rkey (struct tree_balance *, int, struct item_head *); +void make_empty_node (struct buffer_info *); +struct buffer_head * get_FEB (struct tree_balance *); + +/* bitmap.c */ +int is_reusable (struct super_block * s, unsigned long block, int bit_value); +void reiserfs_free_block (struct reiserfs_transaction_handle *th, unsigned long); +int reiserfs_new_blocknrs (struct reiserfs_transaction_handle *th, + unsigned long * pblocknrs, unsigned long start_from, int amount_needed); +int reiserfs_new_unf_blocknrs (struct reiserfs_transaction_handle *th, + unsigned long * pblocknr, unsigned long start_from); +#ifdef REISERFS_PREALLOCATE +int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th, + struct inode * inode, + unsigned long * pblocknr, + unsigned long start_from); + +void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th, + struct inode * inode); +#endif + +/* hashes.c */ +__u32 keyed_hash 
(const char *msg, int len); +__u32 yura_hash (const char *msg, int len); +__u32 r5_hash (const char *msg, int len); + +/* version.c */ +char *reiserfs_get_version_string(void) ; + +/* the ext2 bit routines adjust for big or little endian as +** appropriate for the arch, so in our laziness we use them rather +** than using the bit routines they call more directly. These +** routines must be used when changing on disk bitmaps. */ +#define reiserfs_test_and_set_le_bit ext2_set_bit +#define reiserfs_test_and_clear_le_bit ext2_clear_bit +#define reiserfs_test_le_bit ext2_test_bit +#define reiserfs_find_next_zero_le_bit ext2_find_next_zero_bit + + +// +// this was totally copied from from linux's +// find_first_zero_bit and changed a bit +// + +#ifdef __i386__ + +extern __inline__ int +find_first_nonzero_bit(void * addr, unsigned size) { + int res; + int __d0; + void *__d1; + + + if (!size) { + return (0); + } + __asm__ __volatile__ ( + "cld\n\t" + "xorl %%eax,%%eax\n\t" + "repe; scasl\n\t" + "je 1f\n\t" + "movl -4(%%edi),%%eax\n\t" + "subl $4, %%edi\n\t" + "bsfl %%eax,%%eax\n\t" + "1:\tsubl %%edx,%%edi\n\t" + "shll $3,%%edi\n\t" + "addl %%edi,%%eax" + :"=a" (res), + "=c"(__d0), "=D"(__d1) + :"1" ((size + 31) >> 5), "d" (addr), "2" (addr)); + return (res); +} + +#else /* __i386__ */ + +extern __inline__ int find_next_nonzero_bit(void * addr, unsigned size, unsigned offset) +{ + unsigned int * p = ((unsigned int *) addr) + (offset >> 5); + unsigned int result = offset & ~31UL; + unsigned int tmp; + + if (offset >= size) + return size; + size -= result; + offset &= 31UL; + if (offset) { + tmp = *p++; + /* set to zero first offset bits */ + tmp &= ~(~0UL >> (32-offset)); + if (size < 32) + goto found_first; + if (tmp != 0U) + goto found_middle; + size -= 32; + result += 32; + } + while (size >= 32) { + if ((tmp = *p++) != 0U) + goto found_middle; + result += 32; + size -= 32; + } + if (!size) + return result; + tmp = *p; +found_first: +found_middle: + return result + ffs(tmp); +} + +#define find_first_nonzero_bit(addr,size) find_next_nonzero_bit((addr), (size), 0) + +#endif /* 0 */ + +/* sometimes reiserfs_truncate may require to allocate few new blocks + to perform indirect2direct conversion. People probably used to + think, that truncate should work without problems on a filesystem + without free disk space. They may complain that they can not + truncate due to lack of free disk space. This spare space allows us + to not worry about it. 
500 is probably too much, but it should be + absolutely safe */ +#define SPARE_SPACE 500 + +extern inline unsigned long reiserfs_get_journal_block(struct super_block *s) { + return le32_to_cpu(SB_DISK_SUPER_BLOCK(s)->s_journal_block) ; +} +extern inline unsigned long reiserfs_get_journal_orig_size(struct super_block *s) { + return le32_to_cpu(SB_DISK_SUPER_BLOCK(s)->s_orig_journal_size) ; +} + +/* prototypes from ioctl.c */ +int reiserfs_ioctl (struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg); +int reiserfs_unpack (struct inode * inode, struct file * filp); + +/* ioctl's command */ +#define REISERFS_IOC_UNPACK _IOW(0xCD,1,long) + +#endif /* _LINUX_REISER_FS_H */ + + diff -u --recursive --new-file v2.4.0/linux/include/linux/reiserfs_fs_i.h linux/include/linux/reiserfs_fs_i.h --- v2.4.0/linux/include/linux/reiserfs_fs_i.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/reiserfs_fs_i.h Mon Jan 15 12:42:32 2001 @@ -0,0 +1,63 @@ +#ifndef _REISER_FS_I +#define _REISER_FS_I + +/* these are used to keep track of the pages that need +** flushing before the current transaction can commit +*/ +struct reiserfs_page_list ; + +struct reiserfs_inode_info { + __u32 i_key [4];/* key is still 4 32 bit integers */ + + /* this comment will be totally + cryptic to readers not familiar + with 3.5/3.6 format conversion, and + it does not consider that that 3.6 + might not be the last version */ + int i_version; // this says whether file is old or new + + int i_pack_on_close ; // file might need tail packing on close + + __u32 i_first_direct_byte; // offset of first byte stored in direct item. + + /* pointer to the page that must be flushed before + ** the current transaction can commit. + ** + ** this pointer is only used when the tail is converted back into + ** a direct item, or the file is deleted + */ + struct reiserfs_page_list *i_converted_page ; + + /* we save the id of the transaction when we did the direct->indirect + ** conversion. That allows us to flush the buffers to disk + ** without having to update this inode to zero out the converted + ** page variable + */ + int i_conversion_trans_id ; + + /* My guess is this contains the first + unused block of a sequence of + blocks plus the length of the + sequence, which I think is always + at least two at the time of the + preallocation. I really prefer + allocate on flush conceptually..... + + You know, it really annoys me when + code is this badly commented that I + have to guess what it does. + Neither I nor anyone else has time + for guessing what your + datastructures mean. -Hans */ + //For preallocation + int i_prealloc_block; + int i_prealloc_count; + + /* I regret that you think the below + is a comment you should make.... 
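
/* A userspace sketch of issuing the unpack ioctl declared a few lines up,
   which asks reiserfs to store the file's tail unpacked (compare the nopack
   attribute in reiserfs_inode_info below).  Whether the handler looks at the
   long argument is not visible in this hunk; 1 is passed as a conventional
   placeholder, and error handling is minimal. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

#define REISERFS_IOC_UNPACK _IOW(0xCD, 1, long)

int main(int argc, char **argv)
{
    int fd;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <file on reiserfs>\n", argv[0]);
        return 1;
    }
    fd = open(argv[1], O_RDWR);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (ioctl(fd, REISERFS_IOC_UNPACK, 1L) < 0)
        perror("REISERFS_IOC_UNPACK");
    close(fd);
    return 0;
}
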
-Hans */ + //nopack-attribute + int nopack; +}; + + +#endif diff -u --recursive --new-file v2.4.0/linux/include/linux/reiserfs_fs_sb.h linux/include/linux/reiserfs_fs_sb.h --- v2.4.0/linux/include/linux/reiserfs_fs_sb.h Wed Dec 31 16:00:00 1969 +++ linux/include/linux/reiserfs_fs_sb.h Mon Jan 15 17:25:04 2001 @@ -0,0 +1,398 @@ +/* Copyright 1996-2000 Hans Reiser, see reiserfs/README for licensing + * and copyright details */ + +#ifndef _LINUX_REISER_FS_SB +#define _LINUX_REISER_FS_SB + +#ifdef __KERNEL__ +#include +#endif + +// +// super block's field values +// +/*#define REISERFS_VERSION 0 undistributed bitmap */ +/*#define REISERFS_VERSION 1 distributed bitmap and resizer*/ +#define REISERFS_VERSION_2 2 /* distributed bitmap, resizer, 64-bit, etc*/ +#define UNSET_HASH 0 // read_super will guess about, what hash names + // in directories were sorted with +#define TEA_HASH 1 +#define YURA_HASH 2 +#define R5_HASH 3 +#define DEFAULT_HASH R5_HASH + +/* this is the on disk super block */ + +struct reiserfs_super_block +{ + __u32 s_block_count; + __u32 s_free_blocks; /* free blocks count */ + __u32 s_root_block; /* root block number */ + __u32 s_journal_block; /* journal block number */ + __u32 s_journal_dev; /* journal device number */ + + /* Since journal size is currently a #define in a header file, if + ** someone creates a disk with a 16MB journal and moves it to a + ** system with 32MB journal default, they will overflow their journal + ** when they mount the disk. s_orig_journal_size, plus some checks + ** while mounting (inside journal_init) prevent that from happening + */ + + /* great comment Chris. Thanks. -Hans */ + + __u32 s_orig_journal_size; + __u32 s_journal_trans_max ; /* max number of blocks in a transaction. */ + __u32 s_journal_block_count ; /* total size of the journal. can change over time */ + __u32 s_journal_max_batch ; /* max number of blocks to batch into a trans */ + __u32 s_journal_max_commit_age ; /* in seconds, how old can an async commit be */ + __u32 s_journal_max_trans_age ; /* in seconds, how old can a transaction be */ + __u16 s_blocksize; /* block size */ + __u16 s_oid_maxsize; /* max size of object id array, see get_objectid() commentary */ + __u16 s_oid_cursize; /* current size of object id array */ + __u16 s_state; /* valid or error */ + char s_magic[12]; /* reiserfs magic string indicates that file system is reiserfs */ + __u32 s_hash_function_code; /* indicate, what hash function is being use to sort names in a directory*/ + __u16 s_tree_height; /* height of disk tree */ + __u16 s_bmap_nr; /* amount of bitmap blocks needed to address each block of file system */ + __u16 s_version; /* I'd prefer it if this was a string, + something like "3.6.4", and maybe + 16 bytes long mostly unused. We + don't need to save bytes in the + superblock. -Hans */ + __u16 s_reserved; + char s_unused[128] ; /* zero filled by mkreiserfs */ +} __attribute__ ((__packed__)); + +#define SB_SIZE (sizeof(struct reiserfs_super_block)) + +/* this is the super from 3.5.X, where X >= 10 */ +struct reiserfs_super_block_v1 +{ + __u32 s_block_count; /* blocks count */ + __u32 s_free_blocks; /* free blocks count */ + __u32 s_root_block; /* root block number */ + __u32 s_journal_block; /* journal block number */ + __u32 s_journal_dev; /* journal device number */ + __u32 s_orig_journal_size; /* size of the journal on FS creation. used to make sure they don't overflow it */ + __u32 s_journal_trans_max ; /* max number of blocks in a transaction. 
*/ + __u32 s_journal_block_count ; /* total size of the journal. can change over time */ + __u32 s_journal_max_batch ; /* max number of blocks to batch into a trans */ + __u32 s_journal_max_commit_age ; /* in seconds, how old can an async commit be */ + __u32 s_journal_max_trans_age ; /* in seconds, how old can a transaction be */ + __u16 s_blocksize; /* block size */ + __u16 s_oid_maxsize; /* max size of object id array, see get_objectid() commentary */ + __u16 s_oid_cursize; /* current size of object id array */ + __u16 s_state; /* valid or error */ + char s_magic[16]; /* reiserfs magic string indicates that file system is reiserfs */ + __u16 s_tree_height; /* height of disk tree */ + __u16 s_bmap_nr; /* amount of bitmap blocks needed to address each block of file system */ + __u32 s_reserved; +} __attribute__ ((__packed__)); + +#define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) + +/* LOGGING -- */ + +/* These all interelate for performance. +** +** If the journal block count is smaller than n transactions, you lose speed. +** I don't know what n is yet, I'm guessing 8-16. +** +** typical transaction size depends on the application, how often fsync is +** called, and how many metadata blocks you dirty in a 30 second period. +** The more small files (<16k) you use, the larger your transactions will +** be. +** +** If your journal fills faster than dirty buffers get flushed to disk, it must flush them before allowing the journal +** to wrap, which slows things down. If you need high speed meta data updates, the journal should be big enough +** to prevent wrapping before dirty meta blocks get to disk. +** +** If the batch max is smaller than the transaction max, you'll waste space at the end of the journal +** because journal_end sets the next transaction to start at 0 if the next transaction has any chance of wrapping. +** +** The large the batch max age, the better the speed, and the more meta data changes you'll lose after a crash. +** +*/ + +/* don't mess with these for a while */ + /* we have a node size define somewhere in reiserfs_fs.h. -Hans */ +#define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */ +#define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ +#define JOURNAL_TRANS_MAX 1024 /* biggest possible single transaction, don't change for now (8/3/99) */ +#define JOURNAL_HASH_SIZE 8192 +#define JOURNAL_NUM_BITMAPS 5 /* number of copies of the bitmaps to have floating. Must be >= 2 */ +#define JOURNAL_LIST_COUNT 64 + +/* these are bh_state bit flag offset numbers, for use in the buffer head */ + +#define BH_JDirty 16 /* journal data needs to be written before buffer can be marked dirty */ +#define BH_JDirty_wait 18 /* commit is done, buffer marked dirty */ +#define BH_JNew 19 /* buffer allocated during this transaction, no need to write if freed during this trans too */ + +/* ugly. metadata blocks must be prepared before they can be logged. +** prepared means unlocked and cleaned. If the block is prepared, but not +** logged for some reason, any bits cleared while preparing it must be +** set again. +*/ +#define BH_JPrepared 20 /* block has been prepared for the log */ +#define BH_JRestore_dirty 22 /* restore the dirty bit later */ + +/* One of these for every block in every transaction +** Each one is in two hash tables. First, a hash of the current transaction, and after journal_end, a +** hash of all the in memory transactions. +** next and prev are used by the current transaction (journal_hash). +** hnext and hprev are used by journal_list_hash. 
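
/* A userspace sketch that reads the version 2 super block defined above from
   a file system image and prints a few fields.  The on-disk fields are
   little-endian (the SB_* accessors later in this patch wrap them in
   le32_to_cpu/le16_to_cpu), so le32toh is used here.  The 64 KB offset is an
   assumption about where the newer format keeps its super block; it is not
   part of this hunk, and only the leading fields of the structure are
   declared. */
#include <stdio.h>
#include <stdint.h>
#include <endian.h>

#define SB_OFFSET (64 * 1024)               /* assumed location in the image */

struct demo_reiserfs_sb {                   /* leading fields only */
    uint32_t s_block_count;
    uint32_t s_free_blocks;
    uint32_t s_root_block;
    uint32_t s_journal_block;
    uint32_t s_journal_dev;
    uint32_t s_orig_journal_size;
} __attribute__ ((__packed__));

int main(int argc, char **argv)
{
    struct demo_reiserfs_sb sb;
    FILE *f;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <image>\n", argv[0]);
        return 1;
    }
    f = fopen(argv[1], "rb");
    if (!f || fseek(f, SB_OFFSET, SEEK_SET) != 0 ||
        fread(&sb, sizeof(sb), 1, f) != 1) {
        perror("read super block");
        return 1;
    }
    printf("blocks: %u  free: %u  root block: %u\n",
           le32toh(sb.s_block_count),
           le32toh(sb.s_free_blocks),
           le32toh(sb.s_root_block));
    fclose(f);
    return 0;
}
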
If a block is in more than one transaction, the journal_list_hash +** links it in multiple times. This allows flush_journal_list to remove just the cnode belonging +** to a given transaction. +*/ +struct reiserfs_journal_cnode { + struct buffer_head *bh ; /* real buffer head */ + kdev_t dev ; /* dev of real buffer head */ + unsigned long blocknr ; /* block number of real buffer head, == 0 when buffer on disk */ + int state ; + struct reiserfs_journal_list *jlist ; /* journal list this cnode lives in */ + struct reiserfs_journal_cnode *next ; /* next in transaction list */ + struct reiserfs_journal_cnode *prev ; /* prev in transaction list */ + struct reiserfs_journal_cnode *hprev ; /* prev in hash list */ + struct reiserfs_journal_cnode *hnext ; /* next in hash list */ +}; + +struct reiserfs_bitmap_node { + int id ; + char *data ; + struct list_head list ; +} ; + +struct reiserfs_list_bitmap { + struct reiserfs_journal_list *journal_list ; + struct reiserfs_bitmap_node **bitmaps ; +} ; + +/* +** transaction handle which is passed around for all journal calls +*/ +struct reiserfs_transaction_handle { + /* ifdef it. -Hans */ + char *t_caller ; /* debugging use */ + int t_blocks_logged ; /* number of blocks this writer has logged */ + int t_blocks_allocated ; /* number of blocks this writer allocated */ + unsigned long t_trans_id ; /* sanity check, equals the current trans id */ + struct super_block *t_super ; /* super for this FS when journal_begin was + called. saves calls to reiserfs_get_super */ + +} ; + +/* +** one of these for each transaction. The most important part here is the j_realblock. +** this list of cnodes is used to hash all the blocks in all the commits, to mark all the +** real buffer heads dirty once all the commits hit the disk, +** and to make sure every real block in a transaction is on disk before allowing the log area +** to be overwritten */ +struct reiserfs_journal_list { + unsigned long j_start ; + unsigned long j_len ; + atomic_t j_nonzerolen ; + atomic_t j_commit_left ; + atomic_t j_flushing ; + atomic_t j_commit_flushing ; + atomic_t j_older_commits_done ; /* all commits older than this on disk*/ + unsigned long j_trans_id ; + time_t j_timestamp ; + struct reiserfs_list_bitmap *j_list_bitmap ; + struct buffer_head *j_commit_bh ; /* commit buffer head */ + struct reiserfs_journal_cnode *j_realblock ; + struct reiserfs_journal_cnode *j_freedlist ; /* list of buffers that were freed during this trans. free each of these on flush */ + wait_queue_head_t j_commit_wait ; /* wait for all the commit blocks to be flushed */ + wait_queue_head_t j_flush_wait ; /* wait for all the real blocks to be flushed */ +} ; + +struct reiserfs_page_list ; /* defined in reiserfs_fs.h */ + +struct reiserfs_journal { + struct buffer_head ** j_ap_blocks ; /* journal blocks on disk */ + struct reiserfs_journal_cnode *j_last ; /* newest journal block */ + struct reiserfs_journal_cnode *j_first ; /* oldest journal block. start here for traverse */ + + int j_state ; + unsigned long j_trans_id ; + unsigned long j_mount_id ; + unsigned long j_start ; /* start of current waiting commit (index into j_ap_blocks) */ + unsigned long j_len ; /* lenght of current waiting commit */ + unsigned long j_len_alloc ; /* number of buffers requested by journal_begin() */ + atomic_t j_wcount ; /* count of writers for current commit */ + unsigned long j_bcount ; /* batch count. 
allows turning X transactions into 1 */ + unsigned long j_first_unflushed_offset ; /* first unflushed transactions offset */ + unsigned long j_last_flush_trans_id ; /* last fully flushed journal timestamp */ + struct buffer_head *j_header_bh ; + + /* j_flush_pages must be flushed before the current transaction can + ** commit + */ + struct reiserfs_page_list *j_flush_pages ; + time_t j_trans_start_time ; /* time this transaction started */ + wait_queue_head_t j_wait ; /* wait journal_end to finish I/O */ + atomic_t j_wlock ; /* lock for j_wait */ + wait_queue_head_t j_join_wait ; /* wait for current transaction to finish before starting new one */ + atomic_t j_jlock ; /* lock for j_join_wait */ + int j_journal_list_index ; /* journal list number of the current trans */ + int j_list_bitmap_index ; /* number of next list bitmap to use */ + int j_must_wait ; /* no more journal begins allowed. MUST sleep on j_join_wait */ + int j_next_full_flush ; /* next journal_end will flush all journal list */ + int j_next_async_flush ; /* next journal_end will flush all async commits */ + + int j_cnode_used ; /* number of cnodes on the used list */ + int j_cnode_free ; /* number of cnodes on the free list */ + + struct reiserfs_journal_cnode *j_cnode_free_list ; + struct reiserfs_journal_cnode *j_cnode_free_orig ; /* orig pointer returned from vmalloc */ + + int j_free_bitmap_nodes ; + int j_used_bitmap_nodes ; + struct list_head j_bitmap_nodes ; + struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS] ; /* array of bitmaps to record the deleted blocks */ + struct reiserfs_journal_list j_journal_list[JOURNAL_LIST_COUNT] ; /* array of all the journal lists */ + struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for real buffer heads in current trans */ + struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for all the real buffer heads in all + the transactions */ +}; + +#define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick. magic string to find desc blocks in the journal */ + + +typedef __u32 (*hashf_t) (const char *, int); + +/* reiserfs union of in-core super block data */ +struct reiserfs_sb_info +{ + struct buffer_head * s_sbh; /* Buffer containing the super block */ + /* both the comment and the choice of + name are unclear for s_rs -Hans */ + struct reiserfs_super_block * s_rs; /* Pointer to the super block in the buffer */ + struct buffer_head ** s_ap_bitmap; /* array of buffers, holding block bitmap */ + struct reiserfs_journal *s_journal ; /* pointer to journal information */ + unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ + + /* Comment? -Hans */ + void (*end_io_handler)(struct buffer_head *, int); + hashf_t s_hash_function; /* pointer to function which is used + to sort names in directory. Set on + mount */ + unsigned long s_mount_opt; /* reiserfs's mount options are set + here (currently - NOTAIL, NOLOG, + REPLAYONLY) */ + + /* Comment? -Hans */ + wait_queue_head_t s_wait; + /* To be obsoleted soon by per buffer seals.. 
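
/* A sketch of the mount-time selection that ends up in the s_hash_function
   pointer above: the super block's s_hash_function_code (TEA_HASH, YURA_HASH,
   R5_HASH) picks one of the hashf_t functions declared in reiserfs_fs.h,
   assuming the usual tea/keyed_hash, rupasov/yura_hash, r5/r5_hash
   correspondence.  The function bodies below are placeholders only; the real
   implementations live in hashes.c. */
#include <stdio.h>
#include <stdint.h>

#define TEA_HASH  1
#define YURA_HASH 2
#define R5_HASH   3

typedef uint32_t (*hashf_t)(const char *, int);

/* placeholder bodies, not the real reiserfs hashes */
static uint32_t keyed_hash(const char *msg, int len) { (void)msg; return (uint32_t)len; }
static uint32_t yura_hash (const char *msg, int len) { (void)msg; return (uint32_t)len + 1; }
static uint32_t r5_hash   (const char *msg, int len) { (void)msg; return (uint32_t)len + 2; }

static hashf_t hash_function_for_code(uint32_t code)
{
    switch (code) {
    case TEA_HASH:  return keyed_hash;
    case YURA_HASH: return yura_hash;
    case R5_HASH:   return r5_hash;
    default:        return NULL;    /* UNSET_HASH: must be guessed at mount */
    }
}

int main(void)
{
    hashf_t h = hash_function_for_code(R5_HASH);

    if (h)
        printf("hash(\"foo\") = %u\n", (unsigned)h("foo", 3));
    return 0;
}
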
-Hans */ + atomic_t s_generation_counter; // increased by one every time the + // tree gets re-balanced + + /* session statistics */ + int s_kmallocs; + int s_disk_reads; + int s_disk_writes; + int s_fix_nodes; + int s_do_balance; + int s_unneeded_left_neighbor; + int s_good_search_by_key_reada; + int s_bmaps; + int s_bmaps_without_search; + int s_direct2indirect; + int s_indirect2direct; +}; + + +#define NOTAIL 0 /* -o notail: no tails will be created in a session */ +#define REPLAYONLY 3 /* replay journal and return 0. Use by fsck */ +#define REISERFS_NOLOG 4 /* -o nolog: turn journalling off */ +#define REISERFS_CONVERT 5 /* -o conv: causes conversion of old + format super block to the new + format. If not specified - old + partition will be dealt with in a + manner of 3.5.x */ + +/* -o hash={tea, rupasov, r5, detect} is meant for properly mounting +** reiserfs disks from 3.5.19 or earlier. 99% of the time, this option +** is not required. If the normal autodection code can't determine which +** hash to use (because both hases had the same value for a file) +** use this option to force a specific hash. It won't allow you to override +** the existing hash on the FS, so if you have a tea hash disk, and mount +** with -o hash=rupasov, the mount will fail. +*/ +#define FORCE_TEA_HASH 6 /* try to force tea hash on mount */ +#define FORCE_RUPASOV_HASH 7 /* try to force rupasov hash on mount */ +#define FORCE_R5_HASH 8 /* try to force rupasov hash on mount */ +#define FORCE_HASH_DETECT 9 /* try to detect hash function on mount */ + + +/* used for testing experimental features, makes benchmarking new + features with and without more convenient, should never be used by + users in any code shipped to users (ideally) */ + +#define REISERFS_NO_BORDER 11 +#define REISERFS_NO_UNHASHED_RELOCATION 12 +#define REISERFS_HASHED_RELOCATION 13 +#define REISERFS_TEST4 14 + +#define REISERFS_TEST1 11 +#define REISERFS_TEST2 12 +#define REISERFS_TEST3 13 +#define REISERFS_TEST4 14 + +#define reiserfs_r5_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_R5_HASH)) +#define reiserfs_rupasov_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_RUPASOV_HASH)) +#define reiserfs_tea_hash(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_TEA_HASH)) +#define reiserfs_hash_detect(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << FORCE_HASH_DETECT)) +#define reiserfs_no_border(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NO_BORDER)) +#define reiserfs_no_unhashed_relocation(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION)) +#define reiserfs_hashed_relocation(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_HASHED_RELOCATION)) +#define reiserfs_test4(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_TEST4)) + +#define dont_have_tails(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << NOTAIL)) +#define replay_only(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REPLAYONLY)) +#define reiserfs_dont_log(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NOLOG)) +#define old_format_only(s) ((SB_VERSION(s) != REISERFS_VERSION_2) && !((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_CONVERT))) + + +void reiserfs_file_buffer (struct buffer_head * bh, int list); +int reiserfs_is_super(struct super_block *s) ; +int journal_mark_dirty(struct reiserfs_transaction_handle *, struct super_block *, struct buffer_head *bh) ; +int flush_old_commits(struct super_block *s, int) ; +int show_reiserfs_locks(void) ; +int reiserfs_resize(struct super_block *, unsigned long) ; + +#define CARRY_ON 0 +#define SCHEDULE_OCCURRED 
1 + + +#define SB_BUFFER_WITH_SB(s) ((s)->u.reiserfs_sb.s_sbh) +#define SB_JOURNAL(s) ((s)->u.reiserfs_sb.s_journal) +#define SB_JOURNAL_LIST(s) (SB_JOURNAL(s)->j_journal_list) +#define SB_JOURNAL_LIST_INDEX(s) (SB_JOURNAL(s)->j_journal_list_index) +#define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free) +#define SB_AP_BITMAP(s) ((s)->u.reiserfs_sb.s_ap_bitmap) + + +// on-disk super block fields converted to cpu form +#define SB_DISK_SUPER_BLOCK(s) ((s)->u.reiserfs_sb.s_rs) +#define SB_BLOCK_COUNT(s) le32_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_block_count)) +#define SB_FREE_BLOCKS(s) le32_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_free_blocks)) +#define SB_REISERFS_MAGIC(s) (SB_DISK_SUPER_BLOCK(s)->s_magic) +#define SB_ROOT_BLOCK(s) le32_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_root_block)) +#define SB_TREE_HEIGHT(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_tree_height)) +#define SB_REISERFS_STATE(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_state)) +#define SB_VERSION(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_version)) +#define SB_BMAP_NR(s) le16_to_cpu ((SB_DISK_SUPER_BLOCK(s)->s_bmap_nr)) + +#define PUT_SB_BLOCK_COUNT(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_block_count = cpu_to_le32(val); } while (0) +#define PUT_SB_FREE_BLOCKS(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_free_blocks = cpu_to_le32(val); } while (0) +#define PUT_SB_ROOT_BLOCK(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_root_block = cpu_to_le32(val); } while (0) +#define PUT_SB_TREE_HEIGHT(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_tree_height = cpu_to_le16(val); } while (0) +#define PUT_SB_REISERFS_STATE(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_state = cpu_to_le16(val); } while (0) +#define PUT_SB_VERSION(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_version = cpu_to_le16(val); } while (0) +#define PUT_SB_BMAP_NR(s, val) do { SB_DISK_SUPER_BLOCK(s)->s_bmap_nr = cpu_to_le16 (val); } while (0) + +#endif /* _LINUX_REISER_FS_SB */ + + + diff -u --recursive --new-file v2.4.0/linux/include/linux/sched.h linux/include/linux/sched.h --- v2.4.0/linux/include/linux/sched.h Thu Jan 4 14:50:47 2001 +++ linux/include/linux/sched.h Mon Jan 15 17:25:05 2001 @@ -219,13 +219,14 @@ unsigned long rss, total_vm, locked_vm; unsigned long def_flags; unsigned long cpu_vm_mask; - unsigned long swap_cnt; /* number of pages to swap on next pass */ unsigned long swap_address; /* Architecture-specific MM context */ mm_context_t context; }; +extern int mmlist_nr; + #define INIT_MM(name) \ { \ mmap: &init_mmap, \ @@ -542,8 +543,8 @@ #define CURRENT_TIME (xtime.tv_sec) -extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode)); -extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode)); +extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr)); +extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)); extern void FASTCALL(sleep_on(wait_queue_head_t *q)); extern long FASTCALL(sleep_on_timeout(wait_queue_head_t *q, signed long timeout)); @@ -552,12 +553,16 @@ signed long timeout)); extern void FASTCALL(wake_up_process(struct task_struct * tsk)); -#define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,WQ_FLAG_EXCLUSIVE) -#define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,0) -#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,WQ_FLAG_EXCLUSIVE) -#define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE,WQ_FLAG_EXCLUSIVE) -#define wake_up_interruptible_all(x) 
__wake_up((x),TASK_INTERRUPTIBLE,0) -#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE,WQ_FLAG_EXCLUSIVE) +#define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1) +#define wake_up_nr(x, nr) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr) +#define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0) +#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1) +#define wake_up_sync_nr(x, nr) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr) +#define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE, 1) +#define wake_up_interruptible_nr(x, nr) __wake_up((x),TASK_INTERRUPTIBLE, nr) +#define wake_up_interruptible_all(x) __wake_up((x),TASK_INTERRUPTIBLE, 0) +#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1) +#define wake_up_interruptible_sync_nr(x, nr) __wake_up_sync((x),TASK_INTERRUPTIBLE, nr) extern int in_group_p(gid_t); extern int in_egroup_p(gid_t); diff -u --recursive --new-file v2.4.0/linux/include/linux/swap.h linux/include/linux/swap.h --- v2.4.0/linux/include/linux/swap.h Thu Jan 4 14:50:46 2001 +++ linux/include/linux/swap.h Mon Jan 15 17:25:04 2001 @@ -107,7 +107,7 @@ extern int page_launder(int, int); extern int free_shortage(void); extern int inactive_shortage(void); -extern void wakeup_kswapd(int); +extern void wakeup_kswapd(void); extern int try_to_free_pages(unsigned int gfp_mask); /* linux/mm/page_io.c */ diff -u --recursive --new-file v2.4.0/linux/ipc/shm.c linux/ipc/shm.c --- v2.4.0/linux/ipc/shm.c Fri Dec 29 14:21:48 2000 +++ linux/ipc/shm.c Sun Jan 14 11:22:21 2001 @@ -121,6 +121,7 @@ { shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; shm_rmid (shp->id); + shmem_lock(shp->shm_file, 0); fput (shp->shm_file); kfree (shp); } @@ -467,10 +468,10 @@ if(err) goto out_unlock; if(cmd==SHM_LOCK) { - shp->shm_file->f_dentry->d_inode->u.shmem_i.locked = 1; + shmem_lock(shp->shm_file, 1); shp->shm_flags |= SHM_LOCKED; } else { - shp->shm_file->f_dentry->d_inode->u.shmem_i.locked = 0; + shmem_lock(shp->shm_file, 0); shp->shm_flags &= ~SHM_LOCKED; } shm_unlock(shmid); diff -u --recursive --new-file v2.4.0/linux/kernel/context.c linux/kernel/context.c --- v2.4.0/linux/kernel/context.c Sun Dec 10 09:53:51 2000 +++ linux/kernel/context.c Fri Jan 12 09:52:41 2001 @@ -148,7 +148,7 @@ int start_context_thread(void) { - kernel_thread(context_thread, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); + kernel_thread(context_thread, NULL, CLONE_FS | CLONE_FILES); return 0; } diff -u --recursive --new-file v2.4.0/linux/kernel/fork.c linux/kernel/fork.c --- v2.4.0/linux/kernel/fork.c Wed Jan 3 20:45:26 2001 +++ linux/kernel/fork.c Wed Jan 10 14:53:54 2001 @@ -134,7 +134,6 @@ mm->mmap_cache = NULL; mm->map_count = 0; mm->cpu_vm_mask = 0; - mm->swap_cnt = 0; mm->swap_address = 0; pprev = &mm->mmap; for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) { @@ -193,6 +192,7 @@ } spinlock_t mmlist_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; +int mmlist_nr; #define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL)) #define free_mm(mm) (kmem_cache_free(mm_cachep, (mm))) @@ -246,6 +246,7 @@ { if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) { list_del(&mm->mmlist); + mmlist_nr--; spin_unlock(&mmlist_lock); exit_mmap(mm); mmdrop(mm); @@ -326,6 +327,7 @@ */ spin_lock(&mmlist_lock); list_add(&mm->mmlist, &oldmm->mmlist); + mmlist_nr++; spin_unlock(&mmlist_lock); if (retval) diff -u --recursive --new-file 
v2.4.0/linux/kernel/ksyms.c linux/kernel/ksyms.c --- v2.4.0/linux/kernel/ksyms.c Tue Jan 2 16:45:37 2001 +++ linux/kernel/ksyms.c Mon Jan 15 12:42:32 2001 @@ -159,6 +159,7 @@ EXPORT_SYMBOL(d_lookup); EXPORT_SYMBOL(__d_path); EXPORT_SYMBOL(mark_buffer_dirty); +EXPORT_SYMBOL(set_buffer_async_io); /* for reiserfs_writepage */ EXPORT_SYMBOL(__mark_buffer_dirty); EXPORT_SYMBOL(__mark_inode_dirty); EXPORT_SYMBOL(get_empty_filp); diff -u --recursive --new-file v2.4.0/linux/kernel/sched.c linux/kernel/sched.c --- v2.4.0/linux/kernel/sched.c Thu Jan 4 13:50:38 2001 +++ linux/kernel/sched.c Mon Jan 15 13:08:15 2001 @@ -690,19 +690,15 @@ } static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode, - unsigned int wq_mode, const int sync) + int nr_exclusive, const int sync) { struct list_head *tmp, *head; - struct task_struct *p, *best_exclusive; + struct task_struct *p; unsigned long flags; - int best_cpu, irq; if (!q) goto out; - best_cpu = smp_processor_id(); - irq = in_interrupt(); - best_exclusive = NULL; wq_write_lock_irqsave(&q->lock, flags); #if WAITQUEUE_DEBUG @@ -730,47 +726,27 @@ #if WAITQUEUE_DEBUG curr->__waker = (long)__builtin_return_address(0); #endif - /* - * If waking up from an interrupt context then - * prefer processes which are affine to this - * CPU. - */ - if (irq && (curr->flags & wq_mode & WQ_FLAG_EXCLUSIVE)) { - if (!best_exclusive) - best_exclusive = p; - if (p->processor == best_cpu) { - best_exclusive = p; - break; - } - } else { - if (sync) - wake_up_process_synchronous(p); - else - wake_up_process(p); - if (curr->flags & wq_mode & WQ_FLAG_EXCLUSIVE) - break; - } + if (sync) + wake_up_process_synchronous(p); + else + wake_up_process(p); + if ((curr->flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) + break; } } - if (best_exclusive) { - if (sync) - wake_up_process_synchronous(best_exclusive); - else - wake_up_process(best_exclusive); - } wq_write_unlock_irqrestore(&q->lock, flags); out: return; } -void __wake_up(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode) +void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr) { - __wake_up_common(q, mode, wq_mode, 0); + __wake_up_common(q, mode, nr, 0); } -void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, unsigned int wq_mode) +void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr) { - __wake_up_common(q, mode, wq_mode, 1); + __wake_up_common(q, mode, nr, 1); } #define SLEEP_ON_VAR \ diff -u --recursive --new-file v2.4.0/linux/mm/filemap.c linux/mm/filemap.c --- v2.4.0/linux/mm/filemap.c Tue Jan 2 18:59:45 2001 +++ linux/mm/filemap.c Mon Jan 15 17:14:41 2001 @@ -143,7 +143,8 @@ list_add(&page->list, &mapping->dirty_pages); spin_unlock(&pagecache_lock); - mark_inode_dirty_pages(mapping->host); + if (mapping->host) + mark_inode_dirty_pages(mapping->host); } /** @@ -306,7 +307,7 @@ */ age_page_up(page); if (inactive_shortage() > inactive_target / 2 && free_shortage()) - wakeup_kswapd(0); + wakeup_kswapd(); not_found: return page; } @@ -974,10 +975,6 @@ * accessed sequentially. 
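The sched.h macros and the __wake_up_common() rewrite above replace the old WQ_FLAG_EXCLUSIVE argument with an explicit count: nr_exclusive bounds how many exclusive sleepers one call may wake, with 0 meaning no limit. A minimal kernel-context sketch of how the new *_nr variants might be used, assuming the usual 2.4 wait-queue API from <linux/sched.h> and <linux/wait.h>; the request queue, counter and helper names below are illustrative only, and locking is omitted.

static DECLARE_WAIT_QUEUE_HEAD(request_wait);
static int requests_free;	/* illustrative counter, no locking shown */

static void wait_for_free_request(void)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue_exclusive(&request_wait, &wait);	/* sets WQ_FLAG_EXCLUSIVE */
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (requests_free > 0)
			break;
		schedule();
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(&request_wait, &wait);
	requests_free--;
}

static void release_requests(int nr)
{
	requests_free += nr;
	/*
	 * Wake at most 'nr' exclusive sleepers (plus any non-exclusive
	 * ones); wake_up() is now simply the nr == 1 case and
	 * wake_up_all() the unlimited (nr == 0) case.
	 */
	wake_up_nr(&request_wait, nr);
}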
*/ if (ahead) { - if (reada_ok == 2) { - run_task_queue(&tq_disk); - } - filp->f_ralen += ahead; filp->f_rawin += filp->f_ralen; filp->f_raend = raend + ahead + 1; @@ -1835,7 +1832,8 @@ n->vm_end = end; setup_read_behavior(n, behavior); n->vm_raend = 0; - get_file(n->vm_file); + if (n->vm_file) + get_file(n->vm_file); if (n->vm_ops && n->vm_ops->open) n->vm_ops->open(n); lock_vma_mappings(vma); @@ -1861,7 +1859,8 @@ n->vm_pgoff += (n->vm_start - vma->vm_start) >> PAGE_SHIFT; setup_read_behavior(n, behavior); n->vm_raend = 0; - get_file(n->vm_file); + if (n->vm_file) + get_file(n->vm_file); if (n->vm_ops && n->vm_ops->open) n->vm_ops->open(n); lock_vma_mappings(vma); @@ -1893,7 +1892,8 @@ right->vm_pgoff += (right->vm_start - left->vm_start) >> PAGE_SHIFT; left->vm_raend = 0; right->vm_raend = 0; - atomic_add(2, &vma->vm_file->f_count); + if (vma->vm_file) + atomic_add(2, &vma->vm_file->f_count); if (vma->vm_ops && vma->vm_ops->open) { vma->vm_ops->open(left); diff -u --recursive --new-file v2.4.0/linux/mm/memory.c linux/mm/memory.c --- v2.4.0/linux/mm/memory.c Mon Jan 1 10:37:41 2001 +++ linux/mm/memory.c Mon Jan 8 15:39:38 2001 @@ -207,7 +207,8 @@ src_pte = pte_offset(src_pmd, address); dst_pte = pte_offset(dst_pmd, address); - + + spin_lock(&src->page_table_lock); do { pte_t pte = *src_pte; struct page *ptepage; @@ -240,16 +241,21 @@ cont_copy_pte_range: set_pte(dst_pte, pte); cont_copy_pte_range_noset: address += PAGE_SIZE; if (address >= end) - goto out; + goto out_unlock; src_pte++; dst_pte++; } while ((unsigned long)src_pte & PTE_TABLE_MASK); + spin_unlock(&src->page_table_lock); cont_copy_pmd_range: src_pmd++; dst_pmd++; } while ((unsigned long)src_pmd & PMD_TABLE_MASK); } out: + return 0; + +out_unlock: + spin_unlock(&src->page_table_lock); return 0; nomem: diff -u --recursive --new-file v2.4.0/linux/mm/page_alloc.c linux/mm/page_alloc.c --- v2.4.0/linux/mm/page_alloc.c Wed Jan 3 09:59:06 2001 +++ linux/mm/page_alloc.c Mon Jan 15 12:35:12 2001 @@ -16,6 +16,7 @@ #include #include #include +#include int nr_swap_pages; int nr_active_pages; @@ -303,7 +304,7 @@ * an inactive page shortage, wake up kswapd. */ if (inactive_shortage() > inactive_target / 2 && free_shortage()) - wakeup_kswapd(0); + wakeup_kswapd(); /* * If we are about to get low on free pages and cleaning * the inactive_dirty pages would fix the situation, @@ -379,7 +380,7 @@ * - if we don't have __GFP_IO set, kswapd may be * able to free some memory we can't free ourselves */ - wakeup_kswapd(0); + wakeup_kswapd(); if (gfp_mask & __GFP_WAIT) { __set_current_state(TASK_RUNNING); current->policy |= SCHED_YIELD; @@ -404,7 +405,7 @@ * - we're doing a higher-order allocation * --> move pages to the free list until we succeed * - we're /really/ tight on memory - * --> wait on the kswapd waitqueue until memory is freed + * --> try to free pages ourselves with page_launder */ if (!(current->flags & PF_MEMALLOC)) { /* @@ -443,36 +444,20 @@ /* * When we arrive here, we are really tight on memory. * - * We wake up kswapd and sleep until kswapd wakes us - * up again. After that we loop back to the start. - * - * We have to do this because something else might eat - * the memory kswapd frees for us and we need to be - * reliable. Note that we don't loop back for higher - * order allocations since it is possible that kswapd - * simply cannot free a large enough contiguous area - * of memory *ever*. + * We try to free pages ourselves by: + * - shrinking the i/d caches. + * - reclaiming unused memory from the slab caches. 
+ * - swapping/syncing pages to disk (done by page_launder) + * - moving clean pages from the inactive dirty list to + * the inactive clean list. (done by page_launder) */ - if ((gfp_mask & (__GFP_WAIT|__GFP_IO)) == (__GFP_WAIT|__GFP_IO)) { - wakeup_kswapd(1); + if (gfp_mask & __GFP_WAIT) { memory_pressure++; - if (!order) - goto try_again; - /* - * If __GFP_IO isn't set, we can't wait on kswapd because - * kswapd just might need some IO locks /we/ are holding ... - * - * SUBTLE: The scheduling point above makes sure that - * kswapd does get the chance to free memory we can't - * free ourselves... - */ - } else if (gfp_mask & __GFP_WAIT) { try_to_free_pages(gfp_mask); - memory_pressure++; + wakeup_bdflush(0); if (!order) goto try_again; } - } /* @@ -554,14 +539,8 @@ void free_pages(unsigned long addr, unsigned long order) { - struct page *fpage; - -#ifdef CONFIG_DISCONTIGMEM - if (addr == 0) return; -#endif - fpage = virt_to_page(addr); - if (VALID_PAGE(fpage)) - __free_pages(fpage, order); + if (addr != 0) + __free_pages(virt_to_page(addr), order); } /* diff -u --recursive --new-file v2.4.0/linux/mm/shmem.c linux/mm/shmem.c --- v2.4.0/linux/mm/shmem.c Fri Dec 29 14:21:48 2000 +++ linux/mm/shmem.c Sun Jan 14 11:22:21 2001 @@ -310,6 +310,8 @@ } /* We have the page */ SetPageUptodate (page); + if (info->locked) + page_cache_get(page); cached_page: UnlockPage (page); @@ -374,8 +376,7 @@ inode->i_fop = &shmem_dir_operations; break; case S_IFLNK: - inode->i_op = &page_symlink_inode_operations; - break; + BUG(); } spin_lock (&shmem_ilock); list_add (&inode->u.shmem_i.list, &shmem_inodes); @@ -401,6 +402,32 @@ return 0; } +void shmem_lock(struct file * file, int lock) +{ + struct inode * inode = file->f_dentry->d_inode; + struct shmem_inode_info * info = &inode->u.shmem_i; + struct page * page; + unsigned long idx, size; + + if (info->locked == lock) + return; + down(&inode->i_sem); + info->locked = lock; + size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + for (idx = 0; idx < size; idx++) { + page = find_lock_page(inode->i_mapping, idx); + if (!page) + continue; + if (!lock) { + /* release the extra count and our reference */ + page_cache_release(page); + page_cache_release(page); + } + UnlockPage(page); + } + up(&inode->i_sem); +} + /* * Lookup the data. This is trivial - if the dentry didn't already * exist, we know it is negative. @@ -528,19 +555,6 @@ return error; } -static int shmem_symlink(struct inode * dir, struct dentry *dentry, const char * symname) -{ - int error; - - error = shmem_mknod(dir, dentry, S_IFLNK | S_IRWXUGO, 0); - if (!error) { - int l = strlen(symname)+1; - struct inode *inode = dentry->d_inode; - error = block_symlink(inode, symname, l); - } - return error; -} - static int shmem_mmap(struct file * file, struct vm_area_struct * vma) { struct vm_operations_struct * ops; @@ -677,7 +691,6 @@ lookup: shmem_lookup, link: shmem_link, unlink: shmem_unlink, - symlink: shmem_symlink, mkdir: shmem_mkdir, rmdir: shmem_rmdir, mknod: shmem_mknod, diff -u --recursive --new-file v2.4.0/linux/mm/slab.c linux/mm/slab.c --- v2.4.0/linux/mm/slab.c Sun Oct 1 19:55:17 2000 +++ linux/mm/slab.c Wed Jan 10 14:24:32 2001 @@ -1702,7 +1702,7 @@ * kmem_cache_reap - Reclaim memory from caches. * @gfp_mask: the type of memory required. * - * Called from try_to_free_page(). 
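The new shmem_lock() above pins (SHM_LOCK) or unpins (SHM_UNLOCK) every page of a SysV shared memory segment by taking or dropping an extra page cache reference, and the ipc/shm.c hunk earlier now routes shmctl() through it. From user space the same path is exercised roughly as follows; a minimal sketch, which needs CAP_IPC_LOCK and omits most error handling.

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
	/* create a 64 KB private segment, then ask the kernel to pin it */
	int id = shmget(IPC_PRIVATE, 64 * 1024, IPC_CREAT | 0600);

	if (id < 0 || shmctl(id, SHM_LOCK, NULL) < 0) {	/* needs CAP_IPC_LOCK */
		perror("shm lock");
		return 1;
	}
	shmctl(id, SHM_UNLOCK, NULL);	/* drops the extra page references again */
	shmctl(id, IPC_RMID, NULL);
	return 0;
}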
+ * Called from do_try_to_free_pages() and __alloc_pages() */ void kmem_cache_reap (int gfp_mask) { diff -u --recursive --new-file v2.4.0/linux/mm/vmalloc.c linux/mm/vmalloc.c --- v2.4.0/linux/mm/vmalloc.c Tue Nov 28 22:43:39 2000 +++ linux/mm/vmalloc.c Mon Jan 15 16:54:20 2001 @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff -u --recursive --new-file v2.4.0/linux/mm/vmscan.c linux/mm/vmscan.c --- v2.4.0/linux/mm/vmscan.c Wed Jan 3 20:45:26 2001 +++ linux/mm/vmscan.c Mon Jan 15 12:36:49 2001 @@ -35,45 +35,21 @@ * using a process that no longer actually exists (it might * have died while we slept). */ -static int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask) +static void try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page) { pte_t pte; swp_entry_t entry; - struct page * page; - int onlist; - - pte = *page_table; - if (!pte_present(pte)) - goto out_failed; - page = pte_page(pte); - if ((!VALID_PAGE(page)) || PageReserved(page)) - goto out_failed; - - if (!mm->swap_cnt) - return 1; - - mm->swap_cnt--; - onlist = PageActive(page); /* Don't look at this pte if it's been accessed recently. */ if (ptep_test_and_clear_young(page_table)) { - age_page_up(page); - goto out_failed; + page->age += PAGE_AGE_ADV; + if (page->age > PAGE_AGE_MAX) + page->age = PAGE_AGE_MAX; + return; } - if (!onlist) - /* The page is still mapped, so it can't be freeable... */ - age_page_down_ageonly(page); - - /* - * If the page is in active use by us, or if the page - * is in active use by others, don't unmap it or - * (worse) start unneeded IO. - */ - if (page->age > 0) - goto out_failed; if (TryLockPage(page)) - goto out_failed; + return; /* From this point on, the odds are that we're going to * nuke this pte, so read and clear the pte. This hook @@ -87,9 +63,6 @@ * Is the page already in the swap cache? If so, then * we can just drop our reference to it without doing * any IO - it's already up-to-date on disk. - * - * Return 0, as we didn't actually free any real - * memory, and we should just continue our scan. */ if (PageSwapCache(page)) { entry.val = page->index; @@ -99,12 +72,12 @@ swap_duplicate(entry); set_pte(page_table, swp_entry_to_pte(entry)); drop_pte: - UnlockPage(page); mm->rss--; - deactivate_page(page); + if (!page->age) + deactivate_page(page); + UnlockPage(page); page_cache_release(page); -out_failed: - return 0; + return; } /* @@ -153,34 +126,20 @@ out_unlock_restore: set_pte(page_table, pte); UnlockPage(page); - return 0; + return; } -/* - * A new implementation of swap_out(). We do not swap complete processes, - * but only a small number of blocks, before we continue with the next - * process. The number of blocks actually swapped is determined on the - * number of page faults, that this process actually had in the last time, - * so we won't swap heavily used processes all the time ... - * - * Note: the priority argument is a hint on much CPU to waste with the - * swap block search, not a hint, of how much blocks to swap with - * each process. 
- * - * (C) 1993 Kai Petzke, wpp@marie.physik.tu-berlin.de - */ - -static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int gfp_mask) +static int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count) { pte_t * pte; unsigned long pmd_end; if (pmd_none(*dir)) - return 0; + return count; if (pmd_bad(*dir)) { pmd_ERROR(*dir); pmd_clear(dir); - return 0; + return count; } pte = pte_offset(dir, address); @@ -190,28 +149,33 @@ end = pmd_end; do { - int result; - mm->swap_address = address + PAGE_SIZE; - result = try_to_swap_out(mm, vma, address, pte, gfp_mask); - if (result) - return result; + if (pte_present(*pte)) { + struct page *page = pte_page(*pte); + + if (VALID_PAGE(page) && !PageReserved(page)) { + try_to_swap_out(mm, vma, address, pte, page); + if (!--count) + break; + } + } address += PAGE_SIZE; pte++; } while (address && (address < end)); - return 0; + mm->swap_address = address + PAGE_SIZE; + return count; } -static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int gfp_mask) +static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int count) { pmd_t * pmd; unsigned long pgd_end; if (pgd_none(*dir)) - return 0; + return count; if (pgd_bad(*dir)) { pgd_ERROR(*dir); pgd_clear(dir); - return 0; + return count; } pmd = pmd_offset(dir, address); @@ -221,23 +185,23 @@ end = pgd_end; do { - int result = swap_out_pmd(mm, vma, pmd, address, end, gfp_mask); - if (result) - return result; + count = swap_out_pmd(mm, vma, pmd, address, end, count); + if (!count) + break; address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); - return 0; + return count; } -static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int gfp_mask) +static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int count) { pgd_t *pgdir; unsigned long end; /* Don't swap out areas which are locked down */ if (vma->vm_flags & (VM_LOCKED|VM_RESERVED)) - return 0; + return count; pgdir = pgd_offset(mm, address); @@ -245,18 +209,17 @@ if (address >= end) BUG(); do { - int result = swap_out_pgd(mm, vma, pgdir, address, end, gfp_mask); - if (result) - return result; + count = swap_out_pgd(mm, vma, pgdir, address, end, count); + if (!count) + break; address = (address + PGDIR_SIZE) & PGDIR_MASK; pgdir++; } while (address && (address < end)); - return 0; + return count; } -static int swap_out_mm(struct mm_struct * mm, int gfp_mask) +static int swap_out_mm(struct mm_struct * mm, int count) { - int result = 0; unsigned long address; struct vm_area_struct* vma; @@ -276,8 +239,8 @@ address = vma->vm_start; for (;;) { - result = swap_out_vma(mm, vma, address, gfp_mask); - if (result) + count = swap_out_vma(mm, vma, address, count); + if (!count) goto out_unlock; vma = vma->vm_next; if (!vma) @@ -287,94 +250,63 @@ } /* Reset to 0 when we reach the end of address space */ mm->swap_address = 0; - mm->swap_cnt = 0; out_unlock: spin_unlock(&mm->page_table_lock); - return result; + return !count; } /* - * Select the task with maximal swap_cnt and try to swap out a page. * N.B. This function returns only 0 or 1. Return values != 1 from * the lower level routines result in continued processing. 
*/ #define SWAP_SHIFT 5 #define SWAP_MIN 8 +static inline int swap_amount(struct mm_struct *mm) +{ + int nr = mm->rss >> SWAP_SHIFT; + return nr < SWAP_MIN ? SWAP_MIN : nr; +} + static int swap_out(unsigned int priority, int gfp_mask) { int counter; - int __ret = 0; + int retval = 0; + struct mm_struct *mm = current->mm; - /* - * We make one or two passes through the task list, indexed by - * assign = {0, 1}: - * Pass 1: select the swappable task with maximal RSS that has - * not yet been swapped out. - * Pass 2: re-assign rss swap_cnt values, then select as above. - * - * With this approach, there's no need to remember the last task - * swapped out. If the swap-out fails, we clear swap_cnt so the - * task won't be selected again until all others have been tried. - * - * Think of swap_cnt as a "shadow rss" - it tells us which process - * we want to page out (always try largest first). - */ - counter = (nr_threads << SWAP_SHIFT) >> priority; - if (counter < 1) - counter = 1; + /* Always start by trying to penalize the process that is allocating memory */ + if (mm) + retval = swap_out_mm(mm, swap_amount(mm)); - for (; counter >= 0; counter--) { + /* Then, look at the other mm's */ + counter = mmlist_nr >> priority; + do { struct list_head *p; - unsigned long max_cnt = 0; - struct mm_struct *best = NULL; - int assign = 0; - int found_task = 0; - select: + spin_lock(&mmlist_lock); p = init_mm.mmlist.next; - for (; p != &init_mm.mmlist; p = p->next) { - struct mm_struct *mm = list_entry(p, struct mm_struct, mmlist); - if (mm->rss <= 0) - continue; - found_task++; - /* Refresh swap_cnt? */ - if (assign == 1) { - mm->swap_cnt = (mm->rss >> SWAP_SHIFT); - if (mm->swap_cnt < SWAP_MIN) - mm->swap_cnt = SWAP_MIN; - } - if (mm->swap_cnt > max_cnt) { - max_cnt = mm->swap_cnt; - best = mm; - } - } + if (p == &init_mm.mmlist) + goto empty; + + /* Move it to the back of the queue.. */ + list_del(p); + list_add_tail(p, &init_mm.mmlist); + mm = list_entry(p, struct mm_struct, mmlist); - /* Make sure it doesn't disappear */ - if (best) - atomic_inc(&best->mm_users); + /* Make sure the mm doesn't disappear when we drop the lock.. */ + atomic_inc(&mm->mm_users); spin_unlock(&mmlist_lock); - /* - * We have dropped the tasklist_lock, but we - * know that "mm" still exists: we are running - * with the big kernel lock, and exit_mm() - * cannot race with us. - */ - if (!best) { - if (!assign && found_task > 0) { - assign = 1; - goto select; - } - break; - } else { - __ret = swap_out_mm(best, gfp_mask); - mmput(best); - break; - } - } - return __ret; + /* Walk about 6% of the address space each time */ + retval |= swap_out_mm(mm, swap_amount(mm)); + mmput(mm); + } while (--counter >= 0); + return retval; + +empty: + spin_unlock(&mmlist_lock); + return 0; } @@ -540,7 +472,6 @@ */ if (PageDirty(page)) { int (*writepage)(struct page *) = page->mapping->a_ops->writepage; - int result; if (!writepage) goto page_active; @@ -558,16 +489,12 @@ page_cache_get(page); spin_unlock(&pagemap_lru_lock); - result = writepage(page); + writepage(page); page_cache_release(page); /* And re-start the thing.. */ spin_lock(&pagemap_lru_lock); - if (result != 1) - continue; - /* writepage refused to do anything */ - set_page_dirty(page); - goto page_active; + continue; } /* @@ -808,6 +735,9 @@ int inactive_shortage(void) { int shortage = 0; + pg_data_t *pgdat = pgdat_list; + + /* Is the inactive dirty list too small? 
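swap_amount() above decides how many present ptes a single pass over one mm will try to unmap: mm->rss >> SWAP_SHIFT, but never fewer than SWAP_MIN. A small stand-alone illustration of just that arithmetic, with rss passed in directly and arbitrary example figures:

#include <stdio.h>

#define SWAP_SHIFT 5
#define SWAP_MIN   8

/* same clamp as swap_amount(), rss given as a plain page count */
static int swap_amount(unsigned long rss)
{
	int nr = rss >> SWAP_SHIFT;
	return nr < SWAP_MIN ? SWAP_MIN : nr;
}

int main(void)
{
	printf("%d\n", swap_amount(16384));	/* large mm: 16384 >> 5 = 512 */
	printf("%d\n", swap_amount(100));	/* small mm: clamped up to 8  */
	return 0;
}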
*/ shortage += freepages.high; shortage += inactive_target; @@ -818,7 +748,27 @@ if (shortage > 0) return shortage; - return 0; + /* If not, do we have enough per-zone pages on the inactive list? */ + + shortage = 0; + + do { + int i; + for(i = 0; i < MAX_NR_ZONES; i++) { + int zone_shortage; + zone_t *zone = pgdat->node_zones+ i; + + zone_shortage = zone->pages_high; + zone_shortage -= zone->inactive_dirty_pages; + zone_shortage -= zone->inactive_clean_pages; + zone_shortage -= zone->free_pages; + if (zone_shortage > 0) + shortage += zone_shortage; + } + pgdat = pgdat->node_next; + } while (pgdat); + + return shortage; } /* @@ -833,72 +783,35 @@ * really care about latency. In that case we don't try * to free too many pages. */ +#define DEF_PRIORITY (6) static int refill_inactive(unsigned int gfp_mask, int user) { - int priority, count, start_count, made_progress; + int count, start_count, maxtry; count = inactive_shortage() + free_shortage(); if (user) count = (1 << page_cluster); start_count = count; - /* Always trim SLAB caches when memory gets low. */ - kmem_cache_reap(gfp_mask); - - priority = 6; + maxtry = 6; do { - made_progress = 0; - if (current->need_resched) { __set_current_state(TASK_RUNNING); schedule(); } - while (refill_inactive_scan(priority, 1)) { - made_progress = 1; + while (refill_inactive_scan(DEF_PRIORITY, 1)) { if (--count <= 0) goto done; } - /* - * don't be too light against the d/i cache since - * refill_inactive() almost never fail when there's - * really plenty of memory free. - */ - shrink_dcache_memory(priority, gfp_mask); - shrink_icache_memory(priority, gfp_mask); + /* If refill_inactive_scan failed, try to page stuff out.. */ + swap_out(DEF_PRIORITY, gfp_mask); - /* - * Then, try to page stuff out.. - */ - while (swap_out(priority, gfp_mask)) { - made_progress = 1; - if (--count <= 0) - goto done; - } - - /* - * If we either have enough free memory, or if - * page_launder() will be able to make enough - * free memory, then stop. - */ - if (!inactive_shortage() || !free_shortage()) - goto done; - - /* - * Only switch to a lower "priority" if we - * didn't make any useful progress in the - * last loop. - */ - if (!made_progress) - priority--; - } while (priority >= 0); - - /* Always end on a refill_inactive.., may sleep... */ - while (refill_inactive_scan(0, 1)) { - if (--count <= 0) - goto done; - } + if (--maxtry <= 0) + return 0; + + } while (inactive_shortage()); done: return (count < start_count); @@ -922,20 +835,29 @@ /* * If needed, we move pages from the active list - * to the inactive list. We also "eat" pages from - * the inode and dentry cache whenever we do this. + * to the inactive list. */ - if (free_shortage() || inactive_shortage()) { - shrink_dcache_memory(6, gfp_mask); - shrink_icache_memory(6, gfp_mask); + if (inactive_shortage()) ret += refill_inactive(gfp_mask, user); + + /* + * Delete pages from the inode and dentry caches and + * reclaim unused slab cache if memory is low. + */ + if (free_shortage()) { + shrink_dcache_memory(DEF_PRIORITY, gfp_mask); + shrink_icache_memory(DEF_PRIORITY, gfp_mask); } else { /* - * Reclaim unused slab cache memory. + * Illogical, but true. At least for now. + * + * If we're _not_ under shortage any more, we + * reap the caches. Why? Because a noticeable + * part of the caches are the buffer-heads, + * which we'll want to keep if under shortage. */ kmem_cache_reap(gfp_mask); - ret = 1; - } + } return ret; } @@ -988,13 +910,8 @@ static int recalc = 0; /* If needed, try to free some memory. 
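The per-zone part of inactive_shortage() above counts a zone as short whenever its free plus inactive pages no longer cover pages_high. A hypothetical helper making that arithmetic explicit; the figures in the trailing comment are invented for illustration and nothing here is code from the patch.

/* Mirrors the per-zone test above. */
static int zone_inactive_shortage(int pages_high, int inactive_dirty,
				  int inactive_clean, int free_pages)
{
	int shortage = pages_high;

	shortage -= inactive_dirty;
	shortage -= inactive_clean;
	shortage -= free_pages;
	return shortage > 0 ? shortage : 0;
}
/* e.g. pages_high 256, 100 dirty, 60 clean, 40 free -> contributes 56 */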
*/ - if (inactive_shortage() || free_shortage()) { - int wait = 0; - /* Do we need to do some synchronous flushing? */ - if (waitqueue_active(&kswapd_done)) - wait = 1; - do_try_to_free_pages(GFP_KSWAPD, wait); - } + if (inactive_shortage() || free_shortage()) + do_try_to_free_pages(GFP_KSWAPD, 0); /* * Do some (very minimal) background scanning. This @@ -1002,7 +919,7 @@ * every minute. This clears old referenced bits * and moves unused pages to the inactive list. */ - refill_inactive_scan(6, 0); + refill_inactive_scan(DEF_PRIORITY, 0); /* Once a second, recalculate some VM stats. */ if (time_after(jiffies, recalc + HZ)) { @@ -1010,11 +927,6 @@ recalculate_vm_stats(); } - /* - * Wake up everybody waiting for free memory - * and unplug the disk queue. - */ - wake_up_all(&kswapd_done); run_task_queue(&tq_disk); /* @@ -1045,33 +957,10 @@ } } -void wakeup_kswapd(int block) +void wakeup_kswapd(void) { - DECLARE_WAITQUEUE(wait, current); - - if (current == kswapd_task) - return; - - if (!block) { - if (waitqueue_active(&kswapd_wait)) - wake_up(&kswapd_wait); - return; - } - - /* - * Kswapd could wake us up before we get a chance - * to sleep, so we have to be very careful here to - * prevent SMP races... - */ - __set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&kswapd_done, &wait); - - if (waitqueue_active(&kswapd_wait)) - wake_up(&kswapd_wait); - schedule(); - - remove_wait_queue(&kswapd_done, &wait); - __set_current_state(TASK_RUNNING); + if (current != kswapd_task) + wake_up_process(kswapd_task); } /* @@ -1096,7 +985,7 @@ /* * Kreclaimd will move pages from the inactive_clean list to the * free list, in order to keep atomic allocations possible under - * all circumstances. Even when kswapd is blocked on IO. + * all circumstances. */ int kreclaimd(void *unused) { diff -u --recursive --new-file v2.4.0/linux/net/ipv4/igmp.c linux/net/ipv4/igmp.c --- v2.4.0/linux/net/ipv4/igmp.c Thu Sep 7 08:32:01 2000 +++ linux/net/ipv4/igmp.c Tue Jan 9 10:54:57 2001 @@ -504,8 +504,8 @@ im->timer.function=&igmp_timer_expire; im->unsolicit_count = IGMP_Unsolicited_Report_Count; im->reporter = 0; - im->loaded = 0; #endif + im->loaded = 0; write_lock_bh(&in_dev->lock); im->next=in_dev->mc_list; in_dev->mc_list=im; diff -u --recursive --new-file v2.4.0/linux/net/ipv4/tcp.c linux/net/ipv4/tcp.c --- v2.4.0/linux/net/ipv4/tcp.c Tue Nov 28 21:53:45 2000 +++ linux/net/ipv4/tcp.c Wed Jan 10 14:12:12 2001 @@ -954,7 +954,7 @@ */ skb = sk->write_queue.prev; if (tp->send_head && - (mss_now - skb->len) > 0) { + (mss_now > skb->len)) { copy = skb->len; if (skb_tailroom(skb) > 0) { int last_byte_was_odd = (copy % 4); diff -u --recursive --new-file v2.4.0/linux/net/ipv4/tcp_input.c linux/net/ipv4/tcp_input.c --- v2.4.0/linux/net/ipv4/tcp_input.c Fri Dec 29 14:07:24 2000 +++ linux/net/ipv4/tcp_input.c Wed Jan 10 14:12:12 2001 @@ -1705,7 +1705,7 @@ if ((__s32)when < (__s32)tp->rttvar) when = tp->rttvar; - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, when); + tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, min(when, TCP_RTO_MAX)); } } diff -u --recursive --new-file v2.4.0/linux/net/sunrpc/sunrpc_syms.c linux/net/sunrpc/sunrpc_syms.c --- v2.4.0/linux/net/sunrpc/sunrpc_syms.c Fri Apr 21 16:08:52 2000 +++ linux/net/sunrpc/sunrpc_syms.c Thu Jan 11 15:53:02 2001 @@ -36,6 +36,7 @@ EXPORT_SYMBOL(rpciod_up); EXPORT_SYMBOL(rpc_new_task); EXPORT_SYMBOL(rpc_wake_up_status); +EXPORT_SYMBOL(rpc_release_task); /* RPC client functions */ EXPORT_SYMBOL(rpc_create_client); diff -u --recursive --new-file v2.4.0/linux/scripts/checkconfig.pl 
linux/scripts/checkconfig.pl --- v2.4.0/linux/scripts/checkconfig.pl Tue Aug 31 09:33:09 1999 +++ linux/scripts/checkconfig.pl Mon Jan 15 15:31:19 2001 @@ -14,6 +14,7 @@ # Initialize variables. my $fInComment = 0; + my $fInString = 0; my $fUseConfig = 0; my $iLinuxConfig = 0; my %configList = (); @@ -23,6 +24,10 @@ # Strip comments. $fInComment && (s+^.*?\*/+ +o ? ($fInComment = 0) : next); m+/\*+o && (s+/\*.*?\*/+ +go, (s+/\*.*$+ +o && ($fInComment = 1))); + + # Strip strings. + $fInString && (s+^.*?"+ +o ? ($fInString = 0) : next); + m+"+o && (s+".*?"+ +go, (s+".*$+ +o && ($fInString = 1))); # Pick up definitions. if ( m/^\s*#/o )