diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/CREDITS linuxppc64_2_4/CREDITS
--- ../kernel.org/linux-2.4.19/CREDITS	Fri Apr 19 11:00:43 2002
+++ linuxppc64_2_4/CREDITS	Mon Apr 22 10:35:08 2002
@@ -986,6 +986,14 @@
 S: 80050-430 - Curitiba - Paraná
 S: Brazil
 
+N: Tom Gall
+E: tom_gall@vnet.ibm.com
+E: tgall@rochcivictheatre.org
+D: ppc64, ppc
+S: 710 Walnut St
+S: Mantorville, MN 55955
+S: USA
+
 N: Nigel Gamble
 E: nigel@nrg.org
 E: nigel@sgi.com
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/Documentation/Configure.help linuxppc64_2_4/Documentation/Configure.help
--- ../kernel.org/linux-2.4.19/Documentation/Configure.help	Mon Apr 22 11:34:25 2002
+++ linuxppc64_2_4/Documentation/Configure.help	Tue Apr 23 09:37:22 2002
@@ -232,6 +232,13 @@
   CPU and the single-board computers built around it, targeted for
   network and embedded applications. For more information see the
   Axis Communication site.
 
+PowerPC64 processor
+CONFIG_PPC64
+  The PowerPC architecture was designed for both 32 bit and 64 bit
+  processor implementations. 64 bit PowerPC processors are in many
+  ways a superset of their 32 bit PowerPC cousins. Each 64 bit PowerPC
+  processor also has a 32 bit mode to allow for 32 bit compatibility.
+  The home of the PowerPC 64 Linux project is at <http://linuxppc64.org>.
+
 Multiquad support for NUMA systems
 CONFIG_MULTIQUAD
@@ -15232,6 +15239,20 @@
   hard drives and ADFS-formatted floppy disks. This is experimental
   code, so if you're unsure, say N.
 
+JFS filesystem support
+CONFIG_JFS_FS
+  This is a port of IBM's Journaled Filesystem <http://oss.software.ibm.com/jfs/>.
+  More information is available in the file
+  Documentation/filesystems/jfs.txt.
+
+  If you do not intend to use the JFS filesystem, say N.
+
+JFS Debugging
+CONFIG_JFS_DEBUG
+  If you are experiencing any problems with the JFS filesystem, say
+  Y here. This will cause additional debugging messages to be
+  written to the system log. Under normal circumstances, this
+  results in very little overhead.
+
 /dev/pts file system for Unix98 PTYs
 CONFIG_DEVPTS_FS
   You should say Y here if you said Y to "Unix98 PTY support" above.
@@ -16401,6 +16422,19 @@
   The module will be called isicom.o.
 
+IBM Multiport Serial Adapter
+CONFIG_ICOM
+  This driver is for a family of multiport serial adapters including
+  the 2 port RVX (iSeries 2745), the 2 port modem (iSeries
+  2772), and the 1 port RVX + 1 port modem (iSeries 2771). The
+  module is called iCom.o.
+
+CONFIG_ICOM_MODEM_CC
+  This field entry enables the device driver to configure the modem
+  for appropriate operation based on country code. If you do not
+  have an internal modem card, a blank entry is recommended.
+  If you do have an internal modem card, look for the comment in iCom.c
+  indicating which value relates to your country.
+
 Unix98 PTY support
 CONFIG_UNIX98_PTYS
   A pseudo terminal (PTY) is a software device consisting of two
@@ -21073,6 +21107,12 @@
   Select APUS if configuring for a PowerUP Amiga.
   More information is available at:
 
+# Choice: i or p
+Platform support
+CONFIG_PPC_ISERIES
+  Linux runs on certain models of the IBM AS/400, now known as the
+  IBM iSeries. Generally, if you can run LPAR (Logical Partitioning)
+  on your iSeries, you can run Linux in a partition on your machine.
 
 AltiVec kernel support
 CONFIG_ALTIVEC
@@ -21136,6 +21176,16 @@
   You may also want to compile the dma sound driver as a module and
   have it autoloaded. The act of removing the module shuts down the
   sound hardware for more power savings.
+
+Platform support
+CONFIG_PPC_PSERIES
+  Linux runs on most models of IBM pSeries hardware.
+  (pSeries used to be known as the RS/6000)
+
+  See for exact model information for the
+  64 bit PowerPC kernel.
+
+  pSeries Linux information from IBM can be found at:
 
 APM emulation
 CONFIG_PMAC_APM_EMU
@@ -21424,6 +21474,12 @@
   Date of Release: early 2001 (?)
   End of life: -
   URL:
+Support for Large Memory
+CONFIG_MSCHUNKS
+  MsChunks stands for Main Store Chunks and specifically allows the
+  64 bit PowerPC Linux kernel to optimize for machines with sparse,
+  discontiguous memory. iSeries kernels need to have this on. For
+  pSeries hardware, it is recommended that you answer N.
 
 ADB raw keycode support
 CONFIG_MAC_ADBKEYCODES
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/Documentation/cachetlb.txt linuxppc64_2_4/Documentation/cachetlb.txt
--- ../kernel.org/linux-2.4.19/Documentation/cachetlb.txt	Fri Apr 19 11:00:11 2002
+++ linuxppc64_2_4/Documentation/cachetlb.txt	Mon Apr 22 10:24:05 2002
@@ -260,8 +260,9 @@
 
   Here is the new interface:
 
-	void copy_user_page(void *to, void *from, unsigned long address)
-	void clear_user_page(void *to, unsigned long address)
+	void copy_user_page(struct page *to, struct page *from,
+			    unsigned long address)
+	void clear_user_page(struct page *to, unsigned long address)
 
 	These two routines store data in user anonymous or COW
 	pages. It allows a port to efficiently avoid D-cache alias
@@ -279,6 +280,11 @@
 
 	If D-cache aliasing is not an issue, these two routines may
 	simply call memcpy/memset directly and do nothing more.
+
+	There are default versions of these procedures supplied in
+	include/linux/highmem.h. If a port does not want to use the
+	default versions, it should declare them and define the symbol
+	__HAVE_ARCH_USER_PAGE in include/asm/page.h.
 
   void flush_dcache_page(struct page *page)
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/Documentation/filesystems/00-INDEX linuxppc64_2_4/Documentation/filesystems/00-INDEX
--- ../kernel.org/linux-2.4.19/Documentation/filesystems/00-INDEX	Fri Apr 19 10:30:50 2002
+++ linuxppc64_2_4/Documentation/filesystems/00-INDEX	Thu Sep 13 14:29:38 2001
@@ -22,6 +22,8 @@
   - info and mount options for the OS/2 HPFS.
 isofs.txt
   - info and mount options for the ISO 9660 (CDROM) filesystem.
+jfs.txt
+  - info and mount options for the JFS filesystem.
 ncpfs.txt
   - info on Novell Netware(tm) filesystem using NCP protocol.
 ntfs.txt
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/Documentation/filesystems/changelog.jfs linuxppc64_2_4/Documentation/filesystems/changelog.jfs
--- ../kernel.org/linux-2.4.19/Documentation/filesystems/changelog.jfs	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/Documentation/filesystems/changelog.jfs	Tue Apr 23 11:25:34 2002
@@ -0,0 +1,234 @@
+IBM's Journaled File System (JFS) for Linux version 1.0.17
+Team members
+Steve Best		sbest@us.ibm.com
+Dave Kleikamp		shaggy@austin.ibm.com
+Barry Arndt		barndt@us.ibm.com
+Christoph Hellwig	hch@infradead.org
+
+
+Release April 2, 2002 (version 1.0.17)
+
+This is our fifty-fifth release of IBM's Enterprise JFS technology port to Linux.
+Beta 1 was release 0.1.0 on 12/8/2000, Beta 2 was release 0.2.0 on 3/7/2001,
+Beta 3 was release 0.3.0 on 4/30/2001, and release 1.0.0 on 6/28/2001.
+
+
+Function and Fixes in drop 55 (1.0.17)
+  - Call sb_set_blocksize instead of set_blocksize in 2.5 (Christoph Hellwig)
+  - Replace strtok by strsep (Christoph Hellwig)
+  - Store entire device number in log superblock rather than just the minor.
+  - Include file cleanup (Christoph Hellwig)
+  - Fix race introduced by thread handling cleanups (Christoph Hellwig)
+  - Detect dtree corruption to avoid infinite loop
+  - JFS needs to include completion.h
+  - Support external log (journal) device file system work, part 1
+    (Christoph Hellwig)
+
+Function and Fixes in drop 54 (1.0.16)
+  - Limit readdir offset to signed integer for NFSv2 (Christoph Hellwig)
+  - Missing static in jfs_imap.c (Christoph Hellwig)
+  - Fix infinite loop in jfs_readdir; we weren't updating the directory
+    index table completely (bug # 2591)
+  - Sync up 2.4 tree with 2.5 (Christoph Hellwig & Shaggy):
+    move to completions, provide back-compat for pre-2.4.7,
+    remove dead code,
+    add kdev_t conversion that should have been in 2.4 anyway,
+    move one-time inode initialization into slab constructor
+  - Remove non-core files from CVS
+
+Function and Fixes in drop 53 (1.0.15)
+  - Fix trap when appending to very large file
+  - Moving jfs headers into fs/jfs at Linus' request
+  - Move up to linux-2.5.4
+  - Fix file size limit on 32-bit (Andi Kleen)
+  - Make changelog more readable and include only 1.0.0 and above
+    (Christoph Hellwig)
+  - Don't allocate metadata pages from high memory; JFS keeps them kmapped
+    too long, causing deadlock
+  - Fix xtree corruption when creating file with >= 64 GB of physically
+    contiguous dasd
+  - Replace semaphore with struct completion for thread startup/shutdown
+    (Benedikt Spranger)
+  - Cleanup Tx alloc/free (Christoph Hellwig)
+  - Move up to linux-2.5.3
+  - Thread cleanups (Christoph Hellwig)
+  - First step toward making tblocks and tlocks dynamically allocated.
+    Introduce tid_t and lid_t to insulate the majority of the code from
+    future changes. Also hide TxBlock and TxLock arrays by using macros
+    to get from tids and lids to real structures.
+  - Minor list-handling cleanup (Christoph Hellwig)
+  - Replace altnext and altprev with struct list_head
+  - Clean up the debugging code and add support for collecting statistics
+    (Christoph Hellwig)
+
+Function and Fixes in drop 52 (1.0.14)
+  - Fix hang in invalidate_metapages when jfs.o is built as a module
+  - Fix anon_list removal logic in txLock
+
+Function and Fixes in drop 51 (1.0.13)
+  - chmod changes on newly created directories are lost after umount (bug 2535)
+  - Page locking race fixes
+  - Improve metapage locking
+  - Fix timing window: lock page while metapage is active to avoid page going
+    away before the metadata is released (fixed crash during mount/umount
+    testing)
+  - Make changes for 2.5.2 kernel
+  - Fix race condition truncating large files
+
+Function and Fixes in drop 50 (1.0.12)
+  - Add O_DIRECT support
+  - Add support for 2.4.17 kernel
+  - Make sure COMMIT_STALE gets reset before the inode is unlocked; fixing
+    this gets rid of XT_GETPAGE errors
+  - Remove invalid __exit keyword from metapage_exit and txExit
+  - Fix assert(log->cqueue.head == NULL) by waiting longer
+
+Function and Fixes in drop 49 (1.0.11)
+  - Readdir was not handling multibyte codepages correctly
+  - Make mount option parsing more robust
+  - Add iocharset mount option
+  - Journalling of symlinks incorrect, resulting in logredo failure of -265
+  - Add jfsutils information to Changes file
+  - Improve recoverability of the file system when metadata corruption is
+    detected
+  - Fix kernel OOPS when root inode is corrupted
+
+Function and Fixes in drop 48 (1.0.10)
+  - Put inodes later on hash queues
+  - Fix boundary case in xtTruncate
+  - When invalidating metadata, try to flush the dirty buffers rather than
+    sync them.
+  - Add another sanity check to avoid trapping when imap is corrupt
+  - Fix file truncate while removing large file (assert(cmp == 0))
+  - read_cache_page returns ERR_PTR, not NULL, on error
+  - Add dtSearchNode and dtRelocate
+  - JFS needs to use generic_file_open & generic_file_llseek
+  - Remove lazyQwait, etc.; it created an unnecessary bottleneck in TxBegin
+
+Function and Fixes in drop 47 (1.0.9)
+  - Fix data corruption problem when creating files while deleting others
+    (jitterbug 183)
+  - Make sure all metadata is written before finalizing the log
+  - Fix serialization problem in shutdown by setting i_size of directory
+    sooner (bugzilla #334)
+  - JFS should quit whining when special files are marked dirty during
+    read-only mount
+  - Must always check rc after DT_GETPAGE
+  - Add diExtendFS
+  - Removing defconfig from JFS source - not really needed
+
+Function and Fixes in drop 46 (1.0.8)
+  - Synclist was being built backwards, causing logredo to quit too early
+  - jfs_compat.h needs to include module.h
+  - Uncomment EXPORTS_NO_SYMBOLS in super.c
+  - Minor code cleanup
+  - xtree of zero-truncated file not being logged
+  - Fix logging on file truncate
+  - Remove unused metapage fields
+
+Function and Fixes in drop 45 (1.0.7)
+  - Cleanup: remove IS_KIOBUFIO define
+  - Cleanup: remove TRUNC_NO_TOSS define
+  - Have jFYI's use the name directly from dentry
+  - Remove null _ALLOC and _FREE macros and also make spinlocks static
+  - Cleanup: add externs where needed in the header files
+  - jfs_write_inode is a bad place to call iput; also limit warnings
+  - More truncate cleanup
+  - Truncate cleanup
+  - Add missing statics in jfs_metapage.c
+  - fsync fixes
+  - Clean up symlink code - use page_symlink_inode_operations
+  - Unicode handling cleanup
+  - Cleanup: replace UniChar with wchar_t
+  - Get rid of CDLL_* macros - use list.h instead
+  - 2.4.11-pre-x mount problem: call new_inode instead of get_empty_inode
+  - Use kernel min/max macros
+  - Add MODULE_LICENSE stub for older kernels
+  - IA64/gcc3 fixes
+  - Log Manager fixes, introduce __SLEEP_COND macro
+  - Mark superblock dirty when some errors detected (forcing fsck to be run)
+  - More robust remounting from r/o to r/w
+  - Misc. cleanup: add static where appropriate
+  - Small cleanup in jfs_umount_rw
+  - Add MODULE_ stuff
+  - Set *dropped_lock in alloc_metapage
+  - Get rid of unused log list
+  - Clean up jfs_imap.c to remove _OLD_STUFF and _NO_MORE_MOUNT_INODE defines
+  - Log manager cleanup
+  - Transaction manager cleanup
+  - Correct memory allocation flags
+  - Better handling of iterative truncation
+  - Change continue to break, otherwise we don't re-acquire LAZY_LOCK
+
+Function and Fixes in drop 44 (1.0.6)
+  - Create jfs_incore.h, which merges linux/jfs_fs.h, linux/jfs_fs_i.h,
+    and jfs_fs_sb.h
+  - Create a configuration option to handle the JFS_DEBUG define
+  - Fixed a few cases where positive error codes were returned to the VFS
+  - Replace jfs_dir_read by generic_read_dir
+  - jfs_fsync_inode is only called by jfs_fsync_file; merge the two and
+    rename to jfs_fsync
+  - Add a bunch of missing externs
+  - jfs_rwlock_lock is unused, nuke it
+  - Always use atomic set/test_bit operations to protect jfs_ip->cflag
+  - Combine jfs_ip->flag with jfs_ip->cflag
+  - Fixed minor format errors reported by fsck
+  - cflags should be long so bitops always work correctly
+  - Use GFP_NOFS for runtime memory allocations
+  - Support VM changes in 2.4.10 of the kernel
+  - Remove ifdefs supporting older 2.4 kernels; JFS now requires at least
+    2.4.3 or 2.4.2-ac2
+  - Simplify and remove one use of IWRITE_TRYLOCK
+  - jfs_truncate was not passing tid to xtTruncate
+  - Removed obsolete extent_page workaround
+  - Correct recovery from failed diAlloc call (disk full)
+  - In write_metapage, don't call commit_write if prepare_write failed
+
+Function and Fixes in drop 43 (1.0.5)
+  - Allow separate allocation of JFS-private superblock/inode data
+  - Remove checks in namei.c that are already done by the VFS
+  - Remove redundant mutex defines
+  - Replace all occurrences of #include with #include
+  - Work around race condition in remount; fixes OOPS during shutdown
+  - Truncate large files incrementally (affects directories too)
+
+Function and Fixes in drop 42 (1.0.4)
+  - Fixed compiler warnings in the FS when building on 64 bit systems
+  - Fixed deadlock where jfsCommit hung in hold_metapage
+  - Fixed problems with remount
+  - Reserve metapages for jfsCommit thread
+  - Get rid of buggy invalidate_metapage & use discard_metapage
+  - Don't hand metapages to jfsIOthread (too many context switches)
+    (jitterbug 125, bugzilla 238)
+  - Fix error message in jfs_strtoUCS
+
+Function and Fixes in drop 41 (1.0.3)
+  - Patch to move from previous release to latest release needs to update
+    the version number in super.c
+  - Jitterbug problems (134, 140, 152) removing files have been fixed
+  - Set rc=ENOSPC if ialloc fails in jfs_create and jfs_mkdir
+  - Fixed jfs_txnmgr.c 775! assert
+  - Fixed jfs_txnmgr.c 884! assert(mp->nohomeok==0)
+  - Fix hang - prevent tblocks from being exhausted
+  - Fix oops trying to mount reiserfs
+  - Fail more gracefully in jfs_imap.c
+  - Print more information when char2uni fails
+  - Fix timing problem between block map and metapage cache (jitterbug 139)
+  - Code cleanup (removed many ifdefs, obsolete code, ran code through
+    indent) - mostly 2.4 tree
+  - Split source tree (now have a separate source tree for 2.2, 2.4,
+    and jfsutils)
+
+Function and Fixes in drop 40 (1.0.2)
+  - Fixed multiple truncate hang
+  - Fixed hang when unlinking a file and a sync happened at the same time
+  - Improved handling of kmalloc error conditions
+  - Fixed hang in blk_get_queue and SMP deadlock: bh_end_io calls
+    generic_make_request (jitterbug 145 and 146)
+  - stbl was not correctly set in dtDelete
+  - Changed trap to printk in dbAllocAG to avoid system hang
+
+Function and Fixes in drop 39 (1.0.1)
+  - Fixed hang during copying files on 2.2.x series
+  - Fixed TxLock compile problem
+  - Fixed to correctly update the number of blocks for directories (this
+    was causing the FS to show fsck errors after compiling mozilla)
+  - Fixed to prevent old data from being written to disk from the page cache
+
+Function and Fixes in drop 38 (1.0.0)
+  - Fixed some general log problems
+
+Please send bugs, comments, cards and letters to linuxjfs@us.ibm.com.
+
+The JFS mailing list can be subscribed to by using the link labeled
+"Mail list Subscribe" at our web page http://oss.software.ibm.com/jfs/.
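[Editorial note: several changelog entries above concern mount-time behavior, notably the iocharset mount option added in drop 49. As a minimal sketch of what exercising such an option looks like from user space (the device path, mount point, and character set below are hypothetical examples, not values taken from this patch):

    /* Sketch: mount a JFS volume with an iocharset option.
     * /dev/hda2 and /mnt/jfs are hypothetical; requires root. */
    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
            /* The last argument is the filesystem-specific option
             * string handled by JFS's mount option parser. */
            if (mount("/dev/hda2", "/mnt/jfs", "jfs", 0,
                      "iocharset=iso8859-1") != 0) {
                    perror("mount");
                    return 1;
            }
            return 0;
    }

mount(2) returns 0 on success; the option string is passed through to the filesystem, so a charset unknown to the kernel's NLS layer makes the mount fail.]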
+
+
+
+
+
+
+
+
+
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/Documentation/filesystems/jfs.txt linuxppc64_2_4/Documentation/filesystems/jfs.txt
--- ../kernel.org/linux-2.4.19/Documentation/filesystems/jfs.txt	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/Documentation/filesystems/jfs.txt	Tue Apr 23 11:25:34 2002
@@ -0,0 +1,10 @@
+IBM's Journaled File System (JFS) for Linux
+
+The JFS utilities can be found at the JFS homepage at
+http://oss.software.ibm.com/jfs
+
+Team members
+Steve Best		sbest@us.ibm.com
+Dave Kleikamp		shaggy@austin.ibm.com
+Barry Arndt		barndt@us.ibm.com
+Christoph Hellwig	hch@infradead.org
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/MAINTAINERS linuxppc64_2_4/MAINTAINERS
--- ../kernel.org/linux-2.4.19/MAINTAINERS	Mon Apr 22 11:34:25 2002
+++ linuxppc64_2_4/MAINTAINERS	Tue Apr 23 09:37:19 2002
@@ -852,6 +852,13 @@
 W: http://sources.redhat.com/jffs2/
 S: Maintained
 
+JFS FILESYSTEM
+P: Dave Kleikamp
+M: shaggy@austin.ibm.com
+L: jfs-discussion@oss.software.ibm.com
+W: http://oss.software.ibm.com/developerworks/opensource/jfs/
+S: Supported
+
 JOYSTICK DRIVER
 P: Vojtech Pavlik
 M: vojtech@suse.cz
@@ -925,6 +932,13 @@
 W: http://www.linuxppc.org/
 L: linuxppc-dev@lists.linuxppc.org
 S: Maintained
+
+LINUX FOR 64BIT POWERPC
+P: David Engebretsen
+M: engebret@us.ibm.com
+W: http://linuxppc64.org
+L: linuxppc64-dev@lists.linuxppc.org
+S: Supported
 
 LINUX FOR 64BIT POWERPC
 P: David Engebretsen
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/Makefile linuxppc64_2_4/Makefile
--- ../kernel.org/linux-2.4.19/Makefile	Mon Apr 22 11:34:25 2002
+++ linuxppc64_2_4/Makefile	Tue Apr 23 09:37:19 2002
@@ -5,7 +5,8 @@
 
 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
 
-ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/)
+#ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/)
+ARCH := ppc64
 KERNELPATH=kernel-$(shell echo $(KERNELRELEASE) | sed -e "s/-//g")
 CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
@@ -19,7 +20,7 @@
 HOSTCC		= gcc
 HOSTCFLAGS	= -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer
 
-CROSS_COMPILE	=
+CROSS_COMPILE	= /usr/local/ppc64-current3.0/bin/powerpc64-linux-
 
 #
 # Include the make variables (CC, etc...)
@@ -154,6 +155,7 @@
 DRIVERS-$(CONFIG_SCSI) += drivers/scsi/scsidrv.o
 DRIVERS-$(CONFIG_FUSION_BOOT) += drivers/message/fusion/fusion.o
 DRIVERS-$(CONFIG_IEEE1394) += drivers/ieee1394/ieee1394drv.o
+DRIVERS-$(CONFIG_PPC_ISERIES) += drivers/iseries/iseries.o
 ifneq ($(CONFIG_CD_NO_IDESCSI)$(CONFIG_BLK_DEV_IDECD)$(CONFIG_BLK_DEV_SR)$(CONFIG_PARIDE_PCD),)
 DRIVERS-y += drivers/cdrom/driver.o
@@ -170,7 +172,6 @@
 DRIVERS-$(CONFIG_SBUS) += drivers/sbus/sbus_all.o
 DRIVERS-$(CONFIG_ZORRO) += drivers/zorro/driver.o
 DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a
-DRIVERS-$(CONFIG_PPC) += drivers/macintosh/macintosh.o
 DRIVERS-$(CONFIG_MAC) += drivers/macintosh/macintosh.o
 DRIVERS-$(CONFIG_ISAPNP) += drivers/pnp/pnp.o
 DRIVERS-$(CONFIG_SGI_IP22) += drivers/sgi/sgi.a
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/arm/mach-mx1ads/cpu.c linuxppc64_2_4/arch/arm/mach-mx1ads/cpu.c
--- ../kernel.org/linux-2.4.19/arch/arm/mach-mx1ads/cpu.c	Fri Apr 19 11:00:32 2002
+++ linuxppc64_2_4/arch/arm/mach-mx1ads/cpu.c	Mon Apr 22 14:12:27 2002
@@ -3,7 +3,7 @@
 *
 * Copyright (C) 2001 Deep Blue Solutions Ltd.
 *
- * $Id: cpu.c,v 1.2 2001/09/22 12:11:17 rmk Exp $
+ * $Id: cpu.c,v 1.1 2002/04/22 19:12:27 tgall Exp $
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/arm/mach-sa1100/leds-system3.c linuxppc64_2_4/arch/arm/mach-sa1100/leds-system3.c
--- ../kernel.org/linux-2.4.19/arch/arm/mach-sa1100/leds-system3.c	Fri Apr 19 11:00:32 2002
+++ linuxppc64_2_4/arch/arm/mach-sa1100/leds-system3.c	Mon Apr 22 14:12:27 2002
@@ -5,13 +5,21 @@
 *
 * Original (leds-footbridge.c) by Russell King
 *
- * $Id: leds-system3.c,v 1.1.6.1 2001/12/04 15:19:26 seletz Exp $
+ * $Id: leds-system3.c,v 1.1 2002/04/22 19:12:27 tgall Exp $
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * $Log: leds-system3.c,v $
+ * Revision 1.1  2002/04/22 19:12:27  tgall
+ * TAG: adds and deletes up to 2.4.19-pre6 now
+ *
+ *
+ * Code Origin : Myself
+ * License : Same as project
+ * Copyright : IBM
+ *
 * Revision 1.1.6.1  2001/12/04 15:19:26  seletz
 * - merged from linux_2_4_13_ac5_rmk2
 *
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/arm/mach-sa1100/system3.c linuxppc64_2_4/arch/arm/mach-sa1100/system3.c
--- ../kernel.org/linux-2.4.19/arch/arm/mach-sa1100/system3.c	Fri Apr 19 11:00:32 2002
+++ linuxppc64_2_4/arch/arm/mach-sa1100/system3.c	Mon Apr 22 14:12:27 2002
@@ -3,7 +3,7 @@
 *
 * Copyright (C) 2001 Stefan Eletzhofer
 *
- * $Id: system3.c,v 1.1.6.1 2001/12/04 17:28:06 seletz Exp $
+ * $Id: system3.c,v 1.1 2002/04/22 19:12:27 tgall Exp $
 *
 * This file contains all PT System 3 tweaks. Based on original work from
 * Nicolas Pitre's assabet fixes
@@ -13,6 +13,14 @@
 * published by the Free Software Foundation.
 *
 * $Log: system3.c,v $
+ * Revision 1.1  2002/04/22 19:12:27  tgall
+ * TAG: adds and deletes up to 2.4.19-pre6 now
+ *
+ *
+ * Code Origin : Myself
+ * License : Same as project
+ * Copyright : IBM
+ *
 * Revision 1.1.6.1  2001/12/04 17:28:06  seletz
 * - merged from previous branch
 *
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ia64/sn/fakeprom/klgraph_init.c linuxppc64_2_4/arch/ia64/sn/fakeprom/klgraph_init.c
--- ../kernel.org/linux-2.4.19/arch/ia64/sn/fakeprom/klgraph_init.c	Fri Apr 19 10:59:47 2002
+++ linuxppc64_2_4/arch/ia64/sn/fakeprom/klgraph_init.c	Mon Apr 22 13:25:20 2002
@@ -1,4 +1,4 @@
-/* $Id: klgraph_init.c,v 1.1 2002/02/28 17:31:25 marcelo Exp $
+/* $Id: klgraph_init.c,v 1.1 2002/04/22 18:25:20 tgall Exp $
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ia64/sn/io/ate_utils.c linuxppc64_2_4/arch/ia64/sn/io/ate_utils.c
--- ../kernel.org/linux-2.4.19/arch/ia64/sn/io/ate_utils.c	Fri Apr 19 10:59:47 2002
+++ linuxppc64_2_4/arch/ia64/sn/io/ate_utils.c	Mon Apr 22 13:25:20 2002
@@ -1,4 +1,4 @@
-/* $Id: ate_utils.c,v 1.1 2002/02/28 17:31:25 marcelo Exp $
+/* $Id: ate_utils.c,v 1.1 2002/04/22 18:25:20 tgall Exp $
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ia64/sn/io/ifconfig_net.c linuxppc64_2_4/arch/ia64/sn/io/ifconfig_net.c
--- ../kernel.org/linux-2.4.19/arch/ia64/sn/io/ifconfig_net.c	Fri Apr 19 10:59:47 2002
+++ linuxppc64_2_4/arch/ia64/sn/io/ifconfig_net.c	Mon Apr 22 13:25:20 2002
@@ -1,4 +1,4 @@
-/* $Id: ifconfig_net.c,v 1.1 2002/02/28 17:31:25 marcelo Exp $
+/* $Id: ifconfig_net.c,v 1.1 2002/04/22 18:25:20 tgall Exp $
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn1/hub_intr.c linuxppc64_2_4/arch/ia64/sn/io/sn1/hub_intr.c
--- ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn1/hub_intr.c	Fri Apr 19 10:59:47 2002
+++ linuxppc64_2_4/arch/ia64/sn/io/sn1/hub_intr.c	Mon Apr 22 13:25:20 2002
@@ -1,4 +1,4 @@
-/* $Id: hub_intr.c,v 1.1 2002/02/28 17:31:25 marcelo Exp $
+/* $Id: hub_intr.c,v 1.1 2002/04/22 18:25:20 tgall Exp $
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn1/hubcounters.c linuxppc64_2_4/arch/ia64/sn/io/sn1/hubcounters.c
--- ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn1/hubcounters.c	Fri Apr 19 10:59:47 2002
+++ linuxppc64_2_4/arch/ia64/sn/io/sn1/hubcounters.c	Mon Apr 22 13:25:20 2002
@@ -1,4 +1,4 @@
-/* $Id: hubcounters.c,v 1.1 2002/02/28 17:31:25 marcelo Exp $
+/* $Id: hubcounters.c,v 1.1 2002/04/22 18:25:20 tgall Exp $
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn1/huberror.c linuxppc64_2_4/arch/ia64/sn/io/sn1/huberror.c
--- ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn1/huberror.c	Fri Apr 19 10:59:47 2002
+++ linuxppc64_2_4/arch/ia64/sn/io/sn1/huberror.c	Mon Apr 22 13:25:20 2002
@@ -1,4 +1,4 @@
-/* $Id: huberror.c,v 1.1 2002/02/28 17:31:25 marcelo Exp $
+/* $Id: huberror.c,v 1.1 2002/04/22 18:25:20 tgall Exp $
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn1/mem_refcnt.c linuxppc64_2_4/arch/ia64/sn/io/sn1/mem_refcnt.c
--- ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn1/mem_refcnt.c	Fri Apr 19 10:59:47 2002
+++ linuxppc64_2_4/arch/ia64/sn/io/sn1/mem_refcnt.c	Mon Apr 22 13:25:20 2002
@@ -1,4 +1,4 @@
-/* $Id: mem_refcnt.c,v 1.1 2002/02/28 17:31:25 marcelo Exp $
+/* $Id: mem_refcnt.c,v 1.1 2002/04/22 18:25:20 tgall Exp $
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn1/ml_SN_intr.c linuxppc64_2_4/arch/ia64/sn/io/sn1/ml_SN_intr.c
--- ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn1/ml_SN_intr.c	Fri Apr 19 10:59:47 2002
+++ linuxppc64_2_4/arch/ia64/sn/io/sn1/ml_SN_intr.c	Mon Apr 22 13:25:20 2002
@@ -1,4 +1,4 @@
-/* $Id: ml_SN_intr.c,v 1.1 2002/02/28 17:31:25 marcelo Exp $
+/* $Id: ml_SN_intr.c,v 1.1 2002/04/22 18:25:20 tgall Exp $
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn2/bte_error.c linuxppc64_2_4/arch/ia64/sn/io/sn2/bte_error.c
--- ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn2/bte_error.c	Fri Apr 19 10:59:47 2002
+++ linuxppc64_2_4/arch/ia64/sn/io/sn2/bte_error.c	Mon Apr 22 13:25:20 2002
@@ -1,4 +1,4 @@
-/* $Id: bte_error.c,v 1.1 2002/02/28 17:31:25 marcelo Exp $
+/* $Id: bte_error.c,v 1.1 2002/04/22 18:25:20 tgall Exp $
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn2/ml_SN_intr.c linuxppc64_2_4/arch/ia64/sn/io/sn2/ml_SN_intr.c
--- ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn2/ml_SN_intr.c	Fri Apr 19 10:59:47 2002
+++ linuxppc64_2_4/arch/ia64/sn/io/sn2/ml_SN_intr.c	Mon Apr 22 13:25:20 2002
@@ -1,4 +1,4 @@
-/* $Id: ml_SN_intr.c,v 1.1 2002/02/28 17:31:25 marcelo Exp $
+/* $Id: ml_SN_intr.c,v 1.1 2002/04/22 18:25:20 tgall Exp $
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn2/shub_intr.c linuxppc64_2_4/arch/ia64/sn/io/sn2/shub_intr.c
--- ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn2/shub_intr.c	Fri Apr 19 10:59:47 2002
+++ linuxppc64_2_4/arch/ia64/sn/io/sn2/shub_intr.c	Mon Apr 22 13:25:20 2002
@@ -1,4 +1,4 @@
-/* $Id: shub_intr.c,v 1.1 2002/02/28 17:31:25 marcelo Exp $
+/* $Id: shub_intr.c,v 1.1 2002/04/22 18:25:20 tgall Exp $
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn2/shuberror.c linuxppc64_2_4/arch/ia64/sn/io/sn2/shuberror.c
--- ../kernel.org/linux-2.4.19/arch/ia64/sn/io/sn2/shuberror.c	Fri Apr 19 10:59:47 2002
+++ linuxppc64_2_4/arch/ia64/sn/io/sn2/shuberror.c	Mon Apr 22 13:25:20 2002
@@ -1,4 +1,4 @@
-/* $Id: shuberror.c,v 1.1 2002/02/28 17:31:25 marcelo Exp $
+/* $Id: shuberror.c,v 1.1 2002/04/22 18:25:20 tgall Exp $
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/mips64/sgi-ip32/ip32-reset.c linuxppc64_2_4/arch/mips64/sgi-ip32/ip32-reset.c
--- ../kernel.org/linux-2.4.19/arch/mips64/sgi-ip32/ip32-reset.c	Fri Apr 19 10:59:51 2002
+++ linuxppc64_2_4/arch/mips64/sgi-ip32/ip32-reset.c	Wed Dec 31 18:00:00 1969
@@ -1,34 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- * - * Copyright (C) 2001 Keith M Wesolowski - * Copyright (C) 2001 Paul Mundt - */ -#include - -#include -#include - -static void ip32_machine_restart(char *cmd) -{ - ArcReboot(); -} - -static inline void ip32_machine_halt(void) -{ - ArcEnterInteractiveMode(); -} - -static void ip32_machine_power_off(void) -{ - ip32_machine_halt(); -} - -void __init ip32_reboot_setup(void) -{ - _machine_restart = ip32_machine_restart; - _machine_halt = ip32_machine_halt; - _machine_power_off = ip32_machine_power_off; -} diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/boot/Makefile linuxppc64_2_4/arch/ppc64/boot/Makefile --- ../kernel.org/linux-2.4.19/arch/ppc64/boot/Makefile Fri Apr 19 11:00:32 2002 +++ linuxppc64_2_4/arch/ppc64/boot/Makefile Wed Apr 10 12:21:34 2002 @@ -27,7 +27,9 @@ BOOTCFLAGS = $(HOSTCFLAGS) -I$(HPATH) BOOTLD = $(CROSS32_COMPILE)ld BOOTAS = $(CROSS32_COMPILE)as -BOOTAFLAGS = -D__ASSEMBLY__ $(HOSTCFLAGS) +BOOTAFLAGS = -D__ASSEMBLY__ $(HOSTCFLAGS) -I$(HPATH) + +OBJCOPYFLAGS = contents,alloc,load,readonly,data .c.o: $(BOOTCC) $(BOOTCFLAGS) -c -o $*.o $< @@ -37,7 +39,7 @@ CFLAGS = $(CPPFLAGS) -O -fno-builtin -DSTDC_HEADERS LD_ARGS = -Ttext 0x00400000 -e _start -OBJS = crt0.o start.o main.o zlib.o image.o imagesize.o +OBJS = crt0.o string.o prom.o zImage.o zlib.o imagesize.o #LIBS = $(TOPDIR)/lib/lib.a LIBS = @@ -83,43 +85,37 @@ znetboot.initrd: zImage.initrd cp zImage.initrd $(TFTPIMAGE) -floppy: zImage - mcopy zImage a:zImage - -piggyback: piggyback.c - $(HOSTCC) $(HOSTCFLAGS) -DKERNELBASE=$(KERNELBASE) -o piggyback piggyback.c - addnote: addnote.c $(HOSTCC) $(HOSTCFLAGS) -o addnote addnote.c -image.o: piggyback vmlinux.gz - ./piggyback image < vmlinux.gz | $(BOOTAS) -o image.o - -sysmap.o: piggyback ../../../System.map - ./piggyback sysmap < ../../../System.map | $(BOOTAS) -o sysmap.o -initrd.o: ramdisk.image.gz piggyback - ./piggyback initrd < ramdisk.image.gz | $(BOOTAS) -o initrd.o +imagesize.c: $(TOPDIR)/vmlinux + ls -l $(TOPDIR)/vmlinux | awk '{printf "/* generated -- do not edit! */\nunsigned long vmlinux_filesize = %d;\n", $$5}' > imagesize.c + $(CROSS_COMPILE)nm -n $(TOPDIR)/vmlinux | tail -1 | awk '{printf "unsigned long vmlinux_memsize = 0x%s;\n", substr($$1,8)}' >> imagesize.c + +vmlinux .config System.map: % : $(TOPDIR)/% + gzip -cvf9 $(TOPDIR)/$@ > kernel-$@.gz + $(OBJCOPY) zImage.o \ + --add-section=.kernel:$@=kernel-$@.gz \ + --set-section-flags=.kernel:$@=$(OBJCOPYFLAGS) + +initrd: ramdisk.image.gz + $(OBJCOPY) zImage.o \ + --add-section=.kernel:$@=ramdisk.image.gz \ + --set-section-flags=.kernel:$@=$(OBJCOPYFLAGS) -zImage: $(OBJS) no_initrd.o addnote - $(BOOTLD) $(LD_ARGS) -T zImage.lds -o $@ $(OBJS) no_initrd.o $(LIBS) +zImage: $(OBJS) addnote vmlinux .config System.map + $(BOOTLD) $(LD_ARGS) -T zImage.lds -o $@ $(OBJS) $(LIBS) ./addnote $@ -zImage.initrd: $(OBJS) initrd.o addnote - $(BOOTLD) $(LD_ARGS) -T zImage.lds -o $@ $(OBJS) initrd.o $(LIBS) +zImage.initrd: $(OBJS) addnote vmlinux .config System.map initrd + $(BOOTLD) $(LD_ARGS) -T zImage.lds -o $@ $(OBJS) $(LIBS) ./addnote $@ - -vmlinux.gz: $(TOPDIR)/vmlinux - $(OBJCOPY) -S -O binary $(TOPDIR)/vmlinux vmlinux - ls -l vmlinux | awk '{printf "/* generated -- do not edit! 
*/\nint uncompressed_size = %d;\n", $$5}' > imagesize.c - $(CROSS_COMPILE)nm -n $(TOPDIR)/vmlinux | tail -1 | awk '{printf "long vmlinux_end = 0x%s;\n", substr($$1,8)}' >> imagesize.c - gzip -vf9 vmlinux - -imagesize.c: vmlinux.gz - clean: - rm -f piggyback note addnote $(OBJS) zImage zImage.initrd vmlinux.gz no_initrd.o imagesize.c addSystemMap vmlinux.sm addRamDisk vmlinux.initrd vmlinux.sminitrd + rm -f add{note,RamDisk,SystemMap} $(OBJS) initrd.o \ + vmlinux.{sm,initrd} zImage{,.initrd} imagesize.c \ + kernel-{vmlinux,.config,System.map}.gz fastdep: $(TOPDIR)/scripts/mkdep *.[Sch] > .depend diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/boot/crt0.S linuxppc64_2_4/arch/ppc64/boot/crt0.S --- ../kernel.org/linux-2.4.19/arch/ppc64/boot/crt0.S Fri Apr 19 11:00:32 2002 +++ linuxppc64_2_4/arch/ppc64/boot/crt0.S Wed Apr 10 12:21:34 2002 @@ -8,258 +8,41 @@ * * NOTE: this code runs in 32 bit mode and is packaged as ELF32. */ + +#include + .text .globl _start _start: - lis 9,_start@h - lis 8,_etext@ha - addi 8,8,_etext@l -1: dcbf 0,9 - icbi 0,9 - addi 9,9,0x20 - cmplwi 0,9,8 + lis r9,_start@h + lis r8,_etext@ha + addi r8,r8,_etext@l +1: dcbf r0,r9 + icbi r0,r9 + addi r9,r9,0x20 + cmplwi 0,r9,8 blt 1b sync isync ## Clear out the BSS as per ANSI C requirements - lis 7,_end@ha - addi 7,7,_end@l # r7 = &_end - lis 8,__bss_start@ha # - addi 8,8,__bss_start@l # r8 = &_bss_start + lis r7,_end@ha + addi r7,r7,_end@l # r7 = &_end + lis r8,__bss_start@ha # + addi r8,r8,__bss_start@l # r8 = &_bss_start ## Determine how large an area, in number of words, to clear - subf 7,8,7 # r7 = &_end - &_bss_start + 1 - addi 7,7,3 # r7 += 3 - srwi. 7,7,2 # r7 = size in words. - beq 3f # If the size is zero, do not bother - addi 8,8,-4 # r8 -= 4 - mtctr 7 # SPRN_CTR = number of words to clear - li 0,0 # r0 = 0 -2: stwu 0,4(8) # Clear out a word - bdnz 2b # If we are not done yet, keep clearing + subf r7,r8,r7 # r7 = &_end - &_bss_start + 1 + addi r7,r7,3 # r7 += 3 + srwi. r7,r7,2 # r7 = size in words. + beq 3f # If the size is zero, don't bother + addi r8,r8,-4 # r8 -= 4 + mtctr r7 # SPRN_CTR = number of words to clear + li r0,0 # r0 = 0 +2: stwu r0,4(r8) # Clear out a word + bdnz 2b # Keep clearing until done 3: - - b start - - -/* - * Flush the dcache and invalidate the icache for a range of addresses. - * - * flush_cache(addr, len) - */ - .global flush_cache -flush_cache: - addi 4,4,0x1f /* len = (len + 0x1f) / 0x20 */ - rlwinm. 4,4,27,5,31 - mtctr 4 - beqlr -1: dcbf 0,3 - icbi 0,3 - addi 3,3,0x20 - bdnz 1b - sync - isync - blr - - -#define r0 0 -#define r3 3 -#define r4 4 -#define r5 5 -#define r6 6 -#define r7 7 -#define r8 8 - - .globl strcpy -strcpy: - addi r5,r3,-1 - addi r4,r4,-1 -1: lbzu r0,1(r4) - cmpwi 0,r0,0 - stbu r0,1(r5) - bne 1b - blr - - .globl strncpy -strncpy: - cmpwi 0,r5,0 - beqlr - mtctr r5 - addi r6,r3,-1 - addi r4,r4,-1 -1: lbzu r0,1(r4) - cmpwi 0,r0,0 - stbu r0,1(r6) - bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ - blr - - .globl strcat -strcat: - addi r5,r3,-1 - addi r4,r4,-1 -1: lbzu r0,1(r5) - cmpwi 0,r0,0 - bne 1b - addi r5,r5,-1 -1: lbzu r0,1(r4) - cmpwi 0,r0,0 - stbu r0,1(r5) - bne 1b - blr - - .globl strcmp -strcmp: - addi r5,r3,-1 - addi r4,r4,-1 -1: lbzu r3,1(r5) - cmpwi 1,r3,0 - lbzu r0,1(r4) - subf. 
r3,r0,r3 - beqlr 1 - beq 1b - blr - - .globl strlen -strlen: - addi r4,r3,-1 -1: lbzu r0,1(r4) - cmpwi 0,r0,0 - bne 1b - subf r3,r3,r4 - blr - - .globl memset -memset: - rlwimi r4,r4,8,16,23 - rlwimi r4,r4,16,0,15 - addi r6,r3,-4 - cmplwi 0,r5,4 - blt 7f - stwu r4,4(r6) - beqlr - andi. r0,r6,3 - add r5,r0,r5 - subf r6,r0,r6 - rlwinm r0,r5,32-2,2,31 - mtctr r0 - bdz 6f -1: stwu r4,4(r6) - bdnz 1b -6: andi. r5,r5,3 -7: cmpwi 0,r5,0 - beqlr - mtctr r5 - addi r6,r6,3 -8: stbu r4,1(r6) - bdnz 8b - blr - - .globl bcopy -bcopy: - mr r6,r3 - mr r3,r4 - mr r4,r6 - b memcpy - - .globl memmove -memmove: - cmplw 0,r3,r4 - bgt backwards_memcpy - /* fall through */ - - .globl memcpy -memcpy: - rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ - addi r6,r3,-4 - addi r4,r4,-4 - beq 2f /* if less than 8 bytes to do */ - andi. r0,r6,3 /* get dest word aligned */ - mtctr r7 - bne 5f -1: lwz r7,4(r4) - lwzu r8,8(r4) - stw r7,4(r6) - stwu r8,8(r6) - bdnz 1b - andi. r5,r5,7 -2: cmplwi 0,r5,4 - blt 3f - lwzu r0,4(r4) - addi r5,r5,-4 - stwu r0,4(r6) -3: cmpwi 0,r5,0 - beqlr - mtctr r5 - addi r4,r4,3 - addi r6,r6,3 -4: lbzu r0,1(r4) - stbu r0,1(r6) - bdnz 4b - blr -5: subfic r0,r0,4 - mtctr r0 -6: lbz r7,4(r4) - addi r4,r4,1 - stb r7,4(r6) - addi r6,r6,1 - bdnz 6b - subf r5,r0,r5 - rlwinm. r7,r5,32-3,3,31 - beq 2b - mtctr r7 - b 1b - - .globl backwards_memcpy -backwards_memcpy: - rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ - add r6,r3,r5 - add r4,r4,r5 - beq 2f - andi. r0,r6,3 - mtctr r7 - bne 5f -1: lwz r7,-4(r4) - lwzu r8,-8(r4) - stw r7,-4(r6) - stwu r8,-8(r6) - bdnz 1b - andi. r5,r5,7 -2: cmplwi 0,r5,4 - blt 3f - lwzu r0,-4(r4) - subi r5,r5,4 - stwu r0,-4(r6) -3: cmpwi 0,r5,0 - beqlr - mtctr r5 -4: lbzu r0,-1(r4) - stbu r0,-1(r6) - bdnz 4b - blr -5: mtctr r0 -6: lbzu r7,-1(r4) - stbu r7,-1(r6) - bdnz 6b - subf r5,r0,r5 - rlwinm. r7,r5,32-3,3,31 - beq 2b - mtctr r7 - b 1b - - .globl memcmp -memcmp: - cmpwi 0,r5,0 - blelr - mtctr r5 - addi r6,r3,-1 - addi r4,r4,-1 -1: lbzu r3,1(r6) - lbzu r0,1(r4) - subf. r3,r0,r3 - bdnzt 2,1b - blr diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/boot/main.c linuxppc64_2_4/arch/ppc64/boot/main.c --- ../kernel.org/linux-2.4.19/arch/ppc64/boot/main.c Fri Apr 19 11:00:32 2002 +++ linuxppc64_2_4/arch/ppc64/boot/main.c Wed Dec 31 18:00:00 1969 @@ -1,292 +0,0 @@ -/* - * Copyright (C) Paul Mackerras 1997. - * - * Updates for PPC64 by Todd Inglett & Dave Engebretsen. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ -#define __KERNEL__ -#include "zlib.h" -#include -#include -#include - -void memmove(void *dst, void *im, int len); - -extern void *finddevice(const char *); -extern int getprop(void *, const char *, void *, int); -extern void printf(const char *fmt, ...); -extern int sprintf(char *buf, const char *fmt, ...); -void gunzip(void *, int, unsigned char *, int *); -void *claim(unsigned int, unsigned int, unsigned int); -void flush_cache(void *, int); -void pause(void); -static struct bi_record *make_bi_recs(unsigned long); - -#define RAM_START 0x00000000 -#define RAM_END (64<<20) - -#define BOOT_START ((unsigned long)_start) -#define BOOT_END ((unsigned long)_end) - -/* Value picked to match that used by yaboot */ -#define PROG_START 0x01400000 - -char *avail_ram; -char *begin_avail, *end_avail; -char *avail_high; -unsigned int heap_use; -unsigned int heap_max; -unsigned long initrd_start = 0; -unsigned long initrd_size = 0; - -extern char _end[]; -extern char image_data[]; -extern int image_len; -extern char initrd_data[]; -extern int initrd_len; -extern char sysmap_data[]; -extern int sysmap_len; -extern int uncompressed_size; -extern long vmlinux_end; - -static char scratch[128<<10]; /* 128kB of scratch space for gunzip */ - -typedef void (*kernel_entry_t)( unsigned long, - unsigned long, - void *, - struct bi_record *); - -void -chrpboot(unsigned long a1, unsigned long a2, void *prom) -{ - unsigned len; - void *dst = (void *)-1; - unsigned long claim_addr; - unsigned char *im; - extern char _start; - struct bi_record *bi_recs; - kernel_entry_t kernel_entry; - - printf("chrpboot starting: loaded at 0x%x\n\r", (unsigned)&_start); - - if (initrd_len) { - initrd_size = initrd_len; - initrd_start = (RAM_END - initrd_size) & ~0xFFF; - a1 = a2 = 0; - claim(initrd_start, RAM_END - initrd_start, 0); - printf("initial ramdisk moving 0x%lx <- 0x%lx (%lx bytes)\n\r", - initrd_start, (unsigned long)initrd_data, initrd_size); - memcpy((void *)initrd_start, (void *)initrd_data, initrd_size); - } - - im = image_data; - len = image_len; - uncompressed_size = PAGE_ALIGN(uncompressed_size); - - for(claim_addr = PROG_START; - claim_addr <= PROG_START * 8; - claim_addr += 0x100000) { - printf(" trying: 0x%08lx\n\r", claim_addr); - dst = claim(claim_addr, uncompressed_size, 0); - if (dst != (void *)-1) break; - } - if (dst == (void *)-1) { - printf("claim error, can't allocate kernel memory\n\r"); - return; - } - - if (im[0] == 0x1f && im[1] == 0x8b) { - avail_ram = scratch; - begin_avail = avail_high = avail_ram; - end_avail = scratch + sizeof(scratch); - printf("gunzipping (0x%x <- 0x%x:0x%0x)...", - (unsigned)dst, (unsigned)im, (unsigned)im+len); - gunzip(dst, uncompressed_size, im, &len); - printf("done %u bytes\n\r", len); - printf("%u bytes of heap consumed, max in use %u\n\r", - (unsigned)(avail_high - begin_avail), heap_max); - } else { - memmove(dst, im, len); - } - - flush_cache(dst, len); - - bi_recs = make_bi_recs((unsigned long)dst + vmlinux_end); - - kernel_entry = (kernel_entry_t)dst; - printf( "kernel:\n\r" - " entry addr = 0x%lx\n\r" - " a1 = 0x%lx,\n\r" - " a2 = 0x%lx,\n\r" - " prom = 0x%lx,\n\r" - " bi_recs = 0x%lx,\n\r", - (unsigned long)kernel_entry, a1, a2, - (unsigned long)prom, (unsigned long)bi_recs); - - kernel_entry( a1, a2, prom, bi_recs ); - - printf("returned?\n\r"); - - pause(); -} - -static struct bi_record * -make_bi_recs(unsigned long addr) -{ - struct bi_record *bi_recs; - struct bi_record *rec; - - bi_recs = rec = bi_rec_init(addr); - - rec = bi_rec_alloc(rec, 2); - 
rec->tag = BI_FIRST; - /* rec->data[0] = ...; # Written below before return */ - /* rec->data[1] = ...; # Written below before return */ - - rec = bi_rec_alloc_bytes(rec, strlen("chrpboot")+1); - rec->tag = BI_BOOTLOADER_ID; - sprintf( (char *)rec->data, "chrpboot"); - - rec = bi_rec_alloc(rec, 2); - rec->tag = BI_MACHTYPE; - rec->data[0] = _MACH_pSeries; - rec->data[1] = 1; - - if ( initrd_size > 0 ) { - rec = bi_rec_alloc(rec, 2); - rec->tag = BI_INITRD; - rec->data[0] = initrd_start; - rec->data[1] = initrd_size; - } - -#if 0 - if ( sysmap_len > 0 ) { - rec = bi_rec_alloc(rec, 2); - rec->tag = BI_SYSMAP; - rec->data[0] = (unsigned long)sysmap_data; - rec->data[1] = sysmap_len; - } -#endif - - rec = bi_rec_alloc(rec, 1); - rec->tag = BI_LAST; - rec->data[0] = (bi_rec_field)bi_recs; - - /* Save the _end_ address of the bi_rec's in the first bi_rec - * data field for easy access by the kernel. - */ - bi_recs->data[0] = (bi_rec_field)rec; - bi_recs->data[1] = (bi_rec_field)rec + rec->size - (bi_rec_field)bi_recs; - - return bi_recs; -} - -struct memchunk { - unsigned int size; - unsigned int pad; - struct memchunk *next; -}; - -static struct memchunk *freechunks; - -void *zalloc(void *x, unsigned items, unsigned size) -{ - void *p; - struct memchunk **mpp, *mp; - - size *= items; - size = _ALIGN(size, sizeof(struct memchunk)); - heap_use += size; - if (heap_use > heap_max) - heap_max = heap_use; - for (mpp = &freechunks; (mp = *mpp) != 0; mpp = &mp->next) { - if (mp->size == size) { - *mpp = mp->next; - return mp; - } - } - p = avail_ram; - avail_ram += size; - if (avail_ram > avail_high) - avail_high = avail_ram; - if (avail_ram > end_avail) { - printf("oops... out of memory\n\r"); - pause(); - } - return p; -} - -void zfree(void *x, void *addr, unsigned nb) -{ - struct memchunk *mp = addr; - - nb = _ALIGN(nb, sizeof(struct memchunk)); - heap_use -= nb; - if (avail_ram == addr + nb) { - avail_ram = addr; - return; - } - mp->size = nb; - mp->next = freechunks; - freechunks = mp; -} - -#define HEAD_CRC 2 -#define EXTRA_FIELD 4 -#define ORIG_NAME 8 -#define COMMENT 0x10 -#define RESERVED 0xe0 - -#define DEFLATED 8 - -void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) -{ - z_stream s; - int r, i, flags; - - /* skip header */ - i = 10; - flags = src[3]; - if (src[2] != DEFLATED || (flags & RESERVED) != 0) { - printf("bad gzipped data\n\r"); - exit(); - } - if ((flags & EXTRA_FIELD) != 0) - i = 12 + src[10] + (src[11] << 8); - if ((flags & ORIG_NAME) != 0) - while (src[i++] != 0) - ; - if ((flags & COMMENT) != 0) - while (src[i++] != 0) - ; - if ((flags & HEAD_CRC) != 0) - i += 2; - if (i >= *lenp) { - printf("gunzip: ran out of data in header\n\r"); - exit(); - } - - s.zalloc = zalloc; - s.zfree = zfree; - r = inflateInit2(&s, -MAX_WBITS); - if (r != Z_OK) { - printf("inflateInit2 returned %d\n\r", r); - exit(); - } - s.next_in = src + i; - s.avail_in = *lenp - i; - s.next_out = dst; - s.avail_out = dstlen; - r = inflate(&s, Z_FINISH); - if (r != Z_OK && r != Z_STREAM_END) { - printf("inflate returned %d msg: %s\n\r", r, s.msg); - exit(); - } - *lenp = s.next_out - (unsigned char *) dst; - inflateEnd(&s); -} - diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/boot/no_initrd.c linuxppc64_2_4/arch/ppc64/boot/no_initrd.c --- ../kernel.org/linux-2.4.19/arch/ppc64/boot/no_initrd.c Fri Apr 19 11:00:32 2002 +++ linuxppc64_2_4/arch/ppc64/boot/no_initrd.c Wed Dec 31 18:00:00 1969 @@ -1,2 +0,0 @@ -char initrd_data[1]; -int initrd_len = 0; diff -uNr --exclude=CVS 
../kernel.org/linux-2.4.19/arch/ppc64/boot/piggyback.c linuxppc64_2_4/arch/ppc64/boot/piggyback.c --- ../kernel.org/linux-2.4.19/arch/ppc64/boot/piggyback.c Fri Apr 19 11:00:32 2002 +++ linuxppc64_2_4/arch/ppc64/boot/piggyback.c Wed Dec 31 18:00:00 1969 @@ -1,74 +0,0 @@ -/* - * Copyright 2001 IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include - -extern long ce_exec_config[]; - -int main(int argc, char *argv[]) -{ - int i, cnt, pos, len; - unsigned int cksum, val; - unsigned char *lp; - unsigned char buf[8192]; - if (argc != 2) - { - fprintf(stderr, "usage: %s name out-file\n", - argv[0]); - exit(1); - } - fprintf(stdout, "#\n"); - fprintf(stdout, "# Miscellaneous data structures:\n"); - fprintf(stdout, "# WARNING - this file is automatically generated!\n"); - fprintf(stdout, "#\n"); - fprintf(stdout, "\n"); - fprintf(stdout, "\t.data\n"); - fprintf(stdout, "\t.globl %s_data\n", argv[1]); - fprintf(stdout, "%s_data:\n", argv[1]); - pos = 0; - cksum = 0; - while ((len = read(0, buf, sizeof(buf))) > 0) - { - cnt = 0; - lp = (unsigned char *)buf; - len = (len + 3) & ~3; /* Round up to longwords */ - for (i = 0; i < len; i += 4) - { - if (cnt == 0) - { - fprintf(stdout, "\t.long\t"); - } - fprintf(stdout, "0x%02X%02X%02X%02X", lp[0], lp[1], lp[2], lp[3]); - val = *(unsigned long *)lp; - cksum ^= val; - lp += 4; - if (++cnt == 4) - { - cnt = 0; - fprintf(stdout, " # %x \n", pos+i-12); - fflush(stdout); - } else - { - fprintf(stdout, ","); - } - } - if (cnt) - { - fprintf(stdout, "0\n"); - } - pos += len; - } - fprintf(stdout, "\t.globl %s_len\n", argv[1]); - fprintf(stdout, "%s_len:\t.long\t0x%x\n", argv[1], pos); - fflush(stdout); - fclose(stdout); - fprintf(stderr, "cksum = %x\n", cksum); - exit(0); -} - diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/boot/ppc32-types.h linuxppc64_2_4/arch/ppc64/boot/ppc32-types.h --- ../kernel.org/linux-2.4.19/arch/ppc64/boot/ppc32-types.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/boot/ppc32-types.h Wed Apr 10 12:21:34 2002 @@ -0,0 +1,30 @@ +#ifndef _PPC64_TYPES_H +#define _PPC64_TYPES_H + +typedef __signed__ char __s8; +typedef unsigned char __u8; + +typedef __signed__ short __s16; +typedef unsigned short __u16; + +typedef __signed__ int __s32; +typedef unsigned int __u32; + +typedef __signed__ long long __s64; +typedef unsigned long long __u64; + +typedef signed char s8; +typedef unsigned char u8; + +typedef signed short s16; +typedef unsigned short u16; + +typedef signed int s32; +typedef unsigned int u32; + +typedef signed long long s64; +typedef unsigned long long u64; + +#define BITS_PER_LONG 32 + +#endif /* _PPC64_TYPES_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/boot/prom.c linuxppc64_2_4/arch/ppc64/boot/prom.c --- ../kernel.org/linux-2.4.19/arch/ppc64/boot/prom.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/boot/prom.c Wed Apr 10 12:21:34 2002 @@ -0,0 +1,636 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include +#include +#include +#include + +#include + +int (*prom)(void *); + +void *chosen_handle; +void *stdin; +void *stdout; +void *stderr; + +void exit(void); +void *finddevice(const char *name); +int getprop(void *phandle, const char *name, void *buf, int buflen); +void chrpboot(int a1, int a2, void *prom); /* in main.c */ + +void printk(char *fmt, ...); + +int +write(void *handle, void *ptr, int nb) +{ + struct prom_args { + char *service; + int nargs; + int nret; + void *ihandle; + void *addr; + int len; + int actual; + } args; + + args.service = "write"; + args.nargs = 3; + args.nret = 1; + args.ihandle = handle; + args.addr = ptr; + args.len = nb; + args.actual = -1; + (*prom)(&args); + return args.actual; +} + +int +read(void *handle, void *ptr, int nb) +{ + struct prom_args { + char *service; + int nargs; + int nret; + void *ihandle; + void *addr; + int len; + int actual; + } args; + + args.service = "read"; + args.nargs = 3; + args.nret = 1; + args.ihandle = handle; + args.addr = ptr; + args.len = nb; + args.actual = -1; + (*prom)(&args); + return args.actual; +} + +void +exit() +{ + struct prom_args { + char *service; + } args; + + for (;;) { + args.service = "exit"; + (*prom)(&args); + } +} + +void +pause(void) +{ + struct prom_args { + char *service; + } args; + + args.service = "enter"; + (*prom)(&args); +} + +void * +finddevice(const char *name) +{ + struct prom_args { + char *service; + int nargs; + int nret; + const char *devspec; + void *phandle; + } args; + + args.service = "finddevice"; + args.nargs = 1; + args.nret = 1; + args.devspec = name; + args.phandle = (void *) -1; + (*prom)(&args); + return args.phandle; +} + +void * +claim(unsigned long virt, unsigned long size, unsigned long align) +{ + struct prom_args { + char *service; + int nargs; + int nret; + unsigned int virt; + unsigned int size; + unsigned int align; + void *ret; + } args; + + args.service = "claim"; + args.nargs = 3; + args.nret = 1; + args.virt = virt; + args.size = size; + args.align = align; + (*prom)(&args); + return args.ret; +} + +int +getprop(void *phandle, const char *name, void *buf, int buflen) +{ + struct prom_args { + char *service; + int nargs; + int nret; + void *phandle; + const char *name; + void *buf; + int buflen; + int size; + } args; + + args.service = "getprop"; + args.nargs = 4; + args.nret = 1; + args.phandle = phandle; + args.name = name; + args.buf = buf; + args.buflen = buflen; + args.size = -1; + (*prom)(&args); + return args.size; +} + +int +putc(int c, void *f) +{ + char ch = c; + + if (c == '\n') + putc('\r', f); + return write(f, &ch, 1) == 1? c: -1; +} + +int +putchar(int c) +{ + return putc(c, stdout); +} + +int +fputs(char *str, void *f) +{ + int n = strlen(str); + + return write(f, str, n) == n? 
0: -1; +} + +int +readchar(void) +{ + char ch; + + for (;;) { + switch (read(stdin, &ch, 1)) { + case 1: + return ch; + case -1: + printk("read(stdin) returned -1\r\n"); + return -1; + } + } +} + +static char line[256]; +static char *lineptr; +static int lineleft; + +int +getchar(void) +{ + int c; + + if (lineleft == 0) { + lineptr = line; + for (;;) { + c = readchar(); + if (c == -1 || c == 4) + break; + if (c == '\r' || c == '\n') { + *lineptr++ = '\n'; + putchar('\n'); + break; + } + switch (c) { + case 0177: + case '\b': + if (lineptr > line) { + putchar('\b'); + putchar(' '); + putchar('\b'); + --lineptr; + } + break; + case 'U' & 0x1F: + while (lineptr > line) { + putchar('\b'); + putchar(' '); + putchar('\b'); + --lineptr; + } + break; + default: + if (lineptr >= &line[sizeof(line) - 1]) + putchar('\a'); + else { + putchar(c); + *lineptr++ = c; + } + } + } + lineleft = lineptr - line; + lineptr = line; + } + if (lineleft == 0) + return -1; + --lineleft; + return *lineptr++; +} + + + +/* String functions lifted from lib/vsprintf.c and lib/ctype.c */ +unsigned char _ctype[] = { +_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ +_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ +_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ +_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ +_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ +_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ +_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ +_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ +_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ +_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ +_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ +_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ +_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ +_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ + +size_t strnlen(const char * s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) +{ + unsigned long result = 0,value; + + if (!base) { + base = 10; + if (*cp == '0') { + base = 8; + cp++; + if ((*cp == 'x') && isxdigit(cp[1])) { + cp++; + base = 16; + } + } + } + while (isxdigit(*cp) && + (value = isdigit(*cp) ? 
*cp-'0' : toupper(*cp)-'A'+10) < base) { + result = result*base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + return result; +} + +long simple_strtol(const char *cp,char **endp,unsigned int base) +{ + if(*cp=='-') + return -simple_strtoul(cp+1,endp,base); + return simple_strtoul(cp,endp,base); +} + +static int skip_atoi(const char **s) +{ + int i=0; + + while (isdigit(**s)) + i = i*10 + *((*s)++) - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SPECIAL 32 /* 0x */ +#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * str, long long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits="0123456789abcdefghijklmnopqrstuvwxyz"; + int i; + + if (type & LARGE) + digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else while (num != 0) + tmp[i++] = digits[do_div(num,base)]; + if (i > precision) + precision = i; + size -= precision; + if (!(type&(ZEROPAD+LEFT))) + while(size-->0) + *str++ = ' '; + if (sign) + *str++ = sign; + if (type & SPECIAL) { + if (base==8) + *str++ = '0'; + else if (base==16) { + *str++ = '0'; + *str++ = digits[33]; + } + } + if (!(type & LEFT)) + while (size-- > 0) + *str++ = c; + while (i < precision--) + *str++ = '0'; + while (i-- > 0) + *str++ = tmp[i]; + while (size-- > 0) + *str++ = ' '; + return str; +} + +/* Forward decl. needed for IP address printing stuff... */ +int sprintf(char * buf, const char *fmt, ...); + +int vsprintf(char *buf, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char * str; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. 
*/ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + + for (str=buf ; *fmt ; ++fmt) { + if (*fmt != '%') { + *str++ = *fmt; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= LEFT; goto repeat; + case '+': flags |= PLUS; goto repeat; + case ' ': flags |= SPACE; goto repeat; + case '#': flags |= SPECIAL; goto repeat; + case '0': flags |= ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) + while (--field_width > 0) + *str++ = ' '; + *str++ = (unsigned char) va_arg(args, int); + while (--field_width > 0) + *str++ = ' '; + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + s = ""; + + len = strnlen(s, precision); + + if (!(flags & LEFT)) + while (len < field_width--) + *str++ = ' '; + for (i = 0; i < len; ++i) + *str++ = *s++; + while (len < field_width--) + *str++ = ' '; + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= ZEROPAD; + } + str = number(str, + (unsigned long) va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + + case 'n': + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + *str++ = '%'; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + *str++ = '%'; + if (*fmt) + *str++ = *fmt; + else + --fmt; + continue; + } + if (qualifier == 'L') + num = va_arg(args, long long); + else if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & SIGN) + num = (signed long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & SIGN) + num = (signed int) num; + } + str = number(str, num, base, field_width, precision, flags); + } + *str = '\0'; + return str-buf; +} + +int sprintf(char * buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i=vsprintf(buf,fmt,args); + va_end(args); + return i; +} + +static char sprint_buf[1024]; + +void +printk(char *fmt, ...) +{ + va_list args; + int n; + + va_start(args, fmt); + n = vsprintf(sprint_buf, fmt, args); + va_end(args); + write(stdout, sprint_buf, n); +} + +int +printf(char *fmt, ...) 
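+/*
+ * Note: like printk() above, this formats into the fixed 1kB
+ * sprint_buf and writes the result to the firmware stdout handle;
+ * the only difference is that it returns the byte count.  Neither
+ * routine bounds-checks sprint_buf, so boot messages must stay
+ * well under 1kB.
+ */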
+{ + va_list args; + int n; + + va_start(args, fmt); + n = vsprintf(sprint_buf, fmt, args); + va_end(args); + write(stdout, sprint_buf, n); + return n; +} diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/boot/start.c linuxppc64_2_4/arch/ppc64/boot/start.c --- ../kernel.org/linux-2.4.19/arch/ppc64/boot/start.c Fri Apr 19 11:00:32 2002 +++ linuxppc64_2_4/arch/ppc64/boot/start.c Wed Dec 31 18:00:00 1969 @@ -1,654 +0,0 @@ -/* - * Copyright (C) Paul Mackerras 1997. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include -#include - -#include - -int (*prom)(void *); - -void *chosen_handle; -void *stdin; -void *stdout; -void *stderr; - -void exit(void); -void *finddevice(const char *name); -int getprop(void *phandle, const char *name, void *buf, int buflen); -void chrpboot(int a1, int a2, void *prom); /* in main.c */ - -void printk(char *fmt, ...); - -void -start(int a1, int a2, void *promptr) -{ - prom = (int (*)(void *)) promptr; - chosen_handle = finddevice("/chosen"); - if (chosen_handle == (void *) -1) - exit(); - if (getprop(chosen_handle, "stdout", &stdout, sizeof(stdout)) != 4) - exit(); - stderr = stdout; - if (getprop(chosen_handle, "stdin", &stdin, sizeof(stdin)) != 4) - exit(); - - chrpboot(a1, a2, promptr); - for (;;) - exit(); -} - -int -write(void *handle, void *ptr, int nb) -{ - struct prom_args { - char *service; - int nargs; - int nret; - void *ihandle; - void *addr; - int len; - int actual; - } args; - - args.service = "write"; - args.nargs = 3; - args.nret = 1; - args.ihandle = handle; - args.addr = ptr; - args.len = nb; - args.actual = -1; - (*prom)(&args); - return args.actual; -} - -int -read(void *handle, void *ptr, int nb) -{ - struct prom_args { - char *service; - int nargs; - int nret; - void *ihandle; - void *addr; - int len; - int actual; - } args; - - args.service = "read"; - args.nargs = 3; - args.nret = 1; - args.ihandle = handle; - args.addr = ptr; - args.len = nb; - args.actual = -1; - (*prom)(&args); - return args.actual; -} - -void -exit() -{ - struct prom_args { - char *service; - } args; - - for (;;) { - args.service = "exit"; - (*prom)(&args); - } -} - -void -pause(void) -{ - struct prom_args { - char *service; - } args; - - args.service = "enter"; - (*prom)(&args); -} - -void * -finddevice(const char *name) -{ - struct prom_args { - char *service; - int nargs; - int nret; - const char *devspec; - void *phandle; - } args; - - args.service = "finddevice"; - args.nargs = 1; - args.nret = 1; - args.devspec = name; - args.phandle = (void *) -1; - (*prom)(&args); - return args.phandle; -} - -void * -claim(unsigned long virt, unsigned long size, unsigned long align) -{ - struct prom_args { - char *service; - int nargs; - int nret; - unsigned int virt; - unsigned int size; - unsigned int align; - void *ret; - } args; - - args.service = "claim"; - args.nargs = 3; - args.nret = 1; - args.virt = virt; - args.size = size; - args.align = align; - (*prom)(&args); - return args.ret; -} - -int -getprop(void *phandle, const char *name, void *buf, int buflen) -{ - struct prom_args { - char *service; - int nargs; - int nret; - void *phandle; - const char *name; - void *buf; - int buflen; - int size; - } args; - - args.service = "getprop"; - args.nargs = 4; - args.nret = 1; - args.phandle = phandle; - args.name = name; - args.buf 
= buf; - args.buflen = buflen; - args.size = -1; - (*prom)(&args); - return args.size; -} - -int -putc(int c, void *f) -{ - char ch = c; - - if (c == '\n') - putc('\r', f); - return write(f, &ch, 1) == 1? c: -1; -} - -int -putchar(int c) -{ - return putc(c, stdout); -} - -int -fputs(char *str, void *f) -{ - int n = strlen(str); - - return write(f, str, n) == n? 0: -1; -} - -int -readchar(void) -{ - char ch; - - for (;;) { - switch (read(stdin, &ch, 1)) { - case 1: - return ch; - case -1: - printk("read(stdin) returned -1\r\n"); - return -1; - } - } -} - -static char line[256]; -static char *lineptr; -static int lineleft; - -int -getchar(void) -{ - int c; - - if (lineleft == 0) { - lineptr = line; - for (;;) { - c = readchar(); - if (c == -1 || c == 4) - break; - if (c == '\r' || c == '\n') { - *lineptr++ = '\n'; - putchar('\n'); - break; - } - switch (c) { - case 0177: - case '\b': - if (lineptr > line) { - putchar('\b'); - putchar(' '); - putchar('\b'); - --lineptr; - } - break; - case 'U' & 0x1F: - while (lineptr > line) { - putchar('\b'); - putchar(' '); - putchar('\b'); - --lineptr; - } - break; - default: - if (lineptr >= &line[sizeof(line) - 1]) - putchar('\a'); - else { - putchar(c); - *lineptr++ = c; - } - } - } - lineleft = lineptr - line; - lineptr = line; - } - if (lineleft == 0) - return -1; - --lineleft; - return *lineptr++; -} - - - -/* String functions lifted from lib/vsprintf.c and lib/ctype.c */ -unsigned char _ctype[] = { -_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ -_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ -_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ -_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ -_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ -_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ -_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ -_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ -_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ -_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ -_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ -_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ -_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ -_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ -_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ -_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ -_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ -_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ -_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ -_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ -_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ -_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ - -size_t strnlen(const char * s, size_t count) -{ - const char *sc; - - for (sc = s; count-- && *sc != '\0'; ++sc) - /* nothing */; - return sc - s; -} - -unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) -{ - unsigned long result = 0,value; - - if (!base) { - base = 10; - if (*cp == '0') { - base = 8; - cp++; - if ((*cp == 'x') && isxdigit(cp[1])) { - cp++; - base = 16; - } - } - } - while (isxdigit(*cp) && - (value = isdigit(*cp) ? 
*cp-'0' : toupper(*cp)-'A'+10) < base) { - result = result*base + value; - cp++; - } - if (endp) - *endp = (char *)cp; - return result; -} - -long simple_strtol(const char *cp,char **endp,unsigned int base) -{ - if(*cp=='-') - return -simple_strtoul(cp+1,endp,base); - return simple_strtoul(cp,endp,base); -} - -static int skip_atoi(const char **s) -{ - int i=0; - - while (isdigit(**s)) - i = i*10 + *((*s)++) - '0'; - return i; -} - -#define ZEROPAD 1 /* pad with zero */ -#define SIGN 2 /* unsigned/signed long */ -#define PLUS 4 /* show plus */ -#define SPACE 8 /* space if plus */ -#define LEFT 16 /* left justified */ -#define SPECIAL 32 /* 0x */ -#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ - -static char * number(char * str, long long num, int base, int size, int precision, int type) -{ - char c,sign,tmp[66]; - const char *digits="0123456789abcdefghijklmnopqrstuvwxyz"; - int i; - - if (type & LARGE) - digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - if (type & LEFT) - type &= ~ZEROPAD; - if (base < 2 || base > 36) - return 0; - c = (type & ZEROPAD) ? '0' : ' '; - sign = 0; - if (type & SIGN) { - if (num < 0) { - sign = '-'; - num = -num; - size--; - } else if (type & PLUS) { - sign = '+'; - size--; - } else if (type & SPACE) { - sign = ' '; - size--; - } - } - if (type & SPECIAL) { - if (base == 16) - size -= 2; - else if (base == 8) - size--; - } - i = 0; - if (num == 0) - tmp[i++]='0'; - else while (num != 0) - tmp[i++] = digits[do_div(num,base)]; - if (i > precision) - precision = i; - size -= precision; - if (!(type&(ZEROPAD+LEFT))) - while(size-->0) - *str++ = ' '; - if (sign) - *str++ = sign; - if (type & SPECIAL) { - if (base==8) - *str++ = '0'; - else if (base==16) { - *str++ = '0'; - *str++ = digits[33]; - } - } - if (!(type & LEFT)) - while (size-- > 0) - *str++ = c; - while (i < precision--) - *str++ = '0'; - while (i-- > 0) - *str++ = tmp[i]; - while (size-- > 0) - *str++ = ' '; - return str; -} - -/* Forward decl. needed for IP address printing stuff... */ -int sprintf(char * buf, const char *fmt, ...); - -int vsprintf(char *buf, const char *fmt, va_list args) -{ - int len; - unsigned long long num; - int i, base; - char * str; - const char *s; - - int flags; /* flags to number() */ - - int field_width; /* width of output field */ - int precision; /* min. # of digits for integers; max - number of chars for from string */ - int qualifier; /* 'h', 'l', or 'L' for integer fields */ - /* 'z' support added 23/7/1999 S.H. 
*/ - /* 'z' changed to 'Z' --davidm 1/25/99 */ - - - for (str=buf ; *fmt ; ++fmt) { - if (*fmt != '%') { - *str++ = *fmt; - continue; - } - - /* process flags */ - flags = 0; - repeat: - ++fmt; /* this also skips first '%' */ - switch (*fmt) { - case '-': flags |= LEFT; goto repeat; - case '+': flags |= PLUS; goto repeat; - case ' ': flags |= SPACE; goto repeat; - case '#': flags |= SPECIAL; goto repeat; - case '0': flags |= ZEROPAD; goto repeat; - } - - /* get field width */ - field_width = -1; - if (isdigit(*fmt)) - field_width = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - field_width = va_arg(args, int); - if (field_width < 0) { - field_width = -field_width; - flags |= LEFT; - } - } - - /* get the precision */ - precision = -1; - if (*fmt == '.') { - ++fmt; - if (isdigit(*fmt)) - precision = skip_atoi(&fmt); - else if (*fmt == '*') { - ++fmt; - /* it's the next argument */ - precision = va_arg(args, int); - } - if (precision < 0) - precision = 0; - } - - /* get the conversion qualifier */ - qualifier = -1; - if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { - qualifier = *fmt; - ++fmt; - } - - /* default base */ - base = 10; - - switch (*fmt) { - case 'c': - if (!(flags & LEFT)) - while (--field_width > 0) - *str++ = ' '; - *str++ = (unsigned char) va_arg(args, int); - while (--field_width > 0) - *str++ = ' '; - continue; - - case 's': - s = va_arg(args, char *); - if (!s) - s = ""; - - len = strnlen(s, precision); - - if (!(flags & LEFT)) - while (len < field_width--) - *str++ = ' '; - for (i = 0; i < len; ++i) - *str++ = *s++; - while (len < field_width--) - *str++ = ' '; - continue; - - case 'p': - if (field_width == -1) { - field_width = 2*sizeof(void *); - flags |= ZEROPAD; - } - str = number(str, - (unsigned long) va_arg(args, void *), 16, - field_width, precision, flags); - continue; - - - case 'n': - if (qualifier == 'l') { - long * ip = va_arg(args, long *); - *ip = (str - buf); - } else if (qualifier == 'Z') { - size_t * ip = va_arg(args, size_t *); - *ip = (str - buf); - } else { - int * ip = va_arg(args, int *); - *ip = (str - buf); - } - continue; - - case '%': - *str++ = '%'; - continue; - - /* integer number formats - set up the flags and "break" */ - case 'o': - base = 8; - break; - - case 'X': - flags |= LARGE; - case 'x': - base = 16; - break; - - case 'd': - case 'i': - flags |= SIGN; - case 'u': - break; - - default: - *str++ = '%'; - if (*fmt) - *str++ = *fmt; - else - --fmt; - continue; - } - if (qualifier == 'L') - num = va_arg(args, long long); - else if (qualifier == 'l') { - num = va_arg(args, unsigned long); - if (flags & SIGN) - num = (signed long) num; - } else if (qualifier == 'Z') { - num = va_arg(args, size_t); - } else if (qualifier == 'h') { - num = (unsigned short) va_arg(args, int); - if (flags & SIGN) - num = (signed short) num; - } else { - num = va_arg(args, unsigned int); - if (flags & SIGN) - num = (signed int) num; - } - str = number(str, num, base, field_width, precision, flags); - } - *str = '\0'; - return str-buf; -} - -int sprintf(char * buf, const char *fmt, ...) -{ - va_list args; - int i; - - va_start(args, fmt); - i=vsprintf(buf,fmt,args); - va_end(args); - return i; -} - -static char sprint_buf[1024]; - -void -printk(char *fmt, ...) -{ - va_list args; - int n; - - va_start(args, fmt); - n = vsprintf(sprint_buf, fmt, args); - va_end(args); - write(stdout, sprint_buf, n); -} - -int -printf(char *fmt, ...) 
-{ - va_list args; - int n; - - va_start(args, fmt); - n = vsprintf(sprint_buf, fmt, args); - va_end(args); - write(stdout, sprint_buf, n); - return n; -} diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/boot/string.S linuxppc64_2_4/arch/ppc64/boot/string.S --- ../kernel.org/linux-2.4.19/arch/ppc64/boot/string.S Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/boot/string.S Wed Apr 10 12:21:34 2002 @@ -0,0 +1,223 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * NOTE: this code runs in 32 bit mode and is packaged as ELF32. + */ + +#include + + .text + .globl strcpy +strcpy: + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + + .globl strncpy +strncpy: + cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r6) + bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ + blr + + .globl strcat +strcat: + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r5) + cmpwi 0,r0,0 + bne 1b + addi r5,r5,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + + .globl strcmp +strcmp: + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r5) + cmpwi 1,r3,0 + lbzu r0,1(r4) + subf. r3,r0,r3 + beqlr 1 + beq 1b + blr + + .globl strlen +strlen: + addi r4,r3,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + bne 1b + subf r3,r3,r4 + blr + + .globl memset +memset: + rlwimi r4,r4,8,16,23 + rlwimi r4,r4,16,0,15 + addi r6,r3,-4 + cmplwi 0,r5,4 + blt 7f + stwu r4,4(r6) + beqlr + andi. r0,r6,3 + add r5,r0,r5 + subf r6,r0,r6 + rlwinm r0,r5,32-2,2,31 + mtctr r0 + bdz 6f +1: stwu r4,4(r6) + bdnz 1b +6: andi. r5,r5,3 +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r6,3 +8: stbu r4,1(r6) + bdnz 8b + blr + + .globl bcopy +bcopy: + mr r6,r3 + mr r3,r4 + mr r4,r6 + b memcpy + + .globl memmove +memmove: + cmplw 0,r3,r4 + bgt backwards_memcpy + /* fall through */ + + .globl memcpy +memcpy: + rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + addi r6,r3,-4 + addi r4,r4,-4 + beq 2f /* if less than 8 bytes to do */ + andi. r0,r6,3 /* get dest word aligned */ + mtctr r7 + bne 5f +1: lwz r7,4(r4) + lwzu r8,8(r4) + stw r7,4(r6) + stwu r8,8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,4(r4) + addi r5,r5,-4 + stwu r0,4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r4,r4,3 + addi r6,r6,3 +4: lbzu r0,1(r4) + stbu r0,1(r6) + bdnz 4b + blr +5: subfic r0,r0,4 + mtctr r0 +6: lbz r7,4(r4) + addi r4,r4,1 + stb r7,4(r6) + addi r6,r6,1 + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + + .globl backwards_memcpy +backwards_memcpy: + rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + add r6,r3,r5 + add r4,r4,r5 + beq 2f + andi. r0,r6,3 + mtctr r7 + bne 5f +1: lwz r7,-4(r4) + lwzu r8,-8(r4) + stw r7,-4(r6) + stwu r8,-8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,-4(r4) + subi r5,r5,4 + stwu r0,-4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 +4: lbzu r0,-1(r4) + stbu r0,-1(r6) + bdnz 4b + blr +5: mtctr r0 +6: lbzu r7,-1(r4) + stbu r7,-1(r6) + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + + .globl memcmp +memcmp: + cmpwi 0,r5,0 + blelr + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r6) + lbzu r0,1(r4) + subf. 
r3,r0,r3 + bdnzt 2,1b + blr + + +/* + * Flush the dcache and invalidate the icache for a range of addresses. + * + * flush_cache(addr, len) + */ + .global flush_cache +flush_cache: + addi 4,4,0x1f /* len = (len + 0x1f) / 0x20 */ + rlwinm. 4,4,27,5,31 + mtctr 4 + beqlr +1: dcbf 0,3 + icbi 0,3 + addi 3,3,0x20 + bdnz 1b + sync + isync + blr + diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/boot/zImage.c linuxppc64_2_4/arch/ppc64/boot/zImage.c --- ../kernel.org/linux-2.4.19/arch/ppc64/boot/zImage.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/boot/zImage.c Wed Apr 10 12:21:34 2002 @@ -0,0 +1,362 @@ +/* + * Copyright (C) Paul Mackerras 1997. + * + * Updates for PPC64 by Todd Inglett, Dave Engebretsen & Peter Bergner. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#define __KERNEL__ +#include "ppc32-types.h" +#include "zlib.h" +#include +#include +#include +#include + +void memmove(void *dst, void *im, int len); + +extern void *finddevice(const char *); +extern int getprop(void *, const char *, void *, int); +extern void printk(char *fmt, ...); +extern void printf(const char *fmt, ...); +extern int sprintf(char *buf, const char *fmt, ...); +void gunzip(void *, int, unsigned char *, int *); +void *claim(unsigned int, unsigned int, unsigned int); +void flush_cache(void *, unsigned long); +void pause(void); +extern void exit(void); + +static struct bi_record *make_bi_recs(unsigned long); + +#define RAM_START 0x00000000 +#define RAM_END (64<<20) + +/* Value picked to match that used by yaboot */ +#define PROG_START 0x01400000 + +char *avail_ram; +char *begin_avail, *end_avail; +char *avail_high; +unsigned int heap_use; +unsigned int heap_max; + +extern char _end[]; +extern char _vmlinux_start[]; +extern char _vmlinux_end[]; +extern char _sysmap_start[]; +extern char _sysmap_end[]; +extern char _initrd_start[]; +extern char _initrd_end[]; +extern unsigned long vmlinux_filesize; +extern unsigned long vmlinux_memsize; + +struct addr_range { + unsigned long addr; + unsigned long size; + unsigned long memsize; +}; +struct addr_range vmlinux = {0, 0, 0}; +struct addr_range vmlinuz = {0, 0, 0}; +struct addr_range sysmap = {0, 0, 0}; +struct addr_range initrd = {0, 0, 0}; + +static char scratch[128<<10]; /* 128kB of scratch space for gunzip */ + +typedef void (*kernel_entry_t)( unsigned long, + unsigned long, + void *, + struct bi_record *); + + +int (*prom)(void *); + +void *chosen_handle; +void *stdin; +void *stdout; +void *stderr; + + +void +start(unsigned long a1, unsigned long a2, void *promptr) +{ + unsigned long i, claim_addr, claim_size; + extern char _start; + struct bi_record *bi_recs; + kernel_entry_t kernel_entry; + Elf64_Ehdr *elf64; + Elf64_Phdr *elf64ph; + + prom = (int (*)(void *)) promptr; + chosen_handle = finddevice("/chosen"); + if (chosen_handle == (void *) -1) + exit(); + if (getprop(chosen_handle, "stdout", &stdout, sizeof(stdout)) != 4) + exit(); + stderr = stdout; + if (getprop(chosen_handle, "stdin", &stdin, sizeof(stdin)) != 4) + exit(); + + printf("zImage starting: loaded at 0x%x\n\r", (unsigned)&_start); + +#if 0 + sysmap.size = (unsigned long)(_sysmap_end - _sysmap_start); + sysmap.memsize = sysmap.size; + if ( sysmap.size > 0 ) { + sysmap.addr = (RAM_END - sysmap.size) & ~0xFFF; + claim(sysmap.addr, RAM_END - sysmap.addr, 0); + 
printf("initial ramdisk moving 0x%lx <- 0x%lx (%lx bytes)\n\r", + sysmap.addr, (unsigned long)_sysmap_start, sysmap.size); + memcpy((void *)sysmap.addr, (void *)_sysmap_start, sysmap.size); + } +#endif + + initrd.size = (unsigned long)(_initrd_end - _initrd_start); + initrd.memsize = initrd.size; + if ( initrd.size > 0 ) { + initrd.addr = (RAM_END - initrd.size) & ~0xFFF; + a1 = a2 = 0; + claim(initrd.addr, RAM_END - initrd.addr, 0); + printf("initial ramdisk moving 0x%lx <- 0x%lx (%lx bytes)\n\r", + initrd.addr, (unsigned long)_initrd_start, initrd.size); + memcpy((void *)initrd.addr, (void *)_initrd_start, initrd.size); + } + + vmlinuz.addr = (unsigned long)_vmlinux_start; + vmlinuz.size = (unsigned long)(_vmlinux_end - _vmlinux_start); + vmlinux.addr = (unsigned long)(void *)-1; + vmlinux.size = PAGE_ALIGN(vmlinux_filesize); + vmlinux.memsize = vmlinux_memsize; + + claim_size = vmlinux.memsize /* PPPBBB: + fudge for bi_recs */; + for(claim_addr = PROG_START; + claim_addr <= PROG_START * 8; + claim_addr += 0x100000) { + printf(" trying: 0x%08lx\n\r", claim_addr); + vmlinux.addr = (unsigned long)claim(claim_addr, claim_size, 0); + if ((void *)vmlinux.addr != (void *)-1) break; + } + if ((void *)vmlinux.addr == (void *)-1) { + printf("claim error, can't allocate kernel memory\n\r"); + exit(); + } + + /* PPPBBB: should kernel always be gziped? */ + if (*(unsigned short *)vmlinuz.addr == 0x1f8b) { + avail_ram = scratch; + begin_avail = avail_high = avail_ram; + end_avail = scratch + sizeof(scratch); + printf("gunzipping (0x%lx <- 0x%lx:0x%0lx)...", + vmlinux.addr, vmlinuz.addr, vmlinuz.addr+vmlinuz.size); + gunzip((void *)vmlinux.addr, vmlinux.size, + (unsigned char *)vmlinuz.addr, (int *)&vmlinuz.size); + printf("done %lu bytes\n\r", vmlinuz.size); + printf("%u bytes of heap consumed, max in use %u\n\r", + (unsigned)(avail_high - begin_avail), heap_max); + } else { + memmove((void *)vmlinux.addr,(void *)vmlinuz.addr,vmlinuz.size); + } + + /* Skip over the ELF header */ + elf64 = (Elf64_Ehdr *)vmlinux.addr; + if ( elf64->e_ident[EI_MAG0] != ELFMAG0 || + elf64->e_ident[EI_MAG1] != ELFMAG1 || + elf64->e_ident[EI_MAG2] != ELFMAG2 || + elf64->e_ident[EI_MAG3] != ELFMAG3 || + elf64->e_ident[EI_CLASS] != ELFCLASS64 || + elf64->e_ident[EI_DATA] != ELFDATA2MSB || + elf64->e_type != ET_EXEC || + elf64->e_machine != EM_PPC64 ) + { + printf("Error: not a valid PPC64 ELF file!\n\r"); + exit(); + } + + elf64ph = (Elf64_Phdr *)((unsigned long)elf64 + + (unsigned long)elf64->e_phoff); + for(i=0; i < (unsigned int)elf64->e_phnum ;i++,elf64ph++) { + if (elf64ph->p_type == PT_LOAD && elf64ph->p_offset != 0) + break; + } + printf("... 
skipping 0x%lx bytes of ELF header\n\r", + (unsigned long)elf64ph->p_offset); + vmlinux.addr += (unsigned long)elf64ph->p_offset; + vmlinux.size -= (unsigned long)elf64ph->p_offset; + + flush_cache((void *)vmlinux.addr, vmlinux.memsize); + + bi_recs = make_bi_recs(vmlinux.addr + vmlinux.memsize); + + kernel_entry = (kernel_entry_t)vmlinux.addr; + printf( "kernel:\n\r" + " entry addr = 0x%lx\n\r" + " a1 = 0x%lx,\n\r" + " a2 = 0x%lx,\n\r" + " prom = 0x%lx,\n\r" + " bi_recs = 0x%lx,\n\r", + (unsigned long)kernel_entry, a1, a2, + (unsigned long)prom, (unsigned long)bi_recs); + + kernel_entry( a1, a2, prom, bi_recs ); + + printf("Error: Linux kernel returned to zImage bootloader!\n\r"); + + exit(); +} + +static struct bi_record * +make_bi_recs(unsigned long addr) +{ + struct bi_record *bi_recs; + struct bi_record *rec; + + bi_recs = rec = bi_rec_init(addr); + + rec = bi_rec_alloc(rec, 2); + rec->tag = BI_FIRST; + /* rec->data[0] = ...; # Written below before return */ + /* rec->data[1] = ...; # Written below before return */ + + rec = bi_rec_alloc_bytes(rec, strlen("chrpboot")+1); + rec->tag = BI_BOOTLOADER_ID; + sprintf( (char *)rec->data, "chrpboot"); + + rec = bi_rec_alloc(rec, 2); + rec->tag = BI_MACHTYPE; + rec->data[0] = _MACH_pSeries; + rec->data[1] = 1; + + if ( initrd.size > 0 ) { + rec = bi_rec_alloc(rec, 2); + rec->tag = BI_INITRD; + rec->data[0] = initrd.addr; + rec->data[1] = initrd.size; + } + + if ( sysmap.size > 0 ) { + rec = bi_rec_alloc(rec, 2); + rec->tag = BI_SYSMAP; + rec->data[0] = (unsigned long)sysmap.addr; + rec->data[1] = (unsigned long)sysmap.size; + } + + rec = bi_rec_alloc(rec, 1); + rec->tag = BI_LAST; + rec->data[0] = (bi_rec_field)bi_recs; + + /* Save the _end_ address of the bi_rec's in the first bi_rec + * data field for easy access by the kernel. + */ + bi_recs->data[0] = (bi_rec_field)rec; + bi_recs->data[1] = (bi_rec_field)rec + rec->size - (bi_rec_field)bi_recs; + + return bi_recs; +} + +struct memchunk { + unsigned int size; + unsigned int pad; + struct memchunk *next; +}; + +static struct memchunk *freechunks; + +void *zalloc(void *x, unsigned items, unsigned size) +{ + void *p; + struct memchunk **mpp, *mp; + + size *= items; + size = _ALIGN(size, sizeof(struct memchunk)); + heap_use += size; + if (heap_use > heap_max) + heap_max = heap_use; + for (mpp = &freechunks; (mp = *mpp) != 0; mpp = &mp->next) { + if (mp->size == size) { + *mpp = mp->next; + return mp; + } + } + p = avail_ram; + avail_ram += size; + if (avail_ram > avail_high) + avail_high = avail_ram; + if (avail_ram > end_avail) { + printf("oops... 
out of memory\n\r"); + pause(); + } + return p; +} + +void zfree(void *x, void *addr, unsigned nb) +{ + struct memchunk *mp = addr; + + nb = _ALIGN(nb, sizeof(struct memchunk)); + heap_use -= nb; + if (avail_ram == addr + nb) { + avail_ram = addr; + return; + } + mp->size = nb; + mp->next = freechunks; + freechunks = mp; +} + +#define HEAD_CRC 2 +#define EXTRA_FIELD 4 +#define ORIG_NAME 8 +#define COMMENT 0x10 +#define RESERVED 0xe0 + +#define DEFLATED 8 + +void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) +{ + z_stream s; + int r, i, flags; + + /* skip header */ + i = 10; + flags = src[3]; + if (src[2] != DEFLATED || (flags & RESERVED) != 0) { + printf("bad gzipped data\n\r"); + exit(); + } + if ((flags & EXTRA_FIELD) != 0) + i = 12 + src[10] + (src[11] << 8); + if ((flags & ORIG_NAME) != 0) + while (src[i++] != 0) + ; + if ((flags & COMMENT) != 0) + while (src[i++] != 0) + ; + if ((flags & HEAD_CRC) != 0) + i += 2; + if (i >= *lenp) { + printf("gunzip: ran out of data in header\n\r"); + exit(); + } + + s.zalloc = zalloc; + s.zfree = zfree; + r = inflateInit2(&s, -MAX_WBITS); + if (r != Z_OK) { + printf("inflateInit2 returned %d\n\r", r); + exit(); + } + s.next_in = src + i; + s.avail_in = *lenp - i; + s.next_out = dst; + s.avail_out = dstlen; + r = inflate(&s, Z_FINISH); + if (r != Z_OK && r != Z_STREAM_END) { + printf("inflate returned %d msg: %s\n\r", r, s.msg); + exit(); + } + *lenp = s.next_out - (unsigned char *) dst; + inflateEnd(&s); +} + diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/boot/zImage.lds linuxppc64_2_4/arch/ppc64/boot/zImage.lds --- ../kernel.org/linux-2.4.19/arch/ppc64/boot/zImage.lds Fri Apr 19 11:00:32 2002 +++ linuxppc64_2_4/arch/ppc64/boot/zImage.lds Wed Apr 10 12:21:34 2002 @@ -57,6 +57,27 @@ *(.dynamic) CONSTRUCTORS } + + . = ALIGN(4096); + _vmlinux_start = .; + .kernel:vmlinux : { *(.kernel:vmlinux) } + _vmlinux_end = .; + + . = ALIGN(4096); + _dotconfig_start = .; + .kernel:.config : { *(.kernel:.config) } + _dotconfig_end = .; + + . = ALIGN(4096); + _sysmap_start = .; + .kernel:System.map : { *(.kernel:System.map) } + _sysmap_end = .; + + . = ALIGN(4096); + _initrd_start = .; + .kernel:initrd : { *(.kernel:initrd) } + _initrd_end = .; + . = ALIGN(4096); _edata = .; PROVIDE (edata = .); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/boot/zlib.c linuxppc64_2_4/arch/ppc64/boot/zlib.c --- ../kernel.org/linux-2.4.19/arch/ppc64/boot/zlib.c Fri Apr 19 11:00:32 2002 +++ linuxppc64_2_4/arch/ppc64/boot/zlib.c Wed Apr 10 12:21:34 2002 @@ -671,11 +671,6 @@ /* load local pointers */ #define LOAD {LOADIN LOADOUT} -/* - * The IBM 150 firmware munges the data right after _etext[]. This - * protects it. 
-- Cort - */ -local uInt protect_mask[] = {0, 0, 0, 0, 0, 0, 0, 0, 0 ,0 ,0 ,0}; /* And'ing with mask[n] masks the lower n bits */ local uInt inflate_mask[] = { 0x0000, diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/config.in linuxppc64_2_4/arch/ppc64/config.in --- ../kernel.org/linux-2.4.19/arch/ppc64/config.in Fri Apr 19 11:00:32 2002 +++ linuxppc64_2_4/arch/ppc64/config.in Fri Mar 29 10:18:26 2002 @@ -31,7 +31,6 @@ bool ' Distribute interrupts on all CPUs by default' CONFIG_IRQ_ALL_CPUS if [ "$CONFIG_PPC_PSERIES" = "y" ]; then bool ' Hardware multithreading' CONFIG_HMT - bool ' PCI Enhanced Error Handling' CONFIG_PPC_EEH fi fi if [ "$CONFIG_PPC_ISERIES" = "y" ]; then diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/configs/pSeries_defconfig linuxppc64_2_4/arch/ppc64/configs/pSeries_defconfig --- ../kernel.org/linux-2.4.19/arch/ppc64/configs/pSeries_defconfig Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/configs/pSeries_defconfig Fri Mar 29 10:18:26 2002 @@ -24,7 +24,6 @@ CONFIG_SMP=y CONFIG_IRQ_ALL_CPUS=y # CONFIG_HMT is not set -# CONFIG_PPC_EEH is not set # CONFIG_MSCHUNKS is not set # @@ -72,7 +71,6 @@ # # CONFIG_PNP is not set # CONFIG_ISAPNP is not set -# CONFIG_PNPBIOS is not set # # Block devices @@ -106,8 +104,6 @@ # CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set -CONFIG_NETLINK=y -# CONFIG_RTNETLINK is not set # CONFIG_NETLINK_DEV is not set # CONFIG_NETFILTER is not set CONFIG_FILTER=y @@ -119,11 +115,13 @@ CONFIG_NET_IPIP=y # CONFIG_NET_IPGRE is not set # CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set # CONFIG_INET_ECN is not set CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set # CONFIG_KHTTPD is not set # CONFIG_ATM is not set +# CONFIG_VLAN_8021Q is not set # # @@ -260,6 +258,7 @@ # CONFIG_SCSI_AHA152X is not set # CONFIG_SCSI_AHA1542 is not set # CONFIG_SCSI_AHA1740 is not set +# CONFIG_SCSI_AACRAID is not set # CONFIG_SCSI_AIC7XXX is not set # CONFIG_SCSI_AIC7XXX_OLD is not set # CONFIG_SCSI_DPT_I2O is not set @@ -281,6 +280,7 @@ # CONFIG_SCSI_INIA100 is not set # CONFIG_SCSI_NCR53C406A is not set # CONFIG_SCSI_NCR53C7xx is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set # CONFIG_SCSI_NCR53C8XX is not set CONFIG_SCSI_SYM53C8XX=y CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 @@ -304,8 +304,6 @@ # CONFIG_SCSI_T128 is not set # CONFIG_SCSI_U14_34F is not set # CONFIG_SCSI_DEBUG is not set -# CONFIG_SCSI_MESH is not set -# CONFIG_SCSI_MAC53C94 is not set # # IEEE 1394 (FireWire) support (EXPERIMENTAL) @@ -334,12 +332,10 @@ # CONFIG_MACE is not set # CONFIG_BMAC is not set # CONFIG_GMAC is not set -# CONFIG_OAKNET is not set # CONFIG_SUNLANCE is not set # CONFIG_HAPPYMEAL is not set # CONFIG_SUNBMAC is not set # CONFIG_SUNQE is not set -# CONFIG_SUNLANCE is not set # CONFIG_SUNGEM is not set # CONFIG_NET_VENDOR_3COM is not set # CONFIG_LANCE is not set @@ -368,11 +364,13 @@ # CONFIG_8139TOO_PIO is not set # CONFIG_8139TOO_TUNE_TWISTER is not set # CONFIG_8139TOO_8129 is not set +# CONFIG_8139_NEW_RX_RESET is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set # CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set +# CONFIG_VIA_RHINE_MMIO is not set # CONFIG_WINBOND_840 is not set # CONFIG_NET_POCKET is not set @@ -382,6 +380,7 @@ CONFIG_ACENIC=y # CONFIG_ACENIC_OMIT_TIGON_I is not set # CONFIG_DL2K is not set +CONFIG_E1000=y # CONFIG_MYRI_SBUS is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set @@ -467,6 +466,7 @@ # CONFIG_FB_SIS is not set # CONFIG_FB_3DFX is not set # CONFIG_FB_VOODOO1 is 
not set +# CONFIG_FB_TRIDENT is not set # CONFIG_FB_VIRTUAL is not set # CONFIG_FBCON_ADVANCED is not set CONFIG_FBCON_CFB8=y @@ -480,6 +480,15 @@ # CONFIG_FONT_SUN8x16 is not set # CONFIG_FONT_PEARL_8x8 is not set # CONFIG_FONT_ACORN_8x8 is not set + +# +# Input core support +# +# CONFIG_INPUT is not set +# CONFIG_INPUT_KEYBDEV is not set +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_EVDEV is not set CONFIG_VIOPATH=y # @@ -513,36 +522,14 @@ # Joysticks # # CONFIG_INPUT_GAMEPORT is not set -# CONFIG_INPUT_NS558 is not set -# CONFIG_INPUT_LIGHTNING is not set -# CONFIG_INPUT_PCIGAME is not set -# CONFIG_INPUT_CS461X is not set -# CONFIG_INPUT_EMU10K1 is not set -# CONFIG_INPUT_SERIO is not set -# CONFIG_INPUT_SERPORT is not set # -# Joysticks +# Input core support is needed for gameports +# + +# +# Input core support is needed for joysticks # -# CONFIG_INPUT_ANALOG is not set -# CONFIG_INPUT_A3D is not set -# CONFIG_INPUT_ADI is not set -# CONFIG_INPUT_COBRA is not set -# CONFIG_INPUT_GF2K is not set -# CONFIG_INPUT_GRIP is not set -# CONFIG_INPUT_INTERACT is not set -# CONFIG_INPUT_TMDC is not set -# CONFIG_INPUT_SIDEWINDER is not set -# CONFIG_INPUT_IFORCE_USB is not set -# CONFIG_INPUT_IFORCE_232 is not set -# CONFIG_INPUT_WARRIOR is not set -# CONFIG_INPUT_MAGELLAN is not set -# CONFIG_INPUT_SPACEORB is not set -# CONFIG_INPUT_SPACEBALL is not set -# CONFIG_INPUT_STINGER is not set -# CONFIG_INPUT_DB9 is not set -# CONFIG_INPUT_GAMECON is not set -# CONFIG_INPUT_TURBOGRAFX is not set # CONFIG_QIC02_TAPE is not set # @@ -550,9 +537,9 @@ # # CONFIG_WATCHDOG is not set # CONFIG_INTEL_RNG is not set -CONFIG_NVRAM=y -CONFIG_RTC=y -CONFIG_RTC=y +# CONFIG_NVRAM is not set +# CONFIG_RTC is not set +# CONFIG_RTC is not set # CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set @@ -563,7 +550,6 @@ # CONFIG_FTAPE is not set # CONFIG_AGP is not set # CONFIG_DRM is not set -# CONFIG_MWAVE is not set # # File systems @@ -573,11 +559,15 @@ # CONFIG_AUTOFS4_FS is not set CONFIG_REISERFS_FS=y # CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set # CONFIG_ADFS_FS is not set # CONFIG_ADFS_FS_RW is not set # CONFIG_AFFS_FS is not set # CONFIG_HFS_FS is not set # CONFIG_BFS_FS is not set +CONFIG_EXT3_FS=y +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set CONFIG_FAT_FS=y CONFIG_MSDOS_FS=y # CONFIG_UMSDOS_FS is not set @@ -590,6 +580,7 @@ # CONFIG_RAMFS is not set CONFIG_ISO9660_FS=y # CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set # CONFIG_MINIX_FS is not set # CONFIG_JFS_FS is not set # CONFIG_JFS_DEBUG is not set @@ -616,6 +607,7 @@ # Network File Systems # # CONFIG_CODA_FS is not set +# CONFIG_INTERMEZZO_FS is not set CONFIG_NFS_FS=y CONFIG_NFS_V3=y # CONFIG_ROOT_NFS is not set @@ -635,6 +627,8 @@ # CONFIG_NCPFS_SMALLDOS is not set # CONFIG_NCPFS_NLS is not set # CONFIG_NCPFS_EXTRAS is not set +# CONFIG_ZISOFS_FS is not set +# CONFIG_ZLIB_FS_INFLATE is not set # # Partition Types @@ -669,6 +663,7 @@ # CONFIG_NLS_CODEPAGE_949 is not set # CONFIG_NLS_CODEPAGE_874 is not set # CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set # CONFIG_NLS_CODEPAGE_1251 is not set # CONFIG_NLS_ISO8859_1 is not set # CONFIG_NLS_ISO8859_2 is not set @@ -722,11 +717,10 @@ # # USB Human Interface Devices (HID) # -# CONFIG_USB_HID is not set -# CONFIG_USB_HIDDEV is not set -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -# CONFIG_USB_WACOM is not set + +# +# Input core support is needed for USB HID +# # # USB 
Imaging devices @@ -744,6 +738,8 @@ # CONFIG_USB_OV511 is not set # CONFIG_USB_PWC is not set # CONFIG_USB_SE401 is not set +# CONFIG_USB_STV680 is not set +# CONFIG_USB_VICAM is not set # CONFIG_USB_DSBR is not set # CONFIG_USB_DABUSB is not set @@ -772,6 +768,7 @@ # CONFIG_USB_SERIAL_EMPEG is not set # CONFIG_USB_SERIAL_FTDI_SIO is not set # CONFIG_USB_SERIAL_VISOR is not set +# CONFIG_USB_SERIAL_IPAQ is not set # CONFIG_USB_SERIAL_IR is not set # CONFIG_USB_SERIAL_EDGEPORT is not set # CONFIG_USB_SERIAL_KEYSPAN_PDA is not set @@ -785,6 +782,7 @@ # CONFIG_USB_SERIAL_KEYSPAN_USA19W is not set # CONFIG_USB_SERIAL_KEYSPAN_USA49W is not set # CONFIG_USB_SERIAL_MCT_U232 is not set +# CONFIG_USB_SERIAL_KLSI is not set # CONFIG_USB_SERIAL_PL2303 is not set # CONFIG_USB_SERIAL_CYBERJACK is not set # CONFIG_USB_SERIAL_XIRCOM is not set diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/defconfig linuxppc64_2_4/arch/ppc64/defconfig --- ../kernel.org/linux-2.4.19/arch/ppc64/defconfig Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/defconfig Fri Mar 29 10:18:26 2002 @@ -24,7 +24,6 @@ CONFIG_SMP=y CONFIG_IRQ_ALL_CPUS=y # CONFIG_HMT is not set -# CONFIG_PPC_EEH is not set # CONFIG_MSCHUNKS is not set # @@ -72,7 +71,6 @@ # # CONFIG_PNP is not set # CONFIG_ISAPNP is not set -# CONFIG_PNPBIOS is not set # # Block devices @@ -106,8 +104,6 @@ # CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set -CONFIG_NETLINK=y -# CONFIG_RTNETLINK is not set # CONFIG_NETLINK_DEV is not set # CONFIG_NETFILTER is not set CONFIG_FILTER=y @@ -119,11 +115,13 @@ CONFIG_NET_IPIP=y # CONFIG_NET_IPGRE is not set # CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set # CONFIG_INET_ECN is not set CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set # CONFIG_KHTTPD is not set # CONFIG_ATM is not set +# CONFIG_VLAN_8021Q is not set # # @@ -260,6 +258,7 @@ # CONFIG_SCSI_AHA152X is not set # CONFIG_SCSI_AHA1542 is not set # CONFIG_SCSI_AHA1740 is not set +# CONFIG_SCSI_AACRAID is not set # CONFIG_SCSI_AIC7XXX is not set # CONFIG_SCSI_AIC7XXX_OLD is not set # CONFIG_SCSI_DPT_I2O is not set @@ -281,6 +280,7 @@ # CONFIG_SCSI_INIA100 is not set # CONFIG_SCSI_NCR53C406A is not set # CONFIG_SCSI_NCR53C7xx is not set +# CONFIG_SCSI_SYM53C8XX_2 is not set # CONFIG_SCSI_NCR53C8XX is not set CONFIG_SCSI_SYM53C8XX=y CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 @@ -304,8 +304,6 @@ # CONFIG_SCSI_T128 is not set # CONFIG_SCSI_U14_34F is not set # CONFIG_SCSI_DEBUG is not set -# CONFIG_SCSI_MESH is not set -# CONFIG_SCSI_MAC53C94 is not set # # IEEE 1394 (FireWire) support (EXPERIMENTAL) @@ -334,12 +332,10 @@ # CONFIG_MACE is not set # CONFIG_BMAC is not set # CONFIG_GMAC is not set -# CONFIG_OAKNET is not set # CONFIG_SUNLANCE is not set # CONFIG_HAPPYMEAL is not set # CONFIG_SUNBMAC is not set # CONFIG_SUNQE is not set -# CONFIG_SUNLANCE is not set # CONFIG_SUNGEM is not set # CONFIG_NET_VENDOR_3COM is not set # CONFIG_LANCE is not set @@ -368,11 +364,13 @@ # CONFIG_8139TOO_PIO is not set # CONFIG_8139TOO_TUNE_TWISTER is not set # CONFIG_8139TOO_8129 is not set +# CONFIG_8139_NEW_RX_RESET is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set # CONFIG_SUNDANCE is not set # CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set +# CONFIG_VIA_RHINE_MMIO is not set # CONFIG_WINBOND_840 is not set # CONFIG_NET_POCKET is not set @@ -382,6 +380,7 @@ CONFIG_ACENIC=y # CONFIG_ACENIC_OMIT_TIGON_I is not set # CONFIG_DL2K is not set +CONFIG_E1000=y # CONFIG_MYRI_SBUS is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set @@ -467,6 +466,7 
@@ # CONFIG_FB_SIS is not set # CONFIG_FB_3DFX is not set # CONFIG_FB_VOODOO1 is not set +# CONFIG_FB_TRIDENT is not set # CONFIG_FB_VIRTUAL is not set # CONFIG_FBCON_ADVANCED is not set CONFIG_FBCON_CFB8=y @@ -480,6 +480,15 @@ # CONFIG_FONT_SUN8x16 is not set # CONFIG_FONT_PEARL_8x8 is not set # CONFIG_FONT_ACORN_8x8 is not set + +# +# Input core support +# +# CONFIG_INPUT is not set +# CONFIG_INPUT_KEYBDEV is not set +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_EVDEV is not set CONFIG_VIOPATH=y # @@ -513,36 +522,14 @@ # Joysticks # # CONFIG_INPUT_GAMEPORT is not set -# CONFIG_INPUT_NS558 is not set -# CONFIG_INPUT_LIGHTNING is not set -# CONFIG_INPUT_PCIGAME is not set -# CONFIG_INPUT_CS461X is not set -# CONFIG_INPUT_EMU10K1 is not set -# CONFIG_INPUT_SERIO is not set -# CONFIG_INPUT_SERPORT is not set # -# Joysticks +# Input core support is needed for gameports +# + +# +# Input core support is needed for joysticks # -# CONFIG_INPUT_ANALOG is not set -# CONFIG_INPUT_A3D is not set -# CONFIG_INPUT_ADI is not set -# CONFIG_INPUT_COBRA is not set -# CONFIG_INPUT_GF2K is not set -# CONFIG_INPUT_GRIP is not set -# CONFIG_INPUT_INTERACT is not set -# CONFIG_INPUT_TMDC is not set -# CONFIG_INPUT_SIDEWINDER is not set -# CONFIG_INPUT_IFORCE_USB is not set -# CONFIG_INPUT_IFORCE_232 is not set -# CONFIG_INPUT_WARRIOR is not set -# CONFIG_INPUT_MAGELLAN is not set -# CONFIG_INPUT_SPACEORB is not set -# CONFIG_INPUT_SPACEBALL is not set -# CONFIG_INPUT_STINGER is not set -# CONFIG_INPUT_DB9 is not set -# CONFIG_INPUT_GAMECON is not set -# CONFIG_INPUT_TURBOGRAFX is not set # CONFIG_QIC02_TAPE is not set # @@ -550,9 +537,9 @@ # # CONFIG_WATCHDOG is not set # CONFIG_INTEL_RNG is not set -CONFIG_NVRAM=y -CONFIG_RTC=y -CONFIG_RTC=y +# CONFIG_NVRAM is not set +# CONFIG_RTC is not set +# CONFIG_RTC is not set # CONFIG_DTLK is not set # CONFIG_R3964 is not set # CONFIG_APPLICOM is not set @@ -563,7 +550,6 @@ # CONFIG_FTAPE is not set # CONFIG_AGP is not set # CONFIG_DRM is not set -# CONFIG_MWAVE is not set # # File systems @@ -573,11 +559,15 @@ # CONFIG_AUTOFS4_FS is not set CONFIG_REISERFS_FS=y # CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set # CONFIG_ADFS_FS is not set # CONFIG_ADFS_FS_RW is not set # CONFIG_AFFS_FS is not set # CONFIG_HFS_FS is not set # CONFIG_BFS_FS is not set +CONFIG_EXT3_FS=y +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set CONFIG_FAT_FS=y CONFIG_MSDOS_FS=y # CONFIG_UMSDOS_FS is not set @@ -590,6 +580,7 @@ # CONFIG_RAMFS is not set CONFIG_ISO9660_FS=y # CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set # CONFIG_MINIX_FS is not set # CONFIG_JFS_FS is not set # CONFIG_JFS_DEBUG is not set @@ -616,6 +607,7 @@ # Network File Systems # # CONFIG_CODA_FS is not set +# CONFIG_INTERMEZZO_FS is not set CONFIG_NFS_FS=y CONFIG_NFS_V3=y # CONFIG_ROOT_NFS is not set @@ -635,6 +627,8 @@ # CONFIG_NCPFS_SMALLDOS is not set # CONFIG_NCPFS_NLS is not set # CONFIG_NCPFS_EXTRAS is not set +# CONFIG_ZISOFS_FS is not set +# CONFIG_ZLIB_FS_INFLATE is not set # # Partition Types @@ -669,6 +663,7 @@ # CONFIG_NLS_CODEPAGE_949 is not set # CONFIG_NLS_CODEPAGE_874 is not set # CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set # CONFIG_NLS_CODEPAGE_1251 is not set # CONFIG_NLS_ISO8859_1 is not set # CONFIG_NLS_ISO8859_2 is not set @@ -722,11 +717,10 @@ # # USB Human Interface Devices (HID) # -# CONFIG_USB_HID is not set -# CONFIG_USB_HIDDEV is not set -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -# 
CONFIG_USB_WACOM is not set + +# +# Input core support is needed for USB HID +# # # USB Imaging devices @@ -744,6 +738,8 @@ # CONFIG_USB_OV511 is not set # CONFIG_USB_PWC is not set # CONFIG_USB_SE401 is not set +# CONFIG_USB_STV680 is not set +# CONFIG_USB_VICAM is not set # CONFIG_USB_DSBR is not set # CONFIG_USB_DABUSB is not set @@ -772,6 +768,7 @@ # CONFIG_USB_SERIAL_EMPEG is not set # CONFIG_USB_SERIAL_FTDI_SIO is not set # CONFIG_USB_SERIAL_VISOR is not set +# CONFIG_USB_SERIAL_IPAQ is not set # CONFIG_USB_SERIAL_IR is not set # CONFIG_USB_SERIAL_EDGEPORT is not set # CONFIG_USB_SERIAL_KEYSPAN_PDA is not set @@ -785,6 +782,7 @@ # CONFIG_USB_SERIAL_KEYSPAN_USA19W is not set # CONFIG_USB_SERIAL_KEYSPAN_USA49W is not set # CONFIG_USB_SERIAL_MCT_U232 is not set +# CONFIG_USB_SERIAL_KLSI is not set # CONFIG_USB_SERIAL_PL2303 is not set # CONFIG_USB_SERIAL_CYBERJACK is not set # CONFIG_USB_SERIAL_XIRCOM is not set diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/ItLpQueue.c linuxppc64_2_4/arch/ppc64/kernel/ItLpQueue.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/ItLpQueue.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/ItLpQueue.c Tue Apr 9 11:30:17 2002 @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/LparData.c linuxppc64_2_4/arch/ppc64/kernel/LparData.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/LparData.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/LparData.c Fri Apr 19 12:43:08 2002 @@ -14,13 +14,13 @@ #include #include #include -#include +#include #include #include #include #include #include -#include +#include #include #include #include @@ -61,8 +61,8 @@ struct HvReleaseData hvReleaseData = { 0xc8a5d9c4, /* desc = "HvRD" ebcdic */ sizeof(struct HvReleaseData), - offsetof(struct Naca, xItVpdAreas), - (struct Naca *)(KERNELBASE+0x4000), /* 64-bit Naca address */ + offsetof(struct naca_struct, xItVpdAreas), + (struct naca_struct *)(KERNELBASE+0x4000), /* 64-bit Naca address */ 0x6000, /* offset of LparMap within loadarea (see head.S) */ 0, 1, /* tags inactive */ @@ -144,7 +144,9 @@ struct ItIplParmsReal xItIplParmsReal = {}; -struct IoHriProcessorVpd xIoHriProcessorVpd[maxProcessors] = { +#define maxPhysicalProcessors 32 + +struct IoHriProcessorVpd xIoHriProcessorVpd[maxPhysicalProcessors] = { { xInstCacheOperandSize: 32, xDataCacheOperandSize: 32, @@ -172,7 +174,7 @@ 0, 0, 26, /* # VPD array entries */ 10, /* # DMA array entries */ - maxProcessors*2, maxProcessors, /* Max logical, physical procs */ + MAX_PROCESSORS*2, maxPhysicalProcessors, /* Max logical, physical procs */ offsetof(struct ItVpdAreas,xPlicDmaToks),/* offset to DMA toks */ offsetof(struct ItVpdAreas,xSlicVpdAdrs),/* offset to VPD addrs */ offsetof(struct ItVpdAreas,xPlicDmaLens),/* offset to DMA lens */ @@ -184,7 +186,7 @@ {0}, /* DMA tokens */ { /* VPD lengths */ 0,0,0,0, /* 0 - 3 */ - sizeof(struct Paca), /* 4 length of Paca */ + sizeof(struct paca_struct), /* 4 length of Paca */ 0, /* 5 */ sizeof(struct ItIplParmsReal),/* 6 length of IPL parms */ 26992, /* 7 length of MS VPD */ @@ -201,7 +203,7 @@ }, { /* VPD addresses */ 0,0,0,0, /* 0 - 3 */ - &xPaca[0], /* 4 first Paca */ + &paca[0], /* 4 first Paca */ 0, /* 5 */ &xItIplParmsReal, /* 6 IPL parms */ &xMsVpd, /* 7 MS Vpd */ @@ -217,10 +219,6 @@ 0,0 } }; - - -/* Data area used in flush_hash_page */ -long long flush_hash_page_hpte[2]; struct msChunks msChunks = {0, 0, 0, 0, NULL}; diff -uNr 
--exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/Makefile linuxppc64_2_4/arch/ppc64/kernel/Makefile --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/Makefile Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/Makefile Fri Mar 29 10:18:26 2002 @@ -30,25 +30,19 @@ rtc.o obj-$(CONFIG_PCI) += pci.o pci_dn.o pci_dma.o -obj-$(CONFIG_PPC_EEH) += eeh.o ifeq ($(CONFIG_PPC_ISERIES),y) obj-$(CONFIG_PCI) += iSeries_pci.o iSeries_pci_reset.o iSeries_IoMmTable.o iSeries_irq.o iSeries_VpdInfo.o XmPciLpEvent.o endif ifeq ($(CONFIG_PPC_PSERIES),y) -obj-$(CONFIG_PCI) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o +obj-$(CONFIG_PCI) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o eeh.o -obj-y += rtasd.o +obj-y += rtasd.o nvram.o endif obj-$(CONFIG_KGDB) += ppc-stub.o obj-$(CONFIG_SMP) += smp.o - -# tibit: for matrox_init2() -ifeq ($(CONFIG_NVRAM),y) - obj-$(CONFIG_NVRAM) += pmac_nvram.o -endif obj-y += prom.o lmb.o rtas.o rtas-proc.o chrp_setup.o i8259.o diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/align.c linuxppc64_2_4/arch/ppc64/kernel/align.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/align.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/align.c Mon Mar 11 13:08:46 2002 @@ -1,12 +1,12 @@ -/* - * align.c - handle alignment exceptions for the Power PC. +/* align.c - handle alignment exceptions for the Power PC. * * Copyright (c) 1996 Paul Mackerras * Copyright (c) 1998-1999 TiVo, Inc. * PowerPC 403GCX modifications. * Copyright (c) 1999 Grant Erickson * PowerPC 403GCX/405GP modifications. - * Copyright (c) 2001 PPC64 team, IBM Corp + * Copyright (c) 2001-2002 PPC64 team, IBM Corp + * 64-bit and Power4 support * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -27,10 +27,8 @@ unsigned char flags; }; -#define OPCD(inst) (((inst) & 0xFC000000) >> 26) -#define RS(inst) (((inst) & 0x03E00000) >> 21) -#define RA(inst) (((inst) & 0x001F0000) >> 16) -#define IS_DFORM(code) ((code) >= 32 && (code) <= 47) +#define IS_XFORM(inst) (((inst) >> 26) == 31) +#define IS_DSFORM(inst) (((inst) >> 26) >= 56) #define INVALID { 0, 0 } @@ -40,9 +38,7 @@ #define F 8 /* to/from fp regs */ #define U 0x10 /* update index register */ #define M 0x20 /* multiple load/store */ -#define S 0x40 /* single-precision fp, or byte-swap value */ -#define HARD 0x80 /* string, stwcx. 
*/ -#define D 0x100 /* double-word load/store */ +#define SW 0x40 /* byte swap */ #define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */ @@ -61,9 +57,9 @@ { 2, LD+SE }, /* 00 0 0101: lha */ { 2, ST }, /* 00 0 0110: sth */ { 4, LD+M }, /* 00 0 0111: lmw */ - { 4, LD+F+S }, /* 00 0 1000: lfs */ + { 4, LD+F }, /* 00 0 1000: lfs */ { 8, LD+F }, /* 00 0 1001: lfd */ - { 4, ST+F+S }, /* 00 0 1010: stfs */ + { 4, ST+F }, /* 00 0 1010: stfs */ { 8, ST+F }, /* 00 0 1011: stfd */ INVALID, /* 00 0 1100 */ { 8, LD }, /* 00 0 1101: ld */ @@ -77,12 +73,12 @@ { 2, LD+SE+U }, /* 00 1 0101: lhau */ { 2, ST+U }, /* 00 1 0110: sthu */ { 4, ST+M }, /* 00 1 0111: stmw */ - { 4, LD+F+S+U }, /* 00 1 1000: lfsu */ + { 4, LD+F+U }, /* 00 1 1000: lfsu */ { 8, LD+F+U }, /* 00 1 1001: lfdu */ - { 4, ST+F+S+U }, /* 00 1 1010: stfsu */ + { 4, ST+F+U }, /* 00 1 1010: stfsu */ { 8, ST+F+U }, /* 00 1 1011: stfdu */ INVALID, /* 00 1 1100 */ - { 8, ST }, /* 00 1 1101: std */ + INVALID, /* 00 1 1101 */ INVALID, /* 00 1 1110 */ INVALID, /* 00 1 1111 */ { 8, LD }, /* 01 0 0000: ldx */ @@ -90,13 +86,13 @@ { 8, ST }, /* 01 0 0010: stdx */ INVALID, /* 01 0 0011 */ INVALID, /* 01 0 0100 */ - INVALID, /* 01 0 0101: lwax?? */ + { 4, LD+SE }, /* 01 0 0101: lwax */ INVALID, /* 01 0 0110 */ INVALID, /* 01 0 0111 */ - { 0, LD+HARD }, /* 01 0 1000: lswx */ - { 0, LD+HARD }, /* 01 0 1001: lswi */ - { 0, ST+HARD }, /* 01 0 1010: stswx */ - { 0, ST+HARD }, /* 01 0 1011: stswi */ + { 0, LD }, /* 01 0 1000: lswx */ + { 0, LD }, /* 01 0 1001: lswi */ + { 0, ST }, /* 01 0 1010: stswx */ + { 0, ST }, /* 01 0 1011: stswi */ INVALID, /* 01 0 1100 */ { 8, LD+U }, /* 01 0 1101: ldu */ INVALID, /* 01 0 1110 */ @@ -106,7 +102,7 @@ { 8, ST+U }, /* 01 1 0010: stdux */ INVALID, /* 01 1 0011 */ INVALID, /* 01 1 0100 */ - INVALID, /* 01 1 0101: lwaux?? */ + { 4, LD+SE+U }, /* 01 1 0101: lwaux */ INVALID, /* 01 1 0110 */ INVALID, /* 01 1 0111 */ INVALID, /* 01 1 1000 */ @@ -119,19 +115,19 @@ INVALID, /* 01 1 1111 */ INVALID, /* 10 0 0000 */ INVALID, /* 10 0 0001 */ - { 0, ST+HARD }, /* 10 0 0010: stwcx. */ + { 0, ST }, /* 10 0 0010: stwcx. 
*/ INVALID, /* 10 0 0011 */ INVALID, /* 10 0 0100 */ INVALID, /* 10 0 0101 */ INVALID, /* 10 0 0110 */ INVALID, /* 10 0 0111 */ - { 4, LD+S }, /* 10 0 1000: lwbrx */ + { 4, LD+SW }, /* 10 0 1000: lwbrx */ INVALID, /* 10 0 1001 */ - { 4, ST+S }, /* 10 0 1010: stwbrx */ + { 4, ST+SW }, /* 10 0 1010: stwbrx */ INVALID, /* 10 0 1011 */ - { 2, LD+S }, /* 10 0 1100: lhbrx */ - INVALID, /* 10 0 1101 */ - { 2, ST+S }, /* 10 0 1110: sthbrx */ + { 2, LD+SW }, /* 10 0 1100: lhbrx */ + { 4, LD+SE }, /* 10 0 1101 lwa */ + { 2, ST+SW }, /* 10 0 1110: sthbrx */ INVALID, /* 10 0 1111 */ INVALID, /* 10 1 0000 */ INVALID, /* 10 1 0001 */ @@ -148,7 +144,7 @@ INVALID, /* 10 1 1100 */ INVALID, /* 10 1 1101 */ INVALID, /* 10 1 1110 */ - { 0, ST+HARD }, /* 10 1 1111: dcbz */ + { L1_CACHE_BYTES, ST }, /* 10 1 1111: dcbz */ { 4, LD }, /* 11 0 0000: lwzx */ INVALID, /* 11 0 0001 */ { 4, ST }, /* 11 0 0010: stwx */ @@ -157,14 +153,14 @@ { 2, LD+SE }, /* 11 0 0101: lhax */ { 2, ST }, /* 11 0 0110: sthx */ INVALID, /* 11 0 0111 */ - { 4, LD+F+S }, /* 11 0 1000: lfsx */ + { 4, LD+F }, /* 11 0 1000: lfsx */ { 8, LD+F }, /* 11 0 1001: lfdx */ - { 4, ST+F+S }, /* 11 0 1010: stfsx */ + { 4, ST+F }, /* 11 0 1010: stfsx */ { 8, ST+F }, /* 11 0 1011: stfdx */ INVALID, /* 11 0 1100 */ - INVALID, /* 11 0 1101 */ + { 8, LD+M }, /* 11 0 1101: lmd */ INVALID, /* 11 0 1110 */ - INVALID, /* 11 0 1111 */ + { 8, ST+M }, /* 11 0 1111: stmd */ { 4, LD+U }, /* 11 1 0000: lwzux */ INVALID, /* 11 1 0001 */ { 4, ST+U }, /* 11 1 0010: stwux */ @@ -173,9 +169,9 @@ { 2, LD+SE+U }, /* 11 1 0101: lhaux */ { 2, ST+U }, /* 11 1 0110: sthux */ INVALID, /* 11 1 0111 */ - { 4, LD+F+S+U }, /* 11 1 1000: lfsux */ + { 4, LD+F+U }, /* 11 1 1000: lfsux */ { 8, LD+F+U }, /* 11 1 1001: lfdux */ - { 4, ST+F+S+U }, /* 11 1 1010: stfsux */ + { 4, ST+F+U }, /* 11 1 1010: stfsux */ { 8, ST+F+U }, /* 11 1 1011: stfdux */ INVALID, /* 11 1 1100 */ INVALID, /* 11 1 1101 */ @@ -185,178 +181,203 @@ #define SWAP(a, b) (t = (a), (a) = (b), (b) = t) +unsigned static inline make_dsisr( unsigned instr ) +{ + unsigned dsisr; + + /* create a DSISR value from the instruction */ + dsisr = (instr & 0x03ff0000) >> 16; /* bits 6:15 --> 22:31 */ + + if ( IS_XFORM(instr) ) { + dsisr |= (instr & 0x00000006) << 14; /* bits 29:30 --> 15:16 */ + dsisr |= (instr & 0x00000040) << 8; /* bit 25 --> 17 */ + dsisr |= (instr & 0x00000780) << 3; /* bits 21:24 --> 18:21 */ + } + else { + dsisr |= (instr & 0x04000000) >> 12; /* bit 5 --> 17 */ + dsisr |= (instr & 0x78000000) >> 17; /* bits 1: 4 --> 18:21 */ + if ( IS_DSFORM(instr) ) { + dsisr |= (instr & 0x00000003) << 18; /* bits 30:31 --> 12:13 */ + } + } + + return dsisr; +} + int fix_alignment(struct pt_regs *regs) { - int instr, nb, flags; - int opcode, f1, f2, f3; - int i, t; - int reg, areg; - unsigned char *addr; + unsigned int instr, nb, flags; + int t; + unsigned long reg, areg; + unsigned long i; + int ret; + unsigned dsisr; + unsigned char *addr, *p; + unsigned long *lp; union { - int l; long ll; - float f; - double d; + double dd; unsigned char v[8]; + struct { + unsigned hi32; + int low32; + } x32; + struct { + unsigned char hi48[6]; + short low16; + } x16; } data; - if (__is_processor(PV_POWER4)) { - /* - * The POWER4 has a DSISR register but doesn't set it on - * an alignment fault. 
-- paulus - */ - - instr = *((unsigned int *)regs->nip); - opcode = OPCD(instr); - reg = RS(instr); - areg = RA(instr); - - if (IS_DFORM(opcode)) { - f1 = 0; - f2 = (instr & 0x04000000) >> 26; - f3 = (instr & 0x78000000) >> 27; - } else { - f1 = (instr & 0x00000006) >> 1; - f2 = (instr & 0x00000040) >> 6; - f3 = (instr & 0x00000780) >> 7; - } - - instr = ((f1 << 5) | (f2 << 4) | f3); - } else { - reg = (regs->dsisr >> 5) & 0x1f; /* source/dest register */ - areg = regs->dsisr & 0x1f; /* register to update */ - instr = (regs->dsisr >> 10) & 0x7f; - instr |= (regs->dsisr >> 13) & 0x60; - } + /* + * Return 1 on success + * Return 0 if unable to handle the interrupt + * Return -EFAULT if data address is bad + */ + + dsisr = regs->dsisr; + + /* Power4 doesn't set DSISR for an alignment interrupt */ + if (__is_processor(PV_POWER4)) + dsisr = make_dsisr( *((unsigned *)regs->nip) ); + + /* extract the operation and registers from the dsisr */ + reg = (dsisr >> 5) & 0x1f; /* source/dest register */ + areg = dsisr & 0x1f; /* register to update */ + instr = (dsisr >> 10) & 0x7f; + instr |= (dsisr >> 13) & 0x60; + /* Lookup the operation in our table */ nb = aligninfo[instr].len; - if (nb == 0) { - long *p; - int i; - - if (instr != DCBZ) - return 0; /* too hard or invalid instruction */ - /* - * The dcbz (data cache block zero) instruction - * gives an alignment fault if used on non-cacheable - * memory. We handle the fault mainly for the - * case when we are running with the cache disabled - * for debugging. - */ - p = (long *) (regs->dar & -L1_CACHE_BYTES); - for (i = 0; i < L1_CACHE_BYTES / sizeof(long); ++i) - p[i] = 0; - return 1; - } - flags = aligninfo[instr].flags; + + /* DAR has the operand effective address */ addr = (unsigned char *)regs->dar; + /* A size of 0 indicates an instruction we don't support */ + /* we also don't support the multiples (lmw, stmw, lmd, stmd) */ + if ((nb == 0) || (flags & M)) + return 0; /* too hard or invalid instruction */ + + /* + * Special handling for dcbz + * dcbz may give an alignment exception for accesses to caching inhibited + * storage + */ + if (instr == DCBZ) + addr = (unsigned char *) ((unsigned long)addr & -L1_CACHE_BYTES); + /* Verify the address of the operand */ if (user_mode(regs)) { if (verify_area((flags & ST? VERIFY_WRITE: VERIFY_READ), addr, nb)) return -EFAULT; /* bad address */ } + /* Force the fprs into the save area so we can reference them */ if ((flags & F) && (regs->msr & MSR_FP)) giveup_fpu(current); - if (flags & M) - return 0; /* too hard for now */ - - /* If we read the operand, copy it in */ + + /* If we are loading, get the data from user space */ if (flags & LD) { - if (nb == 2) { - data.v[0] = data.v[1] = 0; - if (__get_user(data.v[2], addr) - || __get_user(data.v[3], addr+1)) + data.ll = 0; + ret = 0; + p = addr; + switch (nb) { + case 8: + ret |= __get_user(data.v[0], p++); + ret |= __get_user(data.v[1], p++); + ret |= __get_user(data.v[2], p++); + ret |= __get_user(data.v[3], p++); + case 4: + ret |= __get_user(data.v[4], p++); + ret |= __get_user(data.v[5], p++); + case 2: + ret |= __get_user(data.v[6], p++); + ret |= __get_user(data.v[7], p++); + if (ret) return -EFAULT; - } else { - for (i = 0; i < nb; ++i) - if (__get_user(data.v[i], addr+i)) - return -EFAULT; } } - /* Unfortunately D (== 0x100) doesn't fit in the aligninfo[n].flags - field. So synthesize it here. 
*/ - if ((flags & F) == 0 && nb == 8) - flags |= D; - - switch (flags & ~U) { - case LD+SE: - if (data.v[2] >= 0x80) - data.v[0] = data.v[1] = -1; - /* fall through */ - case LD: - regs->gpr[reg] = data.l; - break; - case LD+D: - regs->gpr[reg] = data.ll; - break; - case LD+S: - if (nb == 2) { - SWAP(data.v[2], data.v[3]); - } else { - SWAP(data.v[0], data.v[3]); - SWAP(data.v[1], data.v[2]); + + /* If we are storing, get the data from the saved gpr or fpr */ + if (flags & ST) { + if (flags & F) { + if (nb == 4) { + /* Doing stfs, have to convert to single */ + enable_kernel_fp(); + cvt_df(&current->thread.fpr[reg], (float *)&data.v[4], &current->thread.fpscr); + } + else + data.dd = current->thread.fpr[reg]; } - regs->gpr[reg] = data.l; - break; - case ST: - data.l = regs->gpr[reg]; - break; - case ST+D: - data.ll = regs->gpr[reg]; - break; - case ST+S: - data.l = regs->gpr[reg]; - if (nb == 2) { - SWAP(data.v[2], data.v[3]); - } else { - SWAP(data.v[0], data.v[3]); - SWAP(data.v[1], data.v[2]); + else + data.ll = regs->gpr[reg]; + } + + /* Swap bytes as needed */ + if (flags & SW) { + if (nb == 2) + SWAP(data.v[6], data.v[7]); + else { /* nb must be 4 */ + SWAP(data.v[4], data.v[7]); + SWAP(data.v[5], data.v[6]); } - break; - case LD+F: - current->thread.fpr[reg] = data.d; - break; - case ST+F: - data.d = current->thread.fpr[reg]; - break; - /* these require some floating point conversions... */ - /* we'd like to use the assignment, but we have to compile - * the kernel with -msoft-float so it doesn't use the - * fp regs for copying 8-byte objects. */ - case LD+F+S: - enable_kernel_fp(); - cvt_fd(&data.f, &current->thread.fpr[reg], &current->thread.fpscr); - /* current->thread.fpr[reg] = data.f; */ - break; - case ST+F+S: - enable_kernel_fp(); - cvt_df(&current->thread.fpr[reg], &data.f, &current->thread.fpscr); - /* data.f = current->thread.fpr[reg]; */ - break; - default: - printk("align: can't handle flags=%x\n", flags); - return 0; } - + + /* Sign extend as needed */ + if (flags & SE) { + if ( nb == 2 ) + data.ll = data.x16.low16; + else /* nb must be 4 */ + data.ll = data.x32.low32; + } + + /* If we are loading, move the data to the gpr or fpr */ + if (flags & LD) { + if (flags & F) { + if (nb == 4) { + /* Doing lfs, have to convert to double */ + enable_kernel_fp(); + cvt_fd((float *)&data.v[4], &current->thread.fpr[reg], &current->thread.fpscr); + } + else + current->thread.fpr[reg] = data.dd; + } + else + regs->gpr[reg] = data.ll; + } + + /* If we are storing, copy the data to the user */ if (flags & ST) { - if (nb == 2) { - if (__put_user(data.v[2], addr) - || __put_user(data.v[3], addr+1)) - return -EFAULT; - } else { - for (i = 0; i < nb; ++i) - if (__put_user(data.v[i], addr+i)) - return -EFAULT; + ret = 0; + p = addr; + switch (nb) { + case 128: /* Special case - must be dcbz */ + lp = (unsigned long *)p; + for (i = 0; i < L1_CACHE_BYTES / sizeof(long); ++i) + ret |= __put_user(0, lp++); + break; + case 8: + ret |= __put_user(data.v[0], p++); + ret |= __put_user(data.v[1], p++); + ret |= __put_user(data.v[2], p++); + ret |= __put_user(data.v[3], p++); + case 4: + ret |= __put_user(data.v[4], p++); + ret |= __put_user(data.v[5], p++); + case 2: + ret |= __put_user(data.v[6], p++); + ret |= __put_user(data.v[7], p++); } + if (ret) + return -EFAULT; } - + + /* Update RA as needed */ if (flags & U) { regs->gpr[areg] = regs->dar; } return 1; } +
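The rewritten fix_alignment() above keeps every operand in one 8-byte union and addresses fixed byte lanes: a 2-byte access always lands in v[6..7] and a 4-byte access in v[4..7], so a single swap and sign-extend path covers all sizes. Here is a small user-space sketch of that layout; it is illustrative only, and the x16/x32 overlays line up with the byte lanes only on a big-endian machine such as ppc64.

/* sketch: the byte-lane union used by fix_alignment(); big-endian assumed */
#include <stdio.h>

union aldata {
	long long ll;
	double dd;
	unsigned char v[8];
	struct { unsigned hi32; int low32; } x32;
	struct { unsigned char hi48[6]; short low16; } x16;
};

#define SWAP(a, b) (t = (a), (a) = (b), (b) = t)

int main(void)
{
	union aldata data;
	unsigned char t;

	/* a 4-byte byte-reversed load (lwbrx): the bytes arrive in v[4..7] */
	data.ll = 0;
	data.v[4] = 0x11; data.v[5] = 0x22; data.v[6] = 0x33; data.v[7] = 0x44;
	SWAP(data.v[4], data.v[7]);
	SWAP(data.v[5], data.v[6]);
	printf("swapped: 0x%016llx\n", data.ll);

	/* a 2-byte sign-extending load (lha): 0xfffe in v[6..7] is -2 */
	data.ll = 0;
	data.v[6] = 0xff; data.v[7] = 0xfe;
	data.ll = data.x16.low16;
	printf("sign extended: %lld\n", data.ll);
	return 0;
}

diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/chrp_setup.c linuxppc64_2_4/arch/ppc64/kernel/chrp_setup.c --- 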
../kernel.org/linux-2.4.19/arch/ppc64/kernel/chrp_setup.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/chrp_setup.c Thu Apr 18 09:38:23 2002 @@ -57,7 +57,7 @@ #include #include #include -#include +#include #include #include "local_irq.h" @@ -67,7 +67,6 @@ #include extern volatile unsigned char *chrp_int_ack_special; -extern struct Naca *naca; void chrp_setup_pci_ptrs(void); void chrp_progress(char *, unsigned short); @@ -91,6 +90,9 @@ extern void pSeries_get_rtc_time(struct rtc_time *rtc_time); extern int pSeries_set_rtc_time(struct rtc_time *rtc_time); void pSeries_calibrate_decr(void); +static void fwnmi_init(void); +extern void SystemReset_FWNMI(void), MachineCheck_FWNMI(void); /* from head.S */ +int fwnmi_active; /* TRUE if an FWNMI handler is present */ kdev_t boot_dev; unsigned long virtPython0Facilities = 0; // python0 facility area (memory mapped io) (64-bit format) VIRTUAL address. @@ -153,6 +155,8 @@ printk("Boot arguments: %s\n", cmd_line); + fwnmi_init(); + /* Find and initialize PCI host bridges */ /* iSeries needs to be done much later. */ #ifndef CONFIG_PPC_ISERIES @@ -190,6 +194,23 @@ ppc_md.progress(UTS_RELEASE, 0x7777); } +/* Initialize firmware assisted non-maskable interrupts if + * the firmware supports this feature. + * + */ +static void __init fwnmi_init(void) +{ + long ret; + int ibm_nmi_register = rtas_token("ibm,nmi-register"); + if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE) + return; + ret = rtas_call(ibm_nmi_register, 2, 1, NULL, + __pa((unsigned long)SystemReset_FWNMI), + __pa((unsigned long)MachineCheck_FWNMI)); + if (ret == 0) + fwnmi_active = 1; +} + /* Early initialization. Relocation is on but do not reference unbolted pages */ void __init pSeries_init_early(void) @@ -230,7 +251,7 @@ #endif /* CONFIG_BLK_DEV_INITRD */ #endif - ppc_md.ppc_machine = _machine; + ppc_md.ppc_machine = naca->platform; ppc_md.setup_arch = chrp_setup_arch; ppc_md.setup_residual = NULL; @@ -296,7 +317,7 @@ if (hex) udbg_printf(" %s\n", s); - if (!rtas.base || (_machine != _MACH_pSeries)) + if (!rtas.base || (naca->platform != PLATFORM_PSERIES)) return; if (max_width == 0) { diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/eeh.c linuxppc64_2_4/arch/ppc64/kernel/eeh.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/eeh.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/eeh.c Thu Apr 18 09:38:09 2002 @@ -26,9 +26,9 @@ #include #include #include -#include +#include #include -#include +#include #include #include "pci.h" @@ -36,7 +36,6 @@ #define BUID_LO(buid) ((buid) & 0xffffffff) #define CONFIG_ADDR(busno, devfn) (((((busno) & 0xff) << 8) | ((devfn) & 0xf8)) << 8) -unsigned long eeh_total_mmio_reads; unsigned long eeh_total_mmio_ffs; unsigned long eeh_false_positives; /* RTAS tokens */ @@ -44,11 +43,11 @@ static int ibm_set_slot_reset; static int ibm_read_slot_reset_state; -static int eeh_implemented; +int eeh_implemented; #define EEH_MAX_OPTS 4096 static char *eeh_opts; static int eeh_opts_last; -static int eeh_check_opts_config(struct pci_dev *dev); +static int eeh_check_opts_config(struct pci_dev *dev, int default_state); unsigned long eeh_token(unsigned long phb, unsigned long bus, unsigned long devfn, unsigned long offset) @@ -86,33 +85,58 @@ panic("EEH: checking token %p phb index of %ld is greater than max of %d\n", token, phbidx, global_phb_number-1); } phb = phbtab[phbidx]; - eeh_false_positives++; ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets, config_addr, BUID_HI(phb->buid), BUID_LO(phb->buid)); - if (ret == 0 && 
rets[1] == 1 && rets[2] != 0) { + if (ret == 0 && rets[1] == 1 && rets[0] >= 2) { struct pci_dev *dev; int bus = ((unsigned long)token >> 40) & 0xffff; /* include PHB# in bus */ int devfn = (config_addr >> 8) & 0xff; dev = pci_find_slot(bus, devfn); - if (dev) + if (dev) { + udbg_printf("EEH: MMIO failure (%ld) on device:\n %s %s\n", + rets[0], dev->slot_name, dev->name); + printk("EEH: MMIO failure (%ld) on device:\n %s %s\n", + rets[0], dev->slot_name, dev->name); + PPCDBG_ENTER_DEBUGGER(); panic("EEH: MMIO failure (%ld) on device:\n %s %s\n", - rets[2], dev->slot_name, dev->name); - else - panic("EEH: MMIO failure (%ld) on device buid %lx, config_addr %lx\n", rets[2], phb->buid, config_addr); + rets[0], dev->slot_name, dev->name); + } else { + udbg_printf("EEH: MMIO failure (%ld) on device buid %lx, config_addr %lx\n", rets[0], phb->buid, config_addr); + printk("EEH: MMIO failure (%ld) on device buid %lx, config_addr %lx\n", rets[0], phb->buid, config_addr); + PPCDBG_ENTER_DEBUGGER(); + panic("EEH: MMIO failure (%ld) on device buid %lx, config_addr %lx\n", rets[0], phb->buid, config_addr); + } } + eeh_false_positives++; return val; /* good case */ } void eeh_init(void) { + extern char cmd_line[]; /* Very early cmd line parse. Cheap, but works. */ + char *eeh_force_off = strstr(cmd_line, "eeh-force-off"); + char *eeh_force_on = strstr(cmd_line, "eeh-force-on"); + ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); - if (ibm_set_eeh_option != RTAS_UNKNOWN_SERVICE) { - printk("PCI Enhanced I/O Error Handling Enabled\n"); + if (ibm_set_eeh_option != RTAS_UNKNOWN_SERVICE && naca->platform == PLATFORM_PSERIES_LPAR) eeh_implemented = 1; + + if (eeh_force_off > eeh_force_on) { + /* User is forcing EEH off. Be noisy if it is implemented. */ + if (eeh_implemented) + printk("EEH: WARNING: PCI Enhanced I/O Error Handling is user disabled\n"); + eeh_implemented = 0; + return; } + + if (eeh_force_on > eeh_force_off) + eeh_implemented = 1; /* User is forcing it on. */ + + if (eeh_implemented) + printk("EEH: PCI Enhanced I/O Error Handling Enabled\n"); } @@ -124,27 +148,35 @@ struct device_node *dn = pci_device_to_OF_node(dev); struct pci_controller *phb = PCI_GET_PHB_PTR(dev); unsigned long ret, rets[2]; + int eeh_capable; + int default_state = 1; /* default enable EEH if we can. */ - if (dn == NULL || phb == NULL || phb->buid == 0 || !eeh_implemented) + if (dn == NULL || phb == NULL || !eeh_implemented) return 0; - /* Hack: turn off eeh for display class devices. + /* Hack: turn off eeh for display class devices by default. * This fixes matrox accel framebuffer. 
*/ if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) - return 0; + default_state = 0; - if (!eeh_check_opts_config(dev)) + /* Ignore known PHBs and EADs bridges */ + if (dev->vendor == PCI_VENDOR_ID_IBM && + (dev->device == 0x0102 || dev->device == 0x008b)) + default_state = 0; + + if (!eeh_check_opts_config(dev, default_state)) { + if (default_state) + printk("EEH: %s %s user requested to run without EEH.\n", dev->slot_name, dev->name); return 0; + } ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets, CONFIG_ADDR(dn->busno, dn->devfn), BUID_HI(phb->buid), BUID_LO(phb->buid)); - if (ret == 0 && rets[1] == 1) { - printk("EEH: %s %s is EEH capable.\n", dev->slot_name, dev->name); - return 1; - } - return 0; + eeh_capable = (ret == 0 && rets[1] == 1); + printk("EEH: %s %s is%s EEH capable.\n", dev->slot_name, dev->name, eeh_capable ? "" : " not"); + return eeh_capable; } int eeh_set_option(struct pci_dev *dev, int option) @@ -166,41 +198,41 @@ { int len; len = sprintf(page, "eeh_false_positives=%ld\n" - "eeh_total_mmio_ffs=%ld\n" - "eeh_total_mmio_reads=%ld\n", - eeh_false_positives, eeh_total_mmio_ffs, eeh_total_mmio_reads); + "eeh_total_mmio_ffs=%ld\n", + eeh_false_positives, eeh_total_mmio_ffs); return len; } /* Implementation of /proc/ppc64/eeh * For now it is one file showing false positives. */ -void eeh_init_proc(struct proc_dir_entry *top) +static int __init eeh_init_proc(void) { - struct proc_dir_entry *ent = create_proc_entry("eeh", S_IRUGO, top); + struct proc_dir_entry *ent = create_proc_entry("ppc64/eeh", S_IRUGO, 0); if (ent) { ent->nlink = 1; ent->data = NULL; ent->read_proc = (void *)eeh_proc_falsepositive_read; } + return 0; } /* * Test if "dev" should be configured on or off. - * This processes the options literally from right to left. + * This processes the options literally from left to right. * This lets the user specify stupid combinations of options, * but at least the result should be very predictable. */ -static int eeh_check_opts_config(struct pci_dev *dev) +static int eeh_check_opts_config(struct pci_dev *dev, int default_state) { struct device_node *dn = pci_device_to_OF_node(dev); struct pci_controller *phb = PCI_GET_PHB_PTR(dev); char devname[32], classname[32], phbname[32]; char *strs[8], *s; int nstrs, i; - int ret = 0; + int ret = default_state; - if (dn == NULL || phb == NULL || phb->buid == 0 || !eeh_implemented) + if (dn == NULL || phb == NULL) return 0; /* Build list of strings to match */ nstrs = 0; @@ -221,7 +253,7 @@ for (s = eeh_opts; s && (s < (eeh_opts + eeh_opts_last)); s += strlen(s)+1) { for (i = 0; i < nstrs; i++) { if (strcasecmp(strs[i], s+1) == 0) { - ret = (strs[0] == '+') ? 1 : 0; + ret = (strs[i][0] == '+') ? 1 : 0; } } } @@ -234,7 +266,7 @@ * eeh-off=loc1,loc2,loc3... 
* * and this option can be repeated so - * eeh-off=loc1,loc2 eeh=loc3 + * eeh-off=loc1,loc2 eeh-off=loc3 * is the same as eeh-off=loc1,loc2,loc3 * * loc is an IBM location code that can be found in a manual or @@ -285,7 +317,6 @@ curend = cur + strlen(cur); if (*cur) { int curlen = curend-cur; - char *sym = eeh_opts+eeh_opts_last; if (eeh_opts_last + curlen > EEH_MAX_OPTS-2) { printk("EEH: sorry...too many eeh cmd line options\n"); return 1; @@ -308,6 +339,6 @@ return eeh_parm(str, 1); } - +__initcall(eeh_init_proc); __setup("eeh-off", eehoff_parm); __setup("eeh-on", eehon_parm); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/entry.S linuxppc64_2_4/arch/ppc64/kernel/entry.S --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/entry.S Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/entry.S Mon Apr 15 21:05:04 2002 @@ -1,7 +1,5 @@ /* - * arch/ppc/kernel/entry.S - * - * + * arch/ppc64/kernel/entry.S * * PowerPC version * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) @@ -20,10 +18,8 @@ * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. - * */ - #include "ppc_asm.h" #include #include @@ -246,8 +242,8 @@ * SAVE_REGS macro), you'll have to change the fork code also. * * The code which creates the new task context is in 'copy_thread' - * in arch/ppc/kernel/process.c - */ + * in arch/ppc64/kernel/process.c + */ _GLOBAL(_switch) stdu r1,-INT_FRAME_SIZE(r1) ld r6,0(r1) @@ -523,7 +519,6 @@ mtlr r0 blr /* return to caller */ - _GLOBAL(enter_prom) mflr r0 std r0,16(r1) @@ -613,4 +608,3 @@ mtlr r0 blr /* return to caller */ - diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/head.S linuxppc64_2_4/arch/ppc64/kernel/head.S --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/head.S Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/head.S Thu Apr 18 10:33:27 2002 @@ -1,8 +1,6 @@ /* * arch/ppc64/kernel/head.S * - * - * * PowerPC version * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) * @@ -23,7 +21,6 @@ * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
- * */ #define SECONDARY_PROCESSORS @@ -34,12 +31,19 @@ #include #include #include +// #include #ifdef CONFIG_PPC_ISERIES #define DO_SOFT_DISABLE #endif /* + * hcall interface to pSeries LPAR + */ +#define HSC .long 0x44000022 +#define H_SET_ASR 0x30 + +/* * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code * 0x0100 - 0x2fff : pSeries Interrupt prologs @@ -47,7 +51,6 @@ * 0x4000 - 0x4fff : NACA * 0x5000 - 0x5fff : Initial segment table * 0x6000 : iSeries and common interrupt prologs - * */ /* @@ -58,7 +61,7 @@ * SPRG0 reserved for hypervisor * SPRG1 temp - used to save gpr * SPRG2 temp - used to save gpr - * SPRG3 virt addr of Paca + * SPRG3 virt addr of paca */ /* @@ -167,14 +170,18 @@ #define EX_R23 40 #define EX_DAR 48 #define EX_DSISR 56 +#define EX_CCR 60 +#define EX_TRAP 60 -#define EXCEPTION_PROLOG_PSERIES(label) \ +#define EXCEPTION_PROLOG_PSERIES(n,label) \ mtspr SPRG2,r20; /* use SPRG2 as scratch reg */ \ mtspr SPRG1,r21; /* save r21 */ \ - mfspr r20,SPRG3; /* get Paca virt addr */ \ + mfspr r20,SPRG3; /* get paca virt addr */ \ ld r21,PACAEXCSP(r20); /* get exception stack ptr */ \ addi r21,r21,EXC_FRAME_SIZE; /* make exception frame */ \ std r22,EX_R22(r21); /* Save r22 in exc. frame */ \ + li r22,n; /* Save the ex # in exc. frame*/ \ + stw r22,EX_TRAP(r21); /* */ \ std r23,EX_R23(r21); /* Save r23 in exc. frame */ \ mfspr r22,SRR0; /* EA of interrupted instr */ \ std r22,EX_SRR0(r21); /* Save SRR0 in exc. frame */ \ @@ -196,19 +203,21 @@ * This is the start of the interrupt handlers for i_series * This code runs with relocation on. */ -#define EXCEPTION_PROLOG_ISERIES \ - mtspr SPRG2,r20; /* use SPRG2 as scratch reg */\ - mtspr SPRG1,r21; /* save r21 */\ - mfspr r20,SPRG3; /* get Paca */\ - ld r21,PACAEXCSP(r20); /* get exception stack ptr */\ - addi r21,r21,EXC_FRAME_SIZE; /* make exception frame */\ - std r22,EX_R22(r21); /* save r22 on exception frame */\ - std r23,EX_R23(r21); /* Save r23 in exc. frame */\ - ld r22,LPPACA+LPPACASRR0(r20); /* Get SRR0 from ItLpPaca */\ - std r22,EX_SRR0(r21); /* save SRR0 in exc. frame */\ - ld r23,LPPACA+LPPACASRR1(r20); /* Get SRR1 from ItLpPaca */\ - std r23,EX_SRR1(r21); /* save SRR1 in exc. frame */\ - mfcr r23; /* save CR in r23 */ +#define EXCEPTION_PROLOG_ISERIES(n) \ + mtspr SPRG2,r20; /* use SPRG2 as scratch reg */ \ + mtspr SPRG1,r21; /* save r21 */ \ + mfspr r20,SPRG3; /* get Paca */ \ + ld r21,PACAEXCSP(r20); /* get exception stack ptr */ \ + addi r21,r21,EXC_FRAME_SIZE; /* make exception frame */ \ + std r22,EX_R22(r21); /* save r22 on exception frame */ \ + li r22,n; /* Save the ex # in exc. frame */ \ + stw r22,EX_TRAP(r21); /* */ \ + std r23,EX_R23(r21); /* Save r23 in exc. frame */ \ + ld r22,LPPACA+LPPACASRR0(r20); /* Get SRR0 from ItLpPaca */ \ + std r22,EX_SRR0(r21); /* save SRR0 in exc. frame */ \ + ld r23,LPPACA+LPPACASRR1(r20); /* Get SRR1 from ItLpPaca */ \ + std r23,EX_SRR1(r21); /* save SRR1 in exc. frame */ \ + mfcr r23; /* save CR in r23 */ /* * The common exception prolog is used for all except a few exceptions @@ -219,54 +228,54 @@ * On entry r20 points to the paca and r21 points to the exception * frame on entry, r23 contains the saved CR, and relocation is on. */ -#define EXCEPTION_PROLOG_COMMON \ - mfspr r22,SPRG2; /* Save r20 in exc. frame */ \ - std r22,EX_R20(r21); \ - mfspr r22,SPRG1; /* Save r21 in exc. frame */ \ - std r22,EX_R21(r21); \ - mfspr r22,DAR; /* Save DAR in exc. 
frame */ \ - std r22,EX_DAR(r21); \ - std r21,PACAEXCSP(r20); /* update exception stack ptr */ \ - /* iff no protection flt */ \ - mfspr r22,DSISR; /* Save DSISR in exc. frame */ \ - std r22,EX_DSISR(r21); \ - ld r22,EX_SRR1(r21); /* Get SRR1 from exc. frame */ \ - andi. r22,r22,MSR_PR; /* Set CR for later branch */ \ - mr r22,r1; /* Save r1 */ \ - subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \ - beq- 1f; \ - ld r1,PACAKSAVE(r20); /* kernel stack to use */ \ -1: std r22,GPR1(r1); /* save r1 in stackframe */ \ - std r22,0(r1); /* make stack chain pointer */ \ - std r23,_CCR(r1); /* save CR in stackframe */ \ - ld r22,EX_R20(r21); /* move r20 to stackframe */ \ - std r22,GPR20(r1); \ - ld r23,EX_R21(r21); /* move r21 to stackframe */ \ - std r23,GPR21(r1); \ - ld r22,EX_R22(r21); /* move r22 to stackframe */ \ - std r22,GPR22(r1); \ - ld r23,EX_R23(r21); /* move r23 to stackframe */ \ - std r23,GPR23(r1); \ - mflr r22; /* save LR in stackframe */ \ - std r22,_LINK(r1); \ - mfctr r23; /* save CTR in stackframe */ \ - std r23,_CTR(r1); \ - mfspr r22,XER; /* save XER in stackframe */ \ - std r22,_XER(r1); \ - ld r23,EX_DAR(r21); /* move DAR to stackframe */ \ - std r23,_DAR(r1); \ - ld r22,EX_DSISR(r21); /* move DSISR to stackframe */ \ - std r22,_DSISR(r1); \ - lbz r22,PACAPROCENABLED(r20); \ - std r22,SOFTE(r1); \ - ld r22,EX_SRR0(r21); /* get SRR0 from exc. frame */ \ - ld r23,EX_SRR1(r21); /* get SRR1 from exc. frame */ \ - addi r21,r21,-EXC_FRAME_SIZE;/* pop off exception frame */ \ - std r21,PACAEXCSP(r20); \ - SAVE_GPR(0, r1); /* save r0 in stackframe */ \ +#define EXCEPTION_PROLOG_COMMON \ + mfspr r22,SPRG2; /* Save r20 in exc. frame */ \ + std r22,EX_R20(r21); \ + mfspr r22,SPRG1; /* Save r21 in exc. frame */ \ + std r22,EX_R21(r21); \ + mfspr r22,DAR; /* Save DAR in exc. frame */ \ + std r22,EX_DAR(r21); \ + std r21,PACAEXCSP(r20); /* update exception stack ptr */ \ + /* iff no protection flt */ \ + mfspr r22,DSISR; /* Save DSISR in exc. frame */ \ + stw r22,EX_DSISR(r21); \ + ld r22,EX_SRR1(r21); /* Get SRR1 from exc. frame */ \ + andi. r22,r22,MSR_PR; /* Set CR for later branch */ \ + mr r22,r1; /* Save r1 */ \ + subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \ + beq- 1f; \ + ld r1,PACAKSAVE(r20); /* kernel stack to use */ \ +1: std r22,GPR1(r1); /* save r1 in stackframe */ \ + std r22,0(r1); /* make stack chain pointer */ \ + std r23,_CCR(r1); /* save CR in stackframe */ \ + ld r22,EX_R20(r21); /* move r20 to stackframe */ \ + std r22,GPR20(r1); \ + ld r23,EX_R21(r21); /* move r21 to stackframe */ \ + std r23,GPR21(r1); \ + ld r22,EX_R22(r21); /* move r22 to stackframe */ \ + std r22,GPR22(r1); \ + ld r23,EX_R23(r21); /* move r23 to stackframe */ \ + std r23,GPR23(r1); \ + mflr r22; /* save LR in stackframe */ \ + std r22,_LINK(r1); \ + mfctr r23; /* save CTR in stackframe */ \ + std r23,_CTR(r1); \ + mfspr r22,XER; /* save XER in stackframe */ \ + std r22,_XER(r1); \ + ld r23,EX_DAR(r21); /* move DAR to stackframe */ \ + std r23,_DAR(r1); \ + lwz r22,EX_DSISR(r21); /* move DSISR to stackframe */ \ + std r22,_DSISR(r1); \ + lbz r22,PACAPROCENABLED(r20); \ + std r22,SOFTE(r1); \ + ld r22,EX_SRR0(r21); /* get SRR0 from exc. frame */ \ + ld r23,EX_SRR1(r21); /* get SRR1 from exc. 
frame */ \ + addi r21,r21,-EXC_FRAME_SIZE;/* pop off exception frame */ \ + std r21,PACAEXCSP(r20); \ + SAVE_GPR(0, r1); /* save r0 in stackframe */ \ SAVE_8GPRS(2, r1); /* save r2 - r13 in stackframe */ \ - SAVE_4GPRS(10, r1); \ - ld r2,PACATOC(r20); \ + SAVE_4GPRS(10, r1); \ + ld r2,PACATOC(r20); \ ld r13,PACACURRENT(r20) /* @@ -281,18 +290,18 @@ . = n; \ .globl label##_Pseries; \ label##_Pseries: \ - EXCEPTION_PROLOG_PSERIES( label##_common ) + EXCEPTION_PROLOG_PSERIES( n, label##_common ) -#define STD_EXCEPTION_ISERIES( label ) \ +#define STD_EXCEPTION_ISERIES( n, label ) \ .globl label##_Iseries; \ label##_Iseries: \ - EXCEPTION_PROLOG_ISERIES; \ + EXCEPTION_PROLOG_ISERIES( n ); \ b label##_common -#define MASKABLE_EXCEPTION_ISERIES( label ) \ +#define MASKABLE_EXCEPTION_ISERIES( n, label ) \ .globl label##_Iseries; \ label##_Iseries: \ - EXCEPTION_PROLOG_ISERIES; \ + EXCEPTION_PROLOG_ISERIES( n ); \ lbz r22,PACAPROFENABLED(r20); \ cmpi 0,r22,0; \ bne- label##_Iseries_profile; \ @@ -348,26 +357,26 @@ STD_EXCEPTION_PSERIES( 0xf00, PerformanceMonitor ) STD_EXCEPTION_PSERIES( 0x1300, InstructionBreakpoint ) + /* Space for the naca. Architected to be located at real address + * 0x4000. Various tools rely on this location being fixed. + * The first dword of the Naca is required by iSeries LPAR to + * point to itVpdAreas. On pSeries native, this value is not used. + */ . = 0x4000 .globl __end_interupts .globl __start_naca __end_interupts: __start_naca: - /* Save space for naca. - * The first dword of the Naca is required by iSeries LPAR to - * point to itVpdAreas. On pSeries native, this value is not used. - */ .llong itVpdAreas .llong 0x0 .llong 0x0 - .llong xPaca + .llong paca /* * Space for the initial segment table * For LPAR, the hypervisor must fill in at least one entry * before we get control (with relocate on) */ - . 
= 0x5000 .globl __end_naca .globl __start_stab @@ -406,26 +415,26 @@ /*** ISeries-LPAR interrupt handlers ***/ - STD_EXCEPTION_ISERIES( MachineCheck ) - STD_EXCEPTION_ISERIES( DataAccess ) - STD_EXCEPTION_ISERIES( DataAccessSLB ) - STD_EXCEPTION_ISERIES( InstructionAccess ) - STD_EXCEPTION_ISERIES( InstructionAccessSLB ) - MASKABLE_EXCEPTION_ISERIES( HardwareInterrupt ) - STD_EXCEPTION_ISERIES( Alignment ) - STD_EXCEPTION_ISERIES( ProgramCheck ) - STD_EXCEPTION_ISERIES( FPUnavailable ) - MASKABLE_EXCEPTION_ISERIES( Decrementer ) - STD_EXCEPTION_ISERIES( Trap_0a ) - STD_EXCEPTION_ISERIES( Trap_0b ) - STD_EXCEPTION_ISERIES( SystemCall ) - STD_EXCEPTION_ISERIES( SingleStep ) - STD_EXCEPTION_ISERIES( Trap_0e ) - STD_EXCEPTION_ISERIES( PerformanceMonitor ) + STD_EXCEPTION_ISERIES( 0x200, MachineCheck ) + STD_EXCEPTION_ISERIES( 0x300, DataAccess ) + STD_EXCEPTION_ISERIES( 0x380, DataAccessSLB ) + STD_EXCEPTION_ISERIES( 0x400, InstructionAccess ) + STD_EXCEPTION_ISERIES( 0x480, InstructionAccessSLB ) + MASKABLE_EXCEPTION_ISERIES( 0x500, HardwareInterrupt ) + STD_EXCEPTION_ISERIES( 0x600, Alignment ) + STD_EXCEPTION_ISERIES( 0x700, ProgramCheck ) + STD_EXCEPTION_ISERIES( 0x800, FPUnavailable ) + MASKABLE_EXCEPTION_ISERIES( 0x900, Decrementer ) + STD_EXCEPTION_ISERIES( 0xa00, Trap_0a ) + STD_EXCEPTION_ISERIES( 0xb00, Trap_0b ) + STD_EXCEPTION_ISERIES( 0xc00, SystemCall ) + STD_EXCEPTION_ISERIES( 0xd00, SingleStep ) + STD_EXCEPTION_ISERIES( 0xe00, Trap_0e ) + STD_EXCEPTION_ISERIES( 0xf00, PerformanceMonitor ) .globl SystemReset_Iseries SystemReset_Iseries: - mfspr 25,SPRG3 /* Get Paca address */ + mfspr 25,SPRG3 /* Get paca address */ lhz r24,PACAPACAINDEX(r25) /* Get processor # */ cmpi 0,r24,0 /* Are we processor 0? */ beq .__start_initialization_iSeries /* Start up the first processor */ @@ -493,6 +502,24 @@ mfspr r20,SPRG2 rfid +/* + * Data area reserved for FWNMI option. + */ + .= 0x7000 + .globl fwnmi_data_area +fwnmi_data_area: + +/* + * Vectors for the FWNMI option. Share common code. + */ + . = 0x8000 + .globl SystemReset_FWNMI +SystemReset_FWNMI: + EXCEPTION_PROLOG_PSERIES(0x100, SystemReset_common) + .globl MachineCheck_FWNMI +MachineCheck_FWNMI: + EXCEPTION_PROLOG_PSERIES(0x200, MachineCheck_common) + /*** Common interrupt handlers ***/ STD_EXCEPTION_COMMON( 0x100, SystemReset, .SystemResetException ) @@ -528,7 +555,6 @@ ld r1,GPR1(r1) rfid - /* * Here r20 points to the PACA, r21 to the exception frame, * r23 contains the saved CR. @@ -539,9 +565,10 @@ mfspr r22,DAR srdi r22,r22,60 cmpi 0,r22,0xc - - /* Segment fault on a bolted segment. Go off and map that segment. */ beq .do_stab_bolted + cmpi 0,r22,0xb + beq .do_stab_bolted + stab_bolted_user_return: EXCEPTION_PROLOG_COMMON ld r3,_DSISR(r1) @@ -576,8 +603,8 @@ mfspr r22,DAR srdi r22,r22,60 cmpi 0,r22,0xc - - /* Segment fault on a bolted segment. Go off and map that segment. */ + beq .do_slb_bolted + cmpi 0,r22,0xb beq .do_slb_bolted EXCEPTION_PROLOG_COMMON @@ -663,9 +690,9 @@ /* */ /* The call to do_irq will preserve the value of r14 - r31 */ /* */ - mfspr r20,SPRG3 /* get Paca */ + mfspr r20,SPRG3 /* get paca */ lbz r21,PACAHRDWINTCOUNT(r20) /* get hardware interrupt cnt */ - cmpi 0,r21,1 /* */ + cmpi 0,r21,0 /* */ addi r21,r21,1 /* incr hardware interrupt cnt*/ stb r21,PACAHRDWINTCOUNT(r20) /* */ bne 2f /* */ @@ -825,7 +852,7 @@ * We assume we aren't going to take any exceptions during this procedure. */ _GLOBAL(do_stab_bolted) - std r23,EX_DAR(r21) /* save CR in exc. frame */ + stw r23,EX_CCR(r21) /* save CR in exc. 
frame */ mfspr r22,DSISR andis. r22,r22,0x0020 @@ -931,7 +958,7 @@ mfsprg r20,3 /* Load the PACA pointer */ ld r21,PACAEXCSP(r20) /* Get the exception frame pointer */ addi r21,r21,EXC_FRAME_SIZE - ld r23,EX_DAR(r21) /* get saved CR */ + lwz r23,EX_CCR(r21) /* get saved CR */ /* note that this is almost identical to maskable_exception_exit */ mtcr r23 /* restore CR */ ld r22,EX_SRR0(r21) /* Get SRR0 from exc. frame */ @@ -952,7 +979,7 @@ * We assume we aren't going to take any exceptions during this procedure. */ _GLOBAL(do_slb_bolted) - std r23,48(r21) /* save CR in exc. frame */ + stw r23,EX_CCR(r21) /* save CR in exc. frame */ /* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */ mfspr r21,DAR @@ -1018,7 +1045,7 @@ mfsprg r20,3 /* Load the PACA pointer */ ld r21,PACAEXCSP(r20) /* Get the exception frame pointer */ addi r21,r21,EXC_FRAME_SIZE - ld r23,EX_DAR(r21) /* get saved CR */ + lwz r23,EX_CCR(r21) /* get saved CR */ /* note that this is almost identical to maskable_exception_exit */ mtcr r23 /* restore CR */ ld r22,EX_SRR0(r21) /* Get SRR0 from exc. frame */ @@ -1137,7 +1164,7 @@ isync /* Set up a Paca value for this processor. */ - LOADADDR(r24, xPaca) /* Get base vaddr of Paca array */ + LOADADDR(r24, paca) /* Get base vaddr of Paca array */ mulli r25,r3,PACA_SIZE /* Calculate vaddr of right Paca */ add r25,r25,r24 /* for this processor. */ @@ -1288,7 +1315,7 @@ /* this includes the code being */ /* executed here. */ - li r0,4f@l /* Jump to the copy of this code */ + LOADADDR(r0, 4f) /* Jump to the copy of this code */ mtctr r0 /* that we just made */ bctr @@ -1439,8 +1466,6 @@ #endif /* CONFIG_SMP */ blr - - #ifdef CONFIG_SMP /* * This function is called after the master CPU has released the @@ -1497,7 +1522,17 @@ ori r3,r3,4 /* 0x8000000000000004 */ sc /* HvCall_setASR */ #else + /* set the ASR */ + addi r3,0,0x4000 /* r3 = ptr to naca */ + lhz r3,PLATFORM(r3) /* r3 = platform flags */ + cmpldi r3,PLATFORM_PSERIES_LPAR + bne 98f + li r3,H_SET_ASR /* hcall = H_SET_ASR */ + HSC /* Invoking hcall */ + b 99f +98: /* This is not a hypervisor machine */ mtasr r4 /* set the stab location */ +99: #endif li r7,0 mtlr r7 @@ -1552,7 +1587,6 @@ isync blr - /* * This is where the main kernel code starts. */ @@ -1586,7 +1620,7 @@ #ifdef CONFIG_SMP /* All secondary cpus are now spinning on a common * spinloop, release them all now so they can start - * to spin on their individual Paca spinloops. + * to spin on their individual paca spinloops. * For non SMP kernels, the secondary cpus never * get out of the common spinloop. */ @@ -1623,12 +1657,25 @@ /* Get the pointer to the segment table which is used by */ /* stab_initialize */ li r27,0x4000 - ld r6,PACA(r27) /* Get the base Paca pointer */ + ld r6,PACA(r27) /* Get the base paca pointer */ sub r6,r6,r26 /* convert to physical addr */ mtspr SPRG3,r6 /* PPPBBB: Temp... -Peter */ ld r3,PACASTABREAL(r6) ori r4,r3,1 /* turn on valid bit */ + + /* set the ASR */ + addi r3,0,0x4000 /* r3 = ptr to naca */ + lhz r3,PLATFORM(r3) /* r3 = platform flags */ + cmpldi r3,PLATFORM_PSERIES_LPAR + bne 98f + li r3,H_SET_ASR /* hcall = H_SET_ASR */ + HSC /* Invoking hcall */ + b 99f +98: /* This is not a hypervisor machine */ mtasr r4 /* set the stab location */ +99: + mfspr r6,SPRG3 + ld r3,PACASTABREAL(r6) /* restore r3 for stab_initialize */ /* Initialize an initial memory mapping and turn on relocation. */ bl .stab_initialize @@ -1713,10 +1760,7 @@ li r3,0 stb r3,PACAHRDWINTCOUNT(r4) - - /* - * Restore the parms passed in from the bootloader. 
- */ + /* Restore the parms passed in from the bootloader. */ mr r3,r31 mr r4,r30 mr r5,r29 @@ -1831,15 +1875,19 @@ ioremap_dir: .space 4096 + .globl bolted_dir +bolted_dir: + .space 4096 + .globl hardware_int_paca0 hardware_int_paca0: .space 8*4096 - -/* 4096 * 31 bytes of storage */ +/* 1 page segment table per cpu (max 48, cpu0 allocated at 0x5000) */ .globl stab_array stab_array: - .space 131072 + .space 4096 * (48 - 1) + /* * This space gets a copy of optional info passed to us by the bootstrap * Used to pass parameters into the kernel like root=/dev/sda1, etc. diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/htab.c linuxppc64_2_4/arch/ppc64/kernel/htab.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/htab.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/htab.c Thu Apr 18 09:37:52 2002 @@ -37,14 +37,12 @@ #include #include #include -#include +#include #include #include #include #include -#ifdef CONFIG_PPC_EEH #include -#endif /* For iSeries */ #include @@ -69,7 +67,6 @@ extern unsigned long _SDR1; extern unsigned long klimit; -extern struct Naca *naca; extern unsigned long _ASR; extern inline void make_ste(unsigned long stab, @@ -77,7 +74,7 @@ extern char _stext[], _etext[], __start_naca[], __end_stab[]; -static spinlock_t hash_table_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; +static spinlock_t hash_table_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; #define PTRRELOC(x) ((typeof(x))((unsigned long)(x) - offset)) #define PTRUNRELOC(x) ((typeof(x))((unsigned long)(x) + offset)) @@ -117,7 +114,7 @@ unsigned long pteg_count; unsigned long mode_ro, mode_rw, mask; unsigned long offset = reloc_offset(); - struct Naca *_naca = RELOC(naca); + struct naca_struct *_naca = RELOC(naca); HTAB *_htab_data = PTRRELOC(&htab_data); /* @@ -136,7 +133,7 @@ _htab_data->htab_num_ptegs = pteg_count; _htab_data->htab_hash_mask = pteg_count - 1; - if(_machine == _MACH_pSeries) { + if(_naca->platform == PLATFORM_PSERIES) { /* Find storage for the HPT. Must be contiguous in * the absolute address space. */ @@ -203,7 +200,7 @@ unsigned long vpn; #ifdef CONFIG_PPC_PSERIES - if(_machine == _MACH_pSeriesLP) { + if(naca->platform == PLATFORM_PSERIES_LPAR) { make_pte_LPAR(htab, va, pa, mode, hash_mask, large); return; } @@ -827,13 +824,11 @@ mm = &init_mm; vsid = get_kernel_vsid( ea ); break; -#ifdef CONFIG_PPC_EEH case IO_UNMAPPED_REGION_ID: udbg_printf("EEH Error ea = 0x%lx\n", ea); PPCDBG_ENTER_DEBUGGER(); panic("EEH Error ea = 0x%lx\n", ea); break; -#endif case KERNEL_REGION_ID: /* As htab_initialize is now, we shouldn't ever get here since * we're bolting the entire 0xC0... region. @@ -990,6 +985,10 @@ */ slot = ppc_md.hpte_selectslot( vpn ); + /* If hpte_selectslot returns 0x8000000000000000 that means + * that there was already an entry in the HPT even though + * the linux PTE said there couldn't be. 
+ */ /* Debug code */ if ( slot == 0x8000000000000000 ) { unsigned long xold_pte = pte_val(old_pte); @@ -1004,7 +1003,6 @@ panic("hash_page: hpte already exists\n"); } - hash_ind = 0; if ( slot < 0 ) { slot = -slot; @@ -1046,7 +1044,7 @@ return rc; } -void flush_hash_page( unsigned long context, unsigned long ea, pte_t pte ) +void flush_hash_page( unsigned long context, unsigned long ea, pte_t *ptep ) { unsigned long vsid, vpn, va, hash, secondary, slot, flags; /* Local copy of first doubleword of HPTE */ @@ -1054,6 +1052,7 @@ unsigned long d; Hpte_dword0 h; } hpte_dw0; + pte_t pte; if ( (ea >= USER_START ) && ( ea <= USER_END ) ) vsid = get_vsid( context, ea ); @@ -1062,39 +1061,42 @@ va = (vsid << 28) | (ea & 0x0fffffff); vpn = va >> PAGE_SHIFT; hash = hpt_hash(vpn, 0); - secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15; - if ( secondary ) - hash = ~hash; - slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP; - slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12; - /* If there is an HPTE for this page it is indexed by slot */ spin_lock_irqsave( &hash_table_lock, flags); - hpte_dw0.d = ppc_md.hpte_getword0( slot ); - if ( (hpte_dw0.h.avpn == (vpn >> 11) ) && - (hpte_dw0.h.v) && - (hpte_dw0.h.h == secondary ) ){ - /* HPTE matches */ - ppc_md.hpte_invalidate( slot ); - } - else { - unsigned k; - /* Temporarily lets check for the hpte in all possible slots */ - for ( secondary = 0; secondary < 2; ++secondary ) { - hash = hpt_hash(vpn, 0); - if ( secondary ) - hash = ~hash; - slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP; - for ( k=0; k<8; ++k ) { - hpte_dw0.d = ppc_md.hpte_getword0( slot+k ); - if ( ( hpte_dw0.h.avpn == (vpn >> 11) ) && - ( hpte_dw0.h.v ) && - ( hpte_dw0.h.h == secondary ) ) { - while (1) ; + pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0)); + if ( pte_val(pte) & _PAGE_HASHPTE ) { + secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15; + if ( secondary ) + hash = ~hash; + slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP; + slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12; + /* If there is an HPTE for this page it is indexed by slot */ + + hpte_dw0.d = ppc_md.hpte_getword0( slot ); + if ( (hpte_dw0.h.avpn == (vpn >> 11) ) && + (hpte_dw0.h.v) && + (hpte_dw0.h.h == secondary ) ){ + /* HPTE matches */ + ppc_md.hpte_invalidate( slot ); + } + else { + unsigned k; + /* Temporarily lets check for the hpte in all possible slots */ + for ( secondary = 0; secondary < 2; ++secondary ) { + hash = hpt_hash(vpn, 0); + if ( secondary ) + hash = ~hash; + slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP; + for ( k=0; k<8; ++k ) { + hpte_dw0.d = ppc_md.hpte_getword0( slot+k ); + if ( ( hpte_dw0.h.avpn == (vpn >> 11) ) && + ( hpte_dw0.h.v ) && + ( hpte_dw0.h.h == secondary ) ) { + while (1) ; + } } } } - } spin_unlock_irqrestore( &hash_table_lock, flags ); } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/i8259.c linuxppc64_2_4/arch/ppc64/kernel/i8259.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/i8259.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/i8259.c Fri Mar 15 20:55:44 2002 @@ -20,7 +20,7 @@ #define cached_A1 (cached_8259[0]) #define cached_21 (cached_8259[1]) -static spinlock_t i8259_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; +static spinlock_t i8259_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; int i8259_pic_irq_offset;
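The flush_hash_page() rework in htab.c above probes the hash table only when the Linux PTE has _PAGE_HASHPTE set, and finds the HPTE again from two hints cached in the PTE: _PAGE_SECONDARY records which hash function was used, and _PAGE_GROUP_IX the slot within the group of eight. A stand-alone sketch of that slot arithmetic follows; hpt_hash(), the mask and the bit positions are simplified stand-ins, not the kernel's definitions.

/* sketch: relocate an HPTE from the hints kept in the Linux PTE */
#include <stdio.h>

#define HPTES_PER_GROUP 8
#define PAGE_SECONDARY 0x8000UL	/* stand-in for _PAGE_SECONDARY */
#define PAGE_GROUP_IX  0x7000UL	/* stand-in for _PAGE_GROUP_IX */

/* stand-in for the kernel's hpt_hash() */
static unsigned long hpt_hash(unsigned long vpn)
{
	return vpn ^ (vpn >> 5);
}

static unsigned long hpte_slot(unsigned long vpn, unsigned long pte,
			       unsigned long hash_mask)
{
	unsigned long hash = hpt_hash(vpn);

	if (pte & PAGE_SECONDARY)	/* the secondary hash was used */
		hash = ~hash;
	return (hash & hash_mask) * HPTES_PER_GROUP +
		((pte & PAGE_GROUP_IX) >> 12);
}

int main(void)
{
	/* made-up vpn; the PTE says secondary hash, index 3 in the group */
	unsigned long pte = PAGE_SECONDARY | (3UL << 12);

	printf("slot = %lu\n", hpte_slot(0x12345UL, pte, 0xfffUL));
	return 0;
}

diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/iSeries_VpdInfo.c linuxppc64_2_4/arch/ppc64/kernel/iSeries_VpdInfo.c --- 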
../kernel.org/linux-2.4.19/arch/ppc64/kernel/iSeries_VpdInfo.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/iSeries_VpdInfo.c Tue Apr 23 08:10:33 2002 @@ -146,7 +146,7 @@ int device_Location(struct pci_dev* PciDev,char* BufPtr) { struct iSeries_Device_Node* DevNode = (struct iSeries_Device_Node*)PciDev->sysdata; - return sprintf(BufPtr,"PCI: Bus%3d, Device%3d, Vendor %04X, Location %s", + return sprintf(BufPtr,"PCI: Bus%3d, AgentId%3d, Vendor %04X, Location %s", DevNode->DsaAddr.busNumber, DevNode->AgentId, DevNode->Vendor, diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/iSeries_pci.c linuxppc64_2_4/arch/ppc64/kernel/iSeries_pci.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/iSeries_pci.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/iSeries_pci.c Fri Apr 19 15:56:26 2002 @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include @@ -55,7 +55,6 @@ extern int global_phb_number; extern int panic_timeout; -extern struct Naca *naca; extern struct device_node *allnodes; extern unsigned long phb_tce_table_init(struct pci_controller *phb); extern unsigned long iSeries_Base_Io_Memory; @@ -449,7 +448,7 @@ HvSubBusNumber SubBus = BridgeInfo->subBusNumber; u16 VendorId = 0; int HvRc = 0; - int Irq = 0; + u8 Irq = 0; int IdSel = ISERIES_GET_DEVICE_FROM_SUBBUS(SubBus); int Function = ISERIES_GET_FUNCTION_FROM_SUBBUS(SubBus); HvAgentId AgentId = ISERIES_PCI_AGENTID(IdSel, Function); @@ -476,9 +475,14 @@ /**********************************************************/ /* FoundDevice: 0x18.28.10 = 0x12AE */ /**********************************************************/ - HvCallPci_configStore8(Bus, SubBus, AgentId, PCI_INTERRUPT_LINE, Irq); PPCDBG(PPCDBG_BUSWALK,"PCI:- FoundDevice: 0x%02X.%02X.%02X = 0x%04X\n", Bus, SubBus, AgentId, VendorId); + + HvRc = HvCallPci_configStore8(Bus, SubBus, AgentId, PCI_INTERRUPT_LINE, Irq); + if( HvRc != 0) { + pci_Log_Error("PciCfgStore Irq Failed!",Bus,SubBus,AgentId,HvRc); + } + ++DeviceCount; DeviceNode = build_device_node(Bus, SubBus, EADsIdSel, Function); DeviceNode->Vendor = VendorId; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/iSeries_rtc.c linuxppc64_2_4/arch/ppc64/kernel/iSeries_rtc.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/iSeries_rtc.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/iSeries_rtc.c Wed Dec 31 18:00:00 1969 @@ -1,264 +0,0 @@ -/* - * Real Time Clock interface for IBM iSeries - * - * Based on rtc.c by Paul Gortmaker - * - * This driver allows use of the real time clock - * from user space. It exports the /dev/rtc - * interface supporting various ioctl() and also the - * /proc/driver/rtc pseudo-file for status information. - * - * iSeries does not support RTC interrupts nor an alarm. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * 1.0 Mike Corrigan: IBM iSeries rtc support - */ - -#define RTC_VERSION "1.0" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -/* - * We sponge a minor off of the misc major. No need slurping - * up another valuable major dev number for this. If you add - * an ioctl, make sure you don't conflict with SPARC's RTC - * ioctls. 
- */ - -static loff_t rtc_llseek(struct file *file, loff_t offset, int origin); - -static ssize_t rtc_read(struct file *file, char *buf, - size_t count, loff_t *ppos); - -static int rtc_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg); - -static void get_rtc_time (struct rtc_time *rtc_tm); - -static int rtc_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data); - -/* - * If this driver ever becomes modularised, it will be really nice - * to make the epoch retain its value across module reload... - */ - -static unsigned long epoch = 1900; /* year corresponding to 0x00 */ - -static const unsigned char days_in_mo[] = -{0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; - -/* - * Now all the various file operations that we export. - */ - -static loff_t rtc_llseek(struct file *file, loff_t offset, int origin) -{ - return -ESPIPE; -} - -static ssize_t rtc_read(struct file *file, char *buf, - size_t count, loff_t *ppos) -{ - return -EIO; -} - -static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, - unsigned long arg) -{ - struct rtc_time wtime; - - switch (cmd) { - case RTC_RD_TIME: /* Read the time/date from RTC */ - { - get_rtc_time(&wtime); - break; - } - case RTC_SET_TIME: /* Set the RTC */ - { - struct rtc_time rtc_tm; - unsigned char mon, day, hrs, min, sec, leap_yr; - unsigned int yrs; - - if (!capable(CAP_SYS_TIME)) - return -EACCES; - - if (copy_from_user(&rtc_tm, (struct rtc_time*)arg, - sizeof(struct rtc_time))) - return -EFAULT; - - yrs = rtc_tm.tm_year; - mon = rtc_tm.tm_mon + 1; /* tm_mon starts at zero */ - day = rtc_tm.tm_mday; - hrs = rtc_tm.tm_hour; - min = rtc_tm.tm_min; - sec = rtc_tm.tm_sec; - - if (yrs < 70) - return -EINVAL; - - leap_yr = ((!(yrs % 4) && (yrs % 100)) || !(yrs % 400)); - - if ((mon > 12) || (day == 0)) - return -EINVAL; - - if (day > (days_in_mo[mon] + ((mon == 2) && leap_yr))) - return -EINVAL; - - if ((hrs >= 24) || (min >= 60) || (sec >= 60)) - return -EINVAL; - - if ( yrs > 169 ) - return -EINVAL; - - mf_setRtc( &rtc_tm ); - - return 0; - } - case RTC_EPOCH_READ: /* Read the epoch. */ - { - return put_user (epoch, (unsigned long *)arg); - } - case RTC_EPOCH_SET: /* Set the epoch. */ - { - /* - * There were no RTC clocks before 1900. - */ - if (arg < 1900) - return -EINVAL; - - if (!capable(CAP_SYS_TIME)) - return -EACCES; - - epoch = arg; - return 0; - } - default: - return -EINVAL; - } - return copy_to_user((void *)arg, &wtime, sizeof wtime) ? -EFAULT : 0; -} - -static int rtc_open(struct inode *inode, struct file *file) -{ - return 0; -} - -static int rtc_release(struct inode *inode, struct file *file) -{ - return 0; -} - -/* - * The various file operations we support. - */ - -static struct file_operations rtc_fops = { - owner: THIS_MODULE, - llseek: rtc_llseek, - read: rtc_read, - ioctl: rtc_ioctl, - open: rtc_open, - release: rtc_release, -}; - -static struct miscdevice rtc_dev= -{ - RTC_MINOR, - "rtc", - &rtc_fops -}; - -static int __init rtc_init(void) -{ - misc_register(&rtc_dev); - create_proc_read_entry ("driver/rtc", 0, 0, rtc_read_proc, NULL); - - printk(KERN_INFO "iSeries Real Time Clock Driver v" RTC_VERSION "\n"); - - return 0; -} - -static void __exit rtc_exit (void) -{ - remove_proc_entry ("driver/rtc", NULL); - misc_deregister(&rtc_dev); -} - -module_init(rtc_init); -module_exit(rtc_exit); -EXPORT_NO_SYMBOLS; - -/* - * Info exported via "/proc/driver/rtc". 
- */ - -static int rtc_proc_output (char *buf) -{ - - char *p; - struct rtc_time tm; - - p = buf; - - get_rtc_time(&tm); - - /* - * There is no way to tell if the luser has the RTC set for local - * time or for Universal Standard Time (GMT). Probably local though. - */ - p += sprintf(p, - "rtc_time\t: %02d:%02d:%02d\n" - "rtc_date\t: %04d-%02d-%02d\n" - "rtc_epoch\t: %04lu\n", - tm.tm_hour, tm.tm_min, tm.tm_sec, - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, epoch); - - p += sprintf(p, - "DST_enable\t: no\n" - "BCD\t\t: yes\n" - "24hr\t\t: yes\n" ); - - return p - buf; -} - -static int rtc_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len = rtc_proc_output (page); - if (len <= off+count) *eof = 1; - *start = page + off; - len -= off; - if (len>count) len = count; - if (len<0) len = 0; - return len; -} - -static void get_rtc_time(struct rtc_time *rtc_tm) -{ - mf_getRtc( rtc_tm ); - - rtc_tm->tm_mon--; -} - - diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/iSeries_setup.c linuxppc64_2_4/arch/ppc64/kernel/iSeries_setup.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/iSeries_setup.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/iSeries_setup.c Tue Apr 9 11:23:18 2002 @@ -37,8 +37,8 @@ #include #include "iSeries_setup.h" -#include -#include +#include +#include #include #include #include @@ -85,7 +85,6 @@ extern char _end[]; -extern struct Naca *naca; extern int rd_size; /* Defined in drivers/block/rd.c */ extern unsigned long klimit; extern unsigned long embedded_sysmap_start; @@ -889,13 +888,13 @@ { if ( dprof_buffer ) { unsigned i; - for (i=0; ixLpPaca.xIntDword.xFields.xDecrInt = 1; + lpaca->xLpPaca.xIntDword.xFields.xDecrInt = 1; process_iSeries_events(); } #endif /* CONFIG_PPC_ISERIES */ int idled(void) { - struct Paca *paca; + struct paca_struct *lpaca; long oldval; #ifdef CONFIG_PPC_ISERIES unsigned long CTRL; @@ -89,12 +88,12 @@ #endif init_idle(); - paca = (struct Paca *)mfspr(SPRG3); + lpaca = get_paca(); for (;;) { #ifdef CONFIG_PPC_ISERIES - if ( paca->xLpPaca.xSharedProc ) { - if ( ItLpQueue_isLpIntPending( paca->lpQueuePtr ) ) + if ( lpaca->xLpPaca.xSharedProc ) { + if ( ItLpQueue_isLpIntPending( lpaca->lpQueuePtr ) ) process_iSeries_events(); if ( !current->need_resched ) yield_shared_processor(); @@ -108,7 +107,7 @@ while(current->need_resched == -1) { #ifdef CONFIG_PPC_ISERIES HMT_medium(); - if ( ItLpQueue_isLpIntPending( paca->lpQueuePtr ) ) + if ( ItLpQueue_isLpIntPending( lpaca->lpQueuePtr ) ) process_iSeries_events(); #endif HMT_low(); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/ioctl32.c linuxppc64_2_4/arch/ppc64/kernel/ioctl32.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/ioctl32.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/ioctl32.c Tue Apr 23 09:37:25 2002 @@ -102,19 +102,9 @@ #include #include #include +#include #include -/* Use this to get at 32-bit user passed pointers. - See sys_sparc32.c for description about these. */ -#define A(__x) ((unsigned long)(__x)) -#define AA(__x) \ -({ unsigned long __ret; \ - __asm__ ("clrldi %0, %0, 32" \ - : "=r" (__ret) \ - : "0" (__x)); \ - __ret; \ -}) - /* Aiee. 
Someone does not find a difference between int and long */ #define EXT2_IOC32_GETFLAGS _IOR('f', 1, int) #define EXT2_IOC32_SETFLAGS _IOW('f', 2, int) @@ -4159,8 +4149,6 @@ COMPATIBLE_IOCTL(HCIDEVUP), COMPATIBLE_IOCTL(HCIDEVDOWN), COMPATIBLE_IOCTL(HCIDEVRESET), -COMPATIBLE_IOCTL(HCIRESETSTAT), -COMPATIBLE_IOCTL(HCIGETINFO), COMPATIBLE_IOCTL(HCIGETDEVLIST), COMPATIBLE_IOCTL(HCISETRAW), COMPATIBLE_IOCTL(HCISETSCAN), diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/irq.c linuxppc64_2_4/arch/ppc64/kernel/irq.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/irq.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/irq.c Thu Apr 18 09:37:39 2002 @@ -55,6 +55,7 @@ #include #include #include +#include #include "local_irq.h" @@ -557,14 +558,14 @@ { int cpu = smp_processor_id(); int irq; - struct Paca * paca; + struct paca_struct *lpaca; struct ItLpQueue * lpq; /* if(cpu) udbg_printf("Entering do_IRQ\n"); */ irq_enter(cpu); - if ( _machine != _MACH_iSeries ) { + if (naca->platform != PLATFORM_ISERIES_LPAR) { /* every arch is required to have a get_irq -- Cort */ irq = ppc_md.get_irq( regs ); @@ -584,23 +585,23 @@ } /* if on iSeries partition */ else { - paca = (struct Paca *)mfspr(SPRG3); + lpaca = get_paca(); #ifdef CONFIG_SMP - if ( paca->xLpPaca.xIntDword.xFields.xIpiCnt ) { - paca->xLpPaca.xIntDword.xFields.xIpiCnt = 0; + if ( lpaca->xLpPaca.xIntDword.xFields.xIpiCnt ) { + lpaca->xLpPaca.xIntDword.xFields.xIpiCnt = 0; iSeries_smp_message_recv( regs ); } #endif /* CONFIG_SMP */ - lpq = paca->lpQueuePtr; + lpq = lpaca->lpQueuePtr; if ( lpq && ItLpQueue_isLpIntPending( lpq ) ) lpEvent_count += ItLpQueue_process( lpq, regs ); } irq_exit(cpu); - if ( _machine == _MACH_iSeries ) { - if ( paca->xLpPaca.xIntDword.xFields.xDecrInt ) { - paca->xLpPaca.xIntDword.xFields.xDecrInt = 0; + if (naca->platform == PLATFORM_ISERIES_LPAR) { + if ( lpaca->xLpPaca.xIntDword.xFields.xDecrInt ) { + lpaca->xLpPaca.xIntDword.xFields.xDecrInt = 0; /* Signal a fake decrementer interrupt */ timer_interrupt( regs ); } @@ -899,11 +900,11 @@ #ifdef CONFIG_PPC_ISERIES { unsigned i; - for (i=0; i>= 1; } } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/lmb.c linuxppc64_2_4/arch/ppc64/kernel/lmb.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/lmb.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/lmb.c Mon Apr 15 21:12:43 2002 @@ -76,12 +76,15 @@ void lmb_analyze(void) { - unsigned long i, physbase = 0; + unsigned long i; unsigned long mem_size = 0; unsigned long io_size = 0; unsigned long size_mask = 0; unsigned long offset = reloc_offset(); struct lmb *_lmb = PTRRELOC(&lmb); +#ifdef CONFIG_MSCHUNKS + unsigned long physbase = 0; +#endif for (i=0; i < _lmb->memory.cnt ;i++) { unsigned long lmb_type = _lmb->memory.region[i].type; @@ -102,6 +105,7 @@ size_mask |= lmb_size; } +#ifdef CONFIG_MSCHUNKS for (i=0; i < _lmb->memory.cnt ;i++) { unsigned long lmb_type = _lmb->memory.region[i].type; unsigned long lmb_size; @@ -111,15 +115,12 @@ lmb_size = _lmb->memory.region[i].size; -#ifdef CONFIG_MSCHUNKS _lmb->memory.region[i].physbase = physbase; physbase += lmb_size; -#else - _lmb->memory.region[i].physbase = _lmb->memory.region[i].base; -#endif io_size += lmb_size; size_mask |= lmb_size; } +#endif /* CONFIG_MSCHUNKS */ _lmb->memory.size = mem_size; _lmb->memory.iosize = io_size; @@ -138,6 +139,7 @@ } +#ifdef CONFIG_MSCHUNKS /* This routine called with relocation disabled. 
*/ long lmb_add_io(unsigned long base, unsigned long size) @@ -149,6 +151,7 @@ return lmb_add_region(_rgn, base, size, LMB_IO_AREA); } +#endif /* CONFIG_MSCHUNKS */ long lmb_reserve(unsigned long base, unsigned long size) @@ -282,7 +285,16 @@ { unsigned long offset = reloc_offset(); struct lmb *_lmb = PTRRELOC(&lmb); +#ifdef CONFIG_MSCHUNKS return _lmb->memory.size; +#else + struct lmb_region *_mem = &(_lmb->memory); + unsigned long idx = _mem->cnt-1; + unsigned long lastbase = _mem->region[idx].physbase; + unsigned long lastsize = _mem->region[idx].size; + + return (lastbase + lastsize); +#endif /* CONFIG_MSCHUNKS */ } unsigned long @@ -291,10 +303,9 @@ unsigned long offset = reloc_offset(); struct lmb *_lmb = PTRRELOC(&lmb); struct lmb_region *_mem = &(_lmb->memory); - unsigned long idx = _mem->cnt-1; + unsigned long idx; for(idx=_mem->cnt-1; idx >= 0 ;idx--) { - unsigned long lastbase, lastsize; if ( _mem->region[idx].type != LMB_MEMORY_AREA ) continue; #ifdef CONFIG_MSCHUNKS diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/mf.c linuxppc64_2_4/arch/ppc64/kernel/mf.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/mf.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/mf.c Wed Apr 17 13:14:54 2002 @@ -1003,7 +1003,7 @@ } } - pci_unmap_single(iSeries_vio_dev, dma_addr, *size, PCI_DMA_FROMDEVICE); + pci_unmap_single(iSeries_vio_dev, dma_addr, len, PCI_DMA_FROMDEVICE); return rc; } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/mf_proc.c linuxppc64_2_4/arch/ppc64/kernel/mf_proc.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/mf_proc.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/mf_proc.c Wed Apr 17 13:14:54 2002 @@ -71,12 +71,12 @@ ent->read_proc = proc_mf_dump_cmdline; ent->write_proc = proc_mf_change_cmdline; - ent = create_proc_entry("vmlinux", S_IFREG|S_IRUSR|S_IWUSR, mf_a); + ent = create_proc_entry("vmlinux", S_IFREG|S_IWUSR, mf_a); if (!ent) return; ent->nlink = 1; ent->data = (void *)0; - ent->read_proc = proc_mf_dump_vmlinux; ent->write_proc = proc_mf_change_vmlinux; + ent->read_proc = NULL; mf_b = proc_mkdir("B", mf_proc_root); if (!mf_b) return; @@ -88,12 +88,12 @@ ent->read_proc = proc_mf_dump_cmdline; ent->write_proc = proc_mf_change_cmdline; - ent = create_proc_entry("vmlinux", S_IFREG|S_IRUSR|S_IWUSR, mf_b); + ent = create_proc_entry("vmlinux", S_IFREG|S_IWUSR, mf_b); if (!ent) return; ent->nlink = 1; ent->data = (void *)1; - ent->read_proc = proc_mf_dump_vmlinux; ent->write_proc = proc_mf_change_vmlinux; + ent->read_proc = NULL; mf_c = proc_mkdir("C", mf_proc_root); if (!mf_c) return; @@ -105,12 +105,12 @@ ent->read_proc = proc_mf_dump_cmdline; ent->write_proc = proc_mf_change_cmdline; - ent = create_proc_entry("vmlinux", S_IFREG|S_IRUSR|S_IWUSR, mf_c); + ent = create_proc_entry("vmlinux", S_IFREG|S_IWUSR, mf_c); if (!ent) return; ent->nlink = 1; ent->data = (void *)2; - ent->read_proc = proc_mf_dump_vmlinux; ent->write_proc = proc_mf_change_vmlinux; + ent->read_proc = NULL; mf_d = proc_mkdir("D", mf_proc_root); if (!mf_d) return; @@ -122,14 +122,14 @@ ent->data = (void *)3; ent->read_proc = proc_mf_dump_cmdline; ent->write_proc = proc_mf_change_cmdline; - +#if 0 ent = create_proc_entry("vmlinux", S_IFREG|S_IRUSR, mf_d); if (!ent) return; ent->nlink = 1; ent->data = (void *)3; ent->read_proc = proc_mf_dump_vmlinux; ent->write_proc = NULL; - +#endif ent = create_proc_entry("side", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root); if (!ent) return; ent->nlink = 1; @@ -191,20 +191,16 @@ if (sizeToGet != 0) { 
*start = page + off; - printk("mf_proc.c: got count %d off %d\n", sizeToGet, (int)off); return sizeToGet; } else { - printk("mf_proc.c: eof\n"); *eof = 1; return 0; } } else { - printk("mf_proc.c: eof\n"); *eof = 1; return 0; } } - int proc_mf_dump_side (char *page, char **start, off_t off, int count, int *eof, void *data) diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/misc.S linuxppc64_2_4/arch/ppc64/kernel/misc.S --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/misc.S Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/misc.S Mon Apr 8 08:53:11 2002 @@ -519,8 +519,7 @@ #ifdef CONFIG_BINFMT_ELF32 /* Why isn't this a) automatic, b) written in 'C'? */ - .data - .align 8 + .balign 8 _GLOBAL(sys_call_table32) .llong .sys_ni_syscall /* 0 - old "setup()" system call */ .llong .sys32_exit @@ -729,12 +728,28 @@ .llong .sys32_fcntl64 /* 204 */ .llong .sys_madvise /* 205 */ .llong .sys_mincore /* 206 */ - .rept NR_syscalls-206 + .llong .sys_gettid /* 207 */ +#if 0 /* Reserved syscalls */ + .llong .sys_tkill /* 208 */ + .llong .sys_setxattr + .llong .sys_lsetxattr /* 210 */ + .llong .sys_fsetxattr + .llong .sys_getxattr + .llong .sys_lgetxattr + .llong .sys_fgetxattr + .llong .sys_listxattr /* 215 */ + .llong .sys_llistxattr + .llong .sys_flistxattr + .llong .sys_removexattr + .llong .sys_lremovexattr + .llong .sys_fremovexattr /* 220 */ + .llong .sys_futex +#endif + .rept NR_syscalls-221 .llong .sys_ni_syscall .endr #endif - .data - .align 8 + .balign 8 _GLOBAL(sys_call_table) .llong .sys_ni_syscall /* 0 - old "setup()" system call */ .llong .sys_exit @@ -943,6 +958,23 @@ .llong .sys_ni_syscall /* 204 */ .llong .sys_madvise /* 205 */ .llong .sys_mincore /* 206 */ - .rept NR_syscalls-206 + .llong .sys_gettid /* 207 */ +#if 0 /* Reserved syscalls */ + .llong .sys_tkill /* 208 */ + .llong .sys_setxattr + .llong .sys_lsetxattr /* 210 */ + .llong .sys_fsetxattr + .llong .sys_getxattr + .llong .sys_lgetxattr + .llong .sys_fgetxattr + .llong .sys_listxattr /* 215 */ + .llong .sys_llistxattr + .llong .sys_flistxattr + .llong .sys_removexattr + .llong .sys_lremovexattr + .llong .sys_fremovexattr /* 220 */ + .llong .sys_futex +#endif + .rept NR_syscalls-221 .llong .sys_ni_syscall .endr diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/mk_defs.c linuxppc64_2_4/arch/ppc64/kernel/mk_defs.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/mk_defs.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/mk_defs.c Thu Apr 18 09:36:18 2002 @@ -30,8 +30,8 @@ #include #include -#include -#include +#include +#include #include #include #include @@ -50,44 +50,45 @@ DEFINE(TASK_STRUCT_SIZE, sizeof(struct task_struct)); DEFINE(KSP, offsetof(struct thread_struct, ksp)); - DEFINE(PACA, offsetof(struct Naca, paca)); - DEFINE(PACA_SIZE, sizeof(struct Paca)); + DEFINE(PACA, offsetof(struct naca_struct, paca)); + DEFINE(PACA_SIZE, sizeof(struct paca_struct)); - DEFINE(DCACHEL1LINESIZE, offsetof(struct Naca, dCacheL1LineSize)); - DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct Naca, dCacheL1LogLineSize)); - DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct Naca, dCacheL1LinesPerPage)); - - DEFINE(ICACHEL1LINESIZE, offsetof(struct Naca, iCacheL1LineSize)); - DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct Naca, iCacheL1LogLineSize)); - DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct Naca, iCacheL1LinesPerPage)); - DEFINE(SLBSIZE, offsetof(struct Naca, slb_size)); - - DEFINE(PACAPACAINDEX, offsetof(struct Paca, xPacaIndex)); - DEFINE(PACAPROCSTART, offsetof(struct Paca, xProcStart)); - 
DEFINE(PACAKSAVE, offsetof(struct Paca, xKsave)); - DEFINE(PACACURRENT, offsetof(struct Paca, xCurrent)); - DEFINE(PACASAVEDMSR, offsetof(struct Paca, xSavedMsr)); - DEFINE(PACASTABREAL, offsetof(struct Paca, xStab_data.real)); - DEFINE(PACASTABVIRT, offsetof(struct Paca, xStab_data.virt)); - DEFINE(PACASTABRR, offsetof(struct Paca, xStab_data.next_round_robin)); - DEFINE(PACAR1, offsetof(struct Paca, xR1)); - DEFINE(PACALPQUEUE, offsetof(struct Paca, lpQueuePtr)); - DEFINE(PACATOC, offsetof(struct Paca, xTOC)); - DEFINE(PACAEXCSP, offsetof(struct Paca, exception_sp)); - DEFINE(PACAHRDWINTSTACK, offsetof(struct Paca, xHrdIntStack)); - DEFINE(PACAPROCENABLED, offsetof(struct Paca, xProcEnabled)); - DEFINE(PACAHRDWINTCOUNT, offsetof(struct Paca, xHrdIntCount)); - DEFINE(PACADEFAULTDECR, offsetof(struct Paca, default_decr)); - DEFINE(PACAPROFENABLED, offsetof(struct Paca, prof_enabled)); - DEFINE(PACAPROFLEN, offsetof(struct Paca, prof_len)); - DEFINE(PACAPROFSHIFT, offsetof(struct Paca, prof_shift)); - DEFINE(PACAPROFBUFFER, offsetof(struct Paca, prof_buffer)); - DEFINE(PACAPROFSTEXT, offsetof(struct Paca, prof_stext)); - DEFINE(PACALPPACA, offsetof(struct Paca, xLpPaca)); - DEFINE(LPPACA, offsetof(struct Paca, xLpPaca)); - DEFINE(PACAREGSAV, offsetof(struct Paca, xRegSav)); - DEFINE(PACAEXC, offsetof(struct Paca, exception_stack)); - DEFINE(PACAGUARD, offsetof(struct Paca, guard)); + DEFINE(DCACHEL1LINESIZE, offsetof(struct naca_struct, dCacheL1LineSize)); + DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct naca_struct, dCacheL1LogLineSize)); + DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct naca_struct, dCacheL1LinesPerPage)); + + DEFINE(ICACHEL1LINESIZE, offsetof(struct naca_struct, iCacheL1LineSize)); + DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct naca_struct, iCacheL1LogLineSize)); + DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct naca_struct, iCacheL1LinesPerPage)); + DEFINE(SLBSIZE, offsetof(struct naca_struct, slb_size)); + DEFINE(PLATFORM, offsetof(struct naca_struct, platform)); + + DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, xPacaIndex)); + DEFINE(PACAPROCSTART, offsetof(struct paca_struct, xProcStart)); + DEFINE(PACAKSAVE, offsetof(struct paca_struct, xKsave)); + DEFINE(PACACURRENT, offsetof(struct paca_struct, xCurrent)); + DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, xSavedMsr)); + DEFINE(PACASTABREAL, offsetof(struct paca_struct, xStab_data.real)); + DEFINE(PACASTABVIRT, offsetof(struct paca_struct, xStab_data.virt)); + DEFINE(PACASTABRR, offsetof(struct paca_struct, xStab_data.next_round_robin)); + DEFINE(PACAR1, offsetof(struct paca_struct, xR1)); + DEFINE(PACALPQUEUE, offsetof(struct paca_struct, lpQueuePtr)); + DEFINE(PACATOC, offsetof(struct paca_struct, xTOC)); + DEFINE(PACAEXCSP, offsetof(struct paca_struct, exception_sp)); + DEFINE(PACAHRDWINTSTACK, offsetof(struct paca_struct, xHrdIntStack)); + DEFINE(PACAPROCENABLED, offsetof(struct paca_struct, xProcEnabled)); + DEFINE(PACAHRDWINTCOUNT, offsetof(struct paca_struct, xHrdIntCount)); + DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr)); + DEFINE(PACAPROFENABLED, offsetof(struct paca_struct, prof_enabled)); + DEFINE(PACAPROFLEN, offsetof(struct paca_struct, prof_len)); + DEFINE(PACAPROFSHIFT, offsetof(struct paca_struct, prof_shift)); + DEFINE(PACAPROFBUFFER, offsetof(struct paca_struct, prof_buffer)); + DEFINE(PACAPROFSTEXT, offsetof(struct paca_struct, prof_stext)); + DEFINE(PACALPPACA, offsetof(struct paca_struct, xLpPaca)); + DEFINE(LPPACA, offsetof(struct paca_struct, xLpPaca)); + DEFINE(PACAREGSAV, 
offsetof(struct paca_struct, xRegSav)); + DEFINE(PACAEXC, offsetof(struct paca_struct, exception_stack)); + DEFINE(PACAGUARD, offsetof(struct paca_struct, guard)); DEFINE(LPPACASRR0, offsetof(struct ItLpPaca, xSavedSrr0)); DEFINE(LPPACASRR1, offsetof(struct ItLpPaca, xSavedSrr1)); DEFINE(LPPACAANYINT, offsetof(struct ItLpPaca, xIntDword.xAnyInt)); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/nvram.c linuxppc64_2_4/arch/ppc64/kernel/nvram.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/nvram.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/arch/ppc64/kernel/nvram.c Fri Mar 1 13:28:29 2002 @@ -0,0 +1,140 @@ +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * /dev/nvram driver for PPC64 + * + * This perhaps should live in drivers/char + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int rtas_nvram_size; +static unsigned int nvram_fetch, nvram_store; +static char nvram_buf[4]; /* assume this is in the first 4GB */ + +static loff_t nvram_llseek(struct file *file, loff_t offset, int origin) +{ + switch (origin) { + case 1: + offset += file->f_pos; + break; + case 2: + offset += rtas_nvram_size; + break; + } + if (offset < 0) + return -EINVAL; + file->f_pos = offset; + return file->f_pos; +} + + +static ssize_t read_nvram(struct file *file, char *buf, + size_t count, loff_t *ppos) +{ + unsigned int i; + unsigned long len; + char *p = buf; + + if (verify_area(VERIFY_WRITE, buf, count)) + return -EFAULT; + if (*ppos >= rtas_nvram_size) + return 0; + for (i = *ppos; count > 0 && i < rtas_nvram_size; ++i, ++p, --count) { + if ((rtas_call(nvram_fetch, 3, 2, &len, i, __pa(nvram_buf), 1) != 0) || + len != 1) + return -EIO; + if (__put_user(nvram_buf[0], p)) + return -EFAULT; + } + *ppos = i; + return p - buf; +} + +static ssize_t write_nvram(struct file *file, const char *buf, + size_t count, loff_t *ppos) +{ + unsigned int i; + unsigned long len; + const char *p = buf; + char c; + + if (verify_area(VERIFY_READ, buf, count)) + return -EFAULT; + if (*ppos >= rtas_nvram_size) + return 0; + for (i = *ppos; count > 0 && i < rtas_nvram_size; ++i, ++p, --count) { + if (__get_user(c, p)) + return -EFAULT; + nvram_buf[0] = c; + if ((rtas_call(nvram_store, 3, 2, &len, i, __pa(nvram_buf), 1) != 0) || + len != 1) + return -EIO; + } + *ppos = i; + return p - buf; +} + +static int nvram_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + return -EINVAL; +} + +struct file_operations nvram_fops = { + owner: THIS_MODULE, + llseek: nvram_llseek, + read: read_nvram, + write: write_nvram, + ioctl: nvram_ioctl, +}; + +static struct miscdevice nvram_dev = { + NVRAM_MINOR, + "nvram", + &nvram_fops +}; + +int __init nvram_init(void) +{ + struct device_node *nvram; + unsigned int *nbytes_p, proplen; + if ((nvram = find_type_devices("nvram")) != NULL) { + nbytes_p = (unsigned int *)get_property(nvram, "#bytes", &proplen); + if (nbytes_p && proplen == sizeof(unsigned int)) { + rtas_nvram_size = *nbytes_p; + } + } + nvram_fetch = rtas_token("nvram-fetch"); + nvram_store = rtas_token("nvram-store"); + printk(KERN_INFO "PPC64 nvram contains %d bytes\n", rtas_nvram_size); + + misc_register(&nvram_dev); + return 0; +} + +void 
__exit nvram_cleanup(void) +{ + misc_deregister( &nvram_dev ); +} + +module_init(nvram_init); +module_exit(nvram_cleanup); +MODULE_LICENSE("GPL"); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/pSeries_lpar.c linuxppc64_2_4/arch/ppc64/kernel/pSeries_lpar.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/pSeries_lpar.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/pSeries_lpar.c Fri Apr 19 13:38:07 2002 @@ -32,7 +32,7 @@ #include #include #include -#include +#include /* Status return values */ #define H_Success 0 @@ -532,7 +532,7 @@ local_hpte.dw0.dw0.avpn = va >> 23; local_hpte.dw0.dw0.bolted = 1; /* bolted */ if (large) - local_hpte.dw0.dw0.l = 1; /* large page */ + local_hpte.dw0.dw0.l = 1; /* large page */ local_hpte.dw0.dw0.v = 1; /* Set CEC cookie to 0 */ @@ -577,7 +577,7 @@ static void tce_build_pSeriesLP(struct TceTable *tbl, long tcenum, unsigned long uaddr, int direction ) { - u64 setTceRc; + u64 set_tce_rc; union Tce tce; PPCDBG(PPCDBG_TCE, "build_tce: uaddr = 0x%lx\n", uaddr); @@ -590,90 +590,35 @@ tce.tceBits.readWrite = 1; if ( direction != PCI_DMA_TODEVICE ) tce.tceBits.pciWrite = 1; - setTceRc = plpar_tce_put((u64)tbl->index, + set_tce_rc = plpar_tce_put((u64)tbl->index, (u64)tcenum << 12, tce.wholeTce ); - /* Make sure the update is visible to hardware. - * ToDo: sync after setting *all* the tce's. - */ - __asm__ __volatile__ ("sync" : : : "memory"); - if(setTceRc) { - PPCDBG(PPCDBG_TCE, "setTce failed. rc=%ld\n", setTceRc); - PPCDBG(PPCDBG_TCE, "\tindex = 0x%lx\n", (u64)tbl->index); - PPCDBG(PPCDBG_TCE, "\ttcenum = 0x%lx\n", (u64)tcenum); - PPCDBG(PPCDBG_TCE, "\ttce val = 0x%lx\n", tce.wholeTce ); + if(set_tce_rc) { + printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%ld\n", set_tce_rc); + printk("\tindex = 0x%lx\n", (u64)tbl->index); + printk("\ttcenum = 0x%lx\n", (u64)tcenum); + printk("\ttce val = 0x%lx\n", tce.wholeTce ); } } -static inline void free_tce_range(struct TceTable *tbl, - long tcenum, unsigned order ) +static void tce_free_one_pSeriesLP(struct TceTable *tbl, long tcenum) { - unsigned long flags; - - /* Lock the tce allocation bitmap */ - spin_lock_irqsave( &(tbl->lock), flags ); - - /* Do the actual work */ - free_tce_range_nolock( tbl, tcenum, order ); - - /* Unlock the tce allocation bitmap */ - spin_unlock_irqrestore( &(tbl->lock), flags ); - -} - -static void tce_free_pSeriesLP(struct TceTable *tbl, dma_addr_t dma_addr, - unsigned order, unsigned numPages) -{ - u64 setTceRc; - long tcenum, freeTce, maxTcenum; - unsigned i; + u64 set_tce_rc; union Tce tce; - maxTcenum = (tbl->size * (PAGE_SIZE / sizeof(union Tce))) - 1; - - tcenum = dma_addr >> PAGE_SHIFT; - - freeTce = tcenum - tbl->startOffset; - - if ( freeTce > maxTcenum ) { - printk("free_tces: tcenum > maxTcenum\n"); - printk("\ttcenum = 0x%lx\n", tcenum); - printk("\tfreeTce = 0x%lx\n", freeTce); - printk("\tmaxTcenum = 0x%lx\n", maxTcenum); - printk("\tTCE Table = 0x%lx\n", (u64)tbl); - printk("\tbus# = 0x%lx\n", - (u64)tbl->busNumber ); - printk("\tsize = 0x%lx\n", (u64)tbl->size); - printk("\tstartOff = 0x%lx\n", - (u64)tbl->startOffset ); - printk("\tindex = 0x%lx\n", (u64)tbl->index); - return; - } - - for (i=0; i<numPages; ++i) { - tce.wholeTce = 0; - setTceRc = plpar_tce_put((u64)tbl->index, - (u64)tcenum << 12, /* note: not freeTce */ - tce.wholeTce ); - if ( setTceRc ) { - printk("tce_free: setTce failed\n"); - printk("\trc = %ld\n", setTceRc); - printk("\tindex = 0x%lx\n", - (u64)tbl->index); - printk("\ttcenum = 0x%lx\n", (u64)tcenum); - printk("\tfreeTce = 0x%lx\n", (u64)freeTce); - printk("\ttce val = 0x%lx\n", -
tce.wholeTce ); - } - - ++tcenum; + tce.wholeTce = 0; + set_tce_rc = plpar_tce_put((u64)tbl->index, + (u64)tcenum << 12, + tce.wholeTce ); + if ( set_tce_rc ) { + printk("tce_free_one_pSeriesLP: plpar_tce_put failed\n"); + printk("\trc = %ld\n", set_tce_rc); + printk("\tindex = 0x%lx\n", (u64)tbl->index); + printk("\ttcenum = 0x%lx\n", (u64)tcenum); + printk("\ttce val = 0x%lx\n", tce.wholeTce ); } - /* Make sure the update is visible to hardware. */ - __asm__ __volatile__ ("sync" : : : "memory"); - - free_tce_range( tbl, freeTce, order ); } /* PowerPC Interrupts for lpar. */ @@ -769,8 +714,13 @@ /* get some more chars. */ inbuflen = 0; rc = plpar_get_term_char(vtermno, &inbuflen, buf); - if (inbuflen == 0 && rc == H_Success) - return -1; + if (rc != H_Success) + inbuflen = 0; /* otherwise inbuflen is garbage */ + } + if (inbuflen <= 0 || inbuflen > 16) { + /* Catch error case as well as other oddities (corruption) */ + inbuflen = 0; + return -1; } ch = buf[0]; for (i = 1; i < inbuflen; i++) /* shuffle them down. */ @@ -810,7 +760,7 @@ ppc_md.hpte_find = hpte_find_pSeriesLP; ppc_md.tce_build = tce_build_pSeriesLP; - ppc_md.tce_free = tce_free_pSeriesLP; + ppc_md.tce_free_one = tce_free_one_pSeriesLP; #ifdef CONFIG_SMP smp_init_pSeries(); @@ -846,7 +796,7 @@ } } -/* Confidential code for hvc_console. Should move it back eventually. */ +/* Code for hvc_console. Should move it back eventually. */ int hvc_get_chars(int index, char *buf, int count) { diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/pSeries_pci.c linuxppc64_2_4/arch/ppc64/kernel/pSeries_pci.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/pSeries_pci.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/pSeries_pci.c Mon Apr 22 16:50:19 2002 @@ -39,11 +39,9 @@ #include #include #include -#include +#include #include -#ifdef CONFIG_PPC_EEH #include -#endif #include "xics.h" #include "open_pic.h" @@ -130,7 +128,10 @@ } \ int __chrp \ rtas_pci_write_config_##size(struct pci_dev *dev, int offset, type val) { \ - return rtas_write_config_##size(pci_device_to_OF_node(dev), offset, val); \ + struct device_node* dn = pci_device_to_OF_node(dev); \ + int ret = rtas_write_config_##size(dn, offset, val); \ + /* udbg_printf("write bus=%x, devfn=%x, ret=%d phb=%lx, dn=%lx\n", dev->bus->number, dev->devfn, ret, dn ? dn->phb : 0, dn); */ \ + return ret ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL; \ } RTAS_PCI_READ_OP(byte, u8 *, 1) @@ -256,9 +257,8 @@ write_pci_config = rtas_token("write-pci-config"); ibm_read_pci_config = rtas_token("ibm,read-pci-config"); ibm_write_pci_config = rtas_token("ibm,write-pci-config"); -#ifdef CONFIG_PPC_EEH + eeh_init(); -#endif if (naca->interrupt_controller == IC_OPEN_PIC) { opprop = (unsigned int *)get_property(find_path_device("/"), @@ -358,24 +358,24 @@ res = &phb->io_resource; res->name = Pci_Node->full_name; res->flags = IORESOURCE_IO; -#ifdef CONFIG_PPC_EEH - if (!isa_io_base && has_isa) { - /* map a page for ISA ports. Not EEH protected. */ - isa_io_base = (unsigned long)__ioremap(phb->io_base_phys, PAGE_SIZE, _PAGE_NO_CACHE); - } - res->start = phb->io_base_virt = eeh_token(index, 0, 0, 0); - res->end = eeh_token(index, 0xff, 0xff, 0xffffffff); -#else - phb->io_base_virt = ioremap(phb->io_base_phys, range.size); - if (!pci_io_base) { - pci_io_base = (unsigned long)phb->io_base_virt; - if (has_isa) - isa_io_base = pci_io_base; + if (is_eeh_implemented()) { + if (!isa_io_base && has_isa) { + /* map a page for ISA ports. Not EEH protected. 
*/ + isa_io_base = (unsigned long)__ioremap(phb->io_base_phys, PAGE_SIZE, _PAGE_NO_CACHE); + } + res->start = phb->io_base_virt = eeh_token(index, 0, 0, 0); + res->end = eeh_token(index, 0xff, 0xff, 0xffffffff); + } else { + phb->io_base_virt = ioremap(phb->io_base_phys, range.size); + if (!pci_io_base) { + pci_io_base = (unsigned long)phb->io_base_virt; + if (has_isa) + isa_io_base = pci_io_base; + } + res->start = ((((unsigned long) range.child_addr.a_mid) << 32) | (range.child_addr.a_lo)); + res->start += (unsigned long)phb->io_base_virt; + res->end = res->start + range.size - 1; } - res->start = ((((unsigned long) range.child_addr.a_mid) << 32) | (range.child_addr.a_lo)); - res->start += (unsigned long)phb->io_base_virt; - res->end = res->start + range.size - 1; -#endif res->parent = NULL; res->sibling = NULL; res->child = NULL; @@ -399,13 +399,13 @@ ++memno; res->name = Pci_Node->full_name; res->flags = IORESOURCE_MEM; -#ifdef CONFIG_PPC_EEH - res->start = eeh_token(index, 0, 0, 0); - res->end = eeh_token(index, 0xff, 0xff, 0xffffffff); -#else - res->start = range.parent_addr; - res->end = range.parent_addr + range.size - 1; -#endif + if (is_eeh_implemented()) { + res->start = eeh_token(index, 0, 0, 0); + res->end = eeh_token(index, 0xff, 0xff, 0xffffffff); + } else { + res->start = range.parent_addr; + res->end = range.parent_addr + range.size - 1; + } res->parent = NULL; res->sibling = NULL; res->child = NULL; @@ -515,13 +515,15 @@ /*************************************************************** * Speedwagon + * include Winnipeg as well for the time being. ***************************************************************/ - } else if (strstr(model, "Speedwagon")) { + } else if ((strstr(model, "Speedwagon")) || + (strstr(model, "Winnipeg"))) { PPCDBG(PPCDBG_PHBINIT, "\tCreate speedwagon\n"); phb = pci_alloc_pci_controller("PHB SW",phb_type_speedwagon); if (phb == NULL) return NULL; - if (_machine == _MACH_pSeries) { + if (naca->platform == PLATFORM_PSERIES) { phb->cfg_addr = (volatile unsigned long *) ioremap(reg_struct.address + 0x140, PAGE_SIZE); phb->cfg_data = (char*)(phb->cfg_addr - 0x02); /* minus is correct */ @@ -603,7 +605,6 @@ { int i; struct pci_controller *phb = PCI_GET_PHB_PTR(dev); -#ifdef CONFIG_PPC_EEH struct device_node *dn; unsigned long eeh_disable_bit; @@ -622,19 +623,19 @@ } } - if (is_eeh_configured(dev)) { - eeh_disable_bit = 0; - printk("PCI: eeh configured for %s %s\n", dev->slot_name, dev->name); - if (eeh_set_option(dev, EEH_ENABLE) != 0) { - printk("PCI: failed to enable eeh for %s %s\n", dev->slot_name, dev->name); + if (is_eeh_implemented()) { + if (is_eeh_configured(dev)) { + eeh_disable_bit = 0; + if (eeh_set_option(dev, EEH_ENABLE) != 0) { + printk("PCI: failed to enable EEH for %s %s\n", dev->slot_name, dev->name); + eeh_disable_bit = EEH_TOKEN_DISABLED; + } + } else { + /* Assume device is by default EEH_DISABLE'd */ + printk("PCI: eeh NOT configured for %s %s\n", dev->slot_name, dev->name); eeh_disable_bit = EEH_TOKEN_DISABLED; } - } else { - /* Assume device is by default EEH_DISABLE'd */ - printk("PCI: eeh NOT configured for %s %s\n", dev->slot_name, dev->name); - eeh_disable_bit = EEH_TOKEN_DISABLED; } -#endif PPCDBG(PPCDBG_PHBINIT, "fixup_resources:\n"); PPCDBG(PPCDBG_PHBINIT, "\tphb = 0x%016LX\n", phb); @@ -659,19 +660,19 @@ } if (dev->resource[i].flags & IORESOURCE_IO) { -#ifdef CONFIG_PPC_EEH - unsigned int busno = dev->bus ? 
dev->bus->number : 0; - unsigned long size = dev->resource[i].end - dev->resource[i].start; - unsigned long addr = (unsigned long)__ioremap(dev->resource[i].start + phb->io_base_phys, size, _PAGE_NO_CACHE); - if (!addr) - panic("fixup_resources: ioremap failed!\n"); - dev->resource[i].start = eeh_token(phb->global_number, busno, dev->devfn, addr) | eeh_disable_bit; - dev->resource[i].end = dev->resource[i].start + size; -#else - unsigned long offset = (unsigned long)phb->io_base_virt; - dev->resource[i].start += offset; - dev->resource[i].end += offset; -#endif + if (is_eeh_implemented()) { + unsigned int busno = dev->bus ? dev->bus->number : 0; + unsigned long size = dev->resource[i].end - dev->resource[i].start; + unsigned long addr = (unsigned long)__ioremap(dev->resource[i].start + phb->io_base_phys, size, _PAGE_NO_CACHE); + if (!addr) + panic("fixup_resources: ioremap failed!\n"); + dev->resource[i].start = eeh_token(phb->global_number, busno, dev->devfn, addr) | eeh_disable_bit; + dev->resource[i].end = dev->resource[i].start + size; + } else { + unsigned long offset = (unsigned long)phb->io_base_virt; + dev->resource[i].start += offset; + dev->resource[i].end += offset; + } PPCDBG(PPCDBG_PHBINIT, "\t\t-> now [%lx .. %lx]\n", dev->resource[i].start, dev->resource[i].end); } else if (dev->resource[i].flags & IORESOURCE_MEM) { @@ -679,18 +680,18 @@ /* Bogus. Probably an unused bridge. */ dev->resource[i].end = 0; } else { -#ifdef CONFIG_PPC_EEH - unsigned int busno = dev->bus ? dev->bus->number : 0; - unsigned long size = dev->resource[i].end - dev->resource[i].start; - unsigned long addr = (unsigned long)__ioremap(dev->resource[i].start + phb->pci_mem_offset, size, _PAGE_NO_CACHE); - if (!addr) - panic("fixup_resources: ioremap failed!\n"); - dev->resource[i].start = eeh_token(phb->global_number, busno, dev->devfn, addr) | eeh_disable_bit; - dev->resource[i].end = dev->resource[i].start + size; -#else - dev->resource[i].start += phb->pci_mem_offset; - dev->resource[i].end += phb->pci_mem_offset; -#endif + if (is_eeh_implemented()) { + unsigned int busno = dev->bus ? dev->bus->number : 0; + unsigned long size = dev->resource[i].end - dev->resource[i].start; + unsigned long addr = (unsigned long)__ioremap(dev->resource[i].start + phb->pci_mem_offset, size, _PAGE_NO_CACHE); + if (!addr) + panic("fixup_resources: ioremap failed!\n"); + dev->resource[i].start = eeh_token(phb->global_number, busno, dev->devfn, addr) | eeh_disable_bit; + dev->resource[i].end = dev->resource[i].start + size; + } else { + dev->resource[i].start += phb->pci_mem_offset; + dev->resource[i].end += phb->pci_mem_offset; + } } PPCDBG(PPCDBG_PHBINIT, "\t\t-> now [%lx..%lx]\n", dev->resource[i].start, dev->resource[i].end); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/pacaData.c linuxppc64_2_4/arch/ppc64/kernel/pacaData.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/pacaData.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/pacaData.c Tue Apr 9 11:23:18 2002 @@ -7,7 +7,7 @@ * 2 of the License, or (at your option) any later version. */ -#define __KERNEL__ 1 +#define __KERNEL__ #include #include #include @@ -17,8 +17,10 @@ #include #include -#include +#include +#include +struct naca_struct *naca; /* The Paca is an array with one entry per processor. 
Each contains an * ItLpPaca, which contains the information shared between the @@ -32,8 +34,8 @@ */ #define PACAINITDATA(number,start,lpq,asrr,asrv) \ { \ - xLpPacaPtr: &xPaca[number].xLpPaca, \ - xLpRegSavePtr: &xPaca[number].xRegSav, \ + xLpPacaPtr: &paca[number].xLpPaca, \ + xLpRegSavePtr: &paca[number].xRegSav, \ xPacaIndex: (number), /* Paca Index */ \ default_decr: 0x00ff0000, /* Initial Decr */ \ xStab_data: { \ @@ -42,9 +44,9 @@ next_round_robin: 1 /* Round robin index */ \ }, \ lpQueuePtr: (lpq), /* &xItLpQueue, */ \ - xRtas: { \ + /* xRtas: { \ lock: SPIN_LOCK_UNLOCKED \ - }, \ + }, */ \ xProcStart: (start), /* Processor start */ \ xLpPaca: { \ xDesc: 0xd397d781, /* "LpPa" */ \ @@ -59,10 +61,10 @@ xSize: sizeof(struct ItLpRegSave) \ }, \ exception_sp: \ - (&xPaca[number].exception_stack[0]) - EXC_FRAME_SIZE, \ + (&paca[number].exception_stack[0]) - EXC_FRAME_SIZE, \ } -struct Paca xPaca[maxPacas] __page_aligned = { +struct paca_struct paca[MAX_PACAS] __page_aligned = { #ifdef CONFIG_PPC_ISERIES PACAINITDATA( 0, 1, &xItLpQueue, 0, 0xc000000000005000), #else diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/pci.c linuxppc64_2_4/arch/ppc64/kernel/pci.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/pci.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/pci.c Wed Apr 17 08:05:39 2002 @@ -30,12 +30,10 @@ #include #include #include -#include +#include #include #include -#ifdef CONFIG_PPC_EEH #include -#endif #include "pci.h" @@ -61,11 +59,7 @@ void iSeries_pcibios_init(void); void pSeries_pcibios_init(void); - -extern struct Naca *naca; - -int pci_assign_all_busses = 0; - +int pci_assign_all_busses = 0; struct pci_controller* hose_head; struct pci_controller** hose_tail = &hose_head; @@ -550,23 +544,23 @@ /* Transparent resource -- don't try to "fix" it. 
*/ continue; } -#ifdef CONFIG_PPC_EEH - if (res->flags & (IORESOURCE_IO|IORESOURCE_MEM)) { - res->start = eeh_token(phb->global_number, bus->number, 0, 0); - res->end = eeh_token(phb->global_number, bus->number, 0xff, 0xffffffff); - } -#else - if (res->flags & IORESOURCE_IO) { - res->start += (unsigned long)phb->io_base_virt; - res->end += (unsigned long)phb->io_base_virt; - } else if (phb->pci_mem_offset - && (res->flags & IORESOURCE_MEM)) { - if (res->start < phb->pci_mem_offset) { - res->start += phb->pci_mem_offset; - res->end += phb->pci_mem_offset; + if (is_eeh_implemented()) { + if (res->flags & (IORESOURCE_IO|IORESOURCE_MEM)) { + res->start = eeh_token(phb->global_number, bus->number, 0, 0); + res->end = eeh_token(phb->global_number, bus->number, 0xff, 0xffffffff); + } + } else { + if (res->flags & IORESOURCE_IO) { + res->start += (unsigned long)phb->io_base_virt; + res->end += (unsigned long)phb->io_base_virt; + } else if (phb->pci_mem_offset + && (res->flags & IORESOURCE_MEM)) { + if (res->start < phb->pci_mem_offset) { + res->start += phb->pci_mem_offset; + res->end += phb->pci_mem_offset; + } } } -#endif } } #endif @@ -759,8 +753,7 @@ /* XXX would be nice to have a way to ask for write-through */ prot |= _PAGE_NO_CACHE; - if (!write_combine) - prot |= _PAGE_GUARDED; + prot |= _PAGE_GUARDED; vma->vm_page_prot = __pgprot(prot); } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/pci_dma.c linuxppc64_2_4/arch/ppc64/kernel/pci_dma.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/pci_dma.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/pci_dma.c Fri Apr 19 13:38:07 2002 @@ -146,7 +146,7 @@ dev = ppc64_isabridge_dev; if (!dev) return NULL; - if ( _machine == _MACH_iSeries ) { + if (naca->platform == PLATFORM_ISERIES_LPAR) { return ISERIES_DEVNODE(dev)->DevTceTable; } else { return PCI_GET_DN(dev)->tce_table; @@ -213,8 +213,6 @@ tce_addr = ((union Tce *)tbl->base) + tcenum; *tce_addr = (union Tce)tce.wholeTce; - /* Make sure the update is visible to hardware. */ - __asm__ __volatile__ ("sync" : : : "memory"); } /* @@ -427,7 +425,7 @@ unsigned char * map, * bytep; if (order >= NUM_TCE_LEVELS) { - panic("PCI_DMA: free_tce_range: invalid order: %d\n",order); + panic("PCI_DMA: free_tce_range: invalid order: 0x%x\n",order); return; } @@ -475,7 +473,8 @@ * we are freeing the last block we can't buddy up * Don't buddy up if it's in the first 1/4 of the level */ - if (( block > (tbl->mlbm.level[order].numBits/4) ) && + if (( order < tbl->mlbm.maxLevel ) && + ( block > (tbl->mlbm.level[order].numBits/4) ) && (( block < tbl->mlbm.level[order].numBits-1 ) || ( 0 == ( tbl->mlbm.level[order].numBits & 1)))) { /* See if we can buddy up the block we just freed */ @@ -549,6 +548,11 @@ ++tcenum; uaddr += PAGE_SIZE; } + /* Make sure the update is visible to hardware. + sync required to synchronize the update to + the TCE table with the MMIO that will send + the bus address to the IOA */ + __asm__ __volatile__ ("sync" : : : "memory"); } else { panic("PCI_DMA: Tce Allocation failure in get_tces. 
0x%p\n",tbl); @@ -557,88 +561,65 @@ return retTce; } -static void tce_free_iSeries(struct TceTable *tbl, dma_addr_t dma_addr, - unsigned order, unsigned numPages) +static void tce_free_one_iSeries( struct TceTable *tbl, long tcenum ) { - u64 setTceRc; - long tcenum, freeTce, maxTcenum; - unsigned i; + u64 set_tce_rc; union Tce tce; + tce.wholeTce = 0; + set_tce_rc = HvCallXm_setTce((u64)tbl->index, + (u64)tcenum, + tce.wholeTce); + if ( set_tce_rc ) + panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", set_tce_rc); - maxTcenum = (tbl->size * (PAGE_SIZE / sizeof(union Tce))) - 1; - - tcenum = dma_addr >> PAGE_SHIFT; +} - freeTce = tcenum - tbl->startOffset; +static void tce_free_one_pSeries( struct TceTable *tbl, long tcenum ) +{ + union Tce tce; + union Tce *tce_addr; - if ( freeTce > maxTcenum ) { - PPCDBG(PPCDBG_TCE, "free_tces: tcenum > maxTcenum\n"); - PPCDBG(PPCDBG_TCE, "\ttcenum = 0x%lx\n", tcenum); - PPCDBG(PPCDBG_TCE, "\tmaxTcenum = 0x%lx\n", maxTcenum); - PPCDBG(PPCDBG_TCE, "\tTCE Table = 0x%lx\n", (u64)tbl); - PPCDBG(PPCDBG_TCE, "\tbus# = 0x%lx\n", (u64)tbl->busNumber ); - PPCDBG(PPCDBG_TCE, "\tsize = 0x%lx\n", (u64)tbl->size); - PPCDBG(PPCDBG_TCE, "\tstartOff = 0x%lx\n", (u64)tbl->startOffset ); - PPCDBG(PPCDBG_TCE, "\tindex = 0x%lx\n", (u64)tbl->index); - return; - } - - for (i=0; i<numPages; ++i) { - tce.wholeTce = 0; - setTceRc = HvCallXm_setTce((u64)tbl->index, - (u64)tcenum, - tce.wholeTce ); + tce.wholeTce = 0; + + tce_addr = ((union Tce *)tbl->base) + tcenum; + *tce_addr = (union Tce)tce.wholeTce; - if ( setTceRc ) { - panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", setTceRc); - } - ++tcenum; - } - free_tce_range( tbl, freeTce, order ); } -static void tce_free_pSeries(struct TceTable *tbl, dma_addr_t dma_addr, - unsigned order, unsigned numPages) +static void tce_free(struct TceTable *tbl, dma_addr_t dma_addr, + unsigned order, unsigned num_pages) { - long tcenum, freeTce, maxTcenum; + long tcenum, total_tces, free_tce; unsigned i; - union Tce tce; - union Tce *tce_addr; - maxTcenum = (tbl->size * (PAGE_SIZE / sizeof(union Tce))) - 1; + total_tces = (tbl->size * (PAGE_SIZE / sizeof(union Tce))); tcenum = dma_addr >> PAGE_SHIFT; - // tcenum -= tbl->startOffset; + free_tce = tcenum - tbl->startOffset; - freeTce = tcenum - tbl->startOffset; - - if ( freeTce > maxTcenum ) { - PPCDBG(PPCDBG_TCE, "free_tces: tcenum > maxTcenum\n"); - PPCDBG(PPCDBG_TCE, "\ttcenum = 0x%lx\n", tcenum); - PPCDBG(PPCDBG_TCE, "\tmaxTcenum = 0x%lx\n", maxTcenum); - PPCDBG(PPCDBG_TCE, "\tTCE Table = 0x%lx\n", (u64)tbl); - PPCDBG(PPCDBG_TCE, "\tbus# = 0x%lx\n", - (u64)tbl->busNumber ); - PPCDBG(PPCDBG_TCE, "\tsize = 0x%lx\n", (u64)tbl->size); - PPCDBG(PPCDBG_TCE, "\tstartOff = 0x%lx\n", - (u64)tbl->startOffset ); - PPCDBG(PPCDBG_TCE, "\tindex = 0x%lx\n", (u64)tbl->index); + if ( ( (free_tce + num_pages) > total_tces ) || + ( tcenum < tbl->startOffset ) ) { + printk("tce_free: invalid tcenum\n"); + printk("\ttcenum = 0x%lx\n", tcenum); + printk("\tTCE Table = 0x%lx\n", (u64)tbl); + printk("\tbus# = 0x%lx\n", (u64)tbl->busNumber ); + printk("\tsize = 0x%lx\n", (u64)tbl->size); + printk("\tstartOff = 0x%lx\n", (u64)tbl->startOffset ); + printk("\tindex = 0x%lx\n", (u64)tbl->index); return; } - for (i=0; i<numPages; ++i) { - tce.wholeTce = 0; - tce_addr = ((union Tce *)tbl->base) + tcenum; - *tce_addr = (union Tce)tce.wholeTce; - + for (i=0; iplatform == PLATFORM_PSERIES_LPAR) { create_tce_tables_for_busesLP(&pci_root_buses); } else { @@ -795,7 +776,7 @@ /* - Tce Table Share between buses, */ /* - Tce Table per logical slot. 
*/ /*****************************************************************/ - if(_machine == _MACH_iSeries) { + if(naca->platform == PLATFORM_ISERIES_LPAR) { struct iSeries_Device_Node* DevNode = (struct iSeries_Device_Node*)token; getTceTableParmsiSeries(DevNode,newTceTable); @@ -819,7 +800,7 @@ dn = (struct device_node *)token; phb = dn->phb; - if (_machine == _MACH_pSeries) + if (naca->platform == PLATFORM_PSERIES) getTceTableParmsPSeries(phb, dn, newTceTable); else getTceTableParmsPSeriesLP(phb, dn, newTceTable); @@ -890,6 +871,11 @@ pciBusTceTableParms->startOffset, pciBusTceTableParms->size); + if(pciBusTceTableParms->size == 0) { + printk("PCI_DMA: Possible Structure mismatch, 0x%p\n",pciBusTceTableParms); + panic( "PCI_DMA: pciBusTceTableParms->size is zero, halt here!"); + } + newTceTable->size = pciBusTceTableParms->size; newTceTable->busNumber = pciBusTceTableParms->busNumber; newTceTable->startOffset = pciBusTceTableParms->startOffset; @@ -1026,6 +1012,13 @@ order = get_order(size); nPages = 1 << order; + /* Client asked for way too much space. This is checked later anyway */ + /* It is easier to debug here for the drivers than in the tce tables.*/ + if(order >= NUM_TCE_LEVELS) { + printk("PCI_DMA: pci_alloc_consistent size too large: 0x%lx \n",size); + return (void *)NO_TCE; + } + tbl = get_tce_table(hwdev); if ( tbl ) { @@ -1068,14 +1061,17 @@ order = get_order(size); nPages = 1 << order; - if ( order > 10 ) - PPCDBG(PPCDBG_TCE, "pci_free_consistent: order=%d, size=%d, nPages=%d, dma_handle=%016lx, vaddr=%016lx\n", - order, size, nPages, (unsigned long)dma_handle, (unsigned long)vaddr ); + /* Client asked for way too much space. This is checked later anyway */ + /* It is easier to debug here for the drivers than in the tce tables.*/ + if(order >= NUM_TCE_LEVELS) { + printk("PCI_DMA: pci_free_consistent size too large: 0x%lx \n",size); + return; + } tbl = get_tce_table(hwdev); if ( tbl ) { - ppc_md.tce_free(tbl, dma_handle, order, nPages); + tce_free(tbl, dma_handle, order, nPages); free_pages( (unsigned long)vaddr, order ); } } @@ -1104,6 +1100,13 @@ order = get_order( nPages & PAGE_MASK ); nPages >>= PAGE_SHIFT; + /* Client asked for way too much space. This is checked later anyway */ + /* It is easier to debug here for the drivers than in the tce tables.*/ + if(order >= NUM_TCE_LEVELS) { + printk("PCI_DMA: pci_map_single size too large: 0x%lx \n",size); + return NO_TCE; + } + tbl = get_tce_table(hwdev); if ( tbl ) { @@ -1128,14 +1131,17 @@ order = get_order( nPages & PAGE_MASK ); nPages >>= PAGE_SHIFT; - if ( order > 10 ) - PPCDBG(PPCDBG_TCE, "pci_unmap_single: order=%d, size=%d, nPages=%d, dma_handle=%016lx\n", - order, size, nPages, (unsigned long)dma_handle ); + /* Client asked for way too much space. This is checked later anyway */ + /* It is easier to debug here for the drivers than in the tce tables.*/ + if(order >= NUM_TCE_LEVELS) { + printk("PCI_DMA: pci_unmap_single size too large: 0x%lx \n",size); + return; + } tbl = get_tce_table(hwdev); if ( tbl ) - ppc_md.tce_free(tbl, dma_handle, order, nPages); + tce_free(tbl, dma_handle, order, nPages); } @@ -1283,6 +1289,13 @@ dmaAddr = NO_TCE; order = get_order( numTces << PAGE_SHIFT ); + /* Client asked for way too much space. 
This is checked later anyway */ + /* It is easier to debug here for the drivers than in the tce tables.*/ + if(order >= NUM_TCE_LEVELS) { + printk("PCI_DMA: create_tces_sg size too large: 0x%x \n",(numTces << PAGE_SHIFT)); + return NO_TCE; + } + /* allocate a block of tces */ tcenum = alloc_tce_range( tbl, order ); if ( tcenum != -1 ) { @@ -1313,10 +1326,16 @@ ++tcenum; uaddr += PAGE_SIZE; } - + prevEndPage = endPage; sg++; } + /* Make sure the update is visible to hardware. + sync required to synchronize the update to + the TCE table with the MMIO that will send + the bus address to the IOA */ + __asm__ __volatile__ ("sync" : : : "memory"); + if ((tcenum - starttcenum) != numTces) PPCDBG(PPCDBG_TCE, "create_tces_sg: numTces %d, tces used %d\n", numTces, (unsigned)(tcenum - starttcenum)); @@ -1386,14 +1405,17 @@ numTces = ((dma_end_page - dma_start_page ) >> PAGE_SHIFT) + 1; order = get_order( numTces << PAGE_SHIFT ); - if ( order > 10 ) - PPCDBG(PPCDBG_TCE, "pci_unmap_sg: order=%d, numTces=%d, nelms=%d, dma_start_page=%016lx, dma_end_page=%016lx\n", - order, numTces, nelms, (unsigned long)dma_start_page, (unsigned long)dma_end_page ); + /* Client asked for way too much space. This is checked later anyway */ + /* It is easier to debug here for the drivers than in the tce tables.*/ + if(order >= NUM_TCE_LEVELS) { + printk("PCI_DMA: pci_unmap_sg size too large: 0x%x \n",(numTces << PAGE_SHIFT)); + return; + } tbl = get_tce_table(hwdev); if ( tbl ) - ppc_md.tce_free( tbl, dma_start_page, order, numTces ); + tce_free( tbl, dma_start_page, order, numTces ); } @@ -1464,11 +1486,11 @@ void tce_init_pSeries(void) { ppc_md.tce_build = tce_build_pSeries; - ppc_md.tce_free = tce_free_pSeries; + ppc_md.tce_free_one = tce_free_one_pSeries; } void tce_init_iSeries(void) { ppc_md.tce_build = tce_build_iSeries; - ppc_md.tce_free = tce_free_iSeries; + ppc_md.tce_free_one = tce_free_one_iSeries; } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/pci_dn.c linuxppc64_2_4/arch/ppc64/kernel/pci_dn.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/pci_dn.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/pci_dn.c Tue Apr 9 11:23:18 2002 @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -37,7 +36,7 @@ #include #include #include -#include +#include #include #include "pci.h" diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/pmc.c linuxppc64_2_4/arch/ppc64/kernel/pmc.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/pmc.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/pmc.c Wed Apr 17 11:07:51 2002 @@ -19,22 +19,27 @@ /* Change Activity: * 2001/06/05 : engebret : Created. + * 2002/04/11 : engebret : Add btmalloc code. 
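The recurring pci_dma.c change above drops the per-entry sync and publishes a whole batch of TCE updates with a single barrier, as the new comments explain. A sketch of that pattern; tce_set_one() is a hypothetical stand-in for HvCallXm_setTce, plpar_tce_put, or a direct store into the table, not a function from this patch.

/* Batch the TCE writes, then one sync for the whole run, so the
 * entries are globally visible before the caller hands the DMA
 * address to the adapter via MMIO. */
extern void tce_set_one(struct TceTable *tbl, long tcenum,
                        unsigned long uaddr);  /* hypothetical */

static void example_tce_build_range(struct TceTable *tbl, long tcenum,
                                    unsigned long uaddr, unsigned npages)
{
	unsigned i;

	for (i = 0; i < npages; ++i) {
		tce_set_one(tbl, tcenum + i, uaddr);  /* no barrier here */
		uaddr += PAGE_SIZE;
	}
	/* One barrier for all entries, not one per entry. */
	__asm__ __volatile__ ("sync" : : : "memory");
}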
 * End Change Activity */ #include -#include +#include #include #include #include #include #include +#include +#include #include #include -#include - -extern struct Naca *naca; +#include +#include +#include +#include +#include struct _pmc_sw pmc_sw_system = { 0 @@ -51,6 +56,19 @@ struct _pmc_sw_text pmc_sw_text; struct _pmc_hw_text pmc_hw_text; +extern pte_t *find_linux_pte( pgd_t * pgdir, unsigned long ea ); +extern pgd_t *bolted_pgd; + +static struct vm_struct *get_btm_area(unsigned long size, unsigned long flags); +static int local_free_bolted_pages(unsigned long ea, unsigned long num); + +extern pgd_t bolted_dir[]; +pgd_t *bolted_pgd = (pgd_t *)&bolted_dir; + +struct vm_struct *btmlist = NULL; +struct mm_struct btmalloc_mm = {pgd : bolted_dir, + page_table_lock : SPIN_LOCK_UNLOCKED}; + char * ppc64_pmc_stab(int file) { @@ -164,4 +182,166 @@ } return(pmc_hw_text.buffer); +} + +/* + * Manage allocations of storage which is bolted in the HPT and low fault + * overhead in the segment tables. Intended to be used for buffers used + * to collect performance data. + * + * Remaining Issues: + * - Power4 is not tested at all, 0xB regions will always be castout of slb + * - On Power3, 0xB00000000 esid is left in the stab for all time, + * other 0xB segments are castout, but not explicitly removed. + * - Error path checking is weak at best, wrong at worst. + * + * btmalloc - Allocate a buffer which is bolted in the HPT and (eventually) + * the segment table. + * + * Input : unsigned long size: bytes of storage to allocate. + * Return: void * : pointer to the kernel address of the buffer. + */ +void* btmalloc (unsigned long size) { + pgd_t *pgdp; + pmd_t *pmdp; + pte_t *ptep; + unsigned long ea_base, ea; + struct vm_struct *area; + unsigned long pa, pg_count, page, vsid; + + size = PAGE_ALIGN(size); + if (!size || (size >> PAGE_SHIFT) > num_physpages) return NULL; + + spin_lock(&btmalloc_mm.page_table_lock); + + /* Get a virtual address region in the bolted space */ + area = get_btm_area(size, 0); + if (!area) { + spin_unlock(&btmalloc_mm.page_table_lock); + return NULL; + } + + ea_base = (unsigned long) area->addr; + pg_count = (size >> PAGE_SHIFT); + + /* Create a Linux page table entry and an HPTE for each page */ + for(page = 0; page < pg_count; page++) { + pa = get_free_page(GFP_KERNEL) - PAGE_OFFSET; + ea = ea_base + (page * PAGE_SIZE); + + /* Get a pointer to the linux page table entry for this page + * allocating pmd or pte pages along the way as needed. Note + * that the pmd & pte pages are not themselves bolted. + */ + pgdp = pgd_offset_b(ea); + pmdp = pmd_alloc(&btmalloc_mm, pgdp, ea); + ptep = pte_alloc(&btmalloc_mm, pmdp, ea); + + /* Clear any old hpte and set the new linux pte */ + set_pte(ptep, mk_pte_phys(pa & PAGE_MASK, PAGE_KERNEL)); + + vsid = get_kernel_vsid(ea); + build_valid_hpte(vsid, ea, pa, ptep, + _PAGE_ACCESSED|_PAGE_COHERENT|PP_RWXX, 1); + } + + spin_unlock(&btmalloc_mm.page_table_lock); + return (void*)ea_base; +} + +/* + * Free a range of bolted pages that were allocated with btmalloc + */ +void btfree(void *ea) { + struct vm_struct **p, *tmp; + unsigned long size = 0; + + if ((!ea) || ((PAGE_SIZE-1) & (unsigned long)ea)) { + printk(KERN_ERR "Trying to btfree() bad address (%p)\n", ea); + return; + } + + spin_lock(&btmalloc_mm.page_table_lock); + + /* Scan the bolted memory list for an entry matching + * the address to be freed, get the size (in bytes) + * and free the entry. The list lock is not dropped + * until the page table entries are removed. 
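Assuming the interface behaves as the comment above documents (btmalloc() hands back a page-aligned buffer whose HPT entries are bolted, and btfree() takes the same address back), a hypothetical caller would look like the sketch below; the buffer name and size are illustrative only.

/* Hypothetical btmalloc/btfree user: a trace buffer that must never
 * take an HPT fault while it is being filled. */
extern void *btmalloc(unsigned long size);
extern void btfree(void *ea);

static unsigned long *example_trace_buf;  /* name is illustrative */

static int example_trace_setup(void)
{
	example_trace_buf = (unsigned long *)btmalloc(16 * PAGE_SIZE);
	if (!example_trace_buf)
		return -ENOMEM;
	example_trace_buf[0] = 0;  /* bolted: safe even in fault paths */
	return 0;
}

static void example_trace_teardown(void)
{
	btfree(example_trace_buf);  /* must be btmalloc's return value */
	example_trace_buf = NULL;
}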
+ */ + for(p = &btmlist; (tmp = *p); p = &tmp->next ) { + if ( tmp->addr == ea ) { + size = tmp->size; + break; + } + } + + /* If no entry found, it is an error */ + if ( !size ) { + printk(KERN_ERR "Trying to btfree() bad address (%p)\n", ea); + spin_unlock(&btmalloc_mm.page_table_lock); + return; + } + + /* Free up the bolted pages and remove the page table entries */ + if(local_free_bolted_pages((unsigned long)ea, size >> PAGE_SHIFT)) { + *p = tmp->next; + kfree(tmp); + } + + spin_unlock(&btmalloc_mm.page_table_lock); +} + +static int local_free_bolted_pages(unsigned long ea, unsigned long num) { + int i; + pte_t pte; + + for(i=0; inext) { + if (size + addr < (unsigned long) tmp->addr) + break; + addr = tmp->size + (unsigned long) tmp->addr; + if (addr + size > BTMALLOC_END) { + kfree(area); + return NULL; + } + } + + if (addr + size > BTMALLOC_END) { + kfree(area); + return NULL; + } + area->flags = flags; + area->addr = (void *)addr; + area->size = size; + area->next = *p; + *p = area; + return area; } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/ppc_asm.h linuxppc64_2_4/arch/ppc64/kernel/ppc_asm.h --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/ppc_asm.h Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/ppc_asm.h Wed Apr 10 12:22:27 2002 @@ -1,5 +1,5 @@ /* - * arch/ppc/kernel/ppc_asm.h + * arch/ppc64/kernel/ppc_asm.h * * Definitions used by various bits of low-level assembly code on PowerPC. * @@ -13,7 +13,7 @@ #include -#include "ppc_asm.tmpl" +#include #include "ppc_defs.h" /* diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/ppc_asm.tmpl linuxppc64_2_4/arch/ppc64/kernel/ppc_asm.tmpl --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/ppc_asm.tmpl Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/ppc_asm.tmpl Wed Dec 31 18:00:00 1969 @@ -1,115 +0,0 @@ -/* Condition Register Bit Fields */ - -#define cr0 0 -#define cr1 1 -#define cr2 2 -#define cr3 3 -#define cr4 4 -#define cr5 5 -#define cr6 6 -#define cr7 7 - - -/* General Purpose Registers (GPRs) */ - -#define r0 0 -#define r1 1 -#define r2 2 -#define r3 3 -#define r4 4 -#define r5 5 -#define r6 6 -#define r7 7 -#define r8 8 -#define r9 9 -#define r10 10 -#define r11 11 -#define r12 12 -#define r13 13 -#define r14 14 -#define r15 15 -#define r16 16 -#define r17 17 -#define r18 18 -#define r19 19 -#define r20 20 -#define r21 21 -#define r22 22 -#define r23 23 -#define r24 24 -#define r25 25 -#define r26 26 -#define r27 27 -#define r28 28 -#define r29 29 -#define r30 30 -#define r31 31 - - -/* Floating Point Registers (FPRs) */ - -#define fr0 0 -#define fr1 1 -#define fr2 2 -#define fr3 3 -#define fr4 4 -#define fr5 5 -#define fr6 6 -#define fr7 7 -#define fr8 8 -#define fr9 9 -#define fr10 10 -#define fr11 11 -#define fr12 12 -#define fr13 13 -#define fr14 14 -#define fr15 15 -#define fr16 16 -#define fr17 17 -#define fr18 18 -#define fr19 19 -#define fr20 20 -#define fr21 21 -#define fr22 22 -#define fr23 23 -#define fr24 24 -#define fr25 25 -#define fr26 26 -#define fr27 27 -#define fr28 28 -#define fr29 29 -#define fr30 30 -#define fr31 31 - -#define vr0 0 -#define vr1 1 -#define vr2 2 -#define vr3 3 -#define vr4 4 -#define vr5 5 -#define vr6 6 -#define vr7 7 -#define vr8 8 -#define vr9 9 -#define vr10 10 -#define vr11 11 -#define vr12 12 -#define vr13 13 -#define vr14 14 -#define vr15 15 -#define vr16 16 -#define vr17 17 -#define vr18 18 -#define vr19 19 -#define vr20 20 -#define vr21 21 -#define vr22 22 -#define vr23 23 -#define vr24 24 -#define vr25 25 -#define 
vr26 26 -#define vr27 27 -#define vr28 28 -#define vr29 29 -#define vr30 30 -#define vr31 31 diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/ppc_ksyms.c linuxppc64_2_4/arch/ppc64/kernel/ppc_ksyms.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/ppc_ksyms.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/ppc_ksyms.c Thu Apr 18 09:36:37 2002 @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -33,9 +32,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -74,7 +70,6 @@ long long __ashldi3(long long, int); long long __lshrdi3(long long, int); int abs(int); -extern unsigned long ret_to_user_hook; extern struct pci_dev * iSeries_veth_dev; extern struct pci_dev * iSeries_vio_dev; @@ -199,11 +194,10 @@ EXPORT_SYMBOL(iSeries_Read_Byte); EXPORT_SYMBOL(iSeries_Write_Byte); #endif /* CONFIG_PPC_ISERIES */ -#ifdef CONFIG_PPC_EEH +#ifndef CONFIG_PPC_ISERIES EXPORT_SYMBOL(eeh_check_failure); EXPORT_SYMBOL(eeh_total_mmio_ffs); -EXPORT_SYMBOL(eeh_total_mmio_reads); -#endif /* CONFIG_PPC_EEH */ +#endif /* CONFIG_PPC_ISERIES */ #endif /* CONFIG_PCI */ EXPORT_SYMBOL(iSeries_veth_dev); @@ -233,9 +227,6 @@ #endif #endif -#ifndef CONFIG_MACH_SPECIFIC -EXPORT_SYMBOL(_machine); -#endif EXPORT_SYMBOL(ppc_md); EXPORT_SYMBOL(find_devices); @@ -249,12 +240,7 @@ #ifndef CONFIG_PPC_ISERIES EXPORT_SYMBOL(kd_mksound); -EXPORT_SYMBOL_NOVERS(sys_ctrler); /* tibit */ #endif -#ifdef CONFIG_NVRAM -EXPORT_SYMBOL(nvram_read_byte); -EXPORT_SYMBOL(nvram_write_byte); -#endif /* CONFIG_NVRAM */ EXPORT_SYMBOL_NOVERS(__ashrdi3); EXPORT_SYMBOL_NOVERS(__ashldi3); @@ -300,7 +286,5 @@ #ifdef CONFIG_SMP EXPORT_SYMBOL(atomic_dec_and_lock); #endif - -EXPORT_SYMBOL(ret_to_user_hook); EXPORT_SYMBOL(tb_ticks_per_usec); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/proc_pmc.c linuxppc64_2_4/arch/ppc64/kernel/proc_pmc.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/proc_pmc.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/proc_pmc.c Tue Apr 9 11:23:18 2002 @@ -26,7 +26,7 @@ */ #include -#include +#include #include #include #include @@ -40,7 +40,7 @@ #include #include #include -#include +#include /* pci Flight Recorder AHT */ extern void proc_pciFr_init(struct proc_dir_entry *proc_ppc64_root); @@ -53,9 +53,9 @@ static struct proc_dir_entry *proc_ppc64_pmc_cpu_root[NR_CPUS] = {NULL, }; static spinlock_t proc_ppc64_lock; - -extern struct Naca *naca; - +static int proc_ppc64_page_read(char *page, char **start, off_t off, + int count, int *eof, void *data); +static void proc_ppc64_create_paca(int num, struct proc_dir_entry *paca_dir); int proc_ppc64_pmc_find_file(void *data); int proc_ppc64_pmc_read(char *page, char **start, off_t off, int count, int *eof, char *buffer); @@ -107,13 +107,18 @@ if (!proc_ppc64_root) return; spin_unlock(&proc_ppc64_lock); + /* /proc/ppc64/naca -- raw naca contents. Only readable to root */ + create_proc_read_entry("naca", S_IRUSR, proc_ppc64_root, proc_ppc64_page_read, naca); + /* /proc/ppc64/paca/XX -- raw paca contents. Only readable to root */ + ent = proc_mkdir("paca", proc_ppc64_root); + if (ent) { + for (i = 0; i < naca->processorCount; i++) + proc_ppc64_create_paca(i, ent); + } + /* Create the /proc/ppc64/pcifr for the Pci Flight Recorder. 
*/ proc_pciFr_init(proc_ppc64_root); -#ifdef CONFIG_PPC_EEH - eeh_init_proc(proc_ppc64_root); -#endif - proc_ppc64_pmc_root = proc_mkdir("pmc", proc_ppc64_root); proc_ppc64_pmc_system_root = proc_mkdir("system", proc_ppc64_pmc_root); @@ -184,6 +189,44 @@ } } +/* Read a page of raw data. "data" points to the start addr. + * Intended as a proc read function. + */ +static int proc_ppc64_page_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = PAGE_SIZE - off; + char *p = (char *)data; + + if (len > count) + len = count; + if (len <= 0) + return 0; + /* Rely on a "hack" in fs/proc/generic.c. + * If we could return a ptr to our own data this would be + * trivial (currently *start must be either an offset, or + * point into the given page). + */ + memcpy(page, p+off, len); + *start = (char *)len; + return len; +} + +/* NOTE: since paca data is always in flux the values will never be a consistent set. + * In theory it could be made consistent if we made the corresponding cpu + * copy the page for us (via an IPI). Probably not worth it. + * + */ +static void proc_ppc64_create_paca(int num, struct proc_dir_entry *paca_dir) +{ + struct proc_dir_entry *ent; + struct paca_struct *lpaca = paca + num; + char buf[16]; + + sprintf(buf, "%02x", num); + ent = create_proc_read_entry(buf, S_IRUSR, paca_dir, proc_ppc64_page_read, lpaca); +} + /* * Find the requested 'file' given a proc token. * @@ -392,7 +435,7 @@ len += sprintf( page+len, "\n events processed by processor:\n" ); for (i=0; i<naca->processorCount; ++i) { len += sprintf( page+len, " CPU%02d %10u\n", - i, xPaca[i].lpEvent_count ); + i, paca[i].lpEvent_count ); } return pmc_calc_metrics( page, start, off, count, eof, len ); @@ -594,7 +637,7 @@ proc_pmc_control_mode = PMC_CONTROL_CPI; /* Indicate to hypervisor that we are using the PMCs */ - ((struct Paca *)mfspr(SPRG3))->xLpPacaPtr->xPMCRegsInUse = 1; + get_paca()->xLpPacaPtr->xPMCRegsInUse = 1; /* Freeze all counters */ mtspr( MMCR0, 0x80000000 ); @@ -645,7 +688,7 @@ proc_pmc_control_mode = PMC_CONTROL_TLB; /* Indicate to hypervisor that we are using the PMCs */ - ((struct Paca *)mfspr(SPRG3))->xLpPacaPtr->xPMCRegsInUse = 1; + get_paca()->xLpPacaPtr->xPMCRegsInUse = 1; /* Freeze all counters */ mtspr( MMCR0, 0x80000000 ); @@ -695,9 +738,9 @@ v = proc_pmc_conv_int( buffer, count ); v = v & ~0x04000000; /* Don't allow interrupts for now */ if ( v & ~0x80000000 ) /* Inform hypervisor we are using PMCs */ - ((struct Paca *)mfspr(SPRG3))->xLpPacaPtr->xPMCRegsInUse = 1; + get_paca()->xLpPacaPtr->xPMCRegsInUse = 1; else - ((struct Paca *)mfspr(SPRG3))->xLpPacaPtr->xPMCRegsInUse = 0; + get_paca()->xLpPacaPtr->xPMCRegsInUse = 0; mtspr( MMCR0, v ); return count; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/process.c linuxppc64_2_4/arch/ppc64/kernel/process.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/process.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/process.c Tue Apr 9 11:23:18 2002 @@ -1,7 +1,5 @@ /* - * - * - * linux/arch/ppc/kernel/process.c + * linux/arch/ppc64/kernel/process.c * * Derived from "arch/i386/kernel/process.c" * Copyright (C) 1995 Linus Torvalds @@ -16,7 +14,6 @@ * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
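The get_paca() accessor that replaces the open-coded casts above is not shown in this diff; judging purely from the lines it replaces, it presumably reduces to the same SPRG3 read, roughly:

/* Sketch only: equivalent of the expression the patch removes.
 * The real definition lives in the (unshown) paca header. */
#define get_paca()  ((struct paca_struct *)mfspr(SPRG3))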
- * */ #include @@ -47,7 +44,6 @@ #include int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs); -extern unsigned long _get_SP(void); struct task_struct *last_task_used_math = NULL; static struct fs_struct init_fs = INIT_FS; @@ -73,84 +69,6 @@ extern char __toc_start; #undef SHOW_TASK_SWITCHES -#undef CHECK_STACK - -#if defined(CHECK_STACK) -unsigned long -kernel_stack_top(struct task_struct *tsk) -{ - return ((unsigned long)tsk) + sizeof(union task_union); -} - -unsigned long -task_top(struct task_struct *tsk) -{ - return ((unsigned long)tsk) + sizeof(struct task_struct); -} - -/* check to make sure the kernel stack is healthy */ -int check_stack(struct task_struct *tsk) -{ - unsigned long stack_top = kernel_stack_top(tsk); - unsigned long tsk_top = task_top(tsk); - int ret = 0; - -#if 0 - /* check thread magic */ - if ( tsk->thread.magic != THREAD_MAGIC ) - { - ret |= 1; - printk("thread.magic bad: %08x\n", tsk->thread.magic); - } -#endif - - if ( !tsk ) - printk("check_stack(): tsk bad tsk %p\n",tsk); - - /* check if stored ksp is bad */ - if ( (tsk->thread.ksp > stack_top) || (tsk->thread.ksp < tsk_top) ) - { - printk("stack out of bounds: %s/%d\n" - " tsk_top %08lx ksp %08lx stack_top %08lx\n", - tsk->comm,tsk->pid, - tsk_top, tsk->thread.ksp, stack_top); - ret |= 2; - } - - /* check if stack ptr RIGHT NOW is bad */ - if ( (tsk == current) && ((_get_SP() > stack_top ) || (_get_SP() < tsk_top)) ) - { - printk("current stack ptr out of bounds: %s/%d\n" - " tsk_top %08lx sp %08lx stack_top %08lx\n", - current->comm,current->pid, - tsk_top, _get_SP(), stack_top); - ret |= 4; - } - -#if 0 - /* check amount of free stack */ - for ( i = (unsigned long *)task_top(tsk) ; i < kernel_stack_top(tsk) ; i++ ) - { - if ( !i ) - printk("check_stack(): i = %p\n", i); - if ( *i != 0 ) - { - /* only notify if it's less than 900 bytes */ - if ( (i - (unsigned long *)task_top(tsk)) < 900 ) - printk("%d bytes free on stack\n", - i - task_top(tsk)); - break; - } - } -#endif - - if (ret) - { - panic("bad kernel stack"); - } - return(ret); -} -#endif /* defined(CHECK_STACK) */ void enable_kernel_fp(void) @@ -183,10 +101,6 @@ __save_flags(s); __cli(); -#if CHECK_STACK - check_stack(prev); - check_stack(new); -#endif #ifdef SHOW_TASK_SWITCHES printk("%s/%d -> %s/%d NIP %08lx cpu %d root %x/%x\n", @@ -465,7 +379,7 @@ void initialize_paca_hardware_interrupt_stack(void) { - extern struct Naca *naca; + extern struct naca_struct *naca; int i; unsigned long stack; @@ -482,8 +396,8 @@ /* Store the stack value in the PACA for the processor */ - xPaca[i].xHrdIntStack = stack + (8*PAGE_SIZE) - STACK_FRAME_OVERHEAD; - xPaca[i].xHrdIntCount = 0; + paca[i].xHrdIntStack = stack + (8*PAGE_SIZE) - STACK_FRAME_OVERHEAD; + paca[i].xHrdIntCount = 0; } @@ -496,7 +410,7 @@ for (i=0; i < naca->processorCount; i++) { /* set page at the top of stack to be protected - prevent overflow */ - end_of_stack = xPaca[i].xHrdIntStack - (8*PAGE_SIZE - STACK_FRAME_OVERHEAD); + end_of_stack = paca[i].xHrdIntStack - (8*PAGE_SIZE - STACK_FRAME_OVERHEAD); ppc_md.hpte_updateboltedpp(PP_RXRX,end_of_stack); } } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/prom.c linuxppc64_2_4/arch/ppc64/kernel/prom.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/prom.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/prom.c Mon Apr 22 16:50:19 2002 @@ -64,7 +64,7 @@ #include #include #include -#include +#include #include #include "open_pic.h" #include @@ -169,7 +169,6 @@ extern struct rtas_t rtas; extern unsigned long 
klimit; extern unsigned long embedded_sysmap_end; -extern struct Naca *naca; extern struct lmb lmb; #ifdef CONFIG_MSCHUNKS extern struct msChunks msChunks; @@ -339,7 +338,7 @@ unsigned long num_cpus = 0; unsigned long offset = reloc_offset(); struct prom_t *_prom = PTRRELOC(&prom); - struct Naca *_naca = RELOC(naca); + struct naca_struct *_naca = RELOC(naca); #ifdef DEBUG_PROM prom_print(RELOC("prom_initialize_naca: start...\n")); @@ -377,7 +376,7 @@ _naca->iCacheL1LogLineSize = __ilog2(size); _naca->iCacheL1LinesPerPage = PAGE_SIZE / size; - if (RELOC(_machine) == _MACH_pSeriesLP) { + if (_naca->platform == PLATFORM_PSERIES_LPAR) { u32 pft_size[2]; call_prom(RELOC("getprop"), 4, 1, node, RELOC("ibm,pft-size"), @@ -452,7 +451,7 @@ _naca->physicalMemorySize = lmb_phys_mem_size(); - if (RELOC(_machine) == _MACH_pSeries) { + if (_naca->platform == PLATFORM_PSERIES) { unsigned long rnd_mem_size, pteg_count; /* round mem_size up to next power of 2 */ @@ -523,8 +522,8 @@ prom_print_hex(_naca->interrupt_controller); prom_print_nl(); - prom_print(RELOC("_machine = 0x")); - prom_print_hex(RELOC(_machine)); + prom_print(RELOC("naca->platform = 0x")); + prom_print_hex(_naca->platform); prom_print_nl(); prom_print(RELOC("prom_initialize_naca: end...\n")); @@ -627,6 +626,7 @@ unsigned long offset = reloc_offset(); struct prom_t *_prom = PTRRELOC(&prom); struct rtas_t *_rtas = PTRRELOC(&rtas); + struct naca_struct *_naca = RELOC(naca); ihandle prom_rtas; u32 getprop_rval; @@ -643,7 +643,7 @@ RELOC("ibm,hypertas-functions"), hypertas_funcs, sizeof(hypertas_funcs))) > 0) { - RELOC(_machine) = _MACH_pSeriesLP; + _naca->platform = PLATFORM_PSERIES_LPAR; } call_prom(RELOC("getprop"), @@ -748,6 +748,7 @@ } } +#ifdef CONFIG_MSCHUNKS /* Now create phys -> abs mapping for IO */ for (i=0; i < _lmb->memory.cnt ;i++) { unsigned long base = _lmb->memory.region[i].base; @@ -763,6 +764,7 @@ PTRRELOC(_msChunks->abs)[pchunk++] = achunk++; } } +#endif /* CONFIG_MSCHUNKS */ return mem; } @@ -876,7 +878,7 @@ phandle node; ihandle phb_node; unsigned long offset = reloc_offset(); - char compatible[64], path[64], type[64]; + char compatible[64], path[64], type[64], model[64]; unsigned long i, table = 0; unsigned long base, vbase, align; unsigned int minalign, minsize; @@ -891,16 +893,29 @@ for (node = 0; prom_next_node(&node); ) { compatible[0] = 0; type[0] = 0; + model[0] = 0; call_prom(RELOC("getprop"), 4, 1, node, RELOC("compatible"), compatible, sizeof(compatible)); call_prom(RELOC("getprop"), 4, 1, node, RELOC("device_type"), type, sizeof(type)); + call_prom(RELOC("getprop"), 4, 1, node, RELOC("model"), + model, sizeof(model)); - if ((compatible[0] == 0) || - ((strstr(compatible, RELOC("python")) == NULL) && - (strstr(compatible, RELOC("Speedwagon")) == NULL))) { - continue; + /* Keep the old logic intact to avoid regression. 
*/ + if (compatible[0] != 0) { + if((strstr(compatible, RELOC("python")) == NULL) && + (strstr(compatible, RELOC("Speedwagon")) == NULL) && + (strstr(compatible, RELOC("Winnipeg")) == NULL)) + continue; + } else if (model[0] != 0) { + if ((strstr(model, RELOC("ython")) == NULL) && + (strstr(model, RELOC("peedwagon")) == NULL) && + (strstr(model, RELOC("innipeg")) == NULL)) + continue; + } else { + prom_print(RELOC("No known I/O bridge chip found.\n")); } + if ((type[0] == 0) || (strstr(type, RELOC("pci")) == NULL)) { continue; } @@ -1041,8 +1056,8 @@ unsigned long *spinloop = __v2a(&__secondary_hold_spinloop); unsigned long *acknowledge = __v2a(&__secondary_hold_acknowledge); unsigned long secondary_hold = (unsigned long)__v2a(*PTRRELOC((unsigned long *)__secondary_hold)); - struct Naca *_naca = RELOC(naca); - struct Paca *_xPaca = PTRRELOC(&xPaca[0]); + struct naca_struct *_naca = RELOC(naca); + struct paca_struct *_xPaca = PTRRELOC(&paca[0]); struct prom_t *_prom = PTRRELOC(&prom); /* Initially, we must have one active CPU. */ @@ -1235,12 +1250,12 @@ char *p, *d; unsigned long phys; u32 getprop_rval; - struct Naca *_naca = RELOC(naca); - struct Paca *_xPaca = PTRRELOC(&xPaca[0]); + struct naca_struct *_naca = RELOC(naca); + struct paca_struct *_xPaca = PTRRELOC(&paca[0]); struct prom_t *_prom = PTRRELOC(&prom); /* Default machine type. */ - RELOC(_machine) = _MACH_pSeries; + _naca->platform = PLATFORM_PSERIES; /* Reset klimit to take into account the embedded system map */ if (RELOC(embedded_sysmap_end)) RELOC(klimit) = __va(PAGE_ALIGN(RELOC(embedded_sysmap_end))); @@ -1412,7 +1427,7 @@ lmb_reserve(0, __pa(RELOC(klimit))); - if (RELOC(_machine) == _MACH_pSeries) + if (_naca->platform == PLATFORM_PSERIES) prom_initialize_tce_table(); if ((long) call_prom(RELOC("getprop"), 4, 1, diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/ptrace32.c linuxppc64_2_4/arch/ppc64/kernel/ptrace32.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/ptrace32.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/ptrace32.c Fri Mar 1 14:38:37 2002 @@ -226,9 +226,14 @@ { if (child->thread.regs->msr & MSR_FP) giveup_fpu(child); - } - tmp_reg_value = get_reg(child, numReg); - reg32bits = ((u32*)&tmp_reg_value)[part]; + if (numReg == PT_FPSCR) + tmp_reg_value = ((unsigned int *)child->thread.fpscr); + else + tmp_reg_value = ((unsigned long int *)child->thread.fpr)[numReg - PT_FPR0]; + } else { /* register within PT_REGS struct */ + tmp_reg_value = get_reg(child, numReg); + } + reg32bits = ((u32*)&tmp_reg_value)[part]; ret = put_user(reg32bits, (u32*)data); /* copy 4 bytes of data into the user location specified by the 8 byte pointer in "data". 
*/ break; } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/rtas-proc.c linuxppc64_2_4/arch/ppc64/kernel/rtas-proc.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/rtas-proc.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/rtas-proc.c Thu Apr 18 09:39:53 2002 @@ -200,7 +200,7 @@ struct proc_dir_entry *entry; rtas_node = find_devices("rtas"); - if ((rtas_node == 0) || (_machine == _MACH_iSeries)) { + if ((rtas_node == 0) || (naca->platform == PLATFORM_ISERIES_LPAR)) { return; } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/rtas.c linuxppc64_2_4/arch/ppc64/kernel/rtas.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/rtas.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/rtas.c Tue Apr 9 12:45:35 2002 @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/rtasd.c linuxppc64_2_4/arch/ppc64/kernel/rtasd.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/rtasd.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/rtasd.c Thu Apr 11 09:04:54 2002 @@ -121,11 +121,546 @@ release: rtas_log_release, }; + +#define RTAS_ERR KERN_ERR "RTAS: " + +/* Extended error log header (12 bytes) */ +struct exthdr { + unsigned int valid:1; + unsigned int unrecoverable:1; + unsigned int recoverable:1; + unsigned int unrecoverable_bypassed:1; /* i.e. degraded performance */ + unsigned int predictive:1; + unsigned int newlog:1; + unsigned int bigendian:1; /* always 1 */ + unsigned int /* reserved */:1; + + unsigned int platform_specific:1; /* only in version 3+ */ + unsigned int /* reserved */:3; + unsigned int platform_value:4; /* valid iff platform_specific */ + + unsigned int power_pc:1; /* always 1 */ + unsigned int /* reserved */:2; + unsigned int addr_invalid:1; /* failing_address is invalid */ + unsigned int format_type:4; +#define EXTLOG_FMT_CPU 1 +#define EXTLOG_FMT_MEMORY 2 +#define EXTLOG_FMT_IO 3 +#define EXTLOG_FMT_POST 4 +#define EXTLOG_FMT_ENV 5 +#define EXTLOG_FMT_POW 6 +#define EXTLOG_FMT_IBMDIAG 12 +#define EXTLOG_FMT_IBMSP 13 + + /* This group is in version 3+ only */ + unsigned int non_hardware:1; /* Firmware or software is suspect */ + unsigned int hot_plug:1; /* Failing component may be hot plugged */ + unsigned int group_failure:1; /* Group of components should be replaced */ + unsigned int /* reserved */:1; + + unsigned int residual:1; /* Residual error from previous boot (maybe a crash) */ + unsigned int boot:1; /* Error during boot */ + unsigned int config_change:1; /* Configuration changed since last boot */ + unsigned int post:1; /* Error during POST */ + + unsigned int bcdtime:32; /* Time of error in BCD HHMMSS00 */ + unsigned int bcddate:32; /* Time of error in BCD YYYYMMDD */ +}; + +struct cpuhdr { + unsigned int internal:1; + unsigned int intcache:1; + unsigned int extcache_parity:1; /* or multi-bit ECC */ + unsigned int extcache_ecc:1; + unsigned int sysbus_timeout:1; + unsigned int io_timeout:1; + unsigned int sysbus_parity:1; + unsigned int sysbus_protocol:1; + unsigned int cpuid:8; + unsigned int element:16; + unsigned int failing_address_hi:32; + unsigned int failing_address_lo:32; + + /* These are version 4+ */ + unsigned int try_reboot:1; /* 1 => fault may be fixed by reboot */ + unsigned int /* reserved */:7; + /* 15 bytes reserved here */ +}; + +struct memhdr { + unsigned int uncorrectable:1; + unsigned int ECC:1; + unsigned int threshold_exceeded:1; + unsigned int control_internal:1; + 
unsigned int bad_address:1; + unsigned int bad_data:1; + unsigned int bus:1; + unsigned int timeout:1; + unsigned int sysbus_parity:1; + unsigned int sysbus_timeout:1; + unsigned int sysbus_protocol:1; + unsigned int hostbridge_timeout:1; + unsigned int hostbridge_parity:1; + unsigned int reserved1:1; + unsigned int support:1; + unsigned int sysbus_internal:1; + unsigned int mem_controller_detected:8; /* who detected fault? */ + unsigned int mem_controller_faulted:8; /* who caused fault? */ + unsigned int failing_address_hi:32; + unsigned int failing_address_lo:32; + unsigned int ecc_syndrome:16; + unsigned int memory_card:8; + unsigned int reserved2:8; + unsigned int sub_elements:32; /* one bit per element */ + unsigned int element:16; +}; + +struct iohdr { + unsigned int bus_addr_parity:1; + unsigned int bus_data_parity:1; + unsigned int bus_timeout:1; + unsigned int bridge_internal:1; + unsigned int non_pci:1; /* i.e. secondary bus such as ISA */ + unsigned int mezzanine_addr_parity:1; + unsigned int mezzanine_data_parity:1; + unsigned int mezzanine_timeout:1; + + unsigned int bridge_via_sysbus:1; + unsigned int bridge_via_mezzanine:1; + unsigned int bridge_via_expbus:1; + unsigned int detected_by_expbus:1; + unsigned int expbus_data_parity:1; + unsigned int expbus_timeout:1; + unsigned int expbus_connection_failure:1; + unsigned int expbus_not_operating:1; + + /* IOA signalling the error */ + unsigned int pci_sig_busno:8; + unsigned int pci_sig_devfn:8; + unsigned int pci_sig_deviceid:16; + unsigned int pci_sig_vendorid:16; + unsigned int pci_sig_revisionid:8; + unsigned int pci_sig_slot:8; /* 00 => system board, ff => multiple */ + + /* IOA sending at time of error */ + unsigned int pci_send_busno:8; + unsigned int pci_send_devfn:8; + unsigned int pci_send_deviceid:16; + unsigned int pci_send_vendorid:16; + unsigned int pci_send_revisionid:8; + unsigned int pci_send_slot:8; /* 00 => system board, ff => multiple */ +}; + +struct posthdr { + unsigned int firmware:1; + unsigned int config:1; + unsigned int cpu:1; + unsigned int memory:1; + unsigned int io:1; + unsigned int keyboard:1; + unsigned int mouse:1; + unsigned int display:1; + + unsigned int ipl_floppy:1; + unsigned int ipl_controller:1; + unsigned int ipl_cdrom:1; + unsigned int ipl_disk:1; + unsigned int ipl_net:1; + unsigned int ipl_other:1; + unsigned int /* reserved */:1; + unsigned int firmware_selftest:1; + + char devname[12]; + unsigned int post_code:4; + unsigned int firmware_rev:2; + unsigned int loc_code:8; /* currently unused */ +}; + +struct epowhdr { + unsigned int epow_sensor_value:32; + unsigned int sensor:1; + unsigned int power_fault:1; + unsigned int fan:1; + unsigned int temp:1; + unsigned int redundancy:1; + unsigned int CUoD:1; + unsigned int /* reserved */:2; + + unsigned int general:1; + unsigned int power_loss:1; + unsigned int power_supply:1; + unsigned int power_switch:1; + unsigned int /* reserved */:4; + + unsigned int /* reserved */:16; + unsigned int sensor_token:32; + unsigned int sensor_index:32; + unsigned int sensor_value:32; + unsigned int sensor_status:32; +}; + +struct pm_eventhdr { + unsigned int event_id:32; +}; + +struct sphdr { + unsigned int ibm:32; /* "IBM\0" */ + + unsigned int timeout:1; + unsigned int i2c_bus:1; + unsigned int i2c_secondary_bus:1; + unsigned int sp_memory:1; + unsigned int sp_registers:1; + unsigned int sp_communication:1; + unsigned int sp_firmware:1; + unsigned int sp_hardware:1; + + unsigned int vpd_eeprom:1; + unsigned int op_panel:1; + unsigned int 
power_controller:1; + unsigned int fan_sensor:1; + unsigned int thermal_sensor:1; + unsigned int voltage_sensor:1; + unsigned int reserved1:2; + + unsigned int serial_port:1; + unsigned int nvram:1; + unsigned int rtc:1; + unsigned int jtag:1; + unsigned int tod_battery:1; + unsigned int reserved2:1; + unsigned int heartbeat:1; + unsigned int surveillance:1; + + unsigned int pcn_connection:1; /* power control network */ + unsigned int pcn_node:1; + unsigned int reserved3:2; + unsigned int pcn_access:1; + unsigned int reserved:3; + + unsigned int sensor_token:32; /* zero if undef */ + unsigned int sensor_index:32; /* zero if undef */ +}; + + +static char *severity_names[] = { + "NO ERROR", "EVENT", "WARNING", "ERROR_SYNC", "ERROR", "FATAL", "(6)", "(7)" +}; +static char *rtas_disposition_names[] = { + "FULLY RECOVERED", "LIMITED RECOVERY", "NOT RECOVERED", "(4)" +}; +static char *entity_names[] = { /* for initiator & targets */ + "UNKNOWN", "CPU", "PCI", "ISA", "MEMORY", "POWER MANAGEMENT", "HOT PLUG", "(7)", "(8)", + "(9)", "(10)", "(11)", "(12)", "(13)", "(14)", "(15)" +}; +static char *error_type[] = { /* Not all types covered here so need to bounds check */ + "UNKNOWN", "RETRY", "TCE_ERR", "INTERN_DEV_FAIL", + "TIMEOUT", "DATA_PARITY", "ADDR_PARITY", "CACHE_PARITY", + "ADDR_INVALID", "ECC_UNCORR", "ECC_CORR", +}; + +static char *rtas_error_type(int type) +{ + if (type < 11) + return error_type[type]; + if (type == 64) + return "SENSOR"; + if (type >=96 && type <= 159) + return "POWER"; + return error_type[0]; +} + +static void printk_cpu_failure(int version, struct exthdr *exthdr, char *data) +{ + struct cpuhdr cpuhdr; + + memcpy(&cpuhdr, data, sizeof(cpuhdr)); + + if (cpuhdr.internal) printk(RTAS_ERR "Internal error (not cache)\n"); + if (cpuhdr.intcache) printk(RTAS_ERR "Internal cache\n"); + if (cpuhdr.extcache_parity) printk(RTAS_ERR "External cache parity (or multi-bit)\n"); + if (cpuhdr.extcache_ecc) printk(RTAS_ERR "External cache ECC\n"); + if (cpuhdr.sysbus_timeout) printk(RTAS_ERR "System bus timeout\n"); + if (cpuhdr.io_timeout) printk(RTAS_ERR "I/O timeout\n"); + if (cpuhdr.sysbus_parity) printk(RTAS_ERR "System bus parity\n"); + if (cpuhdr.sysbus_protocol) printk(RTAS_ERR "System bus protocol/transfer\n"); + printk(RTAS_ERR "CPU id: %d\n", cpuhdr.cpuid); + printk(RTAS_ERR "Failing element: 0x%04x\n", cpuhdr.element); + if (!exthdr->addr_invalid) + printk(RTAS_ERR "Failing address: %08x%08x\n", cpuhdr.failing_address_hi, cpuhdr.failing_address_lo); + if (version >= 4 && cpuhdr.try_reboot) + printk(RTAS_ERR "A reboot of the system may correct the problem\n"); +} + +static void printk_mem_failure(int version, struct exthdr *exthdr, char *data) +{ + struct memhdr memhdr; + + memcpy(&memhdr, data, sizeof(memhdr)); + if (memhdr.uncorrectable) printk(RTAS_ERR "Uncorrectable Memory error\n"); + if (memhdr.ECC) printk(RTAS_ERR "ECC Correctable error\n"); + if (memhdr.threshold_exceeded) printk(RTAS_ERR "Correctable threshold exceeded\n"); + if (memhdr.control_internal) printk(RTAS_ERR "Memory Controller internal error\n"); + if (memhdr.bad_address) printk(RTAS_ERR "Memory Address error\n"); + if (memhdr.bad_data) printk(RTAS_ERR "Memory Data error\n"); + if (memhdr.bus) printk(RTAS_ERR "Memory bus/switch internal error\n"); + if (memhdr.timeout) printk(RTAS_ERR "Memory timeout\n"); + if (memhdr.sysbus_parity) printk(RTAS_ERR "System bus parity\n"); + if (memhdr.sysbus_timeout) printk(RTAS_ERR "System bus timeout\n"); + if (memhdr.sysbus_protocol) printk(RTAS_ERR "System bus 
protocol/transfer\n"); + if (memhdr.hostbridge_timeout) printk(RTAS_ERR "I/O Host Bridge timeout\n"); + if (memhdr.hostbridge_parity) printk(RTAS_ERR "I/O Host Bridge parity\n"); + if (memhdr.support) printk(RTAS_ERR "System support function error\n"); + if (memhdr.sysbus_internal) printk(RTAS_ERR "System bus internal hardware/switch error\n"); + printk(RTAS_ERR "Memory Controller that detected failure: %d\n", memhdr.mem_controller_detected); + printk(RTAS_ERR "Memory Controller that faulted: %d\n", memhdr.mem_controller_faulted); + if (!exthdr->addr_invalid) + printk(RTAS_ERR "Failing address: 0x%016x%016x\n", memhdr.failing_address_hi, memhdr.failing_address_lo); + printk(RTAS_ERR "ECC syndrome bits: 0x%04x\n", memhdr.ecc_syndrome); + printk(RTAS_ERR "Memory Card: %d\n", memhdr.memory_card); + printk(RTAS_ERR "Failing element: 0x%04x\n", memhdr.element); + printk(RTAS_ERR "Sub element bits: 0x%08x\n", memhdr.sub_elements); +} + +static void printk_io_failure(int version, struct exthdr *exthdr, char *data) +{ + struct iohdr iohdr; + + memcpy(&iohdr, data, sizeof(iohdr)); + if (iohdr.bus_addr_parity) printk(RTAS_ERR "I/O bus address parity\n"); + if (iohdr.bus_data_parity) printk(RTAS_ERR "I/O bus data parity\n"); + if (iohdr.bus_timeout) printk(RTAS_ERR "I/O bus timeout, access or other\n"); + if (iohdr.bridge_internal) printk(RTAS_ERR "I/O bus bridge/device internal\n"); + if (iohdr.non_pci) printk(RTAS_ERR "Signaling IOA is a PCI to non-PCI bridge (e.g. ISA)\n"); + if (iohdr.mezzanine_addr_parity) printk(RTAS_ERR "Mezzanine/System bus address parity\n"); + if (iohdr.mezzanine_data_parity) printk(RTAS_ERR "Mezzanine/System bus data parity\n"); + if (iohdr.mezzanine_timeout) printk(RTAS_ERR "Mezzanine/System bus timeout, transfer or protocol\n"); + if (iohdr.bridge_via_sysbus) printk(RTAS_ERR "Bridge is connected to system bus\n"); + if (iohdr.bridge_via_mezzanine) printk(RTAS_ERR "Bridge is connected to memory controller via mezzanine bus\n"); + if (iohdr.bridge_via_expbus) printk(RTAS_ERR "Bridge is connected to I/O expansion bus\n"); + if (iohdr.detected_by_expbus) printk(RTAS_ERR "Error on system bus detected by I/O expansion bus controller\n"); + if (iohdr.expbus_data_parity) printk(RTAS_ERR "I/O expansion bus data error\n"); + if (iohdr.expbus_timeout) printk(RTAS_ERR "I/O expansion bus timeout, access or other\n"); + if (iohdr.expbus_connection_failure) printk(RTAS_ERR "I/O expansion bus connection failure\n"); + if (iohdr.expbus_not_operating) printk(RTAS_ERR "I/O expansion unit not in an operating state (powered off, off-line)\n"); + + printk(RTAS_ERR "IOA Signaling the error: %d:%d.%d vendor:%04x device:%04x rev:%02x slot:%d\n", + iohdr.pci_sig_busno, iohdr.pci_sig_devfn >> 3, iohdr.pci_sig_devfn & 0x7, + iohdr.pci_sig_vendorid, iohdr.pci_sig_deviceid, iohdr.pci_sig_revisionid, iohdr.pci_sig_slot); + printk(RTAS_ERR "IOA Sending during the error: %d:%d.%d vendor:%04x device:%04x rev:%02x slot:%d\n", + iohdr.pci_send_busno, iohdr.pci_send_devfn >> 3, iohdr.pci_send_devfn & 0x7, + iohdr.pci_send_vendorid, iohdr.pci_send_deviceid, iohdr.pci_send_revisionid, iohdr.pci_send_slot); + +} + +static void printk_post_failure(int version, struct exthdr *exthdr, char *data) +{ + struct posthdr posthdr; + + memcpy(&posthdr, data, sizeof(posthdr)); + + if (posthdr.devname[0]) printk(RTAS_ERR "Failing Device: %s\n", posthdr.devname); + if (posthdr.firmware) printk(RTAS_ERR "Firmware Error\n"); + if (posthdr.config) printk(RTAS_ERR "Configuration Error\n"); + if (posthdr.cpu) printk(RTAS_ERR 
"CPU POST Error\n"); + if (posthdr.memory) printk(RTAS_ERR "Memory POST Error\n"); + if (posthdr.io) printk(RTAS_ERR "I/O Subsystem POST Error\n"); + if (posthdr.keyboard) printk(RTAS_ERR "Keyboard POST Error\n"); + if (posthdr.mouse) printk(RTAS_ERR "Mouse POST Error\n"); + if (posthdr.display) printk(RTAS_ERR "Display POST Error\n"); + + if (posthdr.ipl_floppy) printk(RTAS_ERR "Floppy IPL Error\n"); + if (posthdr.ipl_controller) printk(RTAS_ERR "Drive Controller Error during IPL\n"); + if (posthdr.ipl_cdrom) printk(RTAS_ERR "CDROM IPL Error\n"); + if (posthdr.ipl_disk) printk(RTAS_ERR "Disk IPL Error\n"); + if (posthdr.ipl_net) printk(RTAS_ERR "Network IPL Error\n"); + if (posthdr.ipl_other) printk(RTAS_ERR "Other (tape,flash) IPL Error\n"); + if (posthdr.firmware_selftest) printk(RTAS_ERR "Self-test error in firmware extended diagnostics\n"); + printk(RTAS_ERR "POST Code: %d\n", posthdr.post_code); + printk(RTAS_ERR "Firmware Revision Code: %d\n", posthdr.firmware_rev); +} + +static void printk_epow_warning(int version, struct exthdr *exthdr, char *data) +{ + struct epowhdr epowhdr; + + memcpy(&epowhdr, data, sizeof(epowhdr)); + printk(RTAS_ERR "EPOW Sensor Value: 0x%08x\n", epowhdr.epow_sensor_value); + if (epowhdr.sensor) { + printk(RTAS_ERR "EPOW detected by a sensor\n"); + printk(RTAS_ERR "Sensor Token: 0x%08x\n", epowhdr.sensor_token); + printk(RTAS_ERR "Sensor Index: 0x%08x\n", epowhdr.sensor_index); + printk(RTAS_ERR "Sensor Value: 0x%08x\n", epowhdr.sensor_value); + printk(RTAS_ERR "Sensor Status: 0x%08x\n", epowhdr.sensor_status); + } + if (epowhdr.power_fault) printk(RTAS_ERR "EPOW caused by a power fault\n"); + if (epowhdr.fan) printk(RTAS_ERR "EPOW caused by fan failure\n"); + if (epowhdr.temp) printk(RTAS_ERR "EPOW caused by over-temperature condition\n"); + if (epowhdr.redundancy) printk(RTAS_ERR "EPOW warning due to loss of redundancy\n"); + if (epowhdr.CUoD) printk(RTAS_ERR "EPOW warning due to CUoD Entitlement Exceeded\n"); + + if (epowhdr.general) printk(RTAS_ERR "EPOW general power fault\n"); + if (epowhdr.power_loss) printk(RTAS_ERR "EPOW power fault due to loss of power source\n"); + if (epowhdr.power_supply) printk(RTAS_ERR "EPOW power fault due to internal power supply failure\n"); + if (epowhdr.power_switch) printk(RTAS_ERR "EPOW power fault due to activation of power switch\n"); +} + +static void printk_pm_event(int version, struct exthdr *exthdr, char *data) +{ + struct pm_eventhdr pm_eventhdr; + + memcpy(&pm_eventhdr, data, sizeof(pm_eventhdr)); + printk(RTAS_ERR "Event id: 0x%08x\n", pm_eventhdr.event_id); +} + +static void printk_sp_log_msg(int version, struct exthdr *exthdr, char *data) +{ + struct sphdr sphdr; + u32 eyecatcher; + + memcpy(&sphdr, data, sizeof(sphdr)); + + eyecatcher = sphdr.ibm; + if (strcmp((char *)&eyecatcher, "IBM") != 0) + printk(RTAS_ERR "This log entry may be corrupt (IBM signature malformed)\n"); + if (sphdr.timeout) printk(RTAS_ERR "Timeout on communication response from service processor\n"); + if (sphdr.i2c_bus) printk(RTAS_ERR "I2C general bus error\n"); + if (sphdr.i2c_secondary_bus) printk(RTAS_ERR "I2C secondary bus error\n"); + if (sphdr.sp_memory) printk(RTAS_ERR "Internal service processor memory error\n"); + if (sphdr.sp_registers) printk(RTAS_ERR "Service processor error accessing special registers\n"); + if (sphdr.sp_communication) printk(RTAS_ERR "Service processor reports unknown communcation error\n"); + if (sphdr.sp_firmware) printk(RTAS_ERR "Internal service processor firmware error\n"); + if (sphdr.sp_hardware) 
printk(RTAS_ERR "Other internal service processor hardware error\n"); + if (sphdr.vpd_eeprom) printk(RTAS_ERR "Service processor error accessing VPD EEPROM\n"); + if (sphdr.op_panel) printk(RTAS_ERR "Service processor error accessing Operator Panel\n"); + if (sphdr.power_controller) printk(RTAS_ERR "Service processor error accessing Power Controller\n"); + if (sphdr.fan_sensor) printk(RTAS_ERR "Service processor error accessing Fan Sensor\n"); + if (sphdr.thermal_sensor) printk(RTAS_ERR "Service processor error accessing Thermal Sensor\n"); + if (sphdr.voltage_sensor) printk(RTAS_ERR "Service processor error accessing Voltage Sensor\n"); + if (sphdr.serial_port) printk(RTAS_ERR "Service processor error accessing serial port\n"); + if (sphdr.nvram) printk(RTAS_ERR "Service processor detected NVRAM error\n"); + if (sphdr.rtc) printk(RTAS_ERR "Service processor error accessing real time clock\n"); + if (sphdr.jtag) printk(RTAS_ERR "Service processor error accessing JTAG/COP\n"); + if (sphdr.tod_battery) printk(RTAS_ERR "Service processor or RTAS detects loss of voltage from TOD battery\n"); + if (sphdr.heartbeat) printk(RTAS_ERR "Loss of heartbeat from Service processor\n"); + if (sphdr.surveillance) printk(RTAS_ERR "Service processor detected a surveillance timeout\n"); + if (sphdr.pcn_connection) printk(RTAS_ERR "Power Control Network general connection failure\n"); + if (sphdr.pcn_node) printk(RTAS_ERR "Power Control Network node failure\n"); + if (sphdr.pcn_access) printk(RTAS_ERR "Service processor error accessing Power Control Network\n"); + + if (sphdr.sensor_token) printk(RTAS_ERR "Sensor Token 0x%08x (%d)\n", sphdr.sensor_token, sphdr.sensor_token); + if (sphdr.sensor_index) printk(RTAS_ERR "Sensor Index 0x%08x (%d)\n", sphdr.sensor_index, sphdr.sensor_index); +} + + +static void printk_ext_raw_data(char *data) +{ + int i; + printk(RTAS_ERR "raw ext data: "); + for (i = 0; i < 40; i++) { + printk("%02x", data[i]); + } + printk("\n"); +} + +static void printk_ext_log_data(int version, char *buf) +{ + char *data = buf+12; + struct exthdr exthdr; + memcpy(&exthdr, buf, sizeof(exthdr)); /* copy for alignment */ + if (!exthdr.valid) { + if (exthdr.bigendian && exthdr.power_pc) + printk(RTAS_ERR "extended log data is not valid\n"); + else + printk(RTAS_ERR "extended log data can not be decoded\n"); + return; + } + + /* Dump useful stuff in the exthdr */ + printk(RTAS_ERR "Status:%s%s%s%s%s\n", + exthdr.unrecoverable ? " unrecoverable" : "", + exthdr.recoverable ? " recoverable" : "", + exthdr.unrecoverable_bypassed ? " bypassed" : "", + exthdr.predictive ? " predictive" : "", + exthdr.newlog ? 
" new" : ""); + printk(RTAS_ERR "Date/Time: %08x %08x\n", exthdr.bcddate, exthdr.bcdtime); + switch (exthdr.format_type) { + case EXTLOG_FMT_CPU: + printk(RTAS_ERR "CPU Failure\n"); + printk_cpu_failure(version, &exthdr, data); + break; + case EXTLOG_FMT_MEMORY: + printk(RTAS_ERR "Memory Failure\n"); + printk_mem_failure(version, &exthdr, data); + break; + case EXTLOG_FMT_IO: + printk(RTAS_ERR "I/O Failure\n"); + printk_io_failure(version, &exthdr, data); + break; + case EXTLOG_FMT_POST: + printk(RTAS_ERR "POST Failure\n"); + printk_post_failure(version, &exthdr, data); + break; + case EXTLOG_FMT_ENV: + printk(RTAS_ERR "Environment and Power Warning\n"); + printk_epow_warning(version, &exthdr, data); + break; + case EXTLOG_FMT_POW: + printk(RTAS_ERR "Power Management Event\n"); + printk_pm_event(version, &exthdr, data); + break; + case EXTLOG_FMT_IBMDIAG: + printk(RTAS_ERR "IBM Diagnostic Log\n"); + printk_ext_raw_data(data); + break; + case EXTLOG_FMT_IBMSP: + printk(RTAS_ERR "IBM Service Processor Log\n"); + printk_sp_log_msg(version, &exthdr, data); + break; + default: + printk(RTAS_ERR "Unknown ext format type %d\n", exthdr.format_type); + printk_ext_raw_data(data); + break; + } +} + + +/* Yeah, the output here is ugly, but we want a CE to be + * able to grep RTAS /var/log/messages and see all the info + * collected together with obvious begin/end. + */ +static void printk_log_rtas(char *buf) +{ + struct rtas_error_log *err = (struct rtas_error_log *)buf; + + printk(RTAS_ERR "-------- event-scan begin --------\n"); + if (strcmp(buf+8+40, "IBM") == 0) { + /* Location code follows */ + char *loc = buf+8+40+4; + if (*loc >= 'A' && *loc <= 'Z') /* Sanity check */ + printk(RTAS_ERR "Location Code: %s\n", loc); + } + + printk(RTAS_ERR "%s: (%s) type: %s\n", + severity_names[err->severity], + rtas_disposition_names[err->disposition], + rtas_error_type(err->type)); + printk(RTAS_ERR "initiator: %s target: %s\n", + entity_names[err->initiator], entity_names[err->target]); + if (err->extended_log_length) + printk_ext_log_data(err->version, buf+8); + printk(RTAS_ERR "-------- event-scan end ----------\n"); +} + + static void log_rtas(char *buf) { unsigned long offset; DEBUG("logging rtas event\n"); + + /* Temporary -- perhaps we can do this when nobody has the log open? */ + printk_log_rtas(buf); spin_lock(&rtas_log_lock); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/semaphore.c linuxppc64_2_4/arch/ppc64/kernel/semaphore.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/semaphore.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/semaphore.c Mon Apr 8 09:43:30 2002 @@ -19,6 +19,7 @@ #include #include #include +#include /* * Atomically update sem->count. 
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/setup.c linuxppc64_2_4/arch/ppc64/kernel/setup.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/setup.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/setup.c Mon Apr 22 10:31:26 2002 @@ -31,8 +31,8 @@ #include #include #include -#include -#include +#include +#include #include #include @@ -75,15 +75,11 @@ unsigned long r6, unsigned long r7); int parse_bootinfo(void); -unsigned long DMA_MODE_READ, DMA_MODE_WRITE; -int _machine = _MACH_unknown; - #ifdef CONFIG_MAGIC_SYSRQ unsigned long SYSRQ_KEY; #endif /* CONFIG_MAGIC_SYSRQ */ struct machdep_calls ppc_md; -struct Naca *naca; /* * Perhaps we can put the pmac screen_info[] here @@ -118,34 +114,11 @@ */ void ppcdbg_initialize(void) { unsigned long offset = reloc_offset(); - struct Naca *_naca = RELOC(naca); + struct naca_struct *_naca = RELOC(naca); _naca->debug_switch = PPC_DEBUG_DEFAULT; /* | PPCDBG_BUSWALK | PPCDBG_PHBINIT | PPCDBG_MM | PPCDBG_MMINIT | PPCDBG_TCEINIT | PPCDBG_TCE */; } -/* - * Initialize a set of PACA's, one for each processor. - * - * At this point, relocation is on, but we have not done any other - * setup of the mm subsystem. - */ -void paca_init(void) { -#if 0 - int processorCount = naca->processorCount, i; - struct Paca *paca[]; - - /* Put the array of paca's on a page boundary & allocate 1/2 page of */ - /* storage for each. */ - klimit += (PAGE_SIZE-1) & PAGE_MASK; - naca->xPaca = paca[0] = klimit; - klimit += ((PAGE_SIZE>>1) * processorCount); - - for(i=0; ixPacaIndex = i; - } -#endif -} - /* * Do some initial setup of the system. The paramters are those which * were passed in from the bootloader. @@ -158,14 +131,15 @@ /* pSeries systems are identified in prom.c via OF. */ if ( itLpNaca.xLparInstalled == 1 ) - _machine = _MACH_iSeries; - switch (_machine) { - case _MACH_iSeries: + naca->platform = PLATFORM_ISERIES_LPAR; + + switch (naca->platform) { + case PLATFORM_ISERIES_LPAR: iSeries_init_early(); break; #ifdef CONFIG_PPC_PSERIES - case _MACH_pSeries: + case PLATFORM_PSERIES: pSeries_init_early(); #ifdef CONFIG_BLK_DEV_INITRD initrd_start = initrd_end = 0; @@ -173,7 +147,7 @@ parse_bootinfo(); break; - case _MACH_pSeriesLP: + case PLATFORM_PSERIES_LPAR: pSeriesLP_init_early(); #ifdef CONFIG_BLK_DEV_INITRD initrd_start = initrd_end = 0; @@ -244,15 +218,15 @@ udbg_puts("\n-----------------------------------------------------\n"); - if ( _machine & _MACH_pSeries ) { + if (naca->platform & PLATFORM_PSERIES) { finish_device_tree(); chrp_init(r3, r4, r5, r6, r7); } mm_init_ppc64(); - switch (_machine) { - case _MACH_iSeries: + switch (naca->platform) { + case PLATFORM_ISERIES_LPAR: iSeries_init(); break; default: @@ -300,14 +274,14 @@ return 0; #endif - pvr = xPaca[cpu_id].pvr; + pvr = paca[cpu_id].pvr; maj = (pvr >> 8) & 0xFF; min = pvr & 0xFF; seq_printf(m, "processor\t: %lu\n", cpu_id); seq_printf(m, "cpu\t\t: "); - pvr = xPaca[cpu_id].pvr; + pvr = paca[cpu_id].pvr; switch (PVR_VER(pvr)) { case PV_PULSAR: @@ -337,7 +311,7 @@ * Assume here that all clock rates are the same in a * smp system. -- Cort */ - if (_machine != _MACH_iSeries) { + if (naca->platform != PLATFORM_ISERIES_LPAR) { struct device_node *cpu_node; int *fp; @@ -566,6 +540,8 @@ ppc_md.progress("setup_arch: exit", 0x3eab); } +#ifdef CONFIG_IDE + /* Convert the shorts/longs in hd_driveid from little to big endian; * chars are endian independant, of course, but strings need to be flipped. 
* (Despite what it says in drivers/block/ide.h, they come up as little @@ -636,8 +612,12 @@ id->CurAPMvalues = __le16_to_cpu(id->CurAPMvalues); id->word92 = __le16_to_cpu(id->word92); id->hw_config = __le16_to_cpu(id->hw_config); - for (i = 0; i < 32; i++) - id->words94_125[i] = __le16_to_cpu(id->words94_125[i]); + id->acoustic = __le16_to_cpu(id->acoustic); + for (i = 0; i < 5; i++) + id->words95_99[i] = __le16_to_cpu(id->words95_99[i]); + id->lba_capacity_2 = __le64_to_cpu(id->lba_capacity_2); + for (i = 0; i < 21; i++) + id->words104_125[i] = __le16_to_cpu(id->words104_125[i]); id->last_lun = __le16_to_cpu(id->last_lun); id->word127 = __le16_to_cpu(id->word127); id->dlf = __le16_to_cpu(id->dlf); @@ -647,9 +627,16 @@ id->word156 = __le16_to_cpu(id->word156); for (i = 0; i < 3; i++) id->words157_159[i] = __le16_to_cpu(id->words157_159[i]); - for (i = 0; i < 96; i++) - id->words160_255[i] = __le16_to_cpu(id->words160_255[i]); + id->cfa_power=__le16_to_cpu(id->cfa_power); + for (i = 0; i < 15; i++) + id->words161_175[i] = __le16_to_cpu(id->words161_175[i]); + for (i = 0; i < 29; i++) + id->words176_205[i] = __le16_to_cpu(id->words176_205[i]); + for (i = 0; i < 48; i++) + id->words206_254[i] = __le16_to_cpu(id->words206_254[i]); + id->integrity_word=__le16_to_cpu(id->integrity_word); } +#endif void exception_trace(unsigned long trap) @@ -677,9 +664,9 @@ /* The parameter is the number of processors to share in processing lp events */ unsigned long i; unsigned long val = simple_strtoul( str, NULL, 0 ); - if ( ( val > 0 ) && ( val <= maxPacas ) ) { + if ( ( val > 0 ) && ( val <= MAX_PACAS ) ) { for ( i=1; idefault_decr = tb_ticks_per_jiffy / decr_overclock_proc0; - paca->next_jiffy_update_tb = get_tb() + tb_ticks_per_jiffy; + lpaca->default_decr = tb_ticks_per_jiffy / decr_overclock_proc0; + lpaca->next_jiffy_update_tb = get_tb() + tb_ticks_per_jiffy; } int set_decr_overclock_proc0( char * str ) diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/signal.c linuxppc64_2_4/arch/ppc64/kernel/signal.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/signal.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/signal.c Tue Apr 2 15:53:46 2002 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -787,8 +788,3 @@ PPCDBG(PPCDBG_SIGNAL, "do_signal - returning a signal was delivered \n"); return 1; } - - - - - diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/signal32.c linuxppc64_2_4/arch/ppc64/kernel/signal32.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/signal32.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/signal32.c Tue Apr 2 15:53:46 2002 @@ -47,7 +47,7 @@ #include #include #include -/* #include */ +#include #include #include #include @@ -64,35 +64,6 @@ * handler, even if the handler returns. */ #define MSR_USERCHANGE (MSR_FE0 | MSR_FE1) - - - - -/* Use this to get at 32-bit user passed pointers. */ -/* Things to consider: the low-level assembly stub does - srl x, 0, x for first four arguments, so if you have - pointer to something in the first four arguments, just - declare it as a pointer, not u32. On the other side, - arguments from 5th onwards should be declared as u32 - for pointers, and need AA() around each usage. - A() macro should be used for places where you e.g. - have some internal variable u32 and just want to get - rid of a compiler warning. AA() has to be used in - places where you want to convert a function argument - to 32bit pointer or when you e.g. 
access pt_regs - structure and want to consider 32bit registers only. - - - */ -#define A(__x) ((unsigned long)(__x)) -#define AA(__x) \ -({ unsigned long __ret; \ - __asm__ ("clrldi %0, %0, 32" \ - : "=r" (__ret) \ - : "0" (__x)); \ - __ret; \ -}) - - struct timespec32 { s32 tv_sec; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/smp.c linuxppc64_2_4/arch/ppc64/kernel/smp.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/smp.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/smp.c Thu Apr 18 13:27:00 2002 @@ -43,8 +43,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -78,8 +78,6 @@ void smp_call_function_interrupt(void); void smp_message_pass(int target, int msg, unsigned long data, int wait); static unsigned long iSeries_smp_message[NR_CPUS]; -extern struct Naca *naca; -extern struct Paca xPaca[]; void xics_setup_cpu(void); void xics_cause_IPI(int cpu); @@ -129,7 +127,7 @@ (target == i) || ((target == MSG_ALL_BUT_SELF) && (i != smp_processor_id())) ) { set_bit( msg, &iSeries_smp_message[i] ); - HvCall_sendIPI(&(xPaca[i])); + HvCall_sendIPI(&(paca[i])); } } } @@ -140,8 +138,8 @@ struct ItLpPaca * lpPaca; np = 0; - for (i=0; i < maxPacas; ++i) { - lpPaca = xPaca[i].xLpPacaPtr; + for (i=0; i < MAX_PACAS; ++i) { + lpPaca = paca[i].xLpPacaPtr; if ( lpPaca->xDynProcStatus < 2 ) { ++np; } @@ -156,11 +154,11 @@ struct ItLpPaca * lpPaca; np = 0; - for (i=0; i < maxPacas; ++i) { - lpPaca = xPaca[i].xLpPacaPtr; + for (i=0; i < MAX_PACAS; ++i) { + lpPaca = paca[i].xLpPacaPtr; if ( lpPaca->xDynProcStatus < 2 ) { ++np; - xPaca[i].next_jiffy_update_tb = xPaca[0].next_jiffy_update_tb; + paca[i].next_jiffy_update_tb = paca[0].next_jiffy_update_tb; } } @@ -173,18 +171,25 @@ struct ItLpPaca * lpPaca; /* Verify we have a Paca for processor nr */ if ( ( nr <= 0 ) || - ( nr >= maxPacas ) ) + ( nr >= MAX_PACAS ) ) return; /* Verify that our partition has a processor nr */ - lpPaca = xPaca[nr].xLpPacaPtr; + lpPaca = paca[nr].xLpPacaPtr; if ( lpPaca->xDynProcStatus >= 2 ) return; + + /* The information for processor bringup must + * be written out to main store before we release + * the processor. + */ + mb(); + /* The processor is currently spinning, waiting * for the xProcStart field to become non-zero * After we set xProcStart, the processor will * continue on to secondary_start in iSeries_head.S */ - xPaca[nr].xProcStart = 1; + paca[nr].xProcStart = 1; } static void smp_iSeries_setup_cpu(int nr) @@ -241,28 +246,44 @@ { /* Verify we have a Paca for processor nr */ if ( ( nr <= 0 ) || - ( nr >= maxPacas ) ) + ( nr >= MAX_PACAS ) ) return; + /* The information for processor bringup must + * be written out to main store before we release + * the processor. + */ + mb(); + /* The processor is currently spinning, waiting * for the xProcStart field to become non-zero * After we set xProcStart, the processor will * continue on to secondary_start in iSeries_head.S */ - xPaca[nr].xProcStart = 1; + paca[nr].xProcStart = 1; } extern struct gettimeofday_struct do_gtod; +static void smp_space_timers( unsigned nr ) +{ + unsigned long offset, i; + + offset = tb_ticks_per_jiffy / nr; + for ( i=1; iplatform == PLATFORM_PSERIES_LPAR) { /* timebases already synced under the hypervisor. */ - xPaca[cpu_nr].next_jiffy_update_tb = tb_last_stamp = get_tb(); + paca[cpu_nr].next_jiffy_update_tb = tb_last_stamp = get_tb(); if (cpu_nr == 0) { do_gtod.tb_orig_stamp = tb_last_stamp; /* Should update do_gtod.stamp_xsec. 
@@ -282,7 +303,7 @@ mb(); frozen = 1; set_tb(0, 0); - xPaca[0].next_jiffy_update_tb = 0; + paca[0].next_jiffy_update_tb = 0; smp_space_timers(smp_num_cpus); while (atomic_read(&ready) < smp_num_cpus) barrier(); @@ -391,17 +412,6 @@ void smp_send_reschedule(int cpu) { - /* - * This is only used if `cpu' is running an idle task, - * so it will reschedule itself anyway... - * - * This isn't the case anymore since the other CPU could be - * sleeping and won't reschedule until the next interrupt (such - * as the timer). - * -- Cort - */ - /* This is only used if `cpu' is running an idle task, - so it will reschedule itself anyway... */ smp_message_pass(cpu, PPC_MSG_RESCHEDULE, 0, 0); } @@ -430,7 +440,7 @@ * static memory requirements. It also looks cleaner. * Stolen from the i386 version. */ -static spinlock_t call_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; +static spinlock_t call_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; static struct call_data_struct { void (*func) (void *info); @@ -540,22 +550,11 @@ atomic_inc(&call_data->finished); } -static void smp_space_timers( unsigned nr ) -{ - unsigned long offset, i; - - offset = tb_ticks_per_jiffy / nr; - for ( i=1; iprof_counter=1; - paca->prof_multiplier = 1; + paca[i].prof_counter=1; + paca[i].prof_multiplier = 1; if(i != 0) { /* * Processor 0's segment table is statically @@ -588,10 +586,10 @@ * Other processor's tables are created and * initialized here. */ - paca->xStab_data.virt = (unsigned long)&stab_array[PAGE_SIZE * (i-1)]; - memset((void *)paca->xStab_data.virt, 0, PAGE_SIZE); - paca->xStab_data.real = __v2a(paca->xStab_data.virt); - paca->default_decr = tb_ticks_per_jiffy / decr_overclock; + paca[i].xStab_data.virt = (unsigned long)&stab_array[PAGE_SIZE * (i-1)]; + memset((void *)paca[i].xStab_data.virt, 0, PAGE_SIZE); + paca[i].xStab_data.real = __v2a(paca[i].xStab_data.virt); + paca[i].default_decr = tb_ticks_per_jiffy / decr_overclock; } } @@ -706,7 +704,7 @@ int cpu = current->processor; smp_store_cpu_info(cpu); - set_dec(xPaca[cpu].default_decr); + set_dec(paca[cpu].default_decr); cpu_callin_map[cpu] = 1; ppc_md.smp_setup_cpu(cpu); @@ -753,7 +751,7 @@ */ void __init smp_store_cpu_info(int id) { - xPaca[id].pvr = _get_PVR(); + paca[id].pvr = _get_PVR(); } static int __init maxcpus(char *str) diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/stab.c linuxppc64_2_4/arch/ppc64/kernel/stab.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/stab.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/stab.c Wed Apr 17 11:07:51 2002 @@ -14,15 +14,14 @@ #include #include #include -#include -#include +#include +#include #include inline int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid); inline void make_slbe(unsigned long esid, unsigned long vsid, int large); -extern struct Naca *naca; /* * Build an entry for the base kernel segment and put it into @@ -253,7 +252,7 @@ } /* Kernel or user address? 
*/ - if (REGION_ID(ea) >= KERNEL_REGION_ID) { + if (REGION_ID(ea)) { kernel_segment = 1; vsid = get_kernel_vsid( ea ); } else { @@ -331,7 +330,7 @@ entry++, ste++) { unsigned long ea; ea = ste->dw0.dw0.esid << SID_SHIFT; - if (STAB_PRESSURE || ea < KERNELBASE) { + if (STAB_PRESSURE || (!REGION_ID(ea))) { ste->dw0.dw0.v = 0; PMC_SW_PROCESSOR(stab_invalidations); } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/sys_ppc32.c linuxppc64_2_4/arch/ppc64/kernel/sys_ppc32.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/sys_ppc32.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/sys_ppc32.c Tue Apr 2 15:53:46 2002 @@ -64,6 +64,7 @@ #include #include #include +#include extern unsigned long wall_jiffies; #define USEC_PER_SEC (1000000) @@ -79,32 +80,6 @@ * handler, even if the handler returns. */ #define MSR_USERCHANGE (MSR_FE0 | MSR_FE1) - -/* Use this to get at 32-bit user passed pointers. */ -/* Things to consider: the low-level assembly stub does - srl x, 0, x for first four arguments, so if you have - pointer to something in the first four arguments, just - declare it as a pointer, not u32. On the other side, - arguments from 5th onwards should be declared as u32 - for pointers, and need AA() around each usage. - A() macro should be used for places where you e.g. - have some internal variable u32 and just want to get - rid of a compiler warning. AA() has to be used in - places where you want to convert a function argument - to 32bit pointer or when you e.g. access pt_regs - structure and want to consider 32bit registers only. - - - */ -#define A(__x) ((unsigned long)(__x)) -#define AA(__x) \ -({ unsigned long __ret; \ - __asm__ ("clrldi %0, %0, 32" \ - : "=r" (__ret) \ - : "0" (__x)); \ - __ret; \ -}) - - /* In order to reduce some races, while at the same time doing additional * checking and hopefully speeding things up, we copy filenames to the diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/time.c linuxppc64_2_4/arch/ppc64/kernel/time.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/time.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/time.c Tue Apr 9 11:23:18 2002 @@ -53,6 +53,7 @@ #include #include #include +#include #ifdef CONFIG_PPC_ISERIES #include #endif @@ -98,6 +99,10 @@ extern unsigned long prof_shift; extern char _stext; +void ppc_adjtimex(void); + +static unsigned adjusting_time = 0; + static inline void ppc_do_profile (unsigned long nip) { if (!prof_buffer) @@ -245,8 +250,8 @@ { int next_dec; unsigned long cur_tb; - struct Paca * paca = (struct Paca *)mfspr(SPRG3); - unsigned long cpu = paca->xPacaIndex; + struct paca_struct *lpaca = get_paca(); + unsigned long cpu = lpaca->xPacaIndex; struct ItLpQueue * lpq; irq_enter(cpu); @@ -256,30 +261,32 @@ ppc_do_profile(instruction_pointer(regs)); #endif - paca->xLpPaca.xIntDword.xFields.xDecrInt = 0; + lpaca->xLpPaca.xIntDword.xFields.xDecrInt = 0; - while (paca->next_jiffy_update_tb <= (cur_tb = get_tb())) { + while (lpaca->next_jiffy_update_tb <= (cur_tb = get_tb())) { #ifdef CONFIG_SMP smp_local_timer_interrupt(regs); #endif if (cpu == 0) { write_lock(&xtime_lock); - tb_last_stamp = paca->next_jiffy_update_tb; + tb_last_stamp = lpaca->next_jiffy_update_tb; do_timer(regs); timer_sync_xtime( cur_tb ); timer_check_rtc(); write_unlock(&xtime_lock); + if ( adjusting_time && (time_adjust == 0) ) + ppc_adjtimex(); } - paca->next_jiffy_update_tb += tb_ticks_per_jiffy; + lpaca->next_jiffy_update_tb += tb_ticks_per_jiffy; } - next_dec = paca->next_jiffy_update_tb - 
cur_tb; - if (next_dec > paca->default_decr) - next_dec = paca->default_decr; + next_dec = lpaca->next_jiffy_update_tb - cur_tb; + if (next_dec > lpaca->default_decr) + next_dec = lpaca->default_decr; set_dec(next_dec); - lpq = paca->lpQueuePtr; + lpq = lpaca->lpQueuePtr; if (lpq && ItLpQueue_isLpIntPending(lpq)) lpEvent_count += ItLpQueue_process(lpq, regs); @@ -448,6 +455,7 @@ tb_last_stamp = get_tb(); do_gtod.tb_orig_stamp = tb_last_stamp; do_gtod.varp = &do_gtod.vars[0]; + do_gtod.var_idx = 0; do_gtod.varp->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC; do_gtod.tb_ticks_per_sec = tb_ticks_per_sec; do_gtod.varp->tb_to_xs = tb_to_xs; @@ -456,6 +464,8 @@ xtime_sync_interval = tb_ticks_per_sec - (tb_ticks_per_sec/8); next_xtime_sync_tb = tb_last_stamp + xtime_sync_interval; + time_freq = 0; + xtime.tv_usec = 0; last_rtc_update = xtime.tv_sec; write_unlock_irqrestore(&xtime_lock, flags); @@ -474,10 +484,12 @@ * to microseconds to keep do_gettimeofday synchronized * with ntpd. - * Use the time_freq and time_offset computed by adjtimex to + * Use the time_adjust, time_freq and time_offset computed by adjtimex to * adjust the frequency. */ +/* #define DEBUG_PPC_ADJTIMEX 1 */ + void ppc_adjtimex(void) { unsigned long den, new_tb_ticks_per_sec, tb_ticks, old_xsec, new_tb_to_xs, new_xsec, new_stamp_xsec; @@ -486,7 +498,12 @@ struct div_result divres; unsigned long flags; struct gettimeofday_vars * temp_varp; + unsigned temp_idx; + long singleshot_ppm = 0; + /* Compute parts per million frequency adjustment to accomplish the time adjustment + implied by time_offset to be applied over the elapsed time indicated by time_constant. + Use SHIFT_USEC to get it into the same units as time_freq. */ if ( time_offset < 0 ) { ltemp = -time_offset; ltemp <<= SHIFT_USEC - SHIFT_UPDATE; @@ -498,8 +515,40 @@ ltemp <<= SHIFT_USEC - SHIFT_UPDATE; ltemp >>= SHIFT_KG + time_constant; } - delta_freq = time_freq + ltemp; - + + /* If there is a single shot time adjustment in progress */ + if ( time_adjust ) { +#ifdef DEBUG_PPC_ADJTIMEX + printk("ppc_adjtimex: "); + if ( adjusting_time == 0 ) + printk("starting "); + printk("single shot time_adjust = %ld\n", time_adjust); +#endif + + adjusting_time = 1; + + /* Compute parts per million frequency adjustment to match time_adjust */ + singleshot_ppm = tickadj * HZ; + /* The adjustment should be tickadj*HZ to match the code in linux/kernel/timer.c, but + experiments show that this is too large. 
3/4 of tickadj*HZ seems about right */ + singleshot_ppm -= singleshot_ppm / 4; + /* Use SHIFT_USEC to get it into the same units as time_freq */ + singleshot_ppm <<= SHIFT_USEC; + if ( time_adjust < 0 ) + singleshot_ppm = -singleshot_ppm; + } + else { +#ifdef DEBUG_PPC_ADJTIMEX + if ( adjusting_time ) + printk("ppc_adjtimex: ending single shot time_adjust\n"); +#endif + adjusting_time = 0; + } + + /* Add up all of the frequency adjustments */ + delta_freq = time_freq + ltemp + singleshot_ppm; + + /* Compute a new value for tb_ticks_per_sec based on the frequency adjustment */ den = 1000000 * (1 << (SHIFT_USEC - 8)); if ( delta_freq < 0 ) { tb_ticks_per_sec_delta = ( tb_ticks_per_sec * ( (-delta_freq) >> (SHIFT_USEC - 8))) / den; @@ -509,6 +558,16 @@ tb_ticks_per_sec_delta = ( tb_ticks_per_sec * ( delta_freq >> (SHIFT_USEC - 8))) / den; new_tb_ticks_per_sec = tb_ticks_per_sec - tb_ticks_per_sec_delta; } + +#ifdef DEBUG_PPC_ADJTIMEX + printk("ppc_adjtimex: ltemp = %ld, time_freq = %ld, singleshot_ppm = %ld\n", ltemp, time_freq, singleshot_ppm); + printk("ppc_adjtimex: tb_ticks_per_sec - base = %ld new = %ld\n", tb_ticks_per_sec, new_tb_ticks_per_sec); +#endif + + /* Compute a new value of tb_to_xs (used to convert tb to microseconds and a new value of + stamp_xsec which is the time (in 1/2^20 second units) corresponding to tb_orig_stamp. This + new value of stamp_xsec compensates for the change in frequency (implied by the new tb_to_xs) + which guarantees that the current time remains the same */ tb_ticks = get_tb() - do_gtod.tb_orig_stamp; div128_by_32( 1024*1024, 0, new_tb_ticks_per_sec, &divres ); new_tb_to_xs = divres.result_low; @@ -518,14 +577,23 @@ old_xsec = mulhdu( tb_ticks, do_gtod.varp->tb_to_xs ); new_stamp_xsec = do_gtod.varp->stamp_xsec + old_xsec - new_xsec; - if (do_gtod.varp == &do_gtod.vars[0]) + /* There are two copies of tb_to_xs and stamp_xsec so that no lock is needed to access and use these + values in do_gettimeofday. We alternate the copies and as long as a reasonable time elapses between + changes, there will never be inconsistent values. ntpd has a minimum of one minute between updates */ + + if (do_gtod.var_idx == 0) { temp_varp = &do_gtod.vars[1]; - else + temp_idx = 1; + } + else { temp_varp = &do_gtod.vars[0]; + temp_idx = 0; + } temp_varp->tb_to_xs = new_tb_to_xs; temp_varp->stamp_xsec = new_stamp_xsec; mb(); do_gtod.varp = temp_varp; + do_gtod.var_idx = temp_idx; write_unlock_irqrestore( &xtime_lock, flags ); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/traps.c linuxppc64_2_4/arch/ppc64/kernel/traps.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/traps.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/traps.c Wed Apr 10 08:13:48 2002 @@ -44,6 +44,9 @@ extern int fix_alignment(struct pt_regs *); extern void bad_page_fault(struct pt_regs *, unsigned long); +/* This is true if we are using the firmware NMI handler (typically LPAR) */ +extern int fwnmi_active; + #ifdef CONFIG_XMON extern void xmon(struct pt_regs *regs); extern int xmon_bpt(struct pt_regs *regs); @@ -98,13 +101,55 @@ force_sig(signr, current); } +/* Get the error information for errors coming through the + * FWNMI vectors. The pt_regs' r3 will be updated to reflect + * the actual r3 if possible, and a ptr to the error log entry + * will be returned if found. 
+ */ +static struct rtas_error_log *FWNMI_get_errinfo(struct pt_regs *regs) +{ + unsigned long errdata = regs->gpr[3]; + struct rtas_error_log *errhdr = NULL; + unsigned long *savep; + + if ((errdata >= 0x7000 && errdata < 0x7fff0) || + (errdata >= rtas.base && errdata < rtas.base + rtas.size - 16)) { + savep = __va(errdata); + regs->gpr[3] = savep[0]; /* restore original r3 */ + errhdr = (struct rtas_error_log *)(savep + 1); + } else { + printk("FWNMI: corrupt r3\n"); + } + return errhdr; +} + +/* Call this when done with the data returned by FWNMI_get_errinfo. + * It will release the saved data area for other CPUs in the + * partition to receive FWNMI errors. + */ +static void FWNMI_release_errinfo(void) +{ + unsigned long ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL); + if (ret != 0) + printk("FWNMI: nmi-interlock failed: %ld\n", ret); +} + void SystemResetException(struct pt_regs *regs) { - udbg_printf("System Reset in kernel mode.\n"); - printk("System Reset in kernel mode.\n"); + char *msg = "System Reset in kernel mode.\n"; + udbg_printf(msg); printk(msg); + if (fwnmi_active) { + unsigned long *r3 = __va(regs->gpr[3]); /* for FWNMI debug */ + struct rtas_error_log *errlog; + + msg = "FWNMI is active with save area at %016lx\n"; + udbg_printf(msg, r3); printk(msg, r3); + errlog = FWNMI_get_errinfo(regs); + } #if defined(CONFIG_XMON) xmon(regs); + udbg_printf("leaving xmon...\n"); #endif for(;;); } @@ -113,6 +158,13 @@ void MachineCheckException(struct pt_regs *regs) { + if (fwnmi_active) { + struct rtas_error_log *errhdr = FWNMI_get_errinfo(regs); + if (errhdr) { + /* ToDo: attempt to recover from some errors here */ + } + FWNMI_release_errinfo(); + } if ( !user_mode(regs) ) { #if defined(CONFIG_XMON) || defined(CONFIG_KGDB) diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/udbg.c linuxppc64_2_4/arch/ppc64/kernel/udbg.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/udbg.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/udbg.c Thu Apr 18 09:40:31 2002 @@ -19,13 +19,10 @@ #define WANT_PPCDBG_TAB /* Only defined here */ #include #include -#include +#include #include #include -extern struct Naca *naca; -extern int _machine; - struct NS16550 { /* this struct must be packed */ unsigned char rbr; /* 0 */ @@ -87,7 +84,7 @@ /* wait for idle */; udbg_comport->thr = '\r'; eieio(); } - } else if ( _machine == _MACH_iSeries ) { + } else if (naca->platform == PLATFORM_ISERIES_LPAR) { /* ToDo: switch this via ppc_md */ printk("%c", c); } @@ -181,7 +178,7 @@ void udbg_printSP(const char *s) { - if (_machine == _MACH_pSeries) { + if (naca->platform == PLATFORM_PSERIES) { unsigned long sp; asm("mr %0,1" : "=r" (sp) :); if (s) @@ -209,10 +206,10 @@ /* Special print used by PPCDBG() macro */ void -udbg_ppcdbg(unsigned long flags, const char *fmt, ...) +udbg_ppcdbg(unsigned long debug_flags, const char *fmt, ...) 
{ unsigned long flags; - unsigned long active_debugs = flags & naca->debug_switch; + unsigned long active_debugs = debug_flags & naca->debug_switch; if ( active_debugs ) { va_list ap; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/kernel/xics.c linuxppc64_2_4/arch/ppc64/kernel/xics.c --- ../kernel.org/linux-2.4.19/arch/ppc64/kernel/xics.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/kernel/xics.c Thu Apr 18 09:40:17 2002 @@ -17,14 +17,12 @@ #include #include #include -#include +#include #include #include "i8259.h" #include "xics.h" #include -extern struct Naca *naca; - void xics_enable_irq(u_int irq); void xics_disable_irq(u_int irq); void xics_mask_and_ack_irq(u_int irq); @@ -256,6 +254,12 @@ mb(); smp_message_recv(PPC_MSG_RESCHEDULE, regs); } +#ifdef CONFIG_XMON + if (test_and_clear_bit(PPC_MSG_XMON_BREAK, &xics_ipi_message[cpu])) { + mb(); + smp_message_recv(PPC_MSG_XMON_BREAK, regs); + } +#endif } } @@ -355,7 +359,7 @@ xics_irq_8259_cascade = virt_irq_create_mapping(xics_irq_8259_cascade_real); } - if (_machine == _MACH_pSeries) { + if (naca->platform == PLATFORM_PSERIES) { #ifdef CONFIG_SMP for (i = 0; i < naca->processorCount; ++i) { xics_info.per_cpu[i] = @@ -369,7 +373,7 @@ /* actually iSeries does not use any of xics...but it has link dependencies * for now, except this new one... */ - } else if (_machine == _MACH_pSeriesLP) { + } else if (naca->platform == PLATFORM_PSERIES_LPAR) { ops = &pSeriesLP_ops; #endif } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/lib/checksum.S linuxppc64_2_4/arch/ppc64/lib/checksum.S --- ../kernel.org/linux-2.4.19/arch/ppc64/lib/checksum.S Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/lib/checksum.S Wed Apr 10 12:22:43 2002 @@ -15,7 +15,7 @@ #include #include #include -#include "../kernel/ppc_asm.tmpl" +#include .text diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/lib/string.S linuxppc64_2_4/arch/ppc64/lib/string.S --- ../kernel.org/linux-2.4.19/arch/ppc64/lib/string.S Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/lib/string.S Wed Apr 10 12:22:43 2002 @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include "../kernel/ppc_asm.tmpl" +#include #include #include @@ -192,91 +192,6 @@ 8: stbu r4,1(r6) bdnz 8b blr - -_GLOBAL(bcopy) - mr r6,r3 - mr r3,r4 - mr r4,r6 - b .memcpy - -/* - * This version uses dcbz on the complete cache lines in the - * destination area to reduce memory traffic. This requires that - * the destination area is cacheable. - * We only use this version if the source and dest don't overlap. - * -- paulus. - */ -_GLOBAL(cacheable_memcpy) - add r7,r3,r5 /* test if the src & dst overlap */ - add r8,r4,r5 - cmplw 0,r4,r7 - cmplw 1,r3,r8 - crand 0,0,4 /* cr0.lt &= cr1.lt */ - blt .memcpy /* if regions overlap */ - - addi r4,r4,-4 - addi r6,r3,-4 - neg r0,r3 - andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ - beq 58f - - cmplw 0,r5,r0 /* is this more than total to do? */ - blt 63f /* if not much to do */ - andi. r8,r0,3 /* get it word-aligned first */ - subf r5,r0,r5 - mtctr r8 - beq+ 61f -70: lbz r9,4(r4) /* do some bytes */ - stb r9,4(r6) - addi r4,r4,1 - addi r6,r6,1 - bdnz 70b -61: srwi. r0,r0,2 - mtctr r0 - beq 58f -72: lwzu r9,4(r4) /* do some words */ - stwu r9,4(r6) - bdnz 72b - -58: srwi. 
r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ - clrlwi r5,r5,32-LG_CACHELINE_BYTES - li r11,4 - mtctr r0 - beq 63f -53: - dcbz r11,r6 - COPY_16_BYTES -#if CACHE_LINE_SIZE >= 32 - COPY_16_BYTES -#if CACHE_LINE_SIZE >= 64 - COPY_16_BYTES - COPY_16_BYTES -#if CACHE_LINE_SIZE >= 128 - COPY_16_BYTES - COPY_16_BYTES - COPY_16_BYTES - COPY_16_BYTES -#endif -#endif -#endif - bdnz 53b - -63: srwi. r0,r5,2 - mtctr r0 - beq 64f -30: lwzu r0,4(r4) - stwu r0,4(r6) - bdnz 30b - -64: andi. r0,r5,3 - mtctr r0 - beq+ 65f -40: lbz r0,4(r4) - stb r0,4(r6) - addi r4,r4,1 - addi r6,r6,1 - bdnz 40b -65: blr _GLOBAL(memmove) cmplw 0,r3,r4 diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/mm/extable.c linuxppc64_2_4/arch/ppc64/mm/extable.c --- ../kernel.org/linux-2.4.19/arch/ppc64/mm/extable.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/mm/extable.c Mon Apr 8 19:57:28 2002 @@ -41,8 +41,22 @@ { unsigned long ret; +#ifndef CONFIG_MODULES + /* There is only the kernel to search. */ ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr); if (ret) return ret; +#else + /* The kernel is the last "module" -- no need to treat it special. */ + struct module *mp; + for (mp = module_list; mp != NULL; mp = mp->next) { + if (mp->ex_table_start == NULL) + continue; + ret = search_one_table(mp->ex_table_start, + mp->ex_table_end - 1, addr); + if (ret) + return ret; + } +#endif return 0; } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/mm/init.c linuxppc64_2_4/arch/ppc64/mm/init.c --- ../kernel.org/linux-2.4.19/arch/ppc64/mm/init.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/mm/init.c Tue Apr 9 11:24:07 2002 @@ -57,10 +57,8 @@ #include #include #include -#include -#ifdef CONFIG_PPC_EEH +#include #include -#endif #include @@ -85,7 +83,6 @@ extern char _start[], _end[]; extern char _stext[], etext[]; extern struct task_struct *current_set[NR_CPUS]; -extern struct Naca *naca; void mm_init_ppc64(void); @@ -134,7 +131,6 @@ { int i,free = 0,total = 0,reserved = 0; int shared = 0, cached = 0; - struct task_struct *p; printk("Mem-info:\n"); show_free_areas(); @@ -158,49 +154,6 @@ printk("%d pages swap cached\n",cached); printk("%d pages in page table cache\n",(int)pgtable_cache_size); show_buffers(); - printk("%-8s %3s %8s %8s %8s %9s %8s", "Process", "Pid", - "Ctx", "Ctx<<4", "Last Sys", "pc", "task"); -#ifdef CONFIG_SMP - printk(" %3s", "CPU"); -#endif /* CONFIG_SMP */ - printk("\n"); - for_each_task(p) - { - printk("%-8.8s %3d %8ld %8ld %8ld %c%08lx %08lx ", - p->comm,p->pid, - (p->mm)?p->mm->context:0, - (p->mm)?(p->mm->context<<4):0, - p->thread.last_syscall, - (p->thread.regs)?user_mode(p->thread.regs) ? 'u' : 'k' : '?', - (p->thread.regs)?p->thread.regs->nip:0, - (ulong)p); - { - int iscur = 0; -#ifdef CONFIG_SMP - printk("%3d ", p->processor); - if ( (p->processor != NO_PROC_ID) && - (p == current_set[p->processor]) ) - { - iscur = 1; - printk("current"); - } -#else - if ( p == current ) - { - iscur = 1; - printk("current"); - } - - if ( p == last_task_used_math ) - { - if ( iscur ) - printk(","); - printk("last math"); - } -#endif /* CONFIG_SMP */ - printk("\n"); - } - } } void si_meminfo(struct sysinfo *val) @@ -220,13 +173,11 @@ #ifdef CONFIG_PPC_ISERIES return (void*)addr; #else -#ifdef CONFIG_PPC_EEH if(mem_init_done && (addr >> 60UL)) { if (IS_EEH_TOKEN_DISABLED(addr)) return IO_TOKEN_TO_ADDR(addr); return (void*)addr; /* already mapped address or EEH token. 
*/ } -#endif return __ioremap(addr, size, _PAGE_NO_CACHE); #endif } @@ -323,7 +274,6 @@ } } -#if 0 void local_flush_tlb_all(void) { @@ -332,7 +282,6 @@ */ local_flush_tlb_range( NULL, VMALLOC_START, VMALLOC_END ); } -#endif void local_flush_tlb_mm(struct mm_struct *mm) @@ -349,7 +298,6 @@ local_flush_tlb_range( mm, USER_START, USER_END ); } - /* * Callers should hold the mm->page_table_lock */ @@ -360,7 +308,6 @@ pgd_t *pgd; pmd_t *pmd; pte_t *ptep; - pte_t pte; switch( REGION_ID(vmaddr) ) { case VMALLOC_REGION_ID: @@ -378,16 +325,13 @@ } - if (!pgd_none(*pgd)) { pmd = pmd_offset(pgd, vmaddr); if (!pmd_none(*pmd)) { ptep = pte_offset(pmd, vmaddr); /* Check if HPTE might exist and flush it if so */ - pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0)); - if ( pte_val(pte) & _PAGE_HASHPTE ) { - flush_hash_page(context, vmaddr, pte); - } + if (pte_val(*ptep) & _PAGE_HASHPTE) + flush_hash_page(context, vmaddr, ptep); } } } @@ -398,7 +342,6 @@ pgd_t *pgd; pmd_t *pmd; pte_t *ptep; - pte_t pte; unsigned long pgd_end, pmd_end; unsigned long context; @@ -439,11 +382,8 @@ if ( !pmd_none( *pmd ) ) { ptep = pte_offset( pmd, start ); do { - if ( pte_val(*ptep) & _PAGE_HASHPTE ) { - pte = __pte(pte_update(ptep, _PAGE_HPTEFLAGS, 0)); - if ( pte_val(pte) & _PAGE_HASHPTE ) - flush_hash_page( context, start, pte ); - } + if ( pte_val(*ptep) & _PAGE_HASHPTE ) + flush_hash_page( context, start, ptep ); start += PAGE_SIZE; ++ptep; } while ( start < pmd_end ); @@ -500,7 +440,7 @@ * Do very early mm setup. */ void __init mm_init_ppc64(void) { - struct Paca *paca; + struct paca_struct *lpaca; unsigned long guard_page, index; ppc_md.progress("MM:init", 0); @@ -519,8 +459,8 @@ /* Setup guard pages for the Paca's */ for (index = 0; index < NR_CPUS; index++) { - paca = &xPaca[index]; - guard_page = ((unsigned long)paca) + 0x1000; + lpaca = &paca[index]; + guard_page = ((unsigned long)lpaca) + 0x1000; ppc_md.hpte_updateboltedpp(PP_RXRX, guard_page); } @@ -611,6 +551,8 @@ extern unsigned long dprof_len; extern unsigned int * dprof_buffer; +void initialize_paca_hardware_interrupt_stack(void); + void __init mem_init(void) { extern char *sysmap; @@ -620,6 +562,7 @@ int datapages = 0; int initpages = 0; unsigned long va_rtas_base = (unsigned long)__va(rtas.base); + max_mapnr = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); num_physpages = max_mapnr; /* RAM is assumed contiguous */ @@ -661,8 +604,8 @@ PAGE_OFFSET, (unsigned long)__va(lmb_end_of_DRAM())); mem_init_done = 1; - /* set the last page of each hardware interrupt stack to be protected */ - initialize_paca_hardware_interrupt_stack(); + /* set the last page of each hardware interrupt stack to be protected */ + initialize_paca_hardware_interrupt_stack(); #ifdef CONFIG_PPC_ISERIES create_virtual_bus_tce_table(); @@ -672,8 +615,6 @@ prof_buffer = dprof_buffer; #endif } - - /* * This is called when a page has been modified by the kernel. diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/xmon/adb.c linuxppc64_2_4/arch/ppc64/xmon/adb.c --- ../kernel.org/linux-2.4.19/arch/ppc64/xmon/adb.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/xmon/adb.c Wed Dec 31 18:00:00 1969 @@ -1,217 +0,0 @@ -/* - * Copyright (C) 1996 Paul Mackerras. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ -#include "nonstdio.h" -#include "privinst.h" - -#define scanhex xmon_scanhex -#define skipbl xmon_skipbl - -#define ADB_B (*(volatile unsigned char *)0xf3016000) -#define ADB_SR (*(volatile unsigned char *)0xf3017400) -#define ADB_ACR (*(volatile unsigned char *)0xf3017600) -#define ADB_IFR (*(volatile unsigned char *)0xf3017a00) - -static inline void eieio(void) { asm volatile ("eieio" : :); } - -#define N_ADB_LOG 1000 -struct adb_log { - unsigned char b; - unsigned char ifr; - unsigned char acr; - unsigned int time; -} adb_log[N_ADB_LOG]; -int n_adb_log; - -void -init_adb_log(void) -{ - adb_log[0].b = ADB_B; - adb_log[0].ifr = ADB_IFR; - adb_log[0].acr = ADB_ACR; - adb_log[0].time = get_dec(); - n_adb_log = 0; -} - -void -dump_adb_log(void) -{ - unsigned t, t0; - struct adb_log *ap; - int i; - - ap = adb_log; - t0 = ap->time; - for (i = 0; i <= n_adb_log; ++i, ++ap) { - t = t0 - ap->time; - printf("b=%x ifr=%x acr=%x at %d.%.7d\n", ap->b, ap->ifr, ap->acr, - t / 1000000000, (t % 1000000000) / 100); - } -} - -void -adb_chklog(void) -{ - struct adb_log *ap = &adb_log[n_adb_log + 1]; - - ap->b = ADB_B; - ap->ifr = ADB_IFR; - ap->acr = ADB_ACR; - if (ap->b != ap[-1].b || (ap->ifr & 4) != (ap[-1].ifr & 4) - || ap->acr != ap[-1].acr) { - ap->time = get_dec(); - ++n_adb_log; - } -} - -int -adb_bitwait(int bmask, int bval, int fmask, int fval) -{ - int i; - struct adb_log *ap; - - for (i = 10000; i > 0; --i) { - adb_chklog(); - ap = &adb_log[n_adb_log]; - if ((ap->b & bmask) == bval && (ap->ifr & fmask) == fval) - return 0; - } - return -1; -} - -int -adb_wait(void) -{ - if (adb_bitwait(0, 0, 4, 4) < 0) { - printf("adb: ready wait timeout\n"); - return -1; - } - return 0; -} - -void -adb_readin(void) -{ - int i, j; - unsigned char d[64]; - - if (ADB_B & 8) { - printf("ADB_B: %x\n", ADB_B); - return; - } - i = 0; - adb_wait(); - j = ADB_SR; - eieio(); - ADB_B &= ~0x20; - eieio(); - for (;;) { - if (adb_wait() < 0) - break; - d[i++] = ADB_SR; - eieio(); - if (ADB_B & 8) - break; - ADB_B ^= 0x10; - eieio(); - } - ADB_B |= 0x30; - if (adb_wait() == 0) - j = ADB_SR; - for (j = 0; j < i; ++j) - printf("%.2x ", d[j]); - printf("\n"); -} - -int -adb_write(unsigned char *d, int i) -{ - int j; - unsigned x; - - if ((ADB_B & 8) == 0) { - printf("r: "); - adb_readin(); - } - for (;;) { - ADB_ACR = 0x1c; - eieio(); - ADB_SR = d[0]; - eieio(); - ADB_B &= ~0x20; - eieio(); - if (ADB_B & 8) - break; - ADB_ACR = 0xc; - eieio(); - ADB_B |= 0x20; - eieio(); - adb_readin(); - } - adb_wait(); - for (j = 1; j < i; ++j) { - ADB_SR = d[j]; - eieio(); - ADB_B ^= 0x10; - eieio(); - if (adb_wait() < 0) - break; - } - ADB_ACR = 0xc; - eieio(); - x = ADB_SR; - eieio(); - ADB_B |= 0x30; - return j; -} - -void -adbcmds(void) -{ - char cmd; - unsigned rtcu, rtcl, dec, pdec, x; - int i, j; - unsigned char d[64]; - - cmd = skipbl(); - switch (cmd) { - case 't': - for (;;) { - rtcl = get_rtcl(); - rtcu = get_rtcu(); - dec = get_dec(); - printf("rtc u=%u l=%u dec=%x (%d = %d.%.7d)\n", - rtcu, rtcl, dec, pdec - dec, (pdec - dec) / 1000000000, - ((pdec - dec) % 1000000000) / 100); - pdec = dec; - if (cmd == 'x') - break; - while (xmon_read(stdin, &cmd, 1) != 1) - ; - } - break; - case 'r': - init_adb_log(); - while (adb_bitwait(8, 0, 0, 0) == 0) - adb_readin(); - break; - case 'w': - i = 0; - while (scanhex(&x)) - d[i++] = x; - init_adb_log(); - j = adb_write(d, i); - printf("sent %d bytes\n", j); - while (adb_bitwait(8, 0, 0, 0) == 0) - adb_readin(); - break; - case 'l': - dump_adb_log(); - break; - } -} diff -uNr 
--exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/xmon/start.c linuxppc64_2_4/arch/ppc64/xmon/start.c --- ../kernel.org/linux-2.4.19/arch/ppc64/xmon/start.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/xmon/start.c Mon Apr 8 09:45:05 2002 @@ -7,14 +7,11 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include +#include #include #include #include -#include -#include -#include -#include #include #include @@ -55,8 +52,6 @@ } #endif -extern int adb_init(void); - static void sysrq_handle_xmon(int key, struct pt_regs *pt_regs, struct kbd_struct *kbd, struct tty_struct *tty) { xmon(pt_regs); @@ -120,7 +115,6 @@ } int xmon_wants_key; -int xmon_adb_keycode; int xmon_read(void *handle, void *ptr, int nb) diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/arch/ppc64/xmon/xmon.c linuxppc64_2_4/arch/ppc64/xmon/xmon.c --- ../kernel.org/linux-2.4.19/arch/ppc64/xmon/xmon.c Fri Apr 19 11:00:33 2002 +++ linuxppc64_2_4/arch/ppc64/xmon/xmon.c Thu Apr 18 09:38:53 2002 @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include @@ -20,15 +22,11 @@ #include #include #include -#include -#include +#include +#include +#include #include "nonstdio.h" #include "privinst.h" -#include - -#include - -#include #define scanhex xmon_scanhex #define skipbl xmon_skipbl @@ -132,8 +130,6 @@ static void mem_map_lock_pages(void); static void mem_map_check_hash(void); static void mem_check_dup_rpn (void); -static void show_task(struct task_struct * p); -static void xmon_show_state(void); static void debug_trace(void); extern int print_insn_big_powerpc(FILE *, unsigned long, unsigned long); @@ -145,7 +141,6 @@ extern int setjmp(u_int *); extern void longjmp(u_int *, int); extern unsigned long _ASR; -extern struct Naca *naca; pte_t *find_linux_pte(pgd_t *pgdir, unsigned long va); /* from htab.c */ @@ -181,6 +176,7 @@ T Enable/Disable PPCDBG flags\n\ x exit monitor\n\ z reboot\n\ + Z halt\n\ "; static int xmon_trace[NR_CPUS]; @@ -532,7 +528,7 @@ int i; struct bpt *bp; - if (_machine != _MACH_pSeries) + if (naca->platform != PLATFORM_PSERIES) return; bp = bpts; for (i = 0; i < NBPTS; ++i, ++bp) { @@ -563,7 +559,7 @@ struct bpt *bp; unsigned instr; - if (_machine != _MACH_pSeries) + if (naca->platform != PLATFORM_PSERIES) return; if (!__is_processor(PV_POWER4)) { set_dabr(0); @@ -611,8 +607,13 @@ } switch (cmd) { case 'z': + printf("Rebooting machine now..."); machine_restart(NULL); break; + case 'Z': + printf("Halting machine now..."); + machine_halt(); + break; case 'm': cmd = inchar(); switch (cmd) { @@ -701,7 +702,7 @@ printf(help_string); break; case 'p': - xmon_show_state(); + show_state(); break; case 'b': bpt_cmds(); @@ -1046,7 +1047,6 @@ (funcname = "ret_from_syscall_1")) #if 0 || stack[2] == (unsigned) &ret_from_syscall_2 - || stack[2] == (unsigned) &do_bottom_half_ret || stack[2] == (unsigned) &do_signal_ret #endif ) { @@ -1203,7 +1203,7 @@ instrs[0] = 0x7c6002a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6); instrs[1] = 0x4e800020; - opd[0] = instrs; + opd[0] = (unsigned long)instrs; opd[1] = 0; opd[2] = 0; store_inst(instrs); @@ -1222,7 +1222,7 @@ instrs[0] = 0x7c6003a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6); instrs[1] = 0x4e800020; - opd[0] = instrs; + opd[0] = (unsigned long)instrs; opd[1] = 0; opd[2] = 0; store_inst(instrs); @@ -1249,7 +1249,7 @@ { int i, cmd; unsigned long val; - struct Paca* ptrPaca = NULL; + struct paca_struct* ptrPaca = NULL; struct ItLpPaca* ptrLpPaca = NULL; struct ItLpRegSave* ptrLpRegSave = NULL; @@ -1271,7 +1271,7 
@@ // Dump out relevant Paca data areas. printf("Paca: \n"); - ptrPaca = (struct Paca*)get_sprg3(); + ptrPaca = get_paca(); printf(" Local Processor Control Area (LpPaca): \n"); ptrLpPaca = ptrPaca->xLpPacaPtr; @@ -2789,111 +2789,6 @@ } printf("\nDone -------------------\n"); -} - - - -static void show_task(struct task_struct * p) -{ - /* unsigned long free = 0; --Unused */ - int state; - static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" }; - - printf("--------------------------------------------------------------------------\n"); - printf("%-11.11s pid: %5.5lx ppid: %5.5lx state: ", - p->comm, p->pid, p->p_pptr->pid); - state = p->state ? ffz(~p->state) + 1 : 0; - if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *)) - printf(stat_nam[state]); - else - printf(" "); - if (p == current) - printf(" pc: current task "); - else - printf(" pc: 0x%16.16lx ", thread_saved_pc(&p->thread)); - - if (p->p_cptr) - printf("%5d ", p->p_cptr->pid); - else - printf(" "); - if (!p->mm) - printf(" (L-TLB) "); - else - printf(" (NOTLB) "); - if (p->p_ysptr) - printf("%7d", p->p_ysptr->pid); - else - printf(" "); - if (p->p_osptr) - printf(" %5d\n", p->p_osptr->pid); - else - printf("\n"); - - { - struct sigqueue *q; - char s[sizeof(sigset_t)*2+1], b[sizeof(sigset_t)*2+1]; - - render_sigset_t(&p->pending.signal, s); - render_sigset_t(&p->blocked, b); - printf(" sig: %d %s %s :", signal_pending(p), s, b); - for (q = p->pending.head; q ; q = q->next) - printf(" %d", q->info.si_signo); - printf(" X\n"); - } - - printf(" pers : %lx current : %lx", - p->personality, p); - printf("\n"); - - printf(" thread : 0x%16.16lx ksp : 0x%16.16lx\n", - &(p->thread), (p->thread.ksp)); - printf(" pgdir : 0x%16.16lx\n", (p->thread.pgdir)); - printf(" regs : 0x%16.16lx sysc : 0x%16.16lx\n", - (p->thread.regs), (p->thread.last_syscall)); - if(p->thread.regs) { - printf(" nip : 0x%16.16lx msr : 0x%16.16lx\n", - ((p->thread.regs)->nip), ((p->thread.regs)->msr)); - printf(" ctr : 0x%16.16lx link : 0x%16.16lx\n", - ((p->thread.regs)->ctr), ((p->thread.regs)->link)); - printf(" xer : 0x%16.16lx ccr : 0x%16.16lx\n", - ((p->thread.regs)->xer), ((p->thread.regs)->ccr)); - printf(" trap : 0x%16.16lx\n", - ((p->thread.regs)->trap)); - printf(" dar : 0x%16.16lx dsis : 0x%16.16lx\n", - ((p->thread.regs)->dar), ((p->thread.regs)->dsisr)); - printf(" rslt : 0x%16.16lx org3 : 0x%16.16lx\n", - ((p->thread.regs)->result), (p->thread.regs->orig_gpr3)); - } - - if(p->mm) { - struct mm_struct *mm = p->mm; - printf(" mm : 0x%16.16lx pgd : 0x%16.16lx\n", - mm, mm->pgd); - printf(" context: 0x%16.16lx mmap : 0x%16.16lx\n", - mm->context, mm->mmap); - - printf("\n"); - } - -} - -static void xmon_show_state(void) -{ - struct task_struct *p; - -#if (BITS_PER_LONG == 32) - printf("\n" - " free sibling\n"); - printf("task name st PC stack pid father child younger older\n"); -#else - printf("\n" - " free sibling\n"); - printf(" task PC stack pid father child younger older\n"); -#endif - read_lock(&tasklist_lock); - for_each_task(p) - show_task(p); - read_unlock(&tasklist_lock); } static void debug_trace(void) { diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/Makefile linuxppc64_2_4/drivers/Makefile --- ../kernel.org/linux-2.4.19/drivers/Makefile Fri Apr 19 11:00:20 2002 +++ linuxppc64_2_4/drivers/Makefile Mon Apr 22 10:32:49 2002 @@ -8,7 +8,7 @@ mod-subdirs := dio mtd sbus video macintosh usb input telephony sgi ide \ message/i2o message/fusion scsi md ieee1394 pnp isdn atm \ - fc4 net/hamradio i2c acpi bluetooth + fc4 
net/hamradio i2c acpi bluetooth iseries subdir-y := parport char block net sound misc media cdrom hotplug subdir-m := $(subdir-y) @@ -25,6 +25,7 @@ subdir-$(CONFIG_VT) += video subdir-$(CONFIG_MAC) += macintosh subdir-$(CONFIG_ALL_PPC) += macintosh +subdir-$(CONFIG_PPC_ISERIES) += iseries subdir-$(CONFIG_USB) += usb subdir-$(CONFIG_INPUT) += input subdir-$(CONFIG_PHONE) += telephony diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/block/genhd.c linuxppc64_2_4/drivers/block/genhd.c --- ../kernel.org/linux-2.4.19/drivers/block/genhd.c Fri Apr 19 11:00:44 2002 +++ linuxppc64_2_4/drivers/block/genhd.c Mon Apr 22 10:32:49 2002 @@ -224,6 +224,9 @@ #ifdef CONFIG_VT console_map_init(); #endif +#ifdef CONFIG_VIODASD + viodasd_init(); +#endif return 0; } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/block/ll_rw_blk.c linuxppc64_2_4/drivers/block/ll_rw_blk.c --- ../kernel.org/linux-2.4.19/drivers/block/ll_rw_blk.c Fri Apr 19 11:00:44 2002 +++ linuxppc64_2_4/drivers/block/ll_rw_blk.c Mon Apr 22 10:32:49 2002 @@ -1345,6 +1345,9 @@ #ifdef CONFIG_BLK_DEV_XD xd_init(); #endif +#ifdef CONFIG_VIOCD + viocd_init(); +#endif #ifdef CONFIG_BLK_DEV_MFM mfm_init(); #endif diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/bluetooth/hci_h4.c linuxppc64_2_4/drivers/bluetooth/hci_h4.c --- ../kernel.org/linux-2.4.19/drivers/bluetooth/hci_h4.c Mon Apr 22 11:34:26 2002 +++ linuxppc64_2_4/drivers/bluetooth/hci_h4.c Tue Apr 23 09:37:26 2002 @@ -25,7 +25,7 @@ /* * BlueZ HCI UART(H4) protocol. * - * $Id: hci_h4.c,v 1.1.1.1 2002/03/08 21:03:15 maxk Exp $ + * $Id: hci_h4.c,v 1.1 2002/04/23 14:37:26 tgall Exp $ */ #define VERSION "1.1" diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/bluetooth/hci_h4.h linuxppc64_2_4/drivers/bluetooth/hci_h4.h --- ../kernel.org/linux-2.4.19/drivers/bluetooth/hci_h4.h Mon Apr 22 11:34:26 2002 +++ linuxppc64_2_4/drivers/bluetooth/hci_h4.h Tue Apr 23 09:37:26 2002 @@ -23,7 +23,7 @@ */ /* - * $Id: hci_h4.h,v 1.1.1.1 2002/03/08 21:03:15 maxk Exp $ + * $Id: hci_h4.h,v 1.1 2002/04/23 14:37:26 tgall Exp $ */ #ifdef __KERNEL__ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/bluetooth/hci_ldisc.c linuxppc64_2_4/drivers/bluetooth/hci_ldisc.c --- ../kernel.org/linux-2.4.19/drivers/bluetooth/hci_ldisc.c Mon Apr 22 11:34:26 2002 +++ linuxppc64_2_4/drivers/bluetooth/hci_ldisc.c Tue Apr 23 09:37:26 2002 @@ -25,7 +25,7 @@ /* * BlueZ HCI UART driver. 
* - * $Id: hci_ldisc.c,v 1.1.1.1 2002/03/08 21:03:15 maxk Exp $ + * $Id: hci_ldisc.c,v 1.1 2002/04/23 14:37:26 tgall Exp $ */ #define VERSION "2.0" diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/bluetooth/hci_uart.h linuxppc64_2_4/drivers/bluetooth/hci_uart.h --- ../kernel.org/linux-2.4.19/drivers/bluetooth/hci_uart.h Mon Apr 22 11:34:26 2002 +++ linuxppc64_2_4/drivers/bluetooth/hci_uart.h Tue Apr 23 09:37:26 2002 @@ -23,7 +23,7 @@ */ /* - * $Id: hci_uart.h,v 1.1.1.1 2002/03/08 21:03:15 maxk Exp $ + * $Id: hci_uart.h,v 1.1 2002/04/23 14:37:26 tgall Exp $ */ #ifndef N_HCI diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/bluetooth/hci_usb.h linuxppc64_2_4/drivers/bluetooth/hci_usb.h --- ../kernel.org/linux-2.4.19/drivers/bluetooth/hci_usb.h Mon Apr 22 11:34:26 2002 +++ linuxppc64_2_4/drivers/bluetooth/hci_usb.h Tue Apr 23 09:37:26 2002 @@ -23,7 +23,7 @@ */ /* - * $Id: hci_usb.h,v 1.2 2002/03/18 19:10:04 maxk Exp $ + * $Id: hci_usb.h,v 1.1 2002/04/23 14:37:26 tgall Exp $ */ #ifdef __KERNEL__ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/bluetooth/hci_vhci.h linuxppc64_2_4/drivers/bluetooth/hci_vhci.h --- ../kernel.org/linux-2.4.19/drivers/bluetooth/hci_vhci.h Mon Apr 22 11:34:26 2002 +++ linuxppc64_2_4/drivers/bluetooth/hci_vhci.h Tue Apr 23 09:37:26 2002 @@ -23,7 +23,7 @@ */ /* - * $Id: hci_vhci.h,v 1.1.1.1 2002/03/08 21:03:15 maxk Exp $ + * $Id: hci_vhci.h,v 1.1 2002/04/23 14:37:26 tgall Exp $ */ #ifndef __HCI_VHCI_H diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/cdrom/Makefile linuxppc64_2_4/drivers/cdrom/Makefile --- ../kernel.org/linux-2.4.19/drivers/cdrom/Makefile Fri Apr 19 10:30:25 2002 +++ linuxppc64_2_4/drivers/cdrom/Makefile Thu Oct 11 11:10:49 2001 @@ -27,6 +27,7 @@ obj-$(CONFIG_BLK_DEV_IDECD) += cdrom.o obj-$(CONFIG_BLK_DEV_SR) += cdrom.o obj-$(CONFIG_PARIDE_PCD) += cdrom.o +obj-$(CONFIG_VIOCD) += cdrom.o obj-$(CONFIG_AZTCD) += aztcd.o obj-$(CONFIG_CDU31A) += cdu31a.o cdrom.o diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/char/Config.in linuxppc64_2_4/drivers/char/Config.in --- ../kernel.org/linux-2.4.19/drivers/char/Config.in Mon Apr 22 11:34:26 2002 +++ linuxppc64_2_4/drivers/char/Config.in Tue Apr 23 09:37:26 2002 @@ -45,6 +45,10 @@ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then dep_tristate ' Multi-Tech multiport card support (EXPERIMENTAL)' CONFIG_ISI m fi + tristate ' IBM Multiport Serial Adapter' CONFIG_ICOM + if [ "$CONFIG_ICOM" = "y" ]; then + string 'Modem Country Code (Internal Modem Users only)' CONFIG_ICOM_MODEM_CC "" + fi tristate ' Microgate SyncLink card support' CONFIG_SYNCLINK tristate ' HDLC line discipline support' CONFIG_N_HDLC tristate ' SDL RISCom/8 card support' CONFIG_RISCOM8 @@ -134,6 +138,7 @@ fi dep_tristate 'Support for user-space parallel port device drivers' CONFIG_PPDEV $CONFIG_PARPORT fi +dep_bool 'pSeries Hypervisor Virtual Console support' CONFIG_HVC_CONSOLE $CONFIG_PPC64 source drivers/i2c/Config.in @@ -228,6 +233,9 @@ dep_tristate 'Intel i8x0 Random Number Generator support' CONFIG_INTEL_RNG $CONFIG_PCI fi tristate '/dev/nvram support' CONFIG_NVRAM +if [ "$CONFIG_PPC_ISERIES" != "y" ]; then + tristate 'Enhanced Real Time Clock Support' CONFIG_RTC +fi tristate 'Enhanced Real Time Clock Support' CONFIG_RTC if [ "$CONFIG_IA64" = "y" ]; then bool 'EFI Real Time Clock Services' CONFIG_EFI_RTC diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/char/Makefile linuxppc64_2_4/drivers/char/Makefile --- ../kernel.org/linux-2.4.19/drivers/char/Makefile Mon Apr 22 11:34:26 2002 +++ 
linuxppc64_2_4/drivers/char/Makefile Tue Apr 23 09:37:26 2002 @@ -164,6 +164,7 @@ obj-$(CONFIG_COMPUTONE) += ip2.o ip2main.o obj-$(CONFIG_RISCOM8) += riscom8.o obj-$(CONFIG_ISI) += isicom.o +obj-$(CONFIG_ICOM) += icom.o obj-$(CONFIG_ESPSERIAL) += esp.o obj-$(CONFIG_SYNCLINK) += synclink.o obj-$(CONFIG_N_HDLC) += n_hdlc.o @@ -177,6 +178,7 @@ obj-$(CONFIG_MVME147_SCC) += generic_serial.o vme_scc.o obj-$(CONFIG_MVME162_SCC) += generic_serial.o vme_scc.o obj-$(CONFIG_BVME6000_SCC) += generic_serial.o vme_scc.o +obj-$(CONFIG_HVC_CONSOLE) += hvc_console.o obj-$(CONFIG_SERIAL_TX3912) += generic_serial.o serial_tx3912.o obj-$(CONFIG_TXX927_SERIAL) += serial_txx927.o diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/char/hvc_console.c linuxppc64_2_4/drivers/char/hvc_console.c --- ../kernel.org/linux-2.4.19/drivers/char/hvc_console.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/char/hvc_console.c Mon Apr 22 10:32:50 2002 @@ -0,0 +1,355 @@ +/* + * Copyright (C) 2001 Anton Blanchard , IBM + * Copyright (C) 2001 Paul Mackerras , IBM + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern int hvc_count(int *); +extern int hvc_get_chars(int index, char *buf, int count); +extern int hvc_put_chars(int index, const char *buf, int count); + +#define HVC_MAJOR 229 +#define HVC_MINOR 0 + +#define MAX_NR_HVC_CONSOLES 4 + +#define TIMEOUT ((HZ + 99) / 100) + +struct tty_driver hvc_driver; +static int hvc_refcount; +static struct tty_struct *hvc_table[MAX_NR_HVC_CONSOLES]; +static struct termios *hvc_termios[MAX_NR_HVC_CONSOLES]; +static struct termios *hvc_termios_locked[MAX_NR_HVC_CONSOLES]; +static int hvc_offset; +#ifdef CONFIG_MAGIC_SYSRQ +static int sysrq_pressed; +#endif + +#define N_OUTBUF 16 + +#define __ALIGNED__ __attribute__((__aligned__(8))) + +struct hvc_struct { + spinlock_t lock; + int index; + struct tty_struct *tty; + unsigned int count; + int do_wakeup; + char outbuf[N_OUTBUF] __ALIGNED__; + int n_outbuf; +}; + +struct hvc_struct hvc_struct[MAX_NR_HVC_CONSOLES]; + +static int hvc_open(struct tty_struct *tty, struct file * filp) +{ + int line = MINOR(tty->device) - tty->driver.minor_start; + struct hvc_struct *hp; + unsigned long flags; + + if (line < 0 || line >= MAX_NR_HVC_CONSOLES) + return -ENODEV; + hp = &hvc_struct[line]; + + tty->driver_data = hp; + spin_lock_irqsave(&hp->lock, flags); + hp->tty = tty; + hp->count++; + spin_unlock_irqrestore(&hp->lock, flags); + + return 0; +} + +static void hvc_close(struct tty_struct *tty, struct file * filp) +{ + struct hvc_struct *hp = tty->driver_data; + unsigned long flags; + + if (tty_hung_up_p(filp)) + return; + spin_lock_irqsave(&hp->lock, flags); + if (--hp->count == 0) + hp->tty = NULL; + else if (hp->count < 0) + printk(KERN_ERR 
"hvc_close %lu: oops, count is %d\n", + hp - hvc_struct, hp->count); + spin_unlock_irqrestore(&hp->lock, flags); +} + +/* called with hp->lock held */ +static void hvc_push(struct hvc_struct *hp) +{ + int n; + + n = hvc_put_chars(hp->index + hvc_offset, hp->outbuf, hp->n_outbuf); + if (n <= 0) { + if (n == 0) + return; + /* throw away output on error; this happens when + there is no session connected to the vterm. */ + hp->n_outbuf = 0; + } else + hp->n_outbuf -= n; + if (hp->n_outbuf > 0) + memmove(hp->outbuf, hp->outbuf + n, hp->n_outbuf); + else + hp->do_wakeup = 1; +} + +static int hvc_write(struct tty_struct *tty, int from_user, + const unsigned char *buf, int count) +{ + struct hvc_struct *hp = tty->driver_data; + char *p; + int todo, written = 0; + unsigned long flags; + + spin_lock_irqsave(&hp->lock, flags); + while (count > 0 && (todo = N_OUTBUF - hp->n_outbuf) > 0) { + if (todo > count) + todo = count; + p = hp->outbuf + hp->n_outbuf; + if (from_user) { + todo -= copy_from_user(p, buf, todo); + if (todo == 0) { + if (written == 0) + written = -EFAULT; + break; + } + } else + memcpy(p, buf, todo); + count -= todo; + buf += todo; + hp->n_outbuf += todo; + written += todo; + hvc_push(hp); + } + spin_unlock_irqrestore(&hp->lock, flags); + + return written; +} + +static int hvc_write_room(struct tty_struct *tty) +{ + struct hvc_struct *hp = tty->driver_data; + + return N_OUTBUF - hp->n_outbuf; +} + +static int hvc_chars_in_buffer(struct tty_struct *tty) +{ + struct hvc_struct *hp = tty->driver_data; + + return hp->n_outbuf; +} + +static void hvc_poll(int index) +{ + struct hvc_struct *hp = &hvc_struct[index]; + struct tty_struct *tty; + int i, n; + char buf[16] __ALIGNED__; + unsigned long flags; + + spin_lock_irqsave(&hp->lock, flags); + + if (hp->n_outbuf > 0) + hvc_push(hp); + + tty = hp->tty; + if (tty) { + for (;;) { + if (TTY_FLIPBUF_SIZE - tty->flip.count < sizeof(buf)) + break; + n = hvc_get_chars(index + hvc_offset, buf, sizeof(buf)); + if (n <= 0) + break; + for (i = 0; i < n; ++i) { +#ifdef CONFIG_MAGIC_SYSRQ /* Handle the SysRq Hack */ + if (buf[i] == '\x0f') { /* ^O -- should support a sequence */ + sysrq_pressed = 1; + continue; + } else if (sysrq_pressed) { + handle_sysrq(buf[i], NULL, NULL, tty); + sysrq_pressed = 0; + continue; + } +#endif + tty_insert_flip_char(tty, buf[i], 0); + } + } + if (tty->flip.count) + tty_schedule_flip(tty); + + if (hp->do_wakeup) { + hp->do_wakeup = 0; + if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) + && tty->ldisc.write_wakeup) + (tty->ldisc.write_wakeup)(tty); + wake_up_interruptible(&tty->write_wait); + } + } + + spin_unlock_irqrestore(&hp->lock, flags); +} + +int khvcd(void *unused) +{ + int i; + + daemonize(); + reparent_to_init(); + strcpy(current->comm, "khvcd"); + sigfillset(¤t->blocked); + + for (;;) { + for (i = 0; i < MAX_NR_HVC_CONSOLES; ++i) + hvc_poll(i); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(TIMEOUT); + } +} + +int __init hvc_init(void) +{ + int i; + + memset(&hvc_driver, 0, sizeof(struct tty_driver)); + + hvc_driver.magic = TTY_DRIVER_MAGIC; + hvc_driver.driver_name = "hvc"; + hvc_driver.name = "hvc/%d"; + hvc_driver.major = HVC_MAJOR; + hvc_driver.minor_start = HVC_MINOR; + hvc_driver.num = hvc_count(&hvc_offset); + if (hvc_driver.num > MAX_NR_HVC_CONSOLES) + hvc_driver.num = MAX_NR_HVC_CONSOLES; + hvc_driver.type = TTY_DRIVER_TYPE_SYSTEM; + hvc_driver.init_termios = tty_std_termios; + hvc_driver.flags = TTY_DRIVER_REAL_RAW; + hvc_driver.refcount = &hvc_refcount; + hvc_driver.table = hvc_table; + 
hvc_driver.termios = hvc_termios; + hvc_driver.termios_locked = hvc_termios_locked; + + hvc_driver.open = hvc_open; + hvc_driver.close = hvc_close; + hvc_driver.write = hvc_write; + hvc_driver.write_room = hvc_write_room; + hvc_driver.chars_in_buffer = hvc_chars_in_buffer; + + for (i = 0; i < hvc_driver.num; i++) { + hvc_struct[i].lock = SPIN_LOCK_UNLOCKED; + hvc_struct[i].index = i; + tty_register_devfs(&hvc_driver, 0, hvc_driver.minor_start + i); + } + + if (tty_register_driver(&hvc_driver)) + panic("Couldn't register hvc console driver\n"); + + if (hvc_driver.num > 0) + kernel_thread(khvcd, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL); + + return 0; +} + +static void __exit hvc_exit(void) +{ +} + +void hvc_console_print(struct console *co, const char *b, unsigned count) +{ + char c[16] __ALIGNED__; + unsigned i, n; + int r, donecr = 0; + + i = n = 0; + while (count > 0 || i > 0) { + if (count > 0 && i < sizeof(c)) { + if (b[n] == '\n' && !donecr) { + c[i++] = '\r'; + donecr = 1; + } else { + c[i++] = b[n++]; + donecr = 0; + --count; + } + } else { + r = hvc_put_chars(co->index + hvc_offset, c, i); + if (r < 0) { + /* throw away chars on error */ + i = 0; + } else if (r > 0) { + i -= r; + if (i > 0) + memmove(c, c+r, i); + } + } + } +} + +static kdev_t hvc_console_device(struct console *c) +{ + return MKDEV(HVC_MAJOR, HVC_MINOR + c->index); +} + +int hvc_wait_for_keypress(struct console *co) +{ + char c[16] __ALIGNED__; + + while (hvc_get_chars(co->index, &c[0], 1) < 1) + ; + return 0; +} + +static int __init hvc_console_setup(struct console *co, char *options) +{ + if (co->index < 0 || co->index >= MAX_NR_HVC_CONSOLES + || co->index >= hvc_count(&hvc_offset)) + return -1; + return 0; +} + +struct console hvc_con_driver = { + name: "hvc", + write: hvc_console_print, + device: hvc_console_device, + setup: hvc_console_setup, + flags: CON_PRINTBUFFER, + index: -1, +}; + +int __init hvc_console_init(void) +{ + register_console(&hvc_con_driver); + return 0; +} + +module_init(hvc_init); +module_exit(hvc_exit); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/char/icom.c linuxppc64_2_4/drivers/char/icom.c --- ../kernel.org/linux-2.4.19/drivers/char/icom.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/char/icom.c Mon Apr 15 11:14:27 2002 @@ -0,0 +1,3311 @@ +/* + * iCom.c + * + * Copyright (C) 2001 Michael Anderson, IBM Corporation + * + * Serial device driver. + * + * Based on code from serial.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * NOTE: Only for users with internal modem cards (eg. iSeries 2771, 2772) + * find the decimal number associated with your country below + * and set country code operations using module parameter at install + * time, eg. 
for Argentina the command would be (be sure number is in "") + * insmod iCom.o iCom_country_code="52" + * + * Module parameter values for + * iCom_country_code: + 52 * AR Argentina * + 52 * AW Aruba * + 1 * AU Australia * + 52 * AT Austria * + 52 * BH Bahrain * + 52 * BE Belgium * + 52 * BR Brazil * + 52 * BN Brunei Darussalam * + 52 * CA Canada * + 52 * KY Cayman Islands * + 52 * CL Chile * + 52 * CN China * + 52 * CO Colombia * + 52 * CR Costa Rica * + 52 * HR Croatia * + 52 * CY Cyprus * + 37 * CZ Czech Republic * + 52 * DK Denmark * + 52 * EC Ecuador * + 52 * EG Egypt * + 52 * FI Finland * + 52 * FR France * + 52 * DE Germany * + 52 * GR Greece * + 52 * GT Guatemala * + 48 * HK China (Hong Kong S.A.R.) * + 48 * HU Hungary * + 52 * IS Iceland * + 48 * IN India * + 48 * ID Indonesia * + 52 * IE Ireland * + 48 * IL Israel * + 52 * IT Italy * + 52 * JM Jamaica * + 16 * JP Japan * + 52 * KR Korea, Republic of * + 52 * LU Luxembourg * + 52 * MO China (Macau S.A.R.) * + 48 * MY Malaysia * + 52 * MX Mexico * + 52 * MA Morocco * + 52 * NL Netherlands * + 52 * AN Netherlands Antilles * + 9 * NZ New Zealand * + 52 * NO Norway * + 52 * PK Pakistan * + 52 * PA Panama * + 52 * PE Peru * + 48 * PH Philippines * + 48 * PL Poland * + 52 * PT Portugal * + 52 * QA Qatar * + 52 * RO Romania * + 52 * RU Russia * + 52 * SA Saudi Arabia * + 48 * SG Singapore * + 52 * SK Slovakia * + 48 * SI Slovenia * + 53 * ZA South Africa * + 52 * ES Spain * + 52 * LK Sri Lanka * + 52 * SE Sweden * + 52 * CH Switzerland * + 52 * TW Taiwan * + 52 * TH Thailand * + 52 * TT Trinidad and Tobago * + 52 * TR Turkey * + 52 * UA Ukraine * + 52 * AE United Arab Emirates * + 52 * GB United Kingdom * + 52 * US United States of America* + 52 * UY Uruguay * + 52 * VE Venezuela * + 48 * VN Vietnam * +*/ +#define SERIAL_DO_RESTART +#ifdef MODVERSIONS +#include +#endif +#include + +MODULE_AUTHOR ("Michael Anderson "); +MODULE_DESCRIPTION ("IBM iSeries Serial IOA driver"); +MODULE_SUPPORTED_DEVICE("IBM iSeries 2745, 2771, 2772 Communications adapters"); +MODULE_LICENSE("GPL"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* adapter code loads */ +#include "icom.h" + +#ifdef MODULE +#define CONFIG_ICOM_MODEM_CC "" +#endif + +static char *iCom_country_code = CONFIG_ICOM_MODEM_CC; +MODULE_PARM(iCom_country_code, "s"); +MODULE_PARM_DESC(iCom_country_code, "Modem country code configuration"); + +#define ICOM_TRACE /* enable port trace capabilities */ + +#define DRIVER_NAME "iCom" +#define VENDOR_ID 0x1014 +#define DEVICE_ID 0x0031 +#define DEVICE_ID2 0x0219 +#define MAX_ADAPTERS 4 +#define NR_PORTS (active_adapters * 4) +#define MAX_PORTS (MAX_ADAPTERS * 4) +#define ASYNC_CLOSING 0x08000000 /* Serial port is closing */ +#define ASYNC_HUP_NOTIFY 0x0001 /* Notify getty on hangups and closes + on the callout port */ + +#ifdef MODULE +static const struct pci_device_id iCom_pci_table[] __initdata = +{ + { + vendor: VENDOR_ID, + device: DEVICE_ID, + subvendor: 0xFFFF, + subdevice: 0xFFFF, + }, + { + vendor: VENDOR_ID, + device: DEVICE_ID2, + subvendor: VENDOR_ID, + subdevice: 0x021a, + }, + { + vendor: VENDOR_ID, + device: DEVICE_ID2, + subvendor: VENDOR_ID, + subdevice: 0x0251, + }, + { + vendor: VENDOR_ID, + device: DEVICE_ID2, + subvendor:
VENDOR_ID, + subdevice: 0x0252, + }, + { } +}; +MODULE_DEVICE_TABLE(pci, iCom_pci_table); +#endif + +/* + * adapter defines and structures + */ +#define ICOM_CONTROL_START_A 0x00000008 +#define ICOM_CONTROL_STOP_A 0x00000004 +#define ICOM_CONTROL_START_B 0x00000002 +#define ICOM_CONTROL_STOP_B 0x00000001 +#define ICOM_CONTROL_START_C 0x00000008 +#define ICOM_CONTROL_STOP_C 0x00000004 +#define ICOM_CONTROL_START_D 0x00000002 +#define ICOM_CONTROL_STOP_D 0x00000001 +#define ICOM_IRAM_OFFSET 0x1000 +#define ICOM_DCE_IRAM_OFFSET 0x0A00 +#define ICOM_CABLE_ID_VALID 0x01 +#define ICOM_CABLE_ID_MASK 0xF0 +#define ICOM_DISABLE 0x80 +#define CMD_XMIT_RCV_ENABLE 0xC0 +#define CMD_XMIT_ENABLE 0x40 +#define CMD_RCV_DISABLE 0x00 +#define CMD_RCV_ENABLE 0x80 +#define CMD_RESTART 0x01 +#define CMD_HOLD_XMIT 0x02 +#define CMD_SND_BREAK 0x04 +#define RS232_CABLE 0x06 +#define V24_CABLE 0x0E +#define V35_CABLE 0x0C +#define V36_CABLE 0x02 +#define START_DOWNLOAD 0x80 +#define ICOM_INT_MASK_PRC_A 0x00003FFF +#define ICOM_INT_MASK_PRC_B 0x3FFF0000 +#define ICOM_INT_MASK_PRC_C 0x00003FFF +#define ICOM_INT_MASK_PRC_D 0x3FFF0000 +#define INT_RCV_COMPLETED 0x1000 +#define INT_XMIT_COMPLETED 0x2000 +#define INT_IDLE_DETECT 0x0800 +#define INT_RCV_DISABLED 0x0400 +#define INT_XMIT_DISABLED 0x0200 +#define INT_RCV_XMIT_SHUTDOWN 0x0100 +#define INT_FATAL_ERROR 0x0080 +#define INT_CABLE_PULL 0x0020 +#define INT_SIGNAL_CHANGE 0x0010 +#define HDLC_PPP_PURE_ASYNC 0x02 +#define HDLC_FF_FILL 0x00 +#define HDLC_HDW_FLOW 0x01 +#define START_XMIT 0x80 +#define ICOM_ACFG_DRIVE1 0x20 +#define ICOM_ACFG_NO_PARITY 0x00 +#define ICOM_ACFG_PARITY_ENAB 0x02 +#define ICOM_ACFG_PARITY_ODD 0x01 +#define ICOM_ACFG_8BPC 0x00 +#define ICOM_ACFG_7BPC 0x04 +#define ICOM_ACFG_6BPC 0x08 +#define ICOM_ACFG_5BPC 0x0C +#define ICOM_ACFG_1STOP_BIT 0x00 +#define ICOM_ACFG_2STOP_BIT 0x10 +#define DTR 0x80 +#define RTS 0x40 +#define RI 0x08 +#define DSR 0x80 +#define DCD 0x20 +#define CTS 0x40 + +#define BAUD_TABLE_LIMIT 20 +static int icom_acfg_baud[] = { + 300, + 600, + 900, + 1200, + 1800, + 2400, + 3600, + 4800, + 7200, + 9600, + 14400, + 19200, + 28800, + 38400, + 57600, + 76800, + 115200, + 153600, + 230400, + 307200, + 460800}; + +static int active_adapters; +static struct tty_driver serial_driver; +static int serial_refcount = 0; +static struct tty_struct *serial_table[MAX_PORTS]; +static struct termios *serial_termios[MAX_PORTS]; +static struct termios *serial_termios_locked[MAX_PORTS]; + +struct iCom_regs { + u32 control; /* Adapter Control Register */ + u32 interrupt; /* Adapter Interrupt Register */ + u32 int_mask; /* Adapter Interrupt Mask Reg */ + u32 int_pri; /* Adapter Interrupt Priority r */ + u32 int_reg_b; /* Adapter non-masked Interrupt */ + u32 resvd01; + u32 resvd02; + u32 resvd03; + u32 control_2; /* Adapter Control Register 2 */ + u32 interrupt_2; /* Adapter Interrupt Register 2 */ + u32 int_mask_2; /* Adapter Interrupt Mask 2 */ + u32 int_pri_2; /* Adapter Interrupt Prior 2 */ + u32 int_reg_2b; /* Adapter non-masked 2 */ +}; + +struct func_dram { + u32 reserved[108]; /* 0-1B0 reserved by personality code */ + u32 RcvStatusAddr; /* 1B0-1B3 Status Address for Next rcv */ + u8 RcvStnAddr; /* 1B4 Receive Station Addr */ + u8 IdleState; /* 1B5 Idle State */ + u8 IdleMonitor; /* 1B6 Idle Monitor */ + u8 FlagFillIdleTimer; /* 1B7 Flag Fill Idle Timer */ + u32 XmitStatusAddr; /* 1B8-1BB Transmit Status Address */ + u8 StartXmitCmd; /* 1BC Start Xmit Command */ + u8 HDLCConfigReg; /* 1BD Reserved */ + u8 CauseCode; /* 1BE Cause code 
for fatal error */ + u8 xchar; /* 1BF High priority send */ + u32 reserved3; /* 1C0-1C3 Reserved */ + u8 PrevCmdReg; /* 1C4 Reserved */ + u8 CmdReg; /* 1C5 Command Register */ + u8 async_config2; /* 1C6 Async Config Byte 2*/ + u8 async_config3; /* 1C7 Async Config Byte 3*/ + u8 dce_resvd[20]; /* 1C8-1DB DCE Rsvd */ + u8 dce_resvd21; /* 1DC DCE Rsvd (21st byte*/ + u8 misc_flags; /* 1DD misc flags */ +#define V2_HARDWARE 0x40 + u8 call_length; /* 1DE Phone #/CFI buff ln*/ + u8 call_length2; /* 1DF Upper byte (unused)*/ + u32 call_addr; /* 1E0-1E3 Phn #/CFI buff addr*/ + u16 timer_value; /* 1E4-1E5 general timer value*/ + u8 timer_command; /* 1E6 general timer cmd */ + u8 dce_command; /* 1E7 dce command reg */ + u8 dce_cmd_status; /* 1E8 dce command stat */ + u8 x21_r1_ioff; /* 1E9 dce ready counter */ + u8 x21_r0_ioff; /* 1EA dce not ready ctr */ + u8 x21_ralt_ioff; /* 1EB dce CNR counter */ + u8 x21_r1_ion; /* 1EC dce ready I on ctr */ + u8 rsvd_ier; /* 1ED Rsvd for IER (if ne*/ + u8 ier; /* 1EE Interrupt Enable */ + u8 isr; /* 1EF Input Signal Reg */ + u8 osr; /* 1F0 Output Signal Reg */ + u8 reset; /* 1F1 Reset/Reload Reg */ + u8 disable; /* 1F2 Disable Reg */ + u8 sync; /* 1F3 Sync Reg */ + u8 error_stat; /* 1F4 Error Status */ + u8 cable_id; /* 1F5 Cable ID */ + u8 cs_length; /* 1F6 CS Load Length */ + u8 mac_length; /* 1F7 Mac Load Length */ + u32 cs_load_addr; /* 1F8-1FB Call Load PCI Addr */ + u32 mac_load_addr; /* 1FC-1FF Mac Load PCI Addr */ +}; + +#define NUM_XBUFFS 1 +#define NUM_RBUFFS 2 +#define RCV_BUFF_SZ 0x0200 +#define XMIT_BUFF_SZ 0x1000 +struct statusArea +{ + /**********************************************/ + /* Transmit Status Area */ + /**********************************************/ + struct { + u32 leNext; /* Next entry in Little Endian on Adapter */ + u32 leNextASD; + u32 leBuffer; /* Buffer for entry in LE for Adapter */ + u16 leLengthASD; + u16 leOffsetASD; + u16 leLength; /* Length of data in segment */ + u16 flags; +#define SA_FLAGS_DONE 0x0080 /* Done with Segment */ +#define SA_FLAGS_CONTINUED 0x8000 /* More Segments */ +#define SA_FLAGS_IDLE 0x4000 /* Mark IDLE after frm */ +#define SA_FLAGS_READY_TO_XMIT 0x0800 +#define SA_FLAGS_STAT_MASK 0x007F + } xmit[NUM_XBUFFS]; + + /**********************************************/ + /* Receive Status Area */ + /**********************************************/ + struct { + u32 leNext; /* Next entry in Little Endian on Adapter */ + u32 leNextASD; + u32 leBuffer; /* Buffer for entry in LE for Adapter */ + u16 WorkingLength; /* size of segment */ + u16 reserv01; + u16 leLength; /* Length of data in segment */ + u16 flags; +#define SA_FL_RCV_DONE 0x0010 /* Data ready */ +#define SA_FLAGS_OVERRUN 0x0040 +#define SA_FLAGS_PARITY_ERROR 0x0080 +#define SA_FLAGS_FRAME_ERROR 0x0001 +#define SA_FLAGS_FRAME_TRUNC 0x0002 +#define SA_FLAGS_BREAK_DET 0x0004 /* set conditionally by device driver, not hardware */ +#define SA_FLAGS_RCV_MASK 0xFFE6 + } rcv[NUM_RBUFFS]; +}; + +struct iCom_port { + int open_active_count; + struct tty_struct *tty; + unsigned long int event; + struct tq_struct tqueue; + int flags; + int xmit_fifo_size; + int baud_base; + wait_queue_head_t close_wait; + wait_queue_head_t open_wait; + wait_queue_head_t delta_msr_wait; + int blocked_open; + unsigned short close_delay; + unsigned short closing_wait; + unsigned long int timeout; + long session; /* Session of opening process */ + long pgrp; /* pgrp of opening process */ + unsigned char read_status_mask; + unsigned char ignore_status_mask; + struct async_icount 
icount; + struct termios normal_termios; + struct termios callout_termios; + unsigned long int int_reg; + struct iCom_regs *global_reg; + struct func_dram *dram; + int adapter; + int port; + struct statusArea *statStg; + dma_addr_t statStg_pci; + u32 *xmitRestart; + dma_addr_t xmitRestart_pci; + unsigned char *xmit_buf; + dma_addr_t xmit_buf_pci; + unsigned char *recv_buf; + dma_addr_t recv_buf_pci; + int next_rcv; + int put_length; + int status; +#define ICOM_PORT_ACTIVE 1 +#define ICOM_PORT_OFF 0 + unsigned long *trace_blk; +}; + +static struct iCom_adapter { + unsigned long int base_addr; + unsigned char irq_number; + struct pci_dev *pci_dev; + struct iCom_port port_info[4]; + int version; +#define ADAPTER_V1 0x0001 +#define ADAPTER_V2 0x0002 + unsigned long int subsystem_id; +#define FOUR_PORT_MODEL 0x02521014 + int numb_ports; +} *iCom_adapter_info; + + +static DECLARE_MUTEX(tmp_buf_sem); + +static spinlock_t iComlock; + +/* + Utility functions +*/ +static void return_port_memory(struct iCom_port *iCom_port_info); +static void iCom_wait_until_sent(struct tty_struct *tty, int timeout); +static void do_softint(void *); +static void iCom_start(struct tty_struct * tty); +static void iCom_flush_buffer(struct tty_struct * tty); +static void iCom_set_code(struct iCom_port *iCom_port_info); +#ifdef ICOM_TRACE +static void TRACE(struct iCom_port *,u32 , u32); +#else +#define TRACE(x,y,z) /* nub out calls to TRACE function */ +#endif + +#ifdef CONFIG_PPC64 +extern int register_ioctl32_conversion(unsigned int cmd, + int (*handler)(unsigned int, unsigned int, unsigned long, struct file *)); +extern int unregister_ioctl32_conversion(unsigned int cmd); +#else +static inline int register_ioctl32_conversion(unsigned int cmd, + int (*handler)(unsigned int, + unsigned int, unsigned long, struct file *)) +{ + return 0; +} +static inline int unregister_ioctl32_conversion(unsigned int cmd) +{ + return 0; +} +#endif + +static u8 iCom_readb(void *address) +{ + /* Issue a 'write memory barrier' prior to the mmio to ensure ordering */ + /* This translates to an eieio instruction on ppc */ + wmb(); + return readb(address); +} + +static void iCom_writeb(u8 value, void *address) +{ + /* Issue a 'memory barrier' prior to the mmio to ensure data is flushed + to memory. This translates to a sync instruction on ppc */ + mb(); + writeb(value, address); +} + +static u16 iCom_readw(void *address) +{ + /* Issue a 'write memory barrier' prior to the mmio to ensure ordering */ + /* This translates to an eieio instruction on ppc */ + wmb(); + return readw(address); +} + +static void iCom_writew(u16 value, void *address) +{ + /* Issue a 'memory barrier' prior to the mmio to ensure data is flushed + to memory. This translates to a sync instruction on ppc */ + mb(); + writew(value, address); +} + +static u32 iCom_readl(void *address) +{ + /* Issue a 'write memory barrier' prior to the mmio to ensure ordering */ + /* This translates to an eieio instruction on ppc */ + wmb(); + return readl(address); +} + +static void iCom_writel(u32 value, void *address) +{ + /* Issue a 'memory barrier' prior to the mmio to ensure data is flushed + to memory. 
This translates to a sync instruction on ppc */ + mb(); + writel(value, address); +} + +static int get_port_memory(struct iCom_port *iCom_port_info) +{ + int index; + int number_of_buffs; + unsigned long int stgAddr; + unsigned long int startStgAddr; + unsigned long int offset; + + TRACE(iCom_port_info,TRACE_GET_PORT_MEM,0); + + iCom_port_info->xmit_buf = (unsigned char *)kmalloc(4096,GFP_KERNEL | GFP_DMA); + iCom_port_info->xmit_buf_pci = pci_map_single(iCom_adapter_info[iCom_port_info->adapter].pci_dev, + (void *)iCom_port_info->xmit_buf, + 4096, + PCI_DMA_BIDIRECTIONAL); + + if (!iCom_port_info->xmit_buf) { + printk("iCom: ERROR, Can not allocate Transmit buffer\n"); + return -ENOMEM; + } + TRACE(iCom_port_info,TRACE_GET_PORT_MEM,(unsigned long)iCom_port_info->xmit_buf); + + iCom_port_info->recv_buf = (unsigned char *)kmalloc(4096,GFP_KERNEL | GFP_DMA); + iCom_port_info->recv_buf_pci = pci_map_single(iCom_adapter_info[iCom_port_info->adapter].pci_dev, + (void *)iCom_port_info->recv_buf, + 4096, + PCI_DMA_BIDIRECTIONAL); + + if (!iCom_port_info->recv_buf) { + printk("iCom: ERROR, Can not allocate Receive buffer\n"); + return_port_memory(iCom_port_info); + return -ENOMEM; + } + TRACE(iCom_port_info,TRACE_GET_PORT_MEM,(unsigned long)iCom_port_info->recv_buf); + + iCom_port_info->statStg = (struct statusArea *)kmalloc(4096,GFP_KERNEL | GFP_DMA); + iCom_port_info->statStg_pci = pci_map_single(iCom_adapter_info[iCom_port_info->adapter].pci_dev, + (void *)iCom_port_info->statStg, + 4096, + PCI_DMA_BIDIRECTIONAL); + + if (!iCom_port_info->statStg) { + printk("iCom: ERROR, Can not allocate Status buffer\n"); + return_port_memory(iCom_port_info); + return -ENOMEM; + } + TRACE(iCom_port_info,TRACE_GET_PORT_MEM,(unsigned long)iCom_port_info->statStg); + + iCom_port_info->xmitRestart = (u32 *)kmalloc(sizeof(u32),GFP_KERNEL | GFP_DMA); + iCom_port_info->xmitRestart_pci = pci_map_single(iCom_adapter_info[iCom_port_info->adapter].pci_dev, + iCom_port_info->xmitRestart, + 4, + PCI_DMA_BIDIRECTIONAL); + + if (!iCom_port_info->xmitRestart) { + printk("iCom: ERROR, Can not allocate xmit Restart buffer\n"); + return_port_memory(iCom_port_info); + return -ENOMEM; + } + + memset(iCom_port_info->statStg, 0,4096); + + /* FODs */ + number_of_buffs = NUM_XBUFFS; + stgAddr = (unsigned long int)iCom_port_info->statStg; + startStgAddr = stgAddr; + for (index = 0; index < number_of_buffs; index++) + { + TRACE(iCom_port_info,TRACE_FOD_ADDR,stgAddr); + stgAddr = stgAddr + sizeof(iCom_port_info->statStg->xmit[0]); + if (index < (number_of_buffs - 1)) + { + iCom_port_info->statStg->xmit[index].flags = 0; + iCom_port_info->statStg->xmit[index].leNext = 0; + iCom_port_info->statStg->xmit[index].leNextASD = 0; + iCom_port_info->statStg->xmit[index].leLengthASD = (unsigned short int)cpu_to_le16(XMIT_BUFF_SZ); + iCom_port_info->statStg->xmit[index].leOffsetASD = 0; + TRACE(iCom_port_info,TRACE_FOD_ADDR,stgAddr); + TRACE(iCom_port_info,TRACE_FOD_XBUFF,(unsigned long)iCom_port_info->xmit_buf); + iCom_port_info->statStg->xmit[index].leBuffer = cpu_to_le32(iCom_port_info->xmit_buf_pci); + } + else if (index == (number_of_buffs - 1)) + { + iCom_port_info->statStg->xmit[index].flags = 0; + iCom_port_info->statStg->xmit[index].leNext = 0; + iCom_port_info->statStg->xmit[index].leNextASD = 0; + iCom_port_info->statStg->xmit[index].leLengthASD = (unsigned short int)cpu_to_le16(XMIT_BUFF_SZ); + iCom_port_info->statStg->xmit[index].leOffsetASD = 0; + TRACE(iCom_port_info,TRACE_FOD_XBUFF,(unsigned long)iCom_port_info->xmit_buf); + 
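/* Hand the adapter the little-endian PCI (DMA) address of the transmit buffer, not the kernel virtual address. */ +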
iCom_port_info->statStg->xmit[index].leBuffer = cpu_to_le32(iCom_port_info->xmit_buf_pci); + } + else + { + iCom_port_info->statStg->xmit[index].flags = 0; + iCom_port_info->statStg->xmit[index].leNext = 0; + iCom_port_info->statStg->xmit[index].leNextASD = 0; + iCom_port_info->statStg->xmit[index].leLengthASD = 0; + iCom_port_info->statStg->xmit[index].leOffsetASD = 0; + iCom_port_info->statStg->xmit[index].leBuffer = 0; + } + } + /* FIDs */ + startStgAddr = stgAddr; + + /* fill in every entry, even if no buffer */ + number_of_buffs = NUM_RBUFFS; + for (index = 0; index < number_of_buffs; index++) + { + TRACE(iCom_port_info,TRACE_FID_ADDR,stgAddr); + stgAddr = stgAddr + sizeof(iCom_port_info->statStg->rcv[0]); + iCom_port_info->statStg->rcv[index].leLength = 0; + iCom_port_info->statStg->rcv[index].WorkingLength = (unsigned short int)cpu_to_le16(RCV_BUFF_SZ); + if (index < (number_of_buffs - 1)) + { + offset = stgAddr - (unsigned long)iCom_port_info->statStg; + iCom_port_info->statStg->rcv[index].leNext = (unsigned long)cpu_to_le32(iCom_port_info->statStg_pci + offset); + TRACE(iCom_port_info,TRACE_FID_RBUFF,(unsigned long)iCom_port_info->recv_buf); + iCom_port_info->statStg->rcv[index].leBuffer = cpu_to_le32(iCom_port_info->recv_buf_pci); + } + else if (index == (number_of_buffs - 1)) + { + offset = startStgAddr - (unsigned long)iCom_port_info->statStg; + iCom_port_info->statStg->rcv[index].leNext = (unsigned long)cpu_to_le32(iCom_port_info->statStg_pci + offset); + TRACE(iCom_port_info,TRACE_FID_RBUFF,(unsigned long)iCom_port_info->recv_buf + 2048); + iCom_port_info->statStg->rcv[index].leBuffer = cpu_to_le32(iCom_port_info->recv_buf_pci + 2048); + } + else + { + iCom_port_info->statStg->rcv[index].leNext = 0; + iCom_port_info->statStg->rcv[index].leBuffer = 0; + } + } + + return 0; +} + +static void return_port_memory(struct iCom_port *iCom_port_info) +{ + TRACE(iCom_port_info, TRACE_RET_PORT_MEM,0); + if (iCom_port_info->recv_buf) { + pci_unmap_single(iCom_adapter_info[iCom_port_info->adapter].pci_dev, + iCom_port_info->recv_buf_pci, + 4096, + PCI_DMA_BIDIRECTIONAL); + kfree((void *)iCom_port_info->recv_buf); + iCom_port_info->recv_buf = 0; + } + if (iCom_port_info->xmit_buf) { + pci_unmap_single(iCom_adapter_info[iCom_port_info->adapter].pci_dev, + iCom_port_info->xmit_buf_pci, + 4096, + PCI_DMA_BIDIRECTIONAL); + kfree((void *)iCom_port_info->xmit_buf); + iCom_port_info->xmit_buf = 0; + } + if (iCom_port_info->statStg) { + pci_unmap_single(iCom_adapter_info[iCom_port_info->adapter].pci_dev, + iCom_port_info->statStg_pci, + 4096, + PCI_DMA_BIDIRECTIONAL); + kfree((void *)iCom_port_info->statStg); + iCom_port_info->statStg = 0; + } + + if (iCom_port_info->xmitRestart) { + pci_unmap_single(iCom_adapter_info[iCom_port_info->adapter].pci_dev, + iCom_port_info->xmitRestart_pci, + 4, + PCI_DMA_BIDIRECTIONAL); + kfree(iCom_port_info->xmitRestart); + iCom_port_info->xmitRestart = 0; + } + TRACE(iCom_port_info,TRACE_RET_MEM,0); +} + +static void stop_processor(struct iCom_port *iCom_port_info) +{ + unsigned long temp; + + switch (iCom_port_info->port) { + case 0: + temp = iCom_readl(&iCom_port_info->global_reg->control); + temp = (temp & ~ICOM_CONTROL_START_A) | ICOM_CONTROL_STOP_A; + iCom_writel(temp,&iCom_port_info->global_reg->control); + TRACE(iCom_port_info,TRACE_STOP_PROC_A,0); + break; + case 1: + temp = iCom_readl(&iCom_port_info->global_reg->control); + temp = (temp & ~ICOM_CONTROL_START_B) | ICOM_CONTROL_STOP_B; + iCom_writel(temp,&iCom_port_info->global_reg->control); + 
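/* Ports A and B share the first control register; the read-modify-write above clears START and sets STOP for port B only. */ +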
TRACE(iCom_port_info,TRACE_STOP_PROC_B,0); + break; + case 2: + temp = iCom_readl(&iCom_port_info->global_reg->control_2); + temp = (temp & ~ICOM_CONTROL_START_C) | ICOM_CONTROL_STOP_C; + iCom_writel(temp,&iCom_port_info->global_reg->control_2); + TRACE(iCom_port_info,TRACE_STOP_PROC_C,0); + break; + case 3: + temp = iCom_readl(&iCom_port_info->global_reg->control_2); + temp = (temp & ~ICOM_CONTROL_START_D) | ICOM_CONTROL_STOP_D; + iCom_writel(temp,&iCom_port_info->global_reg->control_2); + TRACE(iCom_port_info,TRACE_STOP_PROC_D,0); + break; + default: + printk("iCom: ERROR: invalid port assignment\n"); + } +} + +static void start_processor(struct iCom_port *iCom_port_info) +{ + unsigned long temp; + + switch (iCom_port_info->port) { + case 0: + temp = iCom_readl(&iCom_port_info->global_reg->control); + temp = (temp & ~ICOM_CONTROL_STOP_A) | ICOM_CONTROL_START_A; + iCom_writel(temp,&iCom_port_info->global_reg->control); + TRACE(iCom_port_info,TRACE_START_PROC_A,0); + break; + case 1: + temp = iCom_readl(&iCom_port_info->global_reg->control); + temp = (temp & ~ICOM_CONTROL_STOP_B) | ICOM_CONTROL_START_B; + iCom_writel(temp,&iCom_port_info->global_reg->control); + TRACE(iCom_port_info,TRACE_START_PROC_B,0); + break; + case 2: + temp = iCom_readl(&iCom_port_info->global_reg->control_2); + temp = (temp & ~ICOM_CONTROL_STOP_C) | ICOM_CONTROL_START_C; + iCom_writel(temp,&iCom_port_info->global_reg->control_2); + TRACE(iCom_port_info,TRACE_START_PROC_C,0); + break; + case 3: + temp = iCom_readl(&iCom_port_info->global_reg->control_2); + temp = (temp & ~ICOM_CONTROL_STOP_D) | ICOM_CONTROL_START_D; + iCom_writel(temp,&iCom_port_info->global_reg->control_2); + TRACE(iCom_port_info,TRACE_START_PROC_D,0); + break; + default: + printk("iCom: ERROR: invalid port assignment\n"); + } +} + +/* + * irq = no lock + */ +static int loadCode (struct iCom_port *iCom_port_info) +{ + char *iram_ptr; + int index; + int status = 0; + char *dram_ptr = (char *)iCom_port_info->dram; + unsigned long int temp; + unsigned char *new_page; + + TRACE(iCom_port_info,TRACE_GET_MEM,0); /* this really gets memory for trace */ + TRACE(iCom_port_info,TRACE_LOAD_MEM,0); + + /* Clear out any pending interrupts */ + iCom_writew(0x3FFF,(void *)iCom_port_info->int_reg); + + TRACE(iCom_port_info,TRACE_CLEAR_INTERRUPTS,0); + + /* Stop processor */ + stop_processor(iCom_port_info); + + /* Zero out DRAM */ + for (index = 0; index < 512; index++) + { + iCom_writeb(0x00,&dram_ptr[index]); + } + + /* Load Call Setup into Adapter */ + iram_ptr = (char *)iCom_port_info->dram + ICOM_IRAM_OFFSET; + for (index = 0; index < sizeof(callSetup); index++) + { + iCom_writeb(callSetup[index],&iram_ptr[index]); + } + + /* Load Resident DCE portion of Adapter */ + iram_ptr = (char *) iCom_port_info->dram + ICOM_IRAM_OFFSET + + ICOM_DCE_IRAM_OFFSET; + + /* Load the RV dce code */ + for (index = 0; index < sizeof(resRVdce); index++) { + iCom_writeb(resRVdce[index],&iram_ptr[index]); + } + + /* Set Hardware level */ + if ((iCom_adapter_info[iCom_port_info->adapter].version | ADAPTER_V2) == ADAPTER_V2) { + iCom_writeb(V2_HARDWARE,&(iCom_port_info->dram->misc_flags)); + } + + /* Start the processor in Adapter */ + start_processor(iCom_port_info); + + /* Wait 0.1 Sec for simple Init to complete */ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(HZ/10); + + /* + * Verify Code is running + */ + status = 0; + + iCom_writeb((HDLC_PPP_PURE_ASYNC | HDLC_FF_FILL),&(iCom_port_info->dram->HDLCConfigReg)); + 
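/* Establish a default async line setup: pure-async PPP framing above, then the 0.5 second flag-fill idle timer and 8N1 (8 data bits, no parity, 1 stop bit) in the writes that follow. */ +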
iCom_writeb(0x04,&(iCom_port_info->dram->FlagFillIdleTimer)); /* 0.5 seconds */ + iCom_writeb(0x00,&(iCom_port_info->dram->CmdReg)); + iCom_writeb(0x10,&(iCom_port_info->dram->async_config3)); + iCom_writeb((ICOM_ACFG_DRIVE1 | ICOM_ACFG_NO_PARITY | ICOM_ACFG_8BPC | ICOM_ACFG_1STOP_BIT),&(iCom_port_info->dram->async_config2)); + + /*Set up data in iCom DRAM to indicate where personality + *code is located and its length. + */ + new_page = (unsigned char *)kmalloc(4096,GFP_KERNEL | GFP_DMA); + for (index = 0; index < sizeof(funcLoad); index++) { + new_page[index] = funcLoad[index]; + } + temp = pci_map_single(iCom_adapter_info[iCom_port_info->adapter].pci_dev,new_page,4096,PCI_DMA_BIDIRECTIONAL); + + iCom_writeb((char)(sizeof(funcLoad)/16),&iCom_port_info->dram->mac_length); + iCom_writel(temp,&iCom_port_info->dram->mac_load_addr); + + /*Setting the syncReg to 0x80 causes adapter to start downloading + the personality code into adapter instruction RAM. + Once code is loaded, it will begin executing and, based on + information provided above, will start DMAing data from + shared memory to adapter DRAM. + */ + iCom_writeb(START_DOWNLOAD,&iCom_port_info->dram->sync); + + /* Wait 1 Sec for data download */ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(HZ); + pci_unmap_single(iCom_adapter_info[iCom_port_info->adapter].pci_dev,temp,4096,PCI_DMA_BIDIRECTIONAL); + kfree(new_page); + + if (status != 0) + { + /* Clear out any pending interrupts */ + iCom_writew(0x3FFF,(void *)iCom_port_info->int_reg); + + /* Turn off port */ + iCom_writeb(ICOM_DISABLE,&iCom_port_info->dram->disable); + } + + return status; +} + +/* + * This routine is called to set the port to match + * the specified baud rate for a serial port. + * irq = locked + */ +static void change_speed(struct iCom_port *iCom_port_info, + struct termios *old_termios, unsigned long flags) +{ + int baud; + unsigned cflag; + int bits; + char new_config2; + char new_config3; + char tmp_byte; + int index; + int rcv_buff,xmit_buff; + unsigned long int offset; + + TRACE(iCom_port_info,TRACE_CHANGE_SPEED | TRACE_TIME,jiffies); + + if (!iCom_port_info->tty || !iCom_port_info->tty->termios) + return; + cflag = iCom_port_info->tty->termios->c_cflag; + + new_config2 = ICOM_ACFG_DRIVE1; + + /* byte size and parity */ + switch (cflag & CSIZE) { + case CS5: /* 5 bits/char */ + new_config2 |= ICOM_ACFG_5BPC; + bits = 7; + break; + case CS6: /* 6 bits/char */ + new_config2 |= ICOM_ACFG_6BPC; + bits = 8; + break; + case CS7: /* 7 bits/char */ + new_config2 |= ICOM_ACFG_7BPC; + bits = 9; + break; + case CS8: /* 8 bits/char */ + new_config2 |= ICOM_ACFG_8BPC; + bits = 10; + break; + default: bits = 10; break; + } + if (cflag & CSTOPB) { + /* 2 stop bits */ + new_config2 |= ICOM_ACFG_2STOP_BIT; + bits++; + } + if (cflag & PARENB) { + /* parity bit enabled */ + new_config2 |= ICOM_ACFG_PARITY_ENAB; + TRACE(iCom_port_info, TRACE_PARENB,0); + bits++; + } + if (cflag & PARODD) { + /* odd parity */ + new_config2 |= ICOM_ACFG_PARITY_ODD; + TRACE(iCom_port_info, TRACE_PARODD,0); + } + + /* Determine divisor based on baud rate */ + baud = tty_get_baud_rate(iCom_port_info->tty); + if (!baud) + baud = 9600; /* B0 transition handled in rs_set_termios */ + + for (index = 0; index < BAUD_TABLE_LIMIT; index++) { + if (icom_acfg_baud[index] == baud) { + new_config3 = index; + break; + } + } + + iCom_port_info->timeout = XMIT_BUFF_SZ*HZ*bits/baud; + iCom_port_info->timeout += HZ/50; /* Add .02 seconds of slop */ + + /* CTS flow control flag and modem status interrupts 
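-- the CRTSCTS check below toggles the HDLC_HDW_FLOW bit in the adapter's HDLCConfigReg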
*/ + if (cflag & CRTSCTS) { + iCom_port_info->flags |= ASYNC_CTS_FLOW; + tmp_byte = iCom_readb(&(iCom_port_info->dram->HDLCConfigReg)); + tmp_byte |= HDLC_HDW_FLOW; + iCom_writeb(tmp_byte, &(iCom_port_info->dram->HDLCConfigReg)); + } else { + iCom_port_info->flags &= ~ASYNC_CTS_FLOW; + tmp_byte = iCom_readb(&(iCom_port_info->dram->HDLCConfigReg)); + tmp_byte &= ~HDLC_HDW_FLOW; + iCom_writeb(tmp_byte, &(iCom_port_info->dram->HDLCConfigReg)); + } + if (cflag & CLOCAL) + iCom_port_info->flags &= ~ASYNC_CHECK_CD; + else { + iCom_port_info->flags |= ASYNC_CHECK_CD; + } + + /* + * Set up parity check flag + */ + iCom_port_info->read_status_mask = SA_FLAGS_OVERRUN | SA_FL_RCV_DONE; + if (I_INPCK(iCom_port_info->tty)) + iCom_port_info->read_status_mask |= SA_FLAGS_FRAME_ERROR | SA_FLAGS_PARITY_ERROR; + + if (I_BRKINT(iCom_port_info->tty) || I_PARMRK(iCom_port_info->tty)) + iCom_port_info->read_status_mask |= SA_FLAGS_BREAK_DET; + + /* + * Characters to ignore + */ + iCom_port_info->ignore_status_mask = 0; + if (I_IGNPAR(iCom_port_info->tty)) + iCom_port_info->ignore_status_mask |= SA_FLAGS_PARITY_ERROR | SA_FLAGS_FRAME_ERROR; + if (I_IGNBRK(iCom_port_info->tty)) { + iCom_port_info->ignore_status_mask |= SA_FLAGS_BREAK_DET; + /* + * If we're ignore parity and break indicators, ignore + * overruns too. (For real raw support). + */ + if (I_IGNPAR(iCom_port_info->tty)) + iCom_port_info->ignore_status_mask |= SA_FLAGS_OVERRUN; + } + + /* + * !!! ignore all characters if CREAD is not set + */ + if ((cflag & CREAD) == 0) + iCom_port_info->ignore_status_mask |= SA_FL_RCV_DONE; + + /* Turn off Receiver to prepare for reset */ + iCom_writeb(CMD_RCV_DISABLE,&iCom_port_info->dram->CmdReg); + + spin_unlock_irqrestore(&iComlock,flags); + for (index = 0; index < 10; index++) { + /* Wait 0.1 Sec for receive operations to complete*/ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(HZ/10); + + if (iCom_readb(&iCom_port_info->dram->PrevCmdReg) == 0x00) { + break; + } + } + spin_lock_irqsave(&iComlock, flags); + + /* clear all current buffers of data */ + for (rcv_buff = 0; rcv_buff < NUM_RBUFFS; rcv_buff++) { + iCom_port_info->statStg->rcv[rcv_buff].flags = 0; + iCom_port_info->statStg->rcv[rcv_buff].leLength = 0; + iCom_port_info->statStg->rcv[rcv_buff].WorkingLength = (unsigned short int)cpu_to_le16(RCV_BUFF_SZ); + } + + for (xmit_buff = 0; xmit_buff < NUM_XBUFFS; xmit_buff++) { + iCom_port_info->statStg->xmit[xmit_buff].flags = 0; + } + + /* activate changes and start xmit and receiver here */ + /* Enable the receiver */ + iCom_writeb(new_config3,&(iCom_port_info->dram->async_config3)); + iCom_writeb(new_config2,&(iCom_port_info->dram->async_config2)); + tmp_byte = iCom_readb(&(iCom_port_info->dram->HDLCConfigReg)); + tmp_byte |= HDLC_PPP_PURE_ASYNC | HDLC_FF_FILL; + iCom_writeb(tmp_byte,&(iCom_port_info->dram->HDLCConfigReg)); + iCom_writeb(0x04, &(iCom_port_info->dram->FlagFillIdleTimer)); /* 0.5 seconds */ + iCom_writeb(0xFF, &(iCom_port_info->dram->ier)); /* enable modem signal interrupts */ + + /* reset processor */ + iCom_writeb(CMD_RESTART,&iCom_port_info->dram->CmdReg); + spin_unlock_irqrestore(&iComlock, flags); + for (index = 0; index < 10; index++) { + /* Wait for reset operation */ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(HZ/10); + + if (iCom_readb(&iCom_port_info->dram->CmdReg) == 0x00) { + break; + } + } + spin_lock_irqsave(&iComlock, flags); + + /* Enable Transmitter and Reciever */ + offset = (unsigned long int)&iCom_port_info->statStg->rcv[0] - (unsigned long 
int)iCom_port_info->statStg; + iCom_writel(iCom_port_info->statStg_pci + offset,&iCom_port_info->dram->RcvStatusAddr); + iCom_port_info->next_rcv = 0; + iCom_port_info->put_length = 0; + *iCom_port_info->xmitRestart = 0; + iCom_writel(iCom_port_info->xmitRestart_pci,&iCom_port_info->dram->XmitStatusAddr); + TRACE(iCom_port_info,TRACE_XR_ENAB,0); + iCom_writeb(CMD_XMIT_RCV_ENABLE,&iCom_port_info->dram->CmdReg); +} + +static int block_til_ready(struct tty_struct *tty, struct file * filp, + struct iCom_port *iCom_port_info, long int flags) +{ + DECLARE_WAITQUEUE(wait, current); + int retval; + int do_clocal = 0, extra_count = 0; + + /* + * If the device is in the middle of being closed, then block + * until it's done, and then try again. + */ + if (tty_hung_up_p(filp) || + (iCom_port_info->flags & ASYNC_CLOSING)) { + if (iCom_port_info->flags & ASYNC_CLOSING) { + spin_unlock_irqrestore(&iComlock,flags); + interruptible_sleep_on(&iCom_port_info->close_wait); + spin_lock_irqsave(&iComlock,flags); + } +#ifdef SERIAL_DO_RESTART + return ((iCom_port_info->flags & ASYNC_HUP_NOTIFY) ? + -EAGAIN : -ERESTARTSYS); +#else + return -EAGAIN; +#endif + } + + /* + * If this is a callout device, then just make sure the normal + * device isn't being used. + */ + if (tty->driver.subtype == SERIAL_TYPE_CALLOUT) { + if (iCom_port_info->flags & ASYNC_NORMAL_ACTIVE) + return -EBUSY; + if ((iCom_port_info->flags & ASYNC_CALLOUT_ACTIVE) && + (iCom_port_info->flags & ASYNC_SESSION_LOCKOUT) && + (iCom_port_info->session != current->session)) + return -EBUSY; + if ((iCom_port_info->flags & ASYNC_CALLOUT_ACTIVE) && + (iCom_port_info->flags & ASYNC_PGRP_LOCKOUT) && + (iCom_port_info->pgrp != current->pgrp)) + return -EBUSY; + iCom_port_info->flags |= ASYNC_CALLOUT_ACTIVE; + return 0; + } + + /* + * If non-blocking mode is set, or the port is not enabled, + * then make the check up front and then exit. + */ + if ((filp->f_flags & O_NONBLOCK) || + (tty->flags & (1 << TTY_IO_ERROR))) { + if (iCom_port_info->flags & ASYNC_CALLOUT_ACTIVE) + return -EBUSY; + iCom_port_info->flags |= ASYNC_NORMAL_ACTIVE; + return 0; + } + + if (iCom_port_info->flags & ASYNC_CALLOUT_ACTIVE) { + if (iCom_port_info->normal_termios.c_cflag & CLOCAL) + do_clocal = 1; + } else { + if (tty->termios->c_cflag & CLOCAL) + do_clocal = 1; + } + + /* + * Block waiting for the carrier detect and the line to become + * free (i.e., not in use by the callout). While we are in + * this loop, open_active_count is dropped by one, so that + * rs_close() knows when to free things. We restore it upon + * exit, either normal or abnormal. 
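+ * Each pass through the loop below re-raises DTR and RTS when
+ * allowed, then sleeps until carrier detect is reported in the
+ * isr (bit 0x20), the port is hung up or shut down, or a signal
+ * arrives.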
+ */ + retval = 0; + add_wait_queue(&iCom_port_info->open_wait, &wait); + + if (!tty_hung_up_p(filp)) { + extra_count = 1; + iCom_port_info->open_active_count--; + } + iCom_port_info->blocked_open++; + while (1) { + if (!(iCom_port_info->flags & ASYNC_CALLOUT_ACTIVE) && + (tty->termios->c_cflag & CBAUD)) { + /* raise DTR and RTS */ + TRACE(iCom_port_info,TRACE_RAISE_DTR_RTS,0); + iCom_writeb(0xC0,&iCom_port_info->dram->osr); + } + current->state = TASK_INTERRUPTIBLE; + if (tty_hung_up_p(filp) || + !(iCom_port_info->flags & ASYNC_INITIALIZED)) { +#ifdef SERIAL_DO_RESTART + if (iCom_port_info->flags & ASYNC_HUP_NOTIFY) + retval = -EAGAIN; + else + retval = -ERESTARTSYS; +#else + retval = -EAGAIN; +#endif + break; + } + + if (!(iCom_port_info->flags & ASYNC_CALLOUT_ACTIVE) && + !(iCom_port_info->flags & ASYNC_CLOSING) && + (do_clocal || (iCom_readb(&iCom_port_info->dram->isr) & 0x20))) /* 0x20 = Carrier Detect */ + break; + if (signal_pending(current)) { + retval = -ERESTARTSYS; + break; + } + spin_unlock_irqrestore(&iComlock,flags); + printk("iCom: WAIT for CD\n"); + schedule(); + spin_lock_irqsave(&iComlock,flags); + } + current->state = TASK_RUNNING; + remove_wait_queue(&iCom_port_info->open_wait, &wait); + if (extra_count) + iCom_port_info->open_active_count++; + iCom_port_info->blocked_open--; + + if (retval) + return retval; + iCom_port_info->flags |= ASYNC_NORMAL_ACTIVE; + + return 0; +} + +static int startup(struct iCom_port *iCom_port_info, unsigned long flags) +{ + int retval=0; + unsigned long int temp; + unsigned char cable_id; + + TRACE(iCom_port_info,TRACE_STARTUP,0); + + if (iCom_port_info->flags & ASYNC_INITIALIZED) { + goto errout; + } + + /* + * check Cable ID + */ + cable_id = iCom_readb(&iCom_port_info->dram->cable_id); + TRACE(iCom_port_info,TRACE_CABLE_ID,cable_id); + if (cable_id & ICOM_CABLE_ID_VALID) + { + /* Get cable ID into the lower 4 bits (standard form) */ + cable_id = (cable_id & ICOM_CABLE_ID_MASK) >> 4; + + /* Check Cable ID valid */ + if ((cable_id == RS232_CABLE) || (cable_id == V24_CABLE) || + (cable_id == V35_CABLE) || (cable_id == V36_CABLE)) + { + ; + } + } + + /* + * set appropriate modem signals + */ + if (iCom_port_info->tty->termios->c_cflag & CBAUD) { + /* raise DTR and RTS */ + TRACE(iCom_port_info,TRACE_RAISE_DTR_RTS,0); + iCom_writeb(0xC0,&iCom_port_info->dram->osr); + } + + /* + * Finally, clear and enable interrupts + */ + switch (iCom_port_info->port) { + case 0: + /* Clear out any pending interrupts */ + iCom_writew(0x00FF,(void *)iCom_port_info->int_reg); + + /* Enable interrupts for first port */ + TRACE(iCom_port_info,TRACE_ENABLE_INTERRUPTS_PA,0); + temp = iCom_readl(&iCom_port_info->global_reg->int_mask); + iCom_writel((temp & ~ICOM_INT_MASK_PRC_A),&iCom_port_info->global_reg->int_mask); + break; + case 1: + /* Clear out any pending interrupts */ + iCom_writew(0x3F00,(void *)iCom_port_info->int_reg); + + /* Enable interrupts for second port */ + TRACE(iCom_port_info,TRACE_ENABLE_INTERRUPTS_PB,0); + temp = iCom_readl(&iCom_port_info->global_reg->int_mask); + iCom_writel((temp & ~ICOM_INT_MASK_PRC_B),&iCom_port_info->global_reg->int_mask); + break; + case 2: + /* Clear out any pending interrupts */ + iCom_writew(0x00FF,(void *)iCom_port_info->int_reg); + + /* Enable interrupts for first port */ + TRACE(iCom_port_info,TRACE_ENABLE_INTERRUPTS_PC,0); + temp = iCom_readl(&iCom_port_info->global_reg->int_mask_2); + iCom_writel((temp & ~ICOM_INT_MASK_PRC_C),&iCom_port_info->global_reg->int_mask_2); + break; + case 3: + /* Clear out any pending 
interrupts */
+ iCom_writew(0x3F00,(void *)iCom_port_info->int_reg);
+
+ /* Enable interrupts for second port */
+ TRACE(iCom_port_info,TRACE_ENABLE_INTERRUPTS_PD,0);
+ temp = iCom_readl(&iCom_port_info->global_reg->int_mask_2);
+ iCom_writel((temp & ~ICOM_INT_MASK_PRC_D),&iCom_port_info->global_reg->int_mask_2);
+ break;
+ default:
+ printk("iCom: ERROR: Invalid port defined\n");
+ }
+
+ if (iCom_port_info->tty)
+ clear_bit(TTY_IO_ERROR, &iCom_port_info->tty->flags);
+
+ /*
+ * Set up the tty->alt_speed kludge
+ */
+ if (iCom_port_info->tty) {
+ if ((iCom_port_info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI)
+ iCom_port_info->tty->alt_speed = 57600;
+ if ((iCom_port_info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI)
+ iCom_port_info->tty->alt_speed = 115200;
+ if ((iCom_port_info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI)
+ iCom_port_info->tty->alt_speed = 230400;
+ if ((iCom_port_info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP)
+ iCom_port_info->tty->alt_speed = 460800;
+ }
+
+ /*
+ * and set the speed of the serial port
+ */
+ change_speed(iCom_port_info, 0, flags);
+
+ iCom_port_info->flags |= ASYNC_INITIALIZED;
+ return 0;
+
+ errout:
+ return retval;
+}
+
+/*
+ * This routine will shutdown a serial port; interrupts are disabled, and
+ * DTR is dropped if the hangup on close termio flag is on.
+ *
+ * irq = locked
+ */
+static void shutdown(struct iCom_port * iCom_port_info)
+{
+ unsigned long int temp;
+ unsigned char cmdReg;
+
+ TRACE(iCom_port_info,TRACE_SHUTDOWN | TRACE_TIME,jiffies);
+
+ if (!(iCom_port_info->flags & ASYNC_INITIALIZED))
+ return;
+
+ /*
+ * clear delta_msr_wait queue to avoid mem leaks: we may free the irq
+ * here so the queue might never be woken up
+ */
+ wake_up_interruptible(&iCom_port_info->delta_msr_wait);
+
+ /*
+ * disable all interrupts
+ */
+ switch (iCom_port_info->port) {
+ case 0:
+ TRACE(iCom_port_info,TRACE_DIS_INTERRUPTS_PA,0);
+ temp = iCom_readl(&iCom_port_info->global_reg->int_mask);
+ iCom_writel((temp | ICOM_INT_MASK_PRC_A),&iCom_port_info->global_reg->int_mask);
+ break;
+ case 1:
+ TRACE(iCom_port_info,TRACE_DIS_INTERRUPTS_PB,0);
+ temp = iCom_readl(&iCom_port_info->global_reg->int_mask);
+ iCom_writel((temp | ICOM_INT_MASK_PRC_B),&iCom_port_info->global_reg->int_mask);
+ break;
+ case 2:
+ TRACE(iCom_port_info,TRACE_DIS_INTERRUPTS_PC,0);
+ temp = iCom_readl(&iCom_port_info->global_reg->int_mask_2);
+ iCom_writel((temp | ICOM_INT_MASK_PRC_C),&iCom_port_info->global_reg->int_mask_2);
+ break;
+ case 3:
+ TRACE(iCom_port_info,TRACE_DIS_INTERRUPTS_PD,0);
+ temp = iCom_readl(&iCom_port_info->global_reg->int_mask_2);
+ iCom_writel((temp | ICOM_INT_MASK_PRC_D),&iCom_port_info->global_reg->int_mask_2);
+ break;
+ default:
+ printk("iCom: ERROR: Invalid port assignment\n");
+ }
+
+ /*
+ * disable break condition
+ */
+ cmdReg = iCom_readb(&iCom_port_info->dram->CmdReg);
+ if (cmdReg & CMD_SND_BREAK) {
+ iCom_writeb(cmdReg & ~CMD_SND_BREAK,&iCom_port_info->dram->CmdReg);
+ }
+
+ if (!iCom_port_info->tty || (iCom_port_info->tty->termios->c_cflag & HUPCL)) {
+ /* drop DTR and RTS */
+ TRACE(iCom_port_info,TRACE_DROP_DTR_RTS,0);
+ iCom_writeb(0x00,&iCom_port_info->dram->osr);
+ }
+
+ if (iCom_port_info->tty)
+ set_bit(TTY_IO_ERROR, &iCom_port_info->tty->flags);
+
+ iCom_port_info->flags &= ~ASYNC_INITIALIZED;
+}
+
+/*
+ Primary interface routines to iCom Driver
+*/
+static int iCom_open(struct tty_struct * tty, struct file * filp)
+{
+ int line;
+ int adapter_entry;
+ int port_entry;
+ struct iCom_port *iCom_port_info;
+ int retval;
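+ /*
+ * Open path in brief: decode adapter and port from the minor
+ * number, take the module and open counts, run startup() to
+ * enable the port and its interrupts, then block_til_ready() to
+ * wait for carrier unless O_NONBLOCK or CLOCAL applies.
+ */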
unsigned long flags; + + /* + Minor Number + _ _ _ _ b (lower nibble) + ___ ___ + | | + | - port number (lowest 2 bits is port identifier) + - adapter number (remaining higher order bits identify adapter #) + */ + + MOD_INC_USE_COUNT; + line = MINOR(tty->device) - tty->driver.minor_start; + if ((line < 0) || (line >= NR_PORTS)) { + MOD_DEC_USE_COUNT; + return -ENODEV; + } + + adapter_entry = (line & 0xFFFE) >> 2; /* shift adapter # into position */ + port_entry = line & 0x0003; /* mask of port number */ + + if ((port_entry == 1) && + (iCom_adapter_info[adapter_entry].version == ADAPTER_V2) && + (iCom_adapter_info[adapter_entry].subsystem_id != FOUR_PORT_MODEL)) { + port_entry = 2; + } + iCom_port_info = &iCom_adapter_info[adapter_entry].port_info[port_entry]; + + spin_lock_irqsave(&iComlock,flags); + TRACE(iCom_port_info,TRACE_DEVICE_NUMB,tty->device); + tty->driver_data = iCom_port_info; + iCom_port_info->tty = tty; + iCom_port_info->open_active_count++; + + /* + * If the port is the middle of closing, bail out now + */ + if (tty_hung_up_p(filp) || + (iCom_port_info->flags & ASYNC_CLOSING)) { + + spin_unlock_irqrestore(&iComlock,flags); + if (iCom_port_info->flags & ASYNC_CLOSING) + interruptible_sleep_on(&iCom_port_info->close_wait); +#ifdef SERIAL_DO_RESTART + return ((iCom_port_info->flags & ASYNC_HUP_NOTIFY) ? + -EAGAIN : -ERESTARTSYS); +#else + return -EAGAIN; +#endif + } + + /* + * Start up serial port + */ + retval = startup(iCom_port_info, flags); + if (retval) { + /* reset open variables */ + TRACE(iCom_port_info,TRACE_STARTUP_ERROR,0); + spin_unlock_irqrestore(&iComlock,flags); + return retval; + } + + retval = block_til_ready(tty, filp, iCom_port_info, flags); + if (retval) { + spin_unlock_irqrestore(&iComlock,flags); + return retval; + } + + if ((iCom_port_info->open_active_count == 1) && + (iCom_port_info->flags & ASYNC_SPLIT_TERMIOS)) { + if (tty->driver.subtype == SERIAL_TYPE_NORMAL) + *tty->termios = iCom_port_info->normal_termios; + else + *tty->termios = iCom_port_info->callout_termios; + change_speed(iCom_port_info, 0, flags); + } + + iCom_port_info->session = current->session; + iCom_port_info->pgrp = current->pgrp; + + spin_unlock_irqrestore(&iComlock,flags); + return 0; +} + +/* + * ------------------------------------------------------------ + * iCom_close() + * + * This routine is called when the serial port gets closed. First, we + * wait for the last remaining data to be sent. Then, we unlink its + * async structure from the interrupt chain if necessary. + * ------------------------------------------------------------ + */ +static void iCom_close(struct tty_struct * tty, struct file * filp) +{ + struct iCom_port *iCom_port_info; + unsigned long flags; + unsigned char cmdReg; + + + if (!tty) { + printk("iCom: iCom_close - no tty\n"); + return; + } + + iCom_port_info = (struct iCom_port *)tty->driver_data; + if (!iCom_port_info) { + printk("iCom: iCom_close - no tty->driver_data\n"); + return; + } + + TRACE(iCom_port_info,TRACE_CLOSE,0); + spin_lock_irqsave(&iComlock,flags); + + if (tty_hung_up_p(filp)) { + TRACE(iCom_port_info,TRACE_CLOSE_HANGUP,0); + MOD_DEC_USE_COUNT; + spin_unlock_irqrestore(&iComlock,flags); + return; + } + + if ((tty->count == 1) && (iCom_port_info->open_active_count != 1)) { + /* + * Uh, oh. tty->count is 1, which means that the tty + * structure will be freed. open_active_count should always + * be one in these conditions. If it's greater than + * one, we've got real problems, since it means the + * serial port won't be shutdown. 
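+ * Clamp it to 1 here so the close path below shuts the port
+ * down exactly once.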
+ */ + iCom_port_info->open_active_count = 1; + } + + if (--iCom_port_info->open_active_count < 0) { + iCom_port_info->open_active_count = 0; + } + + if (iCom_port_info->open_active_count) { + TRACE(iCom_port_info,TRACE_OPEN_ACTIVE,0); + MOD_DEC_USE_COUNT; + spin_unlock_irqrestore(&iComlock,flags); + return; + } + iCom_port_info->flags |= ASYNC_CLOSING; + + /* + * Save the termios structure, since this port may have + * separate termios for callout and dialin. + */ + if (iCom_port_info->flags & ASYNC_NORMAL_ACTIVE) + iCom_port_info->normal_termios = *tty->termios; + if (iCom_port_info->flags & ASYNC_CALLOUT_ACTIVE) + iCom_port_info->callout_termios = *tty->termios; + + /* + * Now we wait for the transmit buffer to clear; and we notify + * the line discipline to only process XON/XOFF characters. + */ + tty->closing = 1; + if (iCom_port_info->closing_wait != ASYNC_CLOSING_WAIT_NONE) { + spin_unlock_irqrestore(&iComlock,flags); + tty_wait_until_sent(tty, iCom_port_info->closing_wait); + spin_lock_irqsave(&iComlock,flags); + } + + /* + * At this point we stop accepting input. To do this, we + * disable the receive line status interrupts, and tell the + * interrupt driver to stop checking the data ready bit in the + * line status register. + */ + if (iCom_port_info->flags & ASYNC_INITIALIZED) { + cmdReg = iCom_readb(&iCom_port_info->dram->CmdReg); + iCom_writeb(cmdReg & (unsigned char)~CMD_RCV_ENABLE,&iCom_port_info->dram->CmdReg); + + /* + * Before we drop DTR, make sure the UART transmitter + * has completely drained; this is especially + * important if there is a transmit FIFO! + */ + spin_unlock_irqrestore(&iComlock,flags); + iCom_wait_until_sent(tty, iCom_port_info->timeout); + spin_lock_irqsave(&iComlock,flags); + } + + shutdown(iCom_port_info); + + spin_unlock_irqrestore(&iComlock,flags); + if (tty->driver.flush_buffer) + tty->driver.flush_buffer(tty); + if (tty->ldisc.flush_buffer) + tty->ldisc.flush_buffer(tty); + spin_lock_irqsave(&iComlock,flags); + tty->closing = 0; + iCom_port_info->event = 0; + iCom_port_info->tty = 0; + + if (iCom_port_info->blocked_open) { + if (iCom_port_info->close_delay) { + current->state = TASK_INTERRUPTIBLE; + spin_unlock_irqrestore(&iComlock,flags); + schedule_timeout(iCom_port_info->close_delay); + spin_lock_irqsave(&iComlock,flags); + } + wake_up_interruptible(&iCom_port_info->open_wait); + } + iCom_port_info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CALLOUT_ACTIVE| + ASYNC_CLOSING); + + wake_up_interruptible(&iCom_port_info->close_wait); + + MOD_DEC_USE_COUNT; + spin_unlock_irqrestore(&iComlock,flags); +} + +static int iCom_write(struct tty_struct * tty, int from_user, + const unsigned char * buf, int count) +{ + struct iCom_port *iCom_port_info = (struct iCom_port *)tty->driver_data; + unsigned long data_count = count; + unsigned char *data; + unsigned char cmdReg; + unsigned long int offset; + unsigned long int flags; + + + if (!tty) { + printk("iCom: iCom_write - no tty\n"); + return 0; + } + + spin_lock_irqsave(&iComlock,flags); + + iCom_port_info = (struct iCom_port *)tty->driver_data; + TRACE(iCom_port_info,TRACE_WRITE | TRACE_TIME,jiffies); + + down(&tmp_buf_sem); + + if (cpu_to_le16(iCom_port_info->statStg->xmit[0].flags) & SA_FLAGS_READY_TO_XMIT) { + TRACE(iCom_port_info,TRACE_WRITE_FULL,0); + up(&tmp_buf_sem); + spin_unlock_irqrestore(&iComlock,flags); + return 0; + } + + if (data_count > XMIT_BUFF_SZ) + data_count = XMIT_BUFF_SZ; + + if (from_user) { + data_count -= copy_from_user(iCom_port_info->xmit_buf, buf, data_count); + if (!data_count) { 
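+ /*
+ * copy_from_user() returns the number of bytes it could NOT copy,
+ * so data_count only reaches zero when nothing at all was copied;
+ * that case is reported as -EFAULT below.
+ */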
+ TRACE(iCom_port_info,TRACE_WRITE_NODATA,0); + up(&tmp_buf_sem); + spin_unlock_irqrestore(&iComlock,flags); + return -EFAULT; + } + } else { + memcpy(iCom_port_info->xmit_buf, buf, data_count); + } + + data = iCom_port_info->xmit_buf; + + if (data_count) { + iCom_port_info->statStg->xmit[0].flags = (unsigned short int)cpu_to_le16(SA_FLAGS_READY_TO_XMIT); + iCom_port_info->statStg->xmit[0].leLength = (unsigned short int)cpu_to_le16(data_count); + offset = (unsigned long int)&iCom_port_info->statStg->xmit[0] - (unsigned long int)iCom_port_info->statStg; + *iCom_port_info->xmitRestart = cpu_to_le32(iCom_port_info->statStg_pci + offset); + cmdReg = iCom_readb(&iCom_port_info->dram->CmdReg); + iCom_writeb(cmdReg | CMD_XMIT_RCV_ENABLE,&iCom_port_info->dram->CmdReg); + iCom_writeb(START_XMIT,&iCom_port_info->dram->StartXmitCmd); + TRACE(iCom_port_info,TRACE_WRITE_START,data_count); + } + + up(&tmp_buf_sem); + spin_unlock_irqrestore(&iComlock,flags); + + return data_count; +} + +static void iCom_put_char(struct tty_struct * tty, unsigned char ch) +{ + /* iCom_put_char adds the character to the current buffer, the + * data is not actually sent until iCom_flush_chars is called. + * Per definition iCom_flush_chars MUST be called after + * iCom_put_char + */ + + unsigned char *data; + struct iCom_port *iCom_port_info = (struct iCom_port *)tty->driver_data; + unsigned long int flags; + + spin_lock_irqsave(&iComlock,flags); + TRACE(iCom_port_info,TRACE_PUT_CHAR, ch); + + down(&tmp_buf_sem); + + if (cpu_to_le16(iCom_port_info->statStg->xmit[0].flags) & SA_FLAGS_READY_TO_XMIT) { + TRACE(iCom_port_info,TRACE_PUT_FULL,0); + up(&tmp_buf_sem); + spin_unlock_irqrestore(&iComlock,flags); + return; + } + + data = iCom_port_info->xmit_buf; + data[iCom_port_info->put_length] = ch; + + if (!tty->stopped && !tty->hw_stopped) { + iCom_port_info->put_length++; + } + + up(&tmp_buf_sem); + spin_unlock_irqrestore(&iComlock,flags); +} + +static void iCom_flush_chars(struct tty_struct * tty) +{ + struct iCom_port *iCom_port_info = (struct iCom_port *)tty->driver_data; + unsigned char cmdReg; + unsigned long int offset; + unsigned long int flags; + + spin_lock_irqsave(&iComlock,flags); + TRACE(iCom_port_info,TRACE_FLUSH_CHAR | TRACE_TIME,jiffies); + if (iCom_port_info->put_length) { + TRACE(iCom_port_info,TRACE_START_FLUSH,iCom_port_info->put_length); + iCom_port_info->statStg->xmit[0].flags = (unsigned short int)cpu_to_le16(SA_FLAGS_READY_TO_XMIT); + iCom_port_info->statStg->xmit[0].leLength = (unsigned short int)cpu_to_le16(iCom_port_info->put_length); + offset = (unsigned long int)&iCom_port_info->statStg->xmit[0] - (unsigned long int)iCom_port_info->statStg; + *iCom_port_info->xmitRestart = cpu_to_le32(iCom_port_info->statStg_pci + offset); + cmdReg = iCom_readb(&iCom_port_info->dram->CmdReg); + iCom_writeb(cmdReg | CMD_XMIT_RCV_ENABLE,&iCom_port_info->dram->CmdReg); + iCom_writeb(START_XMIT,&iCom_port_info->dram->StartXmitCmd); + } + iCom_port_info->put_length = 0; + spin_unlock_irqrestore(&iComlock,flags); +} + +static int iCom_write_room(struct tty_struct * tty) +{ + int bytes_avail; + struct iCom_port *iCom_port_info = tty->driver_data; + + if (cpu_to_le16(iCom_port_info->statStg->xmit[0].flags) & SA_FLAGS_READY_TO_XMIT) + bytes_avail = 0; + else + bytes_avail = XMIT_BUFF_SZ; + + TRACE(iCom_port_info,TRACE_WRITE_ROOM,bytes_avail); + return bytes_avail; +} + +static int iCom_chars_in_buffer(struct tty_struct * tty) +{ + unsigned long int dram; + struct iCom_port *iCom_port_info = (struct iCom_port 
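+ /*
+ * (While a transmit is outstanding, the residual byte count is
+ * read back from offset 0x168 of the port DRAM.)
+ */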
*)tty->driver_data; + int number_remaining = 0; + + TRACE(iCom_port_info,TRACE_CHARS_IN_BUFF,0); + if (cpu_to_le16(iCom_port_info->statStg->xmit[0].flags) & SA_FLAGS_READY_TO_XMIT) { + dram = (unsigned long int)iCom_port_info->dram; + number_remaining = iCom_readw((void *)(dram + 0x168)); + TRACE(iCom_port_info,TRACE_CHARS_REMAIN,number_remaining); + } + return number_remaining; +} + +static int get_modem_info(struct iCom_port * iCom_port_info, unsigned int *value) +{ + unsigned char status,control; + unsigned int result; + + TRACE(iCom_port_info,TRACE_GET_MODEM,0); + + status = iCom_readb(&iCom_port_info->dram->isr); + control = iCom_readb(&iCom_port_info->dram->osr); + + result = ((control & 0x40) ? TIOCM_RTS : 0) + | ((control & DTR) ? TIOCM_DTR : 0) + | ((status & DCD) ? TIOCM_CAR : 0) + | ((status & RI ) ? TIOCM_RNG : 0) + | ((status & DSR) ? TIOCM_DSR : 0) + | ((status & CTS) ? TIOCM_CTS : 0); + return put_user(result,value); +} + +static int set_modem_info(struct iCom_port * iCom_port_info, unsigned int cmd, + unsigned int *value) +{ + int error; + unsigned int arg; + unsigned char local_osr; + + TRACE(iCom_port_info,TRACE_SET_MODEM,0); + local_osr = iCom_readb(&iCom_port_info->dram->osr); + + error = get_user(arg, value); + if (error) + return error; + switch (cmd) { + case TIOCMBIS: + if (arg & TIOCM_RTS) { + TRACE(iCom_port_info,TRACE_RAISE_RTS,0); + local_osr |= RTS; + } + if (arg & TIOCM_DTR) { + TRACE(iCom_port_info,TRACE_RAISE_DTR,0); + local_osr |= DTR; + } + break; + case TIOCMBIC: + if (arg & TIOCM_RTS) { + TRACE(iCom_port_info,TRACE_LOWER_RTS,0); + local_osr &= ~RTS; + } + if (arg & TIOCM_DTR) { + TRACE(iCom_port_info,TRACE_LOWER_DTR,0); + local_osr &= ~DTR; + } + break; + case TIOCMSET: + local_osr = ((local_osr & ~(RTS | DTR)) + | ((arg & TIOCM_RTS) ? RTS : 0) + | ((arg & TIOCM_DTR) ? DTR : 0)); + break; + default: + return -EINVAL; + } + + iCom_writeb(local_osr,&iCom_port_info->dram->osr); + return 0; +} + +static int get_serial_info(struct iCom_port * iCom_port_info, + struct serial_struct * retinfo) +{ + struct serial_struct tmp; + + TRACE(iCom_port_info,TRACE_GET_SERIAL,0); + + if (!retinfo) + return -EFAULT; + memset(&tmp, 0, sizeof(tmp)); + tmp.type = 0x00; /* device specific, PORT_UNKNOWN */ + tmp.line = iCom_port_info->adapter; /* adapter number */ + tmp.port = iCom_port_info->port; /* port number on adapter */ + tmp.irq = iCom_adapter_info[iCom_port_info->adapter].irq_number; + tmp.flags = iCom_port_info->flags; + tmp.xmit_fifo_size = XMIT_BUFF_SZ; + tmp.baud_base = 0x00; /* device specific */ + tmp.close_delay = iCom_port_info->close_delay; + tmp.closing_wait = iCom_port_info->closing_wait; + tmp.custom_divisor = 0x00; /* device specific */ + tmp.hub6 = 0x00; /* device specific */ + if (copy_to_user(retinfo,&tmp,sizeof(*retinfo))) + return -EFAULT; + return 0; +} + +static int set_serial_info(struct iCom_port * iCom_port_info, + struct serial_struct * new_info) +{ + struct serial_struct new_serial; + int old_flags; + int retval = 0; + unsigned long flags; + + TRACE(iCom_port_info,TRACE_SET_SERIAL,0); + + if (copy_from_user(&new_serial,new_info,sizeof(new_serial))) + return -EFAULT; + + old_flags = iCom_port_info->flags; + /* new_serial.irq --- irq of adapter will not change, PCI only */ + /* new_serial.xmit_fifo_size -- can not change on this device */ + /* new_serial.baud_base -- ??? 
*/ + /* new_serial.custom_divisor -- device specific */ + /* new_serial.hub6 -- device specific */ + /* new_serial.type -- device specific */ + /* new_serial.port -- address of port will not change, PCI only */ + + if (!capable(CAP_SYS_ADMIN)) { + if ((new_serial.baud_base != iCom_port_info->baud_base) || + (new_serial.close_delay != iCom_port_info->close_delay) || + ((new_serial.flags & ~ASYNC_USR_MASK) != + (iCom_port_info->flags & ~ASYNC_USR_MASK))) + return -EPERM; + iCom_port_info->flags = ((iCom_port_info->flags & ~ASYNC_USR_MASK) | + (new_serial.flags & ASYNC_USR_MASK)); + goto check_and_exit; + } + + if (new_serial.baud_base < 9600) { + return -EINVAL; + } + + /* + * OK, past this point, all the error checking has been done. + * At this point, we start making changes..... + */ + iCom_port_info->baud_base = new_serial.baud_base; + iCom_port_info->flags = ((iCom_port_info->flags & ~ASYNC_FLAGS) | + (new_serial.flags & ASYNC_FLAGS)); + iCom_port_info->close_delay = new_serial.close_delay * HZ/100; + iCom_port_info->closing_wait = new_serial.closing_wait * HZ/100; + iCom_port_info->tty->low_latency = (iCom_port_info->flags & ASYNC_LOW_LATENCY) ? 1 : 0; + + check_and_exit: + spin_lock_irqsave(&iComlock,flags); + if (iCom_port_info->flags & ASYNC_INITIALIZED) { + if (((iCom_port_info->flags & ASYNC_SPD_MASK) != + (old_flags & ASYNC_SPD_MASK))) { + if ((iCom_port_info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI) + iCom_port_info->tty->alt_speed = 57600; + if ((iCom_port_info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI) + iCom_port_info->tty->alt_speed = 115200; + if ((iCom_port_info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI) + iCom_port_info->tty->alt_speed = 230400; + if ((iCom_port_info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP) + iCom_port_info->tty->alt_speed = 460800; + change_speed(iCom_port_info, 0, flags); + } + } else + retval = startup(iCom_port_info, flags); + + spin_unlock_irqrestore(&iComlock,flags); + + return retval; +} + +/* + * get_lsr_info - get line status register info + * + * Purpose: Let user call ioctl() to get info when the UART physically + * is emptied. On bus types like RS485, the transmitter must + * release the bus after transmitting. This must be done when + * the transmit shift register is empty, not be done when the + * transmit holding register is empty. This functionality + * allows an RS485 driver to be written in user space. + */ +static int get_lsr_info(struct iCom_port * info, unsigned int *value) +{ + unsigned char status; + unsigned int result; + + TRACE(info,TRACE_SET_LSR,0); + + status = cpu_to_le16(info->statStg->xmit[0].flags); + result = ((status & SA_FLAGS_DONE) ? 
TIOCSER_TEMT : 0); + return put_user(result,value); +} + +static int iCom_ioctl(struct tty_struct * tty, struct file * filp, + unsigned int cmd, unsigned long arg) +{ + int error; + struct iCom_port * iCom_port_info = (struct iCom_port *)tty->driver_data; + struct async_icount cprev, cnow; /* kernel counter temps */ + struct serial_icounter_struct *p_cuser; /* user space */ + unsigned long flags; + + TRACE(iCom_port_info,TRACE_IOCTL | TRACE_TIME,jiffies); + if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) && + (cmd != TIOCSERCONFIG) && (cmd != TIOCSERGSTRUCT) && + (cmd != TIOCMIWAIT) && (cmd != TIOCGICOUNT)) { + if (tty->flags & (1 << TTY_IO_ERROR)) + return -EIO; + } + + switch (cmd) { + case 0x4300: + if (copy_to_user((void *)arg,iCom_port_info->trace_blk,TRACE_BLK_SZ)) + return -EFAULT; + return 0; + case TIOCMGET: + return get_modem_info(iCom_port_info, (unsigned int *) arg); + case TIOCMBIS: + case TIOCMBIC: + case TIOCMSET: + return set_modem_info(iCom_port_info, cmd, (unsigned int *) arg); + case TIOCGSERIAL: + return get_serial_info(iCom_port_info, + (struct serial_struct *) arg); + case TIOCSSERIAL: + return set_serial_info(iCom_port_info, + (struct serial_struct *) arg); + + case TIOCSERGETLSR: /* Get line status register */ + return get_lsr_info(iCom_port_info, (unsigned int *) arg); + + /* + * Wait for any of the 4 modem inputs (DCD,RI,DSR,CTS) to change + * - mask passed in arg for lines of interest + * (use |'ed TIOCM_RNG/DSR/CD/CTS for masking) + * Caller should use TIOCGICOUNT to see which one it was + */ + case TIOCMIWAIT: + spin_lock_irqsave(&iComlock,flags); + /* note the counters on entry */ + cprev = iCom_port_info->icount; + spin_unlock_irqrestore(&iComlock,flags); + while (1) { + interruptible_sleep_on(&iCom_port_info->delta_msr_wait); + /* see if a signal did it */ + if (signal_pending(current)) + return -ERESTARTSYS; + spin_lock_irqsave(&iComlock,flags); + cnow = iCom_port_info->icount; /* atomic copy */ + spin_unlock_irqrestore(&iComlock,flags); + if (cnow.rng == cprev.rng && cnow.dsr == cprev.dsr && + cnow.dcd == cprev.dcd && cnow.cts == cprev.cts) + return -EIO; /* no change => error */ + if ( ((arg & TIOCM_RNG) && (cnow.rng != cprev.rng)) || + ((arg & TIOCM_DSR) && (cnow.dsr != cprev.dsr)) || + ((arg & TIOCM_CD) && (cnow.dcd != cprev.dcd)) || + ((arg & TIOCM_CTS) && (cnow.cts != cprev.cts)) ) { + return 0; + } + cprev = cnow; + } + /* NOTREACHED */ + + /* + * Get counter of input serial line interrupts (DCD,RI,DSR,CTS) + * Return: write counters to the user passed counter struct + * NB: both 1->0 and 0->1 transitions are counted except for + * RI where only 0->1 is counted. 
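+ *
+ * Illustrative user-space sequence (device name hypothetical):
+ *
+ * struct serial_icounter_struct ic;
+ * int fd = open("/dev/ttyA0", O_RDWR | O_NOCTTY);
+ * ioctl(fd, TIOCMIWAIT, TIOCM_CD); -- sleep until DCD changes
+ * ioctl(fd, TIOCGICOUNT, &ic); -- then read the new counts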
+ */ + case TIOCGICOUNT: + spin_lock_irqsave(&iComlock,flags); + cnow = iCom_port_info->icount; + spin_unlock_irqrestore(&iComlock,flags); + p_cuser = (struct serial_icounter_struct *) arg; + error = put_user(cnow.cts, &p_cuser->cts); + if (error) return error; + error = put_user(cnow.dsr, &p_cuser->dsr); + if (error) return error; + error = put_user(cnow.rng, &p_cuser->rng); + if (error) return error; + error = put_user(cnow.dcd, &p_cuser->dcd); + if (error) return error; + error = put_user(cnow.rx, &p_cuser->rx); + if (error) return error; + error = put_user(cnow.tx, &p_cuser->tx); + if (error) return error; + error = put_user(cnow.frame, &p_cuser->frame); + if (error) return error; + error = put_user(cnow.overrun, &p_cuser->overrun); + if (error) return error; + error = put_user(cnow.parity, &p_cuser->parity); + if (error) return error; + error = put_user(cnow.brk, &p_cuser->brk); + if (error) return error; + error = put_user(cnow.buf_overrun, &p_cuser->buf_overrun); + if (error) return error; + return 0; + + case TIOCSERGWILD: + case TIOCSERSWILD: + /* "setserial -W" is called in Debian boot */ + printk ("TIOCSER?WILD ioctl obsolete, ignored.\n"); + return 0; + + default: + TRACE(iCom_port_info,TRACE_IOCTL_IGNORE,cmd); + return -ENOIOCTLCMD; + } + return 0; +} + +static void iCom_send_xchar(struct tty_struct * tty, char ch) +{ + struct iCom_port *iCom_port_info = (struct iCom_port *)tty->driver_data; + unsigned char xdata; + int index; + unsigned long flags; + + spin_lock_irqsave(&iComlock,flags); + TRACE(iCom_port_info,TRACE_SEND_XCHAR,ch); + /* attempt sending char for a period of .1 second */ + for (index = 0; index < 10; index++ ) { + xdata = iCom_readb(&iCom_port_info->dram->xchar); + if (xdata == 0x00) { + TRACE(iCom_port_info,TRACE_QUICK_WRITE,0); + iCom_writeb(ch,&iCom_port_info->dram->xchar); + break; + } + current->state = TASK_INTERRUPTIBLE; + spin_unlock_irqrestore(&iComlock,flags); + schedule_timeout(HZ/100); + spin_lock_irqsave(&iComlock,flags); + } + spin_unlock_irqrestore(&iComlock,flags); +} + +static void iCom_throttle(struct tty_struct * tty) +{ + struct iCom_port *iCom_port_info = (struct iCom_port *)tty->driver_data; + unsigned char osr; + + TRACE(iCom_port_info,TRACE_THROTTLE,0); + if (I_IXOFF(tty)) + iCom_send_xchar(tty, STOP_CHAR(tty)); + + if (tty->termios->c_cflag & CRTSCTS) { + osr = iCom_readb(&iCom_port_info->dram->osr); + iCom_writeb(osr & ~RTS,&iCom_port_info->dram->osr); + } +} + +static void iCom_unthrottle(struct tty_struct * tty) +{ + struct iCom_port *iCom_port_info = (struct iCom_port *)tty->driver_data; + unsigned char osr; + + TRACE(iCom_port_info,TRACE_UNTHROTTLE,0); + if (I_IXOFF(tty)) { + iCom_send_xchar(tty, START_CHAR(tty)); + } + if (tty->termios->c_cflag & CRTSCTS) { + osr = iCom_readb(&iCom_port_info->dram->osr); + iCom_writeb(osr | RTS,&iCom_port_info->dram->osr); + } +} + +static void iCom_set_termios(struct tty_struct * tty, struct termios * old_termios) +{ + struct iCom_port *iCom_port_info = (struct iCom_port *)tty->driver_data; + unsigned int cflag = tty->termios->c_cflag; + unsigned char osr; + unsigned long flags; +#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK)) + + spin_lock_irqsave(&iComlock,flags); + TRACE(iCom_port_info,TRACE_SET_TERMIOS,0); + if ((cflag == old_termios->c_cflag) + && (RELEVANT_IFLAG(tty->termios->c_iflag) + == RELEVANT_IFLAG(old_termios->c_iflag))) { + spin_unlock_irqrestore(&iComlock,flags); + return; + } + + change_speed(iCom_port_info, old_termios, flags); + + /* Handle transition 
to B0 status */ + if ((old_termios->c_cflag & CBAUD) && + !(cflag & CBAUD)) { + osr = iCom_readb(&iCom_port_info->dram->osr); + TRACE(iCom_port_info,TRACE_DROP_DTR_RTS,0); + iCom_writeb(osr & ~(DTR|RTS),&iCom_port_info->dram->osr); + } + + /* Handle transition away from B0 status */ + if (!(old_termios->c_cflag & CBAUD) && + (cflag & CBAUD)) { + osr = iCom_readb(&iCom_port_info->dram->osr); + TRACE(iCom_port_info,TRACE_RAISE_DTR,0); + osr |= DTR; + if (!(tty->termios->c_cflag & CRTSCTS) || + !test_bit(TTY_THROTTLED, &tty->flags)) { + TRACE(iCom_port_info,TRACE_RAISE_RTS,0); + osr |= RTS; + } + iCom_writeb(osr,&iCom_port_info->dram->osr); + } + + spin_unlock_irqrestore(&iComlock,flags); + + /* Handle turning off CRTSCTS */ + if ((old_termios->c_cflag & CRTSCTS) && + !(tty->termios->c_cflag & CRTSCTS)) { + tty->hw_stopped = 0; + iCom_start(tty); + } + +#if 0 + /* + * No need to wake up processes in open wait, since they + * sample the CLOCAL flag once, and don't recheck it. + * XXX It's not clear whether the current behavior is correct + * or not. Hence, this may change..... + */ + if (!(old_termios->c_cflag & CLOCAL) && + (tty->termios->c_cflag & CLOCAL)) + wake_up_interruptible(&iCom_port_info->open_wait); +#endif +} + +static void iCom_stop(struct tty_struct * tty) +{ + struct iCom_port *iCom_port_info = (struct iCom_port *)tty->driver_data; + unsigned char cmdReg; + unsigned long flags; + + spin_lock_irqsave(&iComlock,flags); + TRACE(iCom_port_info,TRACE_STOP,0); + cmdReg = iCom_readb(&iCom_port_info->dram->CmdReg); + iCom_writeb(cmdReg | CMD_HOLD_XMIT,&iCom_port_info->dram->CmdReg); + spin_unlock_irqrestore(&iComlock,flags); +} + +static void iCom_start(struct tty_struct * tty) +{ + struct iCom_port *iCom_port_info = (struct iCom_port *)tty->driver_data; + unsigned char cmdReg; + unsigned long flags; + + spin_lock_irqsave(&iComlock,flags); + TRACE(iCom_port_info,TRACE_START,0); + cmdReg = iCom_readb(&iCom_port_info->dram->CmdReg); + iCom_writeb(cmdReg & ~CMD_HOLD_XMIT,&iCom_port_info->dram->CmdReg); + spin_unlock_irqrestore(&iComlock,flags); +} + +static void iCom_hangup(struct tty_struct * tty) +{ + struct iCom_port *iCom_port_info = (struct iCom_port *)tty->driver_data; + unsigned long flags; + + TRACE(iCom_port_info,TRACE_HANGUP,0); + iCom_flush_buffer(tty); + spin_lock_irqsave(&iComlock,flags); + shutdown(iCom_port_info); + iCom_port_info->open_active_count = 0; + iCom_port_info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CALLOUT_ACTIVE); + iCom_port_info->tty = 0; + wake_up_interruptible(&iCom_port_info->open_wait); + spin_unlock_irqrestore(&iComlock,flags); +} + +static void iCom_break(struct tty_struct *tty, int break_state) +{ + struct iCom_port *iCom_port_info = (struct iCom_port *)tty->driver_data; + unsigned char cmdReg; + unsigned long flags; + + spin_lock_irqsave(&iComlock,flags); + TRACE(iCom_port_info,TRACE_BREAK,0); + cmdReg = iCom_readb(&iCom_port_info->dram->CmdReg); + if (break_state == -1) { + iCom_writeb(cmdReg | CMD_SND_BREAK,&iCom_port_info->dram->CmdReg); + } + else{ + iCom_writeb(cmdReg & ~CMD_SND_BREAK,&iCom_port_info->dram->CmdReg); + } + spin_unlock_irqrestore(&iComlock,flags); +} + +/* + * iCom_wait_until_sent() --- wait until the transmitter is empty + */ +static void iCom_wait_until_sent(struct tty_struct *tty, int timeout) +{ + struct iCom_port *iCom_port_info = (struct iCom_port *)tty->driver_data; + unsigned long orig_jiffies, char_time; + int status; + + TRACE(iCom_port_info,TRACE_WAIT_UNTIL_SENT,0); + + orig_jiffies = jiffies; + /* + * Set the check 
interval to be 1/5 of the estimated time to + * send a single character, and make it at least 1. The check + * interval should also be less than the timeout. + * + * Note: we have to use pretty tight timings here to satisfy + * the NIST-PCTS. + */ + char_time = (iCom_port_info->timeout - HZ/50) / iCom_port_info->xmit_fifo_size; + char_time = char_time / 5; + if (char_time == 0) + char_time = 1; + if (timeout) { + if (timeout < char_time) + char_time = timeout; + } + /* + * If the transmitter hasn't cleared in twice the approximate + * amount of time to send the entire FIFO, it probably won't + * ever clear. This assumes the UART isn't doing flow + * control, which is currently the case. Hence, if it ever + * takes longer than iCom_port_info->timeout, this is probably due to a + * UART bug of some kind. So, we clamp the timeout parameter at + * 2*iCom_port_info->timeout. + */ + if (!timeout || timeout > 2*iCom_port_info->timeout) + timeout = 2*iCom_port_info->timeout; + + status = cpu_to_le16(iCom_port_info->statStg->xmit[0].flags); + while (status & SA_FLAGS_DONE ) { /*data still transmitting*/ + + current->state = TASK_INTERRUPTIBLE; + current->counter = 0; /* make us low-priority */ + schedule_timeout(char_time); + if (signal_pending(current)) + break; + if (timeout && time_after(jiffies, orig_jiffies + timeout)) + break; + status = cpu_to_le16(iCom_port_info->statStg->xmit[0].flags); + } + current->state = TASK_RUNNING; +} + +/* + * /proc fs routines.... + */ +static inline int line_info(char *buf, struct iCom_port *iCom_port_info) +{ + char stat_buf[30], control, status; + int ret, baud_index; + int port; + + if ((iCom_port_info->port == 2) && + (iCom_adapter_info[iCom_port_info->adapter].subsystem_id != FOUR_PORT_MODEL)) + port = 1; + else + port = iCom_port_info->port; + + ret = sprintf(buf, "%d: port:%X irq:%d", + iCom_port_info->adapter, + port, + iCom_adapter_info[iCom_port_info->adapter].irq_number); + + status = iCom_readb(&iCom_port_info->dram->isr); + control = iCom_readb(&iCom_port_info->dram->osr); + + stat_buf[0] = 0; + stat_buf[1] = 0; + if (control & RTS) + strcat(stat_buf, "|RTS"); + if (status & CTS) + strcat(stat_buf, "|CTS"); + if (control & DTR) + strcat(stat_buf, "|DTR"); + if (status & DSR) + strcat(stat_buf, "|DSR"); + if (status & DCD) + strcat(stat_buf, "|CD"); + if (status & RI) + strcat(stat_buf, "|RI"); + + baud_index = iCom_readb(&iCom_port_info->dram->async_config3); + ret += sprintf(buf+ret, " baud:%d",icom_acfg_baud[baud_index]); + + ret += sprintf(buf+ret, " tx:%d rx:%d", + iCom_port_info->icount.tx, iCom_port_info->icount.rx); + + if (iCom_port_info->icount.frame) + ret += sprintf(buf+ret, " fe:%d", iCom_port_info->icount.frame); + + if (iCom_port_info->icount.parity) + ret += sprintf(buf+ret, " pe:%d", iCom_port_info->icount.parity); + + if (iCom_port_info->icount.brk) + ret += sprintf(buf+ret, " brk:%d", iCom_port_info->icount.brk); + + if (iCom_port_info->icount.overrun) + ret += sprintf(buf+ret, " oe:%d", iCom_port_info->icount.overrun); + + /* + * Last thing is the RS-232 status lines + */ + ret += sprintf(buf+ret, " %s\n", stat_buf+1); + return ret; +} + +int iCom_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int i, j, len = 0, l; + off_t begin = 0; + + len += sprintf(page, "iCom driver: %s\n", "1.0"); + for (i = 0; i < active_adapters && len < 4000; i++) { + for (j= 0; j < 4 && len < 4000; j++) { + if (iCom_adapter_info[i].port_info[j].status == ICOM_PORT_ACTIVE) { + l = line_info(page + len, 
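+ /*
+ * (Each active port contributes one line; an illustrative entry:
+ * 0: port:0 irq:9 baud:9600 tx:1024 rx:2048 RTS|CTS|DTR|DSR|CD
+ * with fe:, pe:, brk: and oe: fields appended only when nonzero.)
+ */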
&iCom_adapter_info[i].port_info[j]);
+ len += l;
+ if (len+begin > off+count)
+ goto done;
+ if (len+begin < off) {
+ begin += len;
+ len = 0;
+ }
+ }
+ }
+ }
+ *eof = 1;
+ done:
+ if (off >= len+begin)
+ return 0;
+ *start = page + (begin-off);
+ return ((count < begin+len-off) ? count : begin+len-off);
+}
+
+static void iCom_flush_buffer(struct tty_struct * tty)
+{
+ struct iCom_port *iCom_port_info = (struct iCom_port *)tty->driver_data;
+ unsigned char cmdReg;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iComlock,flags);
+ TRACE(iCom_port_info,TRACE_FLUSH_BUFFER,0);
+ /*
+ * Clearing CMD_XMIT_ENABLE is the same as disabling the transmitter.
+ * This should result in an interrupt if currently transmitting.
+ */
+ cmdReg = iCom_readb(&iCom_port_info->dram->CmdReg);
+ iCom_writeb(cmdReg & ~CMD_XMIT_ENABLE,&iCom_port_info->dram->CmdReg);
+ spin_unlock_irqrestore(&iComlock,flags);
+}
+
+/*
+ * This routine is used by the interrupt handler to schedule
+ * processing in the software interrupt portion of the driver.
+ */
+static inline void rs_sched_event(struct iCom_port *info,
+ int event)
+{
+ info->event |= 1 << event;
+ queue_task(&info->tqueue, &tq_immediate);
+ mark_bh(IMMEDIATE_BH);
+}
+
+static inline void check_modem_status(struct iCom_port *iCom_port_info)
+{
+ static char old_status = 0;
+ char delta_status;
+ unsigned char status;
+
+ /* modem input register */
+ status = iCom_readb(&iCom_port_info->dram->isr);
+ TRACE(iCom_port_info,TRACE_CHECK_MODEM,status);
+ delta_status = status ^ old_status;
+ if (delta_status) {
+ if (delta_status & RI)
+ iCom_port_info->icount.rng++;
+ if (delta_status & DSR)
+ iCom_port_info->icount.dsr++;
+ if (delta_status & DCD)
+ iCom_port_info->icount.dcd++;
+ if (delta_status & CTS)
+ iCom_port_info->icount.cts++;
+
+ wake_up_interruptible(&iCom_port_info->delta_msr_wait);
+ old_status = status;
+ }
+
+ if ((iCom_port_info->flags & ASYNC_CHECK_CD) && (delta_status & DCD)) {
+ if (status & DCD) /* Carrier Detect up */
+ wake_up_interruptible(&iCom_port_info->open_wait);
+ else if (!((iCom_port_info->flags & ASYNC_CALLOUT_ACTIVE) &&
+ (iCom_port_info->flags & ASYNC_CALLOUT_NOHUP))) {
+ if (iCom_port_info->tty)
+ tty_hangup(iCom_port_info->tty);
+ }
+ }
+
+ if (iCom_port_info->flags & ASYNC_CTS_FLOW) {
+ if (iCom_port_info->tty->hw_stopped) {
+ if (status & 0x40) { /* CTS up */
+ iCom_port_info->tty->hw_stopped = 0;
+ TRACE(iCom_port_info,TRACE_CTS_UP,0);
+ rs_sched_event(iCom_port_info, 0);
+ return;
+ }
+ } else {
+ if (!(status & 0x40)) { /* CTS down */
+ iCom_port_info->tty->hw_stopped = 1;
+ TRACE(iCom_port_info,TRACE_CTS_DOWN,0);
+ }
+ }
+ }
+}
+
+static void process_interrupt(u16 port_int_reg, struct iCom_port *iCom_port_info)
+{
+ short int count, rcv_buff;
+ struct tty_struct *tty = iCom_port_info->tty;
+ unsigned char *data;
+ unsigned short int status;
+ struct async_icount *icount;
+ unsigned long int offset;
+
+
+ TRACE(iCom_port_info,TRACE_INTERRUPT | TRACE_TIME,jiffies);
+
+ if (port_int_reg & (INT_XMIT_COMPLETED | INT_XMIT_DISABLED)) {
+ if (port_int_reg & (INT_XMIT_COMPLETED))
+ TRACE(iCom_port_info,TRACE_XMIT_COMPLETE,0);
+ else
+ TRACE(iCom_port_info,TRACE_XMIT_DISABLED,0);
+
+ /* clear buffer in use bit */
+ iCom_port_info->statStg->xmit[0].flags &= cpu_to_le16(~SA_FLAGS_READY_TO_XMIT);
+ iCom_port_info->icount.tx += (unsigned short int)cpu_to_le16(iCom_port_info->statStg->xmit[0].leLength);
+
+ /* activate write queue */
+ rs_sched_event(iCom_port_info, 0);
+ }
+
+ if (port_int_reg & INT_RCV_COMPLETED) {
+
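+ /*
+ * Receive completions are drained from a ring of NUM_RBUFFS DMA
+ * buffers: next_rcv tracks the slot the adapter will complete
+ * next, and each consumed slot is re-armed with RCV_BUFF_SZ below.
+ */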
TRACE(iCom_port_info,TRACE_RCV_COMPLETE,0); + rcv_buff = iCom_port_info->next_rcv; + + status = cpu_to_le16(iCom_port_info->statStg->rcv[rcv_buff].flags); + while (status & SA_FL_RCV_DONE) { + + TRACE(iCom_port_info,TRACE_FID_STATUS,status); + + count = cpu_to_le16(iCom_port_info->statStg->rcv[rcv_buff].leLength); + + TRACE(iCom_port_info,TRACE_RCV_COUNT,count); + if (count > (TTY_FLIPBUF_SIZE - tty->flip.count)) + count = TTY_FLIPBUF_SIZE - tty->flip.count; + + TRACE(iCom_port_info,TRACE_REAL_COUNT,count); + + offset = cpu_to_le32(iCom_port_info->statStg->rcv[rcv_buff].leBuffer) - iCom_port_info->recv_buf_pci; + + memcpy(tty->flip.char_buf_ptr,(unsigned char *)((unsigned long int)iCom_port_info->recv_buf + offset),count); + + data = (unsigned char *)tty->flip.char_buf_ptr; + + if (count > 0) { + tty->flip.count += count - 1; + tty->flip.char_buf_ptr += count - 1; + + memset(tty->flip.flag_buf_ptr, 0, count); + tty->flip.flag_buf_ptr += count - 1; + } + + icount = &iCom_port_info->icount; + icount->rx += count; + + /* Break detect logic */ + if ((status & SA_FLAGS_FRAME_ERROR) && (tty->flip.char_buf_ptr[0] == 0x00)) { + status &= ~SA_FLAGS_FRAME_ERROR; + status |= SA_FLAGS_BREAK_DET; + TRACE(iCom_port_info,TRACE_BREAK_DET,0); + } + + if (status & (SA_FLAGS_BREAK_DET | SA_FLAGS_PARITY_ERROR | + SA_FLAGS_FRAME_ERROR | SA_FLAGS_OVERRUN)) { + + if (status & SA_FLAGS_BREAK_DET) + icount->brk++; + if (status & SA_FLAGS_PARITY_ERROR) + icount->parity++; + if (status & SA_FLAGS_FRAME_ERROR) + icount->frame++; + if (status & SA_FLAGS_OVERRUN) + icount->overrun++; + + /* + * Now check to see if character should be + * ignored, and mask off conditions which + * should be ignored. + */ + if (status & iCom_port_info->ignore_status_mask) { + TRACE(iCom_port_info,TRACE_IGNORE_CHAR,0); + goto ignore_char; + } + + status &= iCom_port_info->read_status_mask; + + if (status & SA_FLAGS_BREAK_DET) { + *tty->flip.flag_buf_ptr = TTY_BREAK; + if (iCom_port_info->flags & ASYNC_SAK) + do_SAK(tty); + } else if (status & SA_FLAGS_PARITY_ERROR) { + TRACE(iCom_port_info,TRACE_PARITY_ERROR,0); + *tty->flip.flag_buf_ptr = TTY_PARITY; + } + else if (status & SA_FLAGS_FRAME_ERROR) + *tty->flip.flag_buf_ptr = TTY_FRAME; + if (status & SA_FLAGS_OVERRUN) { + /* + * Overrun is special, since it's + * reported immediately, and doesn't + * affect the current character + */ + if (tty->flip.count < TTY_FLIPBUF_SIZE) { + tty->flip.count++; + tty->flip.flag_buf_ptr++; + tty->flip.char_buf_ptr++; + *tty->flip.flag_buf_ptr = TTY_OVERRUN; + } + } + } + + tty->flip.flag_buf_ptr++; + tty->flip.char_buf_ptr++; + tty->flip.count++; + ignore_char: + iCom_port_info->statStg->rcv[rcv_buff].flags = 0; + iCom_port_info->statStg->rcv[rcv_buff].leLength = 0; + iCom_port_info->statStg->rcv[rcv_buff].WorkingLength = (unsigned short int)cpu_to_le16(RCV_BUFF_SZ); + + rcv_buff++; + if (rcv_buff == NUM_RBUFFS) rcv_buff = 0; + + status = cpu_to_le16(iCom_port_info->statStg->rcv[rcv_buff].flags); + } + iCom_port_info->next_rcv = rcv_buff; + tty_flip_buffer_push(tty); + } +} + +static void iCom_interrupt(int irq, void * dev_id, struct pt_regs * regs) +{ + unsigned long int int_reg; + u32 adapter_interrupts; + u16 port_int_reg; + struct iCom_adapter *iCom_adapter_ptr; + struct iCom_port *iCom_port_info; + unsigned long flags; + + spin_lock_irqsave(&iComlock,flags); + + /* find iCom_port_info for this interrupt */ + iCom_adapter_ptr = (struct iCom_adapter *)dev_id; + + if ((iCom_adapter_ptr->version | ADAPTER_V2) == ADAPTER_V2) { + int_reg = 
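+ /*
+ * (Interrupt status layout: bits 0-13 report the even port of a
+ * pair and bits 16-29 the odd one, hence the 0x00003FFF and
+ * 0x3FFF0000 masks below. V2 adapters report ports 2/3 at
+ * base + 0x8024 and ports 0/1 at base + 0x8004; V1 adapters use
+ * base + 0x4004.)
+ */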
iCom_adapter_ptr->base_addr + 0x8024; + + adapter_interrupts = iCom_readl((void *)int_reg); + + if (adapter_interrupts & 0x00003FFF) { + /* port 2 interrupt, NOTE: for all ADAPTER_V2, port 2 will be active */ + iCom_port_info = &iCom_adapter_ptr->port_info[2]; + port_int_reg = (u16)adapter_interrupts; + process_interrupt(port_int_reg, iCom_port_info); + check_modem_status(iCom_port_info); + } + if (adapter_interrupts & 0x3FFF0000) { + /* port 3 interrupt */ + iCom_port_info = &iCom_adapter_ptr->port_info[3]; + if (iCom_port_info->status == ICOM_PORT_ACTIVE) { + port_int_reg = (u16)(adapter_interrupts >> 16); + process_interrupt(port_int_reg, iCom_port_info); + check_modem_status(iCom_port_info); + } + } + + /* Clear out any pending interrupts */ + iCom_writel(adapter_interrupts,(void *)int_reg); + + int_reg = iCom_adapter_ptr->base_addr + 0x8004; + } + else { + int_reg = iCom_adapter_ptr->base_addr + 0x4004; + } + + adapter_interrupts = iCom_readl((void *)int_reg); + + if (adapter_interrupts & 0x00003FFF) { + /* port 0 interrupt, NOTE: for all adapters, port 0 will be active */ + iCom_port_info = &iCom_adapter_ptr->port_info[0]; + port_int_reg = (u16)adapter_interrupts; + process_interrupt(port_int_reg, iCom_port_info); + check_modem_status(iCom_port_info); + } + if (adapter_interrupts & 0x3FFF0000) { + /* port 1 interrupt */ + iCom_port_info = &iCom_adapter_ptr->port_info[1]; + if (iCom_port_info->status == ICOM_PORT_ACTIVE) { + port_int_reg = (u16)(adapter_interrupts >> 16); + process_interrupt(port_int_reg, iCom_port_info); + check_modem_status(iCom_port_info); + } + } + + /* Clear out any pending interrupts */ + iCom_writel(adapter_interrupts,(void *)int_reg); + spin_unlock_irqrestore(&iComlock,flags); +} + +/* + * ------------------------------------------------------------------- + * Here ends the serial interrupt routines. + * ------------------------------------------------------------------- + */ + +/* + * This routine is used to handle the "bottom half" processing for the + * serial driver, known also the "software interrupt" processing. + * This processing is done at the kernel interrupt level, after the + * iCom_interrupt() has returned, BUT WITH INTERRUPTS TURNED ON. This + * is where time-consuming activities which can not be done in the + * interrupt driver proper are done; the interrupt driver schedules + * them using rs_sched_event(), and they get done here. 
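+ *
+ * In this driver the only event bit is 0: do_softint() below wakes
+ * any writer sleeping on write_wait and calls the line discipline's
+ * write_wakeup once the transmit buffer is freed or CTS comes back
+ * up.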
+ */ +static void do_softint(void *private_) +{ + struct iCom_port *info = (struct iCom_port *) private_; + struct tty_struct *tty; + + tty = info->tty; + if (!tty) + return; + + if (test_and_clear_bit(0, &info->event)) { + if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && tty->ldisc.write_wakeup) + (tty->ldisc.write_wakeup)(tty); + wake_up_interruptible(&tty->write_wait); + TRACE(info,TRACE_WAKEUP,0); + } +} + +/* + Module operations +*/ +int iCom_init(void) +{ + int index, + index_v2, + index2, + scan_index; + struct pci_dev *dev[MAX_ADAPTERS]; + unsigned int irq_number[MAX_ADAPTERS]; + unsigned long int base_addr[MAX_ADAPTERS]; + unsigned char valid_indices[MAX_ADAPTERS]; +#define VALID 1 +#define INVALID 0 + unsigned int command_reg; + struct iCom_port *iCom_port_info; + int retval; + int status; + int port_num; + int adapter_count = 0; + int duplicate; + unsigned int subsystem_id; + + + /* + * Find base addresses and IRQs for any/all installed cards + */ + for (index=0; index < MAX_ADAPTERS; index++) { + valid_indices[index] = INVALID; + dev[index] = NULL; + } + + /* check for Version 1 Adapters */ + for (index = 0; index < MAX_ADAPTERS; index++){ + if (index == 0) { + if (!(dev[index] = pci_find_device(VENDOR_ID, DEVICE_ID, dev[index]))) + break; + } + else { + if (!(dev[index] = pci_find_device(VENDOR_ID, DEVICE_ID, dev[index-1]))) + break; + } + + adapter_count++; + + if (pci_enable_device(dev[index])) { + printk("iCom: Device enable FAILED\n"); + continue; + } + + if (pci_read_config_dword(dev[index], PCI_COMMAND, &command_reg)) { + printk("iCom: PCI Config read FAILED\n"); + continue; + } + + pci_write_config_dword(dev[index],PCI_COMMAND, command_reg | 0x00000146); + pci_write_config_dword(dev[index],0x44, 0x8300830A); + + base_addr[index] = pci_resource_start(dev[index],0); + base_addr[index] &= PCI_BASE_ADDRESS_MEM_MASK; + + duplicate = 0; + for (index2 = 0; index2 < index; index2++) { + if (base_addr[index] == base_addr[index2]) + duplicate = 1; + } + if (duplicate) continue; + + irq_number[index] = dev[index]->irq; + + valid_indices[index] = ADAPTER_V1; + } + + /* check for version 2 Adapters */ + for (index_v2=0; index_v2 < (MAX_ADAPTERS - adapter_count); index_v2++){ + if (index_v2 == 0) { + if (!(dev[index] = pci_find_device(VENDOR_ID, DEVICE_ID2, NULL))) + break; + } + else { + if (!(dev[index] = pci_find_device(VENDOR_ID, DEVICE_ID2, dev[index-1]))) + break; + } + + adapter_count++; + + if (pci_enable_device(dev[index])) { + printk("iCom: Device enable FAILED\n"); + continue; + } + + if (pci_read_config_dword(dev[index], PCI_COMMAND, &command_reg)) { + printk("iCom: PCI Config read FAILED\n"); + continue; + } + + pci_write_config_dword(dev[index],PCI_COMMAND, command_reg | 0x00000146); + pci_write_config_dword(dev[index],0x44, 0x42004200); + pci_write_config_dword(dev[index],0x48, 0x42004200); + + base_addr[index] = pci_resource_start(dev[index],0); + base_addr[index] &= PCI_BASE_ADDRESS_MEM_MASK; + + duplicate = 0; + for (index2 = 0; index2 < index; index2++) { + if (base_addr[index] == base_addr[index2]) + duplicate = 1; + } + if (duplicate) continue; + + irq_number[index] = dev[index]->irq; + + valid_indices[index++] = ADAPTER_V2; + } + + /* allocate memory for control blocks representing each adapter */ + iCom_adapter_info = (struct iCom_adapter *) + kmalloc(adapter_count*sizeof(struct iCom_adapter),GFP_KERNEL); + + if (!iCom_adapter_info) { + return -ENOMEM; + } + + memset(iCom_adapter_info, 0,adapter_count*sizeof(struct iCom_adapter)); + + /* store information just 
obtained on base_addr and irq */ + for (index = scan_index = 0; (scan_index < MAX_ADAPTERS) & + (index < adapter_count); scan_index++) { + + if (valid_indices[scan_index]) { + iCom_adapter_info[index].base_addr = base_addr[scan_index]; + iCom_adapter_info[index].irq_number = irq_number[scan_index]; + iCom_adapter_info[index].pci_dev = dev[scan_index]; + iCom_adapter_info[index].version = valid_indices[scan_index]; + pci_read_config_dword(dev[index], PCI_SUBSYSTEM_VENDOR_ID, &subsystem_id); + iCom_adapter_info[index].subsystem_id = subsystem_id; + + /* save off irq and request irq line */ + if (request_irq(irq_number[scan_index], iCom_interrupt, SA_INTERRUPT | + SA_SHIRQ, DRIVER_NAME, (void *)&iCom_adapter_info[index])) { + printk("iCom: request_irq FAILED\n"); + continue; + } + + if (iCom_adapter_info[index].version == ADAPTER_V1) { + iCom_adapter_info[index].numb_ports = 2; + iCom_adapter_info[index].port_info[0].port = 0; + iCom_adapter_info[index].port_info[0].status = ICOM_PORT_ACTIVE; + iCom_adapter_info[index].port_info[1].port = 1; + iCom_adapter_info[index].port_info[1].status = ICOM_PORT_ACTIVE; + } + else { + if (subsystem_id == FOUR_PORT_MODEL) { + iCom_adapter_info[index].numb_ports = 4; + iCom_adapter_info[index].port_info[0].port = 0; + iCom_adapter_info[index].port_info[0].status = ICOM_PORT_ACTIVE; + iCom_adapter_info[index].port_info[1].port = 1; + iCom_adapter_info[index].port_info[1].status = ICOM_PORT_ACTIVE; + iCom_adapter_info[index].port_info[2].port = 2; + iCom_adapter_info[index].port_info[2].status = ICOM_PORT_ACTIVE; + iCom_adapter_info[index].port_info[3].port = 3; + iCom_adapter_info[index].port_info[3].status = ICOM_PORT_ACTIVE; + } + else { + iCom_adapter_info[index].numb_ports = 4; + iCom_adapter_info[index].port_info[0].port = 0; + iCom_adapter_info[index].port_info[0].status = ICOM_PORT_ACTIVE; + iCom_adapter_info[index].port_info[1].status = ICOM_PORT_OFF; + iCom_adapter_info[index].port_info[2].port = 2; + iCom_adapter_info[index].port_info[2].status = ICOM_PORT_ACTIVE; + iCom_adapter_info[index].port_info[3].status = ICOM_PORT_OFF; + } + } + + if (!request_mem_region(iCom_adapter_info[index].base_addr, + pci_resource_len(iCom_adapter_info[index].pci_dev,0), + "iCom")) { + printk("iCom: request_mem_region FAILED\n"); + } + + for (port_num = 0; port_num < iCom_adapter_info[index].numb_ports; port_num++) { + iCom_port_info = &iCom_adapter_info[index].port_info[port_num]; + + if (iCom_port_info->status == ICOM_PORT_ACTIVE) { + /* initialize wait queues */ + init_waitqueue_head(&iCom_port_info->open_wait); + init_waitqueue_head(&iCom_port_info->close_wait); + init_waitqueue_head(&iCom_port_info->delta_msr_wait); + + /* initialize port specific variables */ + iCom_port_info->tqueue.routine = do_softint; + iCom_port_info->tqueue.data = iCom_port_info; + if (iCom_adapter_info[index].version == ADAPTER_V1) { + iCom_port_info->global_reg = (struct iCom_regs *)((char *)iCom_adapter_info[index].base_addr + 0x4000); + iCom_port_info->int_reg = (unsigned long)iCom_adapter_info[index].base_addr + 0x4004 + 2 - 2 * port_num; + } + else { + iCom_port_info->global_reg = (struct iCom_regs *)((char *)iCom_adapter_info[index].base_addr + 0x8000); + if (iCom_port_info->port < 2) + iCom_port_info->int_reg = (unsigned long)iCom_adapter_info[index].base_addr + 0x8004 + 2 - 2 * iCom_port_info->port; + else + iCom_port_info->int_reg = (unsigned long)iCom_adapter_info[index].base_addr + 0x8024 + 2 - 2 * (iCom_port_info->port - 2); + } + iCom_port_info->dram = (struct 
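+ /*
+ * (MMIO map as used here: global registers at base + 0x4000 on V1
+ * adapters or base + 0x8000 on V2; per-port interrupt registers at
+ * 0x4004, 0x8004 or 0x8024; and per-port DRAM windows, mapped next,
+ * spaced 0x2000 apart.)
+ */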
func_dram*)((char*)iCom_adapter_info[index].base_addr + 0x2000 * iCom_port_info->port); + iCom_port_info->close_delay = 5*HZ/10; + iCom_port_info->closing_wait = 30*HZ; + iCom_port_info->adapter = index; + + /* + * Load and start processor + */ + retval = loadCode(iCom_port_info); + if (retval != 0) { + printk("iCom%d: pico-code load of adapter FAILED\n",iCom_port_info->adapter); + return -ENODEV; + } + + /* get port memory */ + if ((status = get_port_memory(iCom_port_info)) != 0) { + return -ENODEV; + /* return status; *** -ENOMEM didn't work right for me */ + } + + /* Set Country Code */ + iCom_set_code(iCom_port_info); + } + } + index++; + } + } + active_adapters = index; + + printk("iCom: Adapter detection complete, %d adapters found with %d valid\n",adapter_count,active_adapters); + + if (active_adapters > 0) { + + /* Initialize the tty_driver structure */ + memset(&serial_driver, 0, sizeof(struct tty_driver)); + serial_driver.magic = TTY_DRIVER_MAGIC; + serial_driver.driver_name = DRIVER_NAME; +#if defined(CONFIG_DEVFS_FS) + serial_driver.name = "ttyA%d"; +#else + serial_driver.name = "ttyA"; +#endif + serial_driver.major = 243; + serial_driver.minor_start = 0; + serial_driver.num = NR_PORTS; + serial_driver.type = TTY_DRIVER_TYPE_SERIAL; + serial_driver.subtype = SERIAL_TYPE_NORMAL; + serial_driver.init_termios = tty_std_termios; + serial_driver.init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; + serial_driver.flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_NO_DEVFS; + serial_driver.refcount = &serial_refcount; + serial_driver.table = serial_table; + serial_driver.termios = serial_termios; + serial_driver.termios_locked = serial_termios_locked; + + serial_driver.open = iCom_open; + serial_driver.close = iCom_close; + serial_driver.write = iCom_write; + serial_driver.put_char = iCom_put_char; + serial_driver.flush_chars = iCom_flush_chars; + serial_driver.write_room = iCom_write_room; + serial_driver.chars_in_buffer = iCom_chars_in_buffer; + serial_driver.flush_buffer = iCom_flush_buffer; + serial_driver.ioctl = iCom_ioctl; + serial_driver.throttle = iCom_throttle; + serial_driver.unthrottle = iCom_unthrottle; + serial_driver.send_xchar = iCom_send_xchar; + serial_driver.set_termios = iCom_set_termios; + serial_driver.stop = iCom_stop; + serial_driver.start = iCom_start; + serial_driver.hangup = iCom_hangup; + serial_driver.break_ctl = iCom_break; + serial_driver.wait_until_sent = iCom_wait_until_sent; + serial_driver.read_proc = iCom_read_proc; + + + for (index=0; index < active_adapters; index++) { + iCom_adapter_info[index].port_info[0].callout_termios = serial_driver.init_termios; + iCom_adapter_info[index].port_info[0].normal_termios = serial_driver.init_termios; + iCom_adapter_info[index].port_info[1].callout_termios = serial_driver.init_termios; + iCom_adapter_info[index].port_info[1].normal_termios = serial_driver.init_termios; + iCom_adapter_info[index].port_info[2].callout_termios = serial_driver.init_termios; + iCom_adapter_info[index].port_info[2].normal_termios = serial_driver.init_termios; + iCom_adapter_info[index].port_info[3].callout_termios = serial_driver.init_termios; + iCom_adapter_info[index].port_info[3].normal_termios = serial_driver.init_termios; + } + + if (tty_register_driver(&serial_driver)) { + for (index=0; index < active_adapters; index++) { + free_irq(iCom_adapter_info[index].irq_number, (void *)&iCom_adapter_info[index]); + } + kfree(iCom_adapter_info); + panic("Couldn't register serial driver\n"); + } + +#if defined(CONFIG_DEVFS_FS) + for (index 
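+ /*
+ * (devfs nodes follow the minor layout decoded in iCom_open():
+ * four minors per adapter, with minors 2 and 3 registered only
+ * for four-port V2 models.)
+ */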
= 0; index < active_adapters; index++) { + tty_register_devfs(&serial_driver, + 0, index*4 + serial_driver.minor_start); + tty_register_devfs(&serial_driver, + 0, index*4 + serial_driver.minor_start + 1); + + if ((iCom_adapter_info[index].version == ADAPTER_V2) && + (iCom_adapter_info[index].subsystem_id == FOUR_PORT_MODEL)) { + tty_register_devfs(&serial_driver, + 0, index*4 + serial_driver.minor_start + 2); + tty_register_devfs(&serial_driver, + 0, index*4 + serial_driver.minor_start + 3); + } + } +#endif + + /* lastly, register unique ioctl */ + register_ioctl32_conversion(0x4300,NULL); + + return 0; + } + else { + if (adapter_count > 0) { + kfree(iCom_adapter_info); + } + } + + return -ENODEV; +} + +int init_module(void) +{ + return iCom_init(); +} + +void cleanup_module(void) +{ + unsigned long flags; + int e1; + int index; + int port_num; + struct iCom_port *iCom_port_info; + + /* remove registered ioctl */ + unregister_ioctl32_conversion(0x4300); + + spin_lock_irqsave(&iComlock,flags); + if ((e1 = tty_unregister_driver(&serial_driver))) + printk("iCom: failed to unregister serial driver (%d)\n",e1); + +#if defined(CONFIG_DEVFS_FS) + for (index = 0; index < active_adapters; index++) { + tty_unregister_devfs(&serial_driver, + index*4 + serial_driver.minor_start); + tty_unregister_devfs(&serial_driver, + index*4 + serial_driver.minor_start + 1); + + if ((iCom_adapter_info[index].version == ADAPTER_V2) && + (iCom_adapter_info[index].subsystem_id == FOUR_PORT_MODEL)) { + tty_unregister_devfs(&serial_driver, + index*4 + serial_driver.minor_start + 2); + tty_unregister_devfs(&serial_driver, + index*4 + serial_driver.minor_start + 3); + } + } +#endif + + for (index=0; index < active_adapters; index++) { + + for (port_num = 0; port_num < iCom_adapter_info[index].numb_ports; port_num++) { + iCom_port_info = &iCom_adapter_info[index].port_info[port_num]; + + if (iCom_port_info->status == ICOM_PORT_ACTIVE) { + + /* be sure that DTR and RTS are dropped */ + iCom_writeb(0x00,&iCom_port_info->dram->osr); + + /* Wait 0.1 Sec for simple Init to complete */ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(HZ/10); + + /* Stop processor */ + stop_processor(iCom_port_info); + + return_port_memory(iCom_port_info); + } + } + + free_irq(iCom_adapter_info[index].irq_number, (void *)&iCom_adapter_info[index]); + release_mem_region(iCom_adapter_info[index].base_addr, + pci_resource_len(iCom_adapter_info[index].pci_dev,0)); + } + spin_unlock_irqrestore(&iComlock,flags); + + kfree(iCom_adapter_info); + printk("iCom: Driver removed\n"); +} + +/* interrupts should be disabled here so that no hardware + interrupts occur; received data is polled for instead, avoiding + interrupt-level processing */ +static int mdm_rcv(struct iCom_port *iCom_port_info, char *exp_str) { + int status = 0; + int loop_count = 0; + char *start_str; + int rcv_buff; + + /* search for the expected string in the received data */ + while (!status && (loop_count++ < 10)) { + /* check buffer 1 */ + start_str = (char *)iCom_port_info->recv_buf; + if (strstr(start_str, exp_str)) { + /* string found! */ + status = 1; + break; + } + + /* check buffer 2 */ + start_str = (char *)iCom_port_info->recv_buf + 2048; + if (strstr(start_str, exp_str)) { + /* string found!
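(the modem's reply can land in either 2KB half of the 4KB receive area - mdm_send clears all 4KB - hence this second search)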
*/ + status = 1; + break; + } + + /* wait .5 seconds */ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(HZ/2); + + /* free up buffers if they had been used */ + for (rcv_buff = 0; rcv_buff < NUM_RBUFFS; rcv_buff++) { + iCom_port_info->statStg->rcv[rcv_buff].flags = 0; + iCom_port_info->statStg->rcv[rcv_buff].leLength = 0; + iCom_port_info->statStg->rcv[rcv_buff].WorkingLength = (unsigned short int)cpu_to_le16(RCV_BUFF_SZ); + } + } + + /* clear interrupts */ + iCom_writew(0x3FFF,(void *)iCom_port_info->int_reg); + + return status; +} + +static void mdm_send(struct iCom_port *iCom_port_info, char *mdm_cmnd, + int cmnd_length) { + + unsigned char cmdReg; + unsigned long int offset; + + + /* initialize transmit and receive operations */ + offset = (unsigned long int)&iCom_port_info->statStg->rcv[0] - (unsigned long int)iCom_port_info->statStg; + iCom_writel(iCom_port_info->statStg_pci + offset,&iCom_port_info->dram->RcvStatusAddr); + iCom_port_info->next_rcv = 0; + iCom_port_info->put_length = 0; + *iCom_port_info->xmitRestart = 0; + iCom_writel(iCom_port_info->xmitRestart_pci,&iCom_port_info->dram->XmitStatusAddr); + iCom_writeb(CMD_XMIT_RCV_ENABLE,&iCom_port_info->dram->CmdReg); + + /* clear target receive buffers 1 and 2 */ + memset(iCom_port_info->recv_buf,0,4096); + + memcpy(iCom_port_info->xmit_buf, mdm_cmnd, cmnd_length); + + iCom_port_info->statStg->xmit[0].flags = (unsigned short int)cpu_to_le16(SA_FLAGS_READY_TO_XMIT); + iCom_port_info->statStg->xmit[0].leLength = (unsigned short int)cpu_to_le16(cmnd_length); + offset = (unsigned long int)&iCom_port_info->statStg->xmit[0] - (unsigned long int)iCom_port_info->statStg; + *iCom_port_info->xmitRestart = cpu_to_le32(iCom_port_info->statStg_pci + offset); + + cmdReg = iCom_readb(&iCom_port_info->dram->CmdReg); + iCom_writeb(cmdReg | CMD_XMIT_RCV_ENABLE,&iCom_port_info->dram->CmdReg); + iCom_writeb(START_XMIT,&iCom_port_info->dram->StartXmitCmd); + TRACE(iCom_port_info,TRACE_WRITE_START,cmnd_length); +} + +static void iCom_set_code(struct iCom_port *iCom_port_info) { + char mdm_cmnd[15]; + int index; + + /* check if country code should be set at all */ + if (strlen(iCom_country_code) == 0) return; + + printk("iCom: Checking for country code on serial adapter %d port %d...",iCom_port_info->adapter, iCom_port_info->port); + + /* sync up modems (if present) */ + mdm_send(iCom_port_info,"AT\r\n",4); + mdm_rcv(iCom_port_info,"OK"); + + printk("."); + + /* send ATI0 to check for internal modem */ + mdm_send(iCom_port_info,"ATI0\r\n",6); + + /* check returned data for internal modem identification */ + if (!mdm_rcv(iCom_port_info,"SMI")) { + /* must not be internal modem - return */ + printk("\n\tno internal modem on this port\n"); + return; + } + + /* send ATE0S0=0 to turn off command Echo and turn off Auto Answer */ + mdm_send(iCom_port_info,"ATE0S0=0\r\n",10); + + /* wait for OK */ + if (!mdm_rcv(iCom_port_info,"OK")) { + /* unable to send command to modem - error */ + printk("\niCom: Error, unable to set modem Country Code\n"); + return; + } + + printk("."); + + /* Send new country code AT%T19,0, */ + sprintf(mdm_cmnd,"AT%%T19,0,%s\r\n",iCom_country_code); + mdm_send(iCom_port_info,mdm_cmnd,strlen(mdm_cmnd)); + + /* wait for OK */ + if (!mdm_rcv(iCom_port_info,"OK")) { + /* unable to set country code */ + printk("\niCom: Error, unable to set modem Country Code\n"); + return; + } + + printk(".\n"); + + /* send ATE1S0=2 to enable command Echo and auto answer */ + mdm_send(iCom_port_info,"ATE1S0=2\r\n",10); + + /* wait for OK */ + if 
(!mdm_rcv(iCom_port_info,"OK")) { + /* modem state unknown */ + printk("iCom: Warning, modem state unknown\n"); + } + + /* print message that Country Code set appropriately */ + printk("iCom: Modem country code for adapter %d port %d has been set to %s\n",iCom_port_info->adapter, iCom_port_info->port,iCom_country_code); + + /* disable xmitter/recvr */ + iCom_writeb(CMD_RCV_DISABLE,&iCom_port_info->dram->CmdReg); + for (index = 0; index < 10; index++) { + /* Wait 0.1 Sec for receive operations to complete*/ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(HZ/10); + + if (iCom_readb(&iCom_port_info->dram->PrevCmdReg) == 0x00) { + break; + } + } + + /* clear interrupts */ + iCom_writew(0x3FFF,(void *)iCom_port_info->int_reg); +} + +#ifdef ICOM_TRACE +void TRACE(struct iCom_port *iCom_port_info, u32 trace_pt, + u32 trace_data) { + + u32 *tp_start, *tp_end, **tp_next; + + if (trace_pt == TRACE_GET_MEM) { + if (iCom_port_info->trace_blk != 0) return; + iCom_port_info->trace_blk = kmalloc(TRACE_BLK_SZ,GFP_KERNEL); + memset(iCom_port_info->trace_blk, 0,TRACE_BLK_SZ); + iCom_port_info->trace_blk[0] = (unsigned long)iCom_port_info->trace_blk + 3*sizeof(unsigned long); + iCom_port_info->trace_blk[1] = (unsigned long)iCom_port_info->trace_blk + TRACE_BLK_SZ; + iCom_port_info->trace_blk[2] = iCom_port_info->trace_blk[0]; + } + if (iCom_port_info->trace_blk == 0) return; + + if (trace_pt == TRACE_RET_MEM) { + kfree(iCom_port_info->trace_blk); + iCom_port_info->trace_blk = 0; + return; + } + + tp_start = (u32 *)iCom_port_info->trace_blk[0]; + tp_end = (u32 *)iCom_port_info->trace_blk[1]; + tp_next = (u32 **)&iCom_port_info->trace_blk[2]; + + if (trace_data != 0) { + **tp_next = trace_data; + *tp_next = *tp_next + 1; + if (*tp_next == tp_end) *tp_next = tp_start; + **tp_next = TRACE_WITH_DATA | trace_pt; + } + else + **tp_next = trace_pt; + + *tp_next = *tp_next + 1; + if (*tp_next == tp_end) *tp_next = tp_start; +} +#endif diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/char/icom.h linuxppc64_2_4/drivers/char/icom.h --- ../kernel.org/linux-2.4.19/drivers/char/icom.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/char/icom.h Fri Dec 14 09:07:47 2001 @@ -0,0 +1,364 @@ +/* + * iCom.h + * + * Copyright (C) 2001 Michael Anderson, IBM Corporation + * + * Serial device driver include file. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#define TRACE_BLK_SZ 1024 +#define TRACE_WITH_DATA 0x80000000 +#define TRACE_TIME 0x40000000 +#define TRACE_GET_MEM 0x20000000 +#define TRACE_RET_MEM 0x10000000 +#define TRACE_GET_PORT_MEM 0x00000001 +#define TRACE_FOD_ADDR 0x00000005 +#define TRACE_FOD_XBUFF 0x00000006 +#define TRACE_FID_ADDR 0x00000007 +#define TRACE_FID_RBUFF 0x00000008 +#define TRACE_RET_PORT_MEM 0x00000100 +#define TRACE_LOAD_MEM 0x00000200 +#define TRACE_CHANGE_SPEED 0x00000300 +#define TRACE_PARENB 0x00000301 +#define TRACE_PARODD 0x00000302 +#define TRACE_XR_ENAB 0x00000303 +#define TRACE_STARTUP 0x00000400 +#define TRACE_CABLE_ID 0x00000401 +#define TRACE_SHUTDOWN 0x00000500 +#define TRACE_DEVICE_NUMB 0x00000600 +#define TRACE_STARTUP_ERROR 0x00000601 +#define TRACE_CLOSE 0x00000700 +#define TRACE_CLOSE_HANGUP 0x00000701 +#define TRACE_OPEN_ACTIVE 0x00000702 +#define TRACE_WRITE 0x00000800 +#define TRACE_WRITE_FULL 0x00000801 +#define TRACE_WRITE_NODATA 0x00000802 +#define TRACE_WRITE_START 0x00000803 +#define TRACE_PUT_CHAR 0x00000900 +#define TRACE_PUT_FULL 0x00000901 +#define TRACE_FLUSH_CHAR 0x00000a00 +#define TRACE_START_FLUSH 0x00000a01 +#define TRACE_WRITE_ROOM 0x00000b00 +#define TRACE_CHARS_IN_BUFF 0x00000c00 +#define TRACE_CHARS_REMAIN 0x00000c01 +#define TRACE_GET_MODEM 0x00000d00 +#define TRACE_SET_MODEM 0x00000e00 +#define TRACE_RAISE_RTS 0x00000e01 +#define TRACE_RAISE_DTR 0x00000e02 +#define TRACE_LOWER_RTS 0x00000e03 +#define TRACE_LOWER_DTR 0x00000e04 +#define TRACE_GET_SERIAL 0x00000f00 +#define TRACE_SET_SERIAL 0x00001000 +#define TRACE_SET_LSR 0x00001100 +#define TRACE_IOCTL 0x00001200 +#define TRACE_IOCTL_IGNORE 0x00001201 +#define TRACE_SEND_XCHAR 0x00001300 +#define TRACE_QUICK_WRITE 0x00001301 +#define TRACE_THROTTLE 0x00001400 +#define TRACE_UNTHROTTLE 0x00001500 +#define TRACE_SET_TERMIOS 0x00001600 +#define TRACE_STOP 0x00001700 +#define TRACE_START 0x00001800 +#define TRACE_HANGUP 0x00001900 +#define TRACE_BREAK 0x00001a00 +#define TRACE_WAIT_UNTIL_SENT 0x00001b00 +#define TRACE_FLUSH_BUFFER 0x00001c00 +#define TRACE_CHECK_MODEM 0x00001d00 +#define TRACE_CTS_UP 0x00001d01 +#define TRACE_CTS_DOWN 0x00001d02 +#define TRACE_INTERRUPT 0x00001e00 +#define TRACE_XMIT_COMPLETE 0x00001e01 +#define TRACE_RCV_COMPLETE 0x00001e02 +#define TRACE_FID_STATUS 0x00001e03 +#define TRACE_RCV_COUNT 0x00001e04 +#define TRACE_REAL_COUNT 0x00001e05 +#define TRACE_BREAK_DET 0x00001e06 +#define TRACE_IGNORE_CHAR 0x00001e07 +#define TRACE_PARITY_ERROR 0x00001e08 +#define TRACE_XMIT_DISABLED 0x00001e09 +#define TRACE_WAKEUP 0x00001f00 +#define TRACE_CLEAR_INTERRUPTS 0x0000ff00 +#define TRACE_START_PROC_A 0x0000ff01 +#define TRACE_START_PROC_B 0x0000ff02 +#define TRACE_STOP_PROC_A 0x0000ff03 +#define TRACE_STOP_PROC_B 0x0000ff04 +#define TRACE_RAISE_DTR_RTS 0x0000ff05 +#define TRACE_START_PROC_C 0x0000ff06 +#define TRACE_START_PROC_D 0x0000ff07 +#define TRACE_STOP_PROC_C 0x0000ff08 +#define TRACE_STOP_PROC_D 0x0000ff09 +#define TRACE_ENABLE_INTERRUPTS_PA 0x0000ff0a +#define TRACE_ENABLE_INTERRUPTS_PB 0x0000ff0b +#define TRACE_ENABLE_INTERRUPTS_PC 0x0000ff0c +#define TRACE_ENABLE_INTERRUPTS_PD 0x0000ff0d +#define TRACE_DIS_INTERRUPTS_PA 0x0000ff0e +#define TRACE_DIS_INTERRUPTS_PB 0x0000ff0f +#define TRACE_DIS_INTERRUPTS_PC 0x0000ff10 +#define TRACE_DIS_INTERRUPTS_PD 0x0000ff11 +#define 
TRACE_DROP_DTR_RTS 0x0000ff12 + +#ifndef TRACE_ONLY + +static unsigned char callSetup[1936] = + {0xBD,0xD9,0x23,0x00,0xDD,0xDD,0x18,0x05,0x23,0x80,0x3E,0x7F,0x23,0x00,0x3E,0x7C, + 0x23,0x10,0x3E,0xB7,0x3E,0xB5,0x23,0x20,0x3E,0xB6,0x3E,0xB4,0xA2,0x0A,0x86,0x0A, + 0xAE,0x0A,0x23,0x80,0x3E,0x01,0x23,0x2A,0x3E,0x02,0x3D,0xDA,0x23,0xFF,0x3E,0x03, + 0x22,0x86,0xD9,0xDD,0x10,0x1C,0x22,0x08,0x2F,0xF0,0xD9,0xF1,0x18,0x48,0xD9,0xDD, + 0x18,0x32,0x23,0x00,0x3E,0x06,0x3E,0x46,0x3E,0x86,0x3E,0xC6,0x21,0xF0,0x37,0xFA, + 0x2F,0xFA,0x3E,0x08,0x23,0x00,0x3E,0x07,0x3E,0x87,0x3E,0xC7,0x23,0xFF,0x3E,0x09, + 0x3E,0x47,0x00,0x39,0x23,0x00,0x3E,0x06,0x3E,0x08,0x23,0x13,0x3E,0x07,0x23,0x01, + 0x3E,0x09,0xA2,0xFC,0xA2,0xF6,0xCD,0xF0,0x10,0x3E,0x82,0xFC,0x23,0xFF,0x68,0x69, + 0x3B,0x00,0x10,0x3F,0x22,0x86,0xD9,0xDD,0x10,0x46,0x22,0x08,0x2F,0xF0,0x3D,0xF5, + 0x3B,0xB0,0x10,0x60,0x23,0x00,0x3E,0x02,0x23,0x01,0x3E,0x7C,0x23,0x14,0x3E,0x03, + 0x23,0x00,0x3E,0x7C,0x23,0x80,0xDD,0xF2,0x1B,0x95,0x68,0x69,0x3B,0x00,0x10,0x53, + 0x23,0x09,0x3E,0x02,0x3D,0xDA,0x23,0x08,0xDD,0xDD,0x18,0x5F,0x23,0x88,0x3E,0x7F, + 0x21,0xDA,0xC9,0xF0,0x10,0x64,0x33,0x04,0x3E,0x02,0x3D,0xDA,0xD9,0xF1,0x10,0x6B, + 0x23,0x00,0x3D,0xF1,0x00,0x93,0x21,0xF5,0x2F,0xF0,0x3B,0xE0,0x10,0x71,0x23,0x22, + 0x00,0x8A,0x3B,0x60,0x10,0x75,0x23,0x22,0x00,0x8A,0x3B,0xC0,0x10,0x79,0x23,0xEE, + 0x00,0x8A,0x3B,0xB0,0x10,0x7D,0x23,0x44,0x00,0x8A,0x3B,0x20,0x10,0x81,0x23,0xC4, + 0x00,0x8A,0x3B,0x30,0x10,0x85,0x23,0x22,0x00,0x8A,0x3B,0xF0,0x1B,0x99,0x00,0xB0, + 0xD9,0xDD,0x18,0x8B,0x3E,0x46,0x23,0x11,0x68,0x69,0x3B,0x00,0x10,0x8C,0x22,0x06, + 0x37,0xFF,0x3D,0xEF,0x81,0xF5,0x0A,0xD2,0x08,0xB6,0x08,0xD5,0x0A,0x4D,0x0B,0x4F, + 0xDD,0xF3,0x10,0x93,0xBD,0xF3,0x9D,0xD9,0x21,0xDE,0x3B,0x00,0x18,0xA7,0x5D,0x00, + 0x4B,0xC8,0x41,0xE0,0x22,0x7F,0x33,0x40,0x3E,0x7F,0x21,0xDE,0x4D,0xC8,0x58,0x00, + 0x4B,0xC8,0x41,0xFC,0x22,0x7F,0x2F,0xBF,0x3E,0x7F,0x21,0xF7,0x4D,0xC8,0x00,0xAF, + 0x23,0xDD,0x3D,0xF4,0x0B,0xB8,0x96,0xF4,0x9E,0xF6,0x00,0xB5,0xDD,0xE6,0x68,0x6E, + 0xC5,0xE6,0x68,0x6F,0xC1,0xE6,0x18,0xC6,0x81,0xE6,0x68,0x70,0x43,0xC8,0x45,0xCA, + 0x21,0xE4,0x68,0x62,0x21,0xE5,0x68,0x74,0x47,0xD4,0x68,0x6D,0x41,0xD2,0x43,0xC8, + 0x50,0x00,0x47,0xC8,0x41,0xC8,0x68,0x71,0x68,0x6E,0xC1,0xE6,0x68,0x6E,0x85,0xE6, + 0x8A,0xF4,0xC1,0xE6,0x68,0x6F,0xA5,0xE6,0x68,0x6D,0x21,0xE7,0x2F,0x0F,0x3D,0xCA, + 0x2F,0x0C,0x3B,0x0C,0x68,0x6E,0x21,0xE7,0xDE,0xF0,0x1A,0x41,0xDA,0xF0,0x18,0xFE, + 0xA2,0x0A,0x99,0xE7,0x23,0x00,0x3D,0xD6,0x3D,0xE8,0x21,0xCA,0x3B,0x0F,0x10,0xF0, + 0x23,0x81,0x3E,0x01,0x21,0xDA,0x3E,0x02,0x23,0x7E,0x3E,0x03,0x92,0xF1,0x00,0xFC, + 0x23,0x40,0x3E,0x01,0x21,0xC6,0x3E,0x02,0x21,0xC7,0x3E,0x03,0x21,0xCA,0x3B,0x0C, + 0x23,0x00,0x10,0xFB,0x23,0x01,0x3D,0xD7,0x82,0x0A,0x68,0x6D,0x21,0xCA,0x3B,0x0F, + 0x19,0x3C,0x21,0xE7,0xDE,0xF0,0x1A,0x41,0xD6,0xF0,0x11,0x22,0x68,0x70,0x43,0xC8, + 0x21,0xCA,0xC2,0xF0,0x21,0xC9,0x68,0x64,0xDD,0xD7,0x11,0x18,0x3D,0xC8,0x21,0xCA, + 0xC6,0xF0,0x21,0xC8,0x68,0x64,0x3D,0xC8,0x21,0xCA,0xCA,0xF0,0x21,0xC8,0x68,0x64, + 0x68,0x6B,0x29,0xD8,0x3B,0x7F,0xDA,0xF1,0x21,0xE7,0x2F,0x0F,0x3D,0xCA,0x11,0x22, + 0x91,0xE7,0x02,0x06,0xDE,0x04,0x68,0x6E,0x68,0x70,0x43,0xC8,0x21,0xCA,0xC2,0xF0, + 0x21,0xC9,0x68,0x64,0x3D,0xD8,0x21,0xCA,0x3D,0xCB,0xDD,0xD7,0x11,0x39,0x21,0xCA, + 0xC6,0xF0,0x21,0xD8,0x68,0x64,0x3D,0xD8,0x21,0xCA,0xCA,0xF0,0x21,0xD8,0x68,0x64, + 0x3D,0xD8,0x21,0xE7,0x2F,0x0F,0x3D,0xCA,0xDE,0x04,0x68,0x6E,0x22,0x00,0xCA,0x0A, + 0x11,0x48,0xA2,0x0A,0x99,0xE8,0x21,0xCA,0x3B,0x0F,0x1A,0x48,0x82,0x0A,0x68,0x6D, + 
0x3D,0xC8,0x22,0x04,0x3D,0xC9,0x21,0xCA,0x3B,0x0F,0x11,0x58,0x21,0xC9,0x2F,0x38, + 0x3B,0x00,0x11,0x55,0x21,0xC8,0x3E,0xB0,0x01,0xEC,0xD5,0xE7,0x68,0x6E,0x02,0x06, + 0x21,0xC9,0xD6,0xF0,0x11,0x5C,0x95,0xE8,0xCE,0xF0,0x11,0x5F,0x9D,0xE8,0x21,0xD7, + 0x3B,0x00,0x19,0xEB,0xDE,0xF0,0x11,0x6C,0xDA,0xF0,0x19,0xAE,0x21,0xC8,0x3B,0x54, + 0x11,0xEC,0x23,0xC2,0x3D,0xD7,0x01,0xEC,0x3B,0x01,0x11,0x72,0x21,0xC8,0x3B,0x52, + 0x11,0xE9,0x01,0xE5,0x3B,0x02,0x11,0x78,0x21,0xC8,0x3B,0x4F,0x11,0xE9,0x01,0xE5, + 0x3B,0x03,0x11,0x7E,0x21,0xC8,0x3B,0x42,0x11,0xE9,0x01,0xE5,0x3B,0x04,0x11,0x84, + 0x21,0xC8,0x3B,0x47,0x11,0xE9,0x01,0xE5,0x3B,0x05,0x11,0x8A,0x21,0xC8,0x3B,0x20, + 0x11,0xE9,0x01,0xE5,0x3B,0x06,0x11,0x90,0x21,0xC8,0x3B,0x54,0x11,0xE9,0x01,0xE5, + 0x3B,0x07,0x11,0x96,0x21,0xC8,0x3B,0x4F,0x11,0xE9,0x01,0xE5,0x3B,0x08,0x11,0x9C, + 0x21,0xC8,0x3B,0x4E,0x11,0xE9,0x01,0xE5,0x3B,0x09,0x11,0xE9,0x21,0xC8,0x3B,0x59, + 0x11,0xE9,0x23,0x80,0x3D,0xD7,0x21,0xCB,0xC6,0xF0,0x21,0xD8,0x68,0x64,0x3D,0xD8, + 0x21,0xCB,0xCA,0xF0,0x21,0xD8,0x68,0x64,0x3D,0xD8,0x01,0xEC,0x2F,0x3F,0x3B,0x02, + 0x11,0xB5,0x21,0xC8,0x3B,0x4F,0x11,0xE2,0x01,0xE5,0x3B,0x03,0x11,0xBB,0x21,0xC8, + 0x3B,0x4E,0x11,0xE2,0x01,0xE5,0x3B,0x04,0x11,0xC1,0x21,0xC8,0x3B,0x59,0x11,0xE2, + 0x01,0xE5,0x3B,0x05,0x11,0xC7,0x21,0xC8,0x3B,0x20,0x11,0xE2,0x01,0xE5,0x3B,0x06, + 0x11,0xCD,0x21,0xC8,0x3B,0x52,0x11,0xE2,0x01,0xE5,0x3B,0x07,0x11,0xD3,0x21,0xC8, + 0x3B,0x4F,0x11,0xE2,0x01,0xE5,0x3B,0x08,0x11,0xD9,0x21,0xC8,0x3B,0x42,0x11,0xE2, + 0x01,0xE5,0x3B,0x09,0x11,0xE2,0x21,0xC8,0x3B,0x47,0x11,0xE2,0x21,0xE8,0x33,0x01, + 0x3D,0xE8,0x01,0xEC,0x23,0x80,0x3D,0xD7,0x01,0x66,0x21,0xD7,0x68,0x68,0x3D,0xD7, + 0x01,0xEB,0x23,0x00,0x3D,0xD7,0x95,0xE7,0xD1,0xE8,0x1A,0x15,0x45,0xDE,0xDE,0xF1, + 0x11,0xF3,0x91,0xE8,0x02,0x08,0x51,0xCC,0x21,0xD6,0x68,0x62,0x21,0xC8,0x68,0x67, + 0x45,0xDE,0x23,0x01,0x68,0x63,0x47,0xDE,0x21,0xD6,0x68,0x68,0x3D,0xD6,0x21,0xE8, + 0x2F,0x07,0x3B,0x01,0x1A,0x06,0x21,0xD6,0x3B,0x08,0x12,0x15,0xD1,0xE8,0x1A,0x15, + 0x21,0xD6,0x3B,0x00,0x1A,0x15,0x41,0xE0,0x57,0xCC,0xDD,0xF2,0x1B,0x95,0xC2,0xF1, + 0x12,0x0D,0x68,0x62,0x43,0xE0,0x23,0x00,0x3D,0xD6,0x21,0xCA,0x3B,0x0F,0x12,0x3A, + 0x21,0xC9,0x2F,0x38,0x3B,0x00,0x12,0x1E,0x95,0xE7,0x68,0x6D,0x3B,0x10,0x12,0x35, + 0x21,0xC8,0x3B,0x7E,0x12,0x32,0x22,0x74,0x3B,0x47,0x12,0x2F,0x22,0x75,0x3B,0x0F, + 0x12,0x2F,0x21,0xE7,0xDE,0xF0,0x1A,0x41,0xD6,0xF0,0x68,0x6E,0x02,0x48,0x23,0x04, + 0x3D,0xE8,0x02,0x48,0x23,0x03,0x3D,0xE8,0x02,0x48,0xCE,0xF0,0x68,0x6F,0x23,0x02, + 0x3D,0xE8,0x02,0x48,0x21,0xE8,0x2F,0x07,0x3B,0x01,0x1A,0x48,0xD1,0xE7,0x68,0x6E, + 0x02,0x48,0xA2,0x0A,0xCE,0xF4,0x68,0x6F,0xDD,0xE7,0x68,0x6E,0x23,0x07,0x3D,0xE8, + 0xA2,0x0A,0x23,0x00,0x3D,0xE7,0x8E,0xF4,0x68,0x6D,0x21,0xE7,0x2F,0x0F,0x3D,0xCA, + 0x2F,0x0C,0x3D,0xCB,0x3B,0x08,0x68,0x6E,0x21,0xE7,0xDE,0xF0,0x1A,0xC6,0xDA,0xF0, + 0x1A,0x72,0x99,0xE7,0x23,0x00,0x3D,0xD6,0x3D,0xE8,0x21,0xCA,0x3B,0x0B,0x12,0x69, + 0x23,0x81,0x3E,0x01,0x21,0xDA,0x3E,0x02,0x23,0x7E,0x3E,0x03,0x86,0x0A,0xAE,0x0A, + 0x68,0x6D,0x23,0x40,0x3E,0x01,0x21,0xC6,0x3E,0x02,0x21,0xC7,0x3E,0x03,0x86,0x0A, + 0xAE,0x0A,0x68,0x6D,0x21,0xD6,0x3B,0x00,0x12,0x96,0x45,0xDE,0xDE,0xF1,0x12,0x80, + 0x21,0xCA,0x3B,0x0B,0x12,0xCE,0xD1,0xE7,0x12,0xB2,0xC6,0x04,0x68,0x6E,0x02,0xCE, + 0x21,0xDF,0x3B,0x00,0x12,0x87,0x21,0xDE,0x3B,0x08,0xDA,0xF1,0x12,0x88,0x23,0x08, + 0x41,0xE0,0x55,0xCC,0xDD,0xF2,0x1B,0x95,0xC2,0xF1,0x12,0x8A,0x68,0x62,0x43,0xE0, + 0x45,0xDE,0x68,0x63,0x47,0xDE,0x3D,0xD6,0x23,0x00,0x3D,0xD8,0xC2,0x04,0x68,0x6E, + 
0xD5,0xE7,0x1A,0xA3,0x21,0xCA,0x3B,0x0B,0x12,0xA3,0x23,0x7E,0x68,0x6A,0x3E,0x00, + 0x95,0xE7,0x96,0xF1,0xAE,0x0A,0x51,0xCC,0x21,0xD8,0x68,0x62,0x68,0x68,0x3D,0xD8, + 0x68,0x66,0x68,0x6B,0x3E,0x00,0x3E,0xB1,0xCE,0x0A,0x1A,0xC2,0x21,0xD6,0x68,0x69, + 0x3D,0xD6,0x02,0x72,0x22,0x05,0x2F,0x0F,0x3B,0x0C,0xDA,0xF1,0x68,0x6F,0x22,0x78, + 0x68,0x6B,0x3E,0x00,0x22,0x79,0x3E,0x00,0x23,0x7E,0x68,0x6A,0x3E,0x00,0x91,0xE7, + 0xCE,0x0A,0x68,0x6E,0x86,0x0A,0xAE,0x0A,0x99,0xE8,0x02,0xCE,0x86,0x0A,0xAE,0x0A, + 0xCE,0xF4,0x68,0x6F,0xDD,0xE7,0x68,0x6E,0x23,0x07,0x3D,0xE8,0x23,0x00,0x3D,0xE7, + 0x8E,0xF4,0x68,0x6D,0x22,0x86,0xD9,0xDD,0x12,0xD6,0x22,0x08,0x2F,0xF0,0x3B,0xB0, + 0x1B,0x05,0x22,0x86,0xD9,0xDD,0x12,0xDD,0x22,0x08,0x2F,0xF0,0x3B,0xF0,0x1B,0x99, + 0xD9,0xDD,0x1A,0xE7,0x21,0xF0,0x37,0xFA,0x2F,0xFA,0x3E,0x08,0x02,0xF0,0xDD,0xF0, + 0x68,0x6C,0xF2,0x06,0xD9,0xF0,0x68,0x6C,0xE6,0x06,0xC5,0xF0,0x68,0x6C,0xE2,0x06, + 0xDD,0xF2,0x1B,0x95,0xDD,0xF1,0x12,0xF7,0xDD,0xD9,0x1B,0x9F,0xBD,0xF1,0x22,0x06, + 0x37,0xFF,0x39,0xEF,0x12,0xFC,0x68,0x6D,0x3D,0xC8,0x35,0xEF,0x2D,0xEE,0x3B,0x00, + 0x21,0xC8,0x3D,0xEF,0x68,0x6F,0x92,0xF4,0x68,0x6D,0x22,0x86,0xD9,0xDD,0x13,0x09, + 0x22,0x08,0x2F,0xF0,0x3B,0xF0,0x1B,0x99,0xD9,0xDD,0x1B,0x12,0xDD,0xF0,0x68,0x6C, + 0xFE,0x08,0x03,0x15,0xDD,0xF0,0x68,0x6C,0xF2,0x06,0xDD,0xE9,0x13,0x1E,0xD9,0xE9, + 0x1B,0x1E,0x22,0xBC,0x3B,0x10,0x13,0x1F,0x99,0xE9,0x82,0xF4,0x3E,0xBC,0xDD,0xEA, + 0x13,0x28,0xD9,0xEA,0x1B,0x28,0x22,0xBD,0x3B,0x10,0x13,0x29,0x99,0xEA,0x82,0xF4, + 0x3E,0xBD,0xDD,0xEB,0x13,0x32,0xD9,0xEB,0x1B,0x32,0x22,0xBE,0x3B,0x10,0x13,0x33, + 0x99,0xEB,0x82,0xF4,0x3E,0xBE,0xDD,0xEC,0x13,0x3C,0xD9,0xEC,0x1B,0x3C,0x22,0xBF, + 0x3B,0x10,0x13,0x3D,0x99,0xEC,0x82,0xF4,0x3E,0xBF,0xDD,0xF2,0x1B,0x95,0xDD,0xF1, + 0x13,0x44,0xDD,0xD9,0x1B,0x9F,0xBD,0xF1,0x22,0x06,0x37,0xFF,0x2F,0x80,0x39,0xEF, + 0x13,0x4A,0x68,0x6D,0x3D,0xEF,0xDD,0xEE,0x68,0x6E,0x92,0xF4,0x68,0x6D,0x21,0xE7, + 0x2F,0x0F,0x3B,0x04,0x68,0x6E,0x21,0xE7,0xDE,0xF0,0x1B,0x75,0xDA,0xF0,0x1B,0x6A, + 0x68,0x70,0x43,0xC8,0x21,0xCA,0xC2,0xF0,0x21,0xC9,0x68,0x64,0x3D,0xD8,0x23,0x81, + 0x3E,0x01,0x21,0xDA,0x33,0x20,0x3E,0x02,0x23,0x80,0x3E,0x01,0x23,0x00,0x3E,0xF5, + 0x99,0xE7,0x68,0x6D,0x68,0x70,0x43,0xC8,0x21,0xCA,0xC2,0xF0,0x21,0xC9,0x68,0x64, + 0x68,0x6B,0x29,0xD8,0x3B,0x3B,0xDA,0xF1,0x13,0x80,0xCE,0xF4,0x68,0x6F,0xDD,0xE7, + 0x68,0x6E,0x21,0xDA,0x2F,0xDF,0x3E,0x02,0x68,0x6A,0xFE,0x08,0xBD,0xF0,0x03,0x91, + 0xD6,0xF0,0x1B,0x8D,0x22,0xF5,0x3B,0x08,0xDA,0xF1,0x68,0x6E,0x21,0xDA,0x2F,0xDF, + 0x3E,0x02,0x68,0x6A,0xFE,0x08,0xBD,0xF0,0x95,0xE7,0x22,0xF5,0x3B,0x22,0xDA,0xF1, + 0x68,0x6E,0x23,0x00,0x3D,0xE7,0x8E,0xF4,0x68,0x6D,0x0B,0xB8,0xBD,0xF2,0x9E,0xF6, + 0x03,0x96,0x23,0xCC,0x3D,0xF4,0x0B,0xB8,0x96,0xF4,0x9E,0xF6,0x03,0x9E,0xBD,0xD9, + 0xA2,0x0A,0x86,0x0A,0xAE,0x0A,0xBD,0xF1,0x0B,0x4F,0xD9,0xF1,0x1B,0xAF,0x0A,0xD2, + 0xDD,0xF3,0x13,0xA3,0x23,0x00,0x3D,0xF1,0xBD,0xF3,0x9D,0xD9,0x00,0x00,0x58,0x00, + 0x4B,0xC8,0x41,0xF8,0x22,0x7F,0x2F,0xBF,0x3E,0x7F,0x21,0xF6,0x4D,0xC8,0x03,0xB7, + 0xA2,0x0A,0x86,0x0A,0xAE,0x0A,0xD9,0xDD,0x1B,0xBF,0x23,0x00,0x3E,0x46,0x23,0x00, + 0x3D,0xF5,0x3D,0xF0,0x3D,0xEF,0x23,0x12,0x3E,0x06,0x23,0xC0,0x3E,0x08,0x68,0x6D}; + +static unsigned char resRVdce[192] = + {0x22,0x86,0xD9,0xDD,0x15,0x04,0x22,0x08,0x2F,0xF0,0x3B,0xF0,0x1D,0x30,0xD9,0xDD, + 0x1D,0x0E,0x21,0xF0,0x37,0xFA,0x2F,0xFA,0x3E,0x08,0x05,0x17,0xDD,0xF0,0x68,0x6C, + 0xF2,0x06,0xD9,0xF0,0x68,0x6C,0xE6,0x06,0xC5,0xF0,0x68,0x6C,0xE2,0x06,0xDD,0xF2, + 0x1D,0x2C,0xDD,0xF1,0x15,0x1E,0xDD,0xD9,0x1D,0x36,0xBD,0xF1,0x22,0x06,0x37,0xFF, + 
0x39,0xEF,0x15,0x23,0x68,0x6D,0x3D,0xC8,0x35,0xEF,0x2D,0xEE,0x3B,0x00,0x21,0xC8, + 0x3D,0xEF,0x68,0x6F,0x92,0xF4,0x68,0x6D,0x0D,0x4F,0xBD,0xF2,0x9E,0xF6,0x05,0x2D, + 0x23,0xCC,0x3D,0xF4,0x0D,0x4F,0x96,0xF4,0x9E,0xF6,0x05,0x35,0xBD,0xD9,0xA2,0x0A, + 0x86,0x0A,0xAE,0x0A,0xBD,0xF1,0x0D,0x5F,0xD9,0xF1,0x1D,0x46,0x0D,0x00,0xDD,0xF3, + 0x15,0x3A,0x23,0x00,0x3D,0xF1,0xBD,0xF3,0x9D,0xD9,0x00,0x00,0x58,0x00,0x4B,0xC8, + 0x41,0xF8,0x22,0x7F,0x2F,0xBF,0x3E,0x7F,0x21,0xF6,0x4D,0xC8,0x05,0x4E,0xA2,0x0A, + 0x86,0x0A,0xAE,0x0A,0xD9,0xDD,0x1D,0x56,0x23,0x00,0x3E,0x46,0x23,0x00,0x3D,0xF5, + 0x3D,0xF0,0x3D,0xEF,0x23,0x12,0x3E,0x06,0x23,0xC0,0x3E,0x08,0x68,0x6D,0x68,0x6D}; + +static unsigned char funcLoad[1760] = + {0xA1,0xC5,0x23,0x40,0x3E,0x01,0x21,0xC6,0x3E,0x02,0x21,0xC7,0x3E,0x03,0x23,0x10, + 0x3E,0xB7,0x3E,0xB5,0x23,0x20,0x3E,0xB6,0x3E,0xB4,0xCD,0xBD,0x18,0x12,0x23,0x7E, + 0x3D,0x9F,0x00,0x14,0x23,0xC0,0x3D,0x9F,0x50,0x00,0x43,0x84,0x23,0x00,0x3D,0x98, + 0x0B,0x1E,0x09,0xAE,0xC1,0xC5,0x18,0x00,0x49,0x8C,0x68,0x6D,0x21,0xBF,0x3B,0x00, + 0x18,0x26,0xC2,0x04,0x10,0x32,0x3E,0x00,0x23,0x00,0x3D,0xBF,0xC5,0xC5,0x18,0x32, + 0xC9,0xC5,0x10,0x30,0xC2,0x04,0x10,0x32,0x23,0x00,0x68,0x6A,0x3E,0x00,0x00,0x32, + 0x49,0x8E,0x68,0x6D,0x21,0xC5,0x39,0xC4,0x13,0x3A,0xC2,0xF1,0x10,0x59,0xD5,0x98, + 0x18,0x44,0x95,0x98,0x45,0x8A,0xDE,0xF1,0x10,0x4F,0x21,0x42,0x39,0x40,0x10,0x54, + 0x45,0x88,0xDE,0xF1,0x10,0x55,0x00,0x59,0xB5,0x98,0x45,0x88,0xDE,0xF1,0x10,0x55, + 0x45,0x8A,0xDE,0xF1,0x10,0x4F,0x21,0x42,0x39,0x40,0x10,0x54,0x00,0x59,0x49,0x8A, + 0x45,0x84,0x47,0x8A,0x68,0x6D,0x00,0x59,0x00,0xD9,0x49,0x88,0x45,0x84,0x47,0x88, + 0x68,0x6D,0x0D,0x00,0x00,0x1A,0x00,0x1E,0xDE,0x04,0x10,0x1E,0x78,0x60,0x67,0x8C, + 0xD9,0x97,0x10,0x64,0xB9,0x97,0x00,0x97,0xDE,0x04,0x10,0xB6,0x68,0x70,0x43,0x80, + 0x45,0x82,0x21,0xB7,0x68,0x62,0x47,0x82,0x22,0x00,0x3D,0x90,0xCA,0x0A,0x18,0x97, + 0x22,0x04,0x2F,0x28,0x3D,0x91,0x3B,0x00,0x18,0x7E,0xD6,0xF0,0x10,0x7A,0x95,0xA4, + 0x8D,0xA4,0x00,0x7E,0xCE,0xF0,0x10,0x7E,0x9D,0xA4,0x8D,0xA4,0x45,0x70,0x21,0x90, + 0x68,0x67,0x68,0x60,0x47,0x70,0x21,0x99,0x68,0x68,0x3D,0x99,0x53,0x74,0x18,0xD7, + 0xCD,0xA4,0xAD,0xA4,0x18,0x97,0x3B,0x20,0x68,0x6E,0x21,0x70,0x3B,0x60,0xDA,0xF1, + 0x10,0x95,0x23,0x00,0x3D,0x70,0x39,0x74,0x18,0xD7,0x23,0x00,0x00,0xBA,0x78,0x5C, + 0x67,0x8C,0x21,0x70,0x3B,0x60,0xDA,0xF1,0x10,0xA1,0x23,0x00,0x3D,0x70,0x39,0x74, + 0x18,0xA5,0xCA,0x0A,0x18,0xA5,0xC1,0x97,0x10,0xAA,0xAA,0x0A,0xA1,0x97,0xA2,0x0A, + 0x23,0x50,0x00,0xB3,0xDD,0xA4,0x10,0xAE,0x23,0x90,0x00,0xB3,0xD5,0xA4,0x10,0xB2, + 0x23,0x11,0x00,0xB3,0x23,0x10,0xBD,0xA4,0xB5,0xA4,0x00,0xBA,0x41,0x80,0x68,0x71, + 0x68,0x6E,0x00,0x97,0x45,0x40,0x68,0x67,0x68,0x60,0x21,0x99,0x68,0x67,0x23,0x00, + 0x3D,0x99,0x21,0x40,0x68,0x6B,0x27,0x02,0x3B,0x40,0x10,0xC7,0x23,0x00,0x39,0x42, + 0x18,0xCB,0x3D,0x40,0x68,0x6D,0x45,0x40,0x3D,0x40,0x23,0x50,0x68,0x67,0x68,0x60, + 0x68,0x66,0x33,0x80,0x68,0x67,0xA2,0x0A,0x78,0x5C,0x67,0x8C,0x00,0x1E,0x81,0x97, + 0x00,0x97,0x45,0x42,0x68,0x66,0x3D,0x94,0x68,0x60,0x68,0x66,0x2F,0x3F,0x3D,0x95, + 0x3B,0x00,0x19,0x1E,0xDD,0x97,0x18,0xFE,0xD9,0x97,0x10,0xE8,0x9D,0x97,0x00,0xFE, + 0x45,0x50,0x43,0x90,0x50,0x00,0x21,0x95,0x68,0x62,0x53,0x90,0xDA,0xF1,0x19,0x59, + 0x45,0x50,0x68,0x63,0x47,0x50,0x41,0x4C,0x21,0x95,0x4F,0x74,0x68,0x62,0x43,0x4C, + 0x45,0x54,0x68,0x62,0x47,0x54,0x79,0x02,0x67,0x8A,0x00,0x59,0x21,0x95,0x45,0x54, + 0x68,0x62,0x47,0x54,0x21,0x74,0x68,0x6B,0x25,0x95,0x3B,0x60,0xDA,0xF1,0x11,0x09, + 0x23,0x00,0x3D,0x74,0x21,0x94,0xDA,0xF0,0x11,0x0E,0x82,0x0A,0x21,0x94,0xD2,0xF0, + 
0x19,0x1E,0x45,0x50,0xDE,0xF1,0x11,0x86,0xDD,0x97,0x19,0x86,0x41,0x48,0xDE,0xF1, + 0x19,0x1C,0x23,0x0C,0x55,0x48,0x01,0x86,0x99,0x97,0x01,0x86,0xCD,0x94,0x11,0x22, + 0x8E,0xF2,0x01,0x86,0x41,0x44,0xDE,0xF1,0x19,0x43,0x45,0x42,0x68,0x60,0x68,0x66, + 0xDE,0xF0,0x11,0x2B,0x82,0x0A,0x21,0x94,0xDD,0x97,0x11,0x30,0x33,0x02,0xBD,0x97, + 0xB9,0x97,0x3D,0x56,0x41,0xB0,0x23,0x10,0x68,0x62,0x23,0x04,0x57,0x54,0x79,0x3A, + 0x67,0x8A,0x00,0x59,0x92,0xF2,0x23,0x00,0x3D,0x54,0x3D,0x55,0x41,0x44,0x43,0xB0, + 0x23,0x10,0x55,0x44,0x01,0x86,0x41,0xB0,0x23,0x10,0x55,0x44,0x79,0x49,0x67,0x8A, + 0x00,0x59,0x41,0x44,0xDE,0xF1,0x11,0x25,0x68,0x70,0x23,0x0E,0x68,0x62,0x43,0x78, + 0x79,0x53,0x67,0x8A,0x00,0x59,0x41,0x78,0x68,0x71,0x19,0x43,0x79,0x53,0x67,0x8A, + 0x00,0x59,0x21,0x95,0x68,0x6B,0x29,0x50,0x3D,0x95,0x41,0x4C,0x21,0x50,0x4F,0x74, + 0x45,0x54,0x68,0x62,0x47,0x54,0x79,0x66,0x67,0x8A,0x00,0x59,0x21,0x74,0x68,0x6B, + 0x25,0x50,0x3D,0x74,0x41,0x48,0xDE,0xF1,0x11,0x75,0x9D,0x97,0x21,0x95,0x45,0x54, + 0x68,0x62,0x47,0x54,0x50,0x00,0x47,0x50,0x01,0x02,0x23,0x0C,0x55,0x48,0x79,0x7A, + 0x67,0x8A,0x00,0x59,0x45,0x50,0x43,0x90,0x50,0x00,0x21,0x95,0x68,0x62,0x53,0x90, + 0xDA,0xF1,0x19,0x59,0x45,0x50,0x68,0x63,0x47,0x50,0x00,0xF3,0x21,0x42,0x68,0x6B, + 0x27,0x02,0x3B,0x40,0x11,0x8C,0x23,0x00,0x3D,0x42,0x00,0x59,0x4B,0x92,0x09,0xAE, + 0x41,0xB0,0xDE,0xF1,0x11,0x99,0x79,0x97,0x67,0x8A,0x49,0x92,0x68,0x6D,0x4B,0x92, + 0x01,0x90,0x23,0x10,0x55,0x44,0x79,0x9F,0x67,0x8A,0x49,0x92,0x68,0x6D,0x82,0x0A, + 0x78,0x5C,0x67,0x8C,0x50,0x00,0x43,0x70,0x43,0x74,0x23,0x00,0x3D,0xB5,0x3D,0x99, + 0x3D,0x54,0x3D,0x55,0x3D,0x97,0x00,0x53,0x81,0x98,0x01,0xAF,0xA1,0x98,0xA2,0x0A, + 0x51,0x00,0x47,0x40,0x47,0x42,0x45,0x84,0x47,0x8A,0x78,0x5B,0x67,0x8C,0xC1,0x98, + 0x68,0x6E,0x8A,0xF2,0x68,0x6D,0x00,0x32,0x96,0xF2,0xB1,0x98,0xDD,0xBC,0x10,0x32, + 0xBD,0xBC,0x79,0xC6,0x67,0x8E,0x7A,0x5D,0x67,0x88,0x00,0x32,0x00,0x32,0xC2,0x04, + 0x10,0x32,0x4B,0x92,0x0A,0x4D,0x49,0x92,0xC1,0x9D,0x11,0xCF,0x00,0x32,0x79,0xD2, + 0x67,0x8E,0x68,0x6D,0xC2,0x04,0x10,0x32,0x4B,0x92,0x0A,0x4D,0x49,0x92,0xC1,0x9D, + 0x11,0xDA,0x00,0x32,0x45,0x6E,0x68,0x66,0x68,0x6B,0x3E,0x00,0x21,0x6E,0x68,0x68, + 0x3B,0x00,0x11,0xE3,0x23,0x80,0x3D,0x6E,0x68,0x6B,0x29,0x6C,0x11,0xEA,0xC9,0x98, + 0x19,0xF7,0x01,0xEC,0xC9,0x98,0x19,0xF1,0x3B,0x00,0x68,0x6E,0x7A,0x0E,0x67,0x8E, + 0x68,0x6D,0x3B,0x1F,0xDA,0xF1,0x19,0xF7,0x3B,0x00,0x19,0xFD,0x68,0x6D,0x45,0x88, + 0xDE,0xF1,0x68,0x6E,0x7A,0x80,0x67,0x88,0x68,0x6D,0x7A,0x00,0x67,0x8E,0x68,0x6D, + 0xC2,0x04,0x10,0x32,0x4B,0x92,0x0A,0x4D,0x49,0x92,0xC1,0x9D,0x12,0x08,0x00,0x32, + 0x21,0x6C,0x39,0x6E,0x18,0x32,0x79,0xD2,0x67,0x8E,0x01,0xDA,0xC2,0x04,0x10,0x32, + 0x4B,0x92,0x0A,0x4D,0x49,0x92,0xC1,0x9D,0x12,0x16,0x00,0x32,0xBD,0x98,0x91,0x98, + 0xAD,0x98,0x7A,0x3D,0x67,0x8E,0x7A,0x1E,0x67,0x88,0x68,0x6D,0x41,0x58,0xDE,0xF1, + 0x12,0x27,0x41,0x7C,0x23,0x14,0x55,0x58,0x7A,0x27,0x67,0x88,0x00,0x59,0x23,0x80, + 0x3D,0x6A,0x41,0x7C,0x23,0x12,0x68,0x62,0x23,0x01,0x57,0x6A,0x7A,0x31,0x67,0x88, + 0x00,0x59,0x8D,0x98,0x41,0x58,0xDE,0xF1,0x12,0x37,0x9D,0x98,0x68,0x6D,0x96,0xF2, + 0xBD,0xBC,0xB1,0x98,0x79,0xC6,0x67,0x8E,0x02,0x6B,0xDD,0x98,0x10,0x32,0xDD,0xBC, + 0x19,0xBC,0xD5,0x6B,0x1A,0x48,0xD1,0x6B,0x1A,0x46,0x91,0x6B,0xC6,0x04,0x10,0x32, + 0x96,0xF2,0xB1,0x98,0x79,0xBE,0x67,0x8E,0x00,0x32,0xC1,0xBD,0x12,0x54,0x22,0x06, + 0xDA,0xF0,0x1A,0x55,0xBE,0x0A,0xA1,0x9D,0x68,0x6D,0x81,0x9D,0x22,0x05,0x2F,0x0F, + 0x3B,0x01,0xDA,0xF1,0x68,0x6E,0x9E,0x0A,0x68,0x6D,0x41,0xB8,0x23,0x04,0x55,0x7C, + 
0x7A,0x63,0x67,0x88,0x00,0x59,0x41,0x7C,0xDE,0xF1,0x12,0x6B,0x23,0x88,0x3D,0xBE, + 0x79,0xBE,0x67,0x8E,0x00,0x59,0x23,0x14,0x55,0x58,0x43,0x7C,0x7A,0x72,0x67,0x88, + 0x99,0xB6,0x00,0x59,0x41,0x58,0xDE,0xF1,0x12,0x7B,0x41,0xB8,0x23,0x04,0x57,0x84, + 0x7A,0x7B,0x67,0x88,0x00,0x59,0x99,0x98,0x89,0x98,0x50,0x80,0x47,0x6C,0x47,0x6E, + 0x45,0x68,0xDE,0xF1,0x12,0xA5,0x23,0x80,0xDD,0x6B,0x13,0x17,0x23,0x10,0x3D,0x6A, + 0x41,0x7C,0x23,0x12,0x68,0x62,0x23,0x01,0x57,0x6A,0x7A,0x90,0x67,0x88,0x00,0x59, + 0x41,0x58,0x23,0x82,0xDE,0xF1,0x1B,0x17,0x23,0x14,0x55,0x58,0x43,0x7C,0x7A,0x9A, + 0x67,0x88,0x00,0x59,0xDD,0x6B,0x1A,0xA5,0x41,0x58,0xDE,0xF1,0x12,0xA5,0x41,0xB8, + 0x23,0x04,0x57,0x84,0x7A,0xA5,0x67,0x88,0x00,0x59,0x45,0x66,0xDE,0xF1,0x1A,0xC0, + 0x65,0x66,0x63,0x90,0x45,0x64,0x53,0x90,0xDA,0xF1,0x1A,0xB9,0x68,0x5D,0x67,0x66, + 0x41,0x5C,0x23,0x84,0xDE,0xF1,0x1B,0x17,0x23,0x0A,0x55,0x5C,0x7A,0xA8,0x67,0x88, + 0x00,0x59,0x68,0x7D,0x47,0x64,0x41,0x60,0x68,0x7C,0x43,0x60,0x70,0x00,0x67,0x66, + 0x45,0x64,0xDE,0xF1,0x12,0xCC,0x41,0x5C,0x23,0x86,0xDE,0xF1,0x1B,0x17,0x23,0x0A, + 0x55,0x5C,0x7A,0xCC,0x67,0x88,0x00,0x59,0x21,0x6E,0x68,0x6B,0x29,0x6C,0x1A,0xD3, + 0x3B,0x00,0x1A,0xD3,0x02,0xD6,0x23,0x00,0x68,0x6B,0x29,0x6C,0x3D,0x96,0x45,0x68, + 0x43,0x90,0x70,0x00,0x68,0x42,0x73,0x90,0xDA,0xF1,0x12,0xE5,0x45,0x64,0x53,0x90, + 0xDA,0xF1,0x21,0x64,0x12,0xEB,0x21,0x68,0x02,0xEB,0x63,0x90,0x45,0x64,0x53,0x90, + 0xDA,0xF1,0x1A,0xEC,0x21,0x64,0x3D,0x96,0x21,0x96,0x41,0x60,0x4D,0x6C,0x68,0x62, + 0x43,0x60,0x45,0x64,0x68,0x63,0x47,0x64,0x45,0x68,0x68,0x63,0x47,0x68,0x7A,0xFA, + 0x67,0x88,0x00,0x59,0x21,0x6C,0x68,0x6B,0x25,0x96,0x3B,0x00,0x13,0x00,0x23,0x80, + 0x3D,0x6C,0x45,0x68,0xDE,0xF1,0x13,0x0A,0xDD,0x6B,0x1B,0x0A,0xA9,0x98,0xD9,0x98, + 0x10,0x59,0x03,0x0F,0xD9,0x98,0x10,0x59,0x21,0x6C,0x3B,0x80,0x12,0x80,0xB9,0x98, + 0x79,0xC7,0x67,0x8E,0xC2,0x04,0x10,0x59,0x49,0x8E,0x68,0x6D,0x03,0x12,0x3D,0xBE, + 0x0B,0x1E,0x09,0xAE,0x9E,0xF4,0x00,0x1A,0x81,0x98,0x03,0x1F,0xA1,0x98,0x86,0x0A, + 0xAE,0x0A,0xC5,0x98,0x13,0x26,0x22,0x01,0x2F,0xFC,0x3E,0x01,0x45,0x84,0x47,0x88, + 0x79,0xBB,0x67,0x8E,0xC1,0x98,0x68,0x6E,0x86,0xF2,0x68,0x6D,0x79,0xBE,0x67,0x8E, + 0xB1,0x98,0xC5,0x98,0x13,0x5D,0x22,0x01,0x2F,0xFD,0xD5,0xBD,0x13,0x38,0x33,0x03, + 0x3E,0x01,0x03,0x5D,0x3D,0x90,0x35,0xC4,0x3D,0x91,0xD1,0x90,0x1B,0x60,0xD5,0x90, + 0x13,0x48,0xD5,0x91,0x10,0x35,0x09,0xAE,0x0B,0x1E,0x82,0xF2,0x95,0xC4,0x00,0x35, + 0xDD,0x91,0x13,0x4F,0xDD,0x90,0x13,0x4E,0x09,0x8E,0x03,0x4F,0x09,0xAC,0xD9,0x91, + 0x13,0x5D,0xD9,0x90,0x1B,0x2E,0xD1,0x98,0x13,0x5C,0xCD,0x98,0x1B,0x5A,0x21,0x90, + 0x33,0x40,0x03,0x5E,0x96,0xF2,0xB1,0x98,0x0B,0x1C,0x21,0x90,0x3D,0xC4,0x00,0x35, + 0x91,0xC4,0x58,0x00,0x4B,0xC8,0x41,0xFC,0x22,0x7F,0x2F,0xBF,0x3E,0x7F,0x21,0xF7, + 0x4D,0xC8,0x03,0x69,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; + +#endif diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/char/lp.c linuxppc64_2_4/drivers/char/lp.c --- ../kernel.org/linux-2.4.19/drivers/char/lp.c Fri Apr 19 10:59:54 2002 +++ linuxppc64_2_4/drivers/char/lp.c Mon Apr 22 10:32:50 2002 @@ -611,10 +611,13 @@ return -EFAULT; break; case LPGETSTATUS: - lp_claim_parport_or_block (&lp_table[minor]); + if (down_interruptible (&lp_table[minor].port_mutex)) + return -EINTR; + parport_claim_or_block (lp_table[minor].dev); status = r_str(minor); - lp_release_parport (&lp_table[minor]); - + parport_release (lp_table[minor].dev); + + up (&lp_table[minor].port_mutex); if (copy_to_user((int *) arg, &status, sizeof(int))) return -EFAULT; break; diff -uNr --exclude=CVS 
../kernel.org/linux-2.4.19/drivers/char/tty_io.c linuxppc64_2_4/drivers/char/tty_io.c --- ../kernel.org/linux-2.4.19/drivers/char/tty_io.c Fri Apr 19 11:00:45 2002 +++ linuxppc64_2_4/drivers/char/tty_io.c Mon Apr 22 10:32:50 2002 @@ -2190,6 +2190,11 @@ * set up the console device so that later boot sequences can * inform about problems etc.. */ + +#ifdef CONFIG_VIOCONS + viocons_init(); +#endif + #ifdef CONFIG_VT con_init(); #endif @@ -2251,6 +2256,9 @@ #ifdef CONFIG_SERIAL_TX3912_CONSOLE tx3912_console_init(); #endif +#ifdef CONFIG_HVC_CONSOLE + hvc_console_init(); +#endif #ifdef CONFIG_TXX927_SERIAL_CONSOLE txx927_console_init(); #endif @@ -2307,6 +2315,10 @@ /* console calls tty_register_driver() before kmalloc() works. * Thus, we can't devfs_register() then. Do so now, instead. */ +#ifdef CONFIG_VIOCONS + viocons_init2(); +#endif + #ifdef CONFIG_VT con_init_devfs(); #endif @@ -2395,5 +2407,8 @@ #endif #ifdef CONFIG_A2232 a2232board_init(); +#endif +#ifdef CONFIG_ICOM + iCom_init(); #endif } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/ide/sl82c105.c linuxppc64_2_4/drivers/ide/sl82c105.c --- ../kernel.org/linux-2.4.19/drivers/ide/sl82c105.c Fri Apr 19 10:30:27 2002 +++ linuxppc64_2_4/drivers/ide/sl82c105.c Mon Dec 17 15:04:57 2001 @@ -156,6 +156,29 @@ } /* + * Reset the controller. + * If we are using INTC under a w83c553 we need to use a magic test + * bit to do this. Return zero if successful (and applicable). + * + */ +static int sl82c105_hard_reset(ide_drive_t *drive) +{ + ide_hwif_t *hwif = HWIF(drive); + struct pci_dev *dev = hwif->pci_dev; + unsigned int reg; + + pci_read_config_dword(dev, 0x40, &reg); /* LEGIRQ register */ + if (reg & (1<<11)) { /* Using INTC? */ + printk("sl82c105: resetting device\n"); + pci_read_config_dword(dev, 0x7e, &reg); + pci_write_config_word(dev, 0x7e, reg | (1<<2)); + pci_write_config_word(dev, 0x7e, reg & (~(1<<2))); + return 0; + } + return 1; +} + +/* + * Our own dmaproc, only to intercept ide_dma_check */ static int sl82c105_dmaproc(ide_dma_action_t func, ide_drive_t *drive) @@ -171,6 +194,11 @@ case ide_dma_off: config_for_pio(drive, 4, 0); break; + case ide_dma_lostirq: + case ide_dma_timeout: + if (sl82c105_hard_reset(drive) == 0) + return 0; + break; default: break; } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/iseries/Makefile linuxppc64_2_4/drivers/iseries/Makefile --- ../kernel.org/linux-2.4.19/drivers/iseries/Makefile Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/iseries/Makefile Thu Oct 11 11:10:49 2001 @@ -0,0 +1,43 @@ +# +# Makefile for the iSeries-specific device drivers. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now inherited from the +# parent makes.. +# + +# The target object and module list name. + +# O_TARGET := macintosh.o + +O_TARGET := iseries.o + +# Objects that export symbols. + +# export-objs := adb.o rtc.o mac_hid.o via-pmu.o + +export-objs := veth.o viocons.o viotape.o viodasd.o viocd.o viopath.o + +# Object file lists. + +obj-y := +obj-m := +obj-n := +obj- := + +# Each configuration option enables a list of files. + +obj-$(CONFIG_VETH) += veth.o +obj-$(CONFIG_VIOCONS) += viocons.o +obj-$(CONFIG_VIOPATH) += viopath.o +obj-$(CONFIG_VIOTAPE) += viotape.o +obj-$(CONFIG_VIODASD) += viodasd.o +obj-$(CONFIG_VIOCD) += viocd.o + +# The global Rules.make.
+ +include $(TOPDIR)/Rules.make + diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/iseries/veth.c linuxppc64_2_4/drivers/iseries/veth.c --- ../kernel.org/linux-2.4.19/drivers/iseries/veth.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/iseries/veth.c Tue Mar 12 08:51:03 2002 @@ -0,0 +1,1676 @@ +/* File veth.c created by Kyle A. Lucke on Mon Aug 7 2000. */ + +/**************************************************************************/ +/* */ +/* IBM eServer iSeries Virtual Ethernet Device Driver */ +/* Copyright (C) 2001 Kyle A. Lucke (klucke@us.ibm.com), IBM Corp. */ +/* */ +/* This program is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation; either version 2 of the License, or */ +/* (at your option) any later version. */ +/* */ +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 */ +/* USA */ +/* */ +/* This module contains the implementation of a virtual ethernet device */ +/* for use with iSeries LPAR Linux. It utilizes low-level message passing*/ +/* provided by the hypervisor to enable an ethernet-like network device */ +/* that can be used to enable inter-partition communications on the same */ +/* physical iSeries. */ +/* */ +/* The iSeries LPAR hypervisor has currently defined the ability for a */ +/* partition to communicate on up to 16 different virtual ethernets, all */ +/* dynamically configurable, at least for an OS/400 partition. The */ +/* dynamic nature is not supported for Linux yet. */ +/* */ +/* Each virtual ethernet a given Linux partition participates in will */ +/* cause a network device of the form ethXX to be created. */ +/* */ +/* The virtual ethernet a given ethXX device talks on can be determined */ +/* by dumping /proc/iSeries/veth/vethX, where X is the virtual ethernet */ +/* number; the netdevice name will be printed out. The virtual ethernet */ +/* a given ethX device communicates on is also printed to the printk() */ +/* buffer at module load time. */ +/* */ +/* This driver (and others like it on other partitions) is responsible for*/ +/* routing packets to and from other partitions. The MAC addresses used */ +/* by the virtual ethernets contain meaning, and should not be modified. */ +/* Doing so could disable the ability of your Linux partition to */ +/* communicate with the other OS/400 partitions on your physical iSeries. */ +/* Similarly, setting the MAC address to something other than the */ +/* "virtual burned-in" address is not allowed, for the same reason. */ +/* */ +/* Notes: */ +/* */ +/* 1. Although there is the capability to talk on multiple shared */ +/* ethernets to communicate with the same partition, each shared */ +/* ethernet to a given partition X will use a finite, shared number */ +/* of hypervisor messages to do the communication. So having 2 shared */ +/* ethernets to the same remote partition DOES NOT double the */ +/* available bandwidth. Each of the 2 shared ethernets will share the */ +/* same bandwidth available to the other. */ +/* */ +/* 2.
It is allowed to have a virtual ethernet that does not communicate */ +/* with any other partition. It won't do anything, but it's allowed. */ +/* */ +/* 3. There is no "loopback" mode for a virtual ethernet device. If you */ +/* send a packet to your own mac address, it will just be dropped; you */ +/* won't get it on the receive side. Such a thing could be done, */ +/* but my default driver DOES NOT do so. */ +/* */ +/* 4. Multicast addressing is implemented via broadcasting the multicast */ +/* frames to other partitions. It is the responsibility of the */ +/* receiving partition to filter the addresses desired. */ +/* */ +/* 5. This module utilizes several different bottom half handlers for */ +/* non-high-use path functions (setup, error handling, etc.). Multiple */ +/* bottom halves were used because a single one could not keep up with */ +/* the much faster iSeries device drivers this Linux driver talks to. */ +/* All high-priority work (receiving frames, handling frame acks) is */ +/* done in the interrupt handler for maximum performance. */ +/* */ +/* Tunable parameters: */ +/* */ +/* VethBuffersToAllocate: This compile-time option defaults to 120. It can*/ +/* be safely changed to something greater or less than the default. It */ +/* controls how much memory Linux will allocate per remote partition it is*/ +/* communicating with. The user can play with this to see how it affects */ +/* performance, packets dropped, etc. Without trying to understand the */ +/* complete driver, it can be thought of as the maximum number of packets */ +/* outstanding to a remote partition at a time. */ +/* */ +/**************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef _VETH_H +#include "veth.h" +#endif +#ifndef _HVLPCONFIG_H +#include <asm/iSeries/HvLpConfig.h> +#endif +#ifndef _VETH_PROC_H +#include +#endif +#ifndef _HVTYPES_H +#include <asm/iSeries/HvTypes.h> +#endif +#ifndef _ISERIES_PROC_H +#include <asm/iSeries/iSeries_proc.h> +#endif +#include +#include + + +#define veth_printk(fmt, args...) \ +printk(KERN_INFO "%s: " fmt, __FILE__, ## args) + +#define veth_error_printk(fmt, args...)
\ +printk(KERN_ERR "(%s:%3.3d) ERROR: " fmt, __FILE__, __LINE__ , ## args) + +static const char __initdata *version = "v1.0 03/11/2002 Kyle Lucke, klucke@us.ibm.com\n"; + +static int probed __initdata = 0; +#define VethBuffersToAllocate 120 + +static struct VethFabricMgr *mFabricMgr = NULL; +static struct proc_dir_entry *veth_proc_root = NULL; + +DECLARE_MUTEX_LOCKED(VethProcSemaphore); + +static int veth_open(struct net_device *dev); +static int veth_close(struct net_device *dev); +static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev); +static int veth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); +static void veth_handleEvent(struct HvLpEvent *, struct pt_regs *); +static void veth_handleAck(struct HvLpEvent *); +static void veth_handleInt(struct HvLpEvent *); +static void veth_openConnections(void); +static void veth_openConnection(u8, int lockMe); +static void veth_closeConnection(u8, int lockMe); +static void veth_intFinishOpeningConnections(void *, int number); +static void veth_finishOpeningConnections(void *); +static void veth_finishOpeningConnectionsLocked(struct VethLpConnection *); +static int veth_multicast_wanted(struct VethPort *port, u64 dest); +static void veth_set_multicast_list(struct net_device *dev); + +static void veth_sendCap(struct VethLpConnection *); +static void veth_sendMonitor(struct VethLpConnection *); +static void veth_takeCap(struct VethLpConnection *, struct VethLpEvent *); +static void veth_takeCapAck(struct VethLpConnection *, struct VethLpEvent *); +static void veth_takeMonitorAck(struct VethLpConnection *, struct VethLpEvent *); +static void veth_msgsInit(struct VethLpConnection *connection); +static void veth_recycleMsg(struct VethLpConnection *, u16); +static void veth_capBh(struct VethLpConnection *); +static void veth_capAckBh(struct VethLpConnection *); +static void veth_monitorAckBh(struct VethLpConnection *); +static void veth_takeFrames(struct VethLpConnection *, struct VethLpEvent *); +static void veth_pTransmit(struct sk_buff *skb, HvLpIndex remoteLp, struct net_device *dev); +static struct net_device_stats *veth_get_stats(struct net_device *dev); +static void veth_intFinishMsgsInit(void *, int); +static void veth_finishMsgsInit(struct VethLpConnection *connection); +static void veth_intFinishCapBh(void *, int); +static void veth_finishCapBh(struct VethLpConnection *connection); +static void veth_finishCapBhLocked(struct VethLpConnection *connection); +static void veth_finishSendCap(struct VethLpConnection *connection); +static void veth_timedAck(unsigned long connectionPtr); +#ifdef MODULE +static void veth_waitForEnd(void); +#endif +static void veth_failMe(struct VethLpConnection *connection); + +extern struct pci_dev *iSeries_veth_dev; + +int __init veth_probe(void) +{ + struct net_device *dev = NULL; + struct VethPort *port = NULL; + int vlansFound = 0; + int displayVersion = 0; + + u16 vlanMap = HvLpConfig_getVirtualLanIndexMap(); + int vlanIndex = 0; + + if (probed) + return -ENODEV; + probed = 1; + + while (vlanMap != 0) { + int bitOn = vlanMap & 0x8000; + + if (bitOn) { + vlansFound++; + + dev = init_etherdev(NULL, sizeof(struct VethPort)); + + if (dev == NULL) { + veth_error_printk("Unable to allocate net_device structure!\n"); + break; + } + + if (!dev->priv) + dev->priv = kmalloc(sizeof(struct VethPort), GFP_KERNEL); + if (!dev->priv) { + veth_error_printk("Unable to allocate memory\n"); + return -ENOMEM; + } + + veth_printk("Found an ethernet device %s (veth=%d) (addr=%p)\n", dev->name, vlanIndex, 
dev); + port = mFabricMgr->mPorts[vlanIndex] = (struct VethPort *) dev->priv; + memset(port, 0, sizeof(struct VethPort)); + rwlock_init(&(port->mMcastGate)); + mFabricMgr->mPorts[vlanIndex]->mDev = dev; + + /* build the "virtual burned-in" MAC: 02:01:FF:<vlan>:FF:<this LP's index> */ + dev->dev_addr[0] = 0x02; + dev->dev_addr[1] = 0x01; + dev->dev_addr[2] = 0xFF; + dev->dev_addr[3] = vlanIndex; + dev->dev_addr[4] = 0xFF; + dev->dev_addr[5] = HvLpConfig_getLpIndex_outline(); + dev->mtu = 9000; + + memcpy(&(port->mMyAddress), dev->dev_addr, 6); + + dev->open = &veth_open; + dev->hard_start_xmit = &veth_start_xmit; + dev->stop = &veth_close; + dev->get_stats = veth_get_stats; + dev->set_multicast_list = &veth_set_multicast_list; + dev->do_ioctl = &veth_ioctl; + dev->features |= NETIF_F_SG; + + /* display version info if adapter is found */ + if (!displayVersion) { + /* set display flag to TRUE so that */ + /* we only display this string ONCE */ + displayVersion = 1; + veth_printk("%s", version); + } + + } + + ++vlanIndex; + vlanMap = vlanMap << 1; + } + + if (vlansFound > 0) + return 0; + else + return -ENODEV; +} + +#ifdef MODULE +MODULE_AUTHOR("Kyle Lucke <klucke@us.ibm.com>"); +MODULE_DESCRIPTION("iSeries Virtual ethernet driver"); +MODULE_LICENSE("GPL"); + +DECLARE_MUTEX_LOCKED(VethModuleBhDone); +int VethModuleReopen = 1; + +void veth_proc_delete(struct proc_dir_entry *iSeries_proc) +{ + int i = 0; + HvLpIndex thisLp = HvLpConfig_getLpIndex_outline(); + u16 vlanMap = HvLpConfig_getVirtualLanIndexMap(); + int vlanIndex = 0; + + for (i = 0; i < HvMaxArchitectedLps; ++i) { + if (i != thisLp) { + if (HvLpConfig_doLpsCommunicateOnVirtualLan(thisLp, i)) { + char name[10] = ""; + sprintf(name, "lpar%d", i); + remove_proc_entry(name, veth_proc_root); + } + } + } + + while (vlanMap != 0) { + int bitOn = vlanMap & 0x8000; + + if (bitOn) { + char name[10] = ""; + sprintf(name, "veth%d", vlanIndex); + remove_proc_entry(name, veth_proc_root); + } + + ++vlanIndex; + vlanMap = vlanMap << 1; + } + + remove_proc_entry("veth", iSeries_proc); + + up(&VethProcSemaphore); +} + +void veth_waitForEnd(void) +{ + up(&VethModuleBhDone); +} + +void __exit veth_module_cleanup(void) +{ + int i; + struct VethFabricMgr *myFm = mFabricMgr; + struct tq_struct myBottomHalf; + struct net_device *thisOne = NULL; + + VethModuleReopen = 0; + + for (i = 0; i < HvMaxArchitectedLps; ++i) { + veth_closeConnection(i, 1); + } + + myBottomHalf.routine = (void *) (void *) veth_waitForEnd; + + queue_task(&myBottomHalf, &tq_immediate); + mark_bh(IMMEDIATE_BH); + + down(&VethModuleBhDone); + + HvLpEvent_unregisterHandler(HvLpEvent_Type_VirtualLan); + + mb(); + mFabricMgr = NULL; + mb(); + + down(&VethProcSemaphore); + + iSeries_proc_callback(&veth_proc_delete); + + down(&VethProcSemaphore); + + for (i = 0; i < HvMaxArchitectedLps; ++i) { + if (myFm->mConnection[i].mNumberAllocated + myFm->mConnection[i].mNumberRcvMsgs > 0) { + mf_deallocateLpEvents(myFm->mConnection[i].mRemoteLp, + HvLpEvent_Type_VirtualLan, + myFm->mConnection[i].mNumberAllocated + myFm->mConnection[i].mNumberRcvMsgs, + NULL, NULL); + } + + if (myFm->mConnection[i].mMsgs != NULL) { + kfree(myFm->mConnection[i].mMsgs); + } + } + + for (i = 0; i < HvMaxArchitectedVirtualLans; ++i) { + if (myFm->mPorts[i] != NULL) { + thisOne = myFm->mPorts[i]->mDev; + myFm->mPorts[i] = NULL; + + mb(); + + if (thisOne != NULL) { + veth_printk("Unregistering %s (veth=%d)\n", thisOne->name, i); + unregister_netdev(thisOne); + } + } + } + + kfree(myFm); +} + +module_exit(veth_module_cleanup); +#endif + + +void veth_proc_init(struct proc_dir_entry *iSeries_proc) +{ + long i = 0; +
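/* one lparX /proc entry is created per remote LP this partition can reach, plus one vethX entry per virtual lan it participates in */ +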
HvLpIndex thisLp = HvLpConfig_getLpIndex_outline(); + u16 vlanMap = HvLpConfig_getVirtualLanIndexMap(); + long vlanIndex = 0; + + + veth_proc_root = proc_mkdir("veth", iSeries_proc); + if (!veth_proc_root) + return; + + for (i = 0; i < HvMaxArchitectedLps; ++i) { + if (i != thisLp) { + if (HvLpConfig_doLpsCommunicateOnVirtualLan(thisLp, i)) { + struct proc_dir_entry *ent; + char name[10] = ""; + sprintf(name, "lpar%d", (int) i); + ent = create_proc_entry(name, S_IFREG | S_IRUSR, veth_proc_root); + if (!ent) + return; + ent->nlink = 1; + ent->data = (void *) i; + ent->read_proc = proc_veth_dump_connection; + ent->write_proc = NULL; + } + } + } + + while (vlanMap != 0) { + int bitOn = vlanMap & 0x8000; + + if (bitOn) { + struct proc_dir_entry *ent; + char name[10] = ""; + sprintf(name, "veth%d", (int) vlanIndex); + ent = create_proc_entry(name, S_IFREG | S_IRUSR, veth_proc_root); + if (!ent) + return; + ent->nlink = 1; + ent->data = (void *) vlanIndex; + ent->read_proc = proc_veth_dump_port; + ent->write_proc = NULL; + } + + ++vlanIndex; + vlanMap = vlanMap << 1; + } + + up(&VethProcSemaphore); +} + +int __init veth_module_init(void) +{ + int status; + int i; + + mFabricMgr = kmalloc(sizeof(struct VethFabricMgr), GFP_KERNEL); + memset(mFabricMgr, 0, sizeof(struct VethFabricMgr)); + veth_printk("Initializing veth module, fabric mgr (address=%p)\n", mFabricMgr); + + mFabricMgr->mEyecatcher = 0x56455448464D4752ULL; /* "VETHFMGR" */ + mFabricMgr->mThisLp = HvLpConfig_getLpIndex_outline(); + + for (i = 0; i < HvMaxArchitectedLps; ++i) { + mFabricMgr->mConnection[i].mEyecatcher = 0x564554484C50434EULL; /* "VETHLPCN" */ + veth_failMe(mFabricMgr->mConnection + i); + spin_lock_init(&mFabricMgr->mConnection[i].mAckGate); + spin_lock_init(&mFabricMgr->mConnection[i].mStatusGate); + } + + status = veth_probe(); + + if (status == 0) { + veth_openConnections(); + iSeries_proc_callback(&veth_proc_init); + } + + return status; +} + +module_init(veth_module_init); + +static void veth_failMe(struct VethLpConnection *connection) +{ + connection->mConnectionStatus.mSentCap = 0; + connection->mConnectionStatus.mCapAcked = 0; + connection->mConnectionStatus.mGotCap = 0; + connection->mConnectionStatus.mGotCapAcked = 0; + connection->mConnectionStatus.mSentMonitor = 0; + connection->mConnectionStatus.mFailed = 1; +} + +static int veth_open(struct net_device *dev) +{ + struct VethPort *port = (struct VethPort *) dev->priv; + + memset(&port->mStats, 0, sizeof(port->mStats)); + MOD_INC_USE_COUNT; + + netif_start_queue(dev); + + return 0; +} + +static int veth_close(struct net_device *dev) +{ + netif_stop_queue(dev); + + MOD_DEC_USE_COUNT; + + return 0; +} + +static struct net_device_stats *veth_get_stats(struct net_device *dev) +{ + struct VethPort *port = (struct VethPort *) dev->priv; + + return (&port->mStats); +} + + +static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned char *frame = skb->data; + HvLpIndex remoteLp = frame[5]; /* byte 5 of the dest MAC holds the target LP index */ + int i = 0; + int clone = 0; + + if (mFabricMgr == NULL) { + veth_error_printk("NULL fabric manager with active ports!\n"); + netif_stop_queue(dev); + return 1; + } + + mb(); + + if ((*frame & 0x01) != 0x01) { /* unicast frame */ + if ((remoteLp != mFabricMgr->mThisLp) && (HvLpConfig_doLpsCommunicateOnVirtualLan(mFabricMgr->mThisLp, remoteLp))) + veth_pTransmit(skb, remoteLp, dev); + } else { /* broadcast or multicast: send a copy to every LP on this vlan */ + for (i = 0; i < HvMaxArchitectedLps; ++i) { + if (i != mFabricMgr->mThisLp) { + if (HvLpConfig_doLpsCommunicateOnVirtualLan(mFabricMgr->mThisLp, i)) {
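+ /* the first target LP consumes the original skb; every later target gets its own clone (the ack path frees each copy) */ + if (clone) + skb = skb_clone(skb,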
GFP_ATOMIC); + else + clone = 1; + + /* the ack handles deleting the skb */ + veth_pTransmit(skb, i, dev); + } + } + } + } + + return 0; +} + +static void veth_pTransmit(struct sk_buff *skb, HvLpIndex remoteLp, struct net_device *dev) +{ + struct VethLpConnection *connection = mFabricMgr->mConnection + remoteLp; + HvLpEvent_Rc returnCode; + struct scatterlist sg[VethMaxFramesPerMsg]; + int nfrags = 0; + int nsg; + + if (connection->mConnectionStatus.mFailed != 1) { + int rc = 0; + struct VethMsg *msg = NULL; + VETHSTACKPOP(&(connection->mMsgStack), msg); + + /* We can't handle a fragmented frame if it has + more than VethMaxFramesPerMsg fragments. + Attempt to coalesce the fragments if possible, + otherwise drop the frame */ + + if ((skb_shinfo(skb)->nr_frags + 1) > VethMaxFramesPerMsg) { + veth_printk("Linearizing frame to handle > 6 frags\n"); + rc = skb_linearize(skb, GFP_ATOMIC); + } + + if (msg != NULL && rc == 0 && ((skb->len - 14) <= 9000)) { /* payload minus the 14-byte ethernet header must fit the 9000-byte mtu */ + /* Use a scatterlist for both the fragmented and unfragmented + cases. pci_map_sg has a fast path for the single-entry + case, so we can simplify this code without taking + too big a perf hit. + */ + + if (skb_shinfo(skb)->nr_frags) { /* fragmented frame */ + int i = 0; + + sg[nfrags].address = skb->data; + sg[nfrags].length = skb->len - skb->data_len; + ++nfrags; + + do { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + sg[nfrags].address = page_address(frag->page) + frag->page_offset; + sg[nfrags].length = frag->size; + ++nfrags; + ++i; + } while (i < skb_shinfo(skb)->nr_frags); + } else { /* unfragmented frame */ + sg[nfrags].address = skb->data; + sg[nfrags].length = skb->len; + ++nfrags; + } + /* + Note: nsg may be less than nfrags. Each frag entry + in the skb can only be a page in size, so frags can + be contiguous in memory yet spread over multiple + frag entries in the skb. pci_map_sg will coalesce + contiguous fragments into a single dma address. + */ + + nsg = pci_map_sg(iSeries_veth_dev, sg, nfrags, PCI_DMA_TODEVICE); + + /* Is it really necessary to check the length and address fields of the + first entry here?
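(Probably not: pci_map_sg yields zero entries when the mapping fails, and the !nsg case below already bails out with a bad return code.)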
*/ + if (nsg) { + int i = 0; + msg->mSkb = skb; + do { + msg->mEvent.mSendData.mAddress[i] = sg[i].dma_address; + msg->mEvent.mSendData.mLength[i] = sg[i].dma_length; + ++i; + } while (i < nsg); + + msg->mEvent.mSendData.mEofMask = (1 << (nsg - 1)); + test_and_set_bit(0, &(msg->mInUse)); + + returnCode = HvCallEvent_signalLpEventFast(remoteLp, + HvLpEvent_Type_VirtualLan, + VethEventTypeFrames, + HvLpEvent_AckInd_NoAck, + HvLpEvent_AckType_ImmediateAck, + connection->mSourceInst, + connection->mTargetInst, + msg->mIndex, + msg->mEvent.mFpData.mData1, + msg->mEvent.mFpData.mData2, + msg->mEvent.mFpData.mData3, + msg->mEvent.mFpData.mData4, + msg->mEvent.mFpData.mData5); + } else { + returnCode = -1; /* Bad return code */ + } + + if (returnCode != HvLpEvent_Rc_Good) { + struct VethPort *port = (struct VethPort *) dev->priv; + if (nsg) + pci_unmap_sg(iSeries_veth_dev, sg, nsg, PCI_DMA_TODEVICE); + + dev_kfree_skb_any(skb); + + msg->mSkb = NULL; + memset(&(msg->mEvent.mSendData), 0, sizeof(struct VethFramesData)); + VETHSTACKPUSH(&(connection->mMsgStack), msg); + port->mStats.tx_dropped++; + } else { + struct VethPort *port = (struct VethPort *) dev->priv; + port->mStats.tx_packets++; + port->mStats.tx_bytes += skb->len; + } + } else { + struct VethPort *port = (struct VethPort *) dev->priv; + port->mStats.tx_dropped++; + if (rc) + port->mLinearized++; + dev_kfree_skb_any(skb); + } + } else { + struct VethPort *port = (struct VethPort *) dev->priv; + port->mStats.tx_dropped++; + dev_kfree_skb_any(skb); + } +} + +static int veth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + + return -EOPNOTSUPP; +} + +static void veth_set_multicast_list(struct net_device *dev) +{ + char *addrs; + struct VethPort *port = (struct VethPort *) dev->priv; + u64 newAddress = 0; + unsigned long flags; + + write_lock_irqsave(&port->mMcastGate, flags); + + if (dev->flags & IFF_PROMISC) { /* set promiscuous mode */ + port->mPromiscuous = 1; + } else { + struct dev_mc_list *dmi = dev->mc_list; + + if (dev->flags & IFF_ALLMULTI) { + port->mAllMcast = 1; + } else { + int i; + /* Update table */ + port->mNumAddrs = 0; + + for (i = 0; ((i < dev->mc_count) && (i < 12)); i++) { /* for each address in the list */ + addrs = dmi->dmi_addr; + dmi = dmi->next; + if ((*addrs & 0x01) == 1) { /* multicast address? 
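(a group address has the low-order bit of its first octet set)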
*/ + memcpy(&newAddress, addrs, 6); + newAddress &= 0xFFFFFFFFFFFF0000; + + port->mMcasts[port->mNumAddrs] = newAddress; + mb(); + port->mNumAddrs = port->mNumAddrs + 1; + } + } + } + } + + write_unlock_irqrestore(&port->mMcastGate, flags); +} + + +static void veth_handleEvent(struct HvLpEvent *event, struct pt_regs *regs) +{ + if (event->xFlags.xFunction == HvLpEvent_Function_Ack) { + veth_handleAck(event); + } else if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + veth_handleInt(event); + } +} + +static void veth_handleAck(struct HvLpEvent *event) +{ + struct VethLpConnection *connection = &(mFabricMgr->mConnection[event->xTargetLp]); + struct VethLpEvent *vethEvent = (struct VethLpEvent *) event; + + switch (event->xSubtype) { + case VethEventTypeCap: + { + veth_takeCapAck(connection, vethEvent); + break; + } + case VethEventTypeMonitor: + { + veth_takeMonitorAck(connection, vethEvent); + break; + } + default: + { + veth_error_printk("Unknown ack type %d from lpar %d\n", event->xSubtype, connection->mRemoteLp); + } + }; +} + +static void veth_handleInt(struct HvLpEvent *event) +{ + int i = 0; + struct VethLpConnection *connection = &(mFabricMgr->mConnection[event->xSourceLp]); + struct VethLpEvent *vethEvent = (struct VethLpEvent *) event; + + switch (event->xSubtype) { + case VethEventTypeCap: + { + veth_takeCap(connection, vethEvent); + break; + } + case VethEventTypeMonitor: + { + /* do nothing... this'll hang out here til we're dead, and the hypervisor will return it for us. */ + break; + } + case VethEventTypeFramesAck: + { + for (i = 0; i < VethMaxFramesMsgsAcked; ++i) { + u16 msg = vethEvent->mDerivedData.mFramesAckData.mToken[i]; + veth_recycleMsg(connection, msg); + } + break; + } + case VethEventTypeFrames: + { + veth_takeFrames(connection, vethEvent); + break; + } + default: + { + veth_error_printk("Unknown interrupt type %d from lpar %d\n", event->xSubtype, connection->mRemoteLp); + } + }; +} + +static void veth_openConnections() +{ + int i = 0; + + HvLpEvent_registerHandler(HvLpEvent_Type_VirtualLan, &veth_handleEvent); + + /* Now I need to run through the active lps and open connections to the ones I'm supposed to + open to. 
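+	   The handshake driven from here is: pre-allocate the two base
+	   LP events (capabilities + monitor), exchange capabilities with
+	   the remote LP (veth_sendCap / veth_takeCap), ack them, and
+	   finally post a long-lived monitor event (veth_sendMonitor)
+	   whose eventual ack tells us the other side has gone away.
+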
*/ + + for (i = HvMaxArchitectedLps - 1; i >= 0; --i) { + if (i != mFabricMgr->mThisLp) { + if (HvLpConfig_doLpsCommunicateOnVirtualLan(mFabricMgr->mThisLp, i)) { + veth_openConnection(i, 1); + } else { + veth_closeConnection(i, 1); + } + } + } +} + +static void veth_intFinishOpeningConnections(void *parm, int number) +{ + struct VethLpConnection *connection = (struct VethLpConnection *) parm; + connection->mAllocBhTq.data = parm; + connection->mNumberAllocated = number; + queue_task(&connection->mAllocBhTq, &tq_immediate); + mark_bh(IMMEDIATE_BH); +} + +static void veth_finishOpeningConnections(void *parm) +{ + unsigned long flags; + struct VethLpConnection *connection = (struct VethLpConnection *) parm; + spin_lock_irqsave(&connection->mStatusGate, flags); + veth_finishOpeningConnectionsLocked(connection); + spin_unlock_irqrestore(&connection->mStatusGate, flags); +} + +static void veth_finishOpeningConnectionsLocked(struct VethLpConnection *connection) +{ + if (connection->mNumberAllocated >= 2) { + connection->mConnectionStatus.mCapMonAlloced = 1; + veth_sendCap(connection); + } else { + veth_error_printk("Couldn't allocate base msgs for lpar %d, only got %d\n", connection->mRemoteLp, + connection->mNumberAllocated); + veth_failMe(connection); + } +} + +static void veth_openConnection(u8 remoteLp, int lockMe) +{ + unsigned long flags; + unsigned long flags2; + HvLpInstanceId source; + HvLpInstanceId target; + u64 i = 0; + struct VethLpConnection *connection = &(mFabricMgr->mConnection[remoteLp]); + + memset(&connection->mCapBhTq, 0, sizeof(connection->mCapBhTq)); + connection->mCapBhTq.routine = (void *) (void *) veth_capBh; + + memset(&connection->mCapAckBhTq, 0, sizeof(connection->mCapAckBhTq)); + connection->mCapAckBhTq.routine = (void *) (void *) veth_capAckBh; + + memset(&connection->mMonitorAckBhTq, 0, sizeof(connection->mMonitorAckBhTq)); + connection->mMonitorAckBhTq.routine = (void *) (void *) veth_monitorAckBh; + + memset(&connection->mAllocBhTq, 0, sizeof(connection->mAllocBhTq)); + connection->mAllocBhTq.routine = (void *) (void *) veth_finishOpeningConnections; + + if (lockMe) + spin_lock_irqsave(&connection->mStatusGate, flags); + + connection->mRemoteLp = remoteLp; + + spin_lock_irqsave(&connection->mAckGate, flags2); + + memset(&connection->mEventData, 0xFF, sizeof(connection->mEventData)); + connection->mNumAcks = 0; + + HvCallEvent_openLpEventPath(remoteLp, HvLpEvent_Type_VirtualLan); + + /* clean up non-acked msgs */ + for (i = 0; i < connection->mNumMsgs; ++i) { + veth_recycleMsg(connection, i); + } + + connection->mConnectionStatus.mOpen = 1; + + source = connection->mSourceInst = HvCallEvent_getSourceLpInstanceId(remoteLp, HvLpEvent_Type_VirtualLan); + target = connection->mTargetInst = HvCallEvent_getTargetLpInstanceId(remoteLp, HvLpEvent_Type_VirtualLan); + + if (connection->mConnectionStatus.mCapMonAlloced != 1) { + connection->mAllocBhTq.routine = (void *) (void *) veth_finishOpeningConnections; + mf_allocateLpEvents(remoteLp, + HvLpEvent_Type_VirtualLan, + sizeof(struct VethLpEvent), 2, &veth_intFinishOpeningConnections, connection); + } else { + veth_finishOpeningConnectionsLocked(connection); + } + + spin_unlock_irqrestore(&connection->mAckGate, flags2); + + if (lockMe) + spin_unlock_irqrestore(&connection->mStatusGate, flags); +} + +static void veth_closeConnection(u8 remoteLp, int lockMe) +{ + struct VethLpConnection *connection = &(mFabricMgr->mConnection[remoteLp]); + unsigned long flags; + unsigned long flags2; + if (lockMe) + 
spin_lock_irqsave(&connection->mStatusGate, flags); + + del_timer(&connection->mAckTimer); + + if (connection->mConnectionStatus.mOpen == 1) { + HvCallEvent_closeLpEventPath(remoteLp, HvLpEvent_Type_VirtualLan); + connection->mConnectionStatus.mOpen = 0; + veth_failMe(connection); + + /* reset ack data */ + spin_lock_irqsave(&connection->mAckGate, flags2); + + memset(&connection->mEventData, 0xFF, sizeof(connection->mEventData)); + connection->mNumAcks = 0; + + spin_unlock_irqrestore(&connection->mAckGate, flags2); + } + + if (lockMe) + spin_unlock_irqrestore(&connection->mStatusGate, flags); +} + +static void veth_msgsInit(struct VethLpConnection *connection) +{ + connection->mAllocBhTq.routine = (void *) (void *) veth_finishMsgsInit; + mf_allocateLpEvents(connection->mRemoteLp, + HvLpEvent_Type_VirtualLan, + sizeof(struct VethLpEvent), + connection->mMyCap.mUnionData.mFields.mNumberBuffers, &veth_intFinishMsgsInit, connection); +} + +static void veth_intFinishMsgsInit(void *parm, int number) +{ + struct VethLpConnection *connection = (struct VethLpConnection *) parm; + connection->mAllocBhTq.data = parm; + connection->mNumberRcvMsgs = number; + queue_task(&connection->mAllocBhTq, &tq_immediate); + mark_bh(IMMEDIATE_BH); +} + +static void veth_intFinishCapBh(void *parm, int number) +{ + struct VethLpConnection *connection = (struct VethLpConnection *) parm; + connection->mAllocBhTq.data = parm; + if (number > 0) + connection->mNumberLpAcksAlloced += number; + + queue_task(&connection->mAllocBhTq, &tq_immediate); + mark_bh(IMMEDIATE_BH); +} + +static void veth_finishMsgsInit(struct VethLpConnection *connection) +{ + int i = 0; + unsigned int numberGotten = 0; + u64 amountOfHeapToGet = connection->mMyCap.mUnionData.mFields.mNumberBuffers * sizeof(struct VethMsg); + char *msgs = NULL; + unsigned long flags; + spin_lock_irqsave(&connection->mStatusGate, flags); + + if (connection->mNumberRcvMsgs >= connection->mMyCap.mUnionData.mFields.mNumberBuffers) { + msgs = kmalloc(amountOfHeapToGet, GFP_ATOMIC); + + connection->mMsgs = (struct VethMsg *) msgs; + + if (msgs != NULL) { + memset(msgs, 0, amountOfHeapToGet); + + for (i = 0; i < connection->mMyCap.mUnionData.mFields.mNumberBuffers; ++i) { + connection->mMsgs[i].mIndex = i; + ++numberGotten; + VETHSTACKPUSH(&(connection->mMsgStack), (connection->mMsgs + i)); + } + if (numberGotten > 0) { + connection->mNumMsgs = numberGotten; + } + } else { + kfree(msgs); + connection->mMsgs = NULL; + } + } + + connection->mMyCap.mUnionData.mFields.mNumberBuffers = connection->mNumMsgs; + + if (connection->mNumMsgs < 10) + connection->mMyCap.mUnionData.mFields.mThreshold = 1; + else if (connection->mNumMsgs < 20) + connection->mMyCap.mUnionData.mFields.mThreshold = 4; + else if (connection->mNumMsgs < 40) + connection->mMyCap.mUnionData.mFields.mThreshold = 10; + else + connection->mMyCap.mUnionData.mFields.mThreshold = 20; + + connection->mMyCap.mUnionData.mFields.mTimer = VethAckTimeoutUsec; + + veth_finishSendCap(connection); + + spin_unlock_irqrestore(&connection->mStatusGate, flags); +} + +static void veth_sendCap(struct VethLpConnection *connection) +{ + if (connection->mMsgs == NULL) { + connection->mMyCap.mUnionData.mFields.mNumberBuffers = VethBuffersToAllocate; + veth_msgsInit(connection); + } else { + veth_finishSendCap(connection); + } +} + +static void veth_finishSendCap(struct VethLpConnection *connection) +{ + HvLpEvent_Rc returnCode = HvCallEvent_signalLpEventFast(connection->mRemoteLp, + HvLpEvent_Type_VirtualLan, + VethEventTypeCap, + 
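							/* the five "reserved" u64s below carry the
+							   40-byte VethCapData itself in the event's
+							   fast-path data words, so the capabilities
+							   exchange needs no separate DMA */
+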
HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + connection->mSourceInst, + connection->mTargetInst, + 0, + connection->mMyCap.mUnionData.mNoFields.mReserved1, + connection->mMyCap.mUnionData.mNoFields.mReserved2, + connection->mMyCap.mUnionData.mNoFields.mReserved3, + connection->mMyCap.mUnionData.mNoFields.mReserved4, + connection->mMyCap.mUnionData.mNoFields.mReserved5); + + if ((returnCode == HvLpEvent_Rc_PartitionDead) || (returnCode == HvLpEvent_Rc_PathClosed)) { + connection->mConnectionStatus.mSentCap = 0; + } else if (returnCode != HvLpEvent_Rc_Good) { + veth_error_printk("Couldn't send cap to lpar %d, rc %x\n", connection->mRemoteLp, (int) returnCode); + veth_failMe(connection); + } else { + connection->mConnectionStatus.mSentCap = 1; + } +} + +static void veth_takeCap(struct VethLpConnection *connection, struct VethLpEvent *event) +{ + if (!test_and_set_bit(0, &(connection->mCapBhPending))) { + connection->mCapBhTq.data = connection; + memcpy(&connection->mCapEvent, event, sizeof(connection->mCapEvent)); + queue_task(&connection->mCapBhTq, &tq_immediate); + mark_bh(IMMEDIATE_BH); + } else { + veth_error_printk("Received a capabilities from lpar %d while already processing one\n", connection->mRemoteLp); + event->mBaseEvent.xRc = HvLpEvent_Rc_BufferNotAvailable; + HvCallEvent_ackLpEvent((struct HvLpEvent *) event); + } +} + +static void veth_takeCapAck(struct VethLpConnection *connection, struct VethLpEvent *event) +{ + if (!test_and_set_bit(0, &(connection->mCapAckBhPending))) { + connection->mCapAckBhTq.data = connection; + memcpy(&connection->mCapAckEvent, event, sizeof(connection->mCapAckEvent)); + queue_task(&connection->mCapAckBhTq, &tq_immediate); + mark_bh(IMMEDIATE_BH); + } else { + veth_error_printk("Received a capabilities ack from lpar %d while already processing one\n", + connection->mRemoteLp); + } +} + +static void veth_takeMonitorAck(struct VethLpConnection *connection, struct VethLpEvent *event) +{ + if (!test_and_set_bit(0, &(connection->mMonitorAckBhPending))) { + connection->mMonitorAckBhTq.data = connection; + memcpy(&connection->mMonitorAckEvent, event, sizeof(connection->mMonitorAckEvent)); + queue_task(&connection->mMonitorAckBhTq, &tq_immediate); + mark_bh(IMMEDIATE_BH); + } else { + veth_error_printk("Received a monitor ack from lpar %d while already processing one\n", connection->mRemoteLp); + } +} + +static void veth_recycleMsg(struct VethLpConnection *connection, u16 msg) +{ + struct scatterlist sg[VethMaxFramesPerMsg]; + if (msg < connection->mNumMsgs) { + struct VethMsg *myMsg = connection->mMsgs + msg; + if (test_and_clear_bit(0, &(myMsg->mInUse))) { + int i; + int nsg = 0; + for (i = 0; i < VethMaxFramesPerMsg; i++) { + if (myMsg->mEvent.mSendData.mAddress[i] != 0) { + sg[nsg].dma_address = myMsg->mEvent.mSendData.mAddress[i]; + sg[nsg].dma_length = myMsg->mEvent.mSendData.mLength[i]; + nsg++; + } + } + pci_unmap_sg(iSeries_veth_dev, sg, nsg, PCI_DMA_TODEVICE); + + dev_kfree_skb_any(myMsg->mSkb); + + myMsg->mSkb = NULL; + memset(&(myMsg->mEvent.mSendData), 0, sizeof(struct VethFramesData)); + VETHSTACKPUSH(&connection->mMsgStack, myMsg); + } else { + if (connection->mConnectionStatus.mOpen) { + veth_error_printk("Received a frames ack for msg %d from lpar %d while not outstanding\n", msg, + connection->mRemoteLp); + } + } + } +} + +static void veth_capBh(struct VethLpConnection *connection) +{ + struct VethLpEvent *event = &connection->mCapEvent; + unsigned long flags; + struct VethCapData *remoteCap = &(connection->mRemoteCap); + u64 
numAcks = 0; + spin_lock_irqsave(&connection->mStatusGate, flags); + connection->mConnectionStatus.mGotCap = 1; + + memcpy(remoteCap, &(event->mDerivedData.mCapabilitiesData), sizeof(connection->mRemoteCap)); + + if ((remoteCap->mUnionData.mFields.mNumberBuffers <= VethMaxFramesMsgs) && + (remoteCap->mUnionData.mFields.mNumberBuffers != 0) && + (remoteCap->mUnionData.mFields.mThreshold <= VethMaxFramesMsgsAcked) && + (remoteCap->mUnionData.mFields.mThreshold != 0)) { + numAcks = (remoteCap->mUnionData.mFields.mNumberBuffers / remoteCap->mUnionData.mFields.mThreshold) + 1; + + if (connection->mNumberLpAcksAlloced < numAcks) { + numAcks = numAcks - connection->mNumberLpAcksAlloced; + connection->mAllocBhTq.routine = (void *) (void *) veth_finishCapBh; + mf_allocateLpEvents(connection->mRemoteLp, + HvLpEvent_Type_VirtualLan, + sizeof(struct VethLpEvent), numAcks, &veth_intFinishCapBh, connection); + } else + veth_finishCapBhLocked(connection); + } else { + veth_error_printk("Received incompatible capabilities from lpar %d\n", connection->mRemoteLp); + event->mBaseEvent.xRc = HvLpEvent_Rc_InvalidSubtypeData; + HvCallEvent_ackLpEvent((struct HvLpEvent *) event); + } + + clear_bit(0, &(connection->mCapBhPending)); + spin_unlock_irqrestore(&connection->mStatusGate, flags); +} + +static void veth_capAckBh(struct VethLpConnection *connection) +{ + struct VethLpEvent *event = &connection->mCapAckEvent; + unsigned long flags; + + spin_lock_irqsave(&connection->mStatusGate, flags); + + if (event->mBaseEvent.xRc == HvLpEvent_Rc_Good) { + connection->mConnectionStatus.mCapAcked = 1; + + if ((connection->mConnectionStatus.mGotCap == 1) && (connection->mConnectionStatus.mGotCapAcked == 1)) { + if (connection->mConnectionStatus.mSentMonitor != 1) + veth_sendMonitor(connection); + } + } else { + veth_error_printk("Bad rc(%d) from lpar %d on capabilities\n", event->mBaseEvent.xRc, connection->mRemoteLp); + veth_failMe(connection); + } + + clear_bit(0, &(connection->mCapAckBhPending)); + spin_unlock_irqrestore(&connection->mStatusGate, flags); +} + +static void veth_monitorAckBh(struct VethLpConnection *connection) +{ + unsigned long flags; + + spin_lock_irqsave(&connection->mStatusGate, flags); + + veth_failMe(connection); + + veth_printk("Monitor ack returned for lpar %d\n", connection->mRemoteLp); + + if (connection->mConnectionStatus.mOpen) { + veth_closeConnection(connection->mRemoteLp, 0); + + udelay(100); + + queue_task(&connection->mMonitorAckBhTq, &tq_immediate); + mark_bh(IMMEDIATE_BH); + } else { +#ifdef MODULE + if (VethModuleReopen) +#endif + veth_openConnection(connection->mRemoteLp, 0); +#ifdef MODULE + else { + int i = 0; + + for (i = 0; i < connection->mNumMsgs; ++i) { + veth_recycleMsg(connection, i); + } + } +#endif + clear_bit(0, &(connection->mMonitorAckBhPending)); + } + + spin_unlock_irqrestore(&connection->mStatusGate, flags); +} + +#define number_of_pages(v, l) ((((unsigned long)(v) & ((1 << 12) - 1)) + (l) + 4096 - 1) / 4096) +#define page_offset(v) ((unsigned long)(v) & ((1 << 12) - 1)) + +static void veth_takeFrames(struct VethLpConnection *connection, struct VethLpEvent *event) +{ + int i = 0; + struct VethPort *port = NULL; + struct BufList { + union { + struct { + u32 token2; + u32 garbage; + } token1; + u64 address; + } addr; + u64 size; + }; + + struct BufList myBufList[4]; /* max pages per frame */ + struct BufList remoteList[VethMaxFramesPerMsg]; /* max frags per frame */ + + do { + int nfrags = 0; + u16 length = 0; + + /* a 0 address marks the end of the valid entries */ + 
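		/* Worked example (hypothetical message): six frags where
+		   frags 0-1 form one frame and frags 2-5 a second frame
+		   would arrive with mEofMask = 0b100010 -- one bit set at
+		   the last frag of each frame.  The inner do/while below
+		   consumes frags until it crosses such an EOF bit. */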
+		if (event->mDerivedData.mSendData.mAddress[i] == 0)
+			break;
+
+		/* make sure that we have at least 1 EOF entry in the remaining entries */
+		if (!(event->mDerivedData.mSendData.mEofMask >> i)) {
+			veth_printk("bad lp event: missing EOF frag in event mEofMask 0x%x i %d\n",
+				    event->mDerivedData.mSendData.mEofMask, i);
+			break;
+		}
+
+		/* walk the frags of this frame (up to and including its EOF
+		   frag), adding up their lengths */
+		do {
+			remoteList[nfrags].addr.token1.token2 = event->mDerivedData.mSendData.mAddress[i + nfrags];
+			remoteList[nfrags].addr.token1.garbage = 0;
+			length += remoteList[nfrags].size = event->mDerivedData.mSendData.mLength[i + nfrags];
+		}
+		while (!(event->mDerivedData.mSendData.mEofMask & (1 << (i + nfrags++))));
+
+		/* length == total length of all fragments */
+		/* nfrags == # of fragments in this frame */
+
+		if ((length - 14) <= 9000) {	/* same as 13 < length <= 9014 */
+			struct sk_buff *skb = alloc_skb(length, GFP_ATOMIC);
+			if (skb != NULL) {
+				HvLpDma_Rc returnCode = HvLpDma_Rc_Good;
+
+				/* build the buffer list for the dma operation */
+				int numPages = number_of_pages((skb->data), length);	/* number of pages spanned by the receive buffer */
+				myBufList[0].addr.address =
+				    (0x8000000000000000LL | (virt_to_absolute((unsigned long) skb->data)));
+				myBufList[0].size = (numPages > 1) ? (4096 - page_offset(skb->data)) : length;
+				if (numPages > 1) {
+					myBufList[1].addr.address =
+					    (0x8000000000000000LL |
+					     (virt_to_absolute((unsigned long) skb->data + myBufList[0].size)));
+					myBufList[1].size = (numPages > 2) ? 4096 : length - myBufList[0].size;
+					if (numPages > 2) {
+						myBufList[2].addr.address =
+						    (0x8000000000000000LL |
+						     (virt_to_absolute
+						      ((unsigned long) skb->data + myBufList[0].size + myBufList[1].size)));
+						myBufList[2].size =
+						    (numPages > 3) ? 4096 : length - myBufList[0].size - myBufList[1].size;
+						if (numPages > 3) {
+							myBufList[3].addr.address =
+							    0x8000000000000000LL |
+							    (virt_to_absolute
+							     ((unsigned long) skb->data + myBufList[0].size + myBufList[1].size +
+							      myBufList[2].size));
+							myBufList[3].size =
+							    length - myBufList[0].size - myBufList[1].size - myBufList[2].size;
+						}
+					}
+				}
+
+				returnCode = HvCallEvent_dmaBufList(HvLpEvent_Type_VirtualLan,
+								    event->mBaseEvent.xSourceLp,
+								    HvLpDma_Direction_RemoteToLocal,
+								    connection->mSourceInst,
+								    connection->mTargetInst,
+								    HvLpDma_AddressType_RealAddress,
+								    HvLpDma_AddressType_TceIndex,
+								    0x8000000000000000LL |
+								    (virt_to_absolute((unsigned long) &myBufList)),
+								    0x8000000000000000LL |
+								    (virt_to_absolute((unsigned long) &remoteList)), length);
+
+				if (returnCode == HvLpDma_Rc_Good) {
+					HvLpVirtualLanIndex vlan = skb->data[9];
+					u64 dest = *((u64 *) skb->data) & 0xFFFFFFFFFFFF0000;
+
+					if (((vlan < HvMaxArchitectedVirtualLans) &&
+					     ((port = mFabricMgr->mPorts[vlan]) != NULL)) &&
+					    ((dest == port->mMyAddress) ||	/* it's for me */
+					     (dest == 0xFFFFFFFFFFFF0000) ||	/* it's a broadcast */
+					     (veth_multicast_wanted(port, dest)) ||	/* it's one of my multicasts */
+					     (port->mPromiscuous == 1))) {	/* I'm promiscuous */
+						skb_put(skb, length);
+						skb->dev = port->mDev;
+						skb->protocol = eth_type_trans(skb, port->mDev);
+						skb->ip_summed = CHECKSUM_NONE;
+						netif_rx(skb);	/* send it up */
+						port->mStats.rx_packets++;
+						port->mStats.rx_bytes += length;
+					} else {
+						dev_kfree_skb_irq(skb);
+					}
+				} else {
+					dev_kfree_skb_irq(skb);
+				}
+			}
+		} else {
+			break;
+		}
+		i += nfrags;
+	} while (i < VethMaxFramesPerMsg);
+
+	/* Ack it */
+
+	{
+		unsigned long flags;
+		spin_lock_irqsave(&connection->mAckGate, flags);
+
+		if (connection->mNumAcks < VethMaxFramesMsgsAcked) {
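+			/* Acks are deliberately batched: each frames event is
+			   parked here and only flushed once mNumAcks reaches
+			   the threshold the sender advertised in its
+			   capabilities (or when veth_timedAck fires), trading
+			   a little ack latency for far fewer hypervisor
+			   signals. */
+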
connection->mEventData.mAckData.mToken[connection->mNumAcks] = event->mBaseEvent.xCorrelationToken; + ++connection->mNumAcks; + + if (connection->mNumAcks == connection->mRemoteCap.mUnionData.mFields.mThreshold) { + HvLpEvent_Rc rc = HvCallEvent_signalLpEventFast(connection->mRemoteLp, + HvLpEvent_Type_VirtualLan, + VethEventTypeFramesAck, + HvLpEvent_AckInd_NoAck, + HvLpEvent_AckType_ImmediateAck, + connection->mSourceInst, + connection->mTargetInst, + 0, + connection->mEventData.mFpData.mData1, + connection->mEventData.mFpData.mData2, + connection->mEventData.mFpData.mData3, + connection->mEventData.mFpData.mData4, + connection->mEventData.mFpData.mData5); + + if (rc != HvLpEvent_Rc_Good) { + veth_error_printk("Bad lp event return code(%x) acking frames from lpar %d\n", (int) rc, + connection->mRemoteLp); + } + + connection->mNumAcks = 0; + + memset(&connection->mEventData, 0xFF, sizeof(connection->mEventData)); + } + + } + + spin_unlock_irqrestore(&connection->mAckGate, flags); + } +} + +#undef number_of_pages +#undef page_offset + +static void veth_timedAck(unsigned long connectionPtr) +{ + unsigned long flags; + HvLpEvent_Rc rc; + struct VethLpConnection *connection = (struct VethLpConnection *) connectionPtr; + /* Ack all the events */ + spin_lock_irqsave(&connection->mAckGate, flags); + + if (connection->mNumAcks > 0) { + rc = HvCallEvent_signalLpEventFast(connection->mRemoteLp, + HvLpEvent_Type_VirtualLan, + VethEventTypeFramesAck, + HvLpEvent_AckInd_NoAck, + HvLpEvent_AckType_ImmediateAck, + connection->mSourceInst, + connection->mTargetInst, + 0, + connection->mEventData.mFpData.mData1, + connection->mEventData.mFpData.mData2, + connection->mEventData.mFpData.mData3, + connection->mEventData.mFpData.mData4, connection->mEventData.mFpData.mData5); + + if (rc != HvLpEvent_Rc_Good) { + veth_error_printk("Bad lp event return code(%x) acking frames from lpar %d!\n", (int) rc, + connection->mRemoteLp); + } + + connection->mNumAcks = 0; + + memset(&connection->mEventData, 0xFF, sizeof(connection->mEventData)); + } + + spin_unlock_irqrestore(&connection->mAckGate, flags); + + /* Reschedule the timer */ + connection->mAckTimer.expires = jiffies + connection->mTimeout; + add_timer(&connection->mAckTimer); +} + +static int veth_multicast_wanted(struct VethPort *port, u64 thatAddr) +{ + int returnParm = 0; + int i; + unsigned long flags; + + if ((*((char *) &thatAddr) & 0x01) != 1) + return 0; + + read_lock_irqsave(&port->mMcastGate, flags); + if (port->mAllMcast) { + read_unlock_irqrestore(&port->mMcastGate, flags); + return 1; + } + + for (i = 0; i < port->mNumAddrs; ++i) { + u64 thisAddr = port->mMcasts[i]; + + if (thisAddr == thatAddr) { + returnParm = 1; + break; + } + } + read_unlock_irqrestore(&port->mMcastGate, flags); + + return returnParm; +} + +static void veth_sendMonitor(struct VethLpConnection *connection) +{ + HvLpEvent_Rc returnCode = HvCallEvent_signalLpEventFast(connection->mRemoteLp, + HvLpEvent_Type_VirtualLan, + VethEventTypeMonitor, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_DeferredAck, + connection->mSourceInst, + connection->mTargetInst, + 0, 0, 0, 0, 0, 0); + + if (returnCode == HvLpEvent_Rc_Good) { + connection->mConnectionStatus.mSentMonitor = 1; + connection->mConnectionStatus.mFailed = 0; + + /* Start the ACK timer */ + init_timer(&connection->mAckTimer); + connection->mAckTimer.function = veth_timedAck; + connection->mAckTimer.data = (unsigned long) connection; + connection->mAckTimer.expires = jiffies + connection->mTimeout; + 
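	/* mTimeout came out of the capabilities exchange (see
+	   veth_finishCapBhLocked); as a worked example, the default
+	   VethAckTimeoutUsec of 1000000 with HZ == 100 (assumed here
+	   purely for illustration) gives 1000000 * 100 / 1000000 == 100
+	   jiffies, i.e. pending acks get flushed about once a second. */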
+	add_timer(&connection->mAckTimer);
+
+	} else {
+		veth_error_printk("Monitor send to lpar %d failed with rc %x\n", connection->mRemoteLp, (int) returnCode);
+		veth_failMe(connection);
+	}
+}
+
+static void veth_finishCapBh(struct VethLpConnection *connection)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&connection->mStatusGate, flags);
+	veth_finishCapBhLocked(connection);
+	spin_unlock_irqrestore(&connection->mStatusGate, flags);
+}
+
+static void veth_finishCapBhLocked(struct VethLpConnection *connection)
+{
+	struct VethLpEvent *event = &connection->mCapEvent;
+	struct VethCapData *remoteCap = &(connection->mRemoteCap);
+	int numAcks = (remoteCap->mUnionData.mFields.mNumberBuffers / remoteCap->mUnionData.mFields.mThreshold) + 1;
+
+	/* Convert the ack timer from microseconds to jiffies */
+	if (connection->mMyCap.mUnionData.mFields.mTimer)
+		connection->mTimeout = remoteCap->mUnionData.mFields.mTimer * HZ / 1000000;
+	else
+		connection->mTimeout = VethAckTimeoutUsec * HZ / 1000000;
+
+	if (connection->mNumberLpAcksAlloced >= numAcks) {
+		HvLpEvent_Rc returnCode = HvCallEvent_ackLpEvent((struct HvLpEvent *) event);
+
+		if (returnCode == HvLpEvent_Rc_Good) {
+			connection->mConnectionStatus.mGotCapAcked = 1;
+
+			if (connection->mConnectionStatus.mSentCap != 1) {
+				connection->mTargetInst =
+				    HvCallEvent_getTargetLpInstanceId(connection->mRemoteLp, HvLpEvent_Type_VirtualLan);
+
+				veth_sendCap(connection);
+			} else if (connection->mConnectionStatus.mCapAcked == 1) {
+				if (connection->mConnectionStatus.mSentMonitor != 1)
+					veth_sendMonitor(connection);
+			}
+		} else {
+			veth_error_printk("Failed to ack remote cap for lpar %d with rc %x\n", connection->mRemoteLp,
+					  (int) returnCode);
+			veth_failMe(connection);
+		}
+	} else {
+		veth_error_printk("Couldn't allocate all the frames ack events for lpar %d\n", connection->mRemoteLp);
+		event->mBaseEvent.xRc = HvLpEvent_Rc_BufferNotAvailable;
+		HvCallEvent_ackLpEvent((struct HvLpEvent *) event);
+	}
+}
+
+int proc_veth_dump_connection(char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+	char *out = page;
+	long whichConnection = (long) data;
+	int len = 0;
+	struct VethLpConnection *connection = NULL;
+
+	if ((whichConnection < 0) || (whichConnection >= HvMaxArchitectedLps) || (mFabricMgr == NULL)) {
+		veth_error_printk("Got bad data from /proc file system\n");
+		len = sprintf(page, "ERROR\n");
+	} else {
+		int thereWasStuffBefore = 0;
+		connection = &(mFabricMgr->mConnection[whichConnection]);
+
+		out += sprintf(out, "Remote Lp:\t%d\n", connection->mRemoteLp);
+		out += sprintf(out, "Source Inst:\t%04X\n", connection->mSourceInst);
+		out += sprintf(out, "Target Inst:\t%04X\n", connection->mTargetInst);
+		out += sprintf(out, "Num Msgs:\t%d\n", connection->mNumMsgs);
+		out += sprintf(out, "Num Lp Acks:\t%d\n", connection->mNumberLpAcksAlloced);
+		out += sprintf(out, "Num Acks:\t%d\n", connection->mNumAcks);
+
+		if (connection->mConnectionStatus.mOpen) {
+			out += sprintf(out, "<Open");
+			thereWasStuffBefore = 1;
+		}
+
+		if (connection->mConnectionStatus.mCapMonAlloced) {
+			if (thereWasStuffBefore)
+				out += sprintf(out, "/");
+			else
+				out += sprintf(out, "<");
+			out += sprintf(out, "CapMonAlloced");
+			thereWasStuffBefore = 1;
+		}
+
+		if (connection->mConnectionStatus.mBaseMsgsAlloced) {
+			if (thereWasStuffBefore)
+				out += sprintf(out, "/");
+			else
+				out += sprintf(out, "<");
+			out += sprintf(out, "BaseMsgsAlloced");
+			thereWasStuffBefore = 1;
+		}
+
+		if (connection->mConnectionStatus.mSentCap) {
+			if (thereWasStuffBefore)
+				out += sprintf(out, "/");
+			else
+				out += sprintf(out, "<");
+			out += sprintf(out, "SentCap");
+			thereWasStuffBefore = 1;
+		}
+
+		if (connection->mConnectionStatus.mCapAcked) {
+			if (thereWasStuffBefore)
+				out += sprintf(out, "/");
+			else
+				out += sprintf(out, "<");
+			out += sprintf(out, "CapAcked");
+			thereWasStuffBefore = 1;
+		}
+
+		if (connection->mConnectionStatus.mGotCap) {
+			if (thereWasStuffBefore)
+				out += sprintf(out, "/");
+			else
+				out += sprintf(out, "<");
+			out += sprintf(out, "GotCap");
+			thereWasStuffBefore = 1;
+		}
+
+		if (connection->mConnectionStatus.mGotCapAcked) {
+			if (thereWasStuffBefore)
+				out += sprintf(out, "/");
+			else
+				out += sprintf(out, "<");
+			out += sprintf(out, "GotCapAcked");
+			thereWasStuffBefore = 1;
+		}
+
+		if (connection->mConnectionStatus.mSentMonitor) {
+			if (thereWasStuffBefore)
+				out += sprintf(out, "/");
+			else
+				out += sprintf(out, "<");
+			out += sprintf(out, "SentMonitor");
+			thereWasStuffBefore = 1;
+		}
+
+		if (connection->mConnectionStatus.mPopulatedRings) {
+			if (thereWasStuffBefore)
+				out += sprintf(out, "/");
+			else
+				out += sprintf(out, "<");
+			out += sprintf(out, "PopulatedRings");
+			thereWasStuffBefore = 1;
+		}
+
+		if (connection->mConnectionStatus.mFailed) {
+			if (thereWasStuffBefore)
+				out += sprintf(out, "/");
+			else
+				out += sprintf(out, "<");
+			out += sprintf(out, "Failed");
+			thereWasStuffBefore = 1;
+		}
+
+		if (thereWasStuffBefore)
+			out += sprintf(out, ">");
+
+		out += sprintf(out, "\n");
+
+		out += sprintf(out, "Capabilities (System:<Version/NumberBuffers/Threshold/Timer>):\n");
+		out += sprintf(out, "\tLocal:<");
+		out += sprintf(out, "%d/%d/%d/%d>\n",
+			       connection->mMyCap.mUnionData.mFields.mVersion,
+			       connection->mMyCap.mUnionData.mFields.mNumberBuffers,
+			       connection->mMyCap.mUnionData.mFields.mThreshold, connection->mMyCap.mUnionData.mFields.mTimer);
+		out += sprintf(out, "\tRemote:<");
+		out += sprintf(out, "%d/%d/%d/%d>\n",
+			       connection->mRemoteCap.mUnionData.mFields.mVersion,
+			       connection->mRemoteCap.mUnionData.mFields.mNumberBuffers,
+			       connection->mRemoteCap.mUnionData.mFields.mThreshold,
+			       connection->mRemoteCap.mUnionData.mFields.mTimer);
+		len = out - page;
+	}
+	len -= off;
+	if (len < count) {
+		*eof = 1;
+		if (len <= 0)
+			return 0;
+	} else
+		len = count;
+	*start = page + off;
+	return len;
+}
+
+int proc_veth_dump_port(char *page, char **start, off_t off, int count, int *eof, void *data)
+{
+	char *out = page;
+	long whichPort = (long) data;
+	int len = 0;
+	struct VethPort *port = NULL;
+
+	if ((whichPort < 0) || (whichPort >= HvMaxArchitectedVirtualLans) || (mFabricMgr == NULL))
+		len = sprintf(page, "Virtual ethernet is not configured.\n");
+	else {
+		int i = 0;
+		u32 *myAddr;
+		u16 *myEndAddr;
+		port = mFabricMgr->mPorts[whichPort];
+
+		if (port != NULL) {
+			myAddr = (u32 *) & (port->mMyAddress);
+			myEndAddr = (u16 *) (myAddr + 1);
+			out += sprintf(out, "Net device:\t%p\n", port->mDev);
+			out += sprintf(out, "Net device name:\t%s\n", port->mDev->name);
+			out += sprintf(out, "Address:\t%08X%04X\n", myAddr[0], myEndAddr[0]);
+			out += sprintf(out, "Promiscuous:\t%d\n", port->mPromiscuous);
+			out += sprintf(out, "All multicast:\t%d\n", port->mAllMcast);
+			out += sprintf(out, "Number sk_buffs linearized:\t%u\n", port->mLinearized);
+			out += sprintf(out, "Number multicast:\t%d\n", port->mNumAddrs);
+
+			for (i = 0; i < port->mNumAddrs; ++i) {
+				u32 *multi = (u32 *) & (port->mMcasts[i]);
+				u16 *multiEnd = (u16 *) (multi + 1);
+				out += sprintf(out, "    %08X%04X\n", multi[0], multiEnd[0]);
+			}
+		} else {
+			out += sprintf(page, "veth%d is not configured.\n", (int) whichPort);
+		}
+
+		len = out - page;
+	}
+	len -= off;
+	if (len < count) {
+		*eof = 1;
+		if
(len <= 0) + return 0; + } else + len = count; + *start = page + off; + return len; +} diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/iseries/veth.h linuxppc64_2_4/drivers/iseries/veth.h --- ../kernel.org/linux-2.4.19/drivers/iseries/veth.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/iseries/veth.h Tue Mar 12 08:51:03 2002 @@ -0,0 +1,242 @@ +/* File veth.h created by Kyle A. Lucke on Mon Aug 7 2000. */ + +/* Change Activity: */ +/* End Change Activity */ + +#ifndef _VETH_H +#define _VETH_H + +#ifndef _HVTYPES_H +#include +#endif +#ifndef _HVLPEVENT_H +#include +#endif +#include + +#define VethEventNumTypes (4) +#define VethEventTypeCap (0) +#define VethEventTypeFrames (1) +#define VethEventTypeMonitor (2) +#define VethEventTypeFramesAck (3) + +#define VethMaxFramesMsgsAcked (20) +#define VethMaxFramesMsgs (0xFFFF) +#define VethMaxFramesPerMsg (6) +#define VethAckTimeoutUsec (1000000) + +#define VETHSTACKTYPE(T) struct VethStack##T +#define VETHSTACK(T) \ +VETHSTACKTYPE(T) \ +{ \ +struct T *head; \ +spinlock_t lock; \ +} +#define VETHSTACKCTOR(s) do { (s)->head = NULL; spin_lock_init(&(s)->lock); } while(0) +#define VETHSTACKPUSH(s, p) \ +do { \ +unsigned long flags; \ +spin_lock_irqsave(&(s)->lock,flags); \ +(p)->next = (s)->head; \ +(s)->head = (p); \ +spin_unlock_irqrestore(&(s)->lock, flags); \ +} while(0) + +#define VETHSTACKPOP(s,p) \ +do { \ +unsigned long flags; \ +spin_lock_irqsave(&(s)->lock,flags); \ +(p) = (s)->head; \ +if ((s)->head != NULL) \ +{ \ +(s)->head = (s)->head->next; \ +} \ +spin_unlock_irqrestore(&(s)->lock, flags); \ +} while(0) + +#define VETHQUEUE(T) \ +struct VethQueue##T \ +{ \ +T *head; \ +T *tail; \ +spinlock_t lock; \ +} +#define VETHQUEUECTOR(q) do { (q)->head = NULL; (q)->tail = NULL; spin_lock_init(&(q)->lock); } while(0) +#define VETHQUEUEENQ(q, p) \ +do { \ +unsigned long flags; \ +spin_lock_irqsave(&(q)->lock,flags); \ +(p)->next = NULL; \ +if ((q)->head != NULL) \ +{ \ +(q)->head->next = (p); \ +(q)->head = (p); \ +} \ +else \ +{ \ +(q)->tail = (q)->head = (p); \ +} \ +spin_unlock_irqrestore(&(q)->lock, flags); \ +} while(0) + +#define VETHQUEUEDEQ(q,p) \ +do { \ +unsigned long flags; \ +spin_lock_irqsave(&(q)->lock,flags); \ +(p) = (q)->tail; \ +if ((p) != NULL) \ +{ \ +(q)->tail = (p)->next; \ +(p)->next = NULL; \ +} \ +if ((q)->tail == NULL) \ +(q)->head = NULL; \ +spin_unlock_irqrestore(&(q)->lock, flags); \ +} while(0) + +struct VethFramesData { + u32 mAddress[6]; + u16 mLength[6]; + u32 mEofMask:6; + u32 mReserved:26; +}; + +struct VethFramesAckData { + u16 mToken[VethMaxFramesMsgsAcked]; +}; + +struct VethCapData { + union { + struct Fields { + u8 mVersion; + u8 mReserved1; + u16 mNumberBuffers; + u16 mThreshold; + u16 mReserved2; + u32 mTimer; + u32 mReserved3; + u64 mReserved4; + u64 mReserved5; + u64 mReserved6; + } mFields; + struct NoFields { + u64 mReserved1; + u64 mReserved2; + u64 mReserved3; + u64 mReserved4; + u64 mReserved5; + } mNoFields; + } mUnionData; +}; + +struct VethFastPathData { + u64 mData1; + u64 mData2; + u64 mData3; + u64 mData4; + u64 mData5; +}; + +struct VethLpEvent { + struct HvLpEvent mBaseEvent; + union { + struct VethFramesData mSendData; + struct VethCapData mCapabilitiesData; + struct VethFramesAckData mFramesAckData; + struct VethFastPathData mFastPathData; + } mDerivedData; + +}; + +struct VethMsg { + struct VethMsg *next; + union { + struct VethFramesData mSendData; + struct VethFastPathData mFpData; + } mEvent; + int mIndex; + unsigned long mInUse; + struct sk_buff *mSkb; +}; + + +struct 
VethControlBlock { + struct net_device *mDev; + struct VethControlBlock *mNext; + HvLpVirtualLanIndex mVlanId; +}; + +struct VethLpConnection { + u64 mEyecatcher; + HvLpIndex mRemoteLp; + HvLpInstanceId mSourceInst; + HvLpInstanceId mTargetInst; + u32 mNumMsgs; + struct VethMsg *mMsgs; + int mNumberRcvMsgs; + int mNumberLpAcksAlloced; + union { + struct VethFramesAckData mAckData; + struct VethFastPathData mFpData; + } mEventData; + spinlock_t mAckGate; + u32 mNumAcks; + spinlock_t mStatusGate; + struct { + u64 mOpen:1; + u64 mCapMonAlloced:1; + u64 mBaseMsgsAlloced:1; + u64 mSentCap:1; + u64 mCapAcked:1; + u64 mGotCap:1; + u64 mGotCapAcked:1; + u64 mSentMonitor:1; + u64 mPopulatedRings:1; + u64 mReserved:54; + u64 mFailed:1; + } mConnectionStatus; + struct VethCapData mMyCap; + struct VethCapData mRemoteCap; + unsigned long mCapAckBhPending; + struct tq_struct mCapAckBhTq; + struct VethLpEvent mCapAckEvent; + unsigned long mCapBhPending; + struct tq_struct mCapBhTq; + struct VethLpEvent mCapEvent; + unsigned long mMonitorAckBhPending; + struct tq_struct mMonitorAckBhTq; + struct VethLpEvent mMonitorAckEvent; + unsigned long mAllocBhPending; + struct tq_struct mAllocBhTq; + int mNumberAllocated; + struct timer_list mAckTimer; + u32 mTimeout; + VETHSTACK(VethMsg) mMsgStack; +}; +#define HVMAXARCHITECTEDVIRTUALLANS 16 +struct VethPort { + struct net_device *mDev; + struct net_device_stats mStats; + int mLock; + u64 mMyAddress; + int mPromiscuous; + int mAllMcast; + rwlock_t mMcastGate; + int mNumAddrs; + u64 mMcasts[12]; + u32 mLinearized; +}; + +struct VethFabricMgr { + u64 mEyecatcher; + HvLpIndex mThisLp; + struct VethLpConnection mConnection[HVMAXARCHITECTEDLPS]; + spinlock_t mPortListGate; + u64 mNumPorts; + struct VethPort *mPorts[HVMAXARCHITECTEDVIRTUALLANS]; +}; + +int proc_veth_dump_connection(char *page, char **start, off_t off, int count, int *eof, void *data); +int proc_veth_dump_port(char *page, char **start, off_t off, int count, int *eof, void *data); + +#endif /* _VETH_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/iseries/vio.h linuxppc64_2_4/drivers/iseries/vio.h --- ../kernel.org/linux-2.4.19/drivers/iseries/vio.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/iseries/vio.h Wed Feb 27 11:09:05 2002 @@ -0,0 +1,130 @@ +/* -*- linux-c -*- + * drivers/char/vio.h + * + * iSeries Virtual I/O Message Path header + * + * Authors: Dave Boutcher + * Ryan Arnold + * Colin Devilbiss + * + * (C) Copyright 2000 IBM Corporation + * + * This header file is used by the iSeries virtual I/O device + * drivers. It defines the interfaces to the common functions + * (implemented in drivers/char/viopath.h) as well as defining + * common functions and structures. Currently (at the time I + * wrote this comment) the iSeries virtual I/O device drivers + * that use this are + * drivers/block/viodasd.c + * drivers/char/viocons.c + * drivers/char/viotape.c + * drivers/cdrom/viocd.c + * + * The iSeries virtual ethernet support (veth.c) uses a whole + * different set of functions. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) anyu later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _VIO_H +#define _VIO_H + +#include +#include + +/* iSeries virtual I/O events use the subtype field in + * HvLpEvent to figure out what kind of vio event is coming + * in. We use a table to route these, and this defines + * the maximum number of distinct subtypes + */ +#define VIO_MAX_SUBTYPES 7 + +/* Each subtype can register a handler to process their events. + * The handler must have this interface. + */ +typedef void (vio_event_handler_t) (struct HvLpEvent * event); + +int viopath_open(HvLpIndex remoteLp, int subtype, int numReq); +int viopath_close(HvLpIndex remoteLp, int subtype, int numReq); +int vio_setHandler(int subtype, vio_event_handler_t * beh); +int vio_clearHandler(int subtype); +int viopath_isactive(HvLpIndex lp); +HvLpInstanceId viopath_sourceinst(HvLpIndex lp); +HvLpInstanceId viopath_targetinst(HvLpIndex lp); +void vio_set_hostlp(void); +void *vio_get_event_buffer(int subtype); +void vio_free_event_buffer(int subtype, void *buffer); + +extern HvLpIndex viopath_hostLp; +extern HvLpIndex viopath_ourLp; + +#define VIO_MESSAGE "iSeries virtual I/O: " +#define KERN_DEBUG_VIO KERN_DEBUG VIO_MESSAGE +#define KERN_INFO_VIO KERN_INFO VIO_MESSAGE +#define KERN_WARNING_VIO KERN_WARNING VIO_MESSAGE + +#define VIOCHAR_MAX_DATA 200 + +#define VIOMAJOR_SUBTYPE_MASK 0xff00 +#define VIOMINOR_SUBTYPE_MASK 0x00ff +#define VIOMAJOR_SUBTYPE_SHIFT 8 + +#define VIOVERSION 0x0101 + +/* +This is the general structure for VIO errors; each module should have a table +of them, and each table should be terminated by an entry of { 0, 0, NULL }. +Then, to find a specific error message, a module should pass its local table +and the return code. 
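+
+For example (a sketch; my_subtype_rc stands in for whatever return code the
+caller has in hand, and the table shown is abbreviated):
+
+	static const struct vio_error_entry my_err_table[] = {
+		{0x0201, EINVAL, "Invalid Range"},
+		{0x0000, 0, NULL},
+	};
+	const struct vio_error_entry *err =
+		vio_lookup_rc(my_err_table, my_subtype_rc);
+	printk("operation failed: %s (errno %d)\n", err->msg, err->errno);
+
+Judging by how callers use the result without checking for NULL, the lookup
+presumably falls back to the terminating entry (or a generic one) rather
+than returning NULL.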
+*/
+struct vio_error_entry {
+	u16 rc;
+	int errno;
+	const char *msg;
+};
+const struct vio_error_entry *vio_lookup_rc(const struct vio_error_entry
+					    *local_table, u16 rc);
+
+enum viosubtypes {
+	viomajorsubtype_monitor = 0x0100,
+	viomajorsubtype_blockio = 0x0200,
+	viomajorsubtype_chario = 0x0300,
+	viomajorsubtype_config = 0x0400,
+	viomajorsubtype_cdio = 0x0500,
+	viomajorsubtype_tape = 0x0600
+};
+
+
+enum vioconfigsubtype {
+	vioconfigget = 0x0001,
+};
+
+enum viorc {
+	viorc_good = 0x0000,
+	viorc_noConnection = 0x0001,
+	viorc_noReceiver = 0x0002,
+	viorc_noBufferAvailable = 0x0003,
+	viorc_invalidMessageType = 0x0004,
+	viorc_invalidRange = 0x0201,
+	viorc_invalidToken = 0x0202,
+	viorc_DMAError = 0x0203,
+	viorc_useError = 0x0204,
+	viorc_releaseError = 0x0205,
+	viorc_invalidDisk = 0x0206,
+	viorc_openRejected = 0x0301
+};
+
+
+#endif /* _VIO_H */
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/iseries/viocd.c linuxppc64_2_4/drivers/iseries/viocd.c
--- ../kernel.org/linux-2.4.19/drivers/iseries/viocd.c	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/drivers/iseries/viocd.c	Fri Mar 29 11:51:14 2002
@@ -0,0 +1,818 @@
+/* -*- linux-c -*-
+ * drivers/cdrom/viocd.c
+ *
+ ***************************************************************************
+ * iSeries Virtual CD Rom
+ *
+ * Authors: Dave Boutcher
+ *          Ryan Arnold
+ *          Colin Devilbiss
+ *
+ * (C) Copyright 2000 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ ***************************************************************************
+ * This routine provides access to CD ROM drives owned and managed by an
+ * OS/400 partition running on the same box as this Linux partition.
+ *
+ * All operations are performed by sending messages back and forth to
+ * the OS/400 partition.
+ *
+ *
+ * This device driver can either use its own major number, or it can
+ * pretend to be an AZTECH drive.  This is controlled with a
+ * CONFIG option.  You can either call this an elegant solution to the
+ * fact that a lot of software doesn't recognize a new CD major number...
+ * or you can call this a really ugly hack.  Your choice.
+ * + */ + +#include +#include + +/* Decide on the proper naming convention to use for our device */ +#ifdef CONFIG_DEVFS_FS +#define VIOCD_DEVICE "cdroms/cdrom%d" +#define VIOCD_DEVICE_OFFSET 0 +#else +#ifdef CONFIG_VIOCD_AZTECH +#define VIOCD_DEVICE "aztcd" +#define VIOCD_DEVICE_OFFSET 0 +#else +#define VIOCD_DEVICE "iseries/vcd%c" +#define VIOCD_DEVICE_OFFSET 'a' +#endif +#endif + +/*************************************************************************** + * Decide if we are using our own major or pretending to be an AZTECH drive + ***************************************************************************/ +#ifdef CONFIG_VIOCD_AZTECH +#define MAJOR_NR AZTECH_CDROM_MAJOR +#define do_viocd_request do_aztcd_request +#else +#define MAJOR_NR VIOCD_MAJOR +#endif + +#define VIOCD_VERS "1.04" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "vio.h" +#include + +extern struct pci_dev * iSeries_vio_dev; + +#define signalLpEventFast HvCallEvent_signalLpEventFast + +struct viocdlpevent { + struct HvLpEvent event; + u32 mReserved1; + u16 mVersion; + u16 mSubTypeRc; + u16 mDisk; + u16 mFlags; + u32 mToken; + u64 mOffset; // On open, the max number of disks + u64 mLen; // On open, the size of the disk + u32 mBlockSize; // Only set on open + u32 mMediaSize; // Only set on open +}; + +enum viocdsubtype { + viocdopen = 0x0001, + viocdclose = 0x0002, + viocdread = 0x0003, + viocdwrite = 0x0004, + viocdlockdoor = 0x0005, + viocdgetinfo = 0x0006, + viocdcheck = 0x0007 +}; + +/* Should probably make this a module parameter....sigh + */ +#define VIOCD_MAX_CD 8 +int viocd_blocksizes[VIOCD_MAX_CD]; +static u64 viocd_size_in_bytes[VIOCD_MAX_CD]; + +static const struct vio_error_entry viocd_err_table[] = { + {0x0201, EINVAL, "Invalid Range"}, + {0x0202, EINVAL, "Invalid Token"}, + {0x0203, EIO, "DMA Error"}, + {0x0204, EIO, "Use Error"}, + {0x0205, EIO, "Release Error"}, + {0x0206, EINVAL, "Invalid CD"}, + {0x020C, EROFS, "Read Only Device"}, + {0x020D, EIO, "Changed or Missing Volume (or Varied Off?)"}, + {0x020E, EIO, "Optical System Error (Varied Off?)"}, + {0x02FF, EIO, "Internal Error"}, + {0x3010, EIO, "Changed Volume"}, + {0xC100, EIO, "Optical System Error"}, + {0x0000, 0, NULL}, +}; + +/* This is the structure we use to exchange info between driver and interrupt + * handler + */ +struct viocd_waitevent { + struct semaphore *sem; + int rc; + u16 subtypeRc; + int changed; +}; + +/* this is a lookup table for the true capabilities of a device */ +struct capability_entry { + char *type; + int capability; +}; + +static struct capability_entry capability_table[] = { + { "6330", CDC_LOCK | CDC_DVD_RAM }, + { "6321", CDC_LOCK }, + { "632B", 0 }, + { NULL , CDC_LOCK }, +}; + +struct block_device_operations viocd_fops = +{ + owner: THIS_MODULE, + open: cdrom_open, + release: cdrom_release, + ioctl: cdrom_ioctl, + check_media_change: cdrom_media_changed, +}; + +/* These are our internal structures for keeping track of devices + */ +static int viocd_numdev; + +struct cdrom_info { + char rsrcname[10]; + char type[4]; + char model[3]; +}; +static struct cdrom_info *viocd_unitinfo = NULL; + +struct disk_info{ + u32 useCount; + u32 blocksize; + u32 mediasize; +}; +static struct disk_info viocd_diskinfo[VIOCD_MAX_CD]; + +static struct cdrom_device_info viocd_info[VIOCD_MAX_CD]; + +static spinlock_t viocd_lock = SPIN_LOCK_UNLOCKED; + +#define MAX_CD_REQ 1 +static LIST_HEAD(reqlist); + +/* End a request + */ +static int viocd_end_request(struct request *req, int 
uptodate) +{ + if (end_that_request_first(req, uptodate, DEVICE_NAME)) + return 0; + end_that_request_last(req); + return 1; +} + + +/* Get info on CD devices from OS/400 + */ +static void get_viocd_info(void) +{ + dma_addr_t dmaaddr; + HvLpEvent_Rc hvrc; + int i; + DECLARE_MUTEX_LOCKED(Semaphore); + struct viocd_waitevent we; + + // If we don't have a host, bail out + if (viopath_hostLp == HvLpIndexInvalid) + return; + + if (viocd_unitinfo == NULL) + viocd_unitinfo = + kmalloc(sizeof(struct cdrom_info) * VIOCD_MAX_CD, + GFP_KERNEL); + + memset(viocd_unitinfo, 0x00, + sizeof(struct cdrom_info) * VIOCD_MAX_CD); + + dmaaddr = pci_map_single(iSeries_vio_dev, viocd_unitinfo, + sizeof(struct cdrom_info) * VIOCD_MAX_CD, + PCI_DMA_FROMDEVICE); + if (dmaaddr == 0xFFFFFFFF) { + printk(KERN_WARNING_VIO "error allocating tce\n"); + return; + } + + we.sem = &Semaphore; + + hvrc = signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_cdio | viocdgetinfo, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst(viopath_hostLp), + viopath_targetinst(viopath_hostLp), + (u64) (unsigned long) &we, + VIOVERSION << 16, + dmaaddr, + 0, + sizeof(struct cdrom_info) * VIOCD_MAX_CD, + 0); + if (hvrc != HvLpEvent_Rc_Good) { + printk(KERN_WARNING_VIO "cdrom error sending event. rc %d\n", (int) hvrc); + return; + } + + down(&Semaphore); + + if (we.rc) { + const struct vio_error_entry *err = vio_lookup_rc(viocd_err_table, we.subtypeRc); + printk(KERN_WARNING_VIO "bad rc %d:0x%04X on getinfo: %s\n", we.rc, we.subtypeRc, err->msg); + return; + } + + + for (i = 0; (i < VIOCD_MAX_CD) && (viocd_unitinfo[i].rsrcname[0]); i++) { + viocd_numdev++; + } +} + +/* Open a device + */ +static int viocd_open(struct cdrom_device_info *cdi, int purpose) +{ + DECLARE_MUTEX_LOCKED(Semaphore); + int device_no = MINOR(cdi->dev); + HvLpEvent_Rc hvrc; + struct viocd_waitevent we; + struct disk_info *diskinfo = &viocd_diskinfo[device_no]; + + // If we don't have a host, bail out + if (viopath_hostLp == HvLpIndexInvalid || device_no >= viocd_numdev) + return -ENODEV; + + we.sem = &Semaphore; + hvrc = signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_cdio | viocdopen, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst(viopath_hostLp), + viopath_targetinst(viopath_hostLp), + (u64) (unsigned long) &we, + VIOVERSION << 16, + ((u64) device_no << 48), + 0, 0, 0); + if (hvrc != 0) { + printk(KERN_WARNING_VIO "bad rc on signalLpEventFast %d\n", + (int) hvrc); + return -EIO; + } + + down(&Semaphore); + + if (we.rc) { + const struct vio_error_entry *err = vio_lookup_rc(viocd_err_table, we.subtypeRc); + printk(KERN_WARNING_VIO "bad rc %d:0x%04X on open: %s\n", we.rc, we.subtypeRc, err->msg); + return -err->errno; + } + + if (diskinfo->useCount == 0) { + if(diskinfo->blocksize > 0) { + viocd_blocksizes[device_no] = diskinfo->blocksize; + viocd_size_in_bytes[device_no] = diskinfo->blocksize * diskinfo->mediasize; + } else { + viocd_size_in_bytes[device_no] = 0xFFFFFFFFFFFFFFFF; + } + } + MOD_INC_USE_COUNT; + return 0; +} + +/* Release a device + */ +static void viocd_release(struct cdrom_device_info *cdi) +{ + int device_no = MINOR(cdi->dev); + HvLpEvent_Rc hvrc; + + /* If we don't have a host, bail out */ + if (viopath_hostLp == HvLpIndexInvalid + || device_no >= viocd_numdev) + return; + + hvrc = signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_cdio | viocdclose, + HvLpEvent_AckInd_NoAck, + 
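				 /* unlike viocd_open, close is fire-and-forget:
+				    no ack is requested and the correlation
+				    token below is 0, so no viocd_waitevent or
+				    semaphore is involved */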
+				 HvLpEvent_AckType_ImmediateAck,
+				 viopath_sourceinst(viopath_hostLp),
+				 viopath_targetinst(viopath_hostLp),
+				 0,
+				 VIOVERSION << 16,
+				 ((u64) device_no << 48),
+				 0, 0, 0);
+	if (hvrc != 0) {
+		printk(KERN_WARNING_VIO "bad rc on signalLpEventFast %d\n", (int) hvrc);
+		return;
+	}
+
+	MOD_DEC_USE_COUNT;
+}
+
+/* Send a read or write request to OS/400
+ */
+static int send_request(struct request *req)
+{
+	HvLpEvent_Rc hvrc;
+	dma_addr_t dmaaddr;
+	int device_no = DEVICE_NR(req->rq_dev);
+	u64 start = req->sector * 512,
+	    len = req->current_nr_sectors * 512;
+	char reading = req->cmd == READ;
+	u16 command = reading ? viocdread : viocdwrite;
+
+	if (start + len > viocd_size_in_bytes[device_no]) {
+		printk(KERN_WARNING_VIO "viocd%d: access position %lx, past size %lx\n",
+		       device_no, start + len, viocd_size_in_bytes[device_no]);
+		return -1;
+	}
+
+	dmaaddr = pci_map_single(iSeries_vio_dev, req->buffer, len,
+				 reading ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE);
+	if (dmaaddr == 0xFFFFFFFF) {
+		printk(KERN_WARNING_VIO "error allocating tce for address %p len %ld\n",
+		       req->buffer, len);
+		return -1;
+	}
+
+	hvrc = signalLpEventFast(viopath_hostLp,
+				 HvLpEvent_Type_VirtualIo,
+				 viomajorsubtype_cdio | command,
+				 HvLpEvent_AckInd_DoAck,
+				 HvLpEvent_AckType_ImmediateAck,
+				 viopath_sourceinst(viopath_hostLp),
+				 viopath_targetinst(viopath_hostLp),
+				 (u64) (unsigned long) req->buffer,
+				 VIOVERSION << 16,
+				 ((u64) device_no << 48) | dmaaddr,
+				 start, len, 0);
+	if (hvrc != HvLpEvent_Rc_Good) {
+		printk(KERN_WARNING_VIO "hv error on op %d\n", (int) hvrc);
+		return -1;
+	}
+
+	return 0;
+}
+
+
+/* Do a request
+ */
+static int rwreq;
+static void do_viocd_request(request_queue_t * q)
+{
+	for (;;) {
+		struct request *req;
+		char err_str[80] = "";
+		int device_no;
+
+		INIT_REQUEST;
+		if (rwreq >= MAX_CD_REQ) {
+			return;
+		}
+
+		device_no = CURRENT_DEV;
+
+		/* remove the current request from the queue */
+		req = CURRENT;
+		blkdev_dequeue_request(req);
+
+		/* check for any kind of error */
+		if (device_no >= viocd_numdev)
+			sprintf(err_str, "Invalid device number %d", device_no);
+		else if (send_request(req) < 0)
+			strcpy(err_str, "unable to send message to OS/400!");
+
+		/* if we had any sort of error, log it and cancel the request */
+		if (*err_str) {
+			printk(KERN_WARNING_VIO "%s\n", err_str);
+			viocd_end_request(req, 0);
+		} else {
+			spin_lock(&viocd_lock);
+			list_add_tail(&req->queue, &reqlist);
+			++rwreq;
+			spin_unlock(&viocd_lock);
+		}
+	}
+}
+
+/* Check if the CD changed
+ */
+static int viocd_media_changed(struct cdrom_device_info *cdi, int disc_nr)
+{
+	struct viocd_waitevent we;
+	HvLpEvent_Rc hvrc;
+	int device_no = MINOR(cdi->dev);
+
+	/* This semaphore is raised in the interrupt handler */
+	DECLARE_MUTEX_LOCKED(Semaphore);
+
+	/* Check that we are dealing with a valid hosting partition */
+	if (viopath_hostLp == HvLpIndexInvalid) {
+		printk(KERN_WARNING_VIO "Invalid hosting partition\n");
+		return -EIO;
+	}
+
+	we.sem = &Semaphore;
+
+	/* Send the check-change event to OS/400 */
+	hvrc = signalLpEventFast(viopath_hostLp,
+				 HvLpEvent_Type_VirtualIo,
+				 viomajorsubtype_cdio | viocdcheck,
+				 HvLpEvent_AckInd_DoAck,
+				 HvLpEvent_AckType_ImmediateAck,
+				 viopath_sourceinst(viopath_hostLp),
+				 viopath_targetinst(viopath_hostLp),
+				 (u64) (unsigned long) &we,
+				 VIOVERSION << 16,
+				 ((u64) device_no << 48),
+				 0, 0, 0);
+
+	if (hvrc != 0) {
+		printk(KERN_WARNING_VIO "bad rc on signalLpEventFast %d\n", (int) hvrc);
+		return -EIO;
+	}
+
+	/* Wait for the interrupt handler to get the response */
+	down(&Semaphore);
+
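+	/* This is the driver's standard synchronous-call pattern: the
+	   event's correlation token carries a pointer to the stack-based
+	   viocd_waitevent above, the handler (vioHandleCDEvent) copies
+	   the return codes into it and ups the semaphore, and only then
+	   does this thread continue. */
+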
/* Check the return code. If bad, assume no change */ + if (we.rc) { + const struct vio_error_entry *err = vio_lookup_rc(viocd_err_table, we.subtypeRc); + printk(KERN_WARNING_VIO "bad rc %d:0x%04X on check_change: %s; Assuming no change\n", we.rc, we.subtypeRc, err->msg); + return 0; + } + + return we.changed; +} + +static int viocd_lock_door(struct cdrom_device_info *cdi, int locking) +{ + HvLpEvent_Rc hvrc; + u64 device_no = MINOR(cdi->dev); + /* NOTE: flags is 1 or 0 so it won't overwrite the device_no */ + u64 flags = !!locking; + /* This semaphore is raised in the interrupt handler */ + DECLARE_MUTEX_LOCKED(Semaphore); + struct viocd_waitevent we = { sem:&Semaphore }; + + /* Check that we are dealing with a valid hosting partition */ + if (viopath_hostLp == HvLpIndexInvalid) { + printk(KERN_WARNING_VIO "Invalid hosting partition\n"); + return -EIO; + } + + we.sem = &Semaphore; + + /* Send the lockdoor event to OS/400 */ + hvrc = signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_cdio | viocdlockdoor, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst(viopath_hostLp), + viopath_targetinst(viopath_hostLp), + (u64) (unsigned long) &we, + VIOVERSION << 16, + (device_no << 48) | (flags << 32), + 0, 0, 0); + + if (hvrc != 0) { + printk(KERN_WARNING_VIO "bad rc on signalLpEventFast %d\n", (int) hvrc); + return -EIO; + } + + /* Wait for the interrupt handler to get the response */ + down(&Semaphore); + + /* Check the return code. If bad, assume no change */ + if (we.rc != 0) { + return -EIO; + } + + return 0; +} + +/* This routine handles incoming CD LP events + */ +static void vioHandleCDEvent(struct HvLpEvent *event) +{ + struct viocdlpevent *bevent = (struct viocdlpevent *) event; + struct viocd_waitevent *pwe; + + if (event == NULL) { + /* Notification that a partition went away! */ + return; + } + /* First, we should NEVER get an int here...only acks */ + if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + printk(KERN_WARNING_VIO "Yikes! got an int in viocd event handler!\n"); + if (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + } + + switch (event->xSubtype & VIOMINOR_SUBTYPE_MASK) { + case viocdopen: + viocd_diskinfo[bevent->mDisk].blocksize = bevent->mBlockSize; + viocd_diskinfo[bevent->mDisk].mediasize = bevent->mMediaSize; + /* FALLTHROUGH !! */ + case viocdgetinfo: + case viocdlockdoor: + pwe = (struct viocd_waitevent *) (unsigned long) event->xCorrelationToken; + pwe->rc = event->xRc; + pwe->subtypeRc = bevent->mSubTypeRc; + up(pwe->sem); + break; + + case viocdclose: + break; + + case viocdwrite: + case viocdread:{ + unsigned long flags; + int reading = ((event->xSubtype & VIOMINOR_SUBTYPE_MASK) == viocdread); + struct request *req = blkdev_entry_to_request(reqlist.next); + /* Since this is running in interrupt mode, we need to make sure we're not + * stepping on any global I/O operations + */ + spin_lock_irqsave(&io_request_lock, flags); + + pci_unmap_single(iSeries_vio_dev, + bevent->mToken, + bevent->mLen, + reading ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE); + + /* find the event to which this is a response */ + while ((&req->queue != &reqlist) && + ((u64) (unsigned long) req->buffer != bevent->event.xCorrelationToken)) + req = blkdev_entry_to_request(req->queue.next); + + /* if the event was not there, then what are we responding to?? */ + if (&req->queue == &reqlist) { + printk(KERN_WARNING_VIO "Yikes! 
we never enqueued this guy!\n"); + spin_unlock_irqrestore(&io_request_lock, + flags); + break; + } + + /* we don't need to keep it around anymore... */ + spin_lock(&viocd_lock); + list_del(&req->queue); + --rwreq; + spin_unlock(&viocd_lock); + { + char stat = event->xRc == HvLpEvent_Rc_Good; + int nsect = bevent->mLen >> 9; + + if (!stat) { + const struct vio_error_entry *err = + vio_lookup_rc(viocd_err_table, bevent->mSubTypeRc); + printk(KERN_WARNING_VIO "request %p failed with rc %d:0x%04X: %s\n", + req->buffer, event->xRc, bevent->mSubTypeRc, err->msg); + } + while ((nsect > 0) && (req->bh)) { + nsect -= req->current_nr_sectors; + viocd_end_request(req, stat); + } + /* we weren't done yet */ + if (req->bh) { + if (send_request(req) < 0) { + printk(KERN_WARNING_VIO + "couldn't re-submit req %p\n", req->buffer); + viocd_end_request(req, 0); + } else { + spin_lock(&viocd_lock); + list_add_tail(&req->queue, &reqlist); + ++rwreq; + spin_unlock(&viocd_lock); + } + } + } + + /* restart handling of incoming requests */ + do_viocd_request(NULL); + spin_unlock_irqrestore(&io_request_lock, flags); + break; + } + case viocdcheck: + pwe = (struct viocd_waitevent *) (unsigned long) event->xCorrelationToken; + pwe->rc = event->xRc; + pwe->subtypeRc = bevent->mSubTypeRc; + pwe->changed = bevent->mFlags; + up(pwe->sem); + break; + + default: + printk(KERN_WARNING_VIO "message with invalid subtype %0x04X!\n", event->xSubtype & VIOMINOR_SUBTYPE_MASK); + if (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + } +} + +/* Our file operations table + */ +static struct cdrom_device_ops viocd_dops = { + open:viocd_open, + release:viocd_release, + media_changed:viocd_media_changed, + lock_door:viocd_lock_door, + capability:CDC_CLOSE_TRAY | CDC_OPEN_TRAY | CDC_LOCK | CDC_SELECT_SPEED | CDC_SELECT_DISC | CDC_MULTI_SESSION | CDC_MCN | CDC_MEDIA_CHANGED | CDC_PLAY_AUDIO | CDC_RESET | CDC_IOCTLS | CDC_DRIVE_STATUS | CDC_GENERIC_PACKET | CDC_CD_R | CDC_CD_RW | CDC_DVD | CDC_DVD_R | CDC_DVD_RAM +}; + +/* Handle reads from the proc file system + */ +static int proc_read(char *buf, char **start, off_t offset, + int blen, int *eof, void *data) +{ + int len = 0; + int i; + + for (i = 0; i < viocd_numdev; i++) { + len += + sprintf(buf + len, + "viocd device %d is iSeries resource %10.10s type %4.4s, model %3.3s\n", + i, viocd_unitinfo[i].rsrcname, + viocd_unitinfo[i].type, + viocd_unitinfo[i].model); + } + *eof = 1; + return len; +} + + +/* setup our proc file system entries + */ +void viocd_proc_init(struct proc_dir_entry *iSeries_proc) +{ + struct proc_dir_entry *ent; + ent = create_proc_entry("viocd", S_IFREG | S_IRUSR, iSeries_proc); + if (!ent) + return; + ent->nlink = 1; + ent->data = NULL; + ent->read_proc = proc_read; +} + +/* clean up our proc file system entries + */ +void viocd_proc_delete(struct proc_dir_entry *iSeries_proc) +{ + remove_proc_entry("viocd", iSeries_proc); +} + +static int find_capability(const char *type) +{ + struct capability_entry *entry; + for(entry = capability_table; entry->type; ++entry) + if(!strncmp(entry->type, type, 4)) + break; + return entry->capability; +} + +/* Initialize the whole device driver. 
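+ * (The sequence below: resolve the hosting LP, open the LP event
+ * path with room for MAX_CD_REQ in-flight requests plus two control
+ * events, register the event handler, query unit info from OS/400,
+ * and only then register with the block and cdrom layers.)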
Handle module and non-module + * versions + */ +__init int viocd_init(void) +{ + int i, rc; + + if (viopath_hostLp == HvLpIndexInvalid) + vio_set_hostlp(); + + /* If we don't have a host, bail out */ + if (viopath_hostLp == HvLpIndexInvalid) + return -ENODEV; + + rc = viopath_open(viopath_hostLp, viomajorsubtype_cdio, MAX_CD_REQ+2); + if (rc) { + printk(KERN_WARNING_VIO "error opening path to host partition %d\n", + viopath_hostLp); + return rc; + } + + /* Initialize our request handler + */ + rwreq = 0; + vio_setHandler(viomajorsubtype_cdio, vioHandleCDEvent); + + memset(&viocd_diskinfo, 0x00, sizeof(viocd_diskinfo)); + + get_viocd_info(); + + if (viocd_numdev == 0) { + vio_clearHandler(viomajorsubtype_cdio); + viopath_close(viopath_hostLp, viomajorsubtype_cdio, MAX_CD_REQ+2); + return 0; + } + + printk(KERN_INFO_VIO + "%s: iSeries Virtual CD vers %s, major %d, max disks %d, hosting partition %d\n", + DEVICE_NAME, VIOCD_VERS, MAJOR_NR, VIOCD_MAX_CD, viopath_hostLp); + + if (devfs_register_blkdev(MAJOR_NR, "viocd", &viocd_fops) != 0) { + printk(KERN_WARNING_VIO "Unable to get major %d for viocd CD-ROM\n", MAJOR_NR); + return -EIO; + } + + blksize_size[MAJOR_NR] = viocd_blocksizes; + blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); + read_ahead[MAJOR_NR] = 4; + + memset(&viocd_info, 0x00, sizeof(viocd_info)); + for (i = 0; i < viocd_numdev; i++) { + viocd_info[i].dev = MKDEV(MAJOR_NR, i); + viocd_info[i].ops = &viocd_dops; + viocd_info[i].speed = 4; + viocd_info[i].capacity = 1; + viocd_info[i].mask = ~find_capability(viocd_unitinfo[i].type); + sprintf(viocd_info[i].name, VIOCD_DEVICE, VIOCD_DEVICE_OFFSET + i); + if (register_cdrom(&viocd_info[i]) != 0) { + printk(KERN_WARNING_VIO "Cannot register viocd CD-ROM %s!\n", viocd_info[i].name); + } else { + printk(KERN_INFO_VIO + "cd %s is iSeries resource %10.10s type %4.4s, model %3.3s\n", + viocd_info[i].name, + viocd_unitinfo[i].rsrcname, + viocd_unitinfo[i].type, + viocd_unitinfo[i].model); + } + } + + /* + * Create the proc entry + */ + iSeries_proc_callback(&viocd_proc_init); + + return 0; +} + +#ifdef MODULE +void viocd_exit(void) +{ + int i; + for (i = 0; i < viocd_numdev; i++) { + if (unregister_cdrom(&viocd_info[i]) != 0) { + printk(KERN_WARNING_VIO "Cannot unregister viocd CD-ROM %s!\n", viocd_info[i].name); + } + } + if ((devfs_unregister_blkdev(MAJOR_NR, "viocd") == -EINVAL)) { + printk(KERN_WARNING_VIO "can't unregister viocd\n"); + return; + } + blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR)); + if (viocd_unitinfo) + kfree(viocd_unitinfo); + + iSeries_proc_callback(&viocd_proc_delete); + + viopath_close(viopath_hostLp, viomajorsubtype_cdio, MAX_CD_REQ+2); + vio_clearHandler(viomajorsubtype_cdio); +} +#endif + +#ifdef MODULE +module_init(viocd_init); +module_exit(viocd_exit); +MODULE_LICENSE("GPL"); +#endif diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/iseries/viocons.c linuxppc64_2_4/drivers/iseries/viocons.c --- ../kernel.org/linux-2.4.19/drivers/iseries/viocons.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/iseries/viocons.c Wed Dec 12 13:48:34 2001 @@ -0,0 +1,1401 @@ +/* -*- linux-c -*- + * drivers/char/viocons.c + * + * iSeries Virtual Terminal + * + * Authors: Dave Boutcher + * Ryan Arnold + * Colin Devilbiss + * + * (C) Copyright 2000 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any 
later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vio.h" + +#include +#include "asm/iSeries/HvCallEvent.h" +#include "asm/iSeries/HvLpConfig.h" +#include "asm/iSeries/HvCall.h" +#include + +/* Check that the tty_driver_data actually points to our stuff + */ +#define VIOTTY_PARANOIA_CHECK 1 +#define VIOTTY_MAGIC (0x0DCB) + +static int debug; + +static DECLARE_WAIT_QUEUE_HEAD(viocons_wait_queue); + +#define VTTY_PORTS 10 +#define VIOTTY_SERIAL_START 65 + +static u64 sndMsgSeq[VTTY_PORTS]; +static u64 sndMsgAck[VTTY_PORTS]; + +static spinlock_t consolelock = SPIN_LOCK_UNLOCKED; + +/* The structure of the events that flow between us and OS/400. You can't + * mess with this unless the OS/400 side changes too + */ +struct viocharlpevent { + struct HvLpEvent event; + u32 mReserved1; + u16 mVersion; + u16 mSubTypeRc; + u8 virtualDevice; + u8 immediateDataLen; + u8 immediateData[VIOCHAR_MAX_DATA]; +}; + +#define viochar_window (10) +#define viochar_highwatermark (3) + +enum viocharsubtype { + viocharopen = 0x0001, + viocharclose = 0x0002, + viochardata = 0x0003, + viocharack = 0x0004, + viocharconfig = 0x0005 +}; + +enum viochar_rc { + viochar_rc_ebusy = 1 +}; + +/* When we get writes faster than we can send them to the partition, + * buffer the data here. There is one set of buffers for each virtual + * port. + * Note that bufferUsed is a bit map of used buffers. + * It had better have enough bits to hold NUM_BUF; + * the bitops assume it is a multiple of unsigned long + */ +#define NUM_BUF (8) +#define OVERFLOW_SIZE VIOCHAR_MAX_DATA + +static struct overflowBuffers { + unsigned long bufferUsed; + u8 *buffer[NUM_BUF]; + int bufferBytes[NUM_BUF]; + int curbuf; + int bufferOverflow; + int overflowMessage; +} overflow[VTTY_PORTS]; + +static void initDataEvent(struct viocharlpevent *viochar, HvLpIndex lp); + +static struct tty_driver viotty_driver; +static struct tty_driver viottyS_driver; +static int viotty_refcount; + +static struct tty_struct *viotty_table[VTTY_PORTS]; +static struct tty_struct *viottyS_table[VTTY_PORTS]; +static struct termios *viotty_termios[VTTY_PORTS]; +static struct termios *viottyS_termios[VTTY_PORTS]; +static struct termios *viotty_termios_locked[VTTY_PORTS]; +static struct termios *viottyS_termios_locked[VTTY_PORTS]; + +void hvlog(char *fmt, ...) +{ + int i; + static char buf[256]; + va_list args; + va_start(args, fmt); + i = vsprintf(buf, fmt, args); + va_end(args); + HvCall_writeLogBuffer(buf, i); + HvCall_writeLogBuffer("\r", 1); + +} + +/* Our port information. We store a pointer to one entry in the + * tty_driver_data + */ +static struct port_info_tag { + int magic; + struct tty_struct *tty; + HvLpIndex lp; + u8 vcons; + u8 port; +} port_info[VTTY_PORTS]; + +/* Make sure we're pointing to a valid port_info structure. 
Shamelessly + * plagiarized from serial.c + */ +static inline int viotty_paranoia_check(struct port_info_tag *pi, + kdev_t device, const char *routine) +{ +#ifdef VIOTTY_PARANOIA_CHECK + static const char *badmagic = + "%s Warning: bad magic number for port_info struct (%s) in %s\n"; + static const char *badinfo = + "%s Warning: null port_info for (%s) in %s\n"; + + if (!pi) { + printk(badinfo, KERN_WARNING_VIO, kdevname(device), + routine); + return 1; + } + if (pi->magic != VIOTTY_MAGIC) { + printk(badmagic, KERN_WARNING_VIO, kdevname(device), + routine); + return 1; + } +#endif + return 0; +} + +/* + * Handle reads from the proc file system. Right now we just dump the + * state of the first TTY + */ +static int proc_read(char *buf, char **start, off_t offset, + int blen, int *eof, void *data) +{ + int len = 0; + struct tty_struct *tty = viotty_table[0]; + struct termios *termios; + if (tty == NULL) { + len += sprintf(buf + len, "no tty\n"); + *eof = 1; + return len; + } + + len += + sprintf(buf + len, + "tty info: COOK_OUT %ld COOK_IN %ld, NO_WRITE_SPLIT %ld\n", + tty->flags & TTY_HW_COOK_OUT, + tty->flags & TTY_HW_COOK_IN, + tty->flags & TTY_NO_WRITE_SPLIT); + + termios = tty->termios; + if (termios == NULL) { + len += sprintf(buf + len, "no termios\n"); + *eof = 1; + return len; + } + len += sprintf(buf + len, "INTR_CHAR %2.2x\n", INTR_CHAR(tty)); + len += sprintf(buf + len, "QUIT_CHAR %2.2x\n", QUIT_CHAR(tty)); + len += + sprintf(buf + len, "ERASE_CHAR %2.2x\n", ERASE_CHAR(tty)); + len += sprintf(buf + len, "KILL_CHAR %2.2x\n", KILL_CHAR(tty)); + len += sprintf(buf + len, "EOF_CHAR %2.2x\n", EOF_CHAR(tty)); + len += sprintf(buf + len, "TIME_CHAR %2.2x\n", TIME_CHAR(tty)); + len += sprintf(buf + len, "MIN_CHAR %2.2x\n", MIN_CHAR(tty)); + len += sprintf(buf + len, "SWTC_CHAR %2.2x\n", SWTC_CHAR(tty)); + len += + sprintf(buf + len, "START_CHAR %2.2x\n", START_CHAR(tty)); + len += sprintf(buf + len, "STOP_CHAR %2.2x\n", STOP_CHAR(tty)); + len += sprintf(buf + len, "SUSP_CHAR %2.2x\n", SUSP_CHAR(tty)); + len += sprintf(buf + len, "EOL_CHAR %2.2x\n", EOL_CHAR(tty)); + len += + sprintf(buf + len, "REPRINT_CHAR %2.2x\n", REPRINT_CHAR(tty)); + len += + sprintf(buf + len, "DISCARD_CHAR %2.2x\n", DISCARD_CHAR(tty)); + len += + sprintf(buf + len, "WERASE_CHAR %2.2x\n", WERASE_CHAR(tty)); + len += + sprintf(buf + len, "LNEXT_CHAR %2.2x\n", LNEXT_CHAR(tty)); + len += sprintf(buf + len, "EOL2_CHAR %2.2x\n", EOL2_CHAR(tty)); + + len += sprintf(buf + len, "I_IGNBRK %4.4x\n", I_IGNBRK(tty)); + len += sprintf(buf + len, "I_BRKINT %4.4x\n", I_BRKINT(tty)); + len += sprintf(buf + len, "I_IGNPAR %4.4x\n", I_IGNPAR(tty)); + len += sprintf(buf + len, "I_PARMRK %4.4x\n", I_PARMRK(tty)); + len += sprintf(buf + len, "I_INPCK %4.4x\n", I_INPCK(tty)); + len += sprintf(buf + len, "I_ISTRIP %4.4x\n", I_ISTRIP(tty)); + len += sprintf(buf + len, "I_INLCR %4.4x\n", I_INLCR(tty)); + len += sprintf(buf + len, "I_IGNCR %4.4x\n", I_IGNCR(tty)); + len += sprintf(buf + len, "I_ICRNL %4.4x\n", I_ICRNL(tty)); + len += sprintf(buf + len, "I_IUCLC %4.4x\n", I_IUCLC(tty)); + len += sprintf(buf + len, "I_IXON %4.4x\n", I_IXON(tty)); + len += sprintf(buf + len, "I_IXANY %4.4x\n", I_IXANY(tty)); + len += sprintf(buf + len, "I_IXOFF %4.4x\n", I_IXOFF(tty)); + len += sprintf(buf + len, "I_IMAXBEL %4.4x\n", I_IMAXBEL(tty)); + + len += sprintf(buf + len, "O_OPOST %4.4x\n", O_OPOST(tty)); + len += sprintf(buf + len, "O_OLCUC %4.4x\n", O_OLCUC(tty)); + len += sprintf(buf + len, "O_ONLCR %4.4x\n", O_ONLCR(tty)); + len += sprintf(buf + 
len, "O_OCRNL %4.4x\n", O_OCRNL(tty)); + len += sprintf(buf + len, "O_ONOCR %4.4x\n", O_ONOCR(tty)); + len += sprintf(buf + len, "O_ONLRET %4.4x\n", O_ONLRET(tty)); + len += sprintf(buf + len, "O_OFILL %4.4x\n", O_OFILL(tty)); + len += sprintf(buf + len, "O_OFDEL %4.4x\n", O_OFDEL(tty)); + len += sprintf(buf + len, "O_NLDLY %4.4x\n", O_NLDLY(tty)); + len += sprintf(buf + len, "O_CRDLY %4.4x\n", O_CRDLY(tty)); + len += sprintf(buf + len, "O_TABDLY %4.4x\n", O_TABDLY(tty)); + len += sprintf(buf + len, "O_BSDLY %4.4x\n", O_BSDLY(tty)); + len += sprintf(buf + len, "O_VTDLY %4.4x\n", O_VTDLY(tty)); + len += sprintf(buf + len, "O_FFDLY %4.4x\n", O_FFDLY(tty)); + + len += sprintf(buf + len, "C_BAUD %4.4x\n", C_BAUD(tty)); + len += sprintf(buf + len, "C_CSIZE %4.4x\n", C_CSIZE(tty)); + len += sprintf(buf + len, "C_CSTOPB %4.4x\n", C_CSTOPB(tty)); + len += sprintf(buf + len, "C_CREAD %4.4x\n", C_CREAD(tty)); + len += sprintf(buf + len, "C_PARENB %4.4x\n", C_PARENB(tty)); + len += sprintf(buf + len, "C_PARODD %4.4x\n", C_PARODD(tty)); + len += sprintf(buf + len, "C_HUPCL %4.4x\n", C_HUPCL(tty)); + len += sprintf(buf + len, "C_CLOCAL %4.4x\n", C_CLOCAL(tty)); + len += sprintf(buf + len, "C_CRTSCTS %4.4x\n", C_CRTSCTS(tty)); + + len += sprintf(buf + len, "L_ISIG %4.4x\n", L_ISIG(tty)); + len += sprintf(buf + len, "L_ICANON %4.4x\n", L_ICANON(tty)); + len += sprintf(buf + len, "L_XCASE %4.4x\n", L_XCASE(tty)); + len += sprintf(buf + len, "L_ECHO %4.4x\n", L_ECHO(tty)); + len += sprintf(buf + len, "L_ECHOE %4.4x\n", L_ECHOE(tty)); + len += sprintf(buf + len, "L_ECHOK %4.4x\n", L_ECHOK(tty)); + len += sprintf(buf + len, "L_ECHONL %4.4x\n", L_ECHONL(tty)); + len += sprintf(buf + len, "L_NOFLSH %4.4x\n", L_NOFLSH(tty)); + len += sprintf(buf + len, "L_TOSTOP %4.4x\n", L_TOSTOP(tty)); + len += sprintf(buf + len, "L_ECHOCTL %4.4x\n", L_ECHOCTL(tty)); + len += sprintf(buf + len, "L_ECHOPRT %4.4x\n", L_ECHOPRT(tty)); + len += sprintf(buf + len, "L_ECHOKE %4.4x\n", L_ECHOKE(tty)); + len += sprintf(buf + len, "L_FLUSHO %4.4x\n", L_FLUSHO(tty)); + len += sprintf(buf + len, "L_PENDIN %4.4x\n", L_PENDIN(tty)); + len += sprintf(buf + len, "L_IEXTEN %4.4x\n", L_IEXTEN(tty)); + + *eof = 1; + return len; +} + +/* + * Handle writes to our proc file system. Right now just turns on and off + * our debug flag + */ +static int proc_write(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + if (count) { + if (buffer[0] == '1') { + printk("viocons: debugging on\n"); + debug = 1; + } else { + printk("viocons: debugging off\n"); + debug = 0; + } + } + return count; +} + +/* + * setup our proc file system entries + */ +void viocons_proc_init(struct proc_dir_entry *iSeries_proc) +{ + struct proc_dir_entry *ent; + ent = + create_proc_entry("viocons", S_IFREG | S_IRUSR, iSeries_proc); + if (!ent) + return; + ent->nlink = 1; + ent->data = NULL; + ent->read_proc = proc_read; + ent->write_proc = proc_write; +} + +/* + * clean up our proc file system entries + */ +void viocons_proc_delete(struct proc_dir_entry *iSeries_proc) +{ + remove_proc_entry("viocons", iSeries_proc); +} + +/* + * Add data to our pending-send buffers. + * + * NOTE: Don't use printk in here because it gets nastily recursive. 
hvlog can be + * used to log to the hypervisor buffer + */ +static int bufferAdd(u8 port, const char *buf, size_t len, int userFlag) +{ + size_t bleft = len; + size_t curlen; + char *cbuf = (char *) buf; + int nextbuf; + struct overflowBuffers *pov = &overflow[port]; + while (bleft > 0) { + /* If there is no space left in the current buffer, we have + * filled everything up, so return. If we filled the previous + * buffer we would already have moved to the next one. + */ + if (pov->bufferBytes[pov->curbuf] == OVERFLOW_SIZE) { + hvlog("buffer %d full. no more space\n", + pov->curbuf); + pov->bufferOverflow++; + pov->overflowMessage = 1; + return len - bleft; + } + + /* Turn on the "used" bit for this buffer. If it's already on, that's + * fine. + */ + set_bit(pov->curbuf, &pov->bufferUsed); + + /* + * See if this buffer has been allocated. If not, allocate it + */ + if (pov->buffer[pov->curbuf] == NULL) + pov->buffer[pov->curbuf] = + kmalloc(OVERFLOW_SIZE, GFP_ATOMIC); + + /* + * Figure out how much we can copy into this buffer + */ + if (bleft < + (OVERFLOW_SIZE - pov->bufferBytes[pov->curbuf])) + curlen = bleft; + else + curlen = + OVERFLOW_SIZE - pov->bufferBytes[pov->curbuf]; + + /* + * Copy the data into the buffer + */ + if (userFlag) + copy_from_user(pov->buffer[pov->curbuf] + + pov->bufferBytes[pov->curbuf], cbuf, + curlen); + else + memcpy(pov->buffer[pov->curbuf] + + pov->bufferBytes[pov->curbuf], cbuf, + curlen); + + pov->bufferBytes[pov->curbuf] += curlen; + cbuf += curlen; + bleft -= curlen; + + /* + * Now see if we've filled this buffer + */ + if (pov->bufferBytes[pov->curbuf] == OVERFLOW_SIZE) { + nextbuf = (pov->curbuf + 1) % NUM_BUF; + + /* + * Move to the next buffer if it hasn't been used yet + */ + if (test_bit(nextbuf, &pov->bufferUsed) == 0) { + pov->curbuf = nextbuf; + } + } + } + return len; +} + +/* Send pending data + * + * NOTE: Don't use printk in here because it gets nastily recursive. hvlog can be + * used to log to the hypervisor buffer + */ +void sendBuffers(u8 port, HvLpIndex lp) +{ + HvLpEvent_Rc hvrc; + int nextbuf; + struct viocharlpevent *viochar; + unsigned long flags; + struct overflowBuffers *pov = &overflow[port]; + + spin_lock_irqsave(&consolelock, flags); + + viochar = (struct viocharlpevent *) + vio_get_event_buffer(viomajorsubtype_chario); + + /* Make sure we got a buffer + */ + if (viochar == NULL) { + hvlog("Yikes...can't get viochar buffer"); + spin_unlock_irqrestore(&consolelock, flags); + return; + } + + if (pov->bufferUsed == 0) { + hvlog("in sendbuffers, but no buffers used\n"); + vio_free_event_buffer(viomajorsubtype_chario, viochar); + spin_unlock_irqrestore(&consolelock, flags); + return; + } + + /* + * curbuf points to the buffer we're filling. We want to start sending AFTER + * this one. 
+ */ + nextbuf = (pov->curbuf + 1) % NUM_BUF; + + /* + * Loop until we find a buffer with the bufferUsed bit on + */ + while (test_bit(nextbuf, &pov->bufferUsed) == 0) + nextbuf = (nextbuf + 1) % NUM_BUF; + + initDataEvent(viochar, lp); + + /* + * While we have buffers with data, and our send window is open, send them + */ + while ((test_bit(nextbuf, &pov->bufferUsed)) && + ((sndMsgSeq[port] - sndMsgAck[port]) < viochar_window)) { + viochar->immediateDataLen = pov->bufferBytes[nextbuf]; + viochar->event.xCorrelationToken = sndMsgSeq[port]++; + viochar->event.xSizeMinus1 = + offsetof(struct viocharlpevent, + immediateData) + viochar->immediateDataLen; + + memcpy(viochar->immediateData, pov->buffer[nextbuf], + viochar->immediateDataLen); + + hvrc = HvCallEvent_signalLpEvent(&viochar->event); + if (hvrc) { + /* + * MUST unlock the spinlock before doing a printk + */ + vio_free_event_buffer(viomajorsubtype_chario, + viochar); + spin_unlock_irqrestore(&consolelock, flags); + + printk(KERN_WARNING_VIO + "console error sending event! return code %d\n", + (int) hvrc); + return; + } + + /* + * clear the bufferUsed bit, zero the number of bytes in this buffer, + * and move to the next buffer + */ + clear_bit(nextbuf, &pov->bufferUsed); + pov->bufferBytes[nextbuf] = 0; + nextbuf = (nextbuf + 1) % NUM_BUF; + } + + + /* + * If we have emptied all the buffers, start at 0 again. + * this will re-use any allocated buffers + */ + if (pov->bufferUsed == 0) { + pov->curbuf = 0; + + if (pov->overflowMessage) + pov->overflowMessage = 0; + + if (port_info[port].tty) { + if ((port_info[port].tty-> + flags & (1 << TTY_DO_WRITE_WAKEUP)) + && (port_info[port].tty->ldisc.write_wakeup)) + (port_info[port].tty->ldisc. + write_wakeup) (port_info[port].tty); + wake_up_interruptible(&port_info[port].tty-> + write_wait); + } + } + + vio_free_event_buffer(viomajorsubtype_chario, viochar); + spin_unlock_irqrestore(&consolelock, flags); + +} + +/* Our internal writer. Gets called both from the console device and + * the tty device. the tty pointer will be NULL if called from the console. + * + * NOTE: Don't use printk in here because it gets nastily recursive. hvlog can be + * used to log to the hypervisor buffer + */ +static int internal_write(struct tty_struct *tty, const char *buf, + size_t len, int userFlag) +{ + HvLpEvent_Rc hvrc; + size_t bleft = len; + size_t curlen; + const char *curbuf = buf; + struct viocharlpevent *viochar; + unsigned long flags; + struct port_info_tag *pi = NULL; + HvLpIndex lp; + u8 port; + + if (tty) { + pi = (struct port_info_tag *) tty->driver_data; + + if (!pi + || viotty_paranoia_check(pi, tty->device, + "viotty_internal_write")) + return -ENODEV; + + lp = pi->lp; + port = pi->port; + } else { + /* If this is the console device, use the lp from the first port entry + */ + port = 0; + lp = port_info[0].lp; + } + + /* Always put console output in the hypervisor console log + */ + if (port == 0) + HvCall_writeLogBuffer(buf, len); + + /* If the path to this LP is closed, don't bother doing anything more. 
+ * just dump the data on the floor + */ + if (!viopath_isactive(lp)) + return len; + + /* + * If there is already data queued for this port, send it + */ + if (overflow[port].bufferUsed) + sendBuffers(port, lp); + + spin_lock_irqsave(&consolelock, flags); + + viochar = (struct viocharlpevent *) + vio_get_event_buffer(viomajorsubtype_chario); + /* Make sure we got a buffer + */ + if (viochar == NULL) { + hvlog("Yikes...can't get viochar buffer"); + spin_unlock_irqrestore(&consolelock, flags); + return -1; + } + + initDataEvent(viochar, lp); + + /* Got the lock, don't cause console output */ + while ((bleft > 0) && + (overflow[port].bufferUsed == 0) && + ((sndMsgSeq[port] - sndMsgAck[port]) < viochar_window)) { + if (bleft > VIOCHAR_MAX_DATA) + curlen = VIOCHAR_MAX_DATA; + else + curlen = bleft; + + viochar->immediateDataLen = curlen; + viochar->event.xCorrelationToken = sndMsgSeq[port]++; + + if (userFlag) + copy_from_user(viochar->immediateData, curbuf, + curlen); + else + memcpy(viochar->immediateData, curbuf, curlen); + + viochar->event.xSizeMinus1 = + offsetof(struct viocharlpevent, + immediateData) + curlen; + + hvrc = HvCallEvent_signalLpEvent(&viochar->event); + if (hvrc) { + /* + * MUST unlock the spinlock before doing a printk + */ + vio_free_event_buffer(viomajorsubtype_chario, + viochar); + spin_unlock_irqrestore(&consolelock, flags); + + hvlog("viocons: error sending event! %d\n", + (int) hvrc); + return len - bleft; + } + + curbuf += curlen; + bleft -= curlen; + } + + /* + * If we didn't send it all, buffer it + */ + if (bleft > 0) { + bleft -= bufferAdd(port, curbuf, bleft, userFlag); + } + vio_free_event_buffer(viomajorsubtype_chario, viochar); + spin_unlock_irqrestore(&consolelock, flags); + + return len - bleft; +} + +/* Initialize the common fields in a charLpEvent + */ +static void initDataEvent(struct viocharlpevent *viochar, HvLpIndex lp) +{ + memset(viochar, 0x00, sizeof(struct viocharlpevent)); + + viochar->event.xFlags.xValid = 1; + viochar->event.xFlags.xFunction = HvLpEvent_Function_Int; + viochar->event.xFlags.xAckInd = HvLpEvent_AckInd_NoAck; + viochar->event.xFlags.xAckType = HvLpEvent_AckType_DeferredAck; + viochar->event.xType = HvLpEvent_Type_VirtualIo; + viochar->event.xSubtype = viomajorsubtype_chario | viochardata; + viochar->event.xSourceLp = HvLpConfig_getLpIndex(); + viochar->event.xTargetLp = lp; + viochar->event.xSizeMinus1 = sizeof(struct viocharlpevent); + viochar->event.xSourceInstanceId = viopath_sourceinst(lp); + viochar->event.xTargetInstanceId = viopath_targetinst(lp); +} + + +/* console device write + */ +static void viocons_write(struct console *co, const char *s, + unsigned count) +{ + /* This parser will ensure that all single instances of either \n or \r are + * matched into carriage return/line feed combinations. It also allows for + * instances where there already exist \n\r combinations as well as the + * reverse, \r\n combinations. 
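+ * (In short, the hypervisor console should always receive CR/LF pairs, + * never a bare carriage return or a bare line feed.)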
+ */ + + int index; + char charptr[1]; + int foundcr; + int slicebegin; + int sliceend; + + foundcr = 0; + slicebegin = 0; + sliceend = 0; + + for (index = 0; index < count; index++) { + if (!foundcr && s[index] == 0x0a) { + if ((slicebegin - sliceend > 0) + && sliceend < count) { + internal_write(NULL, &s[slicebegin], + sliceend - slicebegin, 0); + slicebegin = sliceend; + } + charptr[0] = '\r'; + internal_write(NULL, charptr, 1, 0); + } + if (foundcr && s[index] != 0x0a) { + if ((index - 2) >= 0) { + if (s[index - 2] != 0x0a) { + internal_write(NULL, + &s[slicebegin], + sliceend - + slicebegin, 0); + slicebegin = sliceend; + charptr[0] = '\n'; + internal_write(NULL, charptr, 1, + 0); + } + } + } + sliceend++; + + if (s[index] == 0x0d) + foundcr = 1; + else + foundcr = 0; + } + + internal_write(NULL, &s[slicebegin], sliceend - slicebegin, 0); + + if (count > 1) { + if (foundcr == 1 && s[count - 1] != 0x0a) { + charptr[0] = '\n'; + internal_write(NULL, charptr, 1, 0); + } else if (s[count - 1] == 0x0a && s[count - 2] != 0x0d) { + + charptr[0] = '\r'; + internal_write(NULL, charptr, 1, 0); + } + } +} + +/* Work out the device associated with this console + */ +static kdev_t viocons_device(struct console *c) +{ + return MKDEV(TTY_MAJOR, c->index + viotty_driver.minor_start); +} + +/* console device read method + */ +static int viocons_read(struct console *co, const char *s, unsigned count) +{ + printk(KERN_DEBUG_VIO "viocons_read\n"); + // Implement me + interruptible_sleep_on(&viocons_wait_queue); + return 0; +} + +/* console device wait until a key is pressed + */ +static int viocons_wait_key(struct console *co) +{ + printk(KERN_DEBUG_VIO "In viocons_wait_key\n"); + // Implement me + interruptible_sleep_on(&viocons_wait_queue); + return 0; +} + +/* Do console device setup + */ +static int __init viocons_setup(struct console *co, char *options) +{ + return 0; +} + +/* console device I/O methods + */ +static struct console viocons = { + name:"ttyS", + write:viocons_write, + read:viocons_read, + device:viocons_device, + wait_key:viocons_wait_key, + setup:viocons_setup, + flags:CON_PRINTBUFFER, +}; + + +/* TTY Open method + */ +static int viotty_open(struct tty_struct *tty, struct file *filp) +{ + int port; + unsigned long flags; + MOD_INC_USE_COUNT; + port = MINOR(tty->device) - tty->driver.minor_start; + + if (port >= VIOTTY_SERIAL_START) + port -= VIOTTY_SERIAL_START; + + if ((port < 0) || (port >= VTTY_PORTS)) { + MOD_DEC_USE_COUNT; + return -ENODEV; + } + + spin_lock_irqsave(&consolelock, flags); + + /* + * If some other TTY is already connected here, reject the open + */ + if ((port_info[port].tty) && (port_info[port].tty != tty)) { + spin_unlock_irqrestore(&consolelock, flags); + MOD_DEC_USE_COUNT; + printk(KERN_WARNING_VIO + "console attempt to open device twice from different ttys\n"); + return -EBUSY; + } + tty->driver_data = &port_info[port]; + port_info[port].tty = tty; + spin_unlock_irqrestore(&consolelock, flags); + + return 0; +} + +/* TTY Close method + */ +static void viotty_close(struct tty_struct *tty, struct file *filp) +{ + unsigned long flags; + struct port_info_tag *pi = + (struct port_info_tag *) tty->driver_data; + + if (!pi || viotty_paranoia_check(pi, tty->device, "viotty_close")) + return; + + spin_lock_irqsave(&consolelock, flags); + if (tty->count == 1) { + pi->tty = NULL; + } + + spin_unlock_irqrestore(&consolelock, flags); + + MOD_DEC_USE_COUNT; +} + +/* TTY Write method + */ +static int viotty_write(struct tty_struct *tty, int from_user, + const unsigned char 
*buf, int count) +{ + return internal_write(tty, buf, count, from_user); +} + +/* TTY put_char method + */ +static void viotty_put_char(struct tty_struct *tty, unsigned char ch) +{ + internal_write(tty, &ch, 1, 0); +} + +/* TTY flush_chars method + */ +static void viotty_flush_chars(struct tty_struct *tty) +{ +} + +/* TTY write_room method + */ +static int viotty_write_room(struct tty_struct *tty) +{ + int i; + int room = 0; + struct port_info_tag *pi = + (struct port_info_tag *) tty->driver_data; + + if (!pi + || viotty_paranoia_check(pi, tty->device, + "viotty_sendbuffers")) + return 0; + + // If no buffers are used, return the max size + if (overflow[pi->port].bufferUsed == 0) + return VIOCHAR_MAX_DATA * NUM_BUF; + + for (i = 0; ((i < NUM_BUF) && (room < VIOCHAR_MAX_DATA)); i++) { + room += + (OVERFLOW_SIZE - overflow[pi->port].bufferBytes[i]); + } + + if (room > VIOCHAR_MAX_DATA) + return VIOCHAR_MAX_DATA; + else + return room; +} + +/* TTY chars_in_buffer method + */ +static int viotty_chars_in_buffer(struct tty_struct *tty) +{ + return 0; +} + +static void viotty_flush_buffer(struct tty_struct *tty) +{ +} + +static int viotty_ioctl(struct tty_struct *tty, struct file *file, + unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + /* the ioctls below read/set the flags usually shown in the leds */ + /* don't use them - they will go away without warning */ + case KDGETLED: + case KDGKBLED: + return put_user(0, (char *) arg); + + case KDSKBLED: + return 0; + } + + return n_tty_ioctl(tty, file, cmd, arg); +} + +static void viotty_throttle(struct tty_struct *tty) +{ +} + +static void viotty_unthrottle(struct tty_struct *tty) +{ +} + +static void viotty_set_termios(struct tty_struct *tty, + struct termios *old_termios) +{ +} + +static void viotty_stop(struct tty_struct *tty) +{ +} + +static void viotty_start(struct tty_struct *tty) +{ +} + +static void viotty_hangup(struct tty_struct *tty) +{ +} + +static void viotty_break(struct tty_struct *tty, int break_state) +{ +} + +static void viotty_send_xchar(struct tty_struct *tty, char ch) +{ +} + +static void viotty_wait_until_sent(struct tty_struct *tty, int timeout) +{ +} + +/* Handle an open charLpEvent. 
Could be either interrupt or ack + */ +static void vioHandleOpenEvent(struct HvLpEvent *event) +{ + unsigned long flags; + u8 eventRc; + u16 eventSubtypeRc; + struct viocharlpevent *cevent = (struct viocharlpevent *) event; + u8 port = cevent->virtualDevice; + + if (event->xFlags.xFunction == HvLpEvent_Function_Ack) { + if (port >= VTTY_PORTS) + return; + + spin_lock_irqsave(&consolelock, flags); + /* Got the lock, don't cause console output */ + + if (event->xRc == HvLpEvent_Rc_Good) { + sndMsgSeq[port] = sndMsgAck[port] = 0; + } + + port_info[port].lp = event->xTargetLp; + + spin_unlock_irqrestore(&consolelock, flags); + + if (event->xCorrelationToken != 0) { + unsigned long semptr = event->xCorrelationToken; + up((struct semaphore *) semptr); + } else + printk(KERN_WARNING_VIO + "console: weird...got open ack without semaphore\n"); + } else { + /* This had better require an ack, otherwise complain + */ + if (event->xFlags.xAckInd != HvLpEvent_AckInd_DoAck) { + printk(KERN_WARNING_VIO + "console: viocharopen without ack bit!\n"); + return; + } + + spin_lock_irqsave(&consolelock, flags); + /* Got the lock, don't cause console output */ + + /* Make sure this is a good virtual tty */ + if (port >= VTTY_PORTS) { + eventRc = HvLpEvent_Rc_SubtypeError; + eventSubtypeRc = viorc_openRejected; + } + + /* If this tty is already connected to a different + partition, fail */ + else if ((port_info[port].lp != HvLpIndexInvalid) && + (port_info[port].lp != event->xSourceLp)) { + eventRc = HvLpEvent_Rc_SubtypeError; + eventSubtypeRc = viorc_openRejected; + } else { + port_info[port].lp = event->xSourceLp; + eventRc = HvLpEvent_Rc_Good; + eventSubtypeRc = viorc_good; + sndMsgSeq[port] = sndMsgAck[port] = 0; + } + + spin_unlock_irqrestore(&consolelock, flags); + + /* Return the acknowledgement */ + HvCallEvent_ackLpEvent(event); + } +} + +/* Handle a close charLpEvent. Could be either interrupt or ack + */ +static void vioHandleCloseEvent(struct HvLpEvent *event) +{ + unsigned long flags; + struct viocharlpevent *cevent = (struct viocharlpevent *) event; + u8 port = cevent->virtualDevice; + + if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + if (port >= VTTY_PORTS) + return; + + /* For closes, just mark the console partition invalid */ + spin_lock_irqsave(&consolelock, flags); + /* Got the lock, don't cause console output */ + + if (port_info[port].lp == event->xSourceLp) + port_info[port].lp = HvLpIndexInvalid; + + spin_unlock_irqrestore(&consolelock, flags); + printk(KERN_INFO_VIO + "console close from %d\n", event->xSourceLp); + } else { + printk(KERN_WARNING_VIO + "console got unexpected close acknowledgement\n"); + } +} + +/* Handle a config charLpEvent. Could be either interrupt or ack + */ +static void vioHandleConfig(struct HvLpEvent *event) +{ + struct viocharlpevent *cevent = (struct viocharlpevent *) event; + int len; + + len = cevent->immediateDataLen; + HvCall_writeLogBuffer(cevent->immediateData, + cevent->immediateDataLen); + + if (cevent->immediateData[0] == 0x01) { + printk(KERN_INFO_VIO + "console window resized to %d: %d: %d: %d\n", + cevent->immediateData[1], + cevent->immediateData[2], + cevent->immediateData[3], cevent->immediateData[4]); + } else { + printk(KERN_WARNING_VIO "console unknown config event\n"); + } + return; +} + +/* Handle a data charLpEvent. 
+ */ +static void vioHandleData(struct HvLpEvent *event) +{ + struct tty_struct *tty; + struct viocharlpevent *cevent = (struct viocharlpevent *) event; + struct port_info_tag *pi; + int len; + u8 port = cevent->virtualDevice; + + if (port >= VTTY_PORTS) { + printk(KERN_WARNING_VIO + "console data on invalid virtual device %d\n", + port); + return; + } + + tty = port_info[port].tty; + + if (tty == NULL) { + printk(KERN_WARNING_VIO + "no tty for virtual device %d\n", port); + return; + } + + if (tty->magic != TTY_MAGIC) { + printk(KERN_WARNING_VIO "tty bad magic\n"); + return; + } + + /* + * Just to be paranoid, make sure the tty points back to this port + */ + pi = (struct port_info_tag *) tty->driver_data; + + if (!pi || viotty_paranoia_check(pi, tty->device, "vioHandleData")) + return; + + len = cevent->immediateDataLen; + + if (len == 0) + return; + + /* + * Log port 0 data to the hypervisor log + */ + if (port == 0) + HvCall_writeLogBuffer(cevent->immediateData, + cevent->immediateDataLen); + + /* Don't copy more bytes than there is room for in the buffer */ + if (tty->flip.count + len > TTY_FLIPBUF_SIZE) { + len = TTY_FLIPBUF_SIZE - tty->flip.count; + printk(KERN_WARNING_VIO + "console input buffer overflow!\n"); + } + + memcpy(tty->flip.char_buf_ptr, cevent->immediateData, len); + memset(tty->flip.flag_buf_ptr, TTY_NORMAL, len); + + /* Update the kernel buffer end */ + tty->flip.count += len; + tty->flip.char_buf_ptr += len; + + tty->flip.flag_buf_ptr += len; + + tty_flip_buffer_push(tty); +} + +/* Handle an ack charLpEvent. + */ +static void vioHandleAck(struct HvLpEvent *event) +{ + struct viocharlpevent *cevent = (struct viocharlpevent *) event; + unsigned long flags; + u8 port = cevent->virtualDevice; + + if (port >= VTTY_PORTS) { + printk(KERN_WARNING_VIO + "viocons: data on invalid virtual device\n"); + return; + } + + spin_lock_irqsave(&consolelock, flags); + sndMsgAck[port] = event->xCorrelationToken; + spin_unlock_irqrestore(&consolelock, flags); + + if (overflow[port].bufferUsed) + sendBuffers(port, port_info[port].lp); +} + +/* Handle charLpEvents and route to the appropriate routine + */ +static void vioHandleCharEvent(struct HvLpEvent *event) +{ + int charminor; + + if (event == NULL) { + return; + } + charminor = event->xSubtype & VIOMINOR_SUBTYPE_MASK; + switch (charminor) { + case viocharopen: + vioHandleOpenEvent(event); + break; + case viocharclose: + vioHandleCloseEvent(event); + break; + case viochardata: + vioHandleData(event); + break; + case viocharack: + vioHandleAck(event); + break; + case viocharconfig: + vioHandleConfig(event); + break; + default: + if ((event->xFlags.xFunction == HvLpEvent_Function_Int) && + (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck)) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + } +} + +/* Send an open event + */ +static int viocons_sendOpen(HvLpIndex remoteLp, u8 port, void *sem) +{ + return HvCallEvent_signalLpEventFast(remoteLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_chario + | viocharopen, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst + (remoteLp), + viopath_targetinst + (remoteLp), + (u64) (unsigned long) + sem, VIOVERSION << 16, + ((u64) port << 48), 0, 0, 0); + +} + +int __init viocons_init2(void) +{ + DECLARE_MUTEX_LOCKED(Semaphore); + int rc; + + /* + * Now open to the primary LP + */ + printk(KERN_INFO_VIO "console open path to primary\n"); + rc = viopath_open(HvLpConfig_getPrimaryLpIndex(), viomajorsubtype_chario, viochar_window + 2); /* +2 
for fudge */ + if (rc) { + printk(KERN_WARNING_VIO + "console error opening to primary %d\n", rc); + } + + if (viopath_hostLp == HvLpIndexInvalid) { + vio_set_hostlp(); + } + + /* + * And if the primary is not the same as the hosting LP, open to the + * hosting lp + */ + if ((viopath_hostLp != HvLpIndexInvalid) && + (viopath_hostLp != HvLpConfig_getPrimaryLpIndex())) { + printk(KERN_INFO_VIO + "console open path to hosting (%d)\n", + viopath_hostLp); + rc = viopath_open(viopath_hostLp, viomajorsubtype_chario, viochar_window + 2); /* +2 for fudge */ + if (rc) { + printk(KERN_WARNING_VIO + "console error opening to partition %d: %d\n", + viopath_hostLp, rc); + } + } + + if (vio_setHandler(viomajorsubtype_chario, vioHandleCharEvent) < 0) { + printk(KERN_WARNING_VIO + "Error setting handler for console events!\n"); + } + + printk(KERN_INFO_VIO "console major number is %d\n", TTY_MAJOR); + + /* First, try to open the console to the hosting lp. + * Wait on a semaphore for the response. + */ + if ((viopath_isactive(viopath_hostLp)) && + (viocons_sendOpen(viopath_hostLp, 0, &Semaphore) == 0)) { + printk(KERN_INFO_VIO + "opening console to hosting partition %d\n", + viopath_hostLp); + down(&Semaphore); + } + + /* + * If we don't have an active console, try the primary + */ + if ((!viopath_isactive(port_info[0].lp)) && + (viopath_isactive(HvLpConfig_getPrimaryLpIndex())) && + (viocons_sendOpen + (HvLpConfig_getPrimaryLpIndex(), 0, &Semaphore) == 0)) { + printk(KERN_INFO_VIO + "opening console to primary partition\n"); + down(&Semaphore); + } + + /* Initialize the tty_driver structure */ + memset(&viotty_driver, 0, sizeof(struct tty_driver)); + viotty_driver.magic = TTY_DRIVER_MAGIC; + viotty_driver.driver_name = "vioconsole"; +#if defined(CONFIG_DEVFS_FS) + viotty_driver.name = "tty%d"; +#else + viotty_driver.name = "tty"; +#endif + viotty_driver.major = TTY_MAJOR; + viotty_driver.minor_start = 1; + viotty_driver.name_base = 1; + viotty_driver.num = VTTY_PORTS; + viotty_driver.type = TTY_DRIVER_TYPE_CONSOLE; + viotty_driver.subtype = 1; + viotty_driver.init_termios = tty_std_termios; + viotty_driver.flags = + TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS; + viotty_driver.refcount = &viotty_refcount; + viotty_driver.table = viotty_table; + viotty_driver.termios = viotty_termios; + viotty_driver.termios_locked = viotty_termios_locked; + + viotty_driver.open = viotty_open; + viotty_driver.close = viotty_close; + viotty_driver.write = viotty_write; + viotty_driver.put_char = viotty_put_char; + viotty_driver.flush_chars = viotty_flush_chars; + viotty_driver.write_room = viotty_write_room; + viotty_driver.chars_in_buffer = viotty_chars_in_buffer; + viotty_driver.flush_buffer = viotty_flush_buffer; + viotty_driver.ioctl = viotty_ioctl; + viotty_driver.throttle = viotty_throttle; + viotty_driver.unthrottle = viotty_unthrottle; + viotty_driver.set_termios = viotty_set_termios; + viotty_driver.stop = viotty_stop; + viotty_driver.start = viotty_start; + viotty_driver.hangup = viotty_hangup; + viotty_driver.break_ctl = viotty_break; + viotty_driver.send_xchar = viotty_send_xchar; + viotty_driver.wait_until_sent = viotty_wait_until_sent; + + viottyS_driver = viotty_driver; +#if defined(CONFIG_DEVFS_FS) + viottyS_driver.name = "ttyS%d"; +#else + viottyS_driver.name = "ttyS"; +#endif + viottyS_driver.major = TTY_MAJOR; + viottyS_driver.minor_start = VIOTTY_SERIAL_START; + viottyS_driver.type = TTY_DRIVER_TYPE_SERIAL; + viottyS_driver.table = viottyS_table; + viottyS_driver.termios = viottyS_termios; + 
viottyS_driver.termios_locked = viottyS_termios_locked; + + if (tty_register_driver(&viotty_driver)) { + printk(KERN_WARNING_VIO + "Couldn't register console driver\n"); + } + + if (tty_register_driver(&viottyS_driver)) { + printk(KERN_WARNING_VIO + "Couldn't register console S driver\n"); + } + /* Now create the vcs and vcsa devfs entries so mingetty works */ +#if defined(CONFIG_DEVFS_FS) + { + struct tty_driver temp_driver = viotty_driver; + int i; + + temp_driver.name = "vcs%d"; + for (i = 0; i < VTTY_PORTS; i++) + tty_register_devfs(&temp_driver, + 0, i + temp_driver.minor_start); + + temp_driver.name = "vcsa%d"; + for (i = 0; i < VTTY_PORTS; i++) + tty_register_devfs(&temp_driver, + 0, i + temp_driver.minor_start); + + // For compatibility with some earlier code only! + // This will go away!!! + temp_driver.name = "viocons/%d"; + temp_driver.name_base = 0; + for (i = 0; i < VTTY_PORTS; i++) + tty_register_devfs(&temp_driver, + 0, i + temp_driver.minor_start); + } +#endif + + /* + * Create the proc entry + */ + iSeries_proc_callback(&viocons_proc_init); + + return 0; +} + +void __init viocons_init(void) +{ + int i; + printk(KERN_INFO_VIO "registering console\n"); + + memset(&port_info, 0x00, sizeof(port_info)); + for (i = 0; i < VTTY_PORTS; i++) { + sndMsgSeq[i] = sndMsgAck[i] = 0; + port_info[i].port = i; + port_info[i].lp = HvLpIndexInvalid; + port_info[i].magic = VIOTTY_MAGIC; + } + + register_console(&viocons); + memset(overflow, 0x00, sizeof(overflow)); + debug = 0; + + HvCall_setLogBufferFormatAndCodepage(HvCall_LogBuffer_ASCII, 437); +} diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/iseries/viodasd.c linuxppc64_2_4/drivers/iseries/viodasd.c --- ../kernel.org/linux-2.4.19/drivers/iseries/viodasd.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/iseries/viodasd.c Wed Apr 3 12:27:16 2002 @@ -0,0 +1,1623 @@ +/* -*- linux-c -*- + * viodasd.c + * Authors: Dave Boutcher + * Ryan Arnold + * Colin Devilbiss + * + * (C) Copyright 2000 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *************************************************************************** + * This routine provides access to disk space (termed "DASD" in historical + * IBM terms) owned and managed by an OS/400 partition running on the + * same box as this Linux partition. + * + * All disk operations are performed by sending messages back and forth to + * the OS/400 partition. + * + * This device driver can either use its own major number, or it can + * pretend to be an IDE drive (grep 'IDE[0-9]_MAJOR' ../../include/linux/major.h). + * This is controlled with a CONFIG option. You can either call this an + * elegant solution to the fact that a lot of software doesn't recognize + * a new disk major number...or you can call this a really ugly hack. + * Your choice. 
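+ * + * (Either way, the partition number lives in the low-order bits of the + * minor number; PARTITION_SHIFT below says how many of those bits each + * choice leaves for partitions.)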
+ */ + +#include +#include + +/* Changelog: + 2001-11-27 devilbis Added first pass at complete IDE emulation + */ + +/* Decide if we are using our own major or pretending to be an IDE drive + * + * If we are using our own major, we only support 7 partitions per physical + * disk....so with minor numbers 0-255 we get a maximum of 32 disks. If we + * are emulating IDE, we get 63 partitions per disk, with a maximum of 4 + * disks per major, but common practice is to place only 2 devices in /dev + * for each IDE major, for a total of 20 (since there are 10 IDE majors). + */ + +#ifdef CONFIG_VIODASD_IDE +static const int major_table[] = { + IDE0_MAJOR, + IDE1_MAJOR, + IDE2_MAJOR, + IDE3_MAJOR, + IDE4_MAJOR, + IDE5_MAJOR, + IDE6_MAJOR, + IDE7_MAJOR, + IDE8_MAJOR, + IDE9_MAJOR, +}; +enum { + DEV_PER_MAJOR = 2, + PARTITION_SHIFT = 6, +}; +static int major_to_index(int major) +{ + switch(major) { + case IDE0_MAJOR: return 0; + case IDE1_MAJOR: return 1; + case IDE2_MAJOR: return 2; + case IDE3_MAJOR: return 3; + case IDE4_MAJOR: return 4; + case IDE5_MAJOR: return 5; + case IDE6_MAJOR: return 6; + case IDE7_MAJOR: return 7; + case IDE8_MAJOR: return 8; + case IDE9_MAJOR: return 9; + default: + return -1; + } +} +#define do_viodasd_request do_hd_request +#define VIOD_DEVICE_NAME "hd" +#define VIOD_GENHD_NAME "hd" +#else /* !CONFIG_VIODASD_IDE */ +static const int major_table[] = { + VIODASD_MAJOR, +}; +enum { + DEV_PER_MAJOR = 32, + PARTITION_SHIFT = 3, +}; +static int major_to_index(int major) +{ + if(major != VIODASD_MAJOR) + return -1; + return 0; +} +#define VIOD_DEVICE_NAME "viod" +#ifdef CONFIG_DEVFS_FS +#define VIOD_GENHD_NAME "viod" +#else +#define VIOD_GENHD_NAME "iSeries/vd" +#endif +#endif /* CONFIG_VIODASD_IDE */ + +#define DEVICE_NR(dev) (devt_to_diskno(dev)) +#define LOCAL_END_REQUEST + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "vio.h" +#include + +MODULE_DESCRIPTION("iSeries Virtual DASD"); +MODULE_AUTHOR("Dave Boutcher"); +MODULE_LICENSE("GPL"); + +#define VIODASD_VERS "1.50" + +enum { + NUM_MAJORS = sizeof(major_table) / sizeof(major_table[0]), + MAX_DISKNO = DEV_PER_MAJOR * NUM_MAJORS, + MAX_MAJOR_NAME = 4 + 1, /* maximum length of a gendisk->name */ +}; + +static volatile int viodasd_max_disk = MAX_DISKNO - 1; + +static int diskno_to_major(int diskno) +{ + if (diskno >= MAX_DISKNO) + return -1; + return major_table[diskno / DEV_PER_MAJOR]; +} +static int devt_to_diskno(kdev_t dev) +{ + return major_to_index(MAJOR(dev)) * DEV_PER_MAJOR + + (MINOR(dev) >> PARTITION_SHIFT); +} +static int diskno_to_devt(int diskno, int partition) +{ + return MKDEV(diskno_to_major(diskno), + ((diskno % DEV_PER_MAJOR) << PARTITION_SHIFT) + + partition); +} + +#define VIOMAXREQ 16 +#define VIOMAXBLOCKDMA 12 + +extern struct pci_dev *iSeries_vio_dev; + +struct openData { + u64 mDiskLen; + u16 mMaxDisks; + u16 mCylinders; + u16 mTracks; + u16 mSectors; + u16 mBytesPerSector; +}; + +struct rwData { // Used during rw + u64 mOffset; + struct { + u32 mToken; + u32 reserved; + u64 mLen; + } dmaInfo[VIOMAXBLOCKDMA]; +}; + +struct vioblocklpevent { + struct HvLpEvent event; + u32 mReserved1; + u16 mVersion; + u16 mSubTypeRc; + u16 mDisk; + u16 mFlags; + union { + struct openData openData; + struct rwData rwData; + struct { + u64 changed; + } check; + } u; +}; + +#define vioblockflags_ro 0x0001 + +enum vioblocksubtype { + vioblockopen = 0x0001, + vioblockclose = 
0x0002, + vioblockread = 0x0003, + vioblockwrite = 0x0004, + vioblockflush = 0x0005, + vioblockcheck = 0x0007 +}; + +/* In a perfect world we will perform better if we get page-aligned I/O + * requests, in multiples of pages. At least peg our block size to the + * actual page size. + */ +static int blksize = HVPAGESIZE; /* in bytes */ + +static DECLARE_WAIT_QUEUE_HEAD(viodasd_wait); +struct viodasd_waitevent { + struct semaphore *sem; + int rc; + union { + int changed; /* Used only for check_change */ + u16 subRC; + } data; +}; + +static const struct vio_error_entry viodasd_err_table[] = { + {0x0201, EINVAL, "Invalid Range"}, + {0x0202, EINVAL, "Invalid Token"}, + {0x0203, EIO, "DMA Error"}, + {0x0204, EIO, "Use Error"}, + {0x0205, EIO, "Release Error"}, + {0x0206, EINVAL, "Invalid Disk"}, + {0x0207, EBUSY, "Can't Lock"}, + {0x0208, EIO, "Already Locked"}, + {0x0209, EIO, "Already Unlocked"}, + {0x020A, EIO, "Invalid Arg"}, + {0x020B, EIO, "Bad IFS File"}, + {0x020C, EROFS, "Read Only Device"}, + {0x02FF, EIO, "Internal Error"}, + {0x0000, 0, NULL}, +}; + +/* Our gendisk table + */ +static struct gendisk viodasd_gendisk[NUM_MAJORS]; + +static struct gendisk *major_to_gendisk(int major) +{ + int index = major_to_index(major); + return index < 0 ? NULL : &viodasd_gendisk[index]; +} +static struct hd_struct *devt_to_partition(kdev_t dev) +{ + return &major_to_gendisk(MAJOR(dev))->part[MINOR(dev)]; +} + +/* Figure out the biggest I/O request (in sectors) we can accept + */ +#define VIODASD_MAXSECTORS (4096 / 512 * VIOMAXBLOCKDMA) + +/* Keep some statistics on what's happening for the PROC file system + */ +static struct { + long tot; + long nobh; + long ntce[VIOMAXBLOCKDMA]; +} viod_stats[MAX_DISKNO][2]; + +/* Number of disk I/O requests we've sent to OS/400 + */ +static int num_req_outstanding; + +/* This is our internal structure for keeping track of disk devices + */ +struct viodasd_device { + int useCount; + u16 cylinders; + u16 tracks; + u16 sectors; + u16 bytesPerSector; + u64 size; + int readOnly; +} *viodasd_devices; + +/* When we get a disk I/O request we take it off the general request queue + * and put it here. + */ +static LIST_HEAD(reqlist); + +/* Handle reads from the proc file system + */ +static int proc_read(char *buf, char **start, off_t offset, + int blen, int *eof, void *data) +{ + int len = 0; + int i; + int j; + +#if defined(MODULE) + len += + sprintf(buf + len, + "viod Module opened %d times. 
Major number %d\n", + MOD_IN_USE, major_table[0]); +#endif + len += + sprintf(buf + len, "viod %d possible devices\n", MAX_DISKNO); + + for (i = 0; i < 16; i++) { + if (viod_stats[i][0].tot || viod_stats[i][1].tot) { + len += + sprintf(buf + len, + "DISK %2.2d: rd %-10.10ld wr %-10.10ld (no buffer list rd %-10.10ld wr %-10.10ld\n", + i, viod_stats[i][0].tot, + viod_stats[i][1].tot, + viod_stats[i][0].nobh, + viod_stats[i][1].nobh); + + len += sprintf(buf + len, "rd DMA: "); + + for (j = 0; j < VIOMAXBLOCKDMA; j++) + len += sprintf(buf + len, " [%2.2d] %ld", + j, + viod_stats[i][0].ntce[j]); + + len += sprintf(buf + len, "\nwr DMA: "); + + for (j = 0; j < VIOMAXBLOCKDMA; j++) + len += sprintf(buf + len, " [%2.2d] %ld", + j, + viod_stats[i][1].ntce[j]); + len += sprintf(buf + len, "\n"); + } + } + + *eof = 1; + return len; +} + +/* Handle writes to our proc file system + */ +static int proc_write(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + return count; +} + +/* setup our proc file system entries + */ +void viodasd_proc_init(struct proc_dir_entry *iSeries_proc) +{ + struct proc_dir_entry *ent; + ent = + create_proc_entry("viodasd", S_IFREG | S_IRUSR, iSeries_proc); + if (!ent) + return; + ent->nlink = 1; + ent->data = NULL; + ent->read_proc = proc_read; + ent->write_proc = proc_write; +} + +/* clean up our proc file system entries + */ +void viodasd_proc_delete(struct proc_dir_entry *iSeries_proc) +{ + remove_proc_entry("viodasd", iSeries_proc); +} + +/* End a request + */ +static void viodasd_end_request(struct request *req, int uptodate) +{ + if (end_that_request_first(req, uptodate, VIOD_DEVICE_NAME)) + return; + + end_that_request_last(req); +} + +/* This rebuilds the partition information for a single disk device + */ +static int viodasd_revalidate(kdev_t dev) +{ + int i; + int device_no = DEVICE_NR(dev); + int dev_within_major = device_no % DEV_PER_MAJOR; + int part0 = (dev_within_major << PARTITION_SHIFT); + int npart = (1 << PARTITION_SHIFT); + int major = MAJOR(dev); + struct gendisk *gendisk = major_to_gendisk(major); + + if (viodasd_devices[device_no].size == 0) + return 0; + + for (i = npart - 1; i >= 0; i--) { + int minor = part0 + i; + struct hd_struct *partition = &gendisk->part[minor]; + + if (partition->nr_sects != 0) { + kdev_t devp = MKDEV(major, minor); + struct super_block *sb; + fsync_dev(devp); + + sb = get_super(devp); + if (sb) + invalidate_inodes(sb); + + invalidate_buffers(devp); + } + + partition->start_sect = 0; + partition->nr_sects = 0; + } + + grok_partitions(gendisk, dev_within_major, npart, + viodasd_devices[device_no].size >> 9); + + return 0; +} + + +static u16 access_flags(mode_t mode) +{ + u16 flags = 0; + if (!(mode & FMODE_WRITE)) + flags |= vioblockflags_ro; + return flags; +} + +/* This is the actual open code. 
It gets called from the external + * open entry point, as well as from the init code when we're figuring + * out what disks we have + */ +static int internal_open(int device_no, u16 flags) +{ + int i; + const int dev_within_major = device_no % DEV_PER_MAJOR; + struct gendisk *gendisk = + major_to_gendisk(diskno_to_major(device_no)); + HvLpEvent_Rc hvrc; + /* This semaphore is raised in the interrupt handler */ + DECLARE_MUTEX_LOCKED(Semaphore); + struct viodasd_waitevent we = { sem:&Semaphore }; + + /* Check that we are dealing with a valid hosting partition */ + if (viopath_hostLp == HvLpIndexInvalid) { + printk(KERN_WARNING_VIO "Invalid hosting partition\n"); + return -EIO; + } + + /* Send the open event to OS/400 */ + hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_blockio | + vioblockopen, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst + (viopath_hostLp), + viopath_targetinst + (viopath_hostLp), + (u64) (unsigned long) &we, + VIOVERSION << 16, + ((u64) device_no << 48) | + ((u64) flags << 32), 0, 0, 0); + + if (hvrc != 0) { + printk(KERN_WARNING_VIO "bad rc on signalLpEvent %d\n", + (int) hvrc); + return -EIO; + } + + /* Wait for the interrupt handler to get the response */ + down(&Semaphore); + + /* Check the return code */ + if (we.rc != 0) { + const struct vio_error_entry *err = + vio_lookup_rc(viodasd_err_table, we.data.subRC); + printk(KERN_WARNING_VIO + "bad rc opening disk: %d:0x%04x (%s)\n", + (int) we.rc, we.data.subRC, err->msg); + return -err->errno; + } + + /* If this is the first open of this device, update the device information */ + /* If this is NOT the first open, assume that it isn't changing */ + if (viodasd_devices[device_no].useCount == 0) { + if (viodasd_devices[device_no].size > 0) { + /* divide by 512 */ + u64 tmpint = viodasd_devices[device_no].size >> 9; + gendisk->part[dev_within_major << PARTITION_SHIFT].nr_sects = tmpint; + /* Now the value divided by 1024 */ + tmpint = tmpint >> 1; + gendisk->sizes[dev_within_major << PARTITION_SHIFT] = tmpint; + + for (i = dev_within_major << PARTITION_SHIFT; + i < ((dev_within_major + 1) << PARTITION_SHIFT); + i++) + { + hardsect_size[diskno_to_major(device_no)][i] = + viodasd_devices[device_no].bytesPerSector; + } + } + } else { + /* If the size of the device changed, weird things are happening! */ + if (gendisk->sizes[dev_within_major << PARTITION_SHIFT] != + viodasd_devices[device_no].size >> 10) { + printk(KERN_WARNING_VIO + "disk size change (%dK to %dK) for device %d\n", + gendisk->sizes[dev_within_major << PARTITION_SHIFT], + (int) viodasd_devices[device_no].size >> 10, device_no); + } + } + + /* Bump the use count */ + viodasd_devices[device_no].useCount++; + return 0; +} + +/* This is the actual release code. It gets called from the external + * release entry point, as well as from the init code when we're figuring + * out what disks we have + */ +static int internal_release(int device_no, u16 flags) +{ + /* Send the event to OS/400. 
We DON'T expect a response */ + HvLpEvent_Rc hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_blockio + | vioblockclose, + HvLpEvent_AckInd_NoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst + (viopath_hostLp), + viopath_targetinst + (viopath_hostLp), + 0, + VIOVERSION << 16, + ((u64) device_no + << 48) | ((u64) + flags + << + 32), + 0, 0, 0); + + viodasd_devices[device_no].useCount--; + + if (hvrc != 0) { + printk(KERN_WARNING_VIO + "bad rc sending event to OS/400 %d\n", (int) hvrc); + return -EIO; + } + return 0; +} + + +static void internal_register_new_disk(int diskno); + +/* External open entry point. + */ +static int viodasd_open(struct inode *ino, struct file *fil) +{ + int device_no; + int old_max_disk = viodasd_max_disk; + + /* Do a bunch of sanity checks */ + if (!ino) { + printk(KERN_WARNING_VIO "no inode provided in open\n"); + return -ENODEV; + } + + if (major_to_index(MAJOR(ino->i_rdev)) < 0) { + printk(KERN_WARNING_VIO + "Weird error...wrong major number on open\n"); + return -ENODEV; + } + + device_no = DEVICE_NR(ino->i_rdev); + if (device_no > MAX_DISKNO || device_no < 0) { + printk(KERN_WARNING_VIO + "Invalid device number %d in open\n", device_no); + return -ENODEV; + } + + /* Call the actual open code */ + if (internal_open(device_no, access_flags(fil ? fil->f_mode : 0)) == 0) { + int i; + MOD_INC_USE_COUNT; + /* For each new disk: */ + /* update the disk's geometry via internal_open and register it */ + for (i = old_max_disk + 1; i <= viodasd_max_disk; ++i) { + internal_open(i, vioblockflags_ro); + internal_register_new_disk(i); + internal_release(i, vioblockflags_ro); + } + return 0; + } else { + return -EIO; + } +} + +/* External release entry point. + */ +static int viodasd_release(struct inode *ino, struct file *fil) +{ + int device_no; + + /* Do a bunch of sanity checks */ + if (!ino) { + printk(KERN_WARNING_VIO "no inode provided in release\n"); + return -ENODEV; + } + + if (major_to_index(MAJOR(ino->i_rdev)) < 0) { + printk(KERN_WARNING_VIO + "Weird error...wrong major number on release\n"); + return -ENODEV; + } + + device_no = DEVICE_NR(ino->i_rdev); + + if (device_no > MAX_DISKNO || device_no < 0) { + printk("Tried to release invalid disk number %d\n", + device_no); + return -ENODEV; + } + + /* Call the actual release code */ + internal_release(device_no, access_flags(fil ? fil->f_mode : 0)); + + MOD_DEC_USE_COUNT; + return 0; +} + +/* External ioctl entry point. 
+ */ +static int viodasd_ioctl(struct inode *ino, struct file *fil, + unsigned int cmd, unsigned long arg) +{ + int device_no; + int err; + HvLpEvent_Rc hvrc; + struct hd_struct *partition; + DECLARE_MUTEX_LOCKED(Semaphore); + + /* Sanity checks */ + if (!ino) { + printk(KERN_WARNING_VIO "no inode provided in ioctl\n"); + return -ENODEV; + } + + if (major_to_index(MAJOR(ino->i_rdev)) < 0) { + printk(KERN_WARNING_VIO + "Weird error...wrong major number on ioctl\n"); + return -ENODEV; + } + + partition = devt_to_partition(ino->i_rdev); + + device_no = DEVICE_NR(ino->i_rdev); + if (device_no > viodasd_max_disk) { + printk(KERN_WARNING_VIO + "Invalid device number %d in ioctl\n", device_no); + return -ENODEV; + } + + switch (cmd) { + case BLKGETSIZE: + /* return the device size in sectors */ + if (!arg) + return -EINVAL; + err = + verify_area(VERIFY_WRITE, (long *) arg, sizeof(long)); + if (err) + return err; + + put_user(partition->nr_sects, (long *) arg); + return 0; + + case FDFLUSH: + case BLKFLSBUF: + if (!suser()) + return -EACCES; + fsync_dev(ino->i_rdev); + invalidate_buffers(ino->i_rdev); + hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_blockio + | vioblockflush, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst + (viopath_hostLp), + viopath_targetinst + (viopath_hostLp), + (u64) (unsigned long) + &Semaphore, + VIOVERSION << 16, + ((u64) device_no << + 48), 0, 0, 0); + + + if (hvrc != 0) { + printk(KERN_WARNING_VIO + "bad rc on sync signalLpEvent %d\n", + (int) hvrc); + return -EIO; + } + + down(&Semaphore); + + return 0; + + case BLKRAGET: + if (!arg) + return -EINVAL; + err = + verify_area(VERIFY_WRITE, (long *) arg, sizeof(long)); + if (err) + return err; + put_user(read_ahead[MAJOR(ino->i_rdev)], (long *) arg); + return 0; + + case BLKRASET: + if (!suser()) + return -EACCES; + if (arg > 0x00ff) + return -EINVAL; + read_ahead[MAJOR(ino->i_rdev)] = arg; + return 0; + + case BLKRRPART: + viodasd_revalidate(ino->i_rdev); + return 0; + + case HDIO_GETGEO: + { + unsigned char sectors; + unsigned char heads; + unsigned short cylinders; + + struct hd_geometry *geo = + (struct hd_geometry *) arg; + if (geo == NULL) + return -EINVAL; + + err = verify_area(VERIFY_WRITE, geo, sizeof(*geo)); + if (err) + return err; + + sectors = viodasd_devices[device_no].sectors; + if (sectors == 0) + sectors = 32; + + heads = viodasd_devices[device_no].tracks; + if (heads == 0) + heads = 64; + + cylinders = viodasd_devices[device_no].cylinders; + if (cylinders == 0) + cylinders = + partition->nr_sects / (sectors * + heads); + + put_user(sectors, &geo->sectors); + put_user(heads, &geo->heads); + put_user(cylinders, &geo->cylinders); + + put_user(partition->start_sect, + (long *) &geo->start); + + return 0; + } + +#define PRTIOC(x) case x: printk(KERN_WARNING_VIO "got unsupported FD ioctl " #x "\n"); \ + return -EINVAL; + + PRTIOC(FDCLRPRM); + PRTIOC(FDSETPRM); + PRTIOC(FDDEFPRM); + PRTIOC(FDGETPRM); + PRTIOC(FDMSGON); + PRTIOC(FDMSGOFF); + PRTIOC(FDFMTBEG); + PRTIOC(FDFMTTRK); + PRTIOC(FDFMTEND); + PRTIOC(FDSETEMSGTRESH); + PRTIOC(FDSETMAXERRS); + PRTIOC(FDGETMAXERRS); + PRTIOC(FDGETDRVTYP); + PRTIOC(FDSETDRVPRM); + PRTIOC(FDGETDRVPRM); + PRTIOC(FDGETDRVSTAT); + PRTIOC(FDPOLLDRVSTAT); + PRTIOC(FDRESET); + PRTIOC(FDGETFDCSTAT); + PRTIOC(FDWERRORCLR); + PRTIOC(FDWERRORGET); + PRTIOC(FDRAWCMD); + PRTIOC(FDEJECT); + PRTIOC(FDTWADDLE); + + } + + return -EINVAL; +} + +/* Send an actual I/O request to OS/400 + */ +static int 
send_request(struct request *req) +{ + u64 sect_size; + u64 start; + u64 len; + int direction; + int nsg; + u16 viocmd; + HvLpEvent_Rc hvrc; + struct vioblocklpevent *bevent; + struct scatterlist sg[VIOMAXBLOCKDMA]; + struct buffer_head *bh; + int sgindex; + int device_no = DEVICE_NR(req->rq_dev); + int dev_within_major = device_no % DEV_PER_MAJOR; + int statindex; + struct hd_struct *partition = devt_to_partition(req->rq_dev); + + if (device_no > viodasd_max_disk || device_no < 0) { + printk + ("yikes! sending a request to device %d of %d possible?\n", + device_no, viodasd_max_disk + 1); + } + + /* Note that this SHOULD always be 512...but lets be architecturally correct */ + sect_size = hardsect_size[MAJOR(req->rq_dev)][dev_within_major]; + + /* Figure out the starting sector and length */ + start = (req->sector + partition->start_sect) * sect_size; + len = req->nr_sectors * sect_size; + + /* More paranoia checks */ + if ((req->sector + req->nr_sectors) > + (partition->start_sect + partition->nr_sects)) { + printk(KERN_WARNING_VIO + "Invalid request offset & length\n"); + printk(KERN_WARNING_VIO + "req->sector: %ld, req->nr_sectors: %ld\n", + req->sector, req->nr_sectors); + printk(KERN_WARNING_VIO "major: %d, minor: %d\n", + MAJOR(req->rq_dev), MINOR(req->rq_dev)); + return -1; + } + + if (req->cmd == READ) { + direction = PCI_DMA_FROMDEVICE; + viocmd = viomajorsubtype_blockio | vioblockread; + statindex = 0; + } else { + direction = PCI_DMA_TODEVICE; + viocmd = viomajorsubtype_blockio | vioblockwrite; + statindex = 1; + } + + /* Update totals */ + viod_stats[device_no][statindex].tot++; + + /* Now build the scatter-gather list */ + memset(&sg, 0x00, sizeof(sg)); + sgindex = 0; + + /* See if this is a swap I/O (without a bh pointer) or a regular I/O */ + if (req->bh) { + /* OK...this loop takes buffers from the request and adds them to the SG + until we're done, or until we hit a maximum. If we hit a maximum we'll + just finish this request later */ + bh = req->bh; + while ((bh) && (sgindex < VIOMAXBLOCKDMA)) { + sg[sgindex].address = bh->b_data; + sg[sgindex].length = bh->b_size; + + sgindex++; + bh = bh->b_reqnext; + } + nsg = pci_map_sg(iSeries_vio_dev, sg, sgindex, direction); + if ((nsg == 0) || (sg[0].dma_length == 0) + || (sg[0].dma_address == 0xFFFFFFFF)) { + printk(KERN_WARNING_VIO "error getting sg tces\n"); + return -1; + } + + } else { + /* Update stats */ + viod_stats[device_no][statindex].nobh++; + + sg[0].dma_address = + pci_map_single(iSeries_vio_dev, req->buffer, len, + direction); + if (sg[0].dma_address == 0xFFFFFFFF) { + printk(KERN_WARNING_VIO + "error allocating tce for address %p len %ld\n", + req->buffer, (long) len); + return -1; + } + sg[0].dma_length = len; + nsg = 1; + } + + /* Update stats */ + viod_stats[device_no][statindex].ntce[sgindex]++; + + /* This optimization handles a single DMA block */ + if (sgindex == 1) { + /* Send the open event to OS/400 */ + hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_blockio + | viocmd, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst + (viopath_hostLp), + viopath_targetinst + (viopath_hostLp), + (u64) (unsigned long) + req->buffer, + VIOVERSION << 16, + ((u64) device_no << + 48), start, + ((u64) sg[0]. 
+ dma_address) << 32, + sg[0].dma_length); + } else { + bevent = + (struct vioblocklpevent *) + vio_get_event_buffer(viomajorsubtype_blockio); + if (bevent == NULL) { + printk(KERN_WARNING_VIO + "error allocating disk event buffer\n"); + return -1; + } + + /* Now build up the actual request. Note that we store the pointer */ + /* to the request buffer in the correlation token so we can match */ + /* this response up later */ + memset(bevent, 0x00, sizeof(struct vioblocklpevent)); + bevent->event.xFlags.xValid = 1; + bevent->event.xFlags.xFunction = HvLpEvent_Function_Int; + bevent->event.xFlags.xAckInd = HvLpEvent_AckInd_DoAck; + bevent->event.xFlags.xAckType = + HvLpEvent_AckType_ImmediateAck; + bevent->event.xType = HvLpEvent_Type_VirtualIo; + bevent->event.xSubtype = viocmd; + bevent->event.xSourceLp = HvLpConfig_getLpIndex(); + bevent->event.xTargetLp = viopath_hostLp; + bevent->event.xSizeMinus1 = + offsetof(struct vioblocklpevent, + u.rwData.dmaInfo) + + (sizeof(bevent->u.rwData.dmaInfo[0]) * (sgindex)) - 1; + bevent->event.xSizeMinus1 = + sizeof(struct vioblocklpevent) - 1; + bevent->event.xSourceInstanceId = + viopath_sourceinst(viopath_hostLp); + bevent->event.xTargetInstanceId = + viopath_targetinst(viopath_hostLp); + bevent->event.xCorrelationToken = + (u64) (unsigned long) req->buffer; + bevent->mVersion = VIOVERSION; + bevent->mDisk = device_no; + bevent->u.rwData.mOffset = start; + + /* Copy just the dma information from the sg list into the request */ + for (sgindex = 0; sgindex < nsg; sgindex++) { + bevent->u.rwData.dmaInfo[sgindex].mToken = + sg[sgindex].dma_address; + bevent->u.rwData.dmaInfo[sgindex].mLen = + sg[sgindex].dma_length; + } + + /* Send the request */ + hvrc = HvCallEvent_signalLpEvent(&bevent->event); + vio_free_event_buffer(viomajorsubtype_blockio, bevent); + } + + if (hvrc != HvLpEvent_Rc_Good) { + printk(KERN_WARNING_VIO + "error sending disk event to OS/400 (rc %d)\n", + (int) hvrc); + return -1; + } else { + /* If the request was successful, bump the number of outstanding */ + num_req_outstanding++; + } + return 0; +} + +/* This is the external request processing routine + */ +static void do_viodasd_request(request_queue_t * q) +{ + int device_no; + for (;;) { + struct request *req; + struct gendisk *gendisk; + + /* inlined INIT_REQUEST here because we don't define MAJOR_NR before blk.h */ + if (list_empty(&q->queue_head)) + return; + req = blkdev_entry_next_request(&q->queue_head); + if (major_to_index(MAJOR(req->rq_dev)) < 0) + panic(VIOD_DEVICE_NAME ": request list destroyed"); + if (req->bh) { + if (!buffer_locked(req->bh)) + panic(VIOD_DEVICE_NAME + ": block not locked"); + } + + gendisk = major_to_gendisk(MAJOR(req->rq_dev)); + + device_no = DEVICE_NR(req->rq_dev); + if (device_no > MAX_DISKNO || device_no < 0) { + printk(KERN_WARNING_VIO "Invalid device # %d\n", + device_no); + viodasd_end_request(req, 0); + continue; + } + + if (gendisk->sizes == NULL) { + printk(KERN_WARNING_VIO + "Ouch! gendisk->sizes is NULL\n"); + viodasd_end_request(req, 0); + continue; + } + + /* If the queue is plugged, don't dequeue anything right now */ + if ((q) && (q->plugged)) { + return; + } + + /* If we already have the maximum number of requests outstanding to OS/400 + just bail out. 
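+ (An illustrative aside on the flow control, condensed from the code:
+
+     num_req_outstanding++;    // after a successful send_request()
+     num_req_outstanding--;    // in viodasd_handleReadWrite()
+
+ and since viodasd_init() opened the path with VIOMAXREQ + 2 events,
+ staying at or below VIOMAXREQ always leaves event buffers free.)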
We'll come back later */ + if (num_req_outstanding >= VIOMAXREQ) { + return; + } + + /* get the current request, then dequeue it from the queue */ + blkdev_dequeue_request(req); + + /* Try sending the request */ + if (send_request(req) == 0) { + list_add_tail(&req->queue, &reqlist); + } else { + viodasd_end_request(req, 0); + } + } +} + +/* Check for changed disks + */ +static int viodasd_check_change(kdev_t dev) +{ + struct viodasd_waitevent we; + HvLpEvent_Rc hvrc; + int device_no = DEVICE_NR(dev); + + /* This semaphore is raised in the interrupt handler */ + DECLARE_MUTEX_LOCKED(Semaphore); + + /* Check that we are dealing with a valid hosting partition */ + if (viopath_hostLp == HvLpIndexInvalid) { + printk(KERN_WARNING_VIO "Invalid hosting partition\n"); + return -EIO; + } + + we.sem = &Semaphore; + + /* Send the open event to OS/400 */ + hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_blockio | + vioblockcheck, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst + (viopath_hostLp), + viopath_targetinst + (viopath_hostLp), + (u64) (unsigned long) &we, + VIOVERSION << 16, + ((u64) device_no << 48), 0, 0, + 0); + + if (hvrc != 0) { + printk(KERN_WARNING_VIO "bad rc on signalLpEvent %d\n", + (int) hvrc); + return -EIO; + } + + /* Wait for the interrupt handler to get the response */ + down(&Semaphore); + + /* Check the return code. If bad, assume no change */ + if (we.rc != 0) { + printk(KERN_WARNING_VIO + "bad rc %d on check_change. Assuming no change\n", + (int) we.rc); + return 0; + } + + return we.data.changed; +} + +/* Our file operations table + */ +static struct block_device_operations viodasd_fops = { + open:viodasd_open, + release:viodasd_release, + ioctl:viodasd_ioctl, + check_media_change:viodasd_check_change, + revalidate:viodasd_revalidate +}; + +/* returns the total number of scatterlist elements converted */ +static int block_event_to_scatterlist(const struct vioblocklpevent *bevent, + struct scatterlist *sg, + int *total_len) +{ + int i, numsg; + const struct rwData *rwData = &bevent->u.rwData; + static const int offset = + offsetof(struct vioblocklpevent, u.rwData.dmaInfo); + static const int element_size = sizeof(rwData->dmaInfo[0]); + + numsg = ((bevent->event.xSizeMinus1 + 1) - offset) / element_size; + if (numsg > VIOMAXBLOCKDMA) + numsg = VIOMAXBLOCKDMA; + + *total_len = 0; + memset(sg, 0x00, sizeof(sg[0]) * VIOMAXBLOCKDMA); + + for (i = 0; (i < numsg) && (rwData->dmaInfo[i].mLen > 0); ++i) { + sg[i].dma_address = rwData->dmaInfo[i].mToken; + sg[i].dma_length = rwData->dmaInfo[i].mLen; + *total_len += rwData->dmaInfo[i].mLen; + } + return i; +} + +static struct request *find_request_with_token(u64 token) +{ + struct request *req = blkdev_entry_to_request(reqlist.next); + while ((&req->queue != &reqlist) && + ((u64) (unsigned long) req->buffer != token)) + req = blkdev_entry_to_request(req->queue.next); + if (&req->queue == &reqlist) { + return NULL; + } + return req; +} + +/* Restart all queues, starting with the one _after_ the major given, */ +/* thus reducing the chance of starvation of disks with late majors. 
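+ (Illustrative: with NUM_MAJORS == 4 and first_major at index 1, the
+ queues are run in the order 2, 3, 0, 1, so the queue that just
+ completed a request is revisited last.)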
*/ +static void viodasd_restart_all_queues_starting_from(int first_major) +{ + int i, first_index = major_to_index(first_major); + for(i = first_index + 1; i < NUM_MAJORS; ++i) + do_viodasd_request(BLK_DEFAULT_QUEUE(major_table[i])); + for(i = 0; i <= first_index; ++i) + do_viodasd_request(BLK_DEFAULT_QUEUE(major_table[i])); +} + +/* For read and write requests, decrement the number of outstanding requests, + * Free the DMA buffers we allocated, and find the matching request by + * using the buffer pointer we stored in the correlation token. + */ +static int viodasd_handleReadWrite(struct vioblocklpevent *bevent) +{ + int num_sg, num_sect, pci_direction, total_len, major; + struct request *req; + struct scatterlist sg[VIOMAXBLOCKDMA]; + struct HvLpEvent *event = &bevent->event; + unsigned long irq_flags; + + num_sg = block_event_to_scatterlist(bevent, sg, &total_len); + num_sect = total_len >> 9; + if (event->xSubtype == (viomajorsubtype_blockio | vioblockread)) + pci_direction = PCI_DMA_FROMDEVICE; + else + pci_direction = PCI_DMA_TODEVICE; + pci_unmap_sg(iSeries_vio_dev, sg, num_sg, pci_direction); + + + /* Since this is running in interrupt mode, we need to make sure we're not + * stepping on any global I/O operations + */ + spin_lock_irqsave(&io_request_lock, irq_flags); + + num_req_outstanding--; + + /* Now find the matching request in OUR list (remember we moved the request + * from the global list to our list when we got it) + */ + req = find_request_with_token(bevent->event.xCorrelationToken); + if (req == NULL) { + printk(KERN_WARNING_VIO + "Yikes! No request matching 0x%lx found\n", + bevent->event.xCorrelationToken); + spin_unlock_irqrestore(&io_request_lock, irq_flags); + return -1; + } + + /* Remove the request from our list */ + list_del(&req->queue); + /* Record this event's major number so we can check that queue again */ + major = MAJOR(req->rq_dev); + + if (!req->bh) { + if (event->xRc != HvLpEvent_Rc_Good) { + const struct vio_error_entry *err = + vio_lookup_rc(viodasd_err_table, + bevent->mSubTypeRc); + printk(KERN_WARNING_VIO + "read/write error %d:0x%04x (%s)\n", + event->xRc, bevent->mSubTypeRc, err->msg); + viodasd_end_request(req, 0); + } else { + if (num_sect != req->current_nr_sectors) { + printk(KERN_WARNING_VIO + "Yikes...non bh i/o # sect doesn't match!!!\n"); + } + viodasd_end_request(req, 1); + } + } else { + /* record having received the answers we did */ + while ((num_sect > 0) && (req->bh)) { + num_sect -= req->current_nr_sectors; + viodasd_end_request(req, 1); + } + /* if they somehow answered _more_ than we asked for...something weird happened */ + if (num_sect) + printk(KERN_WARNING_VIO + "Yikes...sectors left over on a request!!!\n"); + + /* if they didn't answer the whole request this time, re-submit the request */ + if (req->bh) { + if (send_request(req) == 0) { + list_add_tail(&req->queue, &reqlist); + } else { + viodasd_end_request(req, 0); + } + } + } + + /* Finally, try to get more requests off of this device's queue */ + viodasd_restart_all_queues_starting_from(major); + + spin_unlock_irqrestore(&io_request_lock, irq_flags); + + return 0; +} + +/* This routine handles incoming block LP events */ +static void vioHandleBlockEvent(struct HvLpEvent *event) +{ + struct vioblocklpevent *bevent = (struct vioblocklpevent *) event; + struct viodasd_waitevent *pwe; + + if (event == NULL) { + /* Notification that a partition went away! 
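+ By convention viopath hands every registered handler a NULL event
+ when the monitor ack shows the other side died, so each handler must
+ tolerate NULL. An illustrative skeleton, with a hypothetical name:
+
+     static void my_subtype_handler(struct HvLpEvent *event)
+     {
+             if (!event)
+                     return;   // partition died: clean up and bail
+             // ...normal event handling...
+     }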
*/ + return; + } + // First, we should NEVER get an int here...only acks + if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + printk(KERN_WARNING_VIO + "Yikes! got an int in viodasd event handler!\n"); + if (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + } + + switch (event->xSubtype & VIOMINOR_SUBTYPE_MASK) { + + /* Handle a response to an open request. We get all the disk information + * in the response, so update it. The correlation token contains a pointer to + * a waitevent structure that has a semaphore in it. update the return code + * in the waitevent structure and post the semaphore to wake up the guy who + * sent the request */ + case vioblockopen: + pwe = + (struct viodasd_waitevent *) (unsigned long) event-> + xCorrelationToken; + pwe->rc = event->xRc; + pwe->data.subRC = bevent->mSubTypeRc; + if (event->xRc == HvLpEvent_Rc_Good) { + const struct openData *data = &bevent->u.openData; + struct viodasd_device *device = + &viodasd_devices[bevent->mDisk]; + device->readOnly = + bevent->mFlags & vioblockflags_ro; + device->size = data->mDiskLen; + device->cylinders = data->mCylinders; + device->tracks = data->mTracks; + device->sectors = data->mSectors; + device->bytesPerSector = data->mBytesPerSector; + viodasd_max_disk = data->mMaxDisks; + } + up(pwe->sem); + break; + case vioblockclose: + break; + case vioblockcheck: + pwe = + (struct viodasd_waitevent *) (unsigned long) event-> + xCorrelationToken; + pwe->rc = event->xRc; + pwe->data.changed = bevent->u.check.changed; + up(pwe->sem); + break; + case vioblockflush: + up((void *) (unsigned long) event->xCorrelationToken); + break; + case vioblockread: + case vioblockwrite: + viodasd_handleReadWrite(bevent); + break; + + default: + printk(KERN_WARNING_VIO "invalid subtype!"); + if (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + } +} + +/* This routine tries to clean up anything we allocated/registered + */ +static void viodasd_cleanup_major(int major) +{ + const int num_partitions = DEV_PER_MAJOR << PARTITION_SHIFT; + int minor; + +#define CLEANIT(x) if (x) {kfree(x); x=NULL;} + + for (minor = 0; minor < num_partitions; minor++) + fsync_dev(MKDEV(major, minor)); + + blk_cleanup_queue(BLK_DEFAULT_QUEUE(major)); + + read_ahead[major] = 0; + + CLEANIT(blk_size[major]); + CLEANIT(blksize_size[major]); + CLEANIT(hardsect_size[major]); + CLEANIT(max_sectors[major]); + CLEANIT(major_to_gendisk(major)->part); + + blk_cleanup_queue(BLK_DEFAULT_QUEUE(major)); + + devfs_unregister_blkdev(major, VIOD_DEVICE_NAME); +} + +static const char *major_name(int major) +{ + static char major_names[NUM_MAJORS][MAX_MAJOR_NAME]; + int index = major_to_index(major); + + if(index < 0) + return NULL; + else if(index == 0) + strcpy(major_names[index], VIOD_GENHD_NAME); + else + sprintf(major_names[index], VIOD_GENHD_NAME"%d", index); + + return major_names[index]; +} + +/* in case of bad return code, caller must cleanup2() for this major */ +static int viodasd_init_major(int major) +{ + int i; + const int numpart = DEV_PER_MAJOR << PARTITION_SHIFT; + int *sizes, *sectsizes, *blksizes, *maxsectors; + struct hd_struct *partitions; + struct gendisk *gendisk = major_to_gendisk(major); + + /* + * Do the devfs_register. 
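+ * (In the 2.4 headers devfs_register_blkdev() falls back to plain
+ * register_blkdev() when CONFIG_DEVFS_FS is off, so no #ifdef is
+ * needed here.)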
This works even if devfs is not + * configured + */ + if (devfs_register_blkdev(major, VIOD_DEVICE_NAME, &viodasd_fops)) { + printk(KERN_WARNING_VIO + "%s: can't register major number %d\n", + VIOD_DEVICE_NAME, major); + return -1; + } + + blk_init_queue(BLK_DEFAULT_QUEUE(major), do_viodasd_request); + + read_ahead[major] = 8; /* 8 sector (4kB) read ahead */ + + /* initialize the struct */ + gendisk->major = major; + gendisk->major_name = major_name(major); + gendisk->minor_shift = PARTITION_SHIFT; + gendisk->max_p = 1 << PARTITION_SHIFT; + gendisk->nr_real = DEV_PER_MAJOR; + gendisk->fops = &viodasd_fops; + + /* to be assigned later */ + gendisk->next = NULL; + gendisk->part = NULL; + gendisk->sizes = NULL; + gendisk->de_arr = NULL; + gendisk->flags = NULL; + + /* register us in the global list */ + add_gendisk(gendisk); + + /* + * Now fill in all the device driver info + */ + sizes = kmalloc(numpart * sizeof(int), GFP_KERNEL); + if (!sizes) + return -ENOMEM; + memset(sizes, 0x00, numpart * sizeof(int)); + blk_size[major] = gendisk->sizes = sizes; + + partitions = + kmalloc(numpart * sizeof(struct hd_struct), GFP_KERNEL); + if (!partitions) + return -ENOMEM; + memset(partitions, 0x00, numpart * sizeof(struct hd_struct)); + gendisk->part = partitions; + + blksizes = kmalloc(numpart * sizeof(int), GFP_KERNEL); + if (!blksizes) + return -ENOMEM; + for (i = 0; i < numpart; i++) + blksizes[i] = blksize; + blksize_size[major] = blksizes; + + sectsizes = kmalloc(numpart * sizeof(int), GFP_KERNEL); + if (!sectsizes) + return -ENOMEM; + for (i = 0; i < numpart; i++) + sectsizes[i] = 0; + hardsect_size[major] = sectsizes; + + maxsectors = kmalloc(numpart * sizeof(int), GFP_KERNEL); + if (!maxsectors) + return -ENOMEM; + for (i = 0; i < numpart; i++) + maxsectors[i] = VIODASD_MAXSECTORS; + max_sectors[major] = maxsectors; + + return 0; +} + +static void internal_register_new_disk(int diskno) +{ + int major = diskno_to_major(diskno); + int dev_within_major = diskno % DEV_PER_MAJOR; + struct gendisk *gendisk = major_to_gendisk(major); + int i; + + if (diskno == 0) { + printk(KERN_INFO_VIO + "%s: Currently %d disks connected\n", + VIOD_DEVICE_NAME, (int) viodasd_max_disk + 1); + if (viodasd_max_disk > MAX_DISKNO - 1) + printk(KERN_INFO_VIO + "Only examining the first %d\n", + MAX_DISKNO); + } + + register_disk(gendisk, + MKDEV(major, + dev_within_major << + PARTITION_SHIFT), + 1 << PARTITION_SHIFT, &viodasd_fops, + gendisk-> + part[dev_within_major << PARTITION_SHIFT].nr_sects); + + printk(KERN_INFO_VIO + "%s: Disk %2.2d size %dM, sectors %d, heads %d, cylinders %d, sectsize %d\n", + VIOD_DEVICE_NAME, + diskno, + (int) (viodasd_devices[diskno].size / + (1024 * 1024)), + (int) viodasd_devices[diskno].sectors, + (int) viodasd_devices[diskno].tracks, + (int) viodasd_devices[diskno].cylinders, + (int) hardsect_size[major][dev_within_major << + PARTITION_SHIFT]); + + for (i = 1; i < (1 << PARTITION_SHIFT); ++i) { + int minor = (dev_within_major << PARTITION_SHIFT) + i; + struct hd_struct *partition = &gendisk->part[minor]; + if (partition->nr_sects) + printk(KERN_INFO_VIO + "%s: Disk %2.2d partition %2.2d start sector %ld, # sector %ld\n", + VIOD_DEVICE_NAME, diskno, i, + partition->start_sect, partition->nr_sects); + } +} + +/* Initialize the whole device driver. 
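+ * The order below matters: resolve viopath_hostLp, open the blockio
+ * path with room for VIOMAXREQ + 2 events, allocate viodasd_devices[],
+ * register the event handler, set up each major, then probe each disk
+ * read-only so its geometry and partitions get registered.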
Handle module and non-module + * versions + */ +__init int viodasd_init(void) +{ + int i, j; + int rc; + + /* Try to open to our host lp + */ + if (viopath_hostLp == HvLpIndexInvalid) { + vio_set_hostlp(); + } + + if (viopath_hostLp == HvLpIndexInvalid) { + printk(KERN_WARNING_VIO "%s: invalid hosting partition\n", + VIOD_DEVICE_NAME); + return -EIO; + } + + printk(KERN_INFO_VIO + "%s: Disk vers %s, major %d, max disks %d, hosting partition %d\n", + VIOD_DEVICE_NAME, VIODASD_VERS, major_table[0], MAX_DISKNO, + viopath_hostLp); + + if (ROOT_DEV == NODEV) { + /* first disk, first partition */ + ROOT_DEV = diskno_to_devt(0, 1); + + printk(KERN_INFO_VIO + "Claiming root file system as first partition of first virtual disk"); + } + + /* Actually open the path to the hosting partition */ + rc = viopath_open(viopath_hostLp, viomajorsubtype_blockio, + VIOMAXREQ + 2); + if (rc) { + printk(KERN_WARNING_VIO + "error opening path to host partition %d\n", + viopath_hostLp); + return -EIO; + } else { + printk("%s: opened path to hosting partition %d\n", + VIOD_DEVICE_NAME, viopath_hostLp); + } + + viodasd_devices = + kmalloc(MAX_DISKNO * sizeof(struct viodasd_device), + GFP_KERNEL); + if (!viodasd_devices) + return -ENOMEM; + memset(viodasd_devices, 0x00, + MAX_DISKNO * sizeof(struct viodasd_device)); + + /* + * Initialize our request handler + */ + vio_setHandler(viomajorsubtype_blockio, vioHandleBlockEvent); + + for (i = 0; i < NUM_MAJORS; ++i) { + int init_rc = viodasd_init_major(major_table[i]); + if (init_rc < 0) { + for (j = 0; j <= i; ++j) + viodasd_cleanup_major(major_table[j]); + return init_rc; + } + } + + viodasd_max_disk = MAX_DISKNO - 1; + for (i = 0; i <= viodasd_max_disk && i < MAX_DISKNO; i++) { + // Note that internal_open has two side effects: + // a) it updates the size of the disk + // b) it updates viodasd_max_disk + if (internal_open(i, vioblockflags_ro) == 0) { + internal_register_new_disk(i); + internal_release(i, vioblockflags_ro); + } + } + + /* + * Create the proc entry + */ + iSeries_proc_callback(&viodasd_proc_init); + + return 0; +} + +#ifdef MODULE +void viodasd_exit(void) +{ + int i; + for(i = 0; i < NUM_MAJORS; ++i) + viodasd_cleanup_major(major_table[i]); + + CLEANIT(viodasd_devices); + + viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2); + iSeries_proc_callback(&viodasd_proc_delete); + +} +#endif + +#ifdef MODULE +module_init(viodasd_init); +module_exit(viodasd_exit); +#endif diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/iseries/viopath.c linuxppc64_2_4/drivers/iseries/viopath.c --- ../kernel.org/linux-2.4.19/drivers/iseries/viopath.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/iseries/viopath.c Tue Apr 16 10:47:52 2002 @@ -0,0 +1,661 @@ +/* -*- linux-c -*- + * arch/ppc64/viopath.c + * + * iSeries Virtual I/O Message Path code + * + * Authors: Dave Boutcher + * Ryan Arnold + * Colin Devilbiss + * + * (C) Copyright 2000 IBM Corporation + * + * This code is used by the iSeries virtual disk, cd, + * tape, and console to communicate with OS/400 in another + * partition. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) anyu later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "vio.h" + +EXPORT_SYMBOL(viopath_hostLp); +EXPORT_SYMBOL(viopath_ourLp); +EXPORT_SYMBOL(vio_set_hostlp); +EXPORT_SYMBOL(vio_lookup_rc); +EXPORT_SYMBOL(viopath_open); +EXPORT_SYMBOL(viopath_close); +EXPORT_SYMBOL(viopath_isactive); +EXPORT_SYMBOL(viopath_sourceinst); +EXPORT_SYMBOL(viopath_targetinst); +EXPORT_SYMBOL(vio_setHandler); +EXPORT_SYMBOL(vio_clearHandler); +EXPORT_SYMBOL(vio_get_event_buffer); +EXPORT_SYMBOL(vio_free_event_buffer); + +extern struct pci_dev * iSeries_vio_dev; + +/* Status of the path to each other partition in the system. + * This is overkill, since we will only ever establish connections + * to our hosting partition and the primary partition on the system. + * But this allows for other support in the future. + */ +static struct viopathStatus { + int isOpen:1; /* Did we open the path? */ + int isActive:1; /* Do we have a mon msg outstanding */ + int users[VIO_MAX_SUBTYPES]; + HvLpInstanceId mSourceInst; + HvLpInstanceId mTargetInst; + int numberAllocated; +} viopathStatus[HVMAXARCHITECTEDLPS]; + +static spinlock_t statuslock = SPIN_LOCK_UNLOCKED; + +/* + * For each kind of event we allocate a buffer that is + * guaranteed not to cross a page boundary + */ +static void *event_buffer[VIO_MAX_SUBTYPES]; +static atomic_t event_buffer_available[VIO_MAX_SUBTYPES]; + +static void handleMonitorEvent(struct HvLpEvent *event); + +/* We use this structure to handle asynchronous responses. The caller + * blocks on the semaphore and the handler posts the semaphore. + */ +struct doneAllocParms_t { + struct semaphore *sem; + int number; +}; + +/* Put a sequence number in each mon msg. The value is not + * important. Start at something other than 0 just for + * readability. wrapping this is ok. + */ +static u8 viomonseq = 22; + +/* Our hosting logical partition. We get this at startup + * time, and different modules access this variable directly. + */ +HvLpIndex viopath_hostLp = 0xff; /* HvLpIndexInvalid */ +HvLpIndex viopath_ourLp = 0xff; + +/* For each kind of incoming event we set a pointer to a + * routine to call. 
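+ * A subsystem claims its subtype once and viopath fans events out to
+ * it; e.g. the disk driver's registration (as done in viodasd_init):
+ *
+ *     vio_setHandler(viomajorsubtype_blockio, vioHandleBlockEvent);
+ *
+ * vio_clearHandler() releases the slot again.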
+ */ +static vio_event_handler_t *vio_handler[VIO_MAX_SUBTYPES]; + +/* Handle reads from the proc file system + */ +static int proc_read(char *buf, char **start, off_t offset, + int blen, int *eof, void *data) +{ + HvLpEvent_Rc hvrc; + DECLARE_MUTEX_LOCKED(Semaphore); + dma_addr_t dmaa = + pci_map_single(iSeries_vio_dev, buf, PAGE_SIZE, PCI_DMA_FROMDEVICE); + int len = PAGE_SIZE; + + if (len > blen) + len = blen; + + memset(buf, 0x00, len); + hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_config | + vioconfigget, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst + (viopath_hostLp), + viopath_targetinst + (viopath_hostLp), + (u64) (unsigned long) + &Semaphore, VIOVERSION << 16, + ((u64) dmaa) << 32, len, 0, + 0); + if (hvrc != HvLpEvent_Rc_Good) { + printk("viopath hv error on op %d\n", (int) hvrc); + } + + down(&Semaphore); + + pci_unmap_single(iSeries_vio_dev, dmaa, PAGE_SIZE, PCI_DMA_FROMDEVICE); + + *eof = 1; + return strlen(buf); +} + +/* Handle writes to our proc file system + */ +static int proc_write(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + /* Doesn't do anything today!!! + */ + return count; +} + +/* setup our proc file system entries + */ +static void vio_proc_init(struct proc_dir_entry *iSeries_proc) +{ + struct proc_dir_entry *ent; + ent = create_proc_entry("config", S_IFREG | S_IRUSR, iSeries_proc); + if (!ent) + return; + ent->nlink = 1; + ent->data = NULL; + ent->read_proc = proc_read; + ent->write_proc = proc_write; +} + +/* See if a given LP is active. Allow for invalid lps to be passed in + * and just return invalid + */ +int viopath_isactive(HvLpIndex lp) +{ + if (lp == HvLpIndexInvalid) + return 0; + if (lp < HVMAXARCHITECTEDLPS) + return viopathStatus[lp].isActive; + else + return 0; +} + +/* We cache the source and target instance ids for each + * partition. + */ +HvLpInstanceId viopath_sourceinst(HvLpIndex lp) +{ + return viopathStatus[lp].mSourceInst; +} + +HvLpInstanceId viopath_targetinst(HvLpIndex lp) +{ + return viopathStatus[lp].mTargetInst; +} + +/* Send a monitor message. This is a message with the acknowledge + * bit on that the other side will NOT explicitly acknowledge. When + * the other side goes down, the hypervisor will acknowledge any + * outstanding messages....so we will know when the other side dies. + */ +static void sendMonMsg(HvLpIndex remoteLp) +{ + HvLpEvent_Rc hvrc; + + viopathStatus[remoteLp].mSourceInst = + HvCallEvent_getSourceLpInstanceId(remoteLp, + HvLpEvent_Type_VirtualIo); + viopathStatus[remoteLp].mTargetInst = + HvCallEvent_getTargetLpInstanceId(remoteLp, + HvLpEvent_Type_VirtualIo); + + /* Deliberately ignore the return code here. if we call this + * more than once, we don't care. + */ + vio_setHandler(viomajorsubtype_monitor, handleMonitorEvent); + + hvrc = HvCallEvent_signalLpEventFast(remoteLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_monitor, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_DeferredAck, + viopathStatus[remoteLp]. + mSourceInst, + viopathStatus[remoteLp]. 
+ mTargetInst, viomonseq++, + 0, 0, 0, 0, 0); + + if (hvrc == HvLpEvent_Rc_Good) { + viopathStatus[remoteLp].isActive = 1; + } else { + printk(KERN_WARNING_VIO + "could not connect to partition %d\n", remoteLp); + viopathStatus[remoteLp].isActive = 0; + } +} + +static void handleMonitorEvent(struct HvLpEvent *event) +{ + HvLpIndex remoteLp; + int i; + + /* This handler is _also_ called as part of the loop + * at the end of this routine, so it must be able to + * ignore NULL events... + */ + if(!event) + return; + + /* First see if this is just a normal monitor message from the + * other partition + */ + if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + remoteLp = event->xSourceLp; + if (!viopathStatus[remoteLp].isActive) + sendMonMsg(remoteLp); + return; + } + + /* This path is for an acknowledgement; the other partition + * died + */ + remoteLp = event->xTargetLp; + if ((event->xSourceInstanceId != + viopathStatus[remoteLp].mSourceInst) + || (event->xTargetInstanceId != + viopathStatus[remoteLp].mTargetInst)) { + printk(KERN_WARNING_VIO + "ignoring ack....mismatched instances\n"); + return; + } + + printk(KERN_WARNING_VIO "partition %d ended\n", remoteLp); + + viopathStatus[remoteLp].isActive = 0; + + /* For each active handler, pass them a NULL + * message to indicate that the other partition + * died + */ + for (i = 0; i < VIO_MAX_SUBTYPES; i++) { + if (vio_handler[i] != NULL) + (*vio_handler[i]) (NULL); + } +} + +int vio_setHandler(int subtype, vio_event_handler_t * beh) +{ + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return -EINVAL; + + if (vio_handler[subtype] != NULL) + return -EBUSY; + + vio_handler[subtype] = beh; + return 0; +} + +int vio_clearHandler(int subtype) +{ + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return -EINVAL; + + if (vio_handler[subtype] == NULL) + return -EAGAIN; + + vio_handler[subtype] = NULL; + return 0; +} + +static void handleConfig(struct HvLpEvent *event) +{ + if(!event) + return; + if (event->xFlags.xFunction == HvLpEvent_Function_Int) { + printk(KERN_WARNING_VIO + "unexpected config request from partition %d", + event->xSourceLp); + + if ((event->xFlags.xFunction == HvLpEvent_Function_Int) && + (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck)) { + event->xRc = HvLpEvent_Rc_InvalidSubtype; + HvCallEvent_ackLpEvent(event); + } + return; + } + + up((struct semaphore *) event->xCorrelationToken); +} + +/* Initialization of the hosting partition + */ +void vio_set_hostlp(void) +{ + /* If this has already been set then we DON'T want to either change + * it or re-register the proc file system + */ + if (viopath_hostLp != HvLpIndexInvalid) + return; + + /* Figure out our hosting partition. 
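+ * (viopath_hostLp starts out as 0xff, i.e. HvLpIndexInvalid; drivers
+ * call vio_set_hostlp() and fail their init if it stays invalid.)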
This isn't allowed to change
+ * while we're active
+ */
+ viopath_ourLp = HvLpConfig_getLpIndex();
+ viopath_hostLp = HvCallCfg_getHostingLpIndex(viopath_ourLp);
+
+ /* If we have a valid hosting LP, create a proc file system entry
+ * for config information
+ */
+ if (viopath_hostLp != HvLpIndexInvalid) {
+ iSeries_proc_callback(&vio_proc_init);
+ vio_setHandler(viomajorsubtype_config, handleConfig);
+ }
+}
+
+static void vio_handleEvent(struct HvLpEvent *event, struct pt_regs *regs)
+{
+ HvLpIndex remoteLp;
+ int subtype =
+ (event->
+ xSubtype & VIOMAJOR_SUBTYPE_MASK) >> VIOMAJOR_SUBTYPE_SHIFT;
+
+ if (event->xFlags.xFunction == HvLpEvent_Function_Int) {
+ remoteLp = event->xSourceLp;
+ if (event->xSourceInstanceId !=
+ viopathStatus[remoteLp].mTargetInst) {
+ printk(KERN_WARNING_VIO
+ "message from invalid partition. "
+ "int msg rcvd, source inst (%d) doesn't match (%d)\n",
+ viopathStatus[remoteLp].mTargetInst,
+ event->xSourceInstanceId);
+ return;
+ }
+
+ if (event->xTargetInstanceId !=
+ viopathStatus[remoteLp].mSourceInst) {
+ printk(KERN_WARNING_VIO
+ "message from invalid partition. "
+ "int msg rcvd, target inst (%d) doesn't match (%d)\n",
+ viopathStatus[remoteLp].mSourceInst,
+ event->xTargetInstanceId);
+ return;
+ }
+ } else {
+ remoteLp = event->xTargetLp;
+ if (event->xSourceInstanceId !=
+ viopathStatus[remoteLp].mSourceInst) {
+ printk(KERN_WARNING_VIO
+ "message from invalid partition. "
+ "ack msg rcvd, source inst (%d) doesn't match (%d)\n",
+ viopathStatus[remoteLp].mSourceInst,
+ event->xSourceInstanceId);
+ return;
+ }
+
+ if (event->xTargetInstanceId !=
+ viopathStatus[remoteLp].mTargetInst) {
+ printk(KERN_WARNING_VIO
+ "message from invalid partition. "
+ "ack msg rcvd, target inst (%d) doesn't match (%d)\n",
+ viopathStatus[remoteLp].mTargetInst,
+ event->xTargetInstanceId);
+ return;
+ }
+ }
+
+ if (vio_handler[subtype] == NULL) {
+ printk(KERN_WARNING_VIO
+ "unexpected virtual io event subtype %d from partition %d\n",
+ event->xSubtype, remoteLp);
+ /* No handler. Ack if necessary
+ */
+ if ((event->xFlags.xFunction == HvLpEvent_Function_Int) &&
+ (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck)) {
+ event->xRc = HvLpEvent_Rc_InvalidSubtype;
+ HvCallEvent_ackLpEvent(event);
+ }
+ return;
+ }
+
+ /* This innocuous little line is where all the real work happens
+ */
+ (*vio_handler[subtype]) (event);
+}
+
+static void viopath_donealloc(void *parm, int number)
+{
+ struct doneAllocParms_t *doneAllocParmsp =
+ (struct doneAllocParms_t *) parm;
+ doneAllocParmsp->number = number;
+ up(doneAllocParmsp->sem);
+}
+
+static int allocateEvents(HvLpIndex remoteLp, int numEvents)
+{
+ struct doneAllocParms_t doneAllocParms;
+ DECLARE_MUTEX_LOCKED(Semaphore);
+ doneAllocParms.sem = &Semaphore;
+
+ mf_allocateLpEvents(remoteLp, HvLpEvent_Type_VirtualIo, 250, /* It would be nice to put a real number here! */
+ numEvents,
+ &viopath_donealloc, &doneAllocParms);
+
+ down(&Semaphore);
+
+ return doneAllocParms.number;
+}
+
+int viopath_open(HvLpIndex remoteLp, int subtype, int numReq)
+{
+ int i;
+ unsigned long flags;
+
+ if ((remoteLp >= HvMaxArchitectedLps)
+ || (remoteLp == HvLpIndexInvalid))
+ return -EINVAL;
+
+ subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT;
+ if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES))
+ return -EINVAL;
+
+ spin_lock_irqsave(&statuslock, flags);
+
+ /* OK...we can fit 4 maximum-sized events (256 bytes) in
+ * each page (4096).
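+ * (Worked out: buffer i lives in page i / 4 at byte offset
+ * 256 * (i % 4), so subtypes 0-3 share the first page, 4-7 the
+ * second, and so on.)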
Get a new page every 4 + */ + if (event_buffer[0] == NULL) { + for (i = 0; i < VIO_MAX_SUBTYPES; i++) { + if ((i % 4) == 0) { + event_buffer[i] = + (void *) get_free_page(GFP_KERNEL); + if (event_buffer[i] == NULL) { + spin_unlock_irqrestore(&statuslock, flags); + return -ENOMEM; + } + } else { + event_buffer[i] = + event_buffer[i - 1] + 256; + } + atomic_set(&event_buffer_available[i], 1); + } + } + + viopathStatus[remoteLp].users[subtype]++; + + if (!viopathStatus[remoteLp].isOpen) { + HvCallEvent_openLpEventPath(remoteLp, + HvLpEvent_Type_VirtualIo); + + viopathStatus[remoteLp].numberAllocated += + allocateEvents(remoteLp, 1); + + if (viopathStatus[remoteLp].numberAllocated == 0) { + HvCallEvent_closeLpEventPath(remoteLp, + HvLpEvent_Type_VirtualIo); + + spin_unlock_irqrestore(&statuslock, flags); + return -ENOMEM; + } + + viopathStatus[remoteLp].mSourceInst = + HvCallEvent_getSourceLpInstanceId(remoteLp, + HvLpEvent_Type_VirtualIo); + viopathStatus[remoteLp].mTargetInst = + HvCallEvent_getTargetLpInstanceId(remoteLp, + HvLpEvent_Type_VirtualIo); + + HvLpEvent_registerHandler(HvLpEvent_Type_VirtualIo, + &vio_handleEvent); + + viopathStatus[remoteLp].isOpen = 1; + + sendMonMsg(remoteLp); + + printk(KERN_INFO_VIO + "Opening connection to partition %d, setting sinst %d, tinst %d\n", + remoteLp, + viopathStatus[remoteLp].mSourceInst, + viopathStatus[remoteLp].mTargetInst); + } + + viopathStatus[remoteLp].numberAllocated += + allocateEvents(remoteLp, numReq); + spin_unlock_irqrestore(&statuslock, flags); + + return 0; +} + +int viopath_close(HvLpIndex remoteLp, int subtype, int numReq) +{ + unsigned long flags; + int i; + int numOpen; + struct doneAllocParms_t doneAllocParms; + DECLARE_MUTEX_LOCKED(Semaphore); + doneAllocParms.sem = &Semaphore; + + if ((remoteLp >= HvMaxArchitectedLps) + || (remoteLp == HvLpIndexInvalid)) + return -EINVAL; + + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return -EINVAL; + + spin_lock_irqsave(&statuslock, flags); + + viopathStatus[remoteLp].users[subtype]--; + + mf_deallocateLpEvents( remoteLp,HvLpEvent_Type_VirtualIo, + numReq, + &viopath_donealloc, + &doneAllocParms ); + down(&Semaphore); + + for (i = 0, numOpen = 0; i < VIO_MAX_SUBTYPES; i++) { + numOpen += viopathStatus[remoteLp].users[i]; + } + + if ((viopathStatus[remoteLp].isOpen) && (numOpen == 0)) { + printk(KERN_INFO_VIO + "Closing connection to partition %d", remoteLp); + + HvCallEvent_closeLpEventPath(remoteLp, + HvLpEvent_Type_VirtualIo); + viopathStatus[remoteLp].isOpen = 0; + viopathStatus[remoteLp].isActive = 0; + + for (i = 0; i < VIO_MAX_SUBTYPES; i++) { + atomic_set(&event_buffer_available[i], 0); + + for (i = 0; i < VIO_MAX_SUBTYPES; i += 4) { + free_page((unsigned long) event_buffer[i]); + } + } + + } + spin_unlock_irqrestore(&statuslock, flags); + return 0; +} + +void *vio_get_event_buffer(int subtype) +{ + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) + return NULL; + + if (atomic_dec_if_positive(&event_buffer_available[subtype]) == 0) + return event_buffer[subtype]; + else + return NULL; +} + +void vio_free_event_buffer(int subtype, void *buffer) +{ + subtype = subtype >> VIOMAJOR_SUBTYPE_SHIFT; + if ((subtype < 0) || (subtype >= VIO_MAX_SUBTYPES)) { + printk(KERN_WARNING_VIO + "unexpected subtype %d freeing event buffer\n", + subtype); + return; + } + + if (atomic_read(&event_buffer_available[subtype]) != 0) { + printk(KERN_WARNING_VIO + "freeing unallocated event buffer, 
subtype %d\n", + subtype); + return; + } + + if (buffer != event_buffer[subtype]) { + printk(KERN_WARNING_VIO + "freeing invalid event buffer, subtype %d\n", + subtype); + } + + atomic_set(&event_buffer_available[subtype], 1); +} + +static const struct vio_error_entry vio_no_error = + { 0, 0, "Non-VIO Error" }; +static const struct vio_error_entry vio_unknown_error = + { 0, EIO, "Unknown Error" }; + +static const struct vio_error_entry vio_default_errors[] = { + {0x0001, EIO, "No Connection"}, + {0x0002, EIO, "No Receiver"}, + {0x0003, EIO, "No Buffer Available"}, + {0x0004, EBADRQC, "Invalid Message Type"}, + {0x0000, 0, NULL}, +}; + +const struct vio_error_entry *vio_lookup_rc(const struct vio_error_entry + *local_table, u16 rc) +{ + const struct vio_error_entry *cur; + if (!rc) + return &vio_no_error; + if (local_table) + for (cur = local_table; cur->rc; ++cur) + if (cur->rc == rc) + return cur; + for (cur = vio_default_errors; cur->rc; ++cur) + if (cur->rc == rc) + return cur; + return &vio_unknown_error; +} diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/iseries/viotape.c linuxppc64_2_4/drivers/iseries/viotape.c --- ../kernel.org/linux-2.4.19/drivers/iseries/viotape.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/drivers/iseries/viotape.c Wed Dec 12 13:48:34 2001 @@ -0,0 +1,1185 @@ +/* -*- linux-c -*- + * drivers/char/viotape.c + * + * iSeries Virtual Tape + *************************************************************************** + * + * Authors: Dave Boutcher + * Ryan Arnold + * Colin Devilbiss + * + * (C) Copyright 2000 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) anyu later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + *************************************************************************** + * This routine provides access to tape drives owned and managed by an OS/400 + * partition running on the same box as this Linux partition. + * + * All tape operations are performed by sending messages back and forth to + * the OS/400 partition. The format of the messages is defined in + * iSeries/vio.h + * + */ + + +#undef VIOT_DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vio.h" +#include +#include "asm/iSeries/HvCallEvent.h" +#include "asm/iSeries/HvLpConfig.h" +#include + +extern struct pci_dev * iSeries_vio_dev; + +static int viotape_major = 230; +static int viotape_numdev = 0; + +#define VIOTAPE_MAXREQ 1 + +/* version number for viotape driver */ +static unsigned int version_major = 1; +static unsigned int version_minor = 0; + +static u64 sndMsgSeq; +static u64 sndMsgAck; +static u64 rcvMsgSeq; +static u64 rcvMsgAck; + +/*************************************************************************** + * The minor number follows the conventions of the SCSI tape drives. The + * rewind and mode are encoded in the minor #. 
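+ * (Layout, per getDevInfo() below: bits 0-4 are the device number,
+ * bits 5-6 the mode, and bit 7 set means no automatic rewind on
+ * close; so minor 0x85 is drive 5, mode 0, no rewind.)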
We use this struct to break + * them out + ***************************************************************************/ +struct viot_devinfo_struct { + int major; + int minor; + int devno; + int mode; + int rewind; +}; + +#define VIOTAPOP_RESET 0 +#define VIOTAPOP_FSF 1 +#define VIOTAPOP_BSF 2 +#define VIOTAPOP_FSR 3 +#define VIOTAPOP_BSR 4 +#define VIOTAPOP_WEOF 5 +#define VIOTAPOP_REW 6 +#define VIOTAPOP_NOP 7 +#define VIOTAPOP_EOM 8 +#define VIOTAPOP_ERASE 9 +#define VIOTAPOP_SETBLK 10 +#define VIOTAPOP_SETDENSITY 11 +#define VIOTAPOP_SETPOS 12 +#define VIOTAPOP_GETPOS 13 +#define VIOTAPOP_SETPART 14 + +struct viotapelpevent { + struct HvLpEvent event; + u32 mReserved1; + u16 mVersion; + u16 mSubTypeRc; + u16 mTape; + u16 mFlags; + u32 mToken; + u64 mLen; + union { + struct { + u32 mTapeOp; + u32 mCount; + } tapeOp; + struct { + u32 mType; + u32 mResid; + u32 mDsreg; + u32 mGstat; + u32 mErreg; + u32 mFileNo; + u32 mBlkNo; + } getStatus; + struct { + u32 mBlkNo; + } getPos; + } u; +}; +enum viotapesubtype { + viotapeopen = 0x0001, + viotapeclose = 0x0002, + viotaperead = 0x0003, + viotapewrite = 0x0004, + viotapegetinfo = 0x0005, + viotapeop = 0x0006, + viotapegetpos = 0x0007, + viotapesetpos = 0x0008, + viotapegetstatus = 0x0009 +}; + +enum viotapeRc { + viotape_InvalidRange = 0x0601, + viotape_InvalidToken = 0x0602, + viotape_DMAError = 0x0603, + viotape_UseError = 0x0604, + viotape_ReleaseError = 0x0605, + viotape_InvalidTape = 0x0606, + viotape_InvalidOp = 0x0607, + viotape_TapeErr = 0x0608, + + viotape_AllocTimedOut = 0x0640, + viotape_BOTEnc = 0x0641, + viotape_BlankTape = 0x0642, + viotape_BufferEmpty = 0x0643, + viotape_CleanCartFound = 0x0644, + viotape_CmdNotAllowed = 0x0645, + viotape_CmdNotSupported = 0x0646, + viotape_DataCheck = 0x0647, + viotape_DecompressErr = 0x0648, + viotape_DeviceTimeout = 0x0649, + viotape_DeviceUnavail = 0x064a, + viotape_DeviceBusy = 0x064b, + viotape_EndOfMedia = 0x064c, + viotape_EndOfTape = 0x064d, + viotape_EquipCheck = 0x064e, + viotape_InsufficientRs = 0x064f, + viotape_InvalidLogBlk = 0x0650, + viotape_LengthError = 0x0651, + viotape_LibDoorOpen = 0x0652, + viotape_LoadFailure = 0x0653, + viotape_NotCapable = 0x0654, + viotape_NotOperational = 0x0655, + viotape_NotReady = 0x0656, + viotape_OpCancelled = 0x0657, + viotape_PhyLinkErr = 0x0658, + viotape_RdyNotBOT = 0x0659, + viotape_TapeMark = 0x065a, + viotape_WriteProt = 0x065b +}; + +static const struct vio_error_entry viotape_err_table[] = { + {viotape_InvalidRange, EIO, "Internal error"}, + {viotape_InvalidToken, EIO, "Internal error"}, + {viotape_DMAError, EIO, "DMA error"}, + {viotape_UseError, EIO, "Internal error"}, + {viotape_ReleaseError, EIO, "Internal error"}, + {viotape_InvalidTape, EIO, "Invalid tape device"}, + {viotape_InvalidOp, EIO, "Invalid operation"}, + {viotape_TapeErr, EIO, "Tape error"}, + {viotape_AllocTimedOut, EBUSY, "Allocate timed out"}, + {viotape_BOTEnc, EIO, "Beginning of tape encountered"}, + {viotape_BlankTape, EIO, "Blank tape"}, + {viotape_BufferEmpty, EIO, "Buffer empty"}, + {viotape_CleanCartFound, ENOMEDIUM, "Cleaning cartridge found"}, + {viotape_CmdNotAllowed, EIO, "Command not allowed"}, + {viotape_CmdNotSupported, EIO, "Command not supported"}, + {viotape_DataCheck, EIO, "Data check"}, + {viotape_DecompressErr, EIO, "Decompression error"}, + {viotape_DeviceTimeout, EBUSY, "Device timeout"}, + {viotape_DeviceUnavail, EIO, "Device unavailable"}, + {viotape_DeviceBusy, EBUSY, "Device busy"}, + {viotape_EndOfMedia, ENOSPC, "End of media"}, + 
{viotape_EndOfTape, ENOSPC, "End of tape"}, + {viotape_EquipCheck, EIO, "Equipment check"}, + {viotape_InsufficientRs, EOVERFLOW, "Insufficient tape resources"}, + {viotape_InvalidLogBlk, EIO, "Invalid logical block location"}, + {viotape_LengthError, EOVERFLOW, "Length error"}, + {viotape_LibDoorOpen, EBUSY, "Door open"}, + {viotape_LoadFailure, ENOMEDIUM, "Load failure"}, + {viotape_NotCapable, EIO, "Not capable"}, + {viotape_NotOperational, EIO, "Not operational"}, + {viotape_NotReady, EIO, "Not ready"}, + {viotape_OpCancelled, EIO, "Operation cancelled"}, + {viotape_PhyLinkErr, EIO, "Physical link error"}, + {viotape_RdyNotBOT, EIO, "Ready but not beginning of tape"}, + {viotape_TapeMark, EIO, "Tape mark"}, + {viotape_WriteProt, EROFS, "Write protection error"}, + {0, 0, NULL}, +}; + +/* Maximum # tapes we support + */ +#define VIOTAPE_MAX_TAPE 8 +#define MAX_PARTITIONS 4 + +/* defines for current tape state */ +#define VIOT_IDLE 0 +#define VIOT_READING 1 +#define VIOT_WRITING 2 + +/* Our info on the tapes + */ +struct tape_descr { + char rsrcname[10]; + char type[4]; + char model[3]; +}; + +static struct tape_descr *viotape_unitinfo = NULL; + +static const char *lasterr[VIOTAPE_MAX_TAPE]; + +static struct mtget viomtget[VIOTAPE_MAX_TAPE]; + +/* maintain the current state of each tape (and partition) + so that we know when to write EOF marks. +*/ +static struct { + unsigned char cur_part; + devfs_handle_t dev_handle; + struct { + unsigned char rwi; + } part_stat[MAX_PARTITIONS]; +} state[VIOTAPE_MAX_TAPE]; + +/* We single-thread + */ +static struct semaphore reqSem; + +/* When we send a request, we use this struct to get the response back + * from the interrupt handler + */ +struct opStruct { + void *buffer; + dma_addr_t dmaaddr; + size_t count; + int rc; + struct semaphore *sem; + struct opStruct *free; +}; + +static spinlock_t opStructListLock; +static struct opStruct *opStructList; + +/* forward declaration to resolve interdependence */ +static int chg_state(int index, unsigned char new_state, + struct file *file); + +/* Decode the kdev_t into its parts + */ +void getDevInfo(kdev_t dev, struct viot_devinfo_struct *devi) +{ + devi->major = MAJOR(dev); + devi->minor = MINOR(dev); + devi->devno = devi->minor & 0x1F; + devi->mode = (devi->minor & 0x60) >> 5; + /* if bit is set in the minor, do _not_ rewind automatically */ + devi->rewind = !(devi->minor & 0x80); +} + + +/* Allocate an op structure from our pool + */ +static struct opStruct *getOpStruct(void) +{ + struct opStruct *newOpStruct; + spin_lock(&opStructListLock); + + if (opStructList == NULL) { + newOpStruct = kmalloc(sizeof(struct opStruct), GFP_KERNEL); + } else { + newOpStruct = opStructList; + opStructList = opStructList->free; + } + + if (newOpStruct) + memset(newOpStruct, 0x00, sizeof(struct opStruct)); + + spin_unlock(&opStructListLock); + + return newOpStruct; +} + +/* Return an op structure to our pool + */ +static void freeOpStruct(struct opStruct *opStruct) +{ + spin_lock(&opStructListLock); + opStruct->free = opStructList; + opStructList = opStruct; + spin_unlock(&opStructListLock); +} + +/* Map our tape return codes to errno values + */ +int tapeRcToErrno(int tapeRc, char *operation, int tapeno) +{ + const struct vio_error_entry *err; + if(tapeRc == 0) + return 0; + err = vio_lookup_rc(viotape_err_table, tapeRc); + + printk(KERN_WARNING_VIO "tape error 0x%04x on Device %d (%-10s): %s\n", + tapeRc, tapeno, viotape_unitinfo[tapeno].rsrcname, err->msg); + + lasterr[tapeno] = err->msg; + + return -err->errno; +} + 
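+/* An illustrative note on the mapping above: a failed operation
+ * funnels its OS/400 return code through tapeRcToErrno(), e.g. the
+ * table entry 0x0642 ("Blank tape") gives
+ *
+ *     return tapeRcToErrno(0x0642, "read", devi.devno);   // == -EIO
+ *
+ * with the message logged and remembered in lasterr[] for /proc.
+ */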
+/* Handle reads from the proc file system. + */ +static int proc_read(char *buf, char **start, off_t offset, + int blen, int *eof, void *data) +{ + int len = 0; + int i; + + len += sprintf(buf + len, "viotape driver version %d.%d\n", + version_major, version_minor); + + for (i = 0; i < viotape_numdev; i++) { + + len += + sprintf(buf + len, + "viotape device %d is iSeries resource %10.10s type %4.4s, model %3.3s\n", + i, viotape_unitinfo[i].rsrcname, + viotape_unitinfo[i].type, + viotape_unitinfo[i].model); + if (lasterr[i]) + len += + sprintf(buf + len, " last error: %s\n", + lasterr[i]); + } + + *eof = 1; + return len; +} + +/* setup our proc file system entries + */ +void viotape_proc_init(struct proc_dir_entry *iSeries_proc) +{ + struct proc_dir_entry *ent; + ent = + create_proc_entry("viotape", S_IFREG | S_IRUSR, iSeries_proc); + if (!ent) + return; + ent->nlink = 1; + ent->data = NULL; + ent->read_proc = proc_read; +} + +/* clean up our proc file system entries + */ +void viotape_proc_delete(struct proc_dir_entry *iSeries_proc) +{ + remove_proc_entry("viotape", iSeries_proc); +} + + +/* Get info on all tapes from OS/400 + */ +static void get_viotape_info(void) +{ + dma_addr_t dmaaddr; + HvLpEvent_Rc hvrc; + int i; + struct opStruct *op = getOpStruct(); + DECLARE_MUTEX_LOCKED(Semaphore); + if (op == NULL) + return; + + if (viotape_unitinfo == NULL) { + viotape_unitinfo = + kmalloc(sizeof(struct tape_descr) * VIOTAPE_MAX_TAPE, + GFP_KERNEL); + } + memset(viotape_unitinfo, 0x00, + sizeof(struct tape_descr) * VIOTAPE_MAX_TAPE); + memset(lasterr, 0x00, sizeof(lasterr)); + + op->sem = &Semaphore; + + dmaaddr = pci_map_single(iSeries_vio_dev, viotape_unitinfo, + sizeof(struct tape_descr) * + VIOTAPE_MAX_TAPE, PCI_DMA_FROMDEVICE); + if (dmaaddr == 0xFFFFFFFF) { + printk(KERN_WARNING_VIO "viotape error allocating tce\n"); + return; + } + + hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_tape | + viotapegetinfo, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst + (viopath_hostLp), + viopath_targetinst + (viopath_hostLp), + (u64) (unsigned long) op, + VIOVERSION << 16, dmaaddr, + sizeof(struct tape_descr) * + VIOTAPE_MAX_TAPE, 0, 0); + if (hvrc != HvLpEvent_Rc_Good) { + printk("viotape hv error on op %d\n", (int) hvrc); + } + + down(&Semaphore); + + freeOpStruct(op); + + + for (i = 0; + ((i < VIOTAPE_MAX_TAPE) && (viotape_unitinfo[i].rsrcname[0])); + i++) { + printk("found a tape %10.10s\n", + viotape_unitinfo[i].rsrcname); + viotape_numdev++; + } +} + + +/* Write + */ +static ssize_t viotap_write(struct file *file, const char *buf, + size_t count, loff_t * ppos) +{ + HvLpEvent_Rc hvrc; + kdev_t dev = file->f_dentry->d_inode->i_rdev; + unsigned short flags = file->f_flags; + struct opStruct *op = getOpStruct(); + int noblock = ((flags & O_NONBLOCK) != 0); + int err; + struct viot_devinfo_struct devi; + DECLARE_MUTEX_LOCKED(Semaphore); + + if (op == NULL) + return -ENOMEM; + + getDevInfo(dev, &devi); + + /* We need to make sure we can send a request. We use + * a semaphore to keep track of # requests in use. 
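+ * (reqSem is a counting semaphore, set up in viotap_init with
+ * sema_init(&reqSem, VIOTAPE_MAXREQ), so each down() claims one of
+ * VIOTAPE_MAXREQ slots and completion gives it back with up().)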
If + * we are non-blocking, make sure we don't block on the + * semaphore + */ + if (noblock) { + if (down_trylock(&reqSem)) { + freeOpStruct(op); + return -EWOULDBLOCK; + } + } else { + down(&reqSem); + } + + /* Allocate a DMA buffer */ + op->buffer = pci_alloc_consistent(iSeries_vio_dev, count, &op->dmaaddr); + + if ((op->dmaaddr == 0xFFFFFFFF) || (op->buffer == NULL)) { + printk(KERN_WARNING_VIO + "tape error allocating dma buffer for len %ld\n", + count); + freeOpStruct(op); + up(&reqSem); + return -EFAULT; + } + + op->count = count; + + /* Copy the data into the buffer */ + err = copy_from_user(op->buffer, (const void *) buf, count); + if (err) { + printk(KERN_WARNING_VIO + "tape: error on copy from user\n"); + pci_free_consistent(iSeries_vio_dev, count, op->buffer, op->dmaaddr); + freeOpStruct(op); + up(&reqSem); + return -EFAULT; + } + + if (noblock) { + op->sem = NULL; + } else { + op->sem = &Semaphore; + } + + hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_tape | + viotapewrite, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst + (viopath_hostLp), + viopath_targetinst + (viopath_hostLp), + (u64) (unsigned long) op, + VIOVERSION << 16, + ((u64) devi. + devno << 48) | op->dmaaddr, + count, 0, 0); + if (hvrc != HvLpEvent_Rc_Good) { + printk("viotape hv error on op %d\n", (int) hvrc); + pci_free_consistent(iSeries_vio_dev, count, op->buffer, op->dmaaddr); + freeOpStruct(op); + up(&reqSem); + return -EIO; + } + + if (noblock) + return count; + + down(&Semaphore); + + err = op->rc; + + /* Free the buffer */ + pci_free_consistent(iSeries_vio_dev, count, op->buffer, op->dmaaddr); + + count = op->count; + + freeOpStruct(op); + up(&reqSem); + if (err) + return tapeRcToErrno(err, "write", devi.devno); + else { + chg_state(devi.devno, VIOT_WRITING, file); + return count; + } +} + +/* read + */ +static ssize_t viotap_read(struct file *file, char *buf, size_t count, + loff_t * ptr) +{ + HvLpEvent_Rc hvrc; + kdev_t dev = file->f_dentry->d_inode->i_rdev; + unsigned short flags = file->f_flags; + struct opStruct *op = getOpStruct(); + int noblock = ((flags & O_NONBLOCK) != 0); + int err; + struct viot_devinfo_struct devi; + DECLARE_MUTEX_LOCKED(Semaphore); + + if (op == NULL) + return -ENOMEM; + + getDevInfo(dev, &devi); + + /* We need to make sure we can send a request. We use + * a semaphore to keep track of # requests in use. If + * we are non-blocking, make sure we don't block on the + * semaphore + */ + if (noblock) { + if (down_trylock(&reqSem)) { + freeOpStruct(op); + return -EWOULDBLOCK; + } + } else { + down(&reqSem); + } + + chg_state(devi.devno, VIOT_READING, file); + + /* Allocate a DMA buffer */ + op->buffer = pci_alloc_consistent(iSeries_vio_dev, count, &op->dmaaddr); + + if ((op->dmaaddr == 0xFFFFFFFF) || (op->buffer == NULL)) { + freeOpStruct(op); + up(&reqSem); + return -EFAULT; + } + + op->count = count; + + op->sem = &Semaphore; + + hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, + HvLpEvent_Type_VirtualIo, + viomajorsubtype_tape | + viotaperead, + HvLpEvent_AckInd_DoAck, + HvLpEvent_AckType_ImmediateAck, + viopath_sourceinst + (viopath_hostLp), + viopath_targetinst + (viopath_hostLp), + (u64) (unsigned long) op, + VIOVERSION << 16, + ((u64) devi. 
+					      devno << 48) | op->dmaaddr,
+					     count, 0, 0);
+	if (hvrc != HvLpEvent_Rc_Good) {
+		printk(KERN_WARNING_VIO
+		       "tape hv error on op %d\n", (int) hvrc);
+		pci_free_consistent(iSeries_vio_dev, count, op->buffer, op->dmaaddr);
+		freeOpStruct(op);
+		up(&reqSem);
+		return -EIO;
+	}
+
+	down(&Semaphore);
+
+	if (op->rc == 0) {
+		/* If we got data back */
+		if (op->count) {
+			/* Copy the data into the buffer */
+			err = copy_to_user(buf, op->buffer, count);
+			if (err) {
+				printk(KERN_WARNING_VIO
+				       "tape: error on copy_to_user\n");
+				pci_free_consistent(iSeries_vio_dev, count,
+						    op->buffer,
+						    op->dmaaddr);
+				freeOpStruct(op);
+				up(&reqSem);
+				return -EFAULT;
+			}
+		}
+	}
+
+	err = op->rc;
+
+	/* Free the buffer */
+	pci_free_consistent(iSeries_vio_dev, count, op->buffer, op->dmaaddr);
+	count = op->count;
+
+	freeOpStruct(op);
+	up(&reqSem);
+	if (err)
+		return tapeRcToErrno(err, "read", devi.devno);
+	else
+		return count;
+}
+
+/* ioctl
+ */
+static int viotap_ioctl(struct inode *inode, struct file *file,
+			unsigned int cmd, unsigned long arg)
+{
+	HvLpEvent_Rc hvrc;
+	int err;
+	DECLARE_MUTEX_LOCKED(Semaphore);
+	kdev_t dev = file->f_dentry->d_inode->i_rdev;
+	struct opStruct *op = getOpStruct();
+	struct viot_devinfo_struct devi;
+	if (op == NULL)
+		return -ENOMEM;
+
+	getDevInfo(dev, &devi);
+
+	down(&reqSem);
+
+	switch (cmd) {
+	case MTIOCTOP:{
+			struct mtop mtc;
+			u32 myOp;
+
+			/* inode is null if and only if we (the kernel) made the request */
+			if (inode == NULL)
+				memcpy(&mtc, (void *) arg,
+				       sizeof(struct mtop));
+			else if (copy_from_user
+				 ((char *) &mtc, (char *) arg,
+				  sizeof(struct mtop))) {
+				freeOpStruct(op);
+				up(&reqSem);
+				return -EFAULT;
+			}
+
+			switch (mtc.mt_op) {
+			case MTRESET:
+				myOp = VIOTAPOP_RESET;
+				break;
+			case MTFSF:
+				myOp = VIOTAPOP_FSF;
+				break;
+			case MTBSF:
+				myOp = VIOTAPOP_BSF;
+				break;
+			case MTFSR:
+				myOp = VIOTAPOP_FSR;
+				break;
+			case MTBSR:
+				myOp = VIOTAPOP_BSR;
+				break;
+			case MTWEOF:
+				myOp = VIOTAPOP_WEOF;
+				break;
+			case MTREW:
+				myOp = VIOTAPOP_REW;
+				break;
+			case MTNOP:
+				myOp = VIOTAPOP_NOP;
+				break;
+			case MTEOM:
+				myOp = VIOTAPOP_EOM;
+				break;
+			case MTERASE:
+				myOp = VIOTAPOP_ERASE;
+				break;
+			case MTSETBLK:
+				myOp = VIOTAPOP_SETBLK;
+				break;
+			case MTSETDENSITY:
+				myOp = VIOTAPOP_SETDENSITY;
+				break;
+			case MTTELL:
+				myOp = VIOTAPOP_GETPOS;
+				break;
+			case MTSEEK:
+				myOp = VIOTAPOP_SETPOS;
+				break;
+			case MTSETPART:
+				myOp = VIOTAPOP_SETPART;
+				break;
+			default:
+				freeOpStruct(op);
+				up(&reqSem);
+				return -EIO;
+			}
+
+/* if we moved the head, we are no longer reading or writing */
+			switch (mtc.mt_op) {
+			case MTFSF:
+			case MTBSF:
+			case MTFSR:
+			case MTBSR:
+			case MTTELL:
+			case MTSEEK:
+			case MTREW:
+				chg_state(devi.devno, VIOT_IDLE, file);
+			}
+
+			op->sem = &Semaphore;
+			hvrc =
+			    HvCallEvent_signalLpEventFast(viopath_hostLp,
+							  HvLpEvent_Type_VirtualIo,
+							  viomajorsubtype_tape
+							  | viotapeop,
+							  HvLpEvent_AckInd_DoAck,
+							  HvLpEvent_AckType_ImmediateAck,
+							  viopath_sourceinst
+							  (viopath_hostLp),
+							  viopath_targetinst
+							  (viopath_hostLp),
+							  (u64) (unsigned long) op,
+							  VIOVERSION << 16,
+							  ((u64) devi.devno << 48), 0,
+							  (((u64) myOp) << 32) | mtc.mt_count, 0);
+			if (hvrc != HvLpEvent_Rc_Good) {
+				printk("viotape hv error on op %d\n",
+				       (int) hvrc);
+				freeOpStruct(op);
+				up(&reqSem);
+				return -EIO;
+			}
+			down(&Semaphore);
+			if (op->rc) {
+				freeOpStruct(op);
+				up(&reqSem);
+				return tapeRcToErrno(op->rc,
+						     "tape operation",
+						     devi.devno);
+			} else {
+				freeOpStruct(op);
+				up(&reqSem);
+				return 0;
+			}
+			break;
+		}
+
+	case MTIOCGET:
+		op->sem = &Semaphore;
+		hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+						     HvLpEvent_Type_VirtualIo,
+						     viomajorsubtype_tape |
+						     viotapegetstatus,
+						     HvLpEvent_AckInd_DoAck,
+						     HvLpEvent_AckType_ImmediateAck,
+						     viopath_sourceinst
+						     (viopath_hostLp),
+						     viopath_targetinst
+						     (viopath_hostLp),
+						     (u64) (unsigned long)
+						     op, VIOVERSION << 16,
+						     ((u64) devi.devno << 48), 0, 0,
+						     0);
+		if (hvrc != HvLpEvent_Rc_Good) {
+			printk("viotape hv error on op %d\n", (int) hvrc);
+			freeOpStruct(op);
+			up(&reqSem);
+			return -EIO;
+		}
+		down(&Semaphore);
+		up(&reqSem);
+		if (op->rc) {
+			freeOpStruct(op);
+			return tapeRcToErrno(op->rc, "get status",
+					     devi.devno);
+		} else {
+			freeOpStruct(op);
+			err =
+			    copy_to_user((void *) arg, &viomtget[dev],
+					 sizeof(viomtget[0]));
+			if (err)
+				return -EFAULT;
+			return 0;
+		}
+		break;
+	case MTIOCPOS:
+		printk("Got an MTIOCPOS\n");
+	default:
+		freeOpStruct(op);
+		up(&reqSem);
+		return -ENOSYS;
+	}
+	return 0;
+}
+
+/* Open
+ */
+static int viotap_open(struct inode *inode, struct file *file)
+{
+	DECLARE_MUTEX_LOCKED(Semaphore);
+	kdev_t dev = file->f_dentry->d_inode->i_rdev;
+	HvLpEvent_Rc hvrc;
+	struct opStruct *op = getOpStruct();
+	struct viot_devinfo_struct devi;
+	if (op == NULL)
+		return -ENOMEM;
+
+	getDevInfo(dev, &devi);
+
+// Note: We currently only support one mode!
+	if ((devi.devno >= viotape_numdev) || (devi.mode)) {
+		freeOpStruct(op);
+		return -ENODEV;
+	}
+
+	op->sem = &Semaphore;
+
+	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+					     HvLpEvent_Type_VirtualIo,
+					     viomajorsubtype_tape |
+					     viotapeopen,
+					     HvLpEvent_AckInd_DoAck,
+					     HvLpEvent_AckType_ImmediateAck,
+					     viopath_sourceinst
+					     (viopath_hostLp),
+					     viopath_targetinst
+					     (viopath_hostLp),
+					     (u64) (unsigned long) op,
+					     VIOVERSION << 16,
+					     ((u64) devi.devno << 48), 0,
+					     0, 0);
+
+
+	if (hvrc != 0) {
+		printk("viotape bad rc on signalLpEvent %d\n", (int) hvrc);
+		freeOpStruct(op);
+		return -EIO;
+	}
+
+	down(&Semaphore);
+
+	if (op->rc) {
+		freeOpStruct(op);
+		return tapeRcToErrno(op->rc, "open", devi.devno);
+	} else {
+		freeOpStruct(op);
+		MOD_INC_USE_COUNT;
+		return 0;
+	}
+}
+
+
+/* Release
+ */
+static int viotap_release(struct inode *inode, struct file *file)
+{
+	DECLARE_MUTEX_LOCKED(Semaphore);
+	kdev_t dev = file->f_dentry->d_inode->i_rdev;
+	HvLpEvent_Rc hvrc;
+	struct viot_devinfo_struct devi;
+	struct opStruct *op = getOpStruct();
+
+	if (op == NULL)
+		return -ENOMEM;
+	op->sem = &Semaphore;
+
+	getDevInfo(dev, &devi);
+
+	if (devi.devno >= viotape_numdev) {
+		freeOpStruct(op);
+		return -ENODEV;
+	}
+
+	chg_state(devi.devno, VIOT_IDLE, file);
+
+	if (devi.rewind) {
+		hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+						     HvLpEvent_Type_VirtualIo,
+						     viomajorsubtype_tape |
+						     viotapeop,
+						     HvLpEvent_AckInd_DoAck,
+						     HvLpEvent_AckType_ImmediateAck,
+						     viopath_sourceinst
+						     (viopath_hostLp),
+						     viopath_targetinst
+						     (viopath_hostLp),
+						     (u64) (unsigned long)
+						     op, VIOVERSION << 16,
+						     ((u64) devi.devno << 48), 0,
+						     ((u64) VIOTAPOP_REW)
+						     << 32, 0);
+		down(&Semaphore);
+
+		if (op->rc) {
+			tapeRcToErrno(op->rc, "rewind", devi.devno);
+		}
+	}
+
+	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
+					     HvLpEvent_Type_VirtualIo,
+					     viomajorsubtype_tape |
+					     viotapeclose,
+					     HvLpEvent_AckInd_DoAck,
+					     HvLpEvent_AckType_ImmediateAck,
+					     viopath_sourceinst
+					     (viopath_hostLp),
+					     viopath_targetinst
+					     (viopath_hostLp),
+					     (u64) (unsigned long) op,
+					     VIOVERSION << 16,
+					     ((u64) devi.devno << 48), 0,
+					     0, 0);
+
+
+	if (hvrc != 0) {
+		printk("viotape: bad rc on signalLpEvent %d\n",
+		       (int) hvrc);
+		freeOpStruct(op);
+		return -EIO;
+	}
+
+	down(&Semaphore);
+
+	if (op->rc) {
+		printk("viotape: close failed\n");
+	}
+	freeOpStruct(op);
+	MOD_DEC_USE_COUNT;
+	return 0;
+}
+
+struct file_operations viotap_fops = {
+	owner:THIS_MODULE,
+	read:viotap_read,
+	write:viotap_write,
+	ioctl:viotap_ioctl,
+	open:viotap_open,
+	release:viotap_release,
+};
+
+/* Handle interrupt events for tape
+ */
+static void vioHandleTapeEvent(struct HvLpEvent *event)
+{
+	int tapeminor;
+	struct opStruct *op;
+	struct viotapelpevent *tevent = (struct viotapelpevent *) event;
+
+	if (event == NULL) {
+		/* Notification that a partition went away! */
+		if (!viopath_isactive(viopath_hostLp)) {
+			/* TODO! Clean up */
+		}
+		return;
+	}
+
+	tapeminor = event->xSubtype & VIOMINOR_SUBTYPE_MASK;
+	switch (tapeminor) {
+	case viotapegetinfo:
+	case viotapeopen:
+	case viotapeclose:
+		op = (struct opStruct *) (unsigned long) event->
+		    xCorrelationToken;
+		op->rc = tevent->mSubTypeRc;
+		up(op->sem);
+		break;
+	case viotaperead:
+	case viotapewrite:
+		op = (struct opStruct *) (unsigned long) event->
+		    xCorrelationToken;
+		op->rc = tevent->mSubTypeRc;
+		op->count = tevent->mLen;
+
+		if (op->sem) {
+			up(op->sem);
+		} else {
+			freeOpStruct(op);
+			up(&reqSem);
+		}
+		break;
+	case viotapeop:
+	case viotapegetpos:
+	case viotapesetpos:
+	case viotapegetstatus:
+		op = (struct opStruct *) (unsigned long) event->
+		    xCorrelationToken;
+		if (op) {
+			op->count = tevent->u.tapeOp.mCount;
+			op->rc = tevent->mSubTypeRc;
+
+			if (op->sem) {
+				up(op->sem);
+			}
+		}
+		break;
+	default:
+		printk("viotape: weird ack\n");
+	}
+}
+
+
+/* Do initialization
+ */
+int __init viotap_init(void)
+{
+	DECLARE_MUTEX_LOCKED(Semaphore);
+	int rc;
+	char tapename[32];
+	int i;
+
+	printk("viotape driver version %d.%d\n", version_major,
+	       version_minor);
+
+	sndMsgSeq = sndMsgAck = 0;
+	rcvMsgSeq = rcvMsgAck = 0;
+	opStructList = NULL;
+	spin_lock_init(&opStructListLock);
+
+	sema_init(&reqSem, VIOTAPE_MAXREQ);
+
+	if (viopath_hostLp == HvLpIndexInvalid)
+		vio_set_hostlp();
+
+	/*
+	 * Open to our hosting lp
+	 */
+	if (viopath_hostLp == HvLpIndexInvalid)
+		return -1;
+
+	printk("viotape: init - open path to hosting (%d)\n",
+	       viopath_hostLp);
+
+	rc = viopath_open(viopath_hostLp, viomajorsubtype_tape, VIOTAPE_MAXREQ + 2);
+	if (rc) {
+		printk("viotape: error on viopath_open to hostlp %d\n",
+		       rc);
+	}
+
+	vio_setHandler(viomajorsubtype_tape, vioHandleTapeEvent);
+
+	printk("viotape major is %d\n", viotape_major);
+
+	get_viotape_info();
+
+	if (devfs_register_chrdev(viotape_major, "viotape", &viotap_fops)) {
+		printk("Error registering viotape device\n");
+		return -1;
+	}
+
+	for (i = 0; i < viotape_numdev; i++) {
+		int j;
+		state[i].cur_part = 0;
+		for (j = 0; j < MAX_PARTITIONS; ++j)
+			state[i].part_stat[j].rwi = VIOT_IDLE;
+		sprintf(tapename, "viotape%d", i);
+		state[i].dev_handle =
+		    devfs_register(NULL, tapename, DEVFS_FL_DEFAULT,
+				   viotape_major, i,
+				   S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP |
+				   S_IWGRP,
&viotap_fops, NULL); + printk + ("viotape device %s is iSeries resource %10.10s type %4.4s, model %3.3s\n", + tapename, viotape_unitinfo[i].rsrcname, + viotape_unitinfo[i].type, viotape_unitinfo[i].model); + } + + /* + * Create the proc entry + */ + iSeries_proc_callback(&viotape_proc_init); + + return 0; +} + +/* Give a new state to the tape object + */ +static int chg_state(int index, unsigned char new_state, struct file *file) +{ + unsigned char *cur_state = + &state[index].part_stat[state[index].cur_part].rwi; + int rc = 0; + + /* if the same state, don't bother */ + if (*cur_state == new_state) + return 0; + + /* write an EOF if changing from writing to some other state */ + if (*cur_state == VIOT_WRITING) { + struct mtop write_eof = { MTWEOF, 1 }; + rc = viotap_ioctl(NULL, file, MTIOCTOP, + (unsigned long) &write_eof); + } + *cur_state = new_state; + return rc; +} + +/* Cleanup + */ +static void __exit viotap_exit(void) +{ + int i, ret; + for (i = 0; i < viotape_numdev; ++i) + devfs_unregister(state[i].dev_handle); + ret = devfs_unregister_chrdev(viotape_major, "viotape"); + if (ret < 0) + printk("Error unregistering device: %d\n", ret); + iSeries_proc_callback(&viotape_proc_delete); + if (viotape_unitinfo != NULL) { + kfree(viotape_unitinfo); + viotape_unitinfo = NULL; + } + viopath_close(viopath_hostLp, viomajorsubtype_tape, VIOTAPE_MAXREQ + 2); + vio_clearHandler(viomajorsubtype_tape); +} + +MODULE_LICENSE("GPL"); +module_init(viotap_init); +module_exit(viotap_exit); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/message/fusion/mptctl.h linuxppc64_2_4/drivers/message/fusion/mptctl.h --- ../kernel.org/linux-2.4.19/drivers/message/fusion/mptctl.h Fri Apr 19 11:00:35 2002 +++ linuxppc64_2_4/drivers/message/fusion/mptctl.h Mon Apr 22 14:12:28 2002 @@ -20,7 +20,7 @@ * (mailto:sjralston1@netscape.net) * (mailto:Pam.Delaney@lsil.com) * - * $Id: mptctl.h,v 1.9 2002/02/27 18:44:26 sralston Exp $ + * $Id: mptctl.h,v 1.1 2002/04/22 19:12:28 tgall Exp $ */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /* diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/mtd/maps/amd766rom.c linuxppc64_2_4/drivers/mtd/maps/amd766rom.c --- ../kernel.org/linux-2.4.19/drivers/mtd/maps/amd766rom.c Fri Apr 19 11:00:36 2002 +++ linuxppc64_2_4/drivers/mtd/maps/amd766rom.c Mon Apr 22 14:12:28 2002 @@ -2,7 +2,7 @@ * amd766rom.c * * Normal mappings of chips in physical memory - * $Id: amd766rom.c,v 1.1 2002/01/10 22:59:13 eric Exp $ + * $Id: amd766rom.c,v 1.1 2002/04/22 19:12:28 tgall Exp $ */ #include diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/mtd/maps/autcpu12-nvram.c linuxppc64_2_4/drivers/mtd/maps/autcpu12-nvram.c --- ../kernel.org/linux-2.4.19/drivers/mtd/maps/autcpu12-nvram.c Fri Apr 19 11:00:36 2002 +++ linuxppc64_2_4/drivers/mtd/maps/autcpu12-nvram.c Mon Apr 22 14:12:28 2002 @@ -2,7 +2,7 @@ * NV-RAM memory access on autcpu12 * (C) 2002 Thomas Gleixner (gleixner@autronix.de) * - * $Id: autcpu12-nvram.c,v 1.1 2002/02/22 09:30:24 gleixner Exp $ + * $Id: autcpu12-nvram.c,v 1.1 2002/04/22 19:12:28 tgall Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/mtd/maps/dilnetpc.c linuxppc64_2_4/drivers/mtd/maps/dilnetpc.c --- ../kernel.org/linux-2.4.19/drivers/mtd/maps/dilnetpc.c Fri Apr 19 11:00:36 2002 +++ linuxppc64_2_4/drivers/mtd/maps/dilnetpc.c Mon Apr 22 14:12:28 2002 @@ -14,7 +14,7 @@ * 
along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: dilnetpc.c,v 1.8 2002/03/12 13:07:26 rkaiser Exp $ + * $Id: dilnetpc.c,v 1.1 2002/04/22 19:12:28 tgall Exp $ * * The DIL/Net PC is a tiny embedded PC board made by SSV Embedded Systems * featuring the AMD Elan SC410 processor. There are two variants of this diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/mtd/maps/epxa10db-flash.c linuxppc64_2_4/drivers/mtd/maps/epxa10db-flash.c --- ../kernel.org/linux-2.4.19/drivers/mtd/maps/epxa10db-flash.c Fri Apr 19 11:00:36 2002 +++ linuxppc64_2_4/drivers/mtd/maps/epxa10db-flash.c Mon Apr 22 14:12:28 2002 @@ -5,7 +5,7 @@ * Copyright (C) 2001 Altera Corporation * Copyright (C) 2001 Red Hat, Inc. * - * $Id: epxa10db-flash.c,v 1.2 2001/12/19 13:00:19 jskov Exp $ + * $Id: epxa10db-flash.c,v 1.1 2002/04/22 19:12:28 tgall Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/mtd/maps/ich2rom.c linuxppc64_2_4/drivers/mtd/maps/ich2rom.c --- ../kernel.org/linux-2.4.19/drivers/mtd/maps/ich2rom.c Fri Apr 19 11:00:36 2002 +++ linuxppc64_2_4/drivers/mtd/maps/ich2rom.c Mon Apr 22 14:12:28 2002 @@ -2,7 +2,7 @@ * ich2rom.c * * Normal mappings of chips in physical memory - * $Id: ich2rom.c,v 1.1 2002/01/10 22:59:13 eric Exp $ + * $Id: ich2rom.c,v 1.1 2002/04/22 19:12:28 tgall Exp $ */ #include diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/mtd/maps/mbx860.c linuxppc64_2_4/drivers/mtd/maps/mbx860.c --- ../kernel.org/linux-2.4.19/drivers/mtd/maps/mbx860.c Fri Apr 19 11:00:36 2002 +++ linuxppc64_2_4/drivers/mtd/maps/mbx860.c Mon Apr 22 14:12:28 2002 @@ -1,5 +1,5 @@ /* - * $Id: mbx860.c,v 1.1 2001/11/18 19:43:09 dwmw2 Exp $ + * $Id: mbx860.c,v 1.1 2002/04/22 19:12:28 tgall Exp $ * * Handle mapping of the flash on MBX860 boards * diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/mtd/maps/pb1xxx-flash.c linuxppc64_2_4/drivers/mtd/maps/pb1xxx-flash.c --- ../kernel.org/linux-2.4.19/drivers/mtd/maps/pb1xxx-flash.c Fri Apr 19 11:00:36 2002 +++ linuxppc64_2_4/drivers/mtd/maps/pb1xxx-flash.c Mon Apr 22 13:25:42 2002 @@ -3,7 +3,7 @@ * * (C) 2001 Pete Popov * - * $Id: pb1xxx-flash.c,v 1.2 2002/02/14 19:36:45 ppopov Exp $ + * $Id: pb1xxx-flash.c,v 1.1 2002/04/22 18:25:42 tgall Exp $ */ #include diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/mtd/maps/pci.c linuxppc64_2_4/drivers/mtd/maps/pci.c --- ../kernel.org/linux-2.4.19/drivers/mtd/maps/pci.c Fri Apr 19 11:00:36 2002 +++ linuxppc64_2_4/drivers/mtd/maps/pci.c Mon Apr 22 14:12:28 2002 @@ -7,7 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * - * $Id: pci.c,v 1.1 2001/09/27 20:28:45 rmk Exp $ + * $Id: pci.c,v 1.1 2002/04/22 19:12:28 tgall Exp $ * * Generic PCI memory map driver. We support the following boards: * - Intel IQ80310 ATU. diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/mtd/maps/tsunami_flash.c linuxppc64_2_4/drivers/mtd/maps/tsunami_flash.c --- ../kernel.org/linux-2.4.19/drivers/mtd/maps/tsunami_flash.c Fri Apr 19 11:00:36 2002 +++ linuxppc64_2_4/drivers/mtd/maps/tsunami_flash.c Mon Apr 22 14:12:28 2002 @@ -2,7 +2,7 @@ * tsunami_flash.c * * flash chip on alpha ds10... 
- * $Id: tsunami_flash.c,v 1.1 2002/01/10 22:59:13 eric Exp $
+ * $Id: tsunami_flash.c,v 1.1 2002/04/22 19:12:28 tgall Exp $
 */
#include
#include
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/mtd/mtdconcat.c linuxppc64_2_4/drivers/mtd/mtdconcat.c
--- ../kernel.org/linux-2.4.19/drivers/mtd/mtdconcat.c	Fri Apr 19 11:00:36 2002
+++ linuxppc64_2_4/drivers/mtd/mtdconcat.c	Mon Apr 22 14:12:28 2002
@@ -5,7 +5,7 @@
 *
 * This code is GPL
 *
- * $Id: mtdconcat.c,v 1.2 2002/03/22 08:45:22 dwmw2 Exp $
+ * $Id: mtdconcat.c,v 1.1 2002/04/22 19:12:28 tgall Exp $
 */
#include
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/net/Config.in linuxppc64_2_4/drivers/net/Config.in
--- ../kernel.org/linux-2.4.19/drivers/net/Config.in	Mon Apr 22 11:34:26 2002
+++ linuxppc64_2_4/drivers/net/Config.in	Tue Apr 23 09:37:28 2002
@@ -250,10 +250,6 @@
 endmenu
-if [ "$CONFIG_PPC_ISERIES" = "y" ]; then
-   dep_tristate 'iSeries Virtual Ethernet driver support' CONFIG_VETH $CONFIG_PPC_ISERIES
-fi
-
 bool 'FDDI driver support' CONFIG_FDDI
 if [ "$CONFIG_FDDI" = "y" ]; then
    if [ "$CONFIG_PCI" = "y" -o "$CONFIG_EISA" = "y" ]; then
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/net/Makefile linuxppc64_2_4/drivers/net/Makefile
--- ../kernel.org/linux-2.4.19/drivers/net/Makefile	Mon Apr 22 11:34:26 2002
+++ linuxppc64_2_4/drivers/net/Makefile	Tue Apr 23 09:37:28 2002
@@ -73,7 +73,6 @@
 obj-$(CONFIG_DM9102) += dmfe.o
 obj-$(CONFIG_YELLOWFIN) += yellowfin.o
 obj-$(CONFIG_ACENIC) += acenic.o
-obj-$(CONFIG_VETH) += veth.o
 obj-$(CONFIG_NATSEMI) += natsemi.o
 obj-$(CONFIG_NS83820) += ns83820.o
 obj-$(CONFIG_STNIC) += stnic.o 8390.o
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/net/Space.c linuxppc64_2_4/drivers/net/Space.c
--- ../kernel.org/linux-2.4.19/drivers/net/Space.c	Fri Apr 19 11:00:20 2002
+++ linuxppc64_2_4/drivers/net/Space.c	Mon Apr 22 10:32:58 2002
@@ -541,9 +541,10 @@
-#ifdef CONFIG_TR
+#if 0 /* ifdef CONFIG_TR */
 /* Token-ring device probe */
 extern int ibmtr_probe(struct net_device *);
+extern int olympic_probe(struct net_device *);
 extern int smctr_probe(struct net_device *);
 static int
@@ -552,6 +553,9 @@
 	if (1
 #ifdef CONFIG_IBMTR
 	    && ibmtr_probe(dev)
+#endif
+#ifdef CONFIG_IBMOL
+	    && olympic_probe(dev)
 #endif
 #ifdef CONFIG_SMCTR
 	    && smctr_probe(dev)
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/net/acenic.c linuxppc64_2_4/drivers/net/acenic.c
--- ../kernel.org/linux-2.4.19/drivers/net/acenic.c	Fri Apr 19 11:00:36 2002
+++ linuxppc64_2_4/drivers/net/acenic.c	Mon Apr 22 10:32:58 2002
@@ -1917,6 +1917,7 @@
 	atomic_add(i, &ap->cur_rx_bufs);
 	ap->rx_std_skbprd = idx;
+	mb(); // DRENG
 	if (ACE_IS_TIGON_I(ap)) {
 		struct cmd cmd;
 		cmd.evt = C_SET_RX_PRD_IDX;
@@ -2289,7 +2290,7 @@
 		writel(idx, &regs->RxRetCsm);
 	}
 	ap->cur_rx = idx;
-
+	mb(); // DRENG
 	return;
 error:
 	idx = rxretprd;
@@ -2815,6 +2816,7 @@
 	wmb();
 	ap->tx_prd = idx;
+	mb(); // DRENG prd must be visible before telling HW to advance
 	ace_set_txprd(regs, ap, idx);
 	if (flagsize & BD_FLG_COAL_NOW) {
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/net/pcnet32.c linuxppc64_2_4/drivers/net/pcnet32.c
--- ../kernel.org/linux-2.4.19/drivers/net/pcnet32.c	Fri Apr 19 11:00:36 2002
+++ linuxppc64_2_4/drivers/net/pcnet32.c	Mon Apr 22 10:33:07 2002
@@ -55,6 +55,8 @@
 #include
 #include
+
+#define DO_DXSUFLO
+
 /*
 * PCI device identifiers for "new style" Linux PCI Device Drivers
 */
@@ -221,8 +223,8 @@
 * That translates to 2 (4 == 2^^2) and 4 (16 == 2^^4).
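 * As an illustrative aside on the arithmetic (this example is not part
 * of the original comment): with the values this patch selects below,
 * PCNET32_LOG_TX_BUFFERS = 6 gives TX_RING_SIZE = (1 << 6) = 64
 * descriptors and PCNET32_LOG_RX_BUFFERS = 7 gives
 * RX_RING_SIZE = (1 << 7) = 128 descriptors; with PKT_BUF_SZ raised to
 * 2048, the receive ring alone therefore pins
 * 128 * 2048 bytes = 256 KiB of buffer memory.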
*/ #ifndef PCNET32_LOG_TX_BUFFERS -#define PCNET32_LOG_TX_BUFFERS 4 -#define PCNET32_LOG_RX_BUFFERS 5 +#define PCNET32_LOG_TX_BUFFERS 6 +#define PCNET32_LOG_RX_BUFFERS 7 #endif #define TX_RING_SIZE (1 << (PCNET32_LOG_TX_BUFFERS)) @@ -233,7 +235,7 @@ #define RX_RING_MOD_MASK (RX_RING_SIZE - 1) #define RX_RING_LEN_BITS ((PCNET32_LOG_RX_BUFFERS) << 4) -#define PKT_BUF_SZ 1544 +#define PKT_BUF_SZ 2048 /* Offsets from base I/O address. */ #define PCNET32_WIO_RDP 0x10 @@ -294,34 +296,34 @@ */ struct pcnet32_private { /* The Tx and Rx ring entries must be aligned on 16-byte boundaries in 32bit mode. */ - struct pcnet32_rx_head rx_ring[RX_RING_SIZE]; - struct pcnet32_tx_head tx_ring[TX_RING_SIZE]; - struct pcnet32_init_block init_block; - dma_addr_t dma_addr; /* DMA address of beginning of this object, - returned by pci_alloc_consistent */ - struct pci_dev *pci_dev; /* Pointer to the associated pci device structure */ - const char *name; + struct pcnet32_rx_head rx_ring[RX_RING_SIZE]; + struct pcnet32_tx_head tx_ring[TX_RING_SIZE]; + struct pcnet32_init_block init_block; + dma_addr_t dma_addr; /* DMA address of beginning of this object, + returned by pci_alloc_consistent */ + struct pci_dev *pci_dev; /* Pointer to the associated pci device structure */ + const char *name; /* The saved address of a sent-in-place packet/buffer, for skfree(). */ - struct sk_buff *tx_skbuff[TX_RING_SIZE]; - struct sk_buff *rx_skbuff[RX_RING_SIZE]; - dma_addr_t tx_dma_addr[TX_RING_SIZE]; - dma_addr_t rx_dma_addr[RX_RING_SIZE]; + struct sk_buff *tx_skbuff[TX_RING_SIZE]; + struct sk_buff *rx_skbuff[RX_RING_SIZE]; + dma_addr_t tx_dma_addr[TX_RING_SIZE]; + dma_addr_t rx_dma_addr[RX_RING_SIZE]; struct pcnet32_access a; - spinlock_t lock; /* Guard lock */ - unsigned int cur_rx, cur_tx; /* The next free ring entry */ - unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */ + spinlock_t lock; /* Guard lock */ + unsigned int cur_rx, cur_tx; /* The next free ring entry */ + unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. 
*/ struct net_device_stats stats; - char tx_full; - int options; - int shared_irq:1, /* shared irq possible */ - ltint:1, /* enable TxDone-intr inhibitor */ - dxsuflo:1, /* disable transmit stop on uflo */ - mii:1; /* mii port available */ - struct net_device *next; + char tx_full; + int options; + int shared_irq:1, /* shared irq possible */ + ltint:1, + dxsuflo:1, /* disable transmit stop on uflo */ + mii:1; /* mii port available */ + struct net_device *next; struct mii_if_info mii_if; }; -static void pcnet32_probe_vlbus(void); +static void pcnet32_probe_vlbus(void); static int pcnet32_probe_pci(struct pci_dev *, const struct pci_device_id *); static int pcnet32_probe1(unsigned long, unsigned int, int, struct pci_dev *); static int pcnet32_open(struct net_device *); @@ -342,7 +344,6 @@ PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3, }; - static u16 pcnet32_wio_read_csr (unsigned long addr, int index) { outw (index, addr+PCNET32_WIO_RAP); @@ -444,15 +445,15 @@ } static struct pcnet32_access pcnet32_dwio = { - read_csr: pcnet32_dwio_read_csr, - write_csr: pcnet32_dwio_write_csr, - read_bcr: pcnet32_dwio_read_bcr, - write_bcr: pcnet32_dwio_write_bcr, - read_rap: pcnet32_dwio_read_rap, - write_rap: pcnet32_dwio_write_rap, + read_csr: pcnet32_dwio_read_csr, + write_csr: pcnet32_dwio_write_csr, + read_bcr: pcnet32_dwio_read_bcr, + write_bcr: pcnet32_dwio_write_bcr, + read_rap: pcnet32_dwio_read_rap, + write_rap: pcnet32_dwio_write_rap, reset: pcnet32_dwio_reset -}; +}; /* only probes for non-PCI devices, the rest are handled by @@ -461,40 +462,44 @@ static void __devinit pcnet32_probe_vlbus(void) { - unsigned int *port, ioaddr; + unsigned long ioaddr = 0; // FIXME dev ? dev->base_addr: 0; + int *port; + + printk(KERN_INFO "pcnet32_probe_vlbus: cards_found=%d\n", cards_found); - /* search for PCnet32 VLB cards at known addresses */ + /* now look for PCnet32 VLB cards */ for (port = pcnet32_portlist; (ioaddr = *port); port++) { if (!check_region(ioaddr, PCNET32_TOTAL_SIZE)) { /* check if there is really a pcnet chip on that ioaddr */ - if ((inb(ioaddr + 14) == 0x57) && (inb(ioaddr + 15) == 0x57)) + if ((inb(ioaddr + 14) == 0x57) && (inb(ioaddr + 15) == 0x57)) pcnet32_probe1(ioaddr, 0, 0, NULL); } } } + static int __devinit pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent) { unsigned long ioaddr; - int err; + int err = 0; err = pci_enable_device(pdev); if (err < 0) { - printk(KERN_ERR PFX "failed to enable device -- err=%d\n", err); - return err; + printk(KERN_ERR PFX "failed to enable device -- err=%d\n", err); + return err; } pci_set_master(pdev); ioaddr = pci_resource_start (pdev, 0); if (!ioaddr) { - printk (KERN_ERR PFX "card has no PCI IO resources, aborting\n"); + printk (KERN_ERR "card has no PCI IO resources, aborting\n"); return -ENODEV; } - + if (!pci_dma_supported(pdev, PCNET32_DMA_MASK)) { - printk(KERN_ERR PFX "architecture does not support 32bit PCI busmaster DMA\n"); + printk(KERN_ERR "pcnet32.c: architecture does not support 32bit PCI busmaster DMA\n"); return -ENODEV; } @@ -508,13 +513,13 @@ */ static int __devinit pcnet32_probe1(unsigned long ioaddr, unsigned int irq_line, int shared, - struct pci_dev *pdev) + struct pci_dev *pdev) { struct pcnet32_private *lp; struct resource *res; dma_addr_t lp_dma_addr; - int i, media; - int fdx, mii, fset, dxsuflo, ltint; + int i,media; + int fdx = 0, mii = 0, fset = 0, dxsuflo=0, ltint=0; int chip_version; char *chipname; struct net_device *dev; @@ -522,25 +527,27 @@ u8 promaddr[6]; /* 
reset the chip */ + pcnet32_dwio_reset(ioaddr); + udelay (100); pcnet32_wio_reset(ioaddr); - /* NOTE: 16-bit check is first, otherwise some older PCnet chips fail */ - if (pcnet32_wio_read_csr(ioaddr, 0) == 4 && pcnet32_wio_check(ioaddr)) { - a = &pcnet32_wio; + /* Important to do the check for dwio mode first. */ + if (pcnet32_dwio_read_csr(ioaddr, 0) == 4 && pcnet32_dwio_check(ioaddr)) { + a = &pcnet32_dwio; } else { - pcnet32_dwio_reset(ioaddr); - if (pcnet32_dwio_read_csr(ioaddr, 0) == 4 && pcnet32_dwio_check(ioaddr)) { - a = &pcnet32_dwio; + if (pcnet32_wio_read_csr(ioaddr, 0) == 4 && + pcnet32_wio_check(ioaddr)) { + a = &pcnet32_wio; } else return -ENODEV; } - chip_version = a->read_csr(ioaddr, 88) | (a->read_csr(ioaddr,89) << 16); + chip_version = a->read_csr(ioaddr, 88) | (a->read_csr (ioaddr,89) << 16); if (pcnet32_debug > 2) printk(KERN_INFO " PCnet chip version is %#x.\n", chip_version); if ((chip_version & 0xfff) != 0x003) return -ENODEV; - + /* initialize variables */ fdx = mii = fset = dxsuflo = ltint = 0; chip_version = (chip_version >> 12) & 0xffff; @@ -610,20 +617,27 @@ * one for latency - although on PCI this isnt a big loss. Older chips * have FIFO's smaller than a packet, so you can't do this. */ - + /* + * UPDATE + * Got to make sure that BCR18:MEMCMD, BCR18:BREADE, BCR18:BWRITE are + * set on a PCI + */ if(fset) { - a->write_bcr(ioaddr, 18, (a->read_bcr(ioaddr, 18) | 0x0800)); - a->write_csr(ioaddr, 80, (a->read_csr(ioaddr, 80) & 0x0C00) | 0x0c00); - dxsuflo = 1; - ltint = 1; + a->write_bcr(ioaddr, 18, (a->read_bcr(ioaddr, 18) | 0xA60)); + a->write_csr(ioaddr, 3, 0x2eb7); + a->write_csr(ioaddr, 4, 0x32ea); + a->write_csr(ioaddr, 80, 0x3f00); + + dxsuflo = 1; + ltint = 1; } dev = alloc_etherdev(0); if(!dev) return -ENOMEM; - printk(KERN_INFO PFX "%s at %#3lx,", chipname, ioaddr); + printk(KERN_INFO "%s at %#3lx,", chipname, ioaddr); /* In most chips, after a chip reset, the ethernet address is read from the * station address PROM at the base address and programmed into the @@ -632,6 +646,7 @@ * they disagree with the CSRs. Either way, we use the CSR values, and * double check that they are valid. 
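 * A short sketch of the selection logic that follows (a reading of the
 * code below, not a statement from the hardware manuals): the
 * CSR-derived address is kept unless it is invalid or disagrees with
 * the PROM bytes at ioaddr+0 through ioaddr+5, in which case a valid
 * PROM address replaces it; on CONFIG_PPC the CSR read is compiled out
 * entirely, so the PROM address is normally the one that gets used.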
*/ +#ifndef CONFIG_PPC for (i = 0; i < 3; i++) { unsigned int val; val = a->read_csr(ioaddr, i+12) & 0x0ffff; @@ -639,28 +654,29 @@ dev->dev_addr[2*i] = val & 0x0ff; dev->dev_addr[2*i+1] = (val >> 8) & 0x0ff; } +#endif /* read PROM address and compare with CSR address */ - for (i = 0; i < 6; i++) + for (i = 0; i < 6; i++) { promaddr[i] = inb(ioaddr + i); - + if( memcmp( promaddr, dev->dev_addr, 6) - || !is_valid_ether_addr(dev->dev_addr) ) { -#ifndef __powerpc__ + || !is_valid_ether_addr(dev->dev_addr) ) { +#ifndef __powerpc__ if( is_valid_ether_addr(promaddr) ){ #else - if( !is_valid_ether_addr(dev->dev_addr) - && is_valid_ether_addr(promaddr)) { + if (!is_valid_ether_addr(dev->dev_addr) + && is_valid_ether_addr(promaddr)) { #endif - printk(" warning: CSR address invalid,\n"); - printk(KERN_INFO " using instead PROM address of"); - memcpy(dev->dev_addr, promaddr, 6); + printk(" warning: CSR address invalid,\n"); + printk(KERN_INFO " using instead PROM address of"); + memcpy(dev->dev_addr, promaddr, 6); } - } + } /* if the ethernet address is not valid, force to 00:00:00:00:00:00 */ if( !is_valid_ether_addr(dev->dev_addr) ) - memset(dev->dev_addr, 0, sizeof(dev->dev_addr)); + memset(dev->dev_addr, 0, sizeof(dev->dev_addr)); for (i = 0; i < 6; i++) printk(" %2.2x", dev->dev_addr[i] ); @@ -800,6 +816,7 @@ cards_found++; return 0; } +} static int @@ -893,7 +910,7 @@ lp->init_block.filter[1] = 0x00000000; if (pcnet32_init_ring(dev)) return -ENOMEM; - + /* Re-initialize the PCNET32, and start it when done. */ lp->a.write_csr (ioaddr, 1, (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) &0xffff); lp->a.write_csr (ioaddr, 2, (lp->dma_addr + offsetof(struct pcnet32_private, init_block)) >> 16); @@ -975,7 +992,10 @@ } skb_reserve (rx_skbuff, 2); } - lp->rx_dma_addr[i] = pci_map_single(lp->pci_dev, rx_skbuff->tail, rx_skbuff->len, PCI_DMA_FROMDEVICE); + + if (lp->rx_dma_addr[i] == NULL) + lp->rx_dma_addr[i] = pci_map_single(lp->pci_dev, rx_skbuff->tail, PKT_BUF_SZ-2, PCI_DMA_FROMDEVICE); + lp->rx_ring[i].base = (u32)le32_to_cpu(lp->rx_dma_addr[i]); lp->rx_ring[i].buf_length = le16_to_cpu(-PKT_BUF_SZ); lp->rx_ring[i].status = le16_to_cpu(0x8000); @@ -1010,7 +1030,7 @@ /* ReInit Ring */ lp->a.write_csr (ioaddr, 0, 1); i = 0; - while (i++ < 100) + while (1) if (lp->a.read_csr (ioaddr, 0) & 0x0100) break; @@ -1024,10 +1044,10 @@ struct pcnet32_private *lp = dev->priv; unsigned long ioaddr = dev->base_addr, flags; - spin_lock_irqsave(&lp->lock, flags); + spin_lock_irqsave(&lp->lock, flags); /* Transmitter timeout, serious problems. 
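 * (For context, going by the Am79C97x register layout rather than
 * anything stated in this patch: the 0x0004 written to CSR0 just below
 * is the STOP bit, CSR0 bit 2, so the chip is halted before the rings
 * are rebuilt and the transmitter is restarted.)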
*/ printk(KERN_ERR "%s: transmit timed out, status %4.4x, resetting.\n", - dev->name, lp->a.read_csr(ioaddr, 0)); + dev->name, lp->a.read_csr (ioaddr, 0)); lp->a.write_csr (ioaddr, 0, 0x0004); lp->stats.tx_errors++; if (pcnet32_debug > 2) { @@ -1050,7 +1070,7 @@ dev->trans_start = jiffies; netif_start_queue(dev); - spin_unlock_irqrestore(&lp->lock, flags); + spin_unlock_irqrestore(&lp->lock, flags); } @@ -1065,7 +1085,7 @@ if (pcnet32_debug > 3) { printk(KERN_DEBUG "%s: pcnet32_start_xmit() called, csr0 %4.4x.\n", - dev->name, lp->a.read_csr(ioaddr, 0)); + dev->name, lp->a.read_csr (ioaddr, 0)); } spin_lock_irqsave(&lp->lock, flags); @@ -1310,12 +1330,12 @@ if ((newskb = dev_alloc_skb (PKT_BUF_SZ))) { skb_reserve (newskb, 2); skb = lp->rx_skbuff[entry]; - pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[entry], skb->len, PCI_DMA_FROMDEVICE); skb_put (skb, pkt_len); lp->rx_skbuff[entry] = newskb; newskb->dev = dev; + pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[entry], PKT_BUF_SZ-2, PCI_DMA_FROMDEVICE); lp->rx_dma_addr[entry] = - pci_map_single(lp->pci_dev, newskb->tail, + pci_map_single(lp->pci_dev, newskb->tail, newskb->len, PCI_DMA_FROMDEVICE); lp->rx_ring[entry].base = le32_to_cpu(lp->rx_dma_addr[entry]); rx_in_place = 1; @@ -1369,7 +1389,7 @@ static int pcnet32_close(struct net_device *dev) { - unsigned long ioaddr = dev->base_addr; + unsigned long ioaddr = dev->base_addr, flags; struct pcnet32_private *lp = dev->priv; int i; @@ -1390,13 +1410,23 @@ */ lp->a.write_bcr (ioaddr, 20, 4); + /* + * FIXME: What happens if the bcr write is posted, the buffers are + * freed and there is still incoming DMA traffic + */ + +#warning "PCI posting bug" + free_irq(dev->irq, dev); - + + /* Lock after free_irq to avoid deadlock with interrupt handler. */ + spin_lock_irqsave(&lp->lock, flags); + /* free all allocated skbuffs */ for (i = 0; i < RX_RING_SIZE; i++) { lp->rx_ring[i].status = 0; if (lp->rx_skbuff[i]) { - pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], lp->rx_skbuff[i]->len, PCI_DMA_FROMDEVICE); + pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], PKT_BUF_SZ-2, PCI_DMA_FROMDEVICE); dev_kfree_skb(lp->rx_skbuff[i]); } lp->rx_skbuff[i] = NULL; @@ -1412,6 +1442,8 @@ lp->tx_dma_addr[i] = 0; } + spin_unlock_irqrestore(&lp->lock, flags); + MOD_DEC_USE_COUNT; return 0; @@ -1505,13 +1537,13 @@ if (!lp->mii) return 0; - + phyaddr = lp->a.read_bcr(ioaddr, 33); - lp->a.write_bcr(ioaddr, 33, ((phy_id & 0x1f) << 5) | (reg_num & 0x1f)); + lp->a.write_bcr(ioaddr, 33, ((phy_id & 0x1f) << 5) | (reg_num & 0x1f)); val_out = lp->a.read_bcr(ioaddr, 34); lp->a.write_bcr(ioaddr, 33, phyaddr); - + return val_out; } @@ -1523,7 +1555,7 @@ if (!lp->mii) return; - + phyaddr = lp->a.read_bcr(ioaddr, 33); lp->a.write_bcr(ioaddr, 33, ((phy_id & 0x1f) << 5) | (reg_num & 0x1f)); @@ -1549,76 +1581,76 @@ return -EFAULT; switch (ethcmd) { - case ETHTOOL_GDRVINFO: { - struct ethtool_drvinfo info = { ETHTOOL_GDRVINFO }; - strcpy (info.driver, DRV_NAME); - strcpy (info.version, DRV_VERSION); - if (lp->pci_dev) - strcpy (info.bus_info, lp->pci_dev->slot_name); - else - sprintf(info.bus_info, "VLB 0x%lx", dev->base_addr); - if (copy_to_user (useraddr, &info, sizeof (info))) - return -EFAULT; - return 0; - } - - /* get settings */ - case ETHTOOL_GSET: { - struct ethtool_cmd ecmd = { ETHTOOL_GSET }; - spin_lock_irq(&lp->lock); - mii_ethtool_gset(&lp->mii_if, &ecmd); - spin_unlock_irq(&lp->lock); - if (copy_to_user(useraddr, &ecmd, sizeof(ecmd))) - return -EFAULT; - return 0; - } - /* set settings */ - case ETHTOOL_SSET: { - int r; - 
struct ethtool_cmd ecmd; - if (copy_from_user(&ecmd, useraddr, sizeof(ecmd))) - return -EFAULT; - spin_lock_irq(&lp->lock); - r = mii_ethtool_sset(&lp->mii_if, &ecmd); - spin_unlock_irq(&lp->lock); - return r; - } - /* restart autonegotiation */ - case ETHTOOL_NWAY_RST: { - return mii_nway_restart(&lp->mii_if); - } - /* get link status */ - case ETHTOOL_GLINK: { - struct ethtool_value edata = {ETHTOOL_GLINK}; - edata.data = mii_link_ok(&lp->mii_if); - if (copy_to_user(useraddr, &edata, sizeof(edata))) + case ETHTOOL_GDRVINFO: { + struct ethtool_drvinfo info = { ETHTOOL_GDRVINFO }; + strcpy (info.driver, DRV_NAME); + strcpy (info.version, DRV_VERSION); + if (lp->pci_dev) + strcpy (info.bus_info, lp->pci_dev->slot_name); + else + sprintf(info.bus_info, "VLB 0x%lx", dev->base_addr); + if (copy_to_user (useraddr, &info, sizeof (info))) + return -EFAULT; + return 0; + } + /* get settings */ + case ETHTOOL_GSET: { + struct ethtool_cmd ecmd = { ETHTOOL_GSET }; + spin_lock_irq(&lp->lock); + mii_ethtool_gset(&lp->mii_if, &ecmd); + spin_unlock_irq(&lp->lock); + if (copy_to_user(useraddr, &ecmd, sizeof(ecmd))) + return -EFAULT; + return 0; + } + /* set settings */ + case ETHTOOL_SSET: { + int r; + struct ethtool_cmd ecmd; + if (copy_from_user(&ecmd, useraddr, sizeof(ecmd))) + return -EFAULT; + spin_lock_irq(&lp->lock); + r = mii_ethtool_sset(&lp->mii_if, &ecmd); + spin_unlock_irq(&lp->lock); + return r; + } + /* restart autonegotiation */ + case ETHTOOL_NWAY_RST: { + return mii_nway_restart(&lp->mii_if); + } + /* get link status */ + case ETHTOOL_GLINK: { + struct ethtool_value edata = {ETHTOOL_GLINK}; + edata.data = mii_link_ok(&lp->mii_if); + if (copy_to_user(useraddr, &edata, sizeof(edata))) return -EFAULT; - return 0; - } + return 0; + } - /* get message-level */ - case ETHTOOL_GMSGLVL: { - struct ethtool_value edata = {ETHTOOL_GMSGLVL}; - edata.data = pcnet32_debug; - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; - } - /* set message-level */ - case ETHTOOL_SMSGLVL: { - struct ethtool_value edata; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - pcnet32_debug = edata.data; - return 0; - } - default: - break; + /* get message-level */ + case ETHTOOL_GMSGLVL: { + struct ethtool_value edata = {ETHTOOL_GMSGLVL}; + edata.data = pcnet32_debug; + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; + } + /* set message-level */ + case ETHTOOL_SMSGLVL: { + struct ethtool_value edata; + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + pcnet32_debug = edata.data; + return 0; + } + default: + break; } - return -EOPNOTSUPP; +return -EOPNOTSUPP; } + static int pcnet32_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { unsigned long ioaddr = dev->base_addr; @@ -1626,26 +1658,29 @@ struct mii_ioctl_data *data = (struct mii_ioctl_data *)&rq->ifr_data; int phyaddr = lp->a.read_bcr (ioaddr, 33); - if (cmd == SIOCETHTOOL) - return pcnet32_ethtool_ioctl(dev, (void *) rq->ifr_data); + if (cmd == SIOCETHTOOL) + return pcnet32_ethtool_ioctl(dev, (void *) rq->ifr_data); if (lp->mii) { switch(cmd) { - case SIOCGMIIPHY: /* Get address of MII PHY in use. */ + case SIOCGMIIPHY: /* Get the address of the PHY in use. */ data->phy_id = (phyaddr >> 5) & 0x1f; /* Fall Through */ - case SIOCGMIIREG: /* Read MII PHY register. */ + + case SIOCGMIIREG: /* Read the specified MII register. 
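+				   (an explanatory aside based on the code
+				   below: access is indirect, the PHY
+				   address and register number go into
+				   BCR33 and the value is read back
+				   through BCR34.)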
*/ lp->a.write_bcr (ioaddr, 33, ((data->phy_id & 0x1f) << 5) | (data->reg_num & 0x1f)); data->val_out = lp->a.read_bcr (ioaddr, 34); lp->a.write_bcr (ioaddr, 33, phyaddr); return 0; - case SIOCSMIIREG: /* Write MII PHY register. */ + + case SIOCSMIIREG: /* Write the specified MII register */ if (!capable(CAP_NET_ADMIN)) return -EPERM; lp->a.write_bcr (ioaddr, 33, ((data->phy_id & 0x1f) << 5) | (data->reg_num & 0x1f)); lp->a.write_bcr (ioaddr, 34, data->val_in); lp->a.write_bcr (ioaddr, 33, phyaddr); return 0; + default: return -EOPNOTSUPP; } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/net/tg3.c linuxppc64_2_4/drivers/net/tg3.c --- ../kernel.org/linux-2.4.19/drivers/net/tg3.c Mon Apr 22 11:34:26 2002 +++ linuxppc64_2_4/drivers/net/tg3.c Tue Apr 23 09:37:28 2002 @@ -1,4 +1,4 @@ -/* $Id: tg3.c,v 1.43.2.80 2002/03/14 00:10:04 davem Exp $ +/* $Id: tg3.c,v 1.2 2002/04/23 14:37:28 tgall Exp $ * tg3.c: Broadcom Tigon3 ethernet driver. * * Copyright (C) 2001, 2002 David S. Miller (davem@redhat.com) diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/net/tg3.h linuxppc64_2_4/drivers/net/tg3.h --- ../kernel.org/linux-2.4.19/drivers/net/tg3.h Mon Apr 22 11:34:26 2002 +++ linuxppc64_2_4/drivers/net/tg3.h Tue Apr 23 09:37:28 2002 @@ -1,4 +1,4 @@ -/* $Id: tg3.h,v 1.37.2.32 2002/03/11 12:18:18 davem Exp $ +/* $Id: tg3.h,v 1.2 2002/04/23 14:37:28 tgall Exp $ * tg3.h: Definitions for Broadcom Tigon3 ethernet driver. * * Copyright (C) 2001, 2002 David S. Miller (davem@redhat.com) diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/net/tokenring/olympic.c linuxppc64_2_4/drivers/net/tokenring/olympic.c --- ../kernel.org/linux-2.4.19/drivers/net/tokenring/olympic.c Fri Apr 19 10:59:56 2002 +++ linuxppc64_2_4/drivers/net/tokenring/olympic.c Mon Apr 22 10:33:12 2002 @@ -311,20 +311,20 @@ writel(readl(olympic_mmio+BCTL)|BCTL_MIMREB,olympic_mmio+BCTL); if (olympic_priv->olympic_ring_speed == 0) { /* Autosense */ - writel(readl(olympic_mmio+GPR)|GPR_AUTOSENSE,olympic_mmio+GPR); + writew(readw(olympic_mmio+GPR)|GPR_AUTOSENSE,olympic_mmio+GPR); if (olympic_priv->olympic_message_level) printk(KERN_INFO "%s: Ringspeed autosense mode on\n",olympic_priv->olympic_card_name); } else if (olympic_priv->olympic_ring_speed == 16) { if (olympic_priv->olympic_message_level) printk(KERN_INFO "%s: Trying to open at 16 Mbps as requested\n", olympic_priv->olympic_card_name); - writel(GPR_16MBPS, olympic_mmio+GPR); + writew(GPR_16MBPS, olympic_mmio+GPR); } else if (olympic_priv->olympic_ring_speed == 4) { if (olympic_priv->olympic_message_level) printk(KERN_INFO "%s: Trying to open at 4 Mbps as requested\n", olympic_priv->olympic_card_name) ; - writel(0, olympic_mmio+GPR); + writew(0, olympic_mmio+GPR); } - writel(readl(olympic_mmio+GPR)|GPR_NEPTUNE_BF,olympic_mmio+GPR); + writew(readw(olympic_mmio+GPR)|GPR_NEPTUNE_BF,olympic_mmio+GPR); #if OLYMPIC_DEBUG printk("GPR = %x\n",readw(olympic_mmio + GPR) ) ; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/pci/pci.c linuxppc64_2_4/drivers/pci/pci.c --- ../kernel.org/linux-2.4.19/drivers/pci/pci.c Fri Apr 19 11:00:45 2002 +++ linuxppc64_2_4/drivers/pci/pci.c Mon Apr 22 10:33:13 2002 @@ -1079,8 +1079,8 @@ res = child->resource[0]; pci_read_config_byte(dev, PCI_IO_BASE, &io_base_lo); pci_read_config_byte(dev, PCI_IO_LIMIT, &io_limit_lo); - base = (io_base_lo & PCI_IO_RANGE_MASK) << 8; - limit = (io_limit_lo & PCI_IO_RANGE_MASK) << 8; + base = (unsigned long)(io_base_lo & PCI_IO_RANGE_MASK) << 8; + limit = (unsigned long)(io_limit_lo & PCI_IO_RANGE_MASK) 
<< 8; if ((io_base_lo & PCI_IO_RANGE_TYPE_MASK) == PCI_IO_RANGE_TYPE_32) { u16 io_base_hi, io_limit_hi; @@ -1107,8 +1107,8 @@ res = child->resource[1]; pci_read_config_word(dev, PCI_MEMORY_BASE, &mem_base_lo); pci_read_config_word(dev, PCI_MEMORY_LIMIT, &mem_limit_lo); - base = (mem_base_lo & PCI_MEMORY_RANGE_MASK) << 16; - limit = (mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16; + base = (unsigned long)(mem_base_lo & PCI_MEMORY_RANGE_MASK) << 16; + limit = (unsigned long)(mem_limit_lo & PCI_MEMORY_RANGE_MASK) << 16; if (base && base <= limit) { res->flags = (mem_base_lo & PCI_MEMORY_RANGE_TYPE_MASK) | IORESOURCE_MEM; res->start = base; @@ -1123,16 +1123,16 @@ res = child->resource[2]; pci_read_config_word(dev, PCI_PREF_MEMORY_BASE, &mem_base_lo); pci_read_config_word(dev, PCI_PREF_MEMORY_LIMIT, &mem_limit_lo); - base = (mem_base_lo & PCI_PREF_RANGE_MASK) << 16; - limit = (mem_limit_lo & PCI_PREF_RANGE_MASK) << 16; + base = (unsigned long)(mem_base_lo & PCI_PREF_RANGE_MASK) << 16; + limit = (unsigned long)(mem_limit_lo & PCI_PREF_RANGE_MASK) << 16; if ((mem_base_lo & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) { u32 mem_base_hi, mem_limit_hi; pci_read_config_dword(dev, PCI_PREF_BASE_UPPER32, &mem_base_hi); pci_read_config_dword(dev, PCI_PREF_LIMIT_UPPER32, &mem_limit_hi); #if BITS_PER_LONG == 64 - base |= ((long) mem_base_hi) << 32; - limit |= ((long) mem_limit_hi) << 32; + base |= ((unsigned long) mem_base_hi) << 32; + limit |= ((unsigned long) mem_limit_hi) << 32; #else if (mem_base_hi || mem_limit_hi) { printk(KERN_ERR "PCI: Unable to handle 64-bit address space for %s\n", child->name); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/pci/pci.ids linuxppc64_2_4/drivers/pci/pci.ids --- ../kernel.org/linux-2.4.19/drivers/pci/pci.ids Mon Apr 22 11:34:26 2002 +++ linuxppc64_2_4/drivers/pci/pci.ids Tue Apr 23 09:37:29 2002 @@ -556,6 +556,7 @@ 0022 IBM27-82351 002d Python 002e ServeRAID-3x + 0031 Serial Adapter 0036 Miami 003a CPU to PCI Bridge 003e 16/4 Token ring UTP/STP controller diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/scsi/aic7xxx/aic7770_osm.c linuxppc64_2_4/drivers/scsi/aic7xxx/aic7770_osm.c --- ../kernel.org/linux-2.4.19/drivers/scsi/aic7xxx/aic7770_osm.c Fri Apr 19 11:00:21 2002 +++ linuxppc64_2_4/drivers/scsi/aic7xxx/aic7770_osm.c Mon Apr 22 13:53:41 2002 @@ -36,7 +36,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * - * $Id: //depot/linux-aic7xxx-2.4.18_rc4/drivers/scsi/aic7xxx/aic7770_osm.c#1 $ + * $Id: aic7770_osm.c,v 1.1 2002/04/22 18:53:41 tgall Exp $ */ #include "aic7xxx_osm.h" diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.y linuxppc64_2_4/drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.y --- ../kernel.org/linux-2.4.19/drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.y Fri Apr 19 11:00:22 2002 +++ linuxppc64_2_4/drivers/scsi/aic7xxx/aicasm/aicasm_macro_gram.y Mon Apr 22 13:53:41 2002 @@ -38,7 +38,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. 
* - * $Id$ + * $Id: aicasm_macro_gram.y,v 1.1 2002/04/22 18:53:41 tgall Exp $ * * $FreeBSD$ */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/scsi/aic7xxx/aicasm/aicasm_macro_scan.l linuxppc64_2_4/drivers/scsi/aic7xxx/aicasm/aicasm_macro_scan.l --- ../kernel.org/linux-2.4.19/drivers/scsi/aic7xxx/aicasm/aicasm_macro_scan.l Fri Apr 19 11:00:22 2002 +++ linuxppc64_2_4/drivers/scsi/aic7xxx/aicasm/aicasm_macro_scan.l Mon Apr 22 13:53:41 2002 @@ -38,7 +38,7 @@ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * - * $Id$ + * $Id: aicasm_macro_scan.l,v 1.1 2002/04/22 18:53:41 tgall Exp $ * * $FreeBSD$ */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/scsi/sr_ioctl.c linuxppc64_2_4/drivers/scsi/sr_ioctl.c --- ../kernel.org/linux-2.4.19/drivers/scsi/sr_ioctl.c Fri Apr 19 10:59:57 2002 +++ linuxppc64_2_4/drivers/scsi/sr_ioctl.c Mon Apr 22 10:33:14 2002 @@ -334,7 +334,12 @@ { u_char sr_cmd[10]; int result, target = MINOR(cdi->dev); - unsigned char buffer[32]; + unsigned char *buffer = scsi_malloc(512); + + if (buffer == NULL) { + printk("SCSI DMA pool exhausted."); + return -ENOMEM; + } memset(sr_cmd, 0, sizeof(sr_cmd)); @@ -407,6 +412,7 @@ return -EINVAL; } + scsi_free(buffer, 512); #if 0 if (result) printk("DEBUG: sr_audio: result for ioctl %x: %x\n", cmd, result); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/sound/dmabuf.c linuxppc64_2_4/drivers/sound/dmabuf.c --- ../kernel.org/linux-2.4.19/drivers/sound/dmabuf.c Fri Apr 19 10:30:24 2002 +++ linuxppc64_2_4/drivers/sound/dmabuf.c Mon Feb 25 08:44:33 2002 @@ -113,7 +113,7 @@ } } dmap->raw_buf = start_addr; - dmap->raw_buf_phys = virt_to_bus(start_addr); + dmap->raw_buf_phys = pci_map_single(NULL, start_addr, dmap->buffsize, PCI_DMA_BIDIRECTIONAL); for (page = virt_to_page(start_addr); page <= virt_to_page(end_addr); page++) mem_map_reserve(page); @@ -134,6 +134,8 @@ start_addr = (unsigned long) dmap->raw_buf; end_addr = start_addr + dmap->buffsize; + + pci_unmap_single(NULL, dmap->raw_buf_phys, dmap->buffsize, PCI_DMA_BIDIRECTIONAL); for (page = virt_to_page(start_addr); page <= virt_to_page(end_addr); page++) mem_map_unreserve(page); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/usb/storage/datafab.c linuxppc64_2_4/drivers/usb/storage/datafab.c --- ../kernel.org/linux-2.4.19/drivers/usb/storage/datafab.c Fri Apr 19 11:00:14 2002 +++ linuxppc64_2_4/drivers/usb/storage/datafab.c Mon Apr 22 10:33:29 2002 @@ -1,6 +1,6 @@ /* Driver for Datafab USB Compact Flash reader * - * $Id: datafab.c,v 1.7 2002/02/25 00:40:13 mdharm Exp $ + * $Id: datafab.c,v 1.7 2002/04/22 15:33:29 tgall Exp $ * * datafab driver v0.1: * diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/usb/storage/isd200.c linuxppc64_2_4/drivers/usb/storage/isd200.c --- ../kernel.org/linux-2.4.19/drivers/usb/storage/isd200.c Fri Apr 19 11:00:14 2002 +++ linuxppc64_2_4/drivers/usb/storage/isd200.c Mon Apr 22 10:33:29 2002 @@ -1,6 +1,6 @@ /* Transport & Protocol Driver for In-System Design, Inc. 
ISD200 ASIC * - * $Id: isd200.c,v 1.14 2002/02/25 00:40:13 mdharm Exp $ + * $Id: isd200.c,v 1.4 2002/04/22 15:33:29 tgall Exp $ * * Current development and maintenance: * (C) 2001-2002 Björn Stenberg (bjorn@haxx.se) diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/usb/storage/jumpshot.c linuxppc64_2_4/drivers/usb/storage/jumpshot.c --- ../kernel.org/linux-2.4.19/drivers/usb/storage/jumpshot.c Fri Apr 19 11:00:14 2002 +++ linuxppc64_2_4/drivers/usb/storage/jumpshot.c Mon Apr 22 10:33:29 2002 @@ -1,6 +1,6 @@ /* Driver for Lexar "Jumpshot" Compact Flash reader * - * $Id: jumpshot.c,v 1.7 2002/02/25 00:40:13 mdharm Exp $ + * $Id: jumpshot.c,v 1.7 2002/04/22 15:33:29 tgall Exp $ * * jumpshot driver v0.1: * diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/video/offb.c linuxppc64_2_4/drivers/video/offb.c --- ../kernel.org/linux-2.4.19/drivers/video/offb.c Fri Apr 19 10:30:28 2002 +++ linuxppc64_2_4/drivers/video/offb.c Wed Nov 14 21:23:38 2001 @@ -430,7 +430,7 @@ info->cmap_type = cmap_unknown; if (depth == 8) { - /* XXX kludge for ati's */ + /* XXX kludge for ati */ if (dp && !strncmp(name, "ATY,Rage128", 11)) { unsigned long regbase = dp->addrs[2].address; info->cmap_adr = ioremap(regbase, 0x1FFF); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/video/pm3fb.c linuxppc64_2_4/drivers/video/pm3fb.c --- ../kernel.org/linux-2.4.19/drivers/video/pm3fb.c Fri Apr 19 10:59:41 2002 +++ linuxppc64_2_4/drivers/video/pm3fb.c Mon Apr 22 13:25:44 2002 @@ -16,7 +16,7 @@ * License. See the file COPYING in the main directory of this archive for * more details. * - * $Header: /cvsroot/linux/drivers/video/pm3fb.c,v 1.1 2002/02/25 19:11:06 marcelo Exp $ + * $Header: /cvs/linuxppc64/linuxppc64_2_4/drivers/video/pm3fb.c,v 1.1 2002/04/22 18:25:44 tgall Exp $ * * CHANGELOG: * Mon Feb 11 10:35:48 MET 2002, v 1.4.11B: Cosmetic update. @@ -3641,7 +3641,7 @@ { DTRACE; - DPRINTK(2, "This is pm3fb.c, CVS version: $Header: /cvsroot/linux/drivers/video/pm3fb.c,v 1.1 2002/02/25 19:11:06 marcelo Exp $"); + DPRINTK(2, "This is pm3fb.c, CVS version: $Header: /cvs/linuxppc64/linuxppc64_2_4/drivers/video/pm3fb.c,v 1.1 2002/04/22 18:25:44 tgall Exp $"); pm3fb_real_setup(g_options); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/drivers/video/pm3fb.h linuxppc64_2_4/drivers/video/pm3fb.h --- ../kernel.org/linux-2.4.19/drivers/video/pm3fb.h Fri Apr 19 10:59:41 2002 +++ linuxppc64_2_4/drivers/video/pm3fb.h Mon Apr 22 13:25:44 2002 @@ -8,7 +8,7 @@ * License. See the file COPYING in the main directory of this archive for * more details. 
* - * $Header: /cvsroot/linux/drivers/video/pm3fb.h,v 1.1 2002/02/25 19:11:06 marcelo Exp $ + * $Header: /cvs/linuxppc64/linuxppc64_2_4/drivers/video/pm3fb.h,v 1.1 2002/04/22 18:25:44 tgall Exp $ * */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/Config.in linuxppc64_2_4/fs/Config.in --- ../kernel.org/linux-2.4.19/fs/Config.in Fri Apr 19 11:00:46 2002 +++ linuxppc64_2_4/fs/Config.in Mon Apr 22 10:35:08 2002 @@ -52,6 +52,8 @@ dep_mbool ' Transparent decompression extension' CONFIG_ZISOFS $CONFIG_ISO9660_FS tristate 'Minix fs support' CONFIG_MINIX_FS +tristate 'JFS filesystem support' CONFIG_JFS_FS +dep_mbool ' JFS debugging' CONFIG_JFS_DEBUG $CONFIG_JFS_FS tristate 'FreeVxFS file system support (VERITAS VxFS(TM) compatible)' CONFIG_VXFS_FS tristate 'NTFS file system support (read only)' CONFIG_NTFS_FS diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/Makefile linuxppc64_2_4/fs/Makefile --- ../kernel.org/linux-2.4.19/fs/Makefile Fri Apr 19 10:29:59 2002 +++ linuxppc64_2_4/fs/Makefile Thu Feb 21 20:57:39 2002 @@ -67,6 +67,7 @@ subdir-$(CONFIG_REISERFS_FS) += reiserfs subdir-$(CONFIG_DEVPTS_FS) += devpts subdir-$(CONFIG_SUN_OPENPROMFS) += openpromfs +subdir-$(CONFIG_JFS_FS) += jfs obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/binfmt_elf.c linuxppc64_2_4/fs/binfmt_elf.c --- ../kernel.org/linux-2.4.19/fs/binfmt_elf.c Fri Apr 19 10:59:41 2002 +++ linuxppc64_2_4/fs/binfmt_elf.c Thu Feb 21 20:57:39 2002 @@ -440,6 +440,7 @@ unsigned int size; unsigned long elf_entry, interp_load_addr = 0; unsigned long start_code, end_code, start_data, end_data; + unsigned long reloc_func_desc = 0; struct elfhdr elf_ex; struct elfhdr interp_elf_ex; struct exec interp_ex; @@ -536,8 +537,6 @@ interp_ex = *((struct exec *) bprm->buf); interp_elf_ex = *((struct elfhdr *) bprm->buf); break; - } else { - SET_PERSONALITY(elf_ex, ibcs2_interpreter); } elf_ppnt++; } @@ -664,6 +663,7 @@ load_bias += error - ELF_PAGESTART(load_bias + vaddr); load_addr += load_bias; + reloc_func_desc = load_addr; } } k = elf_ppnt->p_vaddr; @@ -710,6 +710,7 @@ send_sig(SIGSEGV, current, 0); return 0; } + reloc_func_desc = interp_load_addr; } kfree(elf_phdata); @@ -772,10 +773,14 @@ /* * The ABI may specify that certain registers be set up in special * ways (on i386 %edx is the address of a DT_FINI function, for - * example. This macro performs whatever initialization to - * the regs structure is required. + * example. In addition, it may also specify (eg, PowerPC64 ELF) + * that the e_entry field is the address of the function descriptor + * for the startup routine, rather than the address of the startup + * routine itself. This macro performs whatever initialization to + * the regs structure is required as well as any relocations to the + * function descriptor entries when executing dynamically links apps. */ - ELF_PLAT_INIT(regs); + ELF_PLAT_INIT(regs, reloc_func_desc); #endif start_thread(regs, elf_entry, bprm->p); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/binfmt_elf.c.orig linuxppc64_2_4/fs/binfmt_elf.c.orig --- ../kernel.org/linux-2.4.19/fs/binfmt_elf.c.orig Fri Apr 19 10:29:59 2002 +++ linuxppc64_2_4/fs/binfmt_elf.c.orig Wed Dec 31 18:00:00 1969 @@ -1,1277 +0,0 @@ -/* - * linux/fs/binfmt_elf.c - * - * These are the functions used to load ELF format executables as used - * on SVr4 machines. Information on the format may be found in the book - * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support - * Tools". 
- * - * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com). - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define DLINFO_ITEMS 13 - -#include - -static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs); -static int load_elf_library(struct file*); -static unsigned long elf_map (struct file *, unsigned long, struct elf_phdr *, int, int); -extern int dump_fpu (struct pt_regs *, elf_fpregset_t *); -extern void dump_thread(struct pt_regs *, struct user *); - -#ifndef elf_addr_t -#define elf_addr_t unsigned long -#define elf_caddr_t char * -#endif - -/* - * If we don't support core dumping, then supply a NULL so we - * don't even try. - */ -#ifdef USE_ELF_CORE_DUMP -static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file); -#else -#define elf_core_dump NULL -#endif - -#if ELF_EXEC_PAGESIZE > PAGE_SIZE -# define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE -#else -# define ELF_MIN_ALIGN PAGE_SIZE -#endif - -#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1)) -#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1)) -#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1)) - -static struct linux_binfmt elf_format = { - NULL, THIS_MODULE, load_elf_binary, load_elf_library, elf_core_dump, ELF_EXEC_PAGESIZE -}; - -#define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE) - -static void set_brk(unsigned long start, unsigned long end) -{ - start = ELF_PAGEALIGN(start); - end = ELF_PAGEALIGN(end); - if (end <= start) - return; - do_brk(start, end - start); -} - - -/* We need to explicitly zero any fractional pages - after the data section (i.e. bss). This would - contain the junk from the file that should not - be in memory */ - - -static void padzero(unsigned long elf_bss) -{ - unsigned long nbyte; - - nbyte = ELF_PAGEOFFSET(elf_bss); - if (nbyte) { - nbyte = ELF_MIN_ALIGN - nbyte; - clear_user((void *) elf_bss, nbyte); - } -} - -static elf_addr_t * -create_elf_tables(char *p, int argc, int envc, - struct elfhdr * exec, - unsigned long load_addr, - unsigned long load_bias, - unsigned long interp_load_addr, int ibcs) -{ - elf_caddr_t *argv; - elf_caddr_t *envp; - elf_addr_t *sp, *csp; - char *k_platform, *u_platform; - long hwcap; - size_t platform_len = 0; - size_t len; - - /* - * Get hold of platform and hardware capabilities masks for - * the machine we are running on. In some cases (Sparc), - * this info is impossible to get, in others (i386) it is - * merely difficult. - */ - - hwcap = ELF_HWCAP; - k_platform = ELF_PLATFORM; - - if (k_platform) { - platform_len = strlen(k_platform) + 1; - u_platform = p - platform_len; - __copy_to_user(u_platform, k_platform, platform_len); - } else - u_platform = p; - -#if defined(__i386__) && defined(CONFIG_SMP) - /* - * In some cases (e.g. Hyper-Threading), we want to avoid L1 evictions - * by the processes running on the same package. One thing we can do - * is to shuffle the initial stack for them. - * - * The conditionals here are unneeded, but kept in to make the - * code behaviour the same as pre change unless we have hyperthreaded - * processors. 
This keeps Mr Marcelo Person happier but should be - * removed for 2.5 - */ - - if(smp_num_siblings > 1) - u_platform = u_platform - ((current->pid % 64) << 7); -#endif - - /* - * Force 16 byte _final_ alignment here for generality. - */ - sp = (elf_addr_t *)(~15UL & (unsigned long)(u_platform)); - csp = sp; - csp -= (1+DLINFO_ITEMS)*2 + (k_platform ? 2 : 0); -#ifdef DLINFO_ARCH_ITEMS - csp -= DLINFO_ARCH_ITEMS*2; -#endif - csp -= envc+1; - csp -= argc+1; - csp -= (!ibcs ? 3 : 1); /* argc itself */ - if ((unsigned long)csp & 15UL) - sp -= ((unsigned long)csp & 15UL) / sizeof(*sp); - - /* - * Put the ELF interpreter info on the stack - */ -#define NEW_AUX_ENT(nr, id, val) \ - __put_user ((id), sp+(nr*2)); \ - __put_user ((val), sp+(nr*2+1)); \ - - sp -= 2; - NEW_AUX_ENT(0, AT_NULL, 0); - if (k_platform) { - sp -= 2; - NEW_AUX_ENT(0, AT_PLATFORM, (elf_addr_t)(unsigned long) u_platform); - } - sp -= DLINFO_ITEMS*2; - NEW_AUX_ENT( 0, AT_HWCAP, hwcap); - NEW_AUX_ENT( 1, AT_PAGESZ, ELF_EXEC_PAGESIZE); - NEW_AUX_ENT( 2, AT_CLKTCK, CLOCKS_PER_SEC); - NEW_AUX_ENT( 3, AT_PHDR, load_addr + exec->e_phoff); - NEW_AUX_ENT( 4, AT_PHENT, sizeof (struct elf_phdr)); - NEW_AUX_ENT( 5, AT_PHNUM, exec->e_phnum); - NEW_AUX_ENT( 6, AT_BASE, interp_load_addr); - NEW_AUX_ENT( 7, AT_FLAGS, 0); - NEW_AUX_ENT( 8, AT_ENTRY, load_bias + exec->e_entry); - NEW_AUX_ENT( 9, AT_UID, (elf_addr_t) current->uid); - NEW_AUX_ENT(10, AT_EUID, (elf_addr_t) current->euid); - NEW_AUX_ENT(11, AT_GID, (elf_addr_t) current->gid); - NEW_AUX_ENT(12, AT_EGID, (elf_addr_t) current->egid); -#ifdef ARCH_DLINFO - /* - * ARCH_DLINFO must come last so platform specific code can enforce - * special alignment requirements on the AUXV if necessary (eg. PPC). - */ - ARCH_DLINFO; -#endif -#undef NEW_AUX_ENT - - sp -= envc+1; - envp = (elf_caddr_t *) sp; - sp -= argc+1; - argv = (elf_caddr_t *) sp; - if (!ibcs) { - __put_user((elf_addr_t)(unsigned long) envp,--sp); - __put_user((elf_addr_t)(unsigned long) argv,--sp); - } - - __put_user((elf_addr_t)argc,--sp); - current->mm->arg_start = (unsigned long) p; - while (argc-->0) { - __put_user((elf_caddr_t)(unsigned long)p,argv++); - len = strnlen_user(p, PAGE_SIZE*MAX_ARG_PAGES); - if (!len || len > PAGE_SIZE*MAX_ARG_PAGES) - return NULL; - p += len; - } - __put_user(NULL, argv); - current->mm->arg_end = current->mm->env_start = (unsigned long) p; - while (envc-->0) { - __put_user((elf_caddr_t)(unsigned long)p,envp++); - len = strnlen_user(p, PAGE_SIZE*MAX_ARG_PAGES); - if (!len || len > PAGE_SIZE*MAX_ARG_PAGES) - return NULL; - p += len; - } - __put_user(NULL, envp); - current->mm->env_end = (unsigned long) p; - return sp; -} - -#ifndef elf_map - -static inline unsigned long -elf_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type) -{ - unsigned long map_addr; - - down_write(¤t->mm->mmap_sem); - map_addr = do_mmap(filep, ELF_PAGESTART(addr), - eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, type, - eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr)); - up_write(¤t->mm->mmap_sem); - return(map_addr); -} - -#endif /* !elf_map */ - -/* This is much more generalized than the library routine read function, - so we keep this separate. 
-   Technically the library read function
-   is only provided so that we can read a.out libraries that have
-   an ELF header */
-
-static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
-				     struct file * interpreter,
-				     unsigned long *interp_load_addr)
-{
-	struct elf_phdr *elf_phdata;
-	struct elf_phdr *eppnt;
-	unsigned long load_addr = 0;
-	int load_addr_set = 0;
-	unsigned long last_bss = 0, elf_bss = 0;
-	unsigned long error = ~0UL;
-	int retval, i, size;
-
-	/* First of all, some simple consistency checks */
-	if (interp_elf_ex->e_type != ET_EXEC &&
-	    interp_elf_ex->e_type != ET_DYN)
-		goto out;
-	if (!elf_check_arch(interp_elf_ex))
-		goto out;
-	if (!interpreter->f_op || !interpreter->f_op->mmap)
-		goto out;
-
-	/*
-	 * If the size of this structure has changed, then punt, since
-	 * we will be doing the wrong thing.
-	 */
-	if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
-		goto out;
-	if (interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
-		goto out;
-
-	/* Now read in all of the header information */
-
-	size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
-	if (size > ELF_MIN_ALIGN)
-		goto out;
-	elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL);
-	if (!elf_phdata)
-		goto out;
-
-	retval = kernel_read(interpreter,interp_elf_ex->e_phoff,(char *)elf_phdata,size);
-	error = retval;
-	if (retval < 0)
-		goto out_close;
-
-	eppnt = elf_phdata;
-	for (i=0; i<interp_elf_ex->e_phnum; i++, eppnt++) {
-		if (eppnt->p_type == PT_LOAD) {
-			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
-			int elf_prot = 0;
-			unsigned long vaddr = 0;
-			unsigned long k, map_addr;
-
-			if (eppnt->p_flags & PF_R) elf_prot = PROT_READ;
-			if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
-			if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
-			vaddr = eppnt->p_vaddr;
-			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
-				elf_type |= MAP_FIXED;
-
-			map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type);
-			if (BAD_ADDR(map_addr))
-				goto out_close;
-
-			if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) {
-				load_addr = map_addr - ELF_PAGESTART(vaddr);
-				load_addr_set = 1;
-			}
-
-			/*
-			 * Find the end of the file mapping for this phdr, and keep
-			 * track of the largest address we see for this.
-			 */
-			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
-			if (k > elf_bss)
-				elf_bss = k;
-
-			/*
-			 * Do the same thing for the memory mapping - between
-			 * elf_bss and last_bss is the bss section.
-			 */
-			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
-			if (k > last_bss)
-				last_bss = k;
-		}
-	}
-
-	/* Now use mmap to map the library into memory. */
-
-	/*
-	 * Now fill out the bss section.  First pad the last page up
-	 * to the page boundary, and then perform a mmap to make sure
-	 * that there are zero-mapped pages up to and including the
-	 * last bss page.
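-	 * For example (illustrative numbers, assuming ELF_MIN_ALIGN is
-	 * 4096): with elf_bss == 0x0804a123, padzero() clears the 0xedd
-	 * bytes from 0x0804a123 up to 0x0804b000, and any last_bss beyond
-	 * that boundary is then mapped anonymously via do_brk().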
- */ - padzero(elf_bss); - elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); /* What we have mapped so far */ - - /* Map the last of the bss segment */ - if (last_bss > elf_bss) - do_brk(elf_bss, last_bss - elf_bss); - - *interp_load_addr = load_addr; - error = ((unsigned long) interp_elf_ex->e_entry) + load_addr; - -out_close: - kfree(elf_phdata); -out: - return error; -} - -static unsigned long load_aout_interp(struct exec * interp_ex, - struct file * interpreter) -{ - unsigned long text_data, elf_entry = ~0UL; - char * addr; - loff_t offset; - int retval; - - current->mm->end_code = interp_ex->a_text; - text_data = interp_ex->a_text + interp_ex->a_data; - current->mm->end_data = text_data; - current->mm->brk = interp_ex->a_bss + text_data; - - switch (N_MAGIC(*interp_ex)) { - case OMAGIC: - offset = 32; - addr = (char *) 0; - break; - case ZMAGIC: - case QMAGIC: - offset = N_TXTOFF(*interp_ex); - addr = (char *) N_TXTADDR(*interp_ex); - break; - default: - goto out; - } - - do_brk(0, text_data); - retval = -ENOEXEC; - if (!interpreter->f_op || !interpreter->f_op->read) - goto out; - retval = interpreter->f_op->read(interpreter, addr, text_data, &offset); - if (retval < 0) - goto out; - flush_icache_range((unsigned long)addr, - (unsigned long)addr + text_data); - - do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1), - interp_ex->a_bss); - elf_entry = interp_ex->a_entry; - -out: - return elf_entry; -} - -/* - * These are the functions used to load ELF style executables and shared - * libraries. There is no binary dependent code anywhere else. - */ - -#define INTERPRETER_NONE 0 -#define INTERPRETER_AOUT 1 -#define INTERPRETER_ELF 2 - - -static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) -{ - struct file *interpreter = NULL; /* to shut gcc up */ - unsigned long load_addr = 0, load_bias = 0; - int load_addr_set = 0; - char * elf_interpreter = NULL; - unsigned int interpreter_type = INTERPRETER_NONE; - unsigned char ibcs2_interpreter = 0; - unsigned long error; - struct elf_phdr * elf_ppnt, *elf_phdata; - unsigned long elf_bss, k, elf_brk; - int elf_exec_fileno; - int retval, i; - unsigned int size; - unsigned long elf_entry, interp_load_addr = 0; - unsigned long start_code, end_code, start_data, end_data; - struct elfhdr elf_ex; - struct elfhdr interp_elf_ex; - struct exec interp_ex; - char passed_fileno[6]; - - /* Get the exec-header */ - elf_ex = *((struct elfhdr *) bprm->buf); - - retval = -ENOEXEC; - /* First of all, some simple consistency checks */ - if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0) - goto out; - - if (elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) - goto out; - if (!elf_check_arch(&elf_ex)) - goto out; - if (!bprm->file->f_op||!bprm->file->f_op->mmap) - goto out; - - /* Now read in all of the header information */ - - retval = -ENOMEM; - if (elf_ex.e_phentsize != sizeof(struct elf_phdr)) - goto out; - if (elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr)) - goto out; - size = elf_ex.e_phnum * sizeof(struct elf_phdr); - elf_phdata = (struct elf_phdr *) kmalloc(size, GFP_KERNEL); - if (!elf_phdata) - goto out; - - retval = kernel_read(bprm->file, elf_ex.e_phoff, (char *) elf_phdata, size); - if (retval < 0) - goto out_free_ph; - - retval = get_unused_fd(); - if (retval < 0) - goto out_free_ph; - get_file(bprm->file); - fd_install(elf_exec_fileno = retval, bprm->file); - - elf_ppnt = elf_phdata; - elf_bss = 0; - elf_brk = 0; - - start_code = ~0UL; - end_code = 0; - start_data = 0; - end_data = 0; - - for (i = 0; i < elf_ex.e_phnum; 
i++) { - if (elf_ppnt->p_type == PT_INTERP) { - /* This is the program interpreter used for - * shared libraries - for now assume that this - * is an a.out format binary - */ - - retval = -ENOMEM; - if (elf_ppnt->p_filesz > PATH_MAX) - goto out_free_file; - elf_interpreter = (char *) kmalloc(elf_ppnt->p_filesz, - GFP_KERNEL); - if (!elf_interpreter) - goto out_free_file; - - retval = kernel_read(bprm->file, elf_ppnt->p_offset, - elf_interpreter, - elf_ppnt->p_filesz); - if (retval < 0) - goto out_free_interp; - /* If the program interpreter is one of these two, - * then assume an iBCS2 image. Otherwise assume - * a native linux image. - */ - if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 || - strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0) - ibcs2_interpreter = 1; -#if 0 - printk("Using ELF interpreter %s\n", elf_interpreter); -#endif - - SET_PERSONALITY(elf_ex, ibcs2_interpreter); - - interpreter = open_exec(elf_interpreter); - retval = PTR_ERR(interpreter); - if (IS_ERR(interpreter)) - goto out_free_interp; - retval = kernel_read(interpreter, 0, bprm->buf, BINPRM_BUF_SIZE); - if (retval < 0) - goto out_free_dentry; - - /* Get the exec headers */ - interp_ex = *((struct exec *) bprm->buf); - interp_elf_ex = *((struct elfhdr *) bprm->buf); - break; - } else { - SET_PERSONALITY(elf_ex, ibcs2_interpreter); - } - elf_ppnt++; - } - - /* Some simple consistency checks for the interpreter */ - if (elf_interpreter) { - interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT; - - /* Now figure out which format our binary is */ - if ((N_MAGIC(interp_ex) != OMAGIC) && - (N_MAGIC(interp_ex) != ZMAGIC) && - (N_MAGIC(interp_ex) != QMAGIC)) - interpreter_type = INTERPRETER_ELF; - - if (memcmp(interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0) - interpreter_type &= ~INTERPRETER_ELF; - - retval = -ELIBBAD; - if (!interpreter_type) - goto out_free_dentry; - - /* Make sure only one type was selected */ - if ((interpreter_type & INTERPRETER_ELF) && - interpreter_type != INTERPRETER_ELF) { - // FIXME - ratelimit this before re-enabling - // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n"); - interpreter_type = INTERPRETER_ELF; - } - } - - /* OK, we are done with that, now set up the arg stuff, - and then start this sucker up */ - - if (!bprm->sh_bang) { - char * passed_p; - - if (interpreter_type == INTERPRETER_AOUT) { - sprintf(passed_fileno, "%d", elf_exec_fileno); - passed_p = passed_fileno; - - if (elf_interpreter) { - retval = copy_strings_kernel(1,&passed_p,bprm); - if (retval) - goto out_free_dentry; - bprm->argc++; - } - } - } - - /* Flush all traces of the currently running executable */ - retval = flush_old_exec(bprm); - if (retval) - goto out_free_dentry; - - /* OK, This is the point of no return */ - current->mm->start_data = 0; - current->mm->end_data = 0; - current->mm->end_code = 0; - current->mm->mmap = NULL; - current->flags &= ~PF_FORKNOEXEC; - elf_entry = (unsigned long) elf_ex.e_entry; - - /* Do this so that we can load the interpreter, if need be. We will - change some of these later */ - current->mm->rss = 0; - setup_arg_pages(bprm); /* XXX: check error */ - current->mm->start_stack = bprm->p; - - /* Now we do a little grungy work by mmaping the ELF image into - the correct location in memory. At this point, we assume that - the image should be loaded at fixed address, not at a variable - address. 
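-   (For ET_DYN objects the loop below instead picks load_bias =
-   ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr), so a dynamic executable
-   whose first PT_LOAD has p_vaddr == 0 ends up at the page
-   containing ELF_ET_DYN_BASE rather than at a fixed address.)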
*/ - - for(i = 0, elf_ppnt = elf_phdata; i < elf_ex.e_phnum; i++, elf_ppnt++) { - int elf_prot = 0, elf_flags; - unsigned long vaddr; - - if (elf_ppnt->p_type != PT_LOAD) - continue; - - if (unlikely (elf_brk > elf_bss)) { - unsigned long nbyte; - - /* There was a PT_LOAD segment with p_memsz > p_filesz - before this one. Map anonymous pages, if needed, - and clear the area. */ - set_brk (elf_bss + load_bias, elf_brk + load_bias); - nbyte = ELF_PAGEOFFSET(elf_bss); - if (nbyte) { - nbyte = ELF_MIN_ALIGN - nbyte; - if (nbyte > elf_brk - elf_bss) - nbyte = elf_brk - elf_bss; - clear_user((void *) elf_bss + load_bias, nbyte); - } - } - - if (elf_ppnt->p_flags & PF_R) elf_prot |= PROT_READ; - if (elf_ppnt->p_flags & PF_W) elf_prot |= PROT_WRITE; - if (elf_ppnt->p_flags & PF_X) elf_prot |= PROT_EXEC; - - elf_flags = MAP_PRIVATE|MAP_DENYWRITE|MAP_EXECUTABLE; - - vaddr = elf_ppnt->p_vaddr; - if (elf_ex.e_type == ET_EXEC || load_addr_set) { - elf_flags |= MAP_FIXED; - } else if (elf_ex.e_type == ET_DYN) { - /* Try and get dynamic programs out of the way of the default mmap - base, as well as whatever program they might try to exec. This - is because the brk will follow the loader, and is not movable. */ - load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); - } - - error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags); - if (BAD_ADDR(error)) - continue; - - if (!load_addr_set) { - load_addr_set = 1; - load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset); - if (elf_ex.e_type == ET_DYN) { - load_bias += error - - ELF_PAGESTART(load_bias + vaddr); - load_addr += load_bias; - } - } - k = elf_ppnt->p_vaddr; - if (k < start_code) start_code = k; - if (start_data < k) start_data = k; - - k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz; - - if (k > elf_bss) - elf_bss = k; - if ((elf_ppnt->p_flags & PF_X) && end_code < k) - end_code = k; - if (end_data < k) - end_data = k; - k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz; - if (k > elf_brk) - elf_brk = k; - } - - elf_entry += load_bias; - elf_bss += load_bias; - elf_brk += load_bias; - start_code += load_bias; - end_code += load_bias; - start_data += load_bias; - end_data += load_bias; - - if (elf_interpreter) { - if (interpreter_type == INTERPRETER_AOUT) - elf_entry = load_aout_interp(&interp_ex, - interpreter); - else - elf_entry = load_elf_interp(&interp_elf_ex, - interpreter, - &interp_load_addr); - - allow_write_access(interpreter); - fput(interpreter); - kfree(elf_interpreter); - - if (BAD_ADDR(elf_entry)) { - printk(KERN_ERR "Unable to load interpreter\n"); - kfree(elf_phdata); - send_sig(SIGSEGV, current, 0); - return 0; - } - } - - kfree(elf_phdata); - - if (interpreter_type != INTERPRETER_AOUT) - sys_close(elf_exec_fileno); - - set_binfmt(&elf_format); - - compute_creds(bprm); - current->flags &= ~PF_FORKNOEXEC; - bprm->p = (unsigned long) - create_elf_tables((char *)bprm->p, - bprm->argc, - bprm->envc, - &elf_ex, - load_addr, load_bias, - interp_load_addr, - (interpreter_type == INTERPRETER_AOUT ? 0 : 1)); - /* N.B. passed_fileno might not be initialized? 
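-	    (It is only filled in on the INTERPRETER_AOUT path when
-	    bprm->sh_bang is clear, yet it is read below whenever
-	    interpreter_type == INTERPRETER_AOUT.)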
-	 */
-	if (interpreter_type == INTERPRETER_AOUT)
-		current->mm->arg_start += strlen(passed_fileno) + 1;
-	current->mm->start_brk = current->mm->brk = elf_brk;
-	current->mm->end_code = end_code;
-	current->mm->start_code = start_code;
-	current->mm->start_data = start_data;
-	current->mm->end_data = end_data;
-	current->mm->start_stack = bprm->p;
-
-	/* Calling set_brk effectively mmaps the pages that we need
-	 * for the bss and break sections
-	 */
-	set_brk(elf_bss, elf_brk);
-
-	padzero(elf_bss);
-
-#if 0
-	printk("(start_brk) %lx\n" , (long) current->mm->start_brk);
-	printk("(end_code) %lx\n" , (long) current->mm->end_code);
-	printk("(start_code) %lx\n" , (long) current->mm->start_code);
-	printk("(start_data) %lx\n" , (long) current->mm->start_data);
-	printk("(end_data) %lx\n" , (long) current->mm->end_data);
-	printk("(start_stack) %lx\n" , (long) current->mm->start_stack);
-	printk("(brk) %lx\n" , (long) current->mm->brk);
-#endif
-
-	if (current->personality & MMAP_PAGE_ZERO) {
-		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
-		   and some applications "depend" upon this behavior.
-		   Since we do not have the power to recompile these, we
-		   emulate the SVr4 behavior.  Sigh.  */
-		/* N.B. Shouldn't the size here be PAGE_SIZE?? */
-		down_write(&current->mm->mmap_sem);
-		error = do_mmap(NULL, 0, 4096, PROT_READ | PROT_EXEC,
-				MAP_FIXED | MAP_PRIVATE, 0);
-		up_write(&current->mm->mmap_sem);
-	}
-
-#ifdef ELF_PLAT_INIT
-	/*
-	 * The ABI may specify that certain registers be set up in special
-	 * ways (on i386 %edx is the address of a DT_FINI function, for
-	 * example.  This macro performs whatever initialization to
-	 * the regs structure is required.
-	 */
-	ELF_PLAT_INIT(regs);
-#endif
-
-	start_thread(regs, elf_entry, bprm->p);
-	if (current->ptrace & PT_PTRACED)
-		send_sig(SIGTRAP, current, 0);
-	retval = 0;
-out:
-	return retval;
-
-	/* error cleanup */
-out_free_dentry:
-	allow_write_access(interpreter);
-	fput(interpreter);
-out_free_interp:
-	if (elf_interpreter)
-		kfree(elf_interpreter);
-out_free_file:
-	sys_close(elf_exec_fileno);
-out_free_ph:
-	kfree(elf_phdata);
-	goto out;
-}
-
-/* This is really simpleminded and specialized - we are loading an
-   a.out library that is given an ELF header. */
-
-static int load_elf_library(struct file *file)
-{
-	struct elf_phdr *elf_phdata;
-	unsigned long elf_bss, bss, len;
-	int retval, error, i, j;
-	struct elfhdr elf_ex;
-
-	error = -ENOEXEC;
-	retval = kernel_read(file, 0, (char *) &elf_ex, sizeof(elf_ex));
-	if (retval != sizeof(elf_ex))
-		goto out;
-
-	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
-		goto out;
-
-	/* First of all, some simple consistency checks */
-	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
-	    !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
-		goto out;
-
-	/* Now read in all of the header information */
-
-	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
-	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
-
-	error = -ENOMEM;
-	elf_phdata = (struct elf_phdr *) kmalloc(j, GFP_KERNEL);
-	if (!elf_phdata)
-		goto out;
-
-	error = -ENOEXEC;
-	retval = kernel_read(file, elf_ex.e_phoff, (char *) elf_phdata, j);
-	if (retval != j)
-		goto out_free_ph;
-
-	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
-		if ((elf_phdata + i)->p_type == PT_LOAD) j++;
-	if (j != 1)
-		goto out_free_ph;
-
-	while (elf_phdata->p_type != PT_LOAD) elf_phdata++;
-
-	/* Now use mmap to map the library into memory. */
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap(file,
-			ELF_PAGESTART(elf_phdata->p_vaddr),
-			(elf_phdata->p_filesz +
-			 ELF_PAGEOFFSET(elf_phdata->p_vaddr)),
-			PROT_READ | PROT_WRITE | PROT_EXEC,
-			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
-			(elf_phdata->p_offset -
-			 ELF_PAGEOFFSET(elf_phdata->p_vaddr)));
-	up_write(&current->mm->mmap_sem);
-	if (error != ELF_PAGESTART(elf_phdata->p_vaddr))
-		goto out_free_ph;
-
-	elf_bss = elf_phdata->p_vaddr + elf_phdata->p_filesz;
-	padzero(elf_bss);
-
-	len = ELF_PAGESTART(elf_phdata->p_filesz + elf_phdata->p_vaddr + ELF_MIN_ALIGN - 1);
-	bss = elf_phdata->p_memsz + elf_phdata->p_vaddr;
-	if (bss > len)
-		do_brk(len, bss - len);
-	error = 0;
-
-out_free_ph:
-	kfree(elf_phdata);
-out:
-	return error;
-}
-
-/*
- * Note that some platforms still use traditional core dumps and not
- * the ELF core dump.  Each platform can select it as appropriate.
- */
-#ifdef USE_ELF_CORE_DUMP
-
-/*
- * ELF core dumper
- *
- * Modelled on fs/exec.c:aout_core_dump()
- * Jeremy Fitzhardinge
- */
-/*
- * These are the only things you should do on a core-file: use only these
- * functions to write out all the necessary info.
- */
-static int dump_write(struct file *file, const void *addr, int nr)
-{
-	return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
-
-static int dump_seek(struct file *file, off_t off)
-{
-	if (file->f_op->llseek) {
-		if (file->f_op->llseek(file, off, 0) != off)
-			return 0;
-	} else
-		file->f_pos = off;
-	return 1;
-}
-
-/*
- * Decide whether a segment is worth dumping; default is yes to be
- * sure (missing info is worse than too much; etc).
- * Personally I'd include everything, and use the coredump limit...
- *
- * I think we should skip something.  But I am not sure how.  H.J.
- */
-static inline int maydump(struct vm_area_struct *vma)
-{
-	/*
-	 * If we may not read the contents, don't allow us to dump
-	 * them either. "dump_write()" can't handle it anyway.
-	 */
-	if (!(vma->vm_flags & VM_READ))
-		return 0;
-
-	/* Do not dump I/O mapped devices!
-DaveM */ - if (vma->vm_flags & VM_IO) - return 0; -#if 1 - if (vma->vm_flags & (VM_WRITE|VM_GROWSUP|VM_GROWSDOWN)) - return 1; - if (vma->vm_flags & (VM_READ|VM_EXEC|VM_EXECUTABLE|VM_SHARED)) - return 0; -#endif - return 1; -} - -#define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) - -/* An ELF note in memory */ -struct memelfnote -{ - const char *name; - int type; - unsigned int datasz; - void *data; -}; - -static int notesize(struct memelfnote *en) -{ - int sz; - - sz = sizeof(struct elf_note); - sz += roundup(strlen(en->name), 4); - sz += roundup(en->datasz, 4); - - return sz; -} - -/* #define DEBUG */ - -#ifdef DEBUG -static void dump_regs(const char *str, elf_greg_t *r) -{ - int i; - static const char *regs[] = { "ebx", "ecx", "edx", "esi", "edi", "ebp", - "eax", "ds", "es", "fs", "gs", - "orig_eax", "eip", "cs", - "efl", "uesp", "ss"}; - printk("Registers: %s\n", str); - - for(i = 0; i < ELF_NGREG; i++) - { - unsigned long val = r[i]; - printk(" %-2d %-5s=%08lx %lu\n", i, regs[i], val, val); - } -} -#endif - -#define DUMP_WRITE(addr, nr) \ - do { if (!dump_write(file, (addr), (nr))) return 0; } while(0) -#define DUMP_SEEK(off) \ - do { if (!dump_seek(file, (off))) return 0; } while(0) - -static int writenote(struct memelfnote *men, struct file *file) -{ - struct elf_note en; - - en.n_namesz = strlen(men->name); - en.n_descsz = men->datasz; - en.n_type = men->type; - - DUMP_WRITE(&en, sizeof(en)); - DUMP_WRITE(men->name, en.n_namesz); - /* XXX - cast from long long to long to avoid need for libgcc.a */ - DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */ - DUMP_WRITE(men->data, men->datasz); - DUMP_SEEK(roundup((unsigned long)file->f_pos, 4)); /* XXX */ - - return 1; -} -#undef DUMP_WRITE -#undef DUMP_SEEK - -#define DUMP_WRITE(addr, nr) \ - if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \ - goto end_coredump; -#define DUMP_SEEK(off) \ - if (!dump_seek(file, (off))) \ - goto end_coredump; -/* - * Actual dumper - * - * This is a two-pass process; first we find the offsets of the bits, - * and then they are actually written out. If we run out of core limit - * we just truncate. - */ -static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file) -{ - int has_dumped = 0; - mm_segment_t fs; - int segs; - size_t size = 0; - int i; - struct vm_area_struct *vma; - struct elfhdr elf; - off_t offset = 0, dataoff; - unsigned long limit = current->rlim[RLIMIT_CORE].rlim_cur; - int numnote = 4; - struct memelfnote notes[4]; - struct elf_prstatus prstatus; /* NT_PRSTATUS */ - elf_fpregset_t fpu; /* NT_PRFPREG */ - struct elf_prpsinfo psinfo; /* NT_PRPSINFO */ - - /* first copy the parameters from user space */ - memset(&psinfo, 0, sizeof(psinfo)); - { - int i, len; - - len = current->mm->arg_end - current->mm->arg_start; - if (len >= ELF_PRARGSZ) - len = ELF_PRARGSZ-1; - copy_from_user(&psinfo.pr_psargs, - (const char *)current->mm->arg_start, len); - for(i = 0; i < len; i++) - if (psinfo.pr_psargs[i] == 0) - psinfo.pr_psargs[i] = ' '; - psinfo.pr_psargs[len] = 0; - - } - - memset(&prstatus, 0, sizeof(prstatus)); - /* - * This transfers the registers from regs into the standard - * coredump arrangement, whatever that is. 
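- * On most architectures ELF_CORE_COPY_REGS does that conversion;
- * the assignment fallback below is only valid when elf_gregset_t
- * and struct pt_regs have identical size and layout, which the
- * printk branch checks for.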
-	 */
-#ifdef ELF_CORE_COPY_REGS
-	ELF_CORE_COPY_REGS(prstatus.pr_reg, regs)
-#else
-	if (sizeof(elf_gregset_t) != sizeof(struct pt_regs))
-	{
-		printk("sizeof(elf_gregset_t) (%ld) != sizeof(struct pt_regs) (%ld)\n",
-			(long)sizeof(elf_gregset_t), (long)sizeof(struct pt_regs));
-	}
-	else
-		*(struct pt_regs *)&prstatus.pr_reg = *regs;
-#endif
-
-	/* now stop all vm operations */
-	down_write(&current->mm->mmap_sem);
-	segs = current->mm->map_count;
-
-#ifdef DEBUG
-	printk("elf_core_dump: %d segs %lu limit\n", segs, limit);
-#endif
-
-	/* Set up header */
-	memcpy(elf.e_ident, ELFMAG, SELFMAG);
-	elf.e_ident[EI_CLASS] = ELF_CLASS;
-	elf.e_ident[EI_DATA] = ELF_DATA;
-	elf.e_ident[EI_VERSION] = EV_CURRENT;
-	memset(elf.e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
-
-	elf.e_type = ET_CORE;
-	elf.e_machine = ELF_ARCH;
-	elf.e_version = EV_CURRENT;
-	elf.e_entry = 0;
-	elf.e_phoff = sizeof(elf);
-	elf.e_shoff = 0;
-	elf.e_flags = 0;
-	elf.e_ehsize = sizeof(elf);
-	elf.e_phentsize = sizeof(struct elf_phdr);
-	elf.e_phnum = segs+1;	/* Include notes */
-	elf.e_shentsize = 0;
-	elf.e_shnum = 0;
-	elf.e_shstrndx = 0;
-
-	fs = get_fs();
-	set_fs(KERNEL_DS);
-
-	has_dumped = 1;
-	current->flags |= PF_DUMPCORE;
-
-	DUMP_WRITE(&elf, sizeof(elf));
-	offset += sizeof(elf);				/* Elf header */
-	offset += (segs+1) * sizeof(struct elf_phdr);	/* Program headers */
-
-	/*
-	 * Set up the notes in similar form to SVR4 core dumps made
-	 * with info from their /proc.
-	 */
-
-	notes[0].name = "CORE";
-	notes[0].type = NT_PRSTATUS;
-	notes[0].datasz = sizeof(prstatus);
-	notes[0].data = &prstatus;
-	prstatus.pr_info.si_signo = prstatus.pr_cursig = signr;
-	prstatus.pr_sigpend = current->pending.signal.sig[0];
-	prstatus.pr_sighold = current->blocked.sig[0];
-	psinfo.pr_pid = prstatus.pr_pid = current->pid;
-	psinfo.pr_ppid = prstatus.pr_ppid = current->p_pptr->pid;
-	psinfo.pr_pgrp = prstatus.pr_pgrp = current->pgrp;
-	psinfo.pr_sid = prstatus.pr_sid = current->session;
-	prstatus.pr_utime.tv_sec = CT_TO_SECS(current->times.tms_utime);
-	prstatus.pr_utime.tv_usec = CT_TO_USECS(current->times.tms_utime);
-	prstatus.pr_stime.tv_sec = CT_TO_SECS(current->times.tms_stime);
-	prstatus.pr_stime.tv_usec = CT_TO_USECS(current->times.tms_stime);
-	prstatus.pr_cutime.tv_sec = CT_TO_SECS(current->times.tms_cutime);
-	prstatus.pr_cutime.tv_usec = CT_TO_USECS(current->times.tms_cutime);
-	prstatus.pr_cstime.tv_sec = CT_TO_SECS(current->times.tms_cstime);
-	prstatus.pr_cstime.tv_usec = CT_TO_USECS(current->times.tms_cstime);
-
-#ifdef DEBUG
-	dump_regs("Passed in regs", (elf_greg_t *)regs);
-	dump_regs("prstatus regs", (elf_greg_t *)&prstatus.pr_reg);
-#endif
-
-	notes[1].name = "CORE";
-	notes[1].type = NT_PRPSINFO;
-	notes[1].datasz = sizeof(psinfo);
-	notes[1].data = &psinfo;
-	i = current->state ? ffz(~current->state) + 1 : 0;
-	psinfo.pr_state = i;
-	psinfo.pr_sname = (i < 0 || i > 5) ? '.' : "RSDZTD"[i];
-	psinfo.pr_zomb = psinfo.pr_sname == 'Z';
-	psinfo.pr_nice = current->nice;
-	psinfo.pr_flag = current->flags;
-	psinfo.pr_uid = NEW_TO_OLD_UID(current->uid);
-	psinfo.pr_gid = NEW_TO_OLD_GID(current->gid);
-	strncpy(psinfo.pr_fname, current->comm, sizeof(psinfo.pr_fname));
-
-	notes[2].name = "CORE";
-	notes[2].type = NT_TASKSTRUCT;
-	notes[2].datasz = sizeof(*current);
-	notes[2].data = current;
-
-	/* Try to dump the FPU. */
*/ - prstatus.pr_fpvalid = dump_fpu (regs, &fpu); - if (!prstatus.pr_fpvalid) - { - numnote--; - } - else - { - notes[3].name = "CORE"; - notes[3].type = NT_PRFPREG; - notes[3].datasz = sizeof(fpu); - notes[3].data = &fpu; - } - - /* Write notes phdr entry */ - { - struct elf_phdr phdr; - int sz = 0; - - for(i = 0; i < numnote; i++) - sz += notesize(¬es[i]); - - phdr.p_type = PT_NOTE; - phdr.p_offset = offset; - phdr.p_vaddr = 0; - phdr.p_paddr = 0; - phdr.p_filesz = sz; - phdr.p_memsz = 0; - phdr.p_flags = 0; - phdr.p_align = 0; - - offset += phdr.p_filesz; - DUMP_WRITE(&phdr, sizeof(phdr)); - } - - /* Page-align dumped data */ - dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); - - /* Write program headers for segments dump */ - for(vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) { - struct elf_phdr phdr; - size_t sz; - - sz = vma->vm_end - vma->vm_start; - - phdr.p_type = PT_LOAD; - phdr.p_offset = offset; - phdr.p_vaddr = vma->vm_start; - phdr.p_paddr = 0; - phdr.p_filesz = maydump(vma) ? sz : 0; - phdr.p_memsz = sz; - offset += phdr.p_filesz; - phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0; - if (vma->vm_flags & VM_WRITE) phdr.p_flags |= PF_W; - if (vma->vm_flags & VM_EXEC) phdr.p_flags |= PF_X; - phdr.p_align = ELF_EXEC_PAGESIZE; - - DUMP_WRITE(&phdr, sizeof(phdr)); - } - - for(i = 0; i < numnote; i++) - if (!writenote(¬es[i], file)) - goto end_coredump; - - DUMP_SEEK(dataoff); - - for(vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) { - unsigned long addr; - - if (!maydump(vma)) - continue; - -#ifdef DEBUG - printk("elf_core_dump: writing %08lx-%08lx\n", vma->vm_start, vma->vm_end); -#endif - - for (addr = vma->vm_start; - addr < vma->vm_end; - addr += PAGE_SIZE) { - struct page* page; - struct vm_area_struct *vma; - - if (get_user_pages(current, current->mm, addr, 1, 0, 1, - &page, &vma) <= 0) { - DUMP_SEEK (file->f_pos + PAGE_SIZE); - } else { - if (page == ZERO_PAGE(addr)) { - DUMP_SEEK (file->f_pos + PAGE_SIZE); - } else { - void *kaddr; - flush_cache_page(vma, addr); - kaddr = kmap(page); - DUMP_WRITE(kaddr, PAGE_SIZE); - flush_page_to_ram(page); - kunmap(page); - } - put_page(page); - } - } - } - - if ((off_t) file->f_pos != offset) { - /* Sanity check */ - printk("elf_core_dump: file->f_pos (%ld) != offset (%ld)\n", - (off_t) file->f_pos, offset); - } - - end_coredump: - set_fs(fs); - up_write(¤t->mm->mmap_sem); - return has_dumped; -} -#endif /* USE_ELF_CORE_DUMP */ - -static int __init init_elf_binfmt(void) -{ - return register_binfmt(&elf_format); -} - -static void __exit exit_elf_binfmt(void) -{ - /* Remove the COFF and ELF loaders. */ - unregister_binfmt(&elf_format); -} - -module_init(init_elf_binfmt) -module_exit(exit_elf_binfmt) diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/buffer.c linuxppc64_2_4/fs/buffer.c --- ../kernel.org/linux-2.4.19/fs/buffer.c Mon Apr 22 11:34:26 2002 +++ linuxppc64_2_4/fs/buffer.c Tue Apr 23 09:37:30 2002 @@ -2839,7 +2839,7 @@ bh_hash_shift++; hash_table = (struct buffer_head **) - __get_free_pages(GFP_ATOMIC, order); + vmalloc(PAGE_SIZE << order); } while (hash_table == NULL && --order > 0); printk("Buffer-cache hash table entries: %d (order: %d, %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order)); @@ -2896,6 +2896,7 @@ break; if (write_some_buffers(NODEV)) continue; + /* spin_unlock(&lru_list_lock); ??? 
*/ return 0; } spin_unlock(&lru_list_lock); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/dcache.c linuxppc64_2_4/fs/dcache.c --- ../kernel.org/linux-2.4.19/fs/dcache.c Mon Apr 22 11:34:26 2002 +++ linuxppc64_2_4/fs/dcache.c Tue Apr 23 09:37:30 2002 @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -1206,7 +1207,7 @@ d_hash_shift++; dentry_hashtable = (struct list_head *) - __get_free_pages(GFP_ATOMIC, order); + vmalloc(PAGE_SIZE << order); } while (dentry_hashtable == NULL && --order >= 0); printk("Dentry-cache hash table entries: %d (order: %ld, %ld bytes)\n", diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/exec.c linuxppc64_2_4/fs/exec.c --- ../kernel.org/linux-2.4.19/fs/exec.c Fri Apr 19 11:00:23 2002 +++ linuxppc64_2_4/fs/exec.c Mon Apr 22 10:35:08 2002 @@ -313,7 +313,7 @@ mpnt->vm_mm = current->mm; mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; mpnt->vm_end = STACK_TOP; - mpnt->vm_page_prot = PAGE_COPY; + mpnt->vm_page_prot = protection_map[VM_STACK_FLAGS & 0xf]; mpnt->vm_flags = VM_STACK_FLAGS; mpnt->vm_ops = NULL; mpnt->vm_pgoff = 0; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/inode.c linuxppc64_2_4/fs/inode.c --- ../kernel.org/linux-2.4.19/fs/inode.c Mon Apr 22 11:34:26 2002 +++ linuxppc64_2_4/fs/inode.c Tue Apr 23 09:37:30 2002 @@ -17,6 +17,7 @@ #include #include #include +#include /* * New inode.c implementation. @@ -1148,7 +1149,7 @@ i_hash_shift++; inode_hashtable = (struct list_head *) - __get_free_pages(GFP_ATOMIC, order); + vmalloc(PAGE_SIZE << order); } while (inode_hashtable == NULL && --order >= 0); printk("Inode-cache hash table entries: %d (order: %ld, %ld bytes)\n", diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/Makefile linuxppc64_2_4/fs/jfs/Makefile --- ../kernel.org/linux-2.4.19/fs/jfs/Makefile Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/Makefile Wed Nov 14 10:19:35 2001 @@ -0,0 +1,20 @@ +# +# Makefile for the Linux JFS filesystem routines. +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile. + +O_TARGET := jfs.o +obj-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \ + jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \ + jfs_unicode.o jfs_dtree.o jfs_inode.o \ + jfs_extent.o symlink.o jfs_metapage.o \ + jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o +obj-m := $(O_TARGET) + +EXTRA_CFLAGS += -D_JFS_4K + +include $(TOPDIR)/Rules.make diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/dir.c linuxppc64_2_4/fs/jfs/dir.c --- ../kernel.org/linux-2.4.19/fs/jfs/dir.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/dir.c Wed Nov 14 10:19:35 2001 @@ -0,0 +1,112 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern int jfs_create(struct inode *, struct dentry *, int); +extern int jfs_mkdir(struct inode *, struct dentry *, int); +extern int jfs_unlink(struct inode *, struct dentry *); +extern int jfs_rmdir(struct inode *, struct dentry *); +extern int jfs_link(struct dentry *, struct inode *, struct dentry *); +extern int jfs_symlink(struct inode *, struct dentry *, const char *); +extern int jfs_rename(struct inode *, struct dentry *, struct inode *, + struct dentry *); +extern int jfs_mknod(struct inode *, struct dentry *, int, int); +extern int jfs_fsync_file(struct file *, struct dentry *, int); + +static ssize_t jfs_dir_read(struct file *filp, + char *buf, size_t count, loff_t * ppos) +{ + return -EISDIR; +} + +struct file_operations jfs_dir_operations = { + fsync: jfs_fsync_file, + read: jfs_dir_read, + readdir: jfs_readdir, +}; + +static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry) +{ + btstack_t btstack; + ino_t inum; + struct inode *ip; + component_t key; + const char *name = dentry->d_name.name; + int len = dentry->d_name.len; + int rc; + + jFYI(1, ("jfs_lookup: name = %s\n", name)); + + + if ((name[0] == '.') && (len == 1)) + inum = dip->i_ino; + else if (strcmp(name, "..") == 0) + inum = PARENT(dip); + else { + if ((rc = + get_UCSname(&key, dentry, JFS_SBI(dip->i_sb)->nls_tab))) + return ERR_PTR(-rc); + IREAD_LOCK(dip); + rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP); + IREAD_UNLOCK(dip); + free_UCSname(&key); + if (rc == ENOENT) { + d_add(dentry, NULL); + return ERR_PTR(0); + } else if (rc) { + jERROR(1, + ("jfs_lookup: dtSearch returned %d\n", rc)); + return ERR_PTR(-rc); + } + } + + ip = iget(dip->i_sb, inum); + if (ip == NULL) { + jERROR(1, + ("jfs_lookup: iget failed on inum %d\n", + (uint) inum)); + return ERR_PTR(-EACCES); + } + + d_add(dentry, ip); + + return ERR_PTR(0); +} + +struct inode_operations jfs_dir_inode_operations = { + create: jfs_create, + lookup: jfs_lookup, + link: jfs_link, + unlink: jfs_unlink, + symlink: jfs_symlink, + mkdir: jfs_mkdir, + rmdir: jfs_rmdir, + mknod: jfs_mknod, + rename: jfs_rename, +}; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/endian24.h linuxppc64_2_4/fs/jfs/endian24.h --- ../kernel.org/linux-2.4.19/fs/jfs/endian24.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/endian24.h Tue Apr 23 11:21:51 2002 @@ -0,0 +1,50 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _H_ENDIAN24 +#define _H_ENDIAN24 + +/* + * fs/jfs/endian24.h: + * + * Endian conversion for 24-byte data + * + */ +#define __swab24(x) \ +({ \ + __u32 __x = (x); \ + ((__u32)( \ + ((__x & (__u32)0x000000ffUL) << 16) | \ + (__x & (__u32)0x0000ff00UL) | \ + ((__x & (__u32)0x00ff0000UL) >> 16) )); \ +}) + +#if (defined(__KERNEL__) && defined(__LITTLE_ENDIAN)) || (defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN)) + #define __cpu_to_le24(x) ((__u32)(x)) + #define __le24_to_cpu(x) ((__u32)(x)) +#else + #define __cpu_to_le24(x) __swab24(x) + #define __le24_to_cpu(x) __swab24(x) +#endif + +#ifdef __KERNEL__ + #define cpu_to_le24 __cpu_to_le24 + #define le24_to_cpu __le24_to_cpu +#endif + +#endif /* !_H_ENDIAN24 */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/file.c linuxppc64_2_4/fs/jfs/file.c --- ../kernel.org/linux-2.4.19/fs/jfs/file.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/file.c Tue Apr 23 11:21:51 2002 @@ -0,0 +1,105 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include "jfs_incore.h" +#include "jfs_txnmgr.h" +#include "jfs_debug.h" + + +extern int generic_file_open(struct inode *, struct file *) __weak; +extern loff_t generic_file_llseek(struct file *, loff_t, int origin) __weak; + +extern int jfs_commit_inode(struct inode *, int); + +int jfs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + int rc = 0; + + rc = fsync_inode_data_buffers(inode); + + if (!(inode->i_state & I_DIRTY)) + return rc; + if (datasync || !(inode->i_state & I_DIRTY_DATASYNC)) + return rc; + + IWRITE_LOCK(inode); + rc |= jfs_commit_inode(inode, 1); + IWRITE_UNLOCK(inode); + + return rc ? -EIO : 0; +} + +struct file_operations jfs_file_operations = { + open: generic_file_open, + llseek: generic_file_llseek, + write: generic_file_write, + read: generic_file_read, + mmap: generic_file_mmap, + fsync: jfs_fsync, +}; + +/* + * Guts of jfs_truncate. Called with locks already held. Can be called + * with directory for truncating directory index table. 
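+ *
+ * The transaction loop below re-issues xtTruncate() until the inode
+ * has shrunk to the requested length, committing one transaction per
+ * pass, since a single truncate is not guaranteed to be atomic.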
+ */ +void jfs_truncate_nolock(struct inode *ip, loff_t length) +{ + loff_t newsize; + tid_t tid; + + ASSERT(length >= 0); + + if (test_cflag(COMMIT_Nolink, ip)) { + xtTruncate(0, ip, length, COMMIT_WMAP); + return; + } + + do { + tid = txBegin(ip->i_sb, 0); + + newsize = xtTruncate(tid, ip, length, + COMMIT_TRUNCATE | COMMIT_PWMAP); + if (newsize < 0) { + txEnd(tid); + break; + } + + ip->i_mtime = ip->i_ctime = CURRENT_TIME; + mark_inode_dirty(ip); + + txCommit(tid, 1, &ip, 0); + txEnd(tid); + } while (newsize > length); /* Truncate isn't always atomic */ +} + +static void jfs_truncate(struct inode *ip) +{ + jFYI(1, ("jfs_truncate: size = 0x%lx\n", (ulong) ip->i_size)); + + IWRITE_LOCK(ip); + jfs_truncate_nolock(ip, ip->i_size); + IWRITE_UNLOCK(ip); +} + +struct inode_operations jfs_file_inode_operations = { + truncate: jfs_truncate, +}; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/inode.c linuxppc64_2_4/fs/jfs/inode.c --- ../kernel.org/linux-2.4.19/fs/jfs/inode.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/inode.c Tue Apr 23 11:25:34 2002 @@ -0,0 +1,329 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include "jfs_incore.h" +#include "jfs_filsys.h" +#include "jfs_imap.h" +#include "jfs_extent.h" +#include "jfs_unicode.h" +#include "jfs_debug.h" + + +extern struct inode_operations jfs_dir_inode_operations; +extern struct inode_operations jfs_file_inode_operations; +extern struct inode_operations jfs_symlink_inode_operations; +extern struct file_operations jfs_dir_operations; +extern struct file_operations jfs_file_operations; +struct address_space_operations jfs_aops; +extern int freeZeroLink(struct inode *); + +void jfs_put_inode(struct inode *inode) +{ + jFYI(1, ("In jfs_put_inode, inode = 0x%p\n", inode)); +} + +void jfs_read_inode(struct inode *inode) +{ + int rc; + + rc = alloc_jfs_inode(inode); + if (rc) { + printk(__FUNCTION__ ": failed."); + goto bad_inode; + } + jFYI(1, ("In jfs_read_inode, inode = 0x%p\n", inode)); + + if (diRead(inode)) + goto bad_inode_free; + + if (S_ISREG(inode->i_mode)) { + inode->i_op = &jfs_file_inode_operations; + inode->i_fop = &jfs_file_operations; + inode->i_mapping->a_ops = &jfs_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &jfs_dir_inode_operations; + inode->i_fop = &jfs_dir_operations; + inode->i_mapping->a_ops = &jfs_aops; + inode->i_mapping->gfp_mask = GFP_NOFS; + } else if (S_ISLNK(inode->i_mode)) { + if (inode->i_size > IDATASIZE) { + inode->i_op = &page_symlink_inode_operations; + inode->i_mapping->a_ops = &jfs_aops; + } else + inode->i_op = &jfs_symlink_inode_operations; + } else { + init_special_inode(inode, inode->i_mode, + kdev_t_to_nr(inode->i_rdev)); + } + + return; + + bad_inode_free: + free_jfs_inode(inode); + bad_inode: + 
make_bad_inode(inode); +} + +/* This define is from fs/open.c */ +#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) + +/* + * Workhorse of both fsync & write_inode + */ +int jfs_commit_inode(struct inode *inode, int wait) +{ + int rc = 0; + tid_t tid; + static int noisy = 5; + + jFYI(1, ("In jfs_commit_inode, inode = 0x%p\n", inode)); + + /* + * Don't commit if inode has been committed since last being + * marked dirty, or if it has been deleted. + */ + if (test_cflag(COMMIT_Nolink, inode) || + !test_cflag(COMMIT_Dirty, inode)) + return 0; + + if (isReadOnly(inode)) { + /* kernel allows writes to devices on read-only + * partitions and may think inode is dirty + */ + if (!special_file(inode->i_mode) && noisy) { + jERROR(1, ("jfs_commit_inode(0x%p) called on " + "read-only volume\n", inode)); + jERROR(1, ("Is remount racy?\n")); + noisy--; + } + return 0; + } + + tid = txBegin(inode->i_sb, COMMIT_INODE); + rc = txCommit(tid, 1, &inode, wait ? COMMIT_SYNC : 0); + txEnd(tid); + return -rc; +} + +void jfs_write_inode(struct inode *inode, int wait) +{ + /* + * If COMMIT_DIRTY is not set, the inode isn't really dirty. + * It has been committed since the last change, but was still + * on the dirty inode list + */ + if (test_cflag(COMMIT_Nolink, inode) || + !test_cflag(COMMIT_Dirty, inode)) + return; + + IWRITE_LOCK(inode); + + if (jfs_commit_inode(inode, wait)) { + jERROR(1, ("jfs_write_inode: jfs_commit_inode failed!\n")); + } + + IWRITE_UNLOCK(inode); +} + +void jfs_delete_inode(struct inode *inode) +{ + jFYI(1, ("In jfs_delete_inode, inode = 0x%p\n", inode)); + + IWRITE_LOCK(inode); + if (test_cflag(COMMIT_Freewmap, inode)) + freeZeroLink(inode); + + diFree(inode); + IWRITE_UNLOCK(inode); + + clear_inode(inode); +} + +void jfs_dirty_inode(struct inode *inode) +{ + static int noisy = 5; + + if (isReadOnly(inode)) { + if (!special_file(inode->i_mode) && noisy) { + /* kernel allows writes to devices on read-only + * partitions and may try to mark inode dirty + */ + jERROR(1, ("jfs_dirty_inode called on " + "read-only volume\n")); + jERROR(1, ("Is remount racy?\n")); + noisy--; + } + return; + } + + set_cflag(COMMIT_Dirty, inode); +} + +static int jfs_get_block(struct inode *ip, long lblock, + struct buffer_head *bh_result, int create) +{ + s64 lblock64 = lblock; + int no_size_check = 0; + int rc = 0; + int take_locks; + xad_t xad; + s64 xaddr; + int xflag; + s32 xlen; + + /* + * If this is a special inode (imap, dmap) or directory, + * the lock should already be taken + */ + take_locks = ((JFS_IP(ip)->fileset != AGGREGATE_I) && + !S_ISDIR(ip->i_mode)); + /* + * Take appropriate lock on inode + */ + if (take_locks) { + if (create) + IWRITE_LOCK(ip); + else + IREAD_LOCK(ip); + } + + /* + * A directory's "data" is the inode index table, but i_size is the + * size of the d-tree, so don't check the offset against i_size + */ + if (S_ISDIR(ip->i_mode)) + no_size_check = 1; + + if ((no_size_check || + ((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size)) && + (xtLookup + (ip, lblock64, 1, &xflag, &xaddr, &xlen, no_size_check) + == 0) && xlen) { + if (xflag & XAD_NOTRECORDED) { + if (!create) + /* + * Allocated but not recorded, read treats + * this as a hole + */ + goto unlock; +#ifdef _JFS_4K + XADoffset(&xad, lblock64); + XADlength(&xad, xlen); + XADaddress(&xad, xaddr); +#else /* _JFS_4K */ + /* + * As long as block size = 4K, this isn't a problem. + * We should mark the whole page not ABNR, but how + * will we know to mark the other blocks BH_New? 
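+ * (With a block size below 4K several blocks share one page, and
+ * answering for a single block here says nothing about the rest of
+ * the page; this build only supports -D_JFS_4K, hence the BUG().)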
+ */ + BUG(); +#endif /* _JFS_4K */ + rc = extRecord(ip, &xad); + if (rc) + goto unlock; + bh_result->b_state |= (1UL << BH_New); + } + + bh_result->b_dev = ip->i_dev; + bh_result->b_blocknr = xaddr; + bh_result->b_state |= (1UL << BH_Mapped); + goto unlock; + } + if (!create) + goto unlock; + + /* + * Allocate a new block + */ +#ifdef _JFS_4K + if ((rc = + extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad))) + goto unlock; + rc = extAlloc(ip, 1, lblock64, &xad, FALSE); + if (rc) + goto unlock; + + bh_result->b_dev = ip->i_dev; + bh_result->b_blocknr = addressXAD(&xad); + bh_result->b_state |= ((1UL << BH_Mapped) | (1UL << BH_New)); + +#else /* _JFS_4K */ + /* + * We need to do whatever it takes to keep all but the last buffers + * in 4K pages - see jfs_write.c + */ + BUG(); +#endif /* _JFS_4K */ + + unlock: + /* + * Release lock on inode + */ + if (take_locks) { + if (create) + IWRITE_UNLOCK(ip); + else + IREAD_UNLOCK(ip); + } + return -rc; +} + +static int jfs_writepage(struct page *page) +{ + return block_write_full_page(page, jfs_get_block); +} + +static int jfs_readpage(struct file *file, struct page *page) +{ + return block_read_full_page(page, jfs_get_block); +} + +static int jfs_prepare_write(struct file *file, + struct page *page, unsigned from, unsigned to) +{ + return block_prepare_write(page, from, to, jfs_get_block); +} + +static int jfs_bmap(struct address_space *mapping, long block) +{ + return generic_block_bmap(mapping, block, jfs_get_block); +} + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,15)) +static int jfs_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf, + unsigned long blocknr, int blocksize) +{ + return generic_direct_IO(rw, inode, iobuf, blocknr, + blocksize, jfs_get_block); +} +#endif /* Kernel >= 2.4.15 */ + +struct address_space_operations jfs_aops = { + readpage: jfs_readpage, + writepage: jfs_writepage, + sync_page: block_sync_page, + prepare_write: jfs_prepare_write, + commit_write: generic_commit_write, + bmap: jfs_bmap, +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,15)) + direct_IO: jfs_direct_IO, +#endif +}; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_btree.h linuxppc64_2_4/fs/jfs/jfs_btree.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_btree.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_btree.h Tue Apr 23 11:21:51 2002 @@ -0,0 +1,163 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _H_JFS_BTREE +#define _H_JFS_BTREE +/* + * jfs_btree.h: B+-tree + * + * JFS B+-tree (dtree and xtree) common definitions + */ + +/* + * basic btree page - btpage_t + */ +typedef struct { + s64 next; /* 8: right sibling bn */ + s64 prev; /* 8: left sibling bn */ + + u8 flag; /* 1: */ + u8 rsrvd[7]; /* 7: type specific */ + s64 self; /* 8: self address */ + + u8 entry[4064]; /* 4064: */ +} btpage_t; /* (4096) */ + +/* btpaget_t flag */ +#define BT_TYPE 0x07 /* B+-tree index */ +#define BT_ROOT 0x01 /* root page */ +#define BT_LEAF 0x02 /* leaf page */ +#define BT_INTERNAL 0x04 /* internal page */ +#define BT_RIGHTMOST 0x10 /* rightmost page */ +#define BT_LEFTMOST 0x20 /* leftmost page */ +#define BT_SWAPPED 0x80 /* used by fsck for endian swapping */ + +/* btorder (in inode) */ +#define BT_RANDOM 0x0000 +#define BT_SEQUENTIAL 0x0001 +#define BT_LOOKUP 0x0010 +#define BT_INSERT 0x0020 +#define BT_DELETE 0x0040 + +/* + * btree page buffer cache access + */ +#define BT_IS_ROOT(MP) (((MP)->xflag & COMMIT_PAGE) == 0) + +/* get page from buffer page */ +#define BT_PAGE(IP, MP, TYPE, ROOT)\ + (BT_IS_ROOT(MP) ? (TYPE *)&JFS_IP(IP)->ROOT : (TYPE *)(MP)->data) + +/* get the page buffer and the page for specified block address */ +#define BT_GETPAGE(IP, BN, MP, TYPE, SIZE, P, RC, ROOT)\ +{\ + if ((BN) == 0)\ + {\ + MP = (metapage_t *)&JFS_IP(IP)->bxflag;\ + P = (TYPE *)&JFS_IP(IP)->ROOT;\ + RC = 0;\ + jEVENT(0,("%d BT_GETPAGE returning root\n", __LINE__));\ + }\ + else\ + {\ + jEVENT(0,("%d BT_GETPAGE reading block %d\n", __LINE__,\ + (int)BN));\ + MP = read_metapage((IP), BN, SIZE, 1);\ + if (MP) {\ + RC = 0;\ + P = (MP)->data;\ + } else {\ + P = NULL;\ + jERROR(1,("bread failed!\n"));\ + RC = EIO;\ + }\ + }\ +} + +#define BT_MARK_DIRTY(MP, IP)\ +{\ + if (BT_IS_ROOT(MP))\ + mark_inode_dirty(IP);\ + else\ + mark_metapage_dirty(MP);\ +} + +/* put the page buffer */ +#define BT_PUTPAGE(MP)\ +{\ + if (! BT_IS_ROOT(MP)) \ + release_metapage(MP); \ +} + + +/* + * btree traversal stack + * + * record the path traversed during the search; + * top frame record the leaf page/entry selected. + */ +#define MAXTREEHEIGHT 8 +typedef struct btframe { /* stack frame */ + s64 bn; /* 8: */ + s16 index; /* 2: */ + s16 lastindex; /* 2: */ + struct metapage *mp; /* 4: */ +} btframe_t; /* (16) */ + +typedef struct btstack { + btframe_t *top; /* 4: */ + int nsplit; /* 4: */ + btframe_t stack[MAXTREEHEIGHT]; +} btstack_t; + +#define BT_CLR(btstack)\ + (btstack)->top = (btstack)->stack + +#define BT_PUSH(BTSTACK, BN, INDEX)\ +{\ + (BTSTACK)->top->bn = BN;\ + (BTSTACK)->top->index = INDEX;\ + ++(BTSTACK)->top;\ + assert((BTSTACK)->top != &((BTSTACK)->stack[MAXTREEHEIGHT]));\ +} + +#define BT_POP(btstack)\ + ( (btstack)->top == (btstack)->stack ? NULL : --(btstack)->top ) + +#define BT_STACK(btstack)\ + ( (btstack)->top == (btstack)->stack ? NULL : (btstack)->top ) + +/* retrieve search results */ +#define BT_GETSEARCH(IP, LEAF, BN, MP, TYPE, P, INDEX, ROOT)\ +{\ + BN = (LEAF)->bn;\ + MP = (LEAF)->mp;\ + if (BN)\ + P = (TYPE *)MP->data;\ + else\ + P = (TYPE *)&JFS_IP(IP)->ROOT;\ + INDEX = (LEAF)->index;\ +} + +/* put the page buffer of search */ +#define BT_PUTSEARCH(BTSTACK)\ +{\ + if (! 
BT_IS_ROOT((BTSTACK)->top->mp))\ + release_metapage((BTSTACK)->top->mp);\ +} +#endif /* _H_JFS_BTREE */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_compat.h linuxppc64_2_4/fs/jfs/jfs_compat.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_compat.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_compat.h Tue Apr 23 11:25:34 2002 @@ -0,0 +1,87 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _H_JFS_COMPAT +#define _H_JFS_COMPAT + +/* + * jfs_compat.h: + * + * Definitions to allow JFS to build on older kernels. + * + * This file should be removed when JFS is merged with linux kernel + * + */ + +#include +#include +#include + +#ifndef __weak +#define __weak __attribute__((weak)); +#endif + +#ifndef MODULE_LICENSE +#define MODULE_LICENSE(x) +#endif + +#ifndef GFP_NOFS +#define GFP_NOFS GFP_BUFFER +#endif + +#if !defined(KERNEL_HAS_O_DIRECT) +#define fsync_inode_data_buffers fsync_inode_buffers +#endif + +/* + * Linux 2.4.9 has broken min/max macros. + * Linux < 2.4.9 doesn't have min/max at all. + */ +#if (LINUX_VERSION_CODE == KERNEL_VERSION(2,4,9)) +#undef min +#undef max +#endif + +/* + * Completions are new in 2.4.7. + */ +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,6)) +#define DECLARE_COMPLETION(c) DECLARE_MUTEX_LOCKED(c) +#define complete(c) up(c) +#define wait_for_completion(c) down(c) +/* must be last to not mess up the namespace */ +#define completion semaphore +#else +#include +#endif + +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,9)) +#define min(x,y) ({ \ + const typeof(x) _x = (x); \ + const typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? _x : _y; }) + +#define max(x,y) ({ \ + const typeof(x) _x = (x); \ + const typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x > _y ? _x : _y; }) +#endif + +#endif /* !_H_JFS_COMPAT */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_debug.c linuxppc64_2_4/fs/jfs/jfs_debug.c --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_debug.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_debug.c Tue Apr 23 11:21:51 2002 @@ -0,0 +1,145 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include +#include +#include +#include +#include +#include "jfs_incore.h" +#include "jfs_filsys.h" +#include "jfs_debug.h" + +#ifdef CONFIG_JFS_DEBUG +void dump_mem(char *label, void *data, int length) +{ + int i, j; + int *intptr = data; + char *charptr = data; + char buf[10], line[80]; + + printk("%s: dump of %d bytes of data at 0x%p\n\n", label, length, + data); + for (i = 0; i < length; i += 16) { + line[0] = 0; + for (j = 0; (j < 4) && (i + j * 4 < length); j++) { + sprintf(buf, " %08x", intptr[i / 4 + j]); + strcat(line, buf); + } + buf[0] = ' '; + buf[2] = 0; + for (j = 0; (j < 16) && (i + j < length); j++) { + buf[1] = + isprint(charptr[i + j]) ? charptr[i + j] : '.'; + strcat(line, buf); + } + printk("%s\n", line); + } +} + +#ifdef CONFIG_PROC_FS +static int loglevel_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + + len = sprintf(page, "%d\n", jfsloglevel); + + len -= off; + *start = page + off; + + if (len > count) + len = count; + else + *eof = 1; + + if (len < 0) + len = 0; + + return len; +} + +static int loglevel_write(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + char c; + + if (get_user(c, buffer)) + return -EFAULT; + + /* yes, I know this is an ASCIIism. --hch */ + if (c < '0' || c > '9') + return -EINVAL; + jfsloglevel = c - '0'; + return count; +} + + +extern read_proc_t jfs_txanchor_read; +#ifdef CONFIG_JFS_STATISTICS +extern read_proc_t jfs_lmstats_read; +extern read_proc_t jfs_xtstat_read; +extern read_proc_t jfs_mpstat_read; +#endif +static struct proc_dir_entry *base; + +static struct { + const char *name; + read_proc_t *read_fn; + write_proc_t *write_fn; +} Entries[] = { + { "TxAnchor", jfs_txanchor_read, }, +#ifdef CONFIG_JFS_STATISTICS + { "lmstats", jfs_lmstats_read, }, + { "xtstat", jfs_xtstat_read, }, + { "mpstat", jfs_mpstat_read, }, +#endif + { "loglevel", loglevel_read, loglevel_write } +}; +#define NPROCENT (sizeof(Entries)/sizeof(Entries[0])) + +void jfs_proc_init(void) +{ + int i; + + if (!(base = proc_mkdir("jfs", proc_root_fs))) + return; + base->owner = THIS_MODULE; + + for (i = 0; i < NPROCENT; i++) { + struct proc_dir_entry *p; + if ((p = create_proc_entry(Entries[i].name, 0, base))) { + p->read_proc = Entries[i].read_fn; + p->write_proc = Entries[i].write_fn; + } + } +} + +void jfs_proc_clean(void) +{ + int i; + + if (base) { + for (i = 0; i < NPROCENT; i++) + remove_proc_entry(Entries[i].name, base); + remove_proc_entry("jfs", base); + } +} + +#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_JFS_DEBUG */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_debug.h linuxppc64_2_4/fs/jfs/jfs_debug.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_debug.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_debug.h Tue Apr 23 11:21:51 2002 @@ -0,0 +1,96 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * +*/ +#ifndef _H_JFS_DEBUG +#define _H_JFS_DEBUG + +/* + * jfs_debug.h + * + * global debug message, data structure/macro definitions + * under control of CONFIG_JFS_DEBUG, CONFIG_JFS_STATISTICS; + */ + +/* + * assert with traditional printf/panic + */ +#ifdef CONFIG_KERNEL_ASSERTS +/* kgdb stuff */ +#define assert(p) KERNEL_ASSERT(#p, p) +#else +#define assert(p) {\ +if (!(p))\ + {\ + printk("assert(%s)\n",#p);\ + BUG();\ + }\ +} +#endif + +/* + * debug ON + * -------- + */ +#ifdef CONFIG_JFS_DEBUG +#define ASSERT(p) assert(p) + +/* dump memory contents */ +extern void dump_mem(char *label, void *data, int length); +extern int jfsloglevel; + +/* information message: e.g., configuration, major event */ +#define jFYI(button, prspec) \ + do { if (button && jfsloglevel > 1) printk prspec; } while (0) + +/* error event message: e.g., i/o error */ +extern int jfsERROR; +#define jERROR(button, prspec) \ + do { if (button && jfsloglevel > 0) { printk prspec; } } while (0) + +/* debug event message: */ +#define jEVENT(button,prspec) \ + do { if (button) printk prspec; } while (0) + +/* + * debug OFF + * --------- + */ +#else /* CONFIG_JFS_DEBUG */ +#define dump_mem(label,data,length) +#define ASSERT(p) +#define jEVENT(button,prspec) +#define jERROR(button,prspec) +#define jFYI(button,prspec) +#endif /* CONFIG_JFS_DEBUG */ + +/* + * statistics + * ---------- + */ +#ifdef CONFIG_JFS_STATISTICS +#define INCREMENT(x) ((x)++) +#define DECREMENT(x) ((x)--) +#define HIGHWATERMARK(x,y) ((x) = max((x), (y))) +#else +#define INCREMENT(x) +#define DECREMENT(x) +#define HIGHWATERMARK(x,y) +#endif /* CONFIG_JFS_STATISTICS */ + +#endif /* _H_JFS_DEBUG */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_defragfs.h linuxppc64_2_4/fs/jfs/jfs_defragfs.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_defragfs.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_defragfs.h Tue Apr 23 11:21:51 2002 @@ -0,0 +1,55 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
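The jFYI/jERROR/jEVENT macros defined above take their printk arguments as one parenthesized group, the usual pre-C99 idiom for variadic debug messages. An illustrative standalone sketch of how the calling convention expands, with printk mapped to printf for userspace; the messages are made up:

    #include <stdio.h>

    #define printk printf
    static int jfsloglevel = 2;  /* 0 = quiet, higher = chattier */

    #define jFYI(button, prspec) \
        do { if (button && jfsloglevel > 1) printk prspec; } while (0)
    #define jERROR(button, prspec) \
        do { if (button && jfsloglevel > 0) { printk prspec; } } while (0)

    int main(void)
    {
        jFYI(1, ("informational: loglevel is %d\n", jfsloglevel));
        jERROR(1, ("error path: i/o error on block %d\n", 42));
        jFYI(0, ("never printed: the button argument is 0\n"));
        return 0;
    }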
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _H_JFS_DEFRAGFS +#define _H_JFS_DEFRAGFS + +/* + * jfs_defragfs.h + */ +/* + * defragfs parameter list + */ +typedef struct { + uint flag; /* 4: */ + u8 dev; /* 1: */ + u8 pad[3]; /* 3: */ + s32 fileset; /* 4: */ + u32 inostamp; /* 4: */ + u32 ino; /* 4: */ + u32 gen; /* 4: */ + s64 xoff; /* 8: */ + s64 old_xaddr; /* 8: */ + s64 new_xaddr; /* 8: */ + s32 xlen; /* 4: */ +} defragfs_t; /* (52) */ + +/* plist flag */ +#define DEFRAGFS_SYNC 0x80000000 +#define DEFRAGFS_COMMIT 0x40000000 +#define DEFRAGFS_RELOCATE 0x10000000 + +#define INODE_TYPE 0x0000F000 /* IFREG or IFDIR */ + +#define EXTENT_TYPE 0x000000ff +#define DTPAGE 0x00000001 +#define XTPAGE 0x00000002 +#define DATAEXT 0x00000004 +#define EAEXT 0x00000008 + +#endif /* _H_JFS_DEFRAGFS */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_dinode.h linuxppc64_2_4/fs/jfs/jfs_dinode.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_dinode.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_dinode.h Tue Apr 23 11:21:51 2002 @@ -0,0 +1,157 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _H_JFS_DINODE +#define _H_JFS_DINODE + +/* + * jfs_dinode.h: on-disk inode manager + * + */ + +#define INODESLOTSIZE 128 +#define L2INODESLOTSIZE 7 +#define log2INODESIZE 9 /* log2(bytes per dinode) */ + + +/* + * on-disk inode (dinode_t): 512 bytes + * + * note: align 64-bit fields on 8-byte boundary. + */ +struct dinode { + /* + * I. base area (128 bytes) + * ------------------------ + * + * define generic/POSIX attributes + */ + u32 di_inostamp; /* 4: stamp to show inode belongs to fileset */ + s32 di_fileset; /* 4: fileset number */ + u32 di_number; /* 4: inode number, aka file serial number */ + u32 di_gen; /* 4: inode generation number */ + + pxd_t di_ixpxd; /* 8: inode extent descriptor */ + + s64 di_size; /* 8: size */ + s64 di_nblocks; /* 8: number of blocks allocated */ + + u32 di_nlink; /* 4: number of links to the object */ + + u32 di_uid; /* 4: user id of owner */ + u32 di_gid; /* 4: group id of owner */ + + u32 di_mode; /* 4: attribute, format and permission */ + + struct timestruc_t di_atime; /* 8: time last data accessed */ + struct timestruc_t di_ctime; /* 8: time last status changed */ + struct timestruc_t di_mtime; /* 8: time last data modified */ + struct timestruc_t di_otime; /* 8: time created */ + + dxd_t di_acl; /* 16: acl descriptor */ + + dxd_t di_ea; /* 16: ea descriptor */ + + u32 di_next_index; /* 4: Next available dir_table index */ + + s32 di_acltype; /* 4: Type of ACL */ + + /* + * Extension Areas. 
+ * + * Historically, the inode was partitioned into 4 128-byte areas, + * the last 3 being defined as unions which could have multiple + * uses. The first 96 bytes had been completely unused until + * an index table was added to the directory. It is now more + * useful to describe the last 3/4 of the inode as a single + * union. We would probably be better off redesigning the + * entire structure from scratch, but we don't want to break + * commonality with OS/2's JFS at this time. + */ + union { + struct { + /* + * This table contains the information needed to + * find a directory entry from a 32-bit index. + * If the index is small enough, the table is inline, + * otherwise, an x-tree root overlays this table + */ + dir_table_slot_t _table[12]; /* 96: inline */ + + dtroot_t _dtroot; /* 288: dtree root */ + } _dir; /* (384) */ +#define di_dirtable u._dir._table +#define di_dtroot u._dir._dtroot +#define di_parent di_dtroot.header.idotdot +#define di_DASD di_dtroot.header.DASD + + struct { + union { + u8 _data[96]; /* 96: unused */ + struct { + void *_imap; /* 4: unused */ + u32 _gengen; /* 4: generator */ + } _imap; + } _u1; /* 96: */ +#define di_gengen u._file._u1._imap._gengen + + union { + xtpage_t _xtroot; + struct { + u8 unused[16]; /* 16: */ + dxd_t _dxd; /* 16: */ + union { + u32 _rdev; /* 4: */ + u8 _fastsymlink[128]; + } _u; + u8 _inlineea[128]; + } _special; + } _u2; + } _file; +#define di_xtroot u._file._u2._xtroot +#define di_dxd u._file._u2._special._dxd +#define di_btroot di_xtroot +#define di_inlinedata u._file._u2._special._u +#define di_rdev u._file._u2._special._u._rdev +#define di_fastsymlink u._file._u2._special._u._fastsymlink +#define di_inlineea u._file._u2._special._inlineea + } u; +}; + +typedef struct dinode dinode_t; + + +/* extended mode bits (on-disk inode di_mode) */ +#define IFJOURNAL 0x00010000 /* journalled file */ +#define ISPARSE 0x00020000 /* sparse file enabled */ +#define INLINEEA 0x00040000 /* inline EA area free */ +#define ISWAPFILE 0x00800000 /* file open for pager swap space */ + +/* more extended mode bits: attributes for OS/2 */ +#define IREADONLY 0x02000000 /* no write access to file */ +#define IARCHIVE 0x40000000 /* file archive bit */ +#define ISYSTEM 0x08000000 /* system file */ +#define IHIDDEN 0x04000000 /* hidden file */ +#define IRASH 0x4E000000 /* mask for changeable attributes */ +#define INEWNAME 0x80000000 /* non-8.3 filename format */ +#define IDIRECTORY 0x20000000 /* directory (shadow of real bit) */ +#define ATTRSHIFT 25 /* bits to shift to move attribute + specification to mode position */ + +#endif /*_H_JFS_DINODE */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_dmap.c linuxppc64_2_4/fs/jfs/jfs_dmap.c --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_dmap.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_dmap.c Tue Apr 23 11:25:34 2002 @@ -0,0 +1,4189 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
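The dinode layout above is carefully budgeted: a 128-byte base area plus a 384-byte union keeps the on-disk inode at exactly 512 bytes (log2INODESIZE is 9). A minimal compile-time guard for that invariant, assuming the type definitions from jfs_dinode.h are in scope; the typedef name is illustrative and not part of the patch:

    /* fails to compile (negative array size) if struct dinode ever
     * drifts from the 512 bytes the on-disk format requires */
    typedef char dinode_size_check[(sizeof(struct dinode) == 512) ? 1 : -1];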
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ *
+ * MODULE_NAME: jfs_dmap.c
+ *
+ * COMPONENT_NAME: sysjfs
+ *
+ * FUNCTION: block allocation map manager
+ *
+*/
+
+/*
+ * Change History:
+ *
+ */
+
+#include <linux/fs.h>
+#include "jfs_incore.h"
+#include "jfs_dmap.h"
+#include "jfs_imap.h"
+#include "jfs_lock.h"
+#include "jfs_metapage.h"
+#include "jfs_debug.h"
+
+/*
+ * Debug code for double-checking block map
+ */
+/* #define _JFS_DEBUG_DMAP 1 */
+
+#ifdef _JFS_DEBUG_DMAP
+#define DBINITMAP(size,ipbmap,results) \
+	DBinitmap(size,ipbmap,results)
+#define DBALLOC(dbmap,mapsize,blkno,nblocks) \
+	DBAlloc(dbmap,mapsize,blkno,nblocks)
+#define DBFREE(dbmap,mapsize,blkno,nblocks) \
+	DBFree(dbmap,mapsize,blkno,nblocks)
+#define DBALLOCCK(dbmap,mapsize,blkno,nblocks) \
+	DBAllocCK(dbmap,mapsize,blkno,nblocks)
+#define DBFREECK(dbmap,mapsize,blkno,nblocks) \
+	DBFreeCK(dbmap,mapsize,blkno,nblocks)
+
+static void DBinitmap(s64, struct inode *, u32 **);
+static void DBAlloc(uint *, s64, s64, s64);
+static void DBFree(uint *, s64, s64, s64);
+static void DBAllocCK(uint *, s64, s64, s64);
+static void DBFreeCK(uint *, s64, s64, s64);
+#else
+#define DBINITMAP(size,ipbmap,results)
+#define DBALLOC(dbmap, mapsize, blkno, nblocks)
+#define DBFREE(dbmap, mapsize, blkno, nblocks)
+#define DBALLOCCK(dbmap, mapsize, blkno, nblocks)
+#define DBFREECK(dbmap, mapsize, blkno, nblocks)
+#endif /* _JFS_DEBUG_DMAP */
+
+/*
+ * SERIALIZATION of the Block Allocation Map.
+ *
+ * the working state of the block allocation map is accessed in
+ * two directions:
+ *
+ * 1) allocation and free requests that start at the dmap
+ *    level and move up through the dmap control pages (i.e.
+ *    the vast majority of requests).
+ *
+ * 2) allocation requests that start at dmap control page
+ *    level and work down towards the dmaps.
+ *
+ * the serialization scheme used here is as follows.
+ *
+ * requests which start at the bottom are serialized against each
+ * other through buffers and each request holds onto its buffers
+ * as it works its way up from a single dmap to the required level
+ * of dmap control page.
+ * requests that start at the top are serialized against each other
+ * and against requests that start from the bottom by the multiple
+ * read/single write inode lock of the bmap inode.  requests starting
+ * at the top take this lock in write mode while requests starting at
+ * the bottom take the lock in read mode.  a single top-down request
+ * may proceed exclusively while multiple bottom-up requests may
+ * proceed simultaneously (under the protection of busy buffers).
+ *
+ * in addition to information found in dmaps and dmap control pages,
+ * the working state of the block allocation map also includes read/
+ * write information maintained in the bmap descriptor (i.e. total
+ * free block count, allocation group level free block counts).
+ * a single exclusive lock (BMAP_LOCK) is used to guard this information
+ * in the face of multiple bottom-up requests.
+ * (lock ordering: IREAD_LOCK, BMAP_LOCK);
+ *
+ * accesses to the persistent state of the block allocation map (limited
+ * to the persistent bitmaps in dmaps) are guarded by (busy) buffers.
+ */
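The buddy table (budtab) defined a little further below summarizes free space one bitmap byte at a time: the index is a byte of a dmap bitmap word (a 0 bit marks a free block, the first block in the most significant bit), and the value is the log2 size of the largest properly aligned run of free bits, -1 meaning none. A few illustrative lookups, with the values read directly off the table:

    budtab[0x00] == 3;   /* all eight bits free: a buddy of size 8 (2^3) */
    budtab[0x0f] == 2;   /* four aligned free bits: a buddy of size 4 */
    budtab[0x7f] == 0;   /* a single free bit: a buddy of size 1 */
    budtab[0xff] == -1;  /* no free bits at all (NOFREE) */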
+
+#define BMAP_LOCK_INIT(bmp)	init_MUTEX(&bmp->db_bmaplock)
+#define BMAP_LOCK(bmp)		down(&bmp->db_bmaplock)
+#define BMAP_UNLOCK(bmp)	up(&bmp->db_bmaplock)
+
+/*
+ * forward references
+ */
+static void dbAllocBits(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks);
+static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval);
+static void dbBackSplit(dmtree_t * tp, int leafno);
+static void dbJoin(dmtree_t * tp, int leafno, int newval);
+static void dbAdjTree(dmtree_t * tp, int leafno, int newval);
+static int dbAdjCtl(bmap_t * bmp, s64 blkno, int newval, int alloc,
+		    int level);
+static int dbAllocAny(bmap_t * bmp, s64 nblocks, int l2nb, s64 * results);
+static int dbAllocNext(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks);
+static int dbAllocNear(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks,
+		       int l2nb, s64 * results);
+static int dbAllocDmap(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks);
+static int dbAllocDmapLev(bmap_t * bmp, dmap_t * dp, int nblocks, int l2nb,
+			  s64 * results);
+static int dbAllocAG(bmap_t * bmp, int agno, s64 nblocks, int l2nb,
+		     s64 * results);
+static int dbAllocCtl(bmap_t * bmp, s64 nblocks, int l2nb, s64 blkno,
+		      s64 * results);
+int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks);
+static int dbFindBits(u32 word, int l2nb);
+static int dbFindCtl(bmap_t * bmp, int l2nb, int level, s64 * blkno);
+static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx);
+static void dbFreeBits(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks);
+static int dbFreeDmap(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks);
+static int dbMaxBud(u8 * cp);
+s64 dbMapFileSizeToMapSize(struct inode *ipbmap);
+int blkstol2(s64 nb);
+void fsDirty(void);
+
+int cntlz(u32 value);
+int cnttz(u32 word);
+
+static int dbAllocDmapBU(bmap_t * bmp, dmap_t * dp, s64 blkno,
+			 int nblocks);
+static int dbInitDmap(dmap_t * dp, s64 blkno, int nblocks);
+static int dbInitDmapTree(dmap_t * dp);
+static int dbInitTree(dmaptree_t * dtp);
+static int dbInitDmapCtl(dmapctl_t * dcp, int level, int i);
+static int dbGetL2AGSize(s64 nblocks);
+
+/*
+ * buddy table
+ *
+ * table used for determining buddy sizes within characters of
+ * dmap bitmap words.  the characters themselves serve as indexes
+ * into the table, with the table elements yielding the maximum
+ * binary buddy of free bits within the character.
+ */
+signed char budtab[256] = {
+	3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
+	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
+	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
+	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
+	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
+	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
+	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
+	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
+	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1
+};
+
+
+/*
+ * NAME:	dbMount()
+ *
+ * FUNCTION:	initialize the block allocation map.
+ *
+ *		memory is allocated for the in-core bmap descriptor and
+ *		the in-core descriptor is initialized from disk.
+ *
+ * PARAMETERS:
+ *	ipbmap	- pointer to in-core inode for the block map.
+ * + * RETURN VALUES: + * 0 - success + * ENOMEM - insufficient memory + * EIO - i/o error + */ +int dbMount(struct inode *ipbmap) +{ + bmap_t *bmp; + dbmap_t *dbmp_le; + metapage_t *mp; + int i; + + /* + * allocate/initialize the in-memory bmap descriptor + */ + /* allocate memory for the in-memory bmap descriptor */ + bmp = kmalloc(sizeof(bmap_t), GFP_KERNEL); + if (bmp == NULL) + return (ENOMEM); + + /* read the on-disk bmap descriptor. */ + mp = read_metapage(ipbmap, + BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, + PSIZE, 0); + if (mp == NULL) { + kfree(bmp); + return (EIO); + } + + /* copy the on-disk bmap descriptor to its in-memory version. */ + dbmp_le = (dbmap_t *) mp->data; + bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize); + bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); + bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); + bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); + bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel); + bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); + bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); + bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); + bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth); + bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); + bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); + bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); + for (i = 0; i < MAXAG; i++) + bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]); + bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize); + bmp->db_maxfreebud = dbmp_le->dn_maxfreebud; + + /* release the buffer. */ + release_metapage(mp); + + /* bind the bmap inode and the bmap descriptor to each other. */ + bmp->db_ipbmap = ipbmap; + JFS_SBI(ipbmap->i_sb)->bmap = bmp; + + DBINITMAP(bmp->db_mapsize, ipbmap, &bmp->db_DBmap); + + /* + * allocate/initialize the bmap lock + */ + BMAP_LOCK_INIT(bmp); + + return (0); +} + + +/* + * NAME: dbUnmount() + * + * FUNCTION: terminate the block allocation map in preparation for + * file system unmount. + * + * the in-core bmap descriptor is written to disk and + * the memory for this descriptor is freed. + * + * PARAMETERS: + * ipbmap - pointer to in-core inode for the block map. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error + */ +int dbUnmount(struct inode *ipbmap, int mounterror) +{ + bmap_t *bmp = JFS_SBI(ipbmap->i_sb)->bmap; + + if (!(mounterror || isReadOnly(ipbmap))) + dbSync(ipbmap); + + /* + * Invalidate the page cache buffers + */ + truncate_inode_pages(ipbmap->i_mapping, 0); + + /* free the memory for the in-memory bmap. */ + kfree(bmp); + + return (0); +} + +/* + * dbSync() + */ +int dbSync(struct inode *ipbmap) +{ + dbmap_t *dbmp_le; + bmap_t *bmp = JFS_SBI(ipbmap->i_sb)->bmap; + metapage_t *mp; + int i; + + /* + * write bmap global control page + */ + /* get the buffer for the on-disk bmap descriptor. 
*/ + mp = read_metapage(ipbmap, + BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, + PSIZE, 0); + if (mp == NULL) { + jERROR(1,("dbSync: read_metapage failed!\n")); + return (EIO); + } + /* copy the in-memory version of the bmap to the on-disk version */ + dbmp_le = (dbmap_t *) mp->data; + dbmp_le->dn_mapsize = cpu_to_le64(bmp->db_mapsize); + dbmp_le->dn_nfree = cpu_to_le64(bmp->db_nfree); + dbmp_le->dn_l2nbperpage = cpu_to_le32(bmp->db_l2nbperpage); + dbmp_le->dn_numag = cpu_to_le32(bmp->db_numag); + dbmp_le->dn_maxlevel = cpu_to_le32(bmp->db_maxlevel); + dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); + dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); + dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); + dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth); + dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); + dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); + dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); + for (i = 0; i < MAXAG; i++) + dbmp_le->dn_agfree[i] = cpu_to_le64(bmp->db_agfree[i]); + dbmp_le->dn_agsize = cpu_to_le64(bmp->db_agsize); + dbmp_le->dn_maxfreebud = bmp->db_maxfreebud; + + /* write the buffer */ + write_metapage(mp); + + /* + * write out dirty pages of bmap + */ + fsync_inode_data_buffers(ipbmap); + + ipbmap->i_state |= I_DIRTY; + diWriteSpecial(ipbmap); + + return (0); +} + + +/* + * NAME: dbFree() + * + * FUNCTION: free the specified block range from the working block + * allocation map. + * + * the blocks will be free from the working map one dmap + * at a time. + * + * PARAMETERS: + * ip - pointer to in-core inode; + * blkno - starting block number to be freed. + * nblocks - number of blocks to be freed. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error + */ +int dbFree(struct inode *ip, s64 blkno, s64 nblocks) +{ + metapage_t *mp; + dmap_t *dp; + int nb, rc; + s64 lblkno, rem; + struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; + bmap_t *bmp = JFS_SBI(ip->i_sb)->bmap; + + IREAD_LOCK(ipbmap); + + /* block to be freed better be within the mapsize. */ + assert(blkno + nblocks <= bmp->db_mapsize); + + /* + * free the blocks a dmap at a time. + */ + mp = NULL; + for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) { + /* release previous dmap if any */ + if (mp) { + write_metapage(mp); + } + + /* get the buffer for the current dmap. */ + lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); + mp = read_metapage(ipbmap, lblkno, PSIZE, 0); + if (mp == NULL) { + IREAD_UNLOCK(ipbmap); + return (EIO); + } + dp = (dmap_t *) mp->data; + + /* determine the number of blocks to be freed from + * this dmap. + */ + nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1))); + + DBALLOCCK(bmp->db_DBmap, bmp->db_mapsize, blkno, nb); + + /* free the blocks. */ + if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { + release_metapage(mp); + IREAD_UNLOCK(ipbmap); + return (rc); + } + + DBFREE(bmp->db_DBmap, bmp->db_mapsize, blkno, nb); + } + + /* write the last buffer. */ + write_metapage(mp); + + IREAD_UNLOCK(ipbmap); + + return (0); +} + + +/* + * NAME: dbUpdatePMap() + * + * FUNCTION: update the allocation state (free or allocate) of the + * specified block range in the persistent block allocation map. + * + * the blocks will be updated in the persistent map one + * dmap at a time. + * + * PARAMETERS: + * ipbmap - pointer to in-core inode for the block map. + * free - TRUE if block range is to be freed from the persistent + * map; FALSE if it is to be allocated. + * blkno - starting block number of the range. + * nblocks - number of contiguous blocks in the range. 
+ * tblk - transaction block; + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error + */ +int +dbUpdatePMap(struct inode *ipbmap, + int free, s64 blkno, s64 nblocks, tblock_t * tblk) +{ + int nblks, dbitno, wbitno, rbits; + int word, nbits, nwords; + bmap_t *bmp = JFS_SBI(ipbmap->i_sb)->bmap; + s64 lblkno, rem, lastlblkno; + u32 mask; + dmap_t *dp; + metapage_t *mp; + log_t *log; + int lsn, difft, diffp; + + /* the blocks better be within the mapsize. */ + assert(blkno + nblocks <= bmp->db_mapsize); + + /* compute delta of transaction lsn from log syncpt */ + lsn = tblk->lsn; + log = (log_t *) JFS_SBI(tblk->sb)->log; + logdiff(difft, lsn, log); + + /* + * update the block state a dmap at a time. + */ + mp = NULL; + lastlblkno = 0; + for (rem = nblocks; rem > 0; rem -= nblks, blkno += nblks) { + /* get the buffer for the current dmap. */ + lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); + if (lblkno != lastlblkno) { + if (mp) { + write_metapage(mp); + } + + mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, + 0); + if (mp == NULL) + return (EIO); + } + dp = (dmap_t *) mp->data; + + /* determine the bit number and word within the dmap of + * the starting block. also determine how many blocks + * are to be updated within this dmap. + */ + dbitno = blkno & (BPERDMAP - 1); + word = dbitno >> L2DBWORD; + nblks = min(rem, (s64)BPERDMAP - dbitno); + + /* update the bits of the dmap words. the first and last + * words may only have a subset of their bits updated. if + * this is the case, we'll work against that word (i.e. + * partial first and/or last) only in a single pass. a + * single pass will also be used to update all words that + * are to have all their bits updated. + */ + for (rbits = nblks; rbits > 0; + rbits -= nbits, dbitno += nbits) { + /* determine the bit number within the word and + * the number of bits within the word. + */ + wbitno = dbitno & (DBWORD - 1); + nbits = min(rbits, DBWORD - wbitno); + + /* check if only part of the word is to be updated. */ + if (nbits < DBWORD) { + /* update (free or allocate) the bits + * in this word. + */ + mask = + (ONES << (DBWORD - nbits) >> wbitno); + if (free) + dp->pmap[word] &= + cpu_to_le32(~mask); + else + dp->pmap[word] |= + cpu_to_le32(mask); + + word += 1; + } else { + /* one or more words are to have all + * their bits updated. determine how + * many words and how many bits. + */ + nwords = rbits >> L2DBWORD; + nbits = nwords << L2DBWORD; + + /* update (free or allocate) the bits + * in these words. + */ + if (free) + memset(&dp->pmap[word], 0, + nwords * 4); + else + memset(&dp->pmap[word], (int) ONES, + nwords * 4); + + word += nwords; + } + } + + /* + * update dmap lsn + */ + if (lblkno == lastlblkno) + continue; + + lastlblkno = lblkno; + + if (mp->lsn != 0) { + /* inherit older/smaller lsn */ + logdiff(diffp, mp->lsn, log); + if (difft < diffp) { + mp->lsn = lsn; + + /* move bp after tblock in logsync list */ + LOGSYNC_LOCK(log); + list_del(&mp->synclist); + list_add(&mp->synclist, &tblk->synclist); + LOGSYNC_UNLOCK(log); + } + + /* inherit younger/larger clsn */ + LOGSYNC_LOCK(log); + logdiff(difft, tblk->clsn, log); + logdiff(diffp, mp->clsn, log); + if (difft > diffp) + mp->clsn = tblk->clsn; + LOGSYNC_UNLOCK(log); + } else { + mp->log = log; + mp->lsn = lsn; + + /* insert bp after tblock in logsync list */ + LOGSYNC_LOCK(log); + + log->count++; + list_add(&mp->synclist, &tblk->synclist); + + mp->clsn = tblk->clsn; + LOGSYNC_UNLOCK(log); + } + } + + /* write the last buffer. 
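+	 */

Both dbFree() above and dbUpdatePMap() walk a block range one dmap at a time, clamping each step so it never crosses a dmap boundary. An illustrative standalone sketch of that chunk arithmetic, assuming JFS's 8192 blocks per dmap (BPERDMAP); the starting values are made up:

    #include <stdio.h>

    #define BPERDMAP 8192  /* blocks covered by one dmap page */

    int main(void)
    {
        long long blkno = 8000, nblocks = 10000, rem, nb;

        for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
            /* clamp the step to the end of the dmap containing blkno */
            nb = BPERDMAP - (blkno & (BPERDMAP - 1));
            if (nb > rem)
                nb = rem;
            printf("touch %lld blocks at %lld\n", nb, blkno);
        }
        /* prints: 192 at 8000, then 8192 at 8192, then 1616 at 16384 */
        return 0;
    }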
+	if (mp) {
+		write_metapage(mp);
+	}
+
+	return (0);
+}
+
+
+/*
+ * NAME:	dbNextAG()
+ *
+ * FUNCTION:	find the preferred allocation group for new allocations.
+ *
+ *		we try to keep the trailing (rightmost) allocation groups
+ *		free for large allocations.  we try to do this by targeting
+ *		new inode allocations towards the leftmost or 'active'
+ *		allocation groups while keeping the rightmost or 'inactive'
+ *		allocation groups free.  once the active allocation groups
+ *		have dropped to a certain percentage of free space, we add
+ *		the leftmost inactive allocation group to the active set.
+ *
+ *		within the active allocation groups, we maintain a preferred
+ *		allocation group which consists of a group with at least
+ *		average free space over the active set.  it is the preferred
+ *		group that we target new inode allocation towards.  the
+ *		tie-in between inode allocation and block allocation occurs
+ *		as we allocate the first (data) block of an inode and specify
+ *		the inode (block) as the allocation hint for this block.
+ *
+ * PARAMETERS:
+ *	ipbmap	- pointer to in-core inode for the block map.
+ *
+ * RETURN VALUES:
+ *	the preferred allocation group number.
+ *
+ * note: only called by dbAlloc();
+ */
+int dbNextAG(struct inode *ipbmap)
+{
+	s64 avgfree, inactfree, actfree, rem;
+	int actags, inactags, l2agsize;
+	bmap_t *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
+
+	BMAP_LOCK(bmp);
+
+	/* determine the number of active allocation groups (i.e.
+	 * the number of allocation groups up to and including
+	 * the rightmost allocation group with blocks allocated
+	 * in it).
+	 */
+	actags = bmp->db_maxag + 1;
+	assert(actags <= bmp->db_numag);
+
+	/* get the number of inactive allocation groups (i.e. the
+	 * number of allocation groups following the rightmost group
+	 * with allocations in it).
+	 */
+	inactags = bmp->db_numag - actags;
+
+	/* determine how many blocks are in the inactive allocation
+	 * groups.  in doing this, we must account for the fact that
+	 * the rightmost group might be a partial group (i.e. file
+	 * system size is not a multiple of the group size).
+	 */
+	l2agsize = bmp->db_agl2size;
+	rem = bmp->db_mapsize & (bmp->db_agsize - 1);
+	inactfree = (inactags
+		     && rem) ? ((inactags - 1) << l2agsize) +
+	    rem : inactags << l2agsize;
+
+	/* now determine how many free blocks are in the active
+	 * allocation groups plus the average number of free blocks
+	 * within the active ags.
+	 */
+	actfree = bmp->db_nfree - inactfree;
+	avgfree = (u32) actfree / (u32) actags;
+
+	/* check if not all of the allocation groups are active.
+	 */
+	if (actags < bmp->db_numag) {
+		/* not all of the allocation groups are active.  determine
+		 * if we should extend the active set by 1 (i.e. add the
+		 * group following the current active set).  we do so if
+		 * the number of free blocks within the active set is less
+		 * than the allocation group size and average free within
+		 * the active set is less than 60%.  we activate a new group
+		 * by setting the allocation group preference to the new
+		 * group.
+		 */
+		if (actfree < bmp->db_agsize &&
+		    ((avgfree * 100) >> l2agsize) < 60)
+			bmp->db_agpref = actags;
+	} else {
+		/* all allocation groups are in the active set.  check if
+		 * the preferred allocation group has average free space.
+		 * if not, re-establish the preferred group as the leftmost
+		 * group with average free space.
+		 */
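The activation test just above scales by 100 and shifts by l2agsize, so ((avgfree * 100) >> l2agsize) is roughly the average free space expressed as a percentage of the allocation group size. An illustrative standalone check with made-up numbers (8192-block groups, 4000 blocks free on average):

    #include <stdio.h>

    int main(void)
    {
        int l2agsize = 13;        /* 8192-block allocation groups */
        long long avgfree = 4000; /* average free blocks per active ag */
        long long pct = (avgfree * 100) >> l2agsize;

        if (pct < 60)
            printf("activate the next group: avg free is %lld%%\n", pct);
        return 0;
    }
    /* (4000 * 100) >> 13 == 48, i.e. about 48% of an 8192-block group */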
+		if (bmp->db_agfree[bmp->db_agpref] < avgfree) {
+			for (bmp->db_agpref = 0; bmp->db_agpref < actags;
+			     bmp->db_agpref++) {
+				if (bmp->db_agfree[bmp->db_agpref] <=
+				    avgfree)
+					break;
+			}
+			assert(bmp->db_agpref < bmp->db_numag);
+		}
+	}
+
+	BMAP_UNLOCK(bmp);
+
+	/* return the preferred group.
+	 */
+	return (bmp->db_agpref);
+}
+
+
+/*
+ * NAME:	dbAlloc()
+ *
+ * FUNCTION:	attempt to allocate a specified number of contiguous free
+ *		blocks from the working allocation block map.
+ *
+ *		the block allocation policy uses hints and a multi-step
+ *		approach.
+ *
+ *		for allocation requests smaller than the number of blocks
+ *		per dmap, we first try to allocate the new blocks
+ *		immediately following the hint.  if these blocks are not
+ *		available, we try to allocate blocks near the hint.  if
+ *		no blocks near the hint are available, we next try to
+ *		allocate within the same dmap as contains the hint.
+ *
+ *		if no blocks are available in the dmap or the allocation
+ *		request is larger than the dmap size, we try to allocate
+ *		within the same allocation group as contains the hint.  if
+ *		this does not succeed, we finally try to allocate anywhere
+ *		within the aggregate.
+ *
+ *		we also try to allocate anywhere within the aggregate
+ *		for allocation requests larger than the allocation group
+ *		size or requests that specify no hint value.
+ *
+ * PARAMETERS:
+ *	ip	- pointer to in-core inode;
+ *	hint	- allocation hint.
+ *	nblocks	- number of contiguous blocks in the range.
+ *	results	- on successful return, set to the starting block number
+ *		  of the newly allocated contiguous range.
+ *
+ * RETURN VALUES:
+ *	0	- success
+ *	ENOSPC	- insufficient disk resources
+ *	EIO	- i/o error
+ */
+int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
+{
+	int rc, agno;
+	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
+	bmap_t *bmp;
+	metapage_t *mp;
+	s64 lblkno, blkno;
+	dmap_t *dp;
+	int l2nb;
+	s64 mapSize;
+
+	/* assert that nblocks is valid */
+	assert(nblocks > 0);
+
+#ifdef _STILL_TO_PORT
+	/* DASD limit check					F226941 */
+	if (OVER_LIMIT(ip, nblocks))
+		return ENOSPC;
+#endif /* _STILL_TO_PORT */
+
+	/* get the log2 number of blocks to be allocated.
+	 * if the number of blocks is not a log2 multiple,
+	 * it will be rounded up to the next log2 multiple.
+	 */
+	l2nb = BLKSTOL2(nblocks);
+
+	bmp = JFS_SBI(ip->i_sb)->bmap;
+
+//retry:	/* serialize w.r.t. extendfs() */
+	mapSize = bmp->db_mapsize;
+
+	/* the hint should be within the map */
+	assert(hint < mapSize);
+
+	/* if no hint was specified or the number of blocks to be
+	 * allocated is greater than the allocation group size, try
+	 * to allocate anywhere.
+	 */
+	if (hint == 0 || l2nb > bmp->db_agl2size) {
+		IWRITE_LOCK(ipbmap);
+
+		rc = dbAllocAny(bmp, nblocks, l2nb, results);
+		if (rc == 0) {
+			DBALLOC(bmp->db_DBmap, bmp->db_mapsize, *results,
+				nblocks);
+		}
+
+		IWRITE_UNLOCK(ipbmap);
+		return (rc);
+	}
+
+	/* we would like to allocate close to the hint.  adjust the
+	 * hint to the block following the hint since the allocators
+	 * will start looking for free space starting at this point.
+	 * if the hint was the last block of the file system, try to
+	 * allocate in the same allocation group as the hint.
+	 */
+	blkno = hint + 1;
+	if (blkno >= bmp->db_mapsize) {
+		blkno--;
+		goto tryag;
+	}
+
+	/* check if blkno crosses over into a new allocation group.
+	 * if so, check if we should allow allocations within this
+	 * allocation group. 
we try to keep the trailing (rightmost) + * allocation groups of the file system free for large + * allocations and may want to prevent this allocation from + * spilling over into this space. + */ + if ((blkno & (bmp->db_agsize - 1)) == 0) { + /* check if the AG is beyond the rightmost AG with + * allocations in it. if so, call dbNextAG() to + * determine if the allocation should be allowed + * to proceed within this AG or should be targeted + * to another AG. + */ + agno = blkno >> bmp->db_agl2size; + if (agno > bmp->db_maxag) { + agno = dbNextAG(ipbmap); + blkno = (s64) agno << bmp->db_agl2size; + goto tryag; + } + } + + /* check if the allocation request size can be satisfied from a + * single dmap. if so, try to allocate from the dmap containing + * the hint using a tiered strategy. + */ + if (nblocks <= BPERDMAP) { + IREAD_LOCK(ipbmap); + + /* get the buffer for the dmap containing the hint. + */ + lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); + mp = read_metapage(ipbmap, lblkno, PSIZE, 0); + if (mp == NULL) { + IREAD_UNLOCK(ipbmap); + return (EIO); + } + dp = (dmap_t *) mp->data; + + /* first, try to satisfy the allocation request with the + * blocks beginning at the hint. + */ + if ((rc = + dbAllocNext(bmp, dp, blkno, + (int) nblocks)) != ENOSPC) { + if (rc == 0) { + *results = blkno; + DBALLOC(bmp->db_DBmap, bmp->db_mapsize, + *results, nblocks); + write_metapage(mp); + } else { + assert(rc == EIO); + release_metapage(mp); + } + + IREAD_UNLOCK(ipbmap); + return (rc); + } + + /* next, try to satisfy the allocation request with blocks + * near the hint. + */ + if ((rc = + dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, + results)) + != ENOSPC) { + if (rc == 0) { + DBALLOC(bmp->db_DBmap, bmp->db_mapsize, + *results, nblocks); + mark_metapage_dirty(mp); + } + release_metapage(mp); + + IREAD_UNLOCK(ipbmap); + return (rc); + } + + /* try to satisfy the allocation request with blocks within + * the same allocation group as the hint. + */ + if ((rc = + dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results)) + != ENOSPC) { + if (rc == 0) { + DBALLOC(bmp->db_DBmap, bmp->db_mapsize, + *results, nblocks); + mark_metapage_dirty(mp); + } + release_metapage(mp); + + IREAD_UNLOCK(ipbmap); + return (rc); + } + + release_metapage(mp); + IREAD_UNLOCK(ipbmap); + } + + tryag: + IWRITE_LOCK(ipbmap); + + /* determine the allocation group number of the hint and try to + * allocate within this allocation group. if that fails, try to + * allocate anywhere in the map. + */ + agno = blkno >> bmp->db_agl2size; + if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == ENOSPC) + rc = dbAllocAny(bmp, nblocks, l2nb, results); + if (rc == 0) { + DBALLOC(bmp->db_DBmap, bmp->db_mapsize, *results, nblocks); + } + + IWRITE_UNLOCK(ipbmap); + + return (rc); +} + + +/* + * NAME: dbAllocExact() + * + * FUNCTION: try to allocate the requested extent; + * + * PARAMETERS: + * ip - pointer to in-core inode; + * blkno - extent address; + * nblocks - extent length; + * + * RETURN VALUES: + * 0 - success + * ENOSPC - insufficient disk resources + * EIO - i/o error + */ +int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) +{ + int rc; + struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; + bmap_t *bmp = JFS_SBI(ip->i_sb)->bmap; + dmap_t *dp; + s64 lblkno; + metapage_t *mp; + + IREAD_LOCK(ipbmap); + + /* + * validate extent request: + * + * note: defragfs policy: + * max 64 blocks will be moved. + * allocation request size must be satisfied from a single dmap. 
+ */ + if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) { + IREAD_UNLOCK(ipbmap); + return EINVAL; + } + + if (nblocks > ((s64) 1 << bmp->db_maxfreebud)) { + /* the free space is no longer available */ + IREAD_UNLOCK(ipbmap); + return ENOSPC; + } + + /* read in the dmap covering the extent */ + lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); + mp = read_metapage(ipbmap, lblkno, PSIZE, 0); + if (mp == NULL) { + IREAD_UNLOCK(ipbmap); + return (EIO); + } + dp = (dmap_t *) mp->data; + + /* try to allocate the requested extent */ + rc = dbAllocNext(bmp, dp, blkno, nblocks); + + IREAD_UNLOCK(ipbmap); + + if (rc == 0) { + DBALLOC(bmp->db_DBmap, bmp->db_mapsize, blkno, nblocks); + mark_metapage_dirty(mp); + } + release_metapage(mp); + + return (rc); +} + + +/* + * NAME: dbReAlloc() + * + * FUNCTION: attempt to extend a current allocation by a specified + * number of blocks. + * + * this routine attempts to satisfy the allocation request + * by first trying to extend the existing allocation in + * place by allocating the additional blocks as the blocks + * immediately following the current allocation. if these + * blocks are not available, this routine will attempt to + * allocate a new set of contiguous blocks large enough + * to cover the existing allocation plus the additional + * number of blocks required. + * + * PARAMETERS: + * ip - pointer to in-core inode requiring allocation. + * blkno - starting block of the current allocation. + * nblocks - number of contiguous blocks within the current + * allocation. + * addnblocks - number of blocks to add to the allocation. + * results - on successful return, set to the starting block number + * of the existing allocation if the existing allocation + * was extended in place or to a newly allocated contiguous + * range if the existing allocation could not be extended + * in place. + * + * RETURN VALUES: + * 0 - success + * ENOSPC - insufficient disk resources + * EIO - i/o error + */ +int +dbReAlloc(struct inode *ip, + s64 blkno, s64 nblocks, s64 addnblocks, s64 * results) +{ + int rc; + + /* try to extend the allocation in place. + */ + if ((rc = dbExtend(ip, blkno, nblocks, addnblocks)) == 0) { + *results = blkno; + return (0); + } else { + if (rc != ENOSPC) + return (rc); + } + + /* could not extend the allocation in place, so allocate a + * new set of blocks for the entire request (i.e. try to get + * a range of contiguous blocks large enough to cover the + * existing allocation plus the additional blocks.) + */ + return (dbAlloc + (ip, blkno + nblocks - 1, addnblocks + nblocks, results)); +} + + +/* + * NAME: dbExtend() + * + * FUNCTION: attempt to extend a current allocation by a specified + * number of blocks. + * + * this routine attempts to satisfy the allocation request + * by first trying to extend the existing allocation in + * place by allocating the additional blocks as the blocks + * immediately following the current allocation. + * + * PARAMETERS: + * ip - pointer to in-core inode requiring allocation. + * blkno - starting block of the current allocation. + * nblocks - number of contiguous blocks within the current + * allocation. + * addnblocks - number of blocks to add to the allocation. 
+ *
+ * RETURN VALUES:
+ *	0	- success
+ *	ENOSPC	- insufficient disk resources
+ *	EIO	- i/o error
+ */
+int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
+{
+	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
+	s64 lblkno, lastblkno, extblkno;
+	uint rel_block;
+	metapage_t *mp;
+	dmap_t *dp;
+	int rc;
+	struct inode *ipbmap = sbi->ipbmap;
+	bmap_t *bmp;
+
+	/*
+	 * We don't want a non-aligned extent to cross a page boundary
+	 */
+	if (((rel_block = blkno & (sbi->nbperpage - 1))) &&
+	    (rel_block + nblocks + addnblocks > sbi->nbperpage))
+		return (ENOSPC);
+
+	/* get the last block of the current allocation */
+	lastblkno = blkno + nblocks - 1;
+
+	/* determine the block number of the block following
+	 * the existing allocation.
+	 */
+	extblkno = lastblkno + 1;
+
+	IREAD_LOCK(ipbmap);
+
+	/* better be within the file system */
+	bmp = sbi->bmap;
+	assert(lastblkno >= 0 && lastblkno < bmp->db_mapsize);
+
+	/* we'll attempt to extend the current allocation in place by
+	 * allocating the additional blocks as the blocks immediately
+	 * following the current allocation.  we only try to extend the
+	 * current allocation in place if the number of additional blocks
+	 * can fit into a dmap, the last block of the current allocation
+	 * is not the last block of the file system, and the start of the
+	 * inplace extension is not on an allocation group boundary.
+	 */
+	if (addnblocks > BPERDMAP || extblkno >= bmp->db_mapsize ||
+	    (extblkno & (bmp->db_agsize - 1)) == 0) {
+		IREAD_UNLOCK(ipbmap);
+		return (ENOSPC);
+	}
+
+	/* get the buffer for the dmap containing the first block
+	 * of the extension.
+	 */
+	lblkno = BLKTODMAP(extblkno, bmp->db_l2nbperpage);
+	mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
+	if (mp == NULL) {
+		IREAD_UNLOCK(ipbmap);
+		return (EIO);
+	}
+
+	DBALLOCCK(bmp->db_DBmap, bmp->db_mapsize, blkno, nblocks);
+	dp = (dmap_t *) mp->data;
+
+	/* try to allocate the blocks immediately following the
+	 * current allocation.
+	 */
+	rc = dbAllocNext(bmp, dp, extblkno, (int) addnblocks);
+
+	IREAD_UNLOCK(ipbmap);
+
+	/* were we successful ? */
+	if (rc == 0) {
+		DBALLOC(bmp->db_DBmap, bmp->db_mapsize, extblkno,
+			addnblocks);
+		write_metapage(mp);
+	} else {
+		/* we were not successful */
+		release_metapage(mp);
+		assert(rc == ENOSPC || rc == EIO);
+	}
+
+	return (rc);
+}
+
+
+/*
+ * NAME:	dbAllocNext()
+ *
+ * FUNCTION:	attempt to allocate the blocks of the specified block
+ *		range within a dmap.
+ *
+ * PARAMETERS:
+ *	bmp	- pointer to bmap descriptor
+ *	dp	- pointer to dmap.
+ *	blkno	- starting block number of the range.
+ *	nblocks	- number of contiguous free blocks of the range.
+ *
+ * RETURN VALUES:
+ *	0	- success
+ *	ENOSPC	- insufficient disk resources
+ *	EIO	- i/o error
+ *
+ * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
+ */
+static int dbAllocNext(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks)
+{
+	int dbitno, word, rembits, nb, nwords, wbitno, nw;
+	int l2size;
+	s8 *leaf;
+	u32 mask;
+
+	/* pick up a pointer to the leaves of the dmap tree.
+	 */
+	leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);
+
+	/* determine the bit number and word within the dmap of the
+	 * starting block.
+	 */
+	dbitno = blkno & (BPERDMAP - 1);
+	word = dbitno >> L2DBWORD;
+
+	/* check if the specified block range is contained within
+	 * this dmap.
+	 */
+	if (dbitno + nblocks > BPERDMAP)
+		return (ENOSPC);
+
+	/* check if the starting leaf indicates that anything
+	 * is free.
+	 */
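The page-crossing guard at the top of dbExtend() above refuses an in-place extension when a non-page-aligned extent would grow past the end of its page. An illustrative standalone sketch with made-up numbers, assuming nbperpage = 8 (4096-byte pages holding eight 512-byte blocks):

    #include <stdio.h>

    int main(void)
    {
        int nbperpage = 8;
        long long blkno = 10, nblocks = 3, addnblocks = 4;
        unsigned rel_block = blkno & (nbperpage - 1);

        /* rel_block is 2; 2 + 3 + 4 = 9 > 8, so the extension is refused */
        if (rel_block && rel_block + nblocks + addnblocks > nbperpage)
            printf("refuse: extension would cross a page boundary\n");
        return 0;
    }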
+	if (leaf[word] == NOFREE)
+		return (ENOSPC);
+
+	/* check the dmap words corresponding to the block range to see
+	 * if the block range is free.  not all bits of the first and
+	 * last words may be contained within the block range.  if this
+	 * is the case, we'll work against those words (i.e. partial first
+	 * and/or last) on an individual basis (a single pass) and examine
+	 * the actual bits to determine if they are free.  a single pass
+	 * will be used for all dmap words fully contained within the
+	 * specified range.  within this pass, the leaves of the dmap
+	 * tree will be examined to determine if the blocks are free.  a
+	 * single leaf may describe the free space of multiple dmap
+	 * words, so we may visit only a subset of the actual leaves
+	 * corresponding to the dmap words of the block range.
+	 */
+	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
+		/* determine the bit number within the word and
+		 * the number of bits within the word.
+		 */
+		wbitno = dbitno & (DBWORD - 1);
+		nb = min(rembits, DBWORD - wbitno);
+
+		/* check if only part of the word is to be examined.
+		 */
+		if (nb < DBWORD) {
+			/* check if the bits are free.
+			 */
+			mask = (ONES << (DBWORD - nb) >> wbitno);
+			if ((mask & ~le32_to_cpu(dp->wmap[word])) != mask)
+				return (ENOSPC);
+
+			word += 1;
+		} else {
+			/* one or more dmap words are fully contained
+			 * within the block range.  determine how many
+			 * words and how many bits.
+			 */
+			nwords = rembits >> L2DBWORD;
+			nb = nwords << L2DBWORD;
+
+			/* now examine the appropriate leaves to determine
+			 * if the blocks are free.
+			 */
+			while (nwords > 0) {
+				/* does the leaf describe any free space ?
+				 */
+				if (leaf[word] < BUDMIN)
+					return (ENOSPC);
+
+				/* determine the l2 number of bits provided
+				 * by this leaf.
+				 */
+				l2size =
+				    min((int)leaf[word], NLSTOL2BSZ(nwords));
+
+				/* determine how many words were handled.
+				 */
+				nw = BUDSIZE(l2size, BUDMIN);
+
+				nwords -= nw;
+				word += nw;
+			}
+		}
+	}
+
+	/* allocate the blocks.
+	 */
+	return (dbAllocDmap(bmp, dp, blkno, nblocks));
+}
+
+
+/*
+ * NAME:	dbAllocNear()
+ *
+ * FUNCTION:	attempt to allocate a number of contiguous free blocks near
+ *		a specified block (hint) within a dmap.
+ *
+ *		starting with the dmap leaf that covers the hint, we'll
+ *		check the next four contiguous leaves for sufficient free
+ *		space.  if sufficient free space is found, we'll allocate
+ *		the desired free space.
+ *
+ * PARAMETERS:
+ *	bmp	- pointer to bmap descriptor
+ *	dp	- pointer to dmap.
+ *	blkno	- block number to allocate near.
+ *	nblocks	- actual number of contiguous free blocks desired.
+ *	l2nb	- log2 number of contiguous free blocks desired.
+ *	results	- on successful return, set to the starting block number
+ *		  of the newly allocated range.
+ *
+ * RETURN VALUES:
+ *	0	- success
+ *	ENOSPC	- insufficient disk resources
+ *	EIO	- i/o error
+ *
+ * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
+ */
+static int
+dbAllocNear(bmap_t * bmp,
+	    dmap_t * dp, s64 blkno, int nblocks, int l2nb, s64 * results)
+{
+	int word, lword, rc;
+	s8 *leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);
+
+	/* determine the word within the dmap that holds the hint
+	 * (i.e. blkno).  also, determine the last word in the dmap
+	 * that we'll include in our examination.
+	 */
+	word = (blkno & (BPERDMAP - 1)) >> L2DBWORD;
+	lword = min(word + 4, LPERDMAP);
+
+	/* examine the leaves for sufficient free space.
+	 */
+	for (; word < lword; word++) {
+		/* does the leaf describe sufficient free space ? 
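In dbAllocNext() above, a partial first or last word is tested directly against the working map: a mask selects nb bits starting wbitno bits from the most significant end, and the range is free only if every masked bit is clear in wmap. An illustrative standalone sketch with a made-up map word (ONES, as in jfs_dmap.h, is a 32-bit word of one bits):

    #include <stdio.h>

    #define DBWORD 32
    #define ONES   0xffffffffu

    int main(void)
    {
        unsigned int wmap = 0xff00ff00u; /* 1 bits = allocated blocks */
        int wbitno = 8, nb = 8;          /* want bits 8..15 from the msb */
        unsigned int mask = (ONES << (DBWORD - nb)) >> wbitno;

        if ((mask & ~wmap) == mask)
            printf("free: mask 0x%08x is clear in wmap\n", mask);
        return 0;
    }
    /* mask is 0x00ff0000 and those bits are 0 in wmap, so it prints */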
+ */ + if (leaf[word] < l2nb) + continue; + + /* determine the block number within the file system + * of the first block described by this dmap word. + */ + blkno = le64_to_cpu(dp->start) + (word << L2DBWORD); + + /* if not all bits of the dmap word are free, get the + * starting bit number within the dmap word of the required + * string of free bits and adjust the block number with the + * value. + */ + if (leaf[word] < BUDMIN) + blkno += + dbFindBits(le32_to_cpu(dp->wmap[word]), l2nb); + + /* allocate the blocks. + */ + if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0) + *results = blkno; + + return (rc); + } + + return (ENOSPC); +} + + +/* + * NAME: dbAllocAG() + * + * FUNCTION: attempt to allocate the specified number of contiguous + * free blocks within the specified allocation group. + * + * unless the allocation group size is equal to the number + * of blocks per dmap, the dmap control pages will be used to + * find the required free space, if available. we start the + * search at the highest dmap control page level which + * distinctly describes the allocation group's free space + * (i.e. the highest level at which the allocation group's + * free space is not mixed in with that of any other group). + * in addition, we start the search within this level at a + * height of the dmapctl dmtree at which the nodes distinctly + * describe the allocation group's free space. at this height, + * the allocation group's free space may be represented by 1 + * or two sub-trees, depending on the allocation group size. + * we search the top nodes of these subtrees left to right for + * sufficient free space. if sufficient free space is found, + * the subtree is searched to find the leftmost leaf that + * has free space. once we have made it to the leaf, we + * move the search to the next lower level dmap control page + * corresponding to this leaf. we continue down the dmap control + * pages until we find the dmap that contains or starts the + * sufficient free space and we allocate at this dmap. + * + * if the allocation group size is equal to the dmap size, + * we'll start at the dmap corresponding to the allocation + * group and attempt the allocation at this level. + * + * the dmap control page search is also not performed if the + * allocation group is completely free and we go to the first + * dmap of the allocation group to do the allocation. this is + * done because the allocation group may be part (not the first + * part) of a larger binary buddy system, causing the dmap + * control pages to indicate no free space (NOFREE) within + * the allocation group. + * + * PARAMETERS: + * bmp - pointer to bmap descriptor + * agno - allocation group number. + * nblocks - actual number of contiguous free blocks desired. + * l2nb - log2 number of contiguous free blocks desired. + * results - on successful return, set to the starting block number + * of the newly allocated range. + * + * RETURN VALUES: + * 0 - success + * ENOSPC - insufficient disk resources + * EIO - i/o error + * + * note: IWRITE_LOCK(ipmap) held on entry/exit; + */ +static int +dbAllocAG(bmap_t * bmp, int agno, s64 nblocks, int l2nb, s64 * results) +{ + metapage_t *mp; + dmapctl_t *dcp; + int rc, ti, i, k, m, n, agperlev; + s64 blkno, lblkno; + int budmin; + + /* allocation request should not be for more than the + * allocation group size. + */ + assert(l2nb <= bmp->db_agl2size); + + /* determine the starting block number of the allocation + * group. 
+ */ + blkno = (s64) agno << bmp->db_agl2size; + + /* check if the allocation group size is the minimum allocation + * group size or if the allocation group is completely free. if + * the allocation group size is the minimum size of BPERDMAP (i.e. + * 1 dmap), there is no need to search the dmap control page (below) + * that fully describes the allocation group since the allocation + * group is already fully described by a dmap. in this case, we + * just call dbAllocCtl() to search the dmap tree and allocate the + * required space if available. + * + * if the allocation group is completely free, dbAllocCtl() is + * also called to allocate the required space. this is done for + * two reasons. first, it makes no sense searching the dmap control + * pages for free space when we know that free space exists. second, + * the dmap control pages may indicate that the allocation group + * has no free space if the allocation group is part (not the first + * part) of a larger binary buddy system. + */ + if (bmp->db_agsize == BPERDMAP + || bmp->db_agfree[agno] == bmp->db_agsize) { + rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); + /* assert(!(rc == ENOSPC && bmp->db_agfree[agno] == bmp->db_agsize)); */ + if ((rc == ENOSPC) && + (bmp->db_agfree[agno] == bmp->db_agsize)) { + jERROR(1, + ("dbAllocAG: removed assert, but still need to debug here\nblkno = 0x%Lx, nblocks = 0x%Lx\n", + (unsigned long long) blkno, + (unsigned long long) nblocks)); + } + return (rc); + } + + /* the buffer for the dmap control page that fully describes the + * allocation group. + */ + lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, bmp->db_aglevel); + mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); + if (mp == NULL) + return (EIO); + dcp = (dmapctl_t *) mp->data; + budmin = dcp->budmin; + + /* search the subtree(s) of the dmap control page that describes + * the allocation group, looking for sufficient free space. to begin, + * determine how many allocation groups are represented in a dmap + * control page at the control page level (i.e. L0, L1, L2) that + * fully describes an allocation group. next, determine the starting + * tree index of this allocation group within the control page. + */ + agperlev = + (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth; + ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); + + /* dmap control page trees fan-out by 4 and a single allocation + * group may be described by 1 or 2 subtrees within the ag level + * dmap control page, depending upon the ag size. examine the ag's + * subtrees for sufficient free space, starting with the leftmost + * subtree. + */ + for (i = 0; i < bmp->db_agwidth; i++, ti++) { + /* is there sufficient free space ? + */ + if (l2nb > dcp->stree[ti]) + continue; + + /* sufficient free space found in a subtree. now search down + * the subtree to find the leftmost leaf that describes this + * free space. + */ + for (k = bmp->db_agheigth; k > 0; k--) { + for (n = 0, m = (ti << 2) + 1; n < 4; n++) { + if (l2nb <= dcp->stree[m + n]) { + ti = m + n; + break; + } + } + assert(n < 4); + } + + /* determine the block number within the file system + * that corresponds to this leaf. + */ + if (bmp->db_aglevel == 2) + blkno = 0; + else if (bmp->db_aglevel == 1) + blkno &= ~(MAXL1SIZE - 1); + else /* bmp->db_aglevel == 0 */ + blkno &= ~(MAXL0SIZE - 1); + + blkno += + ((s64) (ti - le32_to_cpu(dcp->leafidx))) << budmin; + + /* release the buffer in preparation for going down + * the next level of dmap control pages. 
+		 */
+		release_metapage(mp);
+
+		/* check if we need to continue to search down the lower
+		 * level dmap control pages.  we need to if the number of
+		 * blocks required is less than maximum number of blocks
+		 * described at the next lower level.
+		 */
+		if (l2nb < budmin) {
+
+			/* search the lower level dmap control pages to get
+			 * the starting block number of the dmap that
+			 * contains or starts off the free space.
+			 */
+			if ((rc =
+			     dbFindCtl(bmp, l2nb, bmp->db_aglevel - 1,
+				       &blkno))) {
+				assert(rc != ENOSPC);
+				return (rc);
+			}
+		}
+
+		/* allocate the blocks.
+		 */
+		rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
+		assert(rc != ENOSPC);
+		return (rc);
+	}
+
+	/* no space in the allocation group.  release the buffer and
+	 * return ENOSPC.
+	 */
+	release_metapage(mp);
+
+	return (ENOSPC);
+}
+
+
+/*
+ * NAME:	dbAllocAny()
+ *
+ * FUNCTION:	attempt to allocate the specified number of contiguous
+ *		free blocks anywhere in the file system.
+ *
+ *		dbAllocAny() attempts to find sufficient free space by
+ *		searching down the dmap control pages, starting with the
+ *		highest level (i.e. L0, L1, L2) control page.  if free space
+ *		large enough to satisfy the desired free space is found, the
+ *		desired free space is allocated.
+ *
+ * PARAMETERS:
+ *	bmp	- pointer to bmap descriptor
+ *	nblocks	- actual number of contiguous free blocks desired.
+ *	l2nb	- log2 number of contiguous free blocks desired.
+ *	results	- on successful return, set to the starting block number
+ *		  of the newly allocated range.
+ *
+ * RETURN VALUES:
+ *	0	- success
+ *	ENOSPC	- insufficient disk resources
+ *	EIO	- i/o error
+ *
+ * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
+ */
+static int dbAllocAny(bmap_t * bmp, s64 nblocks, int l2nb, s64 * results)
+{
+	int rc;
+	s64 blkno = 0;
+
+	/* starting with the top level dmap control page, search
+	 * down the dmap control levels for sufficient free space.
+	 * if free space is found, dbFindCtl() returns the starting
+	 * block number of the dmap that contains or starts off the
+	 * range of free space.
+	 */
+	if ((rc = dbFindCtl(bmp, l2nb, bmp->db_maxlevel, &blkno)))
+		return (rc);
+
+	/* allocate the blocks.
+	 */
+	rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
+	assert(rc != ENOSPC);
+	return (rc);
+}
+
+
+/*
+ * NAME:	dbFindCtl()
+ *
+ * FUNCTION:	starting at a specified dmap control page level and block
+ *		number, search down the dmap control levels for a range of
+ *		contiguous free blocks large enough to satisfy an allocation
+ *		request for the specified number of free blocks.
+ *
+ *		if sufficient contiguous free blocks are found, this routine
+ *		returns the starting block number within a dmap page that
+ *		contains or starts a range of contiguous free blocks that
+ *		is sufficient in size.
+ *
+ * PARAMETERS:
+ *	bmp	- pointer to bmap descriptor
+ *	level	- starting dmap control page level.
+ *	l2nb	- log2 number of contiguous free blocks desired.
+ *	*blkno	- on entry, starting block number for conducting the search.
+ *		  on successful return, the first block within a dmap page
+ *		  that contains or starts a range of contiguous free blocks.
+ *
+ * RETURN VALUES:
+ * 0 - success
+ * ENOSPC - insufficient disk resources
+ * EIO - i/o error
+ *
+ * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
+ */
+static int dbFindCtl(bmap_t * bmp, int l2nb, int level, s64 * blkno)
+{
+ int rc, leafidx, lev;
+ s64 b, lblkno;
+ dmapctl_t *dcp;
+ int budmin;
+ metapage_t *mp;
+
+ /* starting at the specified dmap control page level and block
+ * number, search down the dmap control levels for the starting
+ * block number of a dmap page that contains or starts off
+ * sufficient free blocks.
+ */
+ for (lev = level, b = *blkno; lev >= 0; lev--) {
+ /* get the buffer of the dmap control page for the block
+ * number and level (i.e. L0, L1, L2).
+ */
+ lblkno = BLKTOCTL(b, bmp->db_l2nbperpage, lev);
+ mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
+ if (mp == NULL)
+ return (EIO);
+ dcp = (dmapctl_t *) mp->data;
+ budmin = dcp->budmin;
+
+ /* search the tree within the dmap control page for
+ * sufficient free space. if sufficient free space is found,
+ * dbFindLeaf() returns the index of the leaf at which
+ * free space was found.
+ */
+ rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx);
+
+ /* release the buffer.
+ */
+ release_metapage(mp);
+
+ /* space found ?
+ */
+ if (rc) {
+ assert(lev == level);
+ return (ENOSPC);
+ }
+
+ /* adjust the block number to reflect the location within
+ * the dmap control page (i.e. the leaf) at which free
+ * space was found.
+ */
+ b += (((s64) leafidx) << budmin);
+
+ /* we stop the search at this dmap control page level if
+ * the number of blocks required is greater than or equal
+ * to the maximum number of blocks described at the next
+ * (lower) level.
+ */
+ if (l2nb >= budmin)
+ break;
+ }
+
+ *blkno = b;
+ return (0);
+}
+
+
+/*
+ * NAME: dbAllocCtl()
+ *
+ * FUNCTION: attempt to allocate a specified number of contiguous
+ * blocks starting within a specific dmap.
+ *
+ * this routine is called by higher level routines that search
+ * the dmap control pages above the actual dmaps for contiguous
+ * free space. the results of successful searches by these
+ * routines are the starting block numbers within dmaps, with
+ * the dmaps themselves containing the desired contiguous free
+ * space or starting a contiguous free space of desired size
+ * that is made up of the blocks of one or more dmaps. these
+ * calls should not fail due to insufficient resources.
+ *
+ * this routine is called in some cases where it is not known
+ * whether it will fail due to insufficient resources. more
+ * specifically, this occurs when allocating from an allocation
+ * group whose size is equal to the number of blocks per dmap.
+ * in this case, the dmap control pages are not examined prior
+ * to calling this routine (to save pathlength) and the call
+ * might fail.
+ *
+ * for a request size that fits within a dmap, this routine relies
+ * upon the dmap's dmtree to find the requested contiguous free
+ * space. for request sizes that are larger than a dmap, the
+ * requested free space will start at the first block of the
+ * first dmap (i.e. blkno).
+ *
+ * PARAMETERS:
+ * bmp - pointer to bmap descriptor
+ * nblocks - actual number of contiguous free blocks to allocate.
+ * l2nb - log2 number of contiguous free blocks to allocate.
+ * blkno - starting block number of the dmap to start the allocation
+ * from.
+ * results - on successful return, set to the starting block number
+ * of the newly allocated range.
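+ *
+ * (worked example added in editing: with BPERDMAP = 8192, a request
+ * of l2nb = 14, i.e. nblocks = 16384, takes the multiple-dmap path
+ * below: blkno must be dmap-aligned and the request is satisfied
+ * one full dmap of 8192 blocks per loop iteration.)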
+ *
+ * RETURN VALUES:
+ * 0 - success
+ * ENOSPC - insufficient disk resources
+ * EIO - i/o error
+ *
+ * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
+ */
+static int
+dbAllocCtl(bmap_t * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
+{
+ int rc, nb;
+ s64 b, lblkno, n;
+ metapage_t *mp;
+ dmap_t *dp;
+
+ /* check if the allocation request is confined to a single dmap.
+ */
+ if (l2nb <= L2BPERDMAP) {
+ /* get the buffer for the dmap.
+ */
+ lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
+ mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
+ if (mp == NULL)
+ return (EIO);
+ dp = (dmap_t *) mp->data;
+
+ /* try to allocate the blocks.
+ */
+ rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results);
+ if (rc == 0)
+ mark_metapage_dirty(mp);
+
+ release_metapage(mp);
+
+ return (rc);
+ }
+
+ /* allocation request involving multiple dmaps. it must start on
+ * a dmap boundary.
+ */
+ assert((blkno & (BPERDMAP - 1)) == 0);
+
+ /* allocate the blocks dmap by dmap.
+ */
+ for (n = nblocks, b = blkno; n > 0; n -= nb, b += nb) {
+ /* get the buffer for the dmap.
+ */
+ lblkno = BLKTODMAP(b, bmp->db_l2nbperpage);
+ mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
+ if (mp == NULL) {
+ rc = EIO;
+ goto backout;
+ }
+ dp = (dmap_t *) mp->data;
+
+ /* the dmap better be all free.
+ */
+ assert(dp->tree.stree[ROOT] == L2BPERDMAP);
+
+ /* determine how many blocks to allocate from this dmap.
+ */
+ nb = min(n, (s64)BPERDMAP);
+
+ /* allocate the blocks from the dmap.
+ */
+ if ((rc = dbAllocDmap(bmp, dp, b, nb))) {
+ release_metapage(mp);
+ goto backout;
+ }
+
+ /* write the buffer.
+ */
+ write_metapage(mp);
+ }
+
+ /* set the results (starting block number) and return.
+ */
+ *results = blkno;
+ return (0);
+
+ /* something failed in handling an allocation request involving
+ * multiple dmaps. we'll try to clean up by backing out any
+ * allocation that has already happened for this request. if
+ * we fail in backing out the allocation, we'll mark the file
+ * system to indicate that blocks have been leaked.
+ */
+ backout:
+
+ /* try to backout the allocations dmap by dmap.
+ */
+ for (n = nblocks - n, b = blkno; n > 0;
+ n -= BPERDMAP, b += BPERDMAP) {
+ /* get the buffer for this dmap.
+ */
+ lblkno = BLKTODMAP(b, bmp->db_l2nbperpage);
+ mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
+ if (mp == NULL) {
+ /* could not back out. mark the file system
+ * to indicate that we have leaked blocks.
+ */
+ fsDirty(); /* !!! */
+ jERROR(1,
+ ("dbAllocCtl: I/O Error: Block Leakage.\n"));
+ continue;
+ }
+ dp = (dmap_t *) mp->data;
+
+ /* free the blocks in this dmap.
+ */
+ if (dbFreeDmap(bmp, dp, b, BPERDMAP)) {
+ /* could not back out. mark the file system
+ * to indicate that we have leaked blocks.
+ */
+ release_metapage(mp);
+ fsDirty(); /* !!! */
+ jERROR(1, ("dbAllocCtl: Block Leakage.\n"));
+ continue;
+ }
+
+ /* write the buffer.
+ */
+ write_metapage(mp);
+ }
+
+ return (rc);
+}
+
+
+/*
+ * NAME: dbAllocDmapLev()
+ *
+ * FUNCTION: attempt to allocate a specified number of contiguous blocks
+ * from a specified dmap.
+ *
+ * this routine checks if the contiguous blocks are available.
+ * if so, nblocks blocks are allocated; otherwise, ENOSPC is
+ * returned.
+ *
+ * PARAMETERS:
+ * bmp - pointer to bmap descriptor
+ * dp - pointer to dmap to attempt to allocate blocks from.
+ * nblocks - actual number of contiguous blocks desired.
+ * l2nb - log2 number of contiguous blocks desired.
+ * results - on successful return, set to the starting block number
+ * of the newly allocated range.
+ *
+ * RETURN VALUES:
+ * 0 - success
+ * ENOSPC - insufficient disk resources
+ * EIO - i/o error
+ *
+ * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or
+ * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit;
+ */
+static int
+dbAllocDmapLev(bmap_t * bmp,
+ dmap_t * dp, int nblocks, int l2nb, s64 * results)
+{
+ s64 blkno;
+ int leafidx, rc;
+
+ /* can't be more than a dmap's worth of blocks */
+ assert(l2nb <= L2BPERDMAP);
+
+ /* search the tree within the dmap page for sufficient
+ * free space. if sufficient free space is found, dbFindLeaf()
+ * returns the index of the leaf at which free space was found.
+ */
+ if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx))
+ return (ENOSPC);
+
+ /* determine the block number within the file system corresponding
+ * to the leaf at which free space was found.
+ */
+ blkno = le64_to_cpu(dp->start) + (leafidx << L2DBWORD);
+
+ /* if not all bits of the dmap word are free, get the starting
+ * bit number within the dmap word of the required string of free
+ * bits and adjust the block number with this value.
+ */
+ if (dp->tree.stree[leafidx + LEAFIND] < BUDMIN)
+ blkno += dbFindBits(le32_to_cpu(dp->wmap[leafidx]), l2nb);
+
+ /* allocate the blocks */
+ if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0)
+ *results = blkno;
+
+ return (rc);
+}
+
+
+/*
+ * NAME: dbAllocDmap()
+ *
+ * FUNCTION: adjust the disk allocation map to reflect the allocation
+ * of a specified block range within a dmap.
+ *
+ * this routine allocates the specified blocks from the dmap
+ * through a call to dbAllocBits(). if the allocation of the
+ * block range causes the maximum string of free blocks within
+ * the dmap to change (i.e. the value of the root of the dmap's
+ * dmtree), this routine will cause this change to be reflected
+ * up through the appropriate levels of the dmap control pages
+ * by a call to dbAdjCtl() for the L0 dmap control page that
+ * covers this dmap.
+ *
+ * PARAMETERS:
+ * bmp - pointer to bmap descriptor
+ * dp - pointer to dmap to allocate the block range from.
+ * blkno - starting block number of the block to be allocated.
+ * nblocks - number of blocks to be allocated.
+ *
+ * RETURN VALUES:
+ * 0 - success
+ * EIO - i/o error
+ *
+ * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
+ */
+static int dbAllocDmap(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks)
+{
+ s8 oldroot;
+ int rc;
+
+ /* save the current value of the root (i.e. maximum free string)
+ * of the dmap tree.
+ */
+ oldroot = dp->tree.stree[ROOT];
+
+ /* allocate the specified (blocks) bits */
+ dbAllocBits(bmp, dp, blkno, nblocks);
+
+ /* if the root has not changed, done. */
+ if (dp->tree.stree[ROOT] == oldroot)
+ return (0);
+
+ /* root changed. bubble the change up to the dmap control pages.
+ * if the adjustment of the upper level control pages fails,
+ * backout the bit allocation (thus making everything consistent).
+ */
+ if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 1, 0)))
+ dbFreeBits(bmp, dp, blkno, nblocks);
+
+ return (rc);
+}
+
+
+/*
+ * NAME: dbFreeDmap()
+ *
+ * FUNCTION: adjust the disk allocation map to reflect the freeing
+ * of a specified block range within a dmap.
+ *
+ * this routine frees the specified blocks from the dmap through
+ * a call to dbFreeBits(). if the deallocation of the block range
+ * causes the maximum string of free blocks within the dmap to
+ * change (i.e.
the value of the root of the dmap's dmtree), this + * routine will cause this change to be reflected up through the + * appropriate levels of the dmap control pages by a call to + * dbAdjCtl() for the L0 dmap control page that covers this dmap. + * + * PARAMETERS: + * bmp - pointer to bmap descriptor + * dp - pointer to dmap to free the block range from. + * blkno - starting block number of the block to be freed. + * nblocks - number of blocks to be freed. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error + * + * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; + */ +static int dbFreeDmap(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks) +{ + s8 oldroot; + int rc, word; + + /* save the current value of the root (i.e. maximum free string) + * of the dmap tree. + */ + oldroot = dp->tree.stree[ROOT]; + + /* free the specified (blocks) bits */ + dbFreeBits(bmp, dp, blkno, nblocks); + + /* if the root has not changed, done. */ + if (dp->tree.stree[ROOT] == oldroot) + return (0); + + /* root changed. bubble the change up to the dmap control pages. + * if the adjustment of the upper level control pages fails, + * backout the deallocation. + */ + if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) { + word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; + + /* as part of backing out the deallocation, we will have + * to back split the dmap tree if the deallocation caused + * the freed blocks to become part of a larger binary buddy + * system. + */ + if (dp->tree.stree[word] == NOFREE) + dbBackSplit((dmtree_t *) & dp->tree, word); + + dbAllocBits(bmp, dp, blkno, nblocks); + } + + return (rc); +} + + +/* + * NAME: dbAllocBits() + * + * FUNCTION: allocate a specified block range from a dmap. + * + * this routine updates the dmap to reflect the working + * state allocation of the specified block range. it directly + * updates the bits of the working map and causes the adjustment + * of the binary buddy system described by the dmap's dmtree + * leaves to reflect the bits allocated. it also causes the + * dmap's dmtree, as a whole, to reflect the allocated range. + * + * PARAMETERS: + * bmp - pointer to bmap descriptor + * dp - pointer to dmap to allocate bits from. + * blkno - starting block number of the bits to be allocated. + * nblocks - number of bits to be allocated. + * + * RETURN VALUES: none + * + * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; + */ +static void dbAllocBits(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks) +{ + int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; + dmtree_t *tp = (dmtree_t *) & dp->tree; + int size; + s8 *leaf; + + /* pick up a pointer to the leaves of the dmap tree */ + leaf = dp->tree.stree + LEAFIND; + + /* determine the bit number and word within the dmap of the + * starting block. + */ + dbitno = blkno & (BPERDMAP - 1); + word = dbitno >> L2DBWORD; + + /* block range better be within the dmap */ + assert(dbitno + nblocks <= BPERDMAP); + + /* allocate the bits of the dmap's words corresponding to the block + * range. not all bits of the first and last words may be contained + * within the block range. if this is the case, we'll work against + * those words (i.e. partial first and/or last) on an individual basis + * (a single pass), allocating the bits of interest by hand and + * updating the leaf corresponding to the dmap word. a single pass + * will be used for all dmap words fully contained within the + * specified range. 
within this pass, the bits of all fully contained
+ * dmap words will be marked as allocated in a single shot and the leaves
+ * will be updated. a single leaf may describe the free space of
+ * multiple dmap words, so we may update only a subset of the actual
+ * leaves corresponding to the dmap words of the block range.
+ */
+ for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
+ /* determine the bit number within the word and
+ * the number of bits within the word.
+ */
+ wbitno = dbitno & (DBWORD - 1);
+ nb = min(rembits, DBWORD - wbitno);
+
+ /* check if only part of a word is to be allocated.
+ */
+ if (nb < DBWORD) {
+ /* allocate (set to 1) the appropriate bits within
+ * this dmap word.
+ */
+ dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb)
+ >> wbitno);
+
+ /* update the leaf for this dmap word. in addition
+ * to setting the leaf value to the binary buddy max
+ * of the updated dmap word, dbSplit() will split
+ * the binary system of the leaves if need be.
+ */
+ dbSplit(tp, word, BUDMIN,
+ dbMaxBud((u8 *) & dp->wmap[word]));
+
+ word += 1;
+ } else {
+ /* one or more dmap words are fully contained
+ * within the block range. determine how many
+ * words and allocate (set to 1) the bits of these
+ * words.
+ */
+ nwords = rembits >> L2DBWORD;
+ memset(&dp->wmap[word], (int) ONES, nwords * 4);
+
+ /* determine how many bits.
+ */
+ nb = nwords << L2DBWORD;
+
+ /* now update the appropriate leaves to reflect
+ * the allocated words.
+ */
+ for (; nwords > 0; nwords -= nw) {
+ assert(leaf[word] >= BUDMIN);
+
+ /* determine what the leaf value should be
+ * updated to as the minimum of the l2 number
+ * of bits being allocated and the l2 number
+ * of bits currently described by this leaf.
+ */
+ size = min((int)leaf[word], NLSTOL2BSZ(nwords));
+
+ /* update the leaf to reflect the allocation.
+ * in addition to setting the leaf value to
+ * NOFREE, dbSplit() will split the binary
+ * system of the leaves to reflect the current
+ * allocation (size).
+ */
+ dbSplit(tp, word, size, NOFREE);
+
+ /* get the number of dmap words handled */
+ nw = BUDSIZE(size, BUDMIN);
+ word += nw;
+ }
+ }
+ }
+
+ /* update the free count for this dmap */
+ dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks);
+
+ BMAP_LOCK(bmp);
+
+ /* update the maximum allocation group number if this
+ * allocation group is the new max.
+ */
+ agno = blkno >> bmp->db_agl2size;
+ if (agno > bmp->db_maxag)
+ bmp->db_maxag = agno;
+
+ /* update the free count for the allocation group and map */
+ bmp->db_agfree[agno] -= nblocks;
+ bmp->db_nfree -= nblocks;
+
+ BMAP_UNLOCK(bmp);
+}
+
+
+/*
+ * NAME: dbFreeBits()
+ *
+ * FUNCTION: free a specified block range from a dmap.
+ *
+ * this routine updates the dmap to reflect the working
+ * state allocation of the specified block range. it directly
+ * updates the bits of the working map and causes the adjustment
+ * of the binary buddy system described by the dmap's dmtree
+ * leaves to reflect the bits freed. it also causes the dmap's
+ * dmtree, as a whole, to reflect the deallocated range.
+ *
+ * PARAMETERS:
+ * bmp - pointer to bmap descriptor
+ * dp - pointer to dmap to free bits from.
+ * blkno - starting block number of the bits to be freed.
+ * nblocks - number of bits to be freed.
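+ *
+ * (note added in editing: this is the mirror image of dbAllocBits()
+ * above; for example, freeing a full dmap word zeroes wmap[word]
+ * and dbJoin() may then merge the word's leaf, now BUDMIN, with an
+ * equally free buddy leaf into a buddy described by BUDMIN + 1.)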
+ *
+ * RETURN VALUES: none
+ *
+ * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
+ */
+static void dbFreeBits(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks)
+{
+ int dbitno, word, rembits, nb, nwords, wbitno, nw, agno;
+ dmtree_t *tp = (dmtree_t *) & dp->tree;
+ int size;
+
+ /* determine the bit number and word within the dmap of the
+ * starting block.
+ */
+ dbitno = blkno & (BPERDMAP - 1);
+ word = dbitno >> L2DBWORD;
+
+ /* block range better be within the dmap.
+ */
+ assert(dbitno + nblocks <= BPERDMAP);
+
+ /* free the bits of the dmap's words corresponding to the block range.
+ * not all bits of the first and last words may be contained within
+ * the block range. if this is the case, we'll work against those
+ * words (i.e. partial first and/or last) on an individual basis
+ * (a single pass), freeing the bits of interest by hand and updating
+ * the leaf corresponding to the dmap word. a single pass will be used
+ * for all dmap words fully contained within the specified range.
+ * within this pass, the bits of all fully contained dmap words will
+ * be marked as free in a single shot and the leaves will be updated. a
+ * single leaf may describe the free space of multiple dmap words,
+ * so we may update only a subset of the actual leaves corresponding
+ * to the dmap words of the block range.
+ *
+ * dbJoin() is used to update leaf values and will join the binary
+ * buddy system of the leaves if the new leaf values indicate this
+ * should be done.
+ */
+ for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
+ /* determine the bit number within the word and
+ * the number of bits within the word.
+ */
+ wbitno = dbitno & (DBWORD - 1);
+ nb = min(rembits, DBWORD - wbitno);
+
+ /* check if only part of a word is to be freed.
+ */
+ if (nb < DBWORD) {
+ /* free (zero) the appropriate bits within this
+ * dmap word.
+ */
+ dp->wmap[word] &=
+ cpu_to_le32(~(ONES << (DBWORD - nb)
+ >> wbitno));
+
+ /* update the leaf for this dmap word.
+ */
+ dbJoin(tp, word,
+ dbMaxBud((u8 *) & dp->wmap[word]));
+
+ word += 1;
+ } else {
+ /* one or more dmap words are fully contained
+ * within the block range. determine how many
+ * words and free (zero) the bits of these words.
+ */
+ nwords = rembits >> L2DBWORD;
+ memset(&dp->wmap[word], 0, nwords * 4);
+
+ /* determine how many bits.
+ */
+ nb = nwords << L2DBWORD;
+
+ /* now update the appropriate leaves to reflect
+ * the freed words.
+ */
+ for (; nwords > 0; nwords -= nw) {
+ /* determine what the leaf value should be
+ * updated to as the minimum of the l2 number
+ * of bits being freed and the l2 (max) number
+ * of bits that can be described by this leaf.
+ */
+ size =
+ min(LITOL2BSZ
+ (word, L2LPERDMAP, BUDMIN),
+ NLSTOL2BSZ(nwords));
+
+ /* update the leaf.
+ */
+ dbJoin(tp, word, size);
+
+ /* get the number of dmap words handled.
+ */
+ nw = BUDSIZE(size, BUDMIN);
+ word += nw;
+ }
+ }
+ }
+
+ /* update the free count for this dmap.
+ */
+ dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks);
+
+ BMAP_LOCK(bmp);
+
+ /* update the free count for the allocation group and
+ * map.
+ */
+ agno = blkno >> bmp->db_agl2size;
+ bmp->db_nfree += nblocks;
+ bmp->db_agfree[agno] += nblocks;
+
+ /* check if this allocation group is now completely free and
+ * if it is currently the maximum (rightmost) allocation group.
+ * if so, establish the new maximum allocation group number by
+ * searching left for the first allocation group with blocks
+ * still allocated.
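+ *
+ * (editorial reading of the test below, not in the original
+ * comment: the first half covers a completely free group that is
+ * the current maximum; the second half appears to cover a partial
+ * rightmost group, whose fully-free block count is
+ * db_mapsize & (BPERDMAP - 1) rather than db_agsize.)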
+ */
+ if ((bmp->db_agfree[agno] == bmp->db_agsize
+ && agno == bmp->db_maxag) || (agno == bmp->db_numag - 1
+ && bmp->db_agfree[agno] ==
+ (bmp->db_mapsize &
+ (BPERDMAP - 1)))) {
+ while (bmp->db_maxag > 0) {
+ bmp->db_maxag -= 1;
+ if (bmp->db_agfree[bmp->db_maxag] !=
+ bmp->db_agsize)
+ break;
+ }
+
+ /* re-establish the allocation group preference if the
+ * current preference is right of the maximum allocation
+ * group.
+ */
+ if (bmp->db_agpref > bmp->db_maxag)
+ bmp->db_agpref = bmp->db_maxag;
+ }
+
+ BMAP_UNLOCK(bmp);
+}
+
+
+/*
+ * NAME: dbAdjCtl()
+ *
+ * FUNCTION: adjust a dmap control page at a specified level to reflect
+ * the change in a lower level dmap or dmap control page's
+ * maximum string of free blocks (i.e. a change in the root
+ * of the lower level object's dmtree) due to the allocation
+ * or deallocation of a range of blocks within a single dmap.
+ *
+ * on entry, this routine is provided with the new value of
+ * the lower level dmap or dmap control page root and the
+ * starting block number of the block range whose allocation
+ * or deallocation resulted in the root change. this range
+ * is represented by a single leaf of the current dmapctl
+ * and the leaf will be updated with this value, possibly
+ * causing a binary buddy system within the leaves to be
+ * split or joined. the update may also cause the dmapctl's
+ * dmtree to be updated.
+ *
+ * if the adjustment of the dmap control page, itself, causes its
+ * root to change, this change will be bubbled up to the next dmap
+ * control level by a recursive call to this routine, specifying
+ * the new root value and the next dmap control page level to
+ * be adjusted.
+ *
+ * PARAMETERS:
+ * bmp - pointer to bmap descriptor
+ * blkno - the first block of a block range within a dmap. it is
+ * the allocation or deallocation of this block range that
+ * requires the dmap control page to be adjusted.
+ * newval - the new value of the lower level dmap or dmap control
+ * page root.
+ * alloc - TRUE if adjustment is due to an allocation.
+ * level - current level of dmap control page (i.e. L0, L1, L2) to
+ * be adjusted.
+ *
+ * RETURN VALUES:
+ * 0 - success
+ * EIO - i/o error
+ *
+ * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
+ */
+static int
+dbAdjCtl(bmap_t * bmp, s64 blkno, int newval, int alloc, int level)
+{
+ metapage_t *mp;
+ s8 oldroot;
+ int oldval;
+ s64 lblkno;
+ dmapctl_t *dcp;
+ int rc, leafno, ti;
+
+ /* get the buffer for the dmap control page for the specified
+ * block number and control page level.
+ */
+ lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, level);
+ mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
+ if (mp == NULL)
+ return (EIO);
+ dcp = (dmapctl_t *) mp->data;
+
+ /* determine the leaf number corresponding to the block and
+ * the index within the dmap control tree.
+ */
+ leafno = BLKTOCTLLEAF(blkno, dcp->budmin);
+ ti = leafno + le32_to_cpu(dcp->leafidx);
+
+ /* save the current leaf value and the current root level (i.e.
+ * maximum l2 free string described by this dmapctl).
+ */
+ oldval = dcp->stree[ti];
+ oldroot = dcp->stree[ROOT];
+
+ /* check if this is a control page update for an allocation.
+ * if so, update the leaf to reflect the new leaf value using
+ * dbSplit(); otherwise (deallocation), use dbJoin() to update
+ * the leaf with the new value. in addition to updating the
+ * leaf, dbSplit() will also split the binary buddy system of
+ * the leaves, if required, and bubble new values within the
+ * dmapctl tree, if required.
similarly, dbJoin() will join + * the binary buddy system of leaves and bubble new values up + * the dmapctl tree as required by the new leaf value. + */ + if (alloc) { + /* check if we are in the middle of a binary buddy + * system. this happens when we are performing the + * first allocation out of an allocation group that + * is part (not the first part) of a larger binary + * buddy system. if we are in the middle, back split + * the system prior to calling dbSplit() which assumes + * that it is at the front of a binary buddy system. + */ + if (oldval == NOFREE) { + dbBackSplit((dmtree_t *) dcp, leafno); + oldval = dcp->stree[ti]; + } + dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval); + } else { + dbJoin((dmtree_t *) dcp, leafno, newval); + } + + /* check if the root of the current dmap control page changed due + * to the update and if the current dmap control page is not at + * the current top level (i.e. L0, L1, L2) of the map. if so (i.e. + * root changed and this is not the top level), call this routine + * again (recursion) for the next higher level of the mapping to + * reflect the change in root for the current dmap control page. + */ + if (dcp->stree[ROOT] != oldroot) { + /* are we below the top level of the map. if so, + * bubble the root up to the next higher level. + */ + if (level < bmp->db_maxlevel) { + /* bubble up the new root of this dmap control page to + * the next level. + */ + if ((rc = + dbAdjCtl(bmp, blkno, dcp->stree[ROOT], alloc, + level + 1))) { + /* something went wrong in bubbling up the new + * root value, so backout the changes to the + * current dmap control page. + */ + if (alloc) { + dbJoin((dmtree_t *) dcp, leafno, + oldval); + } else { + /* the dbJoin() above might have + * caused a larger binary buddy system + * to form and we may now be in the + * middle of it. if this is the case, + * back split the buddies. + */ + if (dcp->stree[ti] == NOFREE) + dbBackSplit((dmtree_t *) + dcp, leafno); + dbSplit((dmtree_t *) dcp, leafno, + dcp->budmin, oldval); + } + + /* release the buffer and return the error. + */ + release_metapage(mp); + return (rc); + } + } else { + /* we're at the top level of the map. update + * the bmap control page to reflect the size + * of the maximum free buddy system. + */ + assert(level == bmp->db_maxlevel); + assert(bmp->db_maxfreebud == oldroot); + bmp->db_maxfreebud = dcp->stree[ROOT]; + } + } + + /* write the buffer. + */ + write_metapage(mp); + + return (0); +} + + +/* + * NAME: dbSplit() + * + * FUNCTION: update the leaf of a dmtree with a new value, splitting + * the leaf from the binary buddy system of the dmtree's + * leaves, as required. + * + * PARAMETERS: + * tp - pointer to the tree containing the leaf. + * leafno - the number of the leaf to be updated. + * splitsz - the size the binary buddy system starting at the leaf + * must be split to, specified as the log2 number of blocks. + * newval - the new value for the leaf. + * + * RETURN VALUES: none + * + * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; + */ +static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) +{ + int budsz; + int cursz; + s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); + + /* check if the leaf needs to be split. + */ + if (leaf[leafno] > tp->dmt_budmin) { + /* the split occurs by cutting the buddy system in half + * at the specified leaf until we reach the specified + * size. pick up the starting split size (current size + * - 1 in l2) and the corresponding buddy size. 
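+ *
+ * (worked example added in editing: with dmt_budmin = 5, splitting
+ * a leaf of value 7 down to splitsz = 5 first gives the buddy two
+ * leaves away the value 6, then the adjacent buddy the value 5,
+ * after which the leaf itself is set to newval below.)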
+ */ + cursz = leaf[leafno] - 1; + budsz = BUDSIZE(cursz, tp->dmt_budmin); + + /* split until we reach the specified size. + */ + while (cursz >= splitsz) { + /* update the buddy's leaf with its new value. + */ + dbAdjTree(tp, leafno ^ budsz, cursz); + + /* on to the next size and buddy. + */ + cursz -= 1; + budsz >>= 1; + } + } + + /* adjust the dmap tree to reflect the specified leaf's new + * value. + */ + dbAdjTree(tp, leafno, newval); +} + + +/* + * NAME: dbBackSplit() + * + * FUNCTION: back split the binary buddy system of dmtree leaves + * that hold a specified leaf until the specified leaf + * starts its own binary buddy system. + * + * the allocators typically perform allocations at the start + * of binary buddy systems and dbSplit() is used to accomplish + * any required splits. in some cases, however, allocation + * may occur in the middle of a binary system and requires a + * back split, with the split proceeding out from the middle of + * the system (less efficient) rather than the start of the + * system (more efficient). the cases in which a back split + * is required are rare and are limited to the first allocation + * within an allocation group which is a part (not first part) + * of a larger binary buddy system and a few exception cases + * in which a previous join operation must be backed out. + * + * PARAMETERS: + * tp - pointer to the tree containing the leaf. + * leafno - the number of the leaf to be updated. + * + * RETURN VALUES: none + * + * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; + */ +static void dbBackSplit(dmtree_t * tp, int leafno) +{ + int budsz, bud, w, bsz, size; + int cursz; + s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); + + /* leaf should be part (not first part) of a binary + * buddy system. + */ + assert(leaf[leafno] == NOFREE); + + /* the back split is accomplished by iteratively finding the leaf + * that starts the buddy system that contains the specified leaf and + * splitting that system in two. this iteration continues until + * the specified leaf becomes the start of a buddy system. + * + * determine maximum possible l2 size for the specified leaf. + */ + size = + LITOL2BSZ(leafno, le32_to_cpu(tp->dmt_l2nleafs), + tp->dmt_budmin); + + /* determine the number of leaves covered by this size. this + * is the buddy size that we will start with as we search for + * the buddy system that contains the specified leaf. + */ + budsz = BUDSIZE(size, tp->dmt_budmin); + + /* back split. + */ + while (leaf[leafno] == NOFREE) { + /* find the leftmost buddy leaf. + */ + for (w = leafno, bsz = budsz;; bsz <<= 1, + w = (w < bud) ? w : bud) { + assert(bsz < le32_to_cpu(tp->dmt_nleafs)); + + /* determine the buddy. + */ + bud = w ^ bsz; + + /* check if this buddy is the start of the system. + */ + if (leaf[bud] != NOFREE) { + /* split the leaf at the start of the + * system in two. + */ + cursz = leaf[bud] - 1; + dbSplit(tp, bud, cursz, cursz); + break; + } + } + } + + assert(leaf[leafno] == size); +} + + +/* + * NAME: dbJoin() + * + * FUNCTION: update the leaf of a dmtree with a new value, joining + * the leaf with other leaves of the dmtree into a multi-leaf + * binary buddy system, as required. + * + * PARAMETERS: + * tp - pointer to the tree containing the leaf. + * leafno - the number of the leaf to be updated. + * newval - the new value for the leaf. 
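+ *
+ * (worked example added in editing: with dmt_budmin = 5, joining
+ * leaf 4 at newval = 5 when leaf 5 also holds 5 marks leaf 5
+ * NOFREE and retries leaf 4 at value 6 against leaf 6, continuing
+ * until a buddy cannot be joined.)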
+ *
+ * RETURN VALUES: none
+ */
+static void dbJoin(dmtree_t * tp, int leafno, int newval)
+{
+ int budsz, buddy;
+ s8 *leaf;
+
+ /* can the new leaf value require a join with other leaves ?
+ */
+ if (newval >= tp->dmt_budmin) {
+ /* pick up a pointer to the leaves of the tree.
+ */
+ leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
+
+ /* try to join the specified leaf into a larger binary
+ * buddy system. the join proceeds by attempting to join
+ * the specified leafno with its buddy (leaf) at new value.
+ * if the join occurs, we attempt to join the left leaf
+ * of the joined buddies with its buddy at new value + 1.
+ * we continue to join until we find a buddy that cannot be
+ * joined (does not have a value equal to the size of the
+ * last join) or until all leaves have been joined into a
+ * single system.
+ *
+ * get the buddy size (number of words covered) of
+ * the new value.
+ */
+ budsz = BUDSIZE(newval, tp->dmt_budmin);
+
+ /* try to join.
+ */
+ while (budsz < le32_to_cpu(tp->dmt_nleafs)) {
+ /* get the buddy leaf.
+ */
+ buddy = leafno ^ budsz;
+
+ /* if the leaf's new value is greater than its
+ * buddy's value, we join no more.
+ */
+ if (newval > leaf[buddy])
+ break;
+
+ assert(newval == leaf[buddy]);
+
+ /* check which (leafno or buddy) is the left buddy.
+ * the left buddy gets to claim the blocks resulting
+ * from the join while the right gets to claim none.
+ * the left buddy is also eligible to participate in
+ * a join at the next higher level while the right
+ * is not.
+ *
+ */
+ if (leafno < buddy) {
+ /* leafno is the left buddy.
+ */
+ dbAdjTree(tp, buddy, NOFREE);
+ } else {
+ /* buddy is the left buddy and becomes
+ * leafno.
+ */
+ dbAdjTree(tp, leafno, NOFREE);
+ leafno = buddy;
+ }
+
+ /* on to try the next join.
+ */
+ newval += 1;
+ budsz <<= 1;
+ }
+ }
+
+ /* update the leaf value.
+ */
+ dbAdjTree(tp, leafno, newval);
+}
+
+
+/*
+ * NAME: dbAdjTree()
+ *
+ * FUNCTION: update a leaf of a dmtree with a new value, adjusting
+ * the dmtree, as required, to reflect the new leaf value.
+ * the combination of any buddies must already be done before
+ * this is called.
+ *
+ * PARAMETERS:
+ * tp - pointer to the tree to be adjusted.
+ * leafno - the number of the leaf to be updated.
+ * newval - the new value for the leaf.
+ *
+ * RETURN VALUES: none
+ */
+static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
+{
+ int lp, pp, k;
+ int max;
+
+ /* pick up the index of the leaf for this leafno.
+ */
+ lp = leafno + le32_to_cpu(tp->dmt_leafidx);
+
+ /* is the current value the same as the old value ? if so,
+ * there is nothing to do.
+ */
+ if (tp->dmt_stree[lp] == newval)
+ return;
+
+ /* set the new value.
+ */
+ tp->dmt_stree[lp] = newval;
+
+ /* bubble the new value up the tree as required.
+ */
+ for (k = 0; k < le32_to_cpu(tp->dmt_height); k++) {
+ /* get the index of the first leaf of the 4 leaf
+ * group containing the specified leaf (leafno).
+ */
+ lp = ((lp - 1) & ~0x03) + 1;
+
+ /* get the index of the parent of this 4 leaf group.
+ */
+ pp = (lp - 1) >> 2;
+
+ /* determine the maximum of the 4 leaves.
+ */
+ max = TREEMAX(&tp->dmt_stree[lp]);
+
+ /* if the maximum of the 4 is the same as the
+ * parent's value, we're done.
+ */
+ if (tp->dmt_stree[pp] == max)
+ break;
+
+ /* parent gets new value.
+ */
+ tp->dmt_stree[pp] = max;
+
+ /* parent becomes leaf for next go-round.
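+ *
+ * (index arithmetic, spelled out in editing: the stree array
+ * stores a 4-ary tree, so node p's children sit at indices
+ * 4p + 1 .. 4p + 4. for lp = 7, ((7 - 1) & ~0x03) + 1 = 5 picks
+ * the first of the sibling group 5..8, and (5 - 1) >> 2 = 1 is
+ * their parent.)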
+ */ + lp = pp; + } +} + + +/* + * NAME: dbFindLeaf() + * + * FUNCTION: search a dmtree_t for sufficient free blocks, returning + * the index of a leaf describing the free blocks if + * sufficient free blocks are found. + * + * the search starts at the top of the dmtree_t tree and + * proceeds down the tree to the leftmost leaf with sufficient + * free space. + * + * PARAMETERS: + * tp - pointer to the tree to be searched. + * l2nb - log2 number of free blocks to search for. + * leafidx - return pointer to be set to the index of the leaf + * describing at least l2nb free blocks if sufficient + * free blocks are found. + * + * RETURN VALUES: + * 0 - success + * ENOSPC - insufficient free blocks. + */ +static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) +{ + int ti, n = 0, k, x = 0; + + /* first check the root of the tree to see if there is + * sufficient free space. + */ + if (l2nb > tp->dmt_stree[ROOT]) + return (ENOSPC); + + /* sufficient free space available. now search down the tree + * starting at the next level for the leftmost leaf that + * describes sufficient free space. + */ + for (k = le32_to_cpu(tp->dmt_height), ti = 1; + k > 0; k--, ti = ((ti + n) << 2) + 1) { + /* search the four nodes at this level, starting from + * the left. + */ + for (x = ti, n = 0; n < 4; n++) { + /* sufficient free space found. move to the next + * level (or quit if this is the last level). + */ + if (l2nb <= tp->dmt_stree[x + n]) + break; + } + + /* better have found something since the higher + * levels of the tree said it was here. + */ + assert(n < 4); + } + + /* set the return to the leftmost leaf describing sufficient + * free space. + */ + *leafidx = x + n - le32_to_cpu(tp->dmt_leafidx); + + return (0); +} + + +/* + * NAME: dbFindBits() + * + * FUNCTION: find a specified number of binary buddy free bits within a + * dmap bitmap word value. + * + * this routine searches the bitmap value for (1 << l2nb) free + * bits at (1 << l2nb) alignments within the value. + * + * PARAMETERS: + * word - dmap bitmap word value. + * l2nb - number of free bits specified as a log2 number. + * + * RETURN VALUES: + * starting bit number of free bits. + */ +static int dbFindBits(u32 word, int l2nb) +{ + int bitno, nb; + u32 mask; + + /* get the number of bits. + */ + nb = 1 << l2nb; + assert(nb <= DBWORD); + + /* complement the word so we can use a mask (i.e. 0s represent + * free bits) and compute the mask. + */ + word = ~word; + mask = ONES << (DBWORD - nb); + + /* scan the word for nb free bits at nb alignments. + */ + for (bitno = 0; mask != 0; bitno += nb, mask >>= nb) { + if ((mask & word) == mask) + break; + } + + ASSERT(bitno < 32); + + /* return the bit number. + */ + return (bitno); +} + + +/* + * NAME: dbMaxBud(u8 *cp) + * + * FUNCTION: determine the largest binary buddy string of free + * bits within 32-bits of the map. + * + * PARAMETERS: + * cp - pointer to the 32-bit value. + * + * RETURN VALUES: + * largest binary buddy of free bits within a dmap word. + */ +static int dbMaxBud(u8 * cp) +{ + signed char tmp1, tmp2; + + /* check if the wmap word is all free. if so, the + * free buddy size is BUDMIN. + */ + if (*((uint *) cp) == 0) + return (BUDMIN); + + /* check if the wmap word is half free. if so, the + * free buddy size is BUDMIN-1. + */ + if (*((u16 *) cp) == 0 || *((u16 *) cp + 1) == 0) + return (BUDMIN - 1); + + /* not all free or half free. determine the free buddy + * size thru table lookup using quarters of the wmap word. 
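+ *
+ * (editorial note: budtab[] is assumed here to map a byte of the
+ * word to the l2 size of its largest aligned run of free (0) bits,
+ * with NOFREE (-1) for 0xff; e.g. for the word 0xff00ffff the free
+ * quarter yields budtab[0x00] = 3, a maximum free buddy of 8 blocks.)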
+ */
+ tmp1 = max(budtab[cp[2]], budtab[cp[3]]);
+ tmp2 = max(budtab[cp[0]], budtab[cp[1]]);
+ return (max(tmp1, tmp2));
+}
+
+
+/*
+ * NAME: cnttz(uint word)
+ *
+ * FUNCTION: determine the number of trailing zeros within a 32-bit
+ * value.
+ *
+ * PARAMETERS:
+ * word - 32-bit value to be examined.
+ *
+ * RETURN VALUES:
+ * count of trailing zeros
+ */
+int cnttz(u32 word)
+{
+ int n;
+
+ for (n = 0; n < 32; n++, word >>= 1) {
+ if (word & 0x01)
+ break;
+ }
+
+ return (n);
+}
+
+
+/*
+ * NAME: cntlz(u32 value)
+ *
+ * FUNCTION: determine the number of leading zeros within a 32-bit
+ * value.
+ *
+ * PARAMETERS:
+ * value - 32-bit value to be examined.
+ *
+ * RETURN VALUES:
+ * count of leading zeros
+ */
+int cntlz(u32 value)
+{
+ int n;
+
+ for (n = 0; n < 32; n++, value <<= 1) {
+ if (value & HIGHORDER)
+ break;
+ }
+ return (n);
+}
+
+
+/*
+ * NAME: blkstol2(s64 nb)
+ *
+ * FUNCTION: convert a block count to its log2 value. if the block
+ * count is not a l2 multiple, it is rounded up to the next
+ * larger l2 multiple.
+ *
+ * PARAMETERS:
+ * nb - number of blocks
+ *
+ * RETURN VALUES:
+ * log2 number of blocks
+ */
+int blkstol2(s64 nb)
+{
+ int l2nb;
+ s64 mask; /* meant to be signed */
+
+ mask = (s64) 1 << (64 - 1);
+
+ /* count the leading bits.
+ */
+ for (l2nb = 0; l2nb < 64; l2nb++, mask >>= 1) {
+ /* leading bit found.
+ */
+ if (nb & mask) {
+ /* determine the l2 value.
+ */
+ l2nb = (64 - 1) - l2nb;
+
+ /* check if we need to round up.
+ */
+ if (~mask & nb)
+ l2nb++;
+
+ return (l2nb);
+ }
+ }
+ assert(0);
+ return 0; /* fix compiler warning */
+}
+
+
+/*
+ * NAME: fsDirty()
+ *
+ * FUNCTION: mark the file system dirty. called when an inconsistency
+ * in the block allocation maps is detected (e.g. blocks
+ * have been leaked); currently this just logs and asserts.
+ *
+ * PARAMETERS:
+ * none
+ *
+ * RETURN VALUES:
+ * none
+ */
+void fsDirty()
+{
+ printk("fsDirty(): bye-bye\n");
+ assert(0);
+}
+
+
+/*
+ * NAME: dbAllocBottomUp()
+ *
+ * FUNCTION: allocate the specified block range from the working block
+ * allocation map.
+ *
+ * the blocks will be allocated from the working map one dmap
+ * at a time.
+ *
+ * PARAMETERS:
+ * ip - pointer to in-core inode;
+ * blkno - starting block number to be allocated.
+ * nblocks - number of blocks to be allocated.
+ *
+ * RETURN VALUES:
+ * 0 - success
+ * EIO - i/o error
+ */
+int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
+{
+ metapage_t *mp;
+ dmap_t *dp;
+ int nb, rc;
+ s64 lblkno, rem;
+ struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
+ bmap_t *bmp = JFS_SBI(ip->i_sb)->bmap;
+
+ IREAD_LOCK(ipbmap);
+
+ /* block to be allocated better be within the mapsize. */
+ ASSERT(nblocks <= bmp->db_mapsize - blkno);
+
+ /*
+ * allocate the blocks a dmap at a time.
+ */
+ mp = NULL;
+ for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
+ /* release previous dmap if any */
+ if (mp) {
+ write_metapage(mp);
+ }
+
+ /* get the buffer for the current dmap. */
+ lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
+ mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
+ if (mp == NULL) {
+ IREAD_UNLOCK(ipbmap);
+ return (EIO);
+ }
+ dp = (dmap_t *) mp->data;
+
+ /* determine the number of blocks to be allocated from
+ * this dmap.
+ */
+ nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
+
+ DBFREECK(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
+
+ /* allocate the blocks. */
+ if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) {
+ release_metapage(mp);
+ IREAD_UNLOCK(ipbmap);
+ return (rc);
+ }
+
+ DBALLOC(bmp->db_DBmap, bmp->db_mapsize, blkno, nb);
+ }
+
+ /* write the last buffer.
*/
+ write_metapage(mp);
+
+ IREAD_UNLOCK(ipbmap);
+
+ return (0);
+}
+
+
+static int dbAllocDmapBU(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks)
+{
+ int rc;
+ int dbitno, word, rembits, nb, nwords, wbitno, agno;
+ s8 oldroot, *leaf;
+ dmaptree_t *tp = (dmaptree_t *) & dp->tree;
+
+ /* save the current value of the root (i.e. maximum free string)
+ * of the dmap tree.
+ */
+ oldroot = tp->stree[ROOT];
+
+ /* pick up a pointer to the leaves of the dmap tree */
+ leaf = tp->stree + LEAFIND;
+
+ /* determine the bit number and word within the dmap of the
+ * starting block.
+ */
+ dbitno = blkno & (BPERDMAP - 1);
+ word = dbitno >> L2DBWORD;
+
+ /* block range better be within the dmap */
+ assert(dbitno + nblocks <= BPERDMAP);
+
+ /* allocate the bits of the dmap's words corresponding to the block
+ * range. not all bits of the first and last words may be contained
+ * within the block range. if this is the case, we'll work against
+ * those words (i.e. partial first and/or last) on an individual basis
+ * (a single pass), allocating the bits of interest by hand and
+ * updating the leaf corresponding to the dmap word. a single pass
+ * will be used for all dmap words fully contained within the
+ * specified range. within this pass, the bits of all fully contained
+ * dmap words will be marked as allocated in a single shot and the
+ * leaves will be updated. a single leaf may describe the free space of
+ * multiple dmap words, so we may update only a subset of the actual
+ * leaves corresponding to the dmap words of the block range.
+ */
+ for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
+ /* determine the bit number within the word and
+ * the number of bits within the word.
+ */
+ wbitno = dbitno & (DBWORD - 1);
+ nb = min(rembits, DBWORD - wbitno);
+
+ /* check if only part of a word is to be allocated.
+ */
+ if (nb < DBWORD) {
+ /* allocate (set to 1) the appropriate bits within
+ * this dmap word.
+ */
+ dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb)
+ >> wbitno);
+
+ word += 1;
+ } else {
+ /* one or more dmap words are fully contained
+ * within the block range. determine how many
+ * words and allocate (set to 1) the bits of these
+ * words.
+ */
+ nwords = rembits >> L2DBWORD;
+ memset(&dp->wmap[word], (int) ONES, nwords * 4);
+
+ /* determine how many bits */
+ nb = nwords << L2DBWORD;
+ }
+ }
+
+ /* update the free count for this dmap */
+ dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks);
+
+ /* reconstruct summary tree */
+ dbInitDmapTree(dp);
+
+ BMAP_LOCK(bmp);
+
+ /* update the highest active allocation group number
+ * if this allocation group is the new max.
+ */
+ agno = blkno >> bmp->db_agl2size;
+ if (agno > bmp->db_maxag)
+ bmp->db_maxag = agno;
+
+ /* update the free count for the allocation group and map */
+ bmp->db_agfree[agno] -= nblocks;
+ bmp->db_nfree -= nblocks;
+
+ BMAP_UNLOCK(bmp);
+
+ /* if the root has not changed, done. */
+ if (tp->stree[ROOT] == oldroot)
+ return (0);
+
+ /* root changed. bubble the change up to the dmap control pages.
+ * if the adjustment of the upper level control pages fails,
+ * backout the bit allocation (thus making everything consistent).
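+ *
+ * (note added in editing: unlike dbAllocDmap() above, this
+ * bottom-up path rebuilt the whole summary tree with
+ * dbInitDmapTree(), so the backout below simply frees the bits
+ * with dbFreeBits(), which re-joins the affected leaves.)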
+ */ + if ((rc = dbAdjCtl(bmp, blkno, tp->stree[ROOT], 1, 0))) + dbFreeBits(bmp, dp, blkno, nblocks); + + return (rc); +} + + +/* + * NAME: dbExtendFS() + * + * FUNCTION: extend bmap from blkno for nblocks; + * dbExtendFS() updates bmap ready for dbAllocBottomUp(); + * + * L2 + * | + * L1---------------------------------L1 + * | | + * L0---------L0---------L0 L0---------L0---------L0 + * | | | | | | + * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; + * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm + * + * <---old---><----------------------------extend-----------------------> + */ +int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) +{ + struct jfs_sb_info *sbi = JFS_SBI(ipbmap->i_sb); + int nbperpage = sbi->nbperpage; + int i, i0 = TRUE, j, j0 = TRUE, k, n; + s64 newsize; + s64 p; + metapage_t *mp, *l2mp, *l1mp, *l0mp; + dmapctl_t *l2dcp, *l1dcp, *l0dcp; + dmap_t *dp; + s8 *l0leaf, *l1leaf, *l2leaf; + bmap_t *bmp = sbi->bmap; + int agno, l2agsize, oldl2agsize; + s64 ag_rem; + + newsize = blkno + nblocks; + + jEVENT(0, ("dbExtendFS: blkno:%Ld nblocks:%Ld newsize:%Ld\n", + (long long) blkno, (long long) nblocks, + (long long) newsize)); + + /* + * initialize bmap control page. + * + * all the data in bmap control page should exclude + * the mkfs hidden dmap page. + */ + + /* update mapsize */ + bmp->db_mapsize = newsize; + bmp->db_maxlevel = BMAPSZTOLEV(bmp->db_mapsize); + + /* compute new AG size */ + l2agsize = dbGetL2AGSize(newsize); + oldl2agsize = bmp->db_agl2size; + + bmp->db_agl2size = l2agsize; + bmp->db_agsize = 1 << l2agsize; + + /* compute new number of AG */ + agno = bmp->db_numag; + bmp->db_numag = newsize >> l2agsize; + bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0; + + /* + * reconfigure db_agfree[] + * from old AG configuration to new AG configuration; + * + * coalesce contiguous k (newAGSize/oldAGSize) AGs; + * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; + * note: new AG size = old AG size * (2**x). 
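+ *
+ * (worked example added in editing: if the AG size grows from
+ * 2**13 to 2**15 blocks, k = 4 and the loop below folds
+ * agfree[0..3] into the new agfree[0], agfree[4..7] into
+ * agfree[1], and so on; db_maxag is later divided by the same k.)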
+ */
+ if (l2agsize == oldl2agsize)
+ goto extend;
+ k = 1 << (l2agsize - oldl2agsize);
+ ag_rem = bmp->db_agfree[0]; /* save agfree[0] */
+ for (i = 0, n = 0; i < agno; n++) {
+ bmp->db_agfree[n] = 0; /* init collection point */
+
+ /* coalesce contiguous k AGs; */
+ for (j = 0; j < k && i < agno; j++, i++) {
+ /* merge AGi to AGn */
+ bmp->db_agfree[n] += bmp->db_agfree[i];
+ }
+ }
+ bmp->db_agfree[0] += ag_rem; /* restore agfree[0] */
+
+ for (; n < MAXAG; n++)
+ bmp->db_agfree[n] = 0;
+
+ /*
+ * update highest active ag number
+ */
+
+ bmp->db_maxag = bmp->db_maxag / k;
+
+ /*
+ * extend bmap
+ *
+ * update bit maps and corresponding level control pages;
+ * global control page db_nfree, db_agfree[agno], db_maxfreebud;
+ */
+ extend:
+ /* get L2 page */
+ p = BMAPBLKNO + nbperpage; /* L2 page */
+ l2mp = read_metapage(ipbmap, p, PSIZE, 0);
+ assert(l2mp);
+ l2dcp = (dmapctl_t *) l2mp->data;
+
+ /* compute start L1 */
+ k = blkno >> L2MAXL1SIZE;
+ l2leaf = l2dcp->stree + CTLLEAFIND + k;
+ p = BLKTOL1(blkno, sbi->l2nbperpage); /* L1 page */
+
+ /*
+ * extend each L1 in L2
+ */
+ for (; k < LPERCTL; k++, p += nbperpage) {
+ /* get L1 page */
+ if (j0) {
+ /* read in L1 page: (blkno & (MAXL1SIZE - 1)) */
+ l1mp = read_metapage(ipbmap, p, PSIZE, 0);
+ if (l1mp == NULL)
+ goto errout;
+ l1dcp = (dmapctl_t *) l1mp->data;
+
+ /* compute start L0 */
+ j = (blkno & (MAXL1SIZE - 1)) >> L2MAXL0SIZE;
+ l1leaf = l1dcp->stree + CTLLEAFIND + j;
+ p = BLKTOL0(blkno, sbi->l2nbperpage);
+ j0 = FALSE;
+ } else {
+ /* assign/init L1 page */
+ l1mp = get_metapage(ipbmap, p, PSIZE, 0);
+ if (l1mp == NULL)
+ goto errout;
+
+ l1dcp = (dmapctl_t *) l1mp->data;
+
+ /* compute start L0 */
+ j = 0;
+ l1leaf = l1dcp->stree + CTLLEAFIND;
+ p += nbperpage; /* 1st L0 of L1.k */
+ }
+
+ /*
+ * extend each L0 in L1
+ */
+ for (; j < LPERCTL; j++) {
+ /* get L0 page */
+ if (i0) {
+ /* read in L0 page: (blkno & (MAXL0SIZE - 1)) */
+
+ l0mp = read_metapage(ipbmap, p, PSIZE, 0);
+ if (l0mp == NULL)
+ goto errout;
+ l0dcp = (dmapctl_t *) l0mp->data;
+
+ /* compute start dmap */
+ i = (blkno & (MAXL0SIZE - 1)) >>
+ L2BPERDMAP;
+ l0leaf = l0dcp->stree + CTLLEAFIND + i;
+ p = BLKTODMAP(blkno,
+ sbi->l2nbperpage);
+ i0 = FALSE;
+ } else {
+ /* assign/init L0 page */
+ l0mp = get_metapage(ipbmap, p, PSIZE, 0);
+ if (l0mp == NULL)
+ goto errout;
+
+ l0dcp = (dmapctl_t *) l0mp->data;
+
+ /* compute start dmap */
+ i = 0;
+ l0leaf = l0dcp->stree + CTLLEAFIND;
+ p += nbperpage; /* 1st dmap of L0.j */
+ }
+
+ /*
+ * extend each dmap in L0
+ */
+ for (; i < LPERCTL; i++) {
+ /*
+ * reconstruct the dmap page, and
+ * initialize corresponding parent L0 leaf
+ */
+ if ((n = blkno & (BPERDMAP - 1))) {
+ /* read in dmap page: */
+ mp = read_metapage(ipbmap, p,
+ PSIZE, 0);
+ if (mp == NULL)
+ goto errout;
+ n = min(nblocks, (s64)BPERDMAP - n);
+ } else {
+ /* assign/init dmap page */
+ mp = read_metapage(ipbmap, p,
+ PSIZE, 0);
+ if (mp == NULL)
+ goto errout;
+
+ n = min(nblocks, (s64)BPERDMAP);
+ }
+
+ dp = (dmap_t *) mp->data;
+ *l0leaf = dbInitDmap(dp, blkno, n);
+
+ bmp->db_nfree += n;
+ agno = le64_to_cpu(dp->start) >> l2agsize;
+ bmp->db_agfree[agno] += n;
+
+ write_metapage(mp);
+
+ l0leaf++;
+ p += nbperpage;
+
+ blkno += n;
+ nblocks -= n;
+ if (nblocks == 0)
+ break;
+ } /* for each dmap in a L0 */
+
+ /*
+ * build current L0 page from its leaves, and
+ * initialize corresponding parent L1 leaf
+ */
+ *l1leaf = dbInitDmapCtl(l0dcp, 0, ++i);
+ write_metapage(l0mp);
+
+ if (nblocks)
+ l1leaf++; /* continue for next L0 */
+ else {
+ 
/* more than 1 L0 ? */
+ if (j > 0)
+ break; /* build L1 page */
+ else {
+ /* summarize in global bmap page */
+ bmp->db_maxfreebud = *l1leaf;
+ release_metapage(l1mp);
+ release_metapage(l2mp);
+ goto finalize;
+ }
+ }
+ } /* for each L0 in a L1 */
+
+ /*
+ * build current L1 page from its leaves, and
+ * initialize corresponding parent L2 leaf
+ */
+ *l2leaf = dbInitDmapCtl(l1dcp, 1, ++j);
+ write_metapage(l1mp);
+
+ if (nblocks)
+ l2leaf++; /* continue for next L1 */
+ else {
+ /* more than 1 L1 ? */
+ if (k > 0)
+ break; /* build L2 page */
+ else {
+ /* summarize in global bmap page */
+ bmp->db_maxfreebud = *l2leaf;
+ release_metapage(l2mp);
+ goto finalize;
+ }
+ }
+ } /* for each L1 in a L2 */
+
+ assert(0);
+
+ /*
+ * finalize bmap control page
+ */
+ finalize:
+
+ return 0;
+
+ errout:
+ return EIO;
+}
+
+
+/*
+ * dbFinalizeBmap()
+ */
+void dbFinalizeBmap(struct inode *ipbmap)
+{
+ bmap_t *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
+ int actags, inactags, l2nl;
+ s64 ag_rem, actfree, inactfree, avgfree;
+ int i, n;
+
+ /*
+ * finalize bmap control page
+ */
+//finalize:
+ /*
+ * compute db_agpref: preferred ag to allocate from
+ * (the leftmost ag with average free space in it);
+ */
+//agpref:
+ /* get the number of active ags and inactive ags */
+ actags = bmp->db_maxag + 1;
+ inactags = bmp->db_numag - actags;
+ ag_rem = bmp->db_mapsize & (bmp->db_agsize - 1); /* ??? */
+
+ /* determine how many blocks are in the inactive allocation
+ * groups. in doing this, we must account for the fact that
+ * the rightmost group might be a partial group (i.e. file
+ * system size is not a multiple of the group size).
+ */
+ inactfree = (inactags && ag_rem) ?
+ ((inactags - 1) << bmp->db_agl2size) + ag_rem
+ : inactags << bmp->db_agl2size;
+
+ /* determine how many free blocks are in the active
+ * allocation groups plus the average number of free blocks
+ * within the active ags.
+ */
+ actfree = bmp->db_nfree - inactfree;
+ avgfree = (u32) actfree / (u32) actags;
+
+ /* if the preferred allocation group does not have average free
+ * space, re-establish the preferred group as the leftmost
+ * group with average free space.
+ */
+ if (bmp->db_agfree[bmp->db_agpref] < avgfree) {
+ for (bmp->db_agpref = 0; bmp->db_agpref < actags;
+ bmp->db_agpref++) {
+ if (bmp->db_agfree[bmp->db_agpref] >= avgfree)
+ break;
+ }
+ assert(bmp->db_agpref < bmp->db_numag);
+ }
+
+ /*
+ * compute db_aglevel, db_agheigth, db_agwidth, db_agstart:
+ * an ag is covered in the aglevel dmapctl summary tree,
+ * by agwidth number of nodes at agheigth level height (counting
+ * from the leaves), which start at the agstart index node of the
+ * summary tree node array;
+ */
+ bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize);
+ l2nl =
+ bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL);
+ bmp->db_agheigth = l2nl >> 1;
+ bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1));
+ for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0;
+ i--) {
+ bmp->db_agstart += n;
+ n <<= 2;
+ }
+
+/*
+printk("bmap: agpref:%d aglevel:%d agheigth:%d agwidth:%d\n",
+ bmp->db_agpref, bmp->db_aglevel, bmp->db_agheigth, bmp->db_agwidth);
+*/
+}
+
+
+/*
+ * NAME: dbInitDmap()/ujfs_idmap_page()
+ *
+ * FUNCTION: initialize working/persistent bitmap of the dmap page
+ * for the specified number of blocks:
+ *
+ * at entry, the bitmaps have been initialized as free (ZEROS);
+ * The number of blocks will only account for the actually
+ * existing blocks.
Blocks which don't actually exist in
+ * the aggregate will be marked as allocated (ONES);
+ *
+ * PARAMETERS:
+ * dp - pointer to page of map
+ * Blkno - starting block number covered by this dmap
+ * nblocks - number of blocks this page
+ *
+ * RETURNS: max free string at the root of the dmap's summary tree
+ */
+static int dbInitDmap(dmap_t * dp, s64 Blkno, int nblocks)
+{
+ int blkno, w, b, r, nw, nb, i;
+/*
+printk("sbh_dmap: in dbInitDmap blkno:%Ld nblocks:%ld\n", Blkno, nblocks);
+*/
+
+ /* starting block number within the dmap */
+ blkno = Blkno & (BPERDMAP - 1);
+
+ if (blkno == 0) {
+ dp->nblocks = dp->nfree = cpu_to_le32(nblocks);
+ dp->start = cpu_to_le64(Blkno);
+
+ if (nblocks == BPERDMAP) {
+ memset(&dp->wmap[0], 0, LPERDMAP * 4);
+ memset(&dp->pmap[0], 0, LPERDMAP * 4);
+ goto initTree;
+ }
+ } else {
+ dp->nblocks =
+ cpu_to_le32(le32_to_cpu(dp->nblocks) + nblocks);
+ dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks);
+ }
+
+ /* word number containing start block number */
+ w = blkno >> L2DBWORD;
+
+ /*
+ * free the bits corresponding to the block range (ZEROS):
+ * note: not all bits of the first and last words may be contained
+ * within the block range.
+ */
+ for (r = nblocks; r > 0; r -= nb, blkno += nb) {
+ /* number of bits preceding range to be freed in the word */
+ b = blkno & (DBWORD - 1);
+ /* number of bits to free in the word */
+ nb = min(r, DBWORD - b);
+
+ /* is partial word to be freed ? */
+ if (nb < DBWORD) {
+ /* free (set to 0) from the bitmap word */
+ dp->wmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb)
+ >> b));
+ dp->pmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb)
+ >> b));
+
+ /* skip the word freed */
+ w++;
+ } else {
+ /* free (set to 0) contiguous bitmap words */
+ nw = r >> L2DBWORD;
+ memset(&dp->wmap[w], 0, nw * 4);
+ memset(&dp->pmap[w], 0, nw * 4);
+
+ /* skip the words freed */
+ nb = nw << L2DBWORD;
+ w += nw;
+ }
+ }
+
+ /*
+ * mark bits following the range to be freed (non-existing
+ * blocks) as allocated (ONES)
+ */
+/*
+printk("sbh_dmap: in dbInitDmap, preparing to mark unbacked, blkno:%ld nblocks:%ld\n",
+ blkno, nblocks);
+*/
+
+ if (blkno == BPERDMAP)
+ goto initTree;
+
+ /* the first word beyond the end of existing blocks */
+ w = blkno >> L2DBWORD;
+
+ /* does nblocks fall on a 32-bit boundary ? */
+ b = blkno & (DBWORD - 1);
+/*
+printk("sbh_dmap: in dbInitDmap, b:%ld w:%ld mask: %lx\n", b, w, (ONES>>b));
+*/
+ if (b) {
+ /* mark a partial word allocated */
+ dp->wmap[w] = dp->pmap[w] = cpu_to_le32(ONES >> b);
+ w++;
+ }
+
+ /* set the rest of the words in the page to allocated (ONES) */
+ for (i = w; i < LPERDMAP; i++)
+ dp->pmap[i] = dp->wmap[i] = ONES;
+
+ /*
+ * init tree
+ */
+ initTree:
+ return (dbInitDmapTree(dp));
+}
+
+
+/*
+ * NAME: dbInitDmapTree()/ujfs_complete_dmap()
+ *
+ * FUNCTION: initialize summary tree of the specified dmap:
+ *
+ * at entry, bitmap of the dmap has been initialized;
+ *
+ * PARAMETERS:
+ * dp - dmap to complete
+ *
+ * RETURNS: max free string at the root of the tree
+ */
+static int dbInitDmapTree(dmap_t * dp)
+{
+ dmaptree_t *tp;
+ s8 *cp;
+ int i;
+
+ /* init fixed info of tree */
+ tp = &dp->tree;
+ tp->nleafs = cpu_to_le32(LPERDMAP);
+ tp->l2nleafs = cpu_to_le32(L2LPERDMAP);
+ tp->leafidx = cpu_to_le32(LEAFIND);
+ tp->height = cpu_to_le32(4);
+ tp->budmin = BUDMIN;
+
+ /* init each leaf from corresponding wmap word:
+ * note: leaf is set to NOFREE(-1) if all blocks of corresponding
+ * bitmap word are allocated.
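+ *
+ * (illustration added in editing: a wmap word of 0 yields a leaf
+ * of BUDMIN, i.e. all 32 blocks free as one buddy; a word with
+ * either 16-bit half all zero yields BUDMIN - 1; a word of all
+ * ones yields NOFREE.)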
+	 */
+	cp = tp->stree + le32_to_cpu(tp->leafidx);
+	for (i = 0; i < LPERDMAP; i++)
+		*cp++ = dbMaxBud((u8 *) & dp->wmap[i]);
+
+	/* build the dmap's binary buddy summary tree */
+	return (dbInitTree(tp));
+}
+
+
+/*
+ * NAME:	dbInitTree()/ujfs_adjtree()
+ *
+ * FUNCTION:	initialize binary buddy summary tree of a dmap or dmapctl.
+ *
+ *		at entry, the leaves of the tree have been initialized
+ *		from the corresponding bitmap words or the roots of the
+ *		summary trees of the child control pages;
+ *		configure the binary buddy system at the leaf level, then
+ *		bubble the values of the leaf nodes up the tree.
+ *
+ * PARAMETERS:
+ *	dtp	- pointer to the summary tree (of a dmap or dmapctl)
+ *		  whose leaves have been initialized
+ *
+ * RETURNS: max free string at the root of the tree
+ */
+static int dbInitTree(dmaptree_t * dtp)
+{
+	int l2max, l2free, bsize, nextb, i;
+	int child, parent, nparent;
+	s8 *tp, *cp, *cp1;
+
+	tp = dtp->stree;
+
+	/* Determine the maximum free string possible for the leaves */
+	l2max = le32_to_cpu(dtp->l2nleafs) + dtp->budmin;
+
+	/*
+	 * configure the leaf level into binary buddy system
+	 *
+	 * Try to combine buddies starting with a buddy size of 1
+	 * (i.e. two leaves). At a buddy size of 1 two buddy leaves
+	 * can be combined if both buddies have a maximum free of l2min;
+	 * the combination will result in the left-most buddy leaf having
+	 * a maximum free of l2min+1.
+	 * After processing all buddies for a given size, process buddies
+	 * at the next higher buddy size (i.e. current size * 2) and
+	 * the next maximum free (current free + 1).
+	 * This continues until the maximum possible buddy combination
+	 * yields maximum free.
+	 */
+	for (l2free = dtp->budmin, bsize = 1; l2free < l2max;
+	     l2free++, bsize = nextb) {
+		/* get next buddy size == current buddy pair size */
+		nextb = bsize << 1;
+
+		/* scan each adjacent buddy pair at current buddy size */
+		for (i = 0, cp = tp + le32_to_cpu(dtp->leafidx);
+		     i < le32_to_cpu(dtp->nleafs);
+		     i += nextb, cp += nextb) {
+			/* coalesce if both adjacent buddies are max free */
+			if (*cp == l2free && *(cp + bsize) == l2free) {
+				*cp = l2free + 1;	/* left take right */
+				*(cp + bsize) = -1;	/* right give left */
+			}
+		}
+	}
+
+	/*
+	 * bubble summary information of leaves up the tree.
+	 *
+	 * Starting at the leaf node level, the four nodes described by
+	 * the higher level parent node are compared for a maximum free and
+	 * this maximum becomes the value of the parent node.
+	 * when all lower level nodes are processed in this fashion then
+	 * move up to the next level (parent becomes a lower level node) and
+	 * continue the process for that level.
+	 */
+	for (child = le32_to_cpu(dtp->leafidx),
+	     nparent = le32_to_cpu(dtp->nleafs) >> 2;
+	     nparent > 0; nparent >>= 2, child = parent) {
+		/* get index of 1st node of parent level */
+		parent = (child - 1) >> 2;
+
+		/* set the value of the parent node as the maximum
+		 * of the four nodes of the current level.
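+		 *
+		 *	[Editorial sketch, not part of the original patch]
+		 *	The two passes of dbInitTree() in miniature, on a
+		 *	4-ary summary tree with 16 leaves kept in one array
+		 *	(root at index 0, leaves at index 5, tree size
+		 *	1 + 4 + 16 = 21), taking budmin as 0 for simplicity:
+		 *
+		 *	static signed char init_tree_sketch(signed char tree[21])
+		 *	{
+		 *		int l2free, bsize, i, j, child, parent, nparent;
+		 *		signed char max;
+		 *
+		 *		// pass 1: coalesce adjacent buddy pairs
+		 *		for (l2free = 0, bsize = 1; l2free < 4;
+		 *		     l2free++, bsize <<= 1)
+		 *			for (i = 0; i < 16; i += bsize << 1)
+		 *				if (tree[5 + i] == l2free &&
+		 *				    tree[5 + i + bsize] == l2free) {
+		 *					tree[5 + i] = l2free + 1;
+		 *					tree[5 + i + bsize] = -1;
+		 *				}
+		 *
+		 *		// pass 2: bubble each 4-child maximum up
+		 *		for (child = 5, nparent = 4; nparent > 0;
+		 *		     nparent >>= 2, child = parent) {
+		 *			parent = (child - 1) >> 2;
+		 *			for (i = 0; i < nparent; i++) {
+		 *				max = tree[child + 4 * i];
+		 *				for (j = 1; j < 4; j++)
+		 *					if (tree[child + 4 * i + j] > max)
+		 *						max = tree[child + 4 * i + j];
+		 *				tree[parent + i] = max;
+		 *			}
+		 *		}
+		 *		return tree[0];	// max free of the whole tree
+		 *	}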
+ */ + for (i = 0, cp = tp + child, cp1 = tp + parent; + i < nparent; i++, cp += 4, cp1++) + *cp1 = TREEMAX(cp); + } + + return (*tp); +} + + +/* + * dbInitDmapCtl() + * + * function: initialize dmapctl page + */ +static int dbInitDmapCtl(dmapctl_t * dcp, int level, int i) +{ /* start leaf index not covered by range */ + s8 *cp; + + dcp->nleafs = cpu_to_le32(LPERCTL); + dcp->l2nleafs = cpu_to_le32(L2LPERCTL); + dcp->leafidx = cpu_to_le32(CTLLEAFIND); + dcp->height = cpu_to_le32(5); + dcp->budmin = L2BPERDMAP + L2LPERCTL * level; + + /* + * initialize the leaves of current level that were not covered + * by the specified input block range (i.e. the leaves have no + * low level dmapctl or dmap). + */ + cp = &dcp->stree[CTLLEAFIND + i]; + for (; i < LPERCTL; i++) + *cp++ = NOFREE; + + /* build the dmap's binary buddy summary tree */ + return (dbInitTree((dmaptree_t *) dcp)); +} + + +/* + * NAME: dbGetL2AGSize()/ujfs_getagl2size() + * + * FUNCTION: Determine log2(allocation group size) from aggregate size + * + * PARAMETERS: + * nblocks - Number of blocks in aggregate + * + * RETURNS: log2(allocation group size) in aggregate blocks + */ +static int dbGetL2AGSize(s64 nblocks) +{ + s64 sz; + s64 m; + int l2sz; + + if (nblocks < BPERDMAP * MAXAG) + return (L2BPERDMAP); + + /* round up aggregate size to power of 2 */ + m = ((u64) 1 << (64 - 1)); + for (l2sz = 64; l2sz >= 0; l2sz--, m >>= 1) { + if (m & nblocks) + break; + } + + sz = (s64) 1 << l2sz; + if (sz < nblocks) + l2sz += 1; + + /* agsize = roundupSize/max_number_of_ag */ + return (l2sz - L2MAXAG); +} + + +/* + * NAME: dbMapFileSizeToMapSize() + * + * FUNCTION: compute number of blocks the block allocation map file + * can cover from the map file size; + * + * RETURNS: Number of blocks which can be covered by this block map file; + */ + +/* + * maximum number of map pages at each level including control pages + */ +#define MAXL0PAGES (1 + LPERCTL) +#define MAXL1PAGES (1 + LPERCTL * MAXL0PAGES) +#define MAXL2PAGES (1 + LPERCTL * MAXL1PAGES) + +/* + * convert number of map pages to the zero origin top dmapctl level + */ +#define BMAPPGTOLEV(npages) \ + (((npages) <= 3 + MAXL0PAGES) ? 0 \ + : ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) + +s64 dbMapFileSizeToMapSize(struct inode * ipbmap) +{ + struct super_block *sb = ipbmap->i_sb; + s64 nblocks; + s64 npages, ndmaps; + int level, i; + int complete, factor; + + nblocks = ipbmap->i_size >> JFS_SBI(sb)->l2bsize; + npages = nblocks >> JFS_SBI(sb)->l2nbperpage; + level = BMAPPGTOLEV(npages); + + /* At each level, accumulate the number of dmap pages covered by + * the number of full child levels below it; + * repeat for the last incomplete child level. + */ + ndmaps = 0; + npages--; /* skip the first global control page */ + /* skip higher level control pages above top level covered by map */ + npages -= (2 - level); + npages--; /* skip top level's control page */ + for (i = level; i >= 0; i--) { + factor = + (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1); + complete = (u32) npages / factor; + ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL + : ((i == 1) ? 
LPERCTL : 1)); + + /* pages in last/incomplete child */ + npages = (u32) npages % factor; + /* skip incomplete child's level control page */ + npages--; + } + + /* convert the number of dmaps into the number of blocks + * which can be covered by the dmaps; + */ + nblocks = ndmaps << L2BPERDMAP; + + return (nblocks); +} + + +#ifdef _JFS_DEBUG_DMAP +/* + * DBinitmap() + */ +static void DBinitmap(s64 size, struct inode *ipbmap, u32 ** results) +{ + int npages; + u32 *dbmap, *d; + int n; + s64 lblkno, cur_block; + dmap_t *dp; + metapage_t *mp; + + npages = size / 32768; + npages += (size % 32768) ? 1 : 0; + + dbmap = (u32 *) xmalloc(npages * 4096, L2PSIZE, kernel_heap); + if (dbmap == NULL) + assert(0); + + for (n = 0, d = dbmap; n < npages; n++, d += 1024) + bzero(d, 4096); + + /* Need to initialize from disk map pages + */ + for (d = dbmap, cur_block = 0; cur_block < size; + cur_block += BPERDMAP, d += LPERDMAP) { + lblkno = BLKTODMAP(cur_block, + JFS_SBI(ipbmap->i_sb)->bmap-> + db_l2nbperpage); + mp = read_metapage(ipbmap, lblkno, PSIZE, 0); + if (mp == NULL) { + assert(0); + } + dp = (dmap_t *) mp->data; + + for (n = 0; n < LPERDMAP; n++) + d[n] = le32_to_cpu(dp->wmap[n]); + + release_metapage(mp); + } + + *results = dbmap; +} + + +/* + * DBAlloc() + */ +void DBAlloc(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks) +{ + int word, nb, bitno; + u32 mask; + + assert(blkno > 0 && blkno < mapsize); + assert(nblocks > 0 && nblocks <= mapsize); + + assert(blkno + nblocks <= mapsize); + + dbmap += (blkno / 32); + while (nblocks > 0) { + bitno = blkno & (32 - 1); + nb = min(nblocks, 32 - bitno); + + mask = (0xffffffff << (32 - nb) >> bitno); + assert((mask & *dbmap) == 0); + *dbmap |= mask; + + dbmap++; + blkno += nb; + nblocks -= nb; + } +} + + +/* + * DBFree() + */ +static void DBFree(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks) +{ + int word, nb, bitno; + u32 mask; + + assert(blkno > 0 && blkno < mapsize); + assert(nblocks > 0 && nblocks <= mapsize); + + assert(blkno + nblocks <= mapsize); + + dbmap += (blkno / 32); + while (nblocks > 0) { + bitno = blkno & (32 - 1); + nb = min(nblocks, 32 - bitno); + + mask = (0xffffffff << (32 - nb) >> bitno); + assert((mask & *dbmap) == mask); + *dbmap &= ~mask; + + dbmap++; + blkno += nb; + nblocks -= nb; + } +} + + +/* + * DBAllocCK() + */ +static void DBAllocCK(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks) +{ + int word, nb, bitno; + u32 mask; + + assert(blkno > 0 && blkno < mapsize); + assert(nblocks > 0 && nblocks <= mapsize); + + assert(blkno + nblocks <= mapsize); + + dbmap += (blkno / 32); + while (nblocks > 0) { + bitno = blkno & (32 - 1); + nb = min(nblocks, 32 - bitno); + + mask = (0xffffffff << (32 - nb) >> bitno); + assert((mask & *dbmap) == mask); + + dbmap++; + blkno += nb; + nblocks -= nb; + } +} + + +/* + * DBFreeCK() + */ +static void DBFreeCK(uint * dbmap, s64 mapsize, s64 blkno, s64 nblocks) +{ + int word, nb, bitno; + u32 mask; + + assert(blkno > 0 && blkno < mapsize); + assert(nblocks > 0 && nblocks <= mapsize); + + assert(blkno + nblocks <= mapsize); + + dbmap += (blkno / 32); + while (nblocks > 0) { + bitno = blkno & (32 - 1); + nb = min(nblocks, 32 - bitno); + + mask = (0xffffffff << (32 - nb) >> bitno); + assert((mask & *dbmap) == 0); + + dbmap++; + blkno += nb; + nblocks -= nb; + } +} + + +/* + * dbPrtMap() + */ +static void dbPrtMap(bmap_t * bmp) +{ + printk(" mapsize: %d%d\n", bmp->db_mapsize); + printk(" nfree: %d%d\n", bmp->db_nfree); + printk(" numag: %d\n", bmp->db_numag); + printk(" agsize: %d%d\n", 
bmp->db_agsize);
+	printk(" agl2size: %d\n", bmp->db_agl2size);
+	printk(" agwidth: %d\n", bmp->db_agwidth);
+	printk(" agstart: %d\n", bmp->db_agstart);
+	printk(" agheigth: %d\n", bmp->db_agheigth);
+	printk(" aglevel: %d\n", bmp->db_aglevel);
+	printk(" maxlevel: %d\n", bmp->db_maxlevel);
+	printk(" maxag: %d\n", bmp->db_maxag);
+	printk(" agpref: %d\n", bmp->db_agpref);
+	printk(" l2nbppg: %d\n", bmp->db_l2nbperpage);
+}
+
+
+/*
+ *	dbPrtCtl()
+ */
+static void dbPrtCtl(dmapctl_t * dcp)
+{
+	int i, j, n;
+
+	printk(" height: %08x\n", le32_to_cpu(dcp->height));
+	printk(" leafidx: %08x\n", le32_to_cpu(dcp->leafidx));
+	printk(" budmin: %08x\n", dcp->budmin);
+	printk(" nleafs: %08x\n", le32_to_cpu(dcp->nleafs));
+	printk(" l2nleafs: %08x\n", le32_to_cpu(dcp->l2nleafs));
+
+	printk("\n Tree:\n");
+	for (i = 0; i < CTLLEAFIND; i += 8) {
+		n = min(8, CTLLEAFIND - i);
+
+		for (j = 0; j < n; j++)
+			printk(" [%03x]: %02x", i + j,
+			       (char) dcp->stree[i + j]);
+		printk("\n");
+	}
+
+	printk("\n Tree Leaves:\n");
+	for (i = 0; i < LPERCTL; i += 8) {
+		n = min(8, LPERCTL - i);
+
+		for (j = 0; j < n; j++)
+			printk(" [%03x]: %02x",
+			       i + j,
+			       (char) dcp->stree[i + j + CTLLEAFIND]);
+		printk("\n");
+	}
+}
+#endif				/* _JFS_DEBUG_DMAP */
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_dmap.h linuxppc64_2_4/fs/jfs/jfs_dmap.h
--- ../kernel.org/linux-2.4.19/fs/jfs/jfs_dmap.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/fs/jfs/jfs_dmap.h	Tue Apr 23 11:21:51 2002
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) International Business Machines Corp., 2000
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * jfs_dmap.h: block allocation map manager + */ + +#ifndef _H_JFS_DMAP +#define _H_JFS_DMAP + +#include "jfs_txnmgr.h" + +#define BMAPVERSION 1 /* version number */ +#define TREESIZE (256+64+16+4+1) /* size of a dmap tree */ +#define LEAFIND (64+16+4+1) /* index of 1st leaf of a dmap tree */ +#define LPERDMAP 256 /* num leaves per dmap tree */ +#define L2LPERDMAP 8 /* l2 number of leaves per dmap tree */ +#define DBWORD 32 /* # of blks covered by a map word */ +#define L2DBWORD 5 /* l2 # of blks covered by a mword */ +#define BUDMIN L2DBWORD /* max free string in a map word */ +#define BPERDMAP (LPERDMAP * DBWORD) /* num of blks per dmap */ +#define L2BPERDMAP 13 /* l2 num of blks per dmap */ +#define CTLTREESIZE (1024+256+64+16+4+1) /* size of a dmapctl tree */ +#define CTLLEAFIND (256+64+16+4+1) /* idx of 1st leaf of a dmapctl tree */ +#define LPERCTL 1024 /* num of leaves per dmapctl tree */ +#define L2LPERCTL 10 /* l2 num of leaves per dmapctl tree */ +#define ROOT 0 /* index of the root of a tree */ +#define NOFREE ((s8) -1) /* no blocks free */ +#define MAXAG 128 /* max number of allocation groups */ +#define L2MAXAG 7 /* l2 max num of AG */ +#define L2MINAGSZ 25 /* l2 of minimum AG size in bytes */ +#define BMAPBLKNO 0 /* lblkno of bmap within the map */ + +/* + * maximum l2 number of disk blocks at the various dmapctl levels. + */ +#define L2MAXL0SIZE (L2BPERDMAP + 1 * L2LPERCTL) +#define L2MAXL1SIZE (L2BPERDMAP + 2 * L2LPERCTL) +#define L2MAXL2SIZE (L2BPERDMAP + 3 * L2LPERCTL) + +/* + * maximum number of disk blocks at the various dmapctl levels. + */ +#define MAXL0SIZE ((s64)1 << L2MAXL0SIZE) +#define MAXL1SIZE ((s64)1 << L2MAXL1SIZE) +#define MAXL2SIZE ((s64)1 << L2MAXL2SIZE) + +#define MAXMAPSIZE MAXL2SIZE /* maximum aggregate map size */ + +/* + * determine the maximum free string for four (lower level) nodes + * of the tree. + */ +static __inline signed char TREEMAX(signed char *cp) +{ + signed char tmp1, tmp2; + + tmp1 = max(*(cp+2), *(cp+3)); + tmp2 = max(*(cp), *(cp+1)); + + return max(tmp1, tmp2); +} + +/* + * convert disk block number to the logical block number of the dmap + * describing the disk block. s is the log2(number of logical blocks per page) + * + * The calculation figures out how many logical pages are in front of the dmap. + * - the number of dmaps preceding it + * - the number of L0 pages preceding its L0 page + * - the number of L1 pages preceding its L1 page + * - 3 is added to account for the L2, L1, and L0 page for this dmap + * - 1 is added to account for the control page of the map. + */ +#define BLKTODMAP(b,s) \ + ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s)) + +/* + * convert disk block number to the logical block number of the LEVEL 0 + * dmapctl describing the disk block. s is the log2(number of logical blocks + * per page) + * + * The calculation figures out how many logical pages are in front of the L0. + * - the number of dmap pages preceding it + * - the number of L0 pages preceding it + * - the number of L1 pages preceding its L1 page + * - 2 is added to account for the L2, and L1 page for this L0 + * - 1 is added to account for the control page of the map. 
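+ */
+
+/*
+ * [Editorial sketch, not part of the original patch]  The page-counting
+ * scheme above can be checked with concrete numbers.  Assuming one
+ * aggregate block per logical page (s == 0), block b = 3 * 8192 lies in
+ * the fourth dmap: 3 dmap pages precede it (b >> 13 == 3), no L0 or L1
+ * pages do (b >> 23 == b >> 33 == 0), and the L2/L1/L0 pages for this
+ * dmap (+3) plus the global control page (+1) give logical page 7:
+ */
+static long long blktodmap_example(void)
+{
+	long long b = 3 * 8192;	/* a block within the fourth dmap */
+
+	/* 3 + 0 + 0 + 3 + 1 == 7 */
+	return ((b >> 13) + (b >> 23) + (b >> 33) + 3 + 1) << 0;
+}
+
+/*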
+ */ +#define BLKTOL0(b,s) \ + (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s)) + +/* + * convert disk block number to the logical block number of the LEVEL 1 + * dmapctl describing the disk block. s is the log2(number of logical blocks + * per page) + * + * The calculation figures out how many logical pages are in front of the L1. + * - the number of dmap pages preceding it + * - the number of L0 pages preceding it + * - the number of L1 pages preceding it + * - 1 is added to account for the L2 page + * - 1 is added to account for the control page of the map. + */ +#define BLKTOL1(b,s) \ + (((((b) >> 33) << 20) + (((b) >> 33) << 10) + ((b) >> 33) + 1 + 1) << (s)) + +/* + * convert disk block number to the logical block number of the dmapctl + * at the specified level which describes the disk block. + */ +#define BLKTOCTL(b,s,l) \ + (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s))) + +/* + * convert aggregate map size to the zero origin dmapctl level of the + * top dmapctl. + */ +#define BMAPSZTOLEV(size) \ + (((size) <= MAXL0SIZE) ? 0 : ((size) <= MAXL1SIZE) ? 1 : 2) + +/* convert disk block number to allocation group number. + */ +#define BLKTOAG(b,sbi) ((b) >> ((sbi)->bmap->db_agl2size)) + +/* convert allocation group number to starting disk block + * number. + */ +#define AGTOBLK(a,ip) \ + ((s64)(a) << (JFS_SBI((ip)->i_sb)->bmap->db_agl2size)) + +/* + * dmap summary tree + * + * dmaptree_t must be consistent with dmapctl_t. + */ +typedef struct { + s32 nleafs; /* 4: number of tree leafs */ + s32 l2nleafs; /* 4: l2 number of tree leafs */ + s32 leafidx; /* 4: index of first tree leaf */ + s32 height; /* 4: height of the tree */ + s8 budmin; /* 1: min l2 tree leaf value to combine */ + s8 stree[TREESIZE]; /* TREESIZE: tree */ + u8 pad[2]; /* 2: pad to word boundary */ +} dmaptree_t; /* - 360 - */ + +/* + * dmap page per 8K blocks bitmap + */ +typedef struct { + s32 nblocks; /* 4: num blks covered by this dmap */ + s32 nfree; /* 4: num of free blks in this dmap */ + s64 start; /* 8: starting blkno for this dmap */ + dmaptree_t tree; /* 360: dmap tree */ + u8 pad[1672]; /* 1672: pad to 2048 bytes */ + u32 wmap[LPERDMAP]; /* 1024: bits of the working map */ + u32 pmap[LPERDMAP]; /* 1024: bits of the persistent map */ +} dmap_t; /* - 4096 - */ + +/* + * disk map control page per level. + * + * dmapctl_t must be consistent with dmaptree_t. + */ +typedef struct { + s32 nleafs; /* 4: number of tree leafs */ + s32 l2nleafs; /* 4: l2 number of tree leafs */ + s32 leafidx; /* 4: index of the first tree leaf */ + s32 height; /* 4: height of tree */ + s8 budmin; /* 1: minimum l2 tree leaf value */ + s8 stree[CTLTREESIZE]; /* CTLTREESIZE: dmapctl tree */ + u8 pad[2714]; /* 2714: pad to 4096 */ +} dmapctl_t; /* - 4096 - */ + +/* + * common definition for dmaptree_t within dmap and dmapctl + */ +typedef union { + dmaptree_t t1; + dmapctl_t t2; +} dmtree_t; + +/* macros for accessing fields within dmtree_t */ +#define dmt_nleafs t1.nleafs +#define dmt_l2nleafs t1.l2nleafs +#define dmt_leafidx t1.leafidx +#define dmt_height t1.height +#define dmt_budmin t1.budmin +#define dmt_stree t1.stree + +/* + * on-disk aggregate disk allocation map descriptor. 
+ */ +typedef struct { + s64 dn_mapsize; /* 8: number of blocks in aggregate */ + s64 dn_nfree; /* 8: num free blks in aggregate map */ + s32 dn_l2nbperpage; /* 4: number of blks per page */ + s32 dn_numag; /* 4: total number of ags */ + s32 dn_maxlevel; /* 4: number of active ags */ + s32 dn_maxag; /* 4: max active alloc group number */ + s32 dn_agpref; /* 4: preferred alloc group (hint) */ + s32 dn_aglevel; /* 4: dmapctl level holding the AG */ + s32 dn_agheigth; /* 4: height in dmapctl of the AG */ + s32 dn_agwidth; /* 4: width in dmapctl of the AG */ + s32 dn_agstart; /* 4: start tree index at AG height */ + s32 dn_agl2size; /* 4: l2 num of blks per alloc group */ + s64 dn_agfree[MAXAG]; /* 8*MAXAG: per AG free count */ + s64 dn_agsize; /* 8: num of blks per alloc group */ + s8 dn_maxfreebud; /* 1: max free buddy system */ + u8 pad[3007]; /* 3007: pad to 4096 */ +} dbmap_t; /* - 4096 - */ + +/* + * in-memory aggregate disk allocation map descriptor. + */ +typedef struct bmap { + dbmap_t db_bmap; /* on-disk aggregate map descriptor */ + struct inode *db_ipbmap; /* ptr to aggregate map incore inode */ + struct semaphore db_bmaplock; /* aggregate map lock */ + u32 *db_DBmap; +} bmap_t; + +/* macros for accessing fields within in-memory aggregate map descriptor */ +#define db_mapsize db_bmap.dn_mapsize +#define db_nfree db_bmap.dn_nfree +#define db_agfree db_bmap.dn_agfree +#define db_agsize db_bmap.dn_agsize +#define db_agl2size db_bmap.dn_agl2size +#define db_agwidth db_bmap.dn_agwidth +#define db_agheigth db_bmap.dn_agheigth +#define db_agstart db_bmap.dn_agstart +#define db_numag db_bmap.dn_numag +#define db_maxlevel db_bmap.dn_maxlevel +#define db_aglevel db_bmap.dn_aglevel +#define db_agpref db_bmap.dn_agpref +#define db_maxag db_bmap.dn_maxag +#define db_maxfreebud db_bmap.dn_maxfreebud +#define db_l2nbperpage db_bmap.dn_l2nbperpage + +/* + * macros for various conversions needed by the allocators. + * blkstol2(), cntlz(), and cnttz() are operating system dependent functions. + */ +/* convert number of blocks to log2 number of blocks, rounding up to + * the next log2 value if blocks is not a l2 multiple. + */ +#define BLKSTOL2(d) (blkstol2(d)) + +/* convert number of leafs to log2 leaf value */ +#define NLSTOL2BSZ(n) (31 - cntlz((n)) + BUDMIN) + +/* convert leaf index to log2 leaf value */ +#define LITOL2BSZ(n,m,b) ((((n) == 0) ? (m) : cnttz((n))) + (b)) + +/* convert a block number to a dmap control leaf index */ +#define BLKTOCTLLEAF(b,m) \ + (((b) & (((s64)1 << ((m) + L2LPERCTL)) - 1)) >> (m)) + +/* convert log2 leaf value to buddy size */ +#define BUDSIZE(s,m) (1 << ((s) - (m))) + +/* + * external references. 
+ */ +extern int dbMount(struct inode *ipbmap); + +extern int dbUnmount(struct inode *ipbmap, int mounterror); + +extern int dbFree(struct inode *ipbmap, s64 blkno, s64 nblocks); + +extern int dbUpdatePMap(struct inode *ipbmap, + int free, s64 blkno, s64 nblocks, tblock_t * tblk); + +extern int dbNextAG(struct inode *ipbmap); + +extern int dbAlloc(struct inode *ipbmap, s64 hint, s64 nblocks, s64 * results); + +extern int dbAllocExact(struct inode *ip, s64 blkno, int nblocks); + +extern int dbReAlloc(struct inode *ipbmap, + s64 blkno, s64 nblocks, s64 addnblocks, s64 * results); + +extern int dbSync(struct inode *ipbmap); +extern int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks); +extern int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks); +extern void dbFinalizeBmap(struct inode *ipbmap); +extern s64 dbMapFileSizeToMapSize(struct inode *ipbmap); +#endif /* _H_JFS_DMAP */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_dtree.c linuxppc64_2_4/fs/jfs/jfs_dtree.c --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_dtree.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_dtree.c Tue Apr 23 11:25:34 2002 @@ -0,0 +1,4525 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * +*/ + +/* + * jfs_dtree.c: directory B+-tree manager + * + * B+-tree with variable length key directory: + * + * each directory page is structured as an array of 32-byte + * directory entry slots initialized as a freelist + * to avoid search/compaction of free space at insertion. + * when an entry is inserted, a number of slots are allocated + * from the freelist as required to store variable length data + * of the entry; when the entry is deleted, slots of the entry + * are returned to freelist. + * + * leaf entry stores full name as key and file serial number + * (aka inode number) as data. + * internal/router entry stores sufffix compressed name + * as key and simple extent descriptor as data. + * + * each directory page maintains a sorted entry index table + * which stores the start slot index of sorted entries + * to allow binary search on the table. + * + * directory starts as a root/leaf page in on-disk inode + * inline data area. + * when it becomes full, it starts a leaf of a external extent + * of length of 1 block. each time the first leaf becomes full, + * it is extended rather than split (its size is doubled), + * until its length becoms 4 KBytes, from then the extent is split + * with new 4 Kbyte extent when it becomes full + * to reduce external fragmentation of small directories. + * + * blah, blah, blah, for linear scan of directory in pieces by + * readdir(). + * + * + * case-insensitive directory file system + * + * names are stored in case-sensitive way in leaf entry. 
+ * but stored, searched and compared in case-insensitive (uppercase) order + * (i.e., both search key and entry key are folded for search/compare): + * (note that case-sensitive order is BROKEN in storage, e.g., + * sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad + * + * entries which folds to the same key makes up a equivalent class + * whose members are stored as contiguous cluster (may cross page boundary) + * but whose order is arbitrary and acts as duplicate, e.g., + * abc, Abc, aBc, abC) + * + * once match is found at leaf, requires scan forward/backward + * either for, in case-insensitive search, duplicate + * or for, in case-sensitive search, for exact match + * + * router entry must be created/stored in case-insensitive way + * in internal entry: + * (right most key of left page and left most key of right page + * are folded, and its suffix compression is propagated as router + * key in parent) + * (e.g., if split occurs and , trather than + * should be made the router key for the split) + * + * case-insensitive search: + * + * fold search key; + * + * case-insensitive search of B-tree: + * for internal entry, router key is already folded; + * for leaf entry, fold the entry key before comparison. + * + * if (leaf entry case-insensitive match found) + * if (next entry satisfies case-insensitive match) + * return EDUPLICATE; + * if (prev entry satisfies case-insensitive match) + * return EDUPLICATE; + * return match; + * else + * return no match; + * + * serialization: + * target directory inode lock is being held on entry/exit + * of all main directory service routines. + * + * log based recovery: + */ + +#include +#include +#include "jfs_incore.h" +#include "jfs_superblock.h" +#include "jfs_filsys.h" +#include "jfs_metapage.h" +#include "jfs_dmap.h" +#include "jfs_unicode.h" +#include "jfs_debug.h" + +/* dtree split parameter */ +typedef struct { + metapage_t *mp; + s16 index; + s16 nslot; + component_t *key; + ddata_t *data; + pxdlist_t *pxdlist; +} dtsplit_t; + +#define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) + +/* get page buffer for specified block address */ +#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ +{\ + BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\ + if (!(RC))\ + {\ + if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\ + ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\ + {\ + jERROR(1,("DT_GETPAGE: dtree page corrupt\n"));\ + BT_PUTPAGE(MP);\ + updateSuper((IP)->i_sb, FM_DIRTY);\ + MP = NULL;\ + RC = EIO;\ + }\ + }\ +} + +/* for consistency */ +#define DT_PUTPAGE(MP) BT_PUTPAGE(MP) + +#define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ + BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot) + +/* + * forward references + */ +static int dtSplitUp(tid_t tid, struct inode *ip, + dtsplit_t * split, btstack_t * btstack); + +static int dtSplitPage(tid_t tid, struct inode *ip, dtsplit_t * split, + metapage_t ** rmpp, dtpage_t ** rpp, pxd_t * rxdp); + +static int dtExtendPage(tid_t tid, struct inode *ip, + dtsplit_t * split, btstack_t * btstack); + +static int dtSplitRoot(tid_t tid, struct inode *ip, + dtsplit_t * split, metapage_t ** rmpp); + +static int dtDeleteUp(tid_t tid, struct inode *ip, metapage_t * fmp, + dtpage_t * fp, btstack_t * btstack); + +static int dtSearchNode(struct inode *ip, + s64 lmxaddr, pxd_t * kpxd, btstack_t * btstack); + +static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p); + +static int dtReadFirst(struct inode *ip, btstack_t * btstack); + +static int dtReadNext(struct 
inode *ip, + loff_t * offset, btstack_t * btstack); + +static int dtCompare(component_t * key, dtpage_t * p, int si); + +static int ciCompare(component_t * key, dtpage_t * p, int si, int flag); + +static void dtGetKey(dtpage_t * p, int i, component_t * key, int flag); + +static void ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, + int ri, component_t * key, int flag); + +static void dtInsertEntry(dtpage_t * p, int index, component_t * key, + ddata_t * data, dtlock_t ** dtlock); + +static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, + dtlock_t ** sdtlock, dtlock_t ** ddtlock, + int do_index); + +static void dtDeleteEntry(dtpage_t * p, int fi, dtlock_t ** dtlock); + +static void dtTruncateEntry(dtpage_t * p, int ti, dtlock_t ** dtlock); + +static void dtLinelockFreelist(dtpage_t * p, int m, dtlock_t ** dtlock); + +#define ciToUpper(c) UniStrupr((c)->name) + +/* + * find_index() + * + * Returns dtree page containing directory table entry for specified + * index and pointer to its entry. + * + * mp must be released by caller. + */ +static dir_table_slot_t *find_index(struct inode *ip, u32 index, + metapage_t ** mp) +{ + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + s64 blkno; + s64 offset; + int page_offset; + dir_table_slot_t *slot; + static int maxWarnings = 10; + + if (index < 2) { + if (maxWarnings) { + jERROR(1, ("find_entry called with index = %d\n", + index)); + maxWarnings--; + } + return 0; + } + + if (index >= jfs_ip->next_index) { + jFYI(1, ("find_entry called with index >= next_index\n")); + return 0; + } + + if (jfs_ip->next_index <= (MAX_INLINE_DIRTABLE_ENTRY + 1)) { + /* + * Inline directory table + */ + *mp = 0; + slot = &jfs_ip->i_dirtable[index - 2]; + } else { + offset = (index - 2) * sizeof(dir_table_slot_t); + page_offset = offset & (PSIZE - 1); + blkno = ((offset + 1) >> L2PSIZE) << + JFS_SBI(ip->i_sb)->l2nbperpage; + + if (*mp && ((*mp)->index != blkno)) { + release_metapage(*mp); + *mp = 0; + } + if (*mp == 0) + *mp = read_metapage(ip, blkno, PSIZE, 0); + if (*mp == 0) { + jERROR(1, + ("free_index: error reading directory table\n")); + return 0; + } + + slot = + (dir_table_slot_t *) ((char *) (*mp)->data + + page_offset); + } + return slot; +} + +static inline void lock_index(tid_t tid, struct inode *ip, metapage_t * mp, + u32 index) +{ + tlock_t *tlck; + linelock_t *llck; + lv_t *lv; + + tlck = txLock(tid, ip, mp, tlckDATA); + llck = (linelock_t *) tlck->lock; + + if (llck->index >= llck->maxcnt) + llck = txLinelock(llck); + lv = &llck->lv[llck->index]; + + /* + * Linelock slot size is twice the size of directory table + * slot size. 512 entries per page. + */ + lv->offset = ((index - 2) & 511) >> 1; + lv->length = 1; + llck->index++; +} + +/* + * add_index() + * + * Adds an entry to the directory index table. This is used to provide + * each directory entry with a persistent index in which to resume + * directory traversals + */ +static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) +{ + struct super_block *sb = ip->i_sb; + struct jfs_sb_info *sbi = JFS_SBI(sb); + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + u64 blkno; + dir_table_slot_t *dirtab_slot; + u32 index; + linelock_t *llck; + lv_t *lv; + metapage_t *mp; + s64 offset; + uint page_offset; + int rc; + tlock_t *tlck; + s64 xaddr; + + ASSERT(DO_INDEX(ip)); + + if (jfs_ip->next_index < 2) { + jERROR(1, ("next_index = %d. 
Please fix this!\n", + jfs_ip->next_index)); + jfs_ip->next_index = 2; + } + + index = jfs_ip->next_index++; + + if (index <= MAX_INLINE_DIRTABLE_ENTRY) { + /* + * i_size reflects size of index table, or 8 bytes per entry. + */ + ip->i_size = (loff_t) (index - 1) << 3; + + /* + * dir table fits inline within inode + */ + dirtab_slot = &jfs_ip->i_dirtable[index-2]; + dirtab_slot->flag = DIR_INDEX_VALID; + dirtab_slot->slot = slot; + DTSaddress(dirtab_slot, bn); + + set_cflag(COMMIT_Dirtable, ip); + + return index; + } + if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) { + /* + * It's time to move the inline table to an external + * page and begin to build the xtree + */ + + /* + * Save the table, we're going to overwrite it with the + * xtree root + */ + dir_table_slot_t temp_table[12]; + memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table)); + + /* + * Initialize empty x-tree + */ + xtInitRoot(tid, ip); + + /* + * Allocate the first block & add it to the xtree + */ + xaddr = 0; + if ((rc = + xtInsert(tid, ip, 0, 0, sbi->nbperpage, + &xaddr, 0))) { + jFYI(1, ("add_index: xtInsert failed!\n")); + return -1; + } + ip->i_size = PSIZE; + ip->i_blocks += LBLK2PBLK(sb, sbi->nbperpage); + + if ((mp = get_metapage(ip, 0, ip->i_blksize, 0)) == 0) { + jERROR(1, ("add_index: get_metapage failed!\n")); + xtTruncate(tid, ip, 0, COMMIT_PWMAP); + return -1; + } + tlck = txLock(tid, ip, mp, tlckDATA); + llck = (linelock_t *) & tlck->lock; + ASSERT(llck->index == 0); + lv = &llck->lv[0]; + + lv->offset = 0; + lv->length = 6; /* tlckDATA slot size is 16 bytes */ + llck->index++; + + memcpy(mp->data, temp_table, sizeof(temp_table)); + + mark_metapage_dirty(mp); + release_metapage(mp); + + /* + * Logging is now directed by xtree tlocks + */ + clear_cflag(COMMIT_Dirtable, ip); + } + + offset = (index - 2) * sizeof(dir_table_slot_t); + page_offset = offset & (PSIZE - 1); + blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage; + if (page_offset == 0) { + /* + * This will be the beginning of a new page + */ + xaddr = 0; + if ((rc = + xtInsert(tid, ip, 0, blkno, sbi->nbperpage, + &xaddr, 0))) { + jFYI(1, ("add_index: xtInsert failed!\n")); + jfs_ip->next_index--; + return -1; + } + ip->i_size += PSIZE; + ip->i_blocks += LBLK2PBLK(sb, sbi->nbperpage); + + if ((mp = get_metapage(ip, blkno, PSIZE, 0))) + memset(mp->data, 0, PSIZE); /* Just looks better */ + else + xtTruncate(tid, ip, offset, COMMIT_PWMAP); + } else + mp = read_metapage(ip, blkno, PSIZE, 0); + + if (mp == 0) { + jERROR(1, ("add_index: get/read_metapage failed!\n")); + return -1; + } + + lock_index(tid, ip, mp, index); + + dirtab_slot = + (dir_table_slot_t *) ((char *) mp->data + page_offset); + dirtab_slot->flag = DIR_INDEX_VALID; + dirtab_slot->slot = slot; + DTSaddress(dirtab_slot, bn); + + mark_metapage_dirty(mp); + release_metapage(mp); + + return index; +} + +/* + * free_index() + * + * Marks an entry to the directory index table as free. 
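+ */
+
+/*
+ * [Editorial sketch, not part of the original patch]  free_index()
+ * below reuses the payload of a freed table slot to hold the caller-
+ * supplied index of the next free slot, threading the free entries
+ * into a singly linked list.  The same idea in miniature, with a
+ * hypothetical dirtab_sketch type standing in for dir_table_slot_t
+ * and an explicit list head in place of the caller-managed chain:
+ */
+struct dirtab_sketch {
+	int flag;		/* 0: free (DIR_INDEX_FREE analogue) */
+	int next;		/* when free: index of next free slot */
+};
+
+static void free_slot_sketch(struct dirtab_sketch *tab, int idx,
+			     int *freehead)
+{
+	tab[idx].flag = 0;		/* mark the slot free */
+	tab[idx].next = *freehead;	/* chain to the old list head */
+	*freehead = idx;		/* freed slot heads the list */
+}
+
+/*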
+ */ +static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next) +{ + dir_table_slot_t *dirtab_slot; + metapage_t *mp = 0; + + dirtab_slot = find_index(ip, index, &mp); + + if (dirtab_slot == 0) + return; + + dirtab_slot->flag = DIR_INDEX_FREE; + dirtab_slot->slot = dirtab_slot->addr1 = 0; + dirtab_slot->addr2 = cpu_to_le32(next); + + if (mp) { + lock_index(tid, ip, mp, index); + mark_metapage_dirty(mp); + release_metapage(mp); + } else + set_cflag(COMMIT_Dirtable, ip); +} + +/* + * modify_index() + * + * Changes an entry in the directory index table + */ +static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn, + int slot, metapage_t ** mp) +{ + dir_table_slot_t *dirtab_slot; + + dirtab_slot = find_index(ip, index, mp); + + if (dirtab_slot == 0) + return; + + DTSaddress(dirtab_slot, bn); + dirtab_slot->slot = slot; + + if (*mp) { + lock_index(tid, ip, *mp, index); + mark_metapage_dirty(*mp); + } else + set_cflag(COMMIT_Dirtable, ip); +} + +/* + * get_index() + * + * reads a directory table slot + */ +static int get_index(struct inode *ip, u32 index, + dir_table_slot_t * dirtab_slot) +{ + metapage_t *mp = 0; + dir_table_slot_t *slot; + + slot = find_index(ip, index, &mp); + if (slot == 0) { + return -EIO; + } + + memcpy(dirtab_slot, slot, sizeof(dir_table_slot_t)); + + if (mp) + release_metapage(mp); + + return 0; +} + +/* + * dtSearch() + * + * function: + * Search for the entry with specified key + * + * parameter: + * + * return: 0 - search result on stack, leaf page pinned; + * errno - I/O error + */ +int dtSearch(struct inode *ip, + component_t * key, ino_t * data, btstack_t * btstack, int flag) +{ + int rc = 0; + int cmp = 1; /* init for empty page */ + s64 bn; + metapage_t *mp; + dtpage_t *p; + s8 *stbl; + int base, index, lim; + btframe_t *btsp; + pxd_t *pxd; + int psize = 288; /* initial in-line directory */ + ino_t inumber; + component_t ciKey; + struct super_block *sb = ip->i_sb; + + ciKey.name = + (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), + GFP_NOFS); + if (ciKey.name == 0) { + rc = ENOMEM; + goto dtSearch_Exit2; + } + + + /* uppercase search key for c-i directory */ + UniStrcpy(ciKey.name, key->name); + ciKey.namlen = key->namlen; + + /* only uppercase if case-insensitive support is on */ + if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) { + ciToUpper(&ciKey); + } + BT_CLR(btstack); /* reset stack */ + + /* init level count for max pages to split */ + btstack->nsplit = 1; + + /* + * search down tree from root: + * + * between two consecutive entries of and of + * internal page, child page Pi contains entry with k, Ki <= K < Kj. + * + * if entry with search key K is not found + * internal page search find the entry with largest key Ki + * less than K which point to the child page to search; + * leaf page search find the entry with smallest key Kj + * greater than K so that the returned index is the position of + * the entry to be shifted right for insertion of new entry. + * for empty tree, search key is greater than any key of the tree. + * + * by convention, root bn = 0. + */ + for (bn = 0;;) { + /* get/pin the page to search */ + DT_GETPAGE(ip, bn, mp, psize, p, rc); + if (rc) + goto dtSearch_Exit1; + + /* get sorted entry table of the page */ + stbl = DT_GETSTBL(p); + + /* + * binary search with search key K on the current page. 
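+	 *
+	 *	[Editorial sketch, not part of the original patch]
+	 *	The loop that follows is a classic binary search which
+	 *	either hits the key or leaves "base" at the insertion
+	 *	point.  Stripped of the stbl indirection and the name
+	 *	comparison, the same control flow over a sorted int array:
+	 *
+	 *	static int bsearch_sketch(const int *keys, int nkeys,
+	 *				  int key, int *where)
+	 *	{
+	 *		int base, index, lim, cmp;
+	 *
+	 *		for (base = 0, lim = nkeys; lim; lim >>= 1) {
+	 *			index = base + (lim >> 1);
+	 *			cmp = (key > keys[index]) - (key < keys[index]);
+	 *			if (cmp == 0) {
+	 *				*where = index;	// search hit
+	 *				return 0;
+	 *			}
+	 *			if (cmp > 0) {	// key sorts after keys[index]
+	 *				base = index + 1;
+	 *				--lim;
+	 *			}
+	 *		}
+	 *		*where = base;	// smallest index with key > K
+	 *		return 1;	// miss: *where is insertion point
+	 *	}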
+ */ + for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) { + index = base + (lim >> 1); + + if (p->header.flag & BT_LEAF) { + /* uppercase leaf name to compare */ + cmp = + ciCompare(&ciKey, p, stbl[index], + JFS_SBI(sb)->mntflag); + } else { + /* router key is in uppercase */ + + cmp = dtCompare(&ciKey, p, stbl[index]); + + + } + if (cmp == 0) { + /* + * search hit + */ + /* search hit - leaf page: + * return the entry found + */ + if (p->header.flag & BT_LEAF) { + inumber = le32_to_cpu( + ((ldtentry_t *) & p->slot[stbl[index]])->inumber); + + /* + * search for JFS_LOOKUP + */ + if (flag == JFS_LOOKUP) { + *data = inumber; + rc = 0; + goto out; + } + + /* + * search for JFS_CREATE + */ + if (flag == JFS_CREATE) { + *data = inumber; + rc = EEXIST; + goto out; + } + + /* + * search for JFS_REMOVE or JFS_RENAME + */ + if ((flag == JFS_REMOVE || + flag == JFS_RENAME) && + *data != inumber) { + rc = ESTALE; + goto out; + } + + /* + * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME + */ + /* save search result */ + *data = inumber; + btsp = btstack->top; + btsp->bn = bn; + btsp->index = index; + btsp->mp = mp; + + rc = 0; + goto dtSearch_Exit1; + } + + /* search hit - internal page: + * descend/search its child page + */ + goto getChild; + } + + if (cmp > 0) { + base = index + 1; + --lim; + } + } + + /* + * search miss + * + * base is the smallest index with key (Kj) greater than + * search key (K) and may be zero or (maxindex + 1) index. + */ + /* + * search miss - leaf page + * + * return location of entry (base) where new entry with + * search key K is to be inserted. + */ + if (p->header.flag & BT_LEAF) { + /* + * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME + */ + if (flag == JFS_LOOKUP || flag == JFS_REMOVE || + flag == JFS_RENAME) { + rc = ENOENT; + goto out; + } + + /* + * search for JFS_CREATE|JFS_FINDDIR: + * + * save search result + */ + *data = 0; + btsp = btstack->top; + btsp->bn = bn; + btsp->index = base; + btsp->mp = mp; + + rc = 0; + goto dtSearch_Exit1; + } + + /* + * search miss - internal page + * + * if base is non-zero, decrement base by one to get the parent + * entry of the child page to search. + */ + index = base ? base - 1 : base; + + /* + * go down to child page + */ + getChild: + /* update max. number of pages to split */ + if (btstack->nsplit >= 8) { + /* Something's corrupted, mark filesytem dirty so + * chkdsk will fix it. + */ + jERROR(1, ("stack overrun in dtSearch!\n")); + updateSuper(sb, FM_DIRTY); + rc = EIO; + goto out; + } + btstack->nsplit++; + + /* push (bn, index) of the parent page/entry */ + BT_PUSH(btstack, bn, index); + + /* get the child page block number */ + pxd = (pxd_t *) & p->slot[stbl[index]]; + bn = addressPXD(pxd); + psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; + + /* unpin the parent page */ + DT_PUTPAGE(mp); + } + + out: + DT_PUTPAGE(mp); + + dtSearch_Exit1: + + kfree(ciKey.name); + + dtSearch_Exit2: + + return rc; +} + + +/* + * dtInsert() + * + * function: insert an entry to directory tree + * + * parameter: + * + * return: 0 - success; + * errno - failure; + */ +int dtInsert(tid_t tid, struct inode *ip, + component_t * name, ino_t * fsn, btstack_t * btstack) +{ + int rc = 0; + metapage_t *mp; /* meta-page buffer */ + dtpage_t *p; /* base B+-tree index page */ + s64 bn; + int index; + dtsplit_t split; /* split information */ + ddata_t data; + dtlock_t *dtlck; + int n; + tlock_t *tlck; + lv_t *lv; + + /* + * retrieve search result + * + * dtSearch() returns (leaf page pinned, index at which to insert). + * n.b. 
dtSearch() may return index of (maxindex + 1) of + * the full page. + */ + DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); + + /* + * insert entry for new key + */ + if (DO_INDEX(ip)) { + if (JFS_IP(ip)->next_index == DIREND) { + DT_PUTPAGE(mp); + return EMLINK; + } + n = NDTLEAF(name->namlen); + data.leaf.tid = tid; + data.leaf.ip = ip; + } else { + n = NDTLEAF_LEGACY(name->namlen); + data.leaf.ip = 0; /* signifies legacy directory format */ + } + data.leaf.ino = cpu_to_le32(*fsn); + + /* + * leaf page does not have enough room for new entry: + * + * extend/split the leaf page; + * + * dtSplitUp() will insert the entry and unpin the leaf page. + */ + if (n > p->header.freecnt) { + split.mp = mp; + split.index = index; + split.nslot = n; + split.key = name; + split.data = &data; + rc = dtSplitUp(tid, ip, &split, btstack); + return rc; + } + + /* + * leaf page does have enough room for new entry: + * + * insert the new data entry into the leaf page; + */ + BT_MARK_DIRTY(mp, ip); + /* + * acquire a transaction lock on the leaf page + */ + tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); + dtlck = (dtlock_t *) & tlck->lock; + ASSERT(dtlck->index == 0); + lv = (lv_t *) & dtlck->lv[0]; + + /* linelock header */ + lv->offset = 0; + lv->length = 1; + dtlck->index++; + + dtInsertEntry(p, index, name, &data, &dtlck); + + /* linelock stbl of non-root leaf page */ + if (!(p->header.flag & BT_ROOT)) { + if (dtlck->index >= dtlck->maxcnt) + dtlck = (dtlock_t *) txLinelock(dtlck); + lv = (lv_t *) & dtlck->lv[dtlck->index]; + n = index >> L2DTSLOTSIZE; + lv->offset = p->header.stblindex + n; + lv->length = + ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; + dtlck->index++; + } + + /* unpin the leaf page */ + DT_PUTPAGE(mp); + + return 0; +} + + +/* + * dtSplitUp() + * + * function: propagate insertion bottom up; + * + * parameter: + * + * return: 0 - success; + * errno - failure; + * leaf page unpinned; + */ +static int dtSplitUp(tid_t tid, + struct inode *ip, dtsplit_t * split, btstack_t * btstack) +{ + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + int rc = 0; + metapage_t *smp; + dtpage_t *sp; /* split page */ + metapage_t *rmp; + dtpage_t *rp; /* new right page split from sp */ + pxd_t rpxd; /* new right page extent descriptor */ + metapage_t *lmp; + dtpage_t *lp; /* left child page */ + int skip; /* index of entry of insertion */ + btframe_t *parent; /* parent page entry on traverse stack */ + s64 xaddr, nxaddr; + int xlen, xsize; + pxdlist_t pxdlist; + pxd_t *pxd; + component_t key = { 0, 0 }; + ddata_t *data = split->data; + int n; + dtlock_t *dtlck; + tlock_t *tlck; + lv_t *lv; + + /* get split page */ + smp = split->mp; + sp = DT_PAGE(ip, smp); + + key.name = + (wchar_t *) kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), + GFP_NOFS); + if (key.name == 0) { + DT_PUTPAGE(smp); + rc = ENOMEM; + goto dtSplitUp_Exit; + } + + /* + * split leaf page + * + * The split routines insert the new entry, and + * acquire txLock as appropriate. 
+ */ + /* + * split root leaf page: + */ + if (sp->header.flag & BT_ROOT) { + /* + * allocate a single extent child page + */ + xlen = 1; + n = sbi->bsize >> L2DTSLOTSIZE; + n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ + n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */ + if (n <= split->nslot) + xlen++; + if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) + goto freeKeyName; + + pxdlist.maxnpxd = 1; + pxdlist.npxd = 0; + pxd = &pxdlist.pxd[0]; + PXDaddress(pxd, xaddr); + PXDlength(pxd, xlen); + split->pxdlist = &pxdlist; + rc = dtSplitRoot(tid, ip, split, &rmp); + + DT_PUTPAGE(rmp); + DT_PUTPAGE(smp); + + goto freeKeyName; + } + + /* + * extend first leaf page + * + * extend the 1st extent if less than buffer page size + * (dtExtendPage() reurns leaf page unpinned) + */ + pxd = &sp->header.self; + xlen = lengthPXD(pxd); + xsize = xlen << sbi->l2bsize; + if (xsize < PSIZE) { + xaddr = addressPXD(pxd); + n = xsize >> L2DTSLOTSIZE; + n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ + if ((n + sp->header.freecnt) <= split->nslot) + n = xlen + (xlen << 1); + else + n = xlen; + if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, + (s64) n, &nxaddr))) + goto extendOut; + + pxdlist.maxnpxd = 1; + pxdlist.npxd = 0; + pxd = &pxdlist.pxd[0]; + PXDaddress(pxd, nxaddr) + PXDlength(pxd, xlen + n); + split->pxdlist = &pxdlist; + if ((rc = dtExtendPage(tid, ip, split, btstack))) { + nxaddr = addressPXD(pxd); + if (xaddr != nxaddr) { + /* free relocated extent */ + xlen = lengthPXD(pxd); + dbFree(ip, nxaddr, (s64) xlen); + } else { + /* free extended delta */ + xlen = lengthPXD(pxd) - n; + xaddr = addressPXD(pxd) + xlen; + dbFree(ip, xaddr, (s64) n); + } + } + + extendOut: + DT_PUTPAGE(smp); + goto freeKeyName; + } + + /* + * split leaf page into and a new right page . + * + * return pinned and its extent descriptor + */ + /* + * allocate new directory page extent and + * new index page(s) to cover page split(s) + * + * allocation hint: ? + */ + n = btstack->nsplit; + pxdlist.maxnpxd = pxdlist.npxd = 0; + xlen = sbi->nbperpage; + for (pxd = pxdlist.pxd; n > 0; n--, pxd++) { + if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) { + PXDaddress(pxd, xaddr); + PXDlength(pxd, xlen); + pxdlist.maxnpxd++; + continue; + } + + DT_PUTPAGE(smp); + + /* undo allocation */ + goto splitOut; + } + + split->pxdlist = &pxdlist; + if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) { + DT_PUTPAGE(smp); + + /* undo allocation */ + goto splitOut; + } + + /* + * propagate up the router entry for the leaf page just split + * + * insert a router entry for the new page into the parent page, + * propagate the insert/split up the tree by walking back the stack + * of (bn of parent page, index of child page entry in parent page) + * that were traversed during the search for the page that split. + * + * the propagation of insert/split up the tree stops if the root + * splits or the page inserted into doesn't have to split to hold + * the new entry. + * + * the parent entry for the split page remains the same, and + * a new entry is inserted at its right with the first key and + * block number of the new right page. + * + * There are a maximum of 4 pages pinned at any time: + * two children, left parent and right parent (when the parent splits). + * keep the child pages pinned while working on the parent. + * make sure that all pins are released at exit. 
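+	 *
+	 *	[Editorial sketch, not part of the original patch]
+	 *	The loop that follows is the classic bottom-up pattern:
+	 *	pop the (page, index) frames saved during the descent,
+	 *	insert a router entry for the new right sibling, and
+	 *	split the parent too when it has no room.  A toy model
+	 *	that only counts how many levels split, given the free
+	 *	slot count of each page on the saved path (deepest last):
+	 *
+	 *	struct path_frame_sketch { int freecnt; };
+	 *
+	 *	static int levels_split_sketch(struct path_frame_sketch *path,
+	 *				       int depth, int need)
+	 *	{
+	 *		int splits = 0;
+	 *
+	 *		while (depth-- > 0) {
+	 *			if (path[depth].freecnt >= need)
+	 *				return splits;	// room: stop here
+	 *			splits++;  // full: split, insert one level up
+	 *		}
+	 *		return splits;	// past the root: tree grows a level
+	 *	}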
+ */ + while ((parent = BT_POP(btstack)) != NULL) { + /* parent page specified by stack frame */ + + /* keep current child pages (, ) pinned */ + lmp = smp; + lp = sp; + + /* + * insert router entry in parent for new right child page + */ + /* get the parent page */ + DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); + if (rc) { + DT_PUTPAGE(lmp); + DT_PUTPAGE(rmp); + goto splitOut; + } + + /* + * The new key entry goes ONE AFTER the index of parent entry, + * because the split was to the right. + */ + skip = parent->index + 1; + + /* + * compute the key for the router entry + * + * key suffix compression: + * for internal pages that have leaf pages as children, + * retain only what's needed to distinguish between + * the new entry and the entry on the page to its left. + * If the keys compare equal, retain the entire key. + * + * note that compression is performed only at computing + * router key at the lowest internal level. + * further compression of the key between pairs of higher + * level internal pages loses too much information and + * the search may fail. + * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,} + * results in two adjacent parent entries (a)(xx). + * if split occurs between these two entries, and + * if compression is applied, the router key of parent entry + * of right page (x) will divert search for x into right + * subtree and miss x in the left subtree.) + * + * the entire key must be retained for the next-to-leftmost + * internal key at any level of the tree, or search may fail + * (e.g., ?) + */ + switch (rp->header.flag & BT_TYPE) { + case BT_LEAF: + /* + * compute the length of prefix for suffix compression + * between last entry of left page and first entry + * of right page + */ + if ((sp->header.flag & BT_ROOT && skip > 1) || + sp->header.prev != 0 || skip > 1) { + /* compute uppercase router prefix key */ + ciGetLeafPrefixKey(lp, + lp->header.nextindex - 1, + rp, 0, &key, sbi->mntflag); + } else { + /* next to leftmost entry of + lowest internal level */ + + /* compute uppercase router key */ + dtGetKey(rp, 0, &key, sbi->mntflag); + key.name[key.namlen] = 0; + + if ((sbi->mntflag & JFS_OS2) == JFS_OS2) + ciToUpper(&key); + } + + n = NDTINTERNAL(key.namlen); + break; + + case BT_INTERNAL: + dtGetKey(rp, 0, &key, sbi->mntflag); + n = NDTINTERNAL(key.namlen); + break; + + default: + jERROR(2, ("dtSplitUp(): UFO!\n")); + break; + } + + /* unpin left child page */ + DT_PUTPAGE(lmp); + + /* + * compute the data for the router entry + */ + data->xd = rpxd; /* child page xd */ + + /* + * parent page is full - split the parent page + */ + if (n > sp->header.freecnt) { + /* init for parent page split */ + split->mp = smp; + split->index = skip; /* index at insert */ + split->nslot = n; + split->key = &key; + /* split->data = data; */ + + /* unpin right child page */ + DT_PUTPAGE(rmp); + + /* The split routines insert the new entry, + * acquire txLock as appropriate. + * return pinned and its block number . + */ + rc = (sp->header.flag & BT_ROOT) ? 
+ dtSplitRoot(tid, ip, split, &rmp) : + dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd); + if (rc) { + DT_PUTPAGE(smp); + goto splitOut; + } + + /* smp and rmp are pinned */ + } + /* + * parent page is not full - insert router entry in parent page + */ + else { + BT_MARK_DIRTY(smp, ip); + /* + * acquire a transaction lock on the parent page + */ + tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); + dtlck = (dtlock_t *) & tlck->lock; + ASSERT(dtlck->index == 0); + lv = (lv_t *) & dtlck->lv[0]; + + /* linelock header */ + lv->offset = 0; + lv->length = 1; + dtlck->index++; + + /* linelock stbl of non-root parent page */ + if (!(sp->header.flag & BT_ROOT)) { + lv++; + n = skip >> L2DTSLOTSIZE; + lv->offset = sp->header.stblindex + n; + lv->length = + ((sp->header.nextindex - + 1) >> L2DTSLOTSIZE) - n + 1; + dtlck->index++; + } + + dtInsertEntry(sp, skip, &key, data, &dtlck); + + /* exit propagate up */ + break; + } + } + + /* unpin current split and its right page */ + DT_PUTPAGE(smp); + DT_PUTPAGE(rmp); + + /* + * free remaining extents allocated for split + */ + splitOut: + n = pxdlist.npxd; + pxd = &pxdlist.pxd[n]; + for (; n < pxdlist.maxnpxd; n++, pxd++) + dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd)); + + freeKeyName: + kfree(key.name); + + dtSplitUp_Exit: + + return rc; +} + + +/* + * dtSplitPage() + * + * function: Split a non-root page of a btree. + * + * parameter: + * + * return: 0 - success; + * errno - failure; + * return split and new page pinned; + */ +static int dtSplitPage(tid_t tid, struct inode *ip, dtsplit_t * split, + metapage_t ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp) +{ + struct super_block *sb = ip->i_sb; + int rc = 0; + metapage_t *smp; + dtpage_t *sp; + metapage_t *rmp; + dtpage_t *rp; /* new right page allocated */ + s64 rbn; /* new right page block number */ + metapage_t *mp; + dtpage_t *p; + s64 nextbn; + pxdlist_t *pxdlist; + pxd_t *pxd; + int skip, nextindex, half, left, nxt, off, si; + ldtentry_t *ldtentry; + idtentry_t *idtentry; + u8 *stbl; + dtslot_t *f; + int fsi, stblsize; + int n; + dtlock_t *sdtlck, *rdtlck; + tlock_t *tlck; + dtlock_t *dtlck; + lv_t *slv, *rlv, *lv; + + /* get split page */ + smp = split->mp; + sp = DT_PAGE(ip, smp); + + /* + * allocate the new right page for the split + */ + pxdlist = split->pxdlist; + pxd = &pxdlist->pxd[pxdlist->npxd]; + pxdlist->npxd++; + rbn = addressPXD(pxd); + rmp = get_metapage(ip, rbn, PSIZE, 1); + if (rmp == NULL) + return EIO; + + jEVENT(0, + ("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p\n", ip, smp, rmp)); + + BT_MARK_DIRTY(rmp, ip); + /* + * acquire a transaction lock on the new right page + */ + tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); + rdtlck = (dtlock_t *) & tlck->lock; + + rp = (dtpage_t *) rmp->data; + *rpp = rp; + rp->header.self = *pxd; + + BT_MARK_DIRTY(smp, ip); + /* + * acquire a transaction lock on the split page + * + * action: + */ + tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); + sdtlck = (dtlock_t *) & tlck->lock; + + /* linelock header of split page */ + ASSERT(sdtlck->index == 0); + slv = (lv_t *) & sdtlck->lv[0]; + slv->offset = 0; + slv->length = 1; + sdtlck->index++; + + /* + * initialize/update sibling pointers between sp and rp + */ + nextbn = le64_to_cpu(sp->header.next); + rp->header.next = cpu_to_le64(nextbn); + rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); + sp->header.next = cpu_to_le64(rbn); + + /* + * initialize new right page + */ + rp->header.flag = sp->header.flag; + + /* compute sorted entry table at start of extent data area */ + 
rp->header.nextindex = 0; + rp->header.stblindex = 1; + + n = PSIZE >> L2DTSLOTSIZE; + rp->header.maxslot = n; + stblsize = (n + 31) >> L2DTSLOTSIZE; /* in unit of slot */ + + /* init freelist */ + fsi = rp->header.stblindex + stblsize; + rp->header.freelist = fsi; + rp->header.freecnt = rp->header.maxslot - fsi; + + /* + * sequential append at tail: append without split + * + * If splitting the last page on a level because of appending + * a entry to it (skip is maxentry), it's likely that the access is + * sequential. Adding an empty page on the side of the level is less + * work and can push the fill factor much higher than normal. + * If we're wrong it's no big deal, we'll just do the split the right + * way next time. + * (It may look like it's equally easy to do a similar hack for + * reverse sorted data, that is, split the tree left, + * but it's not. Be my guest.) + */ + if (nextbn == 0 && split->index == sp->header.nextindex) { + /* linelock header + stbl (first slot) of new page */ + rlv = (lv_t *) & rdtlck->lv[rdtlck->index]; + rlv->offset = 0; + rlv->length = 2; + rdtlck->index++; + + /* + * initialize freelist of new right page + */ + f = &rp->slot[fsi]; + for (fsi++; fsi < rp->header.maxslot; f++, fsi++) + f->next = fsi; + f->next = -1; + + /* insert entry at the first entry of the new right page */ + dtInsertEntry(rp, 0, split->key, split->data, &rdtlck); + + goto out; + } + + /* + * non-sequential insert (at possibly middle page) + */ + + /* + * update prev pointer of previous right sibling page; + */ + if (nextbn != 0) { + DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); + if (rc) + return rc; + + BT_MARK_DIRTY(mp, ip); + /* + * acquire a transaction lock on the next page + */ + tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); + jEVENT(0, + ("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p\n", + tlck, ip, mp)); + dtlck = (dtlock_t *) & tlck->lock; + + /* linelock header of previous right sibling page */ + lv = (lv_t *) & dtlck->lv[dtlck->index]; + lv->offset = 0; + lv->length = 1; + dtlck->index++; + + p->header.prev = cpu_to_le64(rbn); + + DT_PUTPAGE(mp); + } + + /* + * split the data between the split and right pages. 
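+	 *
+	 *	[Editorial sketch, not part of the original patch]
+	 *	The fill-factor loop that follows accumulates per-entry
+	 *	slot counts until at least half of the page is spoken for
+	 *	on the left.  The same logic with an int array standing in
+	 *	for the NDTLEAF()/NDTINTERNAL() size lookups, where "skip"
+	 *	is the insertion position of the new entry of size "nslot":
+	 *
+	 *	static int split_point_sketch(const int *sizes, int nentries,
+	 *				      int skip, int nslot, int half)
+	 *	{
+	 *		int left = 0, nxt, off, n;
+	 *
+	 *		for (nxt = off = 0; nxt < nentries; ++off) {
+	 *			if (off == skip)
+	 *				n = nslot;  // count the incoming entry
+	 *			else {
+	 *				n = sizes[nxt];
+	 *				++nxt;	// advance past existing entries
+	 *			}
+	 *			left += n;
+	 *			if (left >= half)
+	 *				break;
+	 *		}
+	 *		return nxt;  // first entry to move to the right page
+	 *	}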
+ */ + skip = split->index; + half = (PSIZE >> L2DTSLOTSIZE) >> 1; /* swag */ + left = 0; + + /* + * compute fill factor for split pages + * + * traces the next entry to move to rp + * traces the next entry to stay in sp + */ + stbl = (u8 *) & sp->slot[sp->header.stblindex]; + nextindex = sp->header.nextindex; + for (nxt = off = 0; nxt < nextindex; ++off) { + if (off == skip) + /* check for fill factor with new entry size */ + n = split->nslot; + else { + si = stbl[nxt]; + switch (sp->header.flag & BT_TYPE) { + case BT_LEAF: + ldtentry = (ldtentry_t *) & sp->slot[si]; + if (DO_INDEX(ip)) + n = NDTLEAF(ldtentry->namlen); + else + n = NDTLEAF_LEGACY(ldtentry-> + namlen); + break; + + case BT_INTERNAL: + idtentry = (idtentry_t *) & sp->slot[si]; + n = NDTINTERNAL(idtentry->namlen); + break; + + default: + break; + } + + ++nxt; /* advance to next entry to move in sp */ + } + + left += n; + if (left >= half) + break; + } + + /* poins to the 1st entry to move */ + + /* + * move entries to right page + * + * dtMoveEntry() initializes rp and reserves entry for insertion + * + * split page moved out entries are linelocked; + * new/right page moved in entries are linelocked; + */ + /* linelock header + stbl of new right page */ + rlv = (lv_t *) & rdtlck->lv[rdtlck->index]; + rlv->offset = 0; + rlv->length = 5; + rdtlck->index++; + + dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip)); + + sp->header.nextindex = nxt; + + /* + * finalize freelist of new right page + */ + fsi = rp->header.freelist; + f = &rp->slot[fsi]; + for (fsi++; fsi < rp->header.maxslot; f++, fsi++) + f->next = fsi; + f->next = -1; + + /* + * Update directory index table for entries now in right page + */ + if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { + mp = 0; + stbl = DT_GETSTBL(rp); + for (n = 0; n < rp->header.nextindex; n++) { + ldtentry = (ldtentry_t *) & rp->slot[stbl[n]]; + modify_index(tid, ip, le32_to_cpu(ldtentry->index), + rbn, n, &mp); + } + if (mp) + release_metapage(mp); + } + + /* + * the skipped index was on the left page, + */ + if (skip <= off) { + /* insert the new entry in the split page */ + dtInsertEntry(sp, skip, split->key, split->data, &sdtlck); + + /* linelock stbl of split page */ + if (sdtlck->index >= sdtlck->maxcnt) + sdtlck = (dtlock_t *) txLinelock(sdtlck); + slv = (lv_t *) & sdtlck->lv[sdtlck->index]; + n = skip >> L2DTSLOTSIZE; + slv->offset = sp->header.stblindex + n; + slv->length = + ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; + sdtlck->index++; + } + /* + * the skipped index was on the right page, + */ + else { + /* adjust the skip index to reflect the new position */ + skip -= nxt; + + /* insert the new entry in the right page */ + dtInsertEntry(rp, skip, split->key, split->data, &rdtlck); + } + + out: + *rmpp = rmp; + *rpxdp = *pxd; + + ip->i_blocks += LBLK2PBLK(sb, lengthPXD(pxd)); + + jEVENT(0, ("dtSplitPage: ip:0x%p sp:0x%p rp:0x%p\n", ip, sp, rp)); + return 0; +} + + +/* + * dtExtendPage() + * + * function: extend 1st/only directory leaf page + * + * parameter: + * + * return: 0 - success; + * errno - failure; + * return extended page pinned; + */ +static int dtExtendPage(tid_t tid, + struct inode *ip, dtsplit_t * split, btstack_t * btstack) +{ + struct super_block *sb = ip->i_sb; + int rc; + metapage_t *smp, *pmp, *mp; + dtpage_t *sp, *pp; + pxdlist_t *pxdlist; + pxd_t *pxd, *tpxd; + int xlen, xsize; + int newstblindex, newstblsize; + int oldstblindex, oldstblsize; + int fsi, last; + dtslot_t *f; + btframe_t *parent; + int n; + dtlock_t *dtlck; + s64 xaddr, txaddr; 
+ tlock_t *tlck; + pxdlock_t *pxdlock; + lv_t *lv; + uint type; + ldtentry_t *ldtentry; + u8 *stbl; + + /* get page to extend */ + smp = split->mp; + sp = DT_PAGE(ip, smp); + + /* get parent/root page */ + parent = BT_POP(btstack); + DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc); + if (rc) + return (rc); + + /* + * extend the extent + */ + pxdlist = split->pxdlist; + pxd = &pxdlist->pxd[pxdlist->npxd]; + pxdlist->npxd++; + + xaddr = addressPXD(pxd); + tpxd = &sp->header.self; + txaddr = addressPXD(tpxd); + /* in-place extension */ + if (xaddr == txaddr) { + type = tlckEXTEND; + } + /* relocation */ + else { + type = tlckNEW; + + /* save moved extent descriptor for later free */ + tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE); + pxdlock = (pxdlock_t *) & tlck->lock; + pxdlock->flag = mlckFREEPXD; + pxdlock->pxd = sp->header.self; + pxdlock->index = 1; + + /* + * Update directory index table to reflect new page address + */ + if (DO_INDEX(ip)) { + mp = 0; + stbl = DT_GETSTBL(sp); + for (n = 0; n < sp->header.nextindex; n++) { + ldtentry = + (ldtentry_t *) & sp->slot[stbl[n]]; + modify_index(tid, ip, + le32_to_cpu(ldtentry->index), + xaddr, n, &mp); + } + if (mp) + release_metapage(mp); + } + } + + /* + * extend the page + */ + sp->header.self = *pxd; + + jEVENT(0, + ("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p\n", ip, smp, sp)); + + BT_MARK_DIRTY(smp, ip); + /* + * acquire a transaction lock on the extended/leaf page + */ + tlck = txLock(tid, ip, smp, tlckDTREE | type); + dtlck = (dtlock_t *) & tlck->lock; + lv = (lv_t *) & dtlck->lv[0]; + + /* update buffer extent descriptor of extended page */ + xlen = lengthPXD(pxd); + xsize = xlen << JFS_SBI(sb)->l2bsize; +#ifdef _STILL_TO_PORT + bmSetXD(smp, xaddr, xsize); +#endif /* _STILL_TO_PORT */ + + /* + * copy old stbl to new stbl at start of extended area + */ + oldstblindex = sp->header.stblindex; + oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE; + newstblindex = sp->header.maxslot; + n = xsize >> L2DTSLOTSIZE; + newstblsize = (n + 31) >> L2DTSLOTSIZE; + memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex], + sp->header.nextindex); + + /* + * in-line extension: linelock old area of extended page + */ + if (type == tlckEXTEND) { + /* linelock header */ + lv->offset = 0; + lv->length = 1; + dtlck->index++; + lv++; + + /* linelock new stbl of extended page */ + lv->offset = newstblindex; + lv->length = newstblsize; + } + /* + * relocation: linelock whole relocated area + */ + else { + lv->offset = 0; + lv->length = sp->header.maxslot + newstblsize; + } + + dtlck->index++; + + sp->header.maxslot = n; + sp->header.stblindex = newstblindex; + /* sp->header.nextindex remains the same */ + + /* + * add old stbl region at head of freelist + */ + fsi = oldstblindex; + f = &sp->slot[fsi]; + last = sp->header.freelist; + for (n = 0; n < oldstblsize; n++, fsi++, f++) { + f->next = last; + last = fsi; + } + sp->header.freelist = last; + sp->header.freecnt += oldstblsize; + + /* + * append free region of newly extended area at tail of freelist + */ + /* init free region of newly extended area */ + fsi = n = newstblindex + newstblsize; + f = &sp->slot[fsi]; + for (fsi++; fsi < sp->header.maxslot; f++, fsi++) + f->next = fsi; + f->next = -1; + + /* append new free region at tail of old freelist */ + fsi = sp->header.freelist; + if (fsi == -1) + sp->header.freelist = n; + else { + do { + f = &sp->slot[fsi]; + fsi = f->next; + } while (fsi != -1); + + f->next = n; + } + + sp->header.freecnt += sp->header.maxslot - n; + + /* + * insert the new entry 
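+	 * (the extension leaves the existing entries in place, so the
+	 * caller's original insertion index is still valid here)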
+ */ + dtInsertEntry(sp, split->index, split->key, split->data, &dtlck); + + BT_MARK_DIRTY(pmp, ip); + /* + * linelock any freeslots residing in old extent + */ + if (type == tlckEXTEND) { + n = sp->header.maxslot >> 2; + if (sp->header.freelist < n) + dtLinelockFreelist(sp, n, &dtlck); + } + + /* + * update parent entry on the parent/root page + */ + /* + * acquire a transaction lock on the parent/root page + */ + tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); + dtlck = (dtlock_t *) & tlck->lock; + lv = (lv_t *) & dtlck->lv[dtlck->index]; + + /* linelock parent entry - 1st slot */ + lv->offset = 1; + lv->length = 1; + dtlck->index++; + + /* update the parent pxd for page extension */ + tpxd = (pxd_t *) & pp->slot[1]; + *tpxd = *pxd; + + /* Since the directory might have an EA and/or ACL associated with it + * we need to make sure we take that into account when setting the + * i_nblocks + */ + ip->i_blocks = LBLK2PBLK(ip->i_sb, xlen + + ((JFS_IP(ip)->ea.flag & DXD_EXTENT) ? + lengthDXD(&JFS_IP(ip)->ea) : 0) + + ((JFS_IP(ip)->acl.flag & DXD_EXTENT) ? + lengthDXD(&JFS_IP(ip)->acl) : 0)); + + jEVENT(0, + ("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p\n", ip, smp, sp)); + + + DT_PUTPAGE(pmp); + return 0; +} + + +/* + * dtSplitRoot() + * + * function: + * split the full root page into + * original/root/split page and new right page + * i.e., root remains fixed in tree anchor (inode) and + * the root is copied to a single new right child page + * since root page << non-root page, and + * the split root page contains a single entry for the + * new right child page. + * + * parameter: + * + * return: 0 - success; + * errno - failure; + * return new page pinned; + */ +static int dtSplitRoot(tid_t tid, + struct inode *ip, dtsplit_t * split, metapage_t ** rmpp) +{ + struct super_block *sb = ip->i_sb; + metapage_t *smp; + dtroot_t *sp; + metapage_t *rmp; + dtpage_t *rp; + s64 rbn; + int xlen; + int xsize; + dtslot_t *f; + s8 *stbl; + int fsi, stblsize, n; + idtentry_t *s; + pxd_t *ppxd; + pxdlist_t *pxdlist; + pxd_t *pxd; + dtlock_t *dtlck; + tlock_t *tlck; + lv_t *lv; + + /* get split root page */ + smp = split->mp; + sp = &JFS_IP(ip)->i_dtroot; + + /* + * allocate/initialize a single (right) child page + * + * N.B. at first split, a one (or two) block to fit new entry + * is allocated; at subsequent split, a full page is allocated; + */ + pxdlist = split->pxdlist; + pxd = &pxdlist->pxd[pxdlist->npxd]; + pxdlist->npxd++; + rbn = addressPXD(pxd); + xlen = lengthPXD(pxd); + xsize = xlen << JFS_SBI(sb)->l2bsize; + rmp = get_metapage(ip, rbn, xsize, 1); + rp = rmp->data; + + BT_MARK_DIRTY(rmp, ip); + /* + * acquire a transaction lock on the new right page + */ + tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); + dtlck = (dtlock_t *) & tlck->lock; + + rp->header.flag = + (sp->header.flag & BT_LEAF) ? 
BT_LEAF : BT_INTERNAL;
+	rp->header.self = *pxd;
+
+	/* initialize sibling pointers */
+	rp->header.next = 0;
+	rp->header.prev = 0;
+
+	/*
+	 *	move in-line root page into new right page extent
+	 */
+	/* linelock header + copied entries + new stbl (1st slot) in new page */
+	ASSERT(dtlck->index == 0);
+	lv = (lv_t *) & dtlck->lv[0];
+	lv->offset = 0;
+	lv->length = 10;	/* 1 + 8 + 1 */
+	dtlck->index++;
+
+	n = xsize >> L2DTSLOTSIZE;
+	rp->header.maxslot = n;
+	stblsize = (n + 31) >> L2DTSLOTSIZE;
+
+	/* copy old stbl to new stbl at start of extended area */
+	rp->header.stblindex = DTROOTMAXSLOT;
+	stbl = (s8 *) & rp->slot[DTROOTMAXSLOT];
+	memcpy(stbl, sp->header.stbl, sp->header.nextindex);
+	rp->header.nextindex = sp->header.nextindex;
+
+	/* copy old data area to start of new data area */
+	memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE);
+
+	/*
+	 * append free region of newly extended area at tail of freelist
+	 */
+	/* init free region of newly extended area */
+	fsi = n = DTROOTMAXSLOT + stblsize;
+	f = &rp->slot[fsi];
+	for (fsi++; fsi < rp->header.maxslot; f++, fsi++)
+		f->next = fsi;
+	f->next = -1;
+
+	/* append new free region at tail of old freelist */
+	fsi = sp->header.freelist;
+	if (fsi == -1)
+		rp->header.freelist = n;
+	else {
+		rp->header.freelist = fsi;
+
+		do {
+			f = &rp->slot[fsi];
+			fsi = f->next;
+		} while (fsi != -1);
+
+		f->next = n;
+	}
+
+	rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n;
+
+	/*
+	 * Update directory index table for entries now in right page
+	 */
+	if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) {
+		metapage_t *mp = 0;
+		ldtentry_t *ldtentry;
+
+		stbl = DT_GETSTBL(rp);
+		for (n = 0; n < rp->header.nextindex; n++) {
+			ldtentry = (ldtentry_t *) & rp->slot[stbl[n]];
+			modify_index(tid, ip, le32_to_cpu(ldtentry->index),
+				     rbn, n, &mp);
+		}
+		if (mp)
+			release_metapage(mp);
+	}
+	/*
+	 * insert the new entry into the new right/child page
+	 * (skip index in the new right page will not change)
+	 */
+	dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
+
+	/*
+	 *	reset parent/root page
+	 *
+	 * set the 1st entry offset to 0, which forces the left-most key
+	 * at any level of the tree to be less than any search key.
+	 *
+	 * The btree comparison code guarantees that the left-most key on any
+	 * level of the tree is never used, so it doesn't need to be filled in.
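+	 *
+	 * For example, after the first split of the in-line root the
+	 * root holds exactly one router entry (namlen = 0, see below)
+	 * pointing at the new child page; all real entries live in
+	 * that child.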
+	 */
+	BT_MARK_DIRTY(smp, ip);
+	/*
+	 * acquire a transaction lock on the root page (in-memory inode)
+	 */
+	tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT);
+	dtlck = (dtlock_t *) & tlck->lock;
+
+	/* linelock root */
+	ASSERT(dtlck->index == 0);
+	lv = (lv_t *) & dtlck->lv[0];
+	lv->offset = 0;
+	lv->length = DTROOTMAXSLOT;
+	dtlck->index++;
+
+	/* update page header of root */
+	if (sp->header.flag & BT_LEAF) {
+		sp->header.flag &= ~BT_LEAF;
+		sp->header.flag |= BT_INTERNAL;
+	}
+
+	/* init the first entry */
+	s = (idtentry_t *) & sp->slot[DTENTRYSTART];
+	ppxd = (pxd_t *) s;
+	*ppxd = *pxd;
+	s->next = -1;
+	s->namlen = 0;
+
+	stbl = sp->header.stbl;
+	stbl[0] = DTENTRYSTART;
+	sp->header.nextindex = 1;
+
+	/* init freelist */
+	fsi = DTENTRYSTART + 1;
+	f = &sp->slot[fsi];
+
+	/* init free region of remaining area */
+	for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++)
+		f->next = fsi;
+	f->next = -1;
+
+	sp->header.freelist = DTENTRYSTART + 1;
+	sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1);
+
+	*rmpp = rmp;
+
+	ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd));
+	return 0;
+}
+
+
+/*
+ *	dtDelete()
+ *
+ * function: delete the entry(s) referenced by a key.
+ *
+ * parameter:
+ *
+ * return:
+ */
+int dtDelete(tid_t tid,
+	     struct inode *ip, component_t * key, ino_t * ino, int flag)
+{
+	int rc = 0;
+	s64 bn;
+	metapage_t *mp, *imp;
+	dtpage_t *p;
+	int index;
+	btstack_t btstack;
+	dtlock_t *dtlck;
+	tlock_t *tlck;
+	lv_t *lv;
+	int i;
+	ldtentry_t *ldtentry;
+	u8 *stbl;
+	u32 table_index, next_index;
+	metapage_t *nmp;
+	dtpage_t *np;
+
+	/*
+	 *	search for the entry to delete:
+	 *
+	 * dtSearch() returns (leaf page pinned, index at which to delete).
+	 */
+	if ((rc = dtSearch(ip, key, ino, &btstack, flag)))
+		return rc;
+
+	/* retrieve search result */
+	DT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
+
+	/*
+	 * We need to put the index of the next entry into the
+	 * directory index table in order to resume a readdir from this
+	 * entry.
+	 */
+	if (DO_INDEX(ip)) {
+		stbl = DT_GETSTBL(p);
+		ldtentry = (ldtentry_t *) & p->slot[stbl[index]];
+		table_index = le32_to_cpu(ldtentry->index);
+		if (index == (p->header.nextindex - 1)) {
+			/*
+			 * Last entry in this leaf page
+			 */
+			if ((p->header.flag & BT_ROOT)
+			    || (p->header.next == 0))
+				next_index = -1;
+			else {
+				/* Read next leaf page */
+				DT_GETPAGE(ip, le64_to_cpu(p->header.next),
+					   nmp, PSIZE, np, rc);
+				if (rc)
+					next_index = -1;
+				else {
+					stbl = DT_GETSTBL(np);
+					ldtentry =
+					    (ldtentry_t *) & np->
+					    slot[stbl[0]];
+					next_index =
+					    le32_to_cpu(ldtentry->index);
+					DT_PUTPAGE(nmp);
+				}
+			}
+		} else {
+			ldtentry =
+			    (ldtentry_t *) & p->slot[stbl[index + 1]];
+			next_index = le32_to_cpu(ldtentry->index);
+		}
+		free_index(tid, ip, table_index, next_index);
+	}
+	/*
+	 *	the leaf page becomes empty, delete the page
+	 */
+	if (p->header.nextindex == 1) {
+		/* delete empty page */
+		rc = dtDeleteUp(tid, ip, mp, p, &btstack);
+	}
+	/*
+	 *	the leaf page has other entries remaining:
+	 *
+	 * delete the entry from the leaf page.
+	 */
+	else {
+		BT_MARK_DIRTY(mp, ip);
+		/*
+		 * acquire a transaction lock on the leaf page
+		 */
+		tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
+		dtlck = (dtlock_t *) & tlck->lock;
+
+		/*
+		 * Do not assume that dtlck->index will be zero.  During a
+		 * rename within a directory, this transaction may have
+		 * modified this page already when adding the new entry.
+ */ + + /* linelock header */ + if (dtlck->index >= dtlck->maxcnt) + dtlck = (dtlock_t *) txLinelock(dtlck); + lv = (lv_t *) & dtlck->lv[dtlck->index]; + lv->offset = 0; + lv->length = 1; + dtlck->index++; + + /* linelock stbl of non-root leaf page */ + if (!(p->header.flag & BT_ROOT)) { + if (dtlck->index >= dtlck->maxcnt) + dtlck = (dtlock_t *) txLinelock(dtlck); + lv = (lv_t *) & dtlck->lv[dtlck->index]; + i = index >> L2DTSLOTSIZE; + lv->offset = p->header.stblindex + i; + lv->length = + ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - + i + 1; + dtlck->index++; + } + + /* free the leaf entry */ + dtDeleteEntry(p, index, &dtlck); + + /* + * Update directory index table for entries moved in stbl + */ + if (DO_INDEX(ip) && index < p->header.nextindex) { + imp = 0; + stbl = DT_GETSTBL(p); + for (i = index; i < p->header.nextindex; i++) { + ldtentry = + (ldtentry_t *) & p->slot[stbl[i]]; + modify_index(tid, ip, + le32_to_cpu(ldtentry->index), + bn, i, &imp); + } + if (imp) + release_metapage(imp); + } + + DT_PUTPAGE(mp); + } + + return rc; +} + + +/* + * dtDeleteUp() + * + * function: + * free empty pages as propagating deletion up the tree + * + * parameter: + * + * return: + */ +static int dtDeleteUp(tid_t tid, struct inode *ip, + metapage_t * fmp, dtpage_t * fp, btstack_t * btstack) +{ + int rc = 0; + metapage_t *mp; + dtpage_t *p; + int index, nextindex; + int xlen; + btframe_t *parent; + dtlock_t *dtlck; + tlock_t *tlck; + lv_t *lv; + pxdlock_t *pxdlock; + int i; + + /* + * keep the root leaf page which has become empty + */ + if (BT_IS_ROOT(fmp)) { + /* + * reset the root + * + * dtInitRoot() acquires txlock on the root + */ + dtInitRoot(tid, ip, PARENT(ip)); + + DT_PUTPAGE(fmp); + + return 0; + } + + /* + * free the non-root leaf page + */ + /* + * acquire a transaction lock on the page + * + * write FREEXTENT|NOREDOPAGE log record + * N.B. linelock is overlaid as freed extent descriptor, and + * the buffer page is freed; + */ + tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); + pxdlock = (pxdlock_t *) & tlck->lock; + pxdlock->flag = mlckFREEPXD; + pxdlock->pxd = fp->header.self; + pxdlock->index = 1; + + /* update sibling pointers */ + if ((rc = dtRelink(tid, ip, fp))) + return rc; + + xlen = lengthPXD(&fp->header.self); + ip->i_blocks -= LBLK2PBLK(ip->i_sb, xlen); + + /* free/invalidate its buffer page */ + discard_metapage(fmp); + + /* + * propagate page deletion up the directory tree + * + * If the delete from the parent page makes it empty, + * continue all the way up the tree. + * stop if the root page is reached (which is never deleted) or + * if the entry deletion does not empty the page. + */ + while ((parent = BT_POP(btstack)) != NULL) { + /* pin the parent page */ + DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* + * free the extent of the child page deleted + */ + index = parent->index; + + /* + * delete the entry for the child page from parent + */ + nextindex = p->header.nextindex; + + /* + * the parent has the single entry being deleted: + * + * free the parent page which has become empty. 
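+		 * (nextindex == 1 means the router entry being deleted
+		 * is the only entry left, so the page goes empty)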
+		 */
+		if (nextindex == 1) {
+			/*
+			 * keep the root internal page which has become empty
+			 */
+			if (p->header.flag & BT_ROOT) {
+				/*
+				 * reset the root
+				 *
+				 * dtInitRoot() acquires txlock on the root
+				 */
+				dtInitRoot(tid, ip, PARENT(ip));
+
+				DT_PUTPAGE(mp);
+
+				return 0;
+			}
+			/*
+			 * free the parent page
+			 */
+			else {
+				/*
+				 * acquire a transaction lock on the page
+				 *
+				 * write FREEXTENT|NOREDOPAGE log record
+				 */
+				tlck =
+				    txMaplock(tid, ip,
+					      tlckDTREE | tlckFREE);
+				pxdlock = (pxdlock_t *) & tlck->lock;
+				pxdlock->flag = mlckFREEPXD;
+				pxdlock->pxd = p->header.self;
+				pxdlock->index = 1;
+
+				/* update sibling pointers */
+				if ((rc = dtRelink(tid, ip, p)))
+					return rc;
+
+				xlen = lengthPXD(&p->header.self);
+				ip->i_blocks -= LBLK2PBLK(ip->i_sb, xlen);
+
+				/* free/invalidate its buffer page */
+				discard_metapage(mp);
+
+				/* propagate up */
+				continue;
+			}
+		}
+
+		/*
+		 *	the parent has other entries remaining:
+		 *
+		 * delete the router entry from the parent page.
+		 */
+		BT_MARK_DIRTY(mp, ip);
+		/*
+		 * acquire a transaction lock on the page
+		 *
+		 * action: router entry deletion
+		 */
+		tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY);
+		dtlck = (dtlock_t *) & tlck->lock;
+
+		/* linelock header */
+		if (dtlck->index >= dtlck->maxcnt)
+			dtlck = (dtlock_t *) txLinelock(dtlck);
+		lv = (lv_t *) & dtlck->lv[dtlck->index];
+		lv->offset = 0;
+		lv->length = 1;
+		dtlck->index++;
+
+		/* linelock stbl of non-root leaf page */
+		if (!(p->header.flag & BT_ROOT)) {
+			if (dtlck->index < dtlck->maxcnt)
+				lv++;
+			else {
+				dtlck = (dtlock_t *) txLinelock(dtlck);
+				lv = (lv_t *) & dtlck->lv[0];
+			}
+			i = index >> L2DTSLOTSIZE;
+			lv->offset = p->header.stblindex + i;
+			lv->length =
+			    ((p->header.nextindex - 1) >> L2DTSLOTSIZE) -
+			    i + 1;
+			dtlck->index++;
+		}
+
+		/* free the router entry */
+		dtDeleteEntry(p, index, &dtlck);
+
+		/* reset key of new leftmost entry of level (for consistency) */
+		if (index == 0 &&
+		    ((p->header.flag & BT_ROOT) || p->header.prev == 0))
+			dtTruncateEntry(p, 0, &dtlck);
+
+		/* unpin the parent page */
+		DT_PUTPAGE(mp);
+
+		/* exit propagation up */
+		break;
+	}
+
+	return 0;
+}
+
+
+/*
+ * NAME:	dtRelocate()
+ *
+ * FUNCTION:	relocate dtpage (internal or leaf) of directory;
+ *		This function is mainly used by defragfs utility.
+ */
+int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
+	       s64 nxaddr)
+{
+	int rc = 0;
+	metapage_t *mp, *pmp, *lmp, *rmp;
+	dtpage_t *p, *pp, *rp = 0, *lp = 0;
+	s64 bn;
+	int index;
+	btstack_t btstack;
+	pxd_t *pxd;
+	s64 oxaddr, nextbn, prevbn;
+	int xlen, xsize;
+	tlock_t *tlck;
+	dtlock_t *dtlck;
+	pxdlock_t *pxdlock;
+	s8 *stbl;
+	lv_t *lv;
+
+	oxaddr = addressPXD(opxd);
+	xlen = lengthPXD(opxd);
+
+	jEVENT(0, ("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d\n",
+		   lmxaddr, oxaddr, nxaddr, xlen));
+
+	/*
+	 *	1. get the internal parent dtpage covering
+	 *	router entry for the target page to be relocated;
+	 */
+	rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
+	if (rc)
+		return rc;
+
+	/* retrieve search result */
+	DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
+	jEVENT(0, ("dtRelocate: parent router entry validated.\n"));
+
+	/*
+	 *	2. 
relocate the target dtpage
+	 */
+	/* read in the target page from src extent */
+	DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
+	if (rc) {
+		/* release the pinned parent page */
+		DT_PUTPAGE(pmp);
+		return rc;
+	}
+
+	/*
+	 * read in sibling pages if any to update sibling pointers;
+	 */
+	rmp = NULL;
+	if (p->header.next) {
+		nextbn = le64_to_cpu(p->header.next);
+		DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
+		if (rc) {
+			DT_PUTPAGE(mp);
+			DT_PUTPAGE(pmp);
+			return (rc);
+		}
+	}
+
+	lmp = NULL;
+	if (p->header.prev) {
+		prevbn = le64_to_cpu(p->header.prev);
+		DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
+		if (rc) {
+			DT_PUTPAGE(mp);
+			DT_PUTPAGE(pmp);
+			if (rmp)
+				DT_PUTPAGE(rmp);
+			return (rc);
+		}
+	}
+
+	/* at this point, all dtpages to be updated are in memory */
+
+	/*
+	 * update sibling pointers of sibling dtpages if any;
+	 */
+	if (lmp) {
+		tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK);
+		dtlck = (dtlock_t *) & tlck->lock;
+		/* linelock header */
+		ASSERT(dtlck->index == 0);
+		lv = (lv_t *) & dtlck->lv[0];
+		lv->offset = 0;
+		lv->length = 1;
+		dtlck->index++;
+
+		lp->header.next = cpu_to_le64(nxaddr);
+		DT_PUTPAGE(lmp);
+	}
+
+	if (rmp) {
+		tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK);
+		dtlck = (dtlock_t *) & tlck->lock;
+		/* linelock header */
+		ASSERT(dtlck->index == 0);
+		lv = (lv_t *) & dtlck->lv[0];
+		lv->offset = 0;
+		lv->length = 1;
+		dtlck->index++;
+
+		rp->header.prev = cpu_to_le64(nxaddr);
+		DT_PUTPAGE(rmp);
+	}
+
+	/*
+	 *	update the target dtpage to be relocated
+	 *
+	 * write LOG_REDOPAGE of LOG_NEW type for dst page
+	 * for the whole target page (logredo() will apply
+	 * after image and update bmap for allocation of the
+	 * dst extent), and update bmap for allocation of
+	 * the dst extent;
+	 */
+	tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW);
+	dtlck = (dtlock_t *) & tlck->lock;
+	/* linelock header */
+	ASSERT(dtlck->index == 0);
+	lv = (lv_t *) & dtlck->lv[0];
+
+	/* update the self address in the dtpage header */
+	pxd = &p->header.self;
+	PXDaddress(pxd, nxaddr);
+
+	/* the dst page is the same as the src page, i.e.,
+	 * linelock for afterimage of the whole page;
+	 */
+	lv->offset = 0;
+	lv->length = p->header.maxslot;
+	dtlck->index++;
+
+	/* update the buffer extent descriptor of the dtpage */
+	xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
+#ifdef _STILL_TO_PORT
+	bmSetXD(mp, nxaddr, xsize);
+#endif				/* _STILL_TO_PORT */
+	/* unpin the relocated page */
+	DT_PUTPAGE(mp);
+	jEVENT(0, ("dtRelocate: target dtpage relocated.\n"));
+
+	/* since the moved extent is a dtpage, a LOG_NOREDOPAGE log rec
+	 * needs to be written (in logredo(), the LOG_NOREDOPAGE log rec
+	 * will also force a bmap update).
+	 */
+
+	/*
+	 *	3. acquire maplock for the source extent to be freed;
+	 */
+	/* for dtpage relocation, write a LOG_NOREDOPAGE record
+	 * for the source dtpage (logredo() will init NoRedoPage
+	 * filter and will also update bmap for free of the source
+	 * dtpage), and update bmap for free of the source dtpage;
+	 */
+	tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE);
+	pxdlock = (pxdlock_t *) & tlck->lock;
+	pxdlock->flag = mlckFREEPXD;
+	PXDaddress(&pxdlock->pxd, oxaddr);
+	PXDlength(&pxdlock->pxd, xlen);
+	pxdlock->index = 1;
+
+	/*
+	 *	4. 
update the parent router entry for relocation;
+	 *
+	 * acquire tlck for the parent entry covering the target dtpage;
+	 * write LOG_REDOPAGE to apply after image only;
+	 */
+	jEVENT(0, ("dtRelocate: update parent router entry.\n"));
+	tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY);
+	dtlck = (dtlock_t *) & tlck->lock;
+	lv = (lv_t *) & dtlck->lv[dtlck->index];
+
+	/* update the PXD with the new address */
+	stbl = DT_GETSTBL(pp);
+	pxd = (pxd_t *) & pp->slot[stbl[index]];
+	PXDaddress(pxd, nxaddr);
+	lv->offset = stbl[index];
+	lv->length = 1;
+	dtlck->index++;
+
+	/* unpin the parent dtpage */
+	DT_PUTPAGE(pmp);
+
+	return rc;
+}
+
+
+/*
+ * NAME:	dtSearchNode()
+ *
+ * FUNCTION:	Search for a dtpage containing a specified address
+ *		This function is mainly used by defragfs utility.
+ *
+ * NOTE:	Search result on stack, the found page is pinned at exit.
+ *		The result page must be an internal dtpage.
+ *		lmxaddr gives the address of the leftmost page of the
+ *		dtree level, in which the required dtpage resides.
+ */
+static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
+			btstack_t * btstack)
+{
+	int rc = 0;
+	s64 bn;
+	metapage_t *mp;
+	dtpage_t *p;
+	int psize = 288;	/* initial in-line directory */
+	s8 *stbl;
+	int i;
+	pxd_t *pxd;
+	btframe_t *btsp;
+
+	BT_CLR(btstack);	/* reset stack */
+
+	/*
+	 *	descend tree to the level with specified leftmost page
+	 *
+	 * by convention, root bn = 0.
+	 */
+	for (bn = 0;;) {
+		/* get/pin the page to search */
+		DT_GETPAGE(ip, bn, mp, psize, p, rc);
+		if (rc)
+			return rc;
+
+		/* does the xaddr of the leftmost page of the level
+		 * match the level search key ?
+		 */
+		if (p->header.flag & BT_ROOT) {
+			if (lmxaddr == 0)
+				break;
+		} else if (addressPXD(&p->header.self) == lmxaddr)
+			break;
+
+		/*
+		 * descend down to leftmost child page
+		 */
+		if (p->header.flag & BT_LEAF)
+			return ESTALE;
+
+		/* get the leftmost entry */
+		stbl = DT_GETSTBL(p);
+		pxd = (pxd_t *) & p->slot[stbl[0]];
+
+		/* get the child page block address */
+		bn = addressPXD(pxd);
+		psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize;
+		/* unpin the parent page */
+		DT_PUTPAGE(mp);
+	}
+
+	/*
+	 *	search each page at the current level
+	 */
+      loop:
+	stbl = DT_GETSTBL(p);
+	for (i = 0; i < p->header.nextindex; i++) {
+		pxd = (pxd_t *) & p->slot[stbl[i]];
+
+		/* found the specified router entry */
+		if (addressPXD(pxd) == addressPXD(kpxd) &&
+		    lengthPXD(pxd) == lengthPXD(kpxd)) {
+			btsp = btstack->top;
+			btsp->bn = bn;
+			btsp->index = i;
+			btsp->mp = mp;
+
+			return 0;
+		}
+	}
+
+	/* get the right sibling page if any */
+	if (p->header.next)
+		bn = le64_to_cpu(p->header.next);
+	else {
+		DT_PUTPAGE(mp);
+		return ESTALE;
+	}
+
+	/* unpin current page */
+	DT_PUTPAGE(mp);
+
+	/* get the right sibling page */
+	DT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
+	if (rc)
+		return rc;
+
+	goto loop;
+}
+
+
+/*
+ *	dtRelink()
+ *
+ * function:
+ *	link around a freed page.
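+ *	i.e. the standard doubly-linked list unlink, done as two
+ *	logged updates: next->prev = p->prev and prev->next = p->next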
+ *
+ * parameter:
+ *	fp: page to be freed
+ *
+ * return:
+ */
+static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p)
+{
+	int rc;
+	metapage_t *mp;
+	s64 nextbn, prevbn;
+	tlock_t *tlck;
+	dtlock_t *dtlck;
+	lv_t *lv;
+
+	nextbn = le64_to_cpu(p->header.next);
+	prevbn = le64_to_cpu(p->header.prev);
+
+	/* update prev pointer of the next page */
+	if (nextbn != 0) {
+		DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
+		if (rc)
+			return rc;
+
+		BT_MARK_DIRTY(mp, ip);
+		/*
+		 * acquire a transaction lock on the next page
+		 *
+		 * action: update prev pointer;
+		 */
+		tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
+		jEVENT(0,
+		       ("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p\n",
+			tlck, ip, mp));
+		dtlck = (dtlock_t *) & tlck->lock;
+
+		/* linelock header */
+		if (dtlck->index >= dtlck->maxcnt)
+			dtlck = (dtlock_t *) txLinelock(dtlck);
+		lv = (lv_t *) & dtlck->lv[dtlck->index];
+		lv->offset = 0;
+		lv->length = 1;
+		dtlck->index++;
+
+		p->header.prev = cpu_to_le64(prevbn);
+		DT_PUTPAGE(mp);
+	}
+
+	/* update next pointer of the previous page */
+	if (prevbn != 0) {
+		DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
+		if (rc)
+			return rc;
+
+		BT_MARK_DIRTY(mp, ip);
+		/*
+		 * acquire a transaction lock on the prev page
+		 *
+		 * action: update next pointer;
+		 */
+		tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK);
+		jEVENT(0,
+		       ("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p\n",
+			tlck, ip, mp));
+		dtlck = (dtlock_t *) & tlck->lock;
+
+		/* linelock header */
+		if (dtlck->index >= dtlck->maxcnt)
+			dtlck = (dtlock_t *) txLinelock(dtlck);
+		lv = (lv_t *) & dtlck->lv[dtlck->index];
+		lv->offset = 0;
+		lv->length = 1;
+		dtlck->index++;
+
+		p->header.next = cpu_to_le64(nextbn);
+		DT_PUTPAGE(mp);
+	}
+
+	return 0;
+}
+
+
+/*
+ *	dtInitRoot()
+ *
+ * initialize directory root (inline in inode)
+ */
+void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot)
+{
+	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
+	dtroot_t *p;
+	int fsi;
+	dtslot_t *f;
+	tlock_t *tlck;
+	dtlock_t *dtlck;
+	lv_t *lv;
+	u16 xflag_save;
+
+	/*
+	 * If this was previously a non-empty directory, we need to remove
+	 * the old directory table.
+	 */
+	if (DO_INDEX(ip)) {
+		if (jfs_ip->next_index > (MAX_INLINE_DIRTABLE_ENTRY + 1)) {
+			tblock_t *tblk = tid_to_tblock(tid);
+			/*
+			 * We're playing games with the tid's xflag.  If
+			 * we're removing a regular file, the file's xtree
+			 * is committed with COMMIT_PMAP, but we always
+			 * commit the directory's xtree with COMMIT_PWMAP.
+			 */
+			xflag_save = tblk->xflag;
+			tblk->xflag = 0;
+			/*
+			 * xtTruncate isn't guaranteed to fully truncate
+			 * the xtree.  The caller needs to check i_size
+			 * after committing the transaction to see if
+			 * additional truncation is needed.  The
+			 * COMMIT_Stale flag tells caller that we
+			 * initiated the truncation.
+			 */
+			xtTruncate(tid, ip, 0, COMMIT_PWMAP);
+			set_cflag(COMMIT_Stale, ip);
+
+			tblk->xflag = xflag_save;
+			/*
+			 * Tells jfs_metapage code that the metadata pages
+			 * for the index table are no longer useful, and
+			 * remove them from page cache.
+ */ + invalidate_inode_metapages(ip); + } else + ip->i_size = 1; + + jfs_ip->next_index = 2; + } else + ip->i_size = IDATASIZE; + + /* + * acquire a transaction lock on the root + * + * action: directory initialization; + */ + tlck = txLock(tid, ip, (metapage_t *) & jfs_ip->bxflag, + tlckDTREE | tlckENTRY | tlckBTROOT); + dtlck = (dtlock_t *) & tlck->lock; + + /* linelock root */ + ASSERT(dtlck->index == 0); + lv = (lv_t *) & dtlck->lv[0]; + lv->offset = 0; + lv->length = DTROOTMAXSLOT; + dtlck->index++; + + p = &jfs_ip->i_dtroot; + + p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; + + p->header.nextindex = 0; + + /* init freelist */ + fsi = 1; + f = &p->slot[fsi]; + + /* init data area of root */ + for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) + f->next = fsi; + f->next = -1; + + p->header.freelist = 1; + p->header.freecnt = 8; + + /* init '..' entry */ + p->header.idotdot = cpu_to_le32(idotdot); + +#if 0 + ip->i_blocks = LBLK2PBLK(ip->i_sb, + ((jfs_ip->ea.flag & DXD_EXTENT) ? + lengthDXD(&jfs_ip->ea) : 0) + + ((jfs_ip->acl.flag & DXD_EXTENT) ? + lengthDXD(&jfs_ip->acl) : 0)); +#endif + + return; +} + +/* + * jfs_readdir() + * + * function: read directory entries sequentially + * from the specified entry offset + * + * parameter: + * + * return: offset = (pn, index) of start entry + * of next jfs_readdir()/dtRead() + */ +int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct inode *ip = filp->f_dentry->d_inode; + struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; + int rc = 0; + struct dtoffset { + s16 pn; + s16 index; + s32 unused; + } *dtoffset = (struct dtoffset *) &filp->f_pos; + s64 bn; + metapage_t *mp; + dtpage_t *p; + int index; + s8 *stbl; + btstack_t btstack; + int i, next; + ldtentry_t *d; + dtslot_t *t; + int d_namleft, d_namlen, len, outlen; + char *d_name, *name_ptr; + int dtlhdrdatalen; + u32 dir_index; + int do_index = 0; + uint loop_count = 0; + + if (filp->f_pos == DIREND) + return 0; + + if (DO_INDEX(ip)) { + /* + * persistent index is stored in directory entries. + * Special cases: 0 = . + * 1 = .. + * -1 = End of directory + */ + do_index = 1; + dtlhdrdatalen = DTLHDRDATALEN; + + dir_index = (u32) filp->f_pos; + + if (dir_index > 1) { + dir_table_slot_t dirtab_slot; + + if (dtEmpty(ip)) { + filp->f_pos = DIREND; + return 0; + } + repeat: + rc = get_index(ip, dir_index, &dirtab_slot); + if (rc) { + filp->f_pos = DIREND; + return rc; + } + if (dirtab_slot.flag == DIR_INDEX_FREE) { + if (loop_count++ > JFS_IP(ip)->next_index) { + jERROR(1, ("jfs_readdir detected " + "infinite loop!\n")); + filp->f_pos = DIREND; + return 0; + } + dir_index = le32_to_cpu(dirtab_slot.addr2); + if (dir_index == -1) { + filp->f_pos = DIREND; + return 0; + } + goto repeat; + } + bn = addressDTS(&dirtab_slot); + index = dirtab_slot.slot; + DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) { + filp->f_pos = DIREND; + return 0; + } + if (p->header.flag & BT_INTERNAL) { + jERROR(1,("jfs_readdir: bad index table\n")); + DT_PUTPAGE(mp); + filp->f_pos = -1; + return 0; + } + } else { + if (dir_index == 0) { + /* + * self "." + */ + filp->f_pos = 0; + if (filldir(dirent, ".", 1, 0, ip->i_ino, + DT_DIR)) + return 0; + } + /* + * parent ".." 
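+			 * (always reported at offset 1, just as "." is
+			 * reported at offset 0 above)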
+ */ + filp->f_pos = 1; + if (filldir + (dirent, "..", 2, 1, PARENT(ip), DT_DIR)) + return 0; + + /* + * Find first entry of left-most leaf + */ + if (dtEmpty(ip)) { + filp->f_pos = DIREND; + return 0; + } + + if ((rc = dtReadFirst(ip, &btstack))) + return -rc; + + DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + } + } else { + /* + * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 + * + * pn = index = 0: First entry "." + * pn = 0; index = 1: Second entry ".." + * pn > 0: Real entries, pn=1 -> leftmost page + * pn = index = -1: No more entries + */ + dtlhdrdatalen = DTLHDRDATALEN_LEGACY; + + if (filp->f_pos == 0) { + /* build "." entry */ + + if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino, + DT_DIR)) + return 0; + dtoffset->index = 1; + } + + if (dtoffset->pn == 0) { + if (dtoffset->index == 1) { + /* build ".." entry */ + + if (filldir(dirent, "..", 2, filp->f_pos, + PARENT(ip), DT_DIR)) + return 0; + } else { + jERROR(1, + ("jfs_readdir called with invalid offset!\n")); + } + dtoffset->pn = 1; + dtoffset->index = 0; + } + + if (dtEmpty(ip)) { + filp->f_pos = DIREND; + return 0; + } + + if ((rc = dtReadNext(ip, &filp->f_pos, &btstack))) { + jERROR(1, + ("jfs_readdir: unexpected rc = %d from dtReadNext\n", + rc)); + filp->f_pos = DIREND; + return 0; + } + /* get start leaf page and index */ + DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + + /* offset beyond directory eof ? */ + if (bn < 0) { + filp->f_pos = DIREND; + return 0; + } + } + + d_name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), GFP_NOFS); + if (d_name == NULL) { + DT_PUTPAGE(mp); + jERROR(1, ("jfs_readdir: kmalloc failed!\n")); + filp->f_pos = DIREND; + return 0; + } + while (1) { + stbl = DT_GETSTBL(p); + + for (i = index; i < p->header.nextindex; i++) { + d = (ldtentry_t *) & p->slot[stbl[i]]; + + d_namleft = d->namlen; + name_ptr = d_name; + + if (do_index) { + filp->f_pos = le32_to_cpu(d->index); + len = min(d_namleft, DTLHDRDATALEN); + } else + len = min(d_namleft, DTLHDRDATALEN_LEGACY); + + /* copy the name of head/only segment */ + outlen = jfs_strfromUCS_le(name_ptr, d->name, len, + codepage); + d_namlen = outlen; + + /* copy name in the additional segment(s) */ + next = d->next; + while (next >= 0) { + t = (dtslot_t *) & p->slot[next]; + name_ptr += outlen; + d_namleft -= len; + /* Sanity Check */ + if (d_namleft == 0) { + jERROR(1,("JFS:Dtree error: " + "ino = %ld, bn=%Ld, index = %d\n", + ip->i_ino, bn, i)); + updateSuper(ip->i_sb, FM_DIRTY); + goto skip_one; + } + len = min(d_namleft, DTSLOTDATALEN); + outlen = jfs_strfromUCS_le(name_ptr, t->name, + len, codepage); + d_namlen+= outlen; + + next = t->next; + } + + if (filldir(dirent, d_name, d_namlen, filp->f_pos, + le32_to_cpu(d->inumber), DT_UNKNOWN)) + goto out; +skip_one: + if (!do_index) + dtoffset->index++; + } + + /* + * get next leaf page + */ + + if (p->header.flag & BT_ROOT) { + filp->f_pos = DIREND; + break; + } + + bn = le64_to_cpu(p->header.next); + if (bn == 0) { + filp->f_pos = DIREND; + break; + } + + /* unpin previous leaf page */ + DT_PUTPAGE(mp); + + /* get next leaf page */ + DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) { + kfree(d_name); + return -rc; + } + + /* update offset (pn:index) for new page */ + index = 0; + if (!do_index) { + dtoffset->pn++; + dtoffset->index = 0; + } + + } + + out: + kfree(d_name); + DT_PUTPAGE(mp); + + return rc; +} + + +/* + * dtReadFirst() + * + * function: get the leftmost page of the directory + */ +static int dtReadFirst(struct inode *ip, btstack_t * btstack) +{ + int rc = 0; + s64 bn; + int psize = 
288; /* initial in-line directory */ + metapage_t *mp; + dtpage_t *p; + s8 *stbl; + btframe_t *btsp; + pxd_t *xd; + + BT_CLR(btstack); /* reset stack */ + + /* + * descend leftmost path of the tree + * + * by convention, root bn = 0. + */ + for (bn = 0;;) { + DT_GETPAGE(ip, bn, mp, psize, p, rc); + if (rc) + return rc; + + /* + * leftmost leaf page + */ + if (p->header.flag & BT_LEAF) { + /* return leftmost entry */ + btsp = btstack->top; + btsp->bn = bn; + btsp->index = 0; + btsp->mp = mp; + + return 0; + } + + /* + * descend down to leftmost child page + */ + /* push (bn, index) of the parent page/entry */ + BT_PUSH(btstack, bn, 0); + + /* get the leftmost entry */ + stbl = DT_GETSTBL(p); + xd = (pxd_t *) & p->slot[stbl[0]]; + + /* get the child page block address */ + bn = addressPXD(xd); + psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize; + + /* unpin the parent page */ + DT_PUTPAGE(mp); + } +} + + +/* + * dtReadNext() + * + * function: get the page of the specified offset (pn:index) + * + * return: if (offset > eof), bn = -1; + * + * note: if index > nextindex of the target leaf page, + * start with 1st entry of next leaf page; + */ +static int dtReadNext(struct inode *ip, loff_t * offset, btstack_t * btstack) +{ + int rc = 0; + struct dtoffset { + s16 pn; + s16 index; + s32 unused; + } *dtoffset = (struct dtoffset *) offset; + s64 bn; + metapage_t *mp; + dtpage_t *p; + int index; + int pn; + s8 *stbl; + btframe_t *btsp, *parent; + pxd_t *xd; + + /* + * get leftmost leaf page pinned + */ + if ((rc = dtReadFirst(ip, btstack))) + return rc; + + /* get leaf page */ + DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); + + /* get the start offset (pn:index) */ + pn = dtoffset->pn - 1; /* Now pn = 0 represents leftmost leaf */ + index = dtoffset->index; + + /* start at leftmost page ? */ + if (pn == 0) { + /* offset beyond eof ? */ + if (index < p->header.nextindex) + goto out; + + if (p->header.flag & BT_ROOT) { + bn = -1; + goto out; + } + + /* start with 1st entry of next leaf page */ + dtoffset->pn++; + dtoffset->index = index = 0; + goto a; + } + + /* start at non-leftmost page: scan parent pages for large pn */ + if (p->header.flag & BT_ROOT) { + bn = -1; + goto out; + } + + /* start after next leaf page ? */ + if (pn > 1) + goto b; + + /* get leaf page pn = 1 */ + a: + bn = le64_to_cpu(p->header.next); + + /* unpin leaf page */ + DT_PUTPAGE(mp); + + /* offset beyond eof ? */ + if (bn == 0) { + bn = -1; + goto out; + } + + goto c; + + /* + * scan last internal page level to get target leaf page + */ + b: + /* unpin leftmost leaf page */ + DT_PUTPAGE(mp); + + /* get left most parent page */ + btsp = btstack->top; + parent = btsp - 1; + bn = parent->bn; + DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* scan parent pages at last internal page level */ + while (pn >= p->header.nextindex) { + pn -= p->header.nextindex; + + /* get next parent page address */ + bn = le64_to_cpu(p->header.next); + + /* unpin current parent page */ + DT_PUTPAGE(mp); + + /* offset beyond eof ? 
*/ + if (bn == 0) { + bn = -1; + goto out; + } + + /* get next parent page */ + DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* update parent page stack frame */ + parent->bn = bn; + } + + /* get leaf page address */ + stbl = DT_GETSTBL(p); + xd = (pxd_t *) & p->slot[stbl[pn]]; + bn = addressPXD(xd); + + /* unpin parent page */ + DT_PUTPAGE(mp); + + /* + * get target leaf page + */ + c: + DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* + * leaf page has been completed: + * start with 1st entry of next leaf page + */ + if (index >= p->header.nextindex) { + bn = le64_to_cpu(p->header.next); + + /* unpin leaf page */ + DT_PUTPAGE(mp); + + /* offset beyond eof ? */ + if (bn == 0) { + bn = -1; + goto out; + } + + /* get next leaf page */ + DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* start with 1st entry of next leaf page */ + dtoffset->pn++; + dtoffset->index = 0; + } + + out: + /* return target leaf page pinned */ + btsp = btstack->top; + btsp->bn = bn; + btsp->index = dtoffset->index; + btsp->mp = mp; + + return 0; +} + + +/* + * dtCompare() + * + * function: compare search key with an internal entry + * + * return: + * < 0 if k is < record + * = 0 if k is = record + * > 0 if k is > record + */ +static int dtCompare(component_t * key, /* search key */ + dtpage_t * p, /* directory page */ + int si) +{ /* entry slot index */ + register int rc; + register wchar_t *kname, *name; + register int klen, namlen, len; + idtentry_t *ih; + dtslot_t *t; + + /* + * force the left-most key on internal pages, at any level of + * the tree, to be less than any search key. + * this obviates having to update the leftmost key on an internal + * page when the user inserts a new key in the tree smaller than + * anything that has been stored. + * + * (? if/when dtSearch() narrows down to 1st entry (index = 0), + * at any internal page at any level of the tree, + * it descends to child of the entry anyway - + * ? make the entry as min size dummy entry) + * + * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) + * return (1); + */ + + kname = key->name; + klen = key->namlen; + + ih = (idtentry_t *) & p->slot[si]; + si = ih->next; + name = ih->name; + namlen = ih->namlen; + len = min(namlen, DTIHDRDATALEN); + + /* compare with head/only segment */ + len = min(klen, len); + if ((rc = UniStrncmp_le(kname, name, len))) + return rc; + + klen -= len; + namlen -= len; + + /* compare with additional segment(s) */ + kname += len; + while (klen > 0 && namlen > 0) { + /* compare with next name segment */ + t = (dtslot_t *) & p->slot[si]; + len = min(namlen, DTSLOTDATALEN); + len = min(klen, len); + name = t->name; + if ((rc = UniStrncmp_le(kname, name, len))) + return rc; + + klen -= len; + namlen -= len; + kname += len; + si = t->next; + } + + return (klen - namlen); +} + + + + +/* + * ciCompare() + * + * function: compare search key with an (leaf/internal) entry + * + * return: + * < 0 if k is < record + * = 0 if k is = record + * > 0 if k is > record + */ +static int ciCompare(component_t * key, /* search key */ + dtpage_t * p, /* directory page */ + int si, /* entry slot index */ + int flag) +{ + register int rc; + register wchar_t *kname, *name, x; + register int klen, namlen, len; + ldtentry_t *lh; + idtentry_t *ih; + dtslot_t *t; + int i; + + /* + * force the left-most key on internal pages, at any level of + * the tree, to be less than any search key. 
+ * this obviates having to update the leftmost key on an internal + * page when the user inserts a new key in the tree smaller than + * anything that has been stored. + * + * (? if/when dtSearch() narrows down to 1st entry (index = 0), + * at any internal page at any level of the tree, + * it descends to child of the entry anyway - + * ? make the entry as min size dummy entry) + * + * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) + * return (1); + */ + + kname = key->name; + klen = key->namlen; + + /* + * leaf page entry + */ + if (p->header.flag & BT_LEAF) { + lh = (ldtentry_t *) & p->slot[si]; + si = lh->next; + name = lh->name; + namlen = lh->namlen; + if (flag & JFS_DIR_INDEX) + len = min(namlen, DTLHDRDATALEN); + else + len = min(namlen, DTLHDRDATALEN_LEGACY); + } + /* + * internal page entry + */ + else { + ih = (idtentry_t *) & p->slot[si]; + si = ih->next; + name = ih->name; + namlen = ih->namlen; + len = min(namlen, DTIHDRDATALEN); + } + + /* compare with head/only segment */ + len = min(klen, len); + for (i = 0; i < len; i++, kname++, name++) { + /* only uppercase if case-insensitive support is on */ + if ((flag & JFS_OS2) == JFS_OS2) + x = UniToupper(le16_to_cpu(*name)); + else + x = le16_to_cpu(*name); + if ((rc = *kname - x)) + return rc; + } + + klen -= len; + namlen -= len; + + /* compare with additional segment(s) */ + while (klen > 0 && namlen > 0) { + /* compare with next name segment */ + t = (dtslot_t *) & p->slot[si]; + len = min(namlen, DTSLOTDATALEN); + len = min(klen, len); + name = t->name; + for (i = 0; i < len; i++, kname++, name++) { + /* only uppercase if case-insensitive support is on */ + if ((flag & JFS_OS2) == JFS_OS2) + x = UniToupper(le16_to_cpu(*name)); + else + x = le16_to_cpu(*name); + + if ((rc = *kname - x)) + return rc; + } + + klen -= len; + namlen -= len; + si = t->next; + } + + return (klen - namlen); +} + + +/* + * ciGetLeafPrefixKey() + * + * function: compute prefix of suffix compression + * from two adjacent leaf entries + * across page boundary + * + * return: + * Number of prefix bytes needed to distinguish b from a. 
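+ *
+ *	e.g. left key "cat" and right key "cow" first differ in the
+ *	second character, giving the prefix key "co" (namlen = 2);
+ *	when the left key is a strict prefix of the right ("ca" vs
+ *	"cat"), one more character of the right key is kept ("cat").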
+ */ +static void ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, + int ri, component_t * key, int flag) +{ + register int klen, namlen; + register wchar_t *pl, *pr, *kname; + wchar_t lname[JFS_NAME_MAX + 1]; + component_t lkey = { 0, lname }; + wchar_t rname[JFS_NAME_MAX + 1]; + component_t rkey = { 0, rname }; + + /* get left and right key */ + dtGetKey(lp, li, &lkey, flag); + lkey.name[lkey.namlen] = 0; + + if ((flag & JFS_OS2) == JFS_OS2) + ciToUpper(&lkey); + + dtGetKey(rp, ri, &rkey, flag); + rkey.name[rkey.namlen] = 0; + + + if ((flag & JFS_OS2) == JFS_OS2) + ciToUpper(&rkey); + + /* compute prefix */ + klen = 0; + kname = key->name; + namlen = min(lkey.namlen, rkey.namlen); + for (pl = lkey.name, pr = rkey.name; + namlen; pl++, pr++, namlen--, klen++, kname++) { + *kname = *pr; + if (*pl != *pr) { + key->namlen = klen + 1; + return; + } + } + + /* l->namlen <= r->namlen since l <= r */ + if (lkey.namlen < rkey.namlen) { + *kname = *pr; + key->namlen = klen + 1; + } else /* l->namelen == r->namelen */ + key->namlen = klen; + + return; +} + + + +/* + * dtGetKey() + * + * function: get key of the entry + */ +static void dtGetKey(dtpage_t * p, int i, /* entry index */ + component_t * key, int flag) +{ + int si; + s8 *stbl; + ldtentry_t *lh; + idtentry_t *ih; + dtslot_t *t; + int namlen, len; + wchar_t *name, *kname; + + /* get entry */ + stbl = DT_GETSTBL(p); + si = stbl[i]; + if (p->header.flag & BT_LEAF) { + lh = (ldtentry_t *) & p->slot[si]; + si = lh->next; + namlen = lh->namlen; + name = lh->name; + if (flag & JFS_DIR_INDEX) + len = min(namlen, DTLHDRDATALEN); + else + len = min(namlen, DTLHDRDATALEN_LEGACY); + } else { + ih = (idtentry_t *) & p->slot[si]; + si = ih->next; + namlen = ih->namlen; + name = ih->name; + len = min(namlen, DTIHDRDATALEN); + } + + key->namlen = namlen; + kname = key->name; + + /* + * move head/only segment + */ + UniStrncpy_le(kname, name, len); + + /* + * move additional segment(s) + */ + while (si >= 0) { + /* get next segment */ + t = &p->slot[si]; + kname += len; + namlen -= len; + len = min(namlen, DTSLOTDATALEN); + UniStrncpy_le(kname, t->name, len); + + si = t->next; + } +} + + +/* + * dtInsertEntry() + * + * function: allocate free slot(s) and + * write a leaf/internal entry + * + * return: entry slot index + */ +static void dtInsertEntry(dtpage_t * p, int index, component_t * key, + ddata_t * data, dtlock_t ** dtlock) +{ + dtslot_t *h, *t; + ldtentry_t *lh = 0; + idtentry_t *ih = 0; + int hsi, fsi, klen, len, nextindex; + wchar_t *kname, *name; + s8 *stbl; + pxd_t *xd; + dtlock_t *dtlck = *dtlock; + lv_t *lv; + int xsi, n; + s64 bn = 0; + metapage_t *mp = 0; + + klen = key->namlen; + kname = key->name; + + /* allocate a free slot */ + hsi = fsi = p->header.freelist; + h = &p->slot[fsi]; + p->header.freelist = h->next; + --p->header.freecnt; + + /* open new linelock */ + if (dtlck->index >= dtlck->maxcnt) + dtlck = (dtlock_t *) txLinelock(dtlck); + + lv = (lv_t *) & dtlck->lv[dtlck->index]; + lv->offset = hsi; + + /* write head/only segment */ + if (p->header.flag & BT_LEAF) { + lh = (ldtentry_t *) h; + lh->next = h->next; + lh->inumber = data->leaf.ino; /* little-endian */ + lh->namlen = klen; + name = lh->name; + if (data->leaf.ip) { + len = min(klen, DTLHDRDATALEN); + if (!(p->header.flag & BT_ROOT)) + bn = addressPXD(&p->header.self); + lh->index = cpu_to_le32(add_index(data->leaf.tid, + data->leaf.ip, + bn, index)); + } else + len = min(klen, DTLHDRDATALEN_LEGACY); + } else { + ih = (idtentry_t *) h; + ih->next = h->next; + xd = 
(pxd_t *) ih; + *xd = data->xd; + ih->namlen = klen; + name = ih->name; + len = min(klen, DTIHDRDATALEN); + } + + UniStrncpy_le(name, kname, len); + + n = 1; + xsi = hsi; + + /* write additional segment(s) */ + t = h; + klen -= len; + while (klen) { + /* get free slot */ + fsi = p->header.freelist; + t = &p->slot[fsi]; + p->header.freelist = t->next; + --p->header.freecnt; + + /* is next slot contiguous ? */ + if (fsi != xsi + 1) { + /* close current linelock */ + lv->length = n; + dtlck->index++; + + /* open new linelock */ + if (dtlck->index < dtlck->maxcnt) + lv++; + else { + dtlck = (dtlock_t *) txLinelock(dtlck); + lv = (lv_t *) & dtlck->lv[0]; + } + + lv->offset = fsi; + n = 0; + } + + kname += len; + len = min(klen, DTSLOTDATALEN); + UniStrncpy_le(t->name, kname, len); + + n++; + xsi = fsi; + klen -= len; + } + + /* close current linelock */ + lv->length = n; + dtlck->index++; + + *dtlock = dtlck; + + /* terminate last/only segment */ + if (h == t) { + /* single segment entry */ + if (p->header.flag & BT_LEAF) + lh->next = -1; + else + ih->next = -1; + } else + /* multi-segment entry */ + t->next = -1; + + /* if insert into middle, shift right succeeding entries in stbl */ + stbl = DT_GETSTBL(p); + nextindex = p->header.nextindex; + if (index < nextindex) { + memmove(stbl + index + 1, stbl + index, nextindex - index); + + if ((p->header.flag & BT_LEAF) && data->leaf.ip) { + /* + * Need to update slot number for entries that moved + * in the stbl + */ + mp = 0; + for (n = index + 1; n <= nextindex; n++) { + lh = (ldtentry_t *) & (p->slot[stbl[n]]); + modify_index(data->leaf.tid, data->leaf.ip, + le32_to_cpu(lh->index), bn, n, + &mp); + } + if (mp) + release_metapage(mp); + } + } + + stbl[index] = hsi; + + /* advance next available entry index of stbl */ + ++p->header.nextindex; +} + + +/* + * dtMoveEntry() + * + * function: move entries from split/left page to new/right page + * + * nextindex of dst page and freelist/freecnt of both pages + * are updated. + */ +static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, + dtlock_t ** sdtlock, dtlock_t ** ddtlock, + int do_index) +{ + int ssi, next; /* src slot index */ + int di; /* dst entry index */ + int dsi; /* dst slot index */ + s8 *sstbl, *dstbl; /* sorted entry table */ + int snamlen, len; + ldtentry_t *slh, *dlh = 0; + idtentry_t *sih, *dih = 0; + dtslot_t *h, *s, *d; + dtlock_t *sdtlck = *sdtlock, *ddtlck = *ddtlock; + lv_t *slv, *dlv; + int xssi, ns, nd; + int sfsi; + + sstbl = (s8 *) & sp->slot[sp->header.stblindex]; + dstbl = (s8 *) & dp->slot[dp->header.stblindex]; + + dsi = dp->header.freelist; /* first (whole page) free slot */ + sfsi = sp->header.freelist; + + /* linelock destination entry slot */ + dlv = (lv_t *) & ddtlck->lv[ddtlck->index]; + dlv->offset = dsi; + + /* linelock source entry slot */ + slv = (lv_t *) & sdtlck->lv[sdtlck->index]; + slv->offset = sstbl[si]; + xssi = slv->offset - 1; + + /* + * move entries + */ + ns = nd = 0; + for (di = 0; si < sp->header.nextindex; si++, di++) { + ssi = sstbl[si]; + dstbl[di] = dsi; + + /* is next slot contiguous ? 
*/ + if (ssi != xssi + 1) { + /* close current linelock */ + slv->length = ns; + sdtlck->index++; + + /* open new linelock */ + if (sdtlck->index < sdtlck->maxcnt) + slv++; + else { + sdtlck = (dtlock_t *) txLinelock(sdtlck); + slv = (lv_t *) & sdtlck->lv[0]; + } + + slv->offset = ssi; + ns = 0; + } + + /* + * move head/only segment of an entry + */ + /* get dst slot */ + h = d = &dp->slot[dsi]; + + /* get src slot and move */ + s = &sp->slot[ssi]; + if (sp->header.flag & BT_LEAF) { + /* get source entry */ + slh = (ldtentry_t *) s; + dlh = (ldtentry_t *) h; + snamlen = slh->namlen; + + if (do_index) { + len = min(snamlen, DTLHDRDATALEN); + dlh->index = slh->index; /* little-endian */ + } else + len = min(snamlen, DTLHDRDATALEN_LEGACY); + + memcpy(dlh, slh, 6 + len * 2); + + next = slh->next; + + /* update dst head/only segment next field */ + dsi++; + dlh->next = dsi; + } else { + sih = (idtentry_t *) s; + snamlen = sih->namlen; + + len = min(snamlen, DTIHDRDATALEN); + dih = (idtentry_t *) h; + memcpy(dih, sih, 10 + len * 2); + next = sih->next; + + dsi++; + dih->next = dsi; + } + + /* free src head/only segment */ + s->next = sfsi; + s->cnt = 1; + sfsi = ssi; + + ns++; + nd++; + xssi = ssi; + + /* + * move additional segment(s) of the entry + */ + snamlen -= len; + while ((ssi = next) >= 0) { + /* is next slot contiguous ? */ + if (ssi != xssi + 1) { + /* close current linelock */ + slv->length = ns; + sdtlck->index++; + + /* open new linelock */ + if (sdtlck->index < sdtlck->maxcnt) + slv++; + else { + sdtlck = + (dtlock_t *) + txLinelock(sdtlck); + slv = (lv_t *) & sdtlck->lv[0]; + } + + slv->offset = ssi; + ns = 0; + } + + /* get next source segment */ + s = &sp->slot[ssi]; + + /* get next destination free slot */ + d++; + + len = min(snamlen, DTSLOTDATALEN); + UniStrncpy(d->name, s->name, len); + + ns++; + nd++; + xssi = ssi; + + dsi++; + d->next = dsi; + + /* free source segment */ + next = s->next; + s->next = sfsi; + s->cnt = 1; + sfsi = ssi; + + snamlen -= len; + } /* end while */ + + /* terminate dst last/only segment */ + if (h == d) { + /* single segment entry */ + if (dp->header.flag & BT_LEAF) + dlh->next = -1; + else + dih->next = -1; + } else + /* multi-segment entry */ + d->next = -1; + } /* end for */ + + /* close current linelock */ + slv->length = ns; + sdtlck->index++; + *sdtlock = sdtlck; + + dlv->length = nd; + ddtlck->index++; + *ddtlock = ddtlck; + + /* update source header */ + sp->header.freelist = sfsi; + sp->header.freecnt += nd; + + /* update destination header */ + dp->header.nextindex = di; + + dp->header.freelist = dsi; + dp->header.freecnt -= nd; +} + + +/* + * dtDeleteEntry() + * + * function: free a (leaf/internal) entry + * + * log freelist header, stbl, and each segment slot of entry + * (even though last/only segment next field is modified, + * physical image logging requires all segment slots of + * the entry logged to avoid applying previous updates + * to the same slots) + */ +static void dtDeleteEntry(dtpage_t * p, int fi, dtlock_t ** dtlock) +{ + int fsi; /* free entry slot index */ + s8 *stbl; + dtslot_t *t; + int si, freecnt; + dtlock_t *dtlck = *dtlock; + lv_t *lv; + int xsi, n; + + /* get free entry slot index */ + stbl = DT_GETSTBL(p); + fsi = stbl[fi]; + + /* open new linelock */ + if (dtlck->index >= dtlck->maxcnt) + dtlck = (dtlock_t *) txLinelock(dtlck); + lv = (lv_t *) & dtlck->lv[dtlck->index]; + + lv->offset = fsi; + + /* get the head/only segment */ + t = &p->slot[fsi]; + if (p->header.flag & BT_LEAF) + si = ((ldtentry_t *) 
t)->next;
+	else
+		si = ((idtentry_t *) t)->next;
+	t->next = si;
+	t->cnt = 1;
+
+	n = freecnt = 1;
+	xsi = fsi;
+
+	/* find the last/only segment */
+	while (si >= 0) {
+		/* is next slot contiguous ? */
+		if (si != xsi + 1) {
+			/* close current linelock */
+			lv->length = n;
+			dtlck->index++;
+
+			/* open new linelock */
+			if (dtlck->index < dtlck->maxcnt)
+				lv++;
+			else {
+				dtlck = (dtlock_t *) txLinelock(dtlck);
+				lv = (lv_t *) & dtlck->lv[0];
+			}
+
+			lv->offset = si;
+			n = 0;
+		}
+
+		n++;
+		xsi = si;
+		freecnt++;
+
+		t = &p->slot[si];
+		t->cnt = 1;
+		si = t->next;
+	}
+
+	/* close current linelock */
+	lv->length = n;
+	dtlck->index++;
+
+	*dtlock = dtlck;
+
+	/* update freelist */
+	t->next = p->header.freelist;
+	p->header.freelist = fsi;
+	p->header.freecnt += freecnt;
+
+	/* if delete from middle,
+	 * shift left the succeeding entries in the stbl
+	 */
+	si = p->header.nextindex;
+	if (fi < si - 1)
+		memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1);
+
+	p->header.nextindex--;
+}
+
+
+/*
+ *	dtTruncateEntry()
+ *
+ * function: truncate a (leaf/internal) entry
+ *
+ * log freelist header, stbl, and each segment slot of entry
+ * (even though last/only segment next field is modified,
+ * physical image logging requires all segment slots of
+ * the entry logged to avoid applying previous updates
+ * to the same slots)
+ */
+static void dtTruncateEntry(dtpage_t * p, int ti, dtlock_t ** dtlock)
+{
+	int tsi;		/* truncate entry slot index */
+	s8 *stbl;
+	dtslot_t *t;
+	int si, freecnt;
+	dtlock_t *dtlck = *dtlock;
+	lv_t *lv;
+	int fsi, xsi, n;
+
+	/* get free entry slot index */
+	stbl = DT_GETSTBL(p);
+	tsi = stbl[ti];
+
+	/* open new linelock */
+	if (dtlck->index >= dtlck->maxcnt)
+		dtlck = (dtlock_t *) txLinelock(dtlck);
+	lv = (lv_t *) & dtlck->lv[dtlck->index];
+
+	lv->offset = tsi;
+
+	/* get the head/only segment */
+	t = &p->slot[tsi];
+	ASSERT(p->header.flag & BT_INTERNAL);
+	((idtentry_t *) t)->namlen = 0;
+	si = ((idtentry_t *) t)->next;
+	((idtentry_t *) t)->next = -1;
+
+	n = 1;
+	freecnt = 0;
+	fsi = si;
+	xsi = tsi;
+
+	/* find the last/only segment */
+	while (si >= 0) {
+		/* is next slot contiguous ? */
+		if (si != xsi + 1) {
+			/* close current linelock */
+			lv->length = n;
+			dtlck->index++;
+
+			/* open new linelock */
+			if (dtlck->index < dtlck->maxcnt)
+				lv++;
+			else {
+				dtlck = (dtlock_t *) txLinelock(dtlck);
+				lv = (lv_t *) & dtlck->lv[0];
+			}
+
+			lv->offset = si;
+			n = 0;
+		}
+
+		n++;
+		xsi = si;
+		freecnt++;
+
+		t = &p->slot[si];
+		t->cnt = 1;
+		si = t->next;
+	}
+
+	/* close current linelock */
+	lv->length = n;
+	dtlck->index++;
+
+	*dtlock = dtlck;
+
+	/* update freelist */
+	if (freecnt == 0)
+		return;
+	t->next = p->header.freelist;
+	p->header.freelist = fsi;
+	p->header.freecnt += freecnt;
+}
+
+
+/*
+ *	dtLinelockFreelist()
+ */
+static void dtLinelockFreelist(dtpage_t * p,	/* directory page */
+			       int m,	/* max slot index */
+			       dtlock_t ** dtlock)
+{
+	int fsi;		/* free entry slot index */
+	dtslot_t *t;
+	int si;
+	dtlock_t *dtlck = *dtlock;
+	lv_t *lv;
+	int xsi, n;
+
+	/* get free entry slot index */
+	fsi = p->header.freelist;
+
+	/* open new linelock */
+	if (dtlck->index >= dtlck->maxcnt)
+		dtlck = (dtlock_t *) txLinelock(dtlck);
+	lv = (lv_t *) & dtlck->lv[dtlck->index];
+
+	lv->offset = fsi;
+
+	n = 1;
+	xsi = fsi;
+
+	t = &p->slot[fsi];
+	si = t->next;
+
+	/* find the last/only segment */
+	while (si < m && si >= 0) {
+		/* is next slot contiguous ? 
*/ + if (si != xsi + 1) { + /* close current linelock */ + lv->length = n; + dtlck->index++; + + /* open new linelock */ + if (dtlck->index < dtlck->maxcnt) + lv++; + else { + dtlck = (dtlock_t *) txLinelock(dtlck); + lv = (lv_t *) & dtlck->lv[0]; + } + + lv->offset = si; + n = 0; + } + + n++; + xsi = si; + + t = &p->slot[si]; + si = t->next; + } + + /* close current linelock */ + lv->length = n; + dtlck->index++; + + *dtlock = dtlck; +} + + +/* + * NAME: dtModify + * + * FUNCTION: Modify the inode number part of a directory entry + * + * PARAMETERS: + * tid - Transaction id + * ip - Inode of parent directory + * key - Name of entry to be modified + * orig_ino - Original inode number expected in entry + * new_ino - New inode number to put into entry + * flag - JFS_RENAME + * + * RETURNS: + * ESTALE - If entry found does not match orig_ino passed in + * ENOENT - If no entry can be found to match key + * 0 - If successfully modified entry + */ +int dtModify(tid_t tid, struct inode *ip, + component_t * key, ino_t * orig_ino, ino_t new_ino, int flag) +{ + int rc; + s64 bn; + metapage_t *mp; + dtpage_t *p; + int index; + btstack_t btstack; + tlock_t *tlck; + dtlock_t *dtlck; + lv_t *lv; + s8 *stbl; + int entry_si; /* entry slot index */ + ldtentry_t *entry; + + /* + * search for the entry to modify: + * + * dtSearch() returns (leaf page pinned, index at which to modify). + */ + if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag))) + return rc; + + /* retrieve search result */ + DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + + BT_MARK_DIRTY(mp, ip); + /* + * acquire a transaction lock on the leaf page of named entry + */ + tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); + dtlck = (dtlock_t *) & tlck->lock; + + /* get slot index of the entry */ + stbl = DT_GETSTBL(p); + entry_si = stbl[index]; + + /* linelock entry */ + ASSERT(dtlck->index == 0); + lv = (lv_t *) & dtlck->lv[0]; + lv->offset = entry_si; + lv->length = 1; + dtlck->index++; + + /* get the head/only segment */ + entry = (ldtentry_t *) & p->slot[entry_si]; + + /* substitute the inode number of the entry */ + entry->inumber = cpu_to_le32(new_ino); + + /* unpin the leaf page */ + DT_PUTPAGE(mp); + + return 0; +} + +#ifdef _JFS_DEBUG_DTREE +/* + * dtDisplayTree() + * + * function: traverse forward + */ +int dtDisplayTree(struct inode *ip) +{ + int rc; + metapage_t *mp; + dtpage_t *p; + s64 bn, pbn; + int index, lastindex, v, h; + pxd_t *xd; + btstack_t btstack; + btframe_t *btsp; + btframe_t *parent; + u8 *stbl; + int psize = 256; + + printk("display B+-tree.\n"); + + /* clear stack */ + btsp = btstack.stack; + + /* + * start with root + * + * root resides in the inode + */ + bn = 0; + v = h = 0; + + /* + * first access of each page: + */ + newPage: + DT_GETPAGE(ip, bn, mp, psize, p, rc); + if (rc) + return rc; + + /* process entries forward from first index */ + index = 0; + lastindex = p->header.nextindex - 1; + + if (p->header.flag & BT_INTERNAL) { + /* + * first access of each internal page + */ + printk("internal page "); + dtDisplayPage(ip, bn, p); + + goto getChild; + } else { /* (p->header.flag & BT_LEAF) */ + + /* + * first access of each leaf page + */ + printk("leaf page "); + dtDisplayPage(ip, bn, p); + + /* + * process leaf page entries + * + for ( ; index <= lastindex; index++) + { + } + */ + + /* unpin the leaf page */ + DT_PUTPAGE(mp); + } + + /* + * go back up to the parent page + */ + getParent: + /* pop/restore parent entry for the current child page */ + if ((parent = (btsp == btstack.stack ? 
NULL : --btsp)) == NULL) + /* current page must have been root */ + return 0; + + /* + * parent page scan completed + */ + if ((index = parent->index) == (lastindex = parent->lastindex)) { + /* go back up to the parent page */ + goto getParent; + } + + /* + * parent page has entries remaining + */ + /* get back the parent page */ + bn = parent->bn; + /* v = parent->level; */ + DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* get next parent entry */ + index++; + + /* + * internal page: go down to child page of current entry + */ + getChild: + /* push/save current parent entry for the child page */ + btsp->bn = pbn = bn; + btsp->index = index; + btsp->lastindex = lastindex; + /* btsp->level = v; */ + /* btsp->node = h; */ + ++btsp; + + /* get current entry for the child page */ + stbl = DT_GETSTBL(p); + xd = (pxd_t *) & p->slot[stbl[index]]; + + /* + * first access of each internal entry: + */ + + /* get child page */ + bn = addressPXD(xd); + psize = lengthPXD(xd) << ip->i_ipmnt->i_l2bsize; + + printk("traverse down 0x%Lx[%d]->0x%Lx\n", pbn, index, bn); + v++; + h = index; + + /* release parent page */ + DT_PUTPAGE(mp); + + /* process the child page */ + goto newPage; +} + + +/* + * dtDisplayPage() + * + * function: display page + */ +int dtDisplayPage(struct inode *ip, s64 bn, dtpage_t * p) +{ + int rc; + metapage_t *mp; + ldtentry_t *lh; + idtentry_t *ih; + pxd_t *xd; + int i, j; + u8 *stbl; + wchar_t name[JFS_NAME_MAX + 1]; + component_t key = { 0, name }; + int freepage = 0; + + if (p == NULL) { + freepage = 1; + DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + } + + /* display page control */ + printk("bn:0x%Lx flag:0x%08x nextindex:%d\n", + bn, p->header.flag, p->header.nextindex); + + /* display entries */ + stbl = DT_GETSTBL(p); + for (i = 0, j = 1; i < p->header.nextindex; i++, j++) { + dtGetKey(p, i, &key, JFS_SBI(ip->i_sb)->mntflag); + key.name[key.namlen] = '\0'; + if (p->header.flag & BT_LEAF) { + lh = (ldtentry_t *) & p->slot[stbl[i]]; + printk("\t[%d] %s:%d", i, key.name, + le32_to_cpu(lh->inumber)); + } else { + ih = (idtentry_t *) & p->slot[stbl[i]]; + xd = (pxd_t *) ih; + bn = addressPXD(xd); + printk("\t[%d] %s:0x%Lx", i, key.name, bn); + } + + if (j == 4) { + printk("\n"); + j = 0; + } + } + + printk("\n"); + + if (freepage) + DT_PUTPAGE(mp); + + return 0; +} +#endif /* _JFS_DEBUG_DTREE */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_dtree.h linuxppc64_2_4/fs/jfs/jfs_dtree.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_dtree.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_dtree.h Tue Apr 23 11:22:11 2002 @@ -0,0 +1,288 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * Change History : + * + */ + +#ifndef _H_JFS_DTREE +#define _H_JFS_DTREE + +/* + * jfs_dtree.h: directory B+-tree manager + */ + +#include "jfs_btree.h" + +typedef union { + struct { + tid_t tid; + struct inode *ip; + u32 ino; + } leaf; + pxd_t xd; +} ddata_t; + + +/* + * entry segment/slot + * + * an entry consists of type dependent head/only segment/slot and + * additional segments/slots linked vi next field; + * N.B. last/only segment of entry is terminated by next = -1; + */ +/* + * directory page slot + */ +typedef struct { + s8 next; /* 1: */ + s8 cnt; /* 1: */ + wchar_t name[15]; /* 30: */ +} dtslot_t; /* (32) */ + + +#define DATASLOTSIZE 16 +#define L2DATASLOTSIZE 4 +#define DTSLOTSIZE 32 +#define L2DTSLOTSIZE 5 +#define DTSLOTHDRSIZE 2 +#define DTSLOTDATASIZE 30 +#define DTSLOTDATALEN 15 + +/* + * internal node entry head/only segment + */ +typedef struct { + pxd_t xd; /* 8: child extent descriptor */ + + s8 next; /* 1: */ + u8 namlen; /* 1: */ + wchar_t name[11]; /* 22: 2-byte aligned */ +} idtentry_t; /* (32) */ + +#define DTIHDRSIZE 10 +#define DTIHDRDATALEN 11 + +/* compute number of slots for entry */ +#define NDTINTERNAL(klen) ( ((4 + (klen)) + (15 - 1)) / 15 ) + + +/* + * leaf node entry head/only segment + * + * For legacy filesystems, name contains 13 wchars -- no index field + */ +typedef struct { + u32 inumber; /* 4: 4-byte aligned */ + s8 next; /* 1: */ + u8 namlen; /* 1: */ + wchar_t name[11]; /* 22: 2-byte aligned */ + u32 index; /* 4: index into dir_table */ +} ldtentry_t; /* (32) */ + +#define DTLHDRSIZE 6 +#define DTLHDRDATALEN_LEGACY 13 /* Old (OS/2) format */ +#define DTLHDRDATALEN 11 + +/* + * dir_table used for directory traversal during readdir + */ + +/* + * Keep persistent index for directory entries + */ +#define DO_INDEX(INODE) (JFS_SBI((INODE)->i_sb)->mntflag & JFS_DIR_INDEX) + +/* + * Maximum entry in inline directory table + */ +#define MAX_INLINE_DIRTABLE_ENTRY 13 + +typedef struct dir_table_slot { + u8 rsrvd; /* 1: */ + u8 flag; /* 1: 0 if free */ + u8 slot; /* 1: slot within leaf page of entry */ + u8 addr1; /* 1: upper 8 bits of leaf page address */ + u32 addr2; /* 4: lower 32 bits of leaf page address -OR- + index of next entry when this entry was deleted */ +} dir_table_slot_t; /* (8) */ + +/* + * flag values + */ +#define DIR_INDEX_VALID 1 +#define DIR_INDEX_FREE 0 + +#define DTSaddress(dir_table_slot, address64)\ +{\ + (dir_table_slot)->addr1 = ((u64)address64) >> 32;\ + (dir_table_slot)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ +} + +#define addressDTS(dts)\ + ( ((s64)((dts)->addr1)) << 32 | __le32_to_cpu((dts)->addr2) ) + +/* compute number of slots for entry */ +#define NDTLEAF_LEGACY(klen) ( ((2 + (klen)) + (15 - 1)) / 15 ) +#define NDTLEAF NDTINTERNAL + + +/* + * directory root page (in-line in on-disk inode): + * + * cf. dtpage_t below. 
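+ *
+ * (a worked sizing note, derived from the definitions above: the
+ * 32-byte header below overlays slot[0], so with DTROOTMAXSLOT = 9
+ * the in-line root has 8 slots left for entries, and stbl[8]
+ * likewise caps the root at 8 sorted entries; a name of klen wchars
+ * costs NDTINTERNAL(klen) slots, e.g. klen = 20 ->
+ * (4 + 20 + 14)/15 = 2 slots.)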
+ */ +typedef union { + struct { + dasd_t DASD; /* 16: DASD limit/usage info F226941 */ + + u8 flag; /* 1: */ + u8 nextindex; /* 1: next free entry in stbl */ + s8 freecnt; /* 1: free count */ + s8 freelist; /* 1: freelist header */ + + u32 idotdot; /* 4: parent inode number */ + + s8 stbl[8]; /* 8: sorted entry index table */ + } header; /* (32) */ + + dtslot_t slot[9]; +} dtroot_t; + +#define PARENT(IP) \ + (le32_to_cpu(JFS_IP(IP)->i_dtroot.header.idotdot)) + +#define DTROOTMAXSLOT 9 + +#define dtEmpty(IP) (JFS_IP(IP)->i_dtroot.header.nextindex == 0) + + +/* + * directory regular page: + * + * entry slot array of 32 byte slot + * + * sorted entry slot index table (stbl): + * contiguous slots at slot specified by stblindex, + * 1-byte per entry + * 512 byte block: 16 entry tbl (1 slot) + * 1024 byte block: 32 entry tbl (1 slot) + * 2048 byte block: 64 entry tbl (2 slot) + * 4096 byte block: 128 entry tbl (4 slot) + * + * data area: + * 512 byte block: 16 - 2 = 14 slot + * 1024 byte block: 32 - 2 = 30 slot + * 2048 byte block: 64 - 3 = 61 slot + * 4096 byte block: 128 - 5 = 123 slot + * + * N.B. index is 0-based; index fields refer to slot index + * except nextindex which refers to entry index in stbl; + * end of entry slot list or freelist is marked with -1. + */ +typedef union { + struct { + s64 next; /* 8: next sibling */ + s64 prev; /* 8: previous sibling */ + + u8 flag; /* 1: */ + u8 nextindex; /* 1: next entry index in stbl */ + s8 freecnt; /* 1: */ + s8 freelist; /* 1: slot index of head of freelist */ + + u8 maxslot; /* 1: number of slots in page slot[] */ + u8 stblindex; /* 1: slot index of start of stbl */ + u8 rsrvd[2]; /* 2: */ + + pxd_t self; /* 8: self pxd */ + } header; /* (32) */ + + dtslot_t slot[128]; +} dtpage_t; + +#define DTPAGEMAXSLOT 128 + +#define DT8THPGNODEBYTES 512 +#define DT8THPGNODETSLOTS 1 +#define DT8THPGNODESLOTS 16 + +#define DTQTRPGNODEBYTES 1024 +#define DTQTRPGNODETSLOTS 1 +#define DTQTRPGNODESLOTS 32 + +#define DTHALFPGNODEBYTES 2048 +#define DTHALFPGNODETSLOTS 2 +#define DTHALFPGNODESLOTS 64 + +#define DTFULLPGNODEBYTES 4096 +#define DTFULLPGNODETSLOTS 4 +#define DTFULLPGNODESLOTS 128 + +#define DTENTRYSTART 1 + +/* get sorted entry table of the page */ +#define DT_GETSTBL(p) ( ((p)->header.flag & BT_ROOT) ?\ + ((dtroot_t *)(p))->header.stbl : \ + (s8 *)&(p)->slot[(p)->header.stblindex] ) + +/* + * Flags for dtSearch + */ +#define JFS_CREATE 1 +#define JFS_LOOKUP 2 +#define JFS_REMOVE 3 +#define JFS_RENAME 4 + +#define DIRENTSIZ(namlen) \ + ( (sizeof(struct dirent) - 2*(JFS_NAME_MAX+1) + 2*((namlen)+1) + 3) &~ 3 ) + +/* + * Maximum file offset for directories. 
+ */ +#define DIREND INT_MAX + +/* + * external declarations + */ +extern void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot); + +extern int dtSearch(struct inode *ip, component_t * key, + ino_t * data, btstack_t * btstack, int flag); + +extern int dtInsert(tid_t tid, struct inode *ip, + component_t * key, ino_t * ino, btstack_t * btstack); + +extern int dtDelete(tid_t tid, + struct inode *ip, component_t * key, ino_t * data, int flag); + +extern int dtRelocate(tid_t tid, + struct inode *ip, s64 lmxaddr, pxd_t * opxd, s64 nxaddr); + +extern int dtModify(tid_t tid, struct inode *ip, + component_t * key, ino_t * orig_ino, ino_t new_ino, int flag); + +extern int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir); + +#ifdef _JFS_DEBUG_DTREE +extern int dtDisplayTree(struct inode *ip); + +extern int dtDisplayPage(struct inode *ip, s64 bn, dtpage_t * p); +#endif /* _JFS_DEBUG_DTREE */ + +#endif /* !_H_JFS_DTREE */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_extendfs.h linuxppc64_2_4/fs/jfs/jfs_extendfs.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_extendfs.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_extendfs.h Tue Apr 23 11:21:51 2002 @@ -0,0 +1,39 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _H_JFS_EXTENDFS +#define _H_JFS_EXTENDFS + +/* + * jfs_extendfs.h + */ +/* + * extendfs parameter list + */ +typedef struct { + u32 flag; /* 4: */ + u8 dev; /* 1: */ + u8 pad[3]; /* 3: */ + s64 LVSize; /* 8: LV size in LV block */ + s64 FSSize; /* 8: FS size in LV block */ + s32 LogSize; /* 4: inlinelog size in LV block */ +} extendfs_t; /* (28) */ + +/* plist flag */ +#define EXTENDFS_QUERY 0x00000001 + +#endif /* _H_JFS_EXTENDFS */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_extent.c linuxppc64_2_4/fs/jfs/jfs_extent.c --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_extent.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_extent.c Tue Apr 23 11:21:51 2002 @@ -0,0 +1,637 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * + * Module: jfs_extent.c: + */ + +#include +#include "jfs_incore.h" +#include "jfs_dmap.h" +#include "jfs_extent.h" +#include "jfs_debug.h" + +/* + * forward references + */ +static int extBalloc(struct inode *, s64, s64 *, s64 *); +static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *); +int extRecord(struct inode *, xad_t *); +static s64 extRoundDown(s64 nb); + +/* + * external references + */ +extern int dbExtend(struct inode *, s64, s64, s64); +extern int jfs_commit_inode(struct inode *, int); + + +#define DPD(a) (printk("(a): %d\n",(a))) +#define DPC(a) (printk("(a): %c\n",(a))) +#define DPL1(a) \ +{ \ + if ((a) >> 32) \ + printk("(a): %x%08x ",(a)); \ + else \ + printk("(a): %x ",(a) << 32); \ +} +#define DPL(a) \ +{ \ + if ((a) >> 32) \ + printk("(a): %x%08x\n",(a)); \ + else \ + printk("(a): %x\n",(a) << 32); \ +} + +#define DPD1(a) (printk("(a): %d ",(a))) +#define DPX(a) (printk("(a): %08x\n",(a))) +#define DPX1(a) (printk("(a): %08x ",(a))) +#define DPS(a) (printk("%s\n",(a))) +#define DPE(a) (printk("\nENTERING: %s\n",(a))) +#define DPE1(a) (printk("\nENTERING: %s",(a))) +#define DPS1(a) (printk(" %s ",(a))) + + +/* + * NAME: extAlloc() + * + * FUNCTION: allocate an extent for a specified page range within a + * file. + * + * PARAMETERS: + * ip - the inode of the file. + * xlen - requested extent length. + * pno - the starting page number with the file. + * xp - pointer to an xad. on entry, xad describes an + * extent that is used as an allocation hint if the + * xaddr of the xad is non-zero. on successful exit, + * the xad describes the newly allocated extent. + * abnr - boolean_t indicating whether the newly allocated extent + * should be marked as allocated but not recorded. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + * ENOSPC - insufficient disk resources. + */ +int +extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) +{ + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + s64 nxlen, nxaddr, xoff, hint, xaddr = 0; + int rc, nbperpage; + int xflag; + + /* This blocks if we are low on resources */ + txBeginAnon(ip->i_sb); + + /* validate extent length */ + if (xlen > MAXXLEN) + xlen = MAXXLEN; + + /* get the number of blocks per page */ + nbperpage = sbi->nbperpage; + + /* get the page's starting extent offset */ + xoff = pno << sbi->l2nbperpage; + + /* check if an allocation hint was provided */ + if ((hint = addressXAD(xp))) { + /* get the size of the extent described by the hint */ + nxlen = lengthXAD(xp); + + /* check if the hint is for the portion of the file + * immediately previous to the current allocation + * request and if hint extent has the same abnr + * value as the current request. if so, we can + * extend the hint extent to include the current + * extent if we can allocate the blocks immediately + * following the hint extent. + */ + if (offsetXAD(xp) + nxlen == xoff && + abnr == ((xp->flag & XAD_NOTRECORDED) ? TRUE : FALSE)) + xaddr = hint + nxlen; + + /* adjust the hint to the last block of the extent */ + hint += (nxlen - 1); + } + + /* allocate the disk blocks for the extent. initially, extBalloc() + * will try to allocate disk blocks for the requested size (xlen). 
+ * if this fails (xlen contiguous free blocks not available), it'll + * try to allocate a smaller number of blocks (producing a smaller + * extent), with this smaller number of blocks consisting of the + * requested number of blocks rounded down to the next smaller + * power of 2 number (i.e. 16 -> 8). it'll continue to round down + * and retry the allocation until the number of blocks to allocate + * is smaller than the number of blocks per page. + */ + nxlen = xlen; + if ((rc = + extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) { + return (rc); + } + + /* determine the value of the extent flag */ + xflag = (abnr == TRUE) ? XAD_NOTRECORDED : 0; + + /* if we can extend the hint extent to cover the current request, + * extend it. otherwise, insert a new extent to + * cover the current request. + */ + if (xaddr && xaddr == nxaddr) + rc = xtExtend(0, ip, xoff, (int) nxlen, 0); + else + rc = xtInsert(0, ip, xflag, xoff, (int) nxlen, &nxaddr, 0); + + /* if the extend or insert failed, + * free the newly allocated blocks and return the error. + */ + if (rc) { + dbFree(ip, nxaddr, nxlen); + return (rc); + } + + /* update the number of blocks allocated to the file */ + ip->i_blocks += LBLK2PBLK(ip->i_sb, nxlen); + + /* set the results of the extent allocation */ + XADaddress(xp, nxaddr); + XADlength(xp, nxlen); + XADoffset(xp, xoff); + xp->flag = xflag; + + mark_inode_dirty(ip); + + /* + * COMMIT_SyncList flags an anonymous tlock on page that is on + * sync list. + * We need to commit the inode to get the page written to disk. + */ + if (test_and_clear_cflag(COMMIT_Synclist,ip)) + jfs_commit_inode(ip, 0); + + return (0); +} + + +/* + * NAME: extRealloc() + * + * FUNCTION: extend the allocation of a file extent containing a + * partially backed last page. + * + * PARAMETERS: + * ip - the inode of the file. + * cp - cbuf for the partially backed last page. + * xlen - requested size of the resulting extent. + * xp - pointer to an xad. on successful exit, the xad + * describes the newly allocated extent. + * abnr - boolean_t indicating whether the newly allocated extent + * should be marked as allocated but not recorded. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + * ENOSPC - insufficient disk resources. + */ +int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) +{ + struct super_block *sb = ip->i_sb; + s64 xaddr, xlen, nxaddr, delta, xoff; + s64 ntail, nextend, ninsert; + int rc, nbperpage = JFS_SBI(sb)->nbperpage; + int xflag; + + /* This blocks if we are low on resources */ + txBeginAnon(ip->i_sb); + + /* validate extent length */ + if (nxlen > MAXXLEN) + nxlen = MAXXLEN; + + /* get the extend (partial) page's disk block address and + * number of blocks. + */ + xaddr = addressXAD(xp); + xlen = lengthXAD(xp); + xoff = offsetXAD(xp); + + /* if the extend page is abnr and if the request is for + * the extent to be allocated and recorded, + * make the page allocated and recorded. + */ + if ((xp->flag & XAD_NOTRECORDED) && !abnr) { + xp->flag = 0; + if ((rc = xtUpdate(0, ip, xp))) + return (rc); + } + + /* try to allocate the requested number of blocks for the + * extent. dbRealloc() first tries to satisfy the request + * by extending the allocation in place. otherwise, it will + * try to allocate a new set of blocks large enough for the + * request. in satisfying a request, dbReAlloc() may allocate + * less than what was requested but will always allocate enough + * space to satisfy the extend page. 
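+ *
+ * e.g. growing an extent from xlen = 2 to nxlen = 8 blocks with
+ * nbperpage = 4: if dbExtend() can grow the allocation in place,
+ * nxaddr == xaddr and a single xtExtend() suffices; otherwise a
+ * new extent is allocated and xtTailgate() relocates the tail
+ * (with an extra xtInsert() when the abnr values differ).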
+ */ + if ((rc = extBrealloc(ip, xaddr, xlen, &nxlen, &nxaddr))) + return (rc); + + delta = nxlen - xlen; + + /* check if the extend page is not abnr but the request is abnr + * and the allocated disk space is for more than one page. if this + * is the case, there is a mismatch of abnr between the extend page + * and the one or more pages following the extend page. as a result, + * two extents will have to be manipulated. the first will be that + * of the extent of the extend page and will be manipulated through + * an xtExtend() or an xtTailgate(), depending upon whether the + * disk allocation occurred as an in-place extension. the second + * extent will be manipulated (created) through an xtInsert() and + * will be for the pages following the extend page. + */ + if (abnr && (!(xp->flag & XAD_NOTRECORDED)) && (nxlen > nbperpage)) { + ntail = nbperpage; + nextend = ntail - xlen; + ninsert = nxlen - nbperpage; + + xflag = XAD_NOTRECORDED; + } else { + ntail = nxlen; + nextend = delta; + ninsert = 0; + + xflag = xp->flag; + } + + /* if we were able to extend the disk allocation in place, + * extend the extent. otherwise, move the extent to a + * new disk location. + */ + if (xaddr == nxaddr) { + /* extend the extent */ + if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) { + dbFree(ip, xaddr + xlen, delta); + return (rc); + } + } else { + /* + * move the extent to a new location: + * + * xtTailgate() accounts for relocated tail extent; + */ + if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) { + dbFree(ip, nxaddr, nxlen); + return (rc); + } + } + + + /* check if we need to also insert a new extent */ + if (ninsert) { + /* perform the insert. if it fails, free the blocks + * to be inserted and make it appear that we only did + * the xtExtend() or xtTailgate() above. + */ + xaddr = nxaddr + ntail; + if (xtInsert (0, ip, xflag, xoff + ntail, (int) ninsert, + &xaddr, 0)) { + dbFree(ip, xaddr, (s64) ninsert); + delta = nextend; + nxlen = ntail; + xflag = 0; + } + } + + /* update the inode with the number of blocks allocated */ + ip->i_blocks += LBLK2PBLK(sb, delta); + + /* set the return results */ + XADaddress(xp, nxaddr); + XADlength(xp, nxlen); + XADoffset(xp, xoff); + xp->flag = xflag; + + mark_inode_dirty(ip); + + return (0); +} + + +/* + * NAME: extHint() + * + * FUNCTION: produce an extent allocation hint for a file offset. + * + * PARAMETERS: + * ip - the inode of the file. + * offset - file offset for which the hint is needed. + * xp - pointer to the xad that is to be filled in with + * the hint. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + */ +int extHint(struct inode *ip, s64 offset, xad_t * xp) +{ + struct super_block *sb = ip->i_sb; + xadlist_t xadl; + lxdlist_t lxdl; + lxd_t lxd; + s64 prev; + int rc, nbperpage = JFS_SBI(sb)->nbperpage; + + /* init the hint as "no hint provided" */ + XADaddress(xp, 0); + + /* determine the starting extent offset of the page previous + * to the page containing the offset. + */ + prev = ((offset & ~POFFSET) >> JFS_SBI(sb)->l2bsize) - nbperpage; + + /* if the offset is in the first page of the file, + * no hint is provided. 
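+ *
+ * (e.g. with 4K pages and 1K blocks, l2bsize = 10 and nbperpage = 4:
+ * offset = 0x1800 gives prev = ((0x1800 & ~0xFFF) >> 10) - 4 = 0,
+ * the block offset of page 0; any offset below 0x1000 drives prev
+ * negative and no hint is returned.)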
+ */ + if (prev < 0) + return (0); + + /* prepare to lookup the previous page's extent info */ + lxdl.maxnlxd = 1; + lxdl.nlxd = 1; + lxdl.lxd = &lxd; + LXDoffset(&lxd, prev) + LXDlength(&lxd, nbperpage); + + xadl.maxnxad = 1; + xadl.nxad = 0; + xadl.xad = xp; + + /* perform the lookup */ + if ((rc = xtLookupList(ip, &lxdl, &xadl, 0))) + return (rc); + + /* check if no extent exists for the previous page. + * this is possible for sparse files. + */ + if (xadl.nxad == 0) { +// assert(ISSPARSE(ip)); + return (0); + } + + /* only preserve the abnr flag within the xad flags + * of the returned hint. + */ + xp->flag &= XAD_NOTRECORDED; + + assert(xadl.nxad == 1); + assert(lengthXAD(xp) == nbperpage); + + return (0); +} + + +/* + * NAME: extRecord() + * + * FUNCTION: change a page within a file from not recorded to recorded. + * + * PARAMETERS: + * ip - inode of the file. + * cp - cbuf of the file page. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + * ENOSPC - insufficient disk resources. + */ +int extRecord(struct inode *ip, xad_t * xp) +{ + int rc; + + txBeginAnon(ip->i_sb); + + /* update the extent */ + if ((rc = xtUpdate(0, ip, xp))) + return (rc); + +#ifdef _STILL_TO_PORT + /* no longer abnr */ + cp->cm_abnr = FALSE; + + /* mark the cbuf as modified */ + cp->cm_modified = TRUE; +#endif /* _STILL_TO_PORT */ + + return (0); +} + + +/* + * NAME: extFill() + * + * FUNCTION: allocate disk space for a file page that represents + * a file hole. + * + * PARAMETERS: + * ip - the inode of the file. + * cp - cbuf of the file page representing the hole. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + * ENOSPC - insufficient disk resources. + */ +int extFill(struct inode *ip, xad_t * xp) +{ + int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage; + s64 blkno = offsetXAD(xp) >> ip->i_blksize; + +// assert(ISSPARSE(ip)); + + /* initialize the extent allocation hint */ + XADaddress(xp, 0); + + /* allocate an extent to fill the hole */ + if ((rc = extAlloc(ip, nbperpage, blkno, xp, FALSE))) + return (rc); + + assert(lengthPXD(xp) == nbperpage); + + return (0); +} + + +/* + * NAME: extBalloc() + * + * FUNCTION: allocate disk blocks to form an extent. + * + * initially, we will try to allocate disk blocks for the + * requested size (nblocks). if this fails (nblocks + * contiguous free blocks not available), we'll try to allocate + * a smaller number of blocks (producing a smaller extent), with + * this smaller number of blocks consisting of the requested + * number of blocks rounded down to the next smaller power of 2 + * number (i.e. 16 -> 8). we'll continue to round down and + * retry the allocation until the number of blocks to allocate + * is smaller than the number of blocks per page. + * + * PARAMETERS: + * ip - the inode of the file. + * hint - disk block number to be used as an allocation hint. + * *nblocks - pointer to an s64 value. on entry, this value specifies + * the desired number of blocks to be allocated. on successful + * exit, this value is set to the number of blocks actually + * allocated. + * blkno - pointer to a block address that is filled in on successful + * return with the starting block number of the newly + * allocated block range. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + * ENOSPC - insufficient disk resources. 
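+ *
+ * e.g. a request for 300 blocks with nbperpage = 8 walks the ladder
+ * 300 -> 256 -> 128 -> 64 -> 32 -> 16 -> 8 (extRoundDown() of each
+ * failing size); if dbAlloc() still returns ENOSPC at 8 blocks, the
+ * next step would fall below nbperpage, so ENOSPC is returned.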
+ */ +static int +extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) +{ + s64 nb, nblks, daddr, max; + int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage; + bmap_t *mp = JFS_SBI(ip->i_sb)->bmap; + + /* get the number of blocks to initially attempt to allocate. + * we'll first try the number of blocks requested unless this + * number is greater than the maximum number of contiguous free + * blocks in the map. in that case, we'll start off with the + * maximum free. + */ + max = (s64) 1 << mp->db_maxfreebud; + if (*nblocks >= max && *nblocks > nbperpage) + nb = nblks = (max > nbperpage) ? max : nbperpage; + else + nb = nblks = *nblocks; + + /* try to allocate blocks */ + while ((rc = dbAlloc(ip, hint, nb, &daddr))) { + /* if something other than an out of space error, + * stop and return this error. + */ + if (rc != ENOSPC) + return (rc); + + /* decrease the allocation request size */ + nb = min(nblks, extRoundDown(nb)); + + /* give up if we cannot cover a page */ + if (nb < nbperpage) + return (rc); + } + + *nblocks = nb; + *blkno = daddr; + + return (0); +} + + +/* + * NAME: extBrealloc() + * + * FUNCTION: attempt to extend an extent's allocation. + * + * initially, we will try to extend the extent's allocation + * in place. if this fails, we'll try to move the extent + * to a new set of blocks. if moving the extent, we initially + * will try to allocate disk blocks for the requested size + * (nnew). if this fails (nnew contiguous free blocks not + * available), we'll try to allocate a smaller number of + * blocks (producing a smaller extent), with this smaller + * number of blocks consisting of the requested number of + * blocks rounded down to the next smaller power of 2 + * number (i.e. 16 -> 8). we'll continue to round down and + * retry the allocation until the number of blocks to allocate + * is smaller than the number of blocks per page. + * + * PARAMETERS: + * ip - the inode of the file. + * blkno - starting block number of the extent's current allocation. + * nblks - number of blocks within the extent's current allocation. + * newnblks - pointer to an s64 value. on entry, this value is the + * new desired extent size (number of blocks). on + * successful exit, this value is set to the extent's actual + * new size (new number of blocks). + * newblkno - the starting block number of the extent's new allocation. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + * ENOSPC - insufficient disk resources. + */ +static int +extBrealloc(struct inode *ip, + s64 blkno, s64 nblks, s64 * newnblks, s64 * newblkno) +{ + int rc; + + /* try to extend in place */ + if ((rc = dbExtend(ip, blkno, nblks, *newnblks - nblks)) == 0) { + *newblkno = blkno; + return (0); + } else { + if (rc != ENOSPC) + return (rc); + } + + /* in place extension not possible. + * try to move the extent to a new set of blocks. + */ + return (extBalloc(ip, blkno, newnblks, newblkno)); +} + + +/* + * NAME: extRoundDown() + * + * FUNCTION: round down a specified number of blocks to the next + * smallest power of 2 number. + * + * PARAMETERS: + * nb - number of blocks to round down. + * + * RETURN VALUES: + * next smallest power of 2 number. + */ +static s64 extRoundDown(s64 nb) +{ + int i; + u64 m, k; + + for (i = 0, m = (u64) 1 << 63; i < 64; i++, m >>= 1) { + if (m & nb) + break; + } + + i = 63 - i; + k = (u64) 1 << i; + k = ((k - 1) & nb) ? 
k : k >> 1; + + return (k); +} diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_extent.h linuxppc64_2_4/fs/jfs/jfs_extent.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_extent.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_extent.h Tue Apr 23 11:21:51 2002 @@ -0,0 +1,31 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _H_JFS_EXTENT +#define _H_JFS_EXTENT + +/* get block allocation hint as location of disk inode */ +#define INOHINT(ip) \ + (addressPXD(&(JFS_IP(ip)->ixpxd)) + lengthPXD(&(JFS_IP(ip)->ixpxd)) - 1) + +extern int extAlloc(struct inode *, s64, s64, xad_t *, boolean_t); +extern int extFill(struct inode *, xad_t *); +extern int extHint(struct inode *, s64, xad_t *); +extern int extRealloc(struct inode *, s64, xad_t *, boolean_t); +extern int extRecord(struct inode *, xad_t *); + +#endif /* _H_JFS_EXTENT */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_filsys.h linuxppc64_2_4/fs/jfs/jfs_filsys.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_filsys.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_filsys.h Tue Apr 23 11:21:51 2002 @@ -0,0 +1,274 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * +*/ + +#ifndef _H_JFS_FILSYS +#define _H_JFS_FILSYS + +/* + * jfs_filsys.h + * + * file system (implementation-dependent) constants + * + * refer to for system wide implementation-dependent constants + */ + +/* + * file system option (superblock flag) + */ +/* platform option (conditional compilation) */ +#define JFS_AIX 0x80000000 /* AIX support */ +/* POSIX name/directory support */ + +#define JFS_OS2 0x40000000 /* OS/2 support */ +/* case-insensitive name/directory support */ + +#define JFS_DFS 0x20000000 /* DCE DFS LFS support */ + +#define JFS_LINUX 0x10000000 /* Linux support */ +/* case-sensitive name/directory support */ + +/* directory option */ +#define JFS_UNICODE 0x00000001 /* unicode name */ + +/* commit option */ +#define JFS_COMMIT 0x00000f00 /* commit option mask */ +#define JFS_GROUPCOMMIT 0x00000100 /* group (of 1) commit */ +#define JFS_LAZYCOMMIT 0x00000200 /* lazy commit */ +#define JFS_TMPFS 0x00000400 /* temporary file system - + * do not log/commit: + */ + +/* log logical volume option */ +#define JFS_INLINELOG 0x00000800 /* inline log within file system */ +#define JFS_INLINEMOVE 0x00001000 /* inline log being moved */ + +/* Secondary aggregate inode table */ +#define JFS_BAD_SAIT 0x00010000 /* current secondary ait is bad */ + +/* sparse regular file support */ +#define JFS_SPARSE 0x00020000 /* sparse regular file */ + +/* DASD Limits F226941 */ +#define JFS_DASD_ENABLED 0x00040000 /* DASD limits enabled */ +#define JFS_DASD_PRIME 0x00080000 /* Prime DASD usage on boot */ + +/* big endian flag */ +#define JFS_SWAP_BYTES 0x00100000 /* running on big endian computer */ + +/* Directory index */ +#define JFS_DIR_INDEX 0x00200000 /* Persistent index for */ + /* directory entries */ + + +/* + * buffer cache configuration + */ +/* page size */ +#ifdef PSIZE +#undef PSIZE +#endif +#define PSIZE 4096 /* page size (in byte) */ +#define L2PSIZE 12 /* log2(PSIZE) */ +#define POFFSET 4095 /* offset within page */ + +/* buffer page size */ +#define BPSIZE PSIZE + +/* + * fs fundamental size + * + * PSIZE >= file system block size >= PBSIZE >= DISIZE + */ +#define PBSIZE 512 /* physical block size (in byte) */ +#define L2PBSIZE 9 /* log2(PBSIZE) */ + +#define DISIZE 512 /* on-disk inode size (in byte) */ +#define L2DISIZE 9 /* log2(DISIZE) */ + +#define IDATASIZE 256 /* inode inline data size */ +#define IXATTRSIZE 128 /* inode inline extended attribute size */ + +#define XTPAGE_SIZE 4096 +#define log2_PAGESIZE 12 + +#define IAG_SIZE 4096 +#define IAG_EXTENT_SIZE 4096 +#define INOSPERIAG 4096 /* number of disk inodes per iag */ +#define L2INOSPERIAG 12 /* l2 number of disk inodes per iag */ +#define INOSPEREXT 32 /* number of disk inode per extent */ +#define L2INOSPEREXT 5 /* l2 number of disk inode per extent */ +#define IXSIZE (DISIZE * INOSPEREXT) /* inode extent size */ +#define INOSPERPAGE 8 /* number of disk inodes per 4K page */ +#define L2INOSPERPAGE 3 /* log2(INOSPERPAGE) */ + +#define IAGFREELIST_LWM 64 + +#define INODE_EXTENT_SIZE IXSIZE /* inode extent size */ +#define NUM_INODE_PER_EXTENT INOSPEREXT +#define NUM_INODE_PER_IAG INOSPERIAG + +#define MINBLOCKSIZE 512 +#define MAXBLOCKSIZE 4096 +#define MAXFILESIZE ((s64)1 << 52) + +#define JFS_LINK_MAX 65535 /* nlink_t is unsigned short */ + +/* Minimum number of bytes supported for a JFS partition */ 
+#define MINJFS (0x1000000) +#define MINJFSTEXT "16" + +/* + * file system block size -> physical block size + */ +#define LBOFFSET(x) ((x) & (PBSIZE - 1)) +#define LBNUMBER(x) ((x) >> L2PBSIZE) +#define LBLK2PBLK(sb,b) ((b) << (sb->s_blocksize_bits - L2PBSIZE)) +#define PBLK2LBLK(sb,b) ((b) >> (sb->s_blocksize_bits - L2PBSIZE)) +/* size in byte -> last page number */ +#define SIZE2PN(size) ( ((s64)((size) - 1)) >> (L2PSIZE) ) +/* size in byte -> last file system block number */ +#define SIZE2BN(size, l2bsize) ( ((s64)((size) - 1)) >> (l2bsize) ) + +/* + * fixed physical block address (physical block size = 512 byte) + * + * NOTE: since we can't guarantee a physical block size of 512 bytes the use of + * these macros should be removed and the byte offset macros used instead. + */ +#define SUPER1_B 64 /* primary superblock */ +#define AIMAP_B (SUPER1_B + 8) /* 1st extent of aggregate inode map */ +#define AITBL_B (AIMAP_B + 16) /* + * 1st extent of aggregate inode table + */ +#define SUPER2_B (AITBL_B + 32) /* 2ndary superblock pbn */ +#define BMAP_B (SUPER2_B + 8) /* block allocation map */ + +/* + * SIZE_OF_SUPER defines the total amount of space reserved on disk for the + * superblock. This is not the same as the superblock structure, since all of + * this space is not currently being used. + */ +#define SIZE_OF_SUPER PSIZE + +/* + * SIZE_OF_AG_TABLE defines the amount of space reserved to hold the AG table + */ +#define SIZE_OF_AG_TABLE PSIZE + +/* + * SIZE_OF_MAP_PAGE defines the amount of disk space reserved for each page of + * the inode allocation map (to hold iag) + */ +#define SIZE_OF_MAP_PAGE PSIZE + +/* + * fixed byte offset address + */ +#define SUPER1_OFF 0x8000 /* primary superblock */ +#define AIMAP_OFF (SUPER1_OFF + SIZE_OF_SUPER) + /* + * Control page of aggregate inode map + * followed by 1st extent of map + */ +#define AITBL_OFF (AIMAP_OFF + (SIZE_OF_MAP_PAGE << 1)) + /* + * 1st extent of aggregate inode table + */ +#define SUPER2_OFF (AITBL_OFF + INODE_EXTENT_SIZE) + /* + * secondary superblock + */ +#define BMAP_OFF (SUPER2_OFF + SIZE_OF_SUPER) + /* + * block allocation map + */ + +/* + * The following macro is used to indicate the number of reserved disk blocks at + * the front of an aggregate, in terms of physical blocks. This value is + * currently defined to be 32K. This turns out to be the same as the primary + * superblock's address, since it directly follows the reserved blocks. + */ +#define AGGR_RSVD_BLOCKS SUPER1_B + +/* + * The following macro is used to indicate the number of reserved bytes at the + * front of an aggregate. This value is currently defined to be 32K. This + * turns out to be the same as the primary superblock's byte offset, since it + * directly follows the reserved blocks. + */ +#define AGGR_RSVD_BYTES SUPER1_OFF + +/* + * The following macro defines the byte offset for the first inode extent in + * the aggregate inode table. This allows us to find the self inode to find the + * rest of the table. Currently this value is 44K. 
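+ *
+ * (SUPER1_OFF 32K + SIZE_OF_SUPER 4K = AIMAP_OFF at 36K; adding the
+ * two 4K map pages gives AITBL_OFF = 44K, as laid out above.)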
+ */ +#define AGGR_INODE_TABLE_START AITBL_OFF + +/* + * fixed reserved inode number + */ +/* aggregate inode */ +#define AGGR_RESERVED_I 0 /* aggregate inode (reserved) */ +#define AGGREGATE_I 1 /* aggregate inode map inode */ +#define BMAP_I 2 /* aggregate block allocation map inode */ +#define LOG_I 3 /* aggregate inline log inode */ +#define BADBLOCK_I 4 /* aggregate bad block inode */ +#define FILESYSTEM_I 16 /* 1st/only fileset inode in ait: + * fileset inode map inode + */ + +/* per fileset inode */ +#define FILESET_RSVD_I 0 /* fileset inode (reserved) */ +#define FILESET_EXT_I 1 /* fileset inode extension */ +#define ROOT_I 2 /* fileset root inode */ +#define ACL_I 3 /* fileset ACL inode */ + +#define FILESET_OBJECT_I 4 /* the first fileset inode available for a file + * or directory or link... + */ +#define FIRST_FILESET_INO 16 /* the first aggregate inode which describes + * an inode. (To fsck this is also the first + * inode in part 2 of the agg inode table.) + */ + +/* + * directory configuration + */ +#define JFS_NAME_MAX 255 +#define JFS_PATH_MAX BPSIZE + + +/* + * file system state (superblock state) + */ +#define FM_CLEAN 0x00000000 /* file system is unmounted and clean */ +#define FM_MOUNT 0x00000001 /* file system is mounted cleanly */ +#define FM_DIRTY 0x00000002 /* file system was not unmounted and clean + * when mounted or + * commit failure occurred while being mounted: + * fsck() must be run to repair + */ +#define FM_LOGREDO 0x00000004 /* log based recovery (logredo()) failed: + * fsck() must be run to repair + */ +#define FM_EXTENDFS 0x00000008 /* file system extendfs() in progress */ + +#endif /* _H_JFS_FILSYS */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_imap.c linuxppc64_2_4/fs/jfs/jfs_imap.c --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_imap.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_imap.c Tue Apr 23 11:25:34 2002 @@ -0,0 +1,3236 @@ +/* + + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/* + * Change History : + * + */ + +/* + * jfs_imap.c: inode allocation map manager + * + * Serialization: + * Each AG has a simple lock which is used to control the serialization of + * the AG level lists. This lock should be taken first whenever an AG + * level list will be modified or accessed. + * + * Each IAG is locked by obtaining the buffer for the IAG page. + * + * There is also a inode lock for the inode map inode. A read lock needs to + * be taken whenever an IAG is read from the map or the global level + * information is read. A write lock needs to be taken whenever the global + * level information is modified or an atomic operation needs to be used. + * + * If more than one IAG is read at one time, the read lock may not + * be given up until all of the IAG's are read. 
Otherwise, a deadlock + * may occur when trying to obtain the read lock while another thread + * holding the read lock is waiting on the IAG already being held. + * + * The control page of the inode map is read into memory by diMount(). + * Thereafter it should only be modified in memory and then it will be + * written out when the filesystem is unmounted by diUnmount(). + */ + +#include +#include +#include "jfs_incore.h" +#include "jfs_filsys.h" +#include "jfs_dinode.h" +#include "jfs_dmap.h" +#include "jfs_imap.h" +#include "jfs_metapage.h" +#include "jfs_superblock.h" +#include "jfs_debug.h" + +/* + * imap locks + */ +/* iag free list lock */ +#define IAGFREE_LOCK_INIT(imap) init_MUTEX(&imap->im_freelock) +#define IAGFREE_LOCK(imap) down(&imap->im_freelock) +#define IAGFREE_UNLOCK(imap) up(&imap->im_freelock) + +/* per ag iag list locks */ +#define AG_LOCK_INIT(imap,index) init_MUTEX(&(imap->im_aglock[index])) +#define AG_LOCK(imap,agno) down(&imap->im_aglock[agno]) +#define AG_UNLOCK(imap,agno) up(&imap->im_aglock[agno]) + +/* + * external references + */ +extern struct address_space_operations jfs_aops; + +/* + * forward references + */ +static int diAllocAG(imap_t *, int, boolean_t, struct inode *); +static int diAllocAny(imap_t *, int, boolean_t, struct inode *); +static int diAllocBit(imap_t *, iag_t *, int); +static int diAllocExt(imap_t *, int, struct inode *); +static int diAllocIno(imap_t *, int, struct inode *); +static int diFindFree(u32, int); +static int diNewExt(imap_t *, iag_t *, int); +static int diNewIAG(imap_t *, int *, int, metapage_t **); +static void duplicateIXtree(struct super_block *, s64, int, s64 *); + +static int diIAGRead(imap_t * imap, int, metapage_t **); +static int copy_from_dinode(dinode_t *, struct inode *); +static void copy_to_dinode(dinode_t *, struct inode *); + +/* + * debug code for double-checking inode map + */ +/* #define _JFS_DEBUG_IMAP 1 */ + +#ifdef _JFS_DEBUG_IMAP +#define DBG_DIINIT(imap) DBGdiInit(imap) +#define DBG_DIALLOC(imap, ino) DBGdiAlloc(imap, ino) +#define DBG_DIFREE(imap, ino) DBGdiFree(imap, ino) + +static void *DBGdiInit(imap_t * imap); +static void DBGdiAlloc(imap_t * imap, ino_t ino); +static void DBGdiFree(imap_t * imap, ino_t ino); +#else +#define DBG_DIINIT(imap) +#define DBG_DIALLOC(imap, ino) +#define DBG_DIFREE(imap, ino) +#endif /* _JFS_DEBUG_IMAP */ + +/* + * NAME: diMount() + * + * FUNCTION: initialize the incore inode map control structures for + * a fileset or aggregate init time. + * + * the inode map's control structure (dinomap_t) is + * brought in from disk and placed in virtual memory. + * + * PARAMETERS: + * ipimap - pointer to inode map inode for the aggregate or fileset. + * + * RETURN VALUES: + * 0 - success + * ENOMEM - insufficient free virtual memory. + * EIO - i/o error. + */ +int diMount(struct inode *ipimap) +{ + imap_t *imap; + metapage_t *mp; + int index; + dinomap_t *dinom_le; + + /* + * allocate/initialize the in-memory inode map control structure + */ + /* allocate the in-memory inode map control structure. */ + imap = (imap_t *) kmalloc(sizeof(imap_t), GFP_KERNEL); + if (imap == NULL) { + jERROR(1, ("diMount: kmalloc returned NULL!\n")); + return (ENOMEM); + } + + /* read the on-disk inode map control structure. */ + + mp = read_metapage(ipimap, + IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, + PSIZE, 0); + if (mp == NULL) { + kfree(imap); + return (EIO); + } + + /* copy the on-disk version to the in-memory version. 
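+ *
+ * (the on-disk dinomap_t fields are little-endian, hence the
+ * le32_to_cpu() conversions below; im_numinos and im_numfree are
+ * kept as atomic_t in host byte order.)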
*/ + dinom_le = (dinomap_t *) mp->data; + imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag); + imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag); + atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos)); + atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree)); + imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext); + imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext); + for (index = 0; index < MAXAG; index++) { + imap->im_agctl[index].inofree = + le32_to_cpu(dinom_le->in_agctl[index].inofree); + imap->im_agctl[index].extfree = + le32_to_cpu(dinom_le->in_agctl[index].extfree); + imap->im_agctl[index].numinos = + le32_to_cpu(dinom_le->in_agctl[index].numinos); + imap->im_agctl[index].numfree = + le32_to_cpu(dinom_le->in_agctl[index].numfree); + } + + /* release the buffer. */ + release_metapage(mp); + + /* + * allocate/initialize inode allocation map locks + */ + /* allocate and init iag free list lock */ + IAGFREE_LOCK_INIT(imap); + + /* allocate and init ag list locks */ + for (index = 0; index < MAXAG; index++) { + AG_LOCK_INIT(imap, index); + } + + /* bind the inode map inode and inode map control structure + * to each other. + */ + imap->im_ipimap = ipimap; + JFS_IP(ipimap)->i_imap = imap; + +// DBG_DIINIT(imap); + + return (0); +} + + +/* + * NAME: diUnmount() + * + * FUNCTION: write to disk the incore inode map control structures for + * a fileset or aggregate at unmount time. + * + * PARAMETERS: + * ipimap - pointer to inode map inode for the aggregate or fileset. + * + * RETURN VALUES: + * 0 - success + * ENOMEM - insufficient free virtual memory. + * EIO - i/o error. + */ +int diUnmount(struct inode *ipimap, int mounterror) +{ + imap_t *imap = JFS_IP(ipimap)->i_imap; + + /* + * update the on-disk inode map control structure + */ + + if (!(mounterror || isReadOnly(ipimap))) + diSync(ipimap); + + /* + * Invalidate the page cache buffers + */ + truncate_inode_pages(ipimap->i_mapping, 0); + + /* + * free in-memory control structure + */ + kfree(imap); + + return (0); +} + + +/* + * diSync() + */ +int diSync(struct inode *ipimap) +{ + dinomap_t *dinom_le; + imap_t *imp = JFS_IP(ipimap)->i_imap; + metapage_t *mp; + int index; + + /* + * write imap global conrol page + */ + /* read the on-disk inode map control structure */ + mp = get_metapage(ipimap, + IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, + PSIZE, 0); + if (mp == NULL) { + jERROR(1,("diSync: get_metapage failed!\n")); + return EIO; + } + + /* copy the in-memory version to the on-disk version */ + //memcpy(mp->data, &imp->im_imap,sizeof(dinomap_t)); + dinom_le = (dinomap_t *) mp->data; + dinom_le->in_freeiag = cpu_to_le32(imp->im_freeiag); + dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag); + dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos)); + dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree)); + dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext); + dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext); + for (index = 0; index < MAXAG; index++) { + dinom_le->in_agctl[index].inofree = + cpu_to_le32(imp->im_agctl[index].inofree); + dinom_le->in_agctl[index].extfree = + cpu_to_le32(imp->im_agctl[index].extfree); + dinom_le->in_agctl[index].numinos = + cpu_to_le32(imp->im_agctl[index].numinos); + dinom_le->in_agctl[index].numfree = + cpu_to_le32(imp->im_agctl[index].numfree); + } + + /* write out the control structure */ + write_metapage(mp); + + /* + * write out dirty pages of imap + */ + fsync_inode_data_buffers(ipimap); + + 
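+
+	/* write back the imap inode itself: as a "special" inode it
+	 * goes through diWriteSpecial() rather than the generic
+	 * dirty-inode path
+	 */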
diWriteSpecial(ipimap); + + return (0); +} + + +/* + * NAME: diRead() + * + * FUNCTION: initialize an incore inode from disk. + * + * on entry, the specifed incore inode should itself + * specify the disk inode number corresponding to the + * incore inode (i.e. i_number should be initialized). + * + * this routine handles incore inode initialization for + * both "special" and "regular" inodes. special inodes + * are those required early in the mount process and + * require special handling since much of the file system + * is not yet initialized. these "special" inodes are + * identified by a NULL inode map inode pointer and are + * actually initialized by a call to diReadSpecial(). + * + * for regular inodes, the iag describing the disk inode + * is read from disk to determine the inode extent address + * for the disk inode. with the inode extent address in + * hand, the page of the extent that contains the disk + * inode is read and the disk inode is copied to the + * incore inode. + * + * PARAMETERS: + * ip - pointer to incore inode to be initialized from disk. + * + * RETURN VALUES: + * 0 - success + * EIO - i/o error. + * ENOMEM - insufficient memory + * + */ +int diRead(struct inode *ip) +{ + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + int iagno, ino, extno, rc; + struct inode *ipimap; + dinode_t *dp; + iag_t *iagp; + metapage_t *mp; + s64 blkno, agstart; + imap_t *imap; + int block_offset; + int inodes_left; + uint pageno; + int rel_inode; + + jFYI(1, ("diRead: ino = %ld\n", ip->i_ino)); + + ipimap = sbi->ipimap; + JFS_IP(ip)->ipimap = ipimap; + + /* determine the iag number for this inode (number) */ + iagno = INOTOIAG(ip->i_ino); + + /* read the iag */ + imap = JFS_IP(ipimap)->i_imap; + IREAD_LOCK(ipimap); + rc = diIAGRead(imap, iagno, &mp); + IREAD_UNLOCK(ipimap); + if (rc) { + jERROR(1, ("diRead: diIAGRead returned %d\n", rc)); + return (rc); + } + + iagp = (iag_t *) mp->data; + + /* determine inode extent that holds the disk inode */ + ino = ip->i_ino & (INOSPERIAG - 1); + extno = ino >> L2INOSPEREXT; + + if ((lengthPXD(&iagp->inoext[extno]) != imap->im_nbperiext) || + (addressPXD(&iagp->inoext[extno]) == 0)) { + jERROR(1, ("diRead: Bad inoext: 0x%lx, 0x%lx\n", + (ulong) addressPXD(&iagp->inoext[extno]), + (ulong) lengthPXD(&iagp->inoext[extno]))); + release_metapage(mp); + updateSuper(ip->i_sb, FM_DIRTY); + return ESTALE; + } + + /* get disk block number of the page within the inode extent + * that holds the disk inode. 
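+ *
+ * (e.g. i_ino = 5000 with INOSPERIAG = 4096: iagno = 1, ino =
+ * 5000 & (INOSPERIAG - 1) = 904, extno = 904 >> L2INOSPEREXT = 28,
+ * so INOPBLK() looks inside inode extent 28 of iag 1.)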
+ */ + blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage); + + /* get the ag for the iag */ + agstart = le64_to_cpu(iagp->agstart); + + release_metapage(mp); + + rel_inode = (ino & (INOSPERPAGE - 1)); + pageno = blkno >> sbi->l2nbperpage; + + if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) { + /* + * OS/2 didn't always align inode extents on page boundaries + */ + inodes_left = + (sbi->nbperpage - block_offset) << sbi->l2niperblk; + + if (rel_inode < inodes_left) + rel_inode += block_offset << sbi->l2niperblk; + else { + pageno += 1; + rel_inode -= inodes_left; + } + } + + /* read the page of disk inode */ + mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); + if (mp == 0) { + jERROR(1, ("diRead: read_metapage failed\n")); + return EIO; + } + + /* locate the the disk inode requested */ + dp = (dinode_t *) mp->data; + dp += rel_inode; + + if (ip->i_ino != le32_to_cpu(dp->di_number)) { + jERROR(1, ("diRead: i_ino != di_number\n")); + updateSuper(ip->i_sb, FM_DIRTY); + rc = EIO; + } else if (le32_to_cpu(dp->di_nlink) == 0) { + jERROR(1, + ("diRead: di_nlink is zero. ino=%ld\n", ip->i_ino)); + updateSuper(ip->i_sb, FM_DIRTY); + rc = ESTALE; + } else + /* copy the disk inode to the in-memory inode */ + rc = copy_from_dinode(dp, ip); + + release_metapage(mp); + + /* set the ag for the inode */ + JFS_IP(ip)->agno = BLKTOAG(agstart, sbi); + + return (rc); +} + + +/* + * NAME: diReadSpecial() + * + * FUNCTION: initialize a 'special' inode from disk. + * + * this routines handles aggregate level inodes. The + * inode cache cannot differentiate between the + * aggregate inodes and the filesystem inodes, so we + * handle these here. We don't actually use the aggregate + * inode map, since these inodes are at a fixed location + * and in some cases the aggregate inode map isn't initialized + * yet. + * + * PARAMETERS: + * sb - filesystem superblock + * inum - aggregate inode number + * + * RETURN VALUES: + * new inode - success + * NULL - i/o error. + */ +struct inode *diReadSpecial(struct super_block *sb, ino_t inum) +{ + struct jfs_sb_info *sbi = JFS_SBI(sb); + uint address; + dinode_t *dp; + struct inode *ip; + metapage_t *mp; + int rc; + + ip = new_inode(sb); + if (ip == NULL) { + jERROR(1, + ("diReadSpecial: new_inode returned NULL!\n")); + return ip; + } + + rc = alloc_jfs_inode(ip); + if (rc) { + make_bad_inode(ip); + iput(ip); + return NULL; + } + + /* + * If ip->i_number >= 32 (INOSPEREXT), then read from secondary + * aggregate inode table. 
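+ *
+ * (e.g. inum = 20 in the primary table: address starts at
+ * AITBL_OFF >> L2PSIZE = page 11 (the 44K boundary), inum >> 3
+ * adds 2 pages, and the inode sits at slot 20 % 8 = 4 of that page.)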
+ */ + + if (inum >= INOSPEREXT) { + address = + addressPXD(&sbi->ait2) >> sbi->l2nbperpage; + inum -= INOSPEREXT; + ASSERT(inum < INOSPEREXT); + JFS_IP(ip)->ipimap = sbi->ipaimap2; + } else { + address = AITBL_OFF >> L2PSIZE; + JFS_IP(ip)->ipimap = sbi->ipaimap; + } + ip->i_ino = inum; + + address += inum >> 3; /* 8 inodes per 4K page */ + + /* read the page of fixed disk inode (AIT) in raw mode */ + jEVENT(0, + ("Reading aggregate inode %d from block %d\n", (uint) inum, + address)); + mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); + if (mp == NULL) { + ip->i_sb = NULL; + ip->i_nlink = 1; /* Don't want iput() deleting it */ + iput(ip); + return (NULL); + } + + /* get the pointer to the disk inode of interest */ + dp = (dinode_t *) (mp->data); + dp += inum % 8; /* 8 inodes per 4K page */ + + /* copy on-disk inode to in-memory inode */ + if ((copy_from_dinode(dp, ip)) != 0) { + /* handle bad return by returning NULL for ip */ + ip->i_sb = NULL; + ip->i_nlink = 1; /* Don't want iput() deleting it */ + iput(ip); + /* release the page */ + release_metapage(mp); + return (NULL); + + } + + ip->i_mapping->a_ops = &jfs_aops; + ip->i_mapping->gfp_mask = GFP_NOFS; + + if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) { + sbi->gengen = le32_to_cpu(dp->di_gengen); + sbi->inostamp = le32_to_cpu(dp->di_inostamp); + } + + /* release the page */ + release_metapage(mp); + + return (ip); +} + +/* + * NAME: diWriteSpecial() + * + * FUNCTION: Write the special inode to disk + * + * PARAMETERS: + * ip - special inode + * + * RETURN VALUES: none + */ + +void diWriteSpecial(struct inode *ip) +{ + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); + uint address; + dinode_t *dp; + ino_t inum = ip->i_ino; + metapage_t *mp; + + /* + * If ip->i_number >= 32 (INOSPEREXT), then write to secondary + * aggregate inode table. + */ + + if (!(ip->i_state & I_DIRTY)) + return; + + ip->i_state &= ~I_DIRTY; + + if (inum >= INOSPEREXT) { + address = + addressPXD(&sbi->ait2) >> sbi->l2nbperpage; + inum -= INOSPEREXT; + ASSERT(inum < INOSPEREXT); + } else { + address = AITBL_OFF >> L2PSIZE; + } + + address += inum >> 3; /* 8 inodes per 4K page */ + + /* read the page of fixed disk inode (AIT) in raw mode */ + jEVENT(0, + ("Reading aggregate inode %d from block %d\n", (uint) inum, + address)); + mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); + if (mp == NULL) { + jERROR(1, + ("diWriteSpecial: failed to read aggregate inode extent!\n")); + return; + } + + /* get the pointer to the disk inode of interest */ + dp = (dinode_t *) (mp->data); + dp += inum % 8; /* 8 inodes per 4K page */ + + /* copy on-disk inode to in-memory inode */ + copy_to_dinode(dp, ip); + memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288); + + if (inum == FILESYSTEM_I) + dp->di_gengen = cpu_to_le32(sbi->gengen); + + /* write the page */ + write_metapage(mp); +} + +/* + * NAME: diFreeSpecial() + * + * FUNCTION: Free allocated space for special inode + */ +void diFreeSpecial(struct inode *ip) +{ + if (ip == NULL) { + jERROR(1, ("diFreeSpecial called with NULL ip!\n")); + return; + } + fsync_inode_data_buffers(ip); + truncate_inode_pages(ip->i_mapping, 0); + iput(ip); +} + + + +/* + * NAME: diWrite() + * + * FUNCTION: write the on-disk inode portion of the in-memory inode + * to its corresponding on-disk inode. + * + * on entry, the specifed incore inode should itself + * specify the disk inode number corresponding to the + * incore inode (i.e. i_number should be initialized). 
+ *
+ *		the inode contains the inode extent address for the disk
+ *		inode.  with the inode extent address in hand, the
+ *		page of the extent that contains the disk inode is
+ *		read and the disk inode portion of the incore inode
+ *		is copied to the disk inode.
+ *
+ * PARAMETERS:
+ *	tid - transaction id
+ *	ip - pointer to incore inode to be written to the inode extent.
+ *
+ * RETURN VALUES:
+ *	0 - success
+ *	EIO - i/o error.
+ */
+int diWrite(tid_t tid, struct inode *ip)
+{
+	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
+	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
+	int rc = 0;
+	s32 ino;
+	dinode_t *dp;
+	s64 blkno;
+	int block_offset;
+	int inodes_left;
+	metapage_t *mp;
+	uint pageno;
+	int rel_inode;
+	int dioffset;
+	struct inode *ipimap;
+	uint type;
+	lid_t lid;
+	tlock_t *ditlck, *tlck;
+	linelock_t *dilinelock, *ilinelock;
+	lv_t *lv;
+	int n;
+
+	ipimap = jfs_ip->ipimap;
+
+	ino = ip->i_ino & (INOSPERIAG - 1);
+
+	assert(lengthPXD(&(jfs_ip->ixpxd)) ==
+	       JFS_IP(ipimap)->i_imap->im_nbperiext);
+	assert(addressPXD(&(jfs_ip->ixpxd)));
+
+	/*
+	 * read the page of disk inode containing the specified inode:
+	 */
+	/* compute the block address of the page */
+	blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage);
+
+	rel_inode = (ino & (INOSPERPAGE - 1));
+	pageno = blkno >> sbi->l2nbperpage;
+
+	if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
+		/*
+		 * OS/2 didn't always align inode extents on page boundaries
+		 */
+		inodes_left =
+		    (sbi->nbperpage - block_offset) << sbi->l2niperblk;
+
+		if (rel_inode < inodes_left)
+			rel_inode += block_offset << sbi->l2niperblk;
+		else {
+			pageno += 1;
+			rel_inode -= inodes_left;
+		}
+	}
+	/* read the page of disk inode */
+      retry:
+	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
+	if (mp == 0)
+		return (EIO);
+
+	/* get the pointer to the disk inode */
+	dp = (dinode_t *) mp->data;
+	dp += rel_inode;
+
+	dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE;
+
+	/*
+	 * acquire transaction lock on the on-disk inode;
+	 * N.B. tlock is acquired on ipimap not ip;
+	 */
+	if ((ditlck =
+	     txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL)
+		goto retry;
+	dilinelock = (linelock_t *) & ditlck->lock;
+
+	/*
+	 * copy btree root from in-memory inode to on-disk inode
+	 *
+	 * (tlock is taken from inline B+-tree root in in-memory
+	 * inode when the B+-tree root is updated, which is pointed
+	 * by jfs_ip->blid as well as being on tx tlock list)
+	 *
+	 * further processing of btree root is based on the copy
+	 * in in-memory inode, where txLog() will log from, and,
+	 * for xtree root, txUpdateMap() will update map and reset
+	 * XAD_NEW bit;
+	 */
+
+	if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) {
+		/*
+		 * This is the special xtree inside the directory for storing
+		 * the directory table
+		 */
+		xtpage_t *p, *xp;
+		xad_t *xad;
+
+		jfs_ip->xtlid = 0;
+		tlck = lid_to_tlock(lid);
+		assert(tlck->type & tlckXTREE);
+		tlck->type |= tlckBTROOT;
+		tlck->mp = mp;
+		ilinelock = (linelock_t *) & tlck->lock;
+
+		/*
+		 * copy xtree root from inode to dinode:
+		 */
+		p = &jfs_ip->i_xtroot;
+		xp = (xtpage_t *) &dp->di_dirtable;
+		lv = (lv_t *) & ilinelock->lv;
+		for (n = 0; n < ilinelock->index; n++, lv++) {
+			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
+			       lv->length << L2XTSLOTSIZE);
+		}
+
+		/* reset on-disk (metadata page) xtree XAD_NEW bit */
+		xad = &xp->xad[XTENTRYSTART];
+		for (n = XTENTRYSTART;
+		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
+			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
+				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
+	}
+
+	if ((lid = jfs_ip->blid) == 0)
+		goto inlineData;
+	jfs_ip->blid = 0;
+
+	tlck = lid_to_tlock(lid);
+	type = tlck->type;
+	tlck->type |= tlckBTROOT;
+	tlck->mp = mp;
+	ilinelock = (linelock_t *) & tlck->lock;
+
+	/*
+	 * regular file: 16 byte (XAD slot) granularity
+	 */
+	if (type & tlckXTREE) {
+		xtpage_t *p, *xp;
+		xad_t *xad;
+
+		/*
+		 * copy xtree root from inode to dinode:
+		 */
+		p = &jfs_ip->i_xtroot;
+		xp = &dp->di_xtroot;
+		lv = (lv_t *) & ilinelock->lv;
+		for (n = 0; n < ilinelock->index; n++, lv++) {
+			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
+			       lv->length << L2XTSLOTSIZE);
+		}
+
+		/* reset on-disk (metadata page) xtree XAD_NEW bit */
+		xad = &xp->xad[XTENTRYSTART];
+		for (n = XTENTRYSTART;
+		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
+			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
+				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
+	}
+	/*
+	 * directory: 32 byte (directory entry slot) granularity
+	 */
+	else if (type & tlckDTREE) {
+		dtpage_t *p, *xp;
+
+		/*
+		 * copy dtree root from inode to dinode:
+		 */
+		p = (dtpage_t *) &jfs_ip->i_dtroot;
+		xp = (dtpage_t *) & dp->di_dtroot;
+		lv = (lv_t *) & ilinelock->lv;
+		for (n = 0; n < ilinelock->index; n++, lv++) {
+			memcpy(&xp->slot[lv->offset], &p->slot[lv->offset],
+			       lv->length << L2DTSLOTSIZE);
+		}
+	} else {
+		jERROR(1, ("diWrite: UFO tlock\n"));
+	}
+
+      inlineData:
+	/*
+	 * copy inline symlink from in-memory inode to on-disk inode
+	 */
+	if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) {
+		lv = (lv_t *) & dilinelock->lv[dilinelock->index];
+		lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE;
+		lv->length = 2;
+		memcpy(&dp->di_fastsymlink, jfs_ip->i_inline, IDATASIZE);
+		dilinelock->index++;
+	}
+#ifdef _STILL_TO_PORT
+	/*
+	 * copy inline data from in-memory inode to on-disk inode:
+	 * 128 byte slot granularity
+	 */
+	if (test_cflag(COMMIT_Inlineea, ip)) {
+		lv = (lv_t *) & dilinelock->lv[dilinelock->index];
+		lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE;
+		lv->length = 1;
+		memcpy(&dp->di_inlineea, &ip->i_inlineea,
+		       INODESLOTSIZE);
+		dilinelock->index++;
+
+		clear_cflag(COMMIT_Inlineea, ip);
+	}
+#endif				/* _STILL_TO_PORT */
+
+	/*
+	 * lock/copy inode base: 128 byte slot granularity
+	 */
+// baseDinode:
+	lv = (lv_t *) & dilinelock->lv[dilinelock->index];
+	lv->offset = dioffset >> L2INODESLOTSIZE;
+	copy_to_dinode(dp, ip);
+	if (test_and_clear_cflag(COMMIT_Dirtable, ip)) {
+		lv->length = 2;
+		memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96);
+	} else
+		lv->length = 1;
+	dilinelock->index++;
+
+#ifdef _JFS_FASTDASD
+	/*
+	 * We aren't logging changes to the DASD used in directory inodes,
+	 * but we need to write them to disk.  If we don't unmount cleanly,
+	 * mount will recalculate the DASD used.
+	 */
+	if (S_ISDIR(ip->i_mode)
+	    && (ip->i_ipmnt->i_mntflag & JFS_DASD_ENABLED))
+		bcopy(&ip->i_DASD, &dp->di_DASD, sizeof(dasd_t));
+#endif				/* _JFS_FASTDASD */
+
+	/* release the buffer holding the updated on-disk inode.
+	 * the buffer will be later written by commit processing.
+	 */
+	write_metapage(mp);
+
+	return (rc);
+}
+
+
+/*
+ * NAME:	diFree(ip)
+ *
+ * FUNCTION:	free a specified inode from the inode working map
+ *		for a fileset or aggregate.
+ *
+ *		if the inode to be freed represents the first (only)
+ *		free inode within the iag, the iag will be placed on
+ *		the ag free inode list.
+ *
+ *		freeing the inode will cause the inode extent to be
+ *		freed if the inode is the only allocated inode within
+ *		the extent.  in this case all the disk resource backing
+ *		up the inode extent will be freed.  in addition, the iag
+ *		will be placed on the ag extent free list if the extent
+ *		is the first free extent in the iag.  if freeing the
+ *		extent also means that no free inodes will exist for
+ *		the iag, the iag will also be removed from the ag free
+ *		inode list.
+ *
+ *		the iag describing the inode will be freed if the extent
+ *		is to be freed and it is the only backed extent within
+ *		the iag.  in this case, the iag will be removed from the
+ *		ag free extent list and ag free inode list and placed on
+ *		the inode map's free iag list.
+ *
+ *		a careful update approach is used to provide consistency
+ *		in the face of updates to multiple buffers.  under this
+ *		approach, all required buffers are obtained before making
+ *		any updates and are held until all updates are complete.
+ *
+ * PARAMETERS:
+ *	ip - inode to be freed.
+ *
+ * RETURN VALUES:
+ *	0 - success
+ *	EIO - i/o error.
+ */
+int diFree(struct inode *ip)
+{
+	int rc;
+	ino_t inum = ip->i_ino;
+	iag_t *iagp, *aiagp, *biagp, *ciagp, *diagp;
+	metapage_t *mp, *amp, *bmp, *cmp, *dmp;
+	int iagno, ino, extno, bitno, sword, agno;
+	int back, fwd;
+	u32 bitmap, mask;
+	struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap;
+	imap_t *imap = JFS_IP(ipimap)->i_imap;
+	s64 xaddr;
+	s64 xlen;
+	pxd_t freepxd;
+	tid_t tid;
+	struct inode *iplist[3];
+	tlock_t *tlck;
+	pxdlock_t *pxdlock;
+
+	/*
+	 * This is just to suppress compiler warnings.  The same logic that
+	 * references these variables is used to initialize them.
+	 */
+	aiagp = biagp = ciagp = diagp = NULL;
+
+	/* get the iag number containing the inode.
+	 */
+	iagno = INOTOIAG(inum);
+
+	/* make sure that the iag is contained within
+	 * the map.
+	 */
+	//assert(iagno < imap->im_nextiag);
+	if (iagno >= imap->im_nextiag) {
+		jERROR(1, ("diFree: inum = %d, iagno = %d, nextiag = %d\n",
+			   (uint) inum, iagno, imap->im_nextiag));
+		dump_mem("imap", imap, 32);
+		updateSuper(ip->i_sb, FM_DIRTY);
+		return EIO;
+	}
+
+	/* get the allocation group for this ino.
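+	 * (the ag number was cached in JFS_IP(ip)->agno when the inode
+	 * was read in or first allocated, so no iag access is needed
+	 * to recover it.)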
+	 */
+	agno = JFS_IP(ip)->agno;
+
+	/* Lock the AG specific inode map information
+	 */
+	AG_LOCK(imap, agno);
+
+	/* Obtain read lock on imap inode.  Don't release it until we have
+	 * read all of the IAGs that we are going to.
+	 */
+	IREAD_LOCK(ipimap);
+
+	/* read the iag.
+	 */
+	if ((rc = diIAGRead(imap, iagno, &mp))) {
+		IREAD_UNLOCK(ipimap);
+		AG_UNLOCK(imap, agno);
+		return (rc);
+	}
+	iagp = (iag_t *) mp->data;
+
+	/* get the inode number and extent number of the inode within
+	 * the iag and the inode number within the extent.
+	 */
+	ino = inum & (INOSPERIAG - 1);
+	extno = ino >> L2INOSPEREXT;
+	bitno = ino & (INOSPEREXT - 1);
+	mask = HIGHORDER >> bitno;
+
+	assert(le32_to_cpu(iagp->wmap[extno]) & mask);
+#ifdef _STILL_TO_PORT
+	assert((le32_to_cpu(iagp->pmap[extno]) & mask) == 0);
+#endif				/* _STILL_TO_PORT */
+	assert(addressPXD(&iagp->inoext[extno]));
+
+	/* compute the bitmap for the extent reflecting the freed inode.
+	 */
+	bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask;
+
+	if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) {
+		jERROR(1, ("diFree: numfree > numinos\n"));
+		release_metapage(mp);
+		IREAD_UNLOCK(ipimap);
+		AG_UNLOCK(imap, agno);
+		updateSuper(ip->i_sb, FM_DIRTY);
+		return EIO;
+	}
+	/*
+	 * inode extent still has some inodes or below low water mark:
+	 * keep the inode extent;
+	 */
+	if (bitmap ||
+	    imap->im_agctl[agno].numfree < 96 ||
+	    (imap->im_agctl[agno].numfree < 288 &&
+	     (((imap->im_agctl[agno].numfree * 100) /
+	       imap->im_agctl[agno].numinos) <= 25))) {
+		/* if the iag currently has no free inodes (i.e.,
+		 * the inode being freed is the first free inode of iag),
+		 * insert the iag at head of the inode free list for the ag.
+		 */
+		if (iagp->nfreeinos == 0) {
+			/* check if there are any iags on the ag inode
+			 * free list.  if so, read the first one so that
+			 * we can link the current iag onto the list at
+			 * the head.
+			 */
+			if ((fwd = imap->im_agctl[agno].inofree) >= 0) {
+				/* read the iag that currently is the head
+				 * of the list.
+				 */
+				if ((rc = diIAGRead(imap, fwd, &amp))) {
+					IREAD_UNLOCK(ipimap);
+					AG_UNLOCK(imap, agno);
+					release_metapage(mp);
+					return (rc);
+				}
+				aiagp = (iag_t *) amp->data;
+
+				/* make current head point back to the iag.
+				 */
+				aiagp->inofreeback = cpu_to_le32(iagno);
+
+				write_metapage(amp);
+			}
+
+			/* iag points forward to current head and iag
+			 * becomes the new head of the list.
+			 */
+			iagp->inofreefwd =
+			    cpu_to_le32(imap->im_agctl[agno].inofree);
+			iagp->inofreeback = -1;
+			imap->im_agctl[agno].inofree = iagno;
+		}
+		IREAD_UNLOCK(ipimap);
+
+		/* update the free inode summary map for the extent if
+		 * freeing the inode means the extent will now have free
+		 * inodes (i.e., the inode being freed is the first free
+		 * inode of extent).
+		 */
+		if (iagp->wmap[extno] == ONES) {
+			sword = extno >> L2EXTSPERSUM;
+			bitno = extno & (EXTSPERSUM - 1);
+			iagp->inosmap[sword] &=
+			    cpu_to_le32(~(HIGHORDER >> bitno));
+		}
+
+		/* update the bitmap.
+		 */
+		iagp->wmap[extno] = cpu_to_le32(bitmap);
+		DBG_DIFREE(imap, inum);
+
+		/* update the free inode counts at the iag, ag and
+		 * map level.
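+		 * (the iag counter is kept little-endian on disk, hence
+		 * the le32_to_cpu/cpu_to_le32 round trip just below.)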
+		 */
+		iagp->nfreeinos =
+		    cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 1);
+		imap->im_agctl[agno].numfree += 1;
+		atomic_inc(&imap->im_numfree);
+
+		/* release the AG inode map lock
+		 */
+		AG_UNLOCK(imap, agno);
+
+		/* write the iag */
+		write_metapage(mp);
+
+		return (0);
+	}
+
+
+	/*
+	 * inode extent has become free and above low water mark:
+	 * free the inode extent;
+	 */
+
+	/*
+	 * prepare to update iag list(s) (careful update step 1)
+	 */
+	amp = bmp = cmp = dmp = NULL;
+	fwd = back = -1;
+
+	/* check if the iag currently has no free extents.  if so,
+	 * it will be placed on the head of the ag extent free list.
+	 */
+	if (iagp->nfreeexts == 0) {
+		/* check if the ag extent free list has any iags.
+		 * if so, read the iag at the head of the list now.
+		 * this (head) iag will be updated later to reflect
+		 * the addition of the current iag at the head of
+		 * the list.
+		 */
+		if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
+			if ((rc = diIAGRead(imap, fwd, &amp)))
+				goto error_out;
+			aiagp = (iag_t *) amp->data;
+		}
+	} else {
+		/* iag has free extents.  check if the addition of a free
+		 * extent will cause all extents to be free within this
+		 * iag.  if so, the iag will be removed from the ag extent
+		 * free list and placed on the inode map's free iag list.
+		 */
+		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
+			/* in preparation for removing the iag from the
+			 * ag extent free list, read the iags preceding
+			 * and following the iag on the ag extent free
+			 * list.
+			 */
+			if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
+				if ((rc = diIAGRead(imap, fwd, &amp)))
+					goto error_out;
+				aiagp = (iag_t *) amp->data;
+			}
+
+			if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
+				if ((rc = diIAGRead(imap, back, &bmp)))
+					goto error_out;
+				biagp = (iag_t *) bmp->data;
+			}
+		}
+	}
+
+	/* remove the iag from the ag inode free list if freeing
+	 * this extent causes the iag to have no free inodes.
+	 */
+	if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
+		int inofreeback = le32_to_cpu(iagp->inofreeback);
+		int inofreefwd = le32_to_cpu(iagp->inofreefwd);
+
+		/* in preparation for removing the iag from the
+		 * ag inode free list, read the iags preceding
+		 * and following the iag on the ag inode free
+		 * list.  before reading these iags, we must make
+		 * sure that we don't already have them in hand
+		 * from up above, since re-reading an iag (buffer)
+		 * we are currently holding would cause a deadlock.
+		 */
+		if (inofreefwd >= 0) {
+
+			if (inofreefwd == fwd)
+				ciagp = (iag_t *) amp->data;
+			else if (inofreefwd == back)
+				ciagp = (iag_t *) bmp->data;
+			else {
+				if ((rc =
+				     diIAGRead(imap, inofreefwd, &cmp)))
+					goto error_out;
+				assert(cmp != NULL);
+				ciagp = (iag_t *) cmp->data;
+			}
+			assert(ciagp != NULL);
+		}
+
+		if (inofreeback >= 0) {
+			if (inofreeback == fwd)
+				diagp = (iag_t *) amp->data;
+			else if (inofreeback == back)
+				diagp = (iag_t *) bmp->data;
+			else {
+				if ((rc =
+				     diIAGRead(imap, inofreeback, &dmp)))
+					goto error_out;
+				assert(dmp != NULL);
+				diagp = (iag_t *) dmp->data;
+			}
+			assert(diagp != NULL);
+		}
+	}
+
+	IREAD_UNLOCK(ipimap);
+
+	/*
+	 * invalidate any page of the inode extent freed from buffer cache;
+	 */
+	freepxd = iagp->inoext[extno];
+	xaddr = addressPXD(&iagp->inoext[extno]);
+	xlen = lengthPXD(&iagp->inoext[extno]);
+	invalidate_metapages(JFS_SBI(ip->i_sb)->direct_inode, xaddr, xlen);
+
+	/*
+	 * update iag list(s) (careful update step 2)
+	 */
+	/* add the iag to the ag extent free list if this is the
+	 * first free extent for the iag.
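+	 * (note that nfreeexts still holds the pre-free count here; it
+	 * is only incremented further below, so a value of zero means
+	 * the extent being freed is this iag's first free extent.)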
+	 */
+	if (iagp->nfreeexts == 0) {
+		if (fwd >= 0)
+			aiagp->extfreeback = cpu_to_le32(iagno);
+
+		iagp->extfreefwd =
+		    cpu_to_le32(imap->im_agctl[agno].extfree);
+		iagp->extfreeback = -1;
+		imap->im_agctl[agno].extfree = iagno;
+	} else {
+		/* remove the iag from the ag extent list if all extents
+		 * are now free and place it on the inode map iag free list.
+		 */
+		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
+			if (fwd >= 0)
+				aiagp->extfreeback = iagp->extfreeback;
+
+			if (back >= 0)
+				biagp->extfreefwd = iagp->extfreefwd;
+			else
+				imap->im_agctl[agno].extfree =
+				    le32_to_cpu(iagp->extfreefwd);
+
+			iagp->extfreefwd = iagp->extfreeback = -1;
+
+			IAGFREE_LOCK(imap);
+			iagp->iagfree = cpu_to_le32(imap->im_freeiag);
+			imap->im_freeiag = iagno;
+			IAGFREE_UNLOCK(imap);
+		}
+	}
+
+	/* remove the iag from the ag inode free list if freeing
+	 * this extent causes the iag to have no free inodes.
+	 */
+	if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
+		if ((int) le32_to_cpu(iagp->inofreefwd) >= 0)
+			ciagp->inofreeback = iagp->inofreeback;
+
+		if ((int) le32_to_cpu(iagp->inofreeback) >= 0)
+			diagp->inofreefwd = iagp->inofreefwd;
+		else
+			imap->im_agctl[agno].inofree =
+			    le32_to_cpu(iagp->inofreefwd);
+
+		iagp->inofreefwd = iagp->inofreeback = -1;
+	}
+
+	/* update the inode extent address and working map
+	 * to reflect the free extent.
+	 * the permanent map should have been updated already
+	 * for the inode being freed.
+	 */
+	assert(iagp->pmap[extno] == 0);
+	iagp->wmap[extno] = 0;
+	DBG_DIFREE(imap, inum);
+	PXDlength(&iagp->inoext[extno], 0);
+	PXDaddress(&iagp->inoext[extno], 0);
+
+	/* update the free extent and free inode summary maps
+	 * to reflect the freed extent.
+	 * the inode summary map is marked to indicate no inodes
+	 * available for the freed extent.
+	 */
+	sword = extno >> L2EXTSPERSUM;
+	bitno = extno & (EXTSPERSUM - 1);
+	mask = HIGHORDER >> bitno;
+	iagp->inosmap[sword] |= cpu_to_le32(mask);
+	iagp->extsmap[sword] &= cpu_to_le32(~mask);
+
+	/* update the number of free inodes and number of free extents
+	 * for the iag.
+	 */
+	iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) -
+				      (INOSPEREXT - 1));
+	iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) + 1);
+
+	/* update the number of free inodes and backed inodes
+	 * at the ag and inode map level.
+	 */
+	imap->im_agctl[agno].numfree -= (INOSPEREXT - 1);
+	imap->im_agctl[agno].numinos -= INOSPEREXT;
+	atomic_sub(INOSPEREXT - 1, &imap->im_numfree);
+	atomic_sub(INOSPEREXT, &imap->im_numinos);
+
+	if (amp)
+		write_metapage(amp);
+	if (bmp)
+		write_metapage(bmp);
+	if (cmp)
+		write_metapage(cmp);
+	if (dmp)
+		write_metapage(dmp);
+
+	/*
+	 * start transaction to update block allocation map
+	 * for the inode extent freed;
+	 *
+	 * N.B. AG_LOCK is released and iag will be released below, and
+	 * other thread may allocate inode from/reusing the ixad freed
+	 * BUT with new/different backing inode extent from the extent
+	 * to be freed by the transaction;
+	 */
+	tid = txBegin(ipimap->i_sb, COMMIT_FORCE);
+
+	/* acquire tlock of the iag page of the freed ixad
+	 * to force the page NOHOMEOK (even though no data is
+	 * logged from the iag page) until NOREDOPAGE|FREEXTENT log
+	 * for the free of the extent is committed;
+	 * write FREEXTENT|NOREDOPAGE log record
+	 * N.B. linelock is overlaid as freed extent descriptor;
+	 */
+	tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE);
+	pxdlock = (pxdlock_t *) & tlck->lock;
+	pxdlock->flag = mlckFREEPXD;
+	pxdlock->pxd = freepxd;
+	pxdlock->index = 1;
+
+	write_metapage(mp);
+
+	iplist[0] = ipimap;
+
+	/*
+	 * logredo needs the IAG number and IAG extent index in order
+	 * to ensure that the IMap is consistent.  The least disruptive
+	 * way to pass these values through to the transaction manager
+	 * is in the iplist array.
+	 *
+	 * It's not pretty, but it works.
+	 */
+	iplist[1] = (struct inode *) (size_t)iagno;
+	iplist[2] = (struct inode *) (size_t)extno;
+
+	rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);	// D233382
+
+	txEnd(tid);
+
+	/* unlock the AG inode map information */
+	AG_UNLOCK(imap, agno);
+
+	return (0);
+
+      error_out:
+	IREAD_UNLOCK(ipimap);
+
+	if (amp)
+		release_metapage(amp);
+	if (bmp)
+		release_metapage(bmp);
+	if (cmp)
+		release_metapage(cmp);
+	if (dmp)
+		release_metapage(dmp);
+
+	AG_UNLOCK(imap, agno);
+
+	release_metapage(mp);
+
+	return (rc);
+}
+
+/*
+ * There are several places in the diAlloc* routines where we initialize
+ * the inode.
+ */
+static inline void
+diInitInode(struct inode *ip, int iagno, int ino, int extno, iag_t * iagp)
+{
+	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
+	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
+
+	ip->i_ino = (iagno << L2INOSPERIAG) + ino;
+	DBG_DIALLOC(JFS_IP(ipimap)->i_imap, ip->i_ino);
+	jfs_ip->ixpxd = iagp->inoext[extno];
+	jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
+}
+
+
+/*
+ * NAME:	diAlloc(pip,dir,ip)
+ *
+ * FUNCTION:	allocate a disk inode from the inode working map
+ *		for a fileset or aggregate.
+ *
+ * PARAMETERS:
+ *	pip - pointer to incore inode for the parent inode.
+ *	dir - TRUE if the new disk inode is for a directory.
+ *	ip - pointer to a new inode
+ *
+ * RETURN VALUES:
+ *	0 - success.
+ *	ENOSPC - insufficient disk resources.
+ *	EIO - i/o error.
+ */
+int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip)
+{
+	int rc, ino, iagno, addext, extno, bitno, sword;
+	int nwords, rem, i, agno;
+	u32 mask, inosmap, extsmap;
+	struct inode *ipimap;
+	metapage_t *mp;
+	ino_t inum;
+	iag_t *iagp;
+	imap_t *imap;
+
+	/* get the pointers to the inode map inode and the
+	 * corresponding imap control structure.
+	 */
+	ipimap = JFS_SBI(pip->i_sb)->ipimap;
+	imap = JFS_IP(ipimap)->i_imap;
+	JFS_IP(ip)->ipimap = ipimap;
+	JFS_IP(ip)->fileset = FILESYSTEM_I;
+
+	/* for a directory, the allocation policy is to start
+	 * at the ag level using the preferred ag.
+	 */
+	if (dir == TRUE) {
+		agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
+		AG_LOCK(imap, agno);
+		goto tryag;
+	}
+
+	/* for files, the policy starts off by trying to allocate from
+	 * the same iag containing the parent disk inode:
+	 * try to allocate the new disk inode close to the parent disk
+	 * inode, using parent disk inode number + 1 as the allocation
+	 * hint.  (we use a left-to-right policy to attempt to avoid
+	 * moving backward on the disk.)  compute the hint within the
+	 * file system and the iag.
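+	 * (illustrative example: for parent inode 100 the hint is inode
+	 * 101; ino = 101 & (INOSPERIAG - 1) then places the hint within
+	 * its iag.)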
+	 */
+	inum = pip->i_ino + 1;
+	ino = inum & (INOSPERIAG - 1);
+
+	/* back off the hint if it is outside of the iag */
+	if (ino == 0)
+		inum = pip->i_ino;
+
+	/* get the ag number of this iag */
+	agno = JFS_IP(pip)->agno;
+
+	/* lock the AG inode map information */
+	AG_LOCK(imap, agno);
+
+	/* Get read lock on imap inode */
+	IREAD_LOCK(ipimap);
+
+	/* get the iag number and read the iag */
+	iagno = INOTOIAG(inum);
+	if ((rc = diIAGRead(imap, iagno, &mp))) {
+		IREAD_UNLOCK(ipimap);
+		return (rc);
+	}
+	iagp = (iag_t *) mp->data;
+
+	/* determine if a new inode extent is allowed to be added to the
+	 * iag.  a new inode extent can be added to the iag if the ag
+	 * has less than 32 free disk inodes and the iag has free extents.
+	 */
+	addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts);
+
+	/*
+	 * try to allocate from the IAG
+	 */
+	/* check if the inode may be allocated from the iag
+	 * (i.e. the inode has free inodes or a new extent can be added).
+	 */
+	if (iagp->nfreeinos || addext) {
+		/* determine the extent number of the hint.
+		 */
+		extno = ino >> L2INOSPEREXT;
+
+		/* check if the extent containing the hint has backed
+		 * inodes.  if so, try to allocate within this extent.
+		 */
+		if (addressPXD(&iagp->inoext[extno])) {
+			bitno = ino & (INOSPEREXT - 1);
+			if ((bitno =
+			     diFindFree(le32_to_cpu(iagp->wmap[extno]),
+					bitno))
+			    < INOSPEREXT) {
+				ino = (extno << L2INOSPEREXT) + bitno;
+
+				/* a free inode (bit) was found within this
+				 * extent, so allocate it.
+				 */
+				rc = diAllocBit(imap, iagp, ino);
+				IREAD_UNLOCK(ipimap);
+				if (rc) {
+					assert(rc == EIO);
+				} else {
+					/* set the results of the allocation
+					 * and write the iag.
+					 */
+					diInitInode(ip, iagno, ino, extno,
+						    iagp);
+					mark_metapage_dirty(mp);
+				}
+				release_metapage(mp);
+
+				/* free the AG lock and return.
+				 */
+				AG_UNLOCK(imap, agno);
+				return (rc);
+			}
+
+			if (!addext)
+				extno =
+				    (extno ==
+				     EXTSPERIAG - 1) ? 0 : extno + 1;
+		}
+
+		/*
+		 * no free inodes within the extent containing the hint.
+		 *
+		 * try to allocate from the backed extents following
+		 * hint or, if appropriate (i.e. addext is true), allocate
+		 * an extent of free inodes at or following the extent
+		 * containing the hint.
+		 *
+		 * the free inode and free extent summary maps are used
+		 * here, so determine the starting summary map position
+		 * and the number of words we'll have to examine.  again,
+		 * the approach is to allocate following the hint, so we
+		 * might have to initially ignore prior bits of the summary
+		 * map that represent extents prior to the extent containing
+		 * the hint and later revisit these bits.
+		 */
+		bitno = extno & (EXTSPERSUM - 1);
+		nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1;
+		sword = extno >> L2EXTSPERSUM;
+
+		/* mask any prior bits for the starting words of the
+		 * summary map.
+		 */
+		mask = ONES << (EXTSPERSUM - bitno);
+		inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask;
+		extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask;
+
+		/* scan the free inode and free extent summary maps for
+		 * free resources.
+		 */
+		for (i = 0; i < nwords; i++) {
+			/* check if this word of the free inode summary
+			 * map describes an extent with free inodes.
+			 */
+			if (~inosmap) {
+				/* an extent with free inodes has been
+				 * found.  determine the extent number
+				 * and the inode number within the extent.
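+				 * (each summary word covers EXTSPERSUM
+				 * extents; a 0 bit marks an extent that
+				 * still has free inodes, which is why
+				 * `if (~inosmap)' is a sufficient test.)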
+				 */
+				rem = diFindFree(inosmap, 0);
+				extno = (sword << L2EXTSPERSUM) + rem;
+				rem =
+				    diFindFree(le32_to_cpu
+					       (iagp->wmap[extno]), 0);
+				assert(rem < INOSPEREXT);
+
+				/* determine the inode number within the
+				 * iag and allocate the inode from the
+				 * map.
+				 */
+				ino = (extno << L2INOSPEREXT) + rem;
+				rc = diAllocBit(imap, iagp, ino);
+				IREAD_UNLOCK(ipimap);
+				if (rc) {
+					assert(rc == EIO);
+				} else {
+					/* set the results of the allocation
+					 * and write the iag.
+					 */
+					diInitInode(ip, iagno, ino, extno,
+						    iagp);
+					mark_metapage_dirty(mp);
+				}
+				release_metapage(mp);
+
+				/* free the AG lock and return.
+				 */
+				AG_UNLOCK(imap, agno);
+				return (rc);
+
+			}
+
+			/* check if we may allocate an extent of free
+			 * inodes and whether this word of the free
+			 * extents summary map describes a free extent.
+			 */
+			if (addext && ~extsmap) {
+				/* a free extent has been found.  determine
+				 * the extent number.
+				 */
+				rem = diFindFree(extsmap, 0);
+				extno = (sword << L2EXTSPERSUM) + rem;
+
+				/* allocate an extent of free inodes.
+				 */
+				if ((rc = diNewExt(imap, iagp, extno))) {
+					/* if there is no disk space for a
+					 * new extent, try to allocate the
+					 * disk inode from somewhere else.
+					 */
+					if (rc == ENOSPC)
+						break;
+
+					assert(rc == EIO);
+				} else {
+					/* set the results of the allocation
+					 * and write the iag.
+					 */
+					diInitInode(ip, iagno,
+						    extno << L2INOSPEREXT,
+						    extno, iagp);
+					mark_metapage_dirty(mp);
+				}
+				release_metapage(mp);
+				/* free the imap inode & the AG lock & return.
+				 */
+				IREAD_UNLOCK(ipimap);
+				AG_UNLOCK(imap, agno);
+				return (rc);
+			}
+
+			/* move on to the next set of summary map words.
+			 */
+			sword = (sword == SMAPSZ - 1) ? 0 : sword + 1;
+			inosmap = le32_to_cpu(iagp->inosmap[sword]);
+			extsmap = le32_to_cpu(iagp->extsmap[sword]);
+		}
+	}
+	/* unlock imap inode */
+	IREAD_UNLOCK(ipimap);
+
+	/* nothing doing in this iag, so release it. */
+	release_metapage(mp);
+
+      tryag:
+	/*
+	 * try to allocate anywhere within the same AG as the parent inode.
+	 */
+	rc = diAllocAG(imap, agno, dir, ip);
+
+	AG_UNLOCK(imap, agno);
+
+	if (rc != ENOSPC)
+		return (rc);
+
+	/*
+	 * try to allocate in any AG.
+	 */
+	return (diAllocAny(imap, agno, dir, ip));
+}
+
+
+/*
+ * NAME:	diAllocAG(imap,agno,dir,ip)
+ *
+ * FUNCTION:	allocate a disk inode from the allocation group.
+ *
+ *		this routine first determines if a new extent of free
+ *		inodes should be added for the allocation group, with
+ *		the current request satisfied from this extent.  if this
+ *		is the case, an attempt will be made to do just that.  if
+ *		this attempt fails or it has been determined that a new
+ *		extent should not be added, an attempt is made to satisfy
+ *		the request by allocating an existing (backed) free inode
+ *		from the allocation group.
+ *
+ * PRE CONDITION: Already have the AG lock for this AG.
+ *
+ * PARAMETERS:
+ *	imap - pointer to inode map control structure.
+ *	agno - allocation group to allocate from.
+ *	dir - TRUE if the new disk inode is for a directory.
+ *	ip - pointer to the new inode to be filled in on successful return
+ *	     with the disk inode number allocated, its extent address
+ *	     and the start of the ag.
+ *
+ * RETURN VALUES:
+ *	0 - success.
+ *	ENOSPC - insufficient disk resources.
+ *	EIO - i/o error.
+ */
+static int
+diAllocAG(imap_t * imap, int agno, boolean_t dir, struct inode *ip)
+{
+	int rc, addext, numfree, numinos;
+
+	/* get the number of free and the number of backed disk
+	 * inodes currently within the ag.
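+	 * (worked example of the directory policy below: numfree = 100
+	 * and numinos = 600 gives 100 < 256 and (100 * 100) / 600 =
+	 * 16 <= 20, so a new inode extent would be added.)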
+	 */
+	numfree = imap->im_agctl[agno].numfree;
+	numinos = imap->im_agctl[agno].numinos;
+
+	if (numfree > numinos) {
+		jERROR(1, ("diAllocAG: numfree > numinos\n"));
+		updateSuper(ip->i_sb, FM_DIRTY);
+		return EIO;
+	}
+
+	/* determine if we should allocate a new extent of free inodes
+	 * within the ag: for directory inodes, add a new extent
+	 * if there are a small number of free inodes or the number of
+	 * free inodes is a small percentage of the number of backed
+	 * inodes.
+	 */
+	if (dir == TRUE)
+		addext = (numfree < 64 ||
+			  (numfree < 256
+			   && ((numfree * 100) / numinos) <= 20));
+	else
+		addext = (numfree == 0);
+
+	/*
+	 * try to allocate a new extent of free inodes.
+	 */
+	if (addext) {
+		/* if free space is not available for this new extent, try
+		 * below to allocate a free and existing (already backed)
+		 * inode from the ag.
+		 */
+		if ((rc = diAllocExt(imap, agno, ip)) != ENOSPC)
+			return (rc);
+	}
+
+	/*
+	 * try to allocate an existing free inode from the ag.
+	 */
+	return (diAllocIno(imap, agno, ip));
+}
+
+
+/*
+ * NAME:	diAllocAny(imap,agno,dir,ip)
+ *
+ * FUNCTION:	allocate a disk inode from any other allocation group.
+ *
+ *		this routine is called when an allocation attempt within
+ *		the primary allocation group has failed.  it attempts to
+ *		allocate an inode from any allocation group other than the
+ *		specified primary group.
+ *
+ * PARAMETERS:
+ *	imap - pointer to inode map control structure.
+ *	agno - primary allocation group (to avoid).
+ *	dir - TRUE if the new disk inode is for a directory.
+ *	ip - pointer to a new inode to be filled in on successful return
+ *	     with the disk inode number allocated, its extent address
+ *	     and the start of the ag.
+ *
+ * RETURN VALUES:
+ *	0 - success.
+ *	ENOSPC - insufficient disk resources.
+ *	EIO - i/o error.
+ */
+static int
+diAllocAny(imap_t * imap, int agno, boolean_t dir, struct inode *ip)
+{
+	int ag, rc;
+	int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag;
+
+
+	/* try to allocate from the ags following agno up to
+	 * the maximum ag number.
+	 */
+	for (ag = agno + 1; ag <= maxag; ag++) {
+		AG_LOCK(imap, ag);
+
+		rc = diAllocAG(imap, ag, dir, ip);
+
+		AG_UNLOCK(imap, ag);
+
+		if (rc != ENOSPC)
+			return (rc);
+	}
+
+	/* try to allocate from the ags in front of agno.
+	 */
+	for (ag = 0; ag < agno; ag++) {
+		AG_LOCK(imap, ag);
+
+		rc = diAllocAG(imap, ag, dir, ip);
+
+		AG_UNLOCK(imap, ag);
+
+		if (rc != ENOSPC)
+			return (rc);
+	}
+
+	/* no free disk inodes.
+	 */
+	return (ENOSPC);
+}
+
+
+/*
+ * NAME:	diAllocIno(imap,agno,ip)
+ *
+ * FUNCTION:	allocate a disk inode from the allocation group's free
+ *		inode list, returning an error if this free list is
+ *		empty (i.e. no iags on the list).
+ *
+ *		allocation occurs from the first iag on the list using
+ *		the iag's free inode summary map to find the leftmost
+ *		free inode in the iag.
+ *
+ * PRE CONDITION: Already have AG lock for this AG.
+ *
+ * PARAMETERS:
+ *	imap - pointer to inode map control structure.
+ *	agno - allocation group.
+ *	ip - pointer to new inode to be filled in on successful return
+ *	     with the disk inode number allocated, its extent address
+ *	     and the start of the ag.
+ *
+ * RETURN VALUES:
+ *	0 - success.
+ *	ENOSPC - insufficient disk resources.
+ *	EIO - i/o error.
+ */
+static int diAllocIno(imap_t * imap, int agno, struct inode *ip)
+{
+	int iagno, ino, rc, rem, extno, sword;
+	metapage_t *mp;
+	iag_t *iagp;
+
+	/* check if there are iags on the ag's free inode list.
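+	 * (an inofree value of -1 marks an empty list, hence the ENOSPC
+	 * return just below.)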
+	 */
+	if ((iagno = imap->im_agctl[agno].inofree) < 0)
+		return (ENOSPC);
+
+	/* obtain read lock on imap inode */
+	IREAD_LOCK(imap->im_ipimap);
+
+	/* read the iag at the head of the list.
+	 */
+	if ((rc = diIAGRead(imap, iagno, &mp))) {
+		IREAD_UNLOCK(imap->im_ipimap);
+		return (rc);
+	}
+	iagp = (iag_t *) mp->data;
+
+	/* there had better be free inodes in this iag if it is on
+	 * the list.
+	 */
+	//assert(iagp->nfreeinos);
+	if (!iagp->nfreeinos) {
+		jERROR(1,
+		       ("diAllocIno: nfreeinos = 0, but iag on freelist\n"));
+		jERROR(1, ("  agno = %d, iagno = %d\n", agno, iagno));
+		dump_mem("iag", iagp, 64);
+		updateSuper(ip->i_sb, FM_DIRTY);
+		return EIO;
+	}
+
+	/* scan the free inode summary map to find an extent
+	 * with free inodes.
+	 */
+	for (sword = 0;; sword++) {
+		assert(sword < SMAPSZ);
+
+		if (~iagp->inosmap[sword])
+			break;
+	}
+
+	/* found an extent with free inodes.  determine
+	 * the extent number.
+	 */
+	rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0);
+	assert(rem < EXTSPERSUM);
+	extno = (sword << L2EXTSPERSUM) + rem;
+
+	/* find the first free inode in the extent.
+	 */
+	rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0);
+	assert(rem < INOSPEREXT);
+
+	/* compute the inode number within the iag.
+	 */
+	ino = (extno << L2INOSPEREXT) + rem;
+
+	/* allocate the inode.
+	 */
+	rc = diAllocBit(imap, iagp, ino);
+	IREAD_UNLOCK(imap->im_ipimap);
+	if (rc) {
+		release_metapage(mp);
+		return (rc);
+	}
+
+	/* set the results of the allocation and write the iag.
+	 */
+	diInitInode(ip, iagno, ino, extno, iagp);
+	write_metapage(mp);
+
+	return (0);
+}
+
+
+/*
+ * NAME:	diAllocExt(imap,agno,ip)
+ *
+ * FUNCTION:	add a new extent of free inodes to an iag, allocating
+ *		an inode from this extent to satisfy the current allocation
+ *		request.
+ *
+ *		this routine first tries to find an existing iag with free
+ *		extents through the ag free extent list.  if the list is
+ *		not empty, the head of the list will be selected as the
+ *		home of the new extent of free inodes.  otherwise (the list
+ *		is empty), a new iag will be allocated for the ag to contain
+ *		the extent.
+ *
+ *		once an iag has been selected, the free extent summary map
+ *		is used to locate a free extent within the iag and diNewExt()
+ *		is called to initialize the extent, with initialization
+ *		including the allocation of the first inode of the extent
+ *		for the purpose of satisfying this request.
+ *
+ * PARAMETERS:
+ *	imap - pointer to inode map control structure.
+ *	agno - allocation group number.
+ *	ip - pointer to new inode to be filled in on successful return
+ *	     with the disk inode number allocated, its extent address
+ *	     and the start of the ag.
+ *
+ * RETURN VALUES:
+ *	0 - success.
+ *	ENOSPC - insufficient disk resources.
+ *	EIO - i/o error.
+ */
+static int diAllocExt(imap_t * imap, int agno, struct inode *ip)
+{
+	int rem, iagno, sword, extno, rc;
+	metapage_t *mp;
+	iag_t *iagp;
+
+	/* check if the ag has any iags with free extents.  if not,
+	 * allocate a new iag for the ag.
+	 */
+	if ((iagno = imap->im_agctl[agno].extfree) < 0) {
+		/* If successful, diNewIAG will obtain the read lock on the
+		 * imap inode.
+		 */
+		if ((rc = diNewIAG(imap, &iagno, agno, &mp))) {
+			return (rc);
+		}
+		iagp = (iag_t *) mp->data;
+
+		/* set the ag number if this is a brand new iag
+		 */
+		iagp->agstart =
+		    cpu_to_le64(AGTOBLK(agno, imap->im_ipimap));
+	} else {
+		/* read the iag.
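+		 * (the iag at the head of the ag free extent list will
+		 * become the home of the new inode extent.)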
+		 */
+		IREAD_LOCK(imap->im_ipimap);
+		if ((rc = diIAGRead(imap, iagno, &mp))) {
+			assert(0);
+		}
+		iagp = (iag_t *) mp->data;
+	}
+
+	/* using the free extent summary map, find a free extent.
+	 */
+	for (sword = 0;; sword++) {
+		assert(sword < SMAPSZ);
+		if (~iagp->extsmap[sword])
+			break;
+	}
+
+	/* determine the extent number of the free extent.
+	 */
+	rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0);
+	assert(rem < EXTSPERSUM);
+	extno = (sword << L2EXTSPERSUM) + rem;
+
+	/* initialize the new extent.
+	 */
+	rc = diNewExt(imap, iagp, extno);
+	IREAD_UNLOCK(imap->im_ipimap);
+	if (rc) {
+		/* something bad happened.  if a new iag was allocated,
+		 * place it back on the inode map's iag free list, and
+		 * clear the ag number information.
+		 */
+		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
+			IAGFREE_LOCK(imap);
+			iagp->iagfree = cpu_to_le32(imap->im_freeiag);
+			imap->im_freeiag = iagno;
+			IAGFREE_UNLOCK(imap);
+		}
+		write_metapage(mp);
+		return (rc);
+	}
+
+	/* set the results of the allocation and write the iag.
+	 */
+	diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp);
+
+	write_metapage(mp);
+
+	return (0);
+}
+
+
+/*
+ * NAME:	diAllocBit(imap,iagp,ino)
+ *
+ * FUNCTION:	allocate a backed inode from an iag.
+ *
+ *		this routine performs the mechanics of allocating a
+ *		specified inode from a backed extent.
+ *
+ *		if the inode to be allocated represents the last free
+ *		inode within the iag, the iag will be removed from the
+ *		ag free inode list.
+ *
+ *		a careful update approach is used to provide consistency
+ *		in the face of updates to multiple buffers.  under this
+ *		approach, all required buffers are obtained before making
+ *		any updates and are held until all updates are complete.
+ *
+ * PRE CONDITION: Already have buffer lock on iagp.  Already have AG lock on
+ *	this AG.  Must have read lock on imap inode.
+ *
+ * PARAMETERS:
+ *	imap - pointer to inode map control structure.
+ *	iagp - pointer to iag.
+ *	ino - inode number to be allocated within the iag.
+ *
+ * RETURN VALUES:
+ *	0 - success.
+ *	ENOSPC - insufficient disk resources.
+ *	EIO - i/o error.
+ */
+static int diAllocBit(imap_t * imap, iag_t * iagp, int ino)
+{
+	int extno, bitno, agno, sword, rc;
+	metapage_t *amp, *bmp;
+	iag_t *aiagp = 0, *biagp = 0;
+	u32 mask;
+
+	/* check if this is the last free inode within the iag.
+	 * if so, it will have to be removed from the ag free
+	 * inode list, so get the iags preceding and following
+	 * it on the list.
+	 */
+	if (iagp->nfreeinos == cpu_to_le32(1)) {
+		amp = bmp = NULL;
+
+		if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) {
+			if ((rc =
+			     diIAGRead(imap, le32_to_cpu(iagp->inofreefwd),
+				       &amp)))
+				return (rc);
+			aiagp = (iag_t *) amp->data;
+		}
+
+		if ((int) le32_to_cpu(iagp->inofreeback) >= 0) {
+			if ((rc =
+			     diIAGRead(imap,
+				       le32_to_cpu(iagp->inofreeback),
+				       &bmp))) {
+				if (amp)
+					release_metapage(amp);
+				return (rc);
+			}
+			biagp = (iag_t *) bmp->data;
+		}
+	}
+
+	/* get the ag number, extent number, inode number within
+	 * the extent.
+	 */
+	agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb));
+	extno = ino >> L2INOSPEREXT;
+	bitno = ino & (INOSPEREXT - 1);
+
+	/* compute the mask for setting the map.
+	 */
+	mask = HIGHORDER >> bitno;
+
+	/* the inode should be free and backed.
+	 */
+	assert((le32_to_cpu(iagp->pmap[extno]) & mask) == 0);
+	assert((le32_to_cpu(iagp->wmap[extno]) & mask) == 0);
+	assert(addressPXD(&iagp->inoext[extno]) != 0);
+
+	/* mark the inode as allocated in the working map.
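+	 * (illustrative example: ino 5 within its extent gives
+	 * mask = HIGHORDER >> 5 = 0x04000000.)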
+	 */
+	iagp->wmap[extno] |= cpu_to_le32(mask);
+
+	/* check if all inodes within the extent are now
+	 * allocated.  if so, update the free inode summary
+	 * map to reflect this.
+	 */
+	if (iagp->wmap[extno] == ONES) {
+		sword = extno >> L2EXTSPERSUM;
+		bitno = extno & (EXTSPERSUM - 1);
+		iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno);
+	}
+
+	/* if this was the last free inode in the iag, remove the
+	 * iag from the ag free inode list.
+	 */
+	if (iagp->nfreeinos == cpu_to_le32(1)) {
+		if (amp) {
+			aiagp->inofreeback = iagp->inofreeback;
+			write_metapage(amp);
+		}
+
+		if (bmp) {
+			biagp->inofreefwd = iagp->inofreefwd;
+			write_metapage(bmp);
+		} else {
+			imap->im_agctl[agno].inofree =
+			    le32_to_cpu(iagp->inofreefwd);
+		}
+		iagp->inofreefwd = iagp->inofreeback = -1;
+	}
+
+	/* update the free inode count at the iag, ag, inode
+	 * map levels.
+	 */
+	iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - 1);
+	imap->im_agctl[agno].numfree -= 1;
+	atomic_dec(&imap->im_numfree);
+
+	return (0);
+}
+
+
+/*
+ * NAME:	diNewExt(imap,iagp,extno)
+ *
+ * FUNCTION:	initialize a new extent of inodes for an iag, allocating
+ *		the first inode of the extent for use for the current
+ *		allocation request.
+ *
+ *		disk resources are allocated for the new extent of inodes
+ *		and the inodes themselves are initialized to reflect their
+ *		existence within the extent (i.e. their inode numbers and
+ *		inode extent addresses are set) and their initial state
+ *		(mode and link count are set to zero).
+ *
+ *		if the iag is new, it is not yet on an ag extent free list
+ *		but will now be placed on this list.
+ *
+ *		if the allocation of the new extent causes the iag to
+ *		have no free extent, the iag will be removed from the
+ *		ag extent free list.
+ *
+ *		if the iag has no free backed inodes, it will be placed
+ *		on the ag free inode list, since the addition of the new
+ *		extent will now cause it to have free inodes.
+ *
+ *		a careful update approach is used to provide consistency
+ *		(i.e. list consistency) in the face of updates to multiple
+ *		buffers.  under this approach, all required buffers are
+ *		obtained before making any updates and are held until all
+ *		updates are complete.
+ *
+ * PRE CONDITION: Already have buffer lock on iagp.  Already have AG lock on
+ *	this AG.  Must have read lock on imap inode.
+ *
+ * PARAMETERS:
+ *	imap - pointer to inode map control structure.
+ *	iagp - pointer to iag.
+ *	extno - extent number.
+ *
+ * RETURN VALUES:
+ *	0 - success.
+ *	ENOSPC - insufficient disk resources.
+ *	EIO - i/o error.
+ */
+static int diNewExt(imap_t * imap, iag_t * iagp, int extno)
+{
+	int agno, iagno, fwd, back, freei = 0, sword, rc;
+	iag_t *aiagp = 0, *biagp = 0, *ciagp = 0;
+	metapage_t *amp, *bmp, *cmp, *dmp;
+	struct inode *ipimap;
+	s64 blkno, hint;
+	int i, j;
+	u32 mask;
+	ino_t ino;
+	dinode_t *dp;
+	struct jfs_sb_info *sbi;
+
+	/* better have free extents.
+	 */
+	assert(iagp->nfreeexts);
+
+	/* get the inode map inode.
+	 */
+	ipimap = imap->im_ipimap;
+	sbi = JFS_SBI(ipimap->i_sb);
+
+	amp = bmp = cmp = NULL;
+
+	/* get the ag and iag numbers for this iag.
+	 */
+	agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
+	iagno = le32_to_cpu(iagp->iagnum);
+
+	/* check if this is the last free extent within the
+	 * iag.  if so, the iag must be removed from the ag
+	 * free extent list, so get the iags preceding and
+	 * following the iag on this list.
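+	 * (per the careful update approach described above, both
+	 * neighbours are read and held before any list pointer is
+	 * touched.)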
+	 */
+	if (iagp->nfreeexts == cpu_to_le32(1)) {
+		if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
+			if ((rc = diIAGRead(imap, fwd, &amp)))
+				return (rc);
+			aiagp = (iag_t *) amp->data;
+		}
+
+		if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
+			if ((rc = diIAGRead(imap, back, &bmp)))
+				goto error_out;
+			biagp = (iag_t *) bmp->data;
+		}
+	} else {
+		/* the iag has free extents.  if all extents are free
+		 * (as is the case for a newly allocated iag), the iag
+		 * must be added to the ag free extent list, so get
+		 * the iag at the head of the list in preparation for
+		 * adding this iag to this list.
+		 */
+		fwd = back = -1;
+		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
+			if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
+				if ((rc = diIAGRead(imap, fwd, &amp)))
+					goto error_out;
+				aiagp = (iag_t *) amp->data;
+			}
+		}
+	}
+
+	/* check if the iag has no free inodes.  if so, the iag
+	 * will have to be added to the ag free inode list, so get
+	 * the iag at the head of the list in preparation for
+	 * adding this iag to this list.  in doing this, we must
+	 * check if we already have the iag at the head of
+	 * the list in hand.
+	 */
+	if (iagp->nfreeinos == 0) {
+		freei = imap->im_agctl[agno].inofree;
+
+		if (freei >= 0) {
+			if (freei == fwd) {
+				ciagp = aiagp;
+			} else if (freei == back) {
+				ciagp = biagp;
+			} else {
+				if ((rc = diIAGRead(imap, freei, &cmp)))
+					goto error_out;
+				ciagp = (iag_t *) cmp->data;
+			}
+			assert(ciagp != NULL);
+		}
+	}
+
+	/* allocate disk space for the inode extent.
+	 */
+	if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0))
+		hint = ((s64) agno << sbi->bmap->db_agl2size) - 1;
+	else
+		hint = addressPXD(&iagp->inoext[extno - 1]) +
+		    lengthPXD(&iagp->inoext[extno - 1]) - 1;
+
+	if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno)))
+		goto error_out;
+
+	/* compute the inode number of the first inode within the
+	 * extent.
+	 */
+	ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT);
+
+	/* initialize the inodes within the newly allocated extent a
+	 * page at a time.
+	 */
+	for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) {
+		/* get a buffer for this page of disk inodes.
+		 */
+		dmp = get_metapage(ipimap, blkno + i, PSIZE, 1);
+		if (dmp == NULL) {
+			rc = EIO;
+			goto error_out;
+		}
+		dp = (dinode_t *) dmp->data;
+
+		/* initialize the inode number, mode, link count and
+		 * inode extent address.
+		 */
+		for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) {
+			dp->di_inostamp = cpu_to_le32(sbi->inostamp);
+			dp->di_number = cpu_to_le32(ino);
+			dp->di_fileset = cpu_to_le32(FILESYSTEM_I);
+			dp->di_mode = 0;
+			dp->di_nlink = 0;
+			PXDaddress(&(dp->di_ixpxd), blkno);
+			PXDlength(&(dp->di_ixpxd), imap->im_nbperiext);
+		}
+		write_metapage(dmp);
+	}
+
+	/* if this is the last free extent within the iag, remove the
+	 * iag from the ag free extent list.
+	 */
+	if (iagp->nfreeexts == cpu_to_le32(1)) {
+		if (fwd >= 0)
+			aiagp->extfreeback = iagp->extfreeback;
+
+		if (back >= 0)
+			biagp->extfreefwd = iagp->extfreefwd;
+		else
+			imap->im_agctl[agno].extfree =
+			    le32_to_cpu(iagp->extfreefwd);
+
+		iagp->extfreefwd = iagp->extfreeback = -1;
+	} else {
+		/* if the iag has all free extents (newly allocated iag),
+		 * add the iag to the ag free extent list.
+		 */
+		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
+			if (fwd >= 0)
+				aiagp->extfreeback = cpu_to_le32(iagno);
+
+			iagp->extfreefwd = cpu_to_le32(fwd);
+			iagp->extfreeback = -1;
+			imap->im_agctl[agno].extfree = iagno;
+		}
+	}
+
+	/* if the iag has no free inodes, add the iag to the
+	 * ag free inode list.
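+	 * (freei and ciagp were gathered earlier, before any update
+	 * was made, so they are safe to use here.)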
+	 */
+	if (iagp->nfreeinos == 0) {
+		if (freei >= 0)
+			ciagp->inofreeback = cpu_to_le32(iagno);
+
+		iagp->inofreefwd =
+		    cpu_to_le32(imap->im_agctl[agno].inofree);
+		iagp->inofreeback = -1;
+		imap->im_agctl[agno].inofree = iagno;
+	}
+
+	/* initialize the extent descriptor of the extent. */
+	PXDlength(&iagp->inoext[extno], imap->im_nbperiext);
+	PXDaddress(&iagp->inoext[extno], blkno);
+
+	/* initialize the working and persistent map of the extent.
+	 * the working map will be initialized such that
+	 * it indicates the first inode of the extent is allocated.
+	 */
+	iagp->wmap[extno] = cpu_to_le32(HIGHORDER);
+	iagp->pmap[extno] = 0;
+
+	/* update the free inode and free extent summary maps
+	 * for the extent to indicate the extent has free inodes
+	 * and no longer represents a free extent.
+	 */
+	sword = extno >> L2EXTSPERSUM;
+	mask = HIGHORDER >> (extno & (EXTSPERSUM - 1));
+	iagp->extsmap[sword] |= cpu_to_le32(mask);
+	iagp->inosmap[sword] &= cpu_to_le32(~mask);
+
+	/* update the free inode and free extent counts for the
+	 * iag.
+	 */
+	iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) +
+				      (INOSPEREXT - 1));
+	iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) - 1);
+
+	/* update the free and backed inode counts for the ag.
+	 */
+	imap->im_agctl[agno].numfree += (INOSPEREXT - 1);
+	imap->im_agctl[agno].numinos += INOSPEREXT;
+
+	/* update the free and backed inode counts for the inode map.
+	 */
+	atomic_add(INOSPEREXT - 1, &imap->im_numfree);
+	atomic_add(INOSPEREXT, &imap->im_numinos);
+
+	/* write the iags.
+	 */
+	if (amp)
+		write_metapage(amp);
+	if (bmp)
+		write_metapage(bmp);
+	if (cmp)
+		write_metapage(cmp);
+
+	return (0);
+
+      error_out:
+
+	/* release the iags.
+	 */
+	if (amp)
+		release_metapage(amp);
+	if (bmp)
+		release_metapage(bmp);
+	if (cmp)
+		release_metapage(cmp);
+
+	return (rc);
+}
+
+
+/*
+ * NAME:	diNewIAG(imap,iagnop,agno)
+ *
+ * FUNCTION:	allocate a new iag for an allocation group.
+ *
+ *		first tries to allocate the iag from the inode map
+ *		iagfree list:
+ *		if the list has free iags, the head of the list is removed
+ *		and returned to satisfy the request.
+ *		if the inode map's iag free list is empty, the inode map
+ *		is extended to hold a new iag.  this new iag is initialized
+ *		and returned to satisfy the request.
+ *
+ * PARAMETERS:
+ *	imap - pointer to inode map control structure.
+ *	iagnop - pointer to an iag number set with the number of the
+ *		 newly allocated iag upon successful return.
+ *	agno - allocation group number.
+ *	mpp - metapage pointer to be filled in with the new IAG's buffer
+ *
+ * RETURN VALUES:
+ *	0 - success.
+ *	ENOSPC - insufficient disk resources.
+ *	EIO - i/o error.
+ *
+ * serialization:
+ *	AG lock held on entry/exit;
+ *	write lock on the map is held inside;
+ *	read lock on the map is held on successful completion;
+ *
+ * note: new iag transaction:
+ *	. synchronously write iag;
+ *	. write log of xtree and inode of imap;
+ *	. commit;
+ *	. synchronous write of xtree (right to left, bottom to top);
+ *	. at start of logredo(): init in-memory imap with one additional iag page;
+ *	. at end of logredo(): re-read imap inode to determine
+ *	  new imap size;
+ */
+static int
+diNewIAG(imap_t * imap, int *iagnop, int agno, metapage_t ** mpp)
+{
+	int rc;
+	int iagno, i, xlen;
+	struct inode *ipimap;
+	struct super_block *sb;
+	struct jfs_sb_info *sbi;
+	metapage_t *mp;
+	iag_t *iagp;
+	s64 xaddr = 0;
+	s64 blkno;
+	tid_t tid;
+#ifdef _STILL_TO_PORT
+	xad_t xad;
+#endif				/* _STILL_TO_PORT */
+	struct inode *iplist[1];
+
+	/* pick up pointers to the inode map and mount inodes */
+	ipimap = imap->im_ipimap;
+	sb = ipimap->i_sb;
+	sbi = JFS_SBI(sb);
+
+	/* acquire the free iag lock */
+	IAGFREE_LOCK(imap);
+
+	/* if there are any iags on the inode map free iag list,
+	 * allocate the iag from the head of the list.
+	 */
+	if (imap->im_freeiag >= 0) {
+		/* pick up the iag number at the head of the list */
+		iagno = imap->im_freeiag;
+
+		/* determine the logical block number of the iag */
+		blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
+	} else {
+		/* no free iags.  the inode map will have to be extended
+		 * to include a new iag.
+		 */
+
+		/* acquire inode map lock */
+		IWRITE_LOCK(ipimap);
+
+		assert(ipimap->i_size >> L2PSIZE == imap->im_nextiag + 1);
+
+		/* get the next available iag number */
+		iagno = imap->im_nextiag;
+
+		/* make sure that we have not exceeded the maximum inode
+		 * number limit.
+		 */
+		if (iagno > (MAXIAGS - 1)) {
+			/* release the inode map lock */
+			IWRITE_UNLOCK(ipimap);
+
+			rc = ENOSPC;
+			goto out;
+		}
+
+		/*
+		 * synchronously append new iag page.
+		 */
+		/* determine the logical address of iag page to append */
+		blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
+
+		/* Allocate extent for new iag page */
+		xlen = sbi->nbperpage;
+		if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) {
+			/* release the inode map lock */
+			IWRITE_UNLOCK(ipimap);
+
+			goto out;
+		}
+
+		/* assign a buffer for the page */
+		mp = get_metapage(ipimap, xaddr, PSIZE, 1);
+		//bp = bmAssign(ipimap, blkno, xaddr, PSIZE, bmREAD_PAGE);
+		if (!mp) {
+			/* Free the blocks allocated for the iag since it was
+			 * not successfully added to the inode map
+			 */
+			dbFree(ipimap, xaddr, (s64) xlen);
+
+			/* release the inode map lock */
+			IWRITE_UNLOCK(ipimap);
+
+			rc = EIO;
+			goto out;
+		}
+		iagp = (iag_t *) mp->data;
+
+		/* init the iag */
+		memset(iagp, 0, sizeof(iag_t));
+		iagp->iagnum = cpu_to_le32(iagno);
+		iagp->inofreefwd = iagp->inofreeback = -1;
+		iagp->extfreefwd = iagp->extfreeback = -1;
+		iagp->iagfree = -1;
+		iagp->nfreeinos = 0;
+		iagp->nfreeexts = cpu_to_le32(EXTSPERIAG);
+
+		/* initialize the free inode summary map (free extent
+		 * summary map initialization handled by the memset above).
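+		 * (all ones in inosmap means no extent has free inodes
+		 * yet, since nothing is backed; extsmap left zero by the
+		 * memset means every extent is free.)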
+		 */
+		for (i = 0; i < SMAPSZ; i++)
+			iagp->inosmap[i] = ONES;
+
+		flush_metapage(mp);
+#ifdef _STILL_TO_PORT
+		/* synchronously write the iag page */
+		if (bmWrite(bp)) {
+			/* Free the blocks allocated for the iag since it was
+			 * not successfully added to the inode map
+			 */
+			dbFree(ipimap, xaddr, (s64) xlen);
+
+			/* release the inode map lock */
+			IWRITE_UNLOCK(ipimap);
+
+			rc = EIO;
+			goto out;
+		}
+
+		/* Now the iag is on disk */
+
+		/*
+		 * start transaction of update of the inode map
+		 * addressing structure pointing to the new iag page;
+		 */
+#endif				/* _STILL_TO_PORT */
+		tid = txBegin(sb, COMMIT_FORCE);
+
+		/* update the inode map addressing structure to point to it */
+		if ((rc =
+		     xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
+			/* Free the blocks allocated for the iag since it was
+			 * not successfully added to the inode map
+			 */
+			dbFree(ipimap, xaddr, (s64) xlen);
+
+			/* release the inode map lock */
+			IWRITE_UNLOCK(ipimap);
+
+			goto out;
+		}
+
+		/* update the inode map's inode to reflect the extension */
+		ipimap->i_size += PSIZE;
+		ipimap->i_blocks += LBLK2PBLK(sb, xlen);
+
+		/*
+		 * txCommit(COMMIT_FORCE) will synchronously write address
+		 * index pages and inode after commit in careful update order
+		 * of address index pages (right to left, bottom up);
+		 */
+		iplist[0] = ipimap;
+		rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
+
+		txEnd(tid);
+
+		duplicateIXtree(sb, blkno, xlen, &xaddr);
+
+		/* update the next available iag number */
+		imap->im_nextiag += 1;
+
+		/* Add the iag to the iag free list so we don't lose the iag
+		 * if a failure happens now.
+		 */
+		imap->im_freeiag = iagno;
+
+		/* Until we have logredo working, we want the imap inode &
+		 * control page to be up to date.
+		 */
+		diSync(ipimap);
+
+		/* release the inode map lock */
+		IWRITE_UNLOCK(ipimap);
+	}
+
+	/* obtain read lock on map */
+	IREAD_LOCK(ipimap);
+
+	/* read the iag */
+	if ((rc = diIAGRead(imap, iagno, &mp))) {
+		IREAD_UNLOCK(ipimap);
+		rc = EIO;
+		goto out;
+	}
+	iagp = (iag_t *) mp->data;
+
+	/* remove the iag from the iag free list */
+	imap->im_freeiag = le32_to_cpu(iagp->iagfree);
+	iagp->iagfree = -1;
+
+	/* set the return iag number and buffer pointer */
+	*iagnop = iagno;
+	*mpp = mp;
+
+      out:
+	/* release the iag free lock */
+	IAGFREE_UNLOCK(imap);
+
+	return (rc);
+}
+
+/*
+ * NAME:	diIAGRead()
+ *
+ * FUNCTION:	get the buffer for the specified iag within a fileset
+ *		or aggregate inode map.
+ *
+ * PARAMETERS:
+ *	imap - pointer to inode map control structure.
+ *	iagno - iag number.
+ *	mpp - pointer to the metapage pointer to be filled in on
+ *	      successful exit.
+ *
+ * SERIALIZATION:
+ *	must have read lock on imap inode
+ *	(When called by diExtendFS, the filesystem is quiesced, therefore
+ *	 the read lock is unnecessary.)
+ *
+ * RETURN VALUES:
+ *	0 - success.
+ *	EIO - i/o error.
+ */
+static int diIAGRead(imap_t * imap, int iagno, metapage_t ** mpp)
+{
+	struct inode *ipimap = imap->im_ipimap;
+	s64 blkno;
+
+	/* compute the logical block number of the iag. */
+	blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage);
+
+	/* read the iag. */
+	*mpp = read_metapage(ipimap, blkno, PSIZE, 0);
+	if (*mpp == NULL) {
+		return (EIO);
+	}
+
+	return (0);
+}
+
+/*
+ * NAME:	diFindFree()
+ *
+ * FUNCTION:	find the first free bit in a word starting at
+ *		the specified bit position.
+ *
+ * PARAMETERS:
+ *	word - word to be examined.
+ *	start - starting bit position.
+ *
+ * RETURN VALUES:
+ *	bit position of first free bit in the word or 32 if
+ *	no free bits were found.
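+ *
+ *	(illustrative example: diFindFree(0xf0000000, 0) returns 4, the
+ *	first 0 bit counting from the high-order bit; a word of all ones
+ *	returns 32.)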
+ */
+static int diFindFree(u32 word, int start)
+{
+	int bitno;
+	assert(start < 32);
+	/* scan the word for the first free bit. */
+	for (word <<= start, bitno = start; bitno < 32;
+	     bitno++, word <<= 1) {
+		if ((word & HIGHORDER) == 0)
+			break;
+	}
+	return (bitno);
+}
+
+/*
+ * NAME:	diUpdatePMap()
+ *
+ * FUNCTION:	Update the persistent map in an IAG for the allocation or
+ *		freeing of the specified inode.
+ *
+ * PRE CONDITIONS: Working map has already been updated for allocate.
+ *
+ * PARAMETERS:
+ *	ipimap - Incore inode map inode
+ *	inum - Number of inode to mark in permanent map
+ *	is_free - If TRUE indicates inode should be marked freed, otherwise
+ *		  indicates inode should be marked allocated.
+ *	tblk - transaction block
+ *
+ * RETURNS: 0 for success
+ */
+int
+diUpdatePMap(struct inode *ipimap,
+	     unsigned long inum, boolean_t is_free, tblock_t * tblk)
+{
+	int rc;
+	iag_t *iagp;
+	metapage_t *mp;
+	int iagno, ino, extno, bitno;
+	imap_t *imap;
+	u32 mask;
+	log_t *log;
+	int lsn, difft, diffp;
+
+	imap = JFS_IP(ipimap)->i_imap;
+	/* get the iag number containing the inode */
+	iagno = INOTOIAG(inum);
+	/* make sure that the iag is contained within the map */
+	assert(iagno < imap->im_nextiag);
+	/* read the iag */
+	IREAD_LOCK(ipimap);
+	rc = diIAGRead(imap, iagno, &mp);
+	IREAD_UNLOCK(ipimap);
+	if (rc)
+		return (rc);
+	iagp = (iag_t *) mp->data;
+	/* get the inode number and extent number of the inode within
+	 * the iag and the inode number within the extent.
+	 */
+	ino = inum & (INOSPERIAG - 1);
+	extno = ino >> L2INOSPEREXT;
+	bitno = ino & (INOSPEREXT - 1);
+	mask = HIGHORDER >> bitno;
+	/*
+	 * mark the inode free in persistent map:
+	 */
+	if (is_free == TRUE) {
+		/* The inode should have been allocated both in working
+		 * map and in persistent map;
+		 * the inode will be freed from the working map at the
+		 * release of the last reference;
+		 */
+//		assert(le32_to_cpu(iagp->wmap[extno]) & mask);
+		if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
+			jERROR(1,
+			       ("diUpdatePMap: inode %ld not marked as allocated in wmap!\n",
+				inum));
+			updateSuper(ipimap->i_sb, FM_DIRTY);
+		}
+//		assert(le32_to_cpu(iagp->pmap[extno]) & mask);
+		if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) {
+			jERROR(1,
+			       ("diUpdatePMap: inode %ld not marked as allocated in pmap!\n",
+				inum));
+			updateSuper(ipimap->i_sb, FM_DIRTY);
+		}
+		/* update the bitmap for the extent of the freed inode */
+		iagp->pmap[extno] &= cpu_to_le32(~mask);
+	}
+	/*
+	 * mark the inode allocated in persistent map:
+	 */
+	else {
+		/* The inode should be already allocated in the working map
+		 * and should be free in persistent map;
+		 */
+		assert(le32_to_cpu(iagp->wmap[extno]) & mask);
+		assert((le32_to_cpu(iagp->pmap[extno]) & mask) == 0);
+		/* update the bitmap for the extent of the allocated inode */
+		iagp->pmap[extno] |= cpu_to_le32(mask);
+	}
+	/*
+	 * update iag lsn
+	 */
+	lsn = tblk->lsn;
+	log = JFS_SBI(tblk->sb)->log;
+	if (mp->lsn != 0) {
+		/* inherit older/smaller lsn */
+		logdiff(difft, lsn, log);
+		logdiff(diffp, mp->lsn, log);
+		if (difft < diffp) {
+			mp->lsn = lsn;
+			/* move mp after tblock in logsync list */
+			LOGSYNC_LOCK(log);
+			list_del(&mp->synclist);
+			list_add(&mp->synclist, &tblk->synclist);
+			LOGSYNC_UNLOCK(log);
+		}
+		/* inherit younger/larger clsn */
+		LOGSYNC_LOCK(log);
+		assert(mp->clsn);
+		logdiff(difft, tblk->clsn, log);
+		logdiff(diffp, mp->clsn, log);
+		if (difft > diffp)
+			mp->clsn = tblk->clsn;
+		LOGSYNC_UNLOCK(log);
+	} else {
+		mp->log = log;
+		mp->lsn = lsn;
+		/* insert mp after tblock in logsync list */
LOGSYNC_LOCK(log); + log->count++; + list_add(&mp->synclist, &tblk->synclist); + mp->clsn = tblk->clsn; + LOGSYNC_UNLOCK(log); + } +// bmLazyWrite(mp, log->flag & JFS_COMMIT); + write_metapage(mp); + return (0); +} + +/* + * diExtendFS() + * + * function: update imap for extendfs(); + * + * note: AG size has been increased s.t. each k old contiguous AGs are + * coalesced into a new AG; + */ +int diExtendFS(struct inode *ipimap, struct inode *ipbmap) +{ + int rc, rcx = 0; + imap_t *imap = JFS_IP(ipimap)->i_imap; + iag_t *iagp = 0, *hiagp = 0; + bmap_t *mp = JFS_SBI(ipbmap->i_sb)->bmap; + metapage_t *bp, *hbp; + int i, n, head; + int numinos, xnuminos = 0, xnumfree = 0; + s64 agstart; + + jEVENT(0, ("diExtendFS: nextiag:%d numinos:%d numfree:%d\n", + imap->im_nextiag, atomic_read(&imap->im_numinos), + atomic_read(&imap->im_numfree))); + + /* + * reconstruct imap + * + * coalesce contiguous k (newAGSize/oldAGSize) AGs; + * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; + * note: new AG size = old AG size * (2**x). + */ + + /* init per AG control information im_agctl[] */ + for (i = 0; i < MAXAG; i++) { + imap->im_agctl[i].inofree = -1; /* free inode list */ + imap->im_agctl[i].extfree = -1; /* free extent list */ + imap->im_agctl[i].numinos = 0; /* number of backed inodes */ + imap->im_agctl[i].numfree = 0; /* number of free backed inodes */ + } + + /* + * process each iag_t page of the map. + * + * rebuild AG Free Inode List, AG Free Inode Extent List; + */ + for (i = 0; i < imap->im_nextiag; i++) { + if ((rc = diIAGRead(imap, i, &bp))) { + rcx = rc; + continue; + } + iagp = (iag_t *) bp->data; + assert(le32_to_cpu(iagp->iagnum) == i); + + /* leave free iag in the free iag list */ + if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { + release_metapage(bp); + continue; + } + + /* agstart that computes to the same ag is treated as same; */ + agstart = le64_to_cpu(iagp->agstart); + /* iagp->agstart = agstart & ~(mp->db_agsize - 1); */ + n = agstart >> mp->db_agl2size; +/* +printf("diExtendFS: iag:%d agstart:%Ld agno:%d\n", i, agstart, n); +*/ + + /* compute backed inodes */ + numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts)) + << L2INOSPEREXT; + if (numinos > 0) { + /* merge AG backed inodes */ + imap->im_agctl[n].numinos += numinos; + xnuminos += numinos; + } + + /* if any backed free inodes, insert at AG free inode list */ + if ((int) le32_to_cpu(iagp->nfreeinos) > 0) { + if ((head = imap->im_agctl[n].inofree) == -1) + iagp->inofreefwd = iagp->inofreeback = -1; + else { + if ((rc = diIAGRead(imap, head, &hbp))) { + rcx = rc; + goto nextiag; + } + hiagp = (iag_t *) hbp->data; + hiagp->inofreeback = + le32_to_cpu(iagp->iagnum); + iagp->inofreefwd = cpu_to_le32(head); + iagp->inofreeback = -1; + write_metapage(hbp); + } + + imap->im_agctl[n].inofree = + le32_to_cpu(iagp->iagnum); + + /* merge AG backed free inodes */ + imap->im_agctl[n].numfree += + le32_to_cpu(iagp->nfreeinos); + xnumfree += le32_to_cpu(iagp->nfreeinos); + } + + /* if any free extents, insert at AG free extent list */ + if (le32_to_cpu(iagp->nfreeexts) > 0) { + if ((head = imap->im_agctl[n].extfree) == -1) + iagp->extfreefwd = iagp->extfreeback = -1; + else { + if ((rc = diIAGRead(imap, head, &hbp))) { + rcx = rc; + goto nextiag; + } + hiagp = (iag_t *) hbp->data; + hiagp->extfreeback = iagp->iagnum; + iagp->extfreefwd = cpu_to_le32(head); + iagp->extfreeback = -1; + write_metapage(hbp); + } + + imap->im_agctl[n].extfree = + le32_to_cpu(iagp->iagnum); + } + + nextiag: + write_metapage(bp); + } + + 
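+	/* (worked instance of the formula above: with k = 2 -- the AG
+	 * size doubled -- old AGs 2n and 2n+1 fold into new AG n;
+	 * inodes move between AG buckets but the totals are invariant,
+	 * which the ASSERT below re-checks against the global counters.)
+	 */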
ASSERT(xnuminos == atomic_read(&imap->im_numinos) && + xnumfree == atomic_read(&imap->im_numfree)); + + return rcx; +} + + +/* + * duplicateIXtree() + * + * serialization: IWRITE_LOCK held on entry/exit + * + * note: shadow page with regular inode (rel.2); + */ +static void +duplicateIXtree(struct super_block *sb, s64 blkno, int xlen, s64 * xaddr) +{ + int rc; + tid_t tid; + struct inode *ip; + metapage_t *mpsuper; + struct jfs_superblock *j_sb; + + /* if AIT2 ipmap2 is bad, do not try to update it */ + if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT) /* s_flag */ + return; + ip = diReadSpecial(sb, FILESYSTEM_I + INOSPEREXT); + if (ip == 0) { + JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; + if ((rc = readSuper(sb, &mpsuper))) + return; + j_sb = (struct jfs_superblock *) (mpsuper->data); + j_sb->s_flag |= JFS_BAD_SAIT; + write_metapage(mpsuper); + return; + } + + /* start transaction */ + tid = txBegin(sb, COMMIT_FORCE); + /* update the inode map addressing structure to point to it */ + if ((rc = xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0))) { + JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; + txAbort(tid, 1); + goto cleanup; + + } + /* update the inode map's inode to reflect the extension */ + ip->i_size += PSIZE; + ip->i_blocks += LBLK2PBLK(sb, xlen); + rc = txCommit(tid, 1, &ip, COMMIT_FORCE); + cleanup: + txEnd(tid); + diFreeSpecial(ip); +} + +/* + * NAME: copy_from_dinode() + * + * FUNCTION: Copies inode info from disk inode to in-memory inode + * + * RETURN VALUES: + * 0 - success + * ENOMEM - insufficient memory + */ +static int copy_from_dinode(dinode_t * dip, struct inode *ip) +{ + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + + jfs_ip->fileset = le32_to_cpu(dip->di_fileset); + jfs_ip->mode2 = le32_to_cpu(dip->di_mode); + + ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; + ip->i_nlink = le32_to_cpu(dip->di_nlink); + ip->i_uid = le32_to_cpu(dip->di_uid); + ip->i_gid = le32_to_cpu(dip->di_gid); + ip->i_size = le64_to_cpu(dip->di_size); + ip->i_atime = le32_to_cpu(dip->di_atime.tv_sec); + ip->i_mtime = le32_to_cpu(dip->di_mtime.tv_sec); + ip->i_ctime = le32_to_cpu(dip->di_ctime.tv_sec); + ip->i_blksize = ip->i_sb->s_blocksize; + ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks)); + ip->i_version = ++event; + ip->i_generation = le32_to_cpu(dip->di_gen); + + jfs_ip->ixpxd = dip->di_ixpxd; /* in-memory pxd's are little-endian */ + jfs_ip->acl = dip->di_acl; /* as are dxd's */ + jfs_ip->ea = dip->di_ea; + jfs_ip->next_index = le32_to_cpu(dip->di_next_index); + jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec); + jfs_ip->acltype = le32_to_cpu(dip->di_acltype); + /* + * We may only need to do this for "special" inodes (dmap, imap) + */ + if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) + ip->i_rdev = to_kdev_t(le32_to_cpu(dip->di_rdev)); + else if (S_ISDIR(ip->i_mode)) { + memcpy(&jfs_ip->i_dirtable, &dip->di_dirtable, 384); + } else if (!S_ISFIFO(ip->i_mode)) { + memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288); + } + /* Zero the in-memory-only stuff */ + jfs_ip->cflag = 0; + jfs_ip->btindex = 0; + jfs_ip->btorder = 0; + jfs_ip->bxflag = 0; + jfs_ip->blid = 0; + jfs_ip->atlhead = 0; + jfs_ip->atltail = 0; + jfs_ip->xtlid = 0; + return (0); +} + +/* + * NAME: copy_to_dinode() + * + * FUNCTION: Copies inode info from in-memory inode to disk inode + */ +static void copy_to_dinode(dinode_t * dip, struct inode *ip) +{ + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + + dip->di_fileset = cpu_to_le32(jfs_ip->fileset); + dip->di_inostamp = cpu_to_le32(JFS_SBI(ip->i_sb)->inostamp); + dip->di_number = 
+	    cpu_to_le32(ip->i_ino);
+	dip->di_gen = cpu_to_le32(ip->i_generation);
+	dip->di_size = cpu_to_le64(ip->i_size);
+	dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
+	dip->di_nlink = cpu_to_le32(ip->i_nlink);
+	dip->di_uid = cpu_to_le32(ip->i_uid);
+	dip->di_gid = cpu_to_le32(ip->i_gid);
+	/*
+	 * mode2 is only needed for storing the higher order bits.
+	 * Trust i_mode for the lower order ones
+	 */
+	dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | ip->i_mode);
+	dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime);
+	dip->di_atime.tv_nsec = 0;
+	dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime);
+	dip->di_ctime.tv_nsec = 0;
+	dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime);
+	dip->di_mtime.tv_nsec = 0;
+	dip->di_ixpxd = jfs_ip->ixpxd;	/* in-memory pxd's are little-endian */
+	dip->di_acl = jfs_ip->acl;	/* as are dxd's */
+	dip->di_ea = jfs_ip->ea;
+	dip->di_next_index = cpu_to_le32(jfs_ip->next_index);
+	dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime);
+	dip->di_otime.tv_nsec = 0;
+	dip->di_acltype = cpu_to_le32(jfs_ip->acltype);
+
+	if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode))
+		dip->di_rdev = cpu_to_le32(kdev_t_to_nr(ip->i_rdev));
+}
+
+void diClearExtension(struct inode *ip)
+{
+	jFYI(1, ("diClearExtension called ip = 0x%p\n", ip));
+
+	assert(list_empty(&JFS_IP(ip)->mp_list));
+	assert(list_empty(&JFS_IP(ip)->anon_inode_list));
+
+	if (JFS_IP(ip)->atlhead) {
+		jERROR(1,
+		       ("diClearExtension: inode 0x%p has anonymous tlocks\n",
+			ip));
+	}
+
+	free_jfs_inode(ip);
+	ip->u.generic_ip = 0;
+}
+
+#ifdef _JFS_DEBUG_IMAP
+/*
+ *	DBGdiInit()
+ */
+static void DBGdiInit(imap_t * imap)
+{
+	u32 *dimap;
+	int size;
+	size = 64 * 1024;
+	if ((dimap = (u32 *) xmalloc(size, L2PSIZE, kernel_heap)) == NULL)
+		assert(0);
+	bzero((void *) dimap, size);
+	imap->im_DBGdimap = dimap;
+}
+
+/*
+ *	DBGdiAlloc()
+ */
+static void DBGdiAlloc(imap_t * imap, ino_t ino)
+{
+	u32 *dimap = imap->im_DBGdimap;
+	int w, b;
+	u32 m;
+	w = ino >> 5;
+	b = ino & 31;
+	m = 0x80000000 >> b;
+	assert(w < 64 * 256);
+	if (dimap[w] & m) {
+		printk("DEBUG diAlloc: duplicate alloc ino:0x%x\n", ino);
+	}
+	dimap[w] |= m;
+}
+
+/*
+ *	DBGdiFree()
+ */
+static void DBGdiFree(imap_t * imap, ino_t ino)
+{
+	u32 *dimap = imap->im_DBGdimap;
+	int w, b;
+	u32 m;
+	w = ino >> 5;
+	b = ino & 31;
+	m = 0x80000000 >> b;
+	assert(w < 64 * 256);
+	if ((dimap[w] & m) == 0) {
+		printk("DEBUG diFree: duplicate free ino:0x%x\n", ino);
+	}
+	dimap[w] &= ~m;
+}
+
+static void dump_cp(imap_t * ipimap, char *function, int line)
+{
+	printk("\n* ********* *\nControl Page %s %d\n", function, line);
+	printk("FreeIAG %d\tNextIAG %d\n", ipimap->im_freeiag,
+	       ipimap->im_nextiag);
+	printk("NumInos %d\tNumFree %d\n",
+	       atomic_read(&ipimap->im_numinos),
+	       atomic_read(&ipimap->im_numfree));
+	printk("AG InoFree %d\tAG ExtFree %d\n",
+	       ipimap->im_agctl[0].inofree, ipimap->im_agctl[0].extfree);
+	printk("AG NumInos %d\tAG NumFree %d\n",
+	       ipimap->im_agctl[0].numinos, ipimap->im_agctl[0].numfree);
+}
+
+static void dump_iag(iag_t * iag, char *function, int line)
+{
+	printk("\n* ********* *\nIAG %s %d\n", function, line);
+	printk("IagNum %d\tIAG Free %d\n", le32_to_cpu(iag->iagnum),
+	       le32_to_cpu(iag->iagfree));
+	printk("InoFreeFwd %d\tInoFreeBack %d\n",
+	       le32_to_cpu(iag->inofreefwd),
+	       le32_to_cpu(iag->inofreeback));
+	printk("ExtFreeFwd %d\tExtFreeBack %d\n",
+	       le32_to_cpu(iag->extfreefwd),
+	       le32_to_cpu(iag->extfreeback));
+	printk("NFreeInos %d\tNFreeExts %d\n", le32_to_cpu(iag->nfreeinos),
+	       le32_to_cpu(iag->nfreeexts));
+}
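+
+/* (the debug helpers above shadow inode allocation in a private
+ * 64KB in-memory bitmap -- one bit per inode, 64K * 8 = 512K inodes,
+ * matching the assert(w < 64 * 256) word bound -- purely to catch
+ * duplicate alloc/free; they never touch on-disk state.)
+ */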
+#endif /* _JFS_DEBUG_IMAP */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_imap.h linuxppc64_2_4/fs/jfs/jfs_imap.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_imap.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_imap.h Tue Apr 23 11:21:51 2002 @@ -0,0 +1,161 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _H_JFS_IMAP +#define _H_JFS_IMAP + +#include "jfs_txnmgr.h" + +/* + * jfs_imap.h: disk inode manager + */ + +#define EXTSPERIAG 128 /* number of disk inode extent per iag */ +#define IMAPBLKNO 0 /* lblkno of dinomap within inode map */ +#define SMAPSZ 4 /* number of words per summary map */ +#define EXTSPERSUM 32 /* number of extents per summary map entry */ +#define L2EXTSPERSUM 5 /* l2 number of extents per summary map */ +#define PGSPERIEXT 4 /* number of 4K pages per dinode extent */ +#define MAXIAGS ((1<<20)-1) /* maximum number of iags */ +#define MAXAG 128 /* maximum number of allocation groups */ + +#define AMAPSIZE 512 /* bytes in the IAG allocation maps */ +#define SMAPSIZE 16 /* bytes in the IAG summary maps */ + +/* convert inode number to iag number */ +#define INOTOIAG(ino) ((ino) >> L2INOSPERIAG) + +/* convert iag number to logical block number of the iag page */ +#define IAGTOLBLK(iagno,l2nbperpg) (((iagno) + 1) << (l2nbperpg)) + +/* get the starting block number of the 4K page of an inode extent + * that contains ino. + */ +#define INOPBLK(pxd,ino,l2nbperpg) (addressPXD((pxd)) + \ + ((((ino) & (INOSPEREXT-1)) >> L2INOSPERPAGE) << (l2nbperpg))) + +/* + * inode allocation map: + * + * inode allocation map consists of + * . the inode map control page and + * . inode allocation group pages (per 4096 inodes) + * which are addressed by standard JFS xtree. + */ +/* + * inode allocation group page (per 4096 inodes of an AG) + */ +typedef struct { + s64 agstart; /* 8: starting block of ag */ + s32 iagnum; /* 4: inode allocation group number */ + s32 inofreefwd; /* 4: ag inode free list forward */ + s32 inofreeback; /* 4: ag inode free list back */ + s32 extfreefwd; /* 4: ag inode extent free list forward */ + s32 extfreeback; /* 4: ag inode extent free list back */ + s32 iagfree; /* 4: iag free list */ + + /* summary map: 1 bit per inode extent */ + s32 inosmap[SMAPSZ]; /* 16: sum map of mapwords w/ free inodes; + * note: this indicates free and backed + * inodes, if the extent is not backed the + * value will be 1. if the extent is + * backed but all inodes are being used the + * value will be 1. if the extent is + * backed but at least one of the inodes is + * free the value will be 0. 
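+					 * in short: a summary bit is 0
+					 * iff its extent is backed and
+					 * still has a free inode, i.e.
+					 * iff an inode can be allocated
+					 * from it without adding an
+					 * inode extent.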
+ */ + s32 extsmap[SMAPSZ]; /* 16: sum map of mapwords w/ free extents */ + s32 nfreeinos; /* 4: number of free inodes */ + s32 nfreeexts; /* 4: number of free extents */ + /* (72) */ + u8 pad[1976]; /* 1976: pad to 2048 bytes */ + /* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */ + u32 wmap[EXTSPERIAG]; /* 512: working allocation map */ + u32 pmap[EXTSPERIAG]; /* 512: persistent allocation map */ + pxd_t inoext[EXTSPERIAG]; /* 1024: inode extent addresses */ +} iag_t; /* (4096) */ + +/* + * per AG control information (in inode map control page) + */ +typedef struct { + s32 inofree; /* 4: free inode list anchor */ + s32 extfree; /* 4: free extent list anchor */ + s32 numinos; /* 4: number of backed inodes */ + s32 numfree; /* 4: number of free inodes */ +} iagctl_t; /* (16) */ + +/* + * per fileset/aggregate inode map control page + */ +typedef struct { + s32 in_freeiag; /* 4: free iag list anchor */ + s32 in_nextiag; /* 4: next free iag number */ + s32 in_numinos; /* 4: num of backed inodes */ + s32 in_numfree; /* 4: num of free backed inodes */ + s32 in_nbperiext; /* 4: num of blocks per inode extent */ + s32 in_l2nbperiext; /* 4: l2 of in_nbperiext */ + s32 in_diskblock; /* 4: for standalone test driver */ + s32 in_maxag; /* 4: for standalone test driver */ + u8 pad[2016]; /* 2016: pad to 2048 */ + iagctl_t in_agctl[MAXAG]; /* 2048: AG control information */ +} dinomap_t; /* (4096) */ + + +/* + * In-core inode map control page + */ +typedef struct inomap { + dinomap_t im_imap; /* 4096: inode allocation control */ + struct inode *im_ipimap; /* 4: ptr to inode for imap */ + struct semaphore im_freelock; /* 4: iag free list lock */ + struct semaphore im_aglock[MAXAG]; /* 512: per AG locks */ + u32 *im_DBGdimap; + atomic_t im_numinos; /* num of backed inodes */ + atomic_t im_numfree; /* num of free backed inodes */ +} imap_t; + +#define im_freeiag im_imap.in_freeiag +#define im_nextiag im_imap.in_nextiag +#define im_agctl im_imap.in_agctl +#define im_nbperiext im_imap.in_nbperiext +#define im_l2nbperiext im_imap.in_l2nbperiext + +/* for standalone testdriver + */ +#define im_diskblock im_imap.in_diskblock +#define im_maxag im_imap.in_maxag + +extern int diFree(struct inode *); +extern int diAlloc(struct inode *, boolean_t, struct inode *); +extern int diSync(struct inode *); +/* external references */ +extern int diUpdatePMap(struct inode *ipimap, unsigned long inum, + boolean_t is_free, tblock_t * tblk); +#ifdef _STILL_TO_PORT +extern int diExtendFS(inode_t * ipimap, inode_t * ipbmap); +#endif /* _STILL_TO_PORT */ + +extern int diMount(struct inode *); +extern int diUnmount(struct inode *, int); +extern int diRead(struct inode *); +extern void diClearExtension(struct inode *); +extern struct inode *diReadSpecial(struct super_block *, ino_t); +extern void diWriteSpecial(struct inode *); +extern void diFreeSpecial(struct inode *); +extern int diWrite(tid_t tid, struct inode *); +#endif /* _H_JFS_IMAP */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_incore.h linuxppc64_2_4/fs/jfs/jfs_incore.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_incore.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_incore.h Tue Apr 23 11:25:34 2002 @@ -0,0 +1,155 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any 
later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * +*/ +#ifndef _H_JFS_INCORE +#define _H_JFS_INCORE + +#include +#include +#include "jfs_types.h" +#include "jfs_xtree.h" +#include "jfs_dtree.h" + +/* + * JFS magic number + */ +#define JFS_SUPER_MAGIC 0x3153464a /* "JFS1" */ + +/* + * Due to header ordering problems this can't be in jfs_lock.h + */ +typedef struct jfs_rwlock { + struct rw_semaphore rw_sem; + atomic_t in_use; /* for hacked implementation of trylock */ +} jfs_rwlock_t; + +/* + * JFS-private inode information + */ +struct jfs_inode_info { + struct inode *inode; /* pointer back to fs-independent inode */ + int fileset; /* fileset number (always 16)*/ + uint mode2; /* jfs-specific mode */ + pxd_t ixpxd; /* inode extent descriptor */ + dxd_t acl; /* dxd describing acl */ + dxd_t ea; /* dxd describing ea */ + time_t otime; /* time created */ + uint next_index; /* next available directory entry index */ + int acltype; /* Type of ACL */ + short btorder; /* access order */ + short btindex; /* btpage entry index*/ + struct inode *ipimap; /* inode map */ + long cflag; /* commit flags */ + u16 bxflag; /* xflag of pseudo buffer? */ + unchar agno; /* ag number */ + unchar pad; /* pad */ + lid_t blid; /* lid of pseudo buffer? */ + lid_t atlhead; /* anonymous tlock list head */ + lid_t atltail; /* anonymous tlock list tail */ + struct list_head anon_inode_list; /* inodes having anonymous txns */ + struct list_head mp_list; /* metapages in inode's address space */ + jfs_rwlock_t rdwrlock; /* read/write lock */ + lid_t xtlid; /* lid of xtree lock on directory */ + union { + struct { + xtpage_t _xtroot; /* 288: xtree root */ + struct inomap *_imap; /* 4: inode map header */ + } file; + struct { + dir_table_slot_t _table[12]; /* 96: directory index */ + dtroot_t _dtroot; /* 288: dtree root */ + } dir; + struct { + unchar _unused[16]; /* 16: */ + dxd_t _dxd; /* 16: */ + unchar _inline[128]; /* 128: inline symlink */ + } link; + } u; +}; +#define i_xtroot u.file._xtroot +#define i_imap u.file._imap +#define i_dirtable u.dir._table +#define i_dtroot u.dir._dtroot +#define i_inline u.link._inline + +/* + * cflag + */ +enum cflags { + COMMIT_New, /* never committed inode */ + COMMIT_Nolink, /* inode committed with zero link count */ + COMMIT_Inlineea, /* commit inode inline EA */ + COMMIT_Freewmap, /* free WMAP at iClose() */ + COMMIT_Dirty, /* Inode is really dirty */ + COMMIT_Holdlock, /* Hold the IWRITE_LOCK until commit is done */ + COMMIT_Dirtable, /* commit changes to di_dirtable */ + COMMIT_Stale, /* data extent is no longer valid */ + COMMIT_Synclist, /* metadata pages on group commit synclist */ +}; + +#define set_cflag(flag, ip) set_bit(flag, &(JFS_IP(ip)->cflag)) +#define clear_cflag(flag, ip) clear_bit(flag, &(JFS_IP(ip)->cflag)) +#define test_cflag(flag, ip) test_bit(flag, &(JFS_IP(ip)->cflag)) +#define test_and_clear_cflag(flag, ip) \ + test_and_clear_bit(flag, &(JFS_IP(ip)->cflag)) +/* + * JFS-private superblock information. 
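+ * (one instance per mounted aggregate; it hangs off the generic
+ * super block at sb->u.generic_sbp and is reached through the
+ * JFS_SBI() macro defined below.)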
+ */
+struct jfs_sb_info {
+	unsigned long mntflag;	/* 4: aggregate attributes */
+	struct inode *ipbmap;	/* 4: block map inode */
+	struct inode *ipaimap;	/* 4: aggregate inode map inode */
+	struct inode *ipaimap2;	/* 4: secondary aimap inode */
+	struct inode *ipimap;	/* 4: fileset inode map inode */
+	struct jfs_log *log;	/* 4: log */
+	short bsize;		/* 2: logical block size */
+	short l2bsize;		/* 2: log2 logical block size */
+	short nbperpage;	/* 2: blocks per page */
+	short l2nbperpage;	/* 2: log2 blocks per page */
+	short l2niperblk;	/* 2: log2 inodes per page */
+	kdev_t logdev;		/* 2: external log device */
+	pxd_t logpxd;		/* 8: pxd describing log */
+	pxd_t ait2;		/* 8: pxd describing AIT copy */
+	/* Formerly in ipimap */
+	uint gengen;		/* 4: inode generation generator */
+	uint inostamp;		/* 4: shows inode belongs to fileset */
+
+	/* Formerly in ipbmap */
+	struct bmap *bmap;	/* 4: incore bmap descriptor */
+	struct nls_table *nls_tab;	/* 4: current codepage */
+	struct inode *direct_inode;	/* 4: inode for physical I/O */
+	struct address_space *direct_mapping;	/* 4: mapping for physical I/O */
+	uint state;		/* 4: mount/recovery state */
+};
+
+#define JFS_IP(ip) ((struct jfs_inode_info *)(ip)->u.generic_ip)
+#define JFS_SBI(sb) ((struct jfs_sb_info *)(sb)->u.generic_sbp)
+
+#define isReadOnly(ip) ((JFS_SBI((ip)->i_sb)->log) ? 0 : 1)
+
+/*
+ * Allocating and freeing the structure
+ */
+extern kmem_cache_t *jfs_inode_cachep;
+extern int alloc_jfs_inode(struct inode *);
+
+#define free_jfs_inode(inode) \
+	kmem_cache_free(jfs_inode_cachep, (inode)->u.generic_ip)
+
+#endif	/* _H_JFS_INCORE */
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_inode.c linuxppc64_2_4/fs/jfs/jfs_inode.c
--- ../kernel.org/linux-2.4.19/fs/jfs/jfs_inode.c	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/fs/jfs/jfs_inode.c	Tue Apr 23 11:25:34 2002
@@ -0,0 +1,160 @@
+/*
+ *
+ * Copyright (c) International Business Machines Corp., 2000
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include "jfs_incore.h" +#include "jfs_filsys.h" +#include "jfs_imap.h" +#include "jfs_dinode.h" +#include "jfs_debug.h" + +kmem_cache_t *jfs_inode_cachep; + +/* + * NAME: ialloc() + * + * FUNCTION: Allocate a new inode + * + */ +struct inode *ialloc(struct inode *parent, umode_t mode) +{ + struct super_block *sb = parent->i_sb; + struct inode *inode; + struct jfs_inode_info *jfs_inode; + int rc; + + inode = new_inode(sb); + if (!inode) { + jERROR(1, ("ialloc: new_inode returned NULL!\n")); + return inode; + } + + rc = alloc_jfs_inode(inode); + if (rc) { + make_bad_inode(inode); + iput(inode); + return NULL; + } + jfs_inode = JFS_IP(inode); + + rc = diAlloc(parent, S_ISDIR(mode), inode); + if (rc) { + jERROR(1, ("ialloc: diAlloc returned %d!\n", rc)); + free_jfs_inode(inode); + make_bad_inode(inode); + iput(inode); + return NULL; + } + + inode->i_uid = current->fsuid; + if (parent->i_mode & S_ISGID) { + inode->i_gid = parent->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else + inode->i_gid = current->fsgid; + + inode->i_mode = mode; + if (S_ISDIR(mode)) + jfs_inode->mode2 = IDIRECTORY | mode; + else + jfs_inode->mode2 = INLINEEA | ISPARSE | mode; + inode->i_blksize = sb->s_blocksize; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + jfs_inode->otime = inode->i_ctime; + inode->i_version = ++event; + inode->i_generation = JFS_SBI(sb)->gengen++; + + jfs_inode->cflag = 0; + set_cflag(COMMIT_New, inode); + + /* Zero remaining fields */ + memset(&jfs_inode->acl, 0, sizeof(dxd_t)); + memset(&jfs_inode->ea, 0, sizeof(dxd_t)); + jfs_inode->next_index = 0; + jfs_inode->acltype = 0; + jfs_inode->btorder = 0; + jfs_inode->btindex = 0; + jfs_inode->bxflag = 0; + jfs_inode->blid = 0; + jfs_inode->atlhead = 0; + jfs_inode->atltail = 0; + jfs_inode->xtlid = 0; + + jFYI(1, ("ialloc returns inode = 0x%p\n", inode)); + + return inode; +} + +/* + * NAME: iwritelocklist() + * + * FUNCTION: Lock multiple inodes in sorted order to avoid deadlock + * + */ +void iwritelocklist(int n, ...) 
+{ + va_list ilist; + struct inode *sort[4]; + struct inode *ip; + int k, m; + + va_start(ilist, n); + for (k = 0; k < n; k++) + sort[k] = va_arg(ilist, struct inode *); + va_end(ilist); + + /* Bubble sort in descending order */ + do { + m = 0; + for (k = 0; k < n; k++) + if ((k + 1) < n + && sort[k + 1]->i_ino > sort[k]->i_ino) { + ip = sort[k]; + sort[k] = sort[k + 1]; + sort[k + 1] = ip; + m++; + } + } while (m); + + /* Lock them */ + for (k = 0; k < n; k++) { + IWRITE_LOCK(sort[k]); + } +} + +/* + * NAME: alloc_jfs_inode() + * + * FUNCTION: Allocate jfs portion of in-memory inode + * + */ +int alloc_jfs_inode(struct inode *inode) +{ + struct jfs_inode_info *jfs_inode; + + jfs_inode = kmem_cache_alloc(jfs_inode_cachep, GFP_NOFS); + JFS_IP(inode) = jfs_inode; + if (!jfs_inode) + return -ENOSPC; + jfs_inode->inode = inode; + + return 0; +} diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_inode.h linuxppc64_2_4/fs/jfs/jfs_inode.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_inode.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_inode.h Tue Apr 23 11:21:51 2002 @@ -0,0 +1,23 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _H_JFS_INODE +#define _H_JFS_INODE + +extern struct inode *ialloc(struct inode *, umode_t); + +#endif /* _H_JFS_INODE */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_lock.h linuxppc64_2_4/fs/jfs/jfs_lock.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_lock.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_lock.h Tue Apr 23 11:21:51 2002 @@ -0,0 +1,106 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _H_JFS_LOCK +#define _H_JFS_LOCK + +#include +#include + +/* + * jfs_lock.h + * + * JFS lock definition for globally referenced locks + */ + +/* readers/writer lock: thread-thread */ + +/* + * RW semaphores do not currently have a trylock function. Since the + * implementation varies by platform, I have implemented a platform-independent + * wrapper around the rw_semaphore routines. 
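+ * (N.B.: the wrapper is only approximate -- the in_use test in
+ * WRITE_TRYLOCK and the down_write() it may then call are not
+ * atomic as a pair, so the "trylock" can still block in a narrow
+ * race.)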
If this turns out to be the best + * way of avoiding our locking problems, I will push to get a trylock + * implemented in the kernel, but I'd rather find a way to avoid having to + * use it. + */ +#define RDWRLOCK_T jfs_rwlock_t +static inline void RDWRLOCK_INIT(jfs_rwlock_t * Lock) +{ + init_rwsem(&Lock->rw_sem); + atomic_set(&Lock->in_use, 0); +} +static inline void READ_LOCK(jfs_rwlock_t * Lock) +{ + atomic_inc(&Lock->in_use); + down_read(&Lock->rw_sem); +} +static inline void READ_UNLOCK(jfs_rwlock_t * Lock) +{ + up_read(&Lock->rw_sem); + atomic_dec(&Lock->in_use); +} +static inline void WRITE_LOCK(jfs_rwlock_t * Lock) +{ + atomic_inc(&Lock->in_use); + down_write(&Lock->rw_sem); +} + +static inline int WRITE_TRYLOCK(jfs_rwlock_t * Lock) +{ + if (atomic_read(&Lock->in_use)) + return 0; + WRITE_LOCK(Lock); + return 1; +} +static inline void WRITE_UNLOCK(jfs_rwlock_t * Lock) +{ + up_write(&Lock->rw_sem); + atomic_dec(&Lock->in_use); +} + +#define IREAD_LOCK(ip) READ_LOCK(&JFS_IP(ip)->rdwrlock) +#define IREAD_UNLOCK(ip) READ_UNLOCK(&JFS_IP(ip)->rdwrlock) +#define IWRITE_LOCK(ip) WRITE_LOCK(&JFS_IP(ip)->rdwrlock) +#define IWRITE_TRYLOCK(ip) WRITE_TRYLOCK(&JFS_IP(ip)->rdwrlock) +#define IWRITE_UNLOCK(ip) WRITE_UNLOCK(&JFS_IP(ip)->rdwrlock) +#define IWRITE_LOCK_LIST iwritelocklist + +extern void iwritelocklist(int, ...); + +/* + * Conditional sleep where condition is protected by spinlock + * + * lock_cmd and unlock_cmd take and release the spinlock + */ +#define __SLEEP_COND(wq, cond, lock_cmd, unlock_cmd) \ +do { \ + DECLARE_WAITQUEUE(__wait, current); \ + \ + add_wait_queue(&wq, &__wait); \ + for (;;) { \ + set_current_state(TASK_UNINTERRUPTIBLE);\ + if (cond) \ + break; \ + unlock_cmd; \ + schedule(); \ + lock_cmd; \ + } \ + current->state = TASK_RUNNING; \ + remove_wait_queue(&wq, &__wait); \ +} while (0) + +#endif /* _H_JFS_LOCK */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_logmgr.c linuxppc64_2_4/fs/jfs/jfs_logmgr.c --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_logmgr.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_logmgr.c Tue Apr 23 11:25:34 2002 @@ -0,0 +1,2327 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * +*/ + +/* + * jfs_logmgr.c: log manager + * + * for related information, see transaction manager (jfs_txnmgr.c), and + * recovery manager (jfs_logredo.c). + * + * note: for detail, RTFS. + * + * log buffer manager: + * special purpose buffer manager supporting log i/o requirements. 
+ * per log serial pageout of logpage
+ * queuing i/o requests and redrive i/o at iodone
+ * maintain current logpage buffer
+ * no caching since append only
+ * appropriate jfs buffer cache buffers as needed
+ *
+ * group commit:
+ * transactions which wrote COMMIT records in the same in-memory
+ * log page during the pageout of previous/current log page(s) are
+ * committed together by the pageout of the page.
+ *
+ * TBD lazy commit:
+ * transactions are committed asynchronously when the log page
+ * containing its COMMIT is paged out as the page becomes full;
+ *
+ * serialization:
+ * . a per log lock serializes log write.
+ * . a per log lock serializes group commit.
+ * . a per log lock serializes log open/close;
+ *
+ * TBD log integrity:
+ * careful-write (ping-pong) of last logpage to recover from crash
+ * in overwrite.
+ * detection of split (out-of-order) write of physical sectors
+ * of last logpage via timestamp at end of each sector
+ * with its mirror data array at trailer.
+ *
+ * alternatives:
+ * lsn - 64-bit monotonically increasing integer vs
+ * 32-bit lspn and page eor.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include "jfs_incore.h"
+#include "jfs_filsys.h"
+#include "jfs_metapage.h"
+#include "jfs_txnmgr.h"
+#include "jfs_debug.h"
+
+
+/*
+ * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIOtask)
+ */
+static lbuf_t *log_redrive_list;
+static spinlock_t log_redrive_lock = SPIN_LOCK_UNLOCKED;
+
+
+/*
+ *	log read/write serialization (per log)
+ */
+#define LOG_LOCK_INIT(log)	init_MUTEX(&(log)->loglock)
+#define LOG_LOCK(log)		down(&((log)->loglock))
+#define LOG_UNLOCK(log)		up(&((log)->loglock))
+
+
+/*
+ *	log group commit serialization (per log)
+ */
+
+#define LOGGC_LOCK_INIT(log)	spin_lock_init(&(log)->gclock)
+#define LOGGC_LOCK(log)		spin_lock_irq(&(log)->gclock)
+#define LOGGC_UNLOCK(log)	spin_unlock_irq(&(log)->gclock)
+#define LOGGC_WAKEUP(tblk)	wake_up(&(tblk)->gcwait)
+
+/*
+ *	log sync serialization (per log)
+ */
+#define	LOGSYNC_DELTA(logsize)		min((logsize)/8, 128*LOGPSIZE)
+#define	LOGSYNC_BARRIER(logsize)	((logsize)/4)
+/*
+#define	LOGSYNC_DELTA(logsize)		min((logsize)/4, 256*LOGPSIZE)
+#define	LOGSYNC_BARRIER(logsize)	((logsize)/2)
+*/
+
+
+/*
+ *	log buffer cache synchronization
+ */
+static spinlock_t jfsLCacheLock = SPIN_LOCK_UNLOCKED;
+
+#define	LCACHE_LOCK(flags)	spin_lock_irqsave(&jfsLCacheLock, flags)
+#define	LCACHE_UNLOCK(flags)	spin_unlock_irqrestore(&jfsLCacheLock, flags)
+
+/*
+ * See __SLEEP_COND in jfs_lock.h
+ */
+#define LCACHE_SLEEP_COND(wq, cond, flags)	\
+do {						\
+	if (cond)				\
+		break;				\
+	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
+} while (0)
+
+#define	LCACHE_WAKEUP(event)	wake_up(event)
+
+
+/*
+ *	lbuf buffer cache (lCache) control
+ */
+/* log buffer manager pageout control (cumulative, inclusive) */
+#define	lbmREAD		0x0001
+#define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
+				 * init pageout if at head of queue;
+				 */
+#define	lbmRELEASE	0x0004	/* remove from write queue
+				 * at completion of pageout;
+				 * do not free/recycle it yet:
+				 * caller will free it;
+				 */
+#define	lbmSYNC		0x0008	/* do not return to freelist
+				 * when removed from write queue;
+				 */
+#define lbmFREE		0x0010	/* return to freelist
+				 * at completion of pageout;
+				 * the buffer may be recycled;
+				 */
+#define	lbmDONE		0x0020
+#define	lbmERROR	0x0040
+#define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
+				 * of log page
+				 */
+#define lbmDIRECT	0x0100
+
+/*
+ * external references
+ */
+extern void
txLazyUnlock(tblock_t * tblk); +extern int jfs_thread_stopped(void); +extern struct task_struct *jfsIOtask; +extern struct completion jfsIOwait; + +/* + * forward references + */ +static int lmWriteRecord(log_t * log, tblock_t * tblk, lrd_t * lrd, + tlock_t * tlck); + +static int lmNextPage(log_t * log); +static int lmLogFileSystem(log_t * log, kdev_t fsdev, int activate); +static int lmLogInit(log_t * log); +static int lmLogShutdown(log_t * log); + +static int lbmLogInit(log_t * log); +static void lbmLogShutdown(log_t * log); +static lbuf_t *lbmAllocate(log_t * log, int); +static void lbmFree(lbuf_t * bp); +static void lbmfree(lbuf_t * bp); +static int lbmRead(log_t * log, int pn, lbuf_t ** bpp); +static void lbmWrite(log_t * log, lbuf_t * bp, int flag, int cant_block); +static void lbmDirectWrite(log_t * log, lbuf_t * bp, int flag); +static int lbmIOWait(lbuf_t * bp, int flag); +static void lbmIODone(struct buffer_head *bh, int); + +void lbmStartIO(lbuf_t * bp); +void lmGCwrite(log_t * log, int cant_block); + + +/* + * statistics + */ +#ifdef CONFIG_JFS_STATISTICS +struct lmStat { + uint commit; /* # of commit */ + uint pagedone; /* # of page written */ + uint submitted; /* # of pages submitted */ +} lmStat; +#endif + + +/* + * NAME: lmLog() + * + * FUNCTION: write a log record; + * + * PARAMETER: + * + * RETURN: lsn - offset to the next log record to write (end-of-log); + * -1 - error; + * + * note: todo: log error handler + */ +int lmLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck) +{ + int lsn; + int diffp, difft; + metapage_t *mp = NULL; + + jFYI(1, ("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p\n", + log, tblk, lrd, tlck)); + + LOG_LOCK(log); + + /* log by (out-of-transaction) JFS ? */ + if (tblk == NULL) + goto writeRecord; + + /* log from page ? */ + if (tlck == NULL || + tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL) + goto writeRecord; + + /* + * initialize/update page/transaction recovery lsn + */ + lsn = log->lsn; + + LOGSYNC_LOCK(log); + + /* + * initialize page lsn if first log write of the page + */ + if (mp->lsn == 0) { + mp->log = log; + mp->lsn = lsn; + log->count++; + + /* insert page at tail of logsynclist */ + list_add_tail(&mp->synclist, &log->synclist); + } + + /* + * initialize/update lsn of tblock of the page + * + * transaction inherits oldest lsn of pages associated + * with allocation/deallocation of resources (their + * log records are used to reconstruct allocation map + * at recovery time: inode for inode allocation map, + * B+-tree index of extent descriptors for block + * allocation map); + * allocation map pages inherit transaction lsn at + * commit time to allow forwarding log syncpt past log + * records associated with allocation/deallocation of + * resources only after persistent map of these map pages + * have been updated and propagated to home. 
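+	 * (concretely: a transaction that allocates an inode
+	 * inherits the lsn of the imap page it dirtied, and the
+	 * syncpt may not move past that imap log record until the
+	 * page itself has been written home.)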
+ */ + /* + * initialize transaction lsn: + */ + if (tblk->lsn == 0) { + /* inherit lsn of its first page logged */ + tblk->lsn = mp->lsn; + log->count++; + + /* insert tblock after the page on logsynclist */ + list_add(&tblk->synclist, &mp->synclist); + } + /* + * update transaction lsn: + */ + else { + /* inherit oldest/smallest lsn of page */ + logdiff(diffp, mp->lsn, log); + logdiff(difft, tblk->lsn, log); + if (diffp < difft) { + /* update tblock lsn with page lsn */ + tblk->lsn = mp->lsn; + + /* move tblock after page on logsynclist */ + list_del(&tblk->synclist); + list_add(&tblk->synclist, &mp->synclist); + } + } + + LOGSYNC_UNLOCK(log); + + /* + * write the log record + */ + writeRecord: + lsn = lmWriteRecord(log, tblk, lrd, tlck); + + /* + * forward log syncpt if log reached next syncpt trigger + */ + logdiff(diffp, lsn, log); + if (diffp >= log->nextsync) + lsn = lmLogSync(log, 0); + + /* update end-of-log lsn */ + log->lsn = lsn; + + LOG_UNLOCK(log); + + /* return end-of-log address */ + return lsn; +} + + +/* + * NAME: lmWriteRecord() + * + * FUNCTION: move the log record to current log page + * + * PARAMETER: cd - commit descriptor + * + * RETURN: end-of-log address + * + * serialization: LOG_LOCK() held on entry/exit + */ +static int +lmWriteRecord(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck) +{ + int lsn = 0; /* end-of-log address */ + lbuf_t *bp; /* dst log page buffer */ + logpage_t *lp; /* dst log page */ + caddr_t dst; /* destination address in log page */ + int dstoffset; /* end-of-log offset in log page */ + int freespace; /* free space in log page */ + caddr_t p; /* src meta-data page */ + caddr_t src; + int srclen; + int nbytes; /* number of bytes to move */ + int i; + int len; + linelock_t *linelock; + lv_t *lv; + lvd_t *lvd; + int l2linesize; + + len = 0; + + /* retrieve destination log page to write */ + bp = (lbuf_t *) log->bp; + lp = (logpage_t *) bp->l_ldata; + dstoffset = log->eor; + + /* any log data to write ? */ + if (tlck == NULL) + goto moveLrd; + + /* + * move log record data + */ + /* retrieve source meta-data page to log */ + if (tlck->flag & tlckPAGELOCK) { + p = (caddr_t) (tlck->mp->data); + linelock = (linelock_t *) & tlck->lock; + } + /* retrieve source in-memory inode to log */ + else if (tlck->flag & tlckINODELOCK) { + if (tlck->type & tlckDTREE) + p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot; + else + p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot; + linelock = (linelock_t *) & tlck->lock; + } +#ifdef _JFS_WIP + else if (tlck->flag & tlckINLINELOCK) { + + inlinelock = (inlinelock_t *) & tlck; + p = (caddr_t) & inlinelock->pxd; + linelock = (linelock_t *) & tlck; + } +#endif /* _JFS_WIP */ + else { + jERROR(2, ("lmWriteRecord: UFO tlck:0x%p\n", tlck)); + return 0; /* Probably should trap */ + } + l2linesize = linelock->l2linesize; + + moveData: + ASSERT(linelock->index <= linelock->maxcnt); + + lv = (lv_t *) & linelock->lv; + for (i = 0; i < linelock->index; i++, lv++) { + if (lv->length == 0) + continue; + + /* is page full ? 
*/ + if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) { + /* page become full: move on to next page */ + lmNextPage(log); + + bp = log->bp; + lp = (logpage_t *) bp->l_ldata; + dstoffset = LOGPHDRSIZE; + } + + /* + * move log vector data + */ + src = (u8 *) p + (lv->offset << l2linesize); + srclen = lv->length << l2linesize; + len += srclen; + while (srclen > 0) { + freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; + nbytes = min(freespace, srclen); + dst = (caddr_t) lp + dstoffset; + memcpy(dst, src, nbytes); + dstoffset += nbytes; + + /* is page not full ? */ + if (dstoffset < LOGPSIZE - LOGPTLRSIZE) + break; + + /* page become full: move on to next page */ + lmNextPage(log); + + bp = (lbuf_t *) log->bp; + lp = (logpage_t *) bp->l_ldata; + dstoffset = LOGPHDRSIZE; + + srclen -= nbytes; + src += nbytes; + } + + /* + * move log vector descriptor + */ + len += 4; + lvd = (lvd_t *) ((caddr_t) lp + dstoffset); + lvd->offset = cpu_to_le16(lv->offset); + lvd->length = cpu_to_le16(lv->length); + dstoffset += 4; + jFYI(1, + ("lmWriteRecord: lv offset:%d length:%d\n", + lv->offset, lv->length)); + } + + if ((i = linelock->next)) { + linelock = (linelock_t *) lid_to_tlock(i); + goto moveData; + } + + /* + * move log record descriptor + */ + moveLrd: + lrd->length = cpu_to_le16(len); + + src = (caddr_t) lrd; + srclen = LOGRDSIZE; + + while (srclen > 0) { + freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; + nbytes = min(freespace, srclen); + dst = (caddr_t) lp + dstoffset; + memcpy(dst, src, nbytes); + + dstoffset += nbytes; + srclen -= nbytes; + + /* are there more to move than freespace of page ? */ + if (srclen) + goto pageFull; + + /* + * end of log record descriptor + */ + + /* update last log record eor */ + log->eor = dstoffset; + bp->l_eor = dstoffset; + lsn = (log->page << L2LOGPSIZE) + dstoffset; + + if (lrd->type & cpu_to_le16(LOG_COMMIT)) { + tblk->clsn = lsn; + jFYI(1, + ("wr: tclsn:0x%x, beor:0x%x\n", tblk->clsn, + bp->l_eor)); + + INCREMENT(lmStat.commit); /* # of commit */ + + /* + * enqueue tblock for group commit: + * + * enqueue tblock of non-trivial/synchronous COMMIT + * at tail of group commit queue + * (trivial/asynchronous COMMITs are ignored by + * group commit.) + */ + LOGGC_LOCK(log); + + /* init tblock gc state */ + tblk->flag = tblkGC_QUEUE; + tblk->bp = log->bp; + tblk->pn = log->page; + tblk->eor = log->eor; + init_waitqueue_head(&tblk->gcwait); + + /* enqueue transaction to commit queue */ + tblk->cqnext = NULL; + if (log->cqueue.head) { + log->cqueue.tail->cqnext = tblk; + log->cqueue.tail = tblk; + } else + log->cqueue.head = log->cqueue.tail = tblk; + + LOGGC_UNLOCK(log); + } + + jFYI(1, + ("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x\n", + le16_to_cpu(lrd->type), log->bp, log->page, + dstoffset)); + + /* page not full ? */ + if (dstoffset < LOGPSIZE - LOGPTLRSIZE) + return lsn; + + pageFull: + /* page become full: move on to next page */ + lmNextPage(log); + + bp = (lbuf_t *) log->bp; + lp = (logpage_t *) bp->l_ldata; + dstoffset = LOGPHDRSIZE; + src += nbytes; + } + + return lsn; +} + + +/* + * NAME: lmNextPage() + * + * FUNCTION: write current page and allocate next page. 
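+ *	(the log is used as a circular file: page 0 is never used
+ *	and page 1 holds the log superblock, so after the last page
+ *	the next data page wraps back to page 2 -- see the page
+ *	allocation below.)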
+ *
+ * PARAMETER:	log
+ *
+ * RETURN:	0
+ *
+ * serialization: LOG_LOCK() held on entry/exit
+ */
+static int lmNextPage(log_t * log)
+{
+	logpage_t *lp;
+	int lspn;		/* log sequence page number */
+	int pn;			/* current page number */
+	lbuf_t *bp;
+	lbuf_t *nextbp;
+	tblock_t *tblk;
+
+	jFYI(1, ("lmNextPage\n"));
+
+	/* get current log page number and log sequence page number */
+	pn = log->page;
+	bp = log->bp;
+	lp = (logpage_t *) bp->l_ldata;
+	lspn = le32_to_cpu(lp->h.page);
+
+	LOGGC_LOCK(log);
+
+	/*
+	 *	write or queue the full page at the tail of write queue
+	 */
+	/* get the tail tblk on commit queue */
+	tblk = log->cqueue.tail;
+
+	/* every tblk that has a COMMIT record on the current page,
+	 * and has not been committed, must be on the commit queue
+	 * since tblk is queued at the commit queue at the time
+	 * of writing its COMMIT record on the page before the
+	 * page becomes full (even though the tblk thread
+	 * that wrote the COMMIT record may have been suspended
+	 * currently);
+	 */
+
+	/* is page bound with outstanding tail tblk ? */
+	if (tblk && tblk->pn == pn) {
+		/* mark tblk for end-of-page */
+		tblk->flag |= tblkGC_EOP;
+
+		/* if page is not already on write queue,
+		 * just enqueue (no lbmWRITE to prevent redrive)
+		 * buffer to wqueue to ensure correct serial order
+		 * of the pages since log pages will be added
+		 * continuously (tblk bound with the page hasn't
+		 * got around to init write of the page, either
+		 * preempted or the page got filled by its COMMIT
+		 * record);
+		 * pages with COMMIT are paged out explicitly by
+		 * tblk in lmGroupCommit();
+		 */
+		if (bp->l_wqnext == NULL) {
+			/* bp->l_ceor = bp->l_eor; */
+			/* lp->h.eor = lp->t.eor = bp->l_ceor; */
+			lbmWrite(log, bp, 0, 0);
+		}
+	}
+	/* page is not bound with outstanding tblk:
+	 * init write or mark it to be redriven (lbmWRITE)
+	 */
+	else {
+		/* finalize the page */
+		bp->l_ceor = bp->l_eor;
+		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
+		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
+	}
+	LOGGC_UNLOCK(log);
+
+	/*
+	 *	allocate/initialize next page
+	 */
+	/* if log wraps, the first data page of log is 2
+	 * (0 never used, 1 is superblock).
+	 */
+	log->page = (pn == log->size - 1) ? 2 : pn + 1;
+	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */
+
+	/* allocate/initialize next log page buffer */
+	nextbp = lbmAllocate(log, log->page);
+	nextbp->l_eor = log->eor;
+	log->bp = nextbp;
+
+	/* initialize next log page */
+	lp = (logpage_t *) nextbp->l_ldata;
+	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
+	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
+
+	jFYI(1, ("lmNextPage done\n"));
+	return 0;
+}
+
+
+/*
+ * NAME:	lmGroupCommit()
+ *
+ * FUNCTION:	group commit
+ *	initiate pageout of the pages with COMMIT in the order of
+ *	page number - redrive pageout of the page at the head of
+ *	pageout queue until full page has been written.
+ *
+ * RETURN:
+ *
+ * NOTE:
+ *	LOGGC_LOCK serializes log group commit queue, and
+ *	transaction blocks on the commit queue.
+ *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
+ */
+int lmGroupCommit(log_t * log, tblock_t * tblk)
+{
+	int rc = 0;
+
+	LOGGC_LOCK(log);
+
+	/* group committed already ? */
+	if (tblk->flag & tblkGC_COMMITTED) {
+		if (tblk->flag & tblkGC_ERROR)
+			rc = EIO;
+
+		LOGGC_UNLOCK(log);
+		return rc;
+	}
+	jFYI(1,
+	     ("lmGroup Commit: tblk = 0x%p, gcrtc = %d\n", tblk,
+	      log->gcrtc));
+
+	/*
+	 * group commit pageout in progress
+	 */
+	if ((!(log->cflag & logGC_PAGEOUT)) && log->cqueue.head) {
+		/*
+		 * only transaction in the commit queue:
+		 *
+		 * start one-transaction group commit as
+		 * its group leader.
+		 */
+		log->cflag |= logGC_PAGEOUT;
+
+		lmGCwrite(log, 0);
+	}
+	/* lmGCwrite gives up LOGGC_LOCK, check again */
+
+	if (tblk->flag & tblkGC_COMMITTED) {
+		if (tblk->flag & tblkGC_ERROR)
+			rc = EIO;
+
+		LOGGC_UNLOCK(log);
+		return rc;
+	}
+
+	/* upcount transaction waiting for completion
+	 */
+	log->gcrtc++;
+
+	if (tblk->xflag & COMMIT_LAZY) {
+		tblk->flag |= tblkGC_LAZY;
+		LOGGC_UNLOCK(log);
+		return 0;
+	}
+	tblk->flag |= tblkGC_READY;
+
+	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
+		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));
+
+	/* removed from commit queue */
+	if (tblk->flag & tblkGC_ERROR)
+		rc = EIO;
+
+	LOGGC_UNLOCK(log);
+	return rc;
+}
+
+/*
+ * NAME:	lmGCwrite()
+ *
+ * FUNCTION:	group commit write
+ *	initiate write of log page, building a group of all transactions
+ *	with commit records on that page.
+ *
+ * RETURN:	None
+ *
+ * NOTE:
+ *	LOGGC_LOCK must be held by caller.
+ *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
+ */
+void lmGCwrite(log_t * log, int cant_write)
+{
+	lbuf_t *bp;
+	logpage_t *lp;
+	int gcpn;		/* group commit page number */
+	tblock_t *tblk;
+	tblock_t *xtblk;
+
+	/*
+	 * build the commit group of a log page
+	 *
+	 * scan commit queue and make a commit group of all
+	 * transactions with COMMIT records on the same log page.
+	 */
+	/* get the head tblk on the commit queue */
+	tblk = xtblk = log->cqueue.head;
+	gcpn = tblk->pn;
+
+	while (tblk && tblk->pn == gcpn) {
+		xtblk = tblk;
+
+		/* state transition: (QUEUE, READY) -> COMMIT */
+		tblk->flag |= tblkGC_COMMIT;
+		tblk = tblk->cqnext;
+	}
+	tblk = xtblk;		/* last tblk of the page */
+
+	/*
+	 * pageout to commit transactions on the log page.
+	 */
+	bp = (lbuf_t *) tblk->bp;
+	lp = (logpage_t *) bp->l_ldata;
+	/* is page already full ? */
+	if (tblk->flag & tblkGC_EOP) {
+		/* mark page to free at end of group commit of the page */
+		tblk->flag &= ~tblkGC_EOP;
+		tblk->flag |= tblkGC_FREE;
+		bp->l_ceor = bp->l_eor;
+		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
+		jEVENT(0,
+		       ("gc: tclsn:0x%x, bceor:0x%x\n", tblk->clsn,
+			bp->l_ceor));
+		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
+			 cant_write);
+	}
+	/* page is not yet full */
+	else {
+		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
+		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
+		jEVENT(0,
+		       ("gc: tclsn:0x%x, bceor:0x%x\n", tblk->clsn,
+			bp->l_ceor));
+		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
+	}
+}
+
+/*
+ * NAME:	lmPostGC()
+ *
+ * FUNCTION:	group commit post-processing
+ *	Processes transactions after their commit records have been written
+ *	to disk, redriving log I/O if necessary.
+ *
+ * RETURN:	None
+ *
+ * NOTE:
+ *	This routine is called at interrupt time by lbmIODone
+ */
+void lmPostGC(lbuf_t * bp)
+{
+	unsigned long flags;
+	log_t *log = bp->l_log;
+	logpage_t *lp;
+	tblock_t *tblk;
+
+	//LOGGC_LOCK(log);
+	spin_lock_irqsave(&log->gclock, flags);
+	/*
+	 * current pageout of group commit completed.
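+	 * (the group is exactly the prefix of the commit queue whose
+	 * tblkGC_COMMIT flag was set by lmGCwrite() before this
+	 * pageout; every such tblk is now on disk.)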
+	 *
+	 * remove/wakeup transactions from commit queue that were
+	 * group committed with the current log page
+	 */
+	while ((tblk = log->cqueue.head) && (tblk->flag & tblkGC_COMMIT)) {
+		/* if transaction was marked GC_COMMIT then
+		 * it has been shipped in the current pageout
+		 * and made it to disk - it is committed.
+		 */
+
+		if (bp->l_flag & lbmERROR)
+			tblk->flag |= tblkGC_ERROR;
+
+		/* remove it from the commit queue */
+		log->cqueue.head = tblk->cqnext;
+		if (log->cqueue.head == NULL)
+			log->cqueue.tail = NULL;
+		tblk->flag &= ~tblkGC_QUEUE;
+		tblk->cqnext = 0;
+
+		jEVENT(0,
+		       ("lmPostGC: tblk = 0x%p, flag = 0x%x\n", tblk,
+			tblk->flag));
+
+		if (!(tblk->xflag & COMMIT_FORCE))
+			/*
+			 * Hand tblk over to lazy commit thread
+			 */
+			txLazyUnlock(tblk);
+		else {
+			/* state transition: COMMIT -> COMMITTED */
+			tblk->flag |= tblkGC_COMMITTED;
+
+			if (tblk->flag & tblkGC_READY) {
+				log->gcrtc--;
+				LOGGC_WAKEUP(tblk);
+			}
+		}
+
+		/* was page full before pageout ?
+		 * (and this is the last tblk bound with the page)
+		 */
+		if (tblk->flag & tblkGC_FREE)
+			lbmFree(bp);
+		/* did page become full after pageout ?
+		 * (and this is the last tblk bound with the page)
+		 */
+		else if (tblk->flag & tblkGC_EOP) {
+			/* finalize the page */
+			lp = (logpage_t *) bp->l_ldata;
+			bp->l_ceor = bp->l_eor;
+			lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
+			jEVENT(0, ("lmPostGC: calling lbmWrite\n"));
+			lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
+				 1);
+		}
+
+	}
+
+	/* are there any transactions that have entered lmGroupCommit()
+	 * (whose COMMITs are after that of the last log page written)?
+	 * They are waiting for new group commit (above at (SLEEP 1)):
+	 * select the latest ready transaction as new group leader and
+	 * wake her up to lead her group.
+	 */
+	if ((log->gcrtc > 0) && log->cqueue.head)
+		/*
+		 * Call lmGCwrite with new group leader
+		 */
+		lmGCwrite(log, 1);
+
+	/* no transactions are ready yet (transactions are only just
+	 * queued (GC_QUEUE) and not entered for group commit yet).
+	 * the first transaction entering group commit
+	 * will elect herself as new group leader.
+	 */
+	else
+		log->cflag &= ~logGC_PAGEOUT;
+
+	//LOGGC_UNLOCK(log);
+	spin_unlock_irqrestore(&log->gclock, flags);
+	return;
+}
+
+/*
+ * NAME:	lmLogSync()
+ *
+ * FUNCTION:	write log SYNCPT record for specified log
+ *	if new sync address is available
+ *	(normally the case if sync() is executed by back-ground
+ *	process).
+ *	if not, explicitly run jfs_blogsync() to initiate
+ *	getting of new sync address.
+ *	calculate new value of i_nextsync which determines when
+ *	this code is called again.
+ *
+ *	this is called only from lmLog().
+ *
+ * PARAMETER:	ip	- pointer to the log's inode.
+ *
+ * RETURN:	0
+ *
+ * serialization: LOG_LOCK() held on entry/exit
+ */
+int lmLogSync(log_t * log, int nosyncwait)
+{
+	int logsize;
+	int written;		/* written since last syncpt */
+	int free;		/* free space left available */
+	int delta;		/* additional delta to write normally */
+	int more;		/* additional write granted */
+	lrd_t lrd;
+	int lsn;
+	struct logsyncblk *lp;
+
+	/*
+	 *	forward syncpt
+	 */
+	/* if last sync is same as last syncpt,
+	 * invoke sync point forward processing to update sync.
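+	 * (forwarding takes the lsn of the oldest entry still on the
+	 * synclist, or the current end of log if the list is empty --
+	 * exactly what the scan below does.)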
+	 */
+
+	if (log->sync == log->syncpt) {
+		LOGSYNC_LOCK(log);
+		/* ToDo: push dirty metapages out to disk */
+//		bmLogSync(log);
+
+		if (list_empty(&log->synclist))
+			log->sync = log->lsn;
+		else {
+			lp = list_entry(log->synclist.next,
+					struct logsyncblk, synclist);
+			log->sync = lp->lsn;
+		}
+		LOGSYNC_UNLOCK(log);
+
+	}
+
+	/* if sync is different from last syncpt,
+	 * write a SYNCPT record with syncpt = sync.
+	 * reset syncpt = sync
+	 */
+	if (log->sync != log->syncpt) {
+		struct jfs_sb_info *sbi = JFS_SBI(log->sb);
+		/*
+		 * We need to make sure all of the "written" metapages
+		 * actually make it to disk
+		 */
+		fsync_inode_data_buffers(sbi->ipbmap);
+		fsync_inode_data_buffers(sbi->ipimap);
+		fsync_inode_data_buffers(sbi->direct_inode);
+
+		lrd.logtid = 0;
+		lrd.backchain = 0;
+		lrd.type = cpu_to_le16(LOG_SYNCPT);
+		lrd.length = 0;
+		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
+		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
+
+		log->syncpt = log->sync;
+	} else
+		lsn = log->lsn;
+
+	/*
+	 *	setup next syncpt trigger (SWAG)
+	 */
+	logsize = log->logsize;
+
+	logdiff(written, lsn, log);
+	free = logsize - written;
+	delta = LOGSYNC_DELTA(logsize);
+	more = min(free / 2, delta);
+	if (more < 2 * LOGPSIZE) {
+		jEVENT(1,
+		       ("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n\n"));
+		/*
+		 *	log wrapping
+		 *
+		 * option 1 - panic ? No!
+		 * option 2 - shutdown file systems
+		 *	      associated with log ?
+		 * option 3 - extend log ?
+		 */
+		/*
+		 * option 4 - second chance
+		 *
+		 * mark log wrapped, and continue.
+		 * when all active transactions are completed,
+		 * mark log valid for recovery.
+		 * if crashed during invalid state, log state
+		 * implies an invalid log, forcing fsck().
+		 */
+		/* mark log state log wrap in log superblock */
+		/* log->state = LOGWRAP; */
+
+		/* reset sync point computation */
+		log->syncpt = log->sync = lsn;
+		log->nextsync = delta;
+	} else
+		/* next syncpt trigger = written + more */
+		log->nextsync = written + more;
+
+	/* return if lmLogSync() from outside of transaction, e.g., sync() */
+	if (nosyncwait)
+		return lsn;
+
+	/* if number of bytes written from last sync point is more
+	 * than 1/4 of the log size, stop new transactions from
+	 * starting until all current transactions are completed
+	 * by setting syncbarrier flag.
+	 */
+	if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) {
+		log->syncbarrier = 1;
+		jFYI(1, ("log barrier on: lsn=0x%x syncpt=0x%x\n", lsn,
+			 log->syncpt));
+	}
+
+	return lsn;
+}
+
+
+/*
+ * NAME:	lmLogOpen()
+ *
+ * FUNCTION:	open the log on first open;
+ *	insert filesystem in the active list of the log.
+ *
+ * PARAMETER:	sb	- super block of the file system
+ *		logptr	- pointer to the opened log (out)
+ *
+ * RETURN:
+ *
+ * serialization:
+ */
+int lmLogOpen(struct super_block *sb, log_t ** logptr)
+{
+	int rc;
+	struct block_device *bdev;
+	log_t *log;
+
+	if (!(log = kmalloc(sizeof(log_t), GFP_KERNEL)))
+		return ENOMEM;
+	memset(log, 0, sizeof(log_t));
+
+	if (!(JFS_SBI(sb)->mntflag & JFS_INLINELOG))
+		goto externalLog;
+
+	/*
+	 *	in-line log in host file system
+	 *
+	 * file system and log have a 1-to-1 relationship;
+	 */
+
+	log->sb = sb;		/* This should be a list */
+	log->flag = JFS_INLINELOG;
+	log->dev = sb->s_dev;
+	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
+	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
+	    (L2LOGPSIZE - sb->s_blocksize_bits);
+	log->l2bsize = sb->s_blocksize_bits;
+	ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
+
+	/*
+	 * initialize log.
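+	 * (log->size above is in log pages: lengthPXD() counts file
+	 * system blocks, so e.g. a 1KB-block aggregate shifts right
+	 * by L2LOGPSIZE - 10 = 2, assuming the usual 4KB log page.)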
+ */ + if ((rc = lmLogInit(log))) + goto errout10; + goto out; + + /* + * external log as separate logical volume + * + * file systems to log may have n-to-1 relationship; + */ + externalLog: + if (!(bdev = bdget(kdev_t_to_nr(JFS_SBI(sb)->logdev)))) { + rc = ENODEV; + goto errout10; + } + + if ((rc = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_FS))) { + rc = -rc; + goto errout10; + } + + log->sb = sb; /* This should be a list */ + log->dev = JFS_SBI(sb)->logdev; + log->bdev = bdev; + + /* + * initialize log: + */ + if ((rc = lmLogInit(log))) + goto errout20; + + /* + * add file system to log active file system list + */ + if ((rc = lmLogFileSystem(log, sb->s_dev, 1))) + goto errout30; + + out: + jFYI(1, ("lmLogOpen: exit(0)\n")); + *logptr = log; + return 0; + + /* + * unwind on error + */ + errout30: /* unwind lbmLogInit() */ + lbmLogShutdown(log); + + errout20: /* close external log device */ + blkdev_put(bdev, BDEV_FS); + + errout10: /* free log descriptor */ + kfree(log); + + jFYI(1, ("lmLogOpen: exit(%d)\n", rc)); + return rc; +} + + +/* + * NAME: lmLogInit() + * + * FUNCTION: log initialization at first log open. + * + * logredo() (or logformat()) should have been run previously. + * initialize the log inode from log superblock. + * set the log state in the superblock to LOGMOUNT and + * write SYNCPT log record. + * + * PARAMETER: log - log structure + * + * RETURN: 0 - if ok + * EINVAL - bad log magic number or superblock dirty + * error returned from logwait() + * + * serialization: single first open thread + */ +static int lmLogInit(log_t * log) +{ + int rc = 0; + lrd_t lrd; + logsuper_t *logsuper; + lbuf_t *bpsuper; + lbuf_t *bp; + logpage_t *lp; + int lsn; + + jFYI(1, ("lmLogInit: log:0x%p\n", log)); + + /* + * log inode is overlaid on generic inode where + * dinode have been zeroed out by iRead(); + */ + + /* + * initialize log i/o + */ + if ((rc = lbmLogInit(log))) + return rc; + + /* + * validate log superblock + */ + + + if (!(log->flag & JFS_INLINELOG)) + log->l2bsize = 12; /* XXX kludge alert XXX */ + if ((rc = lbmRead(log, 1, &bpsuper))) + goto errout10; + + logsuper = (logsuper_t *) bpsuper->l_ldata; + + if (logsuper->magic != cpu_to_le32(LOGMAGIC)) { + jERROR(1, ("*** Log Format Error ! ***\n")); + rc = EINVAL; + goto errout20; + } + + /* logredo() should have been run successfully. */ + if (logsuper->state != cpu_to_le32(LOGREDONE)) { + jERROR(1, ("*** Log Is Dirty ! 
***\n")); + rc = EINVAL; + goto errout20; + } + + /* initialize log inode from log superblock */ + if (log->flag & JFS_INLINELOG) { + if (log->size != le32_to_cpu(logsuper->size)) { + rc = EINVAL; + goto errout20; + } + jFYI(0, + ("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x\n", + log, (unsigned long long) log->base, log->size)); + } else { + log->size = le32_to_cpu(logsuper->size); + log->l2bsize = le32_to_cpu(logsuper->l2bsize); + jFYI(0, + ("lmLogInit: external log:0x%p base:0x%Lx size:0x%x\n", + log, (unsigned long long) log->base, log->size)); + } + + log->flag |= JFS_GROUPCOMMIT; +/* + log->flag |= JFS_LAZYCOMMIT; +*/ + log->page = le32_to_cpu(logsuper->end) / LOGPSIZE; + log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page); + + /* + * initialize for log append write mode + */ + /* establish current/end-of-log page/buffer */ + if ((rc = lbmRead(log, log->page, &bp))) + goto errout20; + + lp = (logpage_t *) bp->l_ldata; + + jFYI(1, ("lmLogInit: lsn:0x%x page:%d eor:%d:%d\n", + le32_to_cpu(logsuper->end), log->page, log->eor, + le16_to_cpu(lp->h.eor))); + +// ASSERT(log->eor == lp->h.eor); + + log->bp = bp; + bp->l_pn = log->page; + bp->l_eor = log->eor; + + /* initialize the group commit serialization lock */ + LOGGC_LOCK_INIT(log); + + /* if current page is full, move on to next page */ + if (log->eor >= LOGPSIZE - LOGPTLRSIZE) + lmNextPage(log); + + /* allocate/initialize the log write serialization lock */ + LOG_LOCK_INIT(log); + + /* + * initialize log syncpoint + */ + /* + * write the first SYNCPT record with syncpoint = 0 + * (i.e., log redo up to HERE !); + * remove current page from lbm write queue at end of pageout + * (to write log superblock update), but do not release to freelist; + */ + lrd.logtid = 0; + lrd.backchain = 0; + lrd.type = cpu_to_le16(LOG_SYNCPT); + lrd.length = 0; + lrd.log.syncpt.sync = 0; + lsn = lmWriteRecord(log, NULL, &lrd, NULL); + bp = log->bp; + bp->l_ceor = bp->l_eor; + lp = (logpage_t *) bp->l_ldata; + lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); + lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0); + if ((rc = lbmIOWait(bp, 0))) + goto errout30; + + /* initialize logsync parameters */ + log->logsize = (log->size - 2) << L2LOGPSIZE; + log->lsn = lsn; + log->syncpt = lsn; + log->sync = log->syncpt; + log->nextsync = LOGSYNC_DELTA(log->logsize); + init_waitqueue_head(&log->syncwait); + + jFYI(1, ("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x\n", + log->lsn, log->syncpt, log->sync)); + + LOGSYNC_LOCK_INIT(log); + + INIT_LIST_HEAD(&log->synclist); + + log->cqueue.head = log->cqueue.tail = 0; + + log->count = 0; + + /* + * initialize for lazy/group commit + */ + log->clsn = lsn; + + /* + * update/write superblock + */ + logsuper->state = cpu_to_le32(LOGMOUNT); + log->serial = le32_to_cpu(logsuper->serial) + 1; + logsuper->serial = cpu_to_le32(log->serial); + logsuper->device = cpu_to_le32(kdev_t_to_nr(log->dev)); + lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); + if ((rc = lbmIOWait(bpsuper, lbmFREE))) + goto errout30; + + jFYI(1, ("lmLogInit: exit(%d)\n", rc)); + return 0; + + /* + * unwind on error + */ + errout30: /* release log page */ + lbmFree(bp); + + errout20: /* release log superblock */ + lbmFree(bpsuper); + + errout10: /* unwind lbmLogInit() */ + lbmLogShutdown(log); + + jFYI(1, ("lmLogInit: exit(%d)\n", rc)); + return rc; +} + + +/* + * NAME: lmLogClose() + * + * FUNCTION: remove file system from active list of log + * and close it on last close. 
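+ *
+ * (Sketch of the intended pairing, assuming one mount per log
+ * as set up by lmLogOpen():
+ *
+ * lmLogOpen(sb, &log);
+ * ... journaled activity ...
+ * lmLogClose(sb, log);
+ *
+ * the inline-log path below only shuts the log down; the
+ * external-log path also deregisters the file system via
+ * lmLogFileSystem() and releases the block device.)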
+ *
+ * PARAMETER: sb - superblock
+ * log - log structure
+ *
+ * RETURN: errors from subroutines
+ *
+ * serialization:
+ */
+int lmLogClose(struct super_block *sb, log_t * log)
+{
+ int rc;
+
+ jFYI(1, ("lmLogClose: log:0x%p\n", log));
+
+ if (!(log->flag & JFS_INLINELOG))
+ goto externalLog;
+
+ /*
+ * in-line log in host file system
+ */
+ rc = lmLogShutdown(log);
+ goto out;
+
+ /*
+ * external log as separate logical volume
+ */
+ externalLog:
+ lmLogFileSystem(log, sb->s_dev, 0);
+ rc = lmLogShutdown(log);
+ blkdev_put(log->bdev, BDEV_FS);
+
+ out:
+ jFYI(0, ("lmLogClose: exit(%d)\n", rc));
+ return rc;
+}
+
+
+/*
+ * NAME: lmLogShutdown()
+ *
+ * FUNCTION: log shutdown at last LogClose().
+ *
+ * write log syncpt record.
+ * update super block to set redone flag to 0.
+ *
+ * PARAMETER: log - log structure
+ *
+ * RETURN: 0 - success
+ *
+ * serialization: single last close thread
+ */
+static int lmLogShutdown(log_t * log)
+{
+ int rc;
+ lrd_t lrd;
+ int lsn;
+ logsuper_t *logsuper;
+ lbuf_t *bpsuper;
+ lbuf_t *bp;
+ logpage_t *lp;
+
+ jFYI(1, ("lmLogShutdown: log:0x%p\n", log));
+
+ if (log->cqueue.head || !list_empty(&log->synclist)) {
+ /*
+ * If there was very recent activity, we may need to wait
+ * for the lazycommit thread to catch up
+ */
+ int i;
+
+ for (i = 0; i < 800; i++) { /* Too much? */
+ current->state = TASK_INTERRUPTIBLE;
+ schedule_timeout(HZ / 4);
+ if ((log->cqueue.head == NULL) &&
+ list_empty(&log->synclist))
+ break;
+ }
+ }
+ assert(log->cqueue.head == NULL);
+ assert(list_empty(&log->synclist));
+
+ /*
+ * We need to make sure all of the "written" metapages
+ * actually make it to disk
+ */
+#if ( (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8)) || \
+ ( (LINUX_VERSION_CODE == KERNEL_VERSION(2,4,8)) && defined(MODULE) ) )
+ /*
+ * fsync_no_super not added until 2.4.8, not exported until 2.4.9
+ */
+ {
+ struct jfs_sb_info *sbi = JFS_SBI(log->sb);
+
+ fsync_inode_data_buffers(sbi->ipbmap);
+ fsync_inode_data_buffers(sbi->ipimap);
+ fsync_inode_data_buffers(sbi->direct_inode);
+ }
+#else
+ fsync_no_super(log->sb->s_dev);
+#endif
+
+ /*
+ * write the last SYNCPT record with syncpoint = 0
+ * (i.e., log redo up to HERE !)
+ */
+ lrd.logtid = 0;
+ lrd.backchain = 0;
+ lrd.type = cpu_to_le16(LOG_SYNCPT);
+ lrd.length = 0;
+ lrd.log.syncpt.sync = 0;
+ lsn = lmWriteRecord(log, NULL, &lrd, NULL);
+ bp = log->bp;
+ lp = (logpage_t *) bp->l_ldata;
+ lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
+ lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
+ lbmIOWait(log->bp, lbmFREE);
+
+ /*
+ * synchronous update log superblock
+ * mark log state as shutdown cleanly
+ * (i.e., Log does not need to be replayed).
+ */
+ if ((rc = lbmRead(log, 1, &bpsuper)))
+ goto out;
+
+ logsuper = (logsuper_t *) bpsuper->l_ldata;
+ logsuper->state = cpu_to_le32(LOGREDONE);
+ logsuper->end = cpu_to_le32(lsn);
+ lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
+ rc = lbmIOWait(bpsuper, lbmFREE);
+
+ jFYI(1, ("lmLogShutdown: lsn:0x%x page:%d eor:%d\n",
+ lsn, log->page, log->eor));
+
+ out:
+ /*
+ * shutdown per log i/o
+ */
+ lbmLogShutdown(log);
+
+ if (rc) {
+ jFYI(1, ("lmLogShutdown: exit(%d)\n", rc));
+ }
+ return rc;
+}
+
+
+/*
+ * NAME: lmLogFileSystem()
+ *
+ * FUNCTION: insert (activate = true)/remove (activate = false)
+ * file system into/from log active file system list.
+ *
+ * PARAMETER: log - pointer to log structure.
+ * fsdev - kdev_t of filesystem.
+ * activate - insert/remove device from active list.
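+ *
+ * (Illustration, from the code below: the log superblock keeps
+ * a fixed table logsuper->active[MAX_ACTIVE] of little-endian
+ * device numbers; activation stores cpu_to_le32(kdev_t_to_nr(fsdev))
+ * in the first free slot and deactivation zeroes the matching
+ * slot, so recovery can tell which file systems were still
+ * active at crash time.)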
+ *
+ * RETURN: 0 - success
+ * errors returned by vms_iowait().
+ *
+ * serialization: IWRITE_LOCK(log inode) held on entry/exit
+ */
+static int lmLogFileSystem(log_t * log, kdev_t fsdev, int activate)
+{
+ int rc = 0;
+ int i;
+ u32 dev_le = cpu_to_le32(kdev_t_to_nr(fsdev));
+ logsuper_t *logsuper;
+ lbuf_t *bpsuper;
+
+ /*
+ * insert/remove file system device into/from the log active
+ * file system list.
+ */
+ if ((rc = lbmRead(log, 1, &bpsuper)))
+ return rc;
+
+ logsuper = (logsuper_t *) bpsuper->l_ldata;
+ if (activate) {
+ for (i = 0; i < MAX_ACTIVE; i++)
+ if (logsuper->active[i] == 0) {
+ logsuper->active[i] = dev_le;
+ break;
+ }
+ if (i == MAX_ACTIVE) {
+ jERROR(1,("Too many file systems sharing journal!\n"));
+ lbmFree(bpsuper);
+ return EMFILE; /* Is there a better rc? */
+ }
+ } else {
+ for (i = 0; i < MAX_ACTIVE; i++)
+ if (logsuper->active[i] == dev_le) {
+ logsuper->active[i] = 0;
+ break;
+ }
+ assert(i < MAX_ACTIVE);
+ }
+
+ /*
+ * synchronous write log superblock:
+ *
+ * write sidestream bypassing write queue:
+ * at file system mount, log super block is updated for
+ * activation of the file system before any log record
+ * (MOUNT record) of the file system, and at file system
+ * unmount, all meta data for the file system has been
+ * flushed before log super block is updated for deactivation
+ * of the file system.
+ */
+ lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
+ rc = lbmIOWait(bpsuper, lbmFREE);
+
+ return rc;
+}
+
+
+/*
+ * lmLogQuiesce()
+ */
+int lmLogQuiesce(log_t * log)
+{
+ int rc;
+
+ rc = lmLogShutdown(log);
+
+ return rc;
+}
+
+
+/*
+ * lmLogResume()
+ */
+int lmLogResume(log_t * log, struct super_block *sb)
+{
+ struct jfs_sb_info *sbi = JFS_SBI(sb);
+ int rc;
+
+ log->base = addressPXD(&sbi->logpxd);
+ log->size =
+ (lengthPXD(&sbi->logpxd) << sb->s_blocksize_bits) >> L2LOGPSIZE;
+ rc = lmLogInit(log);
+
+ return rc;
+}
+
+
+/*
+ * log buffer manager (lbm)
+ * ------------------------
+ *
+ * special purpose buffer manager supporting log i/o requirements.
+ *
+ * per log write queue:
+ * log pageout occurs in serial order via a fifo write queue, and
+ * is restricted to a single i/o in progress at any one time.
+ * the queue is a circular singly-linked list
+ * (log->wqueue points to the tail, and buffers are linked via
+ * the bp->l_wqnext field), and
+ * maintains log pages in pageout or waiting for pageout in serial
+ * pageout order.
+ */
+
+/*
+ * lbmLogInit()
+ *
+ * initialize per log I/O setup at lmLogInit()
+ */
+static int lbmLogInit(log_t * log)
+{ /* log inode */
+ int i;
+ lbuf_t *lbuf;
+
+ jFYI(1, ("lbmLogInit: log:0x%p\n", log));
+
+ /* initialize current buffer cursor */
+ log->bp = NULL;
+
+ /* initialize log device write queue */
+ log->wqueue = NULL;
+
+ /*
+ * Each log has its own buffer pages allocated to it. These are
+ * not managed by the page cache. This ensures that a transaction
+ * writing to the log does not block trying to allocate a page from
+ * the page cache (for the log). This would be bad, since page
+ * allocation waits on the kswapd thread that may be committing inodes
+ * which would cause log activity. Was that clear? I'm trying to
+ * avoid deadlock here.
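+ *
+ * (The cycle being broken, spelled out: log write -> page
+ * allocation -> kswapd reclaim -> inode commit -> more log
+ * activity. Preallocating LOGPAGES private buffers outside the
+ * page cache removes the allocation step from that path.)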
+ */ + init_waitqueue_head(&log->free_wait); + + log->lbuf_free = NULL; + + for (i = 0; i < LOGPAGES; i++) { + lbuf = kmalloc(sizeof(lbuf_t), GFP_KERNEL); + if (lbuf == 0) + goto error; + lbuf->l_bh.b_data = lbuf->l_ldata = + (char *) __get_free_page(GFP_KERNEL); + if (lbuf->l_ldata == 0) { + kfree(lbuf); + goto error; + } + lbuf->l_log = log; + init_waitqueue_head(&lbuf->l_ioevent); + + lbuf->l_bh.b_size = LOGPSIZE; + lbuf->l_bh.b_dev = log->dev; + lbuf->l_bh.b_end_io = lbmIODone; + lbuf->l_bh.b_private = lbuf; + lbuf->l_bh.b_page = virt_to_page(lbuf->l_ldata); + lbuf->l_bh.b_state = 0; + init_waitqueue_head(&lbuf->l_bh.b_wait); + + lbuf->l_freelist = log->lbuf_free; + log->lbuf_free = lbuf; + } + + return (0); + + error: + lbmLogShutdown(log); + return (ENOMEM); +} + + +/* + * lbmLogShutdown() + * + * finalize per log I/O setup at lmLogShutdown() + */ +static void lbmLogShutdown(log_t * log) +{ + lbuf_t *lbuf; + + jFYI(1, ("lbmLogShutdown: log:0x%p\n", log)); + + lbuf = log->lbuf_free; + while (lbuf) { + lbuf_t *next = lbuf->l_freelist; + free_page((unsigned long) lbuf->l_ldata); + kfree(lbuf); + lbuf = next; + } + + log->bp = NULL; +} + + +/* + * lbmAllocate() + * + * allocate an empty log buffer + */ +static lbuf_t *lbmAllocate(log_t * log, int pn) +{ + lbuf_t *bp; + unsigned long flags; + + /* + * recycle from log buffer freelist if any + */ + LCACHE_LOCK(flags); + LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags); + log->lbuf_free = bp->l_freelist; + LCACHE_UNLOCK(flags); + + bp->l_flag = 0; + + bp->l_wqnext = NULL; + bp->l_freelist = NULL; + + bp->l_pn = pn; + bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize)); + bp->l_bh.b_blocknr = bp->l_blkno; + bp->l_ceor = 0; + + return bp; +} + + +/* + * lbmFree() + * + * release a log buffer to freelist + */ +static void lbmFree(lbuf_t * bp) +{ + unsigned long flags; + + LCACHE_LOCK(flags); + + lbmfree(bp); + + LCACHE_UNLOCK(flags); +} + +static void lbmfree(lbuf_t * bp) +{ + log_t *log = bp->l_log; + + assert(bp->l_wqnext == NULL); + + /* + * return the buffer to head of freelist + */ + bp->l_freelist = log->lbuf_free; + log->lbuf_free = bp; + + wake_up(&log->free_wait); + return; +} + + +/* + * NAME: lbmRedrive + * + * FUNCTION: add a log buffer to the the log redrive list + * + * PARAMETER: + * bp - log buffer + * + * NOTES: + * Takes log_redrive_lock. + */ +static inline void lbmRedrive(lbuf_t *bp) +{ + unsigned long flags; + + spin_lock_irqsave(&log_redrive_lock, flags); + bp->l_redrive_next = log_redrive_list; + log_redrive_list = bp; + spin_unlock_irqrestore(&log_redrive_lock, flags); + + wake_up_process(jfsIOtask); +} + + +/* + * lbmRead() + */ +static int lbmRead(log_t * log, int pn, lbuf_t ** bpp) +{ + lbuf_t *bp; + + /* + * allocate a log buffer + */ + *bpp = bp = lbmAllocate(log, pn); + jFYI(1, ("lbmRead: bp:0x%p pn:0x%x\n", bp, pn)); + + bp->l_flag |= lbmREAD; + bp->l_bh.b_reqnext = NULL; + clear_bit(BH_Uptodate, &bp->l_bh.b_state); + lock_buffer(&bp->l_bh); + set_bit(BH_Mapped, &bp->l_bh.b_state); + set_bit(BH_Req, &bp->l_bh.b_state); + bp->l_bh.b_rdev = bp->l_bh.b_dev; + bp->l_bh.b_rsector = bp->l_blkno << (log->l2bsize - 9); + generic_make_request(READ, &bp->l_bh); + run_task_queue(&tq_disk); + + wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD)); + + return 0; +} + + +/* + * lbmWrite() + * + * buffer at head of pageout queue stays after completion of + * partial-page pageout and redriven by explicit initiation of + * pageout by caller until full-page pageout is completed and + * released. 
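+ *
+ * (Timeline sketch, assuming two group commits land in the same
+ * log page: the first pageout is partial, so lbmIODone() leaves
+ * the buffer at the head of the write queue; once later records
+ * fill the page, it is redriven with lbmWRITE | lbmRELEASE |
+ * lbmFREE (see lmPostGC()) and only then unlinked and freed.)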
+ * + * device driver i/o done redrives pageout of new buffer at + * head of pageout queue when current buffer at head of pageout + * queue is released at the completion of its full-page pageout. + * + * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit(). + * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone() + */ +static void lbmWrite(log_t * log, lbuf_t * bp, int flag, int cant_block) +{ + lbuf_t *tail; + unsigned long flags; + + jFYI(1, ("lbmWrite: bp:0x%p flag:0x%x pn:0x%x\n", + bp, flag, bp->l_pn)); + + /* map the logical block address to physical block address */ + bp->l_blkno = + log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); + + LCACHE_LOCK(flags); /* disable+lock */ + + /* + * initialize buffer for device driver + */ + bp->l_flag = flag; + + /* + * insert bp at tail of write queue associated with log + * + * (request is either for bp already/currently at head of queue + * or new bp to be inserted at tail) + */ + tail = log->wqueue; + + /* is buffer not already on write queue ? */ + if (bp->l_wqnext == NULL) { + /* insert at tail of wqueue */ + if (tail == NULL) { + log->wqueue = bp; + bp->l_wqnext = bp; + } else { + log->wqueue = bp; + bp->l_wqnext = tail->l_wqnext; + tail->l_wqnext = bp; + } + + tail = bp; + } + + /* is buffer at head of wqueue and for write ? */ + if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) { + LCACHE_UNLOCK(flags); /* unlock+enable */ + return; + } + + LCACHE_UNLOCK(flags); /* unlock+enable */ + + if (cant_block) + lbmRedrive(bp); + else if (flag & lbmSYNC) + lbmStartIO(bp); + else { + LOGGC_UNLOCK(log); + lbmStartIO(bp); + LOGGC_LOCK(log); + } +} + + +/* + * lbmDirectWrite() + * + * initiate pageout bypassing write queue for sidestream + * (e.g., log superblock) write; + */ +static void lbmDirectWrite(log_t * log, lbuf_t * bp, int flag) +{ + jEVENT(0, ("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x\n", + bp, flag, bp->l_pn)); + + /* + * initialize buffer for device driver + */ + bp->l_flag = flag | lbmDIRECT; + + /* map the logical block address to physical block address */ + bp->l_blkno = + log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); + + /* + * initiate pageout of the page + */ + lbmStartIO(bp); +} + + +/* + * NAME: lbmStartIO() + * + * FUNCTION: Interface to DD strategy routine + * + * RETURN: none + * + * serialization: LCACHE_LOCK() is NOT held during log i/o; + */ +void lbmStartIO(lbuf_t * bp) +{ + jFYI(1, ("lbmStartIO\n")); + + bp->l_bh.b_reqnext = NULL; + set_bit(BH_Dirty, &bp->l_bh.b_state); +// lock_buffer(&bp->l_bh); + assert(!test_bit(BH_Lock, &bp->l_bh.b_state)); + set_bit(BH_Lock, &bp->l_bh.b_state); + + set_bit(BH_Mapped, &bp->l_bh.b_state); + set_bit(BH_Req, &bp->l_bh.b_state); + bp->l_bh.b_rdev = bp->l_bh.b_dev; + bp->l_bh.b_rsector = bp->l_blkno << (bp->l_log->l2bsize - 9); + generic_make_request(WRITE, &bp->l_bh); + + INCREMENT(lmStat.submitted); + run_task_queue(&tq_disk); + + jFYI(1, ("lbmStartIO done\n")); +} + + +/* + * lbmIOWait() + */ +static int lbmIOWait(lbuf_t * bp, int flag) +{ + unsigned long flags; + int rc = 0; + + jFYI(1, + ("lbmIOWait1: bp:0x%p flag:0x%x:0x%x\n", bp, bp->l_flag, + flag)); + + LCACHE_LOCK(flags); /* disable+lock */ + + LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags); + + rc = (bp->l_flag & lbmERROR) ? 
EIO : 0; + + if (flag & lbmFREE) + lbmfree(bp); + + LCACHE_UNLOCK(flags); /* unlock+enable */ + + jFYI(1, + ("lbmIOWait2: bp:0x%p flag:0x%x:0x%x\n", bp, bp->l_flag, + flag)); + return rc; +} + +/* + * lbmIODone() + * + * executed at INTIODONE level + */ +static void lbmIODone(struct buffer_head *bh, int uptodate) +{ + lbuf_t *bp = bh->b_private; + lbuf_t *nextbp, *tail; + log_t *log; + unsigned long flags; + + /* + * get back jfs buffer bound to the i/o buffer + */ + jEVENT(0, ("lbmIODone: bp:0x%p flag:0x%x\n", bp, bp->l_flag)); + + LCACHE_LOCK(flags); /* disable+lock */ + + unlock_buffer(&bp->l_bh); + bp->l_flag |= lbmDONE; + + if (!uptodate) { + bp->l_flag |= lbmERROR; + + jERROR(1, ("lbmIODone: I/O error in JFS log\n")); + } + + /* + * pagein completion + */ + if (bp->l_flag & lbmREAD) { + bp->l_flag &= ~lbmREAD; + + LCACHE_UNLOCK(flags); /* unlock+enable */ + + /* wakeup I/O initiator */ + LCACHE_WAKEUP(&bp->l_ioevent); + + return; + } + + /* + * pageout completion + * + * the bp at the head of write queue has completed pageout. + * + * if single-commit/full-page pageout, remove the current buffer + * from head of pageout queue, and redrive pageout with + * the new buffer at head of pageout queue; + * otherwise, the partial-page pageout buffer stays at + * the head of pageout queue to be redriven for pageout + * by lmGroupCommit() until full-page pageout is completed. + */ + bp->l_flag &= ~lbmWRITE; + INCREMENT(lmStat.pagedone); + + /* update committed lsn */ + log = bp->l_log; + log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor; + + if (bp->l_flag & lbmDIRECT) { + LCACHE_WAKEUP(&bp->l_ioevent); + LCACHE_UNLOCK(flags); + return; + } + + tail = log->wqueue; + + /* single element queue */ + if (bp == tail) { + /* remove head buffer of full-page pageout + * from log device write queue + */ + if (bp->l_flag & lbmRELEASE) { + log->wqueue = NULL; + bp->l_wqnext = NULL; + } + } + /* multi element queue */ + else { + /* remove head buffer of full-page pageout + * from log device write queue + */ + if (bp->l_flag & lbmRELEASE) { + nextbp = tail->l_wqnext = bp->l_wqnext; + bp->l_wqnext = NULL; + + /* + * redrive pageout of next page at head of write queue: + * redrive next page without any bound tblk + * (i.e., page w/o any COMMIT records), or + * first page of new group commit which has been + * queued after current page (subsequent pageout + * is performed synchronously, except page without + * any COMMITs) by lmGroupCommit() as indicated + * by lbmWRITE flag; + */ + if (nextbp->l_flag & lbmWRITE) { + /* + * We can't do the I/O at interrupt time. 
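+ * (we are in lbmIODone(), i.e. interrupt context, where
+ * sleeping or submitting i/o synchronously is not allowed;
+ * lbmRedrive() chains the buffer onto log_redrive_list and
+ * wakes jfsIOtask instead).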
+ * The jfsIO thread can do it
+ */
+ lbmRedrive(nextbp);
+ }
+ }
+ }
+
+ /*
+ * synchronous pageout:
+ *
+ * buffer has not necessarily been removed from write queue
+ * (e.g., synchronous write of partial-page with COMMIT):
+ * leave buffer for i/o initiator to dispose
+ */
+ if (bp->l_flag & lbmSYNC) {
+ LCACHE_UNLOCK(flags); /* unlock+enable */
+
+ /* wakeup I/O initiator */
+ LCACHE_WAKEUP(&bp->l_ioevent);
+ }
+
+ /*
+ * Group Commit pageout:
+ */
+ else if (bp->l_flag & lbmGC) {
+ LCACHE_UNLOCK(flags);
+ lmPostGC(bp);
+ }
+
+ /*
+ * asynchronous pageout:
+ *
+ * buffer must have been removed from write queue:
+ * insert buffer at head of freelist where it can be recycled
+ */
+ else {
+ assert(bp->l_flag & lbmRELEASE);
+ assert(bp->l_flag & lbmFREE);
+ lbmfree(bp);
+
+ LCACHE_UNLOCK(flags); /* unlock+enable */
+ }
+}
+
+int jfsIOWait(void *arg)
+{
+ lbuf_t *bp;
+
+ jFYI(1, ("jfsIOWait is here!\n"));
+
+ lock_kernel();
+
+ daemonize();
+ current->tty = NULL;
+ strcpy(current->comm, "jfsIO");
+
+ unlock_kernel();
+
+ jfsIOtask = current;
+
+ spin_lock_irq(&current->sigmask_lock);
+ siginitsetinv(&current->blocked,
+ sigmask(SIGHUP) | sigmask(SIGKILL) | sigmask(SIGSTOP)
+ | sigmask(SIGCONT));
+ spin_unlock_irq(&current->sigmask_lock);
+
+ complete(&jfsIOwait);
+
+ do {
+ spin_lock_irq(&log_redrive_lock);
+ while ((bp = log_redrive_list)) {
+ log_redrive_list = bp->l_redrive_next;
+ bp->l_redrive_next = NULL;
+ spin_unlock_irq(&log_redrive_lock);
+ lbmStartIO(bp);
+ spin_lock_irq(&log_redrive_lock);
+ }
+ spin_unlock_irq(&log_redrive_lock);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ } while (!jfs_thread_stopped());
+
+ jFYI(1,("jfsIOWait being killed!\n"));
+ complete(&jfsIOwait);
+ return 0;
+}
+
+
+#ifdef _STILL_TO_PORT
+/*
+ * NAME: lmLogFormat()/jfs_logform()
+ *
+ * FUNCTION: format file system log (ref. jfs_logform()).
+ *
+ * PARAMETERS:
+ * log - log inode (with common mount inode base);
+ * logAddress - start address of log space in FS block;
+ * logSize - length of log space in FS block;
+ *
+ * RETURN: 0 - success
+ * -1 - i/o error
+ */
+int lmLogFormat(inode_t * ipmnt, s64 logAddress, int logSize)
+{
+ int rc = 0;
+ cbuf_t *bp;
+ logsuper_t *logsuper;
+ logpage_t *lp;
+ int lspn; /* log sequence page number */
+ struct lrd *lrd_ptr;
+ int npbperpage, npages;
+
+ jFYI(0, ("lmLogFormat: logAddress:%Ld logSize:%d\n",
+ logAddress, logSize));
+
+ /* allocate a JFS buffer */
+ bp = rawAllocate();
+
+ /* map the logical block address to physical block address */
+ bp->cm_blkno = logAddress << ipmnt->i_l2bfactor;
+
+ npbperpage = LOGPSIZE >> ipmnt->i_l2pbsize;
+ npages = logSize / (LOGPSIZE >> ipmnt->i_l2bsize);
+
+ /*
+ * log space:
+ *
+ * page 0 - reserved;
+ * page 1 - log superblock;
+ * page 2 - log data page: A SYNC log record is written
+ * into this page at logform time;
+ * pages 3-N - log data page: set to empty log data pages;
+ */
+ /*
+ * init log superblock: log page 1
+ */
+ logsuper = (logsuper_t *) bp->cm_cdata;
+
+ logsuper->magic = cpu_to_le32(LOGMAGIC);
+ logsuper->version = cpu_to_le32(LOGVERSION);
+ logsuper->state = cpu_to_le32(LOGREDONE);
+ logsuper->flag = cpu_to_le32(ipmnt->i_mntflag); /* ?
*/ + logsuper->size = cpu_to_le32(npages); + logsuper->bsize = cpu_to_le32(ipmnt->i_bsize); + logsuper->l2bsize = cpu_to_le32(ipmnt->i_l2bsize); + logsuper->end = + cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE); + + bp->cm_blkno += npbperpage; + rawWrite(ipmnt, bp, 0); + + /* + * init pages 2 to npages-1 as log data pages: + * + * log page sequence number (lpsn) initialization: + * + * pn: 0 1 2 3 n-1 + * +-----+-----+=====+=====+===.....===+=====+ + * lspn: N-1 0 1 N-2 + * <--- N page circular file ----> + * + * the N (= npages-2) data pages of the log is maintained as + * a circular file for the log records; + * lpsn grows by 1 monotonically as each log page is written + * to the circular file of the log; + * Since the AIX DUMMY log record is dropped for this XJFS, + * and setLogpage() will not reset the page number even if + * the eor is equal to LOGPHDRSIZE. In order for binary search + * still work in find log end process, we have to simulate the + * log wrap situation at the log format time. + * The 1st log page written will have the highest lpsn. Then + * the succeeding log pages will have ascending order of + * the lspn starting from 0, ... (N-2) + */ + lp = (logpage_t *) bp->cm_cdata; + + /* + * initialize 1st log page to be written: lpsn = N - 1, + * write a SYNCPT log record is written to this page + */ + lp->h.page = lp->t.page = cpu_to_le32(npages - 3); + lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE); + + lrd_ptr = (struct lrd *) &lp->data; + lrd_ptr->logtid = 0; + lrd_ptr->backchain = 0; + lrd_ptr->type = cpu_to_le16(LOG_SYNCPT); + lrd_ptr->length = 0; + lrd_ptr->log.syncpt.sync = 0; + + bp->cm_blkno += npbperpage; + rawWrite(ipmnt, bp, 0); + + /* + * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2) + */ + for (lspn = 0; lspn < npages - 3; lspn++) { + lp->h.page = lp->t.page = cpu_to_le32(lspn); + lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE); + + bp->cm_blkno += npbperpage; + rawWrite(ipmnt, bp, 0); + } + + /* + * finalize log + */ + /* release the buffer */ + rawRelease(bp); + + return rc; +} +#endif /* _STILL_TO_PORT */ + + +#ifdef CONFIG_JFS_STATISTICS +int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length, + int *eof, void *data) +{ + int len = 0; + off_t begin; + + len += sprintf(buffer, + "JFS Logmgr stats\n" + "================\n" + "commits = %d\n" + "writes submitted = %d\n" + "writes completed = %d\n", + lmStat.commit, + lmStat.submitted, + lmStat.pagedone); + + begin = offset; + *start = buffer + begin; + len -= begin; + + if (len > length) + len = length; + else + *eof = 1; + + if (len < 0) + len = 0; + + return len; +} +#endif /* CONFIG_JFS_STATISTICS */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_logmgr.h linuxppc64_2_4/fs/jfs/jfs_logmgr.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_logmgr.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_logmgr.h Tue Apr 23 11:25:34 2002 @@ -0,0 +1,502 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _H_JFS_LOGMGR +#define _H_JFS_LOGMGR + + +#include "jfs_filsys.h" +#include "jfs_lock.h" + +/* + * log manager configuration parameters + */ + +/* log page size */ +#define LOGPSIZE 4096 +#define L2LOGPSIZE 12 + +#define LOGPAGES 16 /* Log pages per mounted file system */ + +/* + * log logical volume + * + * a log is used to make the commit operation on journalled + * files within the same logical volume group atomic. + * a log is implemented with a logical volume. + * there is one log per logical volume group. + * + * block 0 of the log logical volume is not used (ipl etc). + * block 1 contains a log "superblock" and is used by logFormat(), + * lmLogInit(), lmLogShutdown(), and logRedo() to record status + * of the log but is not otherwise used during normal processing. + * blocks 2 - (N-1) are used to contain log records. + * + * when a volume group is varied-on-line, logRedo() must have + * been executed before the file systems (logical volumes) in + * the volume group can be mounted. + */ +/* + * log superblock (block 1 of logical volume) + */ +#define LOGSUPER_B 1 +#define LOGSTART_B 2 + +#define LOGMAGIC 0x87654321 +#define LOGVERSION 1 + +#define MAX_ACTIVE 512 /* Max active file systems sharing log */ + +typedef struct { + u32 magic; /* 4: log lv identifier */ + s32 version; /* 4: version number */ + s32 serial; /* 4: log open/mount counter */ + s32 size; /* 4: size in number of LOGPSIZE blocks */ + s32 bsize; /* 4: logical block size in byte */ + s32 l2bsize; /* 4: log2 of bsize */ + + u32 flag; /* 4: option */ + u32 state; /* 4: state - see below */ + + s32 end; /* 4: addr of last log record set by logredo */ + u32 device; /* 4: save device in case location changes */ + u32 active[MAX_ACTIVE]; /* 2048: active file systems list */ +} logsuper_t; + +/* log flag: commit option (see jfs_filsys.h) */ + +/* log state */ +#define LOGMOUNT 0 /* log mounted by lmLogInit() */ +#define LOGREDONE 1 /* log shutdown by lmLogShutdown(). + * log redo completed by logredo(). + */ +#define LOGWRAP 2 /* log wrapped */ +#define LOGREADERR 3 /* log read error detected in logredo() */ + + +/* + * log logical page + * + * (this comment should be rewritten !) + * the header and trailer structures (h,t) will normally have + * the same page and eor value. + * An exception to this occurs when a complete page write is not + * accomplished on a power failure. Since the hardware may "split write" + * sectors in the page, any out of order sequence may occur during powerfail + * and needs to be recognized during log replay. The xor value is + * an "exclusive or" of all log words in the page up to eor. This + * 32 bit eor is stored with the top 16 bits in the header and the + * bottom 16 bits in the trailer. logredo can easily recognize pages + * that were not completed by reconstructing this eor and checking + * the log page. + * + * Previous versions of the operating system did not allow split + * writes and detected partially written records in logredo by + * ordering the updates to the header, trailer, and the move of data + * into the logdata area. The order: (1) data is moved (2) header + * is updated (3) trailer is updated. 
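+ * (So after a torn write, the on-disk page can be: fully old;
+ * new data with old header and trailer; or new data and header
+ * with an old trailer. The first is self-consistent, and the
+ * latter two show up as header/trailer mismatches, handled as
+ * follows.)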
+ * In logredo, when the header
+ * differed from the trailer, the header and trailer were reconciled
+ * as follows: if h.page != t.page they were set to the smaller of
+ * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
+ * h.eor != t.eor they were set to the smaller of their two values.
+ */
+typedef struct {
+ struct { /* header */
+ s32 page; /* 4: log sequence page number */
+ s16 rsrvd; /* 2: */
+ s16 eor; /* 2: end-of-log offset of last record write */
+ } h;
+
+ s32 data[LOGPSIZE / 4 - 4]; /* log record area */
+
+ struct { /* trailer */
+ s32 page; /* 4: normally the same as h.page */
+ s16 rsrvd; /* 2: */
+ s16 eor; /* 2: normally the same as h.eor */
+ } t;
+} logpage_t;
+
+#define LOGPHDRSIZE 8 /* log page header size */
+#define LOGPTLRSIZE 8 /* log page trailer size */
+
+
+/*
+ * log record
+ *
+ * (this comment should be rewritten !)
+ * jfs uses only "after" log records (only a single writer is allowed
+ * in a page, pages are written to temporary paging space
+ * if they must be written to disk before commit, and i/o is
+ * scheduled for modified pages to their home location after
+ * the log records containing the after values and the commit
+ * record is written to the log on disk, undo discards the copy
+ * in main-memory.)
+ *
+ * a log record consists of a data area of variable length followed by
+ * a descriptor of fixed size LOGRDSIZE bytes.
+ * the data area is rounded up to an integral number of 4-byte words and
+ * must be no longer than LOGPSIZE.
+ * the descriptor size is a multiple of 4 bytes and aligned on a
+ * 4-byte boundary.
+ * records are packed one after the other in the data area of log pages.
+ * (sometimes a DUMMY record is inserted so that at least one record ends
+ * on every page or the longest record is placed on at most two pages).
+ * the field eor in page header/trailer points to the byte following
+ * the last record on a page.
+ */
+
+/* log record types */
+#define LOG_COMMIT 0x8000
+#define LOG_SYNCPT 0x4000
+#define LOG_MOUNT 0x2000
+#define LOG_REDOPAGE 0x0800
+#define LOG_NOREDOPAGE 0x0080
+#define LOG_NOREDOINOEXT 0x0040
+#define LOG_UPDATEMAP 0x0008
+#define LOG_NOREDOFILE 0x0001
+
+/* REDOPAGE/NOREDOPAGE log record data type */
+#define LOG_INODE 0x0001
+#define LOG_XTREE 0x0002
+#define LOG_DTREE 0x0004
+#define LOG_BTROOT 0x0010
+#define LOG_EA 0x0020
+#define LOG_ACL 0x0040
+#define LOG_DATA 0x0080
+#define LOG_NEW 0x0100
+#define LOG_EXTEND 0x0200
+#define LOG_RELOCATE 0x0400
+#define LOG_DIR_XTREE 0x0800 /* Xtree is in directory inode */
+
+/* UPDATEMAP log record descriptor type */
+#define LOG_ALLOCXADLIST 0x0080
+#define LOG_ALLOCPXDLIST 0x0040
+#define LOG_ALLOCXAD 0x0020
+#define LOG_ALLOCPXD 0x0010
+#define LOG_FREEXADLIST 0x0008
+#define LOG_FREEPXDLIST 0x0004
+#define LOG_FREEXAD 0x0002
+#define LOG_FREEPXD 0x0001
+
+
+typedef struct lrd {
+ /*
+ * type independent area
+ */
+ s32 logtid; /* 4: log transaction identifier */
+ s32 backchain; /* 4: ptr to prev record of same transaction */
+ u16 type; /* 2: record type */
+ s16 length; /* 2: length of data in record (in bytes) */
+ u32 aggregate; /* 4: file system lv/aggregate */
+ /* (16) */
+
+ /*
+ * type dependent area (20)
+ */
+ union {
+
+ /*
+ * COMMIT: commit
+ *
+ * transaction commit: no type-dependent information;
+ */
+
+ /*
+ * REDOPAGE: after-image
+ *
+ * apply after-image;
+ *
+ * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
+ */
+ struct {
+ u32 fileset; /* 4: fileset number */
+ u32 inode; /* 4: inode number */
+ u16 type; /* 2: REDOPAGE record type */
+ s16 l2linesize; /* 2: log2 of line size */
+ pxd_t pxd; /* 8: on-disk page pxd */
+ } redopage; /* (20) */
+
+ /*
+ * NOREDOPAGE: the page is freed
+ *
+ * do not apply after-image records which precede this record
+ * in the log with the same page block number to this page.
+ *
+ * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
+ */
+ struct {
+ s32 fileset; /* 4: fileset number */
+ u32 inode; /* 4: inode number */
+ u16 type; /* 2: NOREDOPAGE record type */
+ s16 rsrvd; /* 2: reserved */
+ pxd_t pxd; /* 8: on-disk page pxd */
+ } noredopage; /* (20) */
+
+ /*
+ * UPDATEMAP: update block allocation map
+ *
+ * either in-line PXD,
+ * or out-of-line XADLIST;
+ *
+ * N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
+ */
+ struct {
+ u32 fileset; /* 4: fileset number */
+ u32 inode; /* 4: inode number */
+ u16 type; /* 2: UPDATEMAP record type */
+ s16 nxd; /* 2: number of extents */
+ pxd_t pxd; /* 8: pxd */
+ } updatemap; /* (20) */
+
+ /*
+ * NOREDOINOEXT: the inode extent is freed
+ *
+ * do not apply after-image records which precede this
+ * record in the log with any of the 4 page block
+ * numbers in this inode extent.
+ *
+ * NOTE: The fileset and pxd fields MUST remain in
+ * the same fields in the REDOPAGE record format.
+ *
+ */
+ struct {
+ s32 fileset; /* 4: fileset number */
+ s32 iagnum; /* 4: IAG number */
+ s32 inoext_idx; /* 4: inode extent index */
+ pxd_t pxd; /* 8: on-disk page pxd */
+ } noredoinoext; /* (20) */
+
+ /*
+ * SYNCPT: log sync point
+ *
+ * replay log up to syncpt address specified;
+ */
+ struct {
+ s32 sync; /* 4: syncpt address (0 = here) */
+ } syncpt;
+
+ /*
+ * MOUNT: file system mount
+ *
+ * file system mount: no type-dependent information;
+ */
+
+ /*
+ * ? FREEXTENT: free specified extent(s)
+ *
+ * free specified extent(s) from block allocation map
+ * N.B.: nextents should be length of data/sizeof(xad_t)
+ */
+ struct {
+ s32 type; /* 4: FREEXTENT record type */
+ s32 nextent; /* 4: number of extents */
+
+ /* data: PXD or XAD list */
+ } freextent;
+
+ /*
+ * ? NOREDOFILE: this file is freed
+ *
+ * do not apply records which precede this record in the log
+ * with the same inode number.
+ *
+ * NOREDOFILE must be the first to be written at commit
+ * (last to be read in logredo()) - it prevents
+ * replay of preceding updates of all preceding generations
+ * of the inumber esp. the on-disk inode itself,
+ * but does NOT prevent
+ * replay of the
+ */
+ struct {
+ s32 fileset; /* 4: fileset number */
+ u32 inode; /* 4: inode number */
+ } noredofile;
+
+ /*
+ * ? NEWPAGE:
+ *
+ * metadata type dependent
+ */
+ struct {
+ s32 fileset; /* 4: fileset number */
+ u32 inode; /* 4: inode number */
+ s32 type; /* 4: NEWPAGE record type */
+ pxd_t pxd; /* 8: on-disk page pxd */
+ } newpage;
+
+ /*
+ * ? DUMMY: filler
+ *
+ * no type-dependent information
+ */
+ } log;
+} lrd_t; /* (36) */
+
+#define LOGRDSIZE (sizeof(struct lrd))
+
+/*
+ * line vector descriptor
+ */
+typedef struct {
+ s16 offset;
+ s16 length;
+} lvd_t;
+
+
+/*
+ * log logical volume
+ */
+typedef struct jfs_log {
+
+ struct super_block *sb; /* 4: This is used to sync metadata
+ * before writing syncpt.
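+ * (lmLogSync() reaches through sb to fsync the
+ * bmap, imap, and direct inodes.)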
Will + * need to be a list if we share + * the log between fs's + */ + kdev_t dev; /* 4: log lv number */ + struct block_device *bdev; /* 4: log lv pointer */ + s32 serial; /* 4: log mount serial number */ + + s64 base; /* @8: log extent address (inline log ) */ + int size; /* 4: log size in log page (in page) */ + int l2bsize; /* 4: log2 of bsize */ + + uint flag; /* 4: flag */ + uint state; /* 4: state */ + + struct lbuf *lbuf_free; /* 4: free lbufs */ + wait_queue_head_t free_wait; /* 4: */ + + /* log write */ + int logtid; /* 4: log tid */ + int page; /* 4: page number of eol page */ + int eor; /* 4: eor of last record in eol page */ + struct lbuf *bp; /* 4: current log page buffer */ + + struct semaphore loglock; /* 4: log write serialization lock */ + + /* syncpt */ + int nextsync; /* 4: bytes to write before next syncpt */ + int active; /* 4: */ + int syncbarrier; /* 4: */ + wait_queue_head_t syncwait; /* 4: */ + + /* commit */ + uint cflag; /* 4: */ + struct { /* 8: FIFO commit queue header */ + struct tblock *head; + struct tblock *tail; + } cqueue; + int gcrtc; /* 4: GC_READY transaction count */ + struct tblock *gclrt; /* 4: latest GC_READY transaction */ + spinlock_t gclock; /* 4: group commit lock */ + int logsize; /* 4: log data area size in byte */ + int lsn; /* 4: end-of-log */ + int clsn; /* 4: clsn */ + int syncpt; /* 4: addr of last syncpt record */ + int sync; /* 4: addr from last logsync() */ + struct list_head synclist; /* 8: logsynclist anchor */ + spinlock_t synclock; /* 4: synclist lock */ + struct lbuf *wqueue; /* 4: log pageout queue */ + int count; /* 4: count */ +} log_t; + +/* + * group commit flag + */ +/* log_t */ +#define logGC_PAGEOUT 0x00000001 + +/* tblock_t/lbuf_t */ +#define tblkGC_QUEUE 0x0001 +#define tblkGC_READY 0x0002 +#define tblkGC_COMMIT 0x0004 +#define tblkGC_COMMITTED 0x0008 +#define tblkGC_EOP 0x0010 +#define tblkGC_FREE 0x0020 +#define tblkGC_LEADER 0x0040 +#define tblkGC_ERROR 0x0080 +#define tblkGC_LAZY 0x0100 // D230860 +#define tblkGC_UNLOCKED 0x0200 // D230860 + +/* + * log cache buffer header + */ +typedef struct lbuf { + struct buffer_head l_bh; /* for doing I/O */ + log_t *l_log; /* 4: log associated with buffer */ + + /* + * data buffer base area + */ + uint l_flag; /* 4: pageout control flags */ + + struct lbuf *l_wqnext; /* 4: write queue link */ + struct lbuf *l_freelist; /* 4: freelistlink */ + + int l_pn; /* 4: log page number */ + int l_eor; /* 4: log record eor */ + int l_ceor; /* 4: committed log record eor */ + + s64 l_blkno; /* 8: log page block number */ + caddr_t l_ldata; /* 4: data page */ + + wait_queue_head_t l_ioevent; /* 4: i/o done event */ + struct page *l_page; /* The page itself */ +} lbuf_t; + +/* Reuse l_freelist for redrive list */ +#define l_redrive_next l_freelist + +/* + * logsynclist block + * + * common logsyncblk prefix for jbuf_t and tblock_t + */ +typedef struct logsyncblk { + u16 xflag; /* flags */ + u16 flag; /* only meaninful in tblock_t */ + lid_t lid; /* lock id */ + s32 lsn; /* log sequence number */ + struct list_head synclist; /* log sync list link */ +} logsyncblk_t; + +/* + * logsynclist serialization (per log) + */ + +#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock) +#define LOGSYNC_LOCK(log) spin_lock(&(log)->synclock) +#define LOGSYNC_UNLOCK(log) spin_unlock(&(log)->synclock) + +/* compute the difference in bytes of lsn from sync point */ +#define logdiff(diff, lsn, log)\ +{\ + diff = (lsn) - (log)->syncpt;\ + if (diff < 0)\ + diff += (log)->logsize;\ +} + +extern int 
lmLogOpen(struct super_block *sb, log_t ** log); +extern int lmLogClose(struct super_block *sb, log_t * log); +extern int lmLogSync(log_t * log, int nosyncwait); +extern int lmLogQuiesce(log_t * log); +extern int lmLogResume(log_t * log, struct super_block *sb); +extern int lmLogFormat(struct super_block *sb, s64 logAddress, int logSize); + +#endif /* _H_JFS_LOGMGR */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_metapage.c linuxppc64_2_4/fs/jfs/jfs_metapage.c --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_metapage.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_metapage.c Tue Apr 23 11:21:51 2002 @@ -0,0 +1,688 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Module: jfs/jfs_metapage.c + * + */ + +#include +#include +#include "jfs_incore.h" +#include "jfs_filsys.h" +#include "jfs_metapage.h" +#include "jfs_txnmgr.h" +#include "jfs_debug.h" + +extern struct task_struct *jfsCommitTask; +static unsigned int metapages = 1024; /* ??? Need a better number */ +static unsigned int free_metapages; +static metapage_t *metapage_buf; +static unsigned long meta_order; +static metapage_t *meta_free_list = NULL; +static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED; +static wait_queue_head_t meta_wait; + +#ifdef CONFIG_JFS_STATISTICS +struct { + uint pagealloc; /* # of page allocations */ + uint pagefree; /* # of page frees */ + uint lockwait; /* # of sleeping lock_metapage() calls */ + uint allocwait; /* # of sleeping alloc_metapage() calls */ +} mpStat; +#endif + + +#define HASH_BITS 10 /* This makes hash_table 1 4K page */ +#define HASH_SIZE (1 << HASH_BITS) +static metapage_t **hash_table = NULL; +static unsigned long hash_order; + + +static inline int metapage_locked(struct metapage *mp) +{ + return test_bit(META_locked, &mp->flag); +} + +static inline int trylock_metapage(struct metapage *mp) +{ + return test_and_set_bit(META_locked, &mp->flag); +} + +static inline void unlock_metapage(struct metapage *mp) +{ + clear_bit(META_locked, &mp->flag); + wake_up(&mp->wait); +} + +static void __lock_metapage(struct metapage *mp) +{ + DECLARE_WAITQUEUE(wait, current); + + INCREMENT(mpStat.lockwait); + + add_wait_queue_exclusive(&mp->wait, &wait); + do { + set_current_state(TASK_UNINTERRUPTIBLE); + if (metapage_locked(mp)) { + spin_unlock(&meta_lock); + schedule(); + spin_lock(&meta_lock); + } + } while (trylock_metapage(mp)); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&mp->wait, &wait); +} + +/* needs meta_lock */ +static inline void lock_metapage(struct metapage *mp) +{ + if (trylock_metapage(mp)) + __lock_metapage(mp); +} + +/* We're currently re-evaluating the method we use to write metadata + * pages. 
Currently, we have to make sure there no dirty buffer_heads + * hanging around after we free the metadata page, since the same + * physical disk blocks may be used in a different address space and we + * can't write old data over the good data. + * + * The best way to do this now is with block_invalidate_page. However, + * this is only available in the newer kernels and is not exported + * to modules. block_flushpage is the next best, but it too is not exported + * to modules. + * + * In a module, about the best we have is generic_buffer_fdatasync. This + * synchronously writes any dirty buffers. This is not optimal, but it will + * keep old dirty buffers from overwriting newer data. + */ +static inline void invalidate_page(metapage_t *mp) +{ +#ifdef MODULE + generic_buffer_fdatasync(mp->mapping->host, mp->index, mp->index + 1); +#else + lock_page(mp->page); + block_flushpage(mp->page, 0); + UnlockPage(mp->page); +#endif +} + +int __init metapage_init(void) +{ + int i; + metapage_t *last = NULL; + metapage_t *mp; + + /* + * Initialize wait queue + */ + init_waitqueue_head(&meta_wait); + + /* + * Allocate the metapage structures + */ + for (meta_order = 0; + ((PAGE_SIZE << meta_order) / sizeof(metapage_t)) < metapages; + meta_order++); + metapages = (PAGE_SIZE << meta_order) / sizeof(metapage_t); + + jFYI(1, ("metapage_init: metapage size = %Zd, metapages = %d\n", + sizeof(metapage_t), metapages)); + + metapage_buf = + (metapage_t *) __get_free_pages(GFP_KERNEL, meta_order); + assert(metapage_buf); + memset(metapage_buf, 0, PAGE_SIZE << meta_order); + + mp = metapage_buf; + for (i = 0; i < metapages; i++, mp++) { + mp->flag = 0; + set_bit(META_free, &mp->flag); + init_waitqueue_head(&mp->wait); + mp->hash_next = last; + last = mp; + } + meta_free_list = last; + free_metapages = metapages; + + /* + * Now the hash list + */ + for (hash_order = 0; + ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE; + hash_order++); + hash_table = + (metapage_t **) __get_free_pages(GFP_KERNEL, hash_order); + assert(hash_table); + memset(hash_table, 0, PAGE_SIZE << hash_order); + + return 0; +} + +void metapage_exit(void) +{ + free_pages((unsigned long) metapage_buf, meta_order); + free_pages((unsigned long) hash_table, hash_order); + metapage_buf = 0; /* This is a signal to the jfsIOwait thread */ +} + +/* + * Get metapage structure from freelist + * + * Caller holds meta_lock + */ +static metapage_t *alloc_metapage(int *dropped_lock) +{ + metapage_t *new; + + *dropped_lock = FALSE; + + /* + * Reserve two metapages for the lazy commit thread. Otherwise + * we may deadlock with holders of metapages waiting for tlocks + * that lazy thread should be freeing. 
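+ *
+ * (Concretely, in the code below: a caller other than
+ * jfsCommitTask sleeps on meta_wait until free_metapages > 2,
+ * keeping two pages in reserve, while the commit thread itself
+ * skips the check so it can always make forward progress.)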
+ */ + if ((free_metapages < 3) && (current != jfsCommitTask)) { + INCREMENT(mpStat.allocwait); + *dropped_lock = TRUE; + __SLEEP_COND(meta_wait, (free_metapages > 2), + spin_lock(&meta_lock), spin_unlock(&meta_lock)); + } + + assert(meta_free_list); + + new = meta_free_list; + meta_free_list = new->hash_next; + free_metapages--; + + return new; +} + +/* + * Put metapage on freelist (holding meta_lock) + */ +static inline void __free_metapage(metapage_t * mp) +{ + mp->flag = 0; + set_bit(META_free, &mp->flag); + mp->hash_next = meta_free_list; + meta_free_list = mp; + free_metapages++; + wake_up(&meta_wait); +} + +/* + * Put metapage on freelist (not holding meta_lock) + */ +static inline void free_metapage(metapage_t * mp) +{ + spin_lock(&meta_lock); + __free_metapage(mp); + spin_unlock(&meta_lock); +} + +/* + * Basically same hash as in pagemap.h, but using our hash table + */ +static metapage_t **meta_hash(struct address_space *mapping, + unsigned long index) +{ +#define i (((unsigned long)mapping)/ \ + (sizeof(struct inode) & ~(sizeof(struct inode) -1 ))) +#define s(x) ((x) + ((x) >> HASH_BITS)) + return hash_table + (s(i + index) & (HASH_SIZE - 1)); +#undef i +#undef s +} + +static metapage_t *search_hash(metapage_t ** hash_ptr, + struct address_space *mapping, + unsigned long index) +{ + metapage_t *ptr; + + for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) { + if ((ptr->mapping == mapping) && (ptr->index == index)) + return ptr; + } + + return NULL; +} + +static void add_to_hash(metapage_t * mp, metapage_t ** hash_ptr) +{ + if (*hash_ptr) + (*hash_ptr)->hash_prev = mp; + + mp->hash_prev = NULL; + mp->hash_next = *hash_ptr; + *hash_ptr = mp; + list_add(&mp->inode_list, &JFS_IP(mp->mapping->host)->mp_list); +} + +static void remove_from_hash(metapage_t * mp, metapage_t ** hash_ptr) +{ + list_del(&mp->inode_list); + + if (mp->hash_prev) + mp->hash_prev->hash_next = mp->hash_next; + else { + assert(*hash_ptr == mp); + *hash_ptr = mp->hash_next; + } + + if (mp->hash_next) + mp->hash_next->hash_prev = mp->hash_prev; +} + +/* + * Direct address space operations + */ + +static int direct_get_block(struct inode *ip, long lblock, + struct buffer_head *bh_result, int create) +{ + bh_result->b_dev = ip->i_dev; + bh_result->b_blocknr = lblock; + if (create) + bh_result->b_state |= (1UL << BH_Mapped) | (1UL << BH_New); + else + bh_result->b_state |= (1UL << BH_Mapped); + + return 0; +} + +static int direct_writepage(struct page *page) +{ + return block_write_full_page(page, direct_get_block); +} + +static int direct_readpage(struct file *fp, struct page *page) +{ + return block_read_full_page(page, direct_get_block); +} + +static int direct_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + return block_prepare_write(page, from, to, direct_get_block); +} + +static int direct_bmap(struct address_space *mapping, long block) +{ + return generic_block_bmap(mapping, block, direct_get_block); +} + +struct address_space_operations direct_aops = { + readpage: direct_readpage, + writepage: direct_writepage, + sync_page: block_sync_page, + prepare_write: direct_prepare_write, + commit_write: generic_commit_write, + bmap: direct_bmap, +}; + +metapage_t *__get_metapage(struct inode *inode, + unsigned long lblock, unsigned int size, + int absolute, unsigned long new) +{ + int dropped_lock; + metapage_t **hash_ptr; + int l2BlocksPerPage; + int l2bsize; + struct address_space *mapping; + metapage_t *mp; + unsigned long page_index; + unsigned long page_offset; + + jFYI(1, 
("__get_metapage: inode = 0x%p, lblock = 0x%lx\n", + inode, lblock)); + + if (absolute) + mapping = JFS_SBI(inode->i_sb)->direct_mapping; + else + mapping = inode->i_mapping; + + spin_lock(&meta_lock); + + hash_ptr = meta_hash(mapping, lblock); + + mp = search_hash(hash_ptr, mapping, lblock); + if (mp) { + page_found: + if (test_bit(META_discard, &mp->flag)) { + assert(new); /* It's okay to reuse a discarded + * if we expect it to be empty + */ + clear_bit(META_discard, &mp->flag); + } + mp->count++; + jFYI(1, ("__get_metapage: found 0x%p, in hash\n", mp)); + assert(mp->logical_size == size); + lock_metapage(mp); + spin_unlock(&meta_lock); + } else { + l2bsize = inode->i_sb->s_blocksize_bits; + l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; + page_index = lblock >> l2BlocksPerPage; + page_offset = (lblock - (page_index << l2BlocksPerPage)) << + l2bsize; + if ((page_offset + size) > PAGE_SIZE) { + spin_unlock(&meta_lock); + jERROR(1, ("MetaData crosses page boundary!!\n")); + return NULL; + } + + mp = alloc_metapage(&dropped_lock); + if (dropped_lock) { + /* alloc_metapage blocked, we need to search the hash + * again. (The goto is ugly, maybe we'll clean this + * up in the future.) + */ + metapage_t *mp2; + mp2 = search_hash(hash_ptr, mapping, lblock); + if (mp2) { + __free_metapage(mp); + mp = mp2; + goto page_found; + } + } + mp->flag = 0; + lock_metapage(mp); + if (absolute) + set_bit(META_absolute, &mp->flag); + mp->xflag = COMMIT_PAGE; + mp->count = 1; + atomic_set(&mp->nohomeok,0); + mp->mapping = mapping; + mp->index = lblock; + mp->page = 0; + mp->logical_size = size; + add_to_hash(mp, hash_ptr); + spin_unlock(&meta_lock); + + if (new) { + jFYI(1, + ("__get_metapage: Calling grab_cache_page\n")); + mp->page = grab_cache_page(mapping, page_index); + if (!mp->page) { + jERROR(1, ("grab_cache_page failed!\n")); + spin_lock(&meta_lock); + remove_from_hash(mp, hash_ptr); + __free_metapage(mp); + spin_unlock(&meta_lock); + return NULL; + } else + INCREMENT(mpStat.pagealloc); + } else { + jFYI(1, + ("__get_metapage: Calling read_cache_page\n")); + mp->page = + read_cache_page(mapping, lblock, + (filler_t *) mapping->a_ops-> + readpage, NULL); + if (IS_ERR(mp->page)) { + jERROR(1, ("read_cache_page failed!\n")); + spin_lock(&meta_lock); + remove_from_hash(mp, hash_ptr); + __free_metapage(mp); + spin_unlock(&meta_lock); + return NULL; + } else + INCREMENT(mpStat.pagealloc); + lock_page(mp->page); + } + mp->data = (void *) (kmap(mp->page) + page_offset); + } + jFYI(1, ("__get_metapage: returning = 0x%p\n", mp)); + return mp; +} + +void hold_metapage(metapage_t * mp, int force) +{ + spin_lock(&meta_lock); + + mp->count++; + + if (force) { + ASSERT (!(test_bit(META_forced, &mp->flag))); + if (trylock_metapage(mp)) + set_bit(META_forced, &mp->flag); + } else + lock_metapage(mp); + + spin_unlock(&meta_lock); +} + +static void __write_metapage(metapage_t * mp) +{ + struct inode *ip = (struct inode *) mp->mapping->host; + unsigned long page_index; + unsigned long page_offset; + int rc; + int l2bsize = ip->i_sb->s_blocksize_bits; + int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize; + + jFYI(1, ("__write_metapage: mp = 0x%p\n", mp)); + + if (test_bit(META_discard, &mp->flag)) { + /* + * This metadata is no longer valid + */ + clear_bit(META_dirty, &mp->flag); + return; + } + + page_index = mp->page->index; + page_offset = + (mp->index - (page_index << l2BlocksPerPage)) << l2bsize; + + rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset, + page_offset + + mp->logical_size); + if (rc) { + 
jERROR(1, ("prepare_write return %d!\n", rc)); + ClearPageUptodate(mp->page); + kunmap(mp->page); + clear_bit(META_dirty, &mp->flag); + return; + } + rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset, + page_offset + + mp->logical_size); + if (rc) { + jERROR(1, ("commit_write returned %d\n", rc)); + } + + clear_bit(META_dirty, &mp->flag); + + jFYI(1, ("__write_metapage done\n")); +} + +void release_metapage(metapage_t * mp) +{ + log_t *log; + struct inode *ip; + + jFYI(1, + ("release_metapage: mp = 0x%p, flag = 0x%lx\n", mp, + mp->flag)); + + spin_lock(&meta_lock); + if (test_bit(META_forced, &mp->flag)) { + clear_bit(META_forced, &mp->flag); + mp->count--; + spin_unlock(&meta_lock); + return; + } + + ip = (struct inode *) mp->mapping->host; + + assert(mp->count); + if (--mp->count || atomic_read(&mp->nohomeok)) { + unlock_metapage(mp); + spin_unlock(&meta_lock); + } else { + remove_from_hash(mp, meta_hash(mp->mapping, mp->index)); + spin_unlock(&meta_lock); + + if (mp->page) { + kunmap(mp->page); + mp->data = 0; + if (test_bit(META_dirty, &mp->flag)) + __write_metapage(mp); + UnlockPage(mp->page); + if (test_bit(META_sync, &mp->flag)) { + sync_metapage(mp); + clear_bit(META_sync, &mp->flag); + } + + if (test_bit(META_discard, &mp->flag)) + invalidate_page(mp); + + page_cache_release(mp->page); + INCREMENT(mpStat.pagefree); + } + + if (mp->lsn) { + /* + * Remove metapage from logsynclist. + */ + log = mp->log; + LOGSYNC_LOCK(log); + mp->log = 0; + mp->lsn = 0; + mp->clsn = 0; + log->count--; + list_del(&mp->synclist); + LOGSYNC_UNLOCK(log); + } + + free_metapage(mp); + } + jFYI(1, ("release_metapage: done\n")); +} + +void invalidate_metapages(struct inode *ip, unsigned long addr, + unsigned long len) +{ + metapage_t **hash_ptr; + unsigned long lblock; + int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_sb->s_blocksize_bits; + struct address_space *mapping = ip->i_mapping; + metapage_t *mp; +#ifndef MODULE + struct page *page; +#endif + + /* + * First, mark metapages to discard. They will eventually be + * released, but should not be written. 
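+ *
+ * (Second, below: pages still in the metapage cache are flushed
+ * with generic_buffer_fdatasync() when built as a module, or
+ * block_flushpage() when built in -- the same tradeoff described
+ * above invalidate_page().)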
+ */ + for (lblock = addr; lblock < addr + len; + lblock += 1 << l2BlocksPerPage) { + hash_ptr = meta_hash(mapping, lblock); + spin_lock(&meta_lock); + mp = search_hash(hash_ptr, mapping, lblock); + if (mp) { + set_bit(META_discard, &mp->flag); + spin_unlock(&meta_lock); + /* + * If in the metapage cache, we've got the page locked + */ +#ifdef MODULE + UnlockPage(mp->page); + generic_buffer_fdatasync(mp->mapping->host, mp->index, + mp->index+1); + lock_page(mp->page); +#else + block_flushpage(mp->page, 0); +#endif + } else { + spin_unlock(&meta_lock); +#ifdef MODULE + generic_buffer_fdatasync(ip, lblock << l2BlocksPerPage, + (lblock + 1) << l2BlocksPerPage); +#else + page = find_lock_page(mapping, + lblock >> l2BlocksPerPage); + if (page) { + block_flushpage(page, 0); + UnlockPage(page); + } +#endif + } + } +} + +void invalidate_inode_metapages(struct inode *inode) +{ + struct list_head *ptr; + metapage_t *mp; + + spin_lock(&meta_lock); + list_for_each(ptr, &JFS_IP(inode)->mp_list) { + mp = list_entry(ptr, metapage_t, inode_list); + clear_bit(META_dirty, &mp->flag); + set_bit(META_discard, &mp->flag); + kunmap(mp->page); + UnlockPage(mp->page); + page_cache_release(mp->page); + INCREMENT(mpStat.pagefree); + mp->data = 0; + mp->page = 0; + } + spin_unlock(&meta_lock); + truncate_inode_pages(inode->i_mapping, 0); +} + +#ifdef CONFIG_JFS_STATISTICS +int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length, + int *eof, void *data) +{ + int len = 0; + off_t begin; + + len += sprintf(buffer, + "JFS Metapage statistics\n" + "=======================\n" + "metapages in use = %d\n" + "page allocations = %d\n" + "page frees = %d\n" + "lock waits = %d\n" + "allocation waits = %d\n", + metapages - free_metapages, + mpStat.pagealloc, + mpStat.pagefree, + mpStat.lockwait, + mpStat.allocwait); + + begin = offset; + *start = buffer + begin; + len -= begin; + + if (len > length) + len = length; + else + *eof = 1; + + if (len < 0) + len = 0; + + return len; +} +#endif diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_metapage.h linuxppc64_2_4/fs/jfs/jfs_metapage.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_metapage.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_metapage.h Tue Apr 23 11:21:51 2002 @@ -0,0 +1,123 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _H_JFS_METAPAGE +#define _H_JFS_METAPAGE + +#include + +typedef struct metapage { + /* Common logsyncblk prefix (see jfs_logmgr.h) */ + u16 xflag; + u16 unused; + lid_t lid; + int lsn; + struct list_head synclist; + /* End of logsyncblk prefix */ + + unsigned long flag; /* See Below */ + unsigned long count; /* Reference count */ + void *data; /* Data pointer */ + + /* list management stuff */ + struct metapage *hash_prev; + struct metapage *hash_next; /* Also used for free list */ + + struct list_head inode_list; /* per-inode metapage list */ + /* + * mapping & index become redundant, but we need these here to + * add the metapage to the hash before we have the real page + */ + struct address_space *mapping; + unsigned long index; + wait_queue_head_t wait; + + /* implementation */ + struct page *page; + unsigned long logical_size; + + /* Journal management */ + int clsn; + atomic_t nohomeok; + struct jfs_log *log; +} metapage_t; + +/* + * Direct-access address space operations + */ +extern struct address_space_operations direct_aops; + +/* metapage flag */ +#define META_locked 0 +#define META_absolute 1 +#define META_free 2 +#define META_dirty 3 +#define META_sync 4 +#define META_discard 5 +#define META_forced 6 + +#define mark_metapage_dirty(mp) set_bit(META_dirty, &(mp)->flag) + +/* function prototypes */ +extern metapage_t *__get_metapage(struct inode *inode, + unsigned long lblock, unsigned int size, + int absolute, unsigned long new); + +#define read_metapage(inode, lblock, size, absolute)\ + __get_metapage(inode, lblock, size, absolute, FALSE) + +#define get_metapage(inode, lblock, size, absolute)\ + __get_metapage(inode, lblock, size, absolute, TRUE) + +extern void release_metapage(metapage_t *); + +#define flush_metapage(mp) \ +{\ + set_bit(META_dirty, &(mp)->flag);\ + set_bit(META_sync, &(mp)->flag);\ + release_metapage(mp);\ +} + +#define sync_metapage(mp) \ + generic_buffer_fdatasync((struct inode *)mp->mapping->host,\ + mp->page->index, mp->page->index + 1) + +#define write_metapage(mp) \ +{\ + set_bit(META_dirty, &(mp)->flag);\ + release_metapage(mp);\ +} + +#define discard_metapage(mp) \ +{\ + clear_bit(META_dirty, &(mp)->flag);\ + set_bit(META_discard, &(mp)->flag);\ + release_metapage(mp);\ +} + +extern void hold_metapage(metapage_t *, int); + +/* + * This routine uses hash to explicitly find small number of pages + */ +extern void invalidate_metapages(struct inode *, unsigned long, unsigned long); + +/* + * This one uses mp_list to invalidate all pages for an inode + */ +extern void invalidate_inode_metapages(struct inode *inode); +#endif /* _H_JFS_METAPAGE */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_mount.c linuxppc64_2_4/fs/jfs/jfs_mount.c --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_mount.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_mount.c Tue Apr 23 11:25:34 2002 @@ -0,0 +1,525 @@ +/* + * MODULE_NAME: jfs_mount.c + * + * COMPONENT_NAME: sysjfs + * + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * Change History : + * + */ + +/* + * Module: jfs_mount.c + * + * note: file system in transition to aggregate/fileset: + * + * file system mount is interpreted as the mount of aggregate, + * if not already mounted, and mount of the single/only fileset in + * the aggregate; + * + * a file system/aggregate is represented by an internal inode + * (aka mount inode) initialized with aggregate superblock; + * each vfs represents a fileset, and points to its "fileset inode + * allocation map inode" (aka fileset inode): + * (an aggregate itself is structured recursively as a filset: + * an internal vfs is constructed and points to its "fileset inode + * allocation map inode" (aka aggregate inode) where each inode + * represents a fileset inode) so that inode number is mapped to + * on-disk inode in uniform way at both aggregate and fileset level; + * + * each vnode/inode of a fileset is linked to its vfs (to facilitate + * per fileset inode operations, e.g., unmount of a fileset, etc.); + * each inode points to the mount inode (to facilitate access to + * per aggregate information, e.g., block size, etc.) as well as + * its file set inode. + * + * aggregate + * ipmnt + * mntvfs -> fileset ipimap+ -> aggregate ipbmap -> aggregate ipaimap; + * fileset vfs -> vp(1) <-> ... <-> vp(n) <->vproot; + */ + +#include +#include "jfs_incore.h" +#include "jfs_filsys.h" +#include "jfs_superblock.h" +#include "jfs_dmap.h" +#include "jfs_imap.h" +#include "jfs_metapage.h" +#include "jfs_debug.h" + + +/* + * forward references + */ +static int chkSuper(struct super_block *); +static int logMOUNT(struct super_block *sb); + +/* + * NAME: jfs_mount(sb) + * + * FUNCTION: vfs_mount() + * + * PARAMETER: sb - super block + * + * RETURN: EBUSY - device already mounted or open for write + * EBUSY - cvrdvp already mounted; + * EBUSY - mount table full + * ENOTDIR - cvrdvp not directory on a device mount + * ENXIO - device open failure + */ +int jfs_mount(struct super_block *sb) +{ + int rc = 0; /* Return code */ + struct jfs_sb_info *sbi = JFS_SBI(sb); + struct inode *ipaimap = NULL; + struct inode *ipaimap2 = NULL; + struct inode *ipimap = NULL; + struct inode *ipbmap = NULL; + + jFYI(1, ("\nMount JFS\n")); + + /* + * read/validate superblock + * (initialize mount inode from the superblock) + */ + if ((rc = chkSuper(sb))) { + goto errout20; + } + + ipaimap = diReadSpecial(sb, AGGREGATE_I); + if (ipaimap == NULL) { + jERROR(1, ("jfs_mount: Faild to read AGGREGATE_I\n")); + rc = EIO; + goto errout20; + } + sbi->ipaimap = ipaimap; + + jFYI(1, ("jfs_mount: ipaimap:0x%p\n", ipaimap)); + + /* + * initialize aggregate inode allocation map + */ + if ((rc = diMount(ipaimap))) { + jERROR(1, + ("jfs_mount: diMount(ipaimap) failed w/rc = %d\n", + rc)); + goto errout21; + } + + /* + * open aggregate block allocation map + */ + ipbmap = diReadSpecial(sb, BMAP_I); + if (ipbmap == NULL) { + rc = EIO; + goto errout22; + } + + jFYI(1, ("jfs_mount: ipbmap:0x%p\n", ipbmap)); + + sbi->ipbmap = ipbmap; + + /* + * initialize aggregate block allocation map + */ + if ((rc = 
dbMount(ipbmap))) { + jERROR(1, ("jfs_mount: dbMount failed w/rc = %d\n", rc)); + goto errout22; + } + + /* + * open the secondary aggregate inode allocation map + * + * This is a duplicate of the aggregate inode allocation map. + * + * hand craft a vfs in the same fashion as we did to read ipaimap. + * By adding INOSPEREXT (32) to the inode number, we are telling + * diReadSpecial that we are reading from the secondary aggregate + * inode table. This also creates a unique entry in the inode hash + * table. + */ + if ((sbi->mntflag & JFS_BAD_SAIT) == 0) { + ipaimap2 = diReadSpecial(sb, AGGREGATE_I + INOSPEREXT); + if (ipaimap2 == 0) { + jERROR(1, + ("jfs_mount: Faild to read AGGREGATE_I\n")); + rc = EIO; + goto errout35; + } + sbi->ipaimap2 = ipaimap2; + + jFYI(1, ("jfs_mount: ipaimap2:0x%p\n", ipaimap2)); + + /* + * initialize secondary aggregate inode allocation map + */ + if ((rc = diMount(ipaimap2))) { + jERROR(1, + ("jfs_mount: diMount(ipaimap2) failed, rc = %d\n", + rc)); + goto errout35; + } + } else + /* Secondary aggregate inode table is not valid */ + sbi->ipaimap2 = 0; + + /* + * mount (the only/single) fileset + */ + /* + * open fileset inode allocation map (aka fileset inode) + */ + ipimap = diReadSpecial(sb, FILESYSTEM_I); + if (ipimap == NULL) { + jERROR(1, ("jfs_mount: Failed to read FILESYSTEM_I\n")); + /* open fileset secondary inode allocation map */ + rc = EIO; + goto errout40; + } + jFYI(1, ("jfs_mount: ipimap:0x%p\n", ipimap)); + + /* map further access of per fileset inodes by the fileset inode */ + sbi->ipimap = ipimap; + + /* initialize fileset inode allocation map */ + if ((rc = diMount(ipimap))) { + jERROR(1, ("jfs_mount: diMount failed w/rc = %d\n", rc)); + goto errout41; + } + + jFYI(1, ("Mount JFS Complete.\n")); + goto out; + + /* + * unwind on error + */ +//errout42: /* close fileset inode allocation map */ + diUnmount(ipimap, 1); + + errout41: /* close fileset inode allocation map inode */ + diFreeSpecial(ipimap); + + errout40: /* fileset closed */ + + /* close secondary aggregate inode allocation map */ + if (ipaimap2) { + diUnmount(ipaimap2, 1); + diFreeSpecial(ipaimap2); + } + + errout35: + + /* close aggregate block allocation map */ + dbUnmount(ipbmap, 1); + diFreeSpecial(ipbmap); + + errout22: /* close aggregate inode allocation map */ + + diUnmount(ipaimap, 1); + + errout21: /* close aggregate inodes */ + diFreeSpecial(ipaimap); + errout20: /* aggregate closed */ + + out: + + if (rc) { + jERROR(1, ("Mount JFS Failure: %d\n", rc)); + } + return rc; +} + +/* + * NAME: jfs_mount_rw(sb, remount) + * + * FUNCTION: Completes read-write mount, or remounts read-only volume + * as read-write + */ +int jfs_mount_rw(struct super_block *sb, int remount) +{ + struct jfs_sb_info *sbi = JFS_SBI(sb); + log_t *log; + int rc; + + /* + * If we are re-mounting a previously read-only volume, we want to + * re-read the inode and block maps, since fsck.jfs may have updated + * them. 
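+	 *
+	 * Concretely, for each map inode (sbi->ipimap and sbi->ipbmap)
+	 * the remount path below does roughly:
+	 *
+	 *	truncate_inode_pages(map->i_mapping, 0);
+	 *	diUnmount()/dbUnmount(), then diMount()/dbMount() again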
+ */ + if (remount) { + if (chkSuper(sb) || (sbi->state != FM_CLEAN)) + return -EINVAL; + + truncate_inode_pages(sbi->ipimap->i_mapping, 0); + truncate_inode_pages(sbi->ipbmap->i_mapping, 0); + diUnmount(sbi->ipimap, 1); + if ((rc = diMount(sbi->ipimap))) { + jERROR(1,("jfs_mount_rw: diMount failed!\n")); + return rc; + } + + dbUnmount(sbi->ipbmap, 1); + if ((rc = dbMount(sbi->ipbmap))) { + jERROR(1,("jfs_mount_rw: dbMount failed!\n")); + return rc; + } + } + + /* + * open/initialize log + */ + if ((rc = lmLogOpen(sb, &log))) + return rc; + + JFS_SBI(sb)->log = log; + + /* + * update file system superblock; + */ + if ((rc = updateSuper(sb, FM_MOUNT))) { + jERROR(1, + ("jfs_mount: updateSuper failed w/rc = %d\n", rc)); + lmLogClose(sb, log); + JFS_SBI(sb)->log = 0; + return rc; + } + + /* + * write MOUNT log record of the file system + */ + logMOUNT(sb); + + return rc; +} + +/* + * chkSuper() + * + * validate the superblock of the file system to be mounted and + * get the file system parameters. + * + * returns + * 0 with fragsize set if check successful + * error code if not successful + */ +static int chkSuper(struct super_block *sb) +{ + int rc = 0; + metapage_t *mp; + struct jfs_sb_info *sbi = JFS_SBI(sb); + struct jfs_superblock *j_sb; + int AIM_bytesize, AIT_bytesize; + int expected_AIM_bytesize, expected_AIT_bytesize; + s64 AIM_byte_addr, AIT_byte_addr, fsckwsp_addr; + s64 byte_addr_diff0, byte_addr_diff1; + s32 bsize; + + if ((rc = readSuper(sb, &mp))) + return rc; + j_sb = (struct jfs_superblock *) (mp->data); + + /* + * validate superblock + */ + /* validate fs signature */ + if (strncmp(j_sb->s_magic, JFS_MAGIC, 4) || + j_sb->s_version > cpu_to_le32(JFS_VERSION)) { + //rc = EFORMAT; + rc = EINVAL; + goto out; + } + + bsize = le32_to_cpu(j_sb->s_bsize); +#ifdef _JFS_4K + if (bsize != PSIZE) { + jERROR(1, ("Currently only 4K block size supported!\n")); + rc = EINVAL; + goto out; + } +#endif /* _JFS_4K */ + + jFYI(1, ("superblock: flag:0x%08x state:0x%08x size:0x%Lx\n", + le32_to_cpu(j_sb->s_flag), le32_to_cpu(j_sb->s_state), + (unsigned long long) le64_to_cpu(j_sb->s_size))); + + /* validate the descriptors for Secondary AIM and AIT */ + if ((j_sb->s_flag & cpu_to_le32(JFS_BAD_SAIT)) != + cpu_to_le32(JFS_BAD_SAIT)) { + expected_AIM_bytesize = 2 * PSIZE; + AIM_bytesize = lengthPXD(&(j_sb->s_aim2)) * bsize; + expected_AIT_bytesize = 4 * PSIZE; + AIT_bytesize = lengthPXD(&(j_sb->s_ait2)) * bsize; + AIM_byte_addr = addressPXD(&(j_sb->s_aim2)) * bsize; + AIT_byte_addr = addressPXD(&(j_sb->s_ait2)) * bsize; + byte_addr_diff0 = AIT_byte_addr - AIM_byte_addr; + fsckwsp_addr = addressPXD(&(j_sb->s_fsckpxd)) * bsize; + byte_addr_diff1 = fsckwsp_addr - AIT_byte_addr; + if ((AIM_bytesize != expected_AIM_bytesize) || + (AIT_bytesize != expected_AIT_bytesize) || + (byte_addr_diff0 != AIM_bytesize) || + (byte_addr_diff1 <= AIT_bytesize)) + j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT); + } + + if ((j_sb->s_flag & cpu_to_le32(JFS_GROUPCOMMIT)) != + cpu_to_le32(JFS_GROUPCOMMIT)) + j_sb->s_flag |= cpu_to_le32(JFS_GROUPCOMMIT); + jFYI(0, ("superblock: flag:0x%08x state:0x%08x size:0x%Lx\n", + le32_to_cpu(j_sb->s_flag), le32_to_cpu(j_sb->s_state), + (unsigned long long) le64_to_cpu(j_sb->s_size))); + + /* validate fs state */ + if (j_sb->s_state != cpu_to_le32(FM_CLEAN) && + !(sb->s_flags & MS_RDONLY)) { + jERROR(1, + ("jfs_mount: Mount Failure: File System Dirty.\n")); + rc = EINVAL; + goto out; + } + + sbi->state = le32_to_cpu(j_sb->s_state); + sbi->mntflag = le32_to_cpu(j_sb->s_flag); + + /* + * JFS 
always does I/O by 4K pages. Don't tell the buffer cache + * that we use anything else (leave s_blocksize alone). + */ + sbi->bsize = bsize; + sbi->l2bsize = le16_to_cpu(j_sb->s_l2bsize); + + /* + * For now, ignore s_pbsize, l2bfactor. All I/O going through buffer + * cache. + */ + sbi->nbperpage = PSIZE >> sbi->l2bsize; + sbi->l2nbperpage = L2PSIZE - sbi->l2bsize; + sbi->l2niperblk = sbi->l2bsize - L2DISIZE; + if (sbi->mntflag & JFS_INLINELOG) + sbi->logpxd = j_sb->s_logpxd; + else + sbi->logdev = to_kdev_t(le32_to_cpu(j_sb->s_logdev)); + sbi->ait2 = j_sb->s_ait2; + + out: + release_metapage(mp); + + return rc; +} + + +/* + * updateSuper() + * + * update synchronously superblock if it is mounted read-write. + */ +int updateSuper(struct super_block *sb, uint state) +{ + int rc; + metapage_t *mp; + struct jfs_superblock *j_sb; + + /* + * Only fsck can fix dirty state + */ + if (JFS_SBI(sb)->state == FM_DIRTY) + return 0; + + if ((rc = readSuper(sb, &mp))) + return rc; + + j_sb = (struct jfs_superblock *) (mp->data); + + j_sb->s_state = cpu_to_le32(state); + JFS_SBI(sb)->state = state; + + if (state == FM_MOUNT) { + /* record log's dev_t and mount serial number */ + j_sb->s_logdev = + cpu_to_le32(kdev_t_to_nr(JFS_SBI(sb)->log->dev)); + j_sb->s_logserial = cpu_to_le32(JFS_SBI(sb)->log->serial); + /* record our own device number in case the location + * changes after a reboot + */ + j_sb->s_device = cpu_to_le32(kdev_t_to_nr(sb->s_dev)); + } else if (state == FM_CLEAN) { + /* + * If this volume is shared with OS/2, OS/2 will need to + * recalculate DASD usage, since we don't deal with it. + */ + if (j_sb->s_flag & cpu_to_le32(JFS_DASD_ENABLED)) + j_sb->s_flag |= cpu_to_le32(JFS_DASD_PRIME); + } + + flush_metapage(mp); + + return 0; +} + + +/* + * readSuper() + * + * read superblock by raw sector address + */ +int readSuper(struct super_block *sb, metapage_t ** mpp) +{ + /* read in primary superblock */ + *mpp = read_metapage(JFS_SBI(sb)->direct_inode, + SUPER1_OFF >> sb->s_blocksize_bits, PSIZE, 1); + if (*mpp == NULL) { + /* read in secondary/replicated superblock */ + *mpp = read_metapage(JFS_SBI(sb)->direct_inode, + SUPER2_OFF >> sb->s_blocksize_bits, + PSIZE, 1); + } + return *mpp ? 0 : 1; +} + + +/* + * logMOUNT() + * + * function: write a MOUNT log record for file system. + * + * MOUNT record keeps logredo() from processing log records + * for this file system past this point in log. + * it is harmless if mount fails. + * + * note: MOUNT record is at aggregate level, not at fileset level, + * since log records of previous mounts of a fileset + * (e.g., AFTER record of extent allocation) have to be processed + * to update block allocation map at aggregate level. 
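+ *
+ * the record written below is minimal; schematically:
+ *
+ *	lrd.type = LOG_MOUNT;  lrd.length = 0;
+ *	lrd.aggregate = <device number of the aggregate>;
+ *	lmLog(log, NULL, &lrd, NULL);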
+ */ +static int logMOUNT(struct super_block *sb) +{ + log_t *log = JFS_SBI(sb)->log; + lrd_t lrd; + + lrd.logtid = 0; + lrd.backchain = 0; + lrd.type = cpu_to_le16(LOG_MOUNT); + lrd.length = 0; + lrd.aggregate = cpu_to_le32(kdev_t_to_nr(sb->s_dev)); + lmLog(log, NULL, &lrd, NULL); + + return 0; +} diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_superblock.h linuxppc64_2_4/fs/jfs/jfs_superblock.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_superblock.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_superblock.h Tue Apr 23 11:25:34 2002 @@ -0,0 +1,115 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _H_JFS_SUPERBLOCK +#define _H_JFS_SUPERBLOCK +/* + * jfs_superblock.h + */ + +/* + * make the magic number something a human could read + */ +#define JFS_MAGIC "JFS1" /* Magic word */ + +#define JFS_VERSION 2 /* Version number: Version 2 */ + +#define LV_NAME_SIZE 11 /* MUST BE 11 for OS/2 boot sector */ + +/* + * aggregate superblock + * + * The name superblock is too close to super_block, so the name has been + * changed to jfs_superblock. The utilities are still using the old name. + */ +struct jfs_superblock { + char s_magic[4]; /* 4: magic number */ + u32 s_version; /* 4: version number */ + + s64 s_size; /* 8: aggregate size in hardware/LVM blocks; + * VFS: number of blocks + */ + s32 s_bsize; /* 4: aggregate block size in bytes; + * VFS: fragment size + */ + s16 s_l2bsize; /* 2: log2 of s_bsize */ + s16 s_l2bfactor; /* 2: log2(s_bsize/hardware block size) */ + s32 s_pbsize; /* 4: hardware/LVM block size in bytes */ + s16 s_l2pbsize; /* 2: log2 of s_pbsize */ + s16 pad; /* 2: padding necessary for alignment */ + + u32 s_agsize; /* 4: allocation group size in aggr. blocks */ + + u32 s_flag; /* 4: aggregate attributes: + * see jfs_filsys.h + */ + u32 s_state; /* 4: mount/unmount/recovery state: + * see jfs_filsys.h + */ + s32 s_compress; /* 4: > 0 if data compression */ + + pxd_t s_ait2; /* 8: first extent of secondary + * aggregate inode table + */ + + pxd_t s_aim2; /* 8: first extent of secondary + * aggregate inode map + */ + u32 s_logdev; /* 4: device address of log */ + s32 s_logserial; /* 4: log serial number at aggregate mount */ + pxd_t s_logpxd; /* 8: inline log extent */ + + pxd_t s_fsckpxd; /* 8: inline fsck work space extent */ + + struct timestruc_t s_time; /* 8: time last updated */ + + s32 s_fsckloglen; /* 4: Number of filesystem blocks reserved for + * the fsck service log. + * N.B. These blocks are divided among the + * versions kept. This is not a per + * version size. + * N.B. These blocks are included in the + * length field of s_fsckpxd. 
+ */ + s8 s_fscklog; /* 1: which fsck service log is most recent + * 0 => no service log data yet + * 1 => the first one + * 2 => the 2nd one + */ + char s_fpack[11]; /* 11: file system volume name + * N.B. This must be 11 bytes to + * conform with the OS/2 BootSector + * requirements + */ + + /* extendfs() parameter under s_state & FM_EXTENDFS */ + s64 s_xsize; /* 8: extendfs s_size */ + pxd_t s_xfsckpxd; /* 8: extendfs fsckpxd */ + pxd_t s_xlogpxd; /* 8: extendfs logpxd */ + /* - 128 byte boundary - */ + + u32 s_device; /* Store device in case location changes + * between reboots + */ + +}; + +extern int readSuper(struct super_block *, struct metapage **); +extern int updateSuper(struct super_block *, uint); + +#endif /*_H_JFS_SUPERBLOCK */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_txnmgr.c linuxppc64_2_4/fs/jfs/jfs_txnmgr.c --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_txnmgr.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_txnmgr.c Tue Apr 23 11:25:34 2002 @@ -0,0 +1,3019 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * jfs_txnmgr.c: transaction manager + * + * notes: + * transaction starts with txBegin() and ends with txCommit() + * or txAbort(). + * + * tlock is acquired at the time of update; + * (obviate scan at commit time for xtree and dtree) + * tlock and mp points to each other; + * (no hashlist for mp -> tlock). + * + * special cases: + * tlock on in-memory inode: + * in-place tlock in the in-memory inode itself; + * converted to page lock by iWrite() at commit time. + * + * tlock during write()/mmap() under anonymous transaction (tid = 0): + * transferred (?) to transaction at commit time. 
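+ *
+ *	e.g. (schematic only) an anonymous update is:
+ *
+ *		txBeginAnon(sb);
+ *		tlck = txLock(0, ip, mp, tlckXTREE | tlckGROW);
+ *		... modify the page ...
+ *
+ *	and the tlock then sits on the inode's anonymous tlock list
+ *	until some later txCommit() of that inode inherits it.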
+ *
+ * use the page itself to update allocation maps
+ * (obviate intermediate replication of allocation/deallocation data)
+ * hold on to mp+lock thru update of maps
+ */
+
+
+#include <linux/fs.h>
+#include <linux/vmalloc.h>
+#include <linux/smp_lock.h>
+#include <linux/completion.h>
+#include "jfs_incore.h"
+#include "jfs_filsys.h"
+#include "jfs_metapage.h"
+#include "jfs_dinode.h"
+#include "jfs_imap.h"
+#include "jfs_dmap.h"
+#include "jfs_superblock.h"
+#include "jfs_debug.h"
+
+/*
+ * transaction management structures
+ */
+static struct {
+	/* tblock */
+	int freetid;		/* 4: index of a free tid structure */
+	wait_queue_head_t freewait;	/* 4: eventlist of free tblock */
+
+	/* tlock */
+	int freelock;		/* 4: index first free lock word */
+	wait_queue_head_t freelockwait;	/* 4: eventlist of free tlock */
+	wait_queue_head_t lowlockwait;	/* 4: eventlist of ample tlocks */
+	int tlocksInUse;	/* 4: Number of tlocks in use */
+	spinlock_t LazyLock;	/* 4: synchronize sync_queue & unlock_queue */
+/*	tblock_t *sync_queue;	 * 4: Transactions waiting for data sync */
+	tblock_t *unlock_queue;	/* 4: Transactions waiting to be released */
+	tblock_t *unlock_tail;	/* 4: Tail of unlock_queue */
+	struct list_head anon_list;	/* inodes having anonymous txns */
+	struct list_head anon_list2;	/* inodes having anonymous txns
+					   that couldn't be sync'ed */
+} TxAnchor;
+
+static int nTxBlock = 512;	/* number of transaction blocks */
+struct tblock *TxBlock;		/* transaction block table */
+
+static int nTxLock = 2048;	/* number of transaction locks */
+static int TxLockLWM = 2048*.4;	/* Low water mark for number of txLocks used */
+static int TxLockHWM = 2048*.8;	/* High water mark for number of txLocks used */
+struct tlock *TxLock;		/* transaction lock table */
+static int TlocksLow = 0;	/* Indicates low number of available tlocks */
+
+
+/*
+ * transaction management lock
+ */
+static spinlock_t jfsTxnLock = SPIN_LOCK_UNLOCKED;
+
+#define TXN_LOCK()	spin_lock(&jfsTxnLock)
+#define TXN_UNLOCK()	spin_unlock(&jfsTxnLock)
+
+#define LAZY_LOCK_INIT()	spin_lock_init(&TxAnchor.LazyLock);
+#define LAZY_LOCK(flags)	spin_lock_irqsave(&TxAnchor.LazyLock, flags)
+#define LAZY_UNLOCK(flags)	spin_unlock_irqrestore(&TxAnchor.LazyLock, flags)
+
+/*
+ * Retry logic exists outside these macros to protect against spurious
+ * wakeups.
+ */
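+/*
+ * i.e. every user of TXN_SLEEP() re-tests its condition in a loop, e.g.
+ *
+ *	while (!(lid = TxAnchor.freelock))
+ *		TXN_SLEEP(&TxAnchor.freelockwait);
+ *
+ * as txLockAlloc() below does.
+ */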
+ */ +static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) +{ + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(event, &wait); + set_current_state(TASK_UNINTERRUPTIBLE); + TXN_UNLOCK(); + schedule(); + current->state = TASK_RUNNING; + remove_wait_queue(event, &wait); +} + +#define TXN_SLEEP(event)\ +{\ + TXN_SLEEP_DROP_LOCK(event);\ + TXN_LOCK();\ +} + +#define TXN_WAKEUP(event) wake_up_all(event) + + +/* + * statistics + */ +struct { + tid_t maxtid; /* 4: biggest tid ever used */ + lid_t maxlid; /* 4: biggest lid ever used */ + int ntid; /* 4: # of transactions performed */ + int nlid; /* 4: # of tlocks acquired */ + int waitlock; /* 4: # of tlock wait */ +} stattx; + + +/* + * external references + */ +extern int lmGroupCommit(log_t * log, tblock_t * tblk); +extern void lmSync(log_t *); +extern int readSuper(struct super_block *sb, metapage_t ** bpp); +extern int jfs_commit_inode(struct inode *, int); +extern int jfs_thread_stopped(void); + +extern struct task_struct *jfsCommitTask; +extern struct completion jfsIOwait; +extern struct task_struct *jfsSyncTask; + +/* + * forward references + */ +int diLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck, + commit_t * cd); +int dataLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck); +void dtLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck); +void inlineLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck); +void mapLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck); +void txAbortCommit(commit_t * cd, int exval); +static void txAllocPMap(struct inode *ip, maplock_t * maplock, + tblock_t * tblk); +void txForce(tblock_t * tblk); +static int txLog(log_t * log, tblock_t * tblk, commit_t * cd); +int txMoreLock(void); +static void txUpdateMap(tblock_t * tblk); +static void txRelease(tblock_t * tblk); +void xtLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck); +static void LogSyncRelease(metapage_t * mp); + +/* + * transaction block/lock management + * --------------------------------- + */ + +/* + * Get a transaction lock from the free list. If the number in use is + * greater than the high water mark, wake up the sync daemon. This should + * free some anonymous transaction locks. (TXN_LOCK must be held.) + */ +static lid_t txLockAlloc(void) +{ + lid_t lid; + + while (!(lid = TxAnchor.freelock)) + TXN_SLEEP(&TxAnchor.freelockwait); + TxAnchor.freelock = TxLock[lid].next; + HIGHWATERMARK(stattx.maxlid, lid); + if ((++TxAnchor.tlocksInUse > TxLockHWM) && (TlocksLow == 0)) { + jEVENT(0,("txLockAlloc TlocksLow\n")); + TlocksLow = 1; + wake_up_process(jfsSyncTask); + } + + return lid; +} + +static void txLockFree(lid_t lid) +{ + TxLock[lid].next = TxAnchor.freelock; + TxAnchor.freelock = lid; + TxAnchor.tlocksInUse--; + if (TlocksLow && (TxAnchor.tlocksInUse < TxLockLWM)) { + jEVENT(0,("txLockFree TlocksLow no more\n")); + TlocksLow = 0; + TXN_WAKEUP(&TxAnchor.lowlockwait); + } + TXN_WAKEUP(&TxAnchor.freelockwait); +} + +/* + * NAME: txInit() + * + * FUNCTION: initialize transaction management structures + * + * RETURN: + * + * serialization: single thread at jfs_init() + */ +int txInit(void) +{ + int k, size; + + /* + * initialize transaction block (tblock) table + * + * transaction id (tid) = tblock index + * tid = 0 is reserved. 
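+	 *
+	 * so the tid <-> tblock mapping is plain array indexing:
+	 * tid_to_tblock(tid) is simply &TxBlock[tid].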
+ */ + size = sizeof(tblock_t) * nTxBlock; + TxBlock = (tblock_t *) vmalloc(size); + if (TxBlock == NULL) + return ENOMEM; + + for (k = 1; k < nTxBlock - 1; k++) { + TxBlock[k].next = k + 1; + init_waitqueue_head(&TxBlock[k].gcwait); + init_waitqueue_head(&TxBlock[k].waitor); + } + TxBlock[k].next = 0; + init_waitqueue_head(&TxBlock[k].gcwait); + init_waitqueue_head(&TxBlock[k].waitor); + + TxAnchor.freetid = 1; + init_waitqueue_head(&TxAnchor.freewait); + + stattx.maxtid = 1; /* statistics */ + + /* + * initialize transaction lock (tlock) table + * + * transaction lock id = tlock index + * tlock id = 0 is reserved. + */ + size = sizeof(tlock_t) * nTxLock; + TxLock = (tlock_t *) vmalloc(size); + if (TxLock == NULL) { + vfree(TxBlock); + return ENOMEM; + } + + /* initialize tlock table */ + for (k = 1; k < nTxLock - 1; k++) + TxLock[k].next = k + 1; + TxLock[k].next = 0; + init_waitqueue_head(&TxAnchor.freelockwait); + init_waitqueue_head(&TxAnchor.lowlockwait); + + TxAnchor.freelock = 1; + TxAnchor.tlocksInUse = 0; + INIT_LIST_HEAD(&TxAnchor.anon_list); + INIT_LIST_HEAD(&TxAnchor.anon_list2); + + stattx.maxlid = 1; /* statistics */ + + return 0; +} + +/* + * NAME: txExit() + * + * FUNCTION: clean up when module is unloaded + */ +void txExit(void) +{ + vfree(TxLock); + TxLock = 0; + vfree(TxBlock); + TxBlock = 0; +} + + +/* + * NAME: txBegin() + * + * FUNCTION: start a transaction. + * + * PARAMETER: sb - superblock + * flag - force for nested tx; + * + * RETURN: tid - transaction id + * + * note: flag force allows to start tx for nested tx + * to prevent deadlock on logsync barrier; + */ +tid_t txBegin(struct super_block *sb, int flag) +{ + tid_t t; + tblock_t *tblk; + log_t *log; + + jFYI(1, ("txBegin: flag = 0x%x\n", flag)); + log = (log_t *) JFS_SBI(sb)->log; + + TXN_LOCK(); + + retry: + if (flag != COMMIT_FORCE) { + /* + * synchronize with logsync barrier + */ + if (log->syncbarrier) { + TXN_SLEEP(&log->syncwait); + goto retry; + } + } + if (flag == 0) { + /* + * Don't begin transaction if we're getting starved for tlocks + * unless COMMIT_FORCE (imap changes) or COMMIT_INODE (which + * may ultimately free tlocks) + */ + if (TlocksLow) { + TXN_SLEEP(&TxAnchor.lowlockwait); + goto retry; + } + } + + /* + * allocate transaction id/block + */ + if ((t = TxAnchor.freetid) == 0) { + jFYI(1, ("txBegin: waiting for free tid\n")); + TXN_SLEEP(&TxAnchor.freewait); + goto retry; + } + + tblk = tid_to_tblock(t); + + if ((tblk->next == 0) && (current != jfsCommitTask)) { + /* Save one tblk for jfsCommit thread */ + jFYI(1, ("txBegin: waiting for free tid\n")); + TXN_SLEEP(&TxAnchor.freewait); + goto retry; + } + + TxAnchor.freetid = tblk->next; + + /* + * initialize transaction + */ + + /* + * We can't zero the whole thing or we screw up another thread being + * awakened after sleeping on tblk->waitor + * + * memset(tblk, 0, sizeof(tblock_t)); + */ + tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0; + + tblk->sb = sb; + ++log->logtid; + tblk->logtid = log->logtid; + + ++log->active; + + HIGHWATERMARK(stattx.maxtid, t); /* statistics */ + INCREMENT(stattx.ntid); /* statistics */ + + TXN_UNLOCK(); + + jFYI(1, ("txBegin: returning tid = %d\n", t)); + + return t; +} + + +/* + * NAME: txBeginAnon() + * + * FUNCTION: start an anonymous transaction. + * Blocks if logsync or available tlocks are low to prevent + * anonymous tlocks from depleting supply. 
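+ *
+ *	(an anonymous writer just calls txBeginAnon(sb) and then
+ *	 txLock() with tid = 0; there is no tid and hence no txEnd())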
+ * + * PARAMETER: sb - superblock + * + * RETURN: none + */ +void txBeginAnon(struct super_block *sb) +{ + log_t *log; + + log = (log_t *) JFS_SBI(sb)->log; + + TXN_LOCK(); + + retry: + /* + * synchronize with logsync barrier + */ + if (log->syncbarrier) { + TXN_SLEEP(&log->syncwait); + goto retry; + } + + /* + * Don't begin transaction if we're getting starved for tlocks + */ + if (TlocksLow) { + TXN_SLEEP(&TxAnchor.lowlockwait); + goto retry; + } + TXN_UNLOCK(); +} + + +/* + * txEnd() + * + * function: free specified transaction block. + * + * logsync barrier processing: + * + * serialization: + */ +void txEnd(tid_t tid) +{ + tblock_t *tblk = tid_to_tblock(tid); + log_t *log; + + jFYI(1, ("txEnd: tid = %d\n", tid)); + TXN_LOCK(); + + /* + * wakeup transactions waiting on the page locked + * by the current transaction + */ + TXN_WAKEUP(&tblk->waitor); + + log = (log_t *) JFS_SBI(tblk->sb)->log; + + /* + * Lazy commit thread can't free this guy until we mark it UNLOCKED, + * otherwise, we would be left with a transaction that may have been + * reused. + * + * Lazy commit thread will turn off tblkGC_LAZY before calling this + * routine. + */ + if (tblk->flag & tblkGC_LAZY) { + jFYI(1, + ("txEnd called w/lazy tid: %d, tblk = 0x%p\n", + tid, tblk)); + TXN_UNLOCK(); + + spin_lock_irq(&log->gclock); // LOGGC_LOCK + tblk->flag |= tblkGC_UNLOCKED; + spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK + return; + } + + jFYI(1, ("txEnd: tid: %d, tblk = 0x%p\n", tid, tblk)); + + assert(tblk->next == 0); + + /* + * insert tblock back on freelist + */ + tblk->next = TxAnchor.freetid; + TxAnchor.freetid = tid; + + /* + * mark the tblock not active + */ + --log->active; + + /* + * synchronize with logsync barrier + */ + if (log->syncbarrier && log->active == 0) { + /* forward log syncpt */ + /* lmSync(log); */ + + jFYI(1, (" log barrier off: 0x%x\n", log->lsn)); + + /* enable new transactions start */ + log->syncbarrier = 0; + + /* wakeup all waitors for logsync barrier */ + TXN_WAKEUP(&log->syncwait); + } + + /* + * wakeup all waitors for a free tblock + */ + TXN_WAKEUP(&TxAnchor.freewait); + + TXN_UNLOCK(); + jFYI(1, ("txEnd: exitting\n")); +} + + +/* + * txLock() + * + * function: acquire a transaction lock on the specified + * + * parameter: + * + * return: transaction lock id + * + * serialization: + */ +tlock_t *txLock(tid_t tid, struct inode *ip, metapage_t * mp, int type) +{ + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + int dir_xtree = 0; + lid_t lid; + tid_t xtid; + tlock_t *tlck; + xtlock_t *xtlck; + linelock_t *linelock; + xtpage_t *p; + tblock_t *tblk; + + TXN_LOCK(); + + if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) && + !(mp->xflag & COMMIT_PAGE)) { + /* + * Directory inode is special. It can have both an xtree tlock + * and a dtree tlock associated with it. + */ + dir_xtree = 1; + lid = jfs_ip->xtlid; + } else + lid = mp->lid; + + /* is page not locked by a transaction ? */ + if (lid == 0) + goto allocateLock; + + jFYI(1, ("txLock: tid:%d ip:0x%p mp:0x%p lid:%d\n", + tid, ip, mp, lid)); + + /* is page locked by the requester transaction ? */ + tlck = lid_to_tlock(lid); + if ((xtid = tlck->tid) == tid) + goto grantLock; + + /* + * is page locked by anonymous transaction/lock ? 
+	 *
+	 * (page update without transaction (i.e., file write) is
+	 * locked under anonymous transaction tid = 0:
+	 * anonymous tlocks maintained on anonymous tlock list of
+	 * the inode of the page and available to all anonymous
+	 * transactions until txCommit() time at which point
+	 * they are transferred to the transaction tlock list of
+	 * the committing transaction of the inode)
+	 */
+	if (xtid == 0) {
+		tlck->tid = tid;
+		tblk = tid_to_tblock(tid);
+		/*
+		 * The order of the tlocks in the transaction is important
+		 * (during truncate, child xtree pages must be freed before
+		 * parent's tlocks change the working map).
+		 * Take tlock off anonymous list and add to tail of
+		 * transaction list
+		 *
+		 * Note:  We really need to get rid of the tid & lid and
+		 * use list_head's.  This code is getting UGLY!
+		 */
+		if (jfs_ip->atlhead == lid) {
+			if (jfs_ip->atltail == lid) {
+				/* only anonymous txn.
+				 * Remove from anon_list
+				 */
+				list_del_init(&jfs_ip->anon_inode_list);
+			}
+			jfs_ip->atlhead = tlck->next;
+		} else {
+			lid_t last;
+			for (last = jfs_ip->atlhead;
+			     lid_to_tlock(last)->next != lid;
+			     last = lid_to_tlock(last)->next) {
+				assert(last);
+			}
+			lid_to_tlock(last)->next = tlck->next;
+			if (jfs_ip->atltail == lid)
+				jfs_ip->atltail = last;
+		}
+
+		/* insert the tlock at tail of transaction tlock list */
+
+		if (tblk->next)
+			lid_to_tlock(tblk->last)->next = lid;
+		else
+			tblk->next = lid;
+		tlck->next = 0;
+		tblk->last = lid;
+
+		goto grantLock;
+	}
+
+	goto waitLock;
+
+	/*
+	 * allocate a tlock
+	 */
+      allocateLock:
+	lid = txLockAlloc();
+	tlck = lid_to_tlock(lid);
+
+	/*
+	 * initialize tlock
+	 */
+	tlck->tid = tid;
+
+	/* mark tlock for meta-data page */
+	if (mp->xflag & COMMIT_PAGE) {
+
+		tlck->flag = tlckPAGELOCK;
+
+		/* mark the page dirty and nohomeok */
+		mark_metapage_dirty(mp);
+		atomic_inc(&mp->nohomeok);
+
+		jFYI(1,
+		     ("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p\n",
+		      mp, atomic_read(&mp->nohomeok), tid, tlck));
+
+		/* if anonymous transaction, and buffer is on the group
+		 * commit synclist, mark inode to show this.  This will
+		 * prevent the buffer from being marked nohomeok for too
+		 * long a time.
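+		 *
+		 * (COMMIT_Synclist asks the sync thread to commit this
+		 * inode soon, which turns the anonymous tlocks into a
+		 * real transaction so the page can finally go home)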
+ */ + if ((tid == 0) && mp->lsn) + set_cflag(COMMIT_Synclist, ip); + } + /* mark tlock for in-memory inode */ + else + tlck->flag = tlckINODELOCK; + + tlck->type = 0; + + /* bind the tlock and the page */ + tlck->ip = ip; + tlck->mp = mp; + if (dir_xtree) + jfs_ip->xtlid = lid; + else + mp->lid = lid; + + /* + * enqueue transaction lock to transaction/inode + */ + /* insert the tlock at tail of transaction tlock list */ + if (tid) { + tblk = tid_to_tblock(tid); + if (tblk->next) + lid_to_tlock(tblk->last)->next = lid; + else + tblk->next = lid; + tlck->next = 0; + tblk->last = lid; + } + /* anonymous transaction: + * insert the tlock at head of inode anonymous tlock list + */ + else { + tlck->next = jfs_ip->atlhead; + jfs_ip->atlhead = lid; + if (tlck->next == 0) { + /* This inode's first anonymous transaction */ + jfs_ip->atltail = lid; + list_add_tail(&jfs_ip->anon_inode_list, + &TxAnchor.anon_list); + } + } + + /* initialize type dependent area for linelock */ + linelock = (linelock_t *) & tlck->lock; + linelock->next = 0; + linelock->flag = tlckLINELOCK; + linelock->maxcnt = TLOCKSHORT; + linelock->index = 0; + + switch (type & tlckTYPE) { + case tlckDTREE: + linelock->l2linesize = L2DTSLOTSIZE; + break; + + case tlckXTREE: + linelock->l2linesize = L2XTSLOTSIZE; + + xtlck = (xtlock_t *) linelock; + xtlck->header.offset = 0; + xtlck->header.length = 2; + + if (type & tlckNEW) { + xtlck->lwm.offset = XTENTRYSTART; + } else { + if (mp->xflag & COMMIT_PAGE) + p = (xtpage_t *) mp->data; + else + p = &jfs_ip->i_xtroot; + xtlck->lwm.offset = + le16_to_cpu(p->header.nextindex); + } + xtlck->lwm.length = 0; /* ! */ + + xtlck->index = 2; + break; + + case tlckINODE: + linelock->l2linesize = L2INODESLOTSIZE; + break; + + case tlckDATA: + linelock->l2linesize = L2DATASLOTSIZE; + break; + + default: + jERROR(1, ("UFO tlock:0x%p\n", tlck)); + } + + /* + * update tlock vector + */ + grantLock: + tlck->type |= type; + + TXN_UNLOCK(); + + return tlck; + + /* + * page is being locked by another transaction: + */ + waitLock: + /* Only locks on ipimap or ipaimap should reach here */ + /* assert(jfs_ip->fileset == AGGREGATE_I); */ + if (jfs_ip->fileset != AGGREGATE_I) { + jERROR(1, ("txLock: trying to lock locked page!\n")); + dump_mem("ip", ip, sizeof(struct inode)); + dump_mem("mp", mp, sizeof(metapage_t)); + dump_mem("Locker's tblk", tid_to_tblock(tid), + sizeof(tblock_t)); + dump_mem("Tlock", tlck, sizeof(tlock_t)); + BUG(); + } + INCREMENT(stattx.waitlock); /* statistics */ + release_metapage(mp); + + jEVENT(0, ("txLock: in waitLock, tid = %d, xtid = %d, lid = %d\n", + tid, xtid, lid)); + TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); + jEVENT(0, ("txLock: awakened tid = %d, lid = %d\n", tid, lid)); + + return NULL; +} + + +/* + * NAME: txRelease() + * + * FUNCTION: Release buffers associated with transaction locks, but don't + * mark homeok yet. The allows other transactions to modify + * buffers, but won't let them go to disk until commit record + * actually gets written. + * + * PARAMETER: + * tblk - + * + * RETURN: Errors from subroutines. 
+ */
+static void txRelease(tblock_t * tblk)
+{
+	metapage_t *mp;
+	lid_t lid;
+	tlock_t *tlck;
+
+	TXN_LOCK();
+
+	for (lid = tblk->next; lid; lid = tlck->next) {
+		tlck = lid_to_tlock(lid);
+		if ((mp = tlck->mp) != NULL &&
+		    (tlck->type & tlckBTROOT) == 0) {
+			assert(mp->xflag & COMMIT_PAGE);
+			mp->lid = 0;
+		}
+	}
+
+	/*
+	 * wakeup transactions waiting on a page locked
+	 * by the current transaction
+	 */
+	TXN_WAKEUP(&tblk->waitor);
+
+	TXN_UNLOCK();
+}
+
+
+/*
+ * NAME:	txUnlock()
+ *
+ * FUNCTION:	Initiates pageout of pages modified by tid in journalled
+ *		objects and frees their lockwords.
+ *
+ * PARAMETER:
+ *		flag	-
+ *
+ * RETURN:	none
+ */
+static void txUnlock(tblock_t * tblk, int flag)
+{
+	tlock_t *tlck;
+	linelock_t *linelock;
+	lid_t lid, next, llid, k;
+	metapage_t *mp;
+	log_t *log;
+	int force;
+	int difft, diffp;
+
+	jFYI(1, ("txUnlock: tblk = 0x%p\n", tblk));
+	log = (log_t *) JFS_SBI(tblk->sb)->log;
+	force = flag & COMMIT_FLUSH;
+	if (log->syncbarrier)
+		force |= COMMIT_FORCE;
+
+	/*
+	 * mark page under tlock homeok (its log has been written):
+	 * if caller has specified FORCE (e.g., iRecycle()), or
+	 * if syncwait for the log is set (i.e., the log sync point
+	 * has fallen behind), or
+	 * if syncpt is set for the page, or
+	 * if the page is new, initiate pageout;
+	 * otherwise, leave the page in memory.
+	 */
+	for (lid = tblk->next; lid; lid = next) {
+		tlck = lid_to_tlock(lid);
+		next = tlck->next;
+
+		jFYI(1, ("unlocking lid = %d, tlck = 0x%p\n", lid, tlck));
+
+		/* unbind page from tlock */
+		if ((mp = tlck->mp) != NULL &&
+		    (tlck->type & tlckBTROOT) == 0) {
+			assert(mp->xflag & COMMIT_PAGE);
+
+			/* hold buffer
+			 *
+			 * It's possible that someone else has the metapage.
+			 * The only things we're changing are nohomeok, which
+			 * is handled atomically, and clsn which is protected
+			 * by the LOGSYNC_LOCK.
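+			 *
+			 * (the second argument to hold_metapage() below
+			 * is "force": if the metapage is locked by some
+			 * other thread it is tagged META_forced rather
+			 * than waited for, and the matching
+			 * release_metapage() then just drops the count)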
+ */ + hold_metapage(mp, 1); + + assert(atomic_read(&mp->nohomeok) > 0); + atomic_dec(&mp->nohomeok); + + /* inherit younger/larger clsn */ + LOGSYNC_LOCK(log); + if (mp->clsn) { + logdiff(difft, tblk->clsn, log); + logdiff(diffp, mp->clsn, log); + if (difft > diffp) + mp->clsn = tblk->clsn; + } else + mp->clsn = tblk->clsn; + LOGSYNC_UNLOCK(log); + + assert(!(tlck->flag & tlckFREEPAGE)); + + if (tlck->flag & tlckWRITEPAGE) { + write_metapage(mp); + } else { + /* release page which has been forced */ + release_metapage(mp); + } + } + + /* insert tlock, and linelock(s) of the tlock if any, + * at head of freelist + */ + TXN_LOCK(); + + llid = ((linelock_t *) & tlck->lock)->next; + while (llid) { + linelock = (linelock_t *) lid_to_tlock(llid); + k = linelock->next; + txLockFree(llid); + llid = k; + } + txLockFree(lid); + + TXN_UNLOCK(); + } + tblk->next = tblk->last = 0; + + /* + * remove tblock from logsynclist + * (allocation map pages inherited lsn of tblk and + * has been inserted in logsync list at txUpdateMap()) + */ + if (tblk->lsn) { + LOGSYNC_LOCK(log); + log->count--; + list_del(&tblk->synclist); + LOGSYNC_UNLOCK(log); + } +} + + +/* + * txMaplock() + * + * function: allocate a transaction lock for freed page/entry; + * for freed page, maplock is used as xtlock/dtlock type; + */ +tlock_t *txMaplock(tid_t tid, struct inode *ip, int type) +{ + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + lid_t lid; + tblock_t *tblk; + tlock_t *tlck; + maplock_t *maplock; + + TXN_LOCK(); + + /* + * allocate a tlock + */ + lid = txLockAlloc(); + tlck = lid_to_tlock(lid); + + /* + * initialize tlock + */ + tlck->tid = tid; + + /* bind the tlock and the object */ + tlck->flag = tlckINODELOCK; + tlck->ip = ip; + tlck->mp = NULL; + + tlck->type = type; + + /* + * enqueue transaction lock to transaction/inode + */ + /* insert the tlock at tail of transaction tlock list */ + if (tid) { + tblk = tid_to_tblock(tid); + if (tblk->next) + lid_to_tlock(tblk->last)->next = lid; + else + tblk->next = lid; + tlck->next = 0; + tblk->last = lid; + } + /* anonymous transaction: + * insert the tlock at head of inode anonymous tlock list + */ + else { + tlck->next = jfs_ip->atlhead; + jfs_ip->atlhead = lid; + if (tlck->next == 0) { + /* This inode's first anonymous transaction */ + jfs_ip->atltail = lid; + list_add_tail(&jfs_ip->anon_inode_list, + &TxAnchor.anon_list); + } + } + + TXN_UNLOCK(); + + /* initialize type dependent area for maplock */ + maplock = (maplock_t *) & tlck->lock; + maplock->next = 0; + maplock->maxcnt = 0; + maplock->index = 0; + + return tlck; +} + + +/* + * txLinelock() + * + * function: allocate a transaction lock for log vector list + */ +linelock_t *txLinelock(linelock_t * tlock) +{ + lid_t lid; + tlock_t *tlck; + linelock_t *linelock; + + TXN_LOCK(); + + /* allocate a TxLock structure */ + lid = txLockAlloc(); + tlck = lid_to_tlock(lid); + + TXN_UNLOCK(); + + /* initialize linelock */ + linelock = (linelock_t *) tlck; + linelock->next = 0; + linelock->flag = tlckLINELOCK; + linelock->maxcnt = TLOCKLONG; + linelock->index = 0; + + /* append linelock after tlock */ + linelock->next = tlock->next; + tlock->next = lid; + + return linelock; +} + + + +/* + * transaction commit management + * ----------------------------- + */ + +/* + * NAME: txCommit() + * + * FUNCTION: commit the changes to the objects specified in + * clist. For journalled segments only the + * changes of the caller are committed, ie by tid. 
+ * for non-journalled segments the data are flushed to + * disk and then the change to the disk inode and indirect + * blocks committed (so blocks newly allocated to the + * segment will be made a part of the segment atomically). + * + * all of the segments specified in clist must be in + * one file system. no more than 6 segments are needed + * to handle all unix svcs. + * + * if the i_nlink field (i.e. disk inode link count) + * is zero, and the type of inode is a regular file or + * directory, or symbolic link , the inode is truncated + * to zero length. the truncation is committed but the + * VM resources are unaffected until it is closed (see + * iput and iclose). + * + * PARAMETER: + * + * RETURN: + * + * serialization: + * on entry the inode lock on each segment is assumed + * to be held. + * + * i/o error: + */ +int txCommit(tid_t tid, /* transaction identifier */ + int nip, /* number of inodes to commit */ + struct inode **iplist, /* list of inode to commit */ + int flag) +{ + int rc = 0, rc1 = 0; + commit_t cd; + log_t *log; + tblock_t *tblk; + lrd_t *lrd; + int lsn; + struct inode *ip; + struct jfs_inode_info *jfs_ip; + int k, n; + ino_t top; + struct super_block *sb; + + jFYI(1, ("txCommit, tid = %d, flag = %d\n", tid, flag)); + /* is read-only file system ? */ + if (isReadOnly(iplist[0])) { + rc = EROFS; + goto TheEnd; + } + + sb = cd.sb = iplist[0]->i_sb; + cd.tid = tid; + + if (tid == 0) + tid = txBegin(sb, 0); + tblk = tid_to_tblock(tid); + + /* + * initialize commit structure + */ + log = (log_t *) JFS_SBI(sb)->log; + cd.log = log; + + /* initialize log record descriptor in commit */ + lrd = &cd.lrd; + lrd->logtid = cpu_to_le32(tblk->logtid); + lrd->backchain = 0; + + tblk->xflag |= flag; + + if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) + tblk->xflag |= COMMIT_LAZY; + /* + * prepare non-journaled objects for commit + * + * flush data pages of non-journaled file + * to prevent the file getting non-initialized disk blocks + * in case of crash. + * (new blocks - ) + */ + cd.iplist = iplist; + cd.nip = nip; + + /* + * acquire transaction lock on (on-disk) inodes + * + * update on-disk inode from in-memory inode + * acquiring transaction locks for AFTER records + * on the on-disk inode of file object + * + * sort the inodes array by inode number in descending order + * to prevent deadlock when acquiring transaction lock + * of on-disk inodes on multiple on-disk inode pages by + * multiple concurrent transactions + */ + for (k = 0; k < cd.nip; k++) { + top = (cd.iplist[k])->i_ino; + for (n = k + 1; n < cd.nip; n++) { + ip = cd.iplist[n]; + if (ip->i_ino > top) { + top = ip->i_ino; + cd.iplist[n] = cd.iplist[k]; + cd.iplist[k] = ip; + } + } + + ip = cd.iplist[k]; + jfs_ip = JFS_IP(ip); + + /* + * BUGBUG - Should we call filemap_fdatasync here instead + * of fsync_inode_data? + * If we do, we have a deadlock condition since we may end + * up recursively calling jfs_get_block with the IWRITELOCK + * held. We may be able to do away with IWRITELOCK while + * committing transactions and use i_sem instead. + */ + if ((!S_ISDIR(ip->i_mode)) + && (tblk->flag & COMMIT_DELETE) == 0) + fsync_inode_data_buffers(ip); + + /* + * Mark inode as not dirty. 
It will still be on the dirty + * inode list, but we'll know not to commit it again unless + * it gets marked dirty again + */ + clear_cflag(COMMIT_Dirty, ip); + + /* inherit anonymous tlock(s) of inode */ + if (jfs_ip->atlhead) { + lid_to_tlock(jfs_ip->atltail)->next = tblk->next; + tblk->next = jfs_ip->atlhead; + if (!tblk->last) + tblk->last = jfs_ip->atltail; + jfs_ip->atlhead = jfs_ip->atltail = 0; + TXN_LOCK(); + list_del_init(&jfs_ip->anon_inode_list); + TXN_UNLOCK(); + } + + /* + * acquire transaction lock on on-disk inode page + * (become first tlock of the tblk's tlock list) + */ + if (((rc = diWrite(tid, ip)))) + goto out; + } + + /* + * write log records from transaction locks + * + * txUpdateMap() resets XAD_NEW in XAD. + */ + if ((rc = txLog(log, tblk, &cd))) + goto TheEnd; + + /* + * Ensure that inode isn't reused before + * lazy commit thread finishes processing + */ + if (tblk->xflag & (COMMIT_CREATE | COMMIT_DELETE)) + atomic_inc(&tblk->ip->i_count); + if (tblk->xflag & COMMIT_DELETE) { + ip = tblk->ip; + assert((ip->i_nlink == 0) && !test_cflag(COMMIT_Nolink, ip)); + set_cflag(COMMIT_Nolink, ip); + } + + /* + * write COMMIT log record + */ + lrd->type = cpu_to_le16(LOG_COMMIT); + lrd->length = 0; + lsn = lmLog(log, tblk, lrd, NULL); + + lmGroupCommit(log, tblk); + + /* + * - transaction is now committed - + */ + + /* + * force pages in careful update + * (imap addressing structure update) + */ + if (flag & COMMIT_FORCE) + txForce(tblk); + + /* + * update allocation map. + * + * update inode allocation map and inode: + * free pager lock on memory object of inode if any. + * update block allocation map. + * + * txUpdateMap() resets XAD_NEW in XAD. + */ + if (tblk->xflag & COMMIT_FORCE) + txUpdateMap(tblk); + + /* + * free transaction locks and pageout/free pages + */ + txRelease(tblk); + + if ((tblk->flag & tblkGC_LAZY) == 0) + txUnlock(tblk, flag); + + + /* + * reset in-memory object state + */ + for (k = 0; k < cd.nip; k++) { + ip = cd.iplist[k]; + jfs_ip = JFS_IP(ip); + + /* + * reset in-memory inode state + */ + jfs_ip->bxflag = 0; + jfs_ip->blid = 0; + } + + out: + if (rc != 0) + txAbortCommit(&cd, rc); + else + rc = rc1; + + TheEnd: + jFYI(1, ("txCommit: tid = %d, returning %d\n", tid, rc)); + return rc; +} + + +/* + * NAME: txLog() + * + * FUNCTION: Writes AFTER log records for all lines modified + * by tid for segments specified by inodes in comdata. + * Code assumes only WRITELOCKS are recorded in lockwords. 
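+ *
+ *		dispatch is by tlock type, one helper per object:
+ *		tlckXTREE -> xtLog(), tlckDTREE -> dtLog(),
+ *		tlckINODE -> diLog(), tlckMAP -> mapLog(),
+ *		tlckDATA -> dataLog()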
+ * + * PARAMETERS: + * + * RETURN : + */ +static int txLog(log_t * log, tblock_t * tblk, commit_t * cd) +{ + int rc = 0; + struct inode *ip; + lid_t lid; + tlock_t *tlck; + lrd_t *lrd = &cd->lrd; + + /* + * write log record(s) for each tlock of transaction, + */ + for (lid = tblk->next; lid; lid = tlck->next) { + tlck = lid_to_tlock(lid); + + tlck->flag |= tlckLOG; + + /* initialize lrd common */ + ip = tlck->ip; + lrd->aggregate = cpu_to_le32(kdev_t_to_nr(ip->i_dev)); + lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset); + lrd->log.redopage.inode = cpu_to_le32(ip->i_ino); + + if (tlck->mp) + hold_metapage(tlck->mp, 0); + + /* write log record of page from the tlock */ + switch (tlck->type & tlckTYPE) { + case tlckXTREE: + xtLog(log, tblk, lrd, tlck); + break; + + case tlckDTREE: + dtLog(log, tblk, lrd, tlck); + break; + + case tlckINODE: + diLog(log, tblk, lrd, tlck, cd); + break; + + case tlckMAP: + mapLog(log, tblk, lrd, tlck); + break; + + case tlckDATA: + dataLog(log, tblk, lrd, tlck); + break; + + default: + jERROR(1, ("UFO tlock:0x%p\n", tlck)); + } + if (tlck->mp) + release_metapage(tlck->mp); + } + + return rc; +} + + +/* + * diLog() + * + * function: log inode tlock and format maplock to update bmap; + */ +int diLog(log_t * log, + tblock_t * tblk, lrd_t * lrd, tlock_t * tlck, commit_t * cd) +{ + int rc = 0; + metapage_t *mp; + pxd_t *pxd; + pxdlock_t *pxdlock; + + mp = tlck->mp; + + /* initialize as REDOPAGE record format */ + lrd->log.redopage.type = cpu_to_le16(LOG_INODE); + lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE); + + pxd = &lrd->log.redopage.pxd; + + /* + * inode after image + */ + if (tlck->type & tlckENTRY) { + /* log after-image for logredo(): */ + lrd->type = cpu_to_le16(LOG_REDOPAGE); +// *pxd = mp->cm_pxd; + PXDaddress(pxd, mp->index); + PXDlength(pxd, + mp->logical_size >> tblk->sb->s_blocksize_bits); + lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); + + /* mark page as homeward bound */ + tlck->flag |= tlckWRITEPAGE; + } else if (tlck->type & tlckFREE) { + /* + * free inode extent + * + * (pages of the freed inode extent have been invalidated and + * a maplock for free of the extent has been formatted at + * txLock() time); + * + * the tlock had been acquired on the inode allocation map page + * (iag) that specifies the freed extent, even though the map + * page is not itself logged, to prevent pageout of the map + * page before the log; + */ + assert(tlck->type & tlckFREE); + + /* log LOG_NOREDOINOEXT of the freed inode extent for + * logredo() to start NoRedoPage filters, and to update + * imap and bmap for free of the extent; + */ + lrd->type = cpu_to_le16(LOG_NOREDOINOEXT); + /* + * For the LOG_NOREDOINOEXT record, we need + * to pass the IAG number and inode extent + * index (within that IAG) from which the + * the extent being released. These have been + * passed to us in the iplist[1] and iplist[2]. 
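+		 *
+		 * i.e. the caller smuggles two small integers through
+		 * the inode list; they are cast back out just below:
+		 *
+		 *	iagnum     = (u32) (size_t) cd->iplist[1]
+		 *	inoext_idx = (u32) (size_t) cd->iplist[2]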
+	 */
+	lrd->log.noredoinoext.iagnum =
+	    cpu_to_le32((u32) (size_t) cd->iplist[1]);
+	lrd->log.noredoinoext.inoext_idx =
+	    cpu_to_le32((u32) (size_t) cd->iplist[2]);
+
+	pxdlock = (pxdlock_t *) & tlck->lock;
+	*pxd = pxdlock->pxd;
+	lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
+
+	/* update bmap */
+	tlck->flag |= tlckUPDATEMAP;
+
+	/* mark page as homeward bound */
+	tlck->flag |= tlckWRITEPAGE;
+	} else {
+		jERROR(2, ("diLog: UFO type tlck:0x%p\n", tlck));
+	}
+#ifdef _JFS_WIP
+	/*
+	 *      alloc/free external EA extent
+	 *
+	 * a maplock for txUpdateMap() to update bPWMAP for alloc/free
+	 * of the extent has been formatted at txLock() time;
+	 */
+	else {
+		assert(tlck->type & tlckEA);
+
+		/* log LOG_UPDATEMAP for logredo() to update bmap for
+		 * alloc of new (and free of old) external EA extent;
+		 */
+		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
+		pxdlock = (pxdlock_t *) & tlck->lock;
+		nlock = pxdlock->index;
+		for (i = 0; i < nlock; i++, pxdlock++) {
+			if (pxdlock->flag & mlckALLOCPXD)
+				lrd->log.updatemap.type =
+				    cpu_to_le16(LOG_ALLOCPXD);
+			else
+				lrd->log.updatemap.type =
+				    cpu_to_le16(LOG_FREEPXD);
+			lrd->log.updatemap.nxd = cpu_to_le16(1);
+			lrd->log.updatemap.pxd = pxdlock->pxd;
+			lrd->backchain =
+			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
+		}
+
+		/* update bmap */
+		tlck->flag |= tlckUPDATEMAP;
+	}
+#endif				/* _JFS_WIP */
+
+	return rc;
+}
+
+
+/*
+ *      dataLog()
+ *
+ * function:    log data tlock
+ */
+int dataLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck)
+{
+	metapage_t *mp;
+	pxd_t *pxd;
+	int rc;
+	s64 xaddr;
+	int xflag;
+	s32 xlen;
+
+	mp = tlck->mp;
+
+	/* initialize as REDOPAGE record format */
+	lrd->log.redopage.type = cpu_to_le16(LOG_DATA);
+	lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE);
+
+	pxd = &lrd->log.redopage.pxd;
+
+	/* log after-image for logredo(): */
+	lrd->type = cpu_to_le16(LOG_REDOPAGE);
+
+	if (JFS_IP(tlck->ip)->next_index < MAX_INLINE_DIRTABLE_ENTRY) {
+		/*
+		 * The table has been truncated, we must have deleted
+		 * the last entry, so don't bother logging this
+		 */
+		mp->lid = 0;
+		atomic_dec(&mp->nohomeok);
+		discard_metapage(mp);
+		tlck->mp = 0;
+		return 0;
+	}
+
+	rc = xtLookup(tlck->ip, mp->index, 1, &xflag, &xaddr, &xlen, 1);
+	if (rc || (xlen == 0)) {
+		jERROR(1, ("dataLog: can't find physical address\n"));
+		return 0;
+	}
+
+	PXDaddress(pxd, xaddr);
+	PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);
+
+	lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
+
+	/* mark page as homeward bound */
+	tlck->flag |= tlckWRITEPAGE;
+
+	return 0;
+}
+
+
+/*
+ *      dtLog()
+ *
+ * function:    log dtree tlock and format maplock to update bmap;
+ */
+void dtLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck)
+{
+	struct inode *ip;
+	metapage_t *mp;
+	pxdlock_t *pxdlock;
+	pxd_t *pxd;
+
+	ip = tlck->ip;
+	mp = tlck->mp;
+
+	/* initialize as REDOPAGE/NOREDOPAGE record format */
+	lrd->log.redopage.type = cpu_to_le16(LOG_DTREE);
+	lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE);
+
+	pxd = &lrd->log.redopage.pxd;
+
+	if (tlck->type & tlckBTROOT)
+		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
+
+	/*
+	 *      page extension via relocation: entry insertion;
+	 *      page extension in-place: entry insertion;
+	 *      new right page from page split, reinitialized in-line
+	 *      root from root page split: entry insertion;
+	 */
+	if (tlck->type & (tlckNEW | tlckEXTEND)) {
+		/* log after-image of the new page for logredo():
+		 * mark log (LOG_NEW) for logredo() to initialize
+		 * freelist and update bmap for alloc of the new page;
+		 */
+		lrd->type = cpu_to_le16(LOG_REDOPAGE);
+		if (tlck->type & tlckEXTEND)
+			lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND);
+		else
+			lrd->log.redopage.type |= cpu_to_le16(LOG_NEW);
+//              *pxd = mp->cm_pxd;
+		PXDaddress(pxd, mp->index);
+		PXDlength(pxd,
+			  mp->logical_size >> tblk->sb->s_blocksize_bits);
+		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
+
+		/* format a maplock for txUpdateMap() to update bPMAP for
+		 * alloc of the new page;
+		 */
+		if (tlck->type & tlckBTROOT)
+			return;
+		tlck->flag |= tlckUPDATEMAP;
+		pxdlock = (pxdlock_t *) & tlck->lock;
+		pxdlock->flag = mlckALLOCPXD;
+		pxdlock->pxd = *pxd;
+
+		pxdlock->index = 1;
+
+		/* mark page as homeward bound */
+		tlck->flag |= tlckWRITEPAGE;
+		return;
+	}
+
+	/*
+	 *      entry insertion/deletion,
+	 *      sibling page link update (old right page before split);
+	 */
+	if (tlck->type & (tlckENTRY | tlckRELINK)) {
+		/* log after-image for logredo(): */
+		lrd->type = cpu_to_le16(LOG_REDOPAGE);
+		PXDaddress(pxd, mp->index);
+		PXDlength(pxd,
+			  mp->logical_size >> tblk->sb->s_blocksize_bits);
+		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
+
+		/* mark page as homeward bound */
+		tlck->flag |= tlckWRITEPAGE;
+		return;
+	}
+
+	/*
+	 *      page deletion: page has been invalidated
+	 *      page relocation: source extent
+	 *
+	 *      a maplock for free of the page has been formatted
+	 *      at txLock() time;
+	 */
+	if (tlck->type & (tlckFREE | tlckRELOCATE)) {
+		/* log LOG_NOREDOPAGE of the deleted page for logredo()
+		 * to start NoRedoPage filter and to update bmap for free
+		 * of the deleted page
+		 */
+		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
+		pxdlock = (pxdlock_t *) & tlck->lock;
+		*pxd = pxdlock->pxd;
+		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
+
+		/* a maplock for txUpdateMap() for free of the page
+		 * has been formatted at txLock() time;
+		 */
+		tlck->flag |= tlckUPDATEMAP;
+	}
+	return;
+}
+
+
+/*
+ *      xtLog()
+ *
+ * function:    log xtree tlock and format maplock to update bmap;
+ */
+void xtLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck)
+{
+	struct inode *ip;
+	metapage_t *mp;
+	xtpage_t *p;
+	xtlock_t *xtlck;
+	maplock_t *maplock;
+	xdlistlock_t *xadlock;
+	pxdlock_t *pxdlock;
+	pxd_t *pxd;
+	int next, lwm, hwm;
+
+	ip = tlck->ip;
+	mp = tlck->mp;
+
+	/* initialize as REDOPAGE/NOREDOPAGE record format */
+	lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
+	lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
+
+	pxd = &lrd->log.redopage.pxd;
+
+	if (tlck->type & tlckBTROOT) {
+		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
+		p = &JFS_IP(ip)->i_xtroot;
+		if (S_ISDIR(ip->i_mode))
+			lrd->log.redopage.type |=
+			    cpu_to_le16(LOG_DIR_XTREE);
+	} else
+		p = (xtpage_t *) mp->data;
+	next = le16_to_cpu(p->header.nextindex);
+
+	xtlck = (xtlock_t *) & tlck->lock;
+
+	maplock = (maplock_t *) & tlck->lock;
+	xadlock = (xdlistlock_t *) maplock;
+
+	/*
+	 *      entry insertion/extension;
+	 *      sibling page link update (old right page before split);
+	 */
+	if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
+		/* log after-image for logredo():
+		 * logredo() will update bmap for alloc of new/extended
+		 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
+		 * after-image of XADlist;
+		 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
+		 * applying the after-image to the meta-data page.
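+		 *
+		 * (worked example, for illustration only: if lwm == 3 and
+		 *  next == 6, the after-image covers the three slots XAD[3],
+		 *  XAD[4] and XAD[5], and the maplock formatted below gets
+		 *  xadlock->count = next - lwm = 3.)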
+		 */
+		lrd->type = cpu_to_le16(LOG_REDOPAGE);
+//              *pxd = mp->cm_pxd;
+		PXDaddress(pxd, mp->index);
+		PXDlength(pxd,
+			  mp->logical_size >> tblk->sb->s_blocksize_bits);
+		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
+
+		/* format a maplock for txUpdateMap() to update bPMAP
+		 * for alloc of new/extended extents of XAD[lwm:next)
+		 * from the page itself;
+		 * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag.
+		 */
+		lwm = xtlck->lwm.offset;
+		if (lwm == 0)
+			lwm = XTPAGEMAXSLOT;
+
+		if (lwm == next)
+			goto out;
+		assert(lwm < next);
+		tlck->flag |= tlckUPDATEMAP;
+		xadlock->flag = mlckALLOCXADLIST;
+		xadlock->count = next - lwm;
+		if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) {
+			int i;
+			/*
+			 * Lazy commit may allow xtree to be modified before
+			 * txUpdateMap runs.  Copy xad into linelock to
+			 * preserve correct data.
+			 */
+			xadlock->xdlist = &xtlck->pxdlock;
+			memcpy(xadlock->xdlist, &p->xad[lwm],
+			       sizeof(xad_t) * xadlock->count);
+
+			for (i = 0; i < xadlock->count; i++)
+				p->xad[lwm + i].flag &=
+				    ~(XAD_NEW | XAD_EXTENDED);
+		} else {
+			/*
+			 * xdlist will point into the inode's xtree; ensure
+			 * that the transaction is not committed lazily.
+			 */
+			xadlock->xdlist = &p->xad[lwm];
+			tblk->xflag &= ~COMMIT_LAZY;
+		}
+		jFYI(1,
+		     ("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d count:%d\n",
+		      tlck->ip, mp, tlck, lwm, xadlock->count));
+
+		maplock->index = 1;
+
+	      out:
+		/* mark page as homeward bound */
+		tlck->flag |= tlckWRITEPAGE;
+
+		return;
+	}
+
+	/*
+	 *      page deletion: file deletion/truncation (ref. xtTruncate())
+	 *
+	 * (page will be invalidated after log is written and bmap
+	 * is updated from the page);
+	 */
+	if (tlck->type & tlckFREE) {
+		/* LOG_NOREDOPAGE log for NoRedoPage filter:
+		 * if page free from file delete, NoRedoFile filter from
+		 * inode image of zero link count will subsume NoRedoPage
+		 * filters for each page;
+		 * if page free from file truncation, write NoRedoPage
+		 * filter;
+		 *
+		 * update of block allocation map for the page itself:
+		 * if page free from deletion and truncation, LOG_UPDATEMAP
+		 * log for the page itself is generated from processing
+		 * its parent page xad entries;
+		 */
+		/* if page free from file truncation, log LOG_NOREDOPAGE
+		 * of the deleted page for logredo() to start NoRedoPage
+		 * filter for the page;
+		 */
+		if (tblk->xflag & COMMIT_TRUNCATE) {
+			/* write NOREDOPAGE for the page */
+			lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
+			PXDaddress(pxd, mp->index);
+			PXDlength(pxd,
+				  mp->logical_size >> tblk->sb->
+				  s_blocksize_bits);
+			lrd->backchain =
+			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
+
+			if (tlck->type & tlckBTROOT) {
+				/* Empty xtree must be logged */
+				lrd->type = cpu_to_le16(LOG_REDOPAGE);
+				lrd->backchain =
+				    cpu_to_le32(lmLog(log, tblk, lrd, tlck));
+			}
+		}
+
+		/* init LOG_UPDATEMAP of the freed extents
+		 * XAD[XTENTRYSTART:hwm) from the deleted page itself
+		 * for logredo() to update bmap;
+		 */
+		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
+		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST);
+		xtlck = (xtlock_t *) & tlck->lock;
+		hwm = xtlck->hwm.offset;
+		lrd->log.updatemap.nxd =
+		    cpu_to_le16(hwm - XTENTRYSTART + 1);
+		/* reformat linelock for lmLog() */
+		xtlck->header.offset = XTENTRYSTART;
+		xtlck->header.length = hwm - XTENTRYSTART + 1;
+		xtlck->index = 1;
+		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
+
+		/* format a maplock for txUpdateMap() to update bmap
+		 * to free extents of XAD[XTENTRYSTART:hwm) from the
+		 * deleted page itself;
+		 */
+		tlck->flag |= tlckUPDATEMAP;
+		xadlock->flag = mlckFREEXADLIST;
+		xadlock->count = hwm - XTENTRYSTART + 1;
+		if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) {
+			/*
+			 * Lazy commit may allow xtree to be modified before
+			 * txUpdateMap runs.  Copy xad into linelock to
+			 * preserve correct data.
+			 */
+			xadlock->xdlist = &xtlck->pxdlock;
+			memcpy(xadlock->xdlist, &p->xad[XTENTRYSTART],
+			       sizeof(xad_t) * xadlock->count);
+		} else {
+			/*
+			 * xdlist will point into the inode's xtree; ensure
+			 * that the transaction is not committed lazily unless
+			 * we're deleting the inode (unlink).  In that case
+			 * we have special logic for the inode to be
+			 * unlocked by the lazy commit thread.
+			 */
+			xadlock->xdlist = &p->xad[XTENTRYSTART];
+			if ((tblk->xflag & COMMIT_LAZY) &&
+			    (tblk->xflag & COMMIT_DELETE) &&
+			    (tblk->ip == ip))
+				set_cflag(COMMIT_Holdlock, ip);
+			else
+				tblk->xflag &= ~COMMIT_LAZY;
+		}
+		jFYI(1,
+		     ("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2\n",
+		      tlck->ip, mp, xadlock->count));
+
+		maplock->index = 1;
+
+		/* mark page as invalid */
+		if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode))
+		    && !(tlck->type & tlckBTROOT))
+			tlck->flag |= tlckFREEPAGE;
+		/*
+		   else (tblk->xflag & COMMIT_PMAP)
+		   ? release the page;
+		 */
+		return;
+	}
+
+	/*
+	 *      page/entry truncation: file truncation (ref. xtTruncate())
+	 *
+	 *     |----------+------+------+---------------|
+	 *                |      |      |
+	 *                |      |     hwm - hwm before truncation
+	 *                |     next - truncation point
+	 *               lwm - lwm before truncation
+	 * header ?
+	 */
+	if (tlck->type & tlckTRUNCATE) {
+		pxd_t tpxd;	/* truncated extent of xad */
+
+		/*
+		 * For truncation the entire linelock may be used, so it would
+		 * be difficult to store xad list in linelock itself.
+		 * Therefore, we'll just force transaction to be committed
+		 * synchronously, so that xtree pages won't be changed before
+		 * txUpdateMap runs.
+		 */
+		tblk->xflag &= ~COMMIT_LAZY;
+		lwm = xtlck->lwm.offset;
+		if (lwm == 0)
+			lwm = XTPAGEMAXSLOT;
+		hwm = xtlck->hwm.offset;
+
+		/*
+		 *      write log records
+		 */
+		/*
+		 * allocate entries XAD[lwm:next]:
+		 */
+		if (lwm < next) {
+			/* log after-image for logredo():
+			 * logredo() will update bmap for alloc of new/extended
+			 * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from
+			 * after-image of XADlist;
+			 * logredo() resets (XAD_NEW|XAD_EXTEND) flag when
+			 * applying the after-image to the meta-data page.
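+			 *
+			 * (worked example, for illustration only: with
+			 *  lwm == 2, next == 5 and hwm == 7 at truncation
+			 *  time, XAD[2:5) are logged as allocations here,
+			 *  the three entries XAD[5..7] are logged as frees
+			 *  further below, and no truncated delta extent is
+			 *  logged since hwm != next - 1.)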
+ */ + lrd->type = cpu_to_le16(LOG_REDOPAGE); + PXDaddress(pxd, mp->index); + PXDlength(pxd, + mp->logical_size >> tblk->sb-> + s_blocksize_bits); + lrd->backchain = + cpu_to_le32(lmLog(log, tblk, lrd, tlck)); + } + + /* + * truncate entry XAD[hwm == next - 1]: + */ + if (hwm == next - 1) { + /* init LOG_UPDATEMAP for logredo() to update bmap for + * free of truncated delta extent of the truncated + * entry XAD[next - 1]: + * (xtlck->pxdlock = truncated delta extent); + */ + pxdlock = (pxdlock_t *) & xtlck->pxdlock; + /* assert(pxdlock->type & tlckTRUNCATE); */ + lrd->type = cpu_to_le16(LOG_UPDATEMAP); + lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); + lrd->log.updatemap.nxd = cpu_to_le16(1); + lrd->log.updatemap.pxd = pxdlock->pxd; + tpxd = pxdlock->pxd; /* save to format maplock */ + lrd->backchain = + cpu_to_le32(lmLog(log, tblk, lrd, NULL)); + } + + /* + * free entries XAD[next:hwm]: + */ + if (hwm >= next) { + /* init LOG_UPDATEMAP of the freed extents + * XAD[next:hwm] from the deleted page itself + * for logredo() to update bmap; + */ + lrd->type = cpu_to_le16(LOG_UPDATEMAP); + lrd->log.updatemap.type = + cpu_to_le16(LOG_FREEXADLIST); + xtlck = (xtlock_t *) & tlck->lock; + hwm = xtlck->hwm.offset; + lrd->log.updatemap.nxd = + cpu_to_le16(hwm - next + 1); + /* reformat linelock for lmLog() */ + xtlck->header.offset = next; + xtlck->header.length = hwm - next + 1; + xtlck->index = 1; + lrd->backchain = + cpu_to_le32(lmLog(log, tblk, lrd, tlck)); + } + + /* + * format maplock(s) for txUpdateMap() to update bmap + */ + maplock->index = 0; + + /* + * allocate entries XAD[lwm:next): + */ + if (lwm < next) { + /* format a maplock for txUpdateMap() to update bPMAP + * for alloc of new/extended extents of XAD[lwm:next) + * from the page itself; + * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. 
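+			 *
+			 * (note: maplock->index was reset to 0 above and is
+			 *  incremented once per maplock formatted below;
+			 *  txUpdateMap() walks exactly that many entries.)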
+			 */
+			tlck->flag |= tlckUPDATEMAP;
+			xadlock->flag = mlckALLOCXADLIST;
+			xadlock->count = next - lwm;
+			xadlock->xdlist = &p->xad[lwm];
+
+			jFYI(1,
+			     ("xtLog: alloc ip:0x%p mp:0x%p count:%d lwm:%d next:%d\n",
+			      tlck->ip, mp, xadlock->count, lwm, next));
+			maplock->index++;
+			xadlock++;
+		}
+
+		/*
+		 * truncate entry XAD[hwm == next - 1]:
+		 */
+		if (hwm == next - 1) {
+			pxdlock_t *pxdlock;
+
+			/* format a maplock for txUpdateMap() to update bmap
+			 * to free truncated delta extent of the truncated
+			 * entry XAD[next - 1];
+			 * (xtlck->pxdlock = truncated delta extent);
+			 */
+			tlck->flag |= tlckUPDATEMAP;
+			pxdlock = (pxdlock_t *) xadlock;
+			pxdlock->flag = mlckFREEPXD;
+			pxdlock->count = 1;
+			pxdlock->pxd = tpxd;
+
+			jFYI(1,
+			     ("xtLog: truncate ip:0x%p mp:0x%p count:%d hwm:%d\n",
+			      ip, mp, pxdlock->count, hwm));
+			maplock->index++;
+			xadlock++;
+		}
+
+		/*
+		 * free entries XAD[next:hwm]:
+		 */
+		if (hwm >= next) {
+			/* format a maplock for txUpdateMap() to update bmap
+			 * to free extents of XAD[next:hwm] from the deleted
+			 * page itself;
+			 */
+			tlck->flag |= tlckUPDATEMAP;
+			xadlock->flag = mlckFREEXADLIST;
+			xadlock->count = hwm - next + 1;
+			xadlock->xdlist = &p->xad[next];
+
+			jFYI(1,
+			     ("xtLog: free ip:0x%p mp:0x%p count:%d next:%d hwm:%d\n",
+			      tlck->ip, mp, xadlock->count, next, hwm));
+			maplock->index++;
+		}
+
+		/* mark page as homeward bound */
+		tlck->flag |= tlckWRITEPAGE;
+	}
+	return;
+}
+
+
+/*
+ *      mapLog()
+ *
+ * function:    log from maplock of freed data extents;
+ */
+void mapLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck)
+{
+	pxdlock_t *pxdlock;
+	int i, nlock;
+	pxd_t *pxd;
+
+	/*
+	 *      page relocation: free the source page extent
+	 *
+	 * a maplock for txUpdateMap() for free of the page
+	 * has been formatted at txLock() time saving the src
+	 * relocated page address;
+	 */
+	if (tlck->type & tlckRELOCATE) {
+		/* log LOG_NOREDOPAGE of the old relocated page
+		 * for logredo() to start NoRedoPage filter;
+		 */
+		lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
+		pxdlock = (pxdlock_t *) & tlck->lock;
+		pxd = &lrd->log.redopage.pxd;
+		*pxd = pxdlock->pxd;
+		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
+
+		/* (N.B. currently, logredo() does NOT update bmap
+		 * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE);
+		 * if page free from relocation, LOG_UPDATEMAP log is
+		 * specifically generated now for logredo()
+		 * to update bmap for free of src relocated page;
+		 * (new flag LOG_RELOCATE may be introduced which will
+		 * inform logredo() to start NORedoPage filter and also
+		 * update block allocation map at the same time, thus
+		 * avoiding an extra log write);
+		 */
+		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
+		lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
+		lrd->log.updatemap.nxd = cpu_to_le16(1);
+		lrd->log.updatemap.pxd = pxdlock->pxd;
+		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL));
+
+		/* a maplock for txUpdateMap() for free of the page
+		 * has been formatted at txLock() time;
+		 */
+		tlck->flag |= tlckUPDATEMAP;
+		return;
+	}
+	/*
+	 * Otherwise it's not a relocate request
+	 */
+	else {
+		/* log LOG_UPDATEMAP for logredo() to update bmap for
+		 * free of truncated/relocated delta extent of the data;
+		 * e.g.: external EA extent, relocated/truncated extent
+		 * from xtTailgate();
+		 */
+		lrd->type = cpu_to_le16(LOG_UPDATEMAP);
+		pxdlock = (pxdlock_t *) & tlck->lock;
+		nlock = pxdlock->index;
+		for (i = 0; i < nlock; i++, pxdlock++) {
+			if (pxdlock->flag & mlckALLOCPXD)
+				lrd->log.updatemap.type =
+				    cpu_to_le16(LOG_ALLOCPXD);
+			else
+				lrd->log.updatemap.type =
+				    cpu_to_le16(LOG_FREEPXD);
+			lrd->log.updatemap.nxd = cpu_to_le16(1);
+			lrd->log.updatemap.pxd = pxdlock->pxd;
+			lrd->backchain =
+			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
+			jFYI(1, ("mapLog: xaddr:0x%lx xlen:0x%x\n",
+				 (ulong) addressPXD(&pxdlock->pxd),
+				 lengthPXD(&pxdlock->pxd)));
+		}
+
+		/* update bmap */
+		tlck->flag |= tlckUPDATEMAP;
+	}
+}
+
+
+/*
+ *      txEA()
+ *
+ * function:    acquire maplock for EA/ACL extents or
+ *              set COMMIT_INLINE flag;
+ */
+void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
+{
+	tlock_t *tlck = NULL;
+	pxdlock_t *maplock = NULL, *pxdlock = NULL;
+
+	/*
+	 * format maplock for alloc of new EA extent
+	 */
+	if (newea) {
+		/* Since the newea could be a completely zeroed entry we need to
+		 * check for the two flags which indicate we should actually
+		 * commit new EA data
+		 */
+		if (newea->flag & DXD_EXTENT) {
+			tlck = txMaplock(tid, ip, tlckMAP);
+			maplock = (pxdlock_t *) & tlck->lock;
+			pxdlock = (pxdlock_t *) maplock;
+			pxdlock->flag = mlckALLOCPXD;
+			PXDaddress(&pxdlock->pxd, addressDXD(newea));
+			PXDlength(&pxdlock->pxd, lengthDXD(newea));
+			pxdlock++;
+			maplock->index = 1;
+		} else if (newea->flag & DXD_INLINE) {
+			tlck = NULL;
+
+			set_cflag(COMMIT_Inlineea, ip);
+		}
+	}
+
+	/*
+	 * format maplock for free of old EA extent
+	 */
+	if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) {
+		if (tlck == NULL) {
+			tlck = txMaplock(tid, ip, tlckMAP);
+			maplock = (pxdlock_t *) & tlck->lock;
+			pxdlock = (pxdlock_t *) maplock;
+			maplock->index = 0;
+		}
+		pxdlock->flag = mlckFREEPXD;
+		PXDaddress(&pxdlock->pxd, addressDXD(oldea));
+		PXDlength(&pxdlock->pxd, lengthDXD(oldea));
+		maplock->index++;
+	}
+}
+
+
+/*
+ *      txForce()
+ *
+ * function:    synchronously write pages locked by transaction
+ *              after txLog() but before txUpdateMap();
+ */
+void txForce(tblock_t * tblk)
+{
+	tlock_t *tlck;
+	lid_t lid, next;
+	metapage_t *mp;
+
+	/*
+	 * reverse the order of transaction tlocks in
+	 * careful update order of address index pages
+	 * (right to left, bottom up)
+	 */
+	tlck = lid_to_tlock(tblk->next);
+	lid = tlck->next;
+	tlck->next = 0;
+	while (lid) {
+		tlck =
lid_to_tlock(lid); + next = tlck->next; + tlck->next = tblk->next; + tblk->next = lid; + lid = next; + } + + /* + * synchronously write the page, and + * hold the page for txUpdateMap(); + */ + for (lid = tblk->next; lid; lid = next) { + tlck = lid_to_tlock(lid); + next = tlck->next; + + if ((mp = tlck->mp) != NULL && + (tlck->type & tlckBTROOT) == 0) { + assert(mp->xflag & COMMIT_PAGE); + + if (tlck->flag & tlckWRITEPAGE) { + tlck->flag &= ~tlckWRITEPAGE; + + /* do not release page to freelist */ + assert(atomic_read(&mp->nohomeok)); + hold_metapage(mp, 0); + write_metapage(mp); + } + } + } +} + + +/* + * txUpdateMap() + * + * function: update persistent allocation map (and working map + * if appropriate); + * + * parameter: + */ +static void txUpdateMap(tblock_t * tblk) +{ + struct inode *ip; + struct inode *ipimap; + lid_t lid; + tlock_t *tlck; + maplock_t *maplock; + pxdlock_t pxdlock; + int maptype; + int k, nlock; + metapage_t *mp = 0; + + ipimap = JFS_SBI(tblk->sb)->ipimap; + + maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP; + + + /* + * update block allocation map + * + * update allocation state in pmap (and wmap) and + * update lsn of the pmap page; + */ + /* + * scan each tlock/page of transaction for block allocation/free: + * + * for each tlock/page of transaction, update map. + * ? are there tlock for pmap and pwmap at the same time ? + */ + for (lid = tblk->next; lid; lid = tlck->next) { + tlck = lid_to_tlock(lid); + + if ((tlck->flag & tlckUPDATEMAP) == 0) + continue; + + if (tlck->flag & tlckFREEPAGE) { + /* + * Another thread may attempt to reuse freed space + * immediately, so we want to get rid of the metapage + * before anyone else has a chance to get it. + * Lock metapage, update maps, then invalidate + * the metapage. + */ + mp = tlck->mp; + ASSERT(mp->xflag & COMMIT_PAGE); + hold_metapage(mp, 0); + } + + /* + * extent list: + * . in-line PXD list: + * . out-of-line XAD list: + */ + maplock = (maplock_t *) & tlck->lock; + nlock = maplock->index; + + for (k = 0; k < nlock; k++, maplock++) { + /* + * allocate blocks in persistent map: + * + * blocks have been allocated from wmap at alloc time; + */ + if (maplock->flag & mlckALLOC) { + txAllocPMap(ipimap, maplock, tblk); + } + /* + * free blocks in persistent and working map: + * blocks will be freed in pmap and then in wmap; + * + * ? 
tblock specifies the PMAP/PWMAP based upon
+			 * transaction
+			 *
+			 * free blocks in persistent map:
+			 * blocks will be freed from wmap at last reference
+			 * release of the object for regular files;
+			 *
+			 * Always free blocks from both persistent & working
+			 * maps for directories
+			 */
+			else {	/* (maplock->flag & mlckFREE) */
+
+				if (S_ISDIR(tlck->ip->i_mode))
+					txFreeMap(ipimap, maplock,
+						  tblk, COMMIT_PWMAP);
+				else
+					txFreeMap(ipimap, maplock,
+						  tblk, maptype);
+			}
+		}
+		if (tlck->flag & tlckFREEPAGE) {
+			if (!(tblk->flag & tblkGC_LAZY)) {
+				/* This is equivalent to txRelease */
+				ASSERT(mp->lid == lid);
+				tlck->mp->lid = 0;
+			}
+			assert(atomic_read(&mp->nohomeok) == 1);
+			atomic_dec(&mp->nohomeok);
+			discard_metapage(mp);
+			tlck->mp = 0;
+		}
+	}
+	/*
+	 *      update inode allocation map
+	 *
+	 * update allocation state in pmap and
+	 * update lsn of the pmap page;
+	 * update in-memory inode flag/state
+	 *
+	 * unlock mapper/write lock
+	 */
+	if (tblk->xflag & COMMIT_CREATE) {
+		ip = tblk->ip;
+
+		ASSERT(test_cflag(COMMIT_New, ip));
+		clear_cflag(COMMIT_New, ip);
+
+		diUpdatePMap(ipimap, ip->i_ino, FALSE, tblk);
+		ipimap->i_state |= I_DIRTY;
+		/* update persistent block allocation map
+		 * for the allocation of inode extent;
+		 */
+		pxdlock.flag = mlckALLOCPXD;
+		pxdlock.pxd = JFS_IP(ip)->ixpxd;
+		pxdlock.index = 1;
+		txAllocPMap(ip, (maplock_t *) & pxdlock, tblk);
+		iput(ip);
+	} else if (tblk->xflag & COMMIT_DELETE) {
+		ip = tblk->ip;
+		diUpdatePMap(ipimap, ip->i_ino, TRUE, tblk);
+		ipimap->i_state |= I_DIRTY;
+		if (test_and_clear_cflag(COMMIT_Holdlock, ip)) {
+			if (tblk->flag & tblkGC_LAZY)
+				IWRITE_UNLOCK(ip);
+		}
+		iput(ip);
+	}
+}
+
+
+/*
+ *      txAllocPMap()
+ *
+ * function: allocate from persistent map;
+ *
+ * parameter:
+ *      ipbmap  -
+ *      maplock -
+ *              xad list:
+ *              pxd:
+ *
+ *      maptype -
+ *              allocate from persistent map;
+ *              free from persistent map;
+ *              (e.g., tmp file - free from working map at release
+ *              of last reference);
+ *              free from persistent and working map;
+ *
+ *      lsn     - log sequence number;
+ */
+static void txAllocPMap(struct inode *ip, maplock_t * maplock,
+			tblock_t * tblk)
+{
+	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
+	xdlistlock_t *xadlistlock;
+	xad_t *xad;
+	s64 xaddr;
+	int xlen;
+	pxdlock_t *pxdlock;
+	xdlistlock_t *pxdlistlock;
+	pxd_t *pxd;
+	int n;
+
+	/*
+	 * allocate from persistent map;
+	 */
+	if (maplock->flag & mlckALLOCXADLIST) {
+		xadlistlock = (xdlistlock_t *) maplock;
+		xad = xadlistlock->xdlist;
+		for (n = 0; n < xadlistlock->count; n++, xad++) {
+			if (xad->flag & (XAD_NEW | XAD_EXTENDED)) {
+				xaddr = addressXAD(xad);
+				xlen = lengthXAD(xad);
+				dbUpdatePMap(ipbmap, FALSE, xaddr,
+					     (s64) xlen, tblk);
+				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
+				jFYI(1,
+				     ("allocPMap: xaddr:0x%lx xlen:%d\n",
+				      (ulong) xaddr, xlen));
+			}
+		}
+	} else if (maplock->flag & mlckALLOCPXD) {
+		pxdlock = (pxdlock_t *) maplock;
+		xaddr = addressPXD(&pxdlock->pxd);
+		xlen = lengthPXD(&pxdlock->pxd);
+		dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen, tblk);
+		jFYI(1,
+		     ("allocPMap: xaddr:0x%lx xlen:%d\n", (ulong) xaddr,
+		      xlen));
+	} else {		/* (maplock->flag & mlckALLOCPXDLIST) */
+
+		pxdlistlock = (xdlistlock_t *) maplock;
+		pxd = pxdlistlock->xdlist;
+		for (n = 0; n < pxdlistlock->count; n++, pxd++) {
+			xaddr = addressPXD(pxd);
+			xlen = lengthPXD(pxd);
+			dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen,
+				     tblk);
+			jFYI(1,
+			     ("allocPMap: xaddr:0x%lx xlen:%d\n",
+			      (ulong) xaddr, xlen));
+		}
+	}
+}
+
+
+/*
+ *      txFreeMap()
+ *
+ * function:    free from persistent and/or working map;
+ *
+ * todo: optimization
+ */
+void txFreeMap(struct inode *ip,
+	       maplock_t * maplock, tblock_t * tblk, int maptype)
+{
+	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
+	xdlistlock_t *xadlistlock;
+	xad_t *xad;
+	s64 xaddr;
+	int xlen;
+	pxdlock_t *pxdlock;
+	xdlistlock_t *pxdlistlock;
+	pxd_t *pxd;
+	int n;
+
+	jFYI(1,
+	     ("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x\n",
+	      tblk, maplock, maptype));
+
+	/*
+	 * free from persistent map;
+	 */
+	if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) {
+		if (maplock->flag & mlckFREEXADLIST) {
+			xadlistlock = (xdlistlock_t *) maplock;
+			xad = xadlistlock->xdlist;
+			for (n = 0; n < xadlistlock->count; n++, xad++) {
+				if (!(xad->flag & XAD_NEW)) {
+					xaddr = addressXAD(xad);
+					xlen = lengthXAD(xad);
+					dbUpdatePMap(ipbmap, TRUE, xaddr,
+						     (s64) xlen, tblk);
+					jFYI(1,
+					     ("freePMap: xaddr:0x%lx xlen:%d\n",
+					      (ulong) xaddr, xlen));
+				}
+			}
+		} else if (maplock->flag & mlckFREEPXD) {
+			pxdlock = (pxdlock_t *) maplock;
+			xaddr = addressPXD(&pxdlock->pxd);
+			xlen = lengthPXD(&pxdlock->pxd);
+			dbUpdatePMap(ipbmap, TRUE, xaddr, (s64) xlen,
+				     tblk);
+			jFYI(1,
+			     ("freePMap: xaddr:0x%lx xlen:%d\n",
+			      (ulong) xaddr, xlen));
+		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
+
+			pxdlistlock = (xdlistlock_t *) maplock;
+			pxd = pxdlistlock->xdlist;
+			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
+				xaddr = addressPXD(pxd);
+				xlen = lengthPXD(pxd);
+				dbUpdatePMap(ipbmap, TRUE, xaddr,
+					     (s64) xlen, tblk);
+				jFYI(1,
+				     ("freePMap: xaddr:0x%lx xlen:%d\n",
+				      (ulong) xaddr, xlen));
+			}
+		}
+	}
+
+	/*
+	 * free from working map;
+	 */
+	if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) {
+		if (maplock->flag & mlckFREEXADLIST) {
+			xadlistlock = (xdlistlock_t *) maplock;
+			xad = xadlistlock->xdlist;
+			for (n = 0; n < xadlistlock->count; n++, xad++) {
+				xaddr = addressXAD(xad);
+				xlen = lengthXAD(xad);
+				dbFree(ip, xaddr, (s64) xlen);
+				xad->flag = 0;
+				jFYI(1,
+				     ("freeWMap: xaddr:0x%lx xlen:%d\n",
+				      (ulong) xaddr, xlen));
+			}
+		} else if (maplock->flag & mlckFREEPXD) {
+			pxdlock = (pxdlock_t *) maplock;
+			xaddr = addressPXD(&pxdlock->pxd);
+			xlen = lengthPXD(&pxdlock->pxd);
+			dbFree(ip, xaddr, (s64) xlen);
+			jFYI(1,
+			     ("freeWMap: xaddr:0x%lx xlen:%d\n",
+			      (ulong) xaddr, xlen));
+		} else {	/* (maplock->flag & mlckFREEPXDLIST) */
+
+			pxdlistlock = (xdlistlock_t *) maplock;
+			pxd = pxdlistlock->xdlist;
+			for (n = 0; n < pxdlistlock->count; n++, pxd++) {
+				xaddr = addressPXD(pxd);
+				xlen = lengthPXD(pxd);
+				dbFree(ip, xaddr, (s64) xlen);
+				jFYI(1,
+				     ("freeWMap: xaddr:0x%lx xlen:%d\n",
+				      (ulong) xaddr, xlen));
+			}
+		}
+	}
+}
+
+
+/*
+ *      txFreelock()
+ *
+ * function:    remove tlock from inode anonymous locklist
+ */
+void txFreelock(struct inode *ip)
+{
+	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
+	tlock_t *xtlck, *tlck;
+	lid_t xlid = 0, lid;
+
+	if (!jfs_ip->atlhead)
+		return;
+
+	xtlck = (tlock_t *) &jfs_ip->atlhead;
+
+	while ((lid = xtlck->next)) {
+		tlck = lid_to_tlock(lid);
+		if (tlck->flag & tlckFREELOCK) {
+			xtlck->next = tlck->next;
+			txLockFree(lid);
+		} else {
+			xtlck = tlck;
+			xlid = lid;
+		}
+	}
+
+	if (jfs_ip->atlhead)
+		jfs_ip->atltail = xlid;
+	else {
+		jfs_ip->atltail = 0;
+		/*
+		 * If inode was on anon_list, remove it
+		 */
+		TXN_LOCK();
+		list_del_init(&jfs_ip->anon_inode_list);
+		TXN_UNLOCK();
+	}
+}
+
+
+/*
+ *      txAbort()
+ *
+ * function: abort tx before commit;
+ *
+ * frees line-locks and segment locks for all
+ * segments in comdata structure.
+ * Optionally sets state of file-system to FM_DIRTY in super-block.
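+ *
+ * (minimal caller sketch, assuming the usual txBegin()/txEnd()
+ * pairing used elsewhere in this file:
+ *
+ *	tid = txBegin(sb, 0);
+ *	... tlocks acquired, then an unrecoverable error is hit ...
+ *	txAbort(tid, 1);	- marks the file system FM_DIRTY -
+ *	txEnd(tid);		- caller still frees the tblock -
+ * )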
+ * log ages of page-frames in memory for which the caller holds
+ * tlocks are reset to 0 (to avoid logwrap).
+ */
+void txAbort(tid_t tid, int dirty)
+{
+	lid_t lid, next;
+	metapage_t *mp;
+	tblock_t *tblk = tid_to_tblock(tid);
+
+	jEVENT(1, ("txAbort: tid:%d dirty:0x%x\n", tid, dirty));
+
+	/*
+	 * free tlocks of the transaction
+	 */
+	for (lid = tblk->next; lid; lid = next) {
+		next = lid_to_tlock(lid)->next;
+
+		mp = lid_to_tlock(lid)->mp;
+
+		if (mp) {
+			mp->lid = 0;
+
+			/*
+			 * reset lsn of page to avoid logwrap:
+			 *
+			 * (page may have been previously committed by another
+			 * transaction(s) but has not been paged, i.e.,
+			 * it may be on logsync list even though it has not
+			 * been logged for the current tx.)
+			 */
+			if (mp->xflag & COMMIT_PAGE && mp->lsn)
+				LogSyncRelease(mp);
+		}
+		/* insert tlock at head of freelist */
+		TXN_LOCK();
+		txLockFree(lid);
+		TXN_UNLOCK();
+	}
+
+	/* caller will free the transaction block */
+
+	tblk->next = tblk->last = 0;
+
+	/*
+	 * mark filesystem dirty
+	 */
+	if (dirty)
+		updateSuper(tblk->sb, FM_DIRTY);
+
+	return;
+}
+
+
+/*
+ *      txAbortCommit()
+ *
+ * function: abort commit.
+ *
+ * frees tlocks of transaction; line-locks and segment locks for all
+ * segments in comdata structure; frees malloc storage;
+ * sets state of file-system to FM_DIRTY in super-block.
+ * log ages of page-frames in memory for which the caller holds
+ * tlocks are reset to 0 (to avoid logwrap).
+ */
+void txAbortCommit(commit_t * cd, int exval)
+{
+	tblock_t *tblk;
+	tid_t tid;
+	lid_t lid, next;
+	metapage_t *mp;
+
+	assert(exval == EIO || exval == ENOMEM);
+	jEVENT(1, ("txAbortCommit: cd:0x%p\n", cd));
+
+	/*
+	 * free tlocks of the transaction
+	 */
+	tid = cd->tid;
+	tblk = tid_to_tblock(tid);
+	for (lid = tblk->next; lid; lid = next) {
+		next = lid_to_tlock(lid)->next;
+
+		mp = lid_to_tlock(lid)->mp;
+		if (mp) {
+			mp->lid = 0;
+
+			/*
+			 * reset lsn of page to avoid logwrap;
+			 */
+			if (mp->xflag & COMMIT_PAGE)
+				LogSyncRelease(mp);
+		}
+
+		/* insert tlock at head of freelist */
+		TXN_LOCK();
+		txLockFree(lid);
+		TXN_UNLOCK();
+	}
+
+	tblk->next = tblk->last = 0;
+
+	/* free the transaction block */
+	txEnd(tid);
+
+	/*
+	 * mark filesystem dirty
+	 */
+	updateSuper(cd->sb, FM_DIRTY);
+}
+
+
+/*
+ *      txLazyCommit(void)
+ *
+ *	All transactions except those changing ipimap (COMMIT_FORCE) are
+ *	processed by this routine.  This ensures that the inode and block
+ *	allocation maps are updated in order.  For synchronous transactions,
+ *	let the user thread finish processing after txUpdateMap() is called.
+ */
+void txLazyCommit(tblock_t * tblk)
+{
+	log_t *log;
+
+	while (((tblk->flag & tblkGC_READY) == 0) &&
+	       ((tblk->flag & tblkGC_UNLOCKED) == 0)) {
+		/* We must have gotten ahead of the user thread
+		 */
+		jFYI(1,
+		     ("jfs_lazycommit: tblk 0x%p not unlocked\n", tblk));
+		schedule();
+	}
+
+	jFYI(1, ("txLazyCommit: processing tblk 0x%p\n", tblk));
+
+	txUpdateMap(tblk);
+
+	log = (log_t *) JFS_SBI(tblk->sb)->log;
+
+	spin_lock_irq(&log->gclock);	// LOGGC_LOCK
+
+	tblk->flag |= tblkGC_COMMITTED;
+
+	if ((tblk->flag & tblkGC_READY) || (tblk->flag & tblkGC_LAZY))
+		log->gcrtc--;
+
+	if (tblk->flag & tblkGC_READY)
+		wake_up(&tblk->gcwait);	// LOGGC_WAKEUP
+
+	spin_unlock_irq(&log->gclock);	// LOGGC_UNLOCK
+
+	if (tblk->flag & tblkGC_LAZY) {
+		txUnlock(tblk, 0);
+		tblk->flag &= ~tblkGC_LAZY;
+		txEnd(tblk - TxBlock);	/* Convert back to tid */
+	}
+
+	jFYI(1, ("txLazyCommit: done: tblk = 0x%p\n", tblk));
+}
+
+/*
+ *      jfs_lazycommit(void)
+ *
+ *	To be run as a kernel daemon.
If lbmIODone is called in an interrupt
+ *	context, or where blocking is not wanted, this routine will process
+ *	committed transactions from the unlock queue.
+ */
+int jfs_lazycommit(void)
+{
+	int WorkDone;
+	tblock_t *tblk;
+	unsigned long flags;
+
+	lock_kernel();
+
+	daemonize();
+	current->tty = NULL;
+	strcpy(current->comm, "jfsCommit");
+
+	unlock_kernel();
+
+	jfsCommitTask = current;
+
+	spin_lock_irq(&current->sigmask_lock);
+	siginitsetinv(&current->blocked,
+		      sigmask(SIGHUP) | sigmask(SIGKILL) | sigmask(SIGSTOP)
+		      | sigmask(SIGCONT));
+	spin_unlock_irq(&current->sigmask_lock);
+
+	LAZY_LOCK_INIT();
+	TxAnchor.unlock_queue = TxAnchor.unlock_tail = 0;
+
+	complete(&jfsIOwait);
+
+	do {
+		LAZY_LOCK(flags);
+restart:
+		WorkDone = 0;
+		while ((tblk = TxAnchor.unlock_queue)) {
+			/*
+			 * We can't get ahead of user thread.  Spinning is
+			 * simpler than blocking/waking.  We shouldn't spin
+			 * very long, since user thread shouldn't be blocking
+			 * between lmGroupCommit & txEnd.
+			 */
+			WorkDone = 1;
+
+			/*
+			 * Remove first transaction from queue
+			 */
+			TxAnchor.unlock_queue = tblk->cqnext;
+			tblk->cqnext = 0;
+			if (TxAnchor.unlock_tail == tblk)
+				TxAnchor.unlock_tail = 0;
+
+			LAZY_UNLOCK(flags);
+			txLazyCommit(tblk);
+
+			/*
+			 * We can be running indefinitely if other processors
+			 * are adding transactions to this list
+			 */
+			if (current->need_resched)
+				schedule();
+			LAZY_LOCK(flags);
+		}
+
+		if (WorkDone)
+			goto restart;
+
+		LAZY_UNLOCK(flags);
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+	} while (!jfs_thread_stopped());
+
+	if (TxAnchor.unlock_queue)
+		jERROR(1, ("jfs_lazycommit being killed with pending transactions!\n"));
+	else
+		jFYI(1, ("jfs_lazycommit being killed\n"));
+	complete(&jfsIOwait);
+	return 0;
+}
+
+void txLazyUnlock(tblock_t * tblk)
+{
+	unsigned long flags;
+
+	LAZY_LOCK(flags);
+
+	if (TxAnchor.unlock_tail)
+		TxAnchor.unlock_tail->cqnext = tblk;
+	else
+		TxAnchor.unlock_queue = tblk;
+	TxAnchor.unlock_tail = tblk;
+	tblk->cqnext = 0;
+	LAZY_UNLOCK(flags);
+	wake_up_process(jfsCommitTask);
+}
+
+static void LogSyncRelease(metapage_t * mp)
+{
+	log_t *log = mp->log;
+
+	assert(atomic_read(&mp->nohomeok));
+	assert(log);
+	atomic_dec(&mp->nohomeok);
+
+	if (atomic_read(&mp->nohomeok))
+		return;
+
+	hold_metapage(mp, 0);
+
+	LOGSYNC_LOCK(log);
+	mp->log = NULL;
+	mp->lsn = 0;
+	mp->clsn = 0;
+	log->count--;
+	list_del_init(&mp->synclist);
+	LOGSYNC_UNLOCK(log);
+
+	release_metapage(mp);
+}
+
+/*
+ *      jfs_sync(void)
+ *
+ *	To be run as a kernel daemon.  This is awakened when tlocks run low.
+ *	We write any inodes that have anonymous tlocks so they will become
+ *	available.
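+ *
+ *	(As the loop below shows, inodes whose write lock cannot be
+ *	taken are parked on anon_list2 and spliced back onto anon_list
+ *	after each pass, so nothing is lost when IWRITE_TRYLOCK() fails.)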
+ */
+int jfs_sync(void)
+{
+	struct inode *ip;
+	struct jfs_inode_info *jfs_ip;
+
+	lock_kernel();
+
+	daemonize();
+	current->tty = NULL;
+	strcpy(current->comm, "jfsSync");
+
+	unlock_kernel();
+
+	jfsSyncTask = current;
+
+	spin_lock_irq(&current->sigmask_lock);
+	siginitsetinv(&current->blocked,
+		      sigmask(SIGHUP) | sigmask(SIGKILL) | sigmask(SIGSTOP)
+		      | sigmask(SIGCONT));
+	spin_unlock_irq(&current->sigmask_lock);
+
+	complete(&jfsIOwait);
+
+	do {
+		/*
+		 * write each inode on the anonymous inode list
+		 */
+		TXN_LOCK();
+		while (TlocksLow && !list_empty(&TxAnchor.anon_list)) {
+			jfs_ip = list_entry(TxAnchor.anon_list.next,
+					    struct jfs_inode_info,
+					    anon_inode_list);
+			ip = jfs_ip->inode;
+
+			/*
+			 * We must release the TXN_LOCK since our
+			 * IWRITE_TRYLOCK implementation may still block
+			 */
+			TXN_UNLOCK();
+			if (IWRITE_TRYLOCK(ip)) {
+				/*
+				 * inode will be removed from anonymous list
+				 * when it is committed
+				 */
+				jfs_commit_inode(ip, 0);
+				IWRITE_UNLOCK(ip);
+				/*
+				 * Just to be safe.  I don't know how
+				 * long we can run without blocking
+				 */
+				if (current->need_resched)
+					schedule();
+				TXN_LOCK();
+			} else {
+				/* We can't get the write lock.  It may
+				 * be held by a thread waiting for tlock's
+				 * so let's not block here.  Save it to
+				 * put back on the anon_list.
+				 */
+
+				/*
+				 * We released TXN_LOCK, let's make sure
+				 * this inode is still there
+				 */
+				TXN_LOCK();
+				if (TxAnchor.anon_list.next !=
+				    &jfs_ip->anon_inode_list)
+					continue;
+
+				/* Take off anon_list */
+				list_del(&jfs_ip->anon_inode_list);
+
+				/* Put on anon_list2 */
+				list_add(&jfs_ip->anon_inode_list,
+					 &TxAnchor.anon_list2);
+			}
+		}
+		/* Add anon_list2 back to anon_list */
+		if (!list_empty(&TxAnchor.anon_list2)) {
+			list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list);
+			INIT_LIST_HEAD(&TxAnchor.anon_list2);
+		}
+		TXN_UNLOCK();
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
+	} while (!jfs_thread_stopped());
+
+	jFYI(1, ("jfs_sync being killed\n"));
+	complete(&jfsIOwait);
+	return 0;
+}
+
+#if CONFIG_PROC_FS
+int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length,
+		      int *eof, void *data)
+{
+	int len = 0;
+	off_t begin;
+	char *freewait;
+	char *freelockwait;
+	char *lowlockwait;
+
+	freewait =
+	    waitqueue_active(&TxAnchor.freewait) ? "active" : "empty";
+	freelockwait =
+	    waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty";
+	lowlockwait =
+	    waitqueue_active(&TxAnchor.lowlockwait) ?
"active" : "empty"; + + len += sprintf(buffer, + "JFS TxAnchor\n" + "============\n" + "freetid = %d\n" + "freewait = %s\n" + "freelock = %d\n" + "freelockwait = %s\n" + "lowlockwait = %s\n" + "tlocksInUse = %d\n" + "unlock_queue = 0x%p\n" + "unlock_tail = 0x%p\n", + TxAnchor.freetid, + freewait, + TxAnchor.freelock, + freelockwait, + lowlockwait, + TxAnchor.tlocksInUse, + TxAnchor.unlock_queue, + TxAnchor.unlock_tail); + + begin = offset; + *start = buffer + begin; + len -= begin; + + if (len > length) + len = length; + else + *eof = 1; + + if (len < 0) + len = 0; + + return len; +} +#endif diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_txnmgr.h linuxppc64_2_4/fs/jfs/jfs_txnmgr.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_txnmgr.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_txnmgr.h Tue Apr 23 11:21:51 2002 @@ -0,0 +1,315 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * Change History : + * + */ + +#ifndef _H_JFS_TXNMGR +#define _H_JFS_TXNMGR +/* + * jfs_txnmgr.h: transaction manager + */ + +#include "jfs_logmgr.h" + +/* + * Hide implementation of TxBlock and TxLock + */ +#define tid_to_tblock(tid) (&TxBlock[tid]) + +#define lid_to_tlock(lid) (&TxLock[lid]) + +/* + * transaction block + */ +typedef struct tblock { + /* + * tblock_t and jbuf_t common area: struct logsyncblk + * + * the following 5 fields are the same as struct logsyncblk + * which is common to tblock and jbuf to form logsynclist + */ + u16 xflag; /* tx commit type */ + u16 flag; /* tx commit state */ + lid_t dummy; /* Must keep structures common */ + s32 lsn; /* recovery lsn */ + struct list_head synclist; /* logsynclist link */ + + /* lock management */ + struct super_block *sb; /* 4: super block */ + lid_t next; /* 2: index of first tlock of tid */ + lid_t last; /* 2: index of last tlock of tid */ + wait_queue_head_t waitor; /* 4: tids waiting on this tid */ + + /* log management */ + u32 logtid; /* 4: log transaction id */ + /* (32) */ + + /* commit management */ + struct tblock *cqnext; /* 4: commit queue link */ + s32 clsn; /* 4: commit lsn */ + struct lbuf *bp; /* 4: */ + s32 pn; /* 4: commit record log page number */ + s32 eor; /* 4: commit record eor */ + wait_queue_head_t gcwait; /* 4: group commit event list: + * ready transactions wait on this + * event for group commit completion. 
+	 */
+	struct inode *ip;	/* 4: inode being created or deleted */
+	s32 rsrvd;		/* 4: */
+} tblock_t;			/* (64) */
+
+extern struct tblock *TxBlock;	/* transaction block table */
+
+/* commit flags: tblk->xflag */
+#define	COMMIT_SYNC	0x0001	/* synchronous commit */
+#define	COMMIT_FORCE	0x0002	/* force pageout at end of commit */
+#define	COMMIT_FLUSH	0x0004	/* init flush at end of commit */
+#define COMMIT_MAP	0x00f0
+#define	COMMIT_PMAP	0x0010	/* update pmap */
+#define	COMMIT_WMAP	0x0020	/* update wmap */
+#define	COMMIT_PWMAP	0x0040	/* update pwmap */
+#define	COMMIT_FREE	0x0f00
+#define	COMMIT_DELETE	0x0100	/* inode delete */
+#define	COMMIT_TRUNCATE	0x0200	/* file truncation */
+#define	COMMIT_CREATE	0x0400	/* inode create */
+#define	COMMIT_LAZY	0x0800	/* lazy commit */
+#define	COMMIT_PAGE	0x1000	/* Identifies element as metapage */
+#define	COMMIT_INODE	0x2000	/* Identifies element as inode */
+
+/* group commit flags tblk->flag: see jfs_logmgr.h */
+
+/*
+ *	transaction lock
+ */
+typedef struct tlock {
+	lid_t next;		/* index next lockword on tid locklist
+				 *          next lockword on freelist
+				 */
+	tid_t tid;		/* transaction id holding lock */
+
+	u16 flag;		/* 2: lock control */
+	u16 type;		/* 2: log type */
+
+	struct metapage *mp;	/* 4: object page buffer locked */
+	struct inode *ip;	/* 4: object */
+	/* (16) */
+
+	s16 lock[24];		/* 48: overlay area */
+} tlock_t;			/* (64) */
+
+extern struct tlock *TxLock;	/* transaction lock table */
+
+/*
+ *	tlock flag
+ */
+/* txLock state */
+#define tlckPAGELOCK		0x8000
+#define tlckINODELOCK		0x4000
+#define tlckLINELOCK		0x2000
+#define tlckINLINELOCK		0x1000
+/* lmLog state */
+#define tlckLOG			0x0800
+/* updateMap state */
+#define	tlckUPDATEMAP		0x0080
+/* freeLock state */
+#define tlckFREELOCK		0x0008
+#define tlckWRITEPAGE		0x0004
+#define tlckFREEPAGE		0x0002
+
+/*
+ *	tlock type
+ */
+#define	tlckTYPE		0xfe00
+#define	tlckINODE		0x8000
+#define	tlckXTREE		0x4000
+#define	tlckDTREE		0x2000
+#define	tlckMAP			0x1000
+#define	tlckEA			0x0800
+#define	tlckACL			0x0400
+#define	tlckDATA		0x0200
+#define	tlckBTROOT		0x0100
+
+#define	tlckOPERATION		0x00ff
+#define tlckGROW		0x0001	/* file grow */
+#define tlckREMOVE		0x0002	/* file delete */
+#define tlckTRUNCATE		0x0004	/* file truncate */
+#define tlckRELOCATE		0x0008	/* file/directory relocate */
+#define tlckENTRY		0x0001	/* directory insert/delete */
+#define tlckEXTEND		0x0002	/* directory extend in-line */
+#define tlckSPLIT		0x0010	/* split page */
+#define tlckNEW			0x0020	/* new page from split */
+#define tlckFREE		0x0040	/* free page */
+#define tlckRELINK		0x0080	/* update sibling pointer */
+
+/*
+ *	linelock for lmLog()
+ *
+ * note: linelock_t and its variations are overlaid
+ * at tlock.lock: watch for alignment;
+ */
+typedef struct {
+	u8 offset;		/* 1: */
+	u8 length;		/* 1: */
+} lv_t;				/* (2) */
+
+#define	TLOCKSHORT	20
+#define	TLOCKLONG	28
+
+typedef struct {
+	u16 next;		/* 2: next linelock */
+
+	s8 maxcnt;		/* 1: */
+	s8 index;		/* 1: */
+
+	u16 flag;		/* 2: */
+	u8 type;		/* 1: */
+	u8 l2linesize;		/* 1: log2 of linesize */
+	/* (8) */
+
+	lv_t lv[20];		/* 40: */
+} linelock_t;			/* (48) */
+
+#define dtlock_t	linelock_t
+#define itlock_t	linelock_t
+
+typedef struct {
+	u16 next;		/* 2: */
+
+	s8 maxcnt;		/* 1: */
+	s8 index;		/* 1: */
+
+	u16 flag;		/* 2: */
+	u8 type;		/* 1: */
+	u8 l2linesize;		/* 1: log2 of linesize */
+	/* (8) */
+
+	lv_t header;		/* 2: */
+	lv_t lwm;		/* 2: low water mark */
+	lv_t hwm;		/* 2: high water mark */
+	lv_t twm;		/* 2: */
+	/* (16) */
+
+	s32 pxdlock[8];		/* 32: */
+} xtlock_t;			/* (48) */
+
+
+/*
+ *	maplock for txUpdateMap()
+ *
+ * note: maplock_t and its variations are overlaid
+ * at tlock.lock/linelock: watch for alignment;
+ * N.B. next field may be set by linelock, and should not
+ * be modified by maplock;
+ * N.B. index of the first pxdlock specifies index of next
+ * free maplock (i.e., number of maplock) in the tlock;
+ */
+typedef struct {
+	u16 next;		/* 2: */
+
+	u8 maxcnt;		/* 1: */
+	u8 index;		/* 1: next free maplock index */
+
+	u16 flag;		/* 2: */
+	u8 type;		/* 1: */
+	u8 count;		/* 1: number of pxd/xad */
+	/* (8) */
+
+	pxd_t pxd;		/* 8: */
+} maplock_t;			/* (16): */
+
+/* maplock flag */
+#define	mlckALLOC		0x00f0
+#define	mlckALLOCXADLIST	0x0080
+#define	mlckALLOCPXDLIST	0x0040
+#define	mlckALLOCXAD		0x0020
+#define	mlckALLOCPXD		0x0010
+#define	mlckFREE		0x000f
+#define	mlckFREEXADLIST		0x0008
+#define	mlckFREEPXDLIST		0x0004
+#define	mlckFREEXAD		0x0002
+#define	mlckFREEPXD		0x0001
+
+#define	pxdlock_t	maplock_t
+
+typedef struct {
+	u16 next;		/* 2: */
+
+	u8 maxcnt;		/* 1: */
+	u8 index;		/* 1: */
+
+	u16 flag;		/* 2: */
+	u8 type;		/* 1: */
+	u8 count;		/* 1: number of pxd/xad */
+	/* (8) */
+
+	void *xdlist;		/* 4: pxd/xad list */
+	s32 rsrvd;		/* 4: */
+} xdlistlock_t;			/* (16): */
+
+
+/*
+ *	commit
+ *
+ * parameter to the commit manager routines
+ */
+typedef struct commit {
+	tid_t tid;		/* 4: tid = index of tblock */
+	int flag;		/* 4: flags */
+	log_t *log;		/* 4: log */
+	struct super_block *sb;	/* 4: superblock */
+
+	int nip;		/* 4: number of entries in iplist */
+	struct inode **iplist;	/* 4: list of pointers to inodes */
+	/* (32) */
+
+	/* log record descriptor on 64-bit boundary */
+	lrd_t lrd;		/* : log record descriptor */
+} commit_t;
+
+/*
+ * external declarations
+ */
+extern tlock_t *txLock(tid_t tid, struct inode *ip, struct metapage *mp, int flag);
+
+extern tlock_t *txMaplock(tid_t tid, struct inode *ip, int flag);
+
+extern int txCommit(tid_t tid, int nip, struct inode **iplist, int flag);
+
+extern tid_t txBegin(struct super_block *sb, int flag);
+
+extern void txBeginAnon(struct super_block *sb);
+
+extern void txEnd(tid_t tid);
+
+extern void txAbort(tid_t tid, int dirty);
+
+extern linelock_t *txLinelock(linelock_t * tlock);
+
+extern void txFreeMap(struct inode *ip,
+		      maplock_t * maplock, tblock_t * tblk, int maptype);
+
+extern void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea);
+
+extern void txFreelock(struct inode *ip);
+
+extern int lmLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck);
+
+#endif				/* _H_JFS_TXNMGR */
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_types.h linuxppc64_2_4/fs/jfs/jfs_types.h
--- ../kernel.org/linux-2.4.19/fs/jfs/jfs_types.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/fs/jfs/jfs_types.h	Tue Apr 23 11:21:51 2002
@@ -0,0 +1,188 @@
+/*
+ *   Copyright (c) International Business Machines Corp., 2000
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;  if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _H_JFS_TYPES
+#define	_H_JFS_TYPES
+
+/*
+ *	jfs_types.h:
+ *
+ * basic type/utility definitions
+ *
+ * note: this header file must be the 1st include file
+ * of JFS include list in all JFS .c files.
+ */
+
+#include
+#include
+
+#include "endian24.h"
+#include "jfs_compat.h"
+
+/*
+ * transaction and lock id's
+ */
+typedef uint tid_t;
+typedef uint lid_t;
+
+/*
+ * Almost identical to Linux's timespec, but not quite
+ */
+struct timestruc_t {
+	u32 tv_sec;
+	u32 tv_nsec;
+};
+
+/*
+ *	handy
+ */
+
+#define LEFTMOSTONE	0x80000000
+#define	HIGHORDER	0x80000000u	/* high order bit on */
+#define	ONES		0xffffffffu	/* all bits on */
+
+typedef int boolean_t;
+#define TRUE	1
+#define FALSE	0
+
+/*
+ *	logical xd (lxd)
+ */
+typedef struct {
+	unsigned len:24;
+	unsigned off1:8;
+	u32 off2;
+} lxd_t;
+
+/* lxd_t field construction */
+#define	LXDlength(lxd, length32)	( (lxd)->len = length32 )
+#define	LXDoffset(lxd, offset64)\
+{\
+	(lxd)->off1 = ((s64)offset64) >> 32;\
+	(lxd)->off2 = (offset64) & 0xffffffff;\
+}
+
+/* lxd_t field extraction */
+#define	lengthLXD(lxd)	( (lxd)->len )
+#define	offsetLXD(lxd)\
+	( ((s64)((lxd)->off1)) << 32 | (lxd)->off2 )
+
+/* lxd list */
+typedef struct {
+	s16 maxnlxd;
+	s16 nlxd;
+	lxd_t *lxd;
+} lxdlist_t;
+
+/*
+ *	physical xd (pxd)
+ */
+typedef struct {
+	unsigned len:24;
+	unsigned addr1:8;
+	u32 addr2;
+} pxd_t;
+
+/* xd_t field construction */
+
+#define	PXDlength(pxd, length32)	((pxd)->len = __cpu_to_le24(length32))
+#define	PXDaddress(pxd, address64)\
+{\
+	(pxd)->addr1 = ((s64)address64) >> 32;\
+	(pxd)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\
+}
+
+/* xd_t field extraction */
+#define	lengthPXD(pxd)	__le24_to_cpu((pxd)->len)
+#define	addressPXD(pxd)\
+	( ((s64)((pxd)->addr1)) << 32 | __le32_to_cpu((pxd)->addr2))
+
+/* pxd list */
+typedef struct {
+	s16 maxnpxd;
+	s16 npxd;
+	pxd_t pxd[8];
+} pxdlist_t;
+
+
+/*
+ *	data extent descriptor (dxd)
+ */
+typedef struct {
+	unsigned flag:8;	/* 1: flags */
+	unsigned rsrvd:24;	/* 3: */
+	u32 size;		/* 4: size in byte */
+	unsigned len:24;	/* 3: length in unit of fsblksize */
+	unsigned addr1:8;	/* 1: address in unit of fsblksize */
+	u32 addr2;		/* 4: address in unit of fsblksize */
+} dxd_t;			/* - 16 - */
+
+/* dxd_t flags */
+#define	DXD_INDEX	0x80	/* B+-tree index */
+#define	DXD_INLINE	0x40	/* in-line data extent */
+#define	DXD_EXTENT	0x20	/* out-of-line single extent */
+#define	DXD_FILE	0x10	/* out-of-line file (inode) */
+#define DXD_CORRUPT	0x08	/* Inconsistency detected */
+
+/* dxd_t field construction
+ *	Conveniently, the PXD macros work for DXD
+ */
+#define DXDlength PXDlength
+#define DXDaddress PXDaddress
+#define lengthDXD lengthPXD
+#define addressDXD addressPXD
+
+/*
+ *      directory entry argument
+ */
+typedef struct component_name {
+	int namlen;
+	wchar_t *name;
+} component_t;
+
+
+/*
+ *	DASD limit information - stored in directory inode
+ */
+typedef struct dasd {
+	u8 thresh;		/* Alert Threshold (in percent) */
+	u8 delta;		/* Alert Threshold delta (in percent) */
+	u8 rsrvd1;
+	u8 limit_hi;		/* DASD limit (in logical blocks) */
+	u32 limit_lo;		/* DASD limit (in logical blocks) */
+	u8 rsrvd2[3];
+	u8 used_hi;		/* DASD usage (in logical blocks) */
+	u32 used_lo;		/* DASD usage (in logical blocks) */
+} dasd_t;
+
+#define DASDLIMIT(dasdp) \
+	(((u64)((dasdp)->limit_hi) << 32) + __le32_to_cpu((dasdp)->limit_lo))
+#define setDASDLIMIT(dasdp, limit)\
+{\
+	(dasdp)->limit_hi = ((u64)limit) >> 32;\
+	(dasdp)->limit_lo = __cpu_to_le32(limit);\
+}
+#define DASDUSED(dasdp) \
+	(((u64)((dasdp)->used_hi) << 32) + __le32_to_cpu((dasdp)->used_lo))
+#define setDASDUSED(dasdp, used)\
+{\
+	(dasdp)->used_hi = ((u64)used) >> 32;\
+	(dasdp)->used_lo = __cpu_to_le32(used);\
+}
+
+#endif				/* !_H_JFS_TYPES */
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_umount.c linuxppc64_2_4/fs/jfs/jfs_umount.c
--- ../kernel.org/linux-2.4.19/fs/jfs/jfs_umount.c	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/fs/jfs/jfs_umount.c	Tue Apr 23 11:21:51 2002
@@ -0,0 +1,158 @@
+/*
+ *
+ *   Copyright (c) International Business Machines Corp., 2000
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;  if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * Change History :
+ */
+
+/*
+ * jfs_umount.c
+ *
+ * note: file system in transition to aggregate/fileset:
+ * (ref. jfs_mount.c)
+ *
+ * file system unmount is interpreted as mount of the single/only
+ * fileset in the aggregate and, if unmount of the last fileset,
+ * as unmount of the aggregate;
+ */
+
+#include
+#include "jfs_incore.h"
+#include "jfs_filsys.h"
+#include "jfs_superblock.h"
+#include "jfs_dmap.h"
+#include "jfs_imap.h"
+#include "jfs_metapage.h"
+#include "jfs_debug.h"
+
+/*
+ * NAME:	jfs_umount(vfsp, flags, crp)
+ *
+ * FUNCTION:	vfs_umount()
+ *
+ * PARAMETERS:	vfsp	- virtual file system pointer
+ *		flags	- unmount for shutdown
+ *		crp	- credential
+ *
+ * RETURN:	EBUSY	- device has open files
+ */
+int jfs_umount(struct super_block *sb)
+{
+	int rc = 0;
+	log_t *log;
+	struct jfs_sb_info *sbi = JFS_SBI(sb);
+	struct inode *ipbmap = sbi->ipbmap;
+	struct inode *ipimap = sbi->ipimap;
+	struct inode *ipaimap = sbi->ipaimap;
+	struct inode *ipaimap2 = sbi->ipaimap2;
+
+	jFYI(1, ("\n	UnMount JFS: sb:0x%p\n", sb));
+
+	/*
+	 *	update superblock and close log
+	 *
+	 * if mounted read-write and log based recovery was enabled
+	 */
+	if ((log = sbi->log)) {
+		/*
+		 * close log:
+		 *
+		 * remove file system from log active file system list.
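+		 * (note: sbi->log is NULL for a read-only mount, in
+		 * which case both this block and the FM_CLEAN superblock
+		 * update at the bottom of this routine are skipped.)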
+		 */
+		log = sbi->log;
+		rc = lmLogClose(sb, log);
+	}
+
+	/*
+	 * close fileset inode allocation map (aka fileset inode)
+	 */
+	jEVENT(0, ("jfs_umount: close ipimap:0x%p\n", ipimap));
+	diUnmount(ipimap, 0);
+
+	diFreeSpecial(ipimap);
+	sbi->ipimap = NULL;
+
+	/*
+	 * close secondary aggregate inode allocation map
+	 */
+	ipaimap2 = sbi->ipaimap2;
+	if (ipaimap2) {
+		jEVENT(0, ("jfs_umount: close ipaimap2:0x%p\n", ipaimap2));
+		diUnmount(ipaimap2, 0);
+		diFreeSpecial(ipaimap2);
+		sbi->ipaimap2 = NULL;
+	}
+
+	/*
+	 * close aggregate inode allocation map
+	 */
+	ipaimap = sbi->ipaimap;
+	jEVENT(0, ("jfs_umount: close ipaimap:0x%p\n", ipaimap));
+	diUnmount(ipaimap, 0);
+	diFreeSpecial(ipaimap);
+	sbi->ipaimap = NULL;
+
+	/*
+	 * close aggregate block allocation map
+	 */
+	jEVENT(0, ("jfs_umount: close ipbmap:%p\n", ipbmap));
+	dbUnmount(ipbmap, 0);
+
+	diFreeSpecial(ipbmap);
+	sbi->ipbmap = NULL;
+
+	/*
+	 * ensure all file system file pages are propagated to their
+	 * home blocks on disk (and their in-memory buffer pages are
+	 * invalidated) BEFORE updating file system superblock state
+	 * (to signify file system is unmounted cleanly, and thus in
+	 * consistent state) and log superblock active file system
+	 * list (to signify skip logredo()).
+	 */
+	if (log)		/* log = NULL if read-only mount */
+		rc = updateSuper(sb, FM_CLEAN);
+
+	jFYI(0, ("	UnMount JFS Complete: %d\n", rc));
+	return rc;
+}
+
+
+int jfs_umount_rw(struct super_block *sb)
+{
+	struct jfs_sb_info *sbi = JFS_SBI(sb);
+
+	if (!sbi->log)
+		return 0;
+
+	/*
+	 * close log:
+	 *
+	 * remove file system from log active file system list.
+	 */
+	lmLogClose(sb, sbi->log);
+
+	dbSync(sbi->ipbmap);
+	diSync(sbi->ipimap);
+
+	sbi->log = 0;
+
+	return updateSuper(sb, FM_CLEAN);
+}
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_unicode.c linuxppc64_2_4/fs/jfs/jfs_unicode.c
--- ../kernel.org/linux-2.4.19/fs/jfs/jfs_unicode.c	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/fs/jfs/jfs_unicode.c	Tue Apr 23 11:21:51 2002
@@ -0,0 +1,110 @@
+/*
+ *
+ *   Copyright (c) International Business Machines Corp., 2000
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program;  if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include
+#include
+#include "jfs_types.h"
+#include "jfs_filsys.h"
+#include "jfs_unicode.h"
+#include "jfs_debug.h"
+
+/*
+ * NAME:	jfs_strfromUCS_le()
+ *
+ * FUNCTION:	Convert little-endian unicode string to character string
+ *
+ */
+int jfs_strfromUCS_le(char *to, const wchar_t * from,	/* LITTLE ENDIAN */
+		      int len, struct nls_table *codepage)
+{
+	int i;
+	int outlen = 0;
+
+	for (i = 0; (i < len) && from[i]; i++) {
+		int charlen;
+		charlen =
+		    codepage->uni2char(le16_to_cpu(from[i]), &to[outlen],
+				       NLS_MAX_CHARSET_SIZE);
+		if (charlen > 0) {
+			outlen += charlen;
+		} else {
+			to[outlen++] = '?';
+		}
+	}
+	to[outlen] = 0;
+	jEVENT(0, ("jfs_strfromUCS returning %d - '%s'\n", outlen, to));
+	return outlen;
+}
+
+/*
+ * NAME:	jfs_strtoUCS()
+ *
+ * FUNCTION:	Convert character string to unicode string
+ *
+ */
+int jfs_strtoUCS(wchar_t * to,
+		 const char *from, int len, struct nls_table *codepage)
+{
+	int charlen;
+	int i;
+
+	jEVENT(0, ("jfs_strtoUCS - '%s'\n", from));
+
+	for (i = 0; len && *from; i++, from += charlen, len -= charlen) {
+		charlen = codepage->char2uni(from, len, &to[i]);
+		if (charlen < 1) {
+			jERROR(1, ("jfs_strtoUCS: char2uni returned %d.\n",
+				   charlen));
+			jERROR(1, ("charset = %s, char = 0x%x\n",
+				   codepage->charset, (unsigned char) *from));
+			to[i] = 0x003f;	/* a question mark */
+			charlen = 1;
+		}
+	}
+
+	jEVENT(0, (" returning %d\n", i));
+
+	to[i] = 0;
+	return i;
+}
+
+/*
+ * NAME:	get_UCSname()
+ *
+ * FUNCTION:	Allocate and translate to unicode string
+ *
+ */
+int get_UCSname(component_t * uniName, struct dentry *dentry,
+		struct nls_table *nls_tab)
+{
+	int length = dentry->d_name.len;
+
+	if (length > JFS_NAME_MAX)
+		return ENAMETOOLONG;
+
+	uniName->name =
+	    kmalloc((length + 1) * sizeof(wchar_t), GFP_NOFS);
+
+	if (uniName->name == NULL)
+		return ENOSPC;
+
+	uniName->namlen = jfs_strtoUCS(uniName->name, dentry->d_name.name,
+				       length, nls_tab);
+
+	return 0;
+}
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_unicode.h linuxppc64_2_4/fs/jfs/jfs_unicode.h
--- ../kernel.org/linux-2.4.19/fs/jfs/jfs_unicode.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/fs/jfs/jfs_unicode.h	Tue Apr 23 11:21:51 2002
@@ -0,0 +1,143 @@
+/*
+ * unistrk: Unicode kernel case support
+ *
+ * Function:
+ *	Convert a unicode character to upper or lower case using
+ *	compressed tables.
+ *
+ *   Copyright (c) International Business Machines Corp., 2000
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + */ + +#include +#include "jfs_types.h" + +typedef struct { + wchar_t start; + wchar_t end; + signed char *table; +} UNICASERANGE; + +extern signed char UniUpperTable[512]; +extern UNICASERANGE UniUpperRange[]; +extern int get_UCSname(component_t *, struct dentry *, struct nls_table *); +extern int jfs_strfromUCS_le(char *, const wchar_t *, int, struct nls_table *); + +#define free_UCSname(COMP) kfree((COMP)->name) + +/* + * UniStrcpy: Copy a string + */ +static inline wchar_t *UniStrcpy(wchar_t * ucs1, const wchar_t * ucs2) +{ + wchar_t *anchor = ucs1; /* save the start of result string */ + + while ((*ucs1++ = *ucs2++)); + return anchor; +} + + + +/* + * UniStrncpy: Copy length limited string with pad + */ +static inline wchar_t *UniStrncpy(wchar_t * ucs1, const wchar_t * ucs2, + size_t n) +{ + wchar_t *anchor = ucs1; + + while (n-- && *ucs2) /* Copy the strings */ + *ucs1++ = *ucs2++; + + n++; + while (n--) /* Pad with nulls */ + *ucs1++ = 0; + return anchor; +} + +/* + * UniStrncmp_le: Compare length limited string - native to little-endian + */ +static inline int UniStrncmp_le(const wchar_t * ucs1, const wchar_t * ucs2, + size_t n) +{ + if (!n) + return 0; /* Null strings are equal */ + while ((*ucs1 == __le16_to_cpu(*ucs2)) && *ucs1 && --n) { + ucs1++; + ucs2++; + } + return (int) *ucs1 - (int) __le16_to_cpu(*ucs2); +} + +/* + * UniStrncpy_le: Copy length limited string with pad to little-endian + */ +static inline wchar_t *UniStrncpy_le(wchar_t * ucs1, const wchar_t * ucs2, + size_t n) +{ + wchar_t *anchor = ucs1; + + while (n-- && *ucs2) /* Copy the strings */ + *ucs1++ = __le16_to_cpu(*ucs2++); + + n++; + while (n--) /* Pad with nulls */ + *ucs1++ = 0; + return anchor; +} + + +/* + * UniToupper: Convert a unicode character to upper case + */ +static inline wchar_t UniToupper(register wchar_t uc) +{ + register UNICASERANGE *rp; + + if (uc < sizeof(UniUpperTable)) { /* Latin characters */ + return uc + UniUpperTable[uc]; /* Use base tables */ + } else { + rp = UniUpperRange; /* Use range tables */ + while (rp->start) { + if (uc < rp->start) /* Before start of range */ + return uc; /* Uppercase = input */ + if (uc <= rp->end) /* In range */ + return uc + rp->table[uc - rp->start]; + rp++; /* Try next range */ + } + } + return uc; /* Past last range */ +} + + +/* + * UniStrupr: Upper case a unicode string + */ +static inline wchar_t *UniStrupr(register wchar_t * upin) +{ + register wchar_t *up; + + up = upin; + while (*up) { /* For all characters */ + *up = UniToupper(*up); + up++; + } + return upin; /* Return input pointer */ +} + diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_uniupr.c linuxppc64_2_4/fs/jfs/jfs_uniupr.c --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_uniupr.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_uniupr.c Tue Apr 23 11:25:34 2002 @@ -0,0 +1,137 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * jfs_uniupr.c - Unicode compressed case ranges + * +*/ + +#include +#include "jfs_unicode.h" + +/* + * Latin upper case + */ +signed char UniUpperTable[512] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 030-03f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 040-04f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 050-05f */ + 0,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 060-06f */ + -32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, 0, 0, 0, 0, 0, /* 070-07f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 080-08f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 090-09f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0a0-0af */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0b0-0bf */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0c0-0cf */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0d0-0df */ + -32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 0e0-0ef */ + -32,-32,-32,-32,-32,-32,-32, 0,-32,-32,-32,-32,-32,-32,-32,121, /* 0f0-0ff */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 100-10f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 110-11f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 120-12f */ + 0, 0, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, /* 130-13f */ + -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, /* 140-14f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 150-15f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 160-16f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, /* 170-17f */ + 0, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, /* 180-18f */ + 0, 0, -1, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, /* 190-19f */ + 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, /* 1a0-1af */ + -1, 0, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, /* 1b0-1bf */ + 0, 0, 0, 0, 0, -1, -2, 0, -1, -2, 0, -1, -2, 0, -1, 0, /* 1c0-1cf */ + -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,-79, 0, -1, /* 1d0-1df */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e0-1ef */ + 0, 0, -1, -2, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, /* 1f0-1ff */ +}; + +/* Upper case range - Greek */ +static signed char UniCaseRangeU03a0[47] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-38,-37,-37,-37, /* 3a0-3af */ + 0,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 3b0-3bf */ + -32,-32,-31,-32,-32,-32,-32,-32,-32,-32,-32,-32,-64,-63,-63, +}; + +/* Upper case range - Cyrillic */ +static signed char UniCaseRangeU0430[48] = { + -32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 430-43f */ + -32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 440-44f */ + 0,-80,-80,-80,-80,-80,-80,-80,-80,-80,-80,-80,-80, 0,-80,-80, /* 450-45f */ +}; + +/* Upper case range - Extended cyrillic */ +static signed char 
UniCaseRangeU0490[61] = { + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 490-49f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 4a0-4af */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 4b0-4bf */ + 0, 0, -1, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, +}; + +/* Upper case range - Extended latin and greek */ +static signed char UniCaseRangeU1e00[509] = { + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e00-1e0f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e10-1e1f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e20-1e2f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e30-1e3f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e40-1e4f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e50-1e5f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e60-1e6f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e70-1e7f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e80-1e8f */ + 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0,-59, 0, -1, 0, -1, /* 1e90-1e9f */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ea0-1eaf */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1eb0-1ebf */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ec0-1ecf */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ed0-1edf */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ee0-1eef */ + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, /* 1ef0-1eff */ + 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f00-1f0f */ + 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f10-1f1f */ + 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f20-1f2f */ + 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f30-1f3f */ + 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f40-1f4f */ + 0, 8, 0, 8, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f50-1f5f */ + 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f60-1f6f */ + 74, 74, 86, 86, 86, 86,100,100, 0, 0,112,112,126,126, 0, 0, /* 1f70-1f7f */ + 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f80-1f8f */ + 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f90-1f9f */ + 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fa0-1faf */ + 8, 8, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fb0-1fbf */ + 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fc0-1fcf */ + 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fd0-1fdf */ + 8, 8, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fe0-1fef */ + 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +/* Upper case range - Wide latin */ +static signed char UniCaseRangeUff40[27] = { + 0,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* ff40-ff4f */ + -32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, +}; + +/* + * Upper Case Range + */ +UNICASERANGE UniUpperRange[] = { + { 0x03a0, 0x03ce, UniCaseRangeU03a0 }, + { 0x0430, 0x045f, UniCaseRangeU0430 }, + { 0x0490, 0x04cc, UniCaseRangeU0490 }, + { 0x1e00, 0x1ffc, UniCaseRangeU1e00 }, + { 0xff40, 0xff5a, UniCaseRangeUff40 }, + { 0, 0, 0 } +}; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_xtree.c linuxppc64_2_4/fs/jfs/jfs_xtree.c --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_xtree.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_xtree.c Tue Apr 23 11:21:51 2002 @@ -0,0 +1,4444 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms 
of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * jfs_xtree.c: extent allocation descriptor B+-tree manager + */ + +#include +#include +#include "jfs_incore.h" +#include "jfs_filsys.h" +#include "jfs_metapage.h" +#include "jfs_dmap.h" +#include "jfs_dinode.h" +#include "jfs_superblock.h" +#include "jfs_debug.h" + +/* + * xtree local flag + */ +#define XT_INSERT 0x00000001 + +/* + * xtree key/entry comparison: extent offset + * + * return: + * -1: k < start of extent + * 0: start_of_extent <= k <= end_of_extent + * 1: k > end_of_extent + */ +#define XT_CMP(CMP, K, X, OFFSET64)\ +{\ + OFFSET64 = offsetXAD(X);\ + (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ + ((K) < OFFSET64) ? -1 : 0;\ +} + +/* write a xad entry */ +#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ +{\ + (XAD)->flag = (FLAG);\ + XADoffset((XAD), (OFF));\ + XADlength((XAD), (LEN));\ + XADaddress((XAD), (ADDR));\ +} + +#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) + +/* get page buffer for specified block address */ +#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ +{\ + BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\ + if (!(RC))\ + {\ + if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\ + (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\ + (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\ + {\ + jERROR(1,("XT_GETPAGE: xtree page corrupt\n"));\ + BT_PUTPAGE(MP);\ + updateSuper((IP)->i_sb, FM_DIRTY);\ + MP = NULL;\ + RC = EIO;\ + }\ + }\ +} + +/* for consistency */ +#define XT_PUTPAGE(MP) BT_PUTPAGE(MP) + +#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ + BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) +/* xtree entry parameter descriptor */ +typedef struct { + metapage_t *mp; + s16 index; + u8 flag; + s64 off; + s64 addr; + int len; + pxdlist_t *pxdlist; +} xtsplit_t; + + +/* + * statistics + */ +#ifdef CONFIG_JFS_STATISTICS +static struct { + uint search; + uint fastSearch; + uint split; +} xtStat; +#endif + + +/* + * forward references + */ +static int xtSearch(struct inode *ip, + s64 xoff, int *cmpp, btstack_t * btstack, int flag); + +static int xtSplitUp(tid_t tid, + struct inode *ip, + xtsplit_t * split, btstack_t * btstack); + +static int xtSplitPage(tid_t tid, + struct inode *ip, + xtsplit_t * split, metapage_t ** rmpp, s64 * rbnp); + +static int xtSplitRoot(tid_t tid, + struct inode *ip, + xtsplit_t * split, metapage_t ** rmpp); + +#ifdef _STILL_TO_PORT +static int xtDeleteUp(tid_t tid, + struct inode *ip, + metapage_t * fmp, + xtpage_t * fp, btstack_t * btstack); + +static int xtSearchNode(struct inode *ip, + xad_t * xad, + int *cmpp, btstack_t * btstack, int flag); + +static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp); +#endif /* _STILL_TO_PORT */ + +/* External references */ + +/* + * debug control + */ +/* #define _JFS_DEBUG_XTREE 1 */ + + +/* + * xtLookup() + * + * function: map a single page into a physical extent; 
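XT_CMP() above packs a three-way interval test into one macro: it classifies a search key as below, inside, or past an extent. A standalone rendering, assuming plain integer fields in place of the packed xad_t accessors:

        #include <stdio.h>
        #include <stdint.h>

        struct toy_xad {
                uint64_t off;   /* offsetXAD() */
                uint64_t len;   /* lengthXAD() */
        };

        /* -1: key below extent, 0: key inside extent, 1: key past extent */
        static int xt_cmp(uint64_t k, const struct toy_xad *x)
        {
                if (k < x->off)
                        return -1;
                if (k >= x->off + x->len)
                        return 1;
                return 0;
        }

        int main(void)
        {
                struct toy_xad x = { 100, 8 };  /* covers offsets 100..107 */

                printf("%d %d %d\n",
                       xt_cmp(99, &x), xt_cmp(100, &x), xt_cmp(108, &x));
                /* prints "-1 0 1" */
                return 0;
        }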
+ */ +int xtLookup(struct inode *ip, s64 lstart, + s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check) +{ + int rc = 0; + btstack_t btstack; + int cmp; + s64 bn; + metapage_t *mp; + xtpage_t *p; + int index; + xad_t *xad; + s64 size, xoff, xend; + int xlen; + s64 xaddr; + + *plen = 0; + + if (!no_check) { + /* is lookup offset beyond eof ? */ + size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> + JFS_SBI(ip->i_sb)->l2bsize; + if (lstart >= size) { + jERROR(1, + ("xtLookup: lstart (0x%lx) >= size (0x%lx)\n", + (ulong) lstart, (ulong) size)); + return 0; + } + } + + /* + * search for the xad entry covering the logical extent + */ +//search: + if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) { + jERROR(1, ("xtLookup: xtSearch returned %d\n", rc)); + return rc; + } + + /* + * compute the physical extent covering logical extent + * + * N.B. search may have failed (e.g., hole in sparse file), + * and returned the index of the next entry. + */ + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + + /* is xad found covering start of logical extent ? + * lstart is a page start address, + * i.e., lstart cannot start in a hole; + */ + if (cmp) { + jFYI(1, ("xtLookup: cmp = %d\n", cmp)); + goto out; + } + + /* + * lxd covered by xad + */ + xad = &p->xad[index]; + xoff = offsetXAD(xad); + xlen = lengthXAD(xad); + xend = xoff + xlen; + xaddr = addressXAD(xad); + + jEVENT(0, + ("index = %d, xoff = 0x%lx, xlen = 0x%x, xaddr = 0x%lx\n", + index, (ulong) xoff, xlen, (ulong) xaddr)); + + /* initialize new pxd */ + *pflag = xad->flag; + *paddr = xaddr + (lstart - xoff); + /* a page must be fully covered by an xad */ + *plen = min(xend - lstart, llen); + + out: + XT_PUTPAGE(mp); + + return rc; +} + + +/* + * xtLookupList() + * + * function: map a single logical extent into a list of physical extent; + * + * parameter: + * struct inode *ip, + * lxdlist_t *lxdlist, lxd list (in) + * xadlist_t *xadlist, xad list (in/out) + * int flag) + * + * coverage of lxd by xad under assumption of + * . lxd's are ordered and disjoint. + * . xad's are ordered and disjoint. + * + * return: + * 0: success + * + * note: a page being written (even a single byte) is backed fully, + * except the last page which is only backed with blocks + * required to cover the last byte; + * the extent backing a page is fully contained within an xad; + */ +int xtLookupList(struct inode *ip, lxdlist_t * lxdlist, /* lxd list (in) */ + xadlist_t * xadlist, /* xad list (in/out) */ + int flag) +{ + int rc = 0; + btstack_t btstack; + int cmp; + s64 bn; + metapage_t *mp; + xtpage_t *p; + int index; + lxd_t *lxd; + xad_t *xad, *pxd; + s64 size, lstart, lend, xstart, xend, pstart; + s64 llen, xlen, plen; + s64 xaddr, paddr; + int nlxd, npxd, maxnpxd; + + npxd = xadlist->nxad = 0; + maxnpxd = xadlist->maxnxad; + pxd = xadlist->xad; + + nlxd = lxdlist->nlxd; + lxd = lxdlist->lxd; + + lstart = offsetLXD(lxd); + llen = lengthLXD(lxd); + lend = lstart + llen; + + size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> + JFS_SBI(ip->i_sb)->l2bsize; + + /* + * search for the xad entry covering the logical extent + */ + search: + if (lstart >= size) + return 0; + + if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) + return rc; + + /* + * compute the physical extent covering logical extent + * + * N.B. search may have failed (e.g., hole in sparse file), + * and returned the index of the next entry. 
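Once xtLookup() above has found the covering xad, the mapping is pure arithmetic: the physical address is offset by the distance of the request into the extent, and the returned length is clamped at the extent end. A worked example with made-up numbers:

        #include <stdio.h>
        #include <stdint.h>

        int main(void)
        {
                uint64_t xoff = 0x100, xlen = 0x40, xaddr = 0x9000; /* the xad */
                uint64_t lstart = 0x110, llen = 0x80;               /* the request */
                uint64_t xend = xoff + xlen;

                uint64_t paddr = xaddr + (lstart - xoff);
                uint64_t plen = (xend - lstart < llen) ? xend - lstart : llen;

                /* the request runs past the extent, so plen is clamped to 0x30 */
                printf("paddr=0x%llx plen=0x%llx\n",
                       (unsigned long long) paddr, (unsigned long long) plen);
                return 0;
        }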
+ */ +//map: + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + + /* is xad on the next sibling page ? */ + if (index == le16_to_cpu(p->header.nextindex)) { + if (p->header.flag & BT_ROOT) + goto mapend; + + if ((bn = le64_to_cpu(p->header.next)) == 0) + goto mapend; + + XT_PUTPAGE(mp); + + /* get next sibling page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + index = XTENTRYSTART; + } + + xad = &p->xad[index]; + + /* + * is lxd covered by xad ? + */ + compare: + xstart = offsetXAD(xad); + xlen = lengthXAD(xad); + xend = xstart + xlen; + xaddr = addressXAD(xad); + + compare1: + if (xstart < lstart) + goto compare2; + + /* (lstart <= xstart) */ + + /* lxd is NOT covered by xad */ + if (lend <= xstart) { + /* + * get next lxd + */ + if (--nlxd == 0) + goto mapend; + lxd++; + + lstart = offsetLXD(lxd); + llen = lengthLXD(lxd); + lend = lstart + llen; + if (lstart >= size) + goto mapend; + + /* compare with the current xad */ + goto compare1; + } + /* lxd is covered by xad */ + else { /* (xstart < lend) */ + + /* initialize new pxd */ + pstart = xstart; + plen = min(lend - xstart, xlen); + paddr = xaddr; + + goto cover; + } + + /* (xstart < lstart) */ + compare2: + /* lxd is covered by xad */ + if (lstart < xend) { + /* initialize new pxd */ + pstart = lstart; + plen = min(xend - lstart, llen); + paddr = xaddr + (lstart - xstart); + + goto cover; + } + /* lxd is NOT covered by xad */ + else { /* (xend <= lstart) */ + + /* + * get next xad + * + * linear search next xad covering lxd on + * the current xad page, and then tree search + */ + if (index == le16_to_cpu(p->header.nextindex) - 1) { + if (p->header.flag & BT_ROOT) + goto mapend; + + XT_PUTPAGE(mp); + goto search; + } else { + index++; + xad++; + + /* compare with new xad */ + goto compare; + } + } + + /* + * lxd is covered by xad and a new pxd has been initialized + * (lstart <= xstart < lend) or (xstart < lstart < xend) + */ + cover: + /* finalize pxd corresponding to current xad */ + XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr); + + if (++npxd >= maxnpxd) + goto mapend; + pxd++; + + /* + * lxd is fully covered by xad + */ + if (lend <= xend) { + /* + * get next lxd + */ + if (--nlxd == 0) + goto mapend; + lxd++; + + lstart = offsetLXD(lxd); + llen = lengthLXD(lxd); + lend = lstart + llen; + if (lstart >= size) + goto mapend; + + /* + * test for old xad covering new lxd + * (old xstart < new lstart) + */ + goto compare2; + } + /* + * lxd is partially covered by xad + */ + else { /* (xend < lend) */ + + /* + * get next xad + * + * linear search next xad covering lxd on + * the current xad page, and then next xad page search + */ + if (index == le16_to_cpu(p->header.nextindex) - 1) { + if (p->header.flag & BT_ROOT) + goto mapend; + + if ((bn = le64_to_cpu(p->header.next)) == 0) + goto mapend; + + XT_PUTPAGE(mp); + + /* get next sibling page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + index = XTENTRYSTART; + xad = &p->xad[index]; + } else { + index++; + xad++; + } + + /* + * test for new xad covering old lxd + * (old lstart < new xstart) + */ + goto compare; + } + + mapend: + xadlist->nxad = npxd; + +//out: + XT_PUTPAGE(mp); + + return rc; +} + + +/* + * xtSearch() + * + * function: search for the xad entry covering specified offset. 
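xtLookupList() above is a two-pointer sweep over two sorted, disjoint interval lists, advancing whichever interval is exhausted first. The same control flow reduced to a standalone sketch, assuming simple {start, len} pairs in place of lxd/xad entries:

        #include <stdio.h>
        #include <stdint.h>

        struct ival { uint64_t start, len; };

        static uint64_t min64(uint64_t a, uint64_t b) { return a < b ? a : b; }
        static uint64_t max64(uint64_t a, uint64_t b) { return a > b ? a : b; }

        /* emit every overlap of a[] and b[]; advance the list that ends first */
        static void intersect(const struct ival *a, int na,
                              const struct ival *b, int nb)
        {
                int i = 0, j = 0;

                while (i < na && j < nb) {
                        uint64_t lo = max64(a[i].start, b[j].start);
                        uint64_t hi = min64(a[i].start + a[i].len,
                                            b[j].start + b[j].len);

                        if (lo < hi)
                                printf("overlap [%llu,%llu)\n",
                                       (unsigned long long) lo,
                                       (unsigned long long) hi);
                        if (a[i].start + a[i].len <= b[j].start + b[j].len)
                                i++;    /* a[i] exhausted first */
                        else
                                j++;
                }
        }

        int main(void)
        {
                struct ival lxd[] = { { 0, 10 }, { 20, 5 } };
                struct ival xad[] = { { 5, 20 } };

                intersect(lxd, 2, xad, 1);      /* [5,10) and [20,25) */
                return 0;
        }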
+ * + * parameters: + * ip - file object; + * xoff - extent offset; + * cmpp - comparison result: + * btstack - traverse stack; + * flag - search process flag (XT_INSERT); + * + * returns: + * btstack contains (bn, index) of search path traversed to the entry. + * *cmpp is set to result of comparison with the entry returned. + * the page containing the entry is pinned at exit. + */ +static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ + int *cmpp, btstack_t * btstack, int flag) +{ + struct jfs_inode_info *jfs_ip = JFS_IP(ip); + int rc = 0; + int cmp = 1; /* init for empty page */ + s64 bn; /* block number */ + metapage_t *mp; /* page buffer */ + xtpage_t *p; /* page */ + xad_t *xad; + int base, index, lim, btindex; + btframe_t *btsp; + int nsplit = 0; /* number of pages to split */ + s64 t64; + + INCREMENT(xtStat.search); + + BT_CLR(btstack); + + btstack->nsplit = 0; + + /* + * search down tree from root: + * + * between two consecutive entries of and of + * internal page, child page Pi contains entry with k, Ki <= K < Kj. + * + * if entry with search key K is not found + * internal page search find the entry with largest key Ki + * less than K which point to the child page to search; + * leaf page search find the entry with smallest key Kj + * greater than K so that the returned index is the position of + * the entry to be shifted right for insertion of new entry. + * for empty tree, search key is greater than any key of the tree. + * + * by convention, root bn = 0. + */ + for (bn = 0;;) { + /* get/pin the page to search */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* try sequential access heuristics with the previous + * access entry in target leaf page: + * once search narrowed down into the target leaf, + * key must either match an entry in the leaf or + * key entry does not exist in the tree; + */ +//fastSearch: + if ((jfs_ip->btorder & BT_SEQUENTIAL) && + (p->header.flag & BT_LEAF) && + (index = jfs_ip->btindex) < + le16_to_cpu(p->header.nextindex)) { + xad = &p->xad[index]; + t64 = offsetXAD(xad); + if (xoff < t64 + lengthXAD(xad)) { + if (xoff >= t64) { + *cmpp = 0; + goto out; + } + + /* stop sequential access heuristics */ + goto binarySearch; + } else { /* (t64 + lengthXAD(xad)) <= xoff */ + + /* try next sequential entry */ + index++; + if (index < + le16_to_cpu(p->header.nextindex)) { + xad++; + t64 = offsetXAD(xad); + if (xoff < t64 + lengthXAD(xad)) { + if (xoff >= t64) { + *cmpp = 0; + goto out; + } + + /* miss: key falls between + * previous and this entry + */ + *cmpp = 1; + goto out; + } + + /* (xoff >= t64 + lengthXAD(xad)); + * matching entry may be further out: + * stop heuristic search + */ + /* stop sequential access heuristics */ + goto binarySearch; + } + + /* (index == p->header.nextindex); + * miss: key entry does not exist in + * the target leaf/tree + */ + *cmpp = 1; + goto out; + } + + /* + * if hit, return index of the entry found, and + * if miss, where new entry with search key is + * to be inserted; + */ + out: + /* compute number of pages to split */ + if (flag & XT_INSERT) { + if (p->header.nextindex == /* little-endian */ + p->header.maxentry) + nsplit++; + else + nsplit = 0; + btstack->nsplit = nsplit; + } + + /* save search result */ + btsp = btstack->top; + btsp->bn = bn; + btsp->index = index; + btsp->mp = mp; + + /* update sequential access heuristics */ + jfs_ip->btindex = index; + + INCREMENT(xtStat.fastSearch); + return 0; + } + + /* well, ... 
full search now */ + binarySearch: + lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; + + /* + * binary search with search key K on the current page + */ + for (base = XTENTRYSTART; lim; lim >>= 1) { + index = base + (lim >> 1); + + XT_CMP(cmp, xoff, &p->xad[index], t64); + if (cmp == 0) { + /* + * search hit + */ + /* search hit - leaf page: + * return the entry found + */ + if (p->header.flag & BT_LEAF) { + *cmpp = cmp; + + /* compute number of pages to split */ + if (flag & XT_INSERT) { + if (p->header.nextindex == + p->header.maxentry) + nsplit++; + else + nsplit = 0; + btstack->nsplit = nsplit; + } + + /* save search result */ + btsp = btstack->top; + btsp->bn = bn; + btsp->index = index; + btsp->mp = mp; + + /* init sequential access heuristics */ + btindex = jfs_ip->btindex; + if (index == btindex || + index == btindex + 1) + jfs_ip->btorder = BT_SEQUENTIAL; + else + jfs_ip->btorder = BT_RANDOM; + jfs_ip->btindex = index; + + return 0; + } + + /* search hit - internal page: + * descend/search its child page + */ + goto next; + } + + if (cmp > 0) { + base = index + 1; + --lim; + } + } + + /* + * search miss + * + * base is the smallest index with key (Kj) greater than + * search key (K) and may be zero or maxentry index. + */ + /* + * search miss - leaf page: + * + * return location of entry (base) where new entry with + * search key K is to be inserted. + */ + if (p->header.flag & BT_LEAF) { + *cmpp = cmp; + + /* compute number of pages to split */ + if (flag & XT_INSERT) { + if (p->header.nextindex == + p->header.maxentry) + nsplit++; + else + nsplit = 0; + btstack->nsplit = nsplit; + } + + /* save search result */ + btsp = btstack->top; + btsp->bn = bn; + btsp->index = base; + btsp->mp = mp; + + /* init sequential access heuristics */ + btindex = jfs_ip->btindex; + if (base == btindex || base == btindex + 1) + jfs_ip->btorder = BT_SEQUENTIAL; + else + jfs_ip->btorder = BT_RANDOM; + jfs_ip->btindex = base; + + return 0; + } + + /* + * search miss - non-leaf page: + * + * if base is non-zero, decrement base by one to get the parent + * entry of the child page to search. + */ + index = base ? 
base - 1 : base; + + /* + * go down to child page + */ + next: + /* update number of pages to split */ + if (p->header.nextindex == p->header.maxentry) + nsplit++; + else + nsplit = 0; + + /* push (bn, index) of the parent page/entry */ + BT_PUSH(btstack, bn, index); + + /* get the child page block number */ + bn = addressXAD(&p->xad[index]); + + /* unpin the parent page */ + XT_PUTPAGE(mp); + } +} + +/* + * xtInsert() + * + * function: + * + * parameter: + * tid - transaction id; + * ip - file object; + * xflag - extent flag (XAD_NOTRECORDED): + * xoff - extent offset; + * xlen - extent length; + * xaddrp - extent address pointer (in/out): + * if (*xaddrp) + * caller allocated data extent at *xaddrp; + * else + * allocate data extent and return its xaddr; + * flag - + * + * return: + */ +int xtInsert(tid_t tid, /* transaction id */ + struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp, + int flag) +{ + int rc = 0; + s64 xaddr, hint; + metapage_t *mp; /* meta-page buffer */ + xtpage_t *p; /* base B+-tree index page */ + s64 bn; + int index, nextindex; + btstack_t btstack; /* traverse stack */ + xtsplit_t split; /* split information */ + xad_t *xad; + int cmp; + tlock_t *tlck; + xtlock_t *xtlck; + + jFYI(1, + ("xtInsert: nxoff:0x%lx nxlen:0x%x\n", (ulong) xoff, xlen)); + + /* + * search for the entry location at which to insert: + * + * xtFastSearch() and xtSearch() both returns (leaf page + * pinned, index at which to insert). + * n.b. xtSearch() may return index of maxentry of + * the full page. + */ + if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) + return rc; + + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + + /* This test must follow XT_GETSEARCH since mp must be valid if + * we branch to out: */ + if (cmp == 0) { + rc = EEXIST; + goto out; + } + + /* + * allocate data extent requested + * + * allocation hint: last xad + */ + if ((xaddr = *xaddrp) == 0) { + if (index > XTENTRYSTART) { + xad = &p->xad[index - 1]; + hint = addressXAD(xad) + lengthXAD(xad) - 1; + } else + hint = 0; + if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) + goto out; + } + + /* + * insert entry for new extent + */ + xflag |= XAD_NEW; + + /* + * if the leaf page is full, split the page and + * propagate up the router entry for the new page from split + * + * The xtSplitUp() will insert the entry and unpin the leaf page. + */ + nextindex = le16_to_cpu(p->header.nextindex); + if (nextindex == le16_to_cpu(p->header.maxentry)) { + split.mp = mp; + split.index = index; + split.flag = xflag; + split.off = xoff; + split.len = xlen; + split.addr = xaddr; + split.pxdlist = NULL; + if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { + /* undo data extent allocation */ + if (*xaddrp == 0) + dbFree(ip, xaddr, (s64) xlen); + return rc; + } + + *xaddrp = xaddr; + return 0; + } + + /* + * insert the new entry into the leaf page + */ + /* + * acquire a transaction lock on the leaf page; + * + * action: xad insertion/extension; + */ + BT_MARK_DIRTY(mp, ip); + + /* if insert into middle, shift right remaining entries. 
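Both insert paths above keep the xad array sorted by shifting the tail one slot right with a single memmove() before writing the new entry. The idiom in isolation, on a plain int array:

        #include <stdio.h>
        #include <string.h>

        static void insert_sorted(int *a, int *n, int idx, int val)
        {
                /* shift a[idx..n-1] right one slot, then drop val into the hole */
                if (idx < *n)
                        memmove(&a[idx + 1], &a[idx], (*n - idx) * sizeof(a[0]));
                a[idx] = val;
                (*n)++;
        }

        int main(void)
        {
                int a[8] = { 10, 20, 40 };
                int n = 3, i;

                insert_sorted(a, &n, 2, 30);    /* middle insert: shifts 40 right */
                for (i = 0; i < n; i++)
                        printf("%d ", a[i]);
                printf("\n");                   /* 10 20 30 40 */
                return 0;
        }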
*/ + if (index < nextindex) + memmove(&p->xad[index + 1], &p->xad[index], + (nextindex - index) * sizeof(xad_t)); + + /* insert the new entry: mark the entry NEW */ + xad = &p->xad[index]; + XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); + + /* advance next available entry index */ + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); + + /* Don't log it if there are no links to the file */ + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->lwm.offset = + (xtlck->lwm.offset) ? min(index, + (int)xtlck->lwm.offset) : index; + xtlck->lwm.length = + le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; + } + + *xaddrp = xaddr; + + out: + /* unpin the leaf page */ + XT_PUTPAGE(mp); + + return rc; +} + + +/* + * xtSplitUp() + * + * function: + * split full pages as propagating insertion up the tree + * + * parameter: + * tid - transaction id; + * ip - file object; + * split - entry parameter descriptor; + * btstack - traverse stack from xtSearch() + * + * return: + */ +static int +xtSplitUp(tid_t tid, + struct inode *ip, xtsplit_t * split, btstack_t * btstack) +{ + int rc = 0; + metapage_t *smp; + xtpage_t *sp; /* split page */ + metapage_t *rmp; + s64 rbn; /* new right page block number */ + metapage_t *rcmp; + xtpage_t *rcp; /* right child page */ + s64 rcbn; /* right child page block number */ + int skip; /* index of entry of insertion */ + int nextindex; /* next available entry index of p */ + btframe_t *parent; /* parent page entry on traverse stack */ + xad_t *xad; + s64 xaddr; + int xlen; + int nsplit; /* number of pages split */ + pxdlist_t pxdlist; + pxd_t *pxd; + tlock_t *tlck; + xtlock_t *xtlck; + + smp = split->mp; + sp = XT_PAGE(ip, smp); + + /* is inode xtree root extension/inline EA area free ? */ + if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) && + (sp->header.maxentry < cpu_to_le16(XTROOTMAXSLOT)) && + (JFS_IP(ip)->mode2 & INLINEEA)) { + sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT); + JFS_IP(ip)->mode2 &= ~INLINEEA; + + BT_MARK_DIRTY(smp, ip); + /* + * acquire a transaction lock on the leaf page; + * + * action: xad insertion/extension; + */ + + /* if insert into middle, shift right remaining entries. */ + skip = split->index; + nextindex = le16_to_cpu(sp->header.nextindex); + if (skip < nextindex) + memmove(&sp->xad[skip + 1], &sp->xad[skip], + (nextindex - skip) * sizeof(xad_t)); + + /* insert the new entry: mark the entry NEW */ + xad = &sp->xad[skip]; + XT_PUTENTRY(xad, split->flag, split->off, split->len, + split->addr); + + /* advance next available entry index */ + sp->header.nextindex = + cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1); + + /* Don't log it if there are no links to the file */ + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->lwm.offset = (xtlck->lwm.offset) ? + min(skip, (int)xtlck->lwm.offset) : skip; + xtlck->lwm.length = + le16_to_cpu(sp->header.nextindex) - + xtlck->lwm.offset; + } + + return 0; + } + + /* + * allocate new index blocks to cover index page split(s) + * + * allocation hint: ? 
+ */ + if (split->pxdlist == NULL) { + nsplit = btstack->nsplit; + split->pxdlist = &pxdlist; + pxdlist.maxnpxd = pxdlist.npxd = 0; + pxd = &pxdlist.pxd[0]; + xlen = JFS_SBI(ip->i_sb)->nbperpage; + for (; nsplit > 0; nsplit--, pxd++) { + if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr)) + == 0) { + PXDaddress(pxd, xaddr); + PXDlength(pxd, xlen); + + pxdlist.maxnpxd++; + + continue; + } + + /* undo allocation */ + + XT_PUTPAGE(smp); + return rc; + } + } + + /* + * Split leaf page into and a new right page . + * + * The split routines insert the new entry into the leaf page, + * and acquire txLock as appropriate. + * return pinned and its block number . + */ + rc = (sp->header.flag & BT_ROOT) ? + xtSplitRoot(tid, ip, split, &rmp) : + xtSplitPage(tid, ip, split, &rmp, &rbn); + if (rc) + return EIO; + + XT_PUTPAGE(smp); + + /* + * propagate up the router entry for the leaf page just split + * + * insert a router entry for the new page into the parent page, + * propagate the insert/split up the tree by walking back the stack + * of (bn of parent page, index of child page entry in parent page) + * that were traversed during the search for the page that split. + * + * the propagation of insert/split up the tree stops if the root + * splits or the page inserted into doesn't have to split to hold + * the new entry. + * + * the parent entry for the split page remains the same, and + * a new entry is inserted at its right with the first key and + * block number of the new right page. + * + * There are a maximum of 3 pages pinned at any time: + * right child, left parent and right parent (when the parent splits) + * to keep the child page pinned while working on the parent. + * make sure that all pins are released at exit. + */ + while ((parent = BT_POP(btstack)) != NULL) { + /* parent page specified by stack frame */ + + /* keep current child pages pinned */ + rcmp = rmp; + rcbn = rbn; + rcp = XT_PAGE(ip, rcmp); + + /* + * insert router entry in parent for new right child page + */ + /* get/pin the parent page */ + XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); + if (rc) + goto errout2; + + /* + * The new key entry goes ONE AFTER the index of parent entry, + * because the split was to the right. + */ + skip = parent->index + 1; + + /* + * split or shift right remaining entries of the parent page + */ + nextindex = le16_to_cpu(sp->header.nextindex); + /* + * parent page is full - split the parent page + */ + if (nextindex == le16_to_cpu(sp->header.maxentry)) { + /* init for parent page split */ + split->mp = smp; + split->index = skip; /* index at insert */ + split->flag = XAD_NEW; + split->off = offsetXAD(&rcp->xad[XTENTRYSTART]); + split->len = JFS_SBI(ip->i_sb)->nbperpage; + split->addr = rcbn; + + /* unpin previous right child page */ + XT_PUTPAGE(rcmp); + + /* The split routines insert the new entry, + * and acquire txLock as appropriate. + * return pinned and its block number . + */ + rc = (sp->header.flag & BT_ROOT) ? 
+ xtSplitRoot(tid, ip, split, &rmp) : + xtSplitPage(tid, ip, split, &rmp, &rbn); + if (rc) + goto errout1; + + XT_PUTPAGE(smp); + /* keep new child page pinned */ + } + /* + * parent page is not full - insert in parent page + */ + else { + /* + * insert router entry in parent for the right child + * page from the first entry of the right child page: + */ + /* + * acquire a transaction lock on the parent page; + * + * action: router xad insertion; + */ + BT_MARK_DIRTY(smp, ip); + + /* + * if insert into middle, shift right remaining entries + */ + if (skip < nextindex) + memmove(&sp->xad[skip + 1], &sp->xad[skip], + (nextindex - + skip) << L2XTSLOTSIZE); + + /* insert the router entry */ + xad = &sp->xad[skip]; + XT_PUTENTRY(xad, XAD_NEW, + offsetXAD(&rcp->xad[XTENTRYSTART]), + JFS_SBI(ip->i_sb)->nbperpage, rcbn); + + /* advance next available entry index. */ + sp->header.nextindex = + cpu_to_le16(le16_to_cpu(sp->header.nextindex) + + 1); + + /* Don't log it if there are no links to the file */ + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, smp, + tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->lwm.offset = (xtlck->lwm.offset) ? + min(skip, (int)xtlck->lwm.offset) : skip; + xtlck->lwm.length = + le16_to_cpu(sp->header.nextindex) - + xtlck->lwm.offset; + } + + /* unpin parent page */ + XT_PUTPAGE(smp); + + /* exit propagate up */ + break; + } + } + + /* unpin current right page */ + XT_PUTPAGE(rmp); + + return 0; + + /* + * If something fails in the above loop we were already walking back + * up the tree and the tree is now inconsistent. + * release all pages we're holding. + */ + errout1: + XT_PUTPAGE(smp); + + errout2: + XT_PUTPAGE(rcmp); + + return rc; +} + + +/* + * xtSplitPage() + * + * function: + * split a full non-root page into + * original/split/left page and new right page + * i.e., the original/split page remains as left page. + * + * parameter: + * int tid, + * struct inode *ip, + * xtsplit_t *split, + * metapage_t **rmpp, + * u64 *rbnp, + * + * return: + * Pointer to page in which to insert or NULL on error. 
+ */ +static int +xtSplitPage(tid_t tid, struct inode *ip, + xtsplit_t * split, metapage_t ** rmpp, s64 * rbnp) +{ + int rc = 0; + metapage_t *smp; + xtpage_t *sp; + metapage_t *rmp; + xtpage_t *rp; /* new right page allocated */ + s64 rbn; /* new right page block number */ + metapage_t *mp; + xtpage_t *p; + s64 nextbn; + int skip, maxentry, middle, righthalf, n; + xad_t *xad; + pxdlist_t *pxdlist; + pxd_t *pxd; + tlock_t *tlck; + xtlock_t *sxtlck = 0, *rxtlck = 0; + + smp = split->mp; + sp = XT_PAGE(ip, smp); + + INCREMENT(xtStat.split); + + /* + * allocate the new right page for the split + */ + pxdlist = split->pxdlist; + pxd = &pxdlist->pxd[pxdlist->npxd]; + pxdlist->npxd++; + rbn = addressPXD(pxd); + rmp = get_metapage(ip, rbn, PSIZE, 1); + if (rmp == NULL) + return EIO; + + jEVENT(0, + ("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p\n", ip, smp, rmp)); + + BT_MARK_DIRTY(rmp, ip); + /* + * action: new page; + */ + + rp = (xtpage_t *) rmp->data; + rp->header.self = *pxd; + rp->header.flag = sp->header.flag & BT_TYPE; + rp->header.maxentry = sp->header.maxentry; /* little-endian */ + rp->header.nextindex = cpu_to_le16(XTENTRYSTART); + + BT_MARK_DIRTY(smp, ip); + /* Don't log it if there are no links to the file */ + if (!test_cflag(COMMIT_Nolink, ip)) { + /* + * acquire a transaction lock on the new right page; + */ + tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); + rxtlck = (xtlock_t *) & tlck->lock; + rxtlck->lwm.offset = XTENTRYSTART; + /* + * acquire a transaction lock on the split page + */ + tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); + sxtlck = (xtlock_t *) & tlck->lock; + } + + /* + * initialize/update sibling pointers of and + */ + nextbn = le64_to_cpu(sp->header.next); + rp->header.next = cpu_to_le64(nextbn); + rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); + sp->header.next = cpu_to_le64(rbn); + + skip = split->index; + + /* + * sequential append at tail (after last entry of last page) + * + * if splitting the last page on a level because of appending + * a entry to it (skip is maxentry), it's likely that the access is + * sequential. adding an empty page on the side of the level is less + * work and can push the fill factor much higher than normal. + * if we're wrong it's no big deal - we will do the split the right + * way next time. + * (it may look like it's equally easy to do a similar hack for + * reverse sorted data, that is, split the tree left, but it's not. + * Be my guest.) 
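The tail-append case described above is the point of the heuristic: when the insert lands one past the last entry of the last leaf, the split leaves the old page full and starts the right page empty, instead of moving half the entries. A toy split-point chooser under that assumption (not the kernel's code, just the decision it encodes):

        #include <stdio.h>

        /* returns how many entries stay in the left page after the split */
        static int split_point(int maxentry, int skip, int is_last_page)
        {
                if (is_last_page && skip == maxentry)
                        return maxentry;    /* sequential append: left page stays full */
                return maxentry / 2;        /* normal case: split down the middle */
        }

        int main(void)
        {
                printf("append: %d stay left\n", split_point(256, 256, 1)); /* 256 */
                printf("middle: %d stay left\n", split_point(256, 100, 1)); /* 128 */
                return 0;
        }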
+ */ + if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) { + /* + * acquire a transaction lock on the new/right page; + * + * action: xad insertion; + */ + /* insert entry at the first entry of the new right page */ + xad = &rp->xad[XTENTRYSTART]; + XT_PUTENTRY(xad, split->flag, split->off, split->len, + split->addr); + + rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); + + if (!test_cflag(COMMIT_Nolink, ip)) { + /* rxtlck->lwm.offset = XTENTRYSTART; */ + rxtlck->lwm.length = 1; + } + + *rmpp = rmp; + *rbnp = rbn; + + ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd)); + + jEVENT(0, ("xtSplitPage: sp:0x%p rp:0x%p\n", sp, rp)); + return 0; + } + + /* + * non-sequential insert (at possibly middle page) + */ + + /* + * update previous pointer of old next/right page of + */ + if (nextbn != 0) { + XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); + if (rc) { + XT_PUTPAGE(rmp); + return rc; + } + + BT_MARK_DIRTY(mp, ip); + /* + * acquire a transaction lock on the next page; + * + * action:sibling pointer update; + */ + if (!test_cflag(COMMIT_Nolink, ip)) + tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); + + p->header.prev = cpu_to_le64(rbn); + + /* sibling page may have been updated previously, or + * it may be updated later; + */ + + XT_PUTPAGE(mp); + } + + /* + * split the data between the split and new/right pages + */ + maxentry = le16_to_cpu(sp->header.maxentry); + middle = maxentry >> 1; + righthalf = maxentry - middle; + + /* + * skip index in old split/left page - insert into left page: + */ + if (skip <= middle) { + /* move right half of split page to the new right page */ + memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], + righthalf << L2XTSLOTSIZE); + + /* shift right tail of left half to make room for new entry */ + if (skip < middle) + memmove(&sp->xad[skip + 1], &sp->xad[skip], + (middle - skip) << L2XTSLOTSIZE); + + /* insert new entry */ + xad = &sp->xad[skip]; + XT_PUTENTRY(xad, split->flag, split->off, split->len, + split->addr); + + /* update page header */ + sp->header.nextindex = cpu_to_le16(middle + 1); + if (!test_cflag(COMMIT_Nolink, ip)) { + sxtlck->lwm.offset = (sxtlck->lwm.offset) ? + min(skip, (int)sxtlck->lwm.offset) : skip; + } + + rp->header.nextindex = + cpu_to_le16(XTENTRYSTART + righthalf); + } + /* + * skip index in new right page - insert into right page: + */ + else { + /* move left head of right half to right page */ + n = skip - middle; + memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], + n << L2XTSLOTSIZE); + + /* insert new entry */ + n += XTENTRYSTART; + xad = &rp->xad[n]; + XT_PUTENTRY(xad, split->flag, split->off, split->len, + split->addr); + + /* move right tail of right half to right page */ + if (skip < maxentry) + memmove(&rp->xad[n + 1], &sp->xad[skip], + (maxentry - skip) << L2XTSLOTSIZE); + + /* update page header */ + sp->header.nextindex = cpu_to_le16(middle); + if (!test_cflag(COMMIT_Nolink, ip)) { + sxtlck->lwm.offset = (sxtlck->lwm.offset) ? 
+ min(middle, (int)sxtlck->lwm.offset) : middle; + } + + rp->header.nextindex = cpu_to_le16(XTENTRYSTART + + righthalf + 1); + } + + if (!test_cflag(COMMIT_Nolink, ip)) { + sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) - + sxtlck->lwm.offset; + + /* rxtlck->lwm.offset = XTENTRYSTART; */ + rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - + XTENTRYSTART; + } + + *rmpp = rmp; + *rbnp = rbn; + + ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd)); + + jEVENT(0, ("xtSplitPage: sp:0x%p rp:0x%p\n", sp, rp)); + return rc; +} + + +/* + * xtSplitRoot() + * + * function: + * split the full root page into + * original/root/split page and new right page + * i.e., root remains fixed in tree anchor (inode) and + * the root is copied to a single new right child page + * since root page << non-root page, and + * the split root page contains a single entry for the + * new right child page. + * + * parameter: + * int tid, + * struct inode *ip, + * xtsplit_t *split, + * metapage_t **rmpp) + * + * return: + * Pointer to page in which to insert or NULL on error. + */ +static int +xtSplitRoot(tid_t tid, + struct inode *ip, xtsplit_t * split, metapage_t ** rmpp) +{ + xtpage_t *sp; + metapage_t *rmp; + xtpage_t *rp; + s64 rbn; + int skip, nextindex; + xad_t *xad; + pxd_t *pxd; + pxdlist_t *pxdlist; + tlock_t *tlck; + xtlock_t *xtlck; + + sp = &JFS_IP(ip)->i_xtroot; + + INCREMENT(xtStat.split); + + /* + * allocate a single (right) child page + */ + pxdlist = split->pxdlist; + pxd = &pxdlist->pxd[pxdlist->npxd]; + pxdlist->npxd++; + rbn = addressPXD(pxd); + rmp = get_metapage(ip, rbn, PSIZE, 1); + if (rmp == NULL) + return EIO; + + jEVENT(0, ("xtSplitRoot: ip:0x%p rmp:0x%p\n", ip, rmp)); + + /* + * acquire a transaction lock on the new right page; + * + * action: new page; + */ + BT_MARK_DIRTY(rmp, ip); + + rp = (xtpage_t *) rmp->data; + rp->header.flag = + (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; + rp->header.self = *pxd; + rp->header.nextindex = cpu_to_le16(XTENTRYSTART); + rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE); + + /* initialize sibling pointers */ + rp->header.next = 0; + rp->header.prev = 0; + + /* + * copy the in-line root page into new right page extent + */ + nextindex = le16_to_cpu(sp->header.maxentry); + memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART], + (nextindex - XTENTRYSTART) << L2XTSLOTSIZE); + + /* + * insert the new entry into the new right/child page + * (skip index in the new right page will not change) + */ + skip = split->index; + /* if insert into middle, shift right remaining entries */ + if (skip != nextindex) + memmove(&rp->xad[skip + 1], &rp->xad[skip], + (nextindex - skip) * sizeof(xad_t)); + + xad = &rp->xad[skip]; + XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); + + /* update page header */ + rp->header.nextindex = cpu_to_le16(nextindex + 1); + + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->lwm.offset = XTENTRYSTART; + xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - + XTENTRYSTART; + } + + /* + * reset the root + * + * init root with the single entry for the new right page + * set the 1st entry offset to 0, which force the left-most key + * at any level of the tree to be less than any search key. 
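The zero offset written into the root's single router entry matters: internal-node search descends through the last entry whose key is not greater than the search key, so a leftmost key of 0 guarantees every lookup finds a child. A standalone sketch of that invariant (pick_child is a simplified linear version of the real binary search):

        #include <stdio.h>
        #include <stdint.h>

        static int pick_child(const uint64_t *keys, int n, uint64_t k)
        {
                int i;

                /* last index with keys[i] <= k; keys[0] == 0 makes i >= 0 always */
                for (i = n - 1; i > 0; i--)
                        if (keys[i] <= k)
                                break;
                return i;
        }

        int main(void)
        {
                uint64_t keys[] = { 0, 1024, 4096 };    /* router keys after splits */

                printf("%d %d %d\n",
                       pick_child(keys, 3, 0),          /* 0 */
                       pick_child(keys, 3, 2000),       /* 1 */
                       pick_child(keys, 3, 9999));      /* 2 */
                return 0;
        }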
+ */ + /* + * acquire a transaction lock on the root page (in-memory inode); + * + * action: root split; + */ + BT_MARK_DIRTY(split->mp, ip); + + xad = &sp->xad[XTENTRYSTART]; + XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn); + + /* update page header of root */ + sp->header.flag &= ~BT_LEAF; + sp->header.flag |= BT_INTERNAL; + + sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); + + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->lwm.offset = XTENTRYSTART; + xtlck->lwm.length = 1; + } + + *rmpp = rmp; + + ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd)); + + jEVENT(0, ("xtSplitRoot: sp:0x%p rp:0x%p\n", sp, rp)); + return 0; +} + + +/* + * xtExtend() + * + * function: extend in-place; + * + * note: existing extent may or may not have been committed. + * caller is responsible for pager buffer cache update, and + * working block allocation map update; + * update pmap: alloc whole extended extent; + */ +int xtExtend(tid_t tid, /* transaction id */ + struct inode *ip, s64 xoff, /* delta extent offset */ + s32 xlen, /* delta extent length */ + int flag) +{ + int rc = 0; + int cmp; + metapage_t *mp; /* meta-page buffer */ + xtpage_t *p; /* base B+-tree index page */ + s64 bn; + int index, nextindex, len; + btstack_t btstack; /* traverse stack */ + xtsplit_t split; /* split information */ + xad_t *xad; + s64 xaddr; + tlock_t *tlck; + xtlock_t *xtlck = 0; + int rootsplit = 0; + + jFYI(1, + ("xtExtend: nxoff:0x%lx nxlen:0x%x\n", (ulong) xoff, xlen)); + + /* there must exist extent to be extended */ + if ((rc = xtSearch(ip, xoff - 1, &cmp, &btstack, XT_INSERT))) + return rc; + assert(cmp == 0); + + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + + /* extension must be contiguous */ + xad = &p->xad[index]; + jFYI(0, ("xtExtend: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", + (ulong) offsetXAD(xad), lengthXAD(xad), + (ulong) addressXAD(xad))); + assert((offsetXAD(xad) + lengthXAD(xad)) == xoff); + + /* + * acquire a transaction lock on the leaf page; + * + * action: xad insertion/extension; + */ + BT_MARK_DIRTY(mp, ip); + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + } + + /* extend will overflow extent ? */ + xlen = lengthXAD(xad) + xlen; + if ((len = xlen - MAXXLEN) <= 0) + goto extendOld; + + /* + * extent overflow: insert entry for new extent + */ +//insertNew: + xoff = offsetXAD(xad) + MAXXLEN; + xaddr = addressXAD(xad) + MAXXLEN; + nextindex = le16_to_cpu(p->header.nextindex); + + /* + * if the leaf page is full, insert the new entry and + * propagate up the router entry for the new page from split + * + * The xtSplitUp() will insert the entry and unpin the leaf page. 
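The overflow test in xtExtend() above splits the requested growth at MAXXLEN: whatever fits extends the existing xad in place, and the remainder becomes a new entry at the next offset. A worked example, with TOY_MAXXLEN as an assumed stand-in for the real on-disk length limit:

        #include <stdio.h>

        #define TOY_MAXXLEN 0x1000      /* stand-in for the on-disk length limit */

        int main(void)
        {
                int old_len = 0x0f00, delta = 0x0300;
                int xlen = old_len + delta;     /* 0x1200: overflows the field */
                int spill = xlen - TOY_MAXXLEN; /* 0x0200 goes into a new xad */

                if (spill > 0)
                        printf("extend old to 0x%x, new extent of 0x%x\n",
                               TOY_MAXXLEN, spill);
                else
                        printf("extend in place to 0x%x\n", xlen);
                return 0;
        }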
+ */ + if (nextindex == le16_to_cpu(p->header.maxentry)) { + rootsplit = p->header.flag & BT_ROOT; + + /* xtSpliUp() unpins leaf pages */ + split.mp = mp; + split.index = index + 1; + split.flag = XAD_NEW; + split.off = xoff; /* split offset */ + split.len = len; + split.addr = xaddr; + split.pxdlist = NULL; + if ((rc = xtSplitUp(tid, ip, &split, &btstack))) + return rc; + + /* + * if leaf root has been split, original root has been + * copied to new child page, i.e., original entry now + * resides on the new child page; + */ + if (rootsplit) { + if (p->header.nextindex == + cpu_to_le16(XTENTRYSTART + 1)) { + xad = &p->xad[XTENTRYSTART]; + bn = addressXAD(xad); + + /* get new child page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + + BT_MARK_DIRTY(mp, ip); + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, + tlckXTREE | + tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + } + } + } else + /* get back old page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + } + /* + * insert the new entry into the leaf page + */ + else { + /* insert the new entry: mark the entry NEW */ + xad = &p->xad[index + 1]; + XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); + + /* advance next available entry index */ + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); + } + + /* get back old entry */ + xad = &p->xad[index]; + xlen = MAXXLEN; + + /* + * extend old extent + */ + extendOld: + XADlength(xad, xlen); + if (!(xad->flag & XAD_NEW)) + xad->flag |= XAD_EXTENDED; + + if (!test_cflag(COMMIT_Nolink, ip)) { + xtlck->lwm.offset = + (xtlck->lwm.offset) ? min(index, + (int)xtlck->lwm.offset) : index; + xtlck->lwm.length = + le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; + } + + /* unpin the leaf page */ + XT_PUTPAGE(mp); + + return rc; +} + + +/* + * xtTailgate() + * + * function: split existing 'tail' extent + * (split offset >= start offset of tail extent), and + * relocate and extend the split tail half; + * + * note: existing extent may or may not have been committed. + * caller is responsible for pager buffer cache update, and + * working block allocation map update; + * update pmap: free old split tail extent, alloc new extent; + */ +int xtTailgate(tid_t tid, /* transaction id */ + struct inode *ip, s64 xoff, /* split/new extent offset */ + s32 xlen, /* new extent length */ + s64 xaddr, /* new extent address */ + int flag) +{ + int rc = 0; + int cmp; + metapage_t *mp; /* meta-page buffer */ + xtpage_t *p; /* base B+-tree index page */ + s64 bn; + int index, nextindex, llen, rlen; + btstack_t btstack; /* traverse stack */ + xtsplit_t split; /* split information */ + xad_t *xad; + tlock_t *tlck; + xtlock_t *xtlck = 0; + tlock_t *mtlck; + maplock_t *pxdlock; + int rootsplit = 0; + +/* +printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", + (ulong)xoff, xlen, (ulong)xaddr); +*/ + + /* there must exist extent to be tailgated */ + if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) + return rc; + assert(cmp == 0); + + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + + /* entry found must be last entry */ + nextindex = le16_to_cpu(p->header.nextindex); + assert(index == nextindex - 1); + + BT_MARK_DIRTY(mp, ip); + /* + * acquire tlock of the leaf page containing original entry + */ + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + } + + /* completely replace extent ? 
*/ + xad = &p->xad[index]; +/* +printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", + (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); +*/ + if ((llen = xoff - offsetXAD(xad)) == 0) + goto updateOld; + + /* + * partially replace extent: insert entry for new extent + */ +//insertNew: + /* + * if the leaf page is full, insert the new entry and + * propagate up the router entry for the new page from split + * + * The xtSplitUp() will insert the entry and unpin the leaf page. + */ + if (nextindex == le16_to_cpu(p->header.maxentry)) { + rootsplit = p->header.flag & BT_ROOT; + + /* xtSpliUp() unpins leaf pages */ + split.mp = mp; + split.index = index + 1; + split.flag = XAD_NEW; + split.off = xoff; /* split offset */ + split.len = xlen; + split.addr = xaddr; + split.pxdlist = NULL; + if ((rc = xtSplitUp(tid, ip, &split, &btstack))) + return rc; + + /* + * if leaf root has been split, original root has been + * copied to new child page, i.e., original entry now + * resides on the new child page; + */ + if (rootsplit) { + if (p->header.nextindex == + cpu_to_le16(XTENTRYSTART + 1)) { + xad = &p->xad[XTENTRYSTART]; + bn = addressXAD(xad); + + /* get new child page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + + BT_MARK_DIRTY(mp, ip); + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, + tlckXTREE | + tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + } + } + } else + /* get back old page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + } + /* + * insert the new entry into the leaf page + */ + else { + /* insert the new entry: mark the entry NEW */ + xad = &p->xad[index + 1]; + XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); + + /* advance next available entry index */ + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); + } + + /* get back old XAD */ + xad = &p->xad[index]; + + /* + * truncate/relocate old extent at split offset + */ + updateOld: + /* update dmap for old/committed/truncated extent */ + rlen = lengthXAD(xad) - llen; + if (!(xad->flag & XAD_NEW)) { + /* free from PWMAP at commit */ + if (!test_cflag(COMMIT_Nolink, ip)) { + mtlck = txMaplock(tid, ip, tlckMAP); + pxdlock = (maplock_t *) & mtlck->lock; + pxdlock->flag = mlckFREEPXD; + PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen); + PXDlength(&pxdlock->pxd, rlen); + pxdlock->index = 1; + } + jEVENT(0, + ("xtTailgate: free extent xaddr:0x%lx xlen:0x%x\n", + (ulong) addressPXD(&pxdlock->pxd), + lengthPXD(&pxdlock->pxd))); + } else + /* free from WMAP */ + dbFree(ip, addressXAD(xad) + llen, (s64) rlen); + + if (llen) + /* truncate */ + XADlength(xad, llen); + else + /* replace */ + XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); + + if (!test_cflag(COMMIT_Nolink, ip)) { + xtlck->lwm.offset = (xtlck->lwm.offset) ? 
+ min(index, (int)xtlck->lwm.offset) : index; + xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - + xtlck->lwm.offset; + } + + /* unpin the leaf page */ + XT_PUTPAGE(mp); + + return rc; +} + + +/* + * xtUpdate() + * + * function: update XAD; + * + * update extent for allocated_but_not_recorded or + * compressed extent; + * + * parameter: + * nxad - new XAD; + * logical extent of the specified XAD must be completely + * contained by an existing XAD; + */ +int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) +{ /* new XAD */ + int rc = 0; + int cmp; + metapage_t *mp; /* meta-page buffer */ + xtpage_t *p; /* base B+-tree index page */ + s64 bn; + int index0, index, newindex, nextindex; + btstack_t btstack; /* traverse stack */ + xtsplit_t split; /* split information */ + xad_t *xad, *lxad, *rxad; + int xflag; + s64 nxoff, xoff; + int nxlen, xlen, lxlen, rxlen; + s64 nxaddr, xaddr; + tlock_t *tlck; + xtlock_t *xtlck = 0; + int rootsplit = 0, newpage = 0; + + /* there must exist extent to be tailgated */ + nxoff = offsetXAD(nxad); + nxlen = lengthXAD(nxad); + nxaddr = addressXAD(nxad); +/* +printf("xtUpdate: nxflag:0x%x nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", + nxad->flag, (ulong)nxoff, nxlen, (ulong)nxaddr); +*/ + if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT))) + return rc; + assert(cmp == 0); + + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); + + BT_MARK_DIRTY(mp, ip); + /* + * acquire tlock of the leaf page containing original entry + */ + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + } + + xad = &p->xad[index0]; + xflag = xad->flag; + xoff = offsetXAD(xad); + xlen = lengthXAD(xad); + xaddr = addressXAD(xad); +/* +printf("xtUpdate: xflag:0x%x xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", + xflag, (ulong)xoff, xlen, (ulong)xaddr); +*/ + + /* nXAD must be completely contained within XAD */ + assert(xoff <= nxoff); + assert(nxoff + nxlen <= xoff + xlen); + + index = index0; + newindex = index + 1; + nextindex = le16_to_cpu(p->header.nextindex); + +#ifdef _JFS_WIP_NOCOALESCE + if (xoff < nxoff) + goto updateRight; + + /* + * replace XAD with nXAD + */ + replace: /* (nxoff == xoff) */ + if (nxlen == xlen) { + /* replace XAD with nXAD:recorded */ + *xad = *nxad; + xad->flag = xflag & ~XAD_NOTRECORDED; + + goto out; + } else /* (nxlen < xlen) */ + goto updateLeft; +#endif /* _JFS_WIP_NOCOALESCE */ + +/* #ifdef _JFS_WIP_COALESCE */ + if (xoff < nxoff) + goto coalesceRight; + + /* + * coalesce with left XAD + */ +//coalesceLeft: /* (xoff == nxoff) */ + /* is XAD first entry of page ? */ + if (index == XTENTRYSTART) + goto replace; + + /* is nXAD logically and physically contiguous with lXAD ? */ + lxad = &p->xad[index - 1]; + lxlen = lengthXAD(lxad); + if (!(lxad->flag & XAD_NOTRECORDED) && + (nxoff == offsetXAD(lxad) + lxlen) && + (nxaddr == addressXAD(lxad) + lxlen) && + (lxlen + nxlen < MAXXLEN)) { + /* extend right lXAD */ + index0 = index - 1; + XADlength(lxad, lxlen + nxlen); + + /* If we just merged two extents together, need to make sure the + * right extent gets logged. If the left one is marked XAD_NEW, + * then we know it will be logged. 
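Coalescing above only fires when the new extent is contiguous with its neighbour both logically (file offsets) and physically (disk addresses), and the merged length still fits in an xad. The test in isolation, again with TOY_MAXXLEN as an assumed stand-in for the real limit:

        #include <stdio.h>
        #include <stdint.h>

        #define TOY_MAXXLEN 0x1000

        struct ext { uint64_t off, addr; uint32_t len; };

        static int can_merge(const struct ext *l, const struct ext *r)
        {
                return r->off == l->off + l->len &&     /* logically contiguous */
                       r->addr == l->addr + l->len &&   /* physically contiguous */
                       l->len + r->len < TOY_MAXXLEN;   /* merged length representable */
        }

        int main(void)
        {
                struct ext l   = { 0,  0x8000, 16 };
                struct ext adj = { 16, 0x8010, 8 };  /* follows l in file and on disk */
                struct ext far = { 16, 0x9000, 8 };  /* logically adjacent only */

                printf("%d %d\n", can_merge(&l, &adj), can_merge(&l, &far)); /* 1 0 */
                return 0;
        }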
Otherwise, mark as + * XAD_EXTENDED + */ + if (!(lxad->flag & XAD_NEW)) + lxad->flag |= XAD_EXTENDED; + + if (xlen > nxlen) { + /* truncate XAD */ + XADoffset(xad, xoff + nxlen); + XADlength(xad, xlen - nxlen); + XADaddress(xad, xaddr + nxlen); + goto out; + } else { /* (xlen == nxlen) */ + + /* remove XAD */ + if (index < nextindex - 1) + memmove(&p->xad[index], &p->xad[index + 1], + (nextindex - index - + 1) << L2XTSLOTSIZE); + + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) - + 1); + + index = index0; + newindex = index + 1; + nextindex = le16_to_cpu(p->header.nextindex); + xoff = nxoff = offsetXAD(lxad); + xlen = nxlen = lxlen + nxlen; + xaddr = nxaddr = addressXAD(lxad); + goto coalesceRight; + } + } + + /* + * replace XAD with nXAD + */ + replace: /* (nxoff == xoff) */ + if (nxlen == xlen) { + /* replace XAD with nXAD:recorded */ + *xad = *nxad; + xad->flag = xflag & ~XAD_NOTRECORDED; + + goto coalesceRight; + } else /* (nxlen < xlen) */ + goto updateLeft; + + /* + * coalesce with right XAD + */ + coalesceRight: /* (xoff <= nxoff) */ + /* is XAD last entry of page ? */ + if (newindex == nextindex) { + if (xoff == nxoff) + goto out; + goto updateRight; + } + + /* is nXAD logically and physically contiguous with rXAD ? */ + rxad = &p->xad[index + 1]; + rxlen = lengthXAD(rxad); + if (!(rxad->flag & XAD_NOTRECORDED) && + (nxoff + nxlen == offsetXAD(rxad)) && + (nxaddr + nxlen == addressXAD(rxad)) && + (rxlen + nxlen < MAXXLEN)) { + /* extend left rXAD */ + XADoffset(rxad, nxoff); + XADlength(rxad, rxlen + nxlen); + XADaddress(rxad, nxaddr); + + /* If we just merged two extents together, need to make sure + * the left extent gets logged. If the right one is marked + * XAD_NEW, then we know it will be logged. Otherwise, mark as + * XAD_EXTENDED + */ + if (!(rxad->flag & XAD_NEW)) + rxad->flag |= XAD_EXTENDED; + + if (xlen > nxlen) + /* truncate XAD */ + XADlength(xad, xlen - nxlen); + else { /* (xlen == nxlen) */ + + /* remove XAD */ + memmove(&p->xad[index], &p->xad[index + 1], + (nextindex - index - 1) << L2XTSLOTSIZE); + + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) - + 1); + } + + goto out; + } else if (xoff == nxoff) + goto out; + + assert(xoff < nxoff); +/* #endif _JFS_WIP_COALESCE */ + + /* + * split XAD into (lXAD, nXAD): + * + * |---nXAD---> + * --|----------XAD----------|-- + * |-lXAD-| + */ + updateRight: /* (xoff < nxoff) */ + /* truncate old XAD as lXAD:not_recorded */ + xad = &p->xad[index]; + XADlength(xad, nxoff - xoff); + + /* insert nXAD:recorded */ + if (nextindex == le16_to_cpu(p->header.maxentry)) { +/* +printf("xtUpdate.updateRight.split p:0x%p\n", p); +*/ + rootsplit = p->header.flag & BT_ROOT; + + /* xtSpliUp() unpins leaf pages */ + split.mp = mp; + split.index = newindex; + split.flag = xflag & ~XAD_NOTRECORDED; + split.off = nxoff; + split.len = nxlen; + split.addr = nxaddr; + split.pxdlist = NULL; + if ((rc = xtSplitUp(tid, ip, &split, &btstack))) + return rc; + + /* + * if leaf root has been split, original root has been + * copied to new child page, i.e., original entry now + * resides on the new child page; + */ + if (rootsplit) { + if (p->header.nextindex == + cpu_to_le16(XTENTRYSTART + 1)) { + xad = &p->xad[XTENTRYSTART]; + bn = addressXAD(xad); + + /* get new child page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + + BT_MARK_DIRTY(mp, ip); + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, + tlckXTREE | + tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + } + } + } else { + /* get back old 
page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + + /* is nXAD on new page ? */ + if (newindex > + (le16_to_cpu(p->header.maxentry) >> 1)) { + newindex = + newindex - + le16_to_cpu(p->header.nextindex) + + XTENTRYSTART; + newpage = 1; + } + } + } else { + /* if insert into middle, shift right remaining entries */ + if (newindex < nextindex) + memmove(&p->xad[newindex + 1], &p->xad[newindex], + (nextindex - newindex) << L2XTSLOTSIZE); + + /* insert the entry */ + xad = &p->xad[newindex]; + *xad = *nxad; + xad->flag = xflag & ~XAD_NOTRECORDED; + + /* advance next available entry index. */ + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); + } + + /* + * does nXAD force 3-way split ? + * + * |---nXAD--->| + * --|----------XAD-------------|-- + * |-lXAD-| |-rXAD -| + */ + if (nxoff + nxlen == xoff + xlen) + goto out; + + /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */ + if (newpage) { + /* close out old page */ + if (!test_cflag(COMMIT_Nolink, ip)) { + xtlck->lwm.offset = (xtlck->lwm.offset) ? + min(index0, (int)xtlck->lwm.offset) : index0; + xtlck->lwm.length = + le16_to_cpu(p->header.nextindex) - + xtlck->lwm.offset; + } + + bn = le64_to_cpu(p->header.next); + XT_PUTPAGE(mp); + + /* get new right page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + + BT_MARK_DIRTY(mp, ip); + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + } + + index0 = index = newindex; + } else + index++; + + newindex = index + 1; + nextindex = le16_to_cpu(p->header.nextindex); + xlen = xlen - (nxoff - xoff); + xoff = nxoff; + xaddr = nxaddr; + + /* recompute split pages */ + if (nextindex == le16_to_cpu(p->header.maxentry)) { +/* +printf("xtUpdate: updateRight+Left recompute split pages: p:0x%p\n", p); +*/ + XT_PUTPAGE(mp); + + if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT))) + return rc; + assert(cmp == 0); + + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); + assert(index0 == index); + } + + /* + * split XAD into (nXAD, rXAD) + * + * ---nXAD---| + * --|----------XAD----------|-- + * |-rXAD-| + */ + updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ + /* update old XAD with nXAD:recorded */ + xad = &p->xad[index]; + *xad = *nxad; + xad->flag = xflag & ~XAD_NOTRECORDED; + + /* insert rXAD:not_recorded */ + xoff = xoff + nxlen; + xlen = xlen - nxlen; + xaddr = xaddr + nxlen; + if (nextindex == le16_to_cpu(p->header.maxentry)) { + rootsplit = p->header.flag & BT_ROOT; + +/* +printf("xtUpdate.updateLeft.split p:0x%p\n", p); +*/ + /* xtSpliUp() unpins leaf pages */ + split.mp = mp; + split.index = newindex; + split.flag = xflag; + split.off = xoff; + split.len = xlen; + split.addr = xaddr; + split.pxdlist = NULL; + if ((rc = xtSplitUp(tid, ip, &split, &btstack))) + return rc; + + /* + * if leaf root has been split, original root has been + * copied to new child page, i.e., original entry now + * resides on the new child page; + */ + if (rootsplit) { + if (p->header.nextindex == + cpu_to_le16(XTENTRYSTART + 1)) { + xad = &p->xad[XTENTRYSTART]; + bn = addressXAD(xad); + + /* get new child page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + + BT_MARK_DIRTY(mp, ip); + if (!test_cflag(COMMIT_Nolink, ip)) { + tlck = txLock(tid, ip, mp, + tlckXTREE | + tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + } + } + } else + /* get back old page */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + } else { + /* if insert into middle, shift right remaining entries */ + if (newindex < 
nextindex) + memmove(&p->xad[newindex + 1], &p->xad[newindex], + (nextindex - newindex) << L2XTSLOTSIZE); + + /* insert the entry */ + xad = &p->xad[newindex]; + XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); + + /* advance next available entry index. */ + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); + } + + out: + if (!test_cflag(COMMIT_Nolink, ip)) { + xtlck->lwm.offset = (xtlck->lwm.offset) ? + min(index0, (int)xtlck->lwm.offset) : index0; + xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - + xtlck->lwm.offset; + } + + /* unpin the leaf page */ + XT_PUTPAGE(mp); + + return rc; +} + + +#ifdef _STILL_TO_PORT +/* + * xtAppend() + * + * function: grow in append mode from contiguous region specified ; + * + * parameter: + * tid - transaction id; + * ip - file object; + * xflag - extent flag: + * xoff - extent offset; + * maxblocks - max extent length; + * xlen - extent length (in/out); + * xaddrp - extent address pointer (in/out): + * flag - + * + * return: + */ +int xtAppend(tid_t tid, /* transaction id */ + struct inode *ip, int xflag, s64 xoff, s32 maxblocks, /* @GD1 */ + s32 * xlenp, /* (in/out) */ + s64 * xaddrp, /* (in/out) */ + int flag) +{ + int rc = 0; + metapage_t *mp; /* meta-page buffer */ + xtpage_t *p; /* base B+-tree index page */ + s64 bn, xaddr; + int index, nextindex; + btstack_t btstack; /* traverse stack */ + xtsplit_t split; /* split information */ + xad_t *xad; + int cmp; + tlock_t *tlck; + xtlock_t *xtlck; + int nsplit, nblocks, xlen; + pxdlist_t pxdlist; + pxd_t *pxd; + + xaddr = *xaddrp; + xlen = *xlenp; + jEVENT(0, + ("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx\n", + (ulong) xoff, maxblocks, xlen, (ulong) xaddr)); + + /* + * search for the entry location at which to insert: + * + * xtFastSearch() and xtSearch() both returns (leaf page + * pinned, index at which to insert). + * n.b. xtSearch() may return index of maxentry of + * the full page. + */ + if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) + return rc; + + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + + if (cmp == 0) { + rc = EEXIST; + goto out; + } +//insert: + /* + * insert entry for new extent + */ + xflag |= XAD_NEW; + + /* + * if the leaf page is full, split the page and + * propagate up the router entry for the new page from split + * + * The xtSplitUp() will insert the entry and unpin the leaf page. 
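+	 *
+	 * Editorial sketch, not part of the original patch: the shape of
+	 * a call into xtAppend().  xlenp and xaddrp are in/out - on
+	 * success they describe the extent actually allocated and mapped,
+	 * which may be shorter than the requested maxblocks.  The local
+	 * names below are hypothetical.
+	 */
+#if 0
+	{
+		s32 alen = 16;		/* hypothetical: ask for 16 blocks */
+		s64 aaddr = 0;		/* hypothetical allocation hint */
+		int arc;
+
+		arc = xtAppend(tid, ip, 0, xoff, 16, &alen, &aaddr, 0);
+		/* arc == 0: alen blocks at aaddr now back the file tail */
+	}
+#endif
+	/*
+	 * The append path proper: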
+ */ + nextindex = le16_to_cpu(p->header.nextindex); + if (nextindex < le16_to_cpu(p->header.maxentry)) + goto insertLeaf; + + /* + * allocate new index blocks to cover index page split(s) + */ + nsplit = btstack.nsplit; + split.pxdlist = &pxdlist; + pxdlist.maxnpxd = pxdlist.npxd = 0; + pxd = &pxdlist.pxd[0]; + nblocks = JFS_SBI(ip->i_sb)->nbperpage; + for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { /* @GD1 */ + if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { + PXDaddress(pxd, xaddr); + PXDlength(pxd, nblocks); + + pxdlist.maxnpxd++; + + continue; + } + + /* undo allocation */ + + goto out; + } + + xlen = min(xlen, maxblocks); /* @GD1 */ + + /* + * allocate data extent requested + */ + if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) + goto out; + + split.mp = mp; + split.index = index; + split.flag = xflag; + split.off = xoff; + split.len = xlen; + split.addr = xaddr; + if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { + /* undo data extent allocation */ + dbFree(ip, *xaddrp, (s64) * xlenp); + + return rc; + } + + *xaddrp = xaddr; + *xlenp = xlen; + return 0; + + /* + * insert the new entry into the leaf page + */ + insertLeaf: + /* + * allocate data extent requested + */ + if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) + goto out; + + BT_MARK_DIRTY(mp, ip); + /* + * acquire a transaction lock on the leaf page; + * + * action: xad insertion/extension; + */ + tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + + /* insert the new entry: mark the entry NEW */ + xad = &p->xad[index]; + XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); + + /* advance next available entry index */ + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); + + xtlck->lwm.offset = + (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index; + xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - + xtlck->lwm.offset; + + *xaddrp = xaddr; + *xlenp = xlen; + + out: + /* unpin the leaf page */ + XT_PUTPAGE(mp); + + return rc; +} + + +/* - TBD for defragmentaion/reorganization - + * + * xtDelete() + * + * function: + * delete the entry with the specified key. + * + * N.B.: whole extent of the entry is assumed to be deleted. + * + * parameter: + * + * return: + * ENOENT: if the entry is not found. + * + * exception: + */ +int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) +{ + int rc = 0; + btstack_t btstack; + int cmp; + s64 bn; + metapage_t *mp; + xtpage_t *p; + int index, nextindex; + tlock_t *tlck; + xtlock_t *xtlck; + + /* + * find the matching entry; xtSearch() pins the page + */ + if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0))) + return rc; + + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + if (cmp) { + /* unpin the leaf page */ + XT_PUTPAGE(mp); + return ENOENT; + } + + /* + * delete the entry from the leaf page + */ + nextindex = le16_to_cpu(p->header.nextindex); + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1); + + /* + * if the leaf page bocome empty, free the page + */ + if (p->header.nextindex == cpu_to_le16(XTENTRYSTART)) + return (xtDeleteUp(tid, ip, mp, p, &btstack)); + + BT_MARK_DIRTY(mp, ip); + /* + * acquire a transaction lock on the leaf page; + * + * action:xad deletion; + */ + tlck = txLock(tid, ip, mp, tlckXTREE); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->lwm.offset = + (xtlck->lwm.offset) ? 
min(index, xtlck->lwm.offset) : index; + + /* if delete from middle, shift left/compact the remaining entries */ + if (index < nextindex - 1) + memmove(&p->xad[index], &p->xad[index + 1], + (nextindex - index - 1) * sizeof(xad_t)); + + XT_PUTPAGE(mp); + + return 0; +} + + +/* - TBD for defragmentaion/reorganization - + * + * xtDeleteUp() + * + * function: + * free empty pages as propagating deletion up the tree + * + * parameter: + * + * return: + */ +static int +xtDeleteUp(tid_t tid, + struct inode *ip, + metapage_t * fmp, xtpage_t * fp, btstack_t * btstack) +{ + int rc = 0; + metapage_t *mp; + xtpage_t *p; + int index, nextindex; + s64 xaddr; + int xlen; + btframe_t *parent; + tlock_t *tlck; + xtlock_t *xtlck; + + /* + * keep root leaf page which has become empty + */ + if (fp->header.flag & BT_ROOT) { + /* keep the root page */ + fp->header.flag &= ~BT_INTERNAL; + fp->header.flag |= BT_LEAF; + fp->header.nextindex = cpu_to_le16(XTENTRYSTART); + + /* XT_PUTPAGE(fmp); */ + + return 0; + } + + /* + * free non-root leaf page + */ + if ((rc = xtRelink(tid, ip, fp))) + return rc; + + xaddr = addressPXD(&fp->header.self); + xlen = lengthPXD(&fp->header.self); + /* free the page extent */ + dbFree(ip, xaddr, (s64) xlen); + + /* free the buffer page */ + discard_metapage(fmp); + + /* + * propagate page deletion up the index tree + * + * If the delete from the parent page makes it empty, + * continue all the way up the tree. + * stop if the root page is reached (which is never deleted) or + * if the entry deletion does not empty the page. + */ + while ((parent = BT_POP(btstack)) != NULL) { + /* get/pin the parent page */ + XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + index = parent->index; + + /* delete the entry for the freed child page from parent. + */ + nextindex = le16_to_cpu(p->header.nextindex); + + /* + * the parent has the single entry being deleted: + * free the parent page which has become empty. + */ + if (nextindex == 1) { + if (p->header.flag & BT_ROOT) { + /* keep the root page */ + p->header.flag &= ~BT_INTERNAL; + p->header.flag |= BT_LEAF; + p->header.nextindex = + cpu_to_le16(XTENTRYSTART); + + /* XT_PUTPAGE(fmp); */ + + break; + } else { + /* free the parent page */ + if ((rc = xtRelink(tid, ip, p))) + return rc; + + xaddr = addressPXD(&p->header.self); + /* free the page extent */ + dbFree(ip, xaddr, + (s64) JFS_SBI(ip->i_sb)->nbperpage); + + /* unpin/free the buffer page */ + discard_metapage(fmp); + + /* propagate up */ + continue; + } + } + /* + * the parent has other entries remaining: + * delete the router entry from the parent page. + */ + else { + BT_MARK_DIRTY(mp, ip); + /* + * acquire a transaction lock on the leaf page; + * + * action:xad deletion; + */ + tlck = txLock(tid, ip, mp, tlckXTREE); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->lwm.offset = + (xtlck->lwm.offset) ? min(index, + xtlck->lwm. 
+ offset) : index; + + /* if delete from middle, + * shift left/compact the remaining entries in the page + */ + if (index < nextindex - 1) + memmove(&p->xad[index], &p->xad[index + 1], + (nextindex - index - + 1) << L2XTSLOTSIZE); + + p->header.nextindex = + cpu_to_le16(le16_to_cpu(p->header.nextindex) - + 1); + jEVENT(0, + ("xtDeleteUp(entry): 0x%lx[%d]\n", + (ulong) parent->bn, index)); + } + + /* unpin the parent page */ + XT_PUTPAGE(mp); + + /* exit propagation up */ + break; + } + + return 0; +} + + +/* + * NAME: xtRelocate() + * + * FUNCTION: relocate xtpage or data extent of regular file; + * This function is mainly used by defragfs utility. + * + * NOTE: This routine does not have the logic to handle + * uncommitted allocated extent. The caller should call + * txCommit() to commit all the allocation before call + * this routine. + */ +xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ + s64 nxaddr, /* new xaddr */ + int xtype) +{ /* extent type: XTPAGE or DATAEXT */ + int rc = 0; + tblock_t *tblk; + tlock_t *tlck; + xtlock_t *xtlck; + metapage_t *mp, *pmp, *lmp, *rmp; /* meta-page buffer */ + xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */ + xad_t *xad; + pxd_t *pxd; + s64 xoff, xsize; + int xlen; + s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn; + cbuf_t *cp; + s64 offset, nbytes, nbrd, pno; + int nb, npages, nblks; + s64 bn; + int cmp; + int index; + pxdlock_t *pxdlock; + btstack_t btstack; /* traverse stack */ + + xtype = xtype & EXTENT_TYPE; + + xoff = offsetXAD(oxad); + oxaddr = addressXAD(oxad); + xlen = lengthXAD(oxad); + + /* validate extent offset */ + offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; + if (offset >= ip->i_size) + return ESTALE; /* stale extent */ + + jEVENT(0, + ("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx\n", + xtype, (ulong) xoff, xlen, (ulong) oxaddr, + (ulong) nxaddr)); + + /* + * 1. get and validate the parent xtpage/xad entry + * covering the source extent to be relocated; + */ + if (xtype == DATAEXT) { + /* search in leaf entry */ + rc = xtSearch(ip, xoff, &cmp, &btstack, 0); + if (rc) + return rc; + if (cmp) { + XT_PUTPAGE(pmp); + return ESTALE; + } + + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); + + /* validate for exact match with a single entry */ + xad = &pp->xad[index]; + if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) { + XT_PUTPAGE(pmp); + return ESTALE; + } + } else { /* (xtype == XTPAGE) */ + + /* search in internal entry */ + rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0); + if (rc) + return rc; + if (cmp) { + XT_PUTPAGE(pmp); + return ESTALE; + } + + /* retrieve search result */ + XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); + + /* xtSearchNode() validated for exact match with a single entry + */ + xad = &pp->xad[index]; + } + jEVENT(0, ("xtRelocate: parent xad entry validated.\n")); + + /* + * 2. 
relocate the extent + */ + if (xtype == DATAEXT) { + /* if the extent is allocated-but-not-recorded + * there is no real data to be moved in this extent, + */ + if (xad->flag & XAD_NOTRECORDED) + goto out; + else + /* release xtpage for cmRead()/xtLookup() */ + XT_PUTPAGE(pmp); + + /* + * cmRelocate() + * + * copy target data pages to be relocated; + * + * data extent must start at page boundary and + * multiple of page size (except the last data extent); + * read in each page of the source data extent into cbuf, + * update the cbuf extent descriptor of the page to be + * homeward bound to new dst data extent + * copy the data from the old extent to new extent. + * copy is essential for compressed files to avoid problems + * that can arise if there was a change in compression + * algorithms. + * it is a good strategy because it may disrupt cache + * policy to keep the pages in memory afterwards. + */ + offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; + assert((offset & CM_OFFSET) == 0); + nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize; + pno = offset >> CM_L2BSIZE; + npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; +/* + npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - + (offset >> CM_L2BSIZE) + 1; +*/ + sxaddr = oxaddr; + dxaddr = nxaddr; + + /* process the request one cache buffer at a time */ + for (nbrd = 0; nbrd < nbytes; nbrd += nb, + offset += nb, pno++, npages--) { + /* compute page size */ + nb = min(nbytes - nbrd, CM_BSIZE); + + /* get the cache buffer of the page */ + if (rc = cmRead(ip, offset, npages, &cp)) + break; + + assert(addressPXD(&cp->cm_pxd) == sxaddr); + assert(!cp->cm_modified); + + /* bind buffer with the new extent address */ + nblks = nb >> JFS_IP(ip->i_sb)->l2bsize; + cmSetXD(ip, cp, pno, dxaddr, nblks); + + /* release the cbuf, mark it as modified */ + cmPut(cp, TRUE); + + dxaddr += nblks; + sxaddr += nblks; + } + + /* get back parent page */ + rc = xtSearch(ip, xoff, &cmp, &btstack, 0); + XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); + jEVENT(0, ("xtRelocate: target data extent relocated.\n")); + } else { /* (xtype == XTPAGE) */ + + /* + * read in the target xtpage from the source extent; + */ + XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); + if (rc) { + XT_PUTPAGE(pmp); + return rc; + } + + /* + * read in sibling pages if any to update sibling pointers; + */ + rmp = NULL; + if (p->header.next) { + nextbn = le64_to_cpu(p->header.next); + XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); + if (rc) { + XT_PUTPAGE(pmp); + XT_PUTPAGE(mp); + return (rc); + } + } + + lmp = NULL; + if (p->header.prev) { + prevbn = le64_to_cpu(p->header.prev); + XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); + if (rc) { + XT_PUTPAGE(pmp); + XT_PUTPAGE(mp); + if (rmp) + XT_PUTPAGE(rmp); + return (rc); + } + } + + /* at this point, all xtpages to be updated are in memory */ + + /* + * update sibling pointers of sibling xtpages if any; + */ + if (lmp) { + BT_MARK_DIRTY(lmp, ip); + tlck = + txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); + lp->header.next = cpu_to_le64(nxaddr); + XT_PUTPAGE(lmp); + } + + if (rmp) { + BT_MARK_DIRTY(rmp, ip); + tlck = + txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); + rp->header.prev = cpu_to_le64(nxaddr); + XT_PUTPAGE(rmp); + } + + /* + * update the target xtpage to be relocated + * + * update the self address of the target page + * and write to destination extent; + * redo image covers the whole xtpage since it is new page + * to the destination extent; + * update of bmap for the free of source extent + * of the target xtpage itself: + * update of bmap for the allocation 
of destination extent + * of the target xtpage itself: + * update of bmap for the extents covered by xad entries in + * the target xtpage is not necessary since they are not + * updated; + * if not committed before this relocation, + * target page may contain XAD_NEW entries which must + * be scanned for bmap update (logredo() always + * scan xtpage REDOPAGE image for bmap update); + * if committed before this relocation (tlckRELOCATE), + * scan may be skipped by commit() and logredo(); + */ + BT_MARK_DIRTY(mp, ip); + /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ + tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); + xtlck = (xtlock_t *) & tlck->lock; + + /* update the self address in the xtpage header */ + pxd = &p->header.self; + PXDaddress(pxd, nxaddr); + + /* linelock for the after image of the whole page */ + xtlck->lwm.length = + le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; + + /* update the buffer extent descriptor of target xtpage */ + xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; + bmSetXD(mp, nxaddr, xsize); + + /* unpin the target page to new homeward bound */ + XT_PUTPAGE(mp); + jEVENT(0, ("xtRelocate: target xtpage relocated.\n")); + } + + /* + * 3. acquire maplock for the source extent to be freed; + * + * acquire a maplock saving the src relocated extent address; + * to free of the extent at commit time; + */ + out: + /* if DATAEXT relocation, write a LOG_UPDATEMAP record for + * free PXD of the source data extent (logredo() will update + * bmap for free of source data extent), and update bmap for + * free of the source data extent; + */ + if (xtype == DATAEXT) + tlck = txMaplock(tid, ip, tlckMAP); + /* if XTPAGE relocation, write a LOG_NOREDOPAGE record + * for the source xtpage (logredo() will init NoRedoPage + * filter and will also update bmap for free of the source + * xtpage), and update bmap for free of the source xtpage; + * N.B. We use tlckMAP instead of tlkcXTREE because there + * is no buffer associated with this lock since the buffer + * has been redirected to the target location. + */ + else /* (xtype == XTPAGE) */ + tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); + + pxdlock = (pxdlock_t *) & tlck->lock; + pxdlock->flag = mlckFREEPXD; + PXDaddress(&pxdlock->pxd, oxaddr); + PXDlength(&pxdlock->pxd, xlen); + pxdlock->index = 1; + + /* + * 4. update the parent xad entry for relocation; + * + * acquire tlck for the parent entry with XAD_NEW as entry + * update which will write LOG_REDOPAGE and update bmap for + * allocation of XAD_NEW destination extent; + */ + jEVENT(0, ("xtRelocate: update parent xad entry.\n")); + BT_MARK_DIRTY(pmp, ip); + tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW); + xtlck = (xtlock_t *) & tlck->lock; + + /* update the XAD with the new destination extent; */ + xad = &pp->xad[index]; + xad->flag |= XAD_NEW; + XADaddress(xad, nxaddr); + + xtlck->lwm.offset = min(index, xtlck->lwm.offset); + xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) - + xtlck->lwm.offset; + + /* unpin the parent xtpage */ + XT_PUTPAGE(pmp); + + return rc; +} + + +/* + * xtSearchNode() + * + * function: search for the internal xad entry covering specified extent. + * This function is mainly used by defragfs utility. + * + * parameters: + * ip - file object; + * xad - extent to find; + * cmpp - comparison result: + * btstack - traverse stack; + * flag - search process flag; + * + * returns: + * btstack contains (bn, index) of search path traversed to the entry. + * *cmpp is set to result of comparison with the entry returned. 
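+ *
+ * Editorial aside, not part of the original patch: the descent below
+ * uses the classic shrinking-window binary search.  A minimal
+ * standalone rendering of that pattern (exact-offset match only; the
+ * real XT_CMP also treats an offset falling inside an extent as a hit):
+ */
+#if 0
+static int xad_find(xtpage_t * p, s64 xoff)
+{
+	int base, index, lim;
+
+	lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
+	for (base = XTENTRYSTART; lim; lim >>= 1) {
+		index = base + (lim >> 1);
+
+		if (xoff == offsetXAD(&p->xad[index]))
+			return index;	/* hit */
+
+		if (xoff > offsetXAD(&p->xad[index])) {
+			base = index + 1;
+			--lim;
+		}
+	}
+	return -1;	/* miss; base is where the entry would be inserted */
+}
+#endif
+/*
+ * returns (continued):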
+ * the page containing the entry is pinned at exit. + */ +static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ + int *cmpp, btstack_t * btstack, int flag) +{ + int rc = 0; + s64 xoff, xaddr; + int xlen; + int cmp = 1; /* init for empty page */ + s64 bn; /* block number */ + metapage_t *mp; /* meta-page buffer */ + xtpage_t *p; /* page */ + int base, index, lim; + btframe_t *btsp; + s64 t64; + + BT_CLR(btstack); + + xoff = offsetXAD(xad); + xlen = lengthXAD(xad); + xaddr = addressXAD(xad); + + /* + * search down tree from root: + * + * between two consecutive entries of and of + * internal page, child page Pi contains entry with k, Ki <= K < Kj. + * + * if entry with search key K is not found + * internal page search find the entry with largest key Ki + * less than K which point to the child page to search; + * leaf page search find the entry with smallest key Kj + * greater than K so that the returned index is the position of + * the entry to be shifted right for insertion of new entry. + * for empty tree, search key is greater than any key of the tree. + * + * by convention, root bn = 0. + */ + for (bn = 0;;) { + /* get/pin the page to search */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + if (p->header.flag & BT_LEAF) + return ESTALE; + + lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; + + /* + * binary search with search key K on the current page + */ + for (base = XTENTRYSTART; lim; lim >>= 1) { + index = base + (lim >> 1); + + XT_CMP(cmp, xoff, &p->xad[index], t64); + if (cmp == 0) { + /* + * search hit + * + * verify for exact match; + */ + if (xaddr == addressXAD(&p->xad[index]) && + xoff == offsetXAD(&p->xad[index])) { + *cmpp = cmp; + + /* save search result */ + btsp = btstack->top; + btsp->bn = bn; + btsp->index = index; + btsp->mp = mp; + + return 0; + } + + /* descend/search its child page */ + goto next; + } + + if (cmp > 0) { + base = index + 1; + --lim; + } + } + + /* + * search miss - non-leaf page: + * + * base is the smallest index with key (Kj) greater than + * search key (K) and may be zero or maxentry index. + * if base is non-zero, decrement base by one to get the parent + * entry of the child page to search. + */ + index = base ? base - 1 : base; + + /* + * go down to child page + */ + next: + /* get the child page block number */ + bn = addressXAD(&p->xad[index]); + + /* unpin the parent page */ + XT_PUTPAGE(mp); + } +} + + +/* + * xtRelink() + * + * function: + * link around a freed page. 
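+ *
+ * Editorial aside, not part of the original patch: in plain pointer
+ * terms the relink is the usual doubly-linked-list removal - except
+ * that the real code below chases on-disk block numbers through
+ * XT_GETPAGE()/XT_PUTPAGE() and le64 conversions, not in-memory
+ * pointers:
+ */
+#if 0
+	/* before:  prev <-> p <-> next        after:  prev <-> next */
+	if (next)
+		next->header.prev = p->header.prev;
+	if (prev)
+		prev->header.next = p->header.next;
+#endif
+/*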
+ * + * Parameter: + * int tid, + * struct inode *ip, + * xtpage_t *p) + * + * returns: + */ +static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) +{ + int rc = 0; + metapage_t *mp; + s64 nextbn, prevbn; + tlock_t *tlck; + + nextbn = le64_to_cpu(p->header.next); + prevbn = le64_to_cpu(p->header.prev); + + /* update prev pointer of the next page */ + if (nextbn != 0) { + XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* + * acquire a transaction lock on the page; + * + * action: update prev pointer; + */ + BT_MARK_DIRTY(mp, ip); + tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); + + /* the page may already have been tlock'd */ + + p->header.prev = cpu_to_le64(prevbn); + + XT_PUTPAGE(mp); + } + + /* update next pointer of the previous page */ + if (prevbn != 0) { + XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* + * acquire a transaction lock on the page; + * + * action: update next pointer; + */ + BT_MARK_DIRTY(mp, ip); + tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); + + /* the page may already have been tlock'd */ + + p->header.next = le64_to_cpu(nextbn); + + XT_PUTPAGE(mp); + } + + return 0; +} +#endif /* _STILL_TO_PORT */ + + +/* + * xtInitRoot() + * + * initialize file root (inline in inode) + */ +void xtInitRoot(tid_t tid, struct inode *ip) +{ + xtpage_t *p; + tlock_t *tlck; + + /* + * acquire a transaction lock on the root + * + * action: + */ + tlck = txLock(tid, ip, (metapage_t *) &JFS_IP(ip)->bxflag, + tlckXTREE | tlckNEW); + p = &JFS_IP(ip)->i_xtroot; + + p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; + p->header.nextindex = cpu_to_le16(XTENTRYSTART); + + if (S_ISDIR(ip->i_mode)) + p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR); + else { + p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); + ip->i_size = 0; + } + + + return; +} + + +/* + * We can run into a deadlock truncating a file with a large number of + * xtree pages (large fragmented file). A robust fix would entail a + * reservation system where we would reserve a number of metadata pages + * and tlocks which we would be guaranteed without a deadlock. Without + * this, a partial fix is to limit number of metadata pages we will lock + * in a single transaction. Currently we will truncate the file so that + * no more than 50 leaf pages will be locked. The caller of xtTruncate + * will be responsible for ensuring that the current transaction gets + * committed, and that subsequent transactions are created to truncate + * the file further if needed. + */ +#define MAX_TRUNCATE_LEAVES 50 + +/* + * xtTruncate() + * + * function: + * traverse for truncation logging backward bottom up; + * terminate at the last extent entry at the current subtree + * root page covering new down size. + * truncation may occur within the last extent entry. + * + * parameter: + * int tid, + * struct inode *ip, + * s64 newsize, + * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} + * + * return: + * + * note: + * PWMAP: + * 1. truncate (non-COMMIT_NOLINK file) + * by jfs_truncate() or jfs_open(O_TRUNC): + * xtree is updated; + * 2. truncate index table of directory when last entry removed + * map update via tlock at commit time; + * PMAP: + * Call xtTruncate_pmap instead + * WMAP: + * 1. remove (free zero link count) on last reference release + * (pmap has been freed at commit zero link count); + * 2. 
truncate (COMMIT_NOLINK file, i.e., tmp file): + * xtree is updated; + * map update directly at truncation time; + * + * if (DELETE) + * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); + * else if (TRUNCATE) + * must write LOG_NOREDOPAGE for deleted index page; + * + * pages may already have been tlocked by anonymous transactions + * during file growth (i.e., write) before truncation; + * + * except last truncated entry, deleted entries remains as is + * in the page (nextindex is updated) for other use + * (e.g., log/update allocation map): this avoid copying the page + * info but delay free of pages; + * + */ +s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) +{ + int rc = 0; + s64 teof; + metapage_t *mp; + xtpage_t *p; + s64 bn; + int index, nextindex; + xad_t *xad; + s64 xoff, xaddr; + int xlen, len, freexlen; + btstack_t btstack; + btframe_t *parent; + tblock_t *tblk = 0; + tlock_t *tlck = 0; + xtlock_t *xtlck = 0; + xdlistlock_t xadlock; /* maplock for COMMIT_WMAP */ + pxdlock_t *pxdlock; /* maplock for COMMIT_WMAP */ + s64 nfreed; + int freed, log; + int locked_leaves = 0; + + /* save object truncation type */ + if (tid) { + tblk = tid_to_tblock(tid); + tblk->xflag |= flag; + } + + nfreed = 0; + + flag &= COMMIT_MAP; + assert(flag != COMMIT_PMAP); + + if (flag == COMMIT_PWMAP) + log = 1; + else { + log = 0; + xadlock.flag = mlckFREEXADLIST; + xadlock.index = 1; + } + + /* + * if the newsize is not an integral number of pages, + * the file between newsize and next page boundary will + * be cleared. + * if truncating into a file hole, it will cause + * a full block to be allocated for the logical block. + */ + + /* + * release page blocks of truncated region + * + * free the data blocks from the leaf index blocks. + * delete the parent index entries corresponding to + * the freed child data/index blocks. + * free the index blocks themselves which aren't needed + * in new sized file. + * + * index blocks are updated only if the blocks are to be + * retained in the new sized file. + * if type is PMAP, the data and index pages are NOT + * freed, and the data and index blocks are NOT freed + * from working map. + * (this will allow continued access of data/index of + * temporary file (zerolink count file truncated to zero-length)). + */ + teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >> + JFS_SBI(ip->i_sb)->l2bsize; + + /* clear stack */ + BT_CLR(&btstack); + + /* + * start with root + * + * root resides in the inode + */ + bn = 0; + + /* + * first access of each page: + */ + getPage: + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return -rc; + + /* process entries backward from last index */ + index = le16_to_cpu(p->header.nextindex) - 1; + + if (p->header.flag & BT_INTERNAL) + goto getChild; + + /* + * leaf page + */ + + /* Since this is the rightmost leaf, and we may have already freed + * a page that was formerly to the right, let's make sure that the + * next pointer is zero. + */ + p->header.next = 0; + + freed = 0; + + /* does region covered by leaf page precede Teof ? 
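+	 * (teof was computed above as newsize rounded up to fs blocks)
+	 */
+#if 0
+	/*
+	 * Editorial example, not part of the original patch, assuming a
+	 * 4K block size (l2bsize == 12):
+	 */
+	{
+		s64 example_newsize = 5000;
+		s64 example_teof = (example_newsize + 4095) >> 12; /* == 2 */
+		/* blocks 0 and 1 survive; block 2 onward is freed */
+	}
+#endif
+	/*
+	 * i.e. does the whole leaf lie below the truncation point,
+	 * leaving nothing on it to free ?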
*/ + xad = &p->xad[index]; + xoff = offsetXAD(xad); + xlen = lengthXAD(xad); + if (teof >= xoff + xlen) { + XT_PUTPAGE(mp); + goto getParent; + } + + /* (re)acquire tlock of the leaf page */ + if (log) { + if (++locked_leaves > MAX_TRUNCATE_LEAVES) { + /* + * We need to limit the size of the transaction + * to avoid exhausting pagecache & tlocks + */ + XT_PUTPAGE(mp); + newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; + goto getParent; + } + tlck = txLock(tid, ip, mp, tlckXTREE); + tlck->type = tlckXTREE | tlckTRUNCATE; + xtlck = (xtlock_t *) & tlck->lock; + xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; + } + BT_MARK_DIRTY(mp, ip); + + /* + * scan backward leaf page entries + */ + for (; index >= XTENTRYSTART; index--) { + xad = &p->xad[index]; + xoff = offsetXAD(xad); + xlen = lengthXAD(xad); + xaddr = addressXAD(xad); + + /* + * entry beyond eof: continue scan of current page + * xad + * ---|---=======-------> + * eof + */ + if (teof < xoff) { + nfreed += xlen; + continue; + } + + /* + * (xoff <= teof): last entry to be deleted from page; + * If other entries remain in page: keep and update the page. + */ + + /* + * eof == entry_start: delete the entry + * xad + * -------|=======-------> + * eof + * + */ + if (teof == xoff) { + nfreed += xlen; + + if (index == XTENTRYSTART) + break; + + nextindex = index; + } + /* + * eof within the entry: truncate the entry. + * xad + * -------===|===-------> + * eof + */ + else if (teof < xoff + xlen) { + /* update truncated entry */ + len = teof - xoff; + freexlen = xlen - len; + XADlength(xad, len); + + /* save pxd of truncated extent in tlck */ + xaddr += len; + if (log) { /* COMMIT_PWMAP */ + xtlck->lwm.offset = (xtlck->lwm.offset) ? + min(index, (int)xtlck->lwm.offset) : index; + xtlck->lwm.length = index + 1 - + xtlck->lwm.offset; + pxdlock = (pxdlock_t *) & xtlck->pxdlock; + pxdlock->flag = mlckFREEPXD; + PXDaddress(&pxdlock->pxd, xaddr); + PXDlength(&pxdlock->pxd, freexlen); + } + /* free truncated extent */ + else { /* COMMIT_WMAP */ + + pxdlock = (pxdlock_t *) & xadlock; + pxdlock->flag = mlckFREEPXD; + PXDaddress(&pxdlock->pxd, xaddr); + PXDlength(&pxdlock->pxd, freexlen); + txFreeMap(ip, pxdlock, 0, COMMIT_WMAP); + + /* reset map lock */ + xadlock.flag = mlckFREEXADLIST; + } + + /* current entry is new last entry; */ + nextindex = index + 1; + + nfreed += freexlen; + } + /* + * eof beyond the entry: + * xad + * -------=======---|---> + * eof + */ + else { /* (xoff + xlen < teof) */ + + nextindex = index + 1; + } + + if (nextindex < le16_to_cpu(p->header.nextindex)) { + if (!log) { /* COMMIT_WAMP */ + xadlock.xdlist = &p->xad[nextindex]; + xadlock.count = + le16_to_cpu(p->header.nextindex) - + nextindex; + txFreeMap(ip, (maplock_t *) & xadlock, 0, + COMMIT_WMAP); + } + p->header.nextindex = cpu_to_le16(nextindex); + } + + XT_PUTPAGE(mp); + + /* assert(freed == 0); */ + goto getParent; + } /* end scan of leaf page entries */ + + freed = 1; + + /* + * leaf page become empty: free the page if type != PMAP + */ + if (log) { /* COMMIT_PWMAP */ + /* txCommit() with tlckFREE: + * free data extents covered by leaf [XTENTRYSTART:hwm); + * invalidate leaf if COMMIT_PWMAP; + * if (TRUNCATE), will write LOG_NOREDOPAGE; + */ + tlck->type = tlckXTREE | tlckFREE; + } else { /* COMMIT_WAMP */ + + /* free data extents covered by leaf */ + xadlock.xdlist = &p->xad[XTENTRYSTART]; + xadlock.count = + le16_to_cpu(p->header.nextindex) - XTENTRYSTART; + txFreeMap(ip, (maplock_t *) & xadlock, 0, COMMIT_WMAP); + } + + if (p->header.flag & BT_ROOT) 
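+	/* editorial note, not in the original patch: the root xtpage
+	 * lives in-line in the inode and is never freed - emptying the
+	 * tree collapses it back to an empty root leaf instead */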
{ + p->header.flag &= ~BT_INTERNAL; + p->header.flag |= BT_LEAF; + p->header.nextindex = cpu_to_le16(XTENTRYSTART); + + XT_PUTPAGE(mp); /* debug */ + goto out; + } else { + if (log) { /* COMMIT_PWMAP */ + /* page will be invalidated at tx completion + */ + XT_PUTPAGE(mp); + } else { /* COMMIT_WMAP */ + + if (mp->lid) + lid_to_tlock(mp->lid)->flag |= tlckFREELOCK; + + /* invalidate empty leaf page */ + discard_metapage(mp); + } + } + + /* + * the leaf page become empty: delete the parent entry + * for the leaf page if the parent page is to be kept + * in the new sized file. + */ + + /* + * go back up to the parent page + */ + getParent: + /* pop/restore parent entry for the current child page */ + if ((parent = BT_POP(&btstack)) == NULL) + /* current page must have been root */ + goto out; + + /* get back the parent page */ + bn = parent->bn; + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return -rc; + + index = parent->index; + + /* + * child page was not empty: + */ + if (freed == 0) { + /* has any entry deleted from parent ? */ + if (index < le16_to_cpu(p->header.nextindex) - 1) { + /* (re)acquire tlock on the parent page */ + if (log) { /* COMMIT_PWMAP */ + /* txCommit() with tlckTRUNCATE: + * free child extents covered by parent [); + */ + tlck = txLock(tid, ip, mp, tlckXTREE); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->twm.offset = index; + if (!(tlck->type & tlckTRUNCATE)) { + xtlck->hwm.offset = + le16_to_cpu(p->header. + nextindex) - 1; + tlck->type = + tlckXTREE | tlckTRUNCATE; + } + } else { /* COMMIT_WMAP */ + + /* free child extents covered by parent */ + xadlock.xdlist = &p->xad[index + 1]; + xadlock.count = + le16_to_cpu(p->header.nextindex) - + index - 1; + txFreeMap(ip, (maplock_t *) & xadlock, 0, + COMMIT_WMAP); + } + BT_MARK_DIRTY(mp, ip); + + p->header.nextindex = cpu_to_le16(index + 1); + } + XT_PUTPAGE(mp); + goto getParent; + } + + /* + * child page was empty: + */ + nfreed += lengthXAD(&p->xad[index]); + + /* + * During working map update, child page's tlock must be handled + * before parent's. This is because the parent's tlock will cause + * the child's disk space to be marked available in the wmap, so + * it's important that the child page be released by that time. + * + * ToDo: tlocks should be on doubly-linked list, so we can + * quickly remove it and add it to the end. 
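+	 *
+	 * Editorial sketch of that ToDo, not part of the patch: with a
+	 * list_head embedded in tlock_t (hypothetical 'list' member and
+	 * hypothetical per-tblock 'tlock_list' head) the move-to-tail
+	 * would be O(1) via the standard <linux/list.h> helpers:
+	 *
+	 *	list_del(&tlck->list);
+	 *	list_add_tail(&tlck->list, &tblk->tlock_list);
+	 *
+	 * For now the singly-linked walk below does the same job in O(n).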
+ */ + + /* + * Move parent page's tlock to the end of the tid's tlock list + */ + if (log && mp->lid && (tblk->last != mp->lid) && + lid_to_tlock(mp->lid)->tid) { + lid_t lid = mp->lid; + tlock_t *prev; + + tlck = lid_to_tlock(lid); + + if (tblk->next == lid) + tblk->next = tlck->next; + else { + for (prev = lid_to_tlock(tblk->next); + prev->next != lid; + prev = lid_to_tlock(prev->next)) { + assert(prev->next); + } + prev->next = tlck->next; + } + lid_to_tlock(tblk->last)->next = lid; + tlck->next = 0; + tblk->last = lid; + } + + /* + * parent page become empty: free the page + */ + if (index == XTENTRYSTART) { + if (log) { /* COMMIT_PWMAP */ + /* txCommit() with tlckFREE: + * free child extents covered by parent; + * invalidate parent if COMMIT_PWMAP; + */ + tlck = txLock(tid, ip, mp, tlckXTREE); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->twm.offset = index; + xtlck->hwm.offset = + le16_to_cpu(p->header.nextindex) - 1; + tlck->type = tlckXTREE | tlckFREE; + } else { /* COMMIT_WMAP */ + + /* free child extents covered by parent */ + xadlock.xdlist = &p->xad[XTENTRYSTART]; + xadlock.count = + le16_to_cpu(p->header.nextindex) - + XTENTRYSTART; + txFreeMap(ip, (maplock_t *) & xadlock, 0, + COMMIT_WMAP); + } + BT_MARK_DIRTY(mp, ip); + + if (p->header.flag & BT_ROOT) { + p->header.flag &= ~BT_INTERNAL; + p->header.flag |= BT_LEAF; + p->header.nextindex = cpu_to_le16(XTENTRYSTART); + if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) { + /* + * Shrink root down to allow inline + * EA (otherwise fsck complains) + */ + p->header.maxentry = + cpu_to_le16(XTROOTINITSLOT); + JFS_IP(ip)->mode2 |= INLINEEA; + } + + XT_PUTPAGE(mp); /* debug */ + goto out; + } else { + if (log) { /* COMMIT_PWMAP */ + /* page will be invalidated at tx completion + */ + XT_PUTPAGE(mp); + } else { /* COMMIT_WMAP */ + + if (mp->lid) + lid_to_tlock(mp->lid)->flag |= + tlckFREELOCK; + + /* invalidate parent page */ + discard_metapage(mp); + } + + /* parent has become empty and freed: + * go back up to its parent page + */ + /* freed = 1; */ + goto getParent; + } + } + /* + * parent page still has entries for front region; + */ + else { + /* try truncate region covered by preceding entry + * (process backward) + */ + index--; + + /* go back down to the child page corresponding + * to the entry + */ + goto getChild; + } + + /* + * internal page: go down to child page of current entry + */ + getChild: + /* save current parent entry for the child page */ + BT_PUSH(&btstack, bn, index); + + /* get child page */ + xad = &p->xad[index]; + bn = addressXAD(xad); + + /* + * first access of each internal entry: + */ + /* release parent page */ + XT_PUTPAGE(mp); + + /* process the child page */ + goto getPage; + + out: + /* + * update file resource stat + */ + /* set size + */ + if (S_ISDIR(ip->i_mode) && !newsize) + ip->i_size = 1; /* fsck hates zero-length directories */ + else + ip->i_size = newsize; + + /* update nblocks to reflect freed blocks */ + ip->i_blocks -= LBLK2PBLK(ip->i_sb, nfreed); + + /* + * free tlock of invalidated pages + */ + if (flag == COMMIT_WMAP) + txFreelock(ip); + + return newsize; +} + + +/* + * xtTruncate_pmap() + * + * function: + * Perform truncate to zero lenghth for deleted file, leaving the + * the xtree and working map untouched. This allows the file to + * be accessed via open file handles, while the delete of the file + * is committed to disk. 
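+ *
+ * Editorial sketch, not part of the original patch: the loop shape a
+ * caller is expected to run, feeding the returned size back in until
+ * the whole range has been freed from the pmap.  Error handling and
+ * the real call site (the delete/commit path) are omitted; every name
+ * except xtTruncate_pmap()/txBegin()/txCommit()/txEnd() is
+ * illustrative.
+ */
+#if 0
+	s64 done = 0;
+
+	do {
+		tid_t tid = txBegin(ip->i_sb, 0);
+
+		done = xtTruncate_pmap(tid, ip, done);
+		txCommit(tid, 1, &ip, 0);	/* one bite per transaction */
+		txEnd(tid);
+	} while (done > 0);	/* returns 0 once truncation is complete */
+#endif
+/*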
+ * + * parameter: + * tid_t tid, + * struct inode *ip, + * s64 committed_size) + * + * return: new committed size + * + * note: + * + * To avoid deadlock by holding too many transaction locks, the + * truncation may be broken up into multiple transactions. + * The committed_size keeps track of part of the file has been + * freed from the pmaps. + */ +s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) +{ + s64 bn; + btstack_t btstack; + int cmp; + int index; + int locked_leaves = 0; + metapage_t *mp; + xtpage_t *p; + btframe_t *parent; + int rc; + tblock_t *tblk; + tlock_t *tlck = 0; + xad_t *xad; + int xlen; + s64 xoff; + xtlock_t *xtlck = 0; + + /* save object truncation type */ + tblk = tid_to_tblock(tid); + tblk->xflag |= COMMIT_PMAP; + + /* clear stack */ + BT_CLR(&btstack); + + if (committed_size) { + xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; + rc = xtSearch(ip, xoff, &cmp, &btstack, 0); + if (rc) + return -rc; + assert(cmp == 0); + XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); + } else { + /* + * start with root + * + * root resides in the inode + */ + bn = 0; + + /* + * first access of each page: + */ + getPage: + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return -rc; + + /* process entries backward from last index */ + index = le16_to_cpu(p->header.nextindex) - 1; + + if (p->header.flag & BT_INTERNAL) + goto getChild; + } + + /* + * leaf page + */ + + if (++locked_leaves > MAX_TRUNCATE_LEAVES) { + /* + * We need to limit the size of the transaction + * to avoid exhausting pagecache & tlocks + */ + xad = &p->xad[index]; + xoff = offsetXAD(xad); + xlen = lengthXAD(xad); + XT_PUTPAGE(mp); + return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; + } + tlck = txLock(tid, ip, mp, tlckXTREE); + tlck->type = tlckXTREE | tlckTRUNCATE; + xtlck = (xtlock_t *) & tlck->lock; + xtlck->hwm.offset = index; + + tlck->type = tlckXTREE | tlckFREE; + + XT_PUTPAGE(mp); + + /* + * go back up to the parent page + */ + getParent: + /* pop/restore parent entry for the current child page */ + if ((parent = BT_POP(&btstack)) == NULL) + /* current page must have been root */ + goto out; + + /* get back the parent page */ + bn = parent->bn; + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return -rc; + + index = parent->index; + + /* + * parent page become empty: free the page + */ + if (index == XTENTRYSTART) { + /* txCommit() with tlckFREE: + * free child extents covered by parent; + * invalidate parent if COMMIT_PWMAP; + */ + tlck = txLock(tid, ip, mp, tlckXTREE); + xtlck = (xtlock_t *) & tlck->lock; + xtlck->twm.offset = index; + xtlck->hwm.offset = + le16_to_cpu(p->header.nextindex) - 1; + tlck->type = tlckXTREE | tlckFREE; + + XT_PUTPAGE(mp); + + if (p->header.flag & BT_ROOT) { + + goto out; + } else { + goto getParent; + } + } + /* + * parent page still has entries for front region; + */ + else + index--; + /* + * internal page: go down to child page of current entry + */ + getChild: + /* save current parent entry for the child page */ + BT_PUSH(&btstack, bn, index); + + /* get child page */ + xad = &p->xad[index]; + bn = addressXAD(xad); + + /* + * first access of each internal entry: + */ + /* release parent page */ + XT_PUTPAGE(mp); + + /* process the child page */ + goto getPage; + + out: + + return 0; +} + + +#ifdef _JFS_DEBUG_XTREE +/* + * xtDisplayTree() + * + * function: traverse forward + */ +int xtDisplayTree(struct inode *ip) +{ + int rc = 0; + metapage_t *mp; + xtpage_t *p; + s64 bn, pbn; + int index, lastindex, v, h; + xad_t *xad; + btstack_t 
btstack; + btframe_t *btsp; + btframe_t *parent; + + printk("display B+-tree.\n"); + + /* clear stack */ + btsp = btstack.stack; + + /* + * start with root + * + * root resides in the inode + */ + bn = 0; + v = h = 0; + + /* + * first access of each page: + */ + getPage: + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* process entries forward from first index */ + index = XTENTRYSTART; + lastindex = le16_to_cpu(p->header.nextindex) - 1; + + if (p->header.flag & BT_INTERNAL) { + /* + * first access of each internal page + */ + goto getChild; + } else { /* (p->header.flag & BT_LEAF) */ + + /* + * first access of each leaf page + */ + printf("leaf page "); + xtDisplayPage(ip, bn, p); + + /* unpin the leaf page */ + XT_PUTPAGE(mp); + } + + /* + * go back up to the parent page + */ + getParent: + /* pop/restore parent entry for the current child page */ + if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL) + /* current page must have been root */ + return; + + /* + * parent page scan completed + */ + if ((index = parent->index) == (lastindex = parent->lastindex)) { + /* go back up to the parent page */ + goto getParent; + } + + /* + * parent page has entries remaining + */ + /* get back the parent page */ + bn = parent->bn; + /* v = parent->level; */ + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* get next parent entry */ + index++; + + /* + * internal page: go down to child page of current entry + */ + getChild: + /* push/save current parent entry for the child page */ + btsp->bn = pbn = bn; + btsp->index = index; + btsp->lastindex = lastindex; + /* btsp->level = v; */ + /* btsp->node = h; */ + ++btsp; + + /* get child page */ + xad = &p->xad[index]; + bn = addressXAD(xad); + + /* + * first access of each internal entry: + */ + /* release parent page */ + XT_PUTPAGE(mp); + + printk("traverse down 0x%lx[%d]->0x%lx\n", (ulong) pbn, index, + (ulong) bn); + v++; + h = index; + + /* process the child page */ + goto getPage; +} + + +/* + * xtDisplayPage() + * + * function: display page + */ +int xtDisplayPage(struct inode *ip, s64 bn, xtpage_t * p) +{ + int rc = 0; + metapage_t *mp; + xad_t *xad; + s64 xaddr, xoff; + int xlen, i, j; + + if (p == NULL) { + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + } + + /* display page control */ + printf("bn:0x%lx flag:0x%x nextindex:%d\n", + (ulong) bn, p->header.flag, + le16_to_cpu(p->header.nextindex)); + + /* display entries */ + xad = &p->xad[XTENTRYSTART]; + for (i = XTENTRYSTART, j = 1; i < le16_to_cpu(p->header.nextindex); + i++, xad++, j++) { + xoff = offsetXAD(xad); + xaddr = addressXAD(xad); + xlen = lengthXAD(xad); + printf("\t[%d] 0x%lx:0x%lx(0x%x)", i, (ulong) xoff, + (ulong) xaddr, xlen); + + if (j == 4) { + printf("\n"); + j = 0; + } + } + + printf("\n"); +} +#endif /* _JFS_DEBUG_XTREE */ + + +#ifdef _JFS_WIP +/* + * xtGather() + * + * function: + * traverse for allocation acquiring tlock at commit time + * (vs at the time of update) logging backward top down + * + * note: + * problem - establishing that all new allocation have been + * processed both for append and random write in sparse file + * at the current entry at the current subtree root page + * + */ +int xtGather(t) +btree_t *t; +{ + int rc = 0; + xtpage_t *p; + u64 bn; + int index; + btentry_t *e; + btstack_t btstack; + struct btsf *parent; + + /* clear stack */ + BT_CLR(&btstack); + + /* + * start with root + * + * root resides in the inode + */ + bn = 0; + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return 
rc; + + /* new root is NOT pointed by a new entry + if (p->header.flag & NEW) + allocate new page lock; + write a NEWPAGE log; + */ + + dopage: + /* + * first access of each page: + */ + /* process entries backward from last index */ + index = le16_to_cpu(p->header.nextindex) - 1; + + if (p->header.flag & BT_LEAF) { + /* + * first access of each leaf page + */ + /* process leaf page entries backward */ + for (; index >= XTENTRYSTART; index--) { + e = &p->xad[index]; + /* + * if newpage, log NEWPAGE. + * + if (e->flag & XAD_NEW) { + nfound =+ entry->length; + update current page lock for the entry; + newpage(entry); + * + * if moved, log move. + * + } else if (e->flag & XAD_MOVED) { + reset flag; + update current page lock for the entry; + } + */ + } + + /* unpin the leaf page */ + XT_PUTPAGE(mp); + + /* + * go back up to the parent page + */ + getParent: + /* restore parent entry for the current child page */ + if ((parent = BT_POP(&btstack)) == NULL) + /* current page must have been root */ + return 0; + + if ((index = parent->index) == XTENTRYSTART) { + /* + * parent page scan completed + */ + /* go back up to the parent page */ + goto getParent; + } else { + /* + * parent page has entries remaining + */ + /* get back the parent page */ + bn = parent->bn; + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return EIO; + + /* first subroot page which + * covers all new allocated blocks + * itself not new/modified. + * (if modified from split of descendent, + * go down path of split page) + + if (nfound == nnew && + !(p->header.flag & (NEW | MOD))) + exit scan; + */ + + /* process parent page entries backward */ + index--; + } + } else { + /* + * first access of each internal page + */ + } + + /* + * internal page: go down to child page of current entry + */ + + /* save current parent entry for the child page */ + BT_PUSH(&btstack, bn, index); + + /* get current entry for the child page */ + e = &p->xad[index]; + + /* + * first access of each internal entry: + */ + /* + * if new entry, log btree_tnewentry. + * + if (e->flag & XAD_NEW) + update parent page lock for the entry; + */ + + /* release parent page */ + XT_PUTPAGE(mp); + + /* get child page */ + bn = e->bn; + XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); + if (rc) + return rc; + + /* + * first access of each non-root page: + */ + /* + * if new, log btree_newpage. 
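+	 * (editorial note: xtGather() is work-in-progress pseudo-code,
+	 * compiled out under _JFS_WIP - the fragment below sketches the
+	 * intended NEWPAGE logging and is not executable as written.)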
+ * + if (p->header.flag & NEW) + allocate new page lock; + write a NEWPAGE log (next, prev); + */ + + /* process the child page */ + goto dopage; + + out: + return 0; +} +#endif /* _JFS_WIP */ + + +#ifdef CONFIG_JFS_STATISTICS +int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, + int *eof, void *data) +{ + int len = 0; + off_t begin; + + len += sprintf(buffer, + "JFS Xtree statistics\n" + "====================\n" + "searches = %d\n" + "fast searches = %d\n" + "splits = %d\n", + xtStat.search, + xtStat.fastSearch, + xtStat.split); + + begin = offset; + *start = buffer + begin; + len -= begin; + + if (len > length) + len = length; + else + *eof = 1; + + if (len < 0) + len = 0; + + return len; +} +#endif diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/jfs_xtree.h linuxppc64_2_4/fs/jfs/jfs_xtree.h --- ../kernel.org/linux-2.4.19/fs/jfs/jfs_xtree.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/jfs_xtree.h Tue Apr 23 11:21:51 2002 @@ -0,0 +1,143 @@ +/* + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * Change History : + * +*/ + +#ifndef _H_JFS_XTREE +#define _H_JFS_XTREE + +/* + * jfs_xtree.h: extent allocation descriptor B+-tree manager + */ + +#include "jfs_btree.h" + + +/* + * extent allocation descriptor (xad) + */ +typedef struct xad { + unsigned flag:8; /* 1: flag */ + unsigned rsvrd:16; /* 2: reserved */ + unsigned off1:8; /* 1: offset in unit of fsblksize */ + u32 off2; /* 4: offset in unit of fsblksize */ + unsigned len:24; /* 3: length in unit of fsblksize */ + unsigned addr1:8; /* 1: address in unit of fsblksize */ + u32 addr2; /* 4: address in unit of fsblksize */ +} xad_t; /* (16) */ + +#define MAXXLEN ((1 << 24) - 1) + +#define XTSLOTSIZE 16 +#define L2XTSLOTSIZE 4 + +/* xad_t field construction */ +#define XADoffset(xad, offset64)\ +{\ + (xad)->off1 = ((u64)offset64) >> 32;\ + (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\ +} +#define XADaddress(xad, address64)\ +{\ + (xad)->addr1 = ((u64)address64) >> 32;\ + (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ +} +#define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32) + +/* xad_t field extraction */ +#define offsetXAD(xad)\ + ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2)) +#define addressXAD(xad)\ + ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2)) +#define lengthXAD(xad) __le24_to_cpu((xad)->len) + +/* xad list */ +typedef struct { + s16 maxnxad; + s16 nxad; + xad_t *xad; +} xadlist_t; + +/* xad_t flags */ +#define XAD_NEW 0x01 /* new */ +#define XAD_EXTENDED 0x02 /* extended */ +#define XAD_COMPRESSED 0x04 /* compressed with recorded length */ +#define XAD_NOTRECORDED 0x08 /* allocated but not recorded */ +#define XAD_COW 0x10 /* copy-on-write */ + + +/* possible values for maxentry */ +#define 
XTROOTINITSLOT_DIR 6 +#define XTROOTINITSLOT 10 +#define XTROOTMAXSLOT 18 +#define XTPAGEMAXSLOT 256 +#define XTENTRYSTART 2 + +/* + * xtree page: + */ +typedef union { + struct xtheader { + s64 next; /* 8: */ + s64 prev; /* 8: */ + + u8 flag; /* 1: */ + u8 rsrvd1; /* 1: */ + s16 nextindex; /* 2: next index = number of entries */ + s16 maxentry; /* 2: max number of entries */ + s16 rsrvd2; /* 2: */ + + pxd_t self; /* 8: self */ + } header; /* (32) */ + + xad_t xad[XTROOTMAXSLOT]; /* 16 * maxentry: xad array */ +} xtpage_t; + +/* + * external declaration + */ +extern int xtLookup(struct inode *ip, s64 lstart, s64 llen, + int *pflag, s64 * paddr, int *plen, int flag); +extern int xtLookupList(struct inode *ip, lxdlist_t * lxdlist, + xadlist_t * xadlist, int flag); +extern void xtInitRoot(tid_t tid, struct inode *ip); +extern int xtInsert(tid_t tid, struct inode *ip, + int xflag, s64 xoff, int xlen, s64 * xaddrp, int flag); +extern int xtExtend(tid_t tid, struct inode *ip, s64 xoff, int xlen, + int flag); +extern int xtTailgate(tid_t tid, struct inode *ip, + s64 xoff, int xlen, s64 xaddr, int flag); +extern int xtUpdate(tid_t tid, struct inode *ip, struct xad *nxad); +extern int xtDelete(tid_t tid, struct inode *ip, s64 xoff, int xlen, + int flag); +extern s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int type); +extern s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size); +extern int xtRelocate(tid_t tid, struct inode *ip, + xad_t * oxad, s64 nxaddr, int xtype); +extern int xtAppend(tid_t tid, + struct inode *ip, int xflag, s64 xoff, int maxblocks, + int *xlenp, s64 * xaddrp, int flag); + +#ifdef _JFS_DEBUG_XTREE +extern int xtDisplayTree(struct inode *ip); +extern int xtDisplayPage(struct inode *ip, s64 bn, xtpage_t * p); +#endif /* _JFS_DEBUG_XTREE */ + +#endif /* !_H_JFS_XTREE */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/namei.c linuxppc64_2_4/fs/jfs/namei.c --- ../kernel.org/linux-2.4.19/fs/jfs/namei.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/namei.c Tue Apr 23 11:25:34 2002 @@ -0,0 +1,1461 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Module: jfs/namei.c + * + */ + +/* + * Change History : + * + */ + +#include +#include +#include "jfs_incore.h" +#include "jfs_inode.h" +#include "jfs_dinode.h" +#include "jfs_dmap.h" +#include "jfs_unicode.h" +#include "jfs_metapage.h" +#include "jfs_debug.h" + +extern struct inode_operations jfs_file_inode_operations; +extern struct inode_operations jfs_symlink_inode_operations; +extern struct file_operations jfs_file_operations; +extern struct address_space_operations jfs_aops; + +extern int jfs_fsync(struct file *, struct dentry *, int); +extern void jfs_truncate_nolock(struct inode *, loff_t); + +/* + * forward references + */ +struct inode_operations jfs_dir_inode_operations; +struct file_operations jfs_dir_operations; + +s64 commitZeroLink(tid_t, struct inode *); + +/* + * NAME: jfs_create(dip, dentry, mode) + * + * FUNCTION: create a regular file in the parent directory + * with name = and mode = + * + * PARAMETER: dip - parent directory vnode + * dentry - dentry of new file + * mode - create mode (rwxrwxrwx). + * + * RETURN: Errors from subroutines + * + */ +int jfs_create(struct inode *dip, struct dentry *dentry, int mode) +{ + int rc = 0; + tid_t tid; /* transaction id */ + struct inode *ip = NULL; /* child directory inode */ + ino_t ino; + component_t dname; /* child directory name */ + btstack_t btstack; + struct inode *iplist[2]; + tblock_t *tblk; + + jFYI(1, ("jfs_create: dip:0x%p name:%s\n", dip, dentry->d_name.name)); + + IWRITE_LOCK(dip); + + /* + * search parent directory for entry/freespace + * (dtSearch() returns parent directory page pinned) + */ + if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dip->i_sb)->nls_tab))) + goto out1; + + /* + * Either iAlloc() or txBegin() may block. Deadlock can occur if we + * block there while holding dtree page, so we allocate the inode & + * begin the transaction before we search the directory. 
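+	 *
+	 * Editorial illustration, not part of the original patch - the
+	 * ordering being avoided here (dtSearch() returns with a dtree
+	 * page pinned, so blocking in ialloc()/txBegin() afterwards could
+	 * deadlock against another thread wanting that page):
+	 */
+#if 0
+	dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE);	/* pins page */
+	ip = ialloc(dip, mode);		/* may block: deadlock risk */
+	tid = txBegin(dip->i_sb, 0);	/* may block: deadlock risk */
+#endif
+	/*
+	 * Hence the safe order below: ialloc(), txBegin(), then dtSearch().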
+ */ + ip = ialloc(dip, mode); + if (ip == NULL) { + rc = ENOSPC; + goto out2; + } + + tid = txBegin(dip->i_sb, 0); + + if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) { + jERROR(1, ("jfs_create: dtSearch returned %d\n", rc)); + ip->i_nlink = 0; + iput(ip); + txEnd(tid); + goto out2; + } + + tblk = tid_to_tblock(tid); + tblk->xflag |= COMMIT_CREATE; + tblk->ip = ip; + + iplist[0] = dip; + iplist[1] = ip; + + /* + * initialize the child XAD tree root in-line in inode + */ + xtInitRoot(tid, ip); + + /* + * create entry in parent directory for child directory + * (dtInsert() releases parent directory page) + */ + ino = ip->i_ino; + if ((rc = dtInsert(tid, dip, &dname, &ino, &btstack))) { + jERROR(1, ("jfs_create: dtInsert returned %d\n", rc)); + /* discard new inode */ + ip->i_nlink = 0; + iput(ip); + + if (rc == EIO) + txAbort(tid, 1); /* Marks Filesystem dirty */ + else + txAbort(tid, 0); /* Filesystem full */ + txEnd(tid); + goto out2; + } + + ip->i_op = &jfs_file_inode_operations; + ip->i_fop = &jfs_file_operations; + ip->i_mapping->a_ops = &jfs_aops; + + insert_inode_hash(ip); + mark_inode_dirty(ip); + d_instantiate(dentry, ip); + + dip->i_version = ++event; + dip->i_ctime = dip->i_mtime = CURRENT_TIME; + + mark_inode_dirty(dip); + + rc = txCommit(tid, 2, &iplist[0], 0); + txEnd(tid); + + out2: + free_UCSname(&dname); + + out1: + + IWRITE_UNLOCK(dip); + jFYI(1, ("jfs_create: rc:%d\n", -rc)); + return -rc; +} + + +/* + * NAME: jfs_mkdir(dip, dentry, mode) + * + * FUNCTION: create a child directory in the parent directory + * with name = and mode = + * + * PARAMETER: dip - parent directory vnode + * dentry - dentry of child directory + * mode - create mode (rwxrwxrwx). + * + * RETURN: Errors from subroutines + * + * note: + * EACCESS: user needs search+write permission on the parent directory + */ +int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) +{ + int rc = 0; + tid_t tid; /* transaction id */ + struct inode *ip = NULL; /* child directory inode */ + ino_t ino; + component_t dname; /* child directory name */ + btstack_t btstack; + struct inode *iplist[2]; + tblock_t *tblk; + + jFYI(1, ("jfs_mkdir: dip:0x%p name:%s\n", dip, dentry->d_name.name)); + + IWRITE_LOCK(dip); + + /* link count overflow on parent directory ? */ + if (dip->i_nlink == JFS_LINK_MAX) { + rc = EMLINK; + goto out1; + } + + /* + * search parent directory for entry/freespace + * (dtSearch() returns parent directory page pinned) + */ + if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dip->i_sb)->nls_tab))) + goto out1; + + /* + * Either iAlloc() or txBegin() may block. Deadlock can occur if we + * block there while holding dtree page, so we allocate the inode & + * begin the transaction before we search the directory. 
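+	 * (This is the same ordering rule applied in jfs_create() above.)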
+ */ + ip = ialloc(dip, S_IFDIR | mode); + if (ip == NULL) { + rc = ENOSPC; + goto out2; + } + + tid = txBegin(dip->i_sb, 0); + + if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) { + jERROR(1, ("jfs_mkdir: dtSearch returned %d\n", rc)); + ip->i_nlink = 0; + iput(ip); + txEnd(tid); + goto out2; + } + + tblk = tid_to_tblock(tid); + tblk->xflag |= COMMIT_CREATE; + tblk->ip = ip; + + iplist[0] = dip; + iplist[1] = ip; + + /* + * initialize the child directory in-line in inode + */ + dtInitRoot(tid, ip, dip->i_ino); + + /* + * create entry in parent directory for child directory + * (dtInsert() releases parent directory page) + */ + ino = ip->i_ino; + if ((rc = dtInsert(tid, dip, &dname, &ino, &btstack))) { + jERROR(1, ("jfs_mkdir: dtInsert returned %d\n", rc)); + /* discard new directory inode */ + ip->i_nlink = 0; + iput(ip); + + if (rc == EIO) + txAbort(tid, 1); /* Marks Filesystem dirty */ + else + txAbort(tid, 0); /* Filesystem full */ + txEnd(tid); + goto out2; + } + + ip->i_nlink = 2; /* for '.' */ + ip->i_op = &jfs_dir_inode_operations; + ip->i_fop = &jfs_dir_operations; + ip->i_mapping->a_ops = &jfs_aops; + ip->i_mapping->gfp_mask = GFP_NOFS; + + insert_inode_hash(ip); + mark_inode_dirty(ip); + d_instantiate(dentry, ip); + + /* update parent directory inode */ + dip->i_nlink++; /* for '..' from child directory */ + dip->i_version = ++event; + dip->i_ctime = dip->i_mtime = CURRENT_TIME; + mark_inode_dirty(dip); + + rc = txCommit(tid, 2, &iplist[0], 0); + txEnd(tid); + + out2: + free_UCSname(&dname); + + out1: + + IWRITE_UNLOCK(dip); + + jFYI(1, ("jfs_mkdir: rc:%d\n", -rc)); + return -rc; +} + +/* + * NAME: jfs_rmdir(dip, dentry) + * + * FUNCTION: remove a link to child directory + * + * PARAMETER: dip - parent inode + * dentry - child directory dentry + * + * RETURN: EINVAL - if name is . or .. + * EINVAL - if . or .. exist but are invalid. + * errors from subroutines + * + * note: + * if other threads have the directory open when the last link + * is removed, the "." and ".." entries, if present, are removed before + * rmdir() returns and no new entries may be created in the directory, + * but the directory is not removed until the last reference to + * the directory is released (cf.unlink() of regular file). + */ +int jfs_rmdir(struct inode *dip, struct dentry *dentry) +{ + int rc; + tid_t tid; /* transaction id */ + struct inode *ip = dentry->d_inode; + ino_t ino; + component_t dname; + struct inode *iplist[2]; + tblock_t *tblk; + + jFYI(1, ("jfs_rmdir: dip:0x%p name:%s\n", dip, dentry->d_name.name)); + + IWRITE_LOCK_LIST(2, dip, ip); + + /* directory must be empty to be removed */ + if (!dtEmpty(ip)) { + IWRITE_UNLOCK(ip); + IWRITE_UNLOCK(dip); + rc = ENOTEMPTY; + goto out; + } + + if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dip->i_sb)->nls_tab))) { + IWRITE_UNLOCK(ip); + IWRITE_UNLOCK(dip); + goto out; + } + + tid = txBegin(dip->i_sb, 0); + + iplist[0] = dip; + iplist[1] = ip; + + tblk = tid_to_tblock(tid); + tblk->xflag |= COMMIT_DELETE; + tblk->ip = ip; + + /* + * delete the entry of target directory from parent directory + */ + ino = ip->i_ino; + if ((rc = dtDelete(tid, dip, &dname, &ino, JFS_REMOVE))) { + jERROR(1, ("jfs_rmdir: dtDelete returned %d\n", rc)); + if (rc == EIO) + txAbort(tid, 1); + txEnd(tid); + + IWRITE_UNLOCK(ip); + IWRITE_UNLOCK(dip); + + goto out2; + } + + /* update parent directory's link count corresponding + * to ".." 
entry of the target directory deleted + */ + dip->i_nlink--; + dip->i_ctime = dip->i_mtime = CURRENT_TIME; + dip->i_version = ++event; + mark_inode_dirty(dip); + + /* + * OS/2 could have created EA and/or ACL + */ + /* free EA from both persistent and working map */ + if (JFS_IP(ip)->ea.flag & DXD_EXTENT) { + /* free EA pages */ + txEA(tid, ip, &JFS_IP(ip)->ea, NULL); + } + JFS_IP(ip)->ea.flag = 0; + + /* free ACL from both persistent and working map */ + if (JFS_IP(ip)->acl.flag & DXD_EXTENT) { + /* free ACL pages */ + txEA(tid, ip, &JFS_IP(ip)->acl, NULL); + } + JFS_IP(ip)->acl.flag = 0; + + /* mark the target directory as deleted */ + ip->i_nlink = 0; + mark_inode_dirty(ip); + + rc = txCommit(tid, 2, &iplist[0], 0); + + txEnd(tid); + + IWRITE_UNLOCK(ip); + + /* + * Truncating the directory index table is not guaranteed. It + * may need to be done iteratively + */ + if (test_cflag(COMMIT_Stale, dip)) { + if (dip->i_size > 1) + jfs_truncate_nolock(dip, 0); + + clear_cflag(COMMIT_Stale, dip); + } + + IWRITE_UNLOCK(dip); + + d_delete(dentry); + + out2: + free_UCSname(&dname); + + out: + jFYI(1, ("jfs_rmdir: rc:%d\n", rc)); + return -rc; +} + +/* + * NAME: jfs_unlink(dip, dentry) + * + * FUNCTION: remove a link to object named by + * from parent directory + * + * PARAMETER: dip - inode of parent directory + * dentry - dentry of object to be removed + * + * RETURN: errors from subroutines + * + * note: + * temporary file: if one or more processes have the file open + * when the last link is removed, the link will be removed before + * unlink() returns, but the removal of the file contents will be + * postponed until all references to the files are closed. + * + * JFS does NOT support unlink() on directories. + * + */ +int jfs_unlink(struct inode *dip, struct dentry *dentry) +{ + int rc; + tid_t tid; /* transaction id */ + struct inode *ip = dentry->d_inode; + ino_t ino; + component_t dname; /* object name */ + struct inode *iplist[2]; + tblock_t *tblk; + s64 new_size = 0; + int commit_flag; + + jFYI(1, ("jfs_unlink: dip:0x%p name:%s\n", dip, dentry->d_name.name)); + + if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dip->i_sb)->nls_tab))) + goto out; + + IWRITE_LOCK_LIST(2, ip, dip); + + tid = txBegin(dip->i_sb, 0); + + iplist[0] = dip; + iplist[1] = ip; + + /* + * delete the entry of target file from parent directory + */ + ino = ip->i_ino; + if ((rc = dtDelete(tid, dip, &dname, &ino, JFS_REMOVE))) { + jERROR(1, ("jfs_unlink: dtDelete returned %d\n", rc)); + if (rc == EIO) + txAbort(tid, 1); /* Marks FS Dirty */ + txEnd(tid); + IWRITE_UNLOCK(ip); + IWRITE_UNLOCK(dip); + goto out1; + } + + ASSERT(ip->i_nlink); + + ip->i_ctime = dip->i_ctime = dip->i_mtime = CURRENT_TIME; + dip->i_version = ++event; + mark_inode_dirty(dip); + + /* update target's inode */ + ip->i_nlink--; + mark_inode_dirty(ip); + + /* + * commit zero link count object + */ + if (ip->i_nlink == 0) { + assert(!test_cflag(COMMIT_Nolink, ip)); + /* free block resources */ + if ((new_size = commitZeroLink(tid, ip)) < 0) { + txAbort(tid, 1); /* Marks FS Dirty */ + txEnd(tid); + IWRITE_UNLOCK(ip); + IWRITE_UNLOCK(dip); + rc = -new_size; /* We return -rc */ + goto out1; + } + tblk = tid_to_tblock(tid); + tblk->xflag |= COMMIT_DELETE; + tblk->ip = ip; + } + + /* + * Incomplete truncate of file data can + * result in timing problems unless we synchronously commit the + * transaction. 
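+	 * (The remainder of the truncate is then re-driven below, one
+	 * transaction at a time, until new_size reaches zero.)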
+ */ + if (new_size) + commit_flag = COMMIT_SYNC; + else + commit_flag = 0; + + /* + * If xtTruncate was incomplete, commit synchronously to avoid + * timing complications + */ + rc = txCommit(tid, 2, &iplist[0], commit_flag); + + txEnd(tid); + + while (new_size && (rc == 0)) { + tid = txBegin(dip->i_sb, 0); + new_size = xtTruncate_pmap(tid, ip, new_size); + if (new_size < 0) { + txAbort(tid, 1); /* Marks FS Dirty */ + rc = -new_size; /* We return -rc */ + } else + rc = txCommit(tid, 2, &iplist[0], COMMIT_SYNC); + txEnd(tid); + } + + if (!test_cflag(COMMIT_Holdlock, ip)) + IWRITE_UNLOCK(ip); + + /* + * Truncating the directory index table is not guaranteed. It + * may need to be done iteratively + */ + if (test_cflag(COMMIT_Stale, dip)) { + if (dip->i_size > 1) + jfs_truncate_nolock(dip, 0); + + clear_cflag(COMMIT_Stale, dip); + } + + IWRITE_UNLOCK(dip); + + d_delete(dentry); + + out1: + free_UCSname(&dname); + out: + jFYI(1, ("jfs_unlink: rc:%d\n", -rc)); + return -rc; +} + +/* + * NAME: commitZeroLink() + * + * FUNCTION: for non-directory, called by jfs_remove(), + * truncate a regular file, directory or symbolic + * link to zero length. return 0 if type is not + * one of these. + * + * if the file is currently associated with a VM segment + * only permanent disk and inode map resources are freed, + * and neither the inode nor indirect blocks are modified + * so that the resources can be later freed in the work + * map by ctrunc1. + * if there is no VM segment on entry, the resources are + * freed in both work and permanent map. + * (? for temporary file - memory object is cached even + * after no reference: + * reference count > 0 - ) + * + * PARAMETERS: cd - pointer to commit data structure. + * current inode is the one to truncate. + * + * RETURN : Errors from subroutines + */ +s64 commitZeroLink(tid_t tid, struct inode *ip) +{ + int filetype; + tblock_t *tblk; + + jFYI(1, ("commitZeroLink: tid = %d, ip = 0x%p\n", tid, ip)); + + filetype = ip->i_mode & S_IFMT; + switch (filetype) { + case S_IFREG: + break; + case S_IFLNK: + /* fast symbolic link */ + if (ip->i_size <= 256) { + ip->i_size = 0; + return 0; + } + break; + default: + assert(filetype != S_IFDIR); + return 0; + } + + set_cflag(COMMIT_Freewmap, ip); + + /* mark transaction of block map update type */ + tblk = tid_to_tblock(tid); + tblk->xflag |= COMMIT_PMAP; + + /* + * free EA + */ + if (JFS_IP(ip)->ea.flag & DXD_EXTENT) + /* acquire maplock on EA to be freed from block map */ + txEA(tid, ip, &JFS_IP(ip)->ea, NULL); + + /* + * free ACL + */ + if (JFS_IP(ip)->acl.flag & DXD_EXTENT) + /* acquire maplock on EA to be freed from block map */ + txEA(tid, ip, &JFS_IP(ip)->acl, NULL); + + /* + * free xtree/data (truncate to zero length): + * free xtree/data pages from cache if COMMIT_PWMAP, + * free xtree/data blocks from persistent block map, and + * free xtree/data blocks from working block map if COMMIT_PWMAP; + */ + if (ip->i_size) + return xtTruncate_pmap(tid, ip, 0); + + return 0; +} + + +/* + * NAME: freeZeroLink() + * + * FUNCTION: for non-directory, called by iClose(), + * free resources of a file from cache and WORKING map + * for a file previously committed with zero link count + * while associated with a pager object, + * + * PARAMETER: ip - pointer to inode of file. + * + * RETURN: 0 -ok + */ +int freeZeroLink(struct inode *ip) +{ + int rc = 0; + int type; + + jFYI(1, ("freeZeroLink: ip = 0x%p\n", ip)); + + /* return if not reg or symbolic link or if size is + * already ok. 
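+	 * (A fast symlink keeps its target inline in the on-disk inode,
+	 * so there are no extents to free.)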
+ */ + type = ip->i_mode & S_IFMT; + + switch (type) { + case S_IFREG: + break; + case S_IFLNK: + /* if its contained in inode nothing to do */ + if (ip->i_size <= 256) + return 0; + break; + default: + return 0; + } + + /* + * free EA + */ + if (JFS_IP(ip)->ea.flag & DXD_EXTENT) { + s64 xaddr; + int xlen; + maplock_t maplock; /* maplock for COMMIT_WMAP */ + pxdlock_t *pxdlock; /* maplock for COMMIT_WMAP */ + + /* free EA pages from cache */ + xaddr = addressDXD(&JFS_IP(ip)->ea); + xlen = lengthDXD(&JFS_IP(ip)->ea); +#ifdef _STILL_TO_PORT + bmExtentInvalidate(ip, xaddr, xlen); +#endif + + /* free EA extent from working block map */ + maplock.index = 1; + pxdlock = (pxdlock_t *) & maplock; + pxdlock->flag = mlckFREEPXD; + PXDaddress(&pxdlock->pxd, xaddr); + PXDlength(&pxdlock->pxd, xlen); + txFreeMap(ip, pxdlock, 0, COMMIT_WMAP); + } + + /* + * free ACL + */ + if (JFS_IP(ip)->acl.flag & DXD_EXTENT) { + s64 xaddr; + int xlen; + maplock_t maplock; /* maplock for COMMIT_WMAP */ + pxdlock_t *pxdlock; /* maplock for COMMIT_WMAP */ + + /* free ACL pages from cache */ + xaddr = addressDXD(&JFS_IP(ip)->acl); + xlen = lengthDXD(&JFS_IP(ip)->acl); +#ifdef _STILL_TO_PORT + bmExtentInvalidate(ip, xaddr, xlen); +#endif + + /* free ACL extent from working block map */ + maplock.index = 1; + pxdlock = (pxdlock_t *) & maplock; + pxdlock->flag = mlckFREEPXD; + PXDaddress(&pxdlock->pxd, xaddr); + PXDlength(&pxdlock->pxd, xlen); + txFreeMap(ip, pxdlock, 0, COMMIT_WMAP); + } + + /* + * free xtree/data (truncate to zero length): + * free xtree/data pages from cache, and + * free xtree/data blocks from working block map; + */ + if (ip->i_size) + rc = xtTruncate(0, ip, 0, COMMIT_WMAP); + + return rc; +} + +/* + * NAME: jfs_link(vp, dvp, name, crp) + * + * FUNCTION: create a link to by the name = + * in the parent directory + * + * PARAMETER: vp - target object + * dvp - parent directory of new link + * name - name of new link to target object + * crp - credential + * + * RETURN: Errors from subroutines + * + * note: + * JFS does NOT support link() on directories (to prevent circular + * path in the directory hierarchy); + * EPERM: the target object is a directory, and either the caller + * does not have appropriate privileges or the implementation prohibits + * using link() on directories [XPG4.2]. + * + * JFS does NOT support links between file systems: + * EXDEV: target object and new link are on different file systems and + * implementation does not support links between file systems [XPG4.2]. 
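One convention worth noting before the next routines: the JFS internal helpers (dtSearch(), dtInsert(), commitZeroLink(), ...) hand back positive errno values, and each VFS entry point negates the accumulated code on exit ("return -rc" above and below), since the VFS expects negative errno. A self-contained sketch of that convention, with hypothetical names (not JFS functions):

        #include <errno.h>
        #include <stdio.h>

        /* Internal helper: 0 on success, positive errno on failure. */
        static int helper_search(int found)
        {
                return found ? 0 : ENOENT;
        }

        /* VFS-style entry point: negate the code on the way out. */
        static int entry_point(int found)
        {
                int rc = helper_search(found);
                return -rc;     /* 0 stays 0, ENOENT becomes -ENOENT */
        }

        int main(void)
        {
                printf("%d %d\n", entry_point(1), entry_point(0));
                return 0;
        }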
+ */
+int jfs_link(struct dentry *old_dentry,
+	     struct inode *dir, struct dentry *dentry)
+{
+	int rc;
+	tid_t tid;
+	struct inode *ip = old_dentry->d_inode;
+	ino_t ino;
+	component_t dname;
+	btstack_t btstack;
+	struct inode *iplist[2];
+
+	jFYI(1,
+	     ("jfs_link: %s %s\n", old_dentry->d_name.name,
+	      dentry->d_name.name));
+
+	/* JFS does NOT support link() on directories */
+	if (S_ISDIR(ip->i_mode))
+		return -EPERM;
+
+	IWRITE_LOCK_LIST(2, dir, ip);
+
+	tid = txBegin(ip->i_sb, 0);
+
+	if (ip->i_nlink == JFS_LINK_MAX) {
+		rc = EMLINK;
+		goto out;
+	}
+
+	/*
+	 * scan parent directory for entry/freespace
+	 */
+	if ((rc = get_UCSname(&dname, dentry, JFS_SBI(ip->i_sb)->nls_tab)))
+		goto out;
+
+	if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE)))
+		goto out;
+
+	/*
+	 * create entry for new link in parent directory
+	 */
+	ino = ip->i_ino;
+	if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack)))
+		goto out;
+
+	dir->i_version = ++event;
+
+	/* update object inode */
+	ip->i_nlink++;	/* for new link */
+	ip->i_ctime = CURRENT_TIME;
+	mark_inode_dirty(dir);
+	atomic_inc(&ip->i_count);
+	d_instantiate(dentry, ip);
+
+	iplist[0] = ip;
+	iplist[1] = dir;
+	rc = txCommit(tid, 2, &iplist[0], 0);
+
+      out:
+	IWRITE_UNLOCK(dir);
+	IWRITE_UNLOCK(ip);
+
+	txEnd(tid);
+
+	jFYI(1, ("jfs_link: rc:%d\n", rc));
+	return -rc;
+}
+
+/*
+ * NAME: jfs_symlink(dip, dentry, name)
+ *
+ * FUNCTION: creates a symbolic link to by name
+ * in directory
+ *
+ * PARAMETER: dip - parent directory vnode
+ * dentry - dentry of symbolic link
+ * name - the path name of the existing object
+ * that will be the source of the link
+ *
+ * RETURN: errors from subroutines
+ *
+ * note:
+ * ENAMETOOLONG: pathname resolution of a symbolic link produced
+ * an intermediate result whose length exceeds PATH_MAX [XPG4.2]
+*/
+
+int jfs_symlink(struct inode *dip, struct dentry *dentry, const char *name)
+{
+	int rc;
+	tid_t tid;
+	ino_t ino = 0;
+	component_t dname;
+	int ssize;	/* source pathname size */
+	btstack_t btstack;
+	struct inode *ip = dentry->d_inode;
+	unchar *i_fastsymlink;
+	s64 xlen = 0;
+	int bmask = 0, xsize;
+	s64 xaddr;
+	metapage_t *mp;
+	struct super_block *sb;
+	tblock_t *tblk;
+
+	struct inode *iplist[2];
+
+	jFYI(1, ("jfs_symlink: dip:0x%p name:%s\n", dip, name));
+
+	IWRITE_LOCK(dip);
+
+	ssize = strlen(name) + 1;
+
+	tid = txBegin(dip->i_sb, 0);
+
+	/*
+	 * search parent directory for entry/freespace
+	 * (dtSearch() returns parent directory page pinned)
+	 */
+
+	if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dip->i_sb)->nls_tab)))
+		goto out1;
+
+	if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE)))
+		goto out2;
+
+
+
+	/*
+	 * allocate on-disk/in-memory inode for symbolic link:
+	 * (iAlloc() returns new, locked inode)
+	 */
+
+	ip = ialloc(dip, S_IFLNK | 0777);
+	if (ip == NULL) {
+		BT_PUTSEARCH(&btstack);
+		rc = ENOSPC;
+		goto out2;
+	}
+
+	tblk = tid_to_tblock(tid);
+	tblk->xflag |= COMMIT_CREATE;
+	tblk->ip = ip;
+
+	/*
+	 * create entry for symbolic link in parent directory
+	 */
+
+	ino = ip->i_ino;
+
+
+
+	if ((rc = dtInsert(tid, dip, &dname, &ino, &btstack))) {
+		jERROR(1, ("jfs_symlink: dtInsert returned %d\n", rc));
+		/* discard new inode */
+		ip->i_nlink = 0;
+		iput(ip);
+		goto out2;
+
+	}
+
+	/* fix symlink access permission
+	 * (dir_create() ANDs in the u.u_cmask,
+	 * but symlinks really need to be 777 access)
+	 */
+	ip->i_mode |= 0777;
+
+	/*
+	 * write symbolic link target path name
+	 */
+	xtInitRoot(tid, ip);
+
+	/*
+	 * write source path name inline in on-disk inode (fast symbolic link)
+	 */
+
+	if (ssize <= IDATASIZE) {
+		ip->i_op = &jfs_symlink_inode_operations;
+
+		i_fastsymlink = JFS_IP(ip)->i_inline;
+		memcpy(i_fastsymlink, name, ssize);
+		ip->i_size = ssize - 1;
+		jFYI(1,
+		     ("jfs_symlink: fast symlink added ssize:%d name:%s \n",
+		      ssize, name));
+	}
+	/*
+	 * write source path name in a single extent
+	 */
+	else {
+		jFYI(1, ("jfs_symlink: allocate extent ip:0x%p\n", ip));
+
+		ip->i_op = &page_symlink_inode_operations;
+		ip->i_mapping->a_ops = &jfs_aops;
+
+		/*
+		 * even though the data of symlink object (source
+		 * path name) is treated as non-journaled user data,
+		 * it is read/written thru buffer cache for performance.
+		 */
+		sb = ip->i_sb;
+		bmask = JFS_SBI(sb)->bsize - 1;
+		xsize = (ssize + bmask) & ~bmask;
+		xaddr = 0;
+		xlen = xsize >> JFS_SBI(sb)->l2bsize;
+		if ((rc = xtInsert(tid, ip, 0, 0, xlen, &xaddr, 0)) == 0) {
+			ip->i_size = ssize - 1;
+			while (ssize) {
+				int copy_size = min(ssize, PSIZE);
+
+				mp = get_metapage(ip, xaddr, PSIZE, 1);
+
+				if (mp == NULL) {
+					dtDelete(tid, dip, &dname, &ino,
+						 JFS_REMOVE);
+					ip->i_nlink = 0;
+					iput(ip);
+					rc = EIO;
+					goto out2;
+				}
+				memcpy(mp->data, name, copy_size);
+				flush_metapage(mp);
+#if 0
+				mark_buffer_uptodate(bp, 1);
+				mark_buffer_dirty(bp, 1);
+				if (IS_SYNC(dip)) {
+					ll_rw_block(WRITE, 1, &bp);
+					wait_on_buffer(bp);
+				}
+				brelse(bp);
+#endif /* 0 */
+				ssize -= copy_size;
+				xaddr += JFS_SBI(sb)->nbperpage;
+			}
+			ip->i_blocks = LBLK2PBLK(sb, xlen);
+		} else {
+			dtDelete(tid, dip, &dname, &ino, JFS_REMOVE);
+			ip->i_nlink = 0;
+			iput(ip);
+			rc = ENOSPC;
+			goto out2;
+		}
+	}
+	dip->i_version = ++event;
+
+	insert_inode_hash(ip);
+	mark_inode_dirty(ip);
+	d_instantiate(dentry, ip);
+
+	/*
+	 * commit update of parent directory and link object
+	 *
+	 * if extent allocation failed (ENOSPC),
+	 * the parent inode is committed regardless to avoid
+	 * backing out parent directory update (by dtInsert())
+	 * and subsequent dtDelete() which is harmless wrt
+	 * integrity concern.
+	 * the symlink inode will be freed by iput() at exit
+	 * as it has a zero link count (by dtDelete()) and
+	 * no permanent resources.
+	 */
+
+	iplist[0] = dip;
+	if (rc == 0) {
+		iplist[1] = ip;
+		rc = txCommit(tid, 2, &iplist[0], 0);
+	} else
+		rc = txCommit(tid, 1, &iplist[0], 0);
+
+      out2:
+
+	free_UCSname(&dname);
+      out1:
+	IWRITE_UNLOCK(dip);
+
+	txEnd(tid);
+
+	jFYI(1, ("jfs_symlink: rc:%d\n", -rc));
+	return -rc;
+}
+
+
+/*
+ * NAME: jfs_rename
+ *
+ * FUNCTION: rename a file or directory
+ */
+int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+	       struct inode *new_dir, struct dentry *new_dentry)
+{
+	btstack_t btstack;
+	ino_t ino;
+	component_t new_dname;
+	struct inode *new_ip;
+	component_t old_dname;
+	struct inode *old_ip;
+	int rc;
+	tid_t tid;
+	tlock_t *tlck;
+	dtlock_t *dtlck;
+	lv_t *lv;
+	int ipcount;
+	struct inode *iplist[4];
+	tblock_t *tblk;
+	s64 new_size = 0;
+	int commit_flag;
+
+
+	jFYI(1,
+	     ("jfs_rename: %s %s\n", old_dentry->d_name.name,
+	      new_dentry->d_name.name));
+
+	old_ip = old_dentry->d_inode;
+	new_ip = new_dentry->d_inode;
+
+	if (old_dir == new_dir) {
+		if (new_ip)
+			IWRITE_LOCK_LIST(3, old_dir, old_ip, new_ip);
+		else
+			IWRITE_LOCK_LIST(2, old_dir, old_ip);
+	} else {
+		if (new_ip)
+			IWRITE_LOCK_LIST(4, old_dir, new_dir, old_ip,
+					 new_ip);
+		else
+			IWRITE_LOCK_LIST(3, old_dir, new_dir, old_ip);
+	}
+
+	if ((rc = get_UCSname(&old_dname, old_dentry,
+			      JFS_SBI(old_dir->i_sb)->nls_tab)))
+		goto out1;
+
+	if ((rc = get_UCSname(&new_dname, new_dentry,
+			      JFS_SBI(old_dir->i_sb)->nls_tab)))
+		goto out2;
+
+	/*
+	 * Make sure source inode number is what we think it is
+	 */
+	rc = dtSearch(old_dir, &old_dname, &ino, &btstack, JFS_LOOKUP);
+	if (rc || (ino != old_ip->i_ino)) {
+		rc = ENOENT;
+		goto out3;
+	}
+
+	/*
+	 * Make sure dest inode number (if any) is what we think it is
+	 */
+	rc = dtSearch(new_dir, &new_dname, &ino, &btstack, JFS_LOOKUP);
+	if (rc == 0) {
+		if ((new_ip == 0) || (ino != new_ip->i_ino)) {
+			rc = ESTALE;
+			goto out3;
+		}
+	} else if (rc != ENOENT)
+		goto out3;
+	else if (new_ip) {
+		/* no entry exists, but one was expected */
+		rc = ESTALE;
+		goto out3;
+	}
+
+	if (S_ISDIR(old_ip->i_mode)) {
+		if (new_ip) {
+			if (!dtEmpty(new_ip)) {
+				rc = ENOTEMPTY;
+				goto out3;
+			}
+		} else if ((new_dir != old_dir) &&
+			   (new_dir->i_nlink == JFS_LINK_MAX)) {
+			rc = EMLINK;
+			goto out3;
+		}
+	}
+
+	/*
+	 * The real work starts here
+	 */
+	tid = txBegin(new_dir->i_sb, 0);
+
+	if (new_ip) {
+		/*
+		 * Change existing directory entry to new inode number
+		 */
+		ino = new_ip->i_ino;
+		rc = dtModify(tid, new_dir, &new_dname, &ino,
+			      old_ip->i_ino, JFS_RENAME);
+		if (rc)
+			goto out4;
+		new_ip->i_nlink--;
+		if (S_ISDIR(new_ip->i_mode)) {
+			new_ip->i_nlink--;
+			assert(new_ip->i_nlink == 0);
+			tblk = tid_to_tblock(tid);
+			tblk->xflag |= COMMIT_DELETE;
+			tblk->ip = new_ip;
+		} else if (new_ip->i_nlink == 0) {
+			assert(!test_cflag(COMMIT_Nolink, new_ip));
+			/* free block resources */
+			if ((new_size = commitZeroLink(tid, new_ip)) < 0) {
+				txAbort(tid, 1);	/* Marks FS Dirty */
+				rc = -new_size;	/* We return -rc */
+				goto out4;
+			}
+			tblk = tid_to_tblock(tid);
+			tblk->xflag |= COMMIT_DELETE;
+			tblk->ip = new_ip;
+		} else {
+			new_ip->i_ctime = CURRENT_TIME;
+			mark_inode_dirty(new_ip);
+		}
+	} else {
+		/*
+		 * Add new directory entry
+		 */
+		rc = dtSearch(new_dir, &new_dname, &ino, &btstack,
+			      JFS_CREATE);
+		if (rc) {
+			jERROR(1,
+			       ("jfs_rename didn't expect dtSearch to fail w/rc = %d\n",
+				rc));
+			goto out4;
+		}
+
+		ino = old_ip->i_ino;
+		rc = dtInsert(tid, new_dir, &new_dname, &ino, &btstack);
+		if (rc) {
+			jERROR(1,
+			       ("jfs_rename: dtInsert failed w/rc = %d\n",
+				rc));
+			goto out4;
+		}
+		if (S_ISDIR(old_ip->i_mode))
+			new_dir->i_nlink++;
+	}
+	/*
+	 * Remove old directory entry
+	 */
+
+	ino = old_ip->i_ino;
+	rc = dtDelete(tid, old_dir, &old_dname, &ino, JFS_REMOVE);
+	if (rc) {
+		jERROR(1,
+		       ("jfs_rename did not expect dtDelete to return rc = %d\n",
+			rc));
+		txAbort(tid, 1);	/* Marks Filesystem dirty */
+		goto out4;
+	}
+	if (S_ISDIR(old_ip->i_mode)) {
+		old_dir->i_nlink--;
+		if (old_dir != new_dir) {
+			/*
+			 * Change inode number of parent for moved directory
+			 */
+
+			JFS_IP(old_ip)->i_dtroot.header.idotdot =
+			    cpu_to_le32(new_dir->i_ino);
+
+			/* Linelock header of dtree */
+			tlck = txLock(tid, old_ip,
+				      (metapage_t *) & JFS_IP(old_ip)->bxflag,
+				      tlckDTREE | tlckBTROOT);
+			dtlck = (dtlock_t *) & tlck->lock;
+			ASSERT(dtlck->index == 0);
+			lv = (lv_t *) & dtlck->lv[0];
+			lv->offset = 0;
+			lv->length = 1;
+			dtlck->index++;
+		}
+	}
+
+	/*
+	 * Update ctime on changed/moved inodes & mark dirty
+	 */
+	old_ip->i_ctime = CURRENT_TIME;
+	mark_inode_dirty(old_ip);
+
+	new_dir->i_version = ++event;
+	new_dir->i_ctime = CURRENT_TIME;
+	mark_inode_dirty(new_dir);
+
+	/* Build list of inodes modified by this transaction */
+	ipcount = 0;
+	iplist[ipcount++] = old_ip;
+	if (new_ip)
+		iplist[ipcount++] = new_ip;
+	iplist[ipcount++] = old_dir;
+
+	if (old_dir != new_dir) {
+		iplist[ipcount++] = new_dir;
+		old_dir->i_version = ++event;
+		old_dir->i_ctime = CURRENT_TIME;
+		mark_inode_dirty(old_dir);
+	}
+
+	/*
+	 * Incomplete truncate of file data can
+	 * result in timing problems unless we synchronously commit the
+	 * transaction.
+	 */
+	if (new_size)
+		commit_flag = COMMIT_SYNC;
+	else
+		commit_flag = 0;
+
+	rc = txCommit(tid, ipcount, iplist, commit_flag);
+
+	/*
+	 * Don't unlock new_ip if COMMIT_HOLDLOCK is set
+	 */
+	if (new_ip && test_cflag(COMMIT_Holdlock, new_ip))
+		new_ip = 0;
+
+      out4:
+	txEnd(tid);
+
+	while (new_size && (rc == 0)) {
+		tid = txBegin(new_ip->i_sb, 0);
+		new_size = xtTruncate_pmap(tid, new_ip, new_size);
+		if (new_size < 0) {
+			txAbort(tid, 1);
+			rc = -new_size;	/* We return -rc */
+		} else
+			rc = txCommit(tid, 1, &new_ip, COMMIT_SYNC);
+		txEnd(tid);
+	}
+      out3:
+	free_UCSname(&new_dname);
+      out2:
+	free_UCSname(&old_dname);
+      out1:
+	IWRITE_UNLOCK(old_ip);
+	if (old_dir != new_dir)
+		IWRITE_UNLOCK(new_dir);
+	if (new_ip)
+		IWRITE_UNLOCK(new_ip);
+
+	/*
+	 * Truncating the directory index table is not guaranteed.
It + * may need to be done iteratively + */ + if (test_cflag(COMMIT_Stale, old_dir)) { + if (old_dir->i_size > 1) + jfs_truncate_nolock(old_dir, 0); + + clear_cflag(COMMIT_Stale, old_dir); + } + + IWRITE_UNLOCK(old_dir); + + jFYI(1, ("jfs_rename: returning %d\n", rc)); + return -rc; +} + + +/* + * NAME: jfs_mknod + * + * FUNCTION: Create a special file (device) + */ +int jfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev) +{ + btstack_t btstack; + component_t dname; + ino_t ino; + struct inode *ip; + struct inode *iplist[2]; + int rc; + tid_t tid; + tblock_t *tblk; + + jFYI(1, ("jfs_mknod: %s\n", dentry->d_name.name)); + + if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dir->i_sb)->nls_tab))) + goto out; + + IWRITE_LOCK(dir); + + ip = ialloc(dir, mode); + if (ip == NULL) { + rc = ENOSPC; + goto out1; + } + + tid = txBegin(dir->i_sb, 0); + + if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE))) { + ip->i_nlink = 0; + iput(ip); + txEnd(tid); + goto out1; + } + + tblk = tid_to_tblock(tid); + tblk->xflag |= COMMIT_CREATE; + tblk->ip = ip; + + ino = ip->i_ino; + if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack))) { + ip->i_nlink = 0; + iput(ip); + txEnd(tid); + goto out1; + } + + if (S_ISREG(ip->i_mode)) { + ip->i_op = &jfs_file_inode_operations; + ip->i_fop = &jfs_file_operations; + ip->i_mapping->a_ops = &jfs_aops; + } else + init_special_inode(ip, ip->i_mode, rdev); + + insert_inode_hash(ip); + mark_inode_dirty(ip); + d_instantiate(dentry, ip); + + dir->i_version = ++event; + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + + mark_inode_dirty(dir); + + iplist[0] = dir; + iplist[1] = ip; + rc = txCommit(tid, 2, iplist, 0); + txEnd(tid); + + out1: + IWRITE_UNLOCK(dir); + free_UCSname(&dname); + + out: + jFYI(1, ("jfs_mknod: returning %d\n", rc)); + return -rc; +} + +static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry) +{ + btstack_t btstack; + ino_t inum; + struct inode *ip; + component_t key; + const char *name = dentry->d_name.name; + int len = dentry->d_name.len; + int rc; + + jFYI(1, ("jfs_lookup: name = %s\n", name)); + + + if ((name[0] == '.') && (len == 1)) + inum = dip->i_ino; + else if (strcmp(name, "..") == 0) + inum = PARENT(dip); + else { + if ((rc = + get_UCSname(&key, dentry, JFS_SBI(dip->i_sb)->nls_tab))) + return ERR_PTR(-rc); + IREAD_LOCK(dip); + rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP); + IREAD_UNLOCK(dip); + free_UCSname(&key); + if (rc == ENOENT) { + d_add(dentry, NULL); + return ERR_PTR(0); + } else if (rc) { + jERROR(1, + ("jfs_lookup: dtSearch returned %d\n", rc)); + return ERR_PTR(-rc); + } + } + + ip = iget(dip->i_sb, inum); + if (ip == NULL) { + jERROR(1, + ("jfs_lookup: iget failed on inum %d\n", + (uint) inum)); + return ERR_PTR(-EACCES); + } + + d_add(dentry, ip); + + return ERR_PTR(0); +} + +struct inode_operations jfs_dir_inode_operations = { + create: jfs_create, + lookup: jfs_lookup, + link: jfs_link, + unlink: jfs_unlink, + symlink: jfs_symlink, + mkdir: jfs_mkdir, + rmdir: jfs_rmdir, + mknod: jfs_mknod, + rename: jfs_rename, +}; + +struct file_operations jfs_dir_operations = { + read: generic_read_dir, + readdir: jfs_readdir, + fsync: jfs_fsync, +}; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/super.c linuxppc64_2_4/fs/jfs/super.c --- ../kernel.org/linux-2.4.19/fs/jfs/super.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/super.c Tue Apr 23 11:14:25 2002 @@ -0,0 +1,481 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you 
can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include "jfs_incore.h" +#include "jfs_filsys.h" +#include "jfs_metapage.h" +#include "jfs_superblock.h" +#include "jfs_dmap.h" +#include "jfs_imap.h" +#include "jfs_debug.h" + +MODULE_DESCRIPTION("The Journaled Filesystem (JFS)"); +MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM"); +MODULE_LICENSE("GPL"); + +static int in_shutdown; +static pid_t jfsIOthread; +static pid_t jfsCommitThread; +static pid_t jfsSyncThread; +struct task_struct *jfsIOtask; +struct task_struct *jfsCommitTask; +struct task_struct *jfsSyncTask; +DECLARE_COMPLETION(jfsIOwait); + +#ifdef CONFIG_JFS_DEBUG +int jfsloglevel = 1; +MODULE_PARM(jfsloglevel, "i"); +MODULE_PARM_DESC(jfsloglevel, "Specify JFS loglevel (0, 1 or 2)"); +#endif + +/* + * External declarations + */ +extern int jfs_mount(struct super_block *); +extern int jfs_mount_rw(struct super_block *, int); +extern int jfs_umount(struct super_block *); +extern int jfs_umount_rw(struct super_block *); + +extern int jfsIOWait(void *); +extern int jfs_lazycommit(void *); +extern int jfs_sync(void *); +extern void jfs_put_inode(struct inode *inode); +extern void jfs_read_inode(struct inode *inode); +extern void jfs_dirty_inode(struct inode *inode); +extern void jfs_delete_inode(struct inode *inode); +extern void jfs_write_inode(struct inode *inode, int wait); + +#if defined(CONFIG_JFS_DEBUG) && defined(CONFIG_PROC_FS) +extern void jfs_proc_init(void); +extern void jfs_proc_clean(void); +#endif + +int jfs_thread_stopped(void) +{ + unsigned long signr; + siginfo_t info; + + spin_lock_irq(¤t->sigmask_lock); + signr = dequeue_signal(¤t->blocked, &info); + spin_unlock_irq(¤t->sigmask_lock); + + if (signr == SIGKILL && in_shutdown) + return 1; + return 0; +} + +static int jfs_statfs(struct super_block *sb, struct statfs *buf) +{ + struct jfs_sb_info *sbi = JFS_SBI(sb); + s64 maxinodes; + imap_t *imap = JFS_IP(sbi->ipimap)->i_imap; + + jFYI(1, ("In jfs_statfs\n")); + buf->f_type = JFS_SUPER_MAGIC; + buf->f_bsize = sbi->bsize; + buf->f_blocks = sbi->bmap->db_mapsize; + buf->f_bfree = sbi->bmap->db_nfree; + buf->f_bavail = sbi->bmap->db_nfree; + /* + * If we really return the number of allocated & free inodes, some + * applications will fail because they won't see enough free inodes. 
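+	 * (A freshly created filesystem, for instance, has only a few
+	 * inodes allocated on disk even though many more can be created
+	 * on demand out of free blocks; the estimate below counts those
+	 * as well.)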
+	 * We'll try to calculate some guess as to how many inodes we can
+	 * really allocate
+	 *
+	 * buf->f_files = atomic_read(&imap->im_numinos);
+	 * buf->f_ffree = atomic_read(&imap->im_numfree);
+	 */
+	maxinodes = min((s64) atomic_read(&imap->im_numinos) +
+			((sbi->bmap->db_nfree >> imap->im_l2nbperiext)
+			 << L2INOSPEREXT), (s64)0xffffffffLL);
+	buf->f_files = maxinodes;
+	buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) -
+				    atomic_read(&imap->im_numfree));
+
+	buf->f_namelen = JFS_NAME_MAX;
+	return 0;
+}
+
+static void jfs_put_super(struct super_block *sb)
+{
+	struct jfs_sb_info *sbi = JFS_SBI(sb);
+	int rc;
+
+	jFYI(1, ("In jfs_put_super\n"));
+	rc = jfs_umount(sb);
+	if (rc) {
+		jERROR(1, ("jfs_umount failed with return code %d\n", rc));
+	}
+	unload_nls(sbi->nls_tab);
+	sbi->nls_tab = NULL;
+
+	/*
+	 * We need to clean out the direct_inode pages since this inode
+	 * is not in the inode hash.
+	 */
+	fsync_inode_data_buffers(sbi->direct_inode);
+	truncate_inode_pages(sbi->direct_mapping, 0);
+	iput(sbi->direct_inode);
+	sbi->direct_inode = NULL;
+	sbi->direct_mapping = NULL;
+
+	JFS_SBI(sb) = 0;
+	kfree(sbi);
+}
+
+static int parse_options (char * options, struct jfs_sb_info *sbi)
+{
+	void *nls_map = NULL;
+	char * this_char;
+	char * value;
+
+	if (!options)
+		return 1;
+	while ((this_char = strsep (&options, ",")) != NULL) {
+		if (!*this_char)
+			continue;
+		if ((value = strchr (this_char, '=')) != NULL)
+			*value++ = 0;
+		if (!strcmp (this_char, "iocharset")) {
+			if (!value || !*value)
+				goto needs_arg;
+			if (nls_map)	/* specified iocharset twice! */
+				unload_nls(nls_map);
+			nls_map = load_nls(value);
+			if (!nls_map) {
+				printk(KERN_ERR "JFS: charset not found\n");
+				goto cleanup;
+			}
+		/* Silently ignore the quota options */
+		} else if (!strcmp (this_char, "grpquota")
+			   || !strcmp (this_char, "noquota")
+			   || !strcmp (this_char, "quota")
+			   || !strcmp (this_char, "usrquota"))
+			/* Don't do anything ;-) */ ;
+		else {
+			printk ("jfs: Unrecognized mount option %s\n", this_char);
+			goto cleanup;
+		}
+	}
+	if (nls_map) {
+		/* Discard old (if remount) */
+		if (sbi->nls_tab)
+			unload_nls(sbi->nls_tab);
+		sbi->nls_tab = nls_map;
+	}
+	return 1;
+needs_arg:
+	printk(KERN_ERR "JFS: %s needs an argument\n", this_char);
+cleanup:
+	if (nls_map)
+		unload_nls(nls_map);
+	return 0;
+}
+
+int jfs_remount(struct super_block *sb, int *flags, char *data)
+{
+	struct jfs_sb_info *sbi = JFS_SBI(sb);
+
+	if (!parse_options(data, sbi)) {
+		return -EINVAL;
+	}
+
+	if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
+		/*
+		 * Invalidate any previously read metadata.
fsck may + * have changed the on-disk data since we mounted r/o + */ + truncate_inode_pages(sbi->direct_mapping, 0); + + return jfs_mount_rw(sb, 1); + } else if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) + return jfs_umount_rw(sb); + + return 0; +} + +static struct super_operations jfs_sops = { + read_inode: jfs_read_inode, + dirty_inode: jfs_dirty_inode, + write_inode: jfs_write_inode, + put_inode: jfs_put_inode, + delete_inode: jfs_delete_inode, + put_super: jfs_put_super, + statfs: jfs_statfs, + remount_fs: jfs_remount, + clear_inode: diClearExtension, +}; + +static struct super_block *jfs_read_super(struct super_block *sb, + void *data, int silent) +{ + struct jfs_sb_info *sbi; + struct inode *inode; + int rc; + + jFYI(1, + ("In jfs_read_super s_dev=0x%x s_flags=0x%lx\n", sb->s_dev, + sb->s_flags)); + + sbi = kmalloc(sizeof(struct jfs_sb_info), GFP_KERNEL); + JFS_SBI(sb) = sbi; + if (!sbi) + return NULL; + memset(sbi, 0, sizeof(struct jfs_sb_info)); + + if (!parse_options((char *)data, sbi)) { + kfree(sbi); + return NULL; + } + + /* + * Initialize blocksize to 4K. + */ + sb->s_blocksize = PSIZE; + sb->s_blocksize_bits = L2PSIZE; + set_blocksize(sb->s_dev, PSIZE); + + /* + * Initialize direct-mapping inode/address-space + */ + inode = new_inode(sb); + if (inode == NULL) + goto out_kfree; + inode->i_ino = 0; + inode->i_nlink = 1; + inode->i_size = 0x0000010000000000LL; + inode->i_mapping->a_ops = &direct_aops; + inode->i_mapping->gfp_mask = GFP_NOFS; + + sbi->direct_inode = inode; + sbi->direct_mapping = inode->i_mapping; + + rc = alloc_jfs_inode(inode); + if (rc) + goto out_free_inode; + + sb->s_op = &jfs_sops; + rc = jfs_mount(sb); + if (rc) { + if (!silent) { + jERROR(1, + ("jfs_mount failed w/return code = %d\n", + rc)); + } + goto out_mount_failed; + } + if (sb->s_flags & MS_RDONLY) + sbi->log = 0; + else { + rc = jfs_mount_rw(sb, 0); + if (rc) { + if (!silent) { + jERROR(1, + ("jfs_mount_rw failed w/return code = %d\n", + rc)); + } + goto out_no_rw; + } + } + + sb->s_magic = JFS_SUPER_MAGIC; + + inode = iget(sb, ROOT_I); + if (!inode || is_bad_inode(inode)) + goto out_no_root; + sb->s_root = d_alloc_root(inode); + if (!sb->s_root) + goto out_no_root; + + if (!sbi->nls_tab) + sbi->nls_tab = load_nls_default(); + + sb->s_maxbytes = ((u64) sb->s_blocksize) << 40; +#if BITS_PER_LONG == 32 + sb->s_maxbytes = min((u64)PAGE_CACHE_SIZE << 32, sb->s_maxbytes); +#endif + + return sb; + +out_no_root: + jEVENT(1, ("jfs_read_super: get root inode failed\n")); + if (inode) + iput(inode); + +out_no_rw: + rc = jfs_umount(sb); + if (rc) { + jERROR(1, ("jfs_umount failed with return code %d\n", rc)); + } +out_mount_failed: + fsync_inode_data_buffers(sbi->direct_inode); + truncate_inode_pages(sbi->direct_mapping, 0); + sb->s_op = NULL; + + free_jfs_inode(inode); + +out_free_inode: + iput(sbi->direct_inode); + sbi->direct_inode = NULL; + sbi->direct_mapping = NULL; +out_kfree: + if (sbi->nls_tab) + unload_nls(sbi->nls_tab); + kfree(sbi); + return NULL; +} + +static DECLARE_FSTYPE_DEV(jfs_fs_type, "jfs", jfs_read_super); + +extern int metapage_init(void); +extern int txInit(void); +extern void txExit(void); +extern void metapage_exit(void); + +static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +{ + struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) { + INIT_LIST_HEAD(&jfs_ip->anon_inode_list); + INIT_LIST_HEAD(&jfs_ip->mp_list); + RDWRLOCK_INIT(&jfs_ip->rdwrlock); + 
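+		/* The slab constructor runs once, when an object is first
+		 * created in the cache, not on every allocation, so these
+		 * fields stay initialized across alloc/free cycles. */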
} +} + +static int __init init_jfs_fs(void) +{ + int rc; + + printk("JFS development version: $Name: $\n"); + + jfs_inode_cachep = + kmem_cache_create("jfs_ip", + sizeof(struct jfs_inode_info), + 0, 0, init_once, NULL); + if (jfs_inode_cachep == NULL) + return -ENOMEM; + + /* + * Metapage initialization + */ + rc = metapage_init(); + if (rc) { + jERROR(1, ("metapage_init failed w/rc = %d\n", rc)); + goto free_slab; + } + + /* + * Transaction Manager initialization + */ + rc = txInit(); + if (rc) { + jERROR(1, ("txInit failed w/rc = %d\n", rc)); + goto free_metapage; + } + + /* + * I/O completion thread (endio) + */ + jfsIOthread = kernel_thread(jfsIOWait, 0, + CLONE_FS | CLONE_FILES | + CLONE_SIGHAND); + if (jfsIOthread < 0) { + jERROR(1, + ("init_jfs_fs: fork failed w/rc = %d\n", + jfsIOthread)); + goto end_txmngr; + } + wait_for_completion(&jfsIOwait); /* Wait until IO thread starts */ + + jfsCommitThread = kernel_thread(jfs_lazycommit, 0, + CLONE_FS | CLONE_FILES | + CLONE_SIGHAND); + if (jfsCommitThread < 0) { + jERROR(1, + ("init_jfs_fs: fork failed w/rc = %d\n", + jfsCommitThread)); + goto kill_iotask; + } + wait_for_completion(&jfsIOwait); /* Wait until IO thread starts */ + + jfsSyncThread = kernel_thread(jfs_sync, 0, + CLONE_FS | CLONE_FILES | + CLONE_SIGHAND); + if (jfsSyncThread < 0) { + jERROR(1, + ("init_jfs_fs: fork failed w/rc = %d\n", + jfsSyncThread)); + goto kill_committask; + } + wait_for_completion(&jfsIOwait); /* Wait until IO thread starts */ + +#if defined(CONFIG_JFS_DEBUG) && defined(CONFIG_PROC_FS) + jfs_proc_init(); +#endif + + return register_filesystem(&jfs_fs_type); + + +kill_committask: + send_sig(SIGKILL, jfsCommitTask, 1); + wait_for_completion(&jfsIOwait); /* Wait until Commit thread exits */ +kill_iotask: + send_sig(SIGKILL, jfsIOtask, 1); + wait_for_completion(&jfsIOwait); /* Wait until IO thread exits */ +end_txmngr: + txExit(); +free_metapage: + metapage_exit(); +free_slab: + kmem_cache_destroy(jfs_inode_cachep); + return -rc; +} + +static void __exit exit_jfs_fs(void) +{ + jFYI(1, ("exit_jfs_fs called\n")); + + in_shutdown = 1; + txExit(); + metapage_exit(); + send_sig(SIGKILL, jfsIOtask, 1); + wait_for_completion(&jfsIOwait); /* Wait until IO thread exits */ + send_sig(SIGKILL, jfsCommitTask, 1); + wait_for_completion(&jfsIOwait); /* Wait until Commit thread exits */ + send_sig(SIGKILL, jfsSyncTask, 1); + wait_for_completion(&jfsIOwait); /* Wait until Sync thread exits */ +#if defined(CONFIG_JFS_DEBUG) && defined(CONFIG_PROC_FS) + jfs_proc_clean(); +#endif + unregister_filesystem(&jfs_fs_type); + kmem_cache_destroy(jfs_inode_cachep); +} + + +EXPORT_NO_SYMBOLS; + +module_init(init_jfs_fs) +module_exit(exit_jfs_fs) diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/jfs/symlink.c linuxppc64_2_4/fs/jfs/symlink.c --- ../kernel.org/linux-2.4.19/fs/jfs/symlink.c Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/fs/jfs/symlink.c Tue Apr 23 11:21:51 2002 @@ -0,0 +1,47 @@ + +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * JFS fast symlink handling code + */ + +#include +#include "jfs_incore.h" + +static int jfs_readlink(struct dentry *, char *buffer, int buflen); +static int jfs_follow_link(struct dentry *dentry, struct nameidata *nd); + +/* + * symlinks can't do much... + */ +struct inode_operations jfs_symlink_inode_operations = { + readlink: jfs_readlink, + follow_link: jfs_follow_link, +}; + +static int jfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + char *s = JFS_IP(dentry->d_inode)->i_inline; + return vfs_follow_link(nd, s); +} + +static int jfs_readlink(struct dentry *dentry, char *buffer, int buflen) +{ + char *s = JFS_IP(dentry->d_inode)->i_inline; + return vfs_readlink(dentry, buffer, buflen, s); +} diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/nfsd/nfsctl.c linuxppc64_2_4/fs/nfsd/nfsctl.c --- ../kernel.org/linux-2.4.19/fs/nfsd/nfsctl.c Mon Apr 22 11:34:26 2002 +++ linuxppc64_2_4/fs/nfsd/nfsctl.c Tue Apr 23 09:37:30 2002 @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/nls/Config.in linuxppc64_2_4/fs/nls/Config.in --- ../kernel.org/linux-2.4.19/fs/nls/Config.in Fri Apr 19 10:30:00 2002 +++ linuxppc64_2_4/fs/nls/Config.in Thu Feb 21 20:57:41 2002 @@ -12,7 +12,7 @@ # msdos and Joliet want NLS if [ "$CONFIG_JOLIET" = "y" -o "$CONFIG_FAT_FS" != "n" \ -o "$CONFIG_NTFS_FS" != "n" -o "$CONFIG_NCPFS_NLS" = "y" \ - -o "$CONFIG_SMB_NLS" = "y" ]; then + -o "$CONFIG_SMB_NLS" = "y" -o "$CONFIG_JFS_FS" != "n" ]; then define_bool CONFIG_NLS y else define_bool CONFIG_NLS n diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/proc/proc_misc.c linuxppc64_2_4/fs/proc/proc_misc.c --- ../kernel.org/linux-2.4.19/fs/proc/proc_misc.c Fri Apr 19 11:00:46 2002 +++ linuxppc64_2_4/fs/proc/proc_misc.c Mon Apr 22 10:35:10 2002 @@ -348,13 +348,46 @@ return len; } -#if !defined(CONFIG_ARCH_S390) +#if !defined(CONFIG_ARCH_S390) && !defined(CONFIG_X86) && !defined(CONFIG_PPC64) static int interrupts_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { int len = get_irq_list(page); return proc_calc_metrics(page, start, off, count, eof, len); } +#else +extern int show_interrupts(struct seq_file *p, void *v); +static struct seq_operations proc_interrupts_op = { + show: show_interrupts, +}; +static int interrupts_open(struct inode *inode, struct file *file) +{ + unsigned size = PAGE_SIZE; + /* + * probably should depend on NR_CPUS, but that's only rough estimate; + * if we'll need more it will be given, + */ + char *buf = kmalloc(size, GFP_KERNEL); + struct seq_file *m; + int res; + + if (!buf) + return -ENOMEM; + res = seq_open(file, &proc_interrupts_op); + if (!res) { + m = file->private_data; + m->buf = buf; + m->size = size; + } else + kfree(buf); + return res; +} +static struct file_operations proc_interrupts_operations = { + open: interrupts_open, + read: seq_read_single, + llseek: seq_lseek, + release: seq_release, +}; #endif static int filesystems_read_proc(char *page, char **start, off_t off, @@ -526,7 +559,7 @@ {"stat", kstat_read_proc}, {"devices", devices_read_proc}, {"partitions", partitions_read_proc}, -#if !defined(CONFIG_ARCH_S390) +#if !defined(CONFIG_ARCH_S390) && !defined(CONFIG_X86) && !defined(CONFIG_PPC64) {"interrupts", interrupts_read_proc}, #endif {"filesystems", 
filesystems_read_proc}, @@ -552,6 +585,11 @@ if (entry) entry->proc_fops = &proc_kmsg_operations; create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); +#if defined(CONFIG_ARCH_S390) || defined(CONFIG_X86) || defined(CONFIG_PPC64) + entry = create_proc_entry("interrupts", 0, NULL); + if (entry) + entry->proc_fops = &proc_interrupts_operations; +#endif #ifdef CONFIG_MODULES create_seq_entry("ksyms", 0, &proc_ksyms_operations); #endif diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/proc/root.c linuxppc64_2_4/fs/proc/root.c --- ../kernel.org/linux-2.4.19/fs/proc/root.c Fri Apr 19 11:00:38 2002 +++ linuxppc64_2_4/fs/proc/root.c Tue Aug 14 14:55:53 2001 @@ -16,6 +16,7 @@ #include #include #include +#include struct proc_dir_entry *proc_net, *proc_bus, *proc_root_fs, *proc_root_driver; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/fs/seq_file.c linuxppc64_2_4/fs/seq_file.c --- ../kernel.org/linux-2.4.19/fs/seq_file.c Fri Apr 19 10:30:02 2002 +++ linuxppc64_2_4/fs/seq_file.c Thu Feb 21 21:43:40 2002 @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -187,6 +188,62 @@ kfree(m->buf); m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); return !m->buf ? -ENOMEM : -EAGAIN; +} + +ssize_t seq_read_single(struct file *file, char *buf, size_t size, loff_t *ppos) +{ + struct seq_file *m = (struct seq_file *)file->private_data; + size_t copied = 0; + size_t n; + int err = 0; + + if (ppos != &file->f_pos) + return -EPIPE; + + down(&m->sem); + if (*ppos != 0) + return 0; + if (!m->count) { + if (!m->buf) { + m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); + if (!m->buf) + goto Enomem; + } + m->from = 0; + while (1) { + err = m->op->show(m, NULL); + if (err) + goto Done; + if (m->count < m->size) + break; + kfree(m->buf); + m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); + if (!m->buf) + goto Enomem; + } + } + n = min(m->count, size); + err = copy_to_user(buf, m->buf + m->from, n); + if (err) + goto Efault; + size -= n; + buf += n; + m->count -= n; + m->from += n; + copied += n; + if (!m->count) + *ppos = 1; +Done: + if (!copied) + copied = err; + up(&m->sem); + return copied; +Enomem: + err = -ENOMEM; + goto Done; +Efault: + err = -EFAULT; + goto Done; } /** diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-arm/arch-sa1100/system3.h linuxppc64_2_4/include/asm-arm/arch-sa1100/system3.h --- ../kernel.org/linux-2.4.19/include/asm-arm/arch-sa1100/system3.h Fri Apr 19 11:00:38 2002 +++ linuxppc64_2_4/include/asm-arm/arch-sa1100/system3.h Mon Apr 22 14:12:30 2002 @@ -3,13 +3,21 @@ * * Copyright (C) 2001 Stefan Eletzhofer * - * $Id: system3.h,v 1.2.4.2 2001/12/04 14:58:50 seletz Exp $ + * $Id: system3.h,v 1.1 2002/04/22 19:12:30 tgall Exp $ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
* * $Log: system3.h,v $ + * Revision 1.1 2002/04/22 19:12:30 tgall + * TAG: adds and deletes up to 2.4.19-pre6 now + * + * + * Code Origin : Myself + * License : Same as project + * Copyright : IBM + * * Revision 1.2.4.2 2001/12/04 14:58:50 seletz * - removed neponset hack * - removed irq definitions (now in irqs.h) diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ia64/sn/ate_utils.h linuxppc64_2_4/include/asm-ia64/sn/ate_utils.h --- ../kernel.org/linux-2.4.19/include/asm-ia64/sn/ate_utils.h Fri Apr 19 11:00:00 2002 +++ linuxppc64_2_4/include/asm-ia64/sn/ate_utils.h Mon Apr 22 13:25:47 2002 @@ -1,7 +1,7 @@ #ifndef _ASM_IA64_SN_ATE_UTILS_H #define _ASM_IA64_SN_ATE_UTILS_H -/* $Id: ate_utils.h,v 1.1 2002/02/28 17:31:25 marcelo Exp $ +/* $Id: ate_utils.h,v 1.1 2002/04/22 18:25:47 tgall Exp $ * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ia64/sn/kldir.h linuxppc64_2_4/include/asm-ia64/sn/kldir.h --- ../kernel.org/linux-2.4.19/include/asm-ia64/sn/kldir.h Fri Apr 19 11:00:00 2002 +++ linuxppc64_2_4/include/asm-ia64/sn/kldir.h Wed Dec 31 18:00:00 1969 @@ -1,363 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Derived from IRIX , revision 1.21. - * - * Copyright (C) 1992-1997,1999,2001-2002 Silicon Graphics, Inc. All Rights Reserved. - * Copyright (C) 1999 by Ralf Baechle - */ -#ifndef _ASM_IA64_SN_KLDIR_H -#define _ASM_IA64_SN_KLDIR_H - -#include - -/* - * The kldir memory area resides at a fixed place in each node's memory and - * provides pointers to most other IP27 memory areas. This allows us to - * resize and/or relocate memory areas at a later time without breaking all - * firmware and kernels that use them. Indices in the array are - * permanently dedicated to areas listed below. Some memory areas (marked - * below) reside at a permanently fixed location, but are included in the - * directory for completeness. - */ - -#define KLDIR_MAGIC 0x434d5f53505f5357 - -/* - * The upper portion of the memory map applies during boot - * only and is overwritten by IRIX/SYMMON. 
- * - * MEMORY MAP PER NODE - * - * 0x2000000 (32M) +-----------------------------------------+ - * | IO6 BUFFERS FOR FLASH ENET IOC3 | - * 0x1F80000 (31.5M) +-----------------------------------------+ - * | IO6 TEXT/DATA/BSS/stack | - * 0x1C00000 (30M) +-----------------------------------------+ - * | IO6 PROM DEBUG TEXT/DATA/BSS/stack | - * 0x0800000 (28M) +-----------------------------------------+ - * | IP27 PROM TEXT/DATA/BSS/stack | - * 0x1B00000 (27M) +-----------------------------------------+ - * | IP27 CFG | - * 0x1A00000 (26M) +-----------------------------------------+ - * | Graphics PROM | - * 0x1800000 (24M) +-----------------------------------------+ - * | 3rd Party PROM drivers | - * 0x1600000 (22M) +-----------------------------------------+ - * | | - * | Free | - * | | - * +-----------------------------------------+ - * | UNIX DEBUG Version | - * 0x190000 (2M--) +-----------------------------------------+ - * | SYMMON | - * | (For UNIX Debug only) | - * 0x34000 (208K) +-----------------------------------------+ - * | SYMMON STACK [NUM_CPU_PER_NODE] | - * | (For UNIX Debug only) | - * 0x25000 (148K) +-----------------------------------------+ - * | KLCONFIG - II (temp) | - * | | - * | ---------------------------- | - * | | - * | UNIX NON-DEBUG Version | - * 0x19000 (100K) +-----------------------------------------+ - * - * - * The lower portion of the memory map contains information that is - * permanent and is used by the IP27PROM, IO6PROM and IRIX. - * - * 0x19000 (100K) +-----------------------------------------+ - * | | - * | PI Error Spools (32K) | - * | | - * 0x12000 (72K) +-----------------------------------------+ - * | Unused | - * 0x11c00 (71K) +-----------------------------------------+ - * | CPU 1 NMI Eframe area | - * 0x11a00 (70.5K) +-----------------------------------------+ - * | CPU 0 NMI Eframe area | - * 0x11800 (70K) +-----------------------------------------+ - * | CPU 1 NMI Register save area | - * 0x11600 (69.5K) +-----------------------------------------+ - * | CPU 0 NMI Register save area | - * 0x11400 (69K) +-----------------------------------------+ - * | GDA (1k) | - * 0x11000 (68K) +-----------------------------------------+ - * | Early cache Exception stack | - * | and/or | - * | kernel/io6prom nmi registers | - * 0x10800 (66k) +-----------------------------------------+ - * | cache error eframe | - * 0x10400 (65K) +-----------------------------------------+ - * | Exception Handlers (UALIAS copy) | - * 0x10000 (64K) +-----------------------------------------+ - * | | - * | | - * | KLCONFIG - I (permanent) (48K) | - * | | - * | | - * | | - * 0x4000 (16K) +-----------------------------------------+ - * | NMI Handler (Protected Page) | - * 0x3000 (12K) +-----------------------------------------+ - * | ARCS PVECTORS (master node only) | - * 0x2c00 (11K) +-----------------------------------------+ - * | ARCS TVECTORS (master node only) | - * 0x2800 (10K) +-----------------------------------------+ - * | LAUNCH [NUM_CPU] | - * 0x2400 (9K) +-----------------------------------------+ - * | Low memory directory (KLDIR) | - * 0x2000 (8K) +-----------------------------------------+ - * | ARCS SPB (1K) | - * 0x1000 (4K) +-----------------------------------------+ - * | Early cache Exception stack | - * | and/or | - * | kernel/io6prom nmi registers | - * 0x800 (2k) +-----------------------------------------+ - * | cache error eframe | - * 0x400 (1K) +-----------------------------------------+ - * | Exception Handlers | - * 0x0 (0K) 
+-----------------------------------------+ - */ - -#ifdef __ASSEMBLY__ -#define KLDIR_OFF_MAGIC 0x00 -#define KLDIR_OFF_OFFSET 0x08 -#define KLDIR_OFF_POINTER 0x10 -#define KLDIR_OFF_SIZE 0x18 -#define KLDIR_OFF_COUNT 0x20 -#define KLDIR_OFF_STRIDE 0x28 -#endif /* __ASSEMBLY__ */ - -#ifndef __ASSEMBLY__ -typedef struct kldir_ent_s { - u64 magic; /* Indicates validity of entry */ - off_t offset; /* Offset from start of node space */ - __psunsigned_t pointer; /* Pointer to area in some cases */ - size_t size; /* Size in bytes */ - u64 count; /* Repeat count if array, 1 if not */ - size_t stride; /* Stride if array, 0 if not */ - char rsvd[16]; /* Pad entry to 0x40 bytes */ - /* NOTE: These 16 bytes are used in the Partition KLDIR - entry to store partition info. Refer to klpart.h for this. */ -} kldir_ent_t; -#endif /* __ASSEMBLY__ */ - - -#define KLDIR_ENT_SIZE 0x40 -#define KLDIR_MAX_ENTRIES (0x400 / 0x40) - - - -/* - * The upper portion of the memory map applies during boot - * only and is overwritten by IRIX/SYMMON. The minimum memory bank - * size on IP35 is 64M, which provides a limit on the amount of space - * the PROM can assume it has available. - * - * Most of the addresses below are defined as macros in this file, or - * in SN/addrs.h or SN/SN1/addrs.h. - * - * MEMORY MAP PER NODE - * - * 0x4000000 (64M) +-----------------------------------------+ - * | | - * | | - * | IO7 TEXT/DATA/BSS/stack | - * 0x3000000 (48M) +-----------------------------------------+ - * | Free | - * 0x2102000 (>33M) +-----------------------------------------+ - * | IP35 Topology (PCFG) + misc data | - * 0x2000000 (32M) +-----------------------------------------+ - * | IO7 BUFFERS FOR FLASH ENET IOC3 | - * 0x1F80000 (31.5M) +-----------------------------------------+ - * | Free | - * 0x1C00000 (28M) +-----------------------------------------+ - * | IP35 PROM TEXT/DATA/BSS/stack | - * 0x1A00000 (26M) +-----------------------------------------+ - * | Routing temp. space | - * 0x1800000 (24M) +-----------------------------------------+ - * | Diagnostics temp. space | - * 0x1500000 (21M) +-----------------------------------------+ - * | Free | - * 0x1400000 (20M) +-----------------------------------------+ - * | IO7 PROM temporary copy | - * 0x1300000 (19M) +-----------------------------------------+ - * | | - * | Free | - * | (UNIX DATA starts above 0x1000000) | - * | | - * +-----------------------------------------+ - * | UNIX DEBUG Version | - * 0x0310000 (3.1M) +-----------------------------------------+ - * | SYMMON, loaded just below UNIX | - * | (For UNIX Debug only) | - * | | - * | | - * 0x006C000 (432K) +-----------------------------------------+ - * | SYMMON STACK [NUM_CPU_PER_NODE] | - * | (For UNIX Debug only) | - * 0x004C000 (304K) +-----------------------------------------+ - * | | - * | | - * | UNIX NON-DEBUG Version | - * 0x0040000 (256K) +-----------------------------------------+ - * - * - * The lower portion of the memory map contains information that is - * permanent and is used by the IP35PROM, IO7PROM and IRIX. 
- * - * 0x40000 (256K) +-----------------------------------------+ - * | | - * | KLCONFIG (64K) | - * | | - * 0x30000 (192K) +-----------------------------------------+ - * | | - * | PI Error Spools (64K) | - * | | - * 0x20000 (128K) +-----------------------------------------+ - * | | - * | Unused | - * | | - * 0x19000 (100K) +-----------------------------------------+ - * | Early cache Exception stack (CPU 3)| - * 0x18800 (98K) +-----------------------------------------+ - * | cache error eframe (CPU 3) | - * 0x18400 (97K) +-----------------------------------------+ - * | Exception Handlers (CPU 3) | - * 0x18000 (96K) +-----------------------------------------+ - * | | - * | Unused | - * | | - * 0x13c00 (79K) +-----------------------------------------+ - * | GPDA (8k) | - * 0x11c00 (71K) +-----------------------------------------+ - * | Early cache Exception stack (CPU 2)| - * 0x10800 (66k) +-----------------------------------------+ - * | cache error eframe (CPU 2) | - * 0x10400 (65K) +-----------------------------------------+ - * | Exception Handlers (CPU 2) | - * 0x10000 (64K) +-----------------------------------------+ - * | | - * | Unused | - * | | - * 0x0b400 (45K) +-----------------------------------------+ - * | GDA (1k) | - * 0x0b000 (44K) +-----------------------------------------+ - * | NMI Eframe areas (4) | - * 0x0a000 (40K) +-----------------------------------------+ - * | NMI Register save areas (4) | - * 0x09000 (36K) +-----------------------------------------+ - * | Early cache Exception stack (CPU 1)| - * 0x08800 (34K) +-----------------------------------------+ - * | cache error eframe (CPU 1) | - * 0x08400 (33K) +-----------------------------------------+ - * | Exception Handlers (CPU 1) | - * 0x08000 (32K) +-----------------------------------------+ - * | | - * | | - * | Unused | - * | | - * | | - * 0x04000 (16K) +-----------------------------------------+ - * | NMI Handler (Protected Page) | - * 0x03000 (12K) +-----------------------------------------+ - * | ARCS PVECTORS (master node only) | - * 0x02c00 (11K) +-----------------------------------------+ - * | ARCS TVECTORS (master node only) | - * 0x02800 (10K) +-----------------------------------------+ - * | LAUNCH [NUM_CPU] | - * 0x02400 (9K) +-----------------------------------------+ - * | Low memory directory (KLDIR) | - * 0x02000 (8K) +-----------------------------------------+ - * | ARCS SPB (1K) | - * 0x01000 (4K) +-----------------------------------------+ - * | Early cache Exception stack (CPU 0)| - * 0x00800 (2k) +-----------------------------------------+ - * | cache error eframe (CPU 0) | - * 0x00400 (1K) +-----------------------------------------+ - * | Exception Handlers (CPU 0) | - * 0x00000 (0K) +-----------------------------------------+ - */ - -/* - * NOTE: To change the kernel load address, you must update: - * - the appropriate elspec files in irix/kern/master.d - * - NODEBUGUNIX_ADDR in SN/SN1/addrs.h - * - IP27_FREEMEM_OFFSET below - * - KERNEL_START_OFFSET below (if supporting cells) - */ - - -/* - * This is defined here because IP27_SYMMON_STK_SIZE must be at least what - * we define here. Since it's set up in the prom. We can't redefine it later - * and expect more space to be allocated. The way to find out the true size - * of the symmon stacks is to divide SYMMON_STK_SIZE by SYMMON_STK_STRIDE - * for a particular node. - */ -#define SYMMON_STACK_SIZE 0x8000 - -#if defined (PROM) || defined (SABLE) - -/* - * These defines are prom version dependent. 
No code other than the IP35 - * prom should attempt to use these values. - */ -#define IP27_LAUNCH_OFFSET 0x2400 -#define IP27_LAUNCH_SIZE 0x400 -#define IP27_LAUNCH_COUNT 4 -#define IP27_LAUNCH_STRIDE 0x100 /* could be as small as 0x80 */ - -#define IP27_KLCONFIG_OFFSET 0x30000 -#define IP27_KLCONFIG_SIZE 0x10000 -#define IP27_KLCONFIG_COUNT 1 -#define IP27_KLCONFIG_STRIDE 0 - -#define IP27_NMI_OFFSET 0x3000 -#define IP27_NMI_SIZE 0x100 -#define IP27_NMI_COUNT 4 -#define IP27_NMI_STRIDE 0x40 - -#define IP27_PI_ERROR_OFFSET 0x20000 -#define IP27_PI_ERROR_SIZE 0x10000 -#define IP27_PI_ERROR_COUNT 1 -#define IP27_PI_ERROR_STRIDE 0 - -#define IP27_SYMMON_STK_OFFSET 0x4c000 -#define IP27_SYMMON_STK_SIZE 0x20000 -#define IP27_SYMMON_STK_COUNT 4 -/* IP27_SYMMON_STK_STRIDE must be >= SYMMON_STACK_SIZE */ -#define IP27_SYMMON_STK_STRIDE 0x8000 - -#define IP27_FREEMEM_OFFSET 0x40000 -#define IP27_FREEMEM_SIZE (-1) -#define IP27_FREEMEM_COUNT 1 -#define IP27_FREEMEM_STRIDE 0 - -#endif /* PROM || SABLE*/ -/* - * There will be only one of these in a partition so the IO7 must set it up. - */ -#define IO6_GDA_OFFSET 0xb000 -#define IO6_GDA_SIZE 0x400 -#define IO6_GDA_COUNT 1 -#define IO6_GDA_STRIDE 0 - -/* - * save area of kernel nmi regs in the prom format - */ -#define IP27_NMI_KREGS_OFFSET 0x9000 -#define IP27_NMI_KREGS_CPU_SIZE 0x400 -/* - * save area of kernel nmi regs in eframe format - */ -#define IP27_NMI_EFRAME_OFFSET 0xa000 -#define IP27_NMI_EFRAME_SIZE 0x400 - -#define GPDA_OFFSET 0x11c00 - -#endif /* _ASM_IA64_SN_KLDIR_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ia64/sn/leds.h linuxppc64_2_4/include/asm-ia64/sn/leds.h --- ../kernel.org/linux-2.4.19/include/asm-ia64/sn/leds.h Fri Apr 19 11:00:00 2002 +++ linuxppc64_2_4/include/asm-ia64/sn/leds.h Wed Dec 31 18:00:00 1969 @@ -1,46 +0,0 @@ -#ifndef _ASM_IA64_SN_LEDS_H -#define _ASM_IA64_SN_LEDS_H - -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * Copyright (C) 2000-2002 Silicon Graphics, Inc. All rights reserved. - */ - -#include -#include -#include -#include -#include - -#ifdef CONFIG_IA64_SGI_SN1 -#define LED0 0xc0000b00100000c0LL /* ZZZ fixme */ -#define LED_CPU_SHIFT 3 -#else -#include -#define LED0 (LOCAL_MMR_ADDR(SH_REAL_JUNK_BUS_LED0)) -#define LED_CPU_SHIFT 16 -#endif - -#define LED_CPU_HEARTBEAT 0x01 -#define LED_CPU_ACTIVITY 0x02 -#define LED_MASK_AUTOTEST 0xfe - -/* - * Basic macros for flashing the LEDS on an SGI, SN1. - */ - -static __inline__ void -set_led_bits(u8 value, u8 mask) -{ - pda.led_state = (pda.led_state & ~mask) | (value & mask); -#ifdef CONFIG_IA64_SGI_SN1 - *pda.led_address = (long) pda.led_state; -#else - *pda.led_address = (short) pda.led_state; -#endif -} - -#endif /* _ASM_IA64_SN_LEDS_H */ - diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn1/intr.h linuxppc64_2_4/include/asm-ia64/sn/sn1/intr.h --- ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn1/intr.h Fri Apr 19 11:00:01 2002 +++ linuxppc64_2_4/include/asm-ia64/sn/sn1/intr.h Mon Apr 22 13:25:48 2002 @@ -1,4 +1,4 @@ -/* $Id: intr.h,v 1.1 2002/02/28 17:31:25 marcelo Exp $ +/* $Id: intr.h,v 1.1 2002/04/22 18:25:48 tgall Exp $ * * This file is subject to the terms and conditions of the GNU General Public * License. 
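The OFFSET/SIZE/COUNT/STRIDE quadruples above describe fixed, strided per-CPU arrays in node-local memory. A sketch of the address arithmetic they imply (node_base and the helper name are illustrative):

/* Sketch: address of CPU n's slot in a strided per-node area, e.g.
 * area_addr(node_base, IP27_NMI_OFFSET, IP27_NMI_STRIDE, n) for the NMI
 * save area.  Only meaningful for n below the area's COUNT. */
static inline unsigned long area_addr(unsigned long node_base,
                                      unsigned long offset,
                                      unsigned long stride, int cpu)
{
        return node_base + offset + (unsigned long)cpu * stride;
}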
See the file "COPYING" in the main directory of this archive diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn1/intr_public.h linuxppc64_2_4/include/asm-ia64/sn/sn1/intr_public.h --- ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn1/intr_public.h Fri Apr 19 11:00:01 2002 +++ linuxppc64_2_4/include/asm-ia64/sn/sn1/intr_public.h Mon Apr 22 13:25:48 2002 @@ -1,4 +1,4 @@ -/* $Id: intr_public.h,v 1.1 2002/02/28 17:31:25 marcelo Exp $ +/* $Id: intr_public.h,v 1.1 2002/04/22 18:25:48 tgall Exp $ * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn1/kldir.h linuxppc64_2_4/include/asm-ia64/sn/sn1/kldir.h --- ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn1/kldir.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ia64/sn/sn1/kldir.h Fri Jun 22 23:46:34 2001 @@ -0,0 +1,222 @@ +/* $Id$ + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000 Silicon Graphics, Inc. + * Copyright (C) 2000 by Colin Ngam + */ + +#ifndef _ASM_SN_SN1_KLDIR_H +#define _ASM_SN_SN1_KLDIR_H + +/* + * The upper portion of the memory map applies during boot + * only and is overwritten by IRIX/SYMMON. The minimum memory bank + * size on IP35 is 64M, which provides a limit on the amount of space + * the PROM can assume it has available. + * + * Most of the addresses below are defined as macros in this file, or + * in SN/addrs.h or SN/SN1/addrs.h. + * + * MEMORY MAP PER NODE + * + * 0x4000000 (64M) +-----------------------------------------+ + * | | + * | | + * | IO7 TEXT/DATA/BSS/stack | + * 0x3000000 (48M) +-----------------------------------------+ + * | Free | + * 0x2102000 (>33M) +-----------------------------------------+ + * | IP35 Topology (PCFG) + misc data | + * 0x2000000 (32M) +-----------------------------------------+ + * | IO7 BUFFERS FOR FLASH ENET IOC3 | + * 0x1F80000 (31.5M) +-----------------------------------------+ + * | Free | + * 0x1C00000 (28M) +-----------------------------------------+ + * | IP35 PROM TEXT/DATA/BSS/stack | + * 0x1A00000 (26M) +-----------------------------------------+ + * | Routing temp. space | + * 0x1800000 (24M) +-----------------------------------------+ + * | Diagnostics temp. space | + * 0x1500000 (21M) +-----------------------------------------+ + * | Free | + * 0x1400000 (20M) +-----------------------------------------+ + * | IO7 PROM temporary copy | + * 0x1300000 (19M) +-----------------------------------------+ + * | | + * | Free | + * | (UNIX DATA starts above 0x1000000) | + * | | + * +-----------------------------------------+ + * | UNIX DEBUG Version | + * 0x0310000 (3.1M) +-----------------------------------------+ + * | SYMMON, loaded just below UNIX | + * | (For UNIX Debug only) | + * | | + * | | + * 0x006C000 (432K) +-----------------------------------------+ + * | SYMMON STACK [NUM_CPU_PER_NODE] | + * | (For UNIX Debug only) | + * 0x004C000 (304K) +-----------------------------------------+ + * | | + * | | + * | UNIX NON-DEBUG Version | + * 0x0040000 (256K) +-----------------------------------------+ + * + * + * The lower portion of the memory map contains information that is + * permanent and is used by the IP35PROM, IO7PROM and IRIX. 
+ * + * 0x40000 (256K) +-----------------------------------------+ + * | | + * | KLCONFIG (64K) | + * | | + * 0x30000 (192K) +-----------------------------------------+ + * | | + * | PI Error Spools (64K) | + * | | + * 0x20000 (128K) +-----------------------------------------+ + * | | + * | Unused | + * | | + * 0x19000 (100K) +-----------------------------------------+ + * | Early cache Exception stack (CPU 3)| + * 0x18800 (98K) +-----------------------------------------+ + * | cache error eframe (CPU 3) | + * 0x18400 (97K) +-----------------------------------------+ + * | Exception Handlers (CPU 3) | + * 0x18000 (96K) +-----------------------------------------+ + * | | + * | Unused | + * | | + * 0x13c00 (79K) +-----------------------------------------+ + * | GPDA (8k) | + * 0x11c00 (71K) +-----------------------------------------+ + * | Early cache Exception stack (CPU 2)| + * 0x10800 (66k) +-----------------------------------------+ + * | cache error eframe (CPU 2) | + * 0x10400 (65K) +-----------------------------------------+ + * | Exception Handlers (CPU 2) | + * 0x10000 (64K) +-----------------------------------------+ + * | | + * | Unused | + * | | + * 0x0b400 (45K) +-----------------------------------------+ + * | GDA (1k) | + * 0x0b000 (44K) +-----------------------------------------+ + * | NMI Eframe areas (4) | + * 0x0a000 (40K) +-----------------------------------------+ + * | NMI Register save areas (4) | + * 0x09000 (36K) +-----------------------------------------+ + * | Early cache Exception stack (CPU 1)| + * 0x08800 (34K) +-----------------------------------------+ + * | cache error eframe (CPU 1) | + * 0x08400 (33K) +-----------------------------------------+ + * | Exception Handlers (CPU 1) | + * 0x08000 (32K) +-----------------------------------------+ + * | | + * | | + * | Unused | + * | | + * | | + * 0x04000 (16K) +-----------------------------------------+ + * | NMI Handler (Protected Page) | + * 0x03000 (12K) +-----------------------------------------+ + * | ARCS PVECTORS (master node only) | + * 0x02c00 (11K) +-----------------------------------------+ + * | ARCS TVECTORS (master node only) | + * 0x02800 (10K) +-----------------------------------------+ + * | LAUNCH [NUM_CPU] | + * 0x02400 (9K) +-----------------------------------------+ + * | Low memory directory (KLDIR) | + * 0x02000 (8K) +-----------------------------------------+ + * | ARCS SPB (1K) | + * 0x01000 (4K) +-----------------------------------------+ + * | Early cache Exception stack (CPU 0)| + * 0x00800 (2k) +-----------------------------------------+ + * | cache error eframe (CPU 0) | + * 0x00400 (1K) +-----------------------------------------+ + * | Exception Handlers (CPU 0) | + * 0x00000 (0K) +-----------------------------------------+ + */ + +/* + * NOTE: To change the kernel load address, you must update: + * - the appropriate elspec files in irix/kern/master.d + * - NODEBUGUNIX_ADDR in SN/SN1/addrs.h + * - IP27_FREEMEM_OFFSET below + * - KERNEL_START_OFFSET below (if supporting cells) + */ + + +/* + * This is defined here because IP27_SYMMON_STK_SIZE must be at least what + * we define here. Since it's set up in the prom. We can't redefine it later + * and expect more space to be allocated. The way to find out the true size + * of the symmon stacks is to divide SYMMON_STK_SIZE by SYMMON_STK_STRIDE + * for a particular node. + */ +#define SYMMON_STACK_SIZE 0x8000 + +#if defined (PROM) || defined (SABLE) + +/* + * These defines are prom version dependent. 
No code other than the IP35 + * prom should attempt to use these values. + */ +#define IP27_LAUNCH_OFFSET 0x2400 +#define IP27_LAUNCH_SIZE 0x400 +#define IP27_LAUNCH_COUNT 4 +#define IP27_LAUNCH_STRIDE 0x100 /* could be as small as 0x80 */ + +#define IP27_KLCONFIG_OFFSET 0x30000 +#define IP27_KLCONFIG_SIZE 0x10000 +#define IP27_KLCONFIG_COUNT 1 +#define IP27_KLCONFIG_STRIDE 0 + +#define IP27_NMI_OFFSET 0x3000 +#define IP27_NMI_SIZE 0x100 +#define IP27_NMI_COUNT 4 +#define IP27_NMI_STRIDE 0x40 + +#define IP27_PI_ERROR_OFFSET 0x20000 +#define IP27_PI_ERROR_SIZE 0x10000 +#define IP27_PI_ERROR_COUNT 1 +#define IP27_PI_ERROR_STRIDE 0 + +#define IP27_SYMMON_STK_OFFSET 0x4c000 +#define IP27_SYMMON_STK_SIZE 0x20000 +#define IP27_SYMMON_STK_COUNT 4 +/* IP27_SYMMON_STK_STRIDE must be >= SYMMON_STACK_SIZE */ +#define IP27_SYMMON_STK_STRIDE 0x8000 + +#define IP27_FREEMEM_OFFSET 0x40000 +#define IP27_FREEMEM_SIZE -1 +#define IP27_FREEMEM_COUNT 1 +#define IP27_FREEMEM_STRIDE 0 + +#endif /* PROM || SABLE*/ +/* + * There will be only one of these in a partition so the IO7 must set it up. + */ +#define IO6_GDA_OFFSET 0xb000 +#define IO6_GDA_SIZE 0x400 +#define IO6_GDA_COUNT 1 +#define IO6_GDA_STRIDE 0 + +/* + * save area of kernel nmi regs in the prom format + */ +#define IP27_NMI_KREGS_OFFSET 0x9000 +#define IP27_NMI_KREGS_CPU_SIZE 0x400 +/* + * save area of kernel nmi regs in eframe format + */ +#define IP27_NMI_EFRAME_OFFSET 0xa000 +#define IP27_NMI_EFRAME_SIZE 0x400 + +#define GPDA_OFFSET 0x11c00 + +#endif /* _ASM_SN_SN1_KLDIR_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn1/leds.h linuxppc64_2_4/include/asm-ia64/sn/sn1/leds.h --- ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn1/leds.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ia64/sn/sn1/leds.h Fri May 4 17:12:32 2001 @@ -0,0 +1,35 @@ +#ifndef _ASM_SN_SN1_LED_H +#define _ASM_SN_SN1_LED_H + +/* + * Copyright (C) 2000 Silicon Graphics, Inc + * Copyright (C) 2000 Jack Steiner (steiner@sgi.com) + */ + +#include + +#define LED0 0xc0000b00100000c0LL /* ZZZ fixme */ + + + +#define LED_AP_START 0x01 /* AP processor started */ +#define LED_AP_IDLE 0x01 + +/* + * Basic macros for flashing the LEDS on an SGI, SN1. + */ + +extern __inline__ void +HUB_SET_LED(int val) +{ + long *ledp; + int eid; + + eid = hard_smp_processor_id() & 3; + ledp = (long*) (LED0 + (eid<<3)); + *ledp = val; +} + + +#endif /* _ASM_SN_SN1_LED_H */ + diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn1/sn_private.h linuxppc64_2_4/include/asm-ia64/sn/sn1/sn_private.h --- ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn1/sn_private.h Fri Apr 19 11:00:01 2002 +++ linuxppc64_2_4/include/asm-ia64/sn/sn1/sn_private.h Mon Apr 22 13:25:48 2002 @@ -1,4 +1,4 @@ -/* $Id: sn_private.h,v 1.1 2002/02/28 17:31:25 marcelo Exp $ +/* $Id: sn_private.h,v 1.1 2002/04/22 18:25:48 tgall Exp $ * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn2/arch.h linuxppc64_2_4/include/asm-ia64/sn/sn2/arch.h --- ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn2/arch.h Fri Apr 19 11:00:01 2002 +++ linuxppc64_2_4/include/asm-ia64/sn/sn2/arch.h Mon Apr 22 13:25:49 2002 @@ -1,4 +1,4 @@ -/* $Id: arch.h,v 1.1 2002/02/28 17:31:25 marcelo Exp $ +/* $Id: arch.h,v 1.1 2002/04/22 18:25:49 tgall Exp $ * * This file is subject to the terms and conditions of the GNU General Public * License. 
See the file "COPYING" in the main directory of this archive diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn2/intr.h linuxppc64_2_4/include/asm-ia64/sn/sn2/intr.h --- ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn2/intr.h Fri Apr 19 11:00:01 2002 +++ linuxppc64_2_4/include/asm-ia64/sn/sn2/intr.h Mon Apr 22 13:25:49 2002 @@ -1,4 +1,4 @@ -/* $Id: intr.h,v 1.1 2002/02/28 17:31:25 marcelo Exp $ +/* $Id: intr.h,v 1.1 2002/04/22 18:25:49 tgall Exp $ * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn2/shubio.h linuxppc64_2_4/include/asm-ia64/sn/sn2/shubio.h --- ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn2/shubio.h Fri Apr 19 11:00:02 2002 +++ linuxppc64_2_4/include/asm-ia64/sn/sn2/shubio.h Mon Apr 22 13:25:49 2002 @@ -1,4 +1,4 @@ -/* $Id: shubio.h,v 1.1 2002/02/28 17:31:25 marcelo Exp $ +/* $Id: shubio.h,v 1.1 2002/04/22 18:25:49 tgall Exp $ * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn2/sn_private.h linuxppc64_2_4/include/asm-ia64/sn/sn2/sn_private.h --- ../kernel.org/linux-2.4.19/include/asm-ia64/sn/sn2/sn_private.h Fri Apr 19 11:00:02 2002 +++ linuxppc64_2_4/include/asm-ia64/sn/sn2/sn_private.h Mon Apr 22 13:25:49 2002 @@ -1,4 +1,4 @@ -/* $Id: sn_private.h,v 1.1 2002/02/28 17:31:26 marcelo Exp $ +/* $Id: sn_private.h,v 1.1 2002/04/22 18:25:49 tgall Exp $ * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-mips/jmr3927/ds1742rtc.h linuxppc64_2_4/include/asm-mips/jmr3927/ds1742rtc.h --- ../kernel.org/linux-2.4.19/include/asm-mips/jmr3927/ds1742rtc.h Fri Apr 19 11:00:03 2002 +++ linuxppc64_2_4/include/asm-mips/jmr3927/ds1742rtc.h Mon Apr 22 13:25:51 2002 @@ -1,4 +1,4 @@ -/* $Id: ds1742rtc.h,v 1.1 2002/02/27 18:57:58 marcelo Exp $ +/* $Id: ds1742rtc.h,v 1.1 2002/04/22 18:25:51 tgall Exp $ * * ds1742rtc.h - register definitions for the Real-Time-Clock / CMOS RAM * diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-mips64/time.h linuxppc64_2_4/include/asm-mips64/time.h --- ../kernel.org/linux-2.4.19/include/asm-mips64/time.h Fri Apr 19 11:00:04 2002 +++ linuxppc64_2_4/include/asm-mips64/time.h Wed Dec 31 18:00:00 1969 @@ -1,78 +0,0 @@ -/* - * Copyright 2001 MontaVista Software Inc. - * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net - * - * include/asm-mips/time.h - * header file for the new style time.c file and time services. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - */ - -/* - * Please refer to Documentation/MIPS/time.README. - */ - -#ifndef _ASM_TIME_H -#define _ASM_TIME_H - -#include /* for struct pt_regs */ -#include /* for asmlinkage */ -#include /* for struct rtc_time */ - -/* - * RTC ops. By default, they point a no-RTC functions. - * rtc_get_time - mktime(year, mon, day, hour, min, sec) in seconds. - * rtc_set_time - reverse the above translation and set time to RTC. 
- */ -extern unsigned long (*rtc_get_time)(void); -extern int (*rtc_set_time)(unsigned long); - -/* - * to_tm() converts system time back to (year, mon, day, hour, min, sec). - * It is intended to help implement rtc_set_time() functions. - * Copied from PPC implementation. - */ -extern void to_tm(unsigned long tim, struct rtc_time * tm); - -/* - * do_gettimeoffset(). By default, this func pointer points to - * do_null_gettimeoffset(), which leads to the same resolution as HZ. - * Higher resolution versions are vailable, which gives ~1us resolution. - */ -extern unsigned long (*do_gettimeoffset)(void); - -extern unsigned long null_gettimeoffset(void); -extern unsigned long fixed_rate_gettimeoffset(void); -extern unsigned long calibrate_div32_gettimeoffset(void); -extern unsigned long calibrate_div64_gettimeoffset(void); - -/* - * high-level timer interrupt routines. - */ -extern void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs); - -/* - * the corresponding low-level timer interrupt routine. - */ -asmlinkage void ll_timer_interrupt(int irq, struct pt_regs *regs); - -/* - * board specific routines required by time_init(). - * board_time_init is defaulted to NULL and can remains so. - * board_timer_setup must be setup properly in machine setup routine. - */ -struct irqaction; -extern void (*board_time_init)(void); -extern void (*board_timer_setup)(struct irqaction *irq); - -/* - * mips_counter_frequency - must be set if you intend to use - * counter as timer interrupt source or use fixed_rate_gettimeoffset. - */ -extern unsigned int mips_counter_frequency; - -#endif /* _ASM_TIME_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/Naca.h linuxppc64_2_4/include/asm-ppc64/Naca.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/Naca.h Fri Apr 19 11:00:38 2002 +++ linuxppc64_2_4/include/asm-ppc64/Naca.h Wed Dec 31 18:00:00 1969 @@ -1,39 +0,0 @@ -#ifndef _NACA_H -#define _NACA_H - -/* - * c 2001 PPC 64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
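For context on the hook-based interface deleted above: boards were expected to fill in these pointers during machine setup. A hypothetical sketch, with invented board-side names and an assumed fixed 100 MHz counter:

/* Sketch of a board wiring up the (removed) asm-mips64/time.h hooks. */
static unsigned long my_rtc_get_time(void)
{
        /* a real board would read its RTC chip here */
        return mktime(2002, 1, 1, 0, 0, 0);
}

static void my_board_time_init(void)
{
        mips_counter_frequency = 100000000;     /* assumed 100 MHz CP0 count */
        rtc_get_time = my_rtc_get_time;
        do_gettimeoffset = fixed_rate_gettimeoffset;
}
/* machine setup code would then set: board_time_init = my_board_time_init; */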
- */ - -#include - -struct Naca -{ - void *xItVpdAreas; - void *xRamDisk; - u64 xRamDiskSize; /* In pages */ - struct Paca *paca; /* Ptr to an array of pacas */ - u64 debug_switch; /* Bits to control debug printing */ - u16 processorCount; /* # of physical processors */ - u16 dCacheL1LineSize; /* Line size of L1 DCache in bytes */ - u16 dCacheL1LogLineSize; /* Log-2 of DCache line size */ - u16 dCacheL1LinesPerPage; /* DCache lines per page */ - u16 iCacheL1LineSize; /* Line size of L1 ICache in bytes */ - u16 iCacheL1LogLineSize; /* Log-2 of ICache line size */ - u16 iCacheL1LinesPerPage; /* ICache lines per page */ - u16 slb_size; /* SLB size in entries */ - u64 physicalMemorySize; /* Size of real memory in bytes */ - u64 pftSize; /* Log base 2 of page table size */ - u64 serialPortAddr; /* Phyical address of serial port */ - u8 interrupt_controller; /* Type of interrupt controller */ - u8 resv0[6]; /* Padding */ -}; - -extern struct Naca *naca; - -#endif /* _NACA_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/Paca.h linuxppc64_2_4/include/asm-ppc64/Paca.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/Paca.h Fri Apr 19 11:00:38 2002 +++ linuxppc64_2_4/include/asm-ppc64/Paca.h Wed Dec 31 18:00:00 1969 @@ -1,164 +0,0 @@ -#ifndef _PPC64_PACA_H -#define _PPC64_PACA_H - -/*============================================================================ - * Header File Id - * Name______________: Paca.H - * - * Description_______: - * - * This control block defines the PACA which defines the processor - * specific data for each logical processor on the system. - * There are some pointers defined that are utilized by PLIC. - * - * C 2001 PPC 64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include - -#define N_EXC_STACK 2 - -/*----------------------------------------------------------------------------- - * Other Includes - *----------------------------------------------------------------------------- - */ -#include -#include -#include -#include -#include -#include - -/* A Paca entry is required for each logical processor. On systems - * that support hardware multi-threading, this is equal to twice the - * number of physical processors. On LPAR systems, we are required - * to have space for the maximum number of logical processors we - * could ever possibly have. Currently, we are limited to allocating - * 24 processors to a partition which gives 48 logical processors on - * an HMT box. Therefore, we reserve this many Paca entries. - */ -#define maxProcessors 24 -#define maxPacas maxProcessors * 2 - -extern struct Paca xPaca[]; -#define get_paca() ((struct Paca *)mfspr(SPRG3)) - -/*============================================================================ - * Name_______: Paca - * - * Description: - * - * Defines the layout of the Paca. - * - * This structure is not directly accessed by PLIC or the SP except - * for the first two pointers that point to the ItLpPaca area and the - * ItLpRegSave area for this processor. Both the ItLpPaca and - * ItLpRegSave objects are currently contained within the - * PACA but they do not need to be. 
- * - *============================================================================ - */ -struct Paca -{ -/*===================================================================================== - * CACHE_LINE_1 0x0000 - 0x007F - *===================================================================================== - */ - struct ItLpPaca *xLpPacaPtr; /* Pointer to LpPaca for PLIC 0x00 */ - struct ItLpRegSave *xLpRegSavePtr; /* Pointer to LpRegSave for PLIC 0x08 */ - u64 xCurrent; /* Pointer to current 0x10 */ - u16 xPacaIndex; /* Logical processor number 0x18 */ - u16 xHwProcNum; /* Actual Hardware Processor Number 0x1a */ - u32 default_decr; /* Default decrementer value 0x1c */ - u64 xHrdIntStack; /* Stack for hardware interrupts 0x20 */ - u64 xKsave; /* Saved Kernel stack addr or zero 0x28 */ - u64 pvr; /* Processor version register 0x30 */ - u8 *exception_sp; /* 0x38 */ - - struct ItLpQueue *lpQueuePtr; /* LpQueue handled by this processor 0x40 */ - u64 xTOC; /* Kernel TOC address 0x48 */ - STAB xStab_data; /* Segment table information 0x50,0x58,0x60 */ - u8 xSegments[STAB_CACHE_SIZE]; /* Cache of used stab entries 0x68,0x70 */ - u8 xProcEnabled; /* 1=soft enabled 0x78 */ - u8 xHrdIntCount; /* Count of active hardware interrupts 0x79 */ - u8 prof_enabled; /* 1=iSeries profiling enabled 0x7A */ - u8 resv1[5]; /* 0x7B-0x7F */ - -/*===================================================================================== - * CACHE_LINE_2 0x0080 - 0x00FF - *===================================================================================== - */ - u64 *pgd_cache; /* 0x00 */ - u64 *pmd_cache; /* 0x08 */ - u64 *pte_cache; /* 0x10 */ - u64 pgtable_cache_sz; /* 0x18 */ - u64 next_jiffy_update_tb; /* TB value for next jiffy update 0x20 */ - u32 lpEvent_count; /* lpEvents processed 0x28 */ - u32 prof_multiplier; /* 0x2C */ - u32 prof_counter; /* 0x30 */ - u32 prof_shift; /* iSeries shift for profile bucket size0x34 */ - u32 *prof_buffer; /* iSeries profiling buffer 0x38 */ - u32 *prof_stext; /* iSeries start of kernel text 0x40 */ - u32 prof_len; /* iSeries length of profile buffer -1 0x48 */ - u8 rsvd2[128-76]; /* 0x4C */ - -/*===================================================================================== - * CACHE_LINE_3 0x0100 - 0x017F - *===================================================================================== - */ - u8 xProcStart; /* At startup, processor spins until 0x100 */ - /* xProcStart becomes non-zero. 
*/ - u8 rsvd3[127]; - -/*===================================================================================== - * CACHE_LINE_4-8 0x0180 - 0x03FF Contains ItLpPaca - *===================================================================================== - */ - struct ItLpPaca xLpPaca; /* Space for ItLpPaca */ - -/*===================================================================================== - * CACHE_LINE_9-16 0x0400 - 0x07FF Contains ItLpRegSave - *===================================================================================== - */ - struct ItLpRegSave xRegSav; /* Register save for proc */ - -/*===================================================================================== - * CACHE_LINE_17-18 0x0800 - 0x0EFF Reserved - *===================================================================================== - */ - struct rtas_args xRtas; /* Per processor RTAS struct */ - u64 xR1; /* r1 save for RTAS calls */ - u64 xSavedMsr; /* Old msr saved here by HvCall */ - u8 rsvd5[256-16-sizeof(struct rtas_args)]; - -/*===================================================================================== - * CACHE_LINE_19-30 0x0800 - 0x0EFF Reserved - *===================================================================================== - */ - u8 rsvd6[0x600]; - -/*===================================================================================== - * CACHE_LINE_31 0x0F00 - 0x0F7F Exception stack - *===================================================================================== - */ - u8 exception_stack[N_EXC_STACK*EXC_FRAME_SIZE]; - -/*===================================================================================== - * CACHE_LINE_32 0x0F80 - 0x0FFF Reserved - *===================================================================================== - */ - u8 rsvd7[0x80]; /* Give the stack some rope ... */ - -/*===================================================================================== - * Page 2 Reserved for guard page. Also used as a stack early in SMP boots before - * relocation is enabled. - *===================================================================================== - */ - u8 guard[0x1000]; /* ... 
and then hang 'em */ -}; - -#endif /* _PPC64_PACA_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/abs_addr.h linuxppc64_2_4/include/asm-ppc64/abs_addr.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/abs_addr.h Fri Apr 19 11:00:38 2002 +++ linuxppc64_2_4/include/asm-ppc64/abs_addr.h Mon Mar 11 13:52:27 2002 @@ -103,7 +103,7 @@ #define physRpn_to_absRpn(rpn) (rpn) #define absolute_to_phys(aa) (aa) -#endif /* CONFIG_MSCHUNKS */ +#endif /* !CONFIG_MSCHUNKS */ static inline unsigned long diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/eeh.h linuxppc64_2_4/include/asm-ppc64/eeh.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/eeh.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/eeh.h Fri Mar 29 10:18:26 2002 @@ -37,11 +37,13 @@ #define EEH_STATE_OVERRIDE 1 /* IOA does not require eeh traps */ #define EEH_STATE_FAILURE 16 /* */ -/* This is for profiling only and should be removed */ -extern unsigned long eeh_total_mmio_reads; +/* This is for profiling only */ extern unsigned long eeh_total_mmio_ffs; +extern int eeh_implemented; + void eeh_init(void); +static inline int is_eeh_implemented(void) { return eeh_implemented; } int eeh_get_state(unsigned long ea); unsigned long eeh_check_failure(void *token, unsigned long val); @@ -83,7 +85,7 @@ */ /* #define EEH_POSSIBLE_ERROR(addr, vaddr, val) ((vaddr) != (addr) && ~(val) == 0 && !IS_EEH_TOKEN_DISABLED(addr)) */ /* This version is rearranged to collect some profiling data */ -#define EEH_POSSIBLE_ERROR(addr, vaddr, val) (++eeh_total_mmio_reads, (~(val) == 0 && (++eeh_total_mmio_ffs, (vaddr) != (addr) && !IS_EEH_TOKEN_DISABLED(addr)))) +#define EEH_POSSIBLE_ERROR(addr, vaddr, val) (~(val) == 0 && (++eeh_total_mmio_ffs, (vaddr) != (addr) && !IS_EEH_TOKEN_DISABLED(addr))) /* * MMIO read/write operations with EEH support. diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/fcntl.h linuxppc64_2_4/include/asm-ppc64/fcntl.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/fcntl.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/fcntl.h Tue Apr 2 15:52:43 2002 @@ -87,17 +87,6 @@ pid_t l_pid; }; -#ifdef __KERNEL__ -struct flock32 { - short l_type; - short l_whence; - __kernel_off_t32 l_start; - __kernel_off_t32 l_len; - __kernel_pid_t32 l_pid; - short __unused; -}; -#endif - struct flock64 { short l_type; short l_whence; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/iSeries/HvCall.h linuxppc64_2_4/include/asm-ppc64/iSeries/HvCall.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/iSeries/HvCall.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/HvCall.h Tue Apr 9 11:16:35 2002 @@ -35,7 +35,7 @@ #include #endif -#include +#include //------------------------------------------------------------------- // Constants @@ -133,10 +133,9 @@ //===================================================================================== static inline void HvCall_setVirtualDecr(void) { - // Ignore any error return codes - most likely means that the target value for the - // LP has been increased and this vary off would bring us below the new target. + /* Ignore any error return codes - most likely means that the target value for the + * LP has been increased and this vary off would bring us below the new target. 
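The rearranged EEH_POSSIBLE_ERROR above keeps the fast path cheap: only an all-ones value triggers the full failure check. A sketch of the read pattern it supports (the wrapper name and the token/vaddr split are illustrative):

/* Sketch: MMIO read with EEH screening; eeh_check_failure() is consulted
 * only when the load returned all-ones (~val == 0). */
static inline unsigned eeh_guarded_readl(void *token, volatile unsigned *vaddr)
{
        unsigned val = in_le32(vaddr);

        if (EEH_POSSIBLE_ERROR(token, vaddr, val))
                return eeh_check_failure(token, val);
        return val;
}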
*/ HvCall0(HvCallBaseSetVirtualDecr); - // getPaca()->adjustHmtForNoOfSpinLocksHeld(); } //===================================================================== static inline void HvCall_yieldProcessor(unsigned typeOfYield, u64 yieldParm) @@ -147,21 +146,18 @@ static inline void HvCall_setEnabledInterrupts(u64 enabledInterrupts) { HvCall1(HvCallBaseSetEnabledInterrupts,enabledInterrupts); - // getPaca()->adjustHmtForNoOfSpinLocksHeld(); } //===================================================================== static inline void HvCall_clearLogBuffer(HvLpIndex lpindex) { HvCall1(HvCallBaseClearLogBuffer,lpindex); - // getPaca()->adjustHmtForNoOfSpinLocksHeld(); } //===================================================================== static inline u32 HvCall_getLogBufferCodePage(HvLpIndex lpindex) { u32 retVal = HvCall1(HvCallBaseGetLogBufferCodePage,lpindex); - // getPaca()->adjustHmtForNoOfSpinLocksHeld(); return retVal; } @@ -169,7 +165,6 @@ static inline int HvCall_getLogBufferFormat(HvLpIndex lpindex) { int retVal = HvCall1(HvCallBaseGetLogBufferFormat,lpindex); - // getPaca()->adjustHmtForNoOfSpinLocksHeld(); return retVal; } @@ -177,7 +172,6 @@ static inline u32 HvCall_getLogBufferLength(HvLpIndex lpindex) { u32 retVal = HvCall1(HvCallBaseGetLogBufferLength,lpindex); - // getPaca()->adjustHmtForNoOfSpinLocksHeld(); return retVal; } @@ -185,7 +179,6 @@ static inline void HvCall_setLogBufferFormatAndCodepage(int format, u32 codePage) { HvCall2(HvCallBaseSetLogBufferFormatAndCodePage,format, codePage); - // getPaca()->adjustHmtForNoOfSpinLocksHeld(); } //===================================================================== @@ -193,7 +186,7 @@ void HvCall_writeLogBuffer(const void *buffer, u64 bufLen); //===================================================================== -static inline void HvCall_sendIPI(struct Paca * targetPaca) +static inline void HvCall_sendIPI(struct paca_struct * targetPaca) { HvCall1( HvCallBaseSendIPI, targetPaca->xPacaIndex ); } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/iSeries/HvReleaseData.h linuxppc64_2_4/include/asm-ppc64/iSeries/HvReleaseData.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/iSeries/HvReleaseData.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/HvReleaseData.h Fri Apr 19 13:53:07 2002 @@ -52,7 +52,7 @@ u32 xDesc; // Descriptor "HvRD" ebcdic x00-x03 u16 xSize; // Size of this control block x04-x05 u16 xVpdAreasPtrOffset; // Offset in NACA of ItVpdAreas x06-x07 - struct Naca * xSlicNacaAddr; // Virtual address of SLIC NACA x08-x0F + struct naca_struct * xSlicNacaAddr; // Virt addr of SLIC NACA x08-x0F u32 xMsNucDataOffset; // Offset of Linux Mapping Data x10-x13 u32 xRsvd1; // Reserved x14-x17 u16 xTagsMode:1; // 0 == tags active, 1 == tags inactive diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/iSeries/LparData.h linuxppc64_2_4/include/asm-ppc64/iSeries/LparData.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/iSeries/LparData.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/iSeries/LparData.h Tue Apr 9 11:16:35 2002 @@ -28,11 +28,11 @@ #include #include -#include +#include #include #include #include -#include +#include #include #include #include @@ -42,8 +42,6 @@ #include extern struct LparMap xLparMap; -extern struct Naca xNaca; -extern struct Paca xPaca[]; extern struct HvReleaseData hvReleaseData; extern struct ItLpNaca itLpNaca; extern struct ItIplParmsReal xItIplParmsReal; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/io.h 
linuxppc64_2_4/include/asm-ppc64/io.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/io.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/io.h Tue Apr 2 16:37:11 2002 @@ -51,7 +51,6 @@ #define outl(data,addr) writel(data,((unsigned long)(addr))) #else #define IS_MAPPED_VADDR(port) ((unsigned long)(port) >> 60UL) -#ifdef CONFIG_PPC_EEH #define readb(addr) eeh_readb((void*)(addr)) #define readw(addr) eeh_readw((void*)(addr)) #define readl(addr) eeh_readl((void*)(addr)) @@ -61,17 +60,6 @@ #define memset_io(a,b,c) eeh_memset((void *)(a),(b),(c)) #define memcpy_fromio(a,b,c) eeh_memcpy_fromio((a),(void *)(b),(c)) #define memcpy_toio(a,b,c) eeh_memcpy_toio((void *)(a),(b),(c)) -#else -#define readb(addr) in_8((volatile u8 *)(addr)) -#define writeb(b,addr) out_8((volatile u8 *)(addr), (b)) -#define readw(addr) in_le16((volatile u16 *)(addr)) -#define readl(addr) in_le32((volatile u32 *)(addr)) -#define writew(b,addr) out_le16((volatile u16 *)(addr),(b)) -#define writel(b,addr) out_le32((volatile u32 *)(addr),(b)) -#define memset_io(a,b,c) memset((void *)(a),(b),(c)) -#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c)) -#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c)) -#endif #define inb(port) _inb((unsigned long)port) #define outb(val, port) _outb(val, (unsigned long)port) #define inw(port) _inw((unsigned long)port) @@ -180,25 +168,28 @@ /* * 8, 16 and 32 bit, big and little endian I/O operations, with barrier. + * Until we can validate that all required device drivers are safe under + * weak consistency, an excess of syncs around the MMIO operations will make + * things work. On sstar, sync time is << mmio time, so this should not be a big impact. */ extern inline int in_8(volatile unsigned char *addr) { int ret; - __asm__ __volatile__("eieio; lbz%U1%X1 %0,%1" : "=r" (ret) : "m" (*addr)); + __asm__ __volatile__("sync; lbz%U1%X1 %0,%1; sync" : "=r" (ret) : "m" (*addr)); return ret; } extern inline void out_8(volatile unsigned char *addr, int val) { - __asm__ __volatile__("stb%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); + __asm__ __volatile__("sync; stb%U0%X0 %1,%0; sync" : "=m" (*addr) : "r" (val)); } extern inline int in_le16(volatile unsigned short *addr) { int ret; - __asm__ __volatile__("eieio; lhbrx %0,0,%1" : "=r" (ret) : + __asm__ __volatile__("sync; lhbrx %0,0,%1; sync" : "=r" (ret) : "r" (addr), "m" (*addr)); return ret; } @@ -207,26 +198,26 @@ { int ret; - __asm__ __volatile__("eieio; lhz%U1%X1 %0,%1" : "=r" (ret) : "m" (*addr)); + __asm__ __volatile__("sync; lhz%U1%X1 %0,%1; sync" : "=r" (ret) : "m" (*addr)); return ret; } extern inline void out_le16(volatile unsigned short *addr, int val) { - __asm__ __volatile__("sthbrx %1,0,%2" : "=m" (*addr) : + __asm__ __volatile__("sync; sthbrx %1,0,%2; sync" : "=m" (*addr) : "r" (val), "r" (addr)); } extern inline void out_be16(volatile unsigned short *addr, int val) { - __asm__ __volatile__("sth%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); + __asm__ __volatile__("sync; sth%U0%X0 %1,%0; sync" : "=m" (*addr) : "r" (val)); } extern inline unsigned in_le32(volatile unsigned *addr) { unsigned ret; - __asm__ __volatile__("eieio; lwbrx %0,0,%1" : "=r" (ret) : + __asm__ __volatile__("sync; lwbrx %0,0,%1; sync" : "=r" (ret) : "r" (addr), "m" (*addr)); return ret; } @@ -235,26 +226,24 @@ { unsigned ret; - __asm__ __volatile__("eieio; lwz%U1%X1 %0,%1" : "=r" (ret) : "m" (*addr)); + __asm__ __volatile__("sync; lwz%U1%X1 %0,%1; sync" : "=r" (ret) : "m" (*addr)); return ret; } extern inline void out_le32(volatile unsigned *addr, int val) { - __asm__ __volatile__("stwbrx %1,0,%2" : "=m" (*addr) : + __asm__ __volatile__("sync; stwbrx %1,0,%2; sync" : "=m" (*addr) : "r" (val), "r" (addr)); } extern inline void out_be32(volatile unsigned *addr, int val) { - __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m" (*addr) : "r" (val)); + __asm__ __volatile__("sync; stw%U0%X0 %1,%0; sync" : "=m" (*addr) : "r" (val)); } -#ifdef CONFIG_PPC_EEH +#ifndef CONFIG_PPC_ISERIES #include <asm/eeh.h> -#endif -#ifndef CONFIG_PPC_ISERIES static inline u8 _inb(unsigned long port) { if (IS_MAPPED_VADDR(port)) return readb((void *)port); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/linux_logo.h linuxppc64_2_4/include/asm-ppc64/linux_logo.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/linux_logo.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/linux_logo.h Mon Apr 8 09:39:12 2002 @@ -22,25 +22,5 @@ #define LINUX_LOGO_HEIGHT 80 #define LINUX_LOGO_WIDTH 80 -#define LINUX_LOGO_COLORS 214 -#ifdef INCLUDE_LINUX_LOGO_DATA - -#define INCLUDE_LINUX_LOGOBW -#define INCLUDE_LINUX_LOGO16 #include <linux/linux_logo.h> - -#else - -/* prototypes only */ -extern unsigned char linux_logo_red[]; -extern unsigned char linux_logo_green[]; -extern unsigned char linux_logo_blue[]; -extern unsigned char linux_logo[]; -extern unsigned char linux_logo_bw[]; -extern unsigned char linux_logo16_red[]; -extern unsigned char linux_logo16_green[]; -extern unsigned char linux_logo16_blue[]; -extern unsigned char linux_logo16[]; - -#endif diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/lmb.h linuxppc64_2_4/include/asm-ppc64/lmb.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/lmb.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/lmb.h Mon Mar 25 07:18:41 2002 @@ -17,7 +17,7 @@ extern unsigned long reloc_offset(void); -#define MAX_LMB_REGIONS 32 +#define MAX_LMB_REGIONS 64 union lmb_reg_property { struct reg_property32 addr32[MAX_LMB_REGIONS]; @@ -53,7 +53,9 @@ extern void lmb_init(void); extern void lmb_analyze(void); extern long lmb_add(unsigned long, unsigned long); +#ifdef CONFIG_MSCHUNKS extern long lmb_add_io(unsigned long base, unsigned long size); +#endif /* CONFIG_MSCHUNKS */ extern long lmb_reserve(unsigned long, unsigned long); extern unsigned long lmb_alloc(unsigned long, unsigned long); extern unsigned long lmb_phys_mem_size(void); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/machdep.h linuxppc64_2_4/include/asm-ppc64/machdep.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/machdep.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/machdep.h Fri Apr 19 13:37:38 2002 @@ -48,10 +48,8 @@ long tcenum, unsigned long uaddr, int direction); - void (*tce_free)(struct TceTable *tbl, - dma_addr_t dma_addr, - unsigned order, - unsigned numPages); + void (*tce_free_one)(struct TceTable *tbl, + long tcenum); void (*smp_message_pass)(int target, int msg, @@ -147,18 +145,6 @@ extern char cmd_line[512]; extern void setup_pci_ptrs(void); - -/* - * Power macintoshes have either a CUDA or a PMU controlling - * system reset, power, NVRAM, RTC.
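With the sync-bracketed accessors above, a driver needs no explicit barrier between a control write and the status read that follows it. A sketch with invented register offsets:

/* Sketch: the syncs inside out_le32()/in_le32() order these two accesses;
 * DEV_CTRL and DEV_STATUS are illustrative word offsets, not a real device. */
#define DEV_CTRL        0
#define DEV_STATUS      1

static int dev_reset(volatile unsigned *mmio)
{
        out_le32(mmio + DEV_CTRL, 1);           /* sync; stwbrx; sync */
        return in_le32(mmio + DEV_STATUS);      /* sync; lwbrx; sync */
}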
- */ -typedef enum sys_ctrler_kind { - SYS_CTRLER_UNKNOWN = 0, - SYS_CTRLER_CUDA = 1, - SYS_CTRLER_PMU = 2, -} sys_ctrler_t; - -extern sys_ctrler_t sys_ctrler; #endif /* _PPC_MACHDEP_H */ #endif /* __KERNEL__ */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/mmu.h linuxppc64_2_4/include/asm-ppc64/mmu.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/mmu.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/mmu.h Tue Apr 9 11:31:08 2002 @@ -22,7 +22,7 @@ * Define the size of the cache used for segment table entries. The first * entry is used as a cache pointer, therefore the actual number of entries * stored is one less than defined here. Do not change this value without - * considering the impact it will have on the layout of the paca in Paca.h. + * considering the impact it will have on the layout of the paca in paca.h. */ #define STAB_CACHE_SIZE 16 @@ -90,7 +90,7 @@ } SLBE; /* - * This structure is used in Paca.h where the layout depends on the + * This structure is used in paca.h where the layout depends on the * size being 24B. */ typedef struct { @@ -108,7 +108,7 @@ unsigned long : 2; /* Software use */ unsigned long bolted: 1; /* HPTE is "bolted" */ unsigned long : 1; /* Software use */ - unsigned long l: 1; /* Virtual page is large (L=1) or 4 KB (L=0) */ + unsigned long l: 1; /* Virtual page is large (L=1) or 4 KB (L=0) */ unsigned long h: 1; /* Hash function identifier */ unsigned long v: 1; /* Valid (v=1) or invalid (v=0) */ } Hpte_dword0; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/naca.h linuxppc64_2_4/include/asm-ppc64/naca.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/naca.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/naca.h Thu Apr 18 09:35:37 2002 @@ -0,0 +1,40 @@ +#ifndef _NACA_H +#define _NACA_H + +/* + * c 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
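With the machdep hook above narrowed to freeing a single TCE, a range free reduces to a loop over entry numbers. A sketch, assuming the usual ppc_md machdep vector and illustrative parameters:

/* Sketch: free npages consecutive TCEs through the per-entry hook. */
static void tce_free_range(struct TceTable *tbl, long tcenum, long npages)
{
        long i;

        for (i = 0; i < npages; i++)
                ppc_md.tce_free_one(tbl, tcenum + i);
}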
+ */ + +#include <asm/types.h> + +struct naca_struct { + void *xItVpdAreas; + void *xRamDisk; + u64 xRamDiskSize; /* In pages */ + struct paca_struct *paca; /* Ptr to an array of pacas */ + u64 debug_switch; /* Bits to control debug printing */ + u16 processorCount; /* # of physical processors */ + u16 dCacheL1LineSize; /* Line size of L1 DCache in bytes */ + u16 dCacheL1LogLineSize; /* Log-2 of DCache line size */ + u16 dCacheL1LinesPerPage; /* DCache lines per page */ + u16 iCacheL1LineSize; /* Line size of L1 ICache in bytes */ + u16 iCacheL1LogLineSize; /* Log-2 of ICache line size */ + u16 iCacheL1LinesPerPage; /* ICache lines per page */ + u16 slb_size; /* SLB size in entries */ + u64 physicalMemorySize; /* Size of real memory in bytes */ + u64 pftSize; /* Log base 2 of page table size */ + u64 serialPortAddr; /* Physical address of serial port */ + u8 interrupt_controller; /* Type of interrupt controller */ + u8 resv0; /* Reserved */ + u16 platform; /* Platform flags */ + u8 resv1[12]; /* Padding */ +}; + +extern struct naca_struct *naca; + +#endif /* _NACA_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/nvram.h linuxppc64_2_4/include/asm-ppc64/nvram.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/nvram.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/nvram.h Fri Mar 1 13:28:29 2002 @@ -36,33 +36,4 @@ #define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10) #endif -/* PowerMac specific nvram stuffs */ - -enum { - pmac_nvram_OF, /* Open Firmware partition */ - pmac_nvram_XPRAM, /* MacOS XPRAM partition */ - pmac_nvram_NR /* MacOS Name Registry partition */ -}; - -/* Return partition offset in nvram */ -extern int pmac_get_partition(int partition); - -/* Direct access to XPRAM */ -extern u8 pmac_xpram_read(int xpaddr); -extern void pmac_xpram_write(int xpaddr, u8 data); - -/* Some offsets in XPRAM */ -#define PMAC_XPRAM_MACHINE_LOC 0xe4 -#define PMAC_XPRAM_SOUND_VOLUME 0x08 - -/* Machine location structure in XPRAM */ -struct pmac_machine_location { - u32 latitude; /* 2+30 bit Fractional number */ - u32 longitude; /* 2+30 bit Fractional number */ - u32 delta; /* mix of GMT delta and DLS */ -}; - -/* /dev/nvram ioctls */ -#define PMAC_NVRAM_GET_OFFSET _IOWR('p', 0x40, int) /* Get NVRAM partition offset */ - #endif /* _PPC64_NVRAM_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/paca.h linuxppc64_2_4/include/asm-ppc64/paca.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/paca.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/paca.h Mon Apr 15 21:07:30 2002 @@ -0,0 +1,163 @@ +#ifndef _PPC64_PACA_H +#define _PPC64_PACA_H + +/*============================================================================ + * Header File Id + * Name______________: paca.h + * + * Description_______: + * + * This control block defines the PACA which defines the processor + * specific data for each logical processor on the system. + * There are some pointers defined that are utilized by PLIC. + * + * C 2001 PPC 64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version.
+ */ +#include + +#define N_EXC_STACK 2 + +/*----------------------------------------------------------------------------- + * Other Includes + *----------------------------------------------------------------------------- + */ +#include +#include +#include +#include +#include +#include + +/* A paca entry is required for each logical processor. On systems + * that support hardware multi-threading, this is equal to twice the + * number of physical processors. On LPAR systems, we are required + * to have space for the maximum number of logical processors we + * could ever possibly have. Currently, we are limited to allocating + * 24 processors to a partition which gives 48 logical processors on + * an HMT box. Therefore, we reserve this many paca entries. + */ +#define MAX_PROCESSORS 24 +#define MAX_PACAS MAX_PROCESSORS * 2 + +extern struct paca_struct paca[]; +#define get_paca() ((struct paca_struct *)mfspr(SPRG3)) + +/*============================================================================ + * Name_______: paca + * + * Description: + * + * Defines the layout of the paca. + * + * This structure is not directly accessed by PLIC or the SP except + * for the first two pointers that point to the ItLpPaca area and the + * ItLpRegSave area for this processor. Both the ItLpPaca and + * ItLpRegSave objects are currently contained within the + * PACA but they do not need to be. + * + *============================================================================ + */ +struct paca_struct { +/*===================================================================================== + * CACHE_LINE_1 0x0000 - 0x007F + *===================================================================================== + */ + struct ItLpPaca *xLpPacaPtr; /* Pointer to LpPaca for PLIC 0x00 */ + struct ItLpRegSave *xLpRegSavePtr; /* Pointer to LpRegSave for PLIC 0x08 */ + u64 xCurrent; /* Pointer to current 0x10 */ + u16 xPacaIndex; /* Logical processor number 0x18 */ + u16 xHwProcNum; /* Actual Hardware Processor Number 0x1a */ + u32 default_decr; /* Default decrementer value 0x1c */ + u64 xHrdIntStack; /* Stack for hardware interrupts 0x20 */ + u64 xKsave; /* Saved Kernel stack addr or zero 0x28 */ + u64 pvr; /* Processor version register 0x30 */ + u8 *exception_sp; /* 0x38 */ + + struct ItLpQueue *lpQueuePtr; /* LpQueue handled by this processor 0x40 */ + u64 xTOC; /* Kernel TOC address 0x48 */ + STAB xStab_data; /* Segment table information 0x50,0x58,0x60 */ + u8 xSegments[STAB_CACHE_SIZE]; /* Cache of used stab entries 0x68,0x70 */ + u8 xProcEnabled; /* 1=soft enabled 0x78 */ + u8 xHrdIntCount; /* Count of active hardware interrupts 0x79 */ + u8 prof_enabled; /* 1=iSeries profiling enabled 0x7A */ + u8 resv1[5]; /* 0x7B-0x7F */ + +/*===================================================================================== + * CACHE_LINE_2 0x0080 - 0x00FF + *===================================================================================== + */ + u64 *pgd_cache; /* 0x00 */ + u64 *pmd_cache; /* 0x08 */ + u64 *pte_cache; /* 0x10 */ + u64 pgtable_cache_sz; /* 0x18 */ + u64 next_jiffy_update_tb; /* TB value for next jiffy update 0x20 */ + u32 lpEvent_count; /* lpEvents processed 0x28 */ + u32 prof_multiplier; /* 0x2C */ + u32 prof_counter; /* 0x30 */ + u32 prof_shift; /* iSeries shift for profile bucket size0x34 */ + u32 *prof_buffer; /* iSeries profiling buffer 0x38 */ + u32 *prof_stext; /* iSeries start of kernel text 0x40 */ + u32 prof_len; /* iSeries length of profile buffer -1 0x48 */ + u8 rsvd2[128-76]; /* 0x4C */ + 
+/*===================================================================================== + * CACHE_LINE_3 0x0100 - 0x017F + *===================================================================================== + */ + u8 xProcStart; /* At startup, processor spins until 0x100 */ + /* xProcStart becomes non-zero. */ + u8 rsvd3[127]; + +/*===================================================================================== + * CACHE_LINE_4-8 0x0180 - 0x03FF Contains ItLpPaca + *===================================================================================== + */ + struct ItLpPaca xLpPaca; /* Space for ItLpPaca */ + +/*===================================================================================== + * CACHE_LINE_9-16 0x0400 - 0x07FF Contains ItLpRegSave + *===================================================================================== + */ + struct ItLpRegSave xRegSav; /* Register save for proc */ + +/*===================================================================================== + * CACHE_LINE_17-18 0x0800 - 0x08FF Reserved + *===================================================================================== + */ + struct rtas_args xRtas; /* Per processor RTAS struct */ + u64 xR1; /* r1 save for RTAS calls */ + u64 xSavedMsr; /* Old msr saved here by HvCall */ + u8 rsvd5[256-16-sizeof(struct rtas_args)]; + +/*===================================================================================== + * CACHE_LINE_19-30 0x0900 - 0x0EFF Reserved + *===================================================================================== + */ + u8 rsvd6[0x600]; + +/*===================================================================================== + * CACHE_LINE_31 0x0F00 - 0x0F7F Exception stack + *===================================================================================== + */ + u8 exception_stack[N_EXC_STACK*EXC_FRAME_SIZE]; + +/*===================================================================================== + * CACHE_LINE_32 0x0F80 - 0x0FFF Reserved + *===================================================================================== + */ + u8 rsvd7[0x80]; /* Give the stack some rope ... */ + +/*===================================================================================== + * Page 2 Reserved for guard page. Also used as a stack early in SMP boots before + * relocation is enabled. + *===================================================================================== + */ + u8 guard[0x1000]; /* ... and then hang 'em */ +}; + +#endif /* _PPC64_PACA_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/page.h linuxppc64_2_4/include/asm-ppc64/page.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/page.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/page.h Wed Apr 17 11:08:40 2002 @@ -31,7 +31,7 @@ #ifdef __KERNEL__ #ifndef __ASSEMBLY__ -#include <asm/Naca.h> +#include <asm/naca.h> #define STRICT_MM_TYPECHECKS @@ -42,6 +42,7 @@ #define REGION_MASK (((1UL<virtual + */ +#define WANT_PAGE_VIRTUAL 1 + /* Pure 2^n version of get_order */ extern __inline__ int get_order(unsigned long size) { @@ -179,10 +188,12 @@ #define KERNELBASE PAGE_OFFSET #define VMALLOCBASE 0xD000000000000000 #define IOREGIONBASE 0xE000000000000000 +#define BOLTEDBASE 0xB000000000000000 #define IO_REGION_ID (IOREGIONBASE>>REGION_SHIFT) #define VMALLOC_REGION_ID (VMALLOCBASE>>REGION_SHIFT) #define KERNEL_REGION_ID (KERNELBASE>>REGION_SHIFT) +#define BOLTED_REGION_ID (BOLTEDBASE>>REGION_SHIFT) #define USER_REGION_ID (0UL) #define REGION_ID(X) (((unsigned long)(X))>>REGION_SHIFT) @@ -193,7 +204,7 @@ #define INVALID_EA_BITS (~(REGION_MASK|VALID_EA_BITS)) #define IS_VALID_REGION_ID(x) \ - (((x) == USER_REGION_ID) || ((x) >= KERNEL_REGION_ID)) + (((x) == USER_REGION_ID) || ((x) >= BOLTED_REGION_ID)) #define IS_VALID_EA(x) \ ((!((x) & INVALID_EA_BITS)) && IS_VALID_REGION_ID(REGION_ID(x))) @@ -220,6 +231,9 @@ #define VALID_PAGE(page) ((page - mem_map) < max_mapnr) #define MAP_NR(addr) (__pa(addr) >> PAGE_SHIFT) + +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #endif /* __KERNEL__ */ #endif /* _PPC64_PAGE_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/parport.h linuxppc64_2_4/include/asm-ppc64/parport.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/parport.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/parport.h Sun Mar 10 23:37:13 2002 @@ -1,7 +1,4 @@ /* - * BK Id: SCCS/s.parport.h 1.5 05/17/01 18:14:25 cort - */ -/* * parport.h: platform-specific PC-style parport initialisation * * Copyright (C) 1999, 2000 Tim Waugh diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/pci.h linuxppc64_2_4/include/asm-ppc64/pci.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/pci.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/pci.h Mon Apr 8 09:14:27 2002 @@ -109,6 +109,7 @@ /* Return the index of the PCI controller for device PDEV. */ extern int pci_controller_num(struct pci_dev *pdev); +struct vm_area_struct; /* Map a range of PCI memory or I/O space for a device into user space */ int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); @@ -139,6 +140,12 @@ #define pci_dac_dma_supported(pci_dev, mask) (0) +/* The PCI address space does not equal the physical memory + * address space. The networking and block device layers use + * this boolean for bounce buffer decisions.
+ */ +#define PCI_DMA_BUS_IS_PHYS (0) + #endif /* __KERNEL__ */ #endif /* __PPC64_PCI_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/pgalloc.h linuxppc64_2_4/include/asm-ppc64/pgalloc.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/pgalloc.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/pgalloc.h Tue Apr 9 11:31:08 2002 @@ -3,8 +3,8 @@ #include #include -#include -#include +#include +#include /* * This program is free software; you can redistribute it and/or diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/pgtable.h linuxppc64_2_4/include/asm-ppc64/pgtable.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/pgtable.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/pgtable.h Wed Apr 17 11:08:40 2002 @@ -1,8 +1,6 @@ #ifndef _PPC64_PGTABLE_H #define _PPC64_PGTABLE_H -#define NEW_HPT 1 - /* * This file contains the functions and defines necessary to modify and use * the ppc64 hashed page table. @@ -19,7 +17,6 @@ * hook is made available. */ - /* PMD_SHIFT determines what a second-level page table entry can map */ #define PMD_SHIFT (PAGE_SHIFT + PAGE_SHIFT - 3) #define PMD_SIZE (1UL << PMD_SHIFT) @@ -409,7 +406,10 @@ #define update_mmu_cache(vma, addr, pte) do { } while (0) extern void flush_hash_segments(unsigned low_vsid, unsigned high_vsid); -extern void flush_hash_page(unsigned long context, unsigned long ea, pte_t pte); +extern void flush_hash_page(unsigned long context, unsigned long ea, pte_t *ptep); +extern void build_valid_hpte(unsigned long vsid, unsigned long ea, + unsigned long pa, pte_t * ptep, + unsigned hpteflags, unsigned bolted ); /* Encode and de-code a swap entry */ #define SWP_TYPE(entry) (((entry).val >> 1) & 0x3f) diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/pmc.h linuxppc64_2_4/include/asm-ppc64/pmc.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/pmc.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/pmc.h Wed Apr 17 11:08:40 2002 @@ -85,6 +85,9 @@ extern char *ppc64_pmc_htab(int file); extern char *ppc64_pmc_hw(int file); +void *btmalloc(unsigned long size); +void btfree(void *addr); + #if 1 #define PMC_SW_PROCESSOR(F) pmc_sw_cpu[smp_processor_id()].F++ #define PMC_SW_PROCESSOR_A(F, E) (pmc_sw_cpu[smp_processor_id()].F[(E)])++ @@ -109,5 +112,10 @@ #define PMC_CONTROL_CPI 1 #define PMC_CONTROL_TLB 2 + +/* To find an entry in the bolted page-table-directory */ +#define pgd_offset_b(address) (bolted_pgd + pgd_index(address)) +#define BTMALLOC_START 0xB000000000000000 +#define BTMALLOC_END 0xB0000000ffffffff /* 4 GB Max-more or less arbitrary */ #endif /* _PMC_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/pnp.h linuxppc64_2_4/include/asm-ppc64/pnp.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/pnp.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/pnp.h Wed Dec 31 18:00:00 1969 @@ -1,648 +0,0 @@ -/* 11/02/95 */ -/*----------------------------------------------------------------------------*/ -/* Plug and Play header definitions */ -/*----------------------------------------------------------------------------*/ - -/* Structure map for PnP on PowerPC Reference Platform */ -/* See Plug and Play ISA Specification, Version 1.0, May 28, 1993. It */ -/* (or later versions) is available on Compuserve in the PLUGPLAY area. 
*/ -/* This code has extensions to that specification, namely new short and */ -/* long tag types for platform dependent information */ -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -/* Warning: LE notation used throughout this file */ - -/* For enum's: if given in hex then they are bit significant, i.e. */ -/* only one bit is on for each enum */ - -#ifndef _PNP_ -#define _PNP_ - -#ifndef __ASSEMBLY__ -#define MAX_MEM_REGISTERS 9 -#define MAX_IO_PORTS 20 -#define MAX_IRQS 7 -/*#define MAX_DMA_CHANNELS 7*/ - -/* Interrupt controllers */ - -#define PNPinterrupt0 "PNP0000" /* AT Interrupt Controller */ -#define PNPinterrupt1 "PNP0001" /* EISA Interrupt Controller */ -#define PNPinterrupt2 "PNP0002" /* MCA Interrupt Controller */ -#define PNPinterrupt3 "PNP0003" /* APIC */ -#define PNPExtInt "IBM000D" /* PowerPC Extended Interrupt Controller */ - -/* Timers */ - -#define PNPtimer0 "PNP0100" /* AT Timer */ -#define PNPtimer1 "PNP0101" /* EISA Timer */ -#define PNPtimer2 "PNP0102" /* MCA Timer */ - -/* DMA controllers */ - -#define PNPdma0 "PNP0200" /* AT DMA Controller */ -#define PNPdma1 "PNP0201" /* EISA DMA Controller */ -#define PNPdma2 "PNP0202" /* MCA DMA Controller */ - -/* start of August 15, 1994 additions */ -/* CMOS */ -#define PNPCMOS "IBM0009" /* CMOS */ - -/* L2 Cache */ -#define PNPL2 "IBM0007" /* L2 Cache */ - -/* NVRAM */ -#define PNPNVRAM "IBM0008" /* NVRAM */ - -/* Power Management */ -#define PNPPM "IBM0005" /* Power Management */ -/* end of August 15, 1994 additions */ - -/* Keyboards */ - -#define PNPkeyboard0 "PNP0300" /* IBM PC/XT KB Cntlr (83 key, no mouse) */ -#define PNPkeyboard1 "PNP0301" /* Olivetti ICO (102 key) */ -#define PNPkeyboard2 "PNP0302" /* IBM PC/AT KB Cntlr (84 key) */ -#define PNPkeyboard3 "PNP0303" /* IBM Enhanced (101/2 key, PS/2 mouse) */ -#define PNPkeyboard4 "PNP0304" /* Nokia 1050 KB Cntlr */ -#define PNPkeyboard5 "PNP0305" /* Nokia 9140 KB Cntlr */ -#define PNPkeyboard6 "PNP0306" /* Standard Japanese KB Cntlr */ -#define PNPkeyboard7 "PNP0307" /* Microsoft Windows (R) KB Cntlr */ - -/* Parallel port controllers */ - -#define PNPparallel0 "PNP0400" /* Standard LPT Parallel Port */ -#define PNPparallel1 "PNP0401" /* ECP Parallel Port */ -#define PNPepp "IBM001C" /* EPP Parallel Port */ - -/* Serial port controllers */ - -#define PNPserial0 "PNP0500" /* Standard PC Serial port */ -#define PNPSerial1 "PNP0501" /* 16550A Compatible Serial port */ - -/* Disk controllers */ - -#define PNPdisk0 "PNP0600" /* Generic ESDI/IDE/ATA Compat HD Cntlr */ -#define PNPdisk1 "PNP0601" /* Plus Hardcard II */ -#define PNPdisk2 "PNP0602" /* Plus Hardcard IIXL/EZ */ - -/* Diskette controllers */ - -#define PNPdiskette0 "PNP0700" /* PC Standard Floppy Disk Controller */ - -/* Display controllers */ - -#define PNPdisplay0 "PNP0900" /* VGA Compatible */ -#define PNPdisplay1 "PNP0901" /* Video Seven VGA */ -#define PNPdisplay2 "PNP0902" /* 8514/A Compatible */ -#define PNPdisplay3 "PNP0903" /* Trident VGA */ -#define PNPdisplay4 "PNP0904" /* Cirrus Logic Laptop VGA */ -#define PNPdisplay5 "PNP0905" /* Cirrus Logic VGA */ -#define PNPdisplay6 "PNP0906" /* Tseng ET4000 or ET4000/W32 */ -#define PNPdisplay7 "PNP0907" /* Western Digital VGA */ -#define PNPdisplay8 "PNP0908" /* Western Digital Laptop VGA */ -#define PNPdisplay9 "PNP0909" /* S3 */ -#define 
PNPdisplayA "PNP090A" /* ATI Ultra Pro/Plus (Mach 32) */ -#define PNPdisplayB "PNP090B" /* ATI Ultra (Mach 8) */ -#define PNPdisplayC "PNP090C" /* XGA Compatible */ -#define PNPdisplayD "PNP090D" /* ATI VGA Wonder */ -#define PNPdisplayE "PNP090E" /* Weitek P9000 Graphics Adapter */ -#define PNPdisplayF "PNP090F" /* Oak Technology VGA */ - -/* Peripheral busses */ - -#define PNPbuses0 "PNP0A00" /* ISA Bus */ -#define PNPbuses1 "PNP0A01" /* EISA Bus */ -#define PNPbuses2 "PNP0A02" /* MCA Bus */ -#define PNPbuses3 "PNP0A03" /* PCI Bus */ -#define PNPbuses4 "PNP0A04" /* VESA/VL Bus */ - -/* RTC, BIOS, planar devices */ - -#define PNPspeaker0 "PNP0800" /* AT Style Speaker Sound */ -#define PNPrtc0 "PNP0B00" /* AT RTC */ -#define PNPpnpbios0 "PNP0C00" /* PNP BIOS (only created by root enum) */ -#define PNPpnpbios1 "PNP0C01" /* System Board Memory Device */ -#define PNPpnpbios2 "PNP0C02" /* Math Coprocessor */ -#define PNPpnpbios3 "PNP0C03" /* PNP BIOS Event Notification Interrupt */ - -/* PCMCIA controller */ - -#define PNPpcmcia0 "PNP0E00" /* Intel 82365 Compatible PCMCIA Cntlr */ - -/* Mice */ - -#define PNPmouse0 "PNP0F00" /* Microsoft Bus Mouse */ -#define PNPmouse1 "PNP0F01" /* Microsoft Serial Mouse */ -#define PNPmouse2 "PNP0F02" /* Microsoft Inport Mouse */ -#define PNPmouse3 "PNP0F03" /* Microsoft PS/2 Mouse */ -#define PNPmouse4 "PNP0F04" /* Mousesystems Mouse */ -#define PNPmouse5 "PNP0F05" /* Mousesystems 3 Button Mouse - COM2 */ -#define PNPmouse6 "PNP0F06" /* Genius Mouse - COM1 */ -#define PNPmouse7 "PNP0F07" /* Genius Mouse - COM2 */ -#define PNPmouse8 "PNP0F08" /* Logitech Serial Mouse */ -#define PNPmouse9 "PNP0F09" /* Microsoft Ballpoint Serial Mouse */ -#define PNPmouseA "PNP0F0A" /* Microsoft PNP Mouse */ -#define PNPmouseB "PNP0F0B" /* Microsoft PNP Ballpoint Mouse */ - -/* Modems */ - -#define PNPmodem0 "PNP9000" /* Specific IDs TBD */ - -/* Network controllers */ - -#define PNPnetworkC9 "PNP80C9" /* IBM Token Ring */ -#define PNPnetworkCA "PNP80CA" /* IBM Token Ring II */ -#define PNPnetworkCB "PNP80CB" /* IBM Token Ring II/Short */ -#define PNPnetworkCC "PNP80CC" /* IBM Token Ring 4/16Mbs */ -#define PNPnetwork27 "PNP8327" /* IBM Token Ring (All types) */ -#define PNPnetworket "IBM0010" /* IBM Ethernet used by Power PC */ -#define PNPneteisaet "IBM2001" /* IBM Ethernet EISA adapter */ -#define PNPAMD79C970 "IBM0016" /* AMD 79C970 (PCI Ethernet) */ - -/* SCSI controllers */ - -#define PNPscsi0 "PNPA000" /* Adaptec 154x Compatible SCSI Cntlr */ -#define PNPscsi1 "PNPA001" /* Adaptec 174x Compatible SCSI Cntlr */ -#define PNPscsi2 "PNPA002" /* Future Domain 16-700 Compat SCSI Cntlr*/ -#define PNPscsi3 "PNPA003" /* Panasonic CDROM Adapter (SBPro/SB16) */ -#define PNPscsiF "IBM000F" /* NCR 810 SCSI Controller */ -#define PNPscsi825 "IBM001B" /* NCR 825 SCSI Controller */ -#define PNPscsi875 "IBM0018" /* NCR 875 SCSI Controller */ - -/* Sound/Video, Multimedia */ - -#define PNPmm0 "PNPB000" /* Sound Blaster Compatible Sound Device */ -#define PNPmm1 "PNPB001" /* MS Windows Sound System Compat Device */ -#define PNPmmF "IBM000E" /* Crystal CS4231 Audio Device */ -#define PNPv7310 "IBM0015" /* ASCII V7310 Video Capture Device */ -#define PNPmm4232 "IBM0017" /* Crystal CS4232 Audio Device */ -#define PNPpmsyn "IBM001D" /* YMF 289B chip (Yamaha) */ -#define PNPgp4232 "IBM0012" /* Crystal CS4232 Game Port */ -#define PNPmidi4232 "IBM0013" /* Crystal CS4232 MIDI */ - -/* Operator Panel */ -#define PNPopctl "IBM000B" /* Operator's panel */ - -/* Service Processor */ -#define PNPsp 
"IBM0011" /* IBM Service Processor */ -#define PNPLTsp "IBM001E" /* Lightning/Terlingua Support Processor */ -#define PNPLTmsp "IBM001F" /* Lightning/Terlingua Mini-SP */ - -/* Memory Controller */ -#define PNPmemctl "IBM000A" /* Memory controller */ - -/* Graphics Assist */ -#define PNPg_assist "IBM0014" /* Graphics Assist */ - -/* Miscellaneous Device Controllers */ -#define PNPtablet "IBM0019" /* IBM Tablet Controller */ - -/* PNP Packet Handles */ - -#define S1_Packet 0x0A /* Version resource */ -#define S2_Packet 0x15 /* Logical DEVID (without flags) */ -#define S2_Packet_flags 0x16 /* Logical DEVID (with flags) */ -#define S3_Packet 0x1C /* Compatible device ID */ -#define S4_Packet 0x22 /* IRQ resource (without flags) */ -#define S4_Packet_flags 0x23 /* IRQ resource (with flags) */ -#define S5_Packet 0x2A /* DMA resource */ -#define S6_Packet 0x30 /* Depend funct start (w/o priority) */ -#define S6_Packet_priority 0x31 /* Depend funct start (w/ priority) */ -#define S7_Packet 0x38 /* Depend funct end */ -#define S8_Packet 0x47 /* I/O port resource (w/o fixed loc) */ -#define S9_Packet_fixed 0x4B /* I/O port resource (w/ fixed loc) */ -#define S14_Packet 0x71 /* Vendor defined */ -#define S15_Packet 0x78 /* End of resource (w/o checksum) */ -#define S15_Packet_checksum 0x79 /* End of resource (w/ checksum) */ -#define L1_Packet 0x81 /* Memory range */ -#define L1_Shadow 0x20 /* Memory is shadowable */ -#define L1_32bit_mem 0x18 /* 32-bit memory only */ -#define L1_8_16bit_mem 0x10 /* 8- and 16-bit supported */ -#define L1_Decode_Hi 0x04 /* decode supports high address */ -#define L1_Cache 0x02 /* read cacheable, write-through */ -#define L1_Writeable 0x01 /* Memory is writeable */ -#define L2_Packet 0x82 /* ANSI ID string */ -#define L3_Packet 0x83 /* Unicode ID string */ -#define L4_Packet 0x84 /* Vendor defined */ -#define L5_Packet 0x85 /* Large I/O */ -#define L6_Packet 0x86 /* 32-bit Fixed Loc Mem Range Desc */ -#define END_TAG 0x78 /* End of resource */ -#define DF_START_TAG 0x30 /* Dependent function start */ -#define DF_START_TAG_priority 0x31 /* Dependent function start */ -#define DF_END_TAG 0x38 /* Dependent function end */ -#define SUBOPTIMAL_CONFIGURATION 0x2 /* Priority byte sub optimal config */ - -/* Device Base Type Codes */ - -typedef enum _PnP_BASE_TYPE { - Reserved = 0, - MassStorageDevice = 1, - NetworkInterfaceController = 2, - DisplayController = 3, - MultimediaController = 4, - MemoryController = 5, - BridgeController = 6, - CommunicationsDevice = 7, - SystemPeripheral = 8, - InputDevice = 9, - ServiceProcessor = 0x0A, /* 11/2/95 */ - } PnP_BASE_TYPE; - -/* Device Sub Type Codes */ - -typedef enum _PnP_SUB_TYPE { - SCSIController = 0, - IDEController = 1, - FloppyController = 2, - IPIController = 3, - OtherMassStorageController = 0x80, - - EthernetController = 0, - TokenRingController = 1, - FDDIController = 2, - OtherNetworkController = 0x80, - - VGAController= 0, - SVGAController= 1, - XGAController= 2, - OtherDisplayController = 0x80, - - VideoController = 0, - AudioController = 1, - OtherMultimediaController = 0x80, - - RAM = 0, - FLASH = 1, - OtherMemoryDevice = 0x80, - - HostProcessorBridge = 0, - ISABridge = 1, - EISABridge = 2, - MicroChannelBridge = 3, - PCIBridge = 4, - PCMCIABridge = 5, - VMEBridge = 6, - OtherBridgeDevice = 0x80, - - RS232Device = 0, - ATCompatibleParallelPort = 1, - OtherCommunicationsDevice = 0x80, - - ProgrammableInterruptController = 0, - DMAController = 1, - SystemTimer = 2, - RealTimeClock = 3, - L2Cache = 4, - NVRAM = 5, - 
PowerManagement = 6, - CMOS = 7, - OperatorPanel = 8, - ServiceProcessorClass1 = 9, - ServiceProcessorClass2 = 0xA, - ServiceProcessorClass3 = 0xB, - GraphicAssist = 0xC, - SystemPlanar = 0xF, /* 10/5/95 */ - OtherSystemPeripheral = 0x80, - - KeyboardController = 0, - Digitizer = 1, - MouseController = 2, - TabletController = 3, /* 10/27/95 */ - OtherInputController = 0x80, - - GeneralMemoryController = 0, - } PnP_SUB_TYPE; - -/* Device Interface Type Codes */ - -typedef enum _PnP_INTERFACE { - General = 0, - GeneralSCSI = 0, - GeneralIDE = 0, - ATACompatible = 1, - - GeneralFloppy = 0, - Compatible765 = 1, - NS398_Floppy = 2, /* NS Super I/O wired to use index - register at port 398 and data - register at port 399 */ - NS26E_Floppy = 3, /* Ports 26E and 26F */ - NS15C_Floppy = 4, /* Ports 15C and 15D */ - NS2E_Floppy = 5, /* Ports 2E and 2F */ - CHRP_Floppy = 6, /* CHRP Floppy in PR*P system */ - - GeneralIPI = 0, - - GeneralEther = 0, - GeneralToken = 0, - GeneralFDDI = 0, - - GeneralVGA = 0, - GeneralSVGA = 0, - GeneralXGA = 0, - - GeneralVideo = 0, - GeneralAudio = 0, - CS4232Audio = 1, /* CS 4232 Plug 'n Play Configured */ - - GeneralRAM = 0, - GeneralFLASH = 0, - PCIMemoryController = 0, /* PCI Config Method */ - RS6KMemoryController = 1, /* RS6K Config Method */ - - GeneralHostBridge = 0, - GeneralISABridge = 0, - GeneralEISABridge = 0, - GeneralMCABridge = 0, - GeneralPCIBridge = 0, - PCIBridgeDirect = 0, - PCIBridgeIndirect = 1, - PCIBridgeRS6K = 2, - GeneralPCMCIABridge = 0, - GeneralVMEBridge = 0, - - GeneralRS232 = 0, - COMx = 1, - Compatible16450 = 2, - Compatible16550 = 3, - NS398SerPort = 4, /* NS Super I/O wired to use index - register at port 398 and data - register at port 399 */ - NS26ESerPort = 5, /* Ports 26E and 26F */ - NS15CSerPort = 6, /* Ports 15C and 15D */ - NS2ESerPort = 7, /* Ports 2E and 2F */ - - GeneralParPort = 0, - LPTx = 1, - NS398ParPort = 2, /* NS Super I/O wired to use index - register at port 398 and data - register at port 399 */ - NS26EParPort = 3, /* Ports 26E and 26F */ - NS15CParPort = 4, /* Ports 15C and 15D */ - NS2EParPort = 5, /* Ports 2E and 2F */ - - GeneralPIC = 0, - ISA_PIC = 1, - EISA_PIC = 2, - MPIC = 3, - RS6K_PIC = 4, - - GeneralDMA = 0, - ISA_DMA = 1, - EISA_DMA = 2, - - GeneralTimer = 0, - ISA_Timer = 1, - EISA_Timer = 2, - GeneralRTC = 0, - ISA_RTC = 1, - - StoreThruOnly = 1, - StoreInEnabled = 2, - RS6KL2Cache = 3, - - IndirectNVRAM = 0, /* Indirectly addressed */ - DirectNVRAM = 1, /* Memory Mapped */ - IndirectNVRAM24 = 2, /* Indirectly addressed - 24 bit */ - - GeneralPowerManagement = 0, - EPOWPowerManagement = 1, - PowerControl = 2, /* d1378 */ - - GeneralCMOS = 0, - - GeneralOPPanel = 0, - HarddiskLight = 1, - CDROMLight = 2, - PowerLight = 3, - KeyLock = 4, - ANDisplay = 5, /* AlphaNumeric Display */ - SystemStatusLED = 6, /* 3 digit 7 segment LED */ - CHRP_SystemStatusLED = 7, /* CHRP LEDs in PR*P system */ - - GeneralServiceProcessor = 0, - - TransferData = 1, - IGMC32 = 2, - IGMC64 = 3, - - GeneralSystemPlanar = 0, /* 10/5/95 */ - - } PnP_INTERFACE; - -/* PnP resources */ - -/* Compressed ASCII is 5 bits per char; 00001=A ... 
11010=Z */ - -typedef struct _SERIAL_ID { - unsigned char VendorID0; /* Bit(7)=0 */ - /* Bits(6:2)=1st character in */ - /* compressed ASCII */ - /* Bits(1:0)=2nd character in */ - /* compressed ASCII bits(4:3) */ - unsigned char VendorID1; /* Bits(7:5)=2nd character in */ - /* compressed ASCII bits(2:0) */ - /* Bits(4:0)=3rd character in */ - /* compressed ASCII */ - unsigned char VendorID2; /* Product number - vendor assigned */ - unsigned char VendorID3; /* Product number - vendor assigned */ - -/* Serial number is to provide uniqueness if more than one board of same */ -/* type is in system. Must be "FFFFFFFF" if feature not supported. */ - - unsigned char Serial0; /* Unique serial number bits (7:0) */ - unsigned char Serial1; /* Unique serial number bits (15:8) */ - unsigned char Serial2; /* Unique serial number bits (23:16) */ - unsigned char Serial3; /* Unique serial number bits (31:24) */ - unsigned char Checksum; - } SERIAL_ID; - -typedef enum _PnPItemName { - Unused = 0, - PnPVersion = 1, - LogicalDevice = 2, - CompatibleDevice = 3, - IRQFormat = 4, - DMAFormat = 5, - StartDepFunc = 6, - EndDepFunc = 7, - IOPort = 8, - FixedIOPort = 9, - Res1 = 10, - Res2 = 11, - Res3 = 12, - SmallVendorItem = 14, - EndTag = 15, - MemoryRange = 1, - ANSIIdentifier = 2, - UnicodeIdentifier = 3, - LargeVendorItem = 4, - MemoryRange32 = 5, - MemoryRangeFixed32 = 6, - } PnPItemName; - -/* Define a bunch of access functions for the bits in the tag field */ - -/* Tag type - 0 = small; 1 = large */ -#define tag_type(t) (((t) & 0x80)>>7) -#define set_tag_type(t,v) (t = (t & 0x7f) | ((v)<<7)) - -/* Small item name is 4 bits - one of PnPItemName enum above */ -#define tag_small_item_name(t) (((t) & 0x78)>>3) -#define set_tag_small_item_name(t,v) (t = (t & 0x07) | ((v)<<3)) - -/* Small item count is 3 bits - count of further bytes in packet */ -#define tag_small_count(t) ((t) & 0x07) -#define set_tag_count(t,v) (t = (t & 0x78) | (v)) - -/* Large item name is 7 bits - one of PnPItemName enum above */ -#define tag_large_item_name(t) ((t) & 0x7f) -#define set_tag_large_item_name(t,v) (t = (t | 0x80) | (v)) - -/* a PnP resource is a bunch of contiguous TAG packets ending with an end tag */ - -typedef union _PnP_TAG_PACKET { - struct _S1_Pack{ /* VERSION PACKET */ - unsigned char Tag; /* small tag = 0x0a */ - unsigned char Version[2]; /* PnP version, Vendor version */ - } S1_Pack; - - struct _S2_Pack{ /* LOGICAL DEVICE ID PACKET */ - unsigned char Tag; /* small tag = 0x15 or 0x16 */ - unsigned char DevId[4]; /* Logical device id */ - unsigned char Flags[2]; /* bit(0) boot device; */ - /* bit(7:1) cmd in range x31-x37 */ - /* bit(7:0) cmd in range x28-x3f (opt)*/ - } S2_Pack; - - struct _S3_Pack{ /* COMPATIBLE DEVICE ID PACKET */ - unsigned char Tag; /* small tag = 0x1c */ - unsigned char CompatId[4]; /* Compatible device id */ - } S3_Pack; - - struct _S4_Pack{ /* IRQ PACKET */ - unsigned char Tag; /* small tag = 0x22 or 0x23 */ - unsigned char IRQMask[2]; /* bit(0) is IRQ0, ...; */ - /* bit(0) is IRQ8 ... */ - unsigned char IRQInfo; /* optional; assume bit(0)=1; else */ - /* bit(0) - high true edge sensitive */ - /* bit(1) - low true edge sensitive */ - /* bit(2) - high true level sensitive*/ - /* bit(3) - low true level sensitive */ - /* bit(7:4) - must be 0 */ - } S4_Pack; - - struct _S5_Pack{ /* DMA PACKET */ - unsigned char Tag; /* small tag = 0x2a */ - unsigned char DMAMask; /* bit(0) is channel 0 ... 
*/ - unsigned char DMAInfo; - } S5_Pack; - - struct _S6_Pack{ /* START DEPENDENT FUNCTION PACKET */ - unsigned char Tag; /* small tag = 0x30 or 0x31 */ - unsigned char Priority; /* Optional; if missing then x01; else*/ - /* x00 = best possible */ - /* x01 = acceptible */ - /* x02 = sub-optimal but functional */ - } S6_Pack; - - struct _S7_Pack{ /* END DEPENDENT FUNCTION PACKET */ - unsigned char Tag; /* small tag = 0x38 */ - } S7_Pack; - - struct _S8_Pack{ /* VARIABLE I/O PORT PACKET */ - unsigned char Tag; /* small tag x47 */ - unsigned char IOInfo; /* x0 = decode only bits(9:0); */ -#define ISAAddr16bit 0x01 /* x01 = decode bits(15:0) */ - unsigned char RangeMin[2]; /* Min base address */ - unsigned char RangeMax[2]; /* Max base address */ - unsigned char IOAlign; /* base alignmt, incr in 1B blocks */ - unsigned char IONum; /* number of contiguous I/O ports */ - } S8_Pack; - - struct _S9_Pack{ /* FIXED I/O PORT PACKET */ - unsigned char Tag; /* small tag = 0x4b */ - unsigned char Range[2]; /* base address 10 bits */ - unsigned char IONum; /* number of contiguous I/O ports */ - } S9_Pack; - - struct _S14_Pack{ /* VENDOR DEFINED PACKET */ - unsigned char Tag; /* small tag = 0x7m m = 1-7 */ - union _S14_Data{ - unsigned char Data[7]; /* Vendor defined */ - struct _S14_PPCPack{ /* Pr*p s14 pack */ - unsigned char Type; /* 00=non-IBM */ - unsigned char PPCData[6]; /* Vendor defined */ - } S14_PPCPack; - } S14_Data; - } S14_Pack; - - struct _S15_Pack{ /* END PACKET */ - unsigned char Tag; /* small tag = 0x78 or 0x79 */ - unsigned char Check; /* optional - checksum */ - } S15_Pack; - - struct _L1_Pack{ /* MEMORY RANGE PACKET */ - unsigned char Tag; /* large tag = 0x81 */ - unsigned char Count0; /* x09 */ - unsigned char Count1; /* x00 */ - unsigned char Data[9]; /* a variable array of bytes, */ - /* count in tag */ - } L1_Pack; - - struct _L2_Pack{ /* ANSI ID STRING PACKET */ - unsigned char Tag; /* large tag = 0x82 */ - unsigned char Count0; /* Length of string */ - unsigned char Count1; - unsigned char Identifier[1]; /* a variable array of bytes, */ - /* count in tag */ - } L2_Pack; - - struct _L3_Pack{ /* UNICODE ID STRING PACKET */ - unsigned char Tag; /* large tag = 0x83 */ - unsigned char Count0; /* Length + 2 of string */ - unsigned char Count1; - unsigned char Country0; /* TBD */ - unsigned char Country1; /* TBD */ - unsigned char Identifier[1]; /* a variable array of bytes, */ - /* count in tag */ - } L3_Pack; - - struct _L4_Pack{ /* VENDOR DEFINED PACKET */ - unsigned char Tag; /* large tag = 0x84 */ - unsigned char Count0; - unsigned char Count1; - union _L4_Data{ - unsigned char Data[1]; /* a variable array of bytes, */ - /* count in tag */ - struct _L4_PPCPack{ /* Pr*p L4 packet */ - unsigned char Type; /* 00=non-IBM */ - unsigned char PPCData[1]; /* a variable array of bytes, */ - /* count in tag */ - } L4_PPCPack; - } L4_Data; - } L4_Pack; - - struct _L5_Pack{ - unsigned char Tag; /* large tag = 0x85 */ - unsigned char Count0; /* Count = 17 */ - unsigned char Count1; - unsigned char Data[17]; - } L5_Pack; - - struct _L6_Pack{ - unsigned char Tag; /* large tag = 0x86 */ - unsigned char Count0; /* Count = 9 */ - unsigned char Count1; - unsigned char Data[9]; - } L6_Pack; - - } PnP_TAG_PACKET; - -#endif /* __ASSEMBLY__ */ -#endif /* ndef _PNP_ */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/posix_types.h linuxppc64_2_4/include/asm-ppc64/posix_types.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/posix_types.h Fri Apr 19 11:00:39 2002 +++ 
linuxppc64_2_4/include/asm-ppc64/posix_types.h Tue Apr 2 15:52:43 2002 @@ -11,13 +11,22 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ - +#if 0 +# define DRENG_TYPES +#endif + +#ifdef DRENG_TYPES +typedef unsigned long __kernel_dev_t; +typedef unsigned long __kernel_ino_t; +typedef unsigned long __kernel_nlink_t; +#else typedef unsigned int __kernel_dev_t; typedef unsigned int __kernel_ino_t; -typedef unsigned int __kernel_mode_t; typedef unsigned int __kernel_nlink_t; +#endif +typedef unsigned int __kernel_mode_t; typedef long __kernel_off_t; -typedef long long __kernel_loff_t; +typedef long long __kernel_loff_t; typedef int __kernel_pid_t; typedef int __kernel_ipc_pid_t; typedef unsigned int __kernel_uid_t; @@ -41,30 +50,6 @@ typedef struct { int val[2]; } __kernel_fsid_t; - - -/* These are here to support 32-bit syscalls on a 64-bit kernel. */ -typedef unsigned int __kernel_size_t32; -typedef int __kernel_ssize_t32; -typedef int __kernel_ptrdiff_t32; -typedef int __kernel_time_t32; -typedef int __kernel_clock_t32; -typedef int __kernel_pid_t32; -typedef unsigned short __kernel_ipc_pid_t32; -typedef unsigned int __kernel_uid_t32; -typedef unsigned int __kernel_gid_t32; -typedef unsigned int __kernel_dev_t32; -typedef unsigned int __kernel_ino_t32; -typedef unsigned int __kernel_mode_t32; -typedef unsigned int __kernel_umode_t32; -typedef short __kernel_nlink_t32; -typedef int __kernel_daddr_t32; -typedef int __kernel_off_t32; -typedef unsigned int __kernel_caddr_t32; -typedef int __kernel_loff_t32; -/* typedef __kernel_fsid_t __kernel_fsid_t32; */ - - #ifndef __GNUC__ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/ppc_asm.tmpl linuxppc64_2_4/include/asm-ppc64/ppc_asm.tmpl --- ../kernel.org/linux-2.4.19/include/asm-ppc64/ppc_asm.tmpl Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/asm-ppc64/ppc_asm.tmpl Wed Apr 10 12:23:03 2002 @@ -0,0 +1,115 @@ +/* Condition Register Bit Fields */ + +#define cr0 0 +#define cr1 1 +#define cr2 2 +#define cr3 3 +#define cr4 4 +#define cr5 5 +#define cr6 6 +#define cr7 7 + + +/* General Purpose Registers (GPRs) */ + +#define r0 0 +#define r1 1 +#define r2 2 +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 +#define r8 8 +#define r9 9 +#define r10 10 +#define r11 11 +#define r12 12 +#define r13 13 +#define r14 14 +#define r15 15 +#define r16 16 +#define r17 17 +#define r18 18 +#define r19 19 +#define r20 20 +#define r21 21 +#define r22 22 +#define r23 23 +#define r24 24 +#define r25 25 +#define r26 26 +#define r27 27 +#define r28 28 +#define r29 29 +#define r30 30 +#define r31 31 + + +/* Floating Point Registers (FPRs) */ + +#define fr0 0 +#define fr1 1 +#define fr2 2 +#define fr3 3 +#define fr4 4 +#define fr5 5 +#define fr6 6 +#define fr7 7 +#define fr8 8 +#define fr9 9 +#define fr10 10 +#define fr11 11 +#define fr12 12 +#define fr13 13 +#define fr14 14 +#define fr15 15 +#define fr16 16 +#define fr17 17 +#define fr18 18 +#define fr19 19 +#define fr20 20 +#define fr21 21 +#define fr22 22 +#define fr23 23 +#define fr24 24 +#define fr25 25 +#define fr26 26 +#define fr27 27 +#define fr28 28 +#define fr29 29 +#define fr30 30 +#define fr31 31 + +#define vr0 0 +#define vr1 1 +#define vr2 2 +#define vr3 3 +#define vr4 4 +#define vr5 5 +#define vr6 6 +#define vr7 7 +#define vr8 8 +#define vr9 9 +#define vr10 10 +#define vr11 11 +#define vr12 12 +#define vr13 13 +#define vr14 14 +#define vr15 15 +#define vr16 16 +#define vr17 17 +#define vr18 
18 +#define vr19 19 +#define vr20 20 +#define vr21 21 +#define vr22 22 +#define vr23 23 +#define vr24 24 +#define vr25 25 +#define vr26 26 +#define vr27 27 +#define vr28 28 +#define vr29 29 +#define vr30 30 +#define vr31 31 diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/ppcdebug.h linuxppc64_2_4/include/asm-ppc64/ppcdebug.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/ppcdebug.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/ppcdebug.h Tue Apr 9 11:31:08 2002 @@ -22,7 +22,7 @@ #define PPCDBG_BITVAL(X) ((1UL)<<((unsigned long)(X))) /* Defined below are the bit positions of various debug flags in the - * debug_switch variable (defined in Naca.h). + * debug_switch variable (defined in naca.h). * -- When adding new values, please enter them into trace names below -- * * Values 62 & 63 can be used to stress the hardware page table management @@ -110,6 +110,10 @@ #ifndef PPCDBG_ENTER_DEBUGGER #define PPCDBG_ENTER_DEBUGGER() do {;} while(0) +#endif + +#ifndef PPCDBG_ENTER_DEBUGGER_REGS +#define PPCDBG_ENTER_DEBUGGER_REGS(A) do {;} while(0) #endif #endif /*__PPCDEBUG_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/processor.h linuxppc64_2_4/include/asm-ppc64/processor.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/processor.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/processor.h Thu Apr 18 09:35:16 2002 @@ -10,6 +10,7 @@ * 2 of the License, or (at your option) any later version. */ +#include #ifndef __ASSEMBLY__ #include #include @@ -52,36 +53,36 @@ #define MSR_LE_LG 0 /* Little Endian */ #ifdef __ASSEMBLY__ -#define MASK(X) (1<<(X)) +#define __MASK(X) (1<<(X)) #else -#define MASK(X) (1UL<<(X)) +#define __MASK(X) (1UL<<(X)) #endif -#define MSR_SF MASK(MSR_SF_LG) /* Enable 64 bit mode */ -#define MSR_ISF MASK(MSR_ISF_LG) /* Interrupt 64b mode valid on 630 */ -#define MSR_HV MASK(MSR_HV_LG) /* Hypervisor state */ -#define MSR_VEC MASK(MSR_VEC_LG) /* Enable AltiVec */ -#define MSR_POW MASK(MSR_POW_LG) /* Enable Power Management */ -#define MSR_WE MASK(MSR_WE_LG) /* Wait State Enable */ -#define MSR_TGPR MASK(MSR_TGPR_LG)/* TLB Update registers in use */ -#define MSR_CE MASK(MSR_CE_LG) /* Critical Interrupt Enable */ -#define MSR_ILE MASK(MSR_ILE_LG) /* Interrupt Little Endian */ -#define MSR_EE MASK(MSR_EE_LG) /* External Interrupt Enable */ -#define MSR_PR MASK(MSR_PR_LG) /* Problem State / Privilege Level */ -#define MSR_FP MASK(MSR_FP_LG) /* Floating Point enable */ -#define MSR_ME MASK(MSR_ME_LG) /* Machine Check Enable */ -#define MSR_FE0 MASK(MSR_FE0_LG) /* Floating Exception mode 0 */ -#define MSR_SE MASK(MSR_SE_LG) /* Single Step */ -#define MSR_BE MASK(MSR_BE_LG) /* Branch Trace */ -#define MSR_DE MASK(MSR_DE_LG) /* Debug Exception Enable */ -#define MSR_FE1 MASK(MSR_FE1_LG) /* Floating Exception mode 1 */ -#define MSR_IP MASK(MSR_IP_LG) /* Exception prefix 0x000/0xFFF */ -#define MSR_IR MASK(MSR_IR_LG) /* Instruction Relocate */ -#define MSR_DR MASK(MSR_DR_LG) /* Data Relocate */ -#define MSR_PE MASK(MSR_PE_LG) /* Protection Enable */ -#define MSR_PX MASK(MSR_PX_LG) /* Protection Exclusive Mode */ -#define MSR_RI MASK(MSR_RI_LG) /* Recoverable Exception */ -#define MSR_LE MASK(MSR_LE_LG) /* Little Endian */ +#define MSR_SF __MASK(MSR_SF_LG) /* Enable 64 bit mode */ +#define MSR_ISF __MASK(MSR_ISF_LG) /* Interrupt 64b mode valid on 630 */ +#define MSR_HV __MASK(MSR_HV_LG) /* Hypervisor state */ +#define MSR_VEC __MASK(MSR_VEC_LG) /* Enable AltiVec */ +#define MSR_POW __MASK(MSR_POW_LG) /* Enable Power 
Management */ +#define MSR_WE __MASK(MSR_WE_LG) /* Wait State Enable */ +#define MSR_TGPR __MASK(MSR_TGPR_LG) /* TLB Update registers in use */ +#define MSR_CE __MASK(MSR_CE_LG) /* Critical Interrupt Enable */ +#define MSR_ILE __MASK(MSR_ILE_LG) /* Interrupt Little Endian */ +#define MSR_EE __MASK(MSR_EE_LG) /* External Interrupt Enable */ +#define MSR_PR __MASK(MSR_PR_LG) /* Problem State / Privilege Level */ +#define MSR_FP __MASK(MSR_FP_LG) /* Floating Point enable */ +#define MSR_ME __MASK(MSR_ME_LG) /* Machine Check Enable */ +#define MSR_FE0 __MASK(MSR_FE0_LG) /* Floating Exception mode 0 */ +#define MSR_SE __MASK(MSR_SE_LG) /* Single Step */ +#define MSR_BE __MASK(MSR_BE_LG) /* Branch Trace */ +#define MSR_DE __MASK(MSR_DE_LG) /* Debug Exception Enable */ +#define MSR_FE1 __MASK(MSR_FE1_LG) /* Floating Exception mode 1 */ +#define MSR_IP __MASK(MSR_IP_LG) /* Exception prefix 0x000/0xFFF */ +#define MSR_IR __MASK(MSR_IR_LG) /* Instruction Relocate */ +#define MSR_DR __MASK(MSR_DR_LG) /* Data Relocate */ +#define MSR_PE __MASK(MSR_PE_LG) /* Protection Enable */ +#define MSR_PX __MASK(MSR_PX_LG) /* Protection Exclusive Mode */ +#define MSR_RI __MASK(MSR_RI_LG) /* Recoverable Exception */ +#define MSR_LE __MASK(MSR_LE_LG) /* Little Endian */ #define MSR_ MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF #define MSR_KERNEL MSR_ | MSR_SF | MSR_HV @@ -489,18 +490,10 @@ #define PV_630 0x0040 #define PV_630p 0x0041 -/* Platforms supported by PPC64. _machine is actually a set of flags */ -#define _MACH_pSeriesHW 0x00010000 -#define _MACH_iSeriesHW 0x00020000 -#define _MACH_LPAR 0x00000001 - -#define _MACH_unknown 0x00000000 -#define _MACH_pSeries (_MACH_pSeriesHW) -#define _MACH_pSeriesLP (_MACH_pSeriesHW | _MACH_LPAR) -#define _MACH_iSeries (_MACH_iSeriesHW | _MACH_LPAR) - -/* Compat defines for drivers */ -#define _MACH_Pmac 0xf0000000 /* bogus value */ +/* Platforms supported by PPC64 */ +#define PLATFORM_PSERIES 0x0100 +#define PLATFORM_PSERIES_LPAR 0x0101 +#define PLATFORM_ISERIES_LPAR 0x0201 /* * List of interrupt controllers. 
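The new PLATFORM_* values encode the machine family in the high byte and keep the LPAR flag in bit 0: 0x0100 is bare-metal pSeries, 0x0101 pSeries in an LPAR, 0x0201 iSeries (always an LPAR). A minimal sketch of tests against that encoding follows; the naca->platform field used here is an assumption for illustration only, since this hunk defines just the constants.

/* Hypothetical helpers -- naca->platform is assumed to hold the value. */
static inline int platform_is_lpar(void)
{
	return (naca->platform & 0x0001) != 0;	/* low bit = LPAR flag */
}

static inline int platform_is_iseries(void)
{
	return (naca->platform & 0xff00) == 0x0200;	/* family byte */
}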
@@ -509,8 +502,6 @@ #define IC_OPEN_PIC 1 #define IC_PPC_XIC 2 -#define stringify(s) tostring(s) -#define tostring(s) #s #define XGLUE(a,b) a##b #define GLUE(a,b) XGLUE(a,b) @@ -569,7 +560,7 @@ .long GLUE(.LT,NAME)-GLUE(.,NAME) ;\ .short GLUE(GLUE(.LT,NAME),_procname_end)-GLUE(GLUE(.LT,NAME),_procname_start) ;\ GLUE(GLUE(.LT,NAME),_procname_start): ;\ - .ascii stringify(NAME) ;\ + .ascii __stringify(NAME) ;\ GLUE(GLUE(.LT,NAME),_procname_end): #endif /* __ASSEMBLY__ */ @@ -583,9 +574,9 @@ #define mtmsrd(v) asm volatile("mtmsrd %0" : : "r" (v)) #define mfspr(rn) ({unsigned long rval; \ - asm volatile("mfspr %0," stringify(rn) \ + asm volatile("mfspr %0," __stringify(rn) \ : "=r" (rval)); rval;}) -#define mtspr(rn, v) asm volatile("mtspr " stringify(rn) ",%0" : : "r" (v)) +#define mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : : "r" (v)) #define mftb() ({unsigned long rval; \ asm volatile("mftb %0" : "=r" (rval)); rval;}) @@ -609,7 +600,6 @@ asm volatile("mfasr %0" : "=r" (rval)); rval;}) #ifndef __ASSEMBLY__ -extern int _machine; extern int have_of; struct task_struct; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/rtas.h linuxppc64_2_4/include/asm-ppc64/rtas.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/rtas.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/rtas.h Mon Mar 25 03:29:56 2002 @@ -38,7 +38,9 @@ u32 nargs; u32 nret; rtas_arg_t args[16]; +#if 0 spinlock_t lock; +#endif rtas_arg_t *rets; /* Pointer to return values in args[]. */ }; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/sigcontext.h linuxppc64_2_4/include/asm-ppc64/sigcontext.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/sigcontext.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/sigcontext.h Tue Apr 2 15:52:43 2002 @@ -18,17 +18,4 @@ struct pt_regs *regs; }; -#ifdef __KERNEL__ - -struct sigcontext32_struct { - unsigned int _unused[4]; - int signal; - unsigned int handler; - unsigned int oldmask; - u32 regs; /* 4 byte pointer to the pt_regs32 structure. */ -}; - -#endif /* __KERNEL__ */ - - #endif /* _ASM_PPC64_SIGCONTEXT_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/siginfo.h linuxppc64_2_4/include/asm-ppc64/siginfo.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/siginfo.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/siginfo.h Mon Apr 8 08:56:24 2002 @@ -16,18 +16,6 @@ void *sival_ptr; } sigval_t; - -#ifdef __KERNEL__ - -typedef union sigval32 { - int sival_int; - unsigned int sival_ptr; -} sigval_t32; - - -#endif /* __KERNEL__ */ - - #define SI_MAX_SIZE 128 #define SI_PAD_SIZE ((SI_MAX_SIZE/sizeof(int)) - 3) @@ -80,62 +68,6 @@ } _sifields; } siginfo_t; - -#ifdef __KERNEL__ - -typedef struct siginfo32 { - int si_signo; - int si_errno; - int si_code; - - union { - int _pad[SI_PAD_SIZE]; - - /* kill() */ - struct { - __kernel_pid_t32 _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - } _kill; - - /* POSIX.1b timers */ - struct { - unsigned int _timer1; - unsigned int _timer2; - } _timer; - - /* POSIX.1b signals */ - struct { - __kernel_pid_t32 _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - sigval_t32 _sigval; - } _rt; - - /* SIGCHLD */ - struct { - __kernel_pid_t32 _pid; /* which child */ - unsigned int _uid; /* sender's uid */ - int _status; /* exit code */ - __kernel_clock_t32 _utime; - __kernel_clock_t32 _stime; - } _sigchld; - - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */ - struct { - u32 _addr; /* faulting insn/memory ref. 
*/ - int _trapno; - } _sigfault; - - /* SIGPOLL */ - struct { - int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - int _fd; - } _sigpoll; - } _sifields; -} siginfo_t32; - -#endif /* __KERNEL__ */ - - /* * How these fields are to be accessed. */ @@ -181,6 +113,8 @@ #define SI_MESGQ -3 /* sent by real time mesq state change */ #define SI_ASYNCIO -4 /* sent by AIO completion */ #define SI_SIGIO -5 /* sent by queued SIGIO */ +#define SI_TKILL -6 /* sent by tkill system call */ +#define SI_DETHREAD -7 /* sent by execve() killing subsidiary threads */ #define SI_FROMUSER(siptr) ((siptr)->si_code <= 0) #define SI_FROMKERNEL(siptr) ((siptr)->si_code > 0) diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/signal.h linuxppc64_2_4/include/asm-ppc64/signal.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/signal.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/signal.h Tue Apr 2 15:52:43 2002 @@ -58,17 +58,6 @@ #define SIGRTMIN 32 #define SIGRTMAX (_NSIG-1) - - -#if defined(__KERNEL__) || defined(__WANT_POSIX1B_SIGNALS__) -#define __old_sigset_t32 old_sigset_t32 -#define __old_sigaction32 old_sigaction32 -#else -#endif - - - - /* * SA_FLAGS values: * @@ -148,55 +137,10 @@ struct sigaction sa; }; -#ifdef __KERNEL__ - -typedef unsigned int __old_sigset_t32; -struct __old_sigaction32 { - unsigned sa_handler; - __old_sigset_t32 sa_mask; - unsigned int sa_flags; - unsigned sa_restorer; /* not used by Linux/SPARC yet */ -}; - - - -#define _PPC32_NSIG 64 -#define _PPC32_NSIG_BPW 32 -#define _PPC32_NSIG_WORDS (_PPC32_NSIG / _PPC32_NSIG_BPW) - -typedef struct { - unsigned int sig[_PPC32_NSIG_WORDS]; -} sigset32_t; - -struct sigaction32 { - unsigned int sa_handler; /* Really a pointer, but need to deal - with 32 bits */ - unsigned int sa_flags; - unsigned int sa_restorer; /* Another 32 bit pointer */ - sigset32_t sa_mask; /* A 32 bit mask */ -}; - -#endif /* __KERNEL__ */ - - typedef struct sigaltstack { void *ss_sp; int ss_flags; size_t ss_size; } stack_t; - -#ifdef __KERNEL__ -#include - -typedef struct sigaltstack_32 { - unsigned int ss_sp; - int ss_flags; - __kernel_size_t32 ss_size; -} stack_32_t; - - - - -#endif #endif /* _ASMPPC64_SIGNAL_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/smp.h linuxppc64_2_4/include/asm-ppc64/smp.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/smp.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/smp.h Tue Apr 9 11:16:35 2002 @@ -20,13 +20,11 @@ #include #include - - #ifdef CONFIG_SMP #ifndef __ASSEMBLY__ -#include +#include struct current_set_struct { struct task_struct *task; @@ -52,7 +50,7 @@ #define smp_processor_id() (get_paca()->xPacaIndex) #define hard_smp_processor_id() (get_paca()->xHwProcNum) -#define get_hard_smp_processor_id(CPU) (xPaca[(CPU)].xHwProcNum) +#define get_hard_smp_processor_id(CPU) (paca[(CPU)].xHwProcNum) diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/spinlock.h linuxppc64_2_4/include/asm-ppc64/spinlock.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/spinlock.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/spinlock.h Sun Mar 10 23:26:36 2002 @@ -67,7 +67,7 @@ static __inline__ void spin_unlock(spinlock_t *lock) { - __asm__ __volatile__("eieio # spin_unlock": : :"memory"); + __asm__ __volatile__("lwsync # spin_unlock": : :"memory"); lock->lock = 0; } @@ -137,7 +137,7 @@ unsigned int tmp; __asm__ __volatile__( - "eieio # read_unlock\n\ + "lwsync # read_unlock\n\ 1: lwarx %0,0,%1\n\ addic %0,%0,-1\n\ stwcx. 
%0,0,%1\n\ @@ -192,7 +192,7 @@ static __inline__ void write_unlock(rwlock_t *rw) { - __asm__ __volatile__("eieio # write_unlock": : :"memory"); + __asm__ __volatile__("lwsync # write_unlock": : :"memory"); rw->lock = 0; } diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/stat.h linuxppc64_2_4/include/asm-ppc64/stat.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/stat.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/stat.h Tue Apr 2 15:52:43 2002 @@ -10,28 +10,6 @@ #include -struct stat32 { - __kernel_dev_t32 st_dev; /* 2 */ - /* __kernel_dev_t32 __pad1; */ /* 2 */ - __kernel_ino_t32 st_ino; /* 4 */ - __kernel_mode_t32 st_mode; /* 2 */ - short st_nlink; /* 2 */ - __kernel_uid_t32 st_uid; /* 2 */ - __kernel_gid_t32 st_gid; /* 2 */ - __kernel_dev_t32 st_rdev; /* 2 */ - /* __kernel_dev_t32 __pad2; */ /* 2 */ - __kernel_off_t32 st_size; /* 4 */ - __kernel_off_t32 st_blksize; /* 4 */ - __kernel_off_t32 st_blocks; /* 4 */ - __kernel_time_t32 st_atime; /* 4 */ - unsigned int __unused1; /* 4 */ - __kernel_time_t32 st_mtime; /* 4 */ - unsigned int __unused2; /* 4 */ - __kernel_time_t32 st_ctime; /* 4 */ - unsigned int __unused3; /* 4 */ - unsigned int __unused4[2]; /* 2*4 */ -}; - struct __old_kernel_stat { unsigned short st_dev; unsigned short st_ino; @@ -44,21 +22,6 @@ unsigned long st_atime; unsigned long st_mtime; unsigned long st_ctime; -}; - -struct __old_kernel_stat32 -{ - unsigned short st_dev; - unsigned short st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - unsigned short st_rdev; - unsigned int st_size; - unsigned int st_atime; - unsigned int st_mtime; - unsigned int st_ctime; }; struct stat { diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/statfs.h linuxppc64_2_4/include/asm-ppc64/statfs.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/statfs.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/statfs.h Tue Apr 2 15:52:43 2002 @@ -9,31 +9,10 @@ */ #ifndef __KERNEL_STRICT_NAMES - #include - typedef __kernel_fsid_t fsid_t; -typedef __kernel_fsid_t __kernel_fsid_t32; - #endif -/* - * Both SPARC64 & IA64 also define the following - - */ - -struct statfs32 { - int f_type; - int f_bsize; - int f_blocks; - int f_bfree; - int f_bavail; - int f_files; - int f_ffree; - __kernel_fsid_t32 f_fsid; - int f_namelen; /* SunOS ignores this field. */ - int f_spare[6]; -}; - struct statfs { long f_type; long f_bsize; @@ -48,6 +27,3 @@ }; #endif /* _PPC64_STATFS_H */ - - - diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/system.h linuxppc64_2_4/include/asm-ppc64/system.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/system.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/system.h Fri Mar 29 11:17:02 2002 @@ -38,7 +38,7 @@ * give any ordering guarantees about loads, we have to use the * stronger but slower sync instruction for mb and rmb. */ -#define mb() __asm__ __volatile__ ("sync" : : : "memory") +#define mb() __asm__ __volatile__ ("sync" : : : "memory") #define rmb() __asm__ __volatile__ ("lwsync" : : : "memory") #define wmb() __asm__ __volatile__ ("eieio" : : : "memory") diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/time.h linuxppc64_2_4/include/asm-ppc64/time.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/time.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/time.h Tue Apr 9 11:16:35 2002 @@ -1,5 +1,4 @@ /* - * * Common time prototypes and such for all ppc machines. 
* * Written by Cort Dougan (cort@cs.nmt.edu) to merge @@ -19,7 +18,7 @@ #include #include -#include +#include #include /* time.c */ @@ -50,6 +49,7 @@ unsigned long tb_ticks_per_sec; struct gettimeofday_vars vars[2]; struct gettimeofday_vars * volatile varp; + unsigned var_idx; unsigned tb_to_us; }; @@ -73,22 +73,21 @@ static __inline__ void set_dec(int val) { - struct Paca * paca; - int cur_dec; + struct paca_struct *lpaca = get_paca(); + int cur_dec; - paca = (struct Paca *)mfspr(SPRG3); - if ( paca->xLpPaca.xSharedProc ) { - paca->xLpPaca.xVirtualDecr = val; - cur_dec = get_dec(); - if ( cur_dec > val ) - HvCall_setVirtualDecr(); - } - else - mtspr(SPRN_DEC, val); + if ( lpaca->xLpPaca.xSharedProc ) { + lpaca->xLpPaca.xVirtualDecr = val; + cur_dec = get_dec(); + if ( cur_dec > val ) + HvCall_setVirtualDecr(); + } else { + mtspr(SPRN_DEC, val); + } } extern __inline__ unsigned long tb_ticks_since(unsigned long tstamp) { - return get_tb() - tstamp; + return get_tb() - tstamp; } #define mulhwu(x,y) \ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/ucontext.h linuxppc64_2_4/include/asm-ppc64/ucontext.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/ucontext.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/ucontext.h Tue Apr 2 15:52:43 2002 @@ -1,6 +1,8 @@ #ifndef _ASMPPC64_UCONTEXT_H #define _ASMPPC64_UCONTEXT_H +#include + /* Copied from i386. * * This program is free software; you can redistribute it and/or @@ -16,19 +18,5 @@ struct sigcontext_struct uc_mcontext; sigset_t uc_sigmask; /* mask last for extensibility */ }; - -#ifdef __KERNEL__ - - -struct ucontext32 { - unsigned int uc_flags; - unsigned int uc_link; - stack_32_t uc_stack; - struct sigcontext32_struct uc_mcontext; - sigset_t uc_sigmask; /* mask last for extensibility */ -}; - -#endif /* __KERNEL__ */ - #endif /* _ASMPPC64_UCONTEXT_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-ppc64/unistd.h linuxppc64_2_4/include/asm-ppc64/unistd.h --- ../kernel.org/linux-2.4.19/include/asm-ppc64/unistd.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/asm-ppc64/unistd.h Mon Apr 8 08:53:11 2002 @@ -211,6 +211,27 @@ #define __NR_pciconfig_iobase 200 #define __NR_multiplexer 201 #define __NR_getdents64 202 +#define __NR_pivot_root 203 +#define __NR_fcntl64 204 +#define __NR_madvise 205 +#define __NR_mincore 206 +#define __NR_gettid 207 +#if 0 /* Reserved syscalls */ +#define __NR_tkill 208 +#define __NR_setxattr 209 +#define __NR_lsetxattr 210 +#define __NR_fsetxattr 211 +#define __NR_getxattr 212 +#define __NR_lgetxattr 213 +#define __NR_fgetxattr 214 +#define __NR_listxattr 215 +#define __NR_llistxattr 216 +#define __NR_flistxattr 217 +#define __NR_removexattr 218 +#define __NR_lremovexattr 219 +#define __NR_fremovexattr 220 +#define __NR_futex 221 +#endif #define __NR(n) #n diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/asm-sparc64/pil.h linuxppc64_2_4/include/asm-sparc64/pil.h --- ../kernel.org/linux-2.4.19/include/asm-sparc64/pil.h Fri Apr 19 11:00:23 2002 +++ linuxppc64_2_4/include/asm-sparc64/pil.h Mon Apr 22 13:53:43 2002 @@ -1,4 +1,4 @@ -/* $Id: pil.h,v 1.1.2.1 2002/03/03 10:31:56 davem Exp $ */ +/* $Id: pil.h,v 1.1 2002/04/22 18:53:43 tgall Exp $ */ #ifndef _SPARC64_PIL_H #define _SPARC64_PIL_H diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/linux/blk.h linuxppc64_2_4/include/linux/blk.h --- ../kernel.org/linux-2.4.19/include/linux/blk.h Fri Apr 19 11:00:46 2002 +++ linuxppc64_2_4/include/linux/blk.h Mon Apr 22 10:35:35 2002 @@ -313,6 +313,21 @@ 
#define DEVICE_REQUEST i2ob_request #define DEVICE_NR(device) (MINOR(device)>>4) +#elif (MAJOR_NR == VIODASD_MAJOR) + +#define DEVICE_NAME "viod" +#define TIMEOUT_VALUE (25*HZ) +#define DEVICE_REQUEST do_viodasd_request +#define DEVICE_NR(device) (MINOR(device) >> 3) + +#elif (MAJOR_NR == VIOCD_MAJOR) + +#define DEVICE_NAME "viocd" +#define TIMEOUT_VALUE (25*HZ) +#define DEVICE_REQUEST do_viocd_request +#define DEVICE_NR(device) (MINOR(device)) +#define DEVICE_ON(device) +#define DEVICE_OFF(device) #elif (MAJOR_NR == COMPAQ_SMART2_MAJOR) #define DEVICE_NAME "ida" diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/linux/hcdp_serial.h linuxppc64_2_4/include/linux/hcdp_serial.h --- ../kernel.org/linux-2.4.19/include/linux/hcdp_serial.h Mon Apr 22 11:34:26 2002 +++ linuxppc64_2_4/include/linux/hcdp_serial.h Wed Dec 31 18:00:00 1969 @@ -1,84 +0,0 @@ -/* - * linux/include/asm-ia64/hcdp_serial.h - * - * Copyright (C) 2002 Hewlett-Packard Co. - * Copyright (C) 2002 Khalid Aziz - * - * Definitions for HCDP defined serial ports (Serial console and - * debug ports) - * - */ -#ifndef _ASM_IA64_HCDP_SERIAL_H -#define _ASM_IA64_HCDP_SERIAL_H - -/* ACPI table signatures */ -#define HCDP_SIG_LEN 4 -#define HCDP_SIGNATURE "HCDP" - -/* Space ID as defined in ACPI generic address structure */ -#define ACPI_MEM_SPACE 0 -#define ACPI_IO_SPACE 1 -#define ACPI_PCICONF_SPACE 2 - -/* - * Maximum number of HCDP devices we want to read in - */ -#define MAX_HCDP_DEVICES 6 - -/* - * Default base baud rate if clock rate is 0 in HCDP table. - */ -#define DEFAULT_BAUD_BASE 115200 - -/* - * ACPI Generic Address Structure - */ -typedef struct { - u8 space_id; - u8 bit_width; - u8 bit_offset; - u8 resv; - u32 addrlo; - u32 addrhi; -} acpi_gen_addr; - -/* HCDP Device descriptor entry types */ -#define HCDP_DEV_CONSOLE 0 -#define HCDP_DEV_DEBUG 1 - -/* HCDP Device descriptor type */ -typedef struct { - u8 type; - u8 bits; - u8 parity; - u8 stop_bits; - u8 pci_seg; - u8 pci_bus; - u8 pci_dev; - u8 pci_func; - u64 baud; - acpi_gen_addr base_addr; - u16 pci_dev_id; - u16 pci_vendor_id; - u32 global_int; - u32 clock_rate; - u8 pci_prog_intfc; - u8 resv; -} hcdp_dev_t; - -/* HCDP Table format */ -typedef struct { - u8 signature[4]; - u32 len; - u8 rev; - u8 chksum; - u8 oemid[6]; - u8 oem_tabid[8]; - u32 oem_rev; - u8 creator_id[4]; - u32 creator_rev; - u32 num_entries; - hcdp_dev_t hcdp_dev[MAX_HCDP_DEVICES]; -} hcdp_t; - -#endif /* _ASM_IA64_HCDP_SERIAL_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/linux/jfs_fs.h linuxppc64_2_4/include/linux/jfs_fs.h --- ../kernel.org/linux-2.4.19/include/linux/jfs_fs.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs_fs.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,34 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * +*/ +#ifndef _LINUX_JFS_FS_H +#define _LINUX_JFS_FS_H + +#include + +#include +#include +#include + + +/* JFS magic number */ + +#define JFS_SUPER_MAGIC 0x3153464a /* "JFS1" */ + +#endif /* _LINUX_JFS_FS_H */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/linux/jfs_fs_i.h linuxppc64_2_4/include/linux/jfs_fs_i.h --- ../kernel.org/linux-2.4.19/include/linux/jfs_fs_i.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs_fs_i.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,80 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * +*/ +#ifndef _JFS_FS_I +#define _JFS_FS_I + +#include +#include + +typedef struct jfs_rwlock { + struct rw_semaphore rw_sem; + atomic_t in_use; /* for hacked implementation of trylock */ +} jfs_rwlock_t; + +#define JFS_IP(ip) ((struct jfs_inode_info *)(ip)->u.generic_ip) + +struct jfs_inode_info { + int fileset; /* 4: fileset number (always 16)*/ + uint mode2; /* 4: jfs-specific mode */ + pxd_t ixpxd; /* 8: inode extent descriptor */ + dxd_t acl; /* 16: dxd describing acl */ + dxd_t ea; /* 16: dxd describing ea */ + time_t otime; /* 4: time created */ + uint next_index; /* 4: next available directory entry index */ + int acltype; /* 4: Type of ACL */ + short btorder; /* 2: access order */ + short btindex; /* 2: btpage entry index*/ + struct inode *ipimap; /* 4: inode map */ + ushort flag; /* 2: JFS in-memory flag*/ + unchar cflag; /* 1: commit flags */ + unchar agno; /* 1: ag number */ + ushort bxflag; /* 2: xflag of pseudo buffer? */ + short blid; /* 2: lid of pseudo buffer? 
*/ + ushort atlhead; /* 2: anonymous tlock list head */ + ushort atltail; /* 2: anonymous tlock list tail */ + struct inode *atlnext; /* 4: next inode w/anonymous txn's */ + struct inode *atlprev; /* 4: previous inode w/anonymous txn's */ + struct page *extent_page; /* 4: page containing extent */ + jfs_rwlock_t rdwrlock; /* 12/20: read/write lock */ + ushort xtlid; /* 2: lid of xtree lock on directory */ + short pad; /* 2: pad */ + union { + struct { + xtpage_t _xtroot; /* 288: xtree root */ + struct inomap *_imap; /* 4: inode map header */ + } file; + struct { + dir_table_slot_t _table[12]; /* 96: directory index */ + dtroot_t _dtroot; /* 288: dtree root */ + } dir; + struct { + unchar _unused[16]; /* 16: */ + dxd_t _dxd; /* 16: */ + unchar _inline[128]; /* 128: inline symlink */ + } link; + } u; +}; +#define i_xtroot u.file._xtroot +#define i_imap u.file._imap +#define i_dirtable u.dir._table +#define i_dtroot u.dir._dtroot +#define i_inline u.link._inline + +#endif diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/linux/jfs_fs_sb.h linuxppc64_2_4/include/linux/jfs_fs_sb.h --- ../kernel.org/linux-2.4.19/include/linux/jfs_fs_sb.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/jfs_fs_sb.h Wed Nov 14 10:19:36 2001 @@ -0,0 +1,53 @@ +/* + * + * Copyright (c) International Business Machines Corp., 2000 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * +*/ +#ifndef _JFS_FS_SB +#define _JFS_FS_SB + +#define JFS_SBI(sb) ((struct jfs_sb_info *)(sb)->u.generic_sbp) + +struct jfs_sb_info { + unsigned long mntflag; /* 4: aggregate attributes */ + struct inode *ipbmap; /* 4: block map inode */ + struct inode *ipaimap; /* 4: aggregate inode map inode */ + struct inode *ipaimap2; /* 4: secondary aimap inode */ + struct inode *ipimap; /* 4: aggregate inode map inode */ + struct jfs_log *log; /* 4: log */ + short bsize; /* 2: logical block size */ + short l2bsize; /* 2: log2 logical block size */ + short nbperpage; /* 2: blocks per page */ + short l2nbperpage; /* 2: log2 blocks per page */ + short l2niperblk; /* 2: log2 inodes per page */ + short reserved; /* 2: log2 inodes per page */ + pxd_t logpxd; /* 8: pxd describing log */ + pxd_t ait2; /* 8: pxd describing AIT copy */ + /* Formerly in ipimap */ + uint gengen; /* 4: inode generation generator*/ + uint inostamp; /* 4: shows inode belongs to fileset*/ + + /* Formerly in ipbmap */ + struct bmap *bmap; /* 4: incore bmap descriptor */ + struct nls_table *nls_tab; /* 4: current codepage */ + struct inode *direct_inode; /* 4: inode for physical I/O */ + struct address_space *direct_mapping; /* 4: mapping for physical I/O */ +}; /* (72) */ + +#define isReadOnly(ip) ((JFS_SBI((ip)->i_sb)->log) ? 
0 : 1) + +#endif diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/linux/major.h linuxppc64_2_4/include/linux/major.h --- ../kernel.org/linux-2.4.19/include/linux/major.h Fri Apr 19 10:30:03 2002 +++ linuxppc64_2_4/include/linux/major.h Thu Aug 30 10:54:52 2001 @@ -117,6 +117,9 @@ #define COMPAQ_CISS_MAJOR6 110 #define COMPAQ_CISS_MAJOR7 111 +#define VIODASD_MAJOR 112 +#define VIOCD_MAJOR 113 + #define ATARAID_MAJOR 114 #define DASD_MAJOR 94 /* Official assignations from Peter */ diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/linux/mtd/concat.h linuxppc64_2_4/include/linux/mtd/concat.h --- ../kernel.org/linux-2.4.19/include/linux/mtd/concat.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/linux/mtd/concat.h Mon Apr 22 14:12:31 2002 @@ -5,7 +5,7 @@ * * This code is GPL * - * $Id: concat.h,v 1.1 2002/03/08 16:34:36 rkaiser Exp $ + * $Id: concat.h,v 1.1 2002/04/22 19:12:31 tgall Exp $ */ #ifndef MTD_CONCAT_H diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/linux/pci.h linuxppc64_2_4/include/linux/pci.h --- ../kernel.org/linux-2.4.19/include/linux/pci.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/linux/pci.h Mon Apr 22 10:35:36 2002 @@ -416,10 +416,10 @@ void *sysdata; /* hook for sys-specific extension */ struct proc_dir_entry *procdir; /* directory entry in /proc/bus/pci */ - unsigned char number; /* bus number */ - unsigned char primary; /* number of primary bridge */ - unsigned char secondary; /* number of secondary bridge */ - unsigned char subordinate; /* max number of subordinate buses */ + unsigned int number; /* bus number */ + unsigned int primary; /* number of primary bridge */ + unsigned int secondary; /* number of secondary bridge */ + unsigned int subordinate; /* max number of subordinate buses */ char name[48]; unsigned short vendor; diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/linux/proc_fs.h linuxppc64_2_4/include/linux/proc_fs.h --- ../kernel.org/linux-2.4.19/include/linux/proc_fs.h Fri Apr 19 11:00:39 2002 +++ linuxppc64_2_4/include/linux/proc_fs.h Mon Apr 22 10:35:36 2002 @@ -25,7 +25,11 @@ /* Finally, the dynamically allocatable proc entries are reserved: */ #define PROC_DYNAMIC_FIRST 4096 +#ifdef CONFIG_PPC64 +#define PROC_NDYNAMIC 16384 +#else #define PROC_NDYNAMIC 4096 +#endif #define PROC_SUPER_MAGIC 0x9fa0 diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/linux/seq_file.h linuxppc64_2_4/include/linux/seq_file.h --- ../kernel.org/linux-2.4.19/include/linux/seq_file.h Fri Apr 19 11:00:24 2002 +++ linuxppc64_2_4/include/linux/seq_file.h Mon Apr 22 10:35:36 2002 @@ -2,6 +2,10 @@ #define _LINUX_SEQ_FILE_H #ifdef __KERNEL__ +#include +#include +#include + struct seq_operations; struct seq_file { @@ -24,6 +28,7 @@ int seq_open(struct file *, struct seq_operations *); ssize_t seq_read(struct file *, char *, size_t, loff_t *); +ssize_t seq_read_single(struct file *, char *, size_t, loff_t *); loff_t seq_lseek(struct file *, loff_t, int); int seq_release(struct inode *, struct file *); int seq_escape(struct seq_file *, const char *, const char *); diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/linux/vethdevice.h linuxppc64_2_4/include/linux/vethdevice.h --- ../kernel.org/linux-2.4.19/include/linux/vethdevice.h Wed Dec 31 18:00:00 1969 +++ linuxppc64_2_4/include/linux/vethdevice.h Tue Jun 19 11:06:08 2001 @@ -0,0 +1,16 @@ +/* File vethdevice.h created by Kyle A. Lucke on Wed Aug 9 2000. 
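+ *
+ * Declares init_vethdev(), the setup entry point exported for the
+ * iSeries virtual ethernet driver.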
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/linux/vethdevice.h linuxppc64_2_4/include/linux/vethdevice.h
--- ../kernel.org/linux-2.4.19/include/linux/vethdevice.h	Wed Dec 31 18:00:00 1969
+++ linuxppc64_2_4/include/linux/vethdevice.h	Tue Jun 19 11:06:08 2001
@@ -0,0 +1,16 @@
+/* File vethdevice.h created by Kyle A. Lucke on Wed Aug 9 2000. */
+
+/* Change Activity: */
+/* End Change Activity */
+
+#ifndef _LINUX_VETHDEVICE_H
+#define _LINUX_VETHDEVICE_H
+
+#include
+
+#ifdef __KERNEL__
+extern struct net_device * init_vethdev(struct net_device *, int, int);
+#endif
+
+#endif /* _LINUX_VETHDEVICE_H */
+
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/include/net/bluetooth/sco.h linuxppc64_2_4/include/net/bluetooth/sco.h
--- ../kernel.org/linux-2.4.19/include/net/bluetooth/sco.h	Mon Apr 22 11:34:27 2002
+++ linuxppc64_2_4/include/net/bluetooth/sco.h	Tue Apr 23 09:37:32 2002
@@ -23,7 +23,7 @@
 */
 
 /*
- * $Id: sco.h,v 1.1.1.1 2002/03/08 21:03:15 maxk Exp $
+ * $Id: sco.h,v 1.1 2002/04/23 14:37:32 tgall Exp $
 */
 
 #ifndef __SCO_H
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/init/do_mounts.c linuxppc64_2_4/init/do_mounts.c
--- ../kernel.org/linux-2.4.19/init/do_mounts.c	Mon Apr 22 11:34:27 2002
+++ linuxppc64_2_4/init/do_mounts.c	Tue Apr 23 09:37:32 2002
@@ -198,6 +198,7 @@
 	{ "cciss/c0d13p",0x68D0 },
 	{ "cciss/c0d14p",0x68E0 },
 	{ "cciss/c0d15p",0x68F0 },
+
 #endif
 	{ "ataraid/d0p",0x7200 },
 	{ "ataraid/d1p",0x7210 },
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/init/main.c linuxppc64_2_4/init/main.c
--- ../kernel.org/linux-2.4.19/init/main.c	Mon Apr 22 11:34:27 2002
+++ linuxppc64_2_4/init/main.c	Tue Apr 23 09:37:32 2002
@@ -157,7 +157,7 @@
    better than 1% */
 #define LPS_PREC 8
 
-void __init calibrate_delay(void)
+void __init do_calibrate_delay(void)
 {
 	unsigned long ticks, loopbit;
 	int lps_precision = LPS_PREC;
@@ -197,6 +197,8 @@
 		loops_per_jiffy/(500000/HZ),
 		(loops_per_jiffy/(5000/HZ)) % 100);
 }
+
+void (*calibrate_delay)(void) = do_calibrate_delay;
 
 static int __init debug_kernel(char *str)
 {
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/kernel/ptrace.c linuxppc64_2_4/kernel/ptrace.c
--- ../kernel.org/linux-2.4.19/kernel/ptrace.c	Fri Apr 19 11:00:15 2002
+++ linuxppc64_2_4/kernel/ptrace.c	Thu Feb 21 21:00:33 2002
@@ -121,10 +121,114 @@
 }
 
 /*
- * Access another process' address space.
- * Source/target buffer must be kernel space,
- * Do not walk the page table directly, use get_user_pages
+ * Access another process' address space, one page at a time.
  */
+static int access_one_page(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr, void *buf, int len, int write)
+{
+	pgd_t * pgdir;
+	pmd_t * pgmiddle;
+	pte_t * pgtable;
+	char *maddr;
+	struct page *page;
+
+repeat:
+	spin_lock(&mm->page_table_lock);
+	pgdir = pgd_offset(vma->vm_mm, addr);
+	if (pgd_none(*pgdir))
+		goto fault_in_page;
+	if (pgd_bad(*pgdir))
+		goto bad_pgd;
+	pgmiddle = pmd_offset(pgdir, addr);
+	if (pmd_none(*pgmiddle))
+		goto fault_in_page;
+	if (pmd_bad(*pgmiddle))
+		goto bad_pmd;
+	pgtable = pte_offset(pgmiddle, addr);
+	if (!pte_present(*pgtable))
+		goto fault_in_page;
+	if (write && (!pte_write(*pgtable) || !pte_dirty(*pgtable)))
+		goto fault_in_page;
+	page = pte_page(*pgtable);
+
+	/* ZERO_PAGE is special: reads from it are ok even though it's marked reserved */
+	if (page != ZERO_PAGE(addr) || write) {
+		if ((!VALID_PAGE(page)) || PageReserved(page)) {
+			spin_unlock(&mm->page_table_lock);
+			return 0;
+		}
+	}
+	get_page(page);
+	spin_unlock(&mm->page_table_lock);
+	flush_cache_page(vma, addr);
+
+	if (write) {
+		maddr = kmap(page) + (addr & ~PAGE_MASK);
+		memcpy(maddr, buf, len);
+		flush_page_to_ram(page);
+		flush_icache_range((unsigned long) maddr,
+				   (unsigned long) maddr + len);
+		kunmap(page);
+	} else {
+		maddr = kmap(page);
+		memcpy(buf, maddr + (addr & ~PAGE_MASK), len);
+		flush_page_to_ram(page);
+		kunmap(page);
+	}
+	put_page(page);
+	return len;
+
+fault_in_page:
+	spin_unlock(&mm->page_table_lock);
+	/* -1: out of memory. 0: unmapped page */
+	if (handle_mm_fault(mm, vma, addr, write) > 0)
+		goto repeat;
+	return 0;
+
+bad_pgd:
+	spin_unlock(&mm->page_table_lock);
+	pgd_ERROR(*pgdir);
+	return 0;
+
+bad_pmd:
+	spin_unlock(&mm->page_table_lock);
+	pmd_ERROR(*pgmiddle);
+	return 0;
+}
+
+static int access_mm(struct mm_struct *mm, struct vm_area_struct * vma, unsigned long addr, void *buf, int len, int write)
+{
+	int copied = 0;
+
+	for (;;) {
+		unsigned long offset = addr & ~PAGE_MASK;
+		int this_len = PAGE_SIZE - offset;
+		int retval;
+
+		if (this_len > len)
+			this_len = len;
+		retval = access_one_page(mm, vma, addr, buf, this_len, write);
+		copied += retval;
+		if (retval != this_len)
+			break;
+
+		len -= retval;
+		if (!len)
+			break;
+
+		addr += retval;
+		buf += retval;
+
+		if (addr < vma->vm_end)
+			continue;
+		if (!vma->vm_next)
+			break;
+		if (vma->vm_next->vm_start != vma->vm_end)
+			break;
+
+		vma = vma->vm_next;
+	}
+	return copied;
+}
 
 int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
 {
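The rewritten path above copies through the kernel's own mapping one page at a
time, faulting pages in as needed, instead of going through get_user_pages().
For orientation, here is a sketch of how a PTRACE_PEEKDATA-style read sits on
top of access_process_vm(); the call matches what kernel/ptrace.c already does,
but the helper name is invented for the example.

/* Illustration only: a ptrace-style peek built on access_process_vm(). */
static int peek_remote_long(struct task_struct *child,
			    unsigned long addr, unsigned long *res)
{
	unsigned long tmp;
	int copied;

	/* last argument 0 == read from the child; access_mm() above does
	 * the page-at-a-time walk across contiguous vmas */
	copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
	if (copied != sizeof(tmp))
		return -EIO;	/* unmapped, or the fault could not be handled */
	*res = tmp;
	return 0;
}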
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/kernel/sched.c linuxppc64_2_4/kernel/sched.c
--- ../kernel.org/linux-2.4.19/kernel/sched.c	Fri Apr 19 11:00:47 2002
+++ linuxppc64_2_4/kernel/sched.c	Mon Apr 22 10:35:37 2002
@@ -1171,10 +1171,13 @@
 	else
 		printk(" (NOTLB)\n");
 
+#if defined(CONFIG_X86) || defined(CONFIG_SPARC64) || defined(CONFIG_ARM) || defined(CONFIG_ALPHA) || defined(CONFIG_PPC64)
+/* This is very useful, but doesn't work on all archs yet */
 	{
 		extern void show_trace_task(struct task_struct *tsk);
 		show_trace_task(p);
 	}
+#endif
 }
 
 char * render_sigset_t(sigset_t *set, char *buffer)
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/kernel/sys.c linuxppc64_2_4/kernel/sys.c
--- ../kernel.org/linux-2.4.19/kernel/sys.c	Fri Apr 19 10:30:02 2002
+++ linuxppc64_2_4/kernel/sys.c	Thu Feb 21 21:00:33 2002
@@ -1272,6 +1272,16 @@
 			}
 			current->keep_capabilities = arg2;
 			break;
+
+#ifdef SET_FP_EXC_MODE
+		case PR_SET_FP_EXC:
+			error = SET_FP_EXC_MODE(current, arg2);
+			break;
+		case PR_GET_FP_EXC:
+			error = GET_FP_EXC_MODE(current);
+			break;
+#endif
+
 		default:
 			error = -EINVAL;
 			break;
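The two new prctl() cases above are compiled in only when the architecture
defines SET_FP_EXC_MODE, which in this tree is the 64-bit PowerPC port. From
userspace the interface would be exercised roughly as follows; this is a sketch
only, and PR_FP_EXC_PRECISE stands in for whatever mode constants the arch
headers of this tree actually define, so treat that name as an assumption.

/* Userspace illustration, not part of the patch. Assumes this tree's
 * <linux/prctl.h> exports PR_SET_FP_EXC/PR_GET_FP_EXC and a mode value
 * such as the hypothetical PR_FP_EXC_PRECISE used here. */
#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

int main(void)
{
	/* Ask for precise (synchronous) floating-point exceptions. */
	if (prctl(PR_SET_FP_EXC, PR_FP_EXC_PRECISE, 0, 0, 0) == -1)
		perror("PR_SET_FP_EXC");
	printf("current FP exception mode: %d\n",
	       (int) prctl(PR_GET_FP_EXC, 0, 0, 0, 0));
	return 0;
}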
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/lib/brlock.c linuxppc64_2_4/lib/brlock.c
--- ../kernel.org/linux-2.4.19/lib/brlock.c	Fri Apr 19 10:30:02 2002
+++ linuxppc64_2_4/lib/brlock.c	Thu Nov 15 00:10:51 2001
@@ -14,6 +14,7 @@
 
 #include
 #include
+#include
 
 #ifdef __BRLOCK_USE_ATOMICS
 
@@ -54,7 +55,8 @@
 		if (__brlock_array[cpu_logical_map(i)][idx] != 0) {
 			spin_unlock(&__br_write_locks[idx].lock);
 			barrier();
-			cpu_relax();
+			/* We must allow recursive readers to make progress */
+			udelay(1);
 			goto again;
 		}
 	}
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/mm/filemap.c linuxppc64_2_4/mm/filemap.c
--- ../kernel.org/linux-2.4.19/mm/filemap.c	Mon Apr 22 11:34:27 2002
+++ linuxppc64_2_4/mm/filemap.c	Tue Apr 23 09:37:32 2002
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -3168,7 +3169,7 @@
 		page_hash_bits++;
 
 	page_hash_table = (struct page **)
-		__get_free_pages(GFP_ATOMIC, order);
+		vmalloc(PAGE_SIZE << order);
 	} while(page_hash_table == NULL && --order > 0);
 
 	printk("Page-cache hash table entries: %d (order: %ld, %ld bytes)\n",
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/net/bluetooth/hci_conn.c linuxppc64_2_4/net/bluetooth/hci_conn.c
--- ../kernel.org/linux-2.4.19/net/bluetooth/hci_conn.c	Mon Apr 22 11:34:27 2002
+++ linuxppc64_2_4/net/bluetooth/hci_conn.c	Tue Apr 23 09:37:34 2002
@@ -25,7 +25,7 @@
 /*
  * HCI Connection handling.
  *
- * $Id: hci_conn.c,v 1.1 2002/03/08 21:06:59 maxk Exp $
+ * $Id: hci_conn.c,v 1.1 2002/04/23 14:37:34 tgall Exp $
 */
 
 #include
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/net/bluetooth/hci_event.c linuxppc64_2_4/net/bluetooth/hci_event.c
--- ../kernel.org/linux-2.4.19/net/bluetooth/hci_event.c	Mon Apr 22 11:34:27 2002
+++ linuxppc64_2_4/net/bluetooth/hci_event.c	Tue Apr 23 09:37:34 2002
@@ -25,7 +25,7 @@
 /*
  * HCI Events.
  *
- * $Id: hci_event.c,v 1.2 2002/03/26 17:56:44 maxk Exp $
+ * $Id: hci_event.c,v 1.1 2002/04/23 14:37:34 tgall Exp $
 */
 
 #include
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/net/bluetooth/l2cap.c linuxppc64_2_4/net/bluetooth/l2cap.c
--- ../kernel.org/linux-2.4.19/net/bluetooth/l2cap.c	Mon Apr 22 11:34:27 2002
+++ linuxppc64_2_4/net/bluetooth/l2cap.c	Tue Apr 23 09:37:34 2002
@@ -25,7 +25,7 @@
 /*
  * BlueZ L2CAP core and sockets.
  *
- * $Id: l2cap.c,v 1.6 2002/04/03 23:34:37 maxk Exp $
+ * $Id: l2cap.c,v 1.1 2002/04/23 14:37:34 tgall Exp $
 */
 
 #define VERSION "2.0"
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/net/bluetooth/sco.c linuxppc64_2_4/net/bluetooth/sco.c
--- ../kernel.org/linux-2.4.19/net/bluetooth/sco.c	Mon Apr 22 11:34:27 2002
+++ linuxppc64_2_4/net/bluetooth/sco.c	Tue Apr 23 09:37:36 2002
@@ -25,7 +25,7 @@
 /*
  * BlueZ SCO sockets.
  *
- * $Id: sco.c,v 1.2 2002/03/18 19:16:40 maxk Exp $
+ * $Id: sco.c,v 1.1 2002/04/23 14:37:36 tgall Exp $
 */
 
 #define VERSION "0.2"
diff -uNr --exclude=CVS ../kernel.org/linux-2.4.19/net/core/pktgen.c linuxppc64_2_4/net/core/pktgen.c
--- ../kernel.org/linux-2.4.19/net/core/pktgen.c	Fri Apr 19 11:00:24 2002
+++ linuxppc64_2_4/net/core/pktgen.c	Mon Apr 22 13:53:44 2002
@@ -1,4 +1,4 @@
-/* $Id: pktgen.c,v 1.1.2.1 2002/03/01 12:15:05 davem Exp $
+/* $Id: pktgen.c,v 1.1 2002/04/22 18:53:44 tgall Exp $
 * pktgen.c: Packet Generator for performance evaluation.
 *
 * Copyright 2001, 2002 by Robert Olsson