diff -urN linux-2.4.0-test6/Documentation/Configure.help linux-2.4.0-test6-lia/Documentation/Configure.help --- linux-2.4.0-test6/Documentation/Configure.help Thu Aug 10 19:56:17 2000 +++ linux-2.4.0-test6-lia/Documentation/Configure.help Fri Aug 11 17:13:35 2000 @@ -16466,6 +16466,108 @@ another UltraSPARC-IIi-cEngine boardset with a 7-segment display, you should say N to this option. +IA-64 system type +CONFIG_IA64_GENERIC + This selects the system type of your hardware. A "generic" kernel + will run on any supported IA-64 system. However, if you configure + a kernel for your specific system, it will be faster and smaller. + + To find out what type of IA-64 system you have, you may want to + check the IA-64 Linux web site at http://www.linux-ia64.org/. + As of the time of this writing, most hardware is DIG compliant, + so the "DIG-compliant" option is usually the right choice. + + HP-simulator For the HP simulator (http://software.hp.com/ia64linux/). + SN1-simulator For the SGI SN1 simulator. + DIG-compliant For DIG ("Developer's Interface Guide") compliant system. + + If you don't know what to do, choose "generic". + +Kernel page size +CONFIG_IA64_PAGE_SIZE_4KB + + This lets you select the page size of the kernel. For best IA-64 + performance, a page size of 8KB or 16KB is recommended. For best + IA-32 compatibility, a page size of 4KB should be selected (the vast + majority of IA-32 binaries work perfectly fine with a larger page + size). For Itanium systems, do NOT choose a page size larger than + 16KB. + + 4KB For best IA-32 compatibility + 8KB For best IA-64 performance + 16KB For best IA-64 performance + 64KB Not for Itanium. + + If you don't know what to do, choose 8KB. + +Enable Itanium A-step specific code +CONFIG_ITANIUM_ASTEP_SPECIFIC + Select this option to build a kernel for an Itanium prototype system + with an A-step CPU. You have an A-step CPU if the "revision" field in + /proc/cpuinfo is 0. 
+ +Enable Itanium A1-step specific code +CONFIG_ITANIUM_A1_SPECIFIC + Select this option to build a kernel for an Itanium prototype system + with an A1-step CPU. If you don't know whether you have an A1-step CPU, + you probably don't and you can answer "no" here. + +Enable Itanium B-step specific code +CONFIG_ITANIUM_BSTEP_SPECIFIC + Select this option to build a kernel for an Itanium prototype system + with a B-step CPU. You have a B-step CPU if the "revision" field in + /proc/cpuinfo has a value in the range from 1 to 4. + +Enable Itanium B0-step specific code +CONFIG_ITANIUM_B0_SPECIFIC + Select this option to build a kernel for an Itanium prototype system + with a B0-step CPU. You have a B0-step CPU if the "revision" field in + /proc/cpuinfo is 1. + +Force interrupt redirection +CONFIG_IA64_HAVE_IRQREDIR + Select this option if you know that your system has the ability to + redirect interrupts to different CPUs. Select N here if you're + unsure. + +Enable use of global TLB purge instruction (ptc.g) +CONFIG_ITANIUM_PTCG + Say Y here if you want the kernel to use the IA-64 "ptc.g" + instruction to flush the TLB on all CPUs. Select N here if + you're unsure. + +Enable SoftSDV hacks +CONFIG_IA64_SOFTSDV_HACKS + Say Y here to enable hacks to make the kernel work on the Intel + SoftSDV simulator. Select N here if you're unsure. + +Enable AzusA hacks +CONFIG_IA64_AZUSA_HACKS + Say Y here to enable hacks to make the kernel work on the NEC + AzusA platform. Select N here if you're unsure. + +Enable IA-64 Machine Check Abort +CONFIG_IA64_MCA + Say Y here to enable machine check support for IA-64. If you're + unsure, answer Y. + +Performance monitor support +CONFIG_PERFMON + Selects whether support for the IA-64 performance monitor hardware + is included in the kernel. This makes some kernel data-structures a + little bigger and slows down execution a bit, but it is still + usually a good idea to turn this on. If you're unsure, say N. 
+ +/proc/pal support +CONFIG_IA64_PALINFO + If you say Y here, you are able to get PAL (Processor Abstraction + Layer) information in /proc/pal. This contains useful information + about the processors in your systems, such as cache and TLB sizes + and the PAL firmware version in use. + + To use this option, you have to check that the "/proc file system + support" (CONFIG_PROC_FS) is enabled, too. + # # A couple of things I keep forgetting: # capitalize: AppleTalk, Ethernet, DOS, DMA, FAT, FTP, Internet, diff -urN linux-2.4.0-test6/Makefile linux-2.4.0-test6-lia/Makefile --- linux-2.4.0-test6/Makefile Thu Aug 10 19:56:18 2000 +++ linux-2.4.0-test6-lia/Makefile Fri Aug 11 19:02:15 2000 @@ -87,7 +87,7 @@ CPPFLAGS := -D__KERNEL__ -I$(HPATH) -CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer +CFLAGS := $(CPPFLAGS) -Wall -Wstrict-prototypes -g -O2 -fomit-frame-pointer AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS) # diff -urN linux-2.4.0-test6/arch/ia64/config.in linux-2.4.0-test6-lia/arch/ia64/config.in --- linux-2.4.0-test6/arch/ia64/config.in Wed Aug 2 18:54:01 2000 +++ linux-2.4.0-test6-lia/arch/ia64/config.in Fri Aug 11 16:59:01 2000 @@ -18,15 +18,16 @@ comment 'General setup' define_bool CONFIG_IA64 y +define_bool CONFIG_SWIOTLB y # for now... 
define_bool CONFIG_ISA n define_bool CONFIG_SBUS n choice 'IA-64 system type' \ - "Generic CONFIG_IA64_GENERIC \ + "generic CONFIG_IA64_GENERIC \ + DIG-compliant CONFIG_IA64_DIG \ HP-simulator CONFIG_IA64_HP_SIM \ - SN1-simulator CONFIG_IA64_SGI_SN1_SIM \ - DIG-compliant CONFIG_IA64_DIG" Generic + SN1-simulator CONFIG_IA64_SGI_SN1_SIM" generic choice 'Kernel page size' \ "4KB CONFIG_IA64_PAGE_SIZE_4KB \ @@ -38,16 +39,18 @@ define_bool CONFIG_ITANIUM y define_bool CONFIG_IA64_BRL_EMU y bool ' Enable Itanium A-step specific code' CONFIG_ITANIUM_ASTEP_SPECIFIC - bool ' Enable Itanium A1-step specific code' CONFIG_ITANIUM_A1_SPECIFIC + if [ "$CONFIG_ITANIUM_ASTEP_SPECIFIC" = "y" ]; then + bool ' Enable Itanium A1-step specific code' CONFIG_ITANIUM_A1_SPECIFIC + fi + bool ' Enable Itanium B-step specific code' CONFIG_ITANIUM_BSTEP_SPECIFIC + if [ "$CONFIG_ITANIUM_BSTEP_SPECIFIC" = "y" ]; then + bool ' Enable Itanium B0-step specific code' CONFIG_ITANIUM_B0_SPECIFIC + fi + bool ' Force interrupt redirection' CONFIG_IA64_HAVE_IRQREDIR bool ' Enable use of global TLB purge instruction (ptc.g)' CONFIG_ITANIUM_PTCG bool ' Enable SoftSDV hacks' CONFIG_IA64_SOFTSDV_HACKS bool ' Enable AzusA hacks' CONFIG_IA64_AZUSA_HACKS - bool ' Emulate PAL/SAL/EFI firmware' CONFIG_IA64_FW_EMU - bool ' Enable IA64 Machine Check Abort' CONFIG_IA64_MCA -fi - -if [ "$CONFIG_IA64_GENERIC" = "y" ]; then - define_bool CONFIG_IA64_SOFTSDV_HACKS y + bool ' Enable IA-64 Machine Check Abort' CONFIG_IA64_MCA fi if [ "$CONFIG_IA64_SGI_SN1_SIM" = "y" ]; then @@ -59,7 +62,7 @@ bool 'SMP support' CONFIG_SMP bool 'Performance monitor support' CONFIG_PERFMON -bool '/proc/palinfo support' CONFIG_IA64_PALINFO +bool '/proc/pal support' CONFIG_IA64_PALINFO bool 'Networking support' CONFIG_NET bool 'System V IPC' CONFIG_SYSVIPC @@ -162,8 +165,6 @@ #source drivers/misc/Config.in source fs/Config.in - -source fs/nls/Config.in if [ "$CONFIG_VT" = "y" ]; then mainmenu_option next_comment diff -urN 
linux-2.4.0-test6/arch/ia64/dig/iosapic.c linux-2.4.0-test6-lia/arch/ia64/dig/iosapic.c --- linux-2.4.0-test6/arch/ia64/dig/iosapic.c Thu Jun 22 07:09:44 2000 +++ linux-2.4.0-test6-lia/arch/ia64/dig/iosapic.c Mon Jul 31 14:01:22 2000 @@ -22,12 +22,14 @@ #include #include +#include +#include #include #include +#include +#include #include #include -#include -#include #undef DEBUG_IRQ_ROUTING @@ -315,10 +317,6 @@ */ outb(0xff, 0xA1); outb(0xff, 0x21); - -#ifndef CONFIG_IA64_DIG - iosapic_init(IO_SAPIC_DEFAULT_ADDR); -#endif } void @@ -337,15 +335,23 @@ if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ struct pci_dev * bridge = dev->bus->self; - /* do the bridge swizzle... */ - pin = (pin + PCI_SLOT(dev->devfn)) % 4; - irq = iosapic_get_PCI_irq_vector(bridge->bus->number, - PCI_SLOT(bridge->devfn), pin); + /* allow for multiple bridges on an adapter */ + do { + /* do the bridge swizzle... */ + pin = (pin + PCI_SLOT(dev->devfn)) % 4; + irq = iosapic_get_PCI_irq_vector(bridge->bus->number, + PCI_SLOT(bridge->devfn), pin); + } while (irq < 0 && (bridge = bridge->bus->self)); if (irq >= 0) printk(KERN_WARNING "PCI: using PPB(B%d,I%d,P%d) to get irq %02x\n", bridge->bus->number, PCI_SLOT(bridge->devfn), pin, irq); + else + printk(KERN_WARNING + "PCI: Couldn't map irq for B%d,I%d,P%d\n", + bridge->bus->number, PCI_SLOT(bridge->devfn), + pin); } if (irq >= 0) { printk("PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %02x\n", @@ -360,4 +366,35 @@ if (dev->irq >= NR_IRQS) dev->irq = 15; /* Spurious interrupts */ } +} + +/* + * Register an IOSAPIC discovered via ACPI. 
+ */ +void __init +dig_register_iosapic (acpi_entry_iosapic_t *iosapic) +{ + unsigned int ver, v; + int l, max_pin; + + ver = iosapic_version(iosapic->address); + max_pin = (ver >> 16) & 0xff; + + printk("IOSAPIC Version %x.%x: address 0x%lx IRQs 0x%x - 0x%x\n", + (ver & 0xf0) >> 4, (ver & 0x0f), iosapic->address, + iosapic->irq_base, iosapic->irq_base + max_pin); + + for (l = 0; l <= max_pin; l++) { + v = iosapic->irq_base + l; + if (v < 16) + v = isa_irq_to_vector(v); + if (v > IA64_MAX_VECTORED_IRQ) { + printk(" !!! bad IOSAPIC interrupt vector: %u\n", v); + continue; + } + /* XXX Check for IOSAPIC collisions */ + iosapic_addr(v) = (unsigned long) ioremap(iosapic->address, 0); + iosapic_baseirq(v) = iosapic->irq_base; + } + iosapic_init(iosapic->address, iosapic->irq_base); } diff -urN linux-2.4.0-test6/arch/ia64/dig/machvec.c linux-2.4.0-test6-lia/arch/ia64/dig/machvec.c --- linux-2.4.0-test6/arch/ia64/dig/machvec.c Sun Feb 6 18:42:40 2000 +++ linux-2.4.0-test6-lia/arch/ia64/dig/machvec.c Mon Jul 31 14:01:22 2000 @@ -1,4 +1,2 @@ +#define MACHVEC_PLATFORM_NAME dig #include -#include - -MACHVEC_DEFINE(dig) diff -urN linux-2.4.0-test6/arch/ia64/dig/setup.c linux-2.4.0-test6-lia/arch/ia64/dig/setup.c --- linux-2.4.0-test6/arch/ia64/dig/setup.c Wed Aug 2 18:54:01 2000 +++ linux-2.4.0-test6-lia/arch/ia64/dig/setup.c Fri Aug 11 16:58:37 2000 @@ -24,10 +24,6 @@ #include #include -#ifdef CONFIG_IA64_FW_EMU -# include "../../kernel/fw-emu.c" -#endif - /* * This is here so we can use the CMOS detection in ide-probe.c to * determine what drives are present. 
In theory, we don't need this diff -urN linux-2.4.0-test6/arch/ia64/hp/hpsim_machvec.c linux-2.4.0-test6-lia/arch/ia64/hp/hpsim_machvec.c --- linux-2.4.0-test6/arch/ia64/hp/hpsim_machvec.c Sun Feb 6 18:42:40 2000 +++ linux-2.4.0-test6-lia/arch/ia64/hp/hpsim_machvec.c Mon Jul 31 14:01:22 2000 @@ -1,4 +1,2 @@ +#define MACHVEC_PLATFORM_NAME hpsim #include -#include - -MACHVEC_DEFINE(hpsim) diff -urN linux-2.4.0-test6/arch/ia64/ia32/ia32_entry.S linux-2.4.0-test6-lia/arch/ia64/ia32/ia32_entry.S --- linux-2.4.0-test6/arch/ia64/ia32/ia32_entry.S Wed Aug 2 18:54:01 2000 +++ linux-2.4.0-test6-lia/arch/ia64/ia32/ia32_entry.S Wed Aug 2 12:32:26 2000 @@ -73,7 +73,7 @@ END(ia32_trace_syscall) GLOBAL_ENTRY(sys32_vfork) - alloc r16=ar.pfs,2,2,3,0;; + alloc r16=ar.pfs,2,2,4,0;; mov out0=IA64_CLONE_VFORK|IA64_CLONE_VM|SIGCHLD // out0 = clone_flags br.cond.sptk.few .fork1 // do the work END(sys32_vfork) @@ -105,7 +105,7 @@ .align 8 .globl ia32_syscall_table ia32_syscall_table: - data8 sys_ni_syscall /* 0 - old "setup(" system call*/ + data8 sys32_ni_syscall /* 0 - old "setup(" system call*/ data8 sys_exit data8 sys32_fork data8 sys_read @@ -122,25 +122,25 @@ data8 sys_mknod data8 sys_chmod /* 15 */ data8 sys_lchown - data8 sys_ni_syscall /* old break syscall holder */ - data8 sys_ni_syscall + data8 sys32_ni_syscall /* old break syscall holder */ + data8 sys32_ni_syscall data8 sys_lseek data8 sys_getpid /* 20 */ data8 sys_mount data8 sys_oldumount data8 sys_setuid data8 sys_getuid - data8 sys_ni_syscall /* sys_stime is not supported on IA64 */ /* 25 */ + data8 sys32_ni_syscall /* sys_stime is not supported on IA64 */ /* 25 */ data8 sys32_ptrace data8 sys32_alarm - data8 sys_ni_syscall - data8 sys_ni_syscall + data8 sys32_ni_syscall + data8 sys32_ni_syscall data8 ia32_utime /* 30 */ - data8 sys_ni_syscall /* old stty syscall holder */ - data8 sys_ni_syscall /* old gtty syscall holder */ + data8 sys32_ni_syscall /* old stty syscall holder */ + data8 sys32_ni_syscall /* old gtty 
syscall holder */ data8 sys_access data8 sys_nice - data8 sys_ni_syscall /* 35 */ /* old ftime syscall holder */ + data8 sys32_ni_syscall /* 35 */ /* old ftime syscall holder */ data8 sys_sync data8 sys_kill data8 sys_rename @@ -149,22 +149,22 @@ data8 sys_dup data8 sys32_pipe data8 sys32_times - data8 sys_ni_syscall /* old prof syscall holder */ + data8 sys32_ni_syscall /* old prof syscall holder */ data8 sys_brk /* 45 */ data8 sys_setgid data8 sys_getgid - data8 sys_ni_syscall + data8 sys32_ni_syscall data8 sys_geteuid data8 sys_getegid /* 50 */ data8 sys_acct data8 sys_umount /* recycled never used phys( */ - data8 sys_ni_syscall /* old lock syscall holder */ + data8 sys32_ni_syscall /* old lock syscall holder */ data8 ia32_ioctl - data8 sys_fcntl /* 55 */ - data8 sys_ni_syscall /* old mpx syscall holder */ + data8 sys32_fcntl /* 55 */ + data8 sys32_ni_syscall /* old mpx syscall holder */ data8 sys_setpgid - data8 sys_ni_syscall /* old ulimit syscall holder */ - data8 sys_ni_syscall + data8 sys32_ni_syscall /* old ulimit syscall holder */ + data8 sys32_ni_syscall data8 sys_umask /* 60 */ data8 sys_chroot data8 sys_ustat @@ -172,12 +172,12 @@ data8 sys_getppid data8 sys_getpgrp /* 65 */ data8 sys_setsid - data8 sys_ni_syscall - data8 sys_ni_syscall - data8 sys_ni_syscall + data8 sys32_sigaction + data8 sys32_ni_syscall + data8 sys32_ni_syscall data8 sys_setreuid /* 70 */ data8 sys_setregid - data8 sys_ni_syscall + data8 sys32_ni_syscall data8 sys_sigpending data8 sys_sethostname data8 sys32_setrlimit /* 75 */ @@ -189,7 +189,7 @@ data8 sys_setgroups data8 old_select data8 sys_symlink - data8 sys_ni_syscall + data8 sys32_ni_syscall data8 sys_readlink /* 85 */ data8 sys_uselib data8 sys_swapon @@ -203,7 +203,7 @@ data8 sys_fchown /* 95 */ data8 sys_getpriority data8 sys_setpriority - data8 sys_ni_syscall /* old profil syscall holder */ + data8 sys32_ni_syscall /* old profil syscall holder */ data8 sys32_statfs data8 sys32_fstatfs /* 100 */ data8 sys_ioperm @@ 
-214,11 +214,11 @@ data8 sys32_newstat data8 sys32_newlstat data8 sys32_newfstat - data8 sys_ni_syscall + data8 sys32_ni_syscall data8 sys_iopl /* 110 */ data8 sys_vhangup - data8 sys_ni_syscall // used to be sys_idle - data8 sys_ni_syscall + data8 sys32_ni_syscall // used to be sys_idle + data8 sys32_ni_syscall data8 sys32_wait4 data8 sys_swapoff /* 115 */ data8 sys_sysinfo @@ -242,7 +242,7 @@ data8 sys_bdflush data8 sys_sysfs /* 135 */ data8 sys_personality - data8 sys_ni_syscall /* for afs_syscall */ + data8 sys32_ni_syscall /* for afs_syscall */ data8 sys_setfsuid data8 sys_setfsgid data8 sys_llseek /* 140 */ @@ -293,8 +293,8 @@ data8 sys_capset /* 185 */ data8 sys_sigaltstack data8 sys_sendfile - data8 sys_ni_syscall /* streams1 */ - data8 sys_ni_syscall /* streams2 */ + data8 sys32_ni_syscall /* streams1 */ + data8 sys32_ni_syscall /* streams2 */ data8 sys32_vfork /* 190 */ /* * CAUTION: If any system calls are added beyond this point diff -urN linux-2.4.0-test6/arch/ia64/ia32/sys_ia32.c linux-2.4.0-test6-lia/arch/ia64/ia32/sys_ia32.c --- linux-2.4.0-test6/arch/ia64/ia32/sys_ia32.c Wed Aug 2 18:54:01 2000 +++ linux-2.4.0-test6-lia/arch/ia64/ia32/sys_ia32.c Mon Jul 31 14:01:22 2000 @@ -74,10 +74,14 @@ n = 0; do { - if ((err = get_user(addr, (int *)A(arg))) != 0) - return(err); - if (ap) - *ap++ = (char *)A(addr); + err = get_user(addr, (int *)A(arg)); + if (IS_ERR(err)) + return err; + if (ap) { /* no access_ok needed, we allocated */ + err = __put_user((char *)A(addr), ap++); + if (IS_ERR(err)) + return err; + } arg += sizeof(unsigned int); n++; } while (addr); @@ -101,7 +105,11 @@ int na, ne, r, len; na = nargs(argv, NULL); + if (IS_ERR(na)) + return(na); ne = nargs(envp, NULL); + if (IS_ERR(ne)) + return(ne); len = (na + ne + 2) * sizeof(*av); /* * kmalloc won't work because the `sys_exec' code will attempt @@ -121,12 +129,21 @@ if (IS_ERR(av)) return (long)av; ae = av + na + 1; - av[na] = (char *)0; - ae[ne] = (char *)0; - (void)nargs(argv, av); - 
(void)nargs(envp, ae); + r = __put_user(0, (av + na)); + if (IS_ERR(r)) + goto out; + r = __put_user(0, (ae + ne)); + if (IS_ERR(r)) + goto out; + r = nargs(argv, av); + if (IS_ERR(r)) + goto out; + r = nargs(envp, ae); + if (IS_ERR(r)) + goto out; r = sys_execve(filename, av, ae, regs); if (IS_ERR(r)) +out: sys_munmap((unsigned long) av, len); return(r); } @@ -959,150 +976,85 @@ } struct iovec32 { unsigned int iov_base; int iov_len; }; +asmlinkage ssize_t sys_readv(unsigned long,const struct iovec *,unsigned long); +asmlinkage ssize_t sys_writev(unsigned long,const struct iovec *,unsigned long); -typedef ssize_t (*IO_fn_t)(struct file *, char *, size_t, loff_t *); - -static long -do_readv_writev32(int type, struct file *file, const struct iovec32 *vector, - u32 count) +static struct iovec * +get_iovec32(struct iovec32 *iov32, struct iovec *iov_buf, u32 count, int type) { - unsigned long tot_len; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov=iovstack, *ivp; - struct inode *inode; - long retval, i; - IO_fn_t fn; + int i; + u32 buf, len; + struct iovec *ivp, *iov; + + /* Get the "struct iovec" from user memory */ - /* First get the "struct iovec" from user memory and - * verify all the pointers - */ if (!count) return 0; - if(verify_area(VERIFY_READ, vector, sizeof(struct iovec32)*count)) - return -EFAULT; + if(verify_area(VERIFY_READ, iov32, sizeof(struct iovec32)*count)) + return(struct iovec *)0; if (count > UIO_MAXIOV) - return -EINVAL; + return(struct iovec *)0; if (count > UIO_FASTIOV) { iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL); if (!iov) - return -ENOMEM; - } + return((struct iovec *)0); + } else + iov = iov_buf; - tot_len = 0; - i = count; ivp = iov; - while(i > 0) { - u32 len; - u32 buf; - - __get_user(len, &vector->iov_len); - __get_user(buf, &vector->iov_base); - tot_len += len; + for (i = 0; i < count; i++) { + if (__get_user(len, &iov32->iov_len) || + __get_user(buf, &iov32->iov_base)) { + if (iov != iov_buf) + kfree(iov); + 
return((struct iovec *)0); + } + if (verify_area(type, (void *)A(buf), len)) { + if (iov != iov_buf) + kfree(iov); + return((struct iovec *)0); + } ivp->iov_base = (void *)A(buf); - ivp->iov_len = (__kernel_size_t) len; - vector++; - ivp++; - i--; - } - - inode = file->f_dentry->d_inode; - /* VERIFY_WRITE actually means a read, as we write to user space */ - retval = locks_verify_area((type == VERIFY_WRITE - ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE), - inode, file, file->f_pos, tot_len); - if (retval) { - if (iov != iovstack) - kfree(iov); - return retval; - } - - /* Then do the actual IO. Note that sockets need to be handled - * specially as they have atomicity guarantees and can handle - * iovec's natively - */ - if (inode->i_sock) { - int err; - err = sock_readv_writev(type, inode, file, iov, count, tot_len); - if (iov != iovstack) - kfree(iov); - return err; - } - - if (!file->f_op) { - if (iov != iovstack) - kfree(iov); - return -EINVAL; - } - /* VERIFY_WRITE actually means a read, as we write to user space */ - fn = file->f_op->read; - if (type == VERIFY_READ) - fn = (IO_fn_t) file->f_op->write; - ivp = iov; - while (count > 0) { - void * base; - int len, nr; - - base = ivp->iov_base; - len = ivp->iov_len; + ivp->iov_len = (__kernel_size_t)len; + iov32++; ivp++; - count--; - nr = fn(file, base, len, &file->f_pos); - if (nr < 0) { - if (retval) - break; - retval = nr; - break; - } - retval += nr; - if (nr != len) - break; } - if (iov != iovstack) - kfree(iov); - return retval; + return(iov); } asmlinkage long sys32_readv(int fd, struct iovec32 *vector, u32 count) { - struct file *file; - long ret = -EBADF; - - file = fget(fd); - if(!file) - goto bad_file; - - if(!(file->f_mode & 1)) - goto out; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov; + int ret; + mm_segment_t old_fs = get_fs(); - ret = do_readv_writev32(VERIFY_WRITE, file, - vector, count); -out: - fput(file); -bad_file: + if ((iov = get_iovec32(vector, iovstack, count, VERIFY_WRITE)) == 
(struct iovec *)0) + return -EFAULT; + set_fs(KERNEL_DS); + ret = sys_readv(fd, iov, count); + set_fs(old_fs); + if (iov != iovstack) + kfree(iov); return ret; } asmlinkage long sys32_writev(int fd, struct iovec32 *vector, u32 count) { - struct file *file; - int ret = -EBADF; - - file = fget(fd); - if(!file) - goto bad_file; - - if(!(file->f_mode & 2)) - goto out; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov; + int ret; + mm_segment_t old_fs = get_fs(); - down(&file->f_dentry->d_inode->i_sem); - ret = do_readv_writev32(VERIFY_READ, file, - vector, count); - up(&file->f_dentry->d_inode->i_sem); -out: - fput(file); -bad_file: + if ((iov = get_iovec32(vector, iovstack, count, VERIFY_READ)) == (struct iovec *)0) + return -EFAULT; + set_fs(KERNEL_DS); + ret = sys_writev(fd, iov, count); + set_fs(old_fs); + if (iov != iovstack) + kfree(iov); return ret; } @@ -1173,21 +1125,22 @@ static inline int shape_msg(struct msghdr *mp, struct msghdr32 *mp32) { + int ret; unsigned int i; if (!access_ok(VERIFY_READ, mp32, sizeof(*mp32))) return(-EFAULT); - __get_user(i, &mp32->msg_name); + ret = __get_user(i, &mp32->msg_name); mp->msg_name = (void *)A(i); - __get_user(mp->msg_namelen, &mp32->msg_namelen); - __get_user(i, &mp32->msg_iov); + ret |= __get_user(mp->msg_namelen, &mp32->msg_namelen); + ret |= __get_user(i, &mp32->msg_iov); mp->msg_iov = (struct iovec *)A(i); - __get_user(mp->msg_iovlen, &mp32->msg_iovlen); - __get_user(i, &mp32->msg_control); + ret |= __get_user(mp->msg_iovlen, &mp32->msg_iovlen); + ret |= __get_user(i, &mp32->msg_control); mp->msg_control = (void *)A(i); - __get_user(mp->msg_controllen, &mp32->msg_controllen); - __get_user(mp->msg_flags, &mp32->msg_flags); - return(0); + ret |= __get_user(mp->msg_controllen, &mp32->msg_controllen); + ret |= __get_user(mp->msg_flags, &mp32->msg_flags); + return(ret ? 
-EFAULT : 0); } /* @@ -2341,17 +2294,17 @@ { struct switch_stack *swp; struct pt_regs *ptp; - int i, tos; + int i, tos, ret; int fsrlo, fsrhi; if (!access_ok(VERIFY_READ, save, sizeof(*save))) return(-EIO); - __get_user(tsk->thread.fcr, (unsigned int *)&save->cw); - __get_user(fsrlo, (unsigned int *)&save->sw); - __get_user(fsrhi, (unsigned int *)&save->tag); + ret = __get_user(tsk->thread.fcr, (unsigned int *)&save->cw); + ret |= __get_user(fsrlo, (unsigned int *)&save->sw); + ret |= __get_user(fsrhi, (unsigned int *)&save->tag); tsk->thread.fsr = ((long)fsrhi << 32) | (long)fsrlo; - __get_user(tsk->thread.fir, (unsigned int *)&save->ipoff); - __get_user(tsk->thread.fdr, (unsigned int *)&save->dataoff); + ret |= __get_user(tsk->thread.fir, (unsigned int *)&save->ipoff); + ret |= __get_user(tsk->thread.fdr, (unsigned int *)&save->dataoff); /* * Stack frames start with 16-bytes of temp space */ @@ -2360,7 +2313,7 @@ tos = (tsk->thread.fsr >> 11) & 3; for (i = 0; i < 8; i++) get_fpreg(i, &save->_st[i], ptp, swp, tos); - return(0); + return(ret ? 
-EFAULT : 0); } asmlinkage long sys_ptrace(long, pid_t, unsigned long, unsigned long, long, long, long, long, long); @@ -2492,6 +2445,105 @@ return ret; } +static inline int +get_flock32(struct flock *kfl, struct flock32 *ufl) +{ + int err; + + err = get_user(kfl->l_type, &ufl->l_type); + err |= __get_user(kfl->l_whence, &ufl->l_whence); + err |= __get_user(kfl->l_start, &ufl->l_start); + err |= __get_user(kfl->l_len, &ufl->l_len); + err |= __get_user(kfl->l_pid, &ufl->l_pid); + return err; +} + +static inline int +put_flock32(struct flock *kfl, struct flock32 *ufl) +{ + int err; + + err = __put_user(kfl->l_type, &ufl->l_type); + err |= __put_user(kfl->l_whence, &ufl->l_whence); + err |= __put_user(kfl->l_start, &ufl->l_start); + err |= __put_user(kfl->l_len, &ufl->l_len); + err |= __put_user(kfl->l_pid, &ufl->l_pid); + return err; +} + +extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, + unsigned long arg); + +asmlinkage long +sys32_fcntl(unsigned int fd, unsigned int cmd, int arg) +{ + struct flock f; + mm_segment_t old_fs; + long ret; + + switch (cmd) { + case F_GETLK: + case F_SETLK: + case F_SETLKW: + if(cmd != F_GETLK && get_flock32(&f, (struct flock32 *)((long)arg))) + return -EFAULT; + old_fs = get_fs(); + set_fs(KERNEL_DS); + ret = sys_fcntl(fd, cmd, (unsigned long)&f); + set_fs(old_fs); + if(cmd == F_GETLK && put_flock32(&f, (struct flock32 *)((long)arg))) + return -EFAULT; + return ret; + default: + /* + * `sys_fcntl' lies about arg, for the F_SETOWN + * sub-function arg can have a negative value. 
+ */ + return sys_fcntl(fd, cmd, (unsigned long)((long)arg)); + } +} + +asmlinkage long +sys32_sigaction (int sig, struct old_sigaction32 *act, struct old_sigaction32 *oact) +{ + struct k_sigaction new_ka, old_ka; + int ret; + + if (act) { + old_sigset32_t mask; + + ret = get_user((long)new_ka.sa.sa_handler, &act->sa_handler); + ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); + ret |= __get_user(mask, &act->sa_mask); + if (ret) + return ret; + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { + ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler); + ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); + ret |= __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); + } + + return ret; +} + +asmlinkage long sys_ni_syscall(void); + +asmlinkage long +sys32_ni_syscall(int dummy0, int dummy1, int dummy2, int dummy3, + int dummy4, int dummy5, int dummy6, int dummy7, int stack) +{ + struct pt_regs *regs = (struct pt_regs *)&stack; + + printk("IA32 syscall #%d issued, maybe we should implement it\n", + (int)regs->r1); + return(sys_ni_syscall()); +} + #ifdef NOTYET /* UNTESTED FOR IA64 FROM HERE DOWN */ /* In order to reduce some races, while at the same time doing additional @@ -2545,61 +2597,6 @@ return sys_ioperm((unsigned long)from, (unsigned long)num, on); } -static inline int -get_flock(struct flock *kfl, struct flock32 *ufl) -{ - int err; - - err = get_user(kfl->l_type, &ufl->l_type); - err |= __get_user(kfl->l_whence, &ufl->l_whence); - err |= __get_user(kfl->l_start, &ufl->l_start); - err |= __get_user(kfl->l_len, &ufl->l_len); - err |= __get_user(kfl->l_pid, &ufl->l_pid); - return err; -} - -static inline int -put_flock(struct flock *kfl, struct flock32 *ufl) -{ - int err; - - err = __put_user(kfl->l_type, &ufl->l_type); - err |= __put_user(kfl->l_whence, &ufl->l_whence); - err |= __put_user(kfl->l_start, &ufl->l_start); - err |= __put_user(kfl->l_len, 
&ufl->l_len); - err |= __put_user(kfl->l_pid, &ufl->l_pid); - return err; -} - -extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, - unsigned long arg); - -asmlinkage long -sys32_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - case F_GETLK: - case F_SETLK: - case F_SETLKW: - { - struct flock f; - mm_segment_t old_fs; - long ret; - - if(get_flock(&f, (struct flock32 *)arg)) - return -EFAULT; - old_fs = get_fs(); set_fs (KERNEL_DS); - ret = sys_fcntl(fd, cmd, (unsigned long)&f); - set_fs (old_fs); - if(put_flock(&f, (struct flock32 *)arg)) - return -EFAULT; - return ret; - } - default: - return sys_fcntl(fd, cmd, (unsigned long)arg); - } -} - struct dqblk32 { __u32 dqb_bhardlimit; __u32 dqb_bsoftlimit; @@ -3861,40 +3858,6 @@ } extern void check_pending(int signum); - -asmlinkage long -sys32_sigaction (int sig, struct old_sigaction32 *act, - struct old_sigaction32 *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret; - - if(sig < 0) { - current->tss.new_signal = 1; - sig = -sig; - } - - if (act) { - old_sigset_t32 mask; - - ret = get_user((long)new_ka.sa.sa_handler, &act->sa_handler); - ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); - ret |= __get_user(mask, &act->sa_mask); - if (ret) - return ret; - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); - - if (!ret && oact) { - ret = put_user((long)old_ka.sa.sa_handler, &oact->sa_handler); - ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - ret |= __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); - } - - return ret; -} #ifdef CONFIG_MODULES diff -urN linux-2.4.0-test6/arch/ia64/kernel/Makefile linux-2.4.0-test6-lia/arch/ia64/kernel/Makefile --- linux-2.4.0-test6/arch/ia64/kernel/Makefile Wed Aug 2 18:54:01 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/Makefile Wed Aug 2 18:57:03 2000 @@ -9,8 +9,8 @@ all: kernel.o head.o init_task.o -obj-y := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_ia64.o irq_sapic.o ivt.o \ - pal.o pci-dma.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \ +obj-y := acpi.o entry.o gate.o efi.o efi_stub.o irq.o irq_ia64.o irq_sapic.o ivt.o \ + machvec.o pal.o pci-dma.o process.o perfmon.o ptrace.o sal.o semaphore.o setup.o \ signal.o sys_ia64.o traps.o time.o unaligned.o unwind.o obj-$(CONFIG_IA64_GENERIC) += machvec.o diff -urN linux-2.4.0-test6/arch/ia64/kernel/acpi.c linux-2.4.0-test6-lia/arch/ia64/kernel/acpi.c --- linux-2.4.0-test6/arch/ia64/kernel/acpi.c Thu Jun 22 07:09:44 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/acpi.c Mon Jul 31 14:01:22 2000 @@ -19,10 +19,11 @@ #include #include -#include #include #include #include +#include +#include #undef ACPI_DEBUG /* Guess what this does? */ @@ -75,47 +76,6 @@ } /* - * Find all IOSAPICs and tag the iosapic_vector structure with the appropriate - * base addresses. - */ -static void __init -acpi_iosapic(char *p) -{ - /* - * This is not good. ACPI is not necessarily limited to CONFIG_IA64_SV, yet - * ACPI does not necessarily imply IOSAPIC either. Perhaps there should be - * a means for platform_setup() to register ACPI handlers? 
- */ -#ifdef CONFIG_IA64_DIG - acpi_entry_iosapic_t *iosapic = (acpi_entry_iosapic_t *) p; - unsigned int ver, v; - int l, max_pin; - - ver = iosapic_version(iosapic->address); - max_pin = (ver >> 16) & 0xff; - - printk("IOSAPIC Version %x.%x: address 0x%lx IRQs 0x%x - 0x%x\n", - (ver & 0xf0) >> 4, (ver & 0x0f), iosapic->address, - iosapic->irq_base, iosapic->irq_base + max_pin); - - for (l = 0; l <= max_pin; l++) { - v = iosapic->irq_base + l; - if (v < 16) - v = isa_irq_to_vector(v); - if (v > IA64_MAX_VECTORED_IRQ) { - printk(" !!! bad IOSAPIC interrupt vector: %u\n", v); - continue; - } - /* XXX Check for IOSAPIC collisions */ - iosapic_addr(v) = (unsigned long) ioremap(iosapic->address, 0); - iosapic_baseirq(v) = iosapic->irq_base; - } - iosapic_init(iosapic->address, iosapic->irq_base); -#endif -} - - -/* * Configure legacy IRQ information in iosapic_vector */ static void __init @@ -227,7 +187,7 @@ break; case ACPI_ENTRY_IO_SAPIC: - acpi_iosapic(p); + platform_register_iosapic((acpi_entry_iosapic_t *) p); break; case ACPI_ENTRY_INT_SRC_OVERRIDE: diff -urN linux-2.4.0-test6/arch/ia64/kernel/efi.c linux-2.4.0-test6-lia/arch/ia64/kernel/efi.c --- linux-2.4.0-test6/arch/ia64/kernel/efi.c Thu Jun 22 07:09:44 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/efi.c Fri Aug 11 18:01:55 2000 @@ -33,9 +33,10 @@ extern efi_status_t efi_call_phys (void *, ...); struct efi efi; - static efi_runtime_services_t *runtime; +static unsigned long mem_limit = ~0UL; + static efi_status_t phys_get_time (efi_time_t *tm, efi_time_cap_t *tc) { @@ -169,15 +170,13 @@ case EFI_BOOT_SERVICES_CODE: case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: - if (md->phys_addr > 1024*1024*1024UL) { - printk("Warning: ignoring %luMB of memory above 1GB!\n", - md->num_pages >> 8); - md->type = EFI_UNUSABLE_MEMORY; - continue; - } - if (!(md->attribute & EFI_MEMORY_WB)) continue; + if (md->phys_addr + (md->num_pages << 12) > mem_limit) { + if (md->phys_addr > mem_limit) + continue; + 
md->num_pages = (mem_limit - md->phys_addr) >> 12; + } if (md->num_pages == 0) { printk("efi_memmap_walk: ignoring empty region at 0x%lx", md->phys_addr); @@ -224,8 +223,8 @@ * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor * Abstraction Layer chapter 11 in ADAG */ -static void -map_pal_code (void) +void +efi_map_pal_code (void) { void *efi_map_start, *efi_map_end, *p; efi_memory_desc_t *md; @@ -240,13 +239,14 @@ for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { md = p; - if (md->type != EFI_PAL_CODE) continue; + if (md->type != EFI_PAL_CODE) + continue; if (++pal_code_count > 1) { printk(KERN_ERR "Too many EFI Pal Code memory ranges, dropped @ %lx\n", md->phys_addr); continue; - } + } mask = ~((1 << _PAGE_SIZE_4M)-1); /* XXX should be dynamic? */ vaddr = PAGE_OFFSET + md->phys_addr; @@ -281,9 +281,28 @@ efi_config_table_t *config_tables; efi_char16_t *c16; u64 efi_desc_size; - char vendor[100] = "unknown"; + char *cp, *end, vendor[100] = "unknown"; + extern char saved_command_line[]; int i; + /* it's too early to be able to use the standard kernel command line support... 
*/ + for (cp = saved_command_line; *cp; ) { + if (memcmp(cp, "mem=", 4) == 0) { + cp += 4; + mem_limit = memparse(cp, &end) - 1; + if (end != cp) + break; + cp = end; + } else { + while (*cp != ' ' && *cp) + ++cp; + while (*cp == ' ') + ++cp; + } + } + if (mem_limit != ~0UL) + printk("Ignoring memory above %luMB\n", mem_limit >> 20); + efi.systab = __va(ia64_boot_param.efi_systab); /* @@ -359,7 +378,7 @@ } #endif - map_pal_code(); + efi_map_pal_code(); } void diff -urN linux-2.4.0-test6/arch/ia64/kernel/entry.S linux-2.4.0-test6-lia/arch/ia64/kernel/entry.S --- linux-2.4.0-test6/arch/ia64/kernel/entry.S Thu Aug 10 19:56:18 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/entry.S Fri Aug 11 14:56:27 2000 @@ -106,29 +106,19 @@ alloc r16=ar.pfs,1,0,0,0 DO_SAVE_SWITCH_STACK UNW(.body) - // disable interrupts to ensure atomicity for next few instructions: - mov r17=psr // M-unit - ;; - rsm psr.i // M-unit - dep r18=-1,r0,0,61 // build mask 0x1fffffffffffffff - ;; - srlz.d - ;; + adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 + dep r18=-1,r0,0,61 // build mask 0x1fffffffffffffff adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 ;; st8 [r22]=sp // save kernel stack pointer of old task ld8 sp=[r21] // load kernel stack pointer of new task and r20=in0,r18 // physical address of "current" ;; + mov ar.k6=r20 // copy "current" into ar.k6 mov r8=r13 // return pointer to previously running task mov r13=in0 // set "current" pointer - mov ar.k6=r20 // copy "current" into ar.k6 - ;; - // restore interrupts - mov psr.l=r17 ;; - srlz.d DO_LOAD_SWITCH_STACK( ) br.ret.sptk.few rp END(ia64_switch_to) diff -urN linux-2.4.0-test6/arch/ia64/kernel/head.S linux-2.4.0-test6-lia/arch/ia64/kernel/head.S --- linux-2.4.0-test6/arch/ia64/kernel/head.S Wed Aug 2 18:54:01 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/head.S Sat Aug 5 19:27:13 2000 @@ -181,7 +181,9 @@ GLOBAL_ENTRY(ia64_load_debug_regs) alloc r16=ar.pfs,1,0,0,0 +#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || 
defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)) lfetch.nta [in0] +#endif mov r20=ar.lc // preserve ar.lc add r19=IA64_NUM_DBG_REGS*8,in0 mov ar.lc=IA64_NUM_DBG_REGS-1 @@ -702,3 +704,74 @@ SET_REG(b5); #endif /* CONFIG_IA64_BRL_EMU */ + +#ifdef CONFIG_SMP + + /* + * This routine handles spinlock contention. It uses a simple exponential backoff + * algorithm to reduce unnecessary bus traffic. The initial delay is selected from + * the low-order bits of the cycle counter (a cheap "randomizer"). I'm sure this + * could use additional tuning, especially on systems with a large number of CPUs. + * Also, I think the maximum delay should be made a function of the number of CPUs in + * the system. --davidm 00/08/05 + * + * WARNING: This is not a normal procedure. It gets called from C code without + * the compiler knowing about it. Thus, we must not use any scratch registers + * beyond those that were declared "clobbered" at the call-site (see spin_lock() + * macro). We may not even use the stacked registers, because that could overwrite + * output registers. Similarly, we can't use the scratch stack area as it may be + * in use, too. + * + * Inputs: + * ar.ccv = 0 (and available for use) + * r28 = available for use + * r29 = available for use + * r30 = non-zero (and available for use) + * r31 = address of lock we're trying to acquire + * p15 = available for use + */ + +# define delay r28 +# define timeout r29 +# define tmp r30 + +GLOBAL_ENTRY(ia64_spinlock_contention) + mov tmp=ar.itc + ;; + and delay=0x3f,tmp + ;; + +.retry: add timeout=tmp,delay + shl delay=delay,1 + ;; + dep delay=delay,r0,0,13 // limit delay to 8192 cycles + ;; + // delay a little... 
+.wait: sub tmp=tmp,timeout + or delay=0xf,delay // make sure delay is non-zero (otherwise we get stuck with 0) + ;; + cmp.lt p15,p0=tmp,r0 + mov tmp=ar.itc +(p15) br.cond.sptk .wait + ;; + ld1 tmp=[r31] + ;; + cmp.ne p15,p0=tmp,r0 + mov tmp=ar.itc +(p15) br.cond.sptk.few .retry // lock is still busy + ;; + // try acquiring lock (we know ar.ccv is still zero!): + mov tmp=1 + ;; + IA64_SEMFIX_INSN + cmpxchg1.acq tmp=[r31],tmp,ar.ccv + ;; + cmp.eq p15,p0=tmp,r0 + + mov tmp=ar.itc +(p15) br.ret.sptk.many b7 // got lock -> return + br .retry // still no luck, retry + +END(ia64_spinlock_contention) + +#endif diff -urN linux-2.4.0-test6/arch/ia64/kernel/ia64_ksyms.c linux-2.4.0-test6-lia/arch/ia64/kernel/ia64_ksyms.c --- linux-2.4.0-test6/arch/ia64/kernel/ia64_ksyms.c Thu Aug 10 19:56:18 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/ia64_ksyms.c Mon Jul 31 14:01:22 2000 @@ -18,6 +18,7 @@ EXPORT_SYMBOL(strncat); EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strstr); EXPORT_SYMBOL(strtok); #include @@ -37,6 +38,7 @@ EXPORT_SYMBOL(kernel_thread); #ifdef CONFIG_SMP +#include EXPORT_SYMBOL(synchronize_irq); #include diff -urN linux-2.4.0-test6/arch/ia64/kernel/irq_ia64.c linux-2.4.0-test6-lia/arch/ia64/kernel/irq_ia64.c --- linux-2.4.0-test6/arch/ia64/kernel/irq_ia64.c Thu Jun 22 07:09:44 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/irq_ia64.c Mon Jul 31 14:01:22 2000 @@ -117,6 +117,13 @@ { unsigned long bsp, sp; + /* + * Note: if the interrupt happened while executing in + * the context switch routine (ia64_switch_to), we may + * get a spurious stack overflow here. This is + * because the register and the memory stack are not + * switched atomically. 
+ */ asm ("mov %0=ar.bsp" : "=r"(bsp)); asm ("mov %0=sp" : "=r"(sp)); diff -urN linux-2.4.0-test6/arch/ia64/kernel/ivt.S linux-2.4.0-test6-lia/arch/ia64/kernel/ivt.S --- linux-2.4.0-test6/arch/ia64/kernel/ivt.S Wed Aug 2 18:54:02 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/ivt.S Fri Aug 11 14:55:22 2000 @@ -170,33 +170,27 @@ * The ITLB basically does the same as the VHPT handler except * that we always insert exactly one instruction TLB entry. */ -#if 1 /* * Attempt to lookup PTE through virtual linear page table. * The speculative access will fail if there is no TLB entry * for the L3 page table page we're trying to access. */ - mov r31=pr // save predicates - ;; - thash r17=r16 // compute virtual address of L3 PTE + mov r16=cr.iha // get virtual address of L3 PTE ;; - ld8.s r18=[r17] // try to read L3 PTE + ld8.s r16=[r16] // try to read L3 PTE + mov r31=pr // save predicates ;; - tnat.nz p6,p0=r18 // did read succeed? + tnat.nz p6,p0=r16 // did read succeed? (p6) br.cond.spnt.many 1f ;; - itc.i r18 + itc.i r16 ;; mov pr=r31,-1 rfi -1: rsm psr.dt // use physical addressing for data -#else - mov r16=cr.ifa // get address that caused the TLB miss +1: mov r16=cr.ifa // get address that caused the TLB miss ;; rsm psr.dt // use physical addressing for data -#endif - mov r31=pr // save the predicate registers mov r19=ar.k7 // get page table base address shl r21=r16,3 // shift bit 60 into sign bit shr.u r17=r16,61 // get the region number into r17 @@ -244,33 +238,27 @@ * The DTLB basically does the same as the VHPT handler except * that we always insert exactly one data TLB entry. */ - mov r16=cr.ifa // get address that caused the TLB miss -#if 1 /* * Attempt to lookup PTE through virtual linear page table. * The speculative access will fail if there is no TLB entry * for the L3 page table page we're trying to access. 
*/ - mov r31=pr // save predicates - ;; - thash r17=r16 // compute virtual address of L3 PTE + mov r16=cr.iha // get virtual address of L3 PTE ;; - ld8.s r18=[r17] // try to read L3 PTE + ld8.s r16=[r16] // try to read L3 PTE + mov r31=pr // save predicates ;; - tnat.nz p6,p0=r18 // did read succeed? + tnat.nz p6,p0=r16 // did read succeed? (p6) br.cond.spnt.many 1f ;; - itc.d r18 + itc.d r16 ;; mov pr=r31,-1 rfi -1: rsm psr.dt // use physical addressing for data -#else - rsm psr.dt // use physical addressing for data - mov r31=pr // save the predicate registers +1: mov r16=cr.ifa // get address that caused the TLB miss ;; -#endif + rsm psr.dt // use physical addressing for data mov r19=ar.k7 // get page table base address shl r21=r16,3 // shift bit 60 into sign bit shr.u r17=r16,61 // get the region number into r17 @@ -504,7 +492,24 @@ mov r29=b0 // save b0 in case of nested fault) ;; 1: ld8 r18=[r17] - ;; // avoid raw on r18 +#if defined(CONFIG_IA32_SUPPORT) && \ + (defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_B0_SPECIFIC)) + // + // Erratum 85 (Access bit fault could be reported before page not present fault) + // If the PTE is indicates the page is not present, then just turn this into a + // page fault. + // + mov r31=pr // save predicates + ;; + tbit.nz p6,p0=r18,0 // page present bit set? +(p6) br.cond.sptk 1f + ;; // avoid WAW on p6 + mov pr=r31,-1 + br.cond.sptk page_fault // page wasn't present +1: mov pr=r31,-1 +#else + ;; // avoid RAW on r18 +#endif or r18=_PAGE_A,r18 // set the accessed bit mov b0=r29 // restore b0 ;; @@ -541,14 +546,6 @@ ;; srlz.d // ensure everyone knows psr.dt is off... cmp.eq p0,p7=r16,r17 // is this a system call? (p7 <- false, if so) -#if 1 - // Allow syscalls via the old system call number for the time being. This is - // so we can transition to the new syscall number in a relatively smooth - // fashion. - mov r17=0x80000 - ;; -(p7) cmp.eq.or.andcm p0,p7=r16,r17 // is this the old syscall number? 
-#endif (p7) br.cond.spnt.many non_syscall SAVE_MIN // uses r31; defines r2: diff -urN linux-2.4.0-test6/arch/ia64/kernel/machvec.c linux-2.4.0-test6-lia/arch/ia64/kernel/machvec.c --- linux-2.4.0-test6/arch/ia64/kernel/machvec.c Tue Feb 8 12:01:59 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/machvec.c Mon Jul 31 14:01:22 2000 @@ -3,12 +3,9 @@ #include #include -struct ia64_machine_vector ia64_mv; +#ifdef CONFIG_IA64_GENERIC -void -machvec_noop (void) -{ -} +struct ia64_machine_vector ia64_mv; /* * Most platforms use this routine for mapping page frame addresses @@ -45,4 +42,11 @@ } ia64_mv = *mv; printk("booting generic kernel on platform %s\n", name); +} + +#endif /* CONFIG_IA64_GENERIC */ + +void +machvec_noop (void) +{ } diff -urN linux-2.4.0-test6/arch/ia64/kernel/pal.S linux-2.4.0-test6-lia/arch/ia64/kernel/pal.S --- linux-2.4.0-test6/arch/ia64/kernel/pal.S Wed Aug 2 18:54:02 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/pal.S Fri Jul 28 09:04:50 2000 @@ -191,3 +191,57 @@ srlz.d // seralize restoration of psr.l br.ret.sptk.few b0 END(ia64_pal_call_phys_static) + +/* + * Make a PAL call using the stacked registers in physical mode. 
+ * + * Inputs: + * in0 Index of PAL service + * in2 - in3 Remaining PAL arguments + */ +GLOBAL_ENTRY(ia64_pal_call_phys_stacked) + UNW(.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)) + alloc loc1 = ar.pfs,5,5,86,0 + movl loc2 = pal_entry_point +1: { + mov r28 = in0 // copy procedure index + mov loc0 = rp // save rp + } + .body + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + mov out0 = in0 // first argument + mov out1 = in1 // copy arg2 + mov out2 = in2 // copy arg3 + mov out3 = in3 // copy arg4 + ;; + mov loc3 = psr // save psr + ;; + mov loc4=ar.rsc // save RSE configuration + dep.z loc2=loc2,0,61 // convert pal entry point to physical + ;; + mov ar.rsc=r0 // put RSE in enforced lazy, LE mode + movl r16=PAL_PSR_BITS_TO_CLEAR + movl r17=PAL_PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 // add in psr the bits to set + mov b7 = loc2 // install target to branch reg + ;; + andcm r16=loc3,r16 // removes bits to clear from psr + br.call.sptk.few rp=ia64_switch_mode +.ret6: + br.call.sptk.many rp=b7 // now make the call +.ret7: + mov ar.rsc=r0 // put RSE in enforced lazy, LE mode + mov r16=loc3 // r16= original psr + br.call.sptk.few rp=ia64_switch_mode // return to virtual mode + +.ret8: mov psr.l = loc3 // restore init PSR + mov ar.pfs = loc1 + mov rp = loc0 + ;; + mov ar.rsc=loc4 // restore RSE configuration + srlz.d // serialize restoration of psr.l + br.ret.sptk.few b0 +END(ia64_pal_call_phys_stacked) + diff -urN linux-2.4.0-test6/arch/ia64/kernel/palinfo.c linux-2.4.0-test6-lia/arch/ia64/kernel/palinfo.c --- linux-2.4.0-test6/arch/ia64/kernel/palinfo.c Thu Jun 22 07:09:44 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/palinfo.c Fri Aug 11 18:12:58 2000 @@ -21,19 +21,35 @@ #include #include #include +#include +#if defined(MODVERSIONS) +#include +#endif #include #include #include #include #include +#ifdef CONFIG_SMP +#include +#endif + +MODULE_AUTHOR("Stephane Eranian "); +MODULE_DESCRIPTION("/proc interface to IA-64 PAL"); /* - * Hope to get rid of 
these in a near future + * Hope to get rid of this one in a near future */ #define IA64_PAL_VERSION_BUG 1 -#define PALINFO_VERSION "0.1" +#define PALINFO_VERSION "0.3" + +#ifdef CONFIG_SMP +#define cpu_is_online(i) (cpu_online_map & (1UL << i)) +#else +#define cpu_is_online(i) 1 +#endif typedef int (*palinfo_func_t)(char*); @@ -43,7 +59,6 @@ struct proc_dir_entry *entry; /* registered entry (removal) */ } palinfo_entry_t; -static struct proc_dir_entry *palinfo_dir; /* * A bunch of string array to get pretty printing @@ -95,7 +110,7 @@ #define RSE_HINTS_COUNT (sizeof(rse_hints)/sizeof(const char *)) /* - * The current resvision of the Volume 2 of + * The current revision of the Volume 2 of * IA-64 Architecture Software Developer's Manual is wrong. * Table 4-10 has invalid information concerning the ma field: * Correct table is: @@ -121,64 +136,31 @@ /* * Allocate a buffer suitable for calling PAL code in Virtual mode * - * The documentation (PAL2.6) requires thius buffer to have a pinned - * translation to avoid any DTLB faults. For this reason we allocate - * a page (large enough to hold any possible reply) and use a DTC - * to hold the translation during the call. A call the free_palbuffer() - * is required to release ALL resources (page + translation). - * - * The size of the page allocated is based on the PAGE_SIZE defined - * at compile time for the kernel, i.e. >= 4Kb. + * The documentation (PAL2.6) allows DTLB misses on the buffer. So + * using the TC is enough, no need to pin the entry. * - * Return: a pointer to the newly allocated page (virtual address) + * We allocate a kernel-sized page (at least 4KB). This is enough to + * hold any possible reply. 
*/ -static void * +static inline void * get_palcall_buffer(void) { void *tmp; tmp = (void *)__get_free_page(GFP_KERNEL); if (tmp == 0) { - printk(KERN_ERR "%s: can't get a buffer page\n", __FUNCTION__); - } else if ( ((u64)tmp - PAGE_OFFSET) > (1<<_PAGE_SIZE_256M) ) { /* XXX: temporary hack */ - unsigned long flags; - - /* PSR.ic must be zero to insert new DTR */ - ia64_clear_ic(flags); - - /* - * we only insert of DTR - * - * XXX: we need to figure out a way to "allocate" TR(s) to avoid - * conflicts. Maybe something in an include file like pgtable.h - * page.h or processor.h - * - * ITR0/DTR0: used for kernel code/data - * ITR1/DTR1: used by HP simulator - * ITR2/DTR2: used to map PAL code - */ - ia64_itr(0x2, 3, (u64)tmp, - pte_val(mk_pte_phys(__pa(tmp), __pgprot(__DIRTY_BITS|_PAGE_PL_0|_PAGE_AR_RW))), PAGE_SHIFT); - - ia64_srlz_d (); - - __restore_flags(flags); - } - + printk(KERN_ERR __FUNCTION__" : can't get a buffer page\n"); + } return tmp; } /* * Free a palcall buffer allocated with the previous call - * - * The translation is also purged. 
*/ -static void +static inline void free_palcall_buffer(void *addr) { __free_page(addr); - ia64_ptr(0x2, (u64)addr, PAGE_SHIFT); - ia64_srlz_d (); } /* @@ -564,7 +546,6 @@ int i; s64 ret; - /* must be in physical mode */ if ((ret=ia64_pal_proc_get_features(&avail, &status, &control)) != 0) return 0; for(i=0; i < 64; i++, v++,avail >>=1, status >>=1, control >>=1) { @@ -577,6 +558,57 @@ return p - page; } +static const char *bus_features[]={ + NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, + NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL, + NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, + NULL,NULL, + "Request Bus Parking", + "Bus Lock Mask", + "Enable Half Transfer", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, + "Disable Transaction Queuing", + "Disable Reponse Error Checking", + "Disable Bus Error Checking", + "Disable Bus Requester Internal Error Signalling", + "Disable Bus Requester Error Signalling", + "Disable Bus Initialization Event Checking", + "Disable Bus Initialization Event Signalling", + "Disable Bus Address Error Checking", + "Disable Bus Address Error Signalling", + "Disable Bus Data Error Checking" +}; + + +static int +bus_info(char *page) +{ + char *p = page; + const char **v = bus_features; + pal_bus_features_u_t av, st, ct; + u64 avail, status, control; + int i; + s64 ret; + + if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0) return 0; + + avail = av.pal_bus_features_val; + status = st.pal_bus_features_val; + control = ct.pal_bus_features_val; + + for(i=0; i < 64; i++, v++, avail >>=1, status >>=1, control >>=1) { + if ( ! *v ) continue; + p += sprintf(p, "%-48s : %s%s %s\n", *v, + avail & 0x1 ? "" : "NotImpl", + avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "", + avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): ""); + } + return p - page; +} + + /* * physical mode call for PAL_VERSION is working fine. 
* This function is meant to go away once PAL get fixed. @@ -613,21 +645,25 @@ #endif if (status != 0) return 0; - p += sprintf(p, "PAL_vendor : 0x%x (min=0x%x)\n" \ - "PAL_A revision : 0x%x (min=0x%x)\n" \ - "PAL_A model : 0x%x (min=0x%x)\n" \ - "PAL_B mode : 0x%x (min=0x%x)\n" \ - "PAL_B revision : 0x%x (min=0x%x)\n", + p += sprintf(p, "PAL_vendor : 0x%02x (min=0x%02x)\n" \ + "PAL_A : %x.%x.%x (min=%x.%x.%x)\n" \ + "PAL_B : %x.%x.%x (min=%x.%x.%x)\n", cur_ver.pal_version_s.pv_pal_vendor, min_ver.pal_version_s.pv_pal_vendor, + + cur_ver.pal_version_s.pv_pal_a_model>>4, + cur_ver.pal_version_s.pv_pal_a_model&0xf, cur_ver.pal_version_s.pv_pal_a_rev, - cur_ver.pal_version_s.pv_pal_a_rev, - cur_ver.pal_version_s.pv_pal_a_model, - min_ver.pal_version_s.pv_pal_a_model, + min_ver.pal_version_s.pv_pal_a_model>>4, + min_ver.pal_version_s.pv_pal_a_model&0xf, + min_ver.pal_version_s.pv_pal_a_rev, + + cur_ver.pal_version_s.pv_pal_b_model>>4, + cur_ver.pal_version_s.pv_pal_b_model&0xf, cur_ver.pal_version_s.pv_pal_b_rev, - min_ver.pal_version_s.pv_pal_b_rev, - cur_ver.pal_version_s.pv_pal_b_model, - min_ver.pal_version_s.pv_pal_b_model); + min_ver.pal_version_s.pv_pal_b_model>>4, + min_ver.pal_version_s.pv_pal_b_model&0xf, + min_ver.pal_version_s.pv_pal_b_rev); return p - page; } @@ -648,6 +684,9 @@ } #ifdef IA64_PAL_PERF_MON_INFO_BUG + /* + * This bug has been fixed in PAL 2.2.9 and higher + */ pm_buffer[5]=0x3; pm_info.pal_perf_mon_info_s.cycles = 0x12; pm_info.pal_perf_mon_info_s.retired = 0x08; @@ -708,30 +747,111 @@ return p - page; } - -/* - * Entry point routine: all calls go trhough this function - */ static int -palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data) +tr_info(char *page) { - palinfo_func_t info = (palinfo_func_t)data; - int len = info(page); + char *p = page; + s64 status; + pal_tr_valid_u_t tr_valid; + u64 tr_buffer[4]; + pal_vm_info_1_u_t vm_info_1; + pal_vm_info_2_u_t vm_info_2; + int i, j; + u64 max[3], pgm; + 
struct ifa_reg { + u64 valid:1; + u64 ig:11; + u64 vpn:52; + } *ifa_reg; + struct itir_reg { + u64 rv1:2; + u64 ps:6; + u64 key:24; + u64 rv2:32; + } *itir_reg; + struct gr_reg { + u64 p:1; + u64 rv1:1; + u64 ma:3; + u64 a:1; + u64 d:1; + u64 pl:2; + u64 ar:3; + u64 ppn:38; + u64 rv2:2; + u64 ed:1; + u64 ig:11; + } *gr_reg; + struct rid_reg { + u64 ig1:1; + u64 rv1:1; + u64 ig2:6; + u64 rid:24; + u64 rv2:32; + } *rid_reg; - if (len <= off+count) *eof = 1; + if ((status=ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) { + printk("ia64_pal_vm_summary=%ld\n", status); + return 0; + } + max[0] = vm_info_1.pal_vm_info_1_s.max_itr_entry+1; + max[1] = vm_info_1.pal_vm_info_1_s.max_dtr_entry+1; - *start = page + off; - len -= off; + for (i=0; i < 2; i++ ) { + for (j=0; j < max[i]; j++) { - if (len>count) len = count; - if (len<0) len = 0; + status = ia64_pal_tr_read(j, i, tr_buffer, &tr_valid); + if (status != 0) { + printk(__FUNCTION__ " pal call failed on tr[%d:%d]=%ld\n", i, j, status); + continue; + } - return len; + ifa_reg = (struct ifa_reg *)&tr_buffer[2]; + + if (ifa_reg->valid == 0) continue; + + gr_reg = (struct gr_reg *)tr_buffer; + itir_reg = (struct itir_reg *)&tr_buffer[1]; + rid_reg = (struct rid_reg *)&tr_buffer[3]; + + pgm = -1 << (itir_reg->ps - 12); + p += sprintf(p, "%cTR%d: av=%d pv=%d dv=%d mv=%d\n" \ + "\tppn : 0x%lx\n" \ + "\tvpn : 0x%lx\n" \ + "\tps : ", + + "ID"[i], + j, + tr_valid.pal_tr_valid_s.access_rights_valid, + tr_valid.pal_tr_valid_s.priv_level_valid, + tr_valid.pal_tr_valid_s.dirty_bit_valid, + tr_valid.pal_tr_valid_s.mem_attr_valid, + (gr_reg->ppn & pgm)<< 12, + (ifa_reg->vpn & pgm)<< 12); + + p = bitvector_process(p, 1<< itir_reg->ps); + + p += sprintf(p, "\n\tpl : %d\n" \ + "\tar : %d\n" \ + "\trid : %x\n" \ + "\tp : %d\n" \ + "\tma : %d\n" \ + "\td : %d\n", + gr_reg->pl, + gr_reg->ar, + rid_reg->rid, + gr_reg->p, + gr_reg->ma, + gr_reg->d); + } + } + return p - page; } + + /* - * List names,function pairs for every entry in 
/proc/palinfo - * Must be terminated with the NULL,NULL entry. + * List {name,function} pairs for every entry in /proc/palinfo/cpu* */ static palinfo_entry_t palinfo_entries[]={ { "version_info", version_info, }, @@ -742,38 +862,190 @@ { "processor_info", processor_info, }, { "perfmon_info", perfmon_info, }, { "frequency_info", frequency_info, }, - { NULL, NULL,} + { "bus_info", bus_info }, + { "tr_info", tr_info, } }; +#define NR_PALINFO_ENTRIES (sizeof(palinfo_entries)/sizeof(palinfo_entry_t)) + +/* + * this array is used to keep track of the proc entries we create. This is + * required in the module mode when we need to remove all entries. The procfs code + * does not do recursion of deletion + * + * Notes: + * - first +1 accounts for the cpuN entry + * - second +1 account for toplevel palinfo + * + */ +#define NR_PALINFO_PROC_ENTRIES (NR_CPUS*(NR_PALINFO_ENTRIES+1)+1) + +static struct proc_dir_entry *palinfo_proc_entries[NR_PALINFO_PROC_ENTRIES]; + +/* + * This data structure is used to pass which cpu,function is being requested + * It must fit in a 64bit quantity to be passed to the proc callback routine + * + * In SMP mode, when we get a request for another CPU, we must call that + * other CPU using IPI and wait for the result before returning. 
+ */ +typedef union { + u64 value; + struct { + unsigned req_cpu: 32; /* for which CPU this info is */ + unsigned func_id: 32; /* which function is requested */ + } pal_func_cpu; +} pal_func_cpu_u_t; + +#define req_cpu pal_func_cpu.req_cpu +#define func_id pal_func_cpu.func_id + +#ifdef CONFIG_SMP + +/* + * used to hold information about final function to call + */ +typedef struct { + palinfo_func_t func; /* pointer to function to call */ + char *page; /* buffer to store results */ + int ret; /* return value from call */ +} palinfo_smp_data_t; + + +/* + * this function does the actual final call and is called + * from the smp code, i.e., this is the palinfo callback routine + */ +static void +palinfo_smp_call(void *info) +{ + palinfo_smp_data_t *data = (palinfo_smp_data_t *)info; + /* printk(__FUNCTION__" called on CPU %d\n", smp_processor_id());*/ + if (data == NULL) { + printk(KERN_ERR __FUNCTION__" data pointer is NULL\n"); + data->ret = 0; /* XXX: data is NULL here -- this dereference will fault; fix before merging */ + return; + } + /* make the actual call */ + data->ret = (*data->func)(data->page); +} + +/* + * function called to trigger the IPI, we need to access a remote CPU + * Return: + * 0 : error or nothing to output + * otherwise how many bytes in the "page" buffer were written + */ +static +int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) +{ + palinfo_smp_data_t ptr; + int ret; + + ptr.func = palinfo_entries[f->func_id].proc_read; + ptr.page = page; + ptr.ret = 0; /* just in case */ + + /*printk(__FUNCTION__" calling CPU %d from CPU %d for function %d\n", f->req_cpu,smp_processor_id(), f->func_id);*/ + + /* will send IPI to other CPU and wait for completion of remote call */ + if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 0, 1))) { + printk(__FUNCTION__" remote CPU call from %d to %d on function %d: error %d\n", smp_processor_id(), f->req_cpu, f->func_id, ret); + return 0; + } + return ptr.ret; +} +#else /* ! 
CONFIG_SMP */ +static +int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) +{ + printk(__FUNCTION__" should not be called with non SMP kernel\n"); + return 0; +} +#endif /* CONFIG_SMP */ + +/* + * Entry point routine: all calls go through this function + */ +static int +palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data) +{ + int len=0; + pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&data; + + MOD_INC_USE_COUNT; + /* + * in SMP mode, we may need to call another CPU to get correct + * information. PAL, by definition, is processor specific + */ + if (f->req_cpu == smp_processor_id()) + len = (*palinfo_entries[f->func_id].proc_read)(page); + else + len = palinfo_handle_smp(f, page); + + if (len <= off+count) *eof = 1; + + *start = page + off; + len -= off; + + if (len>count) len = count; + if (len<0) len = 0; + + MOD_DEC_USE_COUNT; + + return len; +} static int __init palinfo_init(void) { - palinfo_entry_t *p; +# define CPUSTR "cpu%d" + + pal_func_cpu_u_t f; + struct proc_dir_entry **pdir = palinfo_proc_entries; + struct proc_dir_entry *palinfo_dir, *cpu_dir; + int i, j; + char cpustr[sizeof(CPUSTR)]; printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION); - palinfo_dir = create_proc_entry("palinfo", S_IFDIR | S_IRUGO | S_IXUGO, NULL); + palinfo_dir = proc_mkdir("pal", NULL); + + /* + * we keep track of created entries in a depth-first order for + * cleanup purposes. 
Each entry is stored into palinfo_proc_entries + */ + for (i=0; i < NR_CPUS; i++) { + + if (!cpu_is_online(i)) continue; + + sprintf(cpustr,CPUSTR, i); + + cpu_dir = proc_mkdir(cpustr, palinfo_dir); - for (p = palinfo_entries; p->name ; p++){ - p->entry = create_proc_read_entry (p->name, 0, palinfo_dir, - palinfo_read_entry, p->proc_read); + f.req_cpu = i; + + for (j=0; j < NR_PALINFO_ENTRIES; j++) { + f.func_id = j; + *pdir++ = create_proc_read_entry (palinfo_entries[j].name, 0, cpu_dir, + palinfo_read_entry, (void *)f.value); + } + *pdir++ = cpu_dir; } + *pdir = palinfo_dir; return 0; } -static int __exit +static void __exit palinfo_exit(void) { - palinfo_entry_t *p; + int i = 0; - for (p = palinfo_entries; p->name ; p++){ - remove_proc_entry (p->name, palinfo_dir); + /* remove all nodes: depth first pass */ + for (i=0; i< NR_PALINFO_PROC_ENTRIES ; i++) { + remove_proc_entry (palinfo_proc_entries[i]->name, NULL); } - remove_proc_entry ("palinfo", 0); - - return 0; } module_init(palinfo_init); diff -urN linux-2.4.0-test6/arch/ia64/kernel/pci-dma.c linux-2.4.0-test6-lia/arch/ia64/kernel/pci-dma.c --- linux-2.4.0-test6/arch/ia64/kernel/pci-dma.c Thu Jun 22 07:09:44 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/pci-dma.c Mon Jul 31 14:01:22 2000 @@ -3,34 +3,509 @@ * * This implementation is for IA-64 platforms that do not support * I/O TLBs (aka DMA address translation hardware). - * - * XXX This doesn't do the right thing yet. It appears we would have - * to add additional zones so we can implement the various address - * mask constraints that we might encounter. A zone for memory < 32 - * bits is obviously necessary... 
+ * Copyright (C) 2000 Asit Mallick + * Copyright (C) 2000 Goutham Rao */ -#include +#include + #include -#include #include +#include +#include +#include #include +#include +#include + +#ifdef CONFIG_SWIOTLB + +#include +#include + +#define ALIGN(val, align) ((unsigned long) (((unsigned long) (val) + ((align) - 1)) & ~((align) - 1))) + +/* + * log of the size of each IO TLB slab. The number of slabs is command line + * controllable. + */ +#define IO_TLB_SHIFT 11 + +/* + * Used to do a quick range check in pci_unmap_single and pci_sync_single, to see if the + * memory was in fact allocated by this API. + */ +static char *io_tlb_start, *io_tlb_end; + +/* + * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and io_tlb_end. + * This is command line adjustable via setup_io_tlb_npages. + */ +unsigned long io_tlb_nslabs = 1024; + +/* + * This is a free list describing the number of free entries available from each index + */ +static unsigned int *io_tlb_list; +static unsigned int io_tlb_index; + +/* + * We need to save away the original address corresponding to a mapped entry for the sync + * operations. + */ +static unsigned char **io_tlb_orig_addr; + +/* + * Protect the above data structures in the map and unmap calls + */ +spinlock_t io_tlb_lock = SPIN_LOCK_UNLOCKED; + +static int __init +setup_io_tlb_npages (char *str) +{ + io_tlb_nslabs = simple_strtoul(str, NULL, 0) << (PAGE_SHIFT - IO_TLB_SHIFT); + return 1; +} +__setup("swiotlb=", setup_io_tlb_npages); + +/* + * Statically reserve bounce buffer space and initialize bounce buffer + * data structures for the software IO TLB used to implement the PCI DMA API + */ +void +setup_swiotlb (void) +{ + int i; + + /* + * Get IO TLB memory from the low pages + */ + io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT)); + if (!io_tlb_start) + BUG(); + io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT); + + /* + * Allocate and initialize the free list array. 
This array is used + * to find contiguous free memory regions of size 2^IO_TLB_SHIFT between + * io_tlb_start and io_tlb_end. + */ + io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); + for (i = 0; i < io_tlb_nslabs; i++) + io_tlb_list[i] = io_tlb_nslabs - i; + io_tlb_index = 0; + io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *)); + + printk("Placing software IO TLB between 0x%p - 0x%p\n", io_tlb_start, io_tlb_end); +} + +/* + * Allocates bounce buffer and returns its kernel virtual address. + */ +static void * +__pci_map_single (struct pci_dev *hwdev, char *buffer, size_t size, int direction) +{ + unsigned long flags; + char *dma_addr; + unsigned int i, nslots, stride, index, wrap; + + /* + * For mappings greater than a page size, we limit the stride (and hence alignment) + * to a page size. + */ + nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + if (size > (1 << PAGE_SHIFT)) + stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); + else + stride = nslots; + + if (!nslots) + BUG(); + + /* + * Find suitable number of IO TLB entries size that will fit this request and allocate a buffer + * from that IO TLB pool. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + { + wrap = index = ALIGN(io_tlb_index, stride); + do { + /* + * If we find a slot that indicates we have 'nslots' number of + * contiguous buffers, we allocate the buffers from that slot and mark the + * entries as '0' indicating unavailable. + */ + if (io_tlb_list[index] >= nslots) { + for (i = index; i < index + nslots; i++) + io_tlb_list[i] = 0; + dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); + + /* + * Update the indices to avoid searching in the next round. + */ + io_tlb_index = (index + nslots) < io_tlb_nslabs ? (index + nslots) : 0; + + goto found; + } + index += stride; + if (index >= io_tlb_nslabs) + index = 0; + } while (index != wrap); + + /* + * XXX What is a suitable recovery mechanism here? We cannot + * sleep because we are called from with in interrupts! 
+ */ + panic("__pci_map_single: could not allocate software IO TLB (%ld bytes)", size); +found: + } + spin_unlock_irqrestore(&io_tlb_lock, flags); + + /* + * Save away the mapping from the original address to the DMA address. This is needed + * when we sync the memory. Then we sync the buffer if needed. + */ + io_tlb_orig_addr[index] = buffer; + if (direction == PCI_DMA_TODEVICE || direction == PCI_DMA_BIDIRECTIONAL) + memcpy(dma_addr, buffer, size); + + return dma_addr; +} + +/* + * dma_addr is the kernel virtual address of the bounce buffer to unmap. + */ +static void +__pci_unmap_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direction) +{ + unsigned long flags; + int i, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + char *buffer = io_tlb_orig_addr[index]; + + /* + * First, sync the memory before unmapping the entry + */ + if ((direction == PCI_DMA_FROMDEVICE) || (direction == PCI_DMA_BIDIRECTIONAL)) + /* + * bounce... copy the data back into the original buffer + * and delete the bounce buffer. + */ + memcpy(buffer, dma_addr, size); + + /* + * Return the buffer to the free list by setting the corresponding entries to indicate + * the number of contigous entries available. + * While returning the entries to the free list, we merge the entries with slots below + * and above the pool being returned. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + { + int count = ((index + nslots) < io_tlb_nslabs ? 
io_tlb_list[index + nslots] : 0); + /* + * Step 1: return the slots to the free list, merging the slots with succeeding slots + */ + for (i = index + nslots - 1; i >= index; i--) + io_tlb_list[i] = ++count; + /* + * Step 2: merge the returned slots with the preceding slots, if available (non-zero) + */ + for (i = index - 1; (i >= 0) && io_tlb_list[i]; i--) + io_tlb_list[i] += io_tlb_list[index]; + } + spin_unlock_irqrestore(&io_tlb_lock, flags); +} + +static void +__pci_sync_single (struct pci_dev *hwdev, char *dma_addr, size_t size, int direction) +{ + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + char *buffer = io_tlb_orig_addr[index]; + + /* + * bounce... copy the data back into/from the original buffer + * XXX How do you handle PCI_DMA_BIDIRECTIONAL here ? + */ + if (direction == PCI_DMA_FROMDEVICE) + memcpy(buffer, dma_addr, size); + else if (direction == PCI_DMA_TODEVICE) + memcpy(dma_addr, buffer, size); + else + BUG(); +} + +/* + * Map a single buffer of the indicated size for DMA in streaming mode. + * The PCI address to use is returned. + * + * Once the device is given the dma address, the device owns this memory + * until either pci_unmap_single or pci_dma_sync_single is performed. + */ +dma_addr_t +pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction) +{ + unsigned long pci_addr = virt_to_phys(ptr); + + if (direction == PCI_DMA_NONE) + BUG(); + /* + * Check if the PCI device can DMA to ptr... if so, just return ptr + */ + if ((pci_addr & ~hwdev->dma_mask) == 0) + /* + * Device is bit capable of DMA'ing to the + * buffer...
just return the PCI address of ptr + */ + return pci_addr; + + /* + * get a bounce buffer: + */ + pci_addr = virt_to_phys(__pci_map_single(hwdev, ptr, size, direction)); + + /* + * Ensure that the address returned is DMA'ble: + */ + if ((pci_addr & ~hwdev->dma_mask) != 0) + panic("__pci_map_single: bounce buffer is not DMA'ble"); + + return pci_addr; +} + +/* + * Unmap a single streaming mode DMA translation. The dma_addr and size + * must match what was provided for in a previous pci_map_single call. All + * other usages are undefined. + * + * After this call, reads by the cpu to the buffer are guarenteed to see + * whatever the device wrote there. + */ +void +pci_unmap_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, int direction) +{ + char *dma_addr = phys_to_virt(pci_addr); + + if (direction == PCI_DMA_NONE) + BUG(); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + __pci_unmap_single(hwdev, dma_addr, size, direction); +} + +/* + * Make physical memory consistent for a single + * streaming mode DMA translation after a transfer. + * + * If you perform a pci_map_single() but wish to interrogate the + * buffer using the cpu, yet do not wish to teardown the PCI dma + * mapping, you must call this function before doing so. At the + * next point you give the PCI dma address back to the card, the + * device again owns the buffer. + */ +void +pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t pci_addr, size_t size, int direction) +{ + char *dma_addr = phys_to_virt(pci_addr); + + if (direction == PCI_DMA_NONE) + BUG(); + if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end) + __pci_sync_single(hwdev, dma_addr, size, direction); +} + +/* + * Map a set of buffers described by scatterlist in streaming + * mode for DMA. This is the scather-gather version of the + * above pci_map_single interface. Here the scatter gather list + * elements are each tagged with the appropriate dma address + * and length. 
They are obtained via sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for pci_map_single are + * the same here. + */ +int +pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +{ + int i; + + if (direction == PCI_DMA_NONE) + BUG(); + + for (i = 0; i < nelems; i++, sg++) { + sg->orig_address = sg->address; + if ((virt_to_phys(sg->address) & ~hwdev->dma_mask) != 0) { + sg->address = __pci_map_single(hwdev, sg->address, sg->length, direction); + } + } + return nelems; +} + +/* + * Unmap a set of streaming mode DMA translations. + * Again, cpu read rules concerning calls here are the same as for + * pci_unmap_single() above. + */ +void +pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +{ + int i; + + if (direction == PCI_DMA_NONE) + BUG(); + + for (i = 0; i < nelems; i++, sg++) + if (sg->orig_address != sg->address) { + __pci_unmap_single(hwdev, sg->address, sg->length, direction); + sg->address = sg->orig_address; + } +} + +/* + * Make physical memory consistent for a set of streaming mode DMA + * translations after a transfer. + * + * The same as pci_dma_sync_single but for a scatter-gather list, + * same rules and usage. + */ +void +pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +{ + int i; + + if (direction == PCI_DMA_NONE) + BUG(); + + for (i = 0; i < nelems; i++, sg++) + if (sg->orig_address != sg->address) + __pci_sync_single(hwdev, sg->address, sg->length, direction); +} + +#else +/* + * Map a single buffer of the indicated size for DMA in streaming mode. + * The 32-bit bus address to use is returned. 
+ * + * Once the device is given the dma address, the device owns this memory + * until either pci_unmap_single or pci_dma_sync_single is performed. + */ +extern inline dma_addr_t +pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + return virt_to_bus(ptr); +} + +/* + * Unmap a single streaming mode DMA translation. The dma_addr and size + * must match what was provided for in a previous pci_map_single call. All + * other usages are undefined. + * + * After this call, reads by the cpu to the buffer are guarenteed to see + * whatever the device wrote there. + */ +extern inline void +pci_unmap_single (struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} +/* + * Map a set of buffers described by scatterlist in streaming + * mode for DMA. This is the scather-gather version of the + * above pci_map_single interface. Here the scatter gather list + * elements are each tagged with the appropriate dma address + * and length. They are obtained via sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for pci_map_single are + * the same here. + */ +extern inline int +pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + return nents; +} + +/* + * Unmap a set of streaming mode DMA translations. + * Again, cpu read rules concerning calls here are the same as for + * pci_unmap_single() above. 
+ */ +extern inline void +pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} +/* + * Make physical memory consistent for a single + * streaming mode DMA translation after a transfer. + * + * If you perform a pci_map_single() but wish to interrogate the + * buffer using the cpu, yet do not wish to teardown the PCI dma + * mapping, you must call this function before doing so. At the + * next point you give the PCI dma address back to the card, the + * device again owns the buffer. + */ +extern inline void +pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + +/* + * Make physical memory consistent for a set of streaming mode DMA + * translations after a transfer. + * + * The same as pci_dma_sync_single but for a scatter-gather list, + * same rules and usage. + */ +extern inline void +pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) +{ + if (direction == PCI_DMA_NONE) + BUG(); + /* Nothing to do */ +} + +#endif /* CONFIG_SWIOTLB */ void * pci_alloc_consistent (struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle) { - void *ret; + unsigned long pci_addr; int gfp = GFP_ATOMIC; + void *ret; - if (!hwdev || hwdev->dma_mask == 0xffffffff) - gfp |= GFP_DMA; /* XXX fix me: should change this to GFP_32BIT or ZONE_32BIT */ + if (!hwdev || hwdev->dma_mask <= 0xffffffff) + gfp |= GFP_DMA; /* XXX fix me: should change this to GFP_32BIT or ZONE_32BIT */ ret = (void *)__get_free_pages(gfp, get_order(size)); + if (!ret) + return NULL; - if (ret) { - memset(ret, 0, size); - *dma_handle = virt_to_bus(ret); - } + memset(ret, 0, size); + pci_addr = virt_to_phys(ret); + if ((pci_addr & ~hwdev->dma_mask) != 0) + panic("pci_alloc_consistent: allocated memory is out of range for PCI device"); + *dma_handle = pci_addr; return 
ret; } diff -urN linux-2.4.0-test6/arch/ia64/kernel/perfmon.c linux-2.4.0-test6-lia/arch/ia64/kernel/perfmon.c --- linux-2.4.0-test6/arch/ia64/kernel/perfmon.c Fri Mar 10 15:24:02 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/perfmon.c Fri Aug 11 18:19:21 2000 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -55,24 +56,23 @@ #define WRITE_PMCS 0xa1 #define READ_PMDS 0xa2 #define STOP_PMCS 0xa3 -#define IA64_COUNTER_MASK 0xffffffffffffff6f -#define PERF_OVFL_VAL 0xffffffff +#define IA64_COUNTER_MASK 0xffffffffffffff6fL +#define PERF_OVFL_VAL 0xffffffffL + +volatile int used_by_system; struct perfmon_counter { unsigned long data; unsigned long counter_num; }; -unsigned long pmds[MAX_PERF_COUNTER]; -struct task_struct *perf_owner=NULL; +unsigned long pmds[NR_CPUS][MAX_PERF_COUNTER]; asmlinkage unsigned long sys_perfmonctl (int cmd1, int cmd2, void *ptr) { struct perfmon_counter tmp, *cptr = ptr; - unsigned long pmd, cnum, dcr, flags; - struct task_struct *p; - struct pt_regs *regs; + unsigned long cnum, dcr, flags; struct perf_counter; int i; @@ -80,22 +80,24 @@ case WRITE_PMCS: /* Writes to PMC's and clears PMDs */ case WRITE_PMCS_AND_START: /* Also starts counting */ - if (!access_ok(VERIFY_READ, cptr, sizeof(struct perf_counter)*cmd2)) - return -EFAULT; + if (cmd2 <= 0 || cmd2 > MAX_PERF_COUNTER - used_by_system) + return -EINVAL; - if (cmd2 > MAX_PERF_COUNTER) + if (!access_ok(VERIFY_READ, cptr, sizeof(struct perf_counter)*cmd2)) return -EFAULT; - if (perf_owner && perf_owner != current) - return -EBUSY; - perf_owner = current; + current->thread.flags |= IA64_THREAD_PM_VALID; for (i = 0; i < cmd2; i++, cptr++) { copy_from_user(&tmp, cptr, sizeof(tmp)); /* XXX need to check validity of counter_num and perhaps data!! 
*/ + if (tmp.counter_num < 4 + || tmp.counter_num >= 4 + MAX_PERF_COUNTER - used_by_system) + return -EFAULT; + ia64_set_pmc(tmp.counter_num, tmp.data); ia64_set_pmd(tmp.counter_num, 0); - pmds[tmp.counter_num - 4] = 0; + pmds[smp_processor_id()][tmp.counter_num - 4] = 0; } if (cmd1 == WRITE_PMCS_AND_START) { @@ -104,26 +106,13 @@ dcr |= IA64_DCR_PP; ia64_set_dcr(dcr); local_irq_restore(flags); - - /* - * This is a no can do. It obviously wouldn't - * work on SMP where another process may not - * be blocked at all. We need to put in a perfmon - * IPI to take care of MP systems. See blurb above. - */ - lock_kernel(); - for_each_task(p) { - regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) -1 ; - ia64_psr(regs)->pp = 1; - } - unlock_kernel(); ia64_set_pmc(0, 0); } break; case READ_PMDS: - if (cmd2 > MAX_PERF_COUNTER) - return -EFAULT; + if (cmd2 <= 0 || cmd2 > MAX_PERF_COUNTER - used_by_system) + return -EINVAL; if (!access_ok(VERIFY_WRITE, cptr, sizeof(struct perf_counter)*cmd2)) return -EFAULT; @@ -153,9 +142,13 @@ * when we re-enabled interrupts. When I muck with dcr, * is the irq_save/restore needed? */ - for (i = 0, cnum = 4;i < MAX_PERF_COUNTER; i++, cnum++, cptr++){ - pmd = pmds[i] + (ia64_get_pmd(cnum) & PERF_OVFL_VAL); - put_user(pmd, &cptr->data); + for (i = 0, cnum = 4;i < cmd2; i++, cnum++, cptr++) { + tmp.data = (pmds[smp_processor_id()][i] + + (ia64_get_pmd(cnum) & PERF_OVFL_VAL)); + tmp.counter_num = cnum; + if (copy_to_user(cptr, &tmp, sizeof(tmp))) + return -EFAULT; + //put_user(pmd, &cptr->data); } local_irq_save(flags); __asm__ __volatile__("ssm psr.pp"); @@ -167,30 +160,22 @@ case STOP_PMCS: ia64_set_pmc(0, 1); - for (i = 0; i < MAX_PERF_COUNTER; ++i) - ia64_set_pmc(i, 0); + ia64_srlz_d(); + for (i = 0; i < MAX_PERF_COUNTER - used_by_system; ++i) + ia64_set_pmc(4+i, 0); - local_irq_save(flags); - dcr = ia64_get_dcr(); - dcr &= ~IA64_DCR_PP; - ia64_set_dcr(dcr); - local_irq_restore(flags); - /* - * This is a no can do. 
It obviously wouldn't - * work on SMP where another process may not - * be blocked at all. We need to put in a perfmon - * IPI to take care of MP systems. See blurb above. - */ - lock_kernel(); - for_each_task(p) { - regs = (struct pt_regs *) (((char *)p) + IA64_STK_OFFSET) - 1; - ia64_psr(regs)->pp = 0; + if (!used_by_system) { + local_irq_save(flags); + dcr = ia64_get_dcr(); + dcr &= ~IA64_DCR_PP; + ia64_set_dcr(dcr); + local_irq_restore(flags); } - unlock_kernel(); - perf_owner = NULL; + current->thread.flags &= ~(IA64_THREAD_PM_VALID); break; default: + return -EINVAL; break; } return 0; @@ -202,13 +187,13 @@ unsigned long mask, i, cnum, val; mask = ia64_get_pmc(0) >> 4; - for (i = 0, cnum = 4; i < MAX_PERF_COUNTER; cnum++, i++, mask >>= 1) { + for (i = 0, cnum = 4; i < MAX_PERF_COUNTER - used_by_system; cnum++, i++, mask >>= 1) { + val = 0; if (mask & 0x1) - val = PERF_OVFL_VAL; - else + val += PERF_OVFL_VAL + 1; /* since we got an interrupt, might as well clear every pmd. */ - val = ia64_get_pmd(cnum) & PERF_OVFL_VAL; - pmds[i] += val; + val += ia64_get_pmd(cnum) & PERF_OVFL_VAL; + pmds[smp_processor_id()][i] += val; ia64_set_pmd(cnum, 0); } } @@ -221,20 +206,61 @@ ia64_srlz_d(); } +static struct irqaction perfmon_irqaction = { + handler: perfmon_interrupt, + flags: SA_INTERRUPT, + name: "perfmon" +}; + void perfmon_init (void) { - if (request_irq(PERFMON_IRQ, perfmon_interrupt, 0, "perfmon", NULL)) { - printk("perfmon_init: could not allocate performance monitor vector %u\n", - PERFMON_IRQ); - return; - } + irq_desc[PERFMON_IRQ].status |= IRQ_PER_CPU; + irq_desc[PERFMON_IRQ].handler = &irq_type_ia64_sapic; + setup_irq(PERFMON_IRQ, &perfmon_irqaction); + ia64_set_pmv(PERFMON_IRQ); ia64_srlz_d(); printk("Initialized perfmon vector to %u\n",PERFMON_IRQ); } +void +perfmon_init_percpu (void) +{ + ia64_set_pmv(PERFMON_IRQ); + ia64_srlz_d(); +} + +void +ia64_save_pm_regs (struct thread_struct *t) +{ + int i; + + ia64_set_pmc(0, 1); + ia64_srlz_d(); + for (i=0; i< 
IA64_NUM_PM_REGS - used_by_system ; i++) { + t->pmd[i] = ia64_get_pmd(4+i); + t->pmod[i] = pmds[smp_processor_id()][i]; + t->pmc[i] = ia64_get_pmc(4+i); + } +} + +void +ia64_load_pm_regs (struct thread_struct *t) +{ + int i; + + for (i=0; i< IA64_NUM_PM_REGS - used_by_system ; i++) { + ia64_set_pmd(4+i, t->pmd[i]); + pmds[smp_processor_id()][i] = t->pmod[i]; + ia64_set_pmc(4+i, t->pmc[i]); + } + ia64_set_pmc(0, 0); + ia64_srlz_d(); +} + #else /* !CONFIG_PERFMON */ + asmlinkage unsigned long sys_perfmonctl (int cmd1, int cmd2, void *ptr) { diff -urN linux-2.4.0-test6/arch/ia64/kernel/process.c linux-2.4.0-test6-lia/arch/ia64/kernel/process.c --- linux-2.4.0-test6/arch/ia64/kernel/process.c Wed Aug 2 18:54:02 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/process.c Fri Aug 11 17:20:17 2000 @@ -27,6 +27,8 @@ #include #include +#ifdef CONFIG_IA64_NEW_UNWIND + static void do_show_stack (struct unw_frame_info *info, void *arg) { @@ -44,6 +46,8 @@ } while (unw_unwind(info) >= 0); } +#endif + void show_stack (struct task_struct *task) { @@ -118,15 +122,14 @@ current->nice = 20; current->counter = -100; -#ifdef CONFIG_SMP - if (!current->need_resched) - min_xtp(); -#endif while (1) { - while (!current->need_resched) { +#ifdef CONFIG_SMP + if (!current->need_resched) + min_xtp(); +#endif + while (!current->need_resched) continue; - } #ifdef CONFIG_SMP normal_xtp(); #endif @@ -157,11 +160,12 @@ void ia64_save_extra (struct task_struct *task) { - extern void ia64_save_debug_regs (unsigned long *save_area); - extern void ia32_save_state (struct thread_struct *thread); - if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) ia64_save_debug_regs(&task->thread.dbr[0]); +#ifdef CONFIG_PERFMON + if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) + ia64_save_pm_regs(&task->thread); +#endif if (IS_IA32_PROCESS(ia64_task_regs(task))) ia32_save_state(&task->thread); } @@ -169,11 +173,12 @@ void ia64_load_extra (struct task_struct *task) { - extern void ia64_load_debug_regs 
(unsigned long *save_area); - extern void ia32_load_state (struct thread_struct *thread); - if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) ia64_load_debug_regs(&task->thread.dbr[0]); +#ifdef CONFIG_PERFMON + if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) + ia64_load_pm_regs(&task->thread); +#endif if (IS_IA32_PROCESS(ia64_task_regs(task))) ia32_load_state(&task->thread); } @@ -530,17 +535,6 @@ if (ia64_get_fpu_owner() == current) { ia64_set_fpu_owner(0); } -} - -/* - * Free remaining state associated with DEAD_TASK. This is called - * after the parent of DEAD_TASK has collected the exist status of the - * task via wait(). - */ -void -release_thread (struct task_struct *dead_task) -{ - /* nothing to do */ } unsigned long diff -urN linux-2.4.0-test6/arch/ia64/kernel/ptrace.c linux-2.4.0-test6-lia/arch/ia64/kernel/ptrace.c --- linux-2.4.0-test6/arch/ia64/kernel/ptrace.c Wed Aug 2 18:54:02 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/ptrace.c Mon Jul 31 14:01:22 2000 @@ -549,6 +549,7 @@ ia64_sync_fph (struct task_struct *child) { if (ia64_psr(ia64_task_regs(child))->mfh && ia64_get_fpu_owner() == child) { + ia64_psr(ia64_task_regs(child))->mfh = 0; ia64_set_fpu_owner(0); ia64_save_fpu(&child->thread.fph[0]); child->thread.flags |= IA64_THREAD_FPH_VALID; diff -urN linux-2.4.0-test6/arch/ia64/kernel/sal.c linux-2.4.0-test6-lia/arch/ia64/kernel/sal.c --- linux-2.4.0-test6/arch/ia64/kernel/sal.c Fri Apr 21 15:21:24 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/sal.c Mon Jul 31 14:01:22 2000 @@ -156,6 +156,14 @@ struct ia64_sal_desc_platform_feature *pf = (void *) p; printk("SAL: Platform features "); +#ifdef CONFIG_IA64_HAVE_IRQREDIR + /* + * Early versions of SAL say we don't have + * IRQ redirection, even though we do... 
+ */ + pf->feature_mask |= (1 << 1); +#endif + if (pf->feature_mask & (1 << 0)) printk("BusLock "); diff -urN linux-2.4.0-test6/arch/ia64/kernel/semaphore.c linux-2.4.0-test6-lia/arch/ia64/kernel/semaphore.c --- linux-2.4.0-test6/arch/ia64/kernel/semaphore.c Fri Apr 21 15:21:24 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/semaphore.c Fri Aug 11 17:20:34 2000 @@ -222,9 +222,6 @@ void __down_read_failed (struct rw_semaphore *sem, long count) { - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - while (1) { if (count == -1) { down_read_failed_biased(sem); diff -urN linux-2.4.0-test6/arch/ia64/kernel/setup.c linux-2.4.0-test6-lia/arch/ia64/kernel/setup.c --- linux-2.4.0-test6/arch/ia64/kernel/setup.c Wed Aug 2 18:54:02 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/setup.c Mon Jul 31 14:01:22 2000 @@ -122,6 +122,10 @@ */ memcpy(&ia64_boot_param, (void *) ZERO_PAGE_ADDR, sizeof(ia64_boot_param)); + *cmdline_p = __va(ia64_boot_param.command_line); + strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line)); + saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */ + efi_init(); max_pfn = 0; @@ -133,19 +137,65 @@ */ bootmap_start = PAGE_ALIGN(__pa(&_end)); if (ia64_boot_param.initrd_size) - bootmap_start = PAGE_ALIGN(bootmap_start + ia64_boot_param.initrd_size); + bootmap_start = PAGE_ALIGN(bootmap_start + + ia64_boot_param.initrd_size); bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn); efi_memmap_walk(free_available_memory, 0); reserve_bootmem(bootmap_start, bootmap_size); + #ifdef CONFIG_BLK_DEV_INITRD initrd_start = ia64_boot_param.initrd_start; + if (initrd_start) { + u64 start, size; +# define is_same_page(a,b) (((a)&PAGE_MASK) == ((b)&PAGE_MASK)) + +#if 1 + /* XXX for now some backwards compatibility... 
*/ + if (initrd_start >= PAGE_OFFSET) + printk("Warning: boot loader passed virtual address " + "for initrd, please upgrade the loader\n"); + } else +#endif + /* + * The loader ONLY passes physical addresses + */ + initrd_start = (unsigned long)__va(initrd_start); initrd_end = initrd_start+ia64_boot_param.initrd_size; + start = initrd_start; + size = ia64_boot_param.initrd_size; + printk("Initial ramdisk at: 0x%p (%lu bytes)\n", (void *) initrd_start, ia64_boot_param.initrd_size); - reserve_bootmem(virt_to_phys(initrd_start), ia64_boot_param.initrd_size); + + /* + * The kernel end and the beginning of initrd can be + * on the same page. This would cause the page to be + * reserved twice. While not harmful, it does lead to + * a warning message which can cause confusion. Thus, + * we make sure that in this case we only reserve new + * pages, i.e., initrd only pages. We need to: + * + * - align up start + * - adjust size of reserved section accordingly + * + * It should be noted that this operation is only + * valid for the reserve_bootmem() call and does not + * affect the integrety of the initrd itself. + * + * reserve_bootmem() considers partial pages as reserved. 
+ */ + if (is_same_page(initrd_start, (unsigned long)&_end)) { + start = PAGE_ALIGN(start); + size -= start-initrd_start; + + printk("Initial ramdisk & kernel on the same page: " + "reserving start=%lx size=%ld bytes\n", + start, size); + } + reserve_bootmem(__pa(start), size); } #endif #if 0 @@ -164,27 +214,21 @@ /* process SAL system table: */ ia64_sal_init(efi.sal_systab); - *cmdline_p = __va(ia64_boot_param.command_line); - strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line)); - saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */ - - printk("args to kernel: %s\n", *cmdline_p); - #ifdef CONFIG_SMP bootstrap_processor = hard_smp_processor_id(); current->processor = bootstrap_processor; #endif cpu_init(); /* initialize the bootstrap CPU */ +#ifdef CONFIG_IA64_GENERIC + machvec_init(acpi_get_sysname()); +#endif + if (efi.acpi) { /* Parse the ACPI tables */ acpi_parse(efi.acpi); } -#ifdef CONFIG_IA64_GENERIC - machvec_init(acpi_get_sysname()); -#endif - #ifdef CONFIG_VT # if defined(CONFIG_VGA_CONSOLE) conswitchp = &vga_con; @@ -197,8 +241,16 @@ /* enable IA-64 Machine Check Abort Handling */ ia64_mca_init(); #endif + paging_init(); platform_setup(cmdline_p); + +#ifdef CONFIG_SWIOTLB + { + extern void setup_swiotlb (void); + setup_swiotlb(); + } +#endif } /* diff -urN linux-2.4.0-test6/arch/ia64/kernel/smp.c linux-2.4.0-test6-lia/arch/ia64/kernel/smp.c --- linux-2.4.0-test6/arch/ia64/kernel/smp.c Wed Aug 2 18:54:02 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/smp.c Fri Aug 11 20:40:15 2000 @@ -135,6 +135,7 @@ static inline int pointer_lock(void *lock, void *data, int retry) { + volatile long *ptr = lock; again: if (cmpxchg_acq((void **) lock, 0, data) == 0) return 0; @@ -142,7 +143,7 @@ if (!retry) return -EBUSY; - while (*(void **) lock) + while (*ptr) ; goto again; @@ -320,6 +321,58 @@ #endif /* !CONFIG_ITANIUM_PTCG */ /* + * Run a function on another CPU + * The function to run. This must be fast and non-blocking. 
+ * An arbitrary pointer to pass to the function. + * If true, keep retrying until ready. + * If true, wait until function has completed on other CPUs. + * [RETURNS] 0 on success, else a negative status code. + * + * Does not return until the remote CPU is nearly ready to execute + * or is or has executed. + */ + +int +smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int retry, int wait) +{ + struct smp_call_struct data; + long timeout; + int cpus = 1; + + if (cpuid == smp_processor_id()) { + printk(__FUNCTION__" trying to call self\n"); + return -EBUSY; + } + + data.func = func; + data.info = info; + data.wait = wait; + atomic_set(&data.unstarted_count, cpus); + atomic_set(&data.unfinished_count, cpus); + + if (pointer_lock(&smp_call_function_data, &data, retry)) + return -EBUSY; + + /* Send a message to all other CPUs and wait for them to respond */ + send_IPI_single(cpuid, IPI_CALL_FUNC); + + /* Wait for response */ + timeout = jiffies + HZ; + while ((atomic_read(&data.unstarted_count) > 0) && time_before(jiffies, timeout)) + barrier(); + if (atomic_read(&data.unstarted_count) > 0) { + smp_call_function_data = NULL; + return -ETIMEDOUT; + } + if (wait) + while (atomic_read(&data.unfinished_count) > 0) + barrier(); + /* unlock pointer */ + smp_call_function_data = NULL; + return 0; +} + +/* * Run a function on all other CPUs. * The function to run. This must be fast and non-blocking. * An arbitrary pointer to pass to the function. @@ -396,13 +449,19 @@ smp_do_timer(struct pt_regs *regs) { int cpu = smp_processor_id(); + int user = user_mode(regs); struct cpuinfo_ia64 *data = &cpu_data[cpu]; - if (!--data->prof_counter) { - irq_enter(cpu, TIMER_IRQ); - update_process_times(user_mode(regs)); + if (--data->prof_counter <= 0) { data->prof_counter = data->prof_multiplier; - irq_exit(cpu, TIMER_IRQ); + /* + * update_process_times() expects us to have done irq_enter(). 
+ * Besides, if we don't timer interrupts ignore the global + * interrupt lock, which is the WrongThing (tm) to do. + */ + irq_enter(cpu, 0); + update_process_times(user); + irq_exit(cpu, 0); } } @@ -473,6 +532,11 @@ extern void ia64_rid_init(void); extern void ia64_init_itm(void); extern void ia64_cpu_local_tick(void); +#ifdef CONFIG_PERFMON + extern void perfmon_init_percpu(void); +#endif + + efi_map_pal_code(); cpu_init(); @@ -480,6 +544,10 @@ /* setup the CPU local timer tick */ ia64_init_itm(); + +#ifdef CONFIG_PERFMON + perfmon_init_percpu(); +#endif /* Disable all local interrupts */ ia64_set_lrr0(0, 1); diff -urN linux-2.4.0-test6/arch/ia64/kernel/time.c linux-2.4.0-test6-lia/arch/ia64/kernel/time.c --- linux-2.4.0-test6/arch/ia64/kernel/time.c Wed Aug 2 18:54:02 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/time.c Mon Jul 31 14:01:22 2000 @@ -150,11 +150,13 @@ static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) { - static unsigned long last_time; - static unsigned char count; int cpu = smp_processor_id(); unsigned long new_itm; +#if 0 + static unsigned long last_time; + static unsigned char count; int printed = 0; +#endif /* * Here we are in the timer irq handler. We have irqs locally @@ -192,7 +194,7 @@ if (time_after(new_itm, ia64_get_itc())) break; -#if !(defined(CONFIG_IA64_SOFTSDV_HACKS) && defined(CONFIG_SMP)) +#if 0 /* * SoftSDV in SMP mode is _slow_, so we do "lose" ticks, * but it's really OK... 
diff -urN linux-2.4.0-test6/arch/ia64/kernel/traps.c linux-2.4.0-test6-lia/arch/ia64/kernel/traps.c --- linux-2.4.0-test6/arch/ia64/kernel/traps.c Thu Jun 22 07:09:44 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/traps.c Fri Aug 11 14:55:22 2000 @@ -204,11 +204,13 @@ { struct task_struct *fpu_owner = ia64_get_fpu_owner(); + /* first, clear psr.dfh and psr.mfh: */ regs->cr_ipsr &= ~(IA64_PSR_DFH | IA64_PSR_MFH); if (fpu_owner != current) { ia64_set_fpu_owner(current); if (fpu_owner && ia64_psr(ia64_task_regs(fpu_owner))->mfh) { + ia64_psr(ia64_task_regs(fpu_owner))->mfh = 0; fpu_owner->thread.flags |= IA64_THREAD_FPH_VALID; __ia64_save_fpu(fpu_owner->thread.fph); } @@ -216,6 +218,11 @@ __ia64_load_fpu(current->thread.fph); } else { __ia64_init_fpu(); + /* + * Set mfh because the state in thread.fph does not match + * the state in the fph partition. + */ + ia64_psr(regs)->mfh = 1; } } } diff -urN linux-2.4.0-test6/arch/ia64/kernel/unwind.c linux-2.4.0-test6-lia/arch/ia64/kernel/unwind.c --- linux-2.4.0-test6/arch/ia64/kernel/unwind.c Wed Aug 2 18:54:02 2000 +++ linux-2.4.0-test6-lia/arch/ia64/kernel/unwind.c Fri Aug 11 15:52:00 2000 @@ -62,7 +62,7 @@ #define UNW_LOG_HASH_SIZE (UNW_LOG_CACHE_SIZE + 1) #define UNW_HASH_SIZE (1 << UNW_LOG_HASH_SIZE) -#define UNW_DEBUG 1 +#define UNW_DEBUG 0 #define UNW_STATS 0 /* WARNING: this disabled interrupts for long time-spans!! 
*/ #if UNW_DEBUG diff -urN linux-2.4.0-test6/arch/ia64/lib/memcpy.S linux-2.4.0-test6-lia/arch/ia64/lib/memcpy.S --- linux-2.4.0-test6/arch/ia64/lib/memcpy.S Wed Aug 2 18:54:02 2000 +++ linux-2.4.0-test6-lia/arch/ia64/lib/memcpy.S Sat Aug 5 13:19:26 2000 @@ -1,3 +1,20 @@ +/* + * + * Optimized version of the standard memcpy() function + * + * Inputs: + * in0: destination address + * in1: source address + * in2: number of bytes to copy + * Output: + * no return value + * + * Copyright (C) 2000 Hewlett-Packard Co + * Copyright (C) 2000 Stephane Eranian + * Copyright (C) 2000 David Mosberger-Tang + */ +#include + #include GLOBAL_ENTRY(bcopy) @@ -10,77 +27,254 @@ // FALL THROUGH GLOBAL_ENTRY(memcpy) -# define MEM_LAT 4 - -# define N MEM_LAT-1 -# define Nrot ((MEM_LAT + 7) & ~7) +# define MEM_LAT 2 /* latency to L1 cache */ # define dst r2 # define src r3 -# define len r9 -# define saved_pfs r10 -# define saved_lc r11 -# define saved_pr r16 -# define t0 r17 -# define cnt r18 - +# define retval r8 +# define saved_pfs r9 +# define saved_lc r10 +# define saved_pr r11 +# define cnt r16 +# define src2 r17 +# define t0 r18 +# define t1 r19 +# define t2 r20 +# define t3 r21 +# define t4 r22 +# define src_end r23 + +# define N (MEM_LAT + 4) +# define Nrot ((N + 7) & ~7) + + /* + * First, check if everything (src, dst, len) is a multiple of eight. If + * so, we handle everything with no taken branches (other than the loop + * itself) and a small icache footprint. Otherwise, we jump off to + * the more general copy routine handling arbitrary + * sizes/alignment etc. 
+ */ UNW(.prologue) UNW(.save ar.pfs, saved_pfs) alloc saved_pfs=ar.pfs,3,Nrot,0,Nrot +#if !(defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC)) lfetch [in1] +#else + nop.m 0 +#endif + or t0=in0,in1 + ;; - .rotr val[MEM_LAT] - .rotp p[MEM_LAT] - + or t0=t0,in2 UNW(.save ar.lc, saved_lc) mov saved_lc=ar.lc - - or t0=in0,in1 UNW(.save pr, saved_pr) mov saved_pr=pr - UNW(.body) - - mov ar.ec=MEM_LAT + cmp.eq p6,p0=in2,r0 // zero length? + mov retval=in0 // return dst +(p6) br.ret.spnt.many rp // zero length, return immediately + ;; - mov r8=in0 // return dst - shr cnt=in2,3 // number of 8-byte words to copy + mov dst=in0 // copy because of rotation + shr.u cnt=in2,3 // number of 8-byte words to copy mov pr.rot=1<<16 ;; - cmp.eq p6,p0=in2,r0 // zero length? - or t0=t0,in2 -(p6) br.ret.spnt.many rp // yes, return immediately - mov dst=in0 // copy because of rotation - mov src=in1 // copy because of rotation adds cnt=-1,cnt // br.ctop is repeat/until + cmp.gtu p7,p0=16,in2 // copying less than 16 bytes? + UNW(.body) + mov ar.ec=N ;; + and t0=0x7,t0 mov ar.lc=cnt ;; cmp.ne p6,p0=t0,r0 -(p6) br.cond.spnt.few slow_memcpy + mov src=in1 // copy because of rotation +(p7) br.cond.spnt.few memcpy_short +(p6) br.cond.spnt.few memcpy_long + ;; + .rotr val[N] + .rotp p[N] 1: (p[0]) ld8 val[0]=[src],8 -(p[N]) st8 [dst]=val[N],8 - br.ctop.sptk.few 1b +(p[N-1])st8 [dst]=val[N-1],8 + br.ctop.dptk.few 1b ;; -.exit: mov ar.lc=saved_lc - mov pr=saved_pr,0xffffffffffff0000 + mov pr=saved_pr,-1 mov ar.pfs=saved_pfs br.ret.sptk.many rp -slow_memcpy: - adds cnt=-1,in2 + /* + * Small (<16 bytes) unaligned copying is done via a simple byte-at-the-time + * copy loop. This performs relatively poorly on Itanium, but it doesn't + * get used very often (gcc inlines small copies) and due to atomicity + * issues, we want to avoid read-modify-write of entire words. 
+ */ + .align 32 +memcpy_short: + adds cnt=-1,in2 // br.ctop is repeat/until + mov ar.ec=MEM_LAT ;; mov ar.lc=cnt ;; + /* + * It is faster to put a stop bit in the loop here because it makes + * the pipeline shorter (and latency is what matters on short copies). + */ 1: (p[0]) ld1 val[0]=[src],1 -(p[N]) st1 [dst]=val[N],1 - br.ctop.sptk.few 1b - br.sptk.few .exit + ;; +(p[MEM_LAT-1])st1 [dst]=val[MEM_LAT-1],1 + br.ctop.dptk.few 1b + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,-1 + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + /* + * Large (>= 16 bytes) copying is done in a fancy way. Latency isn't + * an overriding concern here, but throughput is. We first do + * sub-word copying until the destination is aligned, then we check + * if the source is also aligned. If so, we do a simple load/store-loop + * until there are less than 8 bytes left over and then we do the tail, + * by storing the last few bytes using sub-word copying. If the source + * is not aligned, we branch off to the non-congruent loop. + * + * stage: op: + * 0 ld + * : + * MEM_LAT+3 shrp + * MEM_LAT+4 st + * + * On Itanium, the pipeline itself runs without stalls. However, br.ctop + * seems to introduce an unavoidable bubble in the pipeline so the overall + * latency is 2 cycles/iteration. This gives us a _copy_ throughput + * of 4 byte/cycle. Still not bad. 
+ */ +# undef N +# undef Nrot +# define N (MEM_LAT + 5) /* number of stages */ +# define Nrot ((N+1 + 2 + 7) & ~7) /* number of rotating regs */ + +#define LOG_LOOP_SIZE 6 + +memcpy_long: + alloc t3=ar.pfs,3,Nrot,0,Nrot // resize register frame + and t0=-8,src // t0 = src & ~7 + and t2=7,src // t2 = src & 7 + ;; + ld8 t0=[t0] // t0 = 1st source word + adds src2=7,src // src2 = (src + 7) + sub t4=r0,dst // t4 = -dst + ;; + and src2=-8,src2 // src2 = (src + 7) & ~7 + shl t2=t2,3 // t2 = 8*(src & 7) + shl t4=t4,3 // t4 = 8*(dst & 7) + ;; + ld8 t1=[src2] // t1 = 1st source word if src is 8-byte aligned, 2nd otherwise + sub t3=64,t2 // t3 = 64-8*(src & 7) + shr.u t0=t0,t2 + ;; + add src_end=src,in2 + shl t1=t1,t3 + mov pr=t4,0x38 // (p5,p4,p3)=(dst & 7) + ;; + or t0=t0,t1 + mov cnt=r0 + adds src_end=-1,src_end + ;; +(p3) st1 [dst]=t0,1 +(p3) shr.u t0=t0,8 +(p3) adds cnt=1,cnt + ;; +(p4) st2 [dst]=t0,2 +(p4) shr.u t0=t0,16 +(p4) adds cnt=2,cnt + ;; +(p5) st4 [dst]=t0,4 +(p5) adds cnt=4,cnt + and src_end=-8,src_end // src_end = last word of source buffer + ;; + + // At this point, dst is aligned to 8 bytes and there at least 16-7=9 bytes left to copy: + +1:{ add src=cnt,src // make src point to remainder of source buffer + sub cnt=in2,cnt // cnt = number of bytes left to copy + mov t4=ip + } ;; + and src2=-8,src // align source pointer + adds t4=memcpy_loops-1b,t4 + mov ar.ec=N + + and t0=7,src // t0 = src & 7 + shr.u t2=cnt,3 // t2 = number of 8-byte words left to copy + shl cnt=cnt,3 // move bits 0-2 to 3-5 + ;; + + .rotr val[N+1], w[2] + .rotp p[N] + + cmp.ne p6,p0=t0,r0 // is src aligned, too? + shl t0=t0,LOG_LOOP_SIZE // t0 = 8*(src & 7) + adds t2=-1,t2 // br.ctop is repeat/until + ;; + add t4=t0,t4 + mov pr=cnt,0x38 // set (p5,p4,p3) to # of bytes last-word bytes to copy + mov ar.lc=t2 + ;; +(p6) ld8 val[1]=[src2],8 // prime the pump... 
+ mov b6=t4 + br.sptk.few b6 + ;; + +memcpy_tail: + // At this point, (p5,p4,p3) are set to the number of bytes left to copy (which is + // less than 8) and t0 contains the last few bytes of the src buffer: +(p5) st4 [dst]=t0,4 +(p5) shr.u t0=t0,32 + mov ar.lc=saved_lc + ;; +(p4) st2 [dst]=t0,2 +(p4) shr.u t0=t0,16 + mov ar.pfs=saved_pfs + ;; +(p3) st1 [dst]=t0 + mov pr=saved_pr,-1 + br.ret.sptk.many rp + +/////////////////////////////////////////////////////// + .align 64 + +#define COPY(shift,index) \ + 1: \ + { .mfi \ + (p[0]) ld8 val[0]=[src2],8; \ + nop.f 0; \ + (p[MEM_LAT+3]) shrp w[0]=val[MEM_LAT+3],val[MEM_LAT+4-index],shift; \ + }; \ + { .mbb \ + (p[MEM_LAT+4]) st8 [dst]=w[1],8; \ + nop.b 0; \ + br.ctop.dptk.few 1b; \ + }; \ + ;; \ + ld8 val[N-1]=[src_end]; /* load last word (may be same as val[N]) */ \ + ;; \ + shrp t0=val[N-1],val[N-index],shift; \ + br memcpy_tail +memcpy_loops: + COPY(0, 1) /* no point special casing this---it doesn't go any faster without shrp */ + COPY(8, 0) + COPY(16, 0) + COPY(24, 0) + COPY(32, 0) + COPY(40, 0) + COPY(48, 0) + COPY(56, 0) END(memcpy) diff -urN linux-2.4.0-test6/arch/ia64/mm/init.c linux-2.4.0-test6-lia/arch/ia64/mm/init.c --- linux-2.4.0-test6/arch/ia64/mm/init.c Thu Aug 10 19:56:18 2000 +++ linux-2.4.0-test6-lia/arch/ia64/mm/init.c Mon Jul 31 14:01:22 2000 @@ -185,8 +185,42 @@ void free_initrd_mem(unsigned long start, unsigned long end) { + /* + * EFI uses 4KB pages while the kernel can use 4KB or bigger. + * Thus EFI and the kernel may have different page sizes. It is + * therefore possible to have the initrd share the same page as + * the end of the kernel (given current setup). 
+ * + * To avoid freeing/using the wrong page (kernel sized) we: + * - align up the beginning of initrd + * - keep the end untouched + * + * | | + * |=============| a000 + * | | + * | | + * | | 9000 + * |/////////////| + * |/////////////| + * |=============| 8000 + * |///INITRD////| + * |/////////////| + * |/////////////| 7000 + * | | + * |KKKKKKKKKKKKK| + * |=============| 6000 + * |KKKKKKKKKKKKK| + * |KKKKKKKKKKKKK| + * K=kernel using 8KB pages + * + * In this example, we must free page 8000 ONLY. So we must align up + * initrd_start and keep initrd_end as is. + */ + start = PAGE_ALIGN(start); + if (start < end) printk ("Freeing initrd memory: %ldkB freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { clear_bit(PG_reserved, &virt_to_page(start)->flags); set_page_count(virt_to_page(start), 1); @@ -423,5 +457,4 @@ #ifdef CONFIG_IA32_SUPPORT ia32_gdt_init(); #endif - return; } diff -urN linux-2.4.0-test6/arch/ia64/mm/tlb.c linux-2.4.0-test6-lia/arch/ia64/mm/tlb.c --- linux-2.4.0-test6/arch/ia64/mm/tlb.c Wed Aug 2 18:54:02 2000 +++ linux-2.4.0-test6-lia/arch/ia64/mm/tlb.c Mon Jul 31 14:01:22 2000 @@ -1,8 +1,11 @@ /* * TLB support routines. * - * Copyright (C) 1998, 1999 Hewlett-Packard Co - * Copyright (C) 1998, 1999 David Mosberger-Tang + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang + * + * 08/02/00 A. 
Mallick + * Modified RID allocation for SMP */ #include #include @@ -27,9 +30,11 @@ 1 << _PAGE_SIZE_8K | \ 1 << _PAGE_SIZE_4K ) -static void wrap_context (struct mm_struct *mm); - -unsigned long ia64_next_context = (1UL << IA64_HW_CONTEXT_BITS) + 1; +struct ia64_ctx ia64_ctx = { + lock: SPIN_LOCK_UNLOCKED, + next: 1, + limit: (1UL << IA64_HW_CONTEXT_BITS) +}; /* * Put everything in a struct so we avoid the global offset table whenever @@ -106,49 +111,43 @@ #endif /* CONFIG_SMP && !CONFIG_ITANIUM_PTCG */ -void -get_new_mmu_context (struct mm_struct *mm) -{ - if ((ia64_next_context & IA64_HW_CONTEXT_MASK) == 0) { - wrap_context(mm); - } - mm->context = ia64_next_context++; -} - /* - * This is where we handle the case where (ia64_next_context & - * IA64_HW_CONTEXT_MASK) == 0. Whenever this happens, we need to - * flush the entire TLB and skip over region id number 0, which is - * used by the kernel. + * Acquire the ia64_ctx.lock before calling this function! */ -static void -wrap_context (struct mm_struct *mm) +void +wrap_mmu_context (struct mm_struct *mm) { - struct task_struct *task; + struct task_struct *tsk; + unsigned long tsk_context; + + if (ia64_ctx.next >= (1UL << IA64_HW_CONTEXT_BITS)) + ia64_ctx.next = 300; /* skip daemons */ + ia64_ctx.limit = (1UL << IA64_HW_CONTEXT_BITS); /* - * We wrapped back to the first region id so we nuke the TLB - * so we can switch to the next generation of region ids. + * Scan all the task's mm->context and set proper safe range */ - __flush_tlb_all(); - if (ia64_next_context++ == 0) { - /* - * Oops, we've used up all 64 bits of the context - * space---walk through task table to ensure we don't - * get tricked into using an old context. If this - * happens, the machine has been running for a long, - * long time! 
- */ - ia64_next_context = (1UL << IA64_HW_CONTEXT_BITS) + 1; - - read_lock(&tasklist_lock); - for_each_task (task) { - if (task->mm == mm) - continue; - flush_tlb_mm(mm); + + read_lock(&tasklist_lock); + repeat: + for_each_task(tsk) { + if (!tsk->mm) + continue; + tsk_context = tsk->mm->context; + if (tsk_context == ia64_ctx.next) { + if (++ia64_ctx.next >= ia64_ctx.limit) { + /* empty range: reset the range limit and start over */ + if (ia64_ctx.next >= (1UL << IA64_HW_CONTEXT_BITS)) + ia64_ctx.next = 300; + ia64_ctx.limit = (1UL << IA64_HW_CONTEXT_BITS); + goto repeat; + } } - read_unlock(&tasklist_lock); + if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit)) + ia64_ctx.limit = tsk_context; } + read_unlock(&tasklist_lock); + flush_tlb_all(); } void diff -urN linux-2.4.0-test6/arch/ia64/sn/sn1/irq.c linux-2.4.0-test6-lia/arch/ia64/sn/sn1/irq.c --- linux-2.4.0-test6/arch/ia64/sn/sn1/irq.c Tue Feb 8 12:01:59 2000 +++ linux-2.4.0-test6-lia/arch/ia64/sn/sn1/irq.c Mon Jul 31 14:01:22 2000 @@ -1,9 +1,10 @@ #include +#include +#include -#include #include -static int +static unsigned int sn1_startup_irq(unsigned int irq) { return(0); @@ -24,23 +25,16 @@ { } -static int -sn1_handle_irq(unsigned int irq, struct pt_regs *regs) -{ - return(0); -} - struct hw_interrupt_type irq_type_sn1 = { "sn1_irq", sn1_startup_irq, sn1_shutdown_irq, - sn1_handle_irq, sn1_enable_irq, sn1_disable_irq }; void -sn1_irq_init (struct irq_desc desc[NR_IRQS]) +sn1_irq_init (void) { int i; diff -urN linux-2.4.0-test6/arch/ia64/sn/sn1/machvec.c linux-2.4.0-test6-lia/arch/ia64/sn/sn1/machvec.c --- linux-2.4.0-test6/arch/ia64/sn/sn1/machvec.c Sun Feb 6 18:42:40 2000 +++ linux-2.4.0-test6-lia/arch/ia64/sn/sn1/machvec.c Mon Jul 31 14:01:22 2000 @@ -1,4 +1,2 @@ +#define MACHVEC_PLATFORM_NAME sn1 #include -#include - -MACHVEC_DEFINE(sn1) diff -urN linux-2.4.0-test6/arch/ia64/sn/sn1/setup.c linux-2.4.0-test6-lia/arch/ia64/sn/sn1/setup.c --- linux-2.4.0-test6/arch/ia64/sn/sn1/setup.c Mon May 
8 22:00:01 2000 +++ linux-2.4.0-test6-lia/arch/ia64/sn/sn1/setup.c Mon Jul 31 14:01:22 2000 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff -urN linux-2.4.0-test6/arch/ia64/vmlinux.lds.S linux-2.4.0-test6-lia/arch/ia64/vmlinux.lds.S --- linux-2.4.0-test6/arch/ia64/vmlinux.lds.S Wed Aug 2 18:54:03 2000 +++ linux-2.4.0-test6-lia/arch/ia64/vmlinux.lds.S Fri Aug 11 14:55:22 2000 @@ -46,6 +46,15 @@ { *(__ex_table) } __stop___ex_table = .; +#if defined(CONFIG_IA64_GENERIC) + /* Machine Vector */ + . = ALIGN(16); + machvec_start = .; + .machvec : AT(ADDR(.machvec) - PAGE_OFFSET) + { *(.machvec) } + machvec_end = .; +#endif + __start___ksymtab = .; /* Kernel symbol table */ __ksymtab : AT(ADDR(__ksymtab) - PAGE_OFFSET) { *(__ksymtab) } diff -urN linux-2.4.0-test6/drivers/char/Makefile linux-2.4.0-test6-lia/drivers/char/Makefile --- linux-2.4.0-test6/drivers/char/Makefile Thu Aug 10 19:56:21 2000 +++ linux-2.4.0-test6-lia/drivers/char/Makefile Thu Aug 10 20:29:27 2000 @@ -109,7 +109,17 @@ endif obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o + obj-$(CONFIG_ATARI_DSP56K) += dsp56k.o +ifeq ($(CONFIG_ATARI_DSP56K),y) +S = y +else + ifeq ($(CONFIG_ATARI_DSP56K),m) + SM = y + endif +endif + +obj-$(CONFIG_SIM_SERIAL) += simserial.o obj-$(CONFIG_ROCKETPORT) += rocket.o obj-$(CONFIG_MOXA_SMARTIO) += mxser.o obj-$(CONFIG_MOXA_INTELLIO) += moxa.o diff -urN linux-2.4.0-test6/drivers/char/agp/agpgart_be.c linux-2.4.0-test6-lia/drivers/char/agp/agpgart_be.c --- linux-2.4.0-test6/drivers/char/agp/agpgart_be.c Thu Aug 10 19:56:21 2000 +++ linux-2.4.0-test6-lia/drivers/char/agp/agpgart_be.c Mon Jul 31 14:01:22 2000 @@ -67,14 +67,16 @@ { #if defined(__i386__) asm volatile ("wbinvd":::"memory"); -#elif defined(__alpha__) +#elif defined(__alpha__) || defined(__ia64__) /* ??? I wonder if we'll really need to flush caches, or if the core logic can manage to keep the system coherent. 
The ARM speaks only of using `cflush' to get things in memory in preparation for power failure. If we do need to call `cflush', we'll need a target page, - as we can only flush one page at a time. */ + as we can only flush one page at a time. + + Ditto for IA-64. --davidm 00/08/07 */ mb(); #else #error "Please define flush_cache." diff -urN linux-2.4.0-test6/drivers/char/drm/agpsupport.c linux-2.4.0-test6-lia/drivers/char/drm/agpsupport.c --- linux-2.4.0-test6/drivers/char/drm/agpsupport.c Thu Aug 10 19:56:21 2000 +++ linux-2.4.0-test6-lia/drivers/char/drm/agpsupport.c Mon Jul 31 14:01:22 2000 @@ -322,7 +322,7 @@ case ALI_M1541: head->chipset = "ALi M1541"; break; default: head->chipset = "Unknown"; break; } - DRM_INFO("AGP %d.%d on %s @ 0x%08lx %dMB\n", + DRM_INFO("AGP %d.%d on %s @ 0x%08lx %ZuMB\n", head->agp_info.version.major, head->agp_info.version.minor, head->chipset, diff -urN linux-2.4.0-test6/drivers/char/drm/lists.c linux-2.4.0-test6-lia/drivers/char/drm/lists.c --- linux-2.4.0-test6/drivers/char/drm/lists.c Wed Aug 2 18:54:13 2000 +++ linux-2.4.0-test6-lia/drivers/char/drm/lists.c Mon Jul 31 14:01:22 2000 @@ -153,6 +153,7 @@ #endif buf->list = DRM_LIST_FREE; do { + /* XXX this is wrong due to the ABA problem! --davidm 00/08/07 */ old = bl->next; buf->next = old; prev = cmpxchg(&bl->next, old, buf); @@ -185,6 +186,7 @@ /* Get buffer */ do { + /* XXX this is wrong due to the ABA problem! 
--davidm 00/08/07 */ old = bl->next; if (!old) return NULL; new = bl->next->next; diff -urN linux-2.4.0-test6/drivers/char/drm/vm.c linux-2.4.0-test6-lia/drivers/char/drm/vm.c --- linux-2.4.0-test6/drivers/char/drm/vm.c Thu Aug 10 19:56:21 2000 +++ linux-2.4.0-test6-lia/drivers/char/drm/vm.c Fri Aug 11 15:38:46 2000 @@ -250,7 +250,7 @@ vma->vm_start, vma->vm_end, VM_OFFSET(vma)); /* Length must match exact page count */ - if ((length >> PAGE_SHIFT) != dma->page_count) { + if (!dma || (length >> PAGE_SHIFT) != dma->page_count) { unlock_kernel(); return -EINVAL; } @@ -323,6 +323,9 @@ pgprot_val(vma->vm_page_prot) |= _PAGE_PCD; pgprot_val(vma->vm_page_prot) &= ~_PAGE_PWT; } +#elif defined(__ia64__) + if (map->type != _DRM_AGP) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); #endif vma->vm_flags |= VM_IO; /* not in core dump */ } diff -urN linux-2.4.0-test6/drivers/char/efirtc.c linux-2.4.0-test6-lia/drivers/char/efirtc.c --- linux-2.4.0-test6/drivers/char/efirtc.c Wed Aug 2 18:54:14 2000 +++ linux-2.4.0-test6-lia/drivers/char/efirtc.c Fri Aug 11 17:21:50 2000 @@ -395,11 +395,10 @@ return 0; } -static int __exit +static void __exit efi_rtc_exit(void) { /* not yet used */ - return 0; } module_init(efi_rtc_init); diff -urN linux-2.4.0-test6/drivers/char/simserial.c linux-2.4.0-test6-lia/drivers/char/simserial.c --- linux-2.4.0-test6/drivers/char/simserial.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.0-test6-lia/drivers/char/simserial.c Fri Aug 11 14:56:04 2000 @@ -0,0 +1,1094 @@ +/* + * Simulated Serial Driver (fake serial) + * + * This driver is mostly used for bringup purposes and will go away. + * It has a strong dependency on the system console. All outputs + * are rerouted to the same facility as the one used by printk which, in our + * case means sys_sim.c console (goes via the simulator). The code hereafter + * is completely leveraged from the serial.c driver. 
+ * + * Copyright (C) 1999-2000 Hewlett-Packard Co + * Copyright (C) 1999 Stephane Eranian + * Copyright (C) 2000 David Mosberger-Tang + * + * 02/04/00 D. Mosberger Merged in serial.c bug fixes in rs_close(). + * 02/25/00 D. Mosberger Synced up with 2.3.99pre-5 version of serial.c. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#undef SIMSERIAL_DEBUG /* define this to get some debug information */ + +#define KEYBOARD_INTR 3 /* must match with simulator! */ +#define SIMSERIAL_IRQ 0xee + +#define NR_PORTS 1 /* only one port for now */ +#define SERIAL_INLINE 1 + +#ifdef SERIAL_INLINE +#define _INLINE_ inline +#endif + +#ifndef MIN +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#endif + +#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT) + +#define SSC_GETCHAR 21 + +extern long ia64_ssc (long, long, long, long, int); +extern void ia64_ssc_connect_irq (long intr, long irq); + +static char *serial_name = "SimSerial driver"; +static char *serial_version = "0.6"; + +/* + * This has been extracted from asm/serial.h. We need one eventually but + * I don't know exactly what we're going to put in it so just fake one + * for now. + */ +#define BASE_BAUD ( 1843200 / 16 ) + +#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST) + +/* + * Most of the values here are meaningless to this particular driver. + * However some values must be preserved for the code (leveraged from serial.c + * to work correctly). + * port must not be 0 + * type must not be UNKNOWN + * So I picked arbitrary (guess from where?) values instead + */ +static struct serial_state rs_table[NR_PORTS]={ + /* UART CLK PORT IRQ FLAGS */ + { 0, BASE_BAUD, 0x3F8, SIMSERIAL_IRQ, STD_COM_FLAGS,0,PORT_16550 } /* ttyS0 */ +}; + +/* + * Just for the fun of it ! 
+ */ +static struct serial_uart_config uart_config[] = { + { "unknown", 1, 0 }, + { "8250", 1, 0 }, + { "16450", 1, 0 }, + { "16550", 1, 0 }, + { "16550A", 16, UART_CLEAR_FIFO | UART_USE_FIFO }, + { "cirrus", 1, 0 }, + { "ST16650", 1, UART_CLEAR_FIFO | UART_STARTECH }, + { "ST16650V2", 32, UART_CLEAR_FIFO | UART_USE_FIFO | + UART_STARTECH }, + { "TI16750", 64, UART_CLEAR_FIFO | UART_USE_FIFO}, + { 0, 0} +}; + +static struct tty_driver serial_driver, callout_driver; +static int serial_refcount; + +static struct async_struct *IRQ_ports[NR_IRQS]; +static struct tty_struct *serial_table[NR_PORTS]; +static struct termios *serial_termios[NR_PORTS]; +static struct termios *serial_termios_locked[NR_PORTS]; + +static struct console *console; + +static unsigned char *tmp_buf; +static DECLARE_MUTEX(tmp_buf_sem); + +extern struct console *console_drivers; /* from kernel/printk.c */ + +/* + * ------------------------------------------------------------ + * rs_stop() and rs_start() + * + * This routines are called before setting or resetting tty->stopped. + * They enable or disable transmitter interrupts, as necessary. 
+ * ------------------------------------------------------------ + */ +static void rs_stop(struct tty_struct *tty) +{ +#ifdef SIMSERIAL_DEBUG + printk("rs_stop: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n", + tty->stopped, tty->hw_stopped, tty->flow_stopped); +#endif + +} + +static void rs_start(struct tty_struct *tty) +{ +#if SIMSERIAL_DEBUG + printk("rs_start: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n", + tty->stopped, tty->hw_stopped, tty->flow_stopped); +#endif +} + +static void receive_chars(struct tty_struct *tty) +{ + unsigned char ch; + static unsigned char seen_esc = 0; + + while ( (ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR)) ) { + if ( ch == 27 && seen_esc == 0 ) { + seen_esc = 1; + continue; + } else { + if ( seen_esc==1 && ch == 'O' ) { + seen_esc = 2; + continue; + } else if ( seen_esc == 2 ) { + if ( ch == 'P' ) show_state(); /* F1 key */ + if ( ch == 'Q' ) show_buffers(); /* F2 key */ + seen_esc = 0; + continue; + } + } + seen_esc = 0; + if (tty->flip.count >= TTY_FLIPBUF_SIZE) break; + + *tty->flip.char_buf_ptr = ch; + + *tty->flip.flag_buf_ptr = 0; + + tty->flip.flag_buf_ptr++; + tty->flip.char_buf_ptr++; + tty->flip.count++; + } + tty_flip_buffer_push(tty); +} + +/* + * This is the serial driver's interrupt routine for a single port + */ +static void rs_interrupt_single(int irq, void *dev_id, struct pt_regs * regs) +{ + struct async_struct * info; + + /* + * I don't know exactly why they don't use the dev_id opaque data + * pointer instead of this extra lookup table + */ + info = IRQ_ports[irq]; + if (!info || !info->tty) { + printk("simrs_interrupt_single: info|tty=0 info=%p problem\n", info); + return; + } + /* + * pretty simple in our case, because we only get interrupts + * on inbound traffic + */ + receive_chars(info->tty); +} + +/* + * ------------------------------------------------------------------- + * Here ends the serial interrupt routines. 
+ * ------------------------------------------------------------------- + */ + +#if 0 +/* + * not really used in our situation so keep them commented out for now + */ +static DECLARE_TASK_QUEUE(tq_serial); /* used to be at the top of the file */ +static void do_serial_bh(void) +{ + run_task_queue(&tq_serial); + printk("do_serial_bh: called\n"); +} +#endif + +static void do_softint(void *private_) +{ + printk("simserial: do_softint called\n"); +} + +static void rs_put_char(struct tty_struct *tty, unsigned char ch) +{ + struct async_struct *info = (struct async_struct *)tty->driver_data; + unsigned long flags; + + if (!tty || !info->xmit.buf) return; + + save_flags(flags); cli(); + if (CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE) == 0) { + restore_flags(flags); + return; + } + info->xmit.buf[info->xmit.head] = ch; + info->xmit.head = (info->xmit.head + 1) & (SERIAL_XMIT_SIZE-1); + restore_flags(flags); +} + +static _INLINE_ void transmit_chars(struct async_struct *info, int *intr_done) +{ + int count; + unsigned long flags; + + save_flags(flags); cli(); + + if (info->x_char) { + char c = info->x_char; + + console->write(console, &c, 1); + + info->state->icount.tx++; + info->x_char = 0; + + goto out; + } + + if (info->xmit.head == info->xmit.tail || info->tty->stopped || info->tty->hw_stopped) { +#ifdef SIMSERIAL_DEBUG + printk("transmit_chars: head=%d, tail=%d, stopped=%d\n", + info->xmit.head, info->xmit.tail, info->tty->stopped); +#endif + goto out; + } + /* + * We removed the loop and try to do it in to chunks. We need + * 2 operations maximum because it's a ring buffer. + * + * First from current to tail if possible. 
+ * Then from the beginning of the buffer until necessary + */ + + count = MIN(CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE), + SERIAL_XMIT_SIZE - info->xmit.tail); + console->write(console, info->xmit.buf+info->xmit.tail, count); + + info->xmit.tail = (info->xmit.tail+count) & (SERIAL_XMIT_SIZE-1); + + /* + * We have more at the beginning of the buffer + */ + count = CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE); + if (count) { + console->write(console, info->xmit.buf, count); + info->xmit.tail += count; + } +out: + restore_flags(flags); +} + +static void rs_flush_chars(struct tty_struct *tty) +{ + struct async_struct *info = (struct async_struct *)tty->driver_data; + + if (info->xmit.head == info->xmit.tail || tty->stopped || tty->hw_stopped || + !info->xmit.buf) + return; + + transmit_chars(info, NULL); +} + + +static int rs_write(struct tty_struct * tty, int from_user, + const unsigned char *buf, int count) +{ + int c, ret = 0; + struct async_struct *info = (struct async_struct *)tty->driver_data; + unsigned long flags; + + if (!tty || !info->xmit.buf || !tmp_buf) return 0; + + save_flags(flags); + if (from_user) { + down(&tmp_buf_sem); + while (1) { + int c1; + c = CIRC_SPACE_TO_END(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE); + if (count < c) + c = count; + if (c <= 0) + break; + + c -= copy_from_user(tmp_buf, buf, c); + if (!c) { + if (!ret) + ret = -EFAULT; + break; + } + cli(); + c1 = CIRC_SPACE_TO_END(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE); + if (c1 < c) + c = c1; + memcpy(info->xmit.buf + info->xmit.head, tmp_buf, c); + info->xmit.head = ((info->xmit.head + c) & + (SERIAL_XMIT_SIZE-1)); + restore_flags(flags); + buf += c; + count -= c; + ret += c; + } + up(&tmp_buf_sem); + } else { + cli(); + while (1) { + c = CIRC_SPACE_TO_END(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE); + if (count < c) + c = count; + if (c <= 0) { + break; + } + memcpy(info->xmit.buf + info->xmit.head, buf, c); + 
info->xmit.head = ((info->xmit.head + c) & + (SERIAL_XMIT_SIZE-1)); + buf += c; + count -= c; + ret += c; + } + restore_flags(flags); + } + /* + * Hey, we transmit directly from here in our case + */ + if (CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE) + && !tty->stopped && !tty->hw_stopped) { + transmit_chars(info, NULL); + } + return ret; +} + +static int rs_write_room(struct tty_struct *tty) +{ + struct async_struct *info = (struct async_struct *)tty->driver_data; + + return CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE); +} + +static int rs_chars_in_buffer(struct tty_struct *tty) +{ + struct async_struct *info = (struct async_struct *)tty->driver_data; + + return CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE); +} + +static void rs_flush_buffer(struct tty_struct *tty) +{ + struct async_struct *info = (struct async_struct *)tty->driver_data; + unsigned long flags; + + save_flags(flags); cli(); + info->xmit.head = info->xmit.tail = 0; + restore_flags(flags); + + wake_up_interruptible(&tty->write_wait); + + if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) && + tty->ldisc.write_wakeup) + (tty->ldisc.write_wakeup)(tty); +} + +/* + * This function is used to send a high-priority XON/XOFF character to + * the device + */ +static void rs_send_xchar(struct tty_struct *tty, char ch) +{ + struct async_struct *info = (struct async_struct *)tty->driver_data; + + info->x_char = ch; + if (ch) { + /* + * I guess we could call console->write() directly but + * let's do that for now. + */ + transmit_chars(info, NULL); + } +} + +/* + * ------------------------------------------------------------ + * rs_throttle() + * + * This routine is called by the upper-layer tty layer to signal that + * incoming characters should be throttled. 
+ * ------------------------------------------------------------ + */ +static void rs_throttle(struct tty_struct * tty) +{ + if (I_IXOFF(tty)) rs_send_xchar(tty, STOP_CHAR(tty)); + + printk("simrs_throttle called\n"); +} + +static void rs_unthrottle(struct tty_struct * tty) +{ + struct async_struct *info = (struct async_struct *)tty->driver_data; + + if (I_IXOFF(tty)) { + if (info->x_char) + info->x_char = 0; + else + rs_send_xchar(tty, START_CHAR(tty)); + } + printk("simrs_unthrottle called\n"); +} + +/* + * rs_break() --- routine which turns the break handling on or off + */ +static void rs_break(struct tty_struct *tty, int break_state) +{ +} + +static int rs_ioctl(struct tty_struct *tty, struct file * file, + unsigned int cmd, unsigned long arg) +{ + if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) && + (cmd != TIOCSERCONFIG) && (cmd != TIOCSERGSTRUCT) && + (cmd != TIOCMIWAIT) && (cmd != TIOCGICOUNT)) { + if (tty->flags & (1 << TTY_IO_ERROR)) + return -EIO; + } + + switch (cmd) { + case TIOCMGET: + printk("rs_ioctl: TIOCMGET called\n"); + return -EINVAL; + case TIOCMBIS: + case TIOCMBIC: + case TIOCMSET: + printk("rs_ioctl: TIOCMBIS/BIC/SET called\n"); + return -EINVAL; + case TIOCGSERIAL: + printk("simrs_ioctl TIOCGSERIAL called\n"); + return 0; + case TIOCSSERIAL: + printk("simrs_ioctl TIOCSSERIAL called\n"); + return 0; + case TIOCSERCONFIG: + printk("rs_ioctl: TIOCSERCONFIG called\n"); + return -EINVAL; + + case TIOCSERGETLSR: /* Get line status register */ + printk("rs_ioctl: TIOCSERGETLSR called\n"); + return -EINVAL; + + case TIOCSERGSTRUCT: + printk("rs_ioctl: TIOCSERGSTRUCT called\n"); +#if 0 + if (copy_to_user((struct async_struct *) arg, + info, sizeof(struct async_struct))) + return -EFAULT; +#endif + return 0; + + /* + * Wait for any of the 4 modem inputs (DCD,RI,DSR,CTS) to change + * - mask passed in arg for lines of interest + * (use |'ed TIOCM_RNG/DSR/CD/CTS for masking) + * Caller should use TIOCGICOUNT to see which one it was + */ + case 
TIOCMIWAIT: + printk("rs_ioctl: TIOCMIWAIT: called\n"); + return 0; + /* + * Get counter of input serial line interrupts (DCD,RI,DSR,CTS) + * Return: write counters to the user passed counter struct + * NB: both 1->0 and 0->1 transitions are counted except for + * RI where only 0->1 is counted. + */ + case TIOCGICOUNT: + printk("rs_ioctl: TIOCGICOUNT called\n"); + return 0; + + case TIOCSERGWILD: + case TIOCSERSWILD: + /* "setserial -W" is called in Debian boot */ + printk ("TIOCSER?WILD ioctl obsolete, ignored.\n"); + return 0; + + default: + return -ENOIOCTLCMD; + } + return 0; +} + +#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK)) + +static void rs_set_termios(struct tty_struct *tty, struct termios *old_termios) +{ + unsigned int cflag = tty->termios->c_cflag; + + if ( (cflag == old_termios->c_cflag) + && ( RELEVANT_IFLAG(tty->termios->c_iflag) + == RELEVANT_IFLAG(old_termios->c_iflag))) + return; + + + /* Handle turning off CRTSCTS */ + if ((old_termios->c_cflag & CRTSCTS) && + !(tty->termios->c_cflag & CRTSCTS)) { + tty->hw_stopped = 0; + rs_start(tty); + } +} +/* + * This routine will shutdown a serial port; interrupts are disabled, and + * DTR is dropped if the hangup on close termio flag is on. + */ +static void shutdown(struct async_struct * info) +{ + unsigned long flags; + struct serial_state *state; + int retval; + + if (!(info->flags & ASYNC_INITIALIZED)) return; + + state = info->state; + +#ifdef SIMSERIAL_DEBUG + printk("Shutting down serial port %d (irq %d)....", info->line, + state->irq); +#endif + + save_flags(flags); cli(); /* Disable interrupts */ + + /* + * First unlink the serial port from the IRQ chain... 
+ */ + if (info->next_port) + info->next_port->prev_port = info->prev_port; + if (info->prev_port) + info->prev_port->next_port = info->next_port; + else + IRQ_ports[state->irq] = info->next_port; + + /* + * Free the IRQ, if necessary + */ + if (state->irq && (!IRQ_ports[state->irq] || + !IRQ_ports[state->irq]->next_port)) { + if (IRQ_ports[state->irq]) { + free_irq(state->irq, NULL); + retval = request_irq(state->irq, rs_interrupt_single, + IRQ_T(info), "serial", NULL); + + if (retval) + printk("serial shutdown: request_irq: error %d" + " Couldn't reacquire IRQ.\n", retval); + } else + free_irq(state->irq, NULL); + } + + if (info->xmit.buf) { + free_page((unsigned long) info->xmit.buf); + info->xmit.buf = 0; + } + + if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags); + + info->flags &= ~ASYNC_INITIALIZED; + restore_flags(flags); +} + +/* + * ------------------------------------------------------------ + * rs_close() + * + * This routine is called when the serial port gets closed. First, we + * wait for the last remaining data to be sent. Then, we unlink its + * async structure from the interrupt chain if necessary, and we free + * that IRQ if nothing is left in the chain. + * ------------------------------------------------------------ + */ +static void rs_close(struct tty_struct *tty, struct file * filp) +{ + struct async_struct * info = (struct async_struct *)tty->driver_data; + struct serial_state *state; + unsigned long flags; + + if (!info ) return; + + state = info->state; + + save_flags(flags); cli(); + + if (tty_hung_up_p(filp)) { +#ifdef SIMSERIAL_DEBUG + printk("rs_close: hung_up\n"); +#endif + MOD_DEC_USE_COUNT; + restore_flags(flags); + return; + } +#ifdef SIMSERIAL_DEBUG + printk("rs_close ttys%d, count = %d\n", info->line, state->count); +#endif + if ((tty->count == 1) && (state->count != 1)) { + /* + * Uh, oh. tty->count is 1, which means that the tty + * structure will be freed. state->count should always + * be one in these conditions. 
If it's greater than + * one, we've got real problems, since it means the + * serial port won't be shutdown. + */ + printk("rs_close: bad serial port count; tty->count is 1, " + "state->count is %d\n", state->count); + state->count = 1; + } + if (--state->count < 0) { + printk("rs_close: bad serial port count for ttys%d: %d\n", + info->line, state->count); + state->count = 0; + } + if (state->count) { + MOD_DEC_USE_COUNT; + restore_flags(flags); + return; + } + info->flags |= ASYNC_CLOSING; + restore_flags(flags); + + /* + * Now we wait for the transmit buffer to clear; and we notify + * the line discipline to only process XON/XOFF characters. + */ + shutdown(info); + if (tty->driver.flush_buffer) tty->driver.flush_buffer(tty); + if (tty->ldisc.flush_buffer) tty->ldisc.flush_buffer(tty); + info->event = 0; + info->tty = 0; + if (info->blocked_open) { + if (info->close_delay) { + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(info->close_delay); + } + wake_up_interruptible(&info->open_wait); + } + info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CALLOUT_ACTIVE|ASYNC_CLOSING); + wake_up_interruptible(&info->close_wait); + MOD_DEC_USE_COUNT; +} + +/* + * rs_wait_until_sent() --- wait until the transmitter is empty + */ +static void rs_wait_until_sent(struct tty_struct *tty, int timeout) +{ +} + + +/* + * rs_hangup() --- called by tty_hangup() when a hangup is signaled. 
+ */ +static void rs_hangup(struct tty_struct *tty) +{ + struct async_struct * info = (struct async_struct *)tty->driver_data; + struct serial_state *state = info->state; + +#ifdef SIMSERIAL_DEBUG + printk("rs_hangup: called\n"); +#endif + + state = info->state; + + rs_flush_buffer(tty); + if (info->flags & ASYNC_CLOSING) + return; + shutdown(info); + + info->event = 0; + state->count = 0; + info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CALLOUT_ACTIVE); + info->tty = 0; + wake_up_interruptible(&info->open_wait); +} + + +static int get_async_struct(int line, struct async_struct **ret_info) +{ + struct async_struct *info; + struct serial_state *sstate; + + sstate = rs_table + line; + sstate->count++; + if (sstate->info) { + *ret_info = sstate->info; + return 0; + } + info = kmalloc(sizeof(struct async_struct), GFP_KERNEL); + if (!info) { + sstate->count--; + return -ENOMEM; + } + memset(info, 0, sizeof(struct async_struct)); + init_waitqueue_head(&info->open_wait); + init_waitqueue_head(&info->close_wait); + init_waitqueue_head(&info->delta_msr_wait); + info->magic = SERIAL_MAGIC; + info->port = sstate->port; + info->flags = sstate->flags; + info->xmit_fifo_size = sstate->xmit_fifo_size; + info->line = line; + info->tqueue.routine = do_softint; + info->tqueue.data = info; + info->state = sstate; + if (sstate->info) { + kfree(info); + *ret_info = sstate->info; + return 0; + } + *ret_info = sstate->info = info; + return 0; +} + +static int +startup(struct async_struct *info) +{ + unsigned long flags; + int retval=0; + void (*handler)(int, void *, struct pt_regs *); + struct serial_state *state= info->state; + unsigned long page; + + page = get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + save_flags(flags); cli(); + + if (info->flags & ASYNC_INITIALIZED) { + free_page(page); + goto errout; + } + + if (!state->port || !state->type) { + if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags); + free_page(page); + goto errout; + } + if (info->xmit.buf) + 
free_page(page); + else + info->xmit.buf = (unsigned char *) page; + +#ifdef SIMSERIAL_DEBUG + printk("startup: ttys%d (irq %d)...", info->line, state->irq); +#endif + + /* + * Allocate the IRQ if necessary + */ + if (state->irq && (!IRQ_ports[state->irq] || + !IRQ_ports[state->irq]->next_port)) { + if (IRQ_ports[state->irq]) { + retval = -EBUSY; + goto errout; + } else + handler = rs_interrupt_single; + + retval = request_irq(state->irq, handler, IRQ_T(info), + "simserial", NULL); + if (retval) { + if (capable(CAP_SYS_ADMIN)) { + if (info->tty) + set_bit(TTY_IO_ERROR, + &info->tty->flags); + retval = 0; + } + goto errout; + } + } + + /* + * Insert serial port into IRQ chain. + */ + info->prev_port = 0; + info->next_port = IRQ_ports[state->irq]; + if (info->next_port) + info->next_port->prev_port = info; + IRQ_ports[state->irq] = info; + + if (info->tty) clear_bit(TTY_IO_ERROR, &info->tty->flags); + + info->xmit.head = info->xmit.tail = 0; + +#if 0 + /* + * Set up serial timers... + */ + timer_table[RS_TIMER].expires = jiffies + 2*HZ/100; + timer_active |= 1 << RS_TIMER; +#endif + + /* + * Set up the tty->alt_speed kludge + */ + if (info->tty) { + if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI) + info->tty->alt_speed = 57600; + if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI) + info->tty->alt_speed = 115200; + if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI) + info->tty->alt_speed = 230400; + if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP) + info->tty->alt_speed = 460800; + } + + info->flags |= ASYNC_INITIALIZED; + restore_flags(flags); + return 0; + +errout: + restore_flags(flags); + return retval; +} + + +/* + * This routine is called whenever a serial port is opened. It + * enables interrupts for a serial port, linking in its async structure into + * the IRQ chain. It also performs the serial-specific + * initialization for the tty structure. 
+ */ +static int rs_open(struct tty_struct *tty, struct file * filp) +{ + struct async_struct *info; + int retval, line; + unsigned long page; + + MOD_INC_USE_COUNT; + line = MINOR(tty->device) - tty->driver.minor_start; + if ((line < 0) || (line >= NR_PORTS)) { + MOD_DEC_USE_COUNT; + return -ENODEV; + } + retval = get_async_struct(line, &info); + if (retval) { + MOD_DEC_USE_COUNT; + return retval; + } + tty->driver_data = info; + info->tty = tty; + +#ifdef SIMSERIAL_DEBUG + printk("rs_open %s%d, count = %d\n", tty->driver.name, info->line, + info->state->count); +#endif + info->tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0; + + if (!tmp_buf) { + page = get_free_page(GFP_KERNEL); + if (!page) { + /* MOD_DEC_USE_COUNT; "info->tty" will cause this? */ + return -ENOMEM; + } + if (tmp_buf) + free_page(page); + else + tmp_buf = (unsigned char *) page; + } + + /* + * If the port is the middle of closing, bail out now + */ + if (tty_hung_up_p(filp) || + (info->flags & ASYNC_CLOSING)) { + if (info->flags & ASYNC_CLOSING) + interruptible_sleep_on(&info->close_wait); + /* MOD_DEC_USE_COUNT; "info->tty" will cause this? */ +#ifdef SERIAL_DO_RESTART + return ((info->flags & ASYNC_HUP_NOTIFY) ? + -EAGAIN : -ERESTARTSYS); +#else + return -EAGAIN; +#endif + } + + /* + * Start up serial port + */ + retval = startup(info); + if (retval) { + /* MOD_DEC_USE_COUNT; "info->tty" will cause this? 
*/ + return retval; + } + + if ((info->state->count == 1) && + (info->flags & ASYNC_SPLIT_TERMIOS)) { + if (tty->driver.subtype == SERIAL_TYPE_NORMAL) + *tty->termios = info->state->normal_termios; + else + *tty->termios = info->state->callout_termios; + } + + /* + * figure out which console to use (should be one already) + */ + console = console_drivers; + while (console) { + if ((console->flags & CON_ENABLED) && console->write) break; + console = console->next; + } + + info->session = current->session; + info->pgrp = current->pgrp; + +#ifdef SIMSERIAL_DEBUG + printk("rs_open ttys%d successful\n", info->line); +#endif + return 0; +} + +/* + * /proc fs routines.... + */ + +static inline int line_info(char *buf, struct serial_state *state) +{ + return sprintf(buf, "%d: uart:%s port:%lX irq:%d\n", + state->line, uart_config[state->type].name, + state->port, state->irq); +} + +int rs_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int i, len = 0, l; + off_t begin = 0; + + len += sprintf(page, "simserinfo:1.0 driver:%s\n", serial_version); + for (i = 0; i < NR_PORTS && len < 4000; i++) { + l = line_info(page + len, &rs_table[i]); + len += l; + if (len+begin > off+count) + goto done; + if (len+begin < off) { + begin += len; + len = 0; + } + } + *eof = 1; +done: + if (off >= len+begin) + return 0; + *start = page + (begin-off); + return ((count < begin+len-off) ? count : begin+len-off); +} + +/* + * --------------------------------------------------------------------- + * rs_init() and friends + * + * rs_init() is called at boot-time to initialize the serial driver. + * --------------------------------------------------------------------- + */ + +/* + * This routine prints out the appropriate serial driver version + * number, and identifies which options were configured into this + * driver. 
+ */ +static inline void show_serial_version(void) +{ + printk(KERN_INFO "%s version %s with", serial_name, serial_version); + printk(" no serial options enabled\n"); +} + +/* + * The serial driver boot-time initialization code! + */ +static int __init +simrs_init (void) +{ + int i; + struct serial_state *state; + + show_serial_version(); + + /* connect the platform's keyboard interrupt to SIMSERIAL_IRQ */ + ia64_ssc_connect_irq(KEYBOARD_INTR, SIMSERIAL_IRQ); + + /* Initialize the tty_driver structure */ + + memset(&serial_driver, 0, sizeof(struct tty_driver)); + serial_driver.magic = TTY_DRIVER_MAGIC; + serial_driver.driver_name = "simserial"; + serial_driver.name = "ttyS"; + serial_driver.major = TTY_MAJOR; + serial_driver.minor_start = 64; + serial_driver.num = 1; + serial_driver.type = TTY_DRIVER_TYPE_SERIAL; + serial_driver.subtype = SERIAL_TYPE_NORMAL; + serial_driver.init_termios = tty_std_termios; + serial_driver.init_termios.c_cflag = + B9600 | CS8 | CREAD | HUPCL | CLOCAL; + serial_driver.flags = TTY_DRIVER_REAL_RAW; + serial_driver.refcount = &serial_refcount; + serial_driver.table = serial_table; + serial_driver.termios = serial_termios; + serial_driver.termios_locked = serial_termios_locked; + + serial_driver.open = rs_open; + serial_driver.close = rs_close; + serial_driver.write = rs_write; + serial_driver.put_char = rs_put_char; + serial_driver.flush_chars = rs_flush_chars; + serial_driver.write_room = rs_write_room; + serial_driver.chars_in_buffer = rs_chars_in_buffer; + serial_driver.flush_buffer = rs_flush_buffer; + serial_driver.ioctl = rs_ioctl; + serial_driver.throttle = rs_throttle; + serial_driver.unthrottle = rs_unthrottle; + serial_driver.send_xchar = rs_send_xchar; + serial_driver.set_termios = rs_set_termios; + serial_driver.stop = rs_stop; + serial_driver.start = rs_start; + serial_driver.hangup = rs_hangup; + serial_driver.break_ctl = rs_break; + serial_driver.wait_until_sent = rs_wait_until_sent; + serial_driver.read_proc = 
rs_read_proc; + + /* + * Let's have a little bit of fun ! + */ + for (i = 0, state = rs_table; i < NR_PORTS; i++,state++) { + + if (state->type == PORT_UNKNOWN) continue; + + printk(KERN_INFO "ttyS%02d at 0x%04lx (irq = %d) is a %s\n", + state->line, + state->port, state->irq, + uart_config[state->type].name); + } + /* + * The callout device is just like normal device except for + * major number and the subtype code. + */ + callout_driver = serial_driver; + callout_driver.name = "cua"; + callout_driver.major = TTYAUX_MAJOR; + callout_driver.subtype = SERIAL_TYPE_CALLOUT; + callout_driver.read_proc = 0; + callout_driver.proc_entry = 0; + + if (tty_register_driver(&serial_driver)) + panic("Couldn't register simserial driver\n"); + + if (tty_register_driver(&callout_driver)) + panic("Couldn't register callout driver\n"); + + return 0; +} + +#ifndef MODULE +__initcall(simrs_init); +#endif diff -urN linux-2.4.0-test6/drivers/net/Makefile linux-2.4.0-test6-lia/drivers/net/Makefile --- linux-2.4.0-test6/drivers/net/Makefile Thu Aug 10 19:56:22 2000 +++ linux-2.4.0-test6-lia/drivers/net/Makefile Thu Aug 10 20:29:29 2000 @@ -196,6 +196,7 @@ obj-$(CONFIG_ES3210) += es3210.o 8390.o obj-$(CONFIG_LNE390) += lne390.o 8390.o obj-$(CONFIG_NE3210) += ne3210.o 8390.o +obj-$(CONFIG_SIMETH) += simeth.o obj-$(CONFIG_PPP) += ppp_generic.o slhc.o obj-$(CONFIG_PPP_ASYNC) += ppp_async.o diff -urN linux-2.4.0-test6/drivers/net/eepro100.c linux-2.4.0-test6-lia/drivers/net/eepro100.c --- linux-2.4.0-test6/drivers/net/eepro100.c Thu Aug 10 19:56:22 2000 +++ linux-2.4.0-test6-lia/drivers/net/eepro100.c Mon Jul 31 14:01:22 2000 @@ -23,6 +23,8 @@ Convert to new PCI driver interface 2000 Mar 24 Dragan Stancevic Disabled FC and ER, to avoid lockups when when we get FCP interrupts. 
+ 2000 Jul 17 Goutham Rao + PCI DMA API fixes, adding pci_dma_sync_single calls where neccesary */ static const char *version = @@ -39,9 +41,18 @@ static int txdmacount = 128; static int rxdmacount = 0; +#ifdef __ia64__ +/* + * Bug: this driver may generate unaligned accesses when not copying + * an incoming packet. Setting rx_copybreak to a large value force a + * copy and prevents unaligned accesses. + */ +static int rx_copybreak = 0x10000; +#else /* Set the copy breakpoint for the copy-only-tiny-buffer Rx method. Lower values use more memory, but are faster. */ static int rx_copybreak = 200; +#endif /* Maximum events (Rx packets, etc.) to handle at each interrupt. */ static int max_interrupt_work = 20; @@ -427,14 +438,14 @@ /* The Speedo3 Rx and Tx frame/buffer descriptors. */ struct descriptor { /* A generic descriptor. */ - s32 cmd_status; /* All command and status fields. */ + volatile s32 cmd_status; /* All command and status fields. */ u32 link; /* struct descriptor * */ unsigned char params[0]; }; /* The Speedo3 Rx and Tx buffer descriptors. */ struct RxFD { /* Receive frame descriptor. */ - s32 status; + volatile s32 status; u32 link; /* struct RxFD * */ u32 rx_buf_addr; /* void * */ u32 count; @@ -515,6 +526,7 @@ spinlock_t lock; /* Group with Tx control cache line. */ u32 tx_threshold; /* The value for txdesc.count. */ struct RxFD *last_rxf; /* Last filled RX buffer. */ + dma_addr_t last_rxf_dma; unsigned int cur_rx, dirty_rx; /* The next free ring entry */ long last_rx_time; /* Last Rx, in jiffies, to handle Rx hang. 
*/ const char *product_name; @@ -1213,19 +1225,24 @@ sp->rx_ring_dma[i] = pci_map_single(sp->pdev, rxf, PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE); skb_reserve(skb, sizeof(struct RxFD)); - if (last_rxf) + if (last_rxf) { last_rxf->link = cpu_to_le32(sp->rx_ring_dma[i]); + pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[i-1], sizeof(struct RxFD), PCI_DMA_TODEVICE); + } last_rxf = rxf; rxf->status = cpu_to_le32(0x00000001); /* '1' is flag value only. */ rxf->link = 0; /* None yet. */ /* This field unused by i82557. */ rxf->rx_buf_addr = 0xffffffff; rxf->count = cpu_to_le32(PKT_BUF_SZ << 16); + pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[i], sizeof(struct RxFD), PCI_DMA_TODEVICE); } sp->dirty_rx = (unsigned int)(i - RX_RING_SIZE); /* Mark the last entry as end-of-list. */ last_rxf->status = cpu_to_le32(0xC0000002); /* '2' is flag value only. */ + pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[RX_RING_SIZE-1], sizeof(struct RxFD), PCI_DMA_TODEVICE); sp->last_rxf = last_rxf; + sp->last_rxf_dma = sp->rx_ring_dma[RX_RING_SIZE-1]; } static void speedo_purge_tx(struct net_device *dev) @@ -1660,6 +1677,7 @@ skb->dev = dev; skb_reserve(skb, sizeof(struct RxFD)); rxf->rx_buf_addr = 0xffffffff; + pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[entry], sizeof(struct RxFD), PCI_DMA_TODEVICE); return rxf; } @@ -1672,7 +1690,9 @@ rxf->count = cpu_to_le32(PKT_BUF_SZ << 16); sp->last_rxf->link = cpu_to_le32(rxf_dma); sp->last_rxf->status &= cpu_to_le32(~0xC0000000); + pci_dma_sync_single(sp->pdev, sp->last_rxf_dma, sizeof(struct RxFD), PCI_DMA_TODEVICE); sp->last_rxf = rxf; + sp->last_rxf_dma = rxf_dma; } static int speedo_refill_rx_buf(struct net_device *dev, int force) @@ -1738,9 +1758,17 @@ if (speedo_debug > 4) printk(KERN_DEBUG " In speedo_rx().\n"); /* If we own the next entry, it's a new packet. Send it up. 
*/ - while (sp->rx_ringp[entry] != NULL && - (status = le32_to_cpu(sp->rx_ringp[entry]->status)) & RxComplete) { - int pkt_len = le32_to_cpu(sp->rx_ringp[entry]->count) & 0x3fff; + while (sp->rx_ringp[entry] != NULL) { + int pkt_len; + + pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[entry], + sizeof(struct RxFD), PCI_DMA_FROMDEVICE); + + if(!((status = le32_to_cpu(sp->rx_ringp[entry]->status)) & RxComplete)) { + break; + } + + pkt_len = le32_to_cpu(sp->rx_ringp[entry]->count) & 0x3fff; if (--rx_work_limit < 0) break; @@ -1782,7 +1810,8 @@ skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ /* 'skb_put()' points to the start of sk_buff data area. */ pci_dma_sync_single(sp->pdev, sp->rx_ring_dma[entry], - PKT_BUF_SZ + sizeof(struct RxFD), PCI_DMA_FROMDEVICE); + sizeof(struct RxFD) + pkt_len, PCI_DMA_FROMDEVICE); + #if 1 || USE_IP_CSUM /* Packet is in one chunk -- we can copy + cksum. */ eth_copy_and_sum(skb, sp->rx_skbuff[entry]->tail, pkt_len, 0); @@ -2165,6 +2194,8 @@ /* Set the link in the setup frame. */ mc_setup_frm->link = cpu_to_le32(TX_RING_ELEM_DMA(sp, (entry + 1) % TX_RING_SIZE)); + + pci_dma_sync_single(sp->pdev, mc_blk->frame_dma, mc_blk->len, PCI_DMA_TODEVICE); wait_for_cmd_done(ioaddr + SCBCmd); clear_suspend(last_cmd); diff -urN linux-2.4.0-test6/drivers/net/simeth.c linux-2.4.0-test6-lia/drivers/net/simeth.c --- linux-2.4.0-test6/drivers/net/simeth.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.0-test6-lia/drivers/net/simeth.c Mon Jul 31 14:01:22 2000 @@ -0,0 +1,600 @@ +/* + * Simulated Ethernet Driver + * + * Copyright (C) 1999-2000 Hewlett-Packard Co + * Copyright (C) 1999-2000 Stephane Eranain + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define SIMETH_IRQ 0xed +#define SIMETH_RECV_MAX 10 + +/* + * Maximum possible received frame for Ethernet. 
+ * We preallocate an sk_buff of that size to avoid costly
+ * memcpy for temporary buffer into sk_buff. We do basically
+ * what's done in other drivers, like eepro with a ring.
+ * The difference is, of course, that we don't have real DMA !!!
+ */
+#define SIMETH_FRAME_SIZE ETH_FRAME_LEN
+
+
+#define SSC_NETDEV_PROBE 100
+#define SSC_NETDEV_SEND 101
+#define SSC_NETDEV_RECV 102
+#define SSC_NETDEV_ATTACH 103
+#define SSC_NETDEV_DETACH 104
+
+#define NETWORK_INTR 8
+
+/*
+ * This structure is needed for the module version
+ * It hasn't been tested yet
+ */
+struct simeth_local {
+ struct net_device *next_module;
+ struct net_device_stats stats;
+ int simfd; /* descriptor in the simulator */
+};
+
+static int simeth_probe1(void);
+static int simeth_open(struct net_device *dev);
+static int simeth_close(struct net_device *dev);
+static int simeth_tx(struct sk_buff *skb, struct net_device *dev);
+static int simeth_rx(struct net_device *dev);
+static struct net_device_stats *simeth_get_stats(struct net_device *dev);
+static void simeth_interrupt(int irq, void *dev_id, struct pt_regs * regs);
+static void set_multicast_list(struct net_device *dev);
+static int simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr);
+
+static char *simeth_version="v0.2";
+
+/*
+ * This variable is used to establish a mapping between the Linux/ia64 kernel
+ * and the host linux kernel.
+ *
+ * As of today, we support only one card, even though most of the code
+ * is ready for many more. The mapping is then:
+ * linux/ia64 -> linux/x86
+ * eth0 -> eth1
+ *
+ * In the future, with some string operations, we could easily support up
+ * to 10 cards (0-9).
+ *
+ * The default mapping can be changed on the kernel command line by
+ * specifying simeth=ethX (or whatever string you want).
+ */ +static char *simeth_device="eth0"; /* default host interface to use */ + + + +static volatile unsigned int card_count; /* how many cards "found" so far */ +static int simeth_debug=0; /* set to 1 to get debug information */ + +/* + * Used to catch IFF_UP & IFF_DOWN events + */ +static struct notifier_block simeth_dev_notifier = { + simeth_device_event, + 0 +}; + + +/* + * Function used when using a kernel command line option. + * + * Format: simeth=interface_name (like eth0) + */ +static int __init +simeth_setup(char *str) +{ + simeth_device = str; + return 1; +} + +__setup("simeth=", simeth_setup); + +/* + * Function used to probe for simeth devices when not installed + * as a loadable module + */ + +int __init +simeth_probe (void) +{ + return simeth_probe1(); +} + +extern long ia64_ssc (long, long, long, long, int); +extern void ia64_ssc_connect_irq (long intr, long irq); + +static inline int +netdev_probe(char *name, unsigned char *ether) +{ + return ia64_ssc(__pa(name), __pa(ether), 0,0, SSC_NETDEV_PROBE); +} + + +static inline int +netdev_connect(int irq) +{ + /* XXX Fix me + * this does not support multiple cards + * also no return value + */ + ia64_ssc_connect_irq(NETWORK_INTR, irq); + return 0; +} + +static inline int +netdev_attach(int fd, int irq, unsigned int ipaddr) +{ + /* this puts the host interface in the right mode (start interupting) */ + return ia64_ssc(fd, ipaddr, 0,0, SSC_NETDEV_ATTACH); +} + + +static inline int +netdev_detach(int fd) +{ + /* + * inactivate the host interface (don't interrupt anymore) */ + return ia64_ssc(fd, 0,0,0, SSC_NETDEV_DETACH); +} + +static inline int +netdev_send(int fd, unsigned char *buf, unsigned int len) +{ + return ia64_ssc(fd, __pa(buf), len, 0, SSC_NETDEV_SEND); +} + +static inline int +netdev_read(int fd, unsigned char *buf, unsigned int len) +{ + return ia64_ssc(fd, __pa(buf), len, 0, SSC_NETDEV_RECV); +} + +/* + * Function shared with module code, so cannot be in init section + * + * So far this 
function "detects" only one card (test_&_set) but could
+ * be extended easily.
+ *
+ * Return:
+ * - -ENODEV if no device found
+ * - -ENOMEM if no more memory
+ * - 0 otherwise
+ */
+static int
+simeth_probe1(void)
+{
+ unsigned char mac_addr[ETH_ALEN];
+ struct simeth_local *local;
+ struct net_device *dev;
+ int fd, i;
+
+ /*
+ * XXX Fix me
+ * let's support just one card for now
+ */
+ if (test_and_set_bit(0, &card_count))
+ return -ENODEV;
+
+ /*
+ * check with the simulator for the device
+ */
+ fd = netdev_probe(simeth_device, mac_addr);
+ if (fd == -1)
+ return -ENODEV;
+
+ dev = init_etherdev(NULL, sizeof(struct simeth_local));
+ if (!dev)
+ return -ENOMEM;
+
+ memcpy(dev->dev_addr, mac_addr, sizeof(mac_addr));
+ /*
+ * XXX Fix me
+ * does not support more than one card !
+ */
+ dev->irq = SIMETH_IRQ;
+
+ /*
+ * attach the interrupt in the simulator, this does not enable interrupts
+ * until a netdev_attach() is called
+ */
+ netdev_connect(dev->irq);
+
+ memset(dev->priv, 0, sizeof(struct simeth_local));
+
+ local = dev->priv;
+ local->simfd = fd; /* keep track of underlying file descriptor */
+ local->next_module = NULL;
+
+ dev->open = simeth_open;
+ dev->stop = simeth_close;
+ dev->hard_start_xmit = simeth_tx;
+ dev->get_stats = simeth_get_stats;
+ dev->set_multicast_list = set_multicast_list; /* not yet used */
+
+ /* Fill in the fields of the device structure with ethernet-generic values. */
+ ether_setup(dev);
+
+ printk("simeth: %s alpha\n", simeth_version);
+ printk("%s: hosteth=%s simfd=%d, HwAddr", dev->name, simeth_device, local->simfd);
+ for(i = 0; i < ETH_ALEN; i++) {
+ printk(" %2.2x", dev->dev_addr[i]);
+ }
+ printk(", IRQ %d\n", dev->irq);
+
+#ifdef MODULE
+ local->next_module = simeth_dev;
+ simeth_dev = dev;
+#endif
+ /*
+ * XXX Fix me
+ * would not work with more than one device !
+ */ + register_netdevice_notifier(&simeth_dev_notifier); + + return 0; +} + +/* + * actually binds the device to an interrupt vector + */ +static int +simeth_open(struct net_device *dev) +{ + if (request_irq(dev->irq, simeth_interrupt, 0, "simeth", dev)) { + printk ("simeth: unable to get IRQ %d.\n", dev->irq); + return -EAGAIN; + } + + netif_start_queue(dev); + MOD_INC_USE_COUNT; + + return 0; +} + +/* copied from lapbether.c */ +static __inline__ int dev_is_ethdev(struct net_device *dev) +{ + return ( dev->type == ARPHRD_ETHER && strncmp(dev->name, "dummy", 5)); +} + + +/* + * Handler for IFF_UP or IFF_DOWN + * + * The reason for that is that we don't want to be interrupted when the + * interface is down. There is no way to unconnect in the simualtor. Instead + * we use this function to shutdown packet processing in the frame filter + * in the simulator. Thus no interrupts are generated + * + * + * That's also the place where we pass the IP address of this device to the + * simulator so that that we can start filtering packets for it + * + * There may be a better way of doing this, but I don't know which yet. + */ +static int +simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr) +{ + struct net_device *dev = (struct net_device *)ptr; + struct simeth_local *local; + struct in_device *in_dev; + struct in_ifaddr **ifap = NULL; + struct in_ifaddr *ifa = NULL; + int r; + + + if ( ! dev ) { + printk(KERN_WARNING "simeth_device_event dev=0\n"); + return NOTIFY_DONE; + } + + if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE; + + /* + * Check whether or not it's for an ethernet device + * + * XXX Fixme: This works only as long as we support one + * type of ethernet device. 
+ */ + if ( !dev_is_ethdev(dev) ) return NOTIFY_DONE; + + if ((in_dev=dev->ip_ptr) != NULL) { + for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next) + if (strcmp(dev->name, ifa->ifa_label) == 0) break; + } + if ( ifa == NULL ) { + printk("simeth_open: can't find device %s's ifa\n", dev->name); + return NOTIFY_DONE; + } + + printk("simeth_device_event: %s ipaddr=0x%x\n", dev->name, htonl(ifa->ifa_local)); + + /* + * XXX Fix me + * if the device was up, and we're simply reconfiguring it, not sure + * we get DOWN then UP. + */ + + local = dev->priv; + /* now do it for real */ + r = event == NETDEV_UP ? + netdev_attach(local->simfd, dev->irq, htonl(ifa->ifa_local)): + netdev_detach(local->simfd); + + printk("simeth: netdev_attach/detach: event=%s ->%d\n", event == NETDEV_UP ? "attach":"detach", r); + + return NOTIFY_DONE; +} + +static int +simeth_close(struct net_device *dev) +{ + netif_stop_queue(dev); + + free_irq(dev->irq, dev); + + MOD_DEC_USE_COUNT; + + return 0; +} + +/* + * Only used for debug + */ +static void +frame_print(unsigned char *from, unsigned char *frame, int len) +{ + int i; + + printk("%s: (%d) %02x", from, len, frame[0] & 0xff); + for(i=1; i < 6; i++ ) { + printk(":%02x", frame[i] &0xff); + } + printk(" %2x", frame[6] &0xff); + for(i=7; i < 12; i++ ) { + printk(":%02x", frame[i] &0xff); + } + printk(" [%02x%02x]\n", frame[12], frame[13]); + + for(i=14; i < len; i++ ) { + printk("%02x ", frame[i] &0xff); + if ( (i%10)==0) printk("\n"); + } + printk("\n"); +} + + +/* + * Function used to transmit of frame, very last one on the path before + * going to the simulator. + */ +static int +simeth_tx(struct sk_buff *skb, struct net_device *dev) +{ + struct simeth_local *local = (struct simeth_local *)dev->priv; + +#if 0 + /* ensure we have at least ETH_ZLEN bytes (min frame size) */ + unsigned int length = ETH_ZLEN < skb->len ? skb->len : ETH_ZLEN; + /* Where do the extra padding bytes comes from inthe skbuff ? 
*/
+#else
+ /* the real driver in the host system is going to take care of that
+ * or maybe it's the NIC itself.
+ */
+ unsigned int length = skb->len;
+#endif
+
+ local->stats.tx_bytes += skb->len;
+ local->stats.tx_packets++;
+
+
+ if (simeth_debug > 5) frame_print("simeth_tx", skb->data, length);
+
+ netdev_send(local->simfd, skb->data, length);
+
+ /*
+ * we are synchronous on write, so we don't simulate a
+ * transmit complete interrupt, thus we don't need to arm a tx
+ */
+
+ dev_kfree_skb(skb);
+ return 0;
+}
+
+static inline struct sk_buff *
+make_new_skb(struct net_device *dev)
+{
+ struct sk_buff *nskb;
+
+ /*
+ * The +2 is used to make sure that the IP header is nicely
+ * aligned (on 4byte boundary I assume 14+2=16)
+ */
+ nskb = dev_alloc_skb(SIMETH_FRAME_SIZE + 2);
+ if ( nskb == NULL ) {
+ printk(KERN_NOTICE "%s: memory squeeze. dropping packet.\n", dev->name);
+ return NULL;
+ }
+ nskb->dev = dev;
+
+ skb_reserve(nskb, 2); /* Align IP on 16 byte boundaries */
+
+ skb_put(nskb,SIMETH_FRAME_SIZE);
+
+ return nskb;
+}
+
+/*
+ * called from interrupt handler to process a received frame
+ */
+static int
+simeth_rx(struct net_device *dev)
+{
+ struct simeth_local *local;
+ struct sk_buff *skb;
+ int len;
+ int rcv_count = SIMETH_RECV_MAX;
+
+ local = (struct simeth_local *)dev->priv;
+ /*
+ * the loop concept has been borrowed from other drivers
+ * looks to me like it's a throttling thing to avoid pushing too many
+ * packets at one time into the stack. Making sure we can process them
+ * upstream and make forward progress overall
+ */
+ do {
+ if ( (skb=make_new_skb(dev)) == NULL ) {
+ printk(KERN_NOTICE "%s: memory squeeze.
dropping packet.\n", dev->name); + local->stats.rx_dropped++; + return 0; + } + /* + * Read only one frame at a time + */ + len = netdev_read(local->simfd, skb->data, SIMETH_FRAME_SIZE); + if ( len == 0 ) { + if ( simeth_debug > 0 ) printk(KERN_WARNING "%s: count=%d netdev_read=0\n", dev->name, SIMETH_RECV_MAX-rcv_count); + break; + } +#if 0 + /* + * XXX Fix me + * Should really do a csum+copy here + */ + memcpy(skb->data, frame, len); +#endif + skb->protocol = eth_type_trans(skb, dev); + + if ( simeth_debug > 6 ) frame_print("simeth_rx", skb->data, len); + + /* + * push the packet up & trigger software interrupt + */ + netif_rx(skb); + + local->stats.rx_packets++; + local->stats.rx_bytes += len; + + } while ( --rcv_count ); + + return len; /* 0 = nothing left to read, otherwise, we can try again */ +} + +/* + * Interrupt handler (Yes, we can do it too !!!) + */ +static void +simeth_interrupt(int irq, void *dev_id, struct pt_regs * regs) +{ + struct net_device *dev = dev_id; + + if ( dev == NULL ) { + printk(KERN_WARNING "simeth: irq %d for unknown device\n", irq); + return; + } + + /* + * very simple loop because we get interrupts only when receving + */ + while (simeth_rx(dev)); +} + +static struct net_device_stats * +simeth_get_stats(struct net_device *dev) +{ + struct simeth_local *local = (struct simeth_local *) dev->priv; + + return &local->stats; +} + +/* fake multicast ability */ +static void +set_multicast_list(struct net_device *dev) +{ + printk(KERN_WARNING "%s: set_multicast_list called\n", dev->name); +} + +#ifdef CONFIG_NET_FASTROUTE +static int +simeth_accept_fastpath(struct net_device *dev, struct dst_entry *dst) +{ + printk(KERN_WARNING "%s: simeth_accept_fastpath called\n", dev->name); + return -1; +} +#endif + + +#ifdef MODULE +static int +simeth_init(void) +{ + unsigned int cards_found = 0; + + /* iterate over probe */ + + while ( simeth_probe1() == 0 ) cards_found++; + + return cards_found ? 
0 : -ENODEV;
+}
+
+
+int
+init_module(void)
+{
+ simeth_dev = NULL;
+
+ /* the register_netdev is done "indirectly" by init_etherdev() */
+
+ return simeth_init();
+}
+
+void
+cleanup_module(void)
+{
+ struct net_device *next;
+
+ while ( simeth_dev ) {
+
+ next = ((struct simeth_local *)simeth_dev->priv)->next_module;
+
+ unregister_netdev(simeth_dev);
+
+ kfree(simeth_dev);
+
+ simeth_dev = next;
+ }
+ /*
+ * XXX fix me
+ * not clean when multiple devices
+ */
+ unregister_netdevice_notifier(&simeth_dev_notifier);
+}
+#else /* !MODULE */
+__initcall(simeth_probe);
+#endif /* !MODULE */
diff -urN linux-2.4.0-test6/drivers/pci/pci.ids linux-2.4.0-test6-lia/drivers/pci/pci.ids
--- linux-2.4.0-test6/drivers/pci/pci.ids Thu Aug 10 19:56:23 2000
+++ linux-2.4.0-test6-lia/drivers/pci/pci.ids Thu Aug 10 20:29:30 2000
@@ -4635,7 +4635,12 @@
 84c4 450KX/GX [Orion] - 82454KX/GX PCI bridge
 84c5 450KX/GX [Orion] - 82453KX/GX Memory controller
 84ca 450NX - 82451NX Memory & I/O Controller
- 84cb 450NX - 82454NX PCI Expander Bridge
+ 84cb 450NX - 82454NX/84460GX PCI Expander Bridge
+ 84e0 460GX - 84460GX System Address Controller (SAC)
+ 84e1 460GX - 84460GX System Data Controller (SDC)
+ 84e2 460GX - 84460GX AGP Bridge (GXB)
+ 84e3 460GX - 84460GX Memory Address Controller (MAC)
+ 84e4 460GX - 84460GX Memory Data Controller (MDC)
 ffff 450NX/GX [Orion] - 82453KX/GX Memory controller [BUG]
8800 Trigem Computer Inc.
2008 Video assistent component diff -urN linux-2.4.0-test6/drivers/scsi/Makefile linux-2.4.0-test6-lia/drivers/scsi/Makefile --- linux-2.4.0-test6/drivers/scsi/Makefile Thu Aug 10 19:56:23 2000 +++ linux-2.4.0-test6-lia/drivers/scsi/Makefile Thu Aug 10 20:29:30 2000 @@ -40,6 +40,7 @@ obj-$(CONFIG_SCSI_PSI240I) += psi240i.o obj-$(CONFIG_MVME16x_SCSI) += mvme16x.o 53c7xx.o obj-$(CONFIG_BVME6000_SCSI) += bvme6000.o 53c7xx.o +obj-$(CONFIG_SCSI_SIM) += simscsi.o obj-$(CONFIG_SCSI_SIM710) += sim710.o obj-$(CONFIG_A4000T_SCSI) += amiga7xx.o 53c7xx.o obj-$(CONFIG_A4091_SCSI) += amiga7xx.o 53c7xx.o diff -urN linux-2.4.0-test6/drivers/scsi/hosts.c linux-2.4.0-test6-lia/drivers/scsi/hosts.c --- linux-2.4.0-test6/drivers/scsi/hosts.c Tue Jul 11 11:17:45 2000 +++ linux-2.4.0-test6-lia/drivers/scsi/hosts.c Mon Jul 31 14:01:22 2000 @@ -325,6 +325,10 @@ #include "scsi_debug.h" #endif +#ifdef CONFIG_SCSI_SIM +#include "simscsi.h" +#endif + #ifdef CONFIG_SCSI_ACORNSCSI_3 #include "../acorn/scsi/acornscsi.h" #endif @@ -637,6 +641,9 @@ #endif #ifdef CONFIG_SCSI_PLUTO PLUTO, +#endif +#ifdef CONFIG_SCSI_SIM + SIMSCSI, #endif #ifdef CONFIG_ARCH_ACORN #ifdef CONFIG_SCSI_ACORNSCSI_3 diff -urN linux-2.4.0-test6/drivers/scsi/qla1280.c linux-2.4.0-test6-lia/drivers/scsi/qla1280.c --- linux-2.4.0-test6/drivers/scsi/qla1280.c Mon Jun 19 13:42:40 2000 +++ linux-2.4.0-test6-lia/drivers/scsi/qla1280.c Mon Jul 31 14:01:22 2000 @@ -809,6 +809,7 @@ index++, &pci_bus, &pci_devfn)) ) { #endif /* found a adapter */ + template->unchecked_isa_dma = 1; host = scsi_register(template, sizeof(scsi_qla_host_t)); ha = (scsi_qla_host_t *) host->hostdata; /* Clear our data area */ diff -urN linux-2.4.0-test6/drivers/scsi/simscsi.c linux-2.4.0-test6-lia/drivers/scsi/simscsi.c --- linux-2.4.0-test6/drivers/scsi/simscsi.c Wed Dec 31 16:00:00 1969 +++ linux-2.4.0-test6-lia/drivers/scsi/simscsi.c Mon Jul 31 14:01:22 2000 @@ -0,0 +1,359 @@ +/* + * Simulated SCSI driver. 
+ * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang + * Copyright (C) 1999 Stephane Eranian + * + * 99/12/18 David Mosberger Added support for READ10/WRITE10 needed by linux v2.3.33 + */ +#include +#include +#include +#include +#include + +#include + +#include + +#include "scsi.h" +#include "sd.h" +#include "hosts.h" +#include "simscsi.h" + +#define DEBUG_SIMSCSI 0 + +/* Simulator system calls: */ + +#define SSC_OPEN 50 +#define SSC_CLOSE 51 +#define SSC_READ 52 +#define SSC_WRITE 53 +#define SSC_GET_COMPLETION 54 +#define SSC_WAIT_COMPLETION 55 + +#define SSC_WRITE_ACCESS 2 +#define SSC_READ_ACCESS 1 + +struct timer_list disk_timer; + +struct disk_req { + unsigned long addr; + unsigned len; +}; + +struct disk_stat { + int fd; + unsigned count; +}; + +extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr); + +static int desc[8] = { -1, -1, -1, -1, -1, -1, -1, -1 }; + +static struct queue_entry { + Scsi_Cmnd *sc; +} queue[SIMSCSI_REQ_QUEUE_LEN]; + +static int rd, wr; +static atomic_t num_reqs = ATOMIC_INIT(0); + +/* base name for default disks */ +static char *simscsi_root = DEFAULT_SIMSCSI_ROOT; + +#define MAX_ROOT_LEN 128 + +/* + * used to setup a new base for disk images + * to use /foo/bar/disk[a-z] as disk images + * you have to specify simscsi=/foo/bar/disk on the command line + */ +static int __init +simscsi_setup (char *s) +{ + /* XXX Fix me we may need to strcpy() ? 
*/ + if (strlen(s) > MAX_ROOT_LEN) { + printk("simscsi_setup: prefix too long---using default %s\n", simscsi_root); + } + simscsi_root = s; + return 1; +} + +__setup("simscsi=", simscsi_setup); + +static void +simscsi_interrupt (unsigned long val) +{ + unsigned long flags; + Scsi_Cmnd *sc; + + spin_lock_irqsave(&io_request_lock, flags); + { + while ((sc = queue[rd].sc) != 0) { + atomic_dec(&num_reqs); + queue[rd].sc = 0; +#if DEBUG_SIMSCSI + printk("simscsi_interrupt: done with %ld\n", sc->serial_number); +#endif + (*sc->scsi_done)(sc); + rd = (rd + 1) % SIMSCSI_REQ_QUEUE_LEN; + } + } + spin_unlock_irqrestore(&io_request_lock, flags); +} + +int +simscsi_detect (Scsi_Host_Template *templ) +{ + templ->proc_name = "simscsi"; + init_timer(&disk_timer); + disk_timer.function = simscsi_interrupt; + return 1; /* fake one SCSI host adapter */ +} + +int +simscsi_release (struct Scsi_Host *host) +{ + return 0; /* this is easy... */ +} + +const char * +simscsi_info (struct Scsi_Host *host) +{ + return "simulated SCSI host adapter"; +} + +int +simscsi_abort (Scsi_Cmnd *cmd) +{ + printk ("simscsi_abort: unimplemented\n"); + return SCSI_ABORT_SUCCESS; +} + +int +simscsi_reset (Scsi_Cmnd *cmd, unsigned int reset_flags) +{ + printk ("simscsi_reset: unimplemented\n"); + return SCSI_RESET_SUCCESS; +} + +int +simscsi_biosparam (Disk *disk, kdev_t n, int ip[]) +{ + int size = disk->capacity; + + ip[0] = 64; + ip[1] = 32; + ip[2] = size >> 11; + return 0; +} + +static void +simscsi_readwrite (Scsi_Cmnd *sc, int mode, unsigned long offset, unsigned long len) +{ + struct disk_stat stat; + struct disk_req req; + + req.addr = __pa(sc->request_buffer); + req.len = len; /* # of bytes to transfer */ + + if (sc->request_bufflen < req.len) + return; + + stat.fd = desc[sc->target]; +#if DEBUG_SIMSCSI + printk("simscsi_%s @ %lx (off %lx)\n", + mode == SSC_READ ? 
"read":"write", req.addr, offset); +#endif + ia64_ssc(stat.fd, 1, __pa(&req), offset, mode); + ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION); + + if (stat.count == req.len) { + sc->result = GOOD; + } else { + sc->result = DID_ERROR << 16; + } +} + +static void +simscsi_sg_readwrite (Scsi_Cmnd *sc, int mode, unsigned long offset) +{ + int list_len = sc->use_sg; + struct scatterlist *sl = (struct scatterlist *)sc->buffer; + struct disk_stat stat; + struct disk_req req; + + stat.fd = desc[sc->target]; + + while (list_len) { + req.addr = __pa(sl->address); + req.len = sl->length; +#if DEBUG_SIMSCSI + printk("simscsi_sg_%s @ %lx (off %lx) use_sg=%d len=%d\n", + mode == SSC_READ ? "read":"write", req.addr, offset, list_len, sl->length); +#endif + ia64_ssc(stat.fd, 1, __pa(&req), offset, mode); + ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION); + + /* should not happen in our case */ + if (stat.count != req.len) { + sc->result = DID_ERROR << 16; + return; + } + offset += sl->length; + sl++; + list_len--; + } + sc->result = GOOD; +} + +/* + * function handling both READ_6/WRITE_6 (non-scatter/gather mode) + * commands. 
+ * Added 02/26/99 S.Eranian + */ +static void +simscsi_readwrite6 (Scsi_Cmnd *sc, int mode) +{ + unsigned long offset; + + offset = (((sc->cmnd[1] & 0x1f) << 16) | (sc->cmnd[2] << 8) | sc->cmnd[3])*512; + if (sc->use_sg > 0) + simscsi_sg_readwrite(sc, mode, offset); + else + simscsi_readwrite(sc, mode, offset, sc->cmnd[4]*512); +} + + +static void +simscsi_readwrite10 (Scsi_Cmnd *sc, int mode) +{ + unsigned long offset; + + offset = ( (sc->cmnd[2] << 24) | (sc->cmnd[3] << 16) + | (sc->cmnd[4] << 8) | (sc->cmnd[5] << 0))*512; + if (sc->use_sg > 0) + simscsi_sg_readwrite(sc, mode, offset); + else + simscsi_readwrite(sc, mode, offset, ((sc->cmnd[7] << 8) | sc->cmnd[8])*512); +} + +int +simscsi_queuecommand (Scsi_Cmnd *sc, void (*done)(Scsi_Cmnd *)) +{ + char fname[MAX_ROOT_LEN+16]; + char *buf; + +#if DEBUG_SIMSCSI + register long sp asm ("sp"); + printk("simscsi_queuecommand: target=%d,cmnd=%u,sc=%lu,sp=%lx,done=%p\n", + sc->target, sc->cmnd[0], sc->serial_number, sp, done); +#endif + + sc->result = DID_BAD_TARGET << 16; + sc->scsi_done = done; + if (sc->target <= 7 && sc->lun == 0) { + switch (sc->cmnd[0]) { + case INQUIRY: + if (sc->request_bufflen < 35) { + break; + } + sprintf (fname, "%s%c", simscsi_root, 'a' + sc->target); + desc[sc->target] = ia64_ssc (__pa(fname), SSC_READ_ACCESS|SSC_WRITE_ACCESS, + 0, 0, SSC_OPEN); + if (desc[sc->target] < 0) { + /* disk doesn't exist... 
*/ + break; + } + buf = sc->request_buffer; + buf[0] = 0; /* magnetic disk */ + buf[1] = 0; /* not a removable medium */ + buf[2] = 2; /* SCSI-2 compliant device */ + buf[3] = 2; /* SCSI-2 response data format */ + buf[4] = 31; /* additional length (bytes) */ + buf[5] = 0; /* reserved */ + buf[6] = 0; /* reserved */ + buf[7] = 0; /* various flags */ + memcpy(buf + 8, "HP SIMULATED DISK 0.00", 28); + sc->result = GOOD; + break; + + case TEST_UNIT_READY: + sc->result = GOOD; + break; + + case READ_6: + if (desc[sc->target] < 0 ) + break; + simscsi_readwrite6(sc, SSC_READ); + break; + + case READ_10: + if (desc[sc->target] < 0 ) + break; + simscsi_readwrite10(sc, SSC_READ); + break; + + case WRITE_6: + if (desc[sc->target] < 0) + break; + simscsi_readwrite6(sc, SSC_WRITE); + break; + + case WRITE_10: + if (desc[sc->target] < 0) + break; + simscsi_readwrite10(sc, SSC_WRITE); + break; + + + case READ_CAPACITY: + if (desc[sc->target] < 0 || sc->request_bufflen < 8) { + break; + } + buf = sc->request_buffer; + + /* pretend to be a 1GB disk (partition table contains real stuff): */ + buf[0] = 0x00; + buf[1] = 0x1f; + buf[2] = 0xff; + buf[3] = 0xff; + /* set block size of 512 bytes: */ + buf[4] = 0; + buf[5] = 0; + buf[6] = 2; + buf[7] = 0; + sc->result = GOOD; + break; + + case MODE_SENSE: + printk("MODE_SENSE\n"); + break; + + case START_STOP: + printk("START_STOP\n"); + break; + + default: + panic("simscsi: unknown SCSI command %u\n", sc->cmnd[0]); + } + } + if (sc->result == DID_BAD_TARGET) { + sc->result |= DRIVER_SENSE << 24; + sc->sense_buffer[0] = 0x70; + sc->sense_buffer[2] = 0x00; + } + if (atomic_read(&num_reqs) >= SIMSCSI_REQ_QUEUE_LEN) { + panic("Attempt to queue command while command is pending!!"); + } + atomic_inc(&num_reqs); + queue[wr].sc = sc; + wr = (wr + 1) % SIMSCSI_REQ_QUEUE_LEN; + + if (!timer_pending(&disk_timer)) { + disk_timer.expires = jiffies + HZ/20; + add_timer(&disk_timer); + } + return 0; +} diff -urN linux-2.4.0-test6/drivers/scsi/simscsi.h 
linux-2.4.0-test6-lia/drivers/scsi/simscsi.h --- linux-2.4.0-test6/drivers/scsi/simscsi.h Wed Dec 31 16:00:00 1969 +++ linux-2.4.0-test6-lia/drivers/scsi/simscsi.h Mon Jul 31 14:01:22 2000 @@ -0,0 +1,39 @@ +/* + * Simulated SCSI driver. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang + */ +#ifndef SIMSCSI_H +#define SIMSCSI_H + +#define SIMSCSI_REQ_QUEUE_LEN 64 + +#define DEFAULT_SIMSCSI_ROOT "/var/ski-disks/sd" + +extern int simscsi_detect (Scsi_Host_Template *); +extern int simscsi_release (struct Scsi_Host *); +extern const char *simscsi_info (struct Scsi_Host *); +extern int simscsi_queuecommand (Scsi_Cmnd *, void (*done)(Scsi_Cmnd *)); +extern int simscsi_abort (Scsi_Cmnd *); +extern int simscsi_reset (Scsi_Cmnd *, unsigned int); +extern int simscsi_biosparam (Disk *, kdev_t, int[]); + +#define SIMSCSI { \ + detect: simscsi_detect, \ + release: simscsi_release, \ + info: simscsi_info, \ + queuecommand: simscsi_queuecommand, \ + abort: simscsi_abort, \ + reset: simscsi_reset, \ + bios_param: simscsi_biosparam, \ + can_queue: SIMSCSI_REQ_QUEUE_LEN, \ + this_id: -1, \ + sg_tablesize: 32, \ + cmd_per_lun: SIMSCSI_REQ_QUEUE_LEN, \ + present: 0, \ + unchecked_isa_dma: 0, \ + use_clustering: DISABLE_CLUSTERING \ +} + +#endif /* SIMSCSI_H */ diff -urN linux-2.4.0-test6/drivers/usb/ibmcam.c linux-2.4.0-test6-lia/drivers/usb/ibmcam.c --- linux-2.4.0-test6/drivers/usb/ibmcam.c Thu Aug 10 19:56:30 2000 +++ linux-2.4.0-test6-lia/drivers/usb/ibmcam.c Mon Jul 31 14:01:22 2000 @@ -62,7 +62,7 @@ #define FLAGS_SEPARATE_FRAMES (1 << 5) #define FLAGS_CLEAN_FRAMES (1 << 6) -static int flags = 0; /* FLAGS_DISPLAY_HINTS | FLAGS_OVERLAY_STATS; */ +static int flags = FLAGS_RETRY_VIDIOCSYNC; /* FLAGS_DISPLAY_HINTS | FLAGS_OVERLAY_STATS; */ /* This is the size of V4L frame that we provide */ static const int imgwidth = V4L_FRAME_WIDTH_USED; diff -urN linux-2.4.0-test6/drivers/usb/uhci.c linux-2.4.0-test6-lia/drivers/usb/uhci.c --- 
linux-2.4.0-test6/drivers/usb/uhci.c Thu Aug 10 19:56:30 2000 +++ linux-2.4.0-test6-lia/drivers/usb/uhci.c Mon Jul 31 14:01:22 2000 @@ -33,7 +33,7 @@ #include #include #include -#define DEBUG +#undef DEBUG #include #include @@ -68,6 +68,46 @@ /* If a transfer is still active after this much time, turn off FSBR */ #define IDLE_TIMEOUT (HZ / 20) /* 50 ms */ +#ifdef CONFIG_ITANIUM_A1_SPECIFIC + +static struct uhci *guhci; + +void +disable_usb (void) +{ + unsigned short cmd; + unsigned int io_addr; + + if (guhci == NULL) + return; + + io_addr = guhci->io_addr; + + cmd = inw (io_addr + USBCMD); + + outw(cmd & ~ USBCMD_RS, io_addr+USBCMD); + + while ((inw (io_addr + USBSTS) & USBSTS_HCH) == 0); +} + +void +reenable_usb (void) +{ + unsigned int io_addr; + unsigned short cmd; + + if (guhci == NULL) + return; + + io_addr = guhci->io_addr; + + cmd = inw (io_addr + USBCMD); + + outw(cmd | USBCMD_RS, io_addr+USBCMD); +} + +#endif /* CONFIG_ITANIUM_A1_SPECIFIC */ + /* * Only the USB core should call uhci_alloc_dev and uhci_free_dev */ @@ -2356,6 +2396,11 @@ if (!uhci_start_root_hub(uhci)) { struct pm_dev *pmdev; +#ifdef CONFIG_ITANIUM_A1_SPECIFIC + guhci = uhci; + printk("%s: enabling Lion USB workaround io_addr=%x\n", + __FILE__, guhci->io_addr); +#endif pmdev = pm_register(PM_PCI_DEV, PM_PCI_ID(dev), handle_pm_event); diff -urN linux-2.4.0-test6/fs/binfmt_elf.c linux-2.4.0-test6-lia/fs/binfmt_elf.c --- linux-2.4.0-test6/fs/binfmt_elf.c Thu Aug 10 19:56:31 2000 +++ linux-2.4.0-test6-lia/fs/binfmt_elf.c Mon Jul 31 14:01:22 2000 @@ -482,6 +482,20 @@ if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 || strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0) ibcs2_interpreter = 1; +#if defined(__ia64__) && !defined(CONFIG_BINFMT_ELF32) + /* + * XXX temporary gross hack until all IA-64 Linux binaries + * use /lib/ld-linux-ia64.so.1 as the linker name. 
+ */ +#define INTRP64 "/lib/ld-linux-ia64.so.1" + if (strcmp(elf_interpreter,"/lib/ld-linux.so.2") == 0) { + kfree(elf_interpreter); + elf_interpreter=(char *)kmalloc(sizeof(INTRP64), GFP_KERNEL); + if (!elf_interpreter) + goto out_free_file; + strcpy(elf_interpreter, INTRP64); + } +#endif /* defined(__ia64__) && !defined(CONFIG_BINFMT_ELF32) */ #if 0 printk("Using ELF interpreter %s\n", elf_interpreter); #endif diff -urN linux-2.4.0-test6/fs/dcache.c linux-2.4.0-test6-lia/fs/dcache.c --- linux-2.4.0-test6/fs/dcache.c Thu Aug 10 19:56:31 2000 +++ linux-2.4.0-test6-lia/fs/dcache.c Fri Aug 11 17:23:00 2000 @@ -1189,7 +1189,9 @@ if (!dentry_cache) panic("Cannot create dentry cache"); +#if PAGE_SHIFT < 13 mempages >>= (13 - PAGE_SHIFT); +#endif mempages *= sizeof(struct list_head); for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++) ; diff -urN linux-2.4.0-test6/fs/nfsd/nfsfh.c linux-2.4.0-test6-lia/fs/nfsd/nfsfh.c --- linux-2.4.0-test6/fs/nfsd/nfsfh.c Sat Jul 8 19:26:13 2000 +++ linux-2.4.0-test6-lia/fs/nfsd/nfsfh.c Mon Jul 31 14:01:22 2000 @@ -379,7 +379,7 @@ /* It's a directory, or we are required to confirm the file's * location in the tree. 
*/ - dprintk("nfs_fh: need to look harder for %d/%ld\n",sb->s_dev,ino); + dprintk("nfs_fh: need to look harder for %d/%ld\n",sb->s_dev,(long) ino); down(&sb->s_nfsd_free_path_sem); /* claiming the semaphore might have allowed things to get fixed up */ diff -urN linux-2.4.0-test6/include/asm-ia64/acpi-ext.h linux-2.4.0-test6-lia/include/asm-ia64/acpi-ext.h --- linux-2.4.0-test6/include/asm-ia64/acpi-ext.h Tue Feb 8 12:01:59 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/acpi-ext.h Mon Jul 31 14:01:22 2000 @@ -69,7 +69,7 @@ u8 eid; } acpi_entry_lsapic_t; -typedef struct { +typedef struct acpi_entry_iosapic { u8 type; u8 length; u16 reserved; diff -urN linux-2.4.0-test6/include/asm-ia64/asmmacro.h linux-2.4.0-test6-lia/include/asm-ia64/asmmacro.h --- linux-2.4.0-test6/include/asm-ia64/asmmacro.h Thu Jun 22 07:09:45 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/asmmacro.h Fri Aug 11 16:43:58 2000 @@ -23,7 +23,7 @@ #endif #define ENTRY(name) \ - .align 16; \ + .align 32; \ .proc name; \ name: diff -urN linux-2.4.0-test6/include/asm-ia64/efi.h linux-2.4.0-test6-lia/include/asm-ia64/efi.h --- linux-2.4.0-test6/include/asm-ia64/efi.h Fri Mar 10 15:24:02 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/efi.h Fri Aug 11 16:43:57 2000 @@ -226,6 +226,7 @@ } extern void efi_init (void); +extern void efi_map_pal_code (void); extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg); extern void efi_gettimeofday (struct timeval *tv); extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ diff -urN linux-2.4.0-test6/include/asm-ia64/ia32.h linux-2.4.0-test6-lia/include/asm-ia64/ia32.h --- linux-2.4.0-test6/include/asm-ia64/ia32.h Wed Aug 2 18:54:53 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/ia32.h Fri Aug 11 16:44:13 2000 @@ -40,7 +40,6 @@ __kernel_off_t32 l_start; __kernel_off_t32 l_len; __kernel_pid_t32 l_pid; - short __unused; }; @@ -105,11 +104,21 @@ } sigset32_t; struct sigaction32 { - unsigned int sa_handler; /* Really 
a pointer, but need to deal - with 32 bits */ + unsigned int sa_handler; /* Really a pointer, but need to deal + with 32 bits */ unsigned int sa_flags; - unsigned int sa_restorer; /* Another 32 bit pointer */ - sigset32_t sa_mask; /* A 32 bit mask */ + unsigned int sa_restorer; /* Another 32 bit pointer */ + sigset32_t sa_mask; /* A 32 bit mask */ +}; + +typedef unsigned int old_sigset32_t; /* at least 32 bits */ + +struct old_sigaction32 { + unsigned int sa_handler; /* Really a pointer, but need to deal + with 32 bits */ + old_sigset32_t sa_mask; /* A 32 bit mask */ + unsigned int sa_flags; + unsigned int sa_restorer; /* Another 32 bit pointer */ }; typedef struct sigaltstack_ia32 { diff -urN linux-2.4.0-test6/include/asm-ia64/io.h linux-2.4.0-test6-lia/include/asm-ia64/io.h --- linux-2.4.0-test6/include/asm-ia64/io.h Fri Apr 21 15:21:24 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/io.h Fri Aug 11 16:43:56 2000 @@ -47,6 +47,10 @@ return (void *) (address + PAGE_OFFSET); } +/* + * The following two macros are deprecated and scheduled for removal. + * Please use the PCI-DMA interface defined in instead. + */ #define bus_to_virt phys_to_virt #define virt_to_bus virt_to_phys @@ -315,6 +319,7 @@ #define writeq(v,a) __writeq((v), (void *) (a)) #define __raw_writeb writeb #define __raw_writew writew +#define __raw_writel writel #define __raw_writeq writeq #ifndef inb_p diff -urN linux-2.4.0-test6/include/asm-ia64/machvec.h linux-2.4.0-test6-lia/include/asm-ia64/machvec.h --- linux-2.4.0-test6/include/asm-ia64/machvec.h Fri Mar 10 15:24:02 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/machvec.h Fri Aug 11 16:43:55 2000 @@ -4,8 +4,8 @@ * Copyright (C) 1999 Silicon Graphics, Inc. * Copyright (C) Srinivasa Thirumalachar * Copyright (C) Vijay Chander - * Copyright (C) 1999 Hewlett-Packard Co. - * Copyright (C) David Mosberger-Tang + * Copyright (C) 1999-2000 Hewlett-Packard Co. 
+ * Copyright (C) 1999-2000 David Mosberger-Tang */ #ifndef _ASM_IA64_MACHVEC_H #define _ASM_IA64_MACHVEC_H @@ -21,6 +21,7 @@ struct task_struct; struct timeval; struct vm_area_struct; +struct acpi_entry_iosapic; typedef void ia64_mv_setup_t (char **); typedef void ia64_mv_irq_init_t (void); @@ -30,15 +31,33 @@ typedef void ia64_mv_mca_handler_t (void); typedef void ia64_mv_cmci_handler_t (int, void *, struct pt_regs *); typedef void ia64_mv_log_print_t (void); +typedef void ia64_mv_register_iosapic_t (struct acpi_entry_iosapic *); + +extern void machvec_noop (void); # if defined (CONFIG_IA64_HP_SIM) # include # elif defined (CONFIG_IA64_DIG) # include # elif defined (CONFIG_IA64_SGI_SN1_SIM) -# include +# include # elif defined (CONFIG_IA64_GENERIC) +# ifdef MACHVEC_PLATFORM_HEADER +# include MACHVEC_PLATFORM_HEADER +# else +# define platform_name ia64_mv.name +# define platform_setup ia64_mv.setup +# define platform_irq_init ia64_mv.irq_init +# define platform_map_nr ia64_mv.map_nr +# define platform_mca_init ia64_mv.mca_init +# define platform_mca_handler ia64_mv.mca_handler +# define platform_cmci_handler ia64_mv.cmci_handler +# define platform_log_print ia64_mv.log_print +# define platform_pci_fixup ia64_mv.pci_fixup +# define platform_register_iosapic ia64_mv.register_iosapic +# endif + struct ia64_machine_vector { const char *name; ia64_mv_setup_t *setup; @@ -49,6 +68,7 @@ ia64_mv_mca_handler_t *mca_handler; ia64_mv_cmci_handler_t *cmci_handler; ia64_mv_log_print_t *log_print; + ia64_mv_register_iosapic_t *register_iosapic; }; #define MACHVEC_INIT(name) \ @@ -61,22 +81,12 @@ platform_mca_init, \ platform_mca_handler, \ platform_cmci_handler, \ - platform_log_print \ + platform_log_print, \ + platform_register_iosapic \ } -# ifndef MACHVEC_INHIBIT_RENAMING -# define platform_name ia64_mv.name -# define platform_setup ia64_mv.setup -# define platform_irq_init ia64_mv.irq_init -# define platform_map_nr ia64_mv.map_nr -# define platform_mca_init ia64_mv.mca_init 
-# define platform_mca_handler ia64_mv.mca_handler -# define platform_cmci_handler ia64_mv.cmci_handler -# define platform_log_print ia64_mv.log_print -# endif - extern struct ia64_machine_vector ia64_mv; -extern void machvec_noop (void); +extern void machvec_init (const char *name); # else # error Unknown configuration. Update asm-ia64/machvec.h. @@ -103,6 +113,12 @@ #endif #ifndef platform_log_print # define platform_log_print ((ia64_mv_log_print_t *) machvec_noop) +#endif +#ifndef platform_pci_fixup +# define platform_pci_fixup ((ia64_mv_pci_fixup_t *) machvec_noop) +#endif +#ifndef platform_register_iosapic +# define platform_register_iosapic ((ia64_mv_register_iosapic_t *) machvec_noop) #endif #endif /* _ASM_IA64_MACHVEC_H */ diff -urN linux-2.4.0-test6/include/asm-ia64/machvec_dig.h linux-2.4.0-test6-lia/include/asm-ia64/machvec_dig.h --- linux-2.4.0-test6/include/asm-ia64/machvec_dig.h Sun Feb 6 18:42:40 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/machvec_dig.h Mon Jul 31 14:01:22 2000 @@ -5,6 +5,7 @@ extern ia64_mv_irq_init_t dig_irq_init; extern ia64_mv_pci_fixup_t dig_pci_fixup; extern ia64_mv_map_nr_t map_nr_dense; +extern ia64_mv_register_iosapic_t dig_register_iosapic; /* * This stuff has dual use! 
@@ -18,5 +19,6 @@ #define platform_irq_init dig_irq_init #define platform_pci_fixup dig_pci_fixup #define platform_map_nr map_nr_dense +#define platform_register_iosapic dig_register_iosapic #endif /* _ASM_IA64_MACHVEC_DIG_h */ diff -urN linux-2.4.0-test6/include/asm-ia64/machvec_init.h linux-2.4.0-test6-lia/include/asm-ia64/machvec_init.h --- linux-2.4.0-test6/include/asm-ia64/machvec_init.h Sun Feb 6 18:42:40 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/machvec_init.h Mon Jul 31 14:01:22 2000 @@ -1,4 +1,6 @@ -#define MACHVEC_INHIBIT_RENAMING +#define __MACHVEC_HDR(n) +#define __MACHVEC_EXPAND(n) __MACHVEC_HDR(n) +#define MACHVEC_PLATFORM_HEADER __MACHVEC_EXPAND(MACHVEC_PLATFORM_NAME) #include @@ -7,3 +9,5 @@ = MACHVEC_INIT(name); #define MACHVEC_DEFINE(name) MACHVEC_HELPER(name) + +MACHVEC_DEFINE(MACHVEC_PLATFORM_NAME) diff -urN linux-2.4.0-test6/include/asm-ia64/mmu_context.h linux-2.4.0-test6-lia/include/asm-ia64/mmu_context.h --- linux-2.4.0-test6/include/asm-ia64/mmu_context.h Sun Feb 13 10:31:06 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/mmu_context.h Fri Aug 11 16:43:56 2000 @@ -2,12 +2,13 @@ #define _ASM_IA64_MMU_CONTEXT_H /* - * Copyright (C) 1998, 1999 Hewlett-Packard Co - * Copyright (C) 1998, 1999 David Mosberger-Tang + * Copyright (C) 1998-2000 Hewlett-Packard Co + * Copyright (C) 1998-2000 David Mosberger-Tang */ #include #include +#include #include @@ -26,21 +27,6 @@ * architecture manual guarantees this number to be in the range * 18-24. * - * A context number has the following format: - * - * +--------------------+---------------------+ - * | generation number | region id | - * +--------------------+---------------------+ - * - * A context number of 0 is considered "invalid". - * - * The generation number is incremented whenever we end up having used - * up all available region ids. At that point with flush the entire - * TLB and reuse the first region id. 
The new generation number - * ensures that when we context switch back to an old process, we do - * not inadvertently end up using its possibly reused region id. - * Instead, we simply allocate a new region id for that process. - * * Copyright (C) 1998 David Mosberger-Tang */ @@ -56,9 +42,15 @@ #define IA64_HW_CONTEXT_MASK ((1UL << IA64_HW_CONTEXT_BITS) - 1) -extern unsigned long ia64_next_context; +struct ia64_ctx { + spinlock_t lock; + unsigned int next; /* next context number to use */ + unsigned int limit; /* next >= limit => must call wrap_mmu_context() */ +}; + +extern struct ia64_ctx ia64_ctx; -extern void get_new_mmu_context (struct mm_struct *mm); +extern void wrap_mmu_context (struct mm_struct *mm); static inline void enter_lazy_tlb (struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) @@ -76,12 +68,24 @@ } extern inline void +get_new_mmu_context (struct mm_struct *mm) +{ + spin_lock(&ia64_ctx.lock); + { + if (ia64_ctx.next >= ia64_ctx.limit) + wrap_mmu_context(mm); + mm->context = ia64_ctx.next++; + } + spin_unlock(&ia64_ctx.lock); + +} + +extern inline void get_mmu_context (struct mm_struct *mm) { /* check if our ASN is of an older generation and thus invalid: */ - if (((mm->context ^ ia64_next_context) & ~IA64_HW_CONTEXT_MASK) != 0) { + if (mm->context == 0) get_new_mmu_context(mm); - } } extern inline void @@ -103,7 +107,7 @@ unsigned long rid_incr = 0; unsigned long rr0, rr1, rr2, rr3, rr4; - rid = (mm->context & IA64_HW_CONTEXT_MASK); + rid = mm->context; #ifndef CONFIG_IA64_TLB_CHECKS_REGION_NUMBER rid <<= 3; /* make space for encoding the region number */ diff -urN linux-2.4.0-test6/include/asm-ia64/offsets.h linux-2.4.0-test6-lia/include/asm-ia64/offsets.h --- linux-2.4.0-test6/include/asm-ia64/offsets.h Wed Aug 2 18:54:53 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/offsets.h Fri Aug 11 15:53:25 2000 @@ -11,10 +11,10 @@ #define PT_PTRACED_BIT 0 #define PT_TRACESYS_BIT 1 -#define IA64_TASK_SIZE 2768 /* 0xad0 */ +#define IA64_TASK_SIZE 
2864 /* 0xb30 */ #define IA64_PT_REGS_SIZE 400 /* 0x190 */ #define IA64_SWITCH_STACK_SIZE 560 /* 0x230 */ -#define IA64_SIGINFO_SIZE 136 /* 0x88 */ +#define IA64_SIGINFO_SIZE 128 /* 0x80 */ #define UNW_FRAME_INFO_SIZE 448 /* 0x1c0 */ #define IA64_TASK_PTRACE_OFFSET 48 /* 0x30 */ @@ -23,7 +23,7 @@ #define IA64_TASK_PROCESSOR_OFFSET 100 /* 0x64 */ #define IA64_TASK_THREAD_OFFSET 896 /* 0x380 */ #define IA64_TASK_THREAD_KSP_OFFSET 896 /* 0x380 */ -#define IA64_TASK_THREAD_SIGMASK_OFFSET 2648 /* 0xa58 */ +#define IA64_TASK_THREAD_SIGMASK_OFFSET 2744 /* 0xab8 */ #define IA64_TASK_PID_OFFSET 188 /* 0xbc */ #define IA64_TASK_MM_OFFSET 88 /* 0x58 */ #define IA64_PT_REGS_CR_IPSR_OFFSET 0 /* 0x0 */ diff -urN linux-2.4.0-test6/include/asm-ia64/page.h linux-2.4.0-test6-lia/include/asm-ia64/page.h --- linux-2.4.0-test6/include/asm-ia64/page.h Thu Aug 10 19:56:31 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/page.h Fri Aug 11 16:43:55 2000 @@ -100,13 +100,14 @@ #define MAP_NR_SN1(addr) (((unsigned long) (addr) - PAGE_OFFSET) >> PAGE_SHIFT) #ifdef CONFIG_IA64_GENERIC -# define virt_to_page(kaddr) (mem_map + platform_map_nr(kaddr)) +# include +# define virt_to_page(kaddr) (mem_map + platform_map_nr(kaddr)) #elif defined (CONFIG_IA64_SN_SN1_SIM) -# define virt_to_page(kaddr) (mem_map + MAP_NR_SN1(kaddr)) +# define virt_to_page(kaddr) (mem_map + MAP_NR_SN1(kaddr)) #else -# define virt_to_page(kaddr) (mem_map + MAP_NR_DENSE(kaddr)) +# define virt_to_page(kaddr) (mem_map + MAP_NR_DENSE(kaddr)) #endif -#define VALID_PAGE(page) ((page - mem_map) < max_mapnr) +#define VALID_PAGE(page) ((page - mem_map) < max_mapnr) # endif /* __KERNEL__ */ diff -urN linux-2.4.0-test6/include/asm-ia64/pal.h linux-2.4.0-test6-lia/include/asm-ia64/pal.h --- linux-2.4.0-test6/include/asm-ia64/pal.h Thu Jun 22 07:09:45 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/pal.h Mon Jul 31 14:01:22 2000 @@ -18,7 +18,8 @@ * 00/03/07 davidm Updated pal_cache_flush() to be in sync with PAL v2.6. 
* 00/03/23 cfleck Modified processor min-state save area to match updated PAL & SAL info * 00/05/24 eranian Updated to latest PAL spec, fix structures bugs, added - * 00/05/25 eranian Support for stack calls, and statis physical calls + * 00/05/25 eranian Support for stack calls, and static physical calls + * 00/06/18 eranian Support for stacked physical calls */ /* @@ -646,10 +647,12 @@ extern struct ia64_pal_retval ia64_pal_call_static (u64, u64, u64, u64); extern struct ia64_pal_retval ia64_pal_call_stacked (u64, u64, u64, u64); extern struct ia64_pal_retval ia64_pal_call_phys_static (u64, u64, u64, u64); +extern struct ia64_pal_retval ia64_pal_call_phys_stacked (u64, u64, u64, u64); #define PAL_CALL(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_static(a0, a1, a2, a3) #define PAL_CALL_STK(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_stacked(a0, a1, a2, a3) #define PAL_CALL_PHYS(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_phys_static(a0, a1, a2, a3) +#define PAL_CALL_PHYS_STK(iprv,a0,a1,a2,a3) iprv = ia64_pal_call_phys_stacked(a0, a1, a2, a3) typedef int (*ia64_pal_handler) (u64, ...); extern ia64_pal_handler ia64_pal; @@ -951,7 +954,7 @@ /* Return information about processor's optional power management capabilities. 
*/ extern inline s64 ia64_pal_halt_info (pal_power_mgmt_info_u_t *power_buf) -{ +{ struct ia64_pal_retval iprv; PAL_CALL_STK(iprv, PAL_HALT_INFO, (unsigned long) power_buf, 0, 0); return iprv.status; @@ -1370,17 +1373,17 @@ dirty_bit_valid : 1, mem_attr_valid : 1, reserved : 60; - } pal_itr_valid_s; -} pal_itr_valid_u_t; + } pal_tr_valid_s; +} pal_tr_valid_u_t; /* Read a translation register */ extern inline s64 -ia64_pal_vm_tr_read (u64 reg_num, u64 tr_type, u64 tr_buffer, pal_itr_valid_u_t *itr_valid) -{ +ia64_pal_tr_read (u64 reg_num, u64 tr_type, u64 *tr_buffer, pal_tr_valid_u_t *tr_valid) +{ struct ia64_pal_retval iprv; - PAL_CALL(iprv, PAL_VM_TR_READ, reg_num, tr_type, tr_buffer); - if (itr_valid) - itr_valid->piv_val = iprv.v0; + PAL_CALL_PHYS_STK(iprv, PAL_VM_TR_READ, reg_num, tr_type,(u64)__pa(tr_buffer)); + if (tr_valid) + tr_valid->piv_val = iprv.v0; return iprv.status; } diff -urN linux-2.4.0-test6/include/asm-ia64/param.h linux-2.4.0-test6-lia/include/asm-ia64/param.h --- linux-2.4.0-test6/include/asm-ia64/param.h Sun Feb 6 18:42:40 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/param.h Fri Aug 11 16:43:56 2000 @@ -10,23 +10,13 @@ #include -#ifdef CONFIG_IA64_HP_SIM +#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_SOFTSDV_HACKS) /* * Yeah, simulating stuff is slow, so let us catch some breath between * timer interrupts... 
*/ # define HZ 20 -#endif - -#ifdef CONFIG_IA64_DIG -# ifdef CONFIG_IA64_SOFTSDV_HACKS -# define HZ 20 -# else -# define HZ 100 -# endif -#endif - -#ifndef HZ +#else # define HZ 1024 #endif diff -urN linux-2.4.0-test6/include/asm-ia64/pci.h linux-2.4.0-test6-lia/include/asm-ia64/pci.h --- linux-2.4.0-test6/include/asm-ia64/pci.h Thu Jun 22 07:17:16 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/pci.h Fri Aug 11 16:43:56 2000 @@ -1,6 +1,15 @@ #ifndef _ASM_IA64_PCI_H #define _ASM_IA64_PCI_H +#include +#include +#include +#include +#include + +#include +#include + /* * Can be used to override the logic in pci_scan_bus for skipping * already-configured bus numbers - to be used for buggy BIOSes or @@ -11,6 +20,8 @@ #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 +struct pci_dev; + extern inline void pcibios_set_master(struct pci_dev *dev) { /* No special bus mastering setup handling */ @@ -23,18 +34,8 @@ /* * Dynamic DMA mapping API. - * IA-64 has everything mapped statically. */ -#include -#include -#include - -#include -#include - -struct pci_dev; - /* * Allocate and map kernel buffer using consistent mode DMA for a device. * hwdev should be valid struct pci_dev pointer for PCI devices, @@ -64,13 +65,7 @@ * Once the device is given the dma address, the device owns this memory * until either pci_unmap_single or pci_dma_sync_single is performed. */ -extern inline dma_addr_t -pci_map_single (struct pci_dev *hwdev, void *ptr, size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - return virt_to_bus(ptr); -} +extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction); /* * Unmap a single streaming mode DMA translation. The dma_addr and size @@ -80,13 +75,7 @@ * After this call, reads by the cpu to the buffer are guarenteed to see * whatever the device wrote there. 
*/ -extern inline void -pci_unmap_single (struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - /* Nothing to do */ -} +extern void pci_unmap_single (struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction); /* * Map a set of buffers described by scatterlist in streaming @@ -104,26 +93,14 @@ * Device ownership issues as mentioned above for pci_map_single are * the same here. */ -extern inline int -pci_map_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - return nents; -} +extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction); /* * Unmap a set of streaming mode DMA translations. * Again, cpu read rules concerning calls here are the same as for * pci_unmap_single() above. */ -extern inline void -pci_unmap_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - /* Nothing to do */ -} +extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction); /* * Make physical memory consistent for a single @@ -135,13 +112,7 @@ * next point you give the PCI dma address back to the card, the * device again owns the buffer. */ -extern inline void -pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - /* Nothing to do */ -} +extern void pci_dma_sync_single (struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction); /* * Make physical memory consistent for a set of streaming mode DMA @@ -150,20 +121,15 @@ * The same as pci_dma_sync_single but for a scatter-gather list, * same rules and usage. 
*/ -extern inline void -pci_dma_sync_sg (struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction) -{ - if (direction == PCI_DMA_NONE) - BUG(); - /* Nothing to do */ -} +extern void pci_dma_sync_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction); /* Return whether the given PCI device DMA address mask can * be supported properly. For example, if your device can * only drive the low 24-bits during PCI bus mastering, then * you would pass 0x00ffffff as the mask to this function. */ -extern inline int pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) +extern inline int +pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask) { return 1; } diff -urN linux-2.4.0-test6/include/asm-ia64/pgtable.h linux-2.4.0-test6-lia/include/asm-ia64/pgtable.h --- linux-2.4.0-test6/include/asm-ia64/pgtable.h Thu Aug 10 19:56:31 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/pgtable.h Fri Aug 11 16:43:56 2000 @@ -111,6 +111,7 @@ #include #include +#include #include /* @@ -286,7 +287,17 @@ * contains the memory attribute bits, dirty bits, and various other * bits as well. */ -#define pgprot_noncached(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_UC) +#define pgprot_noncached(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_UC) + +/* + * Macro to mark a page protection value as "write-combining". + * Note that "protection" is really a misnomer here as the protection + * value contains the memory attribute bits, dirty bits, and various + * other bits as well. Accesses through a write-combining translation + * bypass the caches, but do allow for consecutive writes to + * be combined into single (but larger) write transactions. + */ +#define pgprot_writecombine(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WC) /* * Return the region index for virtual address ADDRESS. 
diff -urN linux-2.4.0-test6/include/asm-ia64/processor.h linux-2.4.0-test6-lia/include/asm-ia64/processor.h --- linux-2.4.0-test6/include/asm-ia64/processor.h Thu Aug 10 19:56:31 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/processor.h Fri Aug 11 16:43:56 2000 @@ -19,6 +19,7 @@ #include #define IA64_NUM_DBG_REGS 8 +#define IA64_NUM_PM_REGS 4 /* * TASK_SIZE really is a mis-named. It really is the maximum user @@ -152,12 +153,13 @@ #define IA64_THREAD_FPH_VALID (__IA64_UL(1) << 0) /* floating-point high state valid? */ #define IA64_THREAD_DBG_VALID (__IA64_UL(1) << 1) /* debug registers valid? */ -#define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 2) /* don't log unaligned accesses */ -#define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 3) /* generate SIGBUS on unaligned acc. */ -#define IA64_THREAD_KRBS_SYNCED (__IA64_UL(1) << 4) /* krbs synced with process vm? */ +#define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */ +#define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */ +#define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. */ +#define IA64_THREAD_KRBS_SYNCED (__IA64_UL(1) << 5) /* krbs synced with process vm? */ #define IA64_KERNEL_DEATH (__IA64_UL(1) << 63) /* see die_if_kernel()... 
*/ -#define IA64_THREAD_UAC_SHIFT 2 +#define IA64_THREAD_UAC_SHIFT 3 #define IA64_THREAD_UAC_MASK (IA64_THREAD_UAC_NOPRINT | IA64_THREAD_UAC_SIGBUS) #ifndef __ASSEMBLY__ @@ -285,6 +287,14 @@ struct ia64_fpreg fph[96]; /* saved/loaded on demand */ __u64 dbr[IA64_NUM_DBG_REGS]; __u64 ibr[IA64_NUM_DBG_REGS]; +#ifdef CONFIG_PERFMON + __u64 pmc[IA64_NUM_PM_REGS]; + __u64 pmd[IA64_NUM_PM_REGS]; + __u64 pmod[IA64_NUM_PM_REGS]; +# define INIT_THREAD_PM {0, }, {0, }, {0, }, +#else +# define INIT_THREAD_PM +#endif __u64 map_base; /* base address for mmap() */ #ifdef CONFIG_IA32_SUPPORT __u64 eflag; /* IA32 EFLAGS reg */ @@ -316,6 +326,7 @@ {{{{0}}}, }, /* fph */ \ {0, }, /* dbr */ \ {0, }, /* ibr */ \ + INIT_THREAD_PM \ 0x2000000000000000 /* map_base */ \ INIT_THREAD_IA32, \ 0 /* siginfo */ \ @@ -338,8 +349,12 @@ struct mm_struct; struct task_struct; -/* Free all resources held by a thread. */ -extern void release_thread (struct task_struct *); +/* + * Free all resources held by a thread. This is called after the + * parent of DEAD_TASK has collected the exit status of the task via + * wait(). This is a no-op on IA-64. + */ +#define release_thread(dead_task) /* * This is the mechanism for creating a new kernel thread. 
@@ -392,6 +407,18 @@ extern void __ia64_init_fpu (void); extern void __ia64_save_fpu (struct ia64_fpreg *fph); extern void __ia64_load_fpu (struct ia64_fpreg *fph); +extern void ia64_save_debug_regs (unsigned long *save_area); +extern void ia64_load_debug_regs (unsigned long *save_area); + +#ifdef CONFIG_IA32_SUPPORT +extern void ia32_save_state (struct thread_struct *thread); +extern void ia32_load_state (struct thread_struct *thread); +#endif + +#ifdef CONFIG_PERFMON +extern void ia64_save_pm_regs (struct thread_struct *thread); +extern void ia64_load_pm_regs (struct thread_struct *thread); +#endif #define ia64_fph_enable() __asm__ __volatile__ (";; rsm psr.dfh;; srlz.d;;" ::: "memory"); #define ia64_fph_disable() __asm__ __volatile__ (";; ssm psr.dfh;; srlz.d;;" ::: "memory"); diff -urN linux-2.4.0-test6/include/asm-ia64/scatterlist.h linux-2.4.0-test6-lia/include/asm-ia64/scatterlist.h --- linux-2.4.0-test6/include/asm-ia64/scatterlist.h Sun Feb 6 18:42:40 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/scatterlist.h Mon Jul 31 14:01:22 2000 @@ -13,6 +13,7 @@ * indirection buffer, NULL otherwise: */ char *alt_address; + char *orig_address; /* Save away the original buffer address (used by pci-dma.c) */ unsigned int length; /* buffer length */ }; diff -urN linux-2.4.0-test6/include/asm-ia64/siginfo.h linux-2.4.0-test6-lia/include/asm-ia64/siginfo.h --- linux-2.4.0-test6/include/asm-ia64/siginfo.h Thu Jun 22 07:09:45 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/siginfo.h Mon Jul 31 14:01:22 2000 @@ -14,12 +14,13 @@ } sigval_t; #define SI_MAX_SIZE 128 -#define SI_PAD_SIZE ((SI_MAX_SIZE/sizeof(int)) - 3) +#define SI_PAD_SIZE ((SI_MAX_SIZE/sizeof(int)) - 4) typedef struct siginfo { int si_signo; int si_errno; int si_code; + int __pad0; union { int _pad[SI_PAD_SIZE]; @@ -212,7 +213,7 @@ #define SIGEV_THREAD 2 /* deliver via thread creation */ #define SIGEV_MAX_SIZE 64 -#define SIGEV_PAD_SIZE ((SIGEV_MAX_SIZE/sizeof(int)) - 3) +#define SIGEV_PAD_SIZE 
((SIGEV_MAX_SIZE/sizeof(int)) - 4) typedef struct sigevent { sigval_t sigev_value; diff -urN linux-2.4.0-test6/include/asm-ia64/smp.h linux-2.4.0-test6-lia/include/asm-ia64/smp.h --- linux-2.4.0-test6/include/asm-ia64/smp.h Fri Apr 21 15:21:24 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/smp.h Fri Aug 11 16:43:56 2000 @@ -99,5 +99,9 @@ extern void __init init_smp_config (void); extern void smp_do_timer (struct pt_regs *regs); +extern int smp_call_function_single (int cpuid, void (*func) (void *info), void *info, + int retry, int wait); + + #endif /* CONFIG_SMP */ #endif /* _ASM_IA64_SMP_H */ diff -urN linux-2.4.0-test6/include/asm-ia64/spinlock.h linux-2.4.0-test6-lia/include/asm-ia64/spinlock.h --- linux-2.4.0-test6/include/asm-ia64/spinlock.h Thu Jun 22 07:09:45 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/spinlock.h Fri Aug 11 16:43:56 2000 @@ -15,8 +15,11 @@ #include #include +#undef NEW_LOCK + +#ifdef NEW_LOCK typedef struct { - volatile unsigned int lock; + volatile unsigned char lock; } spinlock_t; #define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } @@ -26,44 +29,86 @@ * Streamlined test_and_set_bit(0, (x)). We use test-and-test-and-set * rather than a simple xchg to avoid writing the cache-line when * there is contention. + * + * XXX Fix me: instead of preserving ar.pfs, we should just mark it + * XXX as "clobbered". Unfortunately, the Mar 2000 release of the compiler + * XXX doesn't let us do that. The August release fixes that. */ -#if 1 /* Bad code generation? 
*/ -#define spin_lock(x) __asm__ __volatile__ ( \ - "mov ar.ccv = r0\n" \ - "mov r29 = 1\n" \ - ";;\n" \ - "1:\n" \ - "ld4 r2 = %0\n" \ - ";;\n" \ - "cmp4.eq p0,p7 = r0,r2\n" \ - "(p7) br.cond.spnt.few 1b \n" \ - "cmpxchg4.acq r2 = %0, r29, ar.ccv\n" \ - ";;\n" \ - "cmp4.eq p0,p7 = r0, r2\n" \ - "(p7) br.cond.spnt.few 1b\n" \ - ";;\n" \ - :: "m" __atomic_fool_gcc((x)) : "r2", "r29", "memory") - -#else -#define spin_lock(x) \ -{ \ - spinlock_t *__x = (x); \ - \ - do { \ - while (__x->lock); \ - } while (cmpxchg_acq(&__x->lock, 0, 1)); \ +#define spin_lock(x) \ +{ \ + register char *addr __asm__ ("r31") = (char *) &(x)->lock; \ + long saved_pfs; \ + \ + __asm__ __volatile__ ( \ + "mov r30=1\n" \ + "mov ar.ccv=r0\n" \ + ";;\n" \ + IA64_SEMFIX"cmpxchg1.acq r30=[%1],r30,ar.ccv\n" \ + ";;\n" \ + "cmp.ne p15,p0=r30,r0\n" \ + "mov %0=ar.pfs\n" \ + "(p15) br.call.spnt.few b7=ia64_spinlock_contention\n" \ + ";;\n" \ + "1: (p15) mov ar.pfs=%0;;\n" /* force a new bundle */ \ + : "=&r"(saved_pfs) : "r"(addr) \ + : "p15", "r28", "r29", "r30", "memory"); \ } -#endif + +#define spin_trylock(x) \ +({ \ + register char *addr __asm__ ("r31") = (char *) &(x)->lock; \ + register long result; \ + \ + __asm__ __volatile__ ( \ + "mov r30=1\n" \ + "mov ar.ccv=r0\n" \ + ";;\n" \ + IA64_SEMFIX"cmpxchg1.acq %0=[%1],r30,ar.ccv\n" \ + : "=r"(result) : "r"(addr) : "r30", "memory"); \ + (result == 0); \ +}) #define spin_is_locked(x) ((x)->lock != 0) +#define spin_unlock(x) ({((spinlock_t *) x)->lock = 0;}) +#define spin_unlock_wait(x) ({ while ((x)->lock); }) -#define spin_unlock(x) ({((spinlock_t *) x)->lock = 0; barrier();}) +#else /* !NEW_LOCK */ -/* Streamlined !test_and_set_bit(0, (x)) */ -#define spin_trylock(x) (cmpxchg_acq(&(x)->lock, 0, 1) == 0) +typedef struct { + volatile unsigned int lock; +} spinlock_t; + +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } +#define spin_lock_init(x) ((x)->lock = 0) +/* + * Streamlined test_and_set_bit(0, (x)). 
We use test-and-test-and-set + * rather than a simple xchg to avoid writing the cache-line when + * there is contention. + */ +#define spin_lock(x) __asm__ __volatile__ ( \ + "mov ar.ccv = r0\n" \ + "mov r29 = 1\n" \ + ";;\n" \ + "1:\n" \ + "ld4 r2 = %0\n" \ + ";;\n" \ + "cmp4.eq p0,p7 = r0,r2\n" \ + "(p7) br.cond.spnt.few 1b \n" \ + IA64_SEMFIX"cmpxchg4.acq r2 = %0, r29, ar.ccv\n" \ + ";;\n" \ + "cmp4.eq p0,p7 = r0, r2\n" \ + "(p7) br.cond.spnt.few 1b\n" \ + ";;\n" \ + :: "m" __atomic_fool_gcc((x)) : "r2", "r29", "memory") + +#define spin_is_locked(x) ((x)->lock != 0) +#define spin_unlock(x) ({((spinlock_t *) x)->lock = 0; barrier();}) +#define spin_trylock(x) (cmpxchg_acq(&(x)->lock, 0, 1) == 0) #define spin_unlock_wait(x) ({ do { barrier(); } while ((x)->lock); }) +#endif /* !NEW_LOCK */ + typedef struct { volatile int read_counter:31; volatile int write_lock:1; @@ -73,12 +118,12 @@ #define read_lock(rw) \ do { \ int tmp = 0; \ - __asm__ __volatile__ ("1:\tfetchadd4.acq %0 = %1, 1\n" \ + __asm__ __volatile__ ("1:\t"IA64_SEMFIX"fetchadd4.acq %0 = %1, 1\n" \ ";;\n" \ "tbit.nz p6,p0 = %0, 31\n" \ "(p6) br.cond.sptk.few 2f\n" \ ".section .text.lock,\"ax\"\n" \ - "2:\tfetchadd4.rel %0 = %1, -1\n" \ + "2:\t"IA64_SEMFIX"fetchadd4.rel %0 = %1, -1\n" \ ";;\n" \ "3:\tld4.acq %0 = %1\n" \ ";;\n" \ @@ -94,7 +139,7 @@ #define read_unlock(rw) \ do { \ int tmp = 0; \ - __asm__ __volatile__ ("fetchadd4.rel %0 = %1, -1\n" \ + __asm__ __volatile__ (IA64_SEMFIX"fetchadd4.rel %0 = %1, -1\n" \ : "=r" (tmp) \ : "m" (__atomic_fool_gcc(rw)) \ : "memory"); \ diff -urN linux-2.4.0-test6/include/asm-ia64/system.h linux-2.4.0-test6-lia/include/asm-ia64/system.h --- linux-2.4.0-test6/include/asm-ia64/system.h Thu Aug 10 19:56:31 2000 +++ linux-2.4.0-test6-lia/include/asm-ia64/system.h Fri Aug 11 16:43:55 2000 @@ -27,6 +27,15 @@ #define GATE_ADDR (0xa000000000000000 + PAGE_SIZE) +#if defined(CONFIG_ITANIUM_ASTEP_SPECIFIC) || defined(CONFIG_ITANIUM_BSTEP_SPECIFIC) + /* Workaround for Errata 
97. */ +# define IA64_SEMFIX_INSN mf; +# define IA64_SEMFIX "mf;" +#else +# define IA64_SEMFIX_INSN +# define IA64_SEMFIX "" +#endif + #ifndef __ASSEMBLY__ #include @@ -231,13 +240,13 @@ ({ \ switch (sz) { \ case 4: \ - __asm__ __volatile__ ("fetchadd4.rel %0=%1,%3" \ + __asm__ __volatile__ (IA64_SEMFIX"fetchadd4.rel %0=%1,%3" \ : "=r"(tmp), "=m"(__atomic_fool_gcc(v)) \ : "m" (__atomic_fool_gcc(v)), "i"(n)); \ break; \ \ case 8: \ - __asm__ __volatile__ ("fetchadd8.rel %0=%1,%3" \ + __asm__ __volatile__ (IA64_SEMFIX"fetchadd8.rel %0=%1,%3" \ : "=r"(tmp), "=m"(__atomic_fool_gcc(v)) \ : "m" (__atomic_fool_gcc(v)), "i"(n)); \ break; \ @@ -280,22 +289,22 @@ switch (size) { case 1: - __asm__ __volatile ("xchg1 %0=%1,%2" : "=r" (result) + __asm__ __volatile (IA64_SEMFIX"xchg1 %0=%1,%2" : "=r" (result) : "m" (*(char *) ptr), "r" (x) : "memory"); return result; case 2: - __asm__ __volatile ("xchg2 %0=%1,%2" : "=r" (result) + __asm__ __volatile (IA64_SEMFIX"xchg2 %0=%1,%2" : "=r" (result) : "m" (*(short *) ptr), "r" (x) : "memory"); return result; case 4: - __asm__ __volatile ("xchg4 %0=%1,%2" : "=r" (result) + __asm__ __volatile (IA64_SEMFIX"xchg4 %0=%1,%2" : "=r" (result) : "m" (*(int *) ptr), "r" (x) : "memory"); return result; case 8: - __asm__ __volatile ("xchg8 %0=%1,%2" : "=r" (result) + __asm__ __volatile (IA64_SEMFIX"xchg8 %0=%1,%2" : "=r" (result) : "m" (*(long *) ptr), "r" (x) : "memory"); return result; } @@ -305,7 +314,6 @@ #define xchg(ptr,x) \ ((__typeof__(*(ptr))) __xchg ((unsigned long) (x), (ptr), sizeof(*(ptr)))) -#define tas(ptr) (xchg ((ptr), 1)) /* * Atomic compare and exchange. 
Compare OLD with MEM, if identical, @@ -324,50 +332,50 @@ struct __xchg_dummy { unsigned long a[100]; }; #define __xg(x) (*(struct __xchg_dummy *)(x)) -#define ia64_cmpxchg(sem,ptr,old,new,size) \ -({ \ - __typeof__(ptr) _p_ = (ptr); \ - __typeof__(new) _n_ = (new); \ - __u64 _o_, _r_; \ - \ - switch (size) { \ - case 1: _o_ = (__u8 ) (old); break; \ - case 2: _o_ = (__u16) (old); break; \ - case 4: _o_ = (__u32) (old); break; \ - case 8: _o_ = (__u64) (old); break; \ - default: \ - } \ - __asm__ __volatile__ ("mov ar.ccv=%0;;" :: "rO"(_o_)); \ - switch (size) { \ - case 1: \ - __asm__ __volatile__ ("cmpxchg1."sem" %0=%2,%3,ar.ccv" \ - : "=r"(_r_), "=m"(__xg(_p_)) \ - : "m"(__xg(_p_)), "r"(_n_)); \ - break; \ - \ - case 2: \ - __asm__ __volatile__ ("cmpxchg2."sem" %0=%2,%3,ar.ccv" \ - : "=r"(_r_), "=m"(__xg(_p_)) \ - : "m"(__xg(_p_)), "r"(_n_)); \ - break; \ - \ - case 4: \ - __asm__ __volatile__ ("cmpxchg4."sem" %0=%2,%3,ar.ccv" \ - : "=r"(_r_), "=m"(__xg(_p_)) \ - : "m"(__xg(_p_)), "r"(_n_)); \ - break; \ - \ - case 8: \ - __asm__ __volatile__ ("cmpxchg8."sem" %0=%2,%3,ar.ccv" \ - : "=r"(_r_), "=m"(__xg(_p_)) \ - : "m"(__xg(_p_)), "r"(_n_)); \ - break; \ - \ - default: \ - _r_ = __cmpxchg_called_with_bad_pointer(); \ - break; \ - } \ - (__typeof__(old)) _r_; \ +#define ia64_cmpxchg(sem,ptr,old,new,size) \ +({ \ + __typeof__(ptr) _p_ = (ptr); \ + __typeof__(new) _n_ = (new); \ + __u64 _o_, _r_; \ + \ + switch (size) { \ + case 1: _o_ = (__u8 ) (long) (old); break; \ + case 2: _o_ = (__u16) (long) (old); break; \ + case 4: _o_ = (__u32) (long) (old); break; \ + case 8: _o_ = (__u64) (long) (old); break; \ + default: \ + } \ + __asm__ __volatile__ ("mov ar.ccv=%0;;" :: "rO"(_o_)); \ + switch (size) { \ + case 1: \ + __asm__ __volatile__ (IA64_SEMFIX"cmpxchg1."sem" %0=%2,%3,ar.ccv" \ + : "=r"(_r_), "=m"(__xg(_p_)) \ + : "m"(__xg(_p_)), "r"(_n_)); \ + break; \ + \ + case 2: \ + __asm__ __volatile__ (IA64_SEMFIX"cmpxchg2."sem" %0=%2,%3,ar.ccv" \ + : "=r"(_r_), 
"=m"(__xg(_p_)) \ + : "m"(__xg(_p_)), "r"(_n_)); \ + break; \ + \ + case 4: \ + __asm__ __volatile__ (IA64_SEMFIX"cmpxchg4."sem" %0=%2,%3,ar.ccv" \ + : "=r"(_r_), "=m"(__xg(_p_)) \ + : "m"(__xg(_p_)), "r"(_n_)); \ + break; \ + \ + case 8: \ + __asm__ __volatile__ (IA64_SEMFIX"cmpxchg8."sem" %0=%2,%3,ar.ccv" \ + : "=r"(_r_), "=m"(__xg(_p_)) \ + : "m"(__xg(_p_)), "r"(_n_)); \ + break; \ + \ + default: \ + _r_ = __cmpxchg_called_with_bad_pointer(); \ + break; \ + } \ + (__typeof__(old)) _r_; \ }) #define cmpxchg_acq(ptr,o,n) ia64_cmpxchg("acq", (ptr), (o), (n), sizeof(*(ptr))) @@ -418,15 +426,15 @@ extern void ia64_save_extra (struct task_struct *task); extern void ia64_load_extra (struct task_struct *task); -#define __switch_to(prev,next,last) do { \ - if (((prev)->thread.flags & IA64_THREAD_DBG_VALID) \ - || IS_IA32_PROCESS(ia64_task_regs(prev))) \ - ia64_save_extra(prev); \ - if (((next)->thread.flags & IA64_THREAD_DBG_VALID) \ - || IS_IA32_PROCESS(ia64_task_regs(next))) \ - ia64_load_extra(next); \ - ia64_psr(ia64_task_regs(next))->dfh = (ia64_get_fpu_owner() != (next)); \ - (last) = ia64_switch_to((next)); \ +#define __switch_to(prev,next,last) do { \ + if (((prev)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID)) \ + || IS_IA32_PROCESS(ia64_task_regs(prev))) \ + ia64_save_extra(prev); \ + if (((next)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID)) \ + || IS_IA32_PROCESS(ia64_task_regs(next))) \ + ia64_load_extra(next); \ + ia64_psr(ia64_task_regs(next))->dfh = (ia64_get_fpu_owner() != (next)); \ + (last) = ia64_switch_to((next)); \ } while (0) #ifdef CONFIG_SMP @@ -444,6 +452,7 @@ */ # define switch_to(prev,next,last) do { \ if (ia64_get_fpu_owner() == (prev) && ia64_psr(ia64_task_regs(prev))->mfh) { \ + ia64_psr(ia64_task_regs(prev))->mfh = 0; \ (prev)->thread.flags |= IA64_THREAD_FPH_VALID; \ __ia64_save_fpu((prev)->thread.fph); \ } \ diff -urN linux-2.4.0-test6/include/linux/irq.h linux-2.4.0-test6-lia/include/linux/irq.h --- 
linux-2.4.0-test6/include/linux/irq.h Fri Jul 14 17:19:28 2000 +++ linux-2.4.0-test6-lia/include/linux/irq.h Fri Aug 11 16:43:56 2000 @@ -56,6 +56,7 @@ #include /* the arch dependent stuff */ +extern unsigned int do_IRQ (unsigned long irq, struct pt_regs *regs); extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *); extern int setup_irq(unsigned int , struct irqaction * ); diff -urN linux-2.4.0-test6/include/linux/mmzone.h linux-2.4.0-test6-lia/include/linux/mmzone.h --- linux-2.4.0-test6/include/linux/mmzone.h Fri Jul 14 17:19:28 2000 +++ linux-2.4.0-test6-lia/include/linux/mmzone.h Fri Aug 11 16:43:56 2000 @@ -26,10 +26,10 @@ * Commonly accessed fields: */ spinlock_t lock; - unsigned long offset; - unsigned long free_pages; char low_on_memory; char zone_wake_kswapd; + unsigned long offset; + unsigned long free_pages; unsigned long pages_min, pages_low, pages_high; /* diff -urN linux-2.4.0-test6/include/linux/sched.h linux-2.4.0-test6-lia/include/linux/sched.h --- linux-2.4.0-test6/include/linux/sched.h Thu Aug 10 19:56:32 2000 +++ linux-2.4.0-test6-lia/include/linux/sched.h Fri Aug 11 16:43:56 2000 @@ -697,7 +697,8 @@ extern int expand_fdset(struct files_struct *, int nr); extern void free_fdset(fd_set *, int); -extern int copy_thread(int, unsigned long, unsigned long, struct task_struct *, struct pt_regs *); +extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, + struct pt_regs *); extern void flush_thread(void); extern void exit_thread(void); @@ -708,7 +709,7 @@ extern void daemonize(void); extern int do_execve(char *, char **, char **, struct pt_regs *); -extern int do_fork(unsigned long, unsigned long, struct pt_regs *); +extern int do_fork(unsigned long, unsigned long, unsigned long, struct pt_regs *); extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)); diff -urN 
linux-2.4.0-test6/init/main.c linux-2.4.0-test6-lia/init/main.c --- linux-2.4.0-test6/init/main.c Thu Aug 10 19:56:32 2000 +++ linux-2.4.0-test6-lia/init/main.c Fri Aug 11 14:55:42 2000 @@ -112,6 +112,9 @@ #if defined(CONFIG_QUOTA) extern void dquot_init_hash(void); #endif +#ifdef CONFIG_PERFMON +extern void perfmon_init(void); +#endif /* * Boot command-line arguments @@ -553,6 +556,9 @@ #endif mem_init(); kmem_cache_sizes_init(); +#ifdef CONFIG_PERFMON + perfmon_init(); +#endif #ifdef CONFIG_3215_CONSOLE con3215_activate(); #endif diff -urN linux-2.4.0-test6/kernel/fork.c linux-2.4.0-test6-lia/kernel/fork.c --- linux-2.4.0-test6/kernel/fork.c Thu Aug 10 19:56:32 2000 +++ linux-2.4.0-test6-lia/kernel/fork.c Mon Jul 31 14:01:22 2000 @@ -530,10 +530,15 @@ /* * Ok, this is the main fork-routine. It copies the system process - * information (task[nr]) and sets up the necessary registers. It - * also copies the data segment in its entirety. + * information (task[nr]) and sets up the necessary registers. It also + * copies the data segment in its entirety. The "stack_start" and + * "stack_top" arguments are simply passed along to the platform + * specific copy_thread() routine. Most platforms ignore stack_top. + * For an example that's using stack_top, see + * arch/ia64/kernel/process.c. 
*/ -int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs) +int do_fork(unsigned long clone_flags, unsigned long stack_start, unsigned long stack_top, + struct pt_regs *regs) { int retval = -ENOMEM; struct task_struct *p; @@ -632,7 +637,7 @@ goto bad_fork_cleanup_fs; if (copy_mm(clone_flags, p)) goto bad_fork_cleanup_sighand; - retval = copy_thread(0, clone_flags, usp, p, regs); + retval = copy_thread(0, clone_flags, stack_start, stack_top, p, regs); if (retval) goto bad_fork_cleanup_sighand; p->semundo = NULL; diff -urN linux-2.4.0-test6/kernel/printk.c linux-2.4.0-test6-lia/kernel/printk.c --- linux-2.4.0-test6/kernel/printk.c Wed Jul 5 11:00:21 2000 +++ linux-2.4.0-test6-lia/kernel/printk.c Fri Aug 11 19:10:10 2000 @@ -14,6 +14,8 @@ * manfreds@colorfullife.com */ +#include + #include #include #include @@ -296,6 +298,12 @@ break; } } +#ifdef CONFIG_IA64_EARLY_PRINTK + if (!console_drivers) { + static void early_printk (const char *str); + early_printk(msg); + } else +#endif if (msg_level < console_loglevel && console_drivers) { struct console *c = console_drivers; while(c) { @@ -412,6 +420,10 @@ } if ((console->flags & CON_PRINTBUFFER) == 0) goto done; +#ifdef CONFIG_IA64_EARLY_PRINTK + goto done; +#endif + /* * Print out buffered log messages. 
*/ @@ -495,3 +507,47 @@ tty->driver.write(tty, 0, msg, strlen(msg)); return; } + +#ifdef CONFIG_IA64_EARLY_PRINTK + +#include + +#define VGABASE ((char *)0x00000000000b8000) + +static int current_ypos = 50, current_xpos = 0; + +void +early_printk (const char *str) +{ + char c; + int i, k, j; + + while ((c = *str++) != '\0') { + if (current_ypos >= 50) { + /* scroll 1 line up */ + for (k = 1, j = 0; k < 50; k++, j++) { + for (i = 0; i < 80; i++) { + writew(readw(VGABASE + 2*(80*k + i)), + VGABASE + 2*(80*j + i)); + } + } + for (i = 0; i < 80; i++) { + writew(0x720, VGABASE + 2*(80*j + i)); + } + current_ypos = 49; + } + if (c == '\n') { + current_xpos = 0; + current_ypos++; + } else if (c != '\r') { + writew(((0x7 << 8) | (unsigned short) c), + VGABASE + 2*(80*current_ypos + current_xpos++)); + if (current_xpos >= 80) { + current_xpos = 0; + current_ypos++; + } + } + } +} + +#endif /* CONFIG_IA64_EARLY_PRINTK */ diff -urN linux-2.4.0-test6/kernel/timer.c linux-2.4.0-test6-lia/kernel/timer.c --- linux-2.4.0-test6/kernel/timer.c Thu Aug 10 19:56:32 2000 +++ linux-2.4.0-test6-lia/kernel/timer.c Mon Jul 31 14:01:22 2000 @@ -680,7 +680,7 @@ void do_timer(struct pt_regs *regs) { - (*(unsigned long *)&jiffies)++; + (*(volatile unsigned long *)&jiffies)++; #ifndef CONFIG_SMP /* SMP process accounting uses the local APIC timer */ diff -urN linux-2.4.0-test6/lib/cmdline.c linux-2.4.0-test6-lia/lib/cmdline.c --- linux-2.4.0-test6/lib/cmdline.c Tue Jun 20 07:52:36 2000 +++ linux-2.4.0-test6-lia/lib/cmdline.c Mon Jul 31 14:01:22 2000 @@ -85,12 +85,12 @@ * @ptr: Where parse begins * @retptr: (output) Pointer to next char after parse completes * - * Parses a string into a number. The number stored - * at @ptr is potentially suffixed with %K (for - * kilobytes, or 1024 bytes) or suffixed with %M (for - * megabytes, or 1048576 bytes). If the number is suffixed - * with K or M, then the return value is the number - * multiplied by one kilobyte, or one megabyte, respectively. 
+ * Parses a string into a number. The number stored at @ptr is + * potentially suffixed with %K (for kilobytes, or 1024 bytes), + * %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or + * 1073741824). If the number is suffixed with K, M, or G, then + * the return value is the number multiplied by one kilobyte, one + * megabyte, or one gigabyte, respectively. */ unsigned long memparse (char *ptr, char **retptr) @@ -98,6 +98,9 @@ unsigned long ret = simple_strtoul (ptr, retptr, 0); switch (**retptr) { + case 'G': + case 'g': + ret <<= 10; case 'M': case 'm': ret <<= 10;